diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,228438 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 32628, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.0648522741203875e-05, + "grad_norm": 30.493967972182688, + "learning_rate": 2.0429009193054138e-08, + "loss": 1.5817, + "step": 1 + }, + { + "epoch": 6.129704548240775e-05, + "grad_norm": 18.3156941112345, + "learning_rate": 4.0858018386108276e-08, + "loss": 1.6241, + "step": 2 + }, + { + "epoch": 9.194556822361162e-05, + "grad_norm": 4.763241272879156, + "learning_rate": 6.128702757916241e-08, + "loss": 0.8428, + "step": 3 + }, + { + "epoch": 0.0001225940909648155, + "grad_norm": 22.25406894394271, + "learning_rate": 8.171603677221655e-08, + "loss": 1.6559, + "step": 4 + }, + { + "epoch": 0.00015324261370601937, + "grad_norm": 23.233754765472387, + "learning_rate": 1.021450459652707e-07, + "loss": 1.5664, + "step": 5 + }, + { + "epoch": 0.00018389113644722325, + "grad_norm": 21.720826724853755, + "learning_rate": 1.2257405515832481e-07, + "loss": 1.6221, + "step": 6 + }, + { + "epoch": 0.00021453965918842712, + "grad_norm": 28.879189271658763, + "learning_rate": 1.4300306435137899e-07, + "loss": 1.5038, + "step": 7 + }, + { + "epoch": 0.000245188181929631, + "grad_norm": 23.625155605766647, + "learning_rate": 1.634320735444331e-07, + "loss": 1.5476, + "step": 8 + }, + { + "epoch": 0.0002758367046708349, + "grad_norm": 26.91882028108857, + "learning_rate": 1.8386108273748725e-07, + "loss": 1.6165, + "step": 9 + }, + { + "epoch": 0.00030648522741203875, + "grad_norm": 27.718717059266663, + "learning_rate": 2.042900919305414e-07, + "loss": 1.6369, + "step": 10 + }, + { + "epoch": 0.0003371337501532426, + "grad_norm": 31.26195407617415, + "learning_rate": 2.247191011235955e-07, + "loss": 1.5312, + "step": 11 + }, + { + "epoch": 0.0003677822728944465, + "grad_norm": 28.033405141130793, + "learning_rate": 2.4514811031664963e-07, + "loss": 1.4762, + "step": 12 + }, + { + "epoch": 0.0003984307956356504, + "grad_norm": 45.60381451772552, + "learning_rate": 2.655771195097038e-07, + "loss": 1.7533, + "step": 13 + }, + { + "epoch": 0.00042907931837685425, + "grad_norm": 4.360451759122601, + "learning_rate": 2.8600612870275797e-07, + "loss": 0.7788, + "step": 14 + }, + { + "epoch": 0.0004597278411180581, + "grad_norm": 17.04404361165066, + "learning_rate": 3.064351378958121e-07, + "loss": 1.5519, + "step": 15 + }, + { + "epoch": 0.000490376363859262, + "grad_norm": 4.6809486834720895, + "learning_rate": 3.268641470888662e-07, + "loss": 0.7947, + "step": 16 + }, + { + "epoch": 0.0005210248866004658, + "grad_norm": 4.6687596315658775, + "learning_rate": 3.472931562819203e-07, + "loss": 0.8315, + "step": 17 + }, + { + "epoch": 0.0005516734093416697, + "grad_norm": 4.8415429715552225, + "learning_rate": 3.677221654749745e-07, + "loss": 0.8309, + "step": 18 + }, + { + "epoch": 0.0005823219320828736, + "grad_norm": 17.849606851848158, + "learning_rate": 3.8815117466802864e-07, + "loss": 1.4729, + "step": 19 + }, + { + "epoch": 0.0006129704548240775, + "grad_norm": 12.87190826713787, + "learning_rate": 4.085801838610828e-07, + "loss": 1.4072, + "step": 20 + }, + { + "epoch": 0.0006436189775652813, + "grad_norm": 18.68524008245022, + "learning_rate": 4.2900919305413693e-07, + "loss": 1.434, + "step": 21 + }, + { + "epoch": 0.0006742675003064852, + "grad_norm": 19.80237566620811, + "learning_rate": 4.49438202247191e-07, + "loss": 1.5381, + "step": 22 + }, + { + "epoch": 0.0007049160230476891, + "grad_norm": 9.435405916065518, + "learning_rate": 4.698672114402452e-07, + "loss": 1.2651, + "step": 23 + }, + { + "epoch": 0.000735564545788893, + "grad_norm": 6.988771361578134, + "learning_rate": 4.902962206332993e-07, + "loss": 1.3877, + "step": 24 + }, + { + "epoch": 0.0007662130685300968, + "grad_norm": 6.964218476424204, + "learning_rate": 5.107252298263535e-07, + "loss": 1.4101, + "step": 25 + }, + { + "epoch": 0.0007968615912713007, + "grad_norm": 4.320831202709766, + "learning_rate": 5.311542390194075e-07, + "loss": 0.7778, + "step": 26 + }, + { + "epoch": 0.0008275101140125046, + "grad_norm": 6.334860741840969, + "learning_rate": 5.515832482124617e-07, + "loss": 1.3276, + "step": 27 + }, + { + "epoch": 0.0008581586367537085, + "grad_norm": 7.750763547961431, + "learning_rate": 5.720122574055159e-07, + "loss": 1.415, + "step": 28 + }, + { + "epoch": 0.0008888071594949123, + "grad_norm": 6.8032697377618065, + "learning_rate": 5.9244126659857e-07, + "loss": 1.3524, + "step": 29 + }, + { + "epoch": 0.0009194556822361162, + "grad_norm": 6.111747416391086, + "learning_rate": 6.128702757916242e-07, + "loss": 1.3721, + "step": 30 + }, + { + "epoch": 0.0009501042049773201, + "grad_norm": 6.051332763023607, + "learning_rate": 6.332992849846783e-07, + "loss": 1.3698, + "step": 31 + }, + { + "epoch": 0.000980752727718524, + "grad_norm": 4.807971405026247, + "learning_rate": 6.537282941777324e-07, + "loss": 1.3803, + "step": 32 + }, + { + "epoch": 0.001011401250459728, + "grad_norm": 3.831379252981244, + "learning_rate": 6.741573033707865e-07, + "loss": 0.7897, + "step": 33 + }, + { + "epoch": 0.0010420497732009316, + "grad_norm": 4.054755579306096, + "learning_rate": 6.945863125638406e-07, + "loss": 0.793, + "step": 34 + }, + { + "epoch": 0.0010726982959421356, + "grad_norm": 6.929443467642634, + "learning_rate": 7.150153217568949e-07, + "loss": 1.2382, + "step": 35 + }, + { + "epoch": 0.0011033468186833395, + "grad_norm": 5.760894193050263, + "learning_rate": 7.35444330949949e-07, + "loss": 1.3125, + "step": 36 + }, + { + "epoch": 0.0011339953414245434, + "grad_norm": 5.338703203322593, + "learning_rate": 7.558733401430031e-07, + "loss": 1.3771, + "step": 37 + }, + { + "epoch": 0.0011646438641657471, + "grad_norm": 3.87317425493285, + "learning_rate": 7.763023493360573e-07, + "loss": 0.8267, + "step": 38 + }, + { + "epoch": 0.001195292386906951, + "grad_norm": 4.554274249943934, + "learning_rate": 7.967313585291115e-07, + "loss": 1.1526, + "step": 39 + }, + { + "epoch": 0.001225940909648155, + "grad_norm": 4.684338184316146, + "learning_rate": 8.171603677221656e-07, + "loss": 1.2595, + "step": 40 + }, + { + "epoch": 0.001256589432389359, + "grad_norm": 4.587154058975048, + "learning_rate": 8.375893769152197e-07, + "loss": 1.287, + "step": 41 + }, + { + "epoch": 0.0012872379551305626, + "grad_norm": 4.795888641650047, + "learning_rate": 8.580183861082739e-07, + "loss": 1.3423, + "step": 42 + }, + { + "epoch": 0.0013178864778717666, + "grad_norm": 4.662854554846313, + "learning_rate": 8.78447395301328e-07, + "loss": 1.25, + "step": 43 + }, + { + "epoch": 0.0013485350006129705, + "grad_norm": 4.091030197014723, + "learning_rate": 8.98876404494382e-07, + "loss": 1.2985, + "step": 44 + }, + { + "epoch": 0.0013791835233541744, + "grad_norm": 5.272367730825289, + "learning_rate": 9.193054136874361e-07, + "loss": 1.2088, + "step": 45 + }, + { + "epoch": 0.0014098320460953781, + "grad_norm": 4.154272181360368, + "learning_rate": 9.397344228804904e-07, + "loss": 1.3121, + "step": 46 + }, + { + "epoch": 0.001440480568836582, + "grad_norm": 3.7558286704975803, + "learning_rate": 9.601634320735445e-07, + "loss": 1.2454, + "step": 47 + }, + { + "epoch": 0.001471129091577786, + "grad_norm": 3.5698250609285735, + "learning_rate": 9.805924412665985e-07, + "loss": 1.2106, + "step": 48 + }, + { + "epoch": 0.00150177761431899, + "grad_norm": 2.969191063472687, + "learning_rate": 1.001021450459653e-06, + "loss": 0.7383, + "step": 49 + }, + { + "epoch": 0.0015324261370601936, + "grad_norm": 3.9615323977727543, + "learning_rate": 1.021450459652707e-06, + "loss": 1.1815, + "step": 50 + }, + { + "epoch": 0.0015630746598013976, + "grad_norm": 3.7376134830232166, + "learning_rate": 1.0418794688457611e-06, + "loss": 1.2306, + "step": 51 + }, + { + "epoch": 0.0015937231825426015, + "grad_norm": 3.208716472767352, + "learning_rate": 1.062308478038815e-06, + "loss": 1.2633, + "step": 52 + }, + { + "epoch": 0.0016243717052838054, + "grad_norm": 3.7353721780646842, + "learning_rate": 1.0827374872318693e-06, + "loss": 1.2583, + "step": 53 + }, + { + "epoch": 0.0016550202280250091, + "grad_norm": 3.6589487812371284, + "learning_rate": 1.1031664964249235e-06, + "loss": 1.2286, + "step": 54 + }, + { + "epoch": 0.001685668750766213, + "grad_norm": 3.6531147246727804, + "learning_rate": 1.1235955056179777e-06, + "loss": 1.2197, + "step": 55 + }, + { + "epoch": 0.001716317273507417, + "grad_norm": 3.572959670962644, + "learning_rate": 1.1440245148110319e-06, + "loss": 1.1521, + "step": 56 + }, + { + "epoch": 0.001746965796248621, + "grad_norm": 2.647145317290758, + "learning_rate": 1.1644535240040859e-06, + "loss": 0.7606, + "step": 57 + }, + { + "epoch": 0.0017776143189898246, + "grad_norm": 3.634557973320935, + "learning_rate": 1.18488253319714e-06, + "loss": 1.1786, + "step": 58 + }, + { + "epoch": 0.0018082628417310286, + "grad_norm": 3.3637218969943636, + "learning_rate": 1.205311542390194e-06, + "loss": 1.1206, + "step": 59 + }, + { + "epoch": 0.0018389113644722325, + "grad_norm": 3.2235900661626276, + "learning_rate": 1.2257405515832485e-06, + "loss": 1.2689, + "step": 60 + }, + { + "epoch": 0.0018695598872134364, + "grad_norm": 2.4060904895120188, + "learning_rate": 1.2461695607763025e-06, + "loss": 0.7576, + "step": 61 + }, + { + "epoch": 0.0019002084099546401, + "grad_norm": 3.1631718731174856, + "learning_rate": 1.2665985699693567e-06, + "loss": 1.2148, + "step": 62 + }, + { + "epoch": 0.001930856932695844, + "grad_norm": 3.449253916289072, + "learning_rate": 1.2870275791624106e-06, + "loss": 1.2081, + "step": 63 + }, + { + "epoch": 0.001961505455437048, + "grad_norm": 3.3404422453175884, + "learning_rate": 1.3074565883554648e-06, + "loss": 1.3018, + "step": 64 + }, + { + "epoch": 0.001992153978178252, + "grad_norm": 2.8217437507603824, + "learning_rate": 1.3278855975485188e-06, + "loss": 1.0865, + "step": 65 + }, + { + "epoch": 0.002022802500919456, + "grad_norm": 2.718471539447668, + "learning_rate": 1.348314606741573e-06, + "loss": 1.1233, + "step": 66 + }, + { + "epoch": 0.0020534510236606593, + "grad_norm": 2.2022225436317413, + "learning_rate": 1.3687436159346274e-06, + "loss": 0.7491, + "step": 67 + }, + { + "epoch": 0.0020840995464018633, + "grad_norm": 3.0405637027868693, + "learning_rate": 1.3891726251276812e-06, + "loss": 1.132, + "step": 68 + }, + { + "epoch": 0.002114748069143067, + "grad_norm": 3.4925760632500356, + "learning_rate": 1.4096016343207356e-06, + "loss": 1.1336, + "step": 69 + }, + { + "epoch": 0.002145396591884271, + "grad_norm": 3.5018231409667147, + "learning_rate": 1.4300306435137898e-06, + "loss": 1.1455, + "step": 70 + }, + { + "epoch": 0.002176045114625475, + "grad_norm": 2.821159587899892, + "learning_rate": 1.4504596527068438e-06, + "loss": 1.109, + "step": 71 + }, + { + "epoch": 0.002206693637366679, + "grad_norm": 2.7354933273220565, + "learning_rate": 1.470888661899898e-06, + "loss": 1.0478, + "step": 72 + }, + { + "epoch": 0.002237342160107883, + "grad_norm": 3.0593056113716637, + "learning_rate": 1.4913176710929522e-06, + "loss": 1.1878, + "step": 73 + }, + { + "epoch": 0.002267990682849087, + "grad_norm": 2.8918629608692066, + "learning_rate": 1.5117466802860062e-06, + "loss": 1.0156, + "step": 74 + }, + { + "epoch": 0.0022986392055902903, + "grad_norm": 3.1720099152409764, + "learning_rate": 1.5321756894790604e-06, + "loss": 1.0464, + "step": 75 + }, + { + "epoch": 0.0023292877283314943, + "grad_norm": 3.1033089660907702, + "learning_rate": 1.5526046986721146e-06, + "loss": 1.0939, + "step": 76 + }, + { + "epoch": 0.002359936251072698, + "grad_norm": 2.6323471835897605, + "learning_rate": 1.5730337078651686e-06, + "loss": 1.051, + "step": 77 + }, + { + "epoch": 0.002390584773813902, + "grad_norm": 3.374147405391154, + "learning_rate": 1.593462717058223e-06, + "loss": 1.1677, + "step": 78 + }, + { + "epoch": 0.002421233296555106, + "grad_norm": 2.8109808501189164, + "learning_rate": 1.6138917262512767e-06, + "loss": 1.1327, + "step": 79 + }, + { + "epoch": 0.00245188181929631, + "grad_norm": 2.964243592252012, + "learning_rate": 1.6343207354443311e-06, + "loss": 1.1541, + "step": 80 + }, + { + "epoch": 0.002482530342037514, + "grad_norm": 1.7578752606122356, + "learning_rate": 1.6547497446373853e-06, + "loss": 0.7013, + "step": 81 + }, + { + "epoch": 0.002513178864778718, + "grad_norm": 1.747874506207181, + "learning_rate": 1.6751787538304393e-06, + "loss": 0.7206, + "step": 82 + }, + { + "epoch": 0.0025438273875199213, + "grad_norm": 2.9405059851624897, + "learning_rate": 1.6956077630234935e-06, + "loss": 1.0851, + "step": 83 + }, + { + "epoch": 0.0025744759102611253, + "grad_norm": 2.6381844181827967, + "learning_rate": 1.7160367722165477e-06, + "loss": 1.159, + "step": 84 + }, + { + "epoch": 0.002605124433002329, + "grad_norm": 3.0471008047935206, + "learning_rate": 1.7364657814096017e-06, + "loss": 1.1795, + "step": 85 + }, + { + "epoch": 0.002635772955743533, + "grad_norm": 2.7329102399282346, + "learning_rate": 1.756894790602656e-06, + "loss": 1.0347, + "step": 86 + }, + { + "epoch": 0.002666421478484737, + "grad_norm": 3.1607609924422673, + "learning_rate": 1.7773237997957101e-06, + "loss": 1.1232, + "step": 87 + }, + { + "epoch": 0.002697070001225941, + "grad_norm": 1.5649432961020648, + "learning_rate": 1.797752808988764e-06, + "loss": 0.6982, + "step": 88 + }, + { + "epoch": 0.002727718523967145, + "grad_norm": 2.760232581500901, + "learning_rate": 1.8181818181818183e-06, + "loss": 1.06, + "step": 89 + }, + { + "epoch": 0.002758367046708349, + "grad_norm": 2.567329608935741, + "learning_rate": 1.8386108273748723e-06, + "loss": 1.0352, + "step": 90 + }, + { + "epoch": 0.0027890155694495523, + "grad_norm": 3.016108073815816, + "learning_rate": 1.8590398365679265e-06, + "loss": 1.1676, + "step": 91 + }, + { + "epoch": 0.0028196640921907563, + "grad_norm": 2.6076894358822416, + "learning_rate": 1.8794688457609809e-06, + "loss": 1.0858, + "step": 92 + }, + { + "epoch": 0.00285031261493196, + "grad_norm": 1.6674487597111431, + "learning_rate": 1.8998978549540349e-06, + "loss": 0.728, + "step": 93 + }, + { + "epoch": 0.002880961137673164, + "grad_norm": 2.629601511747967, + "learning_rate": 1.920326864147089e-06, + "loss": 1.0624, + "step": 94 + }, + { + "epoch": 0.002911609660414368, + "grad_norm": 2.806882713238895, + "learning_rate": 1.940755873340143e-06, + "loss": 1.1176, + "step": 95 + }, + { + "epoch": 0.002942258183155572, + "grad_norm": 3.1241217392946843, + "learning_rate": 1.961184882533197e-06, + "loss": 1.1415, + "step": 96 + }, + { + "epoch": 0.002972906705896776, + "grad_norm": 2.8885226181740133, + "learning_rate": 1.9816138917262514e-06, + "loss": 1.1094, + "step": 97 + }, + { + "epoch": 0.00300355522863798, + "grad_norm": 2.6185717041175525, + "learning_rate": 2.002042900919306e-06, + "loss": 1.1343, + "step": 98 + }, + { + "epoch": 0.0030342037513791833, + "grad_norm": 2.9315497737821596, + "learning_rate": 2.02247191011236e-06, + "loss": 1.1203, + "step": 99 + }, + { + "epoch": 0.0030648522741203873, + "grad_norm": 3.077535189560261, + "learning_rate": 2.042900919305414e-06, + "loss": 1.1266, + "step": 100 + }, + { + "epoch": 0.003095500796861591, + "grad_norm": 2.9647000872586013, + "learning_rate": 2.063329928498468e-06, + "loss": 1.1649, + "step": 101 + }, + { + "epoch": 0.003126149319602795, + "grad_norm": 2.79351049232536, + "learning_rate": 2.0837589376915222e-06, + "loss": 1.0826, + "step": 102 + }, + { + "epoch": 0.003156797842343999, + "grad_norm": 3.157048837667011, + "learning_rate": 2.104187946884576e-06, + "loss": 1.1681, + "step": 103 + }, + { + "epoch": 0.003187446365085203, + "grad_norm": 2.4686803183590524, + "learning_rate": 2.12461695607763e-06, + "loss": 1.0172, + "step": 104 + }, + { + "epoch": 0.003218094887826407, + "grad_norm": 1.6263703765598607, + "learning_rate": 2.1450459652706846e-06, + "loss": 0.7124, + "step": 105 + }, + { + "epoch": 0.003248743410567611, + "grad_norm": 2.5788283077115084, + "learning_rate": 2.1654749744637386e-06, + "loss": 1.0704, + "step": 106 + }, + { + "epoch": 0.0032793919333088143, + "grad_norm": 2.9058369692461543, + "learning_rate": 2.1859039836567926e-06, + "loss": 1.0984, + "step": 107 + }, + { + "epoch": 0.0033100404560500183, + "grad_norm": 2.77609308815291, + "learning_rate": 2.206332992849847e-06, + "loss": 1.0837, + "step": 108 + }, + { + "epoch": 0.003340688978791222, + "grad_norm": 2.8353522502934307, + "learning_rate": 2.2267620020429014e-06, + "loss": 1.0764, + "step": 109 + }, + { + "epoch": 0.003371337501532426, + "grad_norm": 2.8236905663486014, + "learning_rate": 2.2471910112359554e-06, + "loss": 1.14, + "step": 110 + }, + { + "epoch": 0.00340198602427363, + "grad_norm": 3.06587636794078, + "learning_rate": 2.2676200204290094e-06, + "loss": 1.1432, + "step": 111 + }, + { + "epoch": 0.003432634547014834, + "grad_norm": 2.820524012217303, + "learning_rate": 2.2880490296220638e-06, + "loss": 0.9894, + "step": 112 + }, + { + "epoch": 0.003463283069756038, + "grad_norm": 2.729076331733727, + "learning_rate": 2.3084780388151178e-06, + "loss": 1.2534, + "step": 113 + }, + { + "epoch": 0.003493931592497242, + "grad_norm": 2.499528609025108, + "learning_rate": 2.3289070480081717e-06, + "loss": 1.2266, + "step": 114 + }, + { + "epoch": 0.0035245801152384453, + "grad_norm": 2.670228636023651, + "learning_rate": 2.3493360572012257e-06, + "loss": 1.0675, + "step": 115 + }, + { + "epoch": 0.0035552286379796493, + "grad_norm": 2.954445607107585, + "learning_rate": 2.36976506639428e-06, + "loss": 1.1018, + "step": 116 + }, + { + "epoch": 0.003585877160720853, + "grad_norm": 2.3766393968582347, + "learning_rate": 2.390194075587334e-06, + "loss": 1.06, + "step": 117 + }, + { + "epoch": 0.003616525683462057, + "grad_norm": 2.8359187467898246, + "learning_rate": 2.410623084780388e-06, + "loss": 1.0288, + "step": 118 + }, + { + "epoch": 0.003647174206203261, + "grad_norm": 2.689359548447686, + "learning_rate": 2.4310520939734425e-06, + "loss": 1.1591, + "step": 119 + }, + { + "epoch": 0.003677822728944465, + "grad_norm": 1.4252985971446477, + "learning_rate": 2.451481103166497e-06, + "loss": 0.7139, + "step": 120 + }, + { + "epoch": 0.003708471251685669, + "grad_norm": 1.4600835881138532, + "learning_rate": 2.4719101123595505e-06, + "loss": 0.6727, + "step": 121 + }, + { + "epoch": 0.003739119774426873, + "grad_norm": 2.253708986099978, + "learning_rate": 2.492339121552605e-06, + "loss": 1.0654, + "step": 122 + }, + { + "epoch": 0.0037697682971680763, + "grad_norm": 2.972415264798315, + "learning_rate": 2.5127681307456593e-06, + "loss": 0.9961, + "step": 123 + }, + { + "epoch": 0.0038004168199092803, + "grad_norm": 3.0948837511152067, + "learning_rate": 2.5331971399387133e-06, + "loss": 1.0391, + "step": 124 + }, + { + "epoch": 0.003831065342650484, + "grad_norm": 2.9133655233717874, + "learning_rate": 2.5536261491317673e-06, + "loss": 1.0808, + "step": 125 + }, + { + "epoch": 0.003861713865391688, + "grad_norm": 2.8042115164861863, + "learning_rate": 2.5740551583248213e-06, + "loss": 1.0235, + "step": 126 + }, + { + "epoch": 0.003892362388132892, + "grad_norm": 2.859933903476133, + "learning_rate": 2.5944841675178757e-06, + "loss": 1.059, + "step": 127 + }, + { + "epoch": 0.003923010910874096, + "grad_norm": 2.6805253212360847, + "learning_rate": 2.6149131767109297e-06, + "loss": 1.1177, + "step": 128 + }, + { + "epoch": 0.0039536594336153, + "grad_norm": 3.090559714780148, + "learning_rate": 2.635342185903984e-06, + "loss": 1.0537, + "step": 129 + }, + { + "epoch": 0.003984307956356504, + "grad_norm": 2.8733866350034845, + "learning_rate": 2.6557711950970376e-06, + "loss": 1.1004, + "step": 130 + }, + { + "epoch": 0.004014956479097708, + "grad_norm": 2.721457609534988, + "learning_rate": 2.676200204290092e-06, + "loss": 1.0165, + "step": 131 + }, + { + "epoch": 0.004045605001838912, + "grad_norm": 1.4350109370431765, + "learning_rate": 2.696629213483146e-06, + "loss": 0.7311, + "step": 132 + }, + { + "epoch": 0.004076253524580116, + "grad_norm": 2.342490789156108, + "learning_rate": 2.7170582226762004e-06, + "loss": 0.9803, + "step": 133 + }, + { + "epoch": 0.004106902047321319, + "grad_norm": 2.6792621558025833, + "learning_rate": 2.737487231869255e-06, + "loss": 1.0744, + "step": 134 + }, + { + "epoch": 0.004137550570062523, + "grad_norm": 2.6391263992083043, + "learning_rate": 2.757916241062309e-06, + "loss": 1.0264, + "step": 135 + }, + { + "epoch": 0.0041681990928037265, + "grad_norm": 2.7931460446716647, + "learning_rate": 2.7783452502553624e-06, + "loss": 1.0488, + "step": 136 + }, + { + "epoch": 0.0041988476155449305, + "grad_norm": 2.7519202376571856, + "learning_rate": 2.798774259448417e-06, + "loss": 1.0289, + "step": 137 + }, + { + "epoch": 0.004229496138286134, + "grad_norm": 2.4575678743218097, + "learning_rate": 2.8192032686414712e-06, + "loss": 1.0732, + "step": 138 + }, + { + "epoch": 0.004260144661027338, + "grad_norm": 2.733049351739151, + "learning_rate": 2.839632277834525e-06, + "loss": 1.0943, + "step": 139 + }, + { + "epoch": 0.004290793183768542, + "grad_norm": 1.3473193249729378, + "learning_rate": 2.8600612870275796e-06, + "loss": 0.7279, + "step": 140 + }, + { + "epoch": 0.004321441706509746, + "grad_norm": 2.3831161384059456, + "learning_rate": 2.8804902962206336e-06, + "loss": 0.977, + "step": 141 + }, + { + "epoch": 0.00435209022925095, + "grad_norm": 2.6096065902567354, + "learning_rate": 2.9009193054136876e-06, + "loss": 1.0558, + "step": 142 + }, + { + "epoch": 0.004382738751992154, + "grad_norm": 2.58159585843004, + "learning_rate": 2.9213483146067416e-06, + "loss": 1.0352, + "step": 143 + }, + { + "epoch": 0.004413387274733358, + "grad_norm": 2.8164485169134963, + "learning_rate": 2.941777323799796e-06, + "loss": 1.0237, + "step": 144 + }, + { + "epoch": 0.004444035797474562, + "grad_norm": 2.5796286440629173, + "learning_rate": 2.9622063329928504e-06, + "loss": 0.9967, + "step": 145 + }, + { + "epoch": 0.004474684320215766, + "grad_norm": 2.5406552905369164, + "learning_rate": 2.9826353421859044e-06, + "loss": 0.9974, + "step": 146 + }, + { + "epoch": 0.00450533284295697, + "grad_norm": 2.540197721313142, + "learning_rate": 3.003064351378958e-06, + "loss": 1.0009, + "step": 147 + }, + { + "epoch": 0.004535981365698174, + "grad_norm": 2.800985329320255, + "learning_rate": 3.0234933605720124e-06, + "loss": 1.1062, + "step": 148 + }, + { + "epoch": 0.004566629888439378, + "grad_norm": 3.0491195863770457, + "learning_rate": 3.0439223697650668e-06, + "loss": 1.1668, + "step": 149 + }, + { + "epoch": 0.004597278411180581, + "grad_norm": 1.2423018261026988, + "learning_rate": 3.0643513789581207e-06, + "loss": 0.7379, + "step": 150 + }, + { + "epoch": 0.004627926933921785, + "grad_norm": 1.2453795679007638, + "learning_rate": 3.084780388151175e-06, + "loss": 0.6964, + "step": 151 + }, + { + "epoch": 0.0046585754566629885, + "grad_norm": 3.621562461618694, + "learning_rate": 3.105209397344229e-06, + "loss": 1.0067, + "step": 152 + }, + { + "epoch": 0.0046892239794041925, + "grad_norm": 1.327634048788021, + "learning_rate": 3.125638406537283e-06, + "loss": 0.7257, + "step": 153 + }, + { + "epoch": 0.004719872502145396, + "grad_norm": 1.2861961961072481, + "learning_rate": 3.146067415730337e-06, + "loss": 0.7069, + "step": 154 + }, + { + "epoch": 0.0047505210248866, + "grad_norm": 1.176135828629108, + "learning_rate": 3.1664964249233915e-06, + "loss": 0.7114, + "step": 155 + }, + { + "epoch": 0.004781169547627804, + "grad_norm": 3.082416824100383, + "learning_rate": 3.186925434116446e-06, + "loss": 0.9052, + "step": 156 + }, + { + "epoch": 0.004811818070369008, + "grad_norm": 2.683191009853677, + "learning_rate": 3.2073544433095e-06, + "loss": 0.9765, + "step": 157 + }, + { + "epoch": 0.004842466593110212, + "grad_norm": 2.870934991577819, + "learning_rate": 3.2277834525025535e-06, + "loss": 1.0515, + "step": 158 + }, + { + "epoch": 0.004873115115851416, + "grad_norm": 3.0776998981255304, + "learning_rate": 3.248212461695608e-06, + "loss": 1.0582, + "step": 159 + }, + { + "epoch": 0.00490376363859262, + "grad_norm": 2.830344264289103, + "learning_rate": 3.2686414708886623e-06, + "loss": 1.0127, + "step": 160 + }, + { + "epoch": 0.004934412161333824, + "grad_norm": 2.9282992138424357, + "learning_rate": 3.2890704800817163e-06, + "loss": 1.0397, + "step": 161 + }, + { + "epoch": 0.004965060684075028, + "grad_norm": 2.5616006684664705, + "learning_rate": 3.3094994892747707e-06, + "loss": 1.0234, + "step": 162 + }, + { + "epoch": 0.004995709206816232, + "grad_norm": 2.610816595834288, + "learning_rate": 3.3299284984678247e-06, + "loss": 0.9744, + "step": 163 + }, + { + "epoch": 0.005026357729557436, + "grad_norm": 2.446624203582596, + "learning_rate": 3.3503575076608787e-06, + "loss": 0.9134, + "step": 164 + }, + { + "epoch": 0.00505700625229864, + "grad_norm": 2.574646560202555, + "learning_rate": 3.3707865168539327e-06, + "loss": 0.9576, + "step": 165 + }, + { + "epoch": 0.005087654775039843, + "grad_norm": 2.590262709130703, + "learning_rate": 3.391215526046987e-06, + "loss": 1.0124, + "step": 166 + }, + { + "epoch": 0.005118303297781047, + "grad_norm": 2.8806160632886697, + "learning_rate": 3.411644535240041e-06, + "loss": 0.9975, + "step": 167 + }, + { + "epoch": 0.0051489518205222505, + "grad_norm": 2.3297193035998904, + "learning_rate": 3.4320735444330955e-06, + "loss": 1.0647, + "step": 168 + }, + { + "epoch": 0.0051796003432634545, + "grad_norm": 2.3988557734057454, + "learning_rate": 3.452502553626149e-06, + "loss": 1.0654, + "step": 169 + }, + { + "epoch": 0.005210248866004658, + "grad_norm": 3.1756566398418835, + "learning_rate": 3.4729315628192034e-06, + "loss": 1.0318, + "step": 170 + }, + { + "epoch": 0.005240897388745862, + "grad_norm": 3.088968456483612, + "learning_rate": 3.493360572012258e-06, + "loss": 0.912, + "step": 171 + }, + { + "epoch": 0.005271545911487066, + "grad_norm": 2.713381130494515, + "learning_rate": 3.513789581205312e-06, + "loss": 0.865, + "step": 172 + }, + { + "epoch": 0.00530219443422827, + "grad_norm": 2.847404441056481, + "learning_rate": 3.5342185903983662e-06, + "loss": 1.0051, + "step": 173 + }, + { + "epoch": 0.005332842956969474, + "grad_norm": 2.76660983413183, + "learning_rate": 3.5546475995914202e-06, + "loss": 1.0319, + "step": 174 + }, + { + "epoch": 0.005363491479710678, + "grad_norm": 1.0513558620251888, + "learning_rate": 3.575076608784474e-06, + "loss": 0.6922, + "step": 175 + }, + { + "epoch": 0.005394140002451882, + "grad_norm": 2.3906309670723895, + "learning_rate": 3.595505617977528e-06, + "loss": 1.0108, + "step": 176 + }, + { + "epoch": 0.005424788525193086, + "grad_norm": 2.5879999073666826, + "learning_rate": 3.6159346271705826e-06, + "loss": 0.9123, + "step": 177 + }, + { + "epoch": 0.00545543704793429, + "grad_norm": 2.650376517027739, + "learning_rate": 3.6363636363636366e-06, + "loss": 0.9939, + "step": 178 + }, + { + "epoch": 0.005486085570675494, + "grad_norm": 2.831217883240634, + "learning_rate": 3.656792645556691e-06, + "loss": 1.1448, + "step": 179 + }, + { + "epoch": 0.005516734093416698, + "grad_norm": 2.26283811299664, + "learning_rate": 3.6772216547497446e-06, + "loss": 0.9702, + "step": 180 + }, + { + "epoch": 0.005547382616157902, + "grad_norm": 2.602757970154741, + "learning_rate": 3.697650663942799e-06, + "loss": 0.9468, + "step": 181 + }, + { + "epoch": 0.005578031138899105, + "grad_norm": 2.7804968598106528, + "learning_rate": 3.718079673135853e-06, + "loss": 1.0171, + "step": 182 + }, + { + "epoch": 0.005608679661640309, + "grad_norm": 1.0605807067091244, + "learning_rate": 3.7385086823289074e-06, + "loss": 0.7111, + "step": 183 + }, + { + "epoch": 0.0056393281843815125, + "grad_norm": 1.0423561796549063, + "learning_rate": 3.7589376915219618e-06, + "loss": 0.7092, + "step": 184 + }, + { + "epoch": 0.0056699767071227165, + "grad_norm": 3.16169011974763, + "learning_rate": 3.7793667007150158e-06, + "loss": 1.0748, + "step": 185 + }, + { + "epoch": 0.00570062522986392, + "grad_norm": 2.581998250338527, + "learning_rate": 3.7997957099080697e-06, + "loss": 0.9957, + "step": 186 + }, + { + "epoch": 0.005731273752605124, + "grad_norm": 2.6755667939753773, + "learning_rate": 3.820224719101124e-06, + "loss": 1.0639, + "step": 187 + }, + { + "epoch": 0.005761922275346328, + "grad_norm": 0.9751752293284478, + "learning_rate": 3.840653728294178e-06, + "loss": 0.6786, + "step": 188 + }, + { + "epoch": 0.005792570798087532, + "grad_norm": 2.811101959836897, + "learning_rate": 3.8610827374872325e-06, + "loss": 0.9947, + "step": 189 + }, + { + "epoch": 0.005823219320828736, + "grad_norm": 2.505452601784679, + "learning_rate": 3.881511746680286e-06, + "loss": 1.1234, + "step": 190 + }, + { + "epoch": 0.00585386784356994, + "grad_norm": 2.4723988142215454, + "learning_rate": 3.9019407558733405e-06, + "loss": 1.0071, + "step": 191 + }, + { + "epoch": 0.005884516366311144, + "grad_norm": 2.82421362660844, + "learning_rate": 3.922369765066394e-06, + "loss": 0.933, + "step": 192 + }, + { + "epoch": 0.005915164889052348, + "grad_norm": 2.9170019867549852, + "learning_rate": 3.9427987742594485e-06, + "loss": 1.0393, + "step": 193 + }, + { + "epoch": 0.005945813411793552, + "grad_norm": 2.7315203985667447, + "learning_rate": 3.963227783452503e-06, + "loss": 1.0087, + "step": 194 + }, + { + "epoch": 0.005976461934534756, + "grad_norm": 2.808139639874703, + "learning_rate": 3.983656792645557e-06, + "loss": 1.0234, + "step": 195 + }, + { + "epoch": 0.00600711045727596, + "grad_norm": 2.6792534589039967, + "learning_rate": 4.004085801838612e-06, + "loss": 0.9452, + "step": 196 + }, + { + "epoch": 0.006037758980017163, + "grad_norm": 2.4002560854804824, + "learning_rate": 4.024514811031665e-06, + "loss": 0.9654, + "step": 197 + }, + { + "epoch": 0.006068407502758367, + "grad_norm": 3.154529010802644, + "learning_rate": 4.04494382022472e-06, + "loss": 1.0314, + "step": 198 + }, + { + "epoch": 0.006099056025499571, + "grad_norm": 2.727186194202757, + "learning_rate": 4.065372829417773e-06, + "loss": 1.0726, + "step": 199 + }, + { + "epoch": 0.0061297045482407745, + "grad_norm": 2.976809073966541, + "learning_rate": 4.085801838610828e-06, + "loss": 1.2214, + "step": 200 + }, + { + "epoch": 0.0061603530709819785, + "grad_norm": 2.7930085049955724, + "learning_rate": 4.106230847803882e-06, + "loss": 0.9373, + "step": 201 + }, + { + "epoch": 0.006191001593723182, + "grad_norm": 2.994663251634685, + "learning_rate": 4.126659856996936e-06, + "loss": 1.0593, + "step": 202 + }, + { + "epoch": 0.006221650116464386, + "grad_norm": 1.1025809609324093, + "learning_rate": 4.14708886618999e-06, + "loss": 0.6905, + "step": 203 + }, + { + "epoch": 0.00625229863920559, + "grad_norm": 2.64562766844046, + "learning_rate": 4.1675178753830445e-06, + "loss": 0.8978, + "step": 204 + }, + { + "epoch": 0.006282947161946794, + "grad_norm": 2.5801475988842104, + "learning_rate": 4.187946884576099e-06, + "loss": 0.9635, + "step": 205 + }, + { + "epoch": 0.006313595684687998, + "grad_norm": 2.529225947792483, + "learning_rate": 4.208375893769152e-06, + "loss": 1.0209, + "step": 206 + }, + { + "epoch": 0.006344244207429202, + "grad_norm": 2.2461913051282685, + "learning_rate": 4.228804902962207e-06, + "loss": 0.9682, + "step": 207 + }, + { + "epoch": 0.006374892730170406, + "grad_norm": 0.9594725634569962, + "learning_rate": 4.24923391215526e-06, + "loss": 0.7115, + "step": 208 + }, + { + "epoch": 0.00640554125291161, + "grad_norm": 2.9352905837254957, + "learning_rate": 4.269662921348315e-06, + "loss": 1.0582, + "step": 209 + }, + { + "epoch": 0.006436189775652814, + "grad_norm": 2.9531554194178087, + "learning_rate": 4.290091930541369e-06, + "loss": 1.009, + "step": 210 + }, + { + "epoch": 0.006466838298394018, + "grad_norm": 2.7828493603116513, + "learning_rate": 4.310520939734424e-06, + "loss": 1.011, + "step": 211 + }, + { + "epoch": 0.006497486821135222, + "grad_norm": 2.8555513362371823, + "learning_rate": 4.330949948927477e-06, + "loss": 1.0104, + "step": 212 + }, + { + "epoch": 0.006528135343876425, + "grad_norm": 2.7723408448661413, + "learning_rate": 4.351378958120532e-06, + "loss": 0.9935, + "step": 213 + }, + { + "epoch": 0.006558783866617629, + "grad_norm": 2.7050876458597966, + "learning_rate": 4.371807967313585e-06, + "loss": 1.0704, + "step": 214 + }, + { + "epoch": 0.006589432389358833, + "grad_norm": 2.554821301859198, + "learning_rate": 4.3922369765066396e-06, + "loss": 0.8757, + "step": 215 + }, + { + "epoch": 0.0066200809121000365, + "grad_norm": 2.365694163140566, + "learning_rate": 4.412665985699694e-06, + "loss": 0.9435, + "step": 216 + }, + { + "epoch": 0.0066507294348412405, + "grad_norm": 2.451862001291209, + "learning_rate": 4.433094994892748e-06, + "loss": 1.0492, + "step": 217 + }, + { + "epoch": 0.006681377957582444, + "grad_norm": 2.8084334270168467, + "learning_rate": 4.453524004085803e-06, + "loss": 1.0099, + "step": 218 + }, + { + "epoch": 0.006712026480323648, + "grad_norm": 0.9785088635590857, + "learning_rate": 4.473953013278856e-06, + "loss": 0.655, + "step": 219 + }, + { + "epoch": 0.006742675003064852, + "grad_norm": 0.9611796570859155, + "learning_rate": 4.494382022471911e-06, + "loss": 0.701, + "step": 220 + }, + { + "epoch": 0.006773323525806056, + "grad_norm": 2.7385851772647, + "learning_rate": 4.514811031664964e-06, + "loss": 0.9814, + "step": 221 + }, + { + "epoch": 0.00680397204854726, + "grad_norm": 1.044119909002244, + "learning_rate": 4.535240040858019e-06, + "loss": 0.6992, + "step": 222 + }, + { + "epoch": 0.006834620571288464, + "grad_norm": 3.3779418629288354, + "learning_rate": 4.555669050051073e-06, + "loss": 1.0601, + "step": 223 + }, + { + "epoch": 0.006865269094029668, + "grad_norm": 2.9772607940732296, + "learning_rate": 4.5760980592441276e-06, + "loss": 0.9919, + "step": 224 + }, + { + "epoch": 0.006895917616770872, + "grad_norm": 2.6886162040758226, + "learning_rate": 4.596527068437181e-06, + "loss": 0.9789, + "step": 225 + }, + { + "epoch": 0.006926566139512076, + "grad_norm": 2.2226474660385738, + "learning_rate": 4.6169560776302355e-06, + "loss": 0.8962, + "step": 226 + }, + { + "epoch": 0.00695721466225328, + "grad_norm": 0.9501069342091707, + "learning_rate": 4.637385086823289e-06, + "loss": 0.6897, + "step": 227 + }, + { + "epoch": 0.006987863184994484, + "grad_norm": 3.0257729025367994, + "learning_rate": 4.6578140960163435e-06, + "loss": 0.9606, + "step": 228 + }, + { + "epoch": 0.007018511707735687, + "grad_norm": 2.771422996312676, + "learning_rate": 4.678243105209398e-06, + "loss": 1.079, + "step": 229 + }, + { + "epoch": 0.007049160230476891, + "grad_norm": 2.6932154444765404, + "learning_rate": 4.6986721144024515e-06, + "loss": 1.0123, + "step": 230 + }, + { + "epoch": 0.007079808753218095, + "grad_norm": 0.9368152225967132, + "learning_rate": 4.719101123595506e-06, + "loss": 0.6844, + "step": 231 + }, + { + "epoch": 0.0071104572759592985, + "grad_norm": 3.2758881446302714, + "learning_rate": 4.73953013278856e-06, + "loss": 1.0364, + "step": 232 + }, + { + "epoch": 0.0071411057987005025, + "grad_norm": 2.575836138808672, + "learning_rate": 4.759959141981615e-06, + "loss": 0.8499, + "step": 233 + }, + { + "epoch": 0.007171754321441706, + "grad_norm": 2.564061616035533, + "learning_rate": 4.780388151174668e-06, + "loss": 1.0385, + "step": 234 + }, + { + "epoch": 0.00720240284418291, + "grad_norm": 4.583049885448769, + "learning_rate": 4.800817160367723e-06, + "loss": 0.9532, + "step": 235 + }, + { + "epoch": 0.007233051366924114, + "grad_norm": 2.616405886728473, + "learning_rate": 4.821246169560776e-06, + "loss": 0.9018, + "step": 236 + }, + { + "epoch": 0.007263699889665318, + "grad_norm": 2.585209269903448, + "learning_rate": 4.841675178753831e-06, + "loss": 1.0251, + "step": 237 + }, + { + "epoch": 0.007294348412406522, + "grad_norm": 2.414310642286077, + "learning_rate": 4.862104187946885e-06, + "loss": 0.9793, + "step": 238 + }, + { + "epoch": 0.007324996935147726, + "grad_norm": 2.6708007458659995, + "learning_rate": 4.8825331971399395e-06, + "loss": 1.0036, + "step": 239 + }, + { + "epoch": 0.00735564545788893, + "grad_norm": 3.0377495778178845, + "learning_rate": 4.902962206332994e-06, + "loss": 0.9322, + "step": 240 + }, + { + "epoch": 0.007386293980630134, + "grad_norm": 2.592526164548802, + "learning_rate": 4.9233912155260474e-06, + "loss": 0.9943, + "step": 241 + }, + { + "epoch": 0.007416942503371338, + "grad_norm": 3.013322159115871, + "learning_rate": 4.943820224719101e-06, + "loss": 0.9467, + "step": 242 + }, + { + "epoch": 0.007447591026112542, + "grad_norm": 2.8675441888546707, + "learning_rate": 4.964249233912155e-06, + "loss": 0.9138, + "step": 243 + }, + { + "epoch": 0.007478239548853746, + "grad_norm": 2.44122227248762, + "learning_rate": 4.98467824310521e-06, + "loss": 0.8766, + "step": 244 + }, + { + "epoch": 0.007508888071594949, + "grad_norm": 2.8683058594190234, + "learning_rate": 5.005107252298263e-06, + "loss": 0.9783, + "step": 245 + }, + { + "epoch": 0.007539536594336153, + "grad_norm": 2.903741090380686, + "learning_rate": 5.025536261491319e-06, + "loss": 0.966, + "step": 246 + }, + { + "epoch": 0.007570185117077357, + "grad_norm": 2.920065983484851, + "learning_rate": 5.045965270684372e-06, + "loss": 0.9158, + "step": 247 + }, + { + "epoch": 0.0076008336398185605, + "grad_norm": 2.662475529475115, + "learning_rate": 5.066394279877427e-06, + "loss": 1.0344, + "step": 248 + }, + { + "epoch": 0.0076314821625597645, + "grad_norm": 2.889639035614433, + "learning_rate": 5.08682328907048e-06, + "loss": 0.8831, + "step": 249 + }, + { + "epoch": 0.007662130685300968, + "grad_norm": 2.4537777254415962, + "learning_rate": 5.1072522982635346e-06, + "loss": 0.9381, + "step": 250 + }, + { + "epoch": 0.007692779208042172, + "grad_norm": 2.7777729263860143, + "learning_rate": 5.127681307456589e-06, + "loss": 0.9281, + "step": 251 + }, + { + "epoch": 0.007723427730783376, + "grad_norm": 2.293750159831254, + "learning_rate": 5.1481103166496425e-06, + "loss": 0.9842, + "step": 252 + }, + { + "epoch": 0.00775407625352458, + "grad_norm": 2.4420435543918604, + "learning_rate": 5.168539325842698e-06, + "loss": 1.0062, + "step": 253 + }, + { + "epoch": 0.007784724776265784, + "grad_norm": 2.7665475037420215, + "learning_rate": 5.188968335035751e-06, + "loss": 1.0038, + "step": 254 + }, + { + "epoch": 0.007815373299006988, + "grad_norm": 2.7651199063580014, + "learning_rate": 5.209397344228805e-06, + "loss": 0.9616, + "step": 255 + }, + { + "epoch": 0.007846021821748192, + "grad_norm": 2.793232909469127, + "learning_rate": 5.229826353421859e-06, + "loss": 0.9385, + "step": 256 + }, + { + "epoch": 0.007876670344489396, + "grad_norm": 2.7740552134610947, + "learning_rate": 5.250255362614913e-06, + "loss": 1.0042, + "step": 257 + }, + { + "epoch": 0.0079073188672306, + "grad_norm": 2.572091673408714, + "learning_rate": 5.270684371807968e-06, + "loss": 0.9558, + "step": 258 + }, + { + "epoch": 0.007937967389971804, + "grad_norm": 2.4052444469419596, + "learning_rate": 5.291113381001022e-06, + "loss": 0.9185, + "step": 259 + }, + { + "epoch": 0.007968615912713008, + "grad_norm": 9.907318965919151, + "learning_rate": 5.311542390194075e-06, + "loss": 0.9626, + "step": 260 + }, + { + "epoch": 0.007999264435454212, + "grad_norm": 0.9900833050234104, + "learning_rate": 5.3319713993871305e-06, + "loss": 0.6586, + "step": 261 + }, + { + "epoch": 0.008029912958195416, + "grad_norm": 2.4866181899956157, + "learning_rate": 5.352400408580184e-06, + "loss": 0.9297, + "step": 262 + }, + { + "epoch": 0.00806056148093662, + "grad_norm": 2.457600152493836, + "learning_rate": 5.3728294177732385e-06, + "loss": 1.0298, + "step": 263 + }, + { + "epoch": 0.008091210003677823, + "grad_norm": 0.9989123385940305, + "learning_rate": 5.393258426966292e-06, + "loss": 0.6971, + "step": 264 + }, + { + "epoch": 0.008121858526419027, + "grad_norm": 2.873248224487878, + "learning_rate": 5.413687436159347e-06, + "loss": 0.9608, + "step": 265 + }, + { + "epoch": 0.008152507049160231, + "grad_norm": 2.4712830275529485, + "learning_rate": 5.434116445352401e-06, + "loss": 0.949, + "step": 266 + }, + { + "epoch": 0.008183155571901435, + "grad_norm": 2.428472675174206, + "learning_rate": 5.4545454545454545e-06, + "loss": 1.0313, + "step": 267 + }, + { + "epoch": 0.008213804094642637, + "grad_norm": 0.9204090786478454, + "learning_rate": 5.47497446373851e-06, + "loss": 0.648, + "step": 268 + }, + { + "epoch": 0.008244452617383841, + "grad_norm": 2.854856278999458, + "learning_rate": 5.495403472931563e-06, + "loss": 1.1202, + "step": 269 + }, + { + "epoch": 0.008275101140125045, + "grad_norm": 2.706001724312656, + "learning_rate": 5.515832482124618e-06, + "loss": 0.9969, + "step": 270 + }, + { + "epoch": 0.00830574966286625, + "grad_norm": 0.890237986393971, + "learning_rate": 5.536261491317671e-06, + "loss": 0.6747, + "step": 271 + }, + { + "epoch": 0.008336398185607453, + "grad_norm": 0.9590408672503857, + "learning_rate": 5.556690500510725e-06, + "loss": 0.6941, + "step": 272 + }, + { + "epoch": 0.008367046708348657, + "grad_norm": 2.622587505287746, + "learning_rate": 5.57711950970378e-06, + "loss": 0.9592, + "step": 273 + }, + { + "epoch": 0.008397695231089861, + "grad_norm": 2.6171733425431762, + "learning_rate": 5.597548518896834e-06, + "loss": 0.9422, + "step": 274 + }, + { + "epoch": 0.008428343753831065, + "grad_norm": 2.603303773430295, + "learning_rate": 5.617977528089889e-06, + "loss": 0.9239, + "step": 275 + }, + { + "epoch": 0.008458992276572269, + "grad_norm": 0.9219756562328341, + "learning_rate": 5.6384065372829424e-06, + "loss": 0.6821, + "step": 276 + }, + { + "epoch": 0.008489640799313473, + "grad_norm": 2.7734585699278202, + "learning_rate": 5.658835546475996e-06, + "loss": 0.9716, + "step": 277 + }, + { + "epoch": 0.008520289322054677, + "grad_norm": 2.9012975670343786, + "learning_rate": 5.67926455566905e-06, + "loss": 1.023, + "step": 278 + }, + { + "epoch": 0.00855093784479588, + "grad_norm": 0.9208429675348674, + "learning_rate": 5.699693564862104e-06, + "loss": 0.6857, + "step": 279 + }, + { + "epoch": 0.008581586367537085, + "grad_norm": 2.5503282581464326, + "learning_rate": 5.720122574055159e-06, + "loss": 0.9324, + "step": 280 + }, + { + "epoch": 0.008612234890278288, + "grad_norm": 2.845793986285864, + "learning_rate": 5.740551583248213e-06, + "loss": 0.9589, + "step": 281 + }, + { + "epoch": 0.008642883413019492, + "grad_norm": 2.5774212356506094, + "learning_rate": 5.760980592441267e-06, + "loss": 0.9752, + "step": 282 + }, + { + "epoch": 0.008673531935760696, + "grad_norm": 3.3867953060285294, + "learning_rate": 5.781409601634322e-06, + "loss": 1.0925, + "step": 283 + }, + { + "epoch": 0.0087041804585019, + "grad_norm": 2.250944051295965, + "learning_rate": 5.801838610827375e-06, + "loss": 0.9902, + "step": 284 + }, + { + "epoch": 0.008734828981243104, + "grad_norm": 2.7013504819061906, + "learning_rate": 5.82226762002043e-06, + "loss": 0.9758, + "step": 285 + }, + { + "epoch": 0.008765477503984308, + "grad_norm": 2.6089194167873857, + "learning_rate": 5.842696629213483e-06, + "loss": 0.9593, + "step": 286 + }, + { + "epoch": 0.008796126026725512, + "grad_norm": 2.3762687395008344, + "learning_rate": 5.863125638406538e-06, + "loss": 1.0078, + "step": 287 + }, + { + "epoch": 0.008826774549466716, + "grad_norm": 2.4760990177402697, + "learning_rate": 5.883554647599592e-06, + "loss": 0.9626, + "step": 288 + }, + { + "epoch": 0.00885742307220792, + "grad_norm": 3.0462055247879056, + "learning_rate": 5.9039836567926455e-06, + "loss": 0.9145, + "step": 289 + }, + { + "epoch": 0.008888071594949124, + "grad_norm": 2.8228920649334497, + "learning_rate": 5.924412665985701e-06, + "loss": 0.9642, + "step": 290 + }, + { + "epoch": 0.008918720117690328, + "grad_norm": 2.6351624537395257, + "learning_rate": 5.944841675178754e-06, + "loss": 0.8952, + "step": 291 + }, + { + "epoch": 0.008949368640431532, + "grad_norm": 2.5806948155639384, + "learning_rate": 5.965270684371809e-06, + "loss": 0.8203, + "step": 292 + }, + { + "epoch": 0.008980017163172736, + "grad_norm": 0.95046861950925, + "learning_rate": 5.985699693564862e-06, + "loss": 0.6629, + "step": 293 + }, + { + "epoch": 0.00901066568591394, + "grad_norm": 2.6758542609966476, + "learning_rate": 6.006128702757916e-06, + "loss": 0.9635, + "step": 294 + }, + { + "epoch": 0.009041314208655143, + "grad_norm": 2.5562518030289634, + "learning_rate": 6.026557711950971e-06, + "loss": 0.9888, + "step": 295 + }, + { + "epoch": 0.009071962731396347, + "grad_norm": 2.203675006901022, + "learning_rate": 6.046986721144025e-06, + "loss": 0.9371, + "step": 296 + }, + { + "epoch": 0.009102611254137551, + "grad_norm": 0.9671812004324499, + "learning_rate": 6.06741573033708e-06, + "loss": 0.6728, + "step": 297 + }, + { + "epoch": 0.009133259776878755, + "grad_norm": 0.8812442518665757, + "learning_rate": 6.0878447395301335e-06, + "loss": 0.6454, + "step": 298 + }, + { + "epoch": 0.00916390829961996, + "grad_norm": 2.601328924514981, + "learning_rate": 6.108273748723187e-06, + "loss": 0.9477, + "step": 299 + }, + { + "epoch": 0.009194556822361161, + "grad_norm": 2.583233848983035, + "learning_rate": 6.1287027579162415e-06, + "loss": 0.8375, + "step": 300 + }, + { + "epoch": 0.009225205345102365, + "grad_norm": 2.5351869886481073, + "learning_rate": 6.149131767109295e-06, + "loss": 0.9335, + "step": 301 + }, + { + "epoch": 0.00925585386784357, + "grad_norm": 2.386695109703272, + "learning_rate": 6.16956077630235e-06, + "loss": 0.9334, + "step": 302 + }, + { + "epoch": 0.009286502390584773, + "grad_norm": 0.8826130833880514, + "learning_rate": 6.189989785495404e-06, + "loss": 0.6658, + "step": 303 + }, + { + "epoch": 0.009317150913325977, + "grad_norm": 2.7620465831270127, + "learning_rate": 6.210418794688458e-06, + "loss": 0.912, + "step": 304 + }, + { + "epoch": 0.009347799436067181, + "grad_norm": 2.6230876661737814, + "learning_rate": 6.230847803881513e-06, + "loss": 0.9622, + "step": 305 + }, + { + "epoch": 0.009378447958808385, + "grad_norm": 2.5076323155464015, + "learning_rate": 6.251276813074566e-06, + "loss": 0.8238, + "step": 306 + }, + { + "epoch": 0.009409096481549589, + "grad_norm": 2.6117219731450656, + "learning_rate": 6.271705822267621e-06, + "loss": 0.9695, + "step": 307 + }, + { + "epoch": 0.009439745004290793, + "grad_norm": 2.407022058187931, + "learning_rate": 6.292134831460674e-06, + "loss": 0.9739, + "step": 308 + }, + { + "epoch": 0.009470393527031997, + "grad_norm": 2.935957559963329, + "learning_rate": 6.3125638406537295e-06, + "loss": 1.0833, + "step": 309 + }, + { + "epoch": 0.0095010420497732, + "grad_norm": 0.8949590910967985, + "learning_rate": 6.332992849846783e-06, + "loss": 0.6566, + "step": 310 + }, + { + "epoch": 0.009531690572514405, + "grad_norm": 2.8814955620933485, + "learning_rate": 6.353421859039837e-06, + "loss": 0.9364, + "step": 311 + }, + { + "epoch": 0.009562339095255609, + "grad_norm": 2.8466325048406746, + "learning_rate": 6.373850868232892e-06, + "loss": 0.8966, + "step": 312 + }, + { + "epoch": 0.009592987617996812, + "grad_norm": 2.7370304314309046, + "learning_rate": 6.3942798774259454e-06, + "loss": 1.0726, + "step": 313 + }, + { + "epoch": 0.009623636140738016, + "grad_norm": 0.9452134359598455, + "learning_rate": 6.414708886619e-06, + "loss": 0.6784, + "step": 314 + }, + { + "epoch": 0.00965428466347922, + "grad_norm": 2.553628472558678, + "learning_rate": 6.435137895812053e-06, + "loss": 0.994, + "step": 315 + }, + { + "epoch": 0.009684933186220424, + "grad_norm": 2.704179750705698, + "learning_rate": 6.455566905005107e-06, + "loss": 0.8371, + "step": 316 + }, + { + "epoch": 0.009715581708961628, + "grad_norm": 2.7758669330328596, + "learning_rate": 6.475995914198162e-06, + "loss": 1.1296, + "step": 317 + }, + { + "epoch": 0.009746230231702832, + "grad_norm": 2.935989552334638, + "learning_rate": 6.496424923391216e-06, + "loss": 0.9797, + "step": 318 + }, + { + "epoch": 0.009776878754444036, + "grad_norm": 2.518422145608009, + "learning_rate": 6.51685393258427e-06, + "loss": 0.8674, + "step": 319 + }, + { + "epoch": 0.00980752727718524, + "grad_norm": 2.4814238994158937, + "learning_rate": 6.537282941777325e-06, + "loss": 0.8347, + "step": 320 + }, + { + "epoch": 0.009838175799926444, + "grad_norm": 2.9103175947143023, + "learning_rate": 6.557711950970378e-06, + "loss": 1.0032, + "step": 321 + }, + { + "epoch": 0.009868824322667648, + "grad_norm": 2.5782504039664107, + "learning_rate": 6.5781409601634326e-06, + "loss": 0.9307, + "step": 322 + }, + { + "epoch": 0.009899472845408852, + "grad_norm": 2.6048953779020056, + "learning_rate": 6.598569969356486e-06, + "loss": 0.9462, + "step": 323 + }, + { + "epoch": 0.009930121368150056, + "grad_norm": 2.2691107252543605, + "learning_rate": 6.618998978549541e-06, + "loss": 0.8167, + "step": 324 + }, + { + "epoch": 0.00996076989089126, + "grad_norm": 2.2750955490940243, + "learning_rate": 6.639427987742595e-06, + "loss": 0.9706, + "step": 325 + }, + { + "epoch": 0.009991418413632464, + "grad_norm": 2.7535165001525157, + "learning_rate": 6.659856996935649e-06, + "loss": 0.9397, + "step": 326 + }, + { + "epoch": 0.010022066936373667, + "grad_norm": 2.6741949699774596, + "learning_rate": 6.680286006128704e-06, + "loss": 0.9763, + "step": 327 + }, + { + "epoch": 0.010052715459114871, + "grad_norm": 3.218318574451944, + "learning_rate": 6.700715015321757e-06, + "loss": 0.9766, + "step": 328 + }, + { + "epoch": 0.010083363981856075, + "grad_norm": 2.5371356208160245, + "learning_rate": 6.721144024514812e-06, + "loss": 0.8429, + "step": 329 + }, + { + "epoch": 0.01011401250459728, + "grad_norm": 2.340373876864956, + "learning_rate": 6.741573033707865e-06, + "loss": 0.902, + "step": 330 + }, + { + "epoch": 0.010144661027338481, + "grad_norm": 2.570654602832883, + "learning_rate": 6.7620020429009206e-06, + "loss": 0.9225, + "step": 331 + }, + { + "epoch": 0.010175309550079685, + "grad_norm": 2.8906550238459756, + "learning_rate": 6.782431052093974e-06, + "loss": 0.9928, + "step": 332 + }, + { + "epoch": 0.01020595807282089, + "grad_norm": 2.3349565633238014, + "learning_rate": 6.802860061287028e-06, + "loss": 0.8969, + "step": 333 + }, + { + "epoch": 0.010236606595562093, + "grad_norm": 2.7049693413003015, + "learning_rate": 6.823289070480082e-06, + "loss": 0.9972, + "step": 334 + }, + { + "epoch": 0.010267255118303297, + "grad_norm": 2.450837830192933, + "learning_rate": 6.8437180796731365e-06, + "loss": 0.885, + "step": 335 + }, + { + "epoch": 0.010297903641044501, + "grad_norm": 2.7202051705265946, + "learning_rate": 6.864147088866191e-06, + "loss": 0.8777, + "step": 336 + }, + { + "epoch": 0.010328552163785705, + "grad_norm": 2.688965209325227, + "learning_rate": 6.8845760980592445e-06, + "loss": 0.9932, + "step": 337 + }, + { + "epoch": 0.010359200686526909, + "grad_norm": 2.9319568859941634, + "learning_rate": 6.905005107252298e-06, + "loss": 0.8436, + "step": 338 + }, + { + "epoch": 0.010389849209268113, + "grad_norm": 3.052622159432128, + "learning_rate": 6.925434116445353e-06, + "loss": 0.893, + "step": 339 + }, + { + "epoch": 0.010420497732009317, + "grad_norm": 2.4701672821090757, + "learning_rate": 6.945863125638407e-06, + "loss": 0.9673, + "step": 340 + }, + { + "epoch": 0.01045114625475052, + "grad_norm": 2.7929280463211756, + "learning_rate": 6.966292134831461e-06, + "loss": 0.9732, + "step": 341 + }, + { + "epoch": 0.010481794777491725, + "grad_norm": 2.82319938837507, + "learning_rate": 6.986721144024516e-06, + "loss": 0.9114, + "step": 342 + }, + { + "epoch": 0.010512443300232929, + "grad_norm": 3.0042769655448143, + "learning_rate": 7.007150153217569e-06, + "loss": 1.0067, + "step": 343 + }, + { + "epoch": 0.010543091822974133, + "grad_norm": 2.6539157944190834, + "learning_rate": 7.027579162410624e-06, + "loss": 0.9576, + "step": 344 + }, + { + "epoch": 0.010573740345715336, + "grad_norm": 2.4437483991651128, + "learning_rate": 7.048008171603677e-06, + "loss": 1.0526, + "step": 345 + }, + { + "epoch": 0.01060438886845654, + "grad_norm": 0.9083369865883485, + "learning_rate": 7.0684371807967325e-06, + "loss": 0.6823, + "step": 346 + }, + { + "epoch": 0.010635037391197744, + "grad_norm": 2.424874065991257, + "learning_rate": 7.088866189989786e-06, + "loss": 0.8982, + "step": 347 + }, + { + "epoch": 0.010665685913938948, + "grad_norm": 3.3403666150636355, + "learning_rate": 7.1092951991828404e-06, + "loss": 0.9684, + "step": 348 + }, + { + "epoch": 0.010696334436680152, + "grad_norm": 2.6039513491327204, + "learning_rate": 7.129724208375894e-06, + "loss": 0.9663, + "step": 349 + }, + { + "epoch": 0.010726982959421356, + "grad_norm": 2.6135094881167693, + "learning_rate": 7.150153217568948e-06, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 0.01075763148216256, + "grad_norm": 2.3549186283981602, + "learning_rate": 7.170582226762003e-06, + "loss": 0.8617, + "step": 351 + }, + { + "epoch": 0.010788280004903764, + "grad_norm": 2.790903385571358, + "learning_rate": 7.191011235955056e-06, + "loss": 0.9545, + "step": 352 + }, + { + "epoch": 0.010818928527644968, + "grad_norm": 2.503572399648045, + "learning_rate": 7.211440245148112e-06, + "loss": 0.9476, + "step": 353 + }, + { + "epoch": 0.010849577050386172, + "grad_norm": 2.773039029989183, + "learning_rate": 7.231869254341165e-06, + "loss": 1.042, + "step": 354 + }, + { + "epoch": 0.010880225573127376, + "grad_norm": 2.4748389485073146, + "learning_rate": 7.252298263534219e-06, + "loss": 0.9631, + "step": 355 + }, + { + "epoch": 0.01091087409586858, + "grad_norm": 2.4779423028684135, + "learning_rate": 7.272727272727273e-06, + "loss": 0.9177, + "step": 356 + }, + { + "epoch": 0.010941522618609784, + "grad_norm": 2.441972768503756, + "learning_rate": 7.293156281920328e-06, + "loss": 0.9958, + "step": 357 + }, + { + "epoch": 0.010972171141350988, + "grad_norm": 2.7325103827877757, + "learning_rate": 7.313585291113382e-06, + "loss": 1.0153, + "step": 358 + }, + { + "epoch": 0.011002819664092191, + "grad_norm": 2.305937296751103, + "learning_rate": 7.3340143003064355e-06, + "loss": 0.8936, + "step": 359 + }, + { + "epoch": 0.011033468186833395, + "grad_norm": 2.4650238933570137, + "learning_rate": 7.354443309499489e-06, + "loss": 0.9684, + "step": 360 + }, + { + "epoch": 0.0110641167095746, + "grad_norm": 2.730309027686112, + "learning_rate": 7.374872318692544e-06, + "loss": 0.9034, + "step": 361 + }, + { + "epoch": 0.011094765232315803, + "grad_norm": 2.6065886961277913, + "learning_rate": 7.395301327885598e-06, + "loss": 0.9511, + "step": 362 + }, + { + "epoch": 0.011125413755057005, + "grad_norm": 0.8921857102451739, + "learning_rate": 7.415730337078652e-06, + "loss": 0.655, + "step": 363 + }, + { + "epoch": 0.01115606227779821, + "grad_norm": 2.5867548192029988, + "learning_rate": 7.436159346271706e-06, + "loss": 0.9303, + "step": 364 + }, + { + "epoch": 0.011186710800539413, + "grad_norm": 2.5233038007658766, + "learning_rate": 7.456588355464761e-06, + "loss": 0.8817, + "step": 365 + }, + { + "epoch": 0.011217359323280617, + "grad_norm": 0.8997909867333483, + "learning_rate": 7.477017364657815e-06, + "loss": 0.6591, + "step": 366 + }, + { + "epoch": 0.011248007846021821, + "grad_norm": 2.409465910270309, + "learning_rate": 7.497446373850868e-06, + "loss": 0.9209, + "step": 367 + }, + { + "epoch": 0.011278656368763025, + "grad_norm": 0.905632549447766, + "learning_rate": 7.5178753830439235e-06, + "loss": 0.6733, + "step": 368 + }, + { + "epoch": 0.011309304891504229, + "grad_norm": 2.6991095677106944, + "learning_rate": 7.538304392236977e-06, + "loss": 1.0115, + "step": 369 + }, + { + "epoch": 0.011339953414245433, + "grad_norm": 2.7618298686472174, + "learning_rate": 7.5587334014300315e-06, + "loss": 0.864, + "step": 370 + }, + { + "epoch": 0.011370601936986637, + "grad_norm": 0.9083941490153253, + "learning_rate": 7.579162410623085e-06, + "loss": 0.6872, + "step": 371 + }, + { + "epoch": 0.01140125045972784, + "grad_norm": 2.278728187960731, + "learning_rate": 7.5995914198161395e-06, + "loss": 0.9141, + "step": 372 + }, + { + "epoch": 0.011431898982469045, + "grad_norm": 2.2427178878477894, + "learning_rate": 7.620020429009194e-06, + "loss": 0.9027, + "step": 373 + }, + { + "epoch": 0.011462547505210249, + "grad_norm": 2.452009605618793, + "learning_rate": 7.640449438202247e-06, + "loss": 0.9233, + "step": 374 + }, + { + "epoch": 0.011493196027951453, + "grad_norm": 2.566153713125024, + "learning_rate": 7.660878447395303e-06, + "loss": 0.9125, + "step": 375 + }, + { + "epoch": 0.011523844550692657, + "grad_norm": 0.9012053988911358, + "learning_rate": 7.681307456588356e-06, + "loss": 0.6375, + "step": 376 + }, + { + "epoch": 0.01155449307343386, + "grad_norm": 2.462177845391429, + "learning_rate": 7.70173646578141e-06, + "loss": 0.885, + "step": 377 + }, + { + "epoch": 0.011585141596175064, + "grad_norm": 0.9144605337968716, + "learning_rate": 7.722165474974465e-06, + "loss": 0.625, + "step": 378 + }, + { + "epoch": 0.011615790118916268, + "grad_norm": 0.9072658569232978, + "learning_rate": 7.742594484167519e-06, + "loss": 0.6751, + "step": 379 + }, + { + "epoch": 0.011646438641657472, + "grad_norm": 2.7039224572697558, + "learning_rate": 7.763023493360572e-06, + "loss": 0.879, + "step": 380 + }, + { + "epoch": 0.011677087164398676, + "grad_norm": 2.4736618999823095, + "learning_rate": 7.783452502553627e-06, + "loss": 0.9956, + "step": 381 + }, + { + "epoch": 0.01170773568713988, + "grad_norm": 2.4675601348774636, + "learning_rate": 7.803881511746681e-06, + "loss": 0.9052, + "step": 382 + }, + { + "epoch": 0.011738384209881084, + "grad_norm": 2.674725335216739, + "learning_rate": 7.824310520939735e-06, + "loss": 1.0051, + "step": 383 + }, + { + "epoch": 0.011769032732622288, + "grad_norm": 2.460398445502392, + "learning_rate": 7.844739530132788e-06, + "loss": 0.9376, + "step": 384 + }, + { + "epoch": 0.011799681255363492, + "grad_norm": 2.6580698396472426, + "learning_rate": 7.865168539325843e-06, + "loss": 0.9804, + "step": 385 + }, + { + "epoch": 0.011830329778104696, + "grad_norm": 2.6009746397551714, + "learning_rate": 7.885597548518897e-06, + "loss": 0.9407, + "step": 386 + }, + { + "epoch": 0.0118609783008459, + "grad_norm": 2.3513499709928896, + "learning_rate": 7.906026557711952e-06, + "loss": 0.9451, + "step": 387 + }, + { + "epoch": 0.011891626823587104, + "grad_norm": 2.5018659995101373, + "learning_rate": 7.926455566905006e-06, + "loss": 0.9639, + "step": 388 + }, + { + "epoch": 0.011922275346328308, + "grad_norm": 2.3190006724083263, + "learning_rate": 7.94688457609806e-06, + "loss": 0.9734, + "step": 389 + }, + { + "epoch": 0.011952923869069512, + "grad_norm": 2.3900218503791324, + "learning_rate": 7.967313585291115e-06, + "loss": 1.0052, + "step": 390 + }, + { + "epoch": 0.011983572391810715, + "grad_norm": 2.3821027897120737, + "learning_rate": 7.987742594484168e-06, + "loss": 0.894, + "step": 391 + }, + { + "epoch": 0.01201422091455192, + "grad_norm": 2.668904975335589, + "learning_rate": 8.008171603677223e-06, + "loss": 0.8659, + "step": 392 + }, + { + "epoch": 0.012044869437293123, + "grad_norm": 2.66695129128256, + "learning_rate": 8.028600612870277e-06, + "loss": 0.8381, + "step": 393 + }, + { + "epoch": 0.012075517960034325, + "grad_norm": 2.6476212882276844, + "learning_rate": 8.04902962206333e-06, + "loss": 0.8787, + "step": 394 + }, + { + "epoch": 0.01210616648277553, + "grad_norm": 2.5299720081805495, + "learning_rate": 8.069458631256384e-06, + "loss": 0.9933, + "step": 395 + }, + { + "epoch": 0.012136815005516733, + "grad_norm": 2.3630778147934595, + "learning_rate": 8.08988764044944e-06, + "loss": 0.9657, + "step": 396 + }, + { + "epoch": 0.012167463528257937, + "grad_norm": 2.63028981255221, + "learning_rate": 8.110316649642493e-06, + "loss": 1.0, + "step": 397 + }, + { + "epoch": 0.012198112050999141, + "grad_norm": 0.9646772963858871, + "learning_rate": 8.130745658835547e-06, + "loss": 0.6681, + "step": 398 + }, + { + "epoch": 0.012228760573740345, + "grad_norm": 2.8496667714269863, + "learning_rate": 8.1511746680286e-06, + "loss": 1.0012, + "step": 399 + }, + { + "epoch": 0.012259409096481549, + "grad_norm": 0.9111075925465106, + "learning_rate": 8.171603677221655e-06, + "loss": 0.6622, + "step": 400 + }, + { + "epoch": 0.012290057619222753, + "grad_norm": 2.6541417449034856, + "learning_rate": 8.192032686414709e-06, + "loss": 0.9307, + "step": 401 + }, + { + "epoch": 0.012320706141963957, + "grad_norm": 0.9302893531298074, + "learning_rate": 8.212461695607764e-06, + "loss": 0.6423, + "step": 402 + }, + { + "epoch": 0.01235135466470516, + "grad_norm": 2.751750047174816, + "learning_rate": 8.232890704800818e-06, + "loss": 0.9258, + "step": 403 + }, + { + "epoch": 0.012382003187446365, + "grad_norm": 2.7001608358949243, + "learning_rate": 8.253319713993871e-06, + "loss": 1.0407, + "step": 404 + }, + { + "epoch": 0.012412651710187569, + "grad_norm": 2.2857708703942166, + "learning_rate": 8.273748723186927e-06, + "loss": 0.95, + "step": 405 + }, + { + "epoch": 0.012443300232928773, + "grad_norm": 2.817482846659702, + "learning_rate": 8.29417773237998e-06, + "loss": 1.0186, + "step": 406 + }, + { + "epoch": 0.012473948755669977, + "grad_norm": 2.362544835476629, + "learning_rate": 8.314606741573035e-06, + "loss": 0.9629, + "step": 407 + }, + { + "epoch": 0.01250459727841118, + "grad_norm": 2.6745770412239636, + "learning_rate": 8.335035750766089e-06, + "loss": 0.8947, + "step": 408 + }, + { + "epoch": 0.012535245801152384, + "grad_norm": 2.4451556565170183, + "learning_rate": 8.355464759959142e-06, + "loss": 0.9727, + "step": 409 + }, + { + "epoch": 0.012565894323893588, + "grad_norm": 2.354592244289903, + "learning_rate": 8.375893769152198e-06, + "loss": 0.9745, + "step": 410 + }, + { + "epoch": 0.012596542846634792, + "grad_norm": 2.775879493171172, + "learning_rate": 8.396322778345251e-06, + "loss": 0.9563, + "step": 411 + }, + { + "epoch": 0.012627191369375996, + "grad_norm": 2.438351579257735, + "learning_rate": 8.416751787538305e-06, + "loss": 0.9087, + "step": 412 + }, + { + "epoch": 0.0126578398921172, + "grad_norm": 2.4446526413621545, + "learning_rate": 8.437180796731358e-06, + "loss": 0.8752, + "step": 413 + }, + { + "epoch": 0.012688488414858404, + "grad_norm": 2.665402142187261, + "learning_rate": 8.457609805924414e-06, + "loss": 0.9633, + "step": 414 + }, + { + "epoch": 0.012719136937599608, + "grad_norm": 3.18732407930314, + "learning_rate": 8.478038815117467e-06, + "loss": 0.8923, + "step": 415 + }, + { + "epoch": 0.012749785460340812, + "grad_norm": 2.5505640749916347, + "learning_rate": 8.49846782431052e-06, + "loss": 0.8882, + "step": 416 + }, + { + "epoch": 0.012780433983082016, + "grad_norm": 1.0497625678049896, + "learning_rate": 8.518896833503576e-06, + "loss": 0.6454, + "step": 417 + }, + { + "epoch": 0.01281108250582322, + "grad_norm": 1.0057457164553905, + "learning_rate": 8.53932584269663e-06, + "loss": 0.636, + "step": 418 + }, + { + "epoch": 0.012841731028564424, + "grad_norm": 2.839269367933278, + "learning_rate": 8.559754851889685e-06, + "loss": 0.9789, + "step": 419 + }, + { + "epoch": 0.012872379551305628, + "grad_norm": 2.500907553873001, + "learning_rate": 8.580183861082738e-06, + "loss": 0.9618, + "step": 420 + }, + { + "epoch": 0.012903028074046832, + "grad_norm": 2.701179610770963, + "learning_rate": 8.600612870275792e-06, + "loss": 0.7812, + "step": 421 + }, + { + "epoch": 0.012933676596788036, + "grad_norm": 2.5914017984909075, + "learning_rate": 8.621041879468847e-06, + "loss": 0.9984, + "step": 422 + }, + { + "epoch": 0.01296432511952924, + "grad_norm": 2.647306289273282, + "learning_rate": 8.6414708886619e-06, + "loss": 1.0101, + "step": 423 + }, + { + "epoch": 0.012994973642270443, + "grad_norm": 1.0116222719899568, + "learning_rate": 8.661899897854954e-06, + "loss": 0.6986, + "step": 424 + }, + { + "epoch": 0.013025622165011647, + "grad_norm": 2.3357346600655937, + "learning_rate": 8.68232890704801e-06, + "loss": 0.8321, + "step": 425 + }, + { + "epoch": 0.01305627068775285, + "grad_norm": 2.6126936733822803, + "learning_rate": 8.702757916241063e-06, + "loss": 0.9604, + "step": 426 + }, + { + "epoch": 0.013086919210494053, + "grad_norm": 2.5418287536318194, + "learning_rate": 8.723186925434117e-06, + "loss": 0.8947, + "step": 427 + }, + { + "epoch": 0.013117567733235257, + "grad_norm": 2.80581073150837, + "learning_rate": 8.74361593462717e-06, + "loss": 0.8808, + "step": 428 + }, + { + "epoch": 0.013148216255976461, + "grad_norm": 2.8836623767779823, + "learning_rate": 8.764044943820226e-06, + "loss": 1.0078, + "step": 429 + }, + { + "epoch": 0.013178864778717665, + "grad_norm": 2.821844971829169, + "learning_rate": 8.784473953013279e-06, + "loss": 0.8602, + "step": 430 + }, + { + "epoch": 0.01320951330145887, + "grad_norm": 2.481348907326151, + "learning_rate": 8.804902962206334e-06, + "loss": 0.998, + "step": 431 + }, + { + "epoch": 0.013240161824200073, + "grad_norm": 2.565006417500468, + "learning_rate": 8.825331971399388e-06, + "loss": 0.8614, + "step": 432 + }, + { + "epoch": 0.013270810346941277, + "grad_norm": 2.649039075174963, + "learning_rate": 8.845760980592442e-06, + "loss": 0.8804, + "step": 433 + }, + { + "epoch": 0.013301458869682481, + "grad_norm": 3.0637634220833614, + "learning_rate": 8.866189989785497e-06, + "loss": 0.8539, + "step": 434 + }, + { + "epoch": 0.013332107392423685, + "grad_norm": 2.4498711388308485, + "learning_rate": 8.88661899897855e-06, + "loss": 0.9981, + "step": 435 + }, + { + "epoch": 0.013362755915164889, + "grad_norm": 2.919892231617152, + "learning_rate": 8.907048008171606e-06, + "loss": 0.9101, + "step": 436 + }, + { + "epoch": 0.013393404437906093, + "grad_norm": 2.5096666038332502, + "learning_rate": 8.927477017364659e-06, + "loss": 0.9923, + "step": 437 + }, + { + "epoch": 0.013424052960647297, + "grad_norm": 2.5877162424549067, + "learning_rate": 8.947906026557713e-06, + "loss": 0.9826, + "step": 438 + }, + { + "epoch": 0.0134547014833885, + "grad_norm": 2.8393774123272206, + "learning_rate": 8.968335035750766e-06, + "loss": 1.0079, + "step": 439 + }, + { + "epoch": 0.013485350006129704, + "grad_norm": 2.631700502879085, + "learning_rate": 8.988764044943822e-06, + "loss": 0.8404, + "step": 440 + }, + { + "epoch": 0.013515998528870908, + "grad_norm": 2.5201381511924588, + "learning_rate": 9.009193054136875e-06, + "loss": 0.9295, + "step": 441 + }, + { + "epoch": 0.013546647051612112, + "grad_norm": 2.6145405013604917, + "learning_rate": 9.029622063329929e-06, + "loss": 0.9762, + "step": 442 + }, + { + "epoch": 0.013577295574353316, + "grad_norm": 2.4165900054249776, + "learning_rate": 9.050051072522982e-06, + "loss": 0.8968, + "step": 443 + }, + { + "epoch": 0.01360794409709452, + "grad_norm": 2.7122279028197074, + "learning_rate": 9.070480081716037e-06, + "loss": 0.8448, + "step": 444 + }, + { + "epoch": 0.013638592619835724, + "grad_norm": 2.487873925498577, + "learning_rate": 9.090909090909091e-06, + "loss": 0.8534, + "step": 445 + }, + { + "epoch": 0.013669241142576928, + "grad_norm": 2.5209311217115706, + "learning_rate": 9.111338100102146e-06, + "loss": 0.9852, + "step": 446 + }, + { + "epoch": 0.013699889665318132, + "grad_norm": 2.473469130876919, + "learning_rate": 9.1317671092952e-06, + "loss": 0.9845, + "step": 447 + }, + { + "epoch": 0.013730538188059336, + "grad_norm": 2.355664856969988, + "learning_rate": 9.152196118488255e-06, + "loss": 0.9268, + "step": 448 + }, + { + "epoch": 0.01376118671080054, + "grad_norm": 2.6763719799300585, + "learning_rate": 9.172625127681309e-06, + "loss": 0.8985, + "step": 449 + }, + { + "epoch": 0.013791835233541744, + "grad_norm": 2.490515967454956, + "learning_rate": 9.193054136874362e-06, + "loss": 0.8784, + "step": 450 + }, + { + "epoch": 0.013822483756282948, + "grad_norm": 1.0623301849334859, + "learning_rate": 9.213483146067417e-06, + "loss": 0.6571, + "step": 451 + }, + { + "epoch": 0.013853132279024152, + "grad_norm": 2.504678428208797, + "learning_rate": 9.233912155260471e-06, + "loss": 0.9843, + "step": 452 + }, + { + "epoch": 0.013883780801765356, + "grad_norm": 0.9335532874477227, + "learning_rate": 9.254341164453525e-06, + "loss": 0.6718, + "step": 453 + }, + { + "epoch": 0.01391442932450656, + "grad_norm": 2.7067867188592545, + "learning_rate": 9.274770173646578e-06, + "loss": 0.9809, + "step": 454 + }, + { + "epoch": 0.013945077847247763, + "grad_norm": 2.9533471355146315, + "learning_rate": 9.295199182839633e-06, + "loss": 0.8833, + "step": 455 + }, + { + "epoch": 0.013975726369988967, + "grad_norm": 2.776235371588843, + "learning_rate": 9.315628192032687e-06, + "loss": 0.8572, + "step": 456 + }, + { + "epoch": 0.01400637489273017, + "grad_norm": 2.6774192615560293, + "learning_rate": 9.33605720122574e-06, + "loss": 0.8252, + "step": 457 + }, + { + "epoch": 0.014037023415471373, + "grad_norm": 2.6961369768149503, + "learning_rate": 9.356486210418796e-06, + "loss": 1.0021, + "step": 458 + }, + { + "epoch": 0.014067671938212577, + "grad_norm": 2.6011810119366627, + "learning_rate": 9.37691521961185e-06, + "loss": 0.9013, + "step": 459 + }, + { + "epoch": 0.014098320460953781, + "grad_norm": 2.5778294439898164, + "learning_rate": 9.397344228804903e-06, + "loss": 0.8628, + "step": 460 + }, + { + "epoch": 0.014128968983694985, + "grad_norm": 1.127438987934581, + "learning_rate": 9.417773237997958e-06, + "loss": 0.6632, + "step": 461 + }, + { + "epoch": 0.01415961750643619, + "grad_norm": 2.5832140677843625, + "learning_rate": 9.438202247191012e-06, + "loss": 0.8804, + "step": 462 + }, + { + "epoch": 0.014190266029177393, + "grad_norm": 1.010149353987427, + "learning_rate": 9.458631256384067e-06, + "loss": 0.6551, + "step": 463 + }, + { + "epoch": 0.014220914551918597, + "grad_norm": 2.365090668706274, + "learning_rate": 9.47906026557712e-06, + "loss": 0.9694, + "step": 464 + }, + { + "epoch": 0.014251563074659801, + "grad_norm": 2.7683640931632483, + "learning_rate": 9.499489274770174e-06, + "loss": 0.9021, + "step": 465 + }, + { + "epoch": 0.014282211597401005, + "grad_norm": 2.4233426240101523, + "learning_rate": 9.51991828396323e-06, + "loss": 1.015, + "step": 466 + }, + { + "epoch": 0.014312860120142209, + "grad_norm": 2.5566735789991446, + "learning_rate": 9.540347293156283e-06, + "loss": 0.7997, + "step": 467 + }, + { + "epoch": 0.014343508642883413, + "grad_norm": 2.2991490004139368, + "learning_rate": 9.560776302349337e-06, + "loss": 0.8949, + "step": 468 + }, + { + "epoch": 0.014374157165624617, + "grad_norm": 3.0066299842585256, + "learning_rate": 9.58120531154239e-06, + "loss": 1.0088, + "step": 469 + }, + { + "epoch": 0.01440480568836582, + "grad_norm": 2.6822173305358916, + "learning_rate": 9.601634320735445e-06, + "loss": 1.0069, + "step": 470 + }, + { + "epoch": 0.014435454211107025, + "grad_norm": 2.3440519905022543, + "learning_rate": 9.622063329928499e-06, + "loss": 0.9744, + "step": 471 + }, + { + "epoch": 0.014466102733848228, + "grad_norm": 2.5971034836648634, + "learning_rate": 9.642492339121552e-06, + "loss": 0.9148, + "step": 472 + }, + { + "epoch": 0.014496751256589432, + "grad_norm": 3.020310614565524, + "learning_rate": 9.662921348314608e-06, + "loss": 0.957, + "step": 473 + }, + { + "epoch": 0.014527399779330636, + "grad_norm": 2.5083922581110327, + "learning_rate": 9.683350357507661e-06, + "loss": 0.9478, + "step": 474 + }, + { + "epoch": 0.01455804830207184, + "grad_norm": 2.7416622859518474, + "learning_rate": 9.703779366700717e-06, + "loss": 0.8711, + "step": 475 + }, + { + "epoch": 0.014588696824813044, + "grad_norm": 2.1805052971316154, + "learning_rate": 9.72420837589377e-06, + "loss": 0.8491, + "step": 476 + }, + { + "epoch": 0.014619345347554248, + "grad_norm": 2.6904558862832437, + "learning_rate": 9.744637385086824e-06, + "loss": 0.8944, + "step": 477 + }, + { + "epoch": 0.014649993870295452, + "grad_norm": 2.5780979539787743, + "learning_rate": 9.765066394279879e-06, + "loss": 0.9779, + "step": 478 + }, + { + "epoch": 0.014680642393036656, + "grad_norm": 2.735719199311913, + "learning_rate": 9.785495403472932e-06, + "loss": 1.0299, + "step": 479 + }, + { + "epoch": 0.01471129091577786, + "grad_norm": 2.4348212317447855, + "learning_rate": 9.805924412665988e-06, + "loss": 0.9135, + "step": 480 + }, + { + "epoch": 0.014741939438519064, + "grad_norm": 1.2369020581196317, + "learning_rate": 9.826353421859041e-06, + "loss": 0.6498, + "step": 481 + }, + { + "epoch": 0.014772587961260268, + "grad_norm": 2.560875083432557, + "learning_rate": 9.846782431052095e-06, + "loss": 1.0222, + "step": 482 + }, + { + "epoch": 0.014803236484001472, + "grad_norm": 2.7231034268247374, + "learning_rate": 9.867211440245148e-06, + "loss": 0.9616, + "step": 483 + }, + { + "epoch": 0.014833885006742676, + "grad_norm": 2.505323554505598, + "learning_rate": 9.887640449438202e-06, + "loss": 0.9411, + "step": 484 + }, + { + "epoch": 0.01486453352948388, + "grad_norm": 2.6415569047212895, + "learning_rate": 9.908069458631257e-06, + "loss": 1.0325, + "step": 485 + }, + { + "epoch": 0.014895182052225083, + "grad_norm": 0.9020359306085192, + "learning_rate": 9.92849846782431e-06, + "loss": 0.6388, + "step": 486 + }, + { + "epoch": 0.014925830574966287, + "grad_norm": 2.314402213073592, + "learning_rate": 9.948927477017364e-06, + "loss": 0.8005, + "step": 487 + }, + { + "epoch": 0.014956479097707491, + "grad_norm": 2.73805647696946, + "learning_rate": 9.96935648621042e-06, + "loss": 0.9039, + "step": 488 + }, + { + "epoch": 0.014987127620448694, + "grad_norm": 2.5943375501176438, + "learning_rate": 9.989785495403473e-06, + "loss": 0.7391, + "step": 489 + }, + { + "epoch": 0.015017776143189897, + "grad_norm": 2.858248508163274, + "learning_rate": 1.0010214504596527e-05, + "loss": 1.0547, + "step": 490 + }, + { + "epoch": 0.015048424665931101, + "grad_norm": 2.4060029655142805, + "learning_rate": 1.0030643513789582e-05, + "loss": 0.9921, + "step": 491 + }, + { + "epoch": 0.015079073188672305, + "grad_norm": 2.3062462308670244, + "learning_rate": 1.0051072522982637e-05, + "loss": 0.9451, + "step": 492 + }, + { + "epoch": 0.01510972171141351, + "grad_norm": 2.7375646015175015, + "learning_rate": 1.0071501532175689e-05, + "loss": 0.984, + "step": 493 + }, + { + "epoch": 0.015140370234154713, + "grad_norm": 2.257770786717886, + "learning_rate": 1.0091930541368744e-05, + "loss": 0.9849, + "step": 494 + }, + { + "epoch": 0.015171018756895917, + "grad_norm": 2.429458898706918, + "learning_rate": 1.01123595505618e-05, + "loss": 0.9217, + "step": 495 + }, + { + "epoch": 0.015201667279637121, + "grad_norm": 2.31169036041781, + "learning_rate": 1.0132788559754853e-05, + "loss": 0.7846, + "step": 496 + }, + { + "epoch": 0.015232315802378325, + "grad_norm": 2.4165503391260015, + "learning_rate": 1.0153217568947907e-05, + "loss": 0.9171, + "step": 497 + }, + { + "epoch": 0.015262964325119529, + "grad_norm": 2.7318197532892308, + "learning_rate": 1.017364657814096e-05, + "loss": 0.9135, + "step": 498 + }, + { + "epoch": 0.015293612847860733, + "grad_norm": 2.491214726370569, + "learning_rate": 1.0194075587334016e-05, + "loss": 0.9549, + "step": 499 + }, + { + "epoch": 0.015324261370601937, + "grad_norm": 2.6079807182517762, + "learning_rate": 1.0214504596527069e-05, + "loss": 0.8906, + "step": 500 + }, + { + "epoch": 0.01535490989334314, + "grad_norm": 2.0736005448907098, + "learning_rate": 1.0234933605720123e-05, + "loss": 0.8984, + "step": 501 + }, + { + "epoch": 0.015385558416084345, + "grad_norm": 2.4206719850192595, + "learning_rate": 1.0255362614913178e-05, + "loss": 0.9105, + "step": 502 + }, + { + "epoch": 0.015416206938825549, + "grad_norm": 2.4599245754852492, + "learning_rate": 1.0275791624106233e-05, + "loss": 1.0655, + "step": 503 + }, + { + "epoch": 0.015446855461566752, + "grad_norm": 2.5737152837286885, + "learning_rate": 1.0296220633299285e-05, + "loss": 0.9126, + "step": 504 + }, + { + "epoch": 0.015477503984307956, + "grad_norm": 1.25089092897634, + "learning_rate": 1.031664964249234e-05, + "loss": 0.6604, + "step": 505 + }, + { + "epoch": 0.01550815250704916, + "grad_norm": 2.4968097708745938, + "learning_rate": 1.0337078651685396e-05, + "loss": 0.9177, + "step": 506 + }, + { + "epoch": 0.015538801029790364, + "grad_norm": 2.515385488576832, + "learning_rate": 1.0357507660878447e-05, + "loss": 0.9393, + "step": 507 + }, + { + "epoch": 0.015569449552531568, + "grad_norm": 2.31493151278487, + "learning_rate": 1.0377936670071503e-05, + "loss": 0.8479, + "step": 508 + }, + { + "epoch": 0.015600098075272772, + "grad_norm": 2.2725157515926666, + "learning_rate": 1.0398365679264556e-05, + "loss": 0.8328, + "step": 509 + }, + { + "epoch": 0.015630746598013976, + "grad_norm": 2.4147535450112, + "learning_rate": 1.041879468845761e-05, + "loss": 0.948, + "step": 510 + }, + { + "epoch": 0.015661395120755178, + "grad_norm": 0.9300424415621329, + "learning_rate": 1.0439223697650665e-05, + "loss": 0.6628, + "step": 511 + }, + { + "epoch": 0.015692043643496384, + "grad_norm": 0.9131244572047386, + "learning_rate": 1.0459652706843719e-05, + "loss": 0.6372, + "step": 512 + }, + { + "epoch": 0.015722692166237586, + "grad_norm": 0.8965901064590638, + "learning_rate": 1.0480081716036774e-05, + "loss": 0.6591, + "step": 513 + }, + { + "epoch": 0.015753340688978792, + "grad_norm": 2.7151017460968134, + "learning_rate": 1.0500510725229826e-05, + "loss": 0.8609, + "step": 514 + }, + { + "epoch": 0.015783989211719994, + "grad_norm": 2.261595226368063, + "learning_rate": 1.0520939734422881e-05, + "loss": 0.9629, + "step": 515 + }, + { + "epoch": 0.0158146377344612, + "grad_norm": 2.382156484392524, + "learning_rate": 1.0541368743615936e-05, + "loss": 0.9413, + "step": 516 + }, + { + "epoch": 0.015845286257202402, + "grad_norm": 2.36404644257781, + "learning_rate": 1.0561797752808988e-05, + "loss": 0.9477, + "step": 517 + }, + { + "epoch": 0.015875934779943607, + "grad_norm": 2.958660115777564, + "learning_rate": 1.0582226762002043e-05, + "loss": 1.0113, + "step": 518 + }, + { + "epoch": 0.01590658330268481, + "grad_norm": 2.317929102899697, + "learning_rate": 1.0602655771195099e-05, + "loss": 0.8597, + "step": 519 + }, + { + "epoch": 0.015937231825426015, + "grad_norm": 2.519236135751738, + "learning_rate": 1.062308478038815e-05, + "loss": 0.9996, + "step": 520 + }, + { + "epoch": 0.015967880348167218, + "grad_norm": 2.64057627300198, + "learning_rate": 1.0643513789581206e-05, + "loss": 0.9763, + "step": 521 + }, + { + "epoch": 0.015998528870908423, + "grad_norm": 2.580422064997271, + "learning_rate": 1.0663942798774261e-05, + "loss": 0.9842, + "step": 522 + }, + { + "epoch": 0.016029177393649625, + "grad_norm": 2.822226124816371, + "learning_rate": 1.0684371807967315e-05, + "loss": 0.9109, + "step": 523 + }, + { + "epoch": 0.01605982591639083, + "grad_norm": 2.3319567420580807, + "learning_rate": 1.0704800817160368e-05, + "loss": 0.8918, + "step": 524 + }, + { + "epoch": 0.016090474439132033, + "grad_norm": 2.5106945840281694, + "learning_rate": 1.0725229826353423e-05, + "loss": 0.8885, + "step": 525 + }, + { + "epoch": 0.01612112296187324, + "grad_norm": 2.9231548530282323, + "learning_rate": 1.0745658835546477e-05, + "loss": 0.9769, + "step": 526 + }, + { + "epoch": 0.01615177148461444, + "grad_norm": 3.232648082215264, + "learning_rate": 1.076608784473953e-05, + "loss": 0.8914, + "step": 527 + }, + { + "epoch": 0.016182420007355647, + "grad_norm": 2.2902579281509485, + "learning_rate": 1.0786516853932584e-05, + "loss": 0.8653, + "step": 528 + }, + { + "epoch": 0.01621306853009685, + "grad_norm": 2.2417910399024334, + "learning_rate": 1.080694586312564e-05, + "loss": 0.8957, + "step": 529 + }, + { + "epoch": 0.016243717052838055, + "grad_norm": 1.0953570263727799, + "learning_rate": 1.0827374872318695e-05, + "loss": 0.687, + "step": 530 + }, + { + "epoch": 0.016274365575579257, + "grad_norm": 1.0360618780424233, + "learning_rate": 1.0847803881511747e-05, + "loss": 0.632, + "step": 531 + }, + { + "epoch": 0.016305014098320463, + "grad_norm": 2.9729801590375704, + "learning_rate": 1.0868232890704802e-05, + "loss": 0.9371, + "step": 532 + }, + { + "epoch": 0.016335662621061665, + "grad_norm": 0.894652761948578, + "learning_rate": 1.0888661899897857e-05, + "loss": 0.6366, + "step": 533 + }, + { + "epoch": 0.01636631114380287, + "grad_norm": 2.3157551093381286, + "learning_rate": 1.0909090909090909e-05, + "loss": 0.9622, + "step": 534 + }, + { + "epoch": 0.016396959666544073, + "grad_norm": 2.535414760922774, + "learning_rate": 1.0929519918283964e-05, + "loss": 0.9273, + "step": 535 + }, + { + "epoch": 0.016427608189285275, + "grad_norm": 2.326239482535601, + "learning_rate": 1.094994892747702e-05, + "loss": 0.7813, + "step": 536 + }, + { + "epoch": 0.01645825671202648, + "grad_norm": 2.2728259697421, + "learning_rate": 1.0970377936670071e-05, + "loss": 1.007, + "step": 537 + }, + { + "epoch": 0.016488905234767683, + "grad_norm": 1.1203801526198023, + "learning_rate": 1.0990806945863127e-05, + "loss": 0.6666, + "step": 538 + }, + { + "epoch": 0.01651955375750889, + "grad_norm": 2.4852310291162314, + "learning_rate": 1.101123595505618e-05, + "loss": 0.9026, + "step": 539 + }, + { + "epoch": 0.01655020228025009, + "grad_norm": 2.2933941846353423, + "learning_rate": 1.1031664964249235e-05, + "loss": 0.8307, + "step": 540 + }, + { + "epoch": 0.016580850802991296, + "grad_norm": 2.8022887077352876, + "learning_rate": 1.1052093973442289e-05, + "loss": 0.973, + "step": 541 + }, + { + "epoch": 0.0166114993257325, + "grad_norm": 0.9497385934971538, + "learning_rate": 1.1072522982635342e-05, + "loss": 0.6548, + "step": 542 + }, + { + "epoch": 0.016642147848473704, + "grad_norm": 2.4277966935754702, + "learning_rate": 1.1092951991828398e-05, + "loss": 0.9411, + "step": 543 + }, + { + "epoch": 0.016672796371214906, + "grad_norm": 2.5785134491782666, + "learning_rate": 1.111338100102145e-05, + "loss": 0.7967, + "step": 544 + }, + { + "epoch": 0.016703444893956112, + "grad_norm": 2.6212251703985165, + "learning_rate": 1.1133810010214505e-05, + "loss": 0.927, + "step": 545 + }, + { + "epoch": 0.016734093416697314, + "grad_norm": 2.5671596844157167, + "learning_rate": 1.115423901940756e-05, + "loss": 0.8672, + "step": 546 + }, + { + "epoch": 0.01676474193943852, + "grad_norm": 0.9635581490232266, + "learning_rate": 1.1174668028600615e-05, + "loss": 0.667, + "step": 547 + }, + { + "epoch": 0.016795390462179722, + "grad_norm": 2.5578674236525707, + "learning_rate": 1.1195097037793667e-05, + "loss": 0.981, + "step": 548 + }, + { + "epoch": 0.016826038984920928, + "grad_norm": 2.6659090911608705, + "learning_rate": 1.1215526046986723e-05, + "loss": 0.8979, + "step": 549 + }, + { + "epoch": 0.01685668750766213, + "grad_norm": 2.8288873636455594, + "learning_rate": 1.1235955056179778e-05, + "loss": 0.8604, + "step": 550 + }, + { + "epoch": 0.016887336030403335, + "grad_norm": 3.060441414547314, + "learning_rate": 1.125638406537283e-05, + "loss": 0.9181, + "step": 551 + }, + { + "epoch": 0.016917984553144538, + "grad_norm": 2.4374408354925685, + "learning_rate": 1.1276813074565885e-05, + "loss": 0.985, + "step": 552 + }, + { + "epoch": 0.016948633075885743, + "grad_norm": 2.442865214099849, + "learning_rate": 1.1297242083758938e-05, + "loss": 0.8765, + "step": 553 + }, + { + "epoch": 0.016979281598626945, + "grad_norm": 2.7633005740799543, + "learning_rate": 1.1317671092951992e-05, + "loss": 0.8559, + "step": 554 + }, + { + "epoch": 0.01700993012136815, + "grad_norm": 2.3967210593742134, + "learning_rate": 1.1338100102145047e-05, + "loss": 0.8338, + "step": 555 + }, + { + "epoch": 0.017040578644109353, + "grad_norm": 1.0493809218605588, + "learning_rate": 1.13585291113381e-05, + "loss": 0.6578, + "step": 556 + }, + { + "epoch": 0.01707122716685056, + "grad_norm": 0.9062036556106473, + "learning_rate": 1.1378958120531156e-05, + "loss": 0.6531, + "step": 557 + }, + { + "epoch": 0.01710187568959176, + "grad_norm": 2.520160506864784, + "learning_rate": 1.1399387129724208e-05, + "loss": 0.9186, + "step": 558 + }, + { + "epoch": 0.017132524212332967, + "grad_norm": 0.8584207693818771, + "learning_rate": 1.1419816138917263e-05, + "loss": 0.6274, + "step": 559 + }, + { + "epoch": 0.01716317273507417, + "grad_norm": 0.9367947579227631, + "learning_rate": 1.1440245148110318e-05, + "loss": 0.6783, + "step": 560 + }, + { + "epoch": 0.017193821257815375, + "grad_norm": 2.2376999609100703, + "learning_rate": 1.146067415730337e-05, + "loss": 0.8806, + "step": 561 + }, + { + "epoch": 0.017224469780556577, + "grad_norm": 2.7727932534172983, + "learning_rate": 1.1481103166496426e-05, + "loss": 0.8574, + "step": 562 + }, + { + "epoch": 0.017255118303297783, + "grad_norm": 2.5383462723246955, + "learning_rate": 1.1501532175689481e-05, + "loss": 0.9484, + "step": 563 + }, + { + "epoch": 0.017285766826038985, + "grad_norm": 2.3998731479941506, + "learning_rate": 1.1521961184882534e-05, + "loss": 0.9516, + "step": 564 + }, + { + "epoch": 0.01731641534878019, + "grad_norm": 2.8557628017089693, + "learning_rate": 1.1542390194075588e-05, + "loss": 0.8825, + "step": 565 + }, + { + "epoch": 0.017347063871521393, + "grad_norm": 2.0943322402462594, + "learning_rate": 1.1562819203268643e-05, + "loss": 0.8702, + "step": 566 + }, + { + "epoch": 0.017377712394262595, + "grad_norm": 2.5232226236697755, + "learning_rate": 1.1583248212461697e-05, + "loss": 0.932, + "step": 567 + }, + { + "epoch": 0.0174083609170038, + "grad_norm": 2.2261402230079304, + "learning_rate": 1.160367722165475e-05, + "loss": 0.9358, + "step": 568 + }, + { + "epoch": 0.017439009439745003, + "grad_norm": 2.923796060338871, + "learning_rate": 1.1624106230847804e-05, + "loss": 0.9232, + "step": 569 + }, + { + "epoch": 0.01746965796248621, + "grad_norm": 2.477751311044558, + "learning_rate": 1.164453524004086e-05, + "loss": 0.8909, + "step": 570 + }, + { + "epoch": 0.01750030648522741, + "grad_norm": 2.5965497534207223, + "learning_rate": 1.1664964249233913e-05, + "loss": 0.8732, + "step": 571 + }, + { + "epoch": 0.017530955007968616, + "grad_norm": 2.525101506141215, + "learning_rate": 1.1685393258426966e-05, + "loss": 0.8896, + "step": 572 + }, + { + "epoch": 0.01756160353070982, + "grad_norm": 2.574374721196832, + "learning_rate": 1.1705822267620022e-05, + "loss": 0.8683, + "step": 573 + }, + { + "epoch": 0.017592252053451024, + "grad_norm": 1.1327264538929538, + "learning_rate": 1.1726251276813077e-05, + "loss": 0.6413, + "step": 574 + }, + { + "epoch": 0.017622900576192226, + "grad_norm": 1.0200428415299942, + "learning_rate": 1.1746680286006129e-05, + "loss": 0.6537, + "step": 575 + }, + { + "epoch": 0.017653549098933432, + "grad_norm": 2.6531999129631174, + "learning_rate": 1.1767109295199184e-05, + "loss": 1.0526, + "step": 576 + }, + { + "epoch": 0.017684197621674634, + "grad_norm": 2.512459148080209, + "learning_rate": 1.178753830439224e-05, + "loss": 0.9232, + "step": 577 + }, + { + "epoch": 0.01771484614441584, + "grad_norm": 2.309424875965714, + "learning_rate": 1.1807967313585291e-05, + "loss": 1.0363, + "step": 578 + }, + { + "epoch": 0.017745494667157042, + "grad_norm": 2.434753284370783, + "learning_rate": 1.1828396322778346e-05, + "loss": 0.883, + "step": 579 + }, + { + "epoch": 0.017776143189898248, + "grad_norm": 2.392751644239127, + "learning_rate": 1.1848825331971402e-05, + "loss": 0.855, + "step": 580 + }, + { + "epoch": 0.01780679171263945, + "grad_norm": 2.365009326519032, + "learning_rate": 1.1869254341164453e-05, + "loss": 0.975, + "step": 581 + }, + { + "epoch": 0.017837440235380655, + "grad_norm": 2.3596217146098475, + "learning_rate": 1.1889683350357509e-05, + "loss": 0.8888, + "step": 582 + }, + { + "epoch": 0.017868088758121858, + "grad_norm": 2.7266429210156264, + "learning_rate": 1.1910112359550562e-05, + "loss": 0.796, + "step": 583 + }, + { + "epoch": 0.017898737280863063, + "grad_norm": 2.2573548579520124, + "learning_rate": 1.1930541368743618e-05, + "loss": 0.9776, + "step": 584 + }, + { + "epoch": 0.017929385803604266, + "grad_norm": 3.12776866723955, + "learning_rate": 1.1950970377936671e-05, + "loss": 0.7857, + "step": 585 + }, + { + "epoch": 0.01796003432634547, + "grad_norm": 2.575938246668722, + "learning_rate": 1.1971399387129725e-05, + "loss": 0.9156, + "step": 586 + }, + { + "epoch": 0.017990682849086673, + "grad_norm": 2.4400728819410786, + "learning_rate": 1.199182839632278e-05, + "loss": 0.9239, + "step": 587 + }, + { + "epoch": 0.01802133137182788, + "grad_norm": 2.376719424230559, + "learning_rate": 1.2012257405515832e-05, + "loss": 0.8575, + "step": 588 + }, + { + "epoch": 0.01805197989456908, + "grad_norm": 2.881971662721351, + "learning_rate": 1.2032686414708887e-05, + "loss": 0.9536, + "step": 589 + }, + { + "epoch": 0.018082628417310287, + "grad_norm": 2.28457965200418, + "learning_rate": 1.2053115423901942e-05, + "loss": 0.91, + "step": 590 + }, + { + "epoch": 0.01811327694005149, + "grad_norm": 2.4152872481341454, + "learning_rate": 1.2073544433094998e-05, + "loss": 0.9196, + "step": 591 + }, + { + "epoch": 0.018143925462792695, + "grad_norm": 2.457240553443885, + "learning_rate": 1.209397344228805e-05, + "loss": 1.0008, + "step": 592 + }, + { + "epoch": 0.018174573985533897, + "grad_norm": 2.434549212382549, + "learning_rate": 1.2114402451481105e-05, + "loss": 0.9115, + "step": 593 + }, + { + "epoch": 0.018205222508275103, + "grad_norm": 1.3847886169903407, + "learning_rate": 1.213483146067416e-05, + "loss": 0.6433, + "step": 594 + }, + { + "epoch": 0.018235871031016305, + "grad_norm": 1.2305932867426028, + "learning_rate": 1.2155260469867212e-05, + "loss": 0.6372, + "step": 595 + }, + { + "epoch": 0.01826651955375751, + "grad_norm": 2.4277340763564323, + "learning_rate": 1.2175689479060267e-05, + "loss": 1.0064, + "step": 596 + }, + { + "epoch": 0.018297168076498713, + "grad_norm": 2.670573784761962, + "learning_rate": 1.219611848825332e-05, + "loss": 0.8326, + "step": 597 + }, + { + "epoch": 0.01832781659923992, + "grad_norm": 0.9440602521981647, + "learning_rate": 1.2216547497446374e-05, + "loss": 0.6342, + "step": 598 + }, + { + "epoch": 0.01835846512198112, + "grad_norm": 2.43604863603499, + "learning_rate": 1.223697650663943e-05, + "loss": 0.9383, + "step": 599 + }, + { + "epoch": 0.018389113644722323, + "grad_norm": 2.745931898851946, + "learning_rate": 1.2257405515832483e-05, + "loss": 0.962, + "step": 600 + }, + { + "epoch": 0.01841976216746353, + "grad_norm": 2.6453581868740748, + "learning_rate": 1.2277834525025538e-05, + "loss": 0.8595, + "step": 601 + }, + { + "epoch": 0.01845041069020473, + "grad_norm": 2.6430020134778425, + "learning_rate": 1.229826353421859e-05, + "loss": 1.0166, + "step": 602 + }, + { + "epoch": 0.018481059212945936, + "grad_norm": 2.3192212001006967, + "learning_rate": 1.2318692543411645e-05, + "loss": 0.9336, + "step": 603 + }, + { + "epoch": 0.01851170773568714, + "grad_norm": 2.495764555085359, + "learning_rate": 1.23391215526047e-05, + "loss": 0.8668, + "step": 604 + }, + { + "epoch": 0.018542356258428344, + "grad_norm": 2.40283277571277, + "learning_rate": 1.2359550561797752e-05, + "loss": 0.8616, + "step": 605 + }, + { + "epoch": 0.018573004781169546, + "grad_norm": 2.609522726197586, + "learning_rate": 1.2379979570990808e-05, + "loss": 0.9117, + "step": 606 + }, + { + "epoch": 0.018603653303910752, + "grad_norm": 2.6687295728197977, + "learning_rate": 1.2400408580183863e-05, + "loss": 0.8921, + "step": 607 + }, + { + "epoch": 0.018634301826651954, + "grad_norm": 1.4877164883907728, + "learning_rate": 1.2420837589376917e-05, + "loss": 0.6664, + "step": 608 + }, + { + "epoch": 0.01866495034939316, + "grad_norm": 3.348678755859582, + "learning_rate": 1.244126659856997e-05, + "loss": 0.753, + "step": 609 + }, + { + "epoch": 0.018695598872134362, + "grad_norm": 1.0914766783333192, + "learning_rate": 1.2461695607763025e-05, + "loss": 0.6595, + "step": 610 + }, + { + "epoch": 0.018726247394875568, + "grad_norm": 2.0666531319219397, + "learning_rate": 1.2482124616956079e-05, + "loss": 0.788, + "step": 611 + }, + { + "epoch": 0.01875689591761677, + "grad_norm": 0.8949029230772396, + "learning_rate": 1.2502553626149133e-05, + "loss": 0.6343, + "step": 612 + }, + { + "epoch": 0.018787544440357976, + "grad_norm": 2.4051578192879868, + "learning_rate": 1.2522982635342186e-05, + "loss": 0.8557, + "step": 613 + }, + { + "epoch": 0.018818192963099178, + "grad_norm": 2.510835714277149, + "learning_rate": 1.2543411644535241e-05, + "loss": 1.0124, + "step": 614 + }, + { + "epoch": 0.018848841485840383, + "grad_norm": 1.1348823756269943, + "learning_rate": 1.2563840653728295e-05, + "loss": 0.6736, + "step": 615 + }, + { + "epoch": 0.018879490008581586, + "grad_norm": 2.3840943965085866, + "learning_rate": 1.2584269662921348e-05, + "loss": 0.9338, + "step": 616 + }, + { + "epoch": 0.01891013853132279, + "grad_norm": 1.0430133187320831, + "learning_rate": 1.2604698672114404e-05, + "loss": 0.6255, + "step": 617 + }, + { + "epoch": 0.018940787054063993, + "grad_norm": 1.0499269465471943, + "learning_rate": 1.2625127681307459e-05, + "loss": 0.6538, + "step": 618 + }, + { + "epoch": 0.0189714355768052, + "grad_norm": 2.3804699074357503, + "learning_rate": 1.264555669050051e-05, + "loss": 0.9801, + "step": 619 + }, + { + "epoch": 0.0190020840995464, + "grad_norm": 2.414343252521227, + "learning_rate": 1.2665985699693566e-05, + "loss": 0.7885, + "step": 620 + }, + { + "epoch": 0.019032732622287607, + "grad_norm": 0.8837283986390693, + "learning_rate": 1.2686414708886621e-05, + "loss": 0.6394, + "step": 621 + }, + { + "epoch": 0.01906338114502881, + "grad_norm": 2.5613486349893786, + "learning_rate": 1.2706843718079673e-05, + "loss": 0.9393, + "step": 622 + }, + { + "epoch": 0.019094029667770015, + "grad_norm": 2.642699932836508, + "learning_rate": 1.2727272727272728e-05, + "loss": 0.9738, + "step": 623 + }, + { + "epoch": 0.019124678190511217, + "grad_norm": 2.573893806612231, + "learning_rate": 1.2747701736465784e-05, + "loss": 0.9935, + "step": 624 + }, + { + "epoch": 0.019155326713252423, + "grad_norm": 1.0010979169146392, + "learning_rate": 1.2768130745658837e-05, + "loss": 0.6338, + "step": 625 + }, + { + "epoch": 0.019185975235993625, + "grad_norm": 1.0161407900366533, + "learning_rate": 1.2788559754851891e-05, + "loss": 0.6339, + "step": 626 + }, + { + "epoch": 0.01921662375873483, + "grad_norm": 2.9892017935294413, + "learning_rate": 1.2808988764044944e-05, + "loss": 0.934, + "step": 627 + }, + { + "epoch": 0.019247272281476033, + "grad_norm": 2.5133825727798595, + "learning_rate": 1.2829417773238e-05, + "loss": 0.8334, + "step": 628 + }, + { + "epoch": 0.01927792080421724, + "grad_norm": 3.3873517217114992, + "learning_rate": 1.2849846782431053e-05, + "loss": 0.9205, + "step": 629 + }, + { + "epoch": 0.01930856932695844, + "grad_norm": 2.54070068464443, + "learning_rate": 1.2870275791624107e-05, + "loss": 0.9769, + "step": 630 + }, + { + "epoch": 0.019339217849699643, + "grad_norm": 2.499651491905379, + "learning_rate": 1.2890704800817162e-05, + "loss": 0.8732, + "step": 631 + }, + { + "epoch": 0.01936986637244085, + "grad_norm": 1.018386260208386, + "learning_rate": 1.2911133810010214e-05, + "loss": 0.6248, + "step": 632 + }, + { + "epoch": 0.01940051489518205, + "grad_norm": 2.6649264525559424, + "learning_rate": 1.293156281920327e-05, + "loss": 0.949, + "step": 633 + }, + { + "epoch": 0.019431163417923256, + "grad_norm": 2.2441049801169415, + "learning_rate": 1.2951991828396324e-05, + "loss": 0.9355, + "step": 634 + }, + { + "epoch": 0.01946181194066446, + "grad_norm": 2.336232621029421, + "learning_rate": 1.297242083758938e-05, + "loss": 0.9075, + "step": 635 + }, + { + "epoch": 0.019492460463405664, + "grad_norm": 2.516984501928382, + "learning_rate": 1.2992849846782432e-05, + "loss": 0.8596, + "step": 636 + }, + { + "epoch": 0.019523108986146866, + "grad_norm": 2.496612000706578, + "learning_rate": 1.3013278855975487e-05, + "loss": 0.8618, + "step": 637 + }, + { + "epoch": 0.019553757508888072, + "grad_norm": 2.6219953745191424, + "learning_rate": 1.303370786516854e-05, + "loss": 1.0617, + "step": 638 + }, + { + "epoch": 0.019584406031629274, + "grad_norm": 2.311970003909093, + "learning_rate": 1.3054136874361594e-05, + "loss": 0.8392, + "step": 639 + }, + { + "epoch": 0.01961505455437048, + "grad_norm": 2.3068196179881344, + "learning_rate": 1.307456588355465e-05, + "loss": 0.8109, + "step": 640 + }, + { + "epoch": 0.019645703077111682, + "grad_norm": 1.009102530561734, + "learning_rate": 1.3094994892747703e-05, + "loss": 0.6485, + "step": 641 + }, + { + "epoch": 0.019676351599852888, + "grad_norm": 0.9139805386656235, + "learning_rate": 1.3115423901940756e-05, + "loss": 0.6557, + "step": 642 + }, + { + "epoch": 0.01970700012259409, + "grad_norm": 2.6831339018089166, + "learning_rate": 1.313585291113381e-05, + "loss": 0.9114, + "step": 643 + }, + { + "epoch": 0.019737648645335296, + "grad_norm": 2.457082717685276, + "learning_rate": 1.3156281920326865e-05, + "loss": 0.8815, + "step": 644 + }, + { + "epoch": 0.019768297168076498, + "grad_norm": 2.631974106002617, + "learning_rate": 1.317671092951992e-05, + "loss": 0.8971, + "step": 645 + }, + { + "epoch": 0.019798945690817703, + "grad_norm": 2.6753515021748573, + "learning_rate": 1.3197139938712972e-05, + "loss": 0.8112, + "step": 646 + }, + { + "epoch": 0.019829594213558906, + "grad_norm": 2.3705033184038196, + "learning_rate": 1.3217568947906028e-05, + "loss": 0.8678, + "step": 647 + }, + { + "epoch": 0.01986024273630011, + "grad_norm": 2.5446409673152584, + "learning_rate": 1.3237997957099083e-05, + "loss": 0.9207, + "step": 648 + }, + { + "epoch": 0.019890891259041314, + "grad_norm": 2.7143722862369764, + "learning_rate": 1.3258426966292135e-05, + "loss": 0.9738, + "step": 649 + }, + { + "epoch": 0.01992153978178252, + "grad_norm": 2.840880695229363, + "learning_rate": 1.327885597548519e-05, + "loss": 0.9949, + "step": 650 + }, + { + "epoch": 0.01995218830452372, + "grad_norm": 2.5213279800375332, + "learning_rate": 1.3299284984678245e-05, + "loss": 0.9815, + "step": 651 + }, + { + "epoch": 0.019982836827264927, + "grad_norm": 2.5767941017568625, + "learning_rate": 1.3319713993871299e-05, + "loss": 0.9198, + "step": 652 + }, + { + "epoch": 0.02001348535000613, + "grad_norm": 2.6148774257292184, + "learning_rate": 1.3340143003064352e-05, + "loss": 0.8677, + "step": 653 + }, + { + "epoch": 0.020044133872747335, + "grad_norm": 1.3753387844203353, + "learning_rate": 1.3360572012257408e-05, + "loss": 0.63, + "step": 654 + }, + { + "epoch": 0.020074782395488537, + "grad_norm": 2.5535769008548055, + "learning_rate": 1.3381001021450461e-05, + "loss": 0.9002, + "step": 655 + }, + { + "epoch": 0.020105430918229743, + "grad_norm": 2.2836946690982294, + "learning_rate": 1.3401430030643515e-05, + "loss": 0.8965, + "step": 656 + }, + { + "epoch": 0.020136079440970945, + "grad_norm": 2.7342170331857676, + "learning_rate": 1.3421859039836568e-05, + "loss": 1.0005, + "step": 657 + }, + { + "epoch": 0.02016672796371215, + "grad_norm": 2.345592126837684, + "learning_rate": 1.3442288049029623e-05, + "loss": 0.8982, + "step": 658 + }, + { + "epoch": 0.020197376486453353, + "grad_norm": 2.291380209519442, + "learning_rate": 1.3462717058222677e-05, + "loss": 0.8678, + "step": 659 + }, + { + "epoch": 0.02022802500919456, + "grad_norm": 2.659178879141225, + "learning_rate": 1.348314606741573e-05, + "loss": 0.9602, + "step": 660 + }, + { + "epoch": 0.02025867353193576, + "grad_norm": 2.1906104363597523, + "learning_rate": 1.3503575076608786e-05, + "loss": 0.93, + "step": 661 + }, + { + "epoch": 0.020289322054676963, + "grad_norm": 2.6018404662739, + "learning_rate": 1.3524004085801841e-05, + "loss": 0.8825, + "step": 662 + }, + { + "epoch": 0.02031997057741817, + "grad_norm": 2.0292758392958254, + "learning_rate": 1.3544433094994893e-05, + "loss": 0.8389, + "step": 663 + }, + { + "epoch": 0.02035061910015937, + "grad_norm": 1.194288543090341, + "learning_rate": 1.3564862104187948e-05, + "loss": 0.6568, + "step": 664 + }, + { + "epoch": 0.020381267622900576, + "grad_norm": 2.4985486741017358, + "learning_rate": 1.3585291113381003e-05, + "loss": 0.9723, + "step": 665 + }, + { + "epoch": 0.02041191614564178, + "grad_norm": 2.272060604531793, + "learning_rate": 1.3605720122574055e-05, + "loss": 0.8471, + "step": 666 + }, + { + "epoch": 0.020442564668382984, + "grad_norm": 2.4535152566418987, + "learning_rate": 1.362614913176711e-05, + "loss": 0.9228, + "step": 667 + }, + { + "epoch": 0.020473213191124186, + "grad_norm": 2.769753664469895, + "learning_rate": 1.3646578140960164e-05, + "loss": 0.8703, + "step": 668 + }, + { + "epoch": 0.020503861713865392, + "grad_norm": 2.5562955716561397, + "learning_rate": 1.366700715015322e-05, + "loss": 0.91, + "step": 669 + }, + { + "epoch": 0.020534510236606594, + "grad_norm": 2.4812274993359407, + "learning_rate": 1.3687436159346273e-05, + "loss": 0.8602, + "step": 670 + }, + { + "epoch": 0.0205651587593478, + "grad_norm": 2.3576666588221493, + "learning_rate": 1.3707865168539327e-05, + "loss": 0.8455, + "step": 671 + }, + { + "epoch": 0.020595807282089002, + "grad_norm": 2.385643191853194, + "learning_rate": 1.3728294177732382e-05, + "loss": 0.9293, + "step": 672 + }, + { + "epoch": 0.020626455804830208, + "grad_norm": 2.413501795151944, + "learning_rate": 1.3748723186925434e-05, + "loss": 0.9165, + "step": 673 + }, + { + "epoch": 0.02065710432757141, + "grad_norm": 2.492495074771294, + "learning_rate": 1.3769152196118489e-05, + "loss": 0.8513, + "step": 674 + }, + { + "epoch": 0.020687752850312616, + "grad_norm": 2.437285088696878, + "learning_rate": 1.3789581205311544e-05, + "loss": 0.7983, + "step": 675 + }, + { + "epoch": 0.020718401373053818, + "grad_norm": 2.4621247960539945, + "learning_rate": 1.3810010214504596e-05, + "loss": 0.9431, + "step": 676 + }, + { + "epoch": 0.020749049895795024, + "grad_norm": 2.316251940839069, + "learning_rate": 1.3830439223697651e-05, + "loss": 0.9323, + "step": 677 + }, + { + "epoch": 0.020779698418536226, + "grad_norm": 2.4107031125935867, + "learning_rate": 1.3850868232890707e-05, + "loss": 0.9046, + "step": 678 + }, + { + "epoch": 0.02081034694127743, + "grad_norm": 2.3020986890759856, + "learning_rate": 1.3871297242083762e-05, + "loss": 0.872, + "step": 679 + }, + { + "epoch": 0.020840995464018634, + "grad_norm": 2.402548217673263, + "learning_rate": 1.3891726251276814e-05, + "loss": 0.8818, + "step": 680 + }, + { + "epoch": 0.02087164398675984, + "grad_norm": 2.4347952307859604, + "learning_rate": 1.3912155260469869e-05, + "loss": 0.844, + "step": 681 + }, + { + "epoch": 0.02090229250950104, + "grad_norm": 2.450350625348943, + "learning_rate": 1.3932584269662923e-05, + "loss": 0.9026, + "step": 682 + }, + { + "epoch": 0.020932941032242247, + "grad_norm": 2.1890993322153918, + "learning_rate": 1.3953013278855976e-05, + "loss": 0.8557, + "step": 683 + }, + { + "epoch": 0.02096358955498345, + "grad_norm": 2.4119534132685105, + "learning_rate": 1.3973442288049031e-05, + "loss": 0.9153, + "step": 684 + }, + { + "epoch": 0.020994238077724655, + "grad_norm": 2.5151673144182185, + "learning_rate": 1.3993871297242085e-05, + "loss": 0.9319, + "step": 685 + }, + { + "epoch": 0.021024886600465857, + "grad_norm": 2.0953192512421266, + "learning_rate": 1.4014300306435138e-05, + "loss": 0.9005, + "step": 686 + }, + { + "epoch": 0.021055535123207063, + "grad_norm": 2.118802787062145, + "learning_rate": 1.4034729315628192e-05, + "loss": 0.9144, + "step": 687 + }, + { + "epoch": 0.021086183645948265, + "grad_norm": 2.534952621361351, + "learning_rate": 1.4055158324821247e-05, + "loss": 0.8293, + "step": 688 + }, + { + "epoch": 0.02111683216868947, + "grad_norm": 2.270547752120755, + "learning_rate": 1.4075587334014303e-05, + "loss": 0.8275, + "step": 689 + }, + { + "epoch": 0.021147480691430673, + "grad_norm": 2.406765071078133, + "learning_rate": 1.4096016343207354e-05, + "loss": 0.8722, + "step": 690 + }, + { + "epoch": 0.02117812921417188, + "grad_norm": 2.429138156474836, + "learning_rate": 1.411644535240041e-05, + "loss": 0.9267, + "step": 691 + }, + { + "epoch": 0.02120877773691308, + "grad_norm": 0.944639876742255, + "learning_rate": 1.4136874361593465e-05, + "loss": 0.6571, + "step": 692 + }, + { + "epoch": 0.021239426259654283, + "grad_norm": 2.6990218996249795, + "learning_rate": 1.4157303370786517e-05, + "loss": 0.9067, + "step": 693 + }, + { + "epoch": 0.02127007478239549, + "grad_norm": 2.2576652430195545, + "learning_rate": 1.4177732379979572e-05, + "loss": 0.9162, + "step": 694 + }, + { + "epoch": 0.02130072330513669, + "grad_norm": 2.2829681196742415, + "learning_rate": 1.4198161389172627e-05, + "loss": 0.8631, + "step": 695 + }, + { + "epoch": 0.021331371827877896, + "grad_norm": 2.5212718545660358, + "learning_rate": 1.4218590398365681e-05, + "loss": 0.9103, + "step": 696 + }, + { + "epoch": 0.0213620203506191, + "grad_norm": 2.4768862377907017, + "learning_rate": 1.4239019407558734e-05, + "loss": 0.9369, + "step": 697 + }, + { + "epoch": 0.021392668873360304, + "grad_norm": 2.1151834253334543, + "learning_rate": 1.4259448416751788e-05, + "loss": 0.8068, + "step": 698 + }, + { + "epoch": 0.021423317396101507, + "grad_norm": 0.9214628028942057, + "learning_rate": 1.4279877425944843e-05, + "loss": 0.643, + "step": 699 + }, + { + "epoch": 0.021453965918842712, + "grad_norm": 2.3708119908847043, + "learning_rate": 1.4300306435137897e-05, + "loss": 0.9108, + "step": 700 + }, + { + "epoch": 0.021484614441583914, + "grad_norm": 2.515794093669089, + "learning_rate": 1.432073544433095e-05, + "loss": 0.9058, + "step": 701 + }, + { + "epoch": 0.02151526296432512, + "grad_norm": 2.3193213046431316, + "learning_rate": 1.4341164453524006e-05, + "loss": 0.895, + "step": 702 + }, + { + "epoch": 0.021545911487066322, + "grad_norm": 2.219505187869636, + "learning_rate": 1.4361593462717057e-05, + "loss": 0.9533, + "step": 703 + }, + { + "epoch": 0.021576560009807528, + "grad_norm": 2.294814892804555, + "learning_rate": 1.4382022471910113e-05, + "loss": 0.8219, + "step": 704 + }, + { + "epoch": 0.02160720853254873, + "grad_norm": 2.267804745431921, + "learning_rate": 1.4402451481103168e-05, + "loss": 0.7617, + "step": 705 + }, + { + "epoch": 0.021637857055289936, + "grad_norm": 2.4547301045498555, + "learning_rate": 1.4422880490296223e-05, + "loss": 0.9754, + "step": 706 + }, + { + "epoch": 0.021668505578031138, + "grad_norm": 2.113588251597983, + "learning_rate": 1.4443309499489275e-05, + "loss": 0.9103, + "step": 707 + }, + { + "epoch": 0.021699154100772344, + "grad_norm": 2.1443967055386532, + "learning_rate": 1.446373850868233e-05, + "loss": 0.9553, + "step": 708 + }, + { + "epoch": 0.021729802623513546, + "grad_norm": 2.3782545415938072, + "learning_rate": 1.4484167517875386e-05, + "loss": 0.9428, + "step": 709 + }, + { + "epoch": 0.02176045114625475, + "grad_norm": 2.2765209432970694, + "learning_rate": 1.4504596527068438e-05, + "loss": 0.9025, + "step": 710 + }, + { + "epoch": 0.021791099668995954, + "grad_norm": 2.2482934682354903, + "learning_rate": 1.4525025536261493e-05, + "loss": 0.9382, + "step": 711 + }, + { + "epoch": 0.02182174819173716, + "grad_norm": 2.1364251506493845, + "learning_rate": 1.4545454545454546e-05, + "loss": 0.8784, + "step": 712 + }, + { + "epoch": 0.02185239671447836, + "grad_norm": 2.199109623029564, + "learning_rate": 1.4565883554647602e-05, + "loss": 0.8714, + "step": 713 + }, + { + "epoch": 0.021883045237219567, + "grad_norm": 2.2758777011300793, + "learning_rate": 1.4586312563840655e-05, + "loss": 0.8746, + "step": 714 + }, + { + "epoch": 0.02191369375996077, + "grad_norm": 2.6340425849837454, + "learning_rate": 1.4606741573033709e-05, + "loss": 0.9673, + "step": 715 + }, + { + "epoch": 0.021944342282701975, + "grad_norm": 2.1700101026454384, + "learning_rate": 1.4627170582226764e-05, + "loss": 0.8642, + "step": 716 + }, + { + "epoch": 0.021974990805443177, + "grad_norm": 2.307140085340721, + "learning_rate": 1.4647599591419816e-05, + "loss": 1.0076, + "step": 717 + }, + { + "epoch": 0.022005639328184383, + "grad_norm": 2.186799013283771, + "learning_rate": 1.4668028600612871e-05, + "loss": 0.9302, + "step": 718 + }, + { + "epoch": 0.022036287850925585, + "grad_norm": 2.3954349959154237, + "learning_rate": 1.4688457609805926e-05, + "loss": 0.963, + "step": 719 + }, + { + "epoch": 0.02206693637366679, + "grad_norm": 2.572040201197431, + "learning_rate": 1.4708886618998978e-05, + "loss": 0.8674, + "step": 720 + }, + { + "epoch": 0.022097584896407993, + "grad_norm": 2.7019140906898333, + "learning_rate": 1.4729315628192033e-05, + "loss": 0.8602, + "step": 721 + }, + { + "epoch": 0.0221282334191492, + "grad_norm": 2.4256747652511272, + "learning_rate": 1.4749744637385089e-05, + "loss": 0.8623, + "step": 722 + }, + { + "epoch": 0.0221588819418904, + "grad_norm": 2.2905078533903103, + "learning_rate": 1.4770173646578142e-05, + "loss": 0.9195, + "step": 723 + }, + { + "epoch": 0.022189530464631606, + "grad_norm": 2.189863689226502, + "learning_rate": 1.4790602655771196e-05, + "loss": 0.8271, + "step": 724 + }, + { + "epoch": 0.02222017898737281, + "grad_norm": 2.3682352420423753, + "learning_rate": 1.4811031664964251e-05, + "loss": 0.9402, + "step": 725 + }, + { + "epoch": 0.02225082751011401, + "grad_norm": 2.7839072914330916, + "learning_rate": 1.4831460674157305e-05, + "loss": 1.0077, + "step": 726 + }, + { + "epoch": 0.022281476032855217, + "grad_norm": 0.9356721386493174, + "learning_rate": 1.4851889683350358e-05, + "loss": 0.667, + "step": 727 + }, + { + "epoch": 0.02231212455559642, + "grad_norm": 2.21519563693354, + "learning_rate": 1.4872318692543412e-05, + "loss": 0.9506, + "step": 728 + }, + { + "epoch": 0.022342773078337624, + "grad_norm": 2.1191274725999008, + "learning_rate": 1.4892747701736467e-05, + "loss": 0.861, + "step": 729 + }, + { + "epoch": 0.022373421601078827, + "grad_norm": 2.316280388351454, + "learning_rate": 1.4913176710929522e-05, + "loss": 0.8695, + "step": 730 + }, + { + "epoch": 0.022404070123820032, + "grad_norm": 2.2008451662221757, + "learning_rate": 1.4933605720122574e-05, + "loss": 0.8823, + "step": 731 + }, + { + "epoch": 0.022434718646561234, + "grad_norm": 2.1042936237238568, + "learning_rate": 1.495403472931563e-05, + "loss": 0.9175, + "step": 732 + }, + { + "epoch": 0.02246536716930244, + "grad_norm": 2.4303228283601603, + "learning_rate": 1.4974463738508685e-05, + "loss": 0.8643, + "step": 733 + }, + { + "epoch": 0.022496015692043642, + "grad_norm": 2.102490578587638, + "learning_rate": 1.4994892747701737e-05, + "loss": 0.9177, + "step": 734 + }, + { + "epoch": 0.022526664214784848, + "grad_norm": 2.5698618099078208, + "learning_rate": 1.5015321756894792e-05, + "loss": 0.8348, + "step": 735 + }, + { + "epoch": 0.02255731273752605, + "grad_norm": 0.9127548619056359, + "learning_rate": 1.5035750766087847e-05, + "loss": 0.6432, + "step": 736 + }, + { + "epoch": 0.022587961260267256, + "grad_norm": 2.280846478628618, + "learning_rate": 1.5056179775280899e-05, + "loss": 0.9721, + "step": 737 + }, + { + "epoch": 0.022618609783008458, + "grad_norm": 2.3542828284226456, + "learning_rate": 1.5076608784473954e-05, + "loss": 0.9482, + "step": 738 + }, + { + "epoch": 0.022649258305749664, + "grad_norm": 2.4977420256120273, + "learning_rate": 1.509703779366701e-05, + "loss": 0.8357, + "step": 739 + }, + { + "epoch": 0.022679906828490866, + "grad_norm": 2.2846873772218528, + "learning_rate": 1.5117466802860063e-05, + "loss": 0.7725, + "step": 740 + }, + { + "epoch": 0.02271055535123207, + "grad_norm": 0.8488925447160675, + "learning_rate": 1.5137895812053117e-05, + "loss": 0.6299, + "step": 741 + }, + { + "epoch": 0.022741203873973274, + "grad_norm": 2.4086477595596447, + "learning_rate": 1.515832482124617e-05, + "loss": 0.9351, + "step": 742 + }, + { + "epoch": 0.02277185239671448, + "grad_norm": 2.2209003361519972, + "learning_rate": 1.5178753830439225e-05, + "loss": 0.9245, + "step": 743 + }, + { + "epoch": 0.02280250091945568, + "grad_norm": 2.5894671723874954, + "learning_rate": 1.5199182839632279e-05, + "loss": 1.0219, + "step": 744 + }, + { + "epoch": 0.022833149442196887, + "grad_norm": 2.1564326250073305, + "learning_rate": 1.5219611848825333e-05, + "loss": 0.9138, + "step": 745 + }, + { + "epoch": 0.02286379796493809, + "grad_norm": 2.623348770463395, + "learning_rate": 1.5240040858018388e-05, + "loss": 0.9838, + "step": 746 + }, + { + "epoch": 0.022894446487679295, + "grad_norm": 2.3135993954199403, + "learning_rate": 1.526046986721144e-05, + "loss": 0.914, + "step": 747 + }, + { + "epoch": 0.022925095010420497, + "grad_norm": 2.2926175742735477, + "learning_rate": 1.5280898876404495e-05, + "loss": 0.8083, + "step": 748 + }, + { + "epoch": 0.022955743533161703, + "grad_norm": 2.1601431777497937, + "learning_rate": 1.530132788559755e-05, + "loss": 0.8979, + "step": 749 + }, + { + "epoch": 0.022986392055902905, + "grad_norm": 2.2963130798292406, + "learning_rate": 1.5321756894790605e-05, + "loss": 0.9186, + "step": 750 + }, + { + "epoch": 0.02301704057864411, + "grad_norm": 2.1240650307342985, + "learning_rate": 1.5342185903983657e-05, + "loss": 0.8802, + "step": 751 + }, + { + "epoch": 0.023047689101385313, + "grad_norm": 2.599094699207521, + "learning_rate": 1.5362614913176713e-05, + "loss": 0.9524, + "step": 752 + }, + { + "epoch": 0.02307833762412652, + "grad_norm": 2.169779123212642, + "learning_rate": 1.5383043922369768e-05, + "loss": 0.8695, + "step": 753 + }, + { + "epoch": 0.02310898614686772, + "grad_norm": 2.2740830912374776, + "learning_rate": 1.540347293156282e-05, + "loss": 0.8701, + "step": 754 + }, + { + "epoch": 0.023139634669608927, + "grad_norm": 2.6328075785586944, + "learning_rate": 1.5423901940755875e-05, + "loss": 0.8928, + "step": 755 + }, + { + "epoch": 0.02317028319235013, + "grad_norm": 2.4622262719292043, + "learning_rate": 1.544433094994893e-05, + "loss": 0.8447, + "step": 756 + }, + { + "epoch": 0.02320093171509133, + "grad_norm": 2.2279038019328605, + "learning_rate": 1.5464759959141985e-05, + "loss": 0.7979, + "step": 757 + }, + { + "epoch": 0.023231580237832537, + "grad_norm": 2.6014495274689495, + "learning_rate": 1.5485188968335037e-05, + "loss": 0.8695, + "step": 758 + }, + { + "epoch": 0.02326222876057374, + "grad_norm": 2.1504915468675363, + "learning_rate": 1.5505617977528093e-05, + "loss": 0.8317, + "step": 759 + }, + { + "epoch": 0.023292877283314944, + "grad_norm": 2.211856551072762, + "learning_rate": 1.5526046986721144e-05, + "loss": 0.9827, + "step": 760 + }, + { + "epoch": 0.023323525806056147, + "grad_norm": 2.4112110213244726, + "learning_rate": 1.55464759959142e-05, + "loss": 0.8462, + "step": 761 + }, + { + "epoch": 0.023354174328797352, + "grad_norm": 2.259647916745342, + "learning_rate": 1.5566905005107255e-05, + "loss": 0.921, + "step": 762 + }, + { + "epoch": 0.023384822851538555, + "grad_norm": 2.4817086424749957, + "learning_rate": 1.5587334014300307e-05, + "loss": 1.0026, + "step": 763 + }, + { + "epoch": 0.02341547137427976, + "grad_norm": 0.9951489475615349, + "learning_rate": 1.5607763023493362e-05, + "loss": 0.6345, + "step": 764 + }, + { + "epoch": 0.023446119897020962, + "grad_norm": 2.40273011579576, + "learning_rate": 1.5628192032686414e-05, + "loss": 0.888, + "step": 765 + }, + { + "epoch": 0.023476768419762168, + "grad_norm": 0.8933812825574575, + "learning_rate": 1.564862104187947e-05, + "loss": 0.6519, + "step": 766 + }, + { + "epoch": 0.02350741694250337, + "grad_norm": 2.819303844637927, + "learning_rate": 1.5669050051072524e-05, + "loss": 1.009, + "step": 767 + }, + { + "epoch": 0.023538065465244576, + "grad_norm": 2.0993275589625506, + "learning_rate": 1.5689479060265576e-05, + "loss": 0.8782, + "step": 768 + }, + { + "epoch": 0.023568713987985778, + "grad_norm": 2.2824763568748843, + "learning_rate": 1.570990806945863e-05, + "loss": 0.8366, + "step": 769 + }, + { + "epoch": 0.023599362510726984, + "grad_norm": 2.3077584434834844, + "learning_rate": 1.5730337078651687e-05, + "loss": 0.7842, + "step": 770 + }, + { + "epoch": 0.023630011033468186, + "grad_norm": 2.168813981510537, + "learning_rate": 1.575076608784474e-05, + "loss": 0.8492, + "step": 771 + }, + { + "epoch": 0.02366065955620939, + "grad_norm": 2.3691725948809914, + "learning_rate": 1.5771195097037794e-05, + "loss": 0.9882, + "step": 772 + }, + { + "epoch": 0.023691308078950594, + "grad_norm": 1.128755466954411, + "learning_rate": 1.579162410623085e-05, + "loss": 0.6356, + "step": 773 + }, + { + "epoch": 0.0237219566016918, + "grad_norm": 2.2242374477134614, + "learning_rate": 1.5812053115423904e-05, + "loss": 0.9285, + "step": 774 + }, + { + "epoch": 0.023752605124433, + "grad_norm": 2.486039674106647, + "learning_rate": 1.5832482124616956e-05, + "loss": 0.8625, + "step": 775 + }, + { + "epoch": 0.023783253647174207, + "grad_norm": 2.3425940506822047, + "learning_rate": 1.585291113381001e-05, + "loss": 0.944, + "step": 776 + }, + { + "epoch": 0.02381390216991541, + "grad_norm": 2.1305122351260013, + "learning_rate": 1.5873340143003067e-05, + "loss": 0.844, + "step": 777 + }, + { + "epoch": 0.023844550692656615, + "grad_norm": 2.3392222917326273, + "learning_rate": 1.589376915219612e-05, + "loss": 0.8724, + "step": 778 + }, + { + "epoch": 0.023875199215397817, + "grad_norm": 2.344410726183772, + "learning_rate": 1.5914198161389174e-05, + "loss": 0.981, + "step": 779 + }, + { + "epoch": 0.023905847738139023, + "grad_norm": 2.5647007497291954, + "learning_rate": 1.593462717058223e-05, + "loss": 0.7789, + "step": 780 + }, + { + "epoch": 0.023936496260880225, + "grad_norm": 0.913766905973926, + "learning_rate": 1.595505617977528e-05, + "loss": 0.6389, + "step": 781 + }, + { + "epoch": 0.02396714478362143, + "grad_norm": 2.1176976381316, + "learning_rate": 1.5975485188968336e-05, + "loss": 0.8413, + "step": 782 + }, + { + "epoch": 0.023997793306362633, + "grad_norm": 2.4752070293741566, + "learning_rate": 1.599591419816139e-05, + "loss": 0.9, + "step": 783 + }, + { + "epoch": 0.02402844182910384, + "grad_norm": 2.6598487159826347, + "learning_rate": 1.6016343207354447e-05, + "loss": 0.9623, + "step": 784 + }, + { + "epoch": 0.02405909035184504, + "grad_norm": 1.9179377946432858, + "learning_rate": 1.60367722165475e-05, + "loss": 0.7801, + "step": 785 + }, + { + "epoch": 0.024089738874586247, + "grad_norm": 2.0700640505400094, + "learning_rate": 1.6057201225740554e-05, + "loss": 0.9059, + "step": 786 + }, + { + "epoch": 0.02412038739732745, + "grad_norm": 2.2865669337941874, + "learning_rate": 1.607763023493361e-05, + "loss": 0.8897, + "step": 787 + }, + { + "epoch": 0.02415103592006865, + "grad_norm": 2.4547643927502385, + "learning_rate": 1.609805924412666e-05, + "loss": 0.9802, + "step": 788 + }, + { + "epoch": 0.024181684442809857, + "grad_norm": 2.399199718576705, + "learning_rate": 1.6118488253319716e-05, + "loss": 0.9564, + "step": 789 + }, + { + "epoch": 0.02421233296555106, + "grad_norm": 2.0472546450397986, + "learning_rate": 1.6138917262512768e-05, + "loss": 0.9495, + "step": 790 + }, + { + "epoch": 0.024242981488292265, + "grad_norm": 0.9088449434929005, + "learning_rate": 1.6159346271705823e-05, + "loss": 0.6493, + "step": 791 + }, + { + "epoch": 0.024273630011033467, + "grad_norm": 2.246669299042267, + "learning_rate": 1.617977528089888e-05, + "loss": 0.9146, + "step": 792 + }, + { + "epoch": 0.024304278533774672, + "grad_norm": 2.0672675773745555, + "learning_rate": 1.620020429009193e-05, + "loss": 0.943, + "step": 793 + }, + { + "epoch": 0.024334927056515875, + "grad_norm": 2.3883588878070974, + "learning_rate": 1.6220633299284986e-05, + "loss": 0.8903, + "step": 794 + }, + { + "epoch": 0.02436557557925708, + "grad_norm": 2.2933105658916735, + "learning_rate": 1.6241062308478038e-05, + "loss": 0.9081, + "step": 795 + }, + { + "epoch": 0.024396224101998282, + "grad_norm": 0.8471668303766506, + "learning_rate": 1.6261491317671093e-05, + "loss": 0.612, + "step": 796 + }, + { + "epoch": 0.024426872624739488, + "grad_norm": 2.3706584122421663, + "learning_rate": 1.6281920326864148e-05, + "loss": 0.9243, + "step": 797 + }, + { + "epoch": 0.02445752114748069, + "grad_norm": 2.121494806627518, + "learning_rate": 1.63023493360572e-05, + "loss": 0.875, + "step": 798 + }, + { + "epoch": 0.024488169670221896, + "grad_norm": 2.2085160689182683, + "learning_rate": 1.6322778345250255e-05, + "loss": 0.893, + "step": 799 + }, + { + "epoch": 0.024518818192963098, + "grad_norm": 2.2572466739954162, + "learning_rate": 1.634320735444331e-05, + "loss": 0.8896, + "step": 800 + }, + { + "epoch": 0.024549466715704304, + "grad_norm": 2.193543071626696, + "learning_rate": 1.6363636363636366e-05, + "loss": 0.9387, + "step": 801 + }, + { + "epoch": 0.024580115238445506, + "grad_norm": 2.262118381248826, + "learning_rate": 1.6384065372829418e-05, + "loss": 0.8445, + "step": 802 + }, + { + "epoch": 0.02461076376118671, + "grad_norm": 2.435321066837295, + "learning_rate": 1.6404494382022473e-05, + "loss": 0.9434, + "step": 803 + }, + { + "epoch": 0.024641412283927914, + "grad_norm": 2.301272537771189, + "learning_rate": 1.6424923391215528e-05, + "loss": 0.8937, + "step": 804 + }, + { + "epoch": 0.02467206080666912, + "grad_norm": 2.1480029438827883, + "learning_rate": 1.644535240040858e-05, + "loss": 0.8293, + "step": 805 + }, + { + "epoch": 0.02470270932941032, + "grad_norm": 2.007160705387077, + "learning_rate": 1.6465781409601635e-05, + "loss": 0.8363, + "step": 806 + }, + { + "epoch": 0.024733357852151527, + "grad_norm": 2.5017971721680796, + "learning_rate": 1.648621041879469e-05, + "loss": 0.8758, + "step": 807 + }, + { + "epoch": 0.02476400637489273, + "grad_norm": 2.3108983775116063, + "learning_rate": 1.6506639427987743e-05, + "loss": 0.8976, + "step": 808 + }, + { + "epoch": 0.024794654897633935, + "grad_norm": 1.0646174484849393, + "learning_rate": 1.6527068437180798e-05, + "loss": 0.6692, + "step": 809 + }, + { + "epoch": 0.024825303420375137, + "grad_norm": 0.8833773484425094, + "learning_rate": 1.6547497446373853e-05, + "loss": 0.6329, + "step": 810 + }, + { + "epoch": 0.024855951943116343, + "grad_norm": 2.4250109614712927, + "learning_rate": 1.6567926455566908e-05, + "loss": 0.9288, + "step": 811 + }, + { + "epoch": 0.024886600465857545, + "grad_norm": 2.242605967041266, + "learning_rate": 1.658835546475996e-05, + "loss": 0.8507, + "step": 812 + }, + { + "epoch": 0.02491724898859875, + "grad_norm": 2.260341190214544, + "learning_rate": 1.6608784473953015e-05, + "loss": 0.9993, + "step": 813 + }, + { + "epoch": 0.024947897511339953, + "grad_norm": 2.215546830638552, + "learning_rate": 1.662921348314607e-05, + "loss": 0.9169, + "step": 814 + }, + { + "epoch": 0.02497854603408116, + "grad_norm": 2.17604966685721, + "learning_rate": 1.6649642492339123e-05, + "loss": 0.9557, + "step": 815 + }, + { + "epoch": 0.02500919455682236, + "grad_norm": 2.0911078207823897, + "learning_rate": 1.6670071501532178e-05, + "loss": 0.7526, + "step": 816 + }, + { + "epoch": 0.025039843079563567, + "grad_norm": 2.2242502540122464, + "learning_rate": 1.6690500510725233e-05, + "loss": 0.8117, + "step": 817 + }, + { + "epoch": 0.02507049160230477, + "grad_norm": 2.354508626928835, + "learning_rate": 1.6710929519918285e-05, + "loss": 0.9028, + "step": 818 + }, + { + "epoch": 0.02510114012504597, + "grad_norm": 1.563047765066956, + "learning_rate": 1.673135852911134e-05, + "loss": 0.6889, + "step": 819 + }, + { + "epoch": 0.025131788647787177, + "grad_norm": 1.109277491878629, + "learning_rate": 1.6751787538304395e-05, + "loss": 0.6406, + "step": 820 + }, + { + "epoch": 0.02516243717052838, + "grad_norm": 2.7268596735900053, + "learning_rate": 1.6772216547497447e-05, + "loss": 0.855, + "step": 821 + }, + { + "epoch": 0.025193085693269585, + "grad_norm": 2.5537986302381213, + "learning_rate": 1.6792645556690503e-05, + "loss": 0.9373, + "step": 822 + }, + { + "epoch": 0.025223734216010787, + "grad_norm": 3.001215993510347, + "learning_rate": 1.6813074565883554e-05, + "loss": 0.967, + "step": 823 + }, + { + "epoch": 0.025254382738751992, + "grad_norm": 2.4781697897605697, + "learning_rate": 1.683350357507661e-05, + "loss": 0.9247, + "step": 824 + }, + { + "epoch": 0.025285031261493195, + "grad_norm": 2.3526706615950994, + "learning_rate": 1.6853932584269665e-05, + "loss": 0.9549, + "step": 825 + }, + { + "epoch": 0.0253156797842344, + "grad_norm": 2.370158141441427, + "learning_rate": 1.6874361593462717e-05, + "loss": 0.8719, + "step": 826 + }, + { + "epoch": 0.025346328306975602, + "grad_norm": 2.270603314107824, + "learning_rate": 1.6894790602655772e-05, + "loss": 0.8963, + "step": 827 + }, + { + "epoch": 0.025376976829716808, + "grad_norm": 2.350957659150907, + "learning_rate": 1.6915219611848827e-05, + "loss": 0.9051, + "step": 828 + }, + { + "epoch": 0.02540762535245801, + "grad_norm": 2.7172607813374436, + "learning_rate": 1.693564862104188e-05, + "loss": 0.9772, + "step": 829 + }, + { + "epoch": 0.025438273875199216, + "grad_norm": 2.641489520954732, + "learning_rate": 1.6956077630234934e-05, + "loss": 0.9766, + "step": 830 + }, + { + "epoch": 0.025468922397940418, + "grad_norm": 2.4864283630648516, + "learning_rate": 1.697650663942799e-05, + "loss": 1.0802, + "step": 831 + }, + { + "epoch": 0.025499570920681624, + "grad_norm": 2.5821340828594, + "learning_rate": 1.699693564862104e-05, + "loss": 0.9669, + "step": 832 + }, + { + "epoch": 0.025530219443422826, + "grad_norm": 2.529460432797043, + "learning_rate": 1.7017364657814097e-05, + "loss": 0.9221, + "step": 833 + }, + { + "epoch": 0.02556086796616403, + "grad_norm": 2.184081362371753, + "learning_rate": 1.7037793667007152e-05, + "loss": 0.931, + "step": 834 + }, + { + "epoch": 0.025591516488905234, + "grad_norm": 2.5299712873613096, + "learning_rate": 1.7058222676200207e-05, + "loss": 0.7783, + "step": 835 + }, + { + "epoch": 0.02562216501164644, + "grad_norm": 2.6617723889687634, + "learning_rate": 1.707865168539326e-05, + "loss": 0.9915, + "step": 836 + }, + { + "epoch": 0.025652813534387642, + "grad_norm": 2.2792099697110166, + "learning_rate": 1.7099080694586314e-05, + "loss": 0.8883, + "step": 837 + }, + { + "epoch": 0.025683462057128847, + "grad_norm": 2.5377934865159086, + "learning_rate": 1.711950970377937e-05, + "loss": 0.9838, + "step": 838 + }, + { + "epoch": 0.02571411057987005, + "grad_norm": 2.2165416258938375, + "learning_rate": 1.713993871297242e-05, + "loss": 0.875, + "step": 839 + }, + { + "epoch": 0.025744759102611255, + "grad_norm": 2.2395027821643745, + "learning_rate": 1.7160367722165477e-05, + "loss": 0.8931, + "step": 840 + }, + { + "epoch": 0.025775407625352457, + "grad_norm": 2.165754274368847, + "learning_rate": 1.7180796731358532e-05, + "loss": 0.8753, + "step": 841 + }, + { + "epoch": 0.025806056148093663, + "grad_norm": 2.398522958804236, + "learning_rate": 1.7201225740551584e-05, + "loss": 0.8381, + "step": 842 + }, + { + "epoch": 0.025836704670834865, + "grad_norm": 2.5861627332280426, + "learning_rate": 1.722165474974464e-05, + "loss": 0.81, + "step": 843 + }, + { + "epoch": 0.02586735319357607, + "grad_norm": 2.236191485023975, + "learning_rate": 1.7242083758937694e-05, + "loss": 0.9768, + "step": 844 + }, + { + "epoch": 0.025898001716317273, + "grad_norm": 2.550957928981061, + "learning_rate": 1.726251276813075e-05, + "loss": 0.9213, + "step": 845 + }, + { + "epoch": 0.02592865023905848, + "grad_norm": 2.260644475824352, + "learning_rate": 1.72829417773238e-05, + "loss": 0.8204, + "step": 846 + }, + { + "epoch": 0.02595929876179968, + "grad_norm": 2.584160249566185, + "learning_rate": 1.7303370786516857e-05, + "loss": 0.6868, + "step": 847 + }, + { + "epoch": 0.025989947284540887, + "grad_norm": 2.6026852321425866, + "learning_rate": 1.732379979570991e-05, + "loss": 0.8356, + "step": 848 + }, + { + "epoch": 0.02602059580728209, + "grad_norm": 2.456765494525309, + "learning_rate": 1.7344228804902964e-05, + "loss": 0.8749, + "step": 849 + }, + { + "epoch": 0.026051244330023295, + "grad_norm": 2.415658198621835, + "learning_rate": 1.736465781409602e-05, + "loss": 0.9843, + "step": 850 + }, + { + "epoch": 0.026081892852764497, + "grad_norm": 2.276460015003999, + "learning_rate": 1.738508682328907e-05, + "loss": 0.8721, + "step": 851 + }, + { + "epoch": 0.0261125413755057, + "grad_norm": 2.2501379823606285, + "learning_rate": 1.7405515832482126e-05, + "loss": 0.7886, + "step": 852 + }, + { + "epoch": 0.026143189898246905, + "grad_norm": 2.6228120490334286, + "learning_rate": 1.7425944841675178e-05, + "loss": 0.9556, + "step": 853 + }, + { + "epoch": 0.026173838420988107, + "grad_norm": 2.214302405451103, + "learning_rate": 1.7446373850868234e-05, + "loss": 0.9158, + "step": 854 + }, + { + "epoch": 0.026204486943729313, + "grad_norm": 2.1552251451416615, + "learning_rate": 1.746680286006129e-05, + "loss": 0.9737, + "step": 855 + }, + { + "epoch": 0.026235135466470515, + "grad_norm": 2.5636931841127115, + "learning_rate": 1.748723186925434e-05, + "loss": 0.9248, + "step": 856 + }, + { + "epoch": 0.02626578398921172, + "grad_norm": 2.4696996642549056, + "learning_rate": 1.7507660878447396e-05, + "loss": 0.9425, + "step": 857 + }, + { + "epoch": 0.026296432511952923, + "grad_norm": 2.3314783137029527, + "learning_rate": 1.752808988764045e-05, + "loss": 0.9768, + "step": 858 + }, + { + "epoch": 0.026327081034694128, + "grad_norm": 2.188107437842775, + "learning_rate": 1.7548518896833503e-05, + "loss": 0.8398, + "step": 859 + }, + { + "epoch": 0.02635772955743533, + "grad_norm": 2.3258094779167897, + "learning_rate": 1.7568947906026558e-05, + "loss": 0.8656, + "step": 860 + }, + { + "epoch": 0.026388378080176536, + "grad_norm": 2.266160196031564, + "learning_rate": 1.7589376915219614e-05, + "loss": 0.9522, + "step": 861 + }, + { + "epoch": 0.02641902660291774, + "grad_norm": 2.244667233504949, + "learning_rate": 1.760980592441267e-05, + "loss": 0.9061, + "step": 862 + }, + { + "epoch": 0.026449675125658944, + "grad_norm": 1.8759646061506494, + "learning_rate": 1.763023493360572e-05, + "loss": 0.6924, + "step": 863 + }, + { + "epoch": 0.026480323648400146, + "grad_norm": 2.6053785806616143, + "learning_rate": 1.7650663942798776e-05, + "loss": 0.9348, + "step": 864 + }, + { + "epoch": 0.026510972171141352, + "grad_norm": 2.411555301522078, + "learning_rate": 1.767109295199183e-05, + "loss": 0.938, + "step": 865 + }, + { + "epoch": 0.026541620693882554, + "grad_norm": 2.6881657004216297, + "learning_rate": 1.7691521961184883e-05, + "loss": 0.9499, + "step": 866 + }, + { + "epoch": 0.02657226921662376, + "grad_norm": 2.222281119130815, + "learning_rate": 1.7711950970377938e-05, + "loss": 0.891, + "step": 867 + }, + { + "epoch": 0.026602917739364962, + "grad_norm": 2.36352796160113, + "learning_rate": 1.7732379979570994e-05, + "loss": 0.9145, + "step": 868 + }, + { + "epoch": 0.026633566262106168, + "grad_norm": 2.39324547410254, + "learning_rate": 1.7752808988764045e-05, + "loss": 0.9204, + "step": 869 + }, + { + "epoch": 0.02666421478484737, + "grad_norm": 2.4749580204581845, + "learning_rate": 1.77732379979571e-05, + "loss": 0.8486, + "step": 870 + }, + { + "epoch": 0.026694863307588575, + "grad_norm": 2.147035648891549, + "learning_rate": 1.7793667007150156e-05, + "loss": 0.8315, + "step": 871 + }, + { + "epoch": 0.026725511830329778, + "grad_norm": 2.649985551776469, + "learning_rate": 1.781409601634321e-05, + "loss": 0.9464, + "step": 872 + }, + { + "epoch": 0.026756160353070983, + "grad_norm": 2.1705246091966326, + "learning_rate": 1.7834525025536263e-05, + "loss": 0.9043, + "step": 873 + }, + { + "epoch": 0.026786808875812185, + "grad_norm": 1.0879243485871362, + "learning_rate": 1.7854954034729318e-05, + "loss": 0.6962, + "step": 874 + }, + { + "epoch": 0.02681745739855339, + "grad_norm": 2.848820043233013, + "learning_rate": 1.7875383043922374e-05, + "loss": 0.8915, + "step": 875 + }, + { + "epoch": 0.026848105921294593, + "grad_norm": 2.300697671186625, + "learning_rate": 1.7895812053115425e-05, + "loss": 0.8392, + "step": 876 + }, + { + "epoch": 0.0268787544440358, + "grad_norm": 2.8918341878765497, + "learning_rate": 1.791624106230848e-05, + "loss": 0.982, + "step": 877 + }, + { + "epoch": 0.026909402966777, + "grad_norm": 2.199110159667056, + "learning_rate": 1.7936670071501533e-05, + "loss": 0.8213, + "step": 878 + }, + { + "epoch": 0.026940051489518207, + "grad_norm": 2.64344144968277, + "learning_rate": 1.7957099080694588e-05, + "loss": 0.9171, + "step": 879 + }, + { + "epoch": 0.02697070001225941, + "grad_norm": 2.2831548542561224, + "learning_rate": 1.7977528089887643e-05, + "loss": 0.9096, + "step": 880 + }, + { + "epoch": 0.027001348535000615, + "grad_norm": 2.1794992845904, + "learning_rate": 1.7997957099080695e-05, + "loss": 0.8683, + "step": 881 + }, + { + "epoch": 0.027031997057741817, + "grad_norm": 1.029408774566427, + "learning_rate": 1.801838610827375e-05, + "loss": 0.6836, + "step": 882 + }, + { + "epoch": 0.02706264558048302, + "grad_norm": 2.2455159755372827, + "learning_rate": 1.8038815117466802e-05, + "loss": 0.8951, + "step": 883 + }, + { + "epoch": 0.027093294103224225, + "grad_norm": 2.295742979781576, + "learning_rate": 1.8059244126659857e-05, + "loss": 0.9122, + "step": 884 + }, + { + "epoch": 0.027123942625965427, + "grad_norm": 3.18650394686046, + "learning_rate": 1.8079673135852913e-05, + "loss": 0.9093, + "step": 885 + }, + { + "epoch": 0.027154591148706633, + "grad_norm": 2.2475426803449974, + "learning_rate": 1.8100102145045964e-05, + "loss": 0.9989, + "step": 886 + }, + { + "epoch": 0.027185239671447835, + "grad_norm": 1.9925150061897976, + "learning_rate": 1.812053115423902e-05, + "loss": 0.8159, + "step": 887 + }, + { + "epoch": 0.02721588819418904, + "grad_norm": 2.2665475402610973, + "learning_rate": 1.8140960163432075e-05, + "loss": 0.9488, + "step": 888 + }, + { + "epoch": 0.027246536716930243, + "grad_norm": 1.9245667834531412, + "learning_rate": 1.816138917262513e-05, + "loss": 0.8464, + "step": 889 + }, + { + "epoch": 0.02727718523967145, + "grad_norm": 2.544426768680159, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.9157, + "step": 890 + }, + { + "epoch": 0.02730783376241265, + "grad_norm": 2.23068061347051, + "learning_rate": 1.8202247191011237e-05, + "loss": 0.8208, + "step": 891 + }, + { + "epoch": 0.027338482285153856, + "grad_norm": 2.3615430536874262, + "learning_rate": 1.8222676200204293e-05, + "loss": 0.8988, + "step": 892 + }, + { + "epoch": 0.02736913080789506, + "grad_norm": 1.9556353205975716, + "learning_rate": 1.8243105209397344e-05, + "loss": 0.8827, + "step": 893 + }, + { + "epoch": 0.027399779330636264, + "grad_norm": 2.338988414256518, + "learning_rate": 1.82635342185904e-05, + "loss": 0.8747, + "step": 894 + }, + { + "epoch": 0.027430427853377466, + "grad_norm": 2.06270068014544, + "learning_rate": 1.8283963227783455e-05, + "loss": 0.7769, + "step": 895 + }, + { + "epoch": 0.027461076376118672, + "grad_norm": 2.5576607093325667, + "learning_rate": 1.830439223697651e-05, + "loss": 0.967, + "step": 896 + }, + { + "epoch": 0.027491724898859874, + "grad_norm": 2.341014618423138, + "learning_rate": 1.8324821246169562e-05, + "loss": 0.7999, + "step": 897 + }, + { + "epoch": 0.02752237342160108, + "grad_norm": 2.191494624149204, + "learning_rate": 1.8345250255362617e-05, + "loss": 0.8498, + "step": 898 + }, + { + "epoch": 0.027553021944342282, + "grad_norm": 0.9060567211289217, + "learning_rate": 1.8365679264555673e-05, + "loss": 0.6643, + "step": 899 + }, + { + "epoch": 0.027583670467083488, + "grad_norm": 2.0821140564836402, + "learning_rate": 1.8386108273748724e-05, + "loss": 0.8396, + "step": 900 + }, + { + "epoch": 0.02761431898982469, + "grad_norm": 2.412065393210783, + "learning_rate": 1.840653728294178e-05, + "loss": 0.9255, + "step": 901 + }, + { + "epoch": 0.027644967512565895, + "grad_norm": 2.0655487781982296, + "learning_rate": 1.8426966292134835e-05, + "loss": 0.9473, + "step": 902 + }, + { + "epoch": 0.027675616035307098, + "grad_norm": 2.532452402043399, + "learning_rate": 1.8447395301327887e-05, + "loss": 0.9654, + "step": 903 + }, + { + "epoch": 0.027706264558048303, + "grad_norm": 2.18531069994713, + "learning_rate": 1.8467824310520942e-05, + "loss": 0.9649, + "step": 904 + }, + { + "epoch": 0.027736913080789505, + "grad_norm": 2.1429650948467103, + "learning_rate": 1.8488253319713997e-05, + "loss": 0.9605, + "step": 905 + }, + { + "epoch": 0.02776756160353071, + "grad_norm": 2.233441165416269, + "learning_rate": 1.850868232890705e-05, + "loss": 0.9073, + "step": 906 + }, + { + "epoch": 0.027798210126271913, + "grad_norm": 2.158704404485131, + "learning_rate": 1.8529111338100104e-05, + "loss": 0.863, + "step": 907 + }, + { + "epoch": 0.02782885864901312, + "grad_norm": 2.301702330427692, + "learning_rate": 1.8549540347293156e-05, + "loss": 0.925, + "step": 908 + }, + { + "epoch": 0.02785950717175432, + "grad_norm": 0.8722055862303176, + "learning_rate": 1.856996935648621e-05, + "loss": 0.6589, + "step": 909 + }, + { + "epoch": 0.027890155694495527, + "grad_norm": 2.1206820850784163, + "learning_rate": 1.8590398365679267e-05, + "loss": 0.7923, + "step": 910 + }, + { + "epoch": 0.02792080421723673, + "grad_norm": 2.2071322252300654, + "learning_rate": 1.861082737487232e-05, + "loss": 0.817, + "step": 911 + }, + { + "epoch": 0.027951452739977935, + "grad_norm": 2.3385599547013456, + "learning_rate": 1.8631256384065374e-05, + "loss": 1.0121, + "step": 912 + }, + { + "epoch": 0.027982101262719137, + "grad_norm": 2.164982359112029, + "learning_rate": 1.8651685393258426e-05, + "loss": 0.9374, + "step": 913 + }, + { + "epoch": 0.02801274978546034, + "grad_norm": 2.067184196524621, + "learning_rate": 1.867211440245148e-05, + "loss": 0.9794, + "step": 914 + }, + { + "epoch": 0.028043398308201545, + "grad_norm": 2.030642355217253, + "learning_rate": 1.8692543411644536e-05, + "loss": 0.9015, + "step": 915 + }, + { + "epoch": 0.028074046830942747, + "grad_norm": 2.2454592814885, + "learning_rate": 1.871297242083759e-05, + "loss": 0.938, + "step": 916 + }, + { + "epoch": 0.028104695353683953, + "grad_norm": 1.982642470605912, + "learning_rate": 1.8733401430030644e-05, + "loss": 0.8687, + "step": 917 + }, + { + "epoch": 0.028135343876425155, + "grad_norm": 1.992687725310106, + "learning_rate": 1.87538304392237e-05, + "loss": 0.9101, + "step": 918 + }, + { + "epoch": 0.02816599239916636, + "grad_norm": 2.0855027676240154, + "learning_rate": 1.8774259448416754e-05, + "loss": 0.9606, + "step": 919 + }, + { + "epoch": 0.028196640921907563, + "grad_norm": 2.2014345686814973, + "learning_rate": 1.8794688457609806e-05, + "loss": 0.8968, + "step": 920 + }, + { + "epoch": 0.02822728944464877, + "grad_norm": 2.282684963495983, + "learning_rate": 1.881511746680286e-05, + "loss": 0.8996, + "step": 921 + }, + { + "epoch": 0.02825793796738997, + "grad_norm": 2.0534550210867577, + "learning_rate": 1.8835546475995916e-05, + "loss": 0.9013, + "step": 922 + }, + { + "epoch": 0.028288586490131176, + "grad_norm": 2.157021181634154, + "learning_rate": 1.885597548518897e-05, + "loss": 0.7988, + "step": 923 + }, + { + "epoch": 0.02831923501287238, + "grad_norm": 0.8467015925177491, + "learning_rate": 1.8876404494382024e-05, + "loss": 0.6764, + "step": 924 + }, + { + "epoch": 0.028349883535613584, + "grad_norm": 0.8289167341645636, + "learning_rate": 1.889683350357508e-05, + "loss": 0.668, + "step": 925 + }, + { + "epoch": 0.028380532058354786, + "grad_norm": 2.2407579091525194, + "learning_rate": 1.8917262512768134e-05, + "loss": 0.7919, + "step": 926 + }, + { + "epoch": 0.028411180581095992, + "grad_norm": 2.1804522891191467, + "learning_rate": 1.8937691521961186e-05, + "loss": 0.9527, + "step": 927 + }, + { + "epoch": 0.028441829103837194, + "grad_norm": 2.1764601389953033, + "learning_rate": 1.895812053115424e-05, + "loss": 1.0072, + "step": 928 + }, + { + "epoch": 0.0284724776265784, + "grad_norm": 2.1030274803586186, + "learning_rate": 1.8978549540347296e-05, + "loss": 0.8371, + "step": 929 + }, + { + "epoch": 0.028503126149319602, + "grad_norm": 2.145489363553328, + "learning_rate": 1.8998978549540348e-05, + "loss": 0.887, + "step": 930 + }, + { + "epoch": 0.028533774672060808, + "grad_norm": 2.8539019880784067, + "learning_rate": 1.9019407558733404e-05, + "loss": 0.7498, + "step": 931 + }, + { + "epoch": 0.02856442319480201, + "grad_norm": 2.2944915533739216, + "learning_rate": 1.903983656792646e-05, + "loss": 0.943, + "step": 932 + }, + { + "epoch": 0.028595071717543216, + "grad_norm": 2.0849534135607657, + "learning_rate": 1.906026557711951e-05, + "loss": 0.7855, + "step": 933 + }, + { + "epoch": 0.028625720240284418, + "grad_norm": 2.5094261278186116, + "learning_rate": 1.9080694586312566e-05, + "loss": 0.9568, + "step": 934 + }, + { + "epoch": 0.028656368763025623, + "grad_norm": 1.2782845836102923, + "learning_rate": 1.910112359550562e-05, + "loss": 0.6635, + "step": 935 + }, + { + "epoch": 0.028687017285766826, + "grad_norm": 2.4355661357579996, + "learning_rate": 1.9121552604698673e-05, + "loss": 0.9039, + "step": 936 + }, + { + "epoch": 0.02871766580850803, + "grad_norm": 2.3909950032313323, + "learning_rate": 1.9141981613891728e-05, + "loss": 0.8776, + "step": 937 + }, + { + "epoch": 0.028748314331249233, + "grad_norm": 2.5018805980547736, + "learning_rate": 1.916241062308478e-05, + "loss": 1.014, + "step": 938 + }, + { + "epoch": 0.02877896285399044, + "grad_norm": 2.0993327178380294, + "learning_rate": 1.9182839632277835e-05, + "loss": 0.7372, + "step": 939 + }, + { + "epoch": 0.02880961137673164, + "grad_norm": 2.326872466132623, + "learning_rate": 1.920326864147089e-05, + "loss": 0.9141, + "step": 940 + }, + { + "epoch": 0.028840259899472847, + "grad_norm": 2.0305469756009633, + "learning_rate": 1.9223697650663943e-05, + "loss": 0.8818, + "step": 941 + }, + { + "epoch": 0.02887090842221405, + "grad_norm": 2.6513152047698685, + "learning_rate": 1.9244126659856998e-05, + "loss": 0.9364, + "step": 942 + }, + { + "epoch": 0.028901556944955255, + "grad_norm": 1.1730394556445423, + "learning_rate": 1.9264555669050053e-05, + "loss": 0.6706, + "step": 943 + }, + { + "epoch": 0.028932205467696457, + "grad_norm": 2.3142143419787446, + "learning_rate": 1.9284984678243105e-05, + "loss": 0.9398, + "step": 944 + }, + { + "epoch": 0.02896285399043766, + "grad_norm": 2.1790632339225326, + "learning_rate": 1.930541368743616e-05, + "loss": 0.9355, + "step": 945 + }, + { + "epoch": 0.028993502513178865, + "grad_norm": 2.1597517089217644, + "learning_rate": 1.9325842696629215e-05, + "loss": 0.8992, + "step": 946 + }, + { + "epoch": 0.029024151035920067, + "grad_norm": 2.219568116334477, + "learning_rate": 1.9346271705822267e-05, + "loss": 0.9503, + "step": 947 + }, + { + "epoch": 0.029054799558661273, + "grad_norm": 2.001276048907056, + "learning_rate": 1.9366700715015323e-05, + "loss": 0.8511, + "step": 948 + }, + { + "epoch": 0.029085448081402475, + "grad_norm": 2.1425870324909173, + "learning_rate": 1.9387129724208378e-05, + "loss": 0.8854, + "step": 949 + }, + { + "epoch": 0.02911609660414368, + "grad_norm": 2.037752960070619, + "learning_rate": 1.9407558733401433e-05, + "loss": 0.9008, + "step": 950 + }, + { + "epoch": 0.029146745126884883, + "grad_norm": 1.870567506410992, + "learning_rate": 1.9427987742594485e-05, + "loss": 0.8215, + "step": 951 + }, + { + "epoch": 0.02917739364962609, + "grad_norm": 2.2472164418719003, + "learning_rate": 1.944841675178754e-05, + "loss": 0.8785, + "step": 952 + }, + { + "epoch": 0.02920804217236729, + "grad_norm": 2.397830921362208, + "learning_rate": 1.9468845760980595e-05, + "loss": 0.8938, + "step": 953 + }, + { + "epoch": 0.029238690695108496, + "grad_norm": 2.1977823302643866, + "learning_rate": 1.9489274770173647e-05, + "loss": 0.9993, + "step": 954 + }, + { + "epoch": 0.0292693392178497, + "grad_norm": 2.303548437549388, + "learning_rate": 1.9509703779366703e-05, + "loss": 0.9728, + "step": 955 + }, + { + "epoch": 0.029299987740590904, + "grad_norm": 1.9787790177568383, + "learning_rate": 1.9530132788559758e-05, + "loss": 0.9397, + "step": 956 + }, + { + "epoch": 0.029330636263332106, + "grad_norm": 2.3156163480839003, + "learning_rate": 1.955056179775281e-05, + "loss": 0.8873, + "step": 957 + }, + { + "epoch": 0.029361284786073312, + "grad_norm": 2.0961936343028085, + "learning_rate": 1.9570990806945865e-05, + "loss": 0.9766, + "step": 958 + }, + { + "epoch": 0.029391933308814514, + "grad_norm": 2.207620210353232, + "learning_rate": 1.959141981613892e-05, + "loss": 0.89, + "step": 959 + }, + { + "epoch": 0.02942258183155572, + "grad_norm": 2.165712542582907, + "learning_rate": 1.9611848825331975e-05, + "loss": 0.8789, + "step": 960 + }, + { + "epoch": 0.029453230354296922, + "grad_norm": 2.2688415214763586, + "learning_rate": 1.9632277834525027e-05, + "loss": 0.8931, + "step": 961 + }, + { + "epoch": 0.029483878877038128, + "grad_norm": 1.1020568866561649, + "learning_rate": 1.9652706843718083e-05, + "loss": 0.6797, + "step": 962 + }, + { + "epoch": 0.02951452739977933, + "grad_norm": 2.230987169752958, + "learning_rate": 1.9673135852911134e-05, + "loss": 0.919, + "step": 963 + }, + { + "epoch": 0.029545175922520536, + "grad_norm": 2.256064913408597, + "learning_rate": 1.969356486210419e-05, + "loss": 0.8418, + "step": 964 + }, + { + "epoch": 0.029575824445261738, + "grad_norm": 2.0634882632387725, + "learning_rate": 1.9713993871297245e-05, + "loss": 0.9051, + "step": 965 + }, + { + "epoch": 0.029606472968002943, + "grad_norm": 2.057761116195948, + "learning_rate": 1.9734422880490297e-05, + "loss": 0.8985, + "step": 966 + }, + { + "epoch": 0.029637121490744146, + "grad_norm": 2.29173234292947, + "learning_rate": 1.9754851889683352e-05, + "loss": 0.9315, + "step": 967 + }, + { + "epoch": 0.02966777001348535, + "grad_norm": 2.0851790309090394, + "learning_rate": 1.9775280898876404e-05, + "loss": 0.7643, + "step": 968 + }, + { + "epoch": 0.029698418536226553, + "grad_norm": 2.166005705459702, + "learning_rate": 1.979570990806946e-05, + "loss": 0.9695, + "step": 969 + }, + { + "epoch": 0.02972906705896776, + "grad_norm": 2.1513222634211258, + "learning_rate": 1.9816138917262514e-05, + "loss": 0.8109, + "step": 970 + }, + { + "epoch": 0.02975971558170896, + "grad_norm": 2.799441036021039, + "learning_rate": 1.9836567926455566e-05, + "loss": 0.9755, + "step": 971 + }, + { + "epoch": 0.029790364104450167, + "grad_norm": 2.1116276344609934, + "learning_rate": 1.985699693564862e-05, + "loss": 0.9659, + "step": 972 + }, + { + "epoch": 0.02982101262719137, + "grad_norm": 2.0041385140226766, + "learning_rate": 1.9877425944841677e-05, + "loss": 0.7754, + "step": 973 + }, + { + "epoch": 0.029851661149932575, + "grad_norm": 1.0444606288715939, + "learning_rate": 1.989785495403473e-05, + "loss": 0.6706, + "step": 974 + }, + { + "epoch": 0.029882309672673777, + "grad_norm": 2.217495561051509, + "learning_rate": 1.9918283963227784e-05, + "loss": 0.8937, + "step": 975 + }, + { + "epoch": 0.029912958195414983, + "grad_norm": 2.2754378363535483, + "learning_rate": 1.993871297242084e-05, + "loss": 0.9318, + "step": 976 + }, + { + "epoch": 0.029943606718156185, + "grad_norm": 1.9599418847983785, + "learning_rate": 1.9959141981613895e-05, + "loss": 0.8804, + "step": 977 + }, + { + "epoch": 0.029974255240897387, + "grad_norm": 2.105868487553985, + "learning_rate": 1.9979570990806946e-05, + "loss": 0.9473, + "step": 978 + }, + { + "epoch": 0.030004903763638593, + "grad_norm": 2.309131924892109, + "learning_rate": 2e-05, + "loss": 0.9381, + "step": 979 + }, + { + "epoch": 0.030035552286379795, + "grad_norm": 2.2385854571500423, + "learning_rate": 1.9999999950733723e-05, + "loss": 0.9994, + "step": 980 + }, + { + "epoch": 0.030066200809121, + "grad_norm": 2.348136842948986, + "learning_rate": 1.9999999802934886e-05, + "loss": 0.8436, + "step": 981 + }, + { + "epoch": 0.030096849331862203, + "grad_norm": 2.1691814440854214, + "learning_rate": 1.9999999556603492e-05, + "loss": 0.9627, + "step": 982 + }, + { + "epoch": 0.03012749785460341, + "grad_norm": 2.2244898542736835, + "learning_rate": 1.9999999211739543e-05, + "loss": 1.0351, + "step": 983 + }, + { + "epoch": 0.03015814637734461, + "grad_norm": 2.685389694864552, + "learning_rate": 1.9999998768343044e-05, + "loss": 0.8986, + "step": 984 + }, + { + "epoch": 0.030188794900085816, + "grad_norm": 0.8809481303666737, + "learning_rate": 1.9999998226414e-05, + "loss": 0.6779, + "step": 985 + }, + { + "epoch": 0.03021944342282702, + "grad_norm": 2.283147145800024, + "learning_rate": 1.9999997585952412e-05, + "loss": 0.926, + "step": 986 + }, + { + "epoch": 0.030250091945568224, + "grad_norm": 2.36907327298119, + "learning_rate": 1.999999684695829e-05, + "loss": 0.8614, + "step": 987 + }, + { + "epoch": 0.030280740468309426, + "grad_norm": 2.338023565516452, + "learning_rate": 1.999999600943164e-05, + "loss": 0.8877, + "step": 988 + }, + { + "epoch": 0.030311388991050632, + "grad_norm": 0.9425525339479315, + "learning_rate": 1.9999995073372472e-05, + "loss": 0.6678, + "step": 989 + }, + { + "epoch": 0.030342037513791834, + "grad_norm": 2.4265533083697064, + "learning_rate": 1.9999994038780796e-05, + "loss": 0.8946, + "step": 990 + }, + { + "epoch": 0.03037268603653304, + "grad_norm": 2.308952865827204, + "learning_rate": 1.9999992905656612e-05, + "loss": 0.9601, + "step": 991 + }, + { + "epoch": 0.030403334559274242, + "grad_norm": 0.8300474406787752, + "learning_rate": 1.999999167399995e-05, + "loss": 0.6306, + "step": 992 + }, + { + "epoch": 0.030433983082015448, + "grad_norm": 2.1009600202413594, + "learning_rate": 1.9999990343810803e-05, + "loss": 0.7689, + "step": 993 + }, + { + "epoch": 0.03046463160475665, + "grad_norm": 2.232735846257404, + "learning_rate": 1.9999988915089197e-05, + "loss": 1.0329, + "step": 994 + }, + { + "epoch": 0.030495280127497856, + "grad_norm": 0.9571906713530214, + "learning_rate": 1.999998738783514e-05, + "loss": 0.6807, + "step": 995 + }, + { + "epoch": 0.030525928650239058, + "grad_norm": 2.1266732848243546, + "learning_rate": 1.999998576204865e-05, + "loss": 0.8597, + "step": 996 + }, + { + "epoch": 0.030556577172980263, + "grad_norm": 2.030247914305526, + "learning_rate": 1.9999984037729742e-05, + "loss": 0.9802, + "step": 997 + }, + { + "epoch": 0.030587225695721466, + "grad_norm": 2.040797260531638, + "learning_rate": 1.999998221487843e-05, + "loss": 0.8788, + "step": 998 + }, + { + "epoch": 0.03061787421846267, + "grad_norm": 2.310324026775167, + "learning_rate": 1.9999980293494738e-05, + "loss": 0.888, + "step": 999 + }, + { + "epoch": 0.030648522741203874, + "grad_norm": 0.83865832206257, + "learning_rate": 1.9999978273578677e-05, + "loss": 0.66, + "step": 1000 + }, + { + "epoch": 0.03067917126394508, + "grad_norm": 2.0181536334331662, + "learning_rate": 1.9999976155130275e-05, + "loss": 0.9247, + "step": 1001 + }, + { + "epoch": 0.03070981978668628, + "grad_norm": 1.9911347832430981, + "learning_rate": 1.999997393814955e-05, + "loss": 0.8463, + "step": 1002 + }, + { + "epoch": 0.030740468309427487, + "grad_norm": 1.983393942169425, + "learning_rate": 1.999997162263652e-05, + "loss": 0.8307, + "step": 1003 + }, + { + "epoch": 0.03077111683216869, + "grad_norm": 2.022469700055875, + "learning_rate": 1.999996920859121e-05, + "loss": 0.9308, + "step": 1004 + }, + { + "epoch": 0.030801765354909895, + "grad_norm": 2.2273735016423344, + "learning_rate": 1.999996669601365e-05, + "loss": 0.8674, + "step": 1005 + }, + { + "epoch": 0.030832413877651097, + "grad_norm": 2.0441143988529813, + "learning_rate": 1.9999964084903855e-05, + "loss": 0.9548, + "step": 1006 + }, + { + "epoch": 0.030863062400392303, + "grad_norm": 2.283082751795126, + "learning_rate": 1.9999961375261862e-05, + "loss": 0.8317, + "step": 1007 + }, + { + "epoch": 0.030893710923133505, + "grad_norm": 1.9568621713180556, + "learning_rate": 1.9999958567087684e-05, + "loss": 0.9441, + "step": 1008 + }, + { + "epoch": 0.030924359445874707, + "grad_norm": 1.994559496441306, + "learning_rate": 1.999995566038136e-05, + "loss": 0.9533, + "step": 1009 + }, + { + "epoch": 0.030955007968615913, + "grad_norm": 1.9229966233964573, + "learning_rate": 1.999995265514291e-05, + "loss": 0.7991, + "step": 1010 + }, + { + "epoch": 0.030985656491357115, + "grad_norm": 1.9070981658936206, + "learning_rate": 1.9999949551372372e-05, + "loss": 0.7895, + "step": 1011 + }, + { + "epoch": 0.03101630501409832, + "grad_norm": 2.008922955641477, + "learning_rate": 1.999994634906977e-05, + "loss": 0.8599, + "step": 1012 + }, + { + "epoch": 0.031046953536839523, + "grad_norm": 2.3022968176818512, + "learning_rate": 1.9999943048235137e-05, + "loss": 0.9308, + "step": 1013 + }, + { + "epoch": 0.03107760205958073, + "grad_norm": 2.2098193092688914, + "learning_rate": 1.999993964886851e-05, + "loss": 0.8998, + "step": 1014 + }, + { + "epoch": 0.03110825058232193, + "grad_norm": 2.262873230078516, + "learning_rate": 1.9999936150969918e-05, + "loss": 0.9002, + "step": 1015 + }, + { + "epoch": 0.031138899105063136, + "grad_norm": 1.9895220297616418, + "learning_rate": 1.9999932554539395e-05, + "loss": 0.8217, + "step": 1016 + }, + { + "epoch": 0.03116954762780434, + "grad_norm": 1.9735743049326322, + "learning_rate": 1.9999928859576975e-05, + "loss": 0.9239, + "step": 1017 + }, + { + "epoch": 0.031200196150545544, + "grad_norm": 2.216689574449724, + "learning_rate": 1.9999925066082705e-05, + "loss": 1.0021, + "step": 1018 + }, + { + "epoch": 0.031230844673286746, + "grad_norm": 2.1010261757374393, + "learning_rate": 1.9999921174056606e-05, + "loss": 0.927, + "step": 1019 + }, + { + "epoch": 0.03126149319602795, + "grad_norm": 2.093448657784933, + "learning_rate": 1.9999917183498732e-05, + "loss": 0.9058, + "step": 1020 + }, + { + "epoch": 0.03129214171876916, + "grad_norm": 2.186485798457432, + "learning_rate": 1.9999913094409114e-05, + "loss": 0.97, + "step": 1021 + }, + { + "epoch": 0.031322790241510357, + "grad_norm": 1.860882387855947, + "learning_rate": 1.999990890678779e-05, + "loss": 0.8275, + "step": 1022 + }, + { + "epoch": 0.03135343876425156, + "grad_norm": 2.164016261227075, + "learning_rate": 1.999990462063481e-05, + "loss": 0.969, + "step": 1023 + }, + { + "epoch": 0.03138408728699277, + "grad_norm": 2.4320477831155665, + "learning_rate": 1.9999900235950207e-05, + "loss": 0.8898, + "step": 1024 + }, + { + "epoch": 0.031414735809733974, + "grad_norm": 2.281783799820304, + "learning_rate": 1.999989575273403e-05, + "loss": 0.9112, + "step": 1025 + }, + { + "epoch": 0.03144538433247517, + "grad_norm": 2.077832544941178, + "learning_rate": 1.9999891170986326e-05, + "loss": 0.8339, + "step": 1026 + }, + { + "epoch": 0.03147603285521638, + "grad_norm": 2.0871770972646133, + "learning_rate": 1.999988649070713e-05, + "loss": 0.8872, + "step": 1027 + }, + { + "epoch": 0.031506681377957584, + "grad_norm": 2.4306139755095413, + "learning_rate": 1.9999881711896494e-05, + "loss": 0.9626, + "step": 1028 + }, + { + "epoch": 0.03153732990069879, + "grad_norm": 2.0239983479640213, + "learning_rate": 1.9999876834554467e-05, + "loss": 0.9451, + "step": 1029 + }, + { + "epoch": 0.03156797842343999, + "grad_norm": 2.1716189243031194, + "learning_rate": 1.9999871858681096e-05, + "loss": 0.9748, + "step": 1030 + }, + { + "epoch": 0.031598626946181194, + "grad_norm": 2.2407418410376234, + "learning_rate": 1.9999866784276424e-05, + "loss": 0.9267, + "step": 1031 + }, + { + "epoch": 0.0316292754689224, + "grad_norm": 2.3488461479098923, + "learning_rate": 1.9999861611340512e-05, + "loss": 0.9686, + "step": 1032 + }, + { + "epoch": 0.031659923991663605, + "grad_norm": 2.576264826436668, + "learning_rate": 1.99998563398734e-05, + "loss": 0.9659, + "step": 1033 + }, + { + "epoch": 0.031690572514404804, + "grad_norm": 2.098045784144076, + "learning_rate": 1.999985096987515e-05, + "loss": 0.9501, + "step": 1034 + }, + { + "epoch": 0.03172122103714601, + "grad_norm": 2.2951793528160263, + "learning_rate": 1.9999845501345806e-05, + "loss": 0.9001, + "step": 1035 + }, + { + "epoch": 0.031751869559887215, + "grad_norm": 2.2574909769105433, + "learning_rate": 1.9999839934285426e-05, + "loss": 0.9041, + "step": 1036 + }, + { + "epoch": 0.03178251808262842, + "grad_norm": 0.9559545908592992, + "learning_rate": 1.9999834268694064e-05, + "loss": 0.6641, + "step": 1037 + }, + { + "epoch": 0.03181316660536962, + "grad_norm": 0.8771196871853472, + "learning_rate": 1.9999828504571778e-05, + "loss": 0.6404, + "step": 1038 + }, + { + "epoch": 0.031843815128110825, + "grad_norm": 2.0977974578013447, + "learning_rate": 1.9999822641918625e-05, + "loss": 0.9183, + "step": 1039 + }, + { + "epoch": 0.03187446365085203, + "grad_norm": 2.1914707803390643, + "learning_rate": 1.9999816680734662e-05, + "loss": 1.0058, + "step": 1040 + }, + { + "epoch": 0.03190511217359323, + "grad_norm": 2.3423783787331676, + "learning_rate": 1.999981062101994e-05, + "loss": 0.8839, + "step": 1041 + }, + { + "epoch": 0.031935760696334435, + "grad_norm": 2.1361969770367795, + "learning_rate": 1.999980446277453e-05, + "loss": 0.8888, + "step": 1042 + }, + { + "epoch": 0.03196640921907564, + "grad_norm": 1.9040943491596385, + "learning_rate": 1.9999798205998486e-05, + "loss": 0.7862, + "step": 1043 + }, + { + "epoch": 0.031997057741816846, + "grad_norm": 1.1434469617427776, + "learning_rate": 1.9999791850691875e-05, + "loss": 0.6782, + "step": 1044 + }, + { + "epoch": 0.032027706264558045, + "grad_norm": 2.2733414003754504, + "learning_rate": 1.9999785396854753e-05, + "loss": 0.9288, + "step": 1045 + }, + { + "epoch": 0.03205835478729925, + "grad_norm": 2.085592906798706, + "learning_rate": 1.9999778844487187e-05, + "loss": 0.9627, + "step": 1046 + }, + { + "epoch": 0.032089003310040456, + "grad_norm": 2.0343245930018066, + "learning_rate": 1.9999772193589246e-05, + "loss": 0.9552, + "step": 1047 + }, + { + "epoch": 0.03211965183278166, + "grad_norm": 0.8696422801119605, + "learning_rate": 1.9999765444160983e-05, + "loss": 0.6563, + "step": 1048 + }, + { + "epoch": 0.03215030035552286, + "grad_norm": 2.3558843323079883, + "learning_rate": 1.999975859620248e-05, + "loss": 0.8783, + "step": 1049 + }, + { + "epoch": 0.032180948878264067, + "grad_norm": 1.8622706459720968, + "learning_rate": 1.9999751649713794e-05, + "loss": 0.8896, + "step": 1050 + }, + { + "epoch": 0.03221159740100527, + "grad_norm": 2.214030425570332, + "learning_rate": 1.9999744604694996e-05, + "loss": 0.9737, + "step": 1051 + }, + { + "epoch": 0.03224224592374648, + "grad_norm": 2.244730958586319, + "learning_rate": 1.9999737461146155e-05, + "loss": 0.8947, + "step": 1052 + }, + { + "epoch": 0.03227289444648768, + "grad_norm": 2.09433188388089, + "learning_rate": 1.9999730219067345e-05, + "loss": 0.9413, + "step": 1053 + }, + { + "epoch": 0.03230354296922888, + "grad_norm": 0.8026657736403225, + "learning_rate": 1.9999722878458633e-05, + "loss": 0.6544, + "step": 1054 + }, + { + "epoch": 0.03233419149197009, + "grad_norm": 0.866426570079266, + "learning_rate": 1.9999715439320095e-05, + "loss": 0.6587, + "step": 1055 + }, + { + "epoch": 0.032364840014711294, + "grad_norm": 2.0085909603836014, + "learning_rate": 1.9999707901651797e-05, + "loss": 0.8268, + "step": 1056 + }, + { + "epoch": 0.03239548853745249, + "grad_norm": 2.146063320362971, + "learning_rate": 1.9999700265453825e-05, + "loss": 0.9382, + "step": 1057 + }, + { + "epoch": 0.0324261370601937, + "grad_norm": 1.9223092018384706, + "learning_rate": 1.9999692530726243e-05, + "loss": 0.8201, + "step": 1058 + }, + { + "epoch": 0.032456785582934904, + "grad_norm": 2.1972748572001457, + "learning_rate": 1.9999684697469132e-05, + "loss": 0.9012, + "step": 1059 + }, + { + "epoch": 0.03248743410567611, + "grad_norm": 2.168824899445523, + "learning_rate": 1.999967676568257e-05, + "loss": 0.8924, + "step": 1060 + }, + { + "epoch": 0.03251808262841731, + "grad_norm": 1.938030844407273, + "learning_rate": 1.999966873536664e-05, + "loss": 0.8132, + "step": 1061 + }, + { + "epoch": 0.032548731151158514, + "grad_norm": 0.9139769206429674, + "learning_rate": 1.9999660606521406e-05, + "loss": 0.6446, + "step": 1062 + }, + { + "epoch": 0.03257937967389972, + "grad_norm": 2.0138107842306057, + "learning_rate": 1.9999652379146963e-05, + "loss": 0.9164, + "step": 1063 + }, + { + "epoch": 0.032610028196640925, + "grad_norm": 2.3099292700081144, + "learning_rate": 1.9999644053243384e-05, + "loss": 0.8832, + "step": 1064 + }, + { + "epoch": 0.032640676719382124, + "grad_norm": 2.066682196779274, + "learning_rate": 1.9999635628810758e-05, + "loss": 0.8177, + "step": 1065 + }, + { + "epoch": 0.03267132524212333, + "grad_norm": 2.1670186193581644, + "learning_rate": 1.999962710584916e-05, + "loss": 1.0116, + "step": 1066 + }, + { + "epoch": 0.032701973764864535, + "grad_norm": 1.9692935973743764, + "learning_rate": 1.9999618484358677e-05, + "loss": 0.8291, + "step": 1067 + }, + { + "epoch": 0.03273262228760574, + "grad_norm": 2.29988691092022, + "learning_rate": 1.99996097643394e-05, + "loss": 0.9786, + "step": 1068 + }, + { + "epoch": 0.03276327081034694, + "grad_norm": 2.242209720700411, + "learning_rate": 1.9999600945791403e-05, + "loss": 0.9625, + "step": 1069 + }, + { + "epoch": 0.032793919333088145, + "grad_norm": 2.1465246715978656, + "learning_rate": 1.999959202871478e-05, + "loss": 0.8668, + "step": 1070 + }, + { + "epoch": 0.03282456785582935, + "grad_norm": 1.8212300964742953, + "learning_rate": 1.999958301310962e-05, + "loss": 0.8099, + "step": 1071 + }, + { + "epoch": 0.03285521637857055, + "grad_norm": 2.1156468561985493, + "learning_rate": 1.9999573898976013e-05, + "loss": 0.8571, + "step": 1072 + }, + { + "epoch": 0.032885864901311755, + "grad_norm": 2.3212936745510717, + "learning_rate": 1.999956468631404e-05, + "loss": 0.9551, + "step": 1073 + }, + { + "epoch": 0.03291651342405296, + "grad_norm": 2.4079908810696304, + "learning_rate": 1.9999555375123802e-05, + "loss": 0.8739, + "step": 1074 + }, + { + "epoch": 0.032947161946794166, + "grad_norm": 2.0207343276537335, + "learning_rate": 1.999954596540539e-05, + "loss": 0.9821, + "step": 1075 + }, + { + "epoch": 0.032977810469535365, + "grad_norm": 2.261061154959169, + "learning_rate": 1.9999536457158883e-05, + "loss": 0.8243, + "step": 1076 + }, + { + "epoch": 0.03300845899227657, + "grad_norm": 1.248030870886308, + "learning_rate": 1.999952685038439e-05, + "loss": 0.6582, + "step": 1077 + }, + { + "epoch": 0.03303910751501778, + "grad_norm": 2.0730293011884413, + "learning_rate": 1.9999517145082002e-05, + "loss": 0.8789, + "step": 1078 + }, + { + "epoch": 0.03306975603775898, + "grad_norm": 2.5422968614707164, + "learning_rate": 1.999950734125181e-05, + "loss": 0.7385, + "step": 1079 + }, + { + "epoch": 0.03310040456050018, + "grad_norm": 2.3349906340312194, + "learning_rate": 1.9999497438893915e-05, + "loss": 0.9983, + "step": 1080 + }, + { + "epoch": 0.03313105308324139, + "grad_norm": 2.284077294533952, + "learning_rate": 1.9999487438008413e-05, + "loss": 0.8905, + "step": 1081 + }, + { + "epoch": 0.03316170160598259, + "grad_norm": 2.0493720819382193, + "learning_rate": 1.9999477338595404e-05, + "loss": 0.8703, + "step": 1082 + }, + { + "epoch": 0.0331923501287238, + "grad_norm": 1.1321521851960106, + "learning_rate": 1.9999467140654985e-05, + "loss": 0.6636, + "step": 1083 + }, + { + "epoch": 0.033222998651465, + "grad_norm": 2.105050180731787, + "learning_rate": 1.999945684418726e-05, + "loss": 0.8264, + "step": 1084 + }, + { + "epoch": 0.0332536471742062, + "grad_norm": 2.367383214990398, + "learning_rate": 1.9999446449192325e-05, + "loss": 0.902, + "step": 1085 + }, + { + "epoch": 0.03328429569694741, + "grad_norm": 2.244189143310521, + "learning_rate": 1.999943595567029e-05, + "loss": 0.8662, + "step": 1086 + }, + { + "epoch": 0.033314944219688614, + "grad_norm": 2.2327772780793897, + "learning_rate": 1.999942536362125e-05, + "loss": 1.0714, + "step": 1087 + }, + { + "epoch": 0.03334559274242981, + "grad_norm": 2.2620472700507013, + "learning_rate": 1.9999414673045314e-05, + "loss": 0.6443, + "step": 1088 + }, + { + "epoch": 0.03337624126517102, + "grad_norm": 2.0792399933708827, + "learning_rate": 1.9999403883942586e-05, + "loss": 0.9938, + "step": 1089 + }, + { + "epoch": 0.033406889787912224, + "grad_norm": 2.435346234437353, + "learning_rate": 1.9999392996313175e-05, + "loss": 0.8152, + "step": 1090 + }, + { + "epoch": 0.03343753831065343, + "grad_norm": 1.6421858514184386, + "learning_rate": 1.9999382010157187e-05, + "loss": 0.8578, + "step": 1091 + }, + { + "epoch": 0.03346818683339463, + "grad_norm": 0.9001789806580522, + "learning_rate": 1.999937092547473e-05, + "loss": 0.6343, + "step": 1092 + }, + { + "epoch": 0.033498835356135834, + "grad_norm": 2.0241709437706104, + "learning_rate": 1.999935974226591e-05, + "loss": 0.8906, + "step": 1093 + }, + { + "epoch": 0.03352948387887704, + "grad_norm": 1.9167406144608377, + "learning_rate": 1.9999348460530842e-05, + "loss": 0.8269, + "step": 1094 + }, + { + "epoch": 0.033560132401618245, + "grad_norm": 2.0045580138460846, + "learning_rate": 1.9999337080269634e-05, + "loss": 0.9432, + "step": 1095 + }, + { + "epoch": 0.033590780924359444, + "grad_norm": 2.0303974656407155, + "learning_rate": 1.99993256014824e-05, + "loss": 1.0015, + "step": 1096 + }, + { + "epoch": 0.03362142944710065, + "grad_norm": 2.1004258984836484, + "learning_rate": 1.9999314024169253e-05, + "loss": 0.8924, + "step": 1097 + }, + { + "epoch": 0.033652077969841855, + "grad_norm": 1.7153653956453931, + "learning_rate": 1.9999302348330304e-05, + "loss": 0.8031, + "step": 1098 + }, + { + "epoch": 0.03368272649258306, + "grad_norm": 1.9609016897747549, + "learning_rate": 1.9999290573965676e-05, + "loss": 0.9001, + "step": 1099 + }, + { + "epoch": 0.03371337501532426, + "grad_norm": 2.0192171857963173, + "learning_rate": 1.9999278701075472e-05, + "loss": 0.9929, + "step": 1100 + }, + { + "epoch": 0.033744023538065465, + "grad_norm": 1.8422355664571535, + "learning_rate": 1.999926672965982e-05, + "loss": 0.848, + "step": 1101 + }, + { + "epoch": 0.03377467206080667, + "grad_norm": 0.9129584804657817, + "learning_rate": 1.9999254659718835e-05, + "loss": 0.6573, + "step": 1102 + }, + { + "epoch": 0.03380532058354787, + "grad_norm": 2.179468089547012, + "learning_rate": 1.9999242491252636e-05, + "loss": 0.9329, + "step": 1103 + }, + { + "epoch": 0.033835969106289075, + "grad_norm": 2.0176926296662354, + "learning_rate": 1.9999230224261343e-05, + "loss": 0.9426, + "step": 1104 + }, + { + "epoch": 0.03386661762903028, + "grad_norm": 2.1615979512380097, + "learning_rate": 1.9999217858745075e-05, + "loss": 0.853, + "step": 1105 + }, + { + "epoch": 0.03389726615177149, + "grad_norm": 2.3869537145629742, + "learning_rate": 1.9999205394703957e-05, + "loss": 0.9335, + "step": 1106 + }, + { + "epoch": 0.033927914674512685, + "grad_norm": 2.215303850044538, + "learning_rate": 1.9999192832138105e-05, + "loss": 0.8245, + "step": 1107 + }, + { + "epoch": 0.03395856319725389, + "grad_norm": 2.171901870543179, + "learning_rate": 1.999918017104765e-05, + "loss": 0.8511, + "step": 1108 + }, + { + "epoch": 0.0339892117199951, + "grad_norm": 1.852815375050474, + "learning_rate": 1.9999167411432715e-05, + "loss": 0.83, + "step": 1109 + }, + { + "epoch": 0.0340198602427363, + "grad_norm": 1.9776811621025876, + "learning_rate": 1.9999154553293425e-05, + "loss": 0.8686, + "step": 1110 + }, + { + "epoch": 0.0340505087654775, + "grad_norm": 1.7942208326829332, + "learning_rate": 1.9999141596629905e-05, + "loss": 0.9639, + "step": 1111 + }, + { + "epoch": 0.03408115728821871, + "grad_norm": 2.1005638519838588, + "learning_rate": 1.9999128541442287e-05, + "loss": 0.8259, + "step": 1112 + }, + { + "epoch": 0.03411180581095991, + "grad_norm": 2.1379244187097006, + "learning_rate": 1.9999115387730698e-05, + "loss": 0.8607, + "step": 1113 + }, + { + "epoch": 0.03414245433370112, + "grad_norm": 1.9261353735311544, + "learning_rate": 1.9999102135495265e-05, + "loss": 0.8587, + "step": 1114 + }, + { + "epoch": 0.03417310285644232, + "grad_norm": 1.8097314984690922, + "learning_rate": 1.9999088784736117e-05, + "loss": 0.8274, + "step": 1115 + }, + { + "epoch": 0.03420375137918352, + "grad_norm": 1.9154138301795531, + "learning_rate": 1.9999075335453394e-05, + "loss": 0.9822, + "step": 1116 + }, + { + "epoch": 0.03423439990192473, + "grad_norm": 2.17263847209496, + "learning_rate": 1.999906178764722e-05, + "loss": 0.9522, + "step": 1117 + }, + { + "epoch": 0.034265048424665934, + "grad_norm": 0.8973658932393132, + "learning_rate": 1.999904814131773e-05, + "loss": 0.6384, + "step": 1118 + }, + { + "epoch": 0.03429569694740713, + "grad_norm": 2.131172093049234, + "learning_rate": 1.9999034396465066e-05, + "loss": 0.9729, + "step": 1119 + }, + { + "epoch": 0.03432634547014834, + "grad_norm": 2.33455836648908, + "learning_rate": 1.9999020553089354e-05, + "loss": 0.8243, + "step": 1120 + }, + { + "epoch": 0.034356993992889544, + "grad_norm": 2.003971049132387, + "learning_rate": 1.9999006611190737e-05, + "loss": 0.8271, + "step": 1121 + }, + { + "epoch": 0.03438764251563075, + "grad_norm": 2.0641871095230746, + "learning_rate": 1.9998992570769348e-05, + "loss": 0.9329, + "step": 1122 + }, + { + "epoch": 0.03441829103837195, + "grad_norm": 2.085978964823232, + "learning_rate": 1.9998978431825327e-05, + "loss": 0.871, + "step": 1123 + }, + { + "epoch": 0.034448939561113154, + "grad_norm": 2.3065660280722393, + "learning_rate": 1.9998964194358812e-05, + "loss": 0.9251, + "step": 1124 + }, + { + "epoch": 0.03447958808385436, + "grad_norm": 2.107546835989289, + "learning_rate": 1.9998949858369944e-05, + "loss": 0.8667, + "step": 1125 + }, + { + "epoch": 0.034510236606595565, + "grad_norm": 2.027643819524005, + "learning_rate": 1.9998935423858866e-05, + "loss": 0.7775, + "step": 1126 + }, + { + "epoch": 0.034540885129336764, + "grad_norm": 2.058405351631798, + "learning_rate": 1.999892089082572e-05, + "loss": 0.9443, + "step": 1127 + }, + { + "epoch": 0.03457153365207797, + "grad_norm": 1.93990827789993, + "learning_rate": 1.9998906259270648e-05, + "loss": 0.8212, + "step": 1128 + }, + { + "epoch": 0.034602182174819175, + "grad_norm": 2.218186582725486, + "learning_rate": 1.9998891529193793e-05, + "loss": 0.816, + "step": 1129 + }, + { + "epoch": 0.03463283069756038, + "grad_norm": 1.8606737888579274, + "learning_rate": 1.99988767005953e-05, + "loss": 0.7957, + "step": 1130 + }, + { + "epoch": 0.03466347922030158, + "grad_norm": 2.1227494824747843, + "learning_rate": 1.999886177347532e-05, + "loss": 0.9165, + "step": 1131 + }, + { + "epoch": 0.034694127743042785, + "grad_norm": 0.8931468260366818, + "learning_rate": 1.9998846747833993e-05, + "loss": 0.6588, + "step": 1132 + }, + { + "epoch": 0.03472477626578399, + "grad_norm": 2.2419874442021754, + "learning_rate": 1.9998831623671474e-05, + "loss": 1.0055, + "step": 1133 + }, + { + "epoch": 0.03475542478852519, + "grad_norm": 2.165513610652603, + "learning_rate": 1.9998816400987907e-05, + "loss": 0.9225, + "step": 1134 + }, + { + "epoch": 0.034786073311266395, + "grad_norm": 1.9075725427843326, + "learning_rate": 1.9998801079783445e-05, + "loss": 0.915, + "step": 1135 + }, + { + "epoch": 0.0348167218340076, + "grad_norm": 1.8961802461274089, + "learning_rate": 1.9998785660058235e-05, + "loss": 0.8926, + "step": 1136 + }, + { + "epoch": 0.03484737035674881, + "grad_norm": 2.0579508453665887, + "learning_rate": 1.9998770141812435e-05, + "loss": 0.8886, + "step": 1137 + }, + { + "epoch": 0.034878018879490005, + "grad_norm": 0.7970176062584249, + "learning_rate": 1.999875452504619e-05, + "loss": 0.656, + "step": 1138 + }, + { + "epoch": 0.03490866740223121, + "grad_norm": 2.0812925517178624, + "learning_rate": 1.999873880975966e-05, + "loss": 0.9454, + "step": 1139 + }, + { + "epoch": 0.03493931592497242, + "grad_norm": 1.9023262043017382, + "learning_rate": 1.9998722995953e-05, + "loss": 0.8964, + "step": 1140 + }, + { + "epoch": 0.03496996444771362, + "grad_norm": 2.045274023336064, + "learning_rate": 1.9998707083626365e-05, + "loss": 0.9498, + "step": 1141 + }, + { + "epoch": 0.03500061297045482, + "grad_norm": 1.9368650926171986, + "learning_rate": 1.999869107277991e-05, + "loss": 0.8792, + "step": 1142 + }, + { + "epoch": 0.03503126149319603, + "grad_norm": 2.0397179393103495, + "learning_rate": 1.9998674963413795e-05, + "loss": 0.8517, + "step": 1143 + }, + { + "epoch": 0.03506191001593723, + "grad_norm": 2.2810260726006937, + "learning_rate": 1.999865875552817e-05, + "loss": 0.8486, + "step": 1144 + }, + { + "epoch": 0.03509255853867844, + "grad_norm": 2.3992457566819736, + "learning_rate": 1.9998642449123208e-05, + "loss": 0.9283, + "step": 1145 + }, + { + "epoch": 0.03512320706141964, + "grad_norm": 1.963080507206331, + "learning_rate": 1.9998626044199067e-05, + "loss": 0.9508, + "step": 1146 + }, + { + "epoch": 0.03515385558416084, + "grad_norm": 2.1145857583287557, + "learning_rate": 1.9998609540755896e-05, + "loss": 0.8199, + "step": 1147 + }, + { + "epoch": 0.03518450410690205, + "grad_norm": 2.060273327666909, + "learning_rate": 1.9998592938793876e-05, + "loss": 0.9232, + "step": 1148 + }, + { + "epoch": 0.035215152629643254, + "grad_norm": 1.8621078559479052, + "learning_rate": 1.9998576238313156e-05, + "loss": 0.7563, + "step": 1149 + }, + { + "epoch": 0.03524580115238445, + "grad_norm": 2.047709676685081, + "learning_rate": 1.9998559439313906e-05, + "loss": 0.968, + "step": 1150 + }, + { + "epoch": 0.03527644967512566, + "grad_norm": 0.9377777465940808, + "learning_rate": 1.9998542541796297e-05, + "loss": 0.6571, + "step": 1151 + }, + { + "epoch": 0.035307098197866864, + "grad_norm": 1.9024680714073057, + "learning_rate": 1.9998525545760482e-05, + "loss": 0.8378, + "step": 1152 + }, + { + "epoch": 0.03533774672060807, + "grad_norm": 2.0030481682113646, + "learning_rate": 1.999850845120664e-05, + "loss": 0.8205, + "step": 1153 + }, + { + "epoch": 0.03536839524334927, + "grad_norm": 1.9469298895766713, + "learning_rate": 1.9998491258134938e-05, + "loss": 0.8777, + "step": 1154 + }, + { + "epoch": 0.035399043766090474, + "grad_norm": 2.237503229079267, + "learning_rate": 1.9998473966545543e-05, + "loss": 0.8457, + "step": 1155 + }, + { + "epoch": 0.03542969228883168, + "grad_norm": 2.1815066709955815, + "learning_rate": 1.9998456576438628e-05, + "loss": 0.8864, + "step": 1156 + }, + { + "epoch": 0.035460340811572885, + "grad_norm": 0.9048417515324352, + "learning_rate": 1.999843908781436e-05, + "loss": 0.6582, + "step": 1157 + }, + { + "epoch": 0.035490989334314084, + "grad_norm": 2.066620173828957, + "learning_rate": 1.999842150067291e-05, + "loss": 0.9675, + "step": 1158 + }, + { + "epoch": 0.03552163785705529, + "grad_norm": 1.8831298017252416, + "learning_rate": 1.9998403815014454e-05, + "loss": 0.8918, + "step": 1159 + }, + { + "epoch": 0.035552286379796495, + "grad_norm": 2.0522925248788466, + "learning_rate": 1.9998386030839172e-05, + "loss": 0.8185, + "step": 1160 + }, + { + "epoch": 0.0355829349025377, + "grad_norm": 1.8841798606847007, + "learning_rate": 1.9998368148147235e-05, + "loss": 0.866, + "step": 1161 + }, + { + "epoch": 0.0356135834252789, + "grad_norm": 2.1660199618984617, + "learning_rate": 1.9998350166938815e-05, + "loss": 0.8618, + "step": 1162 + }, + { + "epoch": 0.035644231948020105, + "grad_norm": 2.2508433185645624, + "learning_rate": 1.9998332087214096e-05, + "loss": 0.8676, + "step": 1163 + }, + { + "epoch": 0.03567488047076131, + "grad_norm": 1.947033544844867, + "learning_rate": 1.9998313908973248e-05, + "loss": 0.835, + "step": 1164 + }, + { + "epoch": 0.03570552899350252, + "grad_norm": 1.9125277093912953, + "learning_rate": 1.9998295632216458e-05, + "loss": 0.8561, + "step": 1165 + }, + { + "epoch": 0.035736177516243715, + "grad_norm": 2.0182731689421898, + "learning_rate": 1.9998277256943902e-05, + "loss": 0.8731, + "step": 1166 + }, + { + "epoch": 0.03576682603898492, + "grad_norm": 1.077784606154568, + "learning_rate": 1.9998258783155763e-05, + "loss": 0.6733, + "step": 1167 + }, + { + "epoch": 0.03579747456172613, + "grad_norm": 2.2009992017917783, + "learning_rate": 1.999824021085222e-05, + "loss": 0.9278, + "step": 1168 + }, + { + "epoch": 0.035828123084467325, + "grad_norm": 1.896785758996291, + "learning_rate": 1.999822154003346e-05, + "loss": 0.9777, + "step": 1169 + }, + { + "epoch": 0.03585877160720853, + "grad_norm": 2.122560266480338, + "learning_rate": 1.9998202770699663e-05, + "loss": 0.8704, + "step": 1170 + }, + { + "epoch": 0.03588942012994974, + "grad_norm": 2.0967126071940467, + "learning_rate": 1.999818390285102e-05, + "loss": 0.84, + "step": 1171 + }, + { + "epoch": 0.03592006865269094, + "grad_norm": 1.9420496687440063, + "learning_rate": 1.999816493648771e-05, + "loss": 0.9028, + "step": 1172 + }, + { + "epoch": 0.03595071717543214, + "grad_norm": 1.8992589858088063, + "learning_rate": 1.999814587160992e-05, + "loss": 0.8344, + "step": 1173 + }, + { + "epoch": 0.03598136569817335, + "grad_norm": 2.1737961396288474, + "learning_rate": 1.9998126708217846e-05, + "loss": 0.8862, + "step": 1174 + }, + { + "epoch": 0.03601201422091455, + "grad_norm": 2.1792024359253683, + "learning_rate": 1.999810744631167e-05, + "loss": 0.9331, + "step": 1175 + }, + { + "epoch": 0.03604266274365576, + "grad_norm": 2.0374922103220263, + "learning_rate": 1.999808808589158e-05, + "loss": 0.7518, + "step": 1176 + }, + { + "epoch": 0.03607331126639696, + "grad_norm": 2.3363558348698357, + "learning_rate": 1.9998068626957775e-05, + "loss": 0.8278, + "step": 1177 + }, + { + "epoch": 0.03610395978913816, + "grad_norm": 2.313326619146034, + "learning_rate": 1.999804906951044e-05, + "loss": 0.8668, + "step": 1178 + }, + { + "epoch": 0.03613460831187937, + "grad_norm": 1.9469251447168887, + "learning_rate": 1.9998029413549766e-05, + "loss": 0.8465, + "step": 1179 + }, + { + "epoch": 0.036165256834620574, + "grad_norm": 2.08438587091803, + "learning_rate": 1.9998009659075952e-05, + "loss": 0.9069, + "step": 1180 + }, + { + "epoch": 0.03619590535736177, + "grad_norm": 2.4001098184088616, + "learning_rate": 1.999798980608919e-05, + "loss": 0.9262, + "step": 1181 + }, + { + "epoch": 0.03622655388010298, + "grad_norm": 1.9217210919950096, + "learning_rate": 1.999796985458968e-05, + "loss": 0.8307, + "step": 1182 + }, + { + "epoch": 0.036257202402844184, + "grad_norm": 2.105584169042356, + "learning_rate": 1.999794980457761e-05, + "loss": 0.7896, + "step": 1183 + }, + { + "epoch": 0.03628785092558539, + "grad_norm": 2.134891730646698, + "learning_rate": 1.9997929656053187e-05, + "loss": 0.8809, + "step": 1184 + }, + { + "epoch": 0.03631849944832659, + "grad_norm": 2.14734445693123, + "learning_rate": 1.9997909409016603e-05, + "loss": 0.9111, + "step": 1185 + }, + { + "epoch": 0.036349147971067794, + "grad_norm": 1.9669609883109542, + "learning_rate": 1.999788906346806e-05, + "loss": 0.8679, + "step": 1186 + }, + { + "epoch": 0.036379796493809, + "grad_norm": 1.7736009864353157, + "learning_rate": 1.9997868619407757e-05, + "loss": 0.8455, + "step": 1187 + }, + { + "epoch": 0.036410445016550205, + "grad_norm": 1.358193572060686, + "learning_rate": 1.9997848076835895e-05, + "loss": 0.6571, + "step": 1188 + }, + { + "epoch": 0.036441093539291404, + "grad_norm": 2.10864519973319, + "learning_rate": 1.999782743575268e-05, + "loss": 0.8712, + "step": 1189 + }, + { + "epoch": 0.03647174206203261, + "grad_norm": 2.1036643743714643, + "learning_rate": 1.9997806696158314e-05, + "loss": 0.9133, + "step": 1190 + }, + { + "epoch": 0.036502390584773815, + "grad_norm": 2.329492557711916, + "learning_rate": 1.9997785858052998e-05, + "loss": 0.8759, + "step": 1191 + }, + { + "epoch": 0.03653303910751502, + "grad_norm": 2.024824137784732, + "learning_rate": 1.9997764921436943e-05, + "loss": 0.9628, + "step": 1192 + }, + { + "epoch": 0.03656368763025622, + "grad_norm": 1.85748158229523, + "learning_rate": 1.999774388631035e-05, + "loss": 0.9303, + "step": 1193 + }, + { + "epoch": 0.036594336152997425, + "grad_norm": 2.264120541861978, + "learning_rate": 1.999772275267343e-05, + "loss": 0.8957, + "step": 1194 + }, + { + "epoch": 0.03662498467573863, + "grad_norm": 2.299010933599472, + "learning_rate": 1.9997701520526387e-05, + "loss": 0.9346, + "step": 1195 + }, + { + "epoch": 0.03665563319847984, + "grad_norm": 2.1974229380424783, + "learning_rate": 1.9997680189869434e-05, + "loss": 0.937, + "step": 1196 + }, + { + "epoch": 0.036686281721221035, + "grad_norm": 1.2206095881215389, + "learning_rate": 1.9997658760702782e-05, + "loss": 0.6602, + "step": 1197 + }, + { + "epoch": 0.03671693024396224, + "grad_norm": 2.034197879408072, + "learning_rate": 1.999763723302664e-05, + "loss": 0.8704, + "step": 1198 + }, + { + "epoch": 0.03674757876670345, + "grad_norm": 2.2199969616054536, + "learning_rate": 1.9997615606841218e-05, + "loss": 0.8936, + "step": 1199 + }, + { + "epoch": 0.036778227289444645, + "grad_norm": 2.1746020467632095, + "learning_rate": 1.999759388214673e-05, + "loss": 0.9217, + "step": 1200 + }, + { + "epoch": 0.03680887581218585, + "grad_norm": 2.1157708289060073, + "learning_rate": 1.9997572058943396e-05, + "loss": 0.9323, + "step": 1201 + }, + { + "epoch": 0.03683952433492706, + "grad_norm": 2.0772348067580158, + "learning_rate": 1.9997550137231426e-05, + "loss": 0.8744, + "step": 1202 + }, + { + "epoch": 0.03687017285766826, + "grad_norm": 2.19942341164228, + "learning_rate": 1.9997528117011035e-05, + "loss": 0.8321, + "step": 1203 + }, + { + "epoch": 0.03690082138040946, + "grad_norm": 2.042468027098931, + "learning_rate": 1.999750599828244e-05, + "loss": 0.8046, + "step": 1204 + }, + { + "epoch": 0.03693146990315067, + "grad_norm": 1.9822682869567407, + "learning_rate": 1.999748378104586e-05, + "loss": 0.8812, + "step": 1205 + }, + { + "epoch": 0.03696211842589187, + "grad_norm": 2.0012433145109205, + "learning_rate": 1.999746146530152e-05, + "loss": 0.9753, + "step": 1206 + }, + { + "epoch": 0.03699276694863308, + "grad_norm": 1.9777956682748938, + "learning_rate": 1.9997439051049628e-05, + "loss": 0.8088, + "step": 1207 + }, + { + "epoch": 0.03702341547137428, + "grad_norm": 2.0649857601264223, + "learning_rate": 1.9997416538290414e-05, + "loss": 0.767, + "step": 1208 + }, + { + "epoch": 0.03705406399411548, + "grad_norm": 1.3278732087438692, + "learning_rate": 1.99973939270241e-05, + "loss": 0.7062, + "step": 1209 + }, + { + "epoch": 0.03708471251685669, + "grad_norm": 1.895054647308249, + "learning_rate": 1.99973712172509e-05, + "loss": 0.8668, + "step": 1210 + }, + { + "epoch": 0.037115361039597894, + "grad_norm": 0.9626320808509135, + "learning_rate": 1.9997348408971048e-05, + "loss": 0.699, + "step": 1211 + }, + { + "epoch": 0.03714600956233909, + "grad_norm": 2.0493314362324213, + "learning_rate": 1.999732550218476e-05, + "loss": 0.8127, + "step": 1212 + }, + { + "epoch": 0.0371766580850803, + "grad_norm": 2.1800566346345813, + "learning_rate": 1.999730249689227e-05, + "loss": 1.0258, + "step": 1213 + }, + { + "epoch": 0.037207306607821504, + "grad_norm": 2.0644898159536416, + "learning_rate": 1.99972793930938e-05, + "loss": 0.8637, + "step": 1214 + }, + { + "epoch": 0.03723795513056271, + "grad_norm": 2.048866656649602, + "learning_rate": 1.9997256190789572e-05, + "loss": 0.8631, + "step": 1215 + }, + { + "epoch": 0.03726860365330391, + "grad_norm": 1.9417163450203987, + "learning_rate": 1.9997232889979825e-05, + "loss": 0.8325, + "step": 1216 + }, + { + "epoch": 0.037299252176045114, + "grad_norm": 1.9966335849489125, + "learning_rate": 1.9997209490664787e-05, + "loss": 0.9154, + "step": 1217 + }, + { + "epoch": 0.03732990069878632, + "grad_norm": 2.003391607794057, + "learning_rate": 1.9997185992844683e-05, + "loss": 0.8484, + "step": 1218 + }, + { + "epoch": 0.037360549221527525, + "grad_norm": 1.8959143852935159, + "learning_rate": 1.999716239651975e-05, + "loss": 0.8255, + "step": 1219 + }, + { + "epoch": 0.037391197744268724, + "grad_norm": 2.3709335915104144, + "learning_rate": 1.9997138701690214e-05, + "loss": 0.9752, + "step": 1220 + }, + { + "epoch": 0.03742184626700993, + "grad_norm": 2.125848583366075, + "learning_rate": 1.9997114908356317e-05, + "loss": 0.8948, + "step": 1221 + }, + { + "epoch": 0.037452494789751135, + "grad_norm": 1.9686037901731461, + "learning_rate": 1.9997091016518285e-05, + "loss": 0.7017, + "step": 1222 + }, + { + "epoch": 0.03748314331249234, + "grad_norm": 1.9667305534279769, + "learning_rate": 1.999706702617636e-05, + "loss": 0.925, + "step": 1223 + }, + { + "epoch": 0.03751379183523354, + "grad_norm": 2.6623331197573075, + "learning_rate": 1.9997042937330776e-05, + "loss": 0.8527, + "step": 1224 + }, + { + "epoch": 0.037544440357974745, + "grad_norm": 1.97651243723481, + "learning_rate": 1.999701874998177e-05, + "loss": 0.8625, + "step": 1225 + }, + { + "epoch": 0.03757508888071595, + "grad_norm": 2.098773012948223, + "learning_rate": 1.9996994464129578e-05, + "loss": 0.9452, + "step": 1226 + }, + { + "epoch": 0.03760573740345716, + "grad_norm": 1.986719193334023, + "learning_rate": 1.9996970079774444e-05, + "loss": 0.9106, + "step": 1227 + }, + { + "epoch": 0.037636385926198355, + "grad_norm": 2.1855730143158043, + "learning_rate": 1.9996945596916605e-05, + "loss": 0.9282, + "step": 1228 + }, + { + "epoch": 0.03766703444893956, + "grad_norm": 2.1297494099418324, + "learning_rate": 1.9996921015556305e-05, + "loss": 1.0004, + "step": 1229 + }, + { + "epoch": 0.03769768297168077, + "grad_norm": 1.7978874283550135, + "learning_rate": 1.999689633569378e-05, + "loss": 0.8639, + "step": 1230 + }, + { + "epoch": 0.037728331494421966, + "grad_norm": 1.1724719459967992, + "learning_rate": 1.999687155732928e-05, + "loss": 0.672, + "step": 1231 + }, + { + "epoch": 0.03775898001716317, + "grad_norm": 2.4350954166580694, + "learning_rate": 1.9996846680463048e-05, + "loss": 0.9233, + "step": 1232 + }, + { + "epoch": 0.03778962853990438, + "grad_norm": 2.1232374531254488, + "learning_rate": 1.9996821705095327e-05, + "loss": 0.8721, + "step": 1233 + }, + { + "epoch": 0.03782027706264558, + "grad_norm": 1.9076698315037033, + "learning_rate": 1.9996796631226364e-05, + "loss": 0.852, + "step": 1234 + }, + { + "epoch": 0.03785092558538678, + "grad_norm": 2.02297526722644, + "learning_rate": 1.9996771458856405e-05, + "loss": 0.9416, + "step": 1235 + }, + { + "epoch": 0.03788157410812799, + "grad_norm": 1.8934361420833326, + "learning_rate": 1.9996746187985702e-05, + "loss": 0.9165, + "step": 1236 + }, + { + "epoch": 0.03791222263086919, + "grad_norm": 2.4637909175033563, + "learning_rate": 1.9996720818614496e-05, + "loss": 0.9228, + "step": 1237 + }, + { + "epoch": 0.0379428711536104, + "grad_norm": 1.9482182917364432, + "learning_rate": 1.9996695350743046e-05, + "loss": 0.8855, + "step": 1238 + }, + { + "epoch": 0.0379735196763516, + "grad_norm": 2.0315739856847137, + "learning_rate": 1.9996669784371598e-05, + "loss": 0.86, + "step": 1239 + }, + { + "epoch": 0.0380041681990928, + "grad_norm": 0.8942154223952169, + "learning_rate": 1.9996644119500406e-05, + "loss": 0.6267, + "step": 1240 + }, + { + "epoch": 0.03803481672183401, + "grad_norm": 2.129242091874755, + "learning_rate": 1.999661835612972e-05, + "loss": 0.9045, + "step": 1241 + }, + { + "epoch": 0.038065465244575214, + "grad_norm": 1.8388172935294318, + "learning_rate": 1.9996592494259794e-05, + "loss": 0.8761, + "step": 1242 + }, + { + "epoch": 0.03809611376731641, + "grad_norm": 2.0619492662226726, + "learning_rate": 1.999656653389089e-05, + "loss": 0.9271, + "step": 1243 + }, + { + "epoch": 0.03812676229005762, + "grad_norm": 2.0068590726600526, + "learning_rate": 1.9996540475023253e-05, + "loss": 0.8698, + "step": 1244 + }, + { + "epoch": 0.038157410812798824, + "grad_norm": 2.2414365409666734, + "learning_rate": 1.9996514317657144e-05, + "loss": 0.8007, + "step": 1245 + }, + { + "epoch": 0.03818805933554003, + "grad_norm": 1.9397094779516064, + "learning_rate": 1.9996488061792827e-05, + "loss": 0.8666, + "step": 1246 + }, + { + "epoch": 0.03821870785828123, + "grad_norm": 0.8826337844792257, + "learning_rate": 1.999646170743055e-05, + "loss": 0.7033, + "step": 1247 + }, + { + "epoch": 0.038249356381022434, + "grad_norm": 0.8445262161595001, + "learning_rate": 1.999643525457058e-05, + "loss": 0.6465, + "step": 1248 + }, + { + "epoch": 0.03828000490376364, + "grad_norm": 1.9989264169036058, + "learning_rate": 1.9996408703213183e-05, + "loss": 0.929, + "step": 1249 + }, + { + "epoch": 0.038310653426504845, + "grad_norm": 1.779348869873771, + "learning_rate": 1.9996382053358605e-05, + "loss": 0.7623, + "step": 1250 + }, + { + "epoch": 0.038341301949246044, + "grad_norm": 2.1085477565635595, + "learning_rate": 1.999635530500712e-05, + "loss": 0.8302, + "step": 1251 + }, + { + "epoch": 0.03837195047198725, + "grad_norm": 2.1644834540183506, + "learning_rate": 1.9996328458158983e-05, + "loss": 0.8699, + "step": 1252 + }, + { + "epoch": 0.038402598994728455, + "grad_norm": 1.8834741655573197, + "learning_rate": 1.999630151281447e-05, + "loss": 0.8463, + "step": 1253 + }, + { + "epoch": 0.03843324751746966, + "grad_norm": 1.847994098563845, + "learning_rate": 1.999627446897384e-05, + "loss": 0.8374, + "step": 1254 + }, + { + "epoch": 0.03846389604021086, + "grad_norm": 1.8817852536458979, + "learning_rate": 1.999624732663736e-05, + "loss": 0.9586, + "step": 1255 + }, + { + "epoch": 0.038494544562952066, + "grad_norm": 1.1349761689263362, + "learning_rate": 1.9996220085805296e-05, + "loss": 0.6573, + "step": 1256 + }, + { + "epoch": 0.03852519308569327, + "grad_norm": 2.05346815571288, + "learning_rate": 1.9996192746477917e-05, + "loss": 0.8131, + "step": 1257 + }, + { + "epoch": 0.03855584160843448, + "grad_norm": 1.806865860551282, + "learning_rate": 1.9996165308655497e-05, + "loss": 0.7879, + "step": 1258 + }, + { + "epoch": 0.038586490131175676, + "grad_norm": 1.744974579631317, + "learning_rate": 1.99961377723383e-05, + "loss": 0.9423, + "step": 1259 + }, + { + "epoch": 0.03861713865391688, + "grad_norm": 2.293137214111833, + "learning_rate": 1.9996110137526598e-05, + "loss": 0.9131, + "step": 1260 + }, + { + "epoch": 0.03864778717665809, + "grad_norm": 1.732091518946056, + "learning_rate": 1.9996082404220667e-05, + "loss": 0.7634, + "step": 1261 + }, + { + "epoch": 0.038678435699399286, + "grad_norm": 0.8849985317437802, + "learning_rate": 1.999605457242078e-05, + "loss": 0.6704, + "step": 1262 + }, + { + "epoch": 0.03870908422214049, + "grad_norm": 2.208158801845369, + "learning_rate": 1.9996026642127208e-05, + "loss": 1.0279, + "step": 1263 + }, + { + "epoch": 0.0387397327448817, + "grad_norm": 1.86731155035824, + "learning_rate": 1.9995998613340227e-05, + "loss": 0.9092, + "step": 1264 + }, + { + "epoch": 0.0387703812676229, + "grad_norm": 1.9457000115594685, + "learning_rate": 1.9995970486060117e-05, + "loss": 0.915, + "step": 1265 + }, + { + "epoch": 0.0388010297903641, + "grad_norm": 2.0241028497425346, + "learning_rate": 1.999594226028715e-05, + "loss": 1.0015, + "step": 1266 + }, + { + "epoch": 0.03883167831310531, + "grad_norm": 2.2242496940892615, + "learning_rate": 1.9995913936021607e-05, + "loss": 0.8454, + "step": 1267 + }, + { + "epoch": 0.03886232683584651, + "grad_norm": 0.8798844568628571, + "learning_rate": 1.9995885513263767e-05, + "loss": 0.654, + "step": 1268 + }, + { + "epoch": 0.03889297535858772, + "grad_norm": 2.1645914211526316, + "learning_rate": 1.9995856992013908e-05, + "loss": 0.945, + "step": 1269 + }, + { + "epoch": 0.03892362388132892, + "grad_norm": 2.015257224505115, + "learning_rate": 1.9995828372272314e-05, + "loss": 0.9474, + "step": 1270 + }, + { + "epoch": 0.03895427240407012, + "grad_norm": 2.338407845848422, + "learning_rate": 1.9995799654039265e-05, + "loss": 0.944, + "step": 1271 + }, + { + "epoch": 0.03898492092681133, + "grad_norm": 2.0290136550632405, + "learning_rate": 1.9995770837315044e-05, + "loss": 0.872, + "step": 1272 + }, + { + "epoch": 0.039015569449552534, + "grad_norm": 2.155354632692292, + "learning_rate": 1.9995741922099936e-05, + "loss": 0.9332, + "step": 1273 + }, + { + "epoch": 0.03904621797229373, + "grad_norm": 2.2134777712620384, + "learning_rate": 1.9995712908394225e-05, + "loss": 0.9411, + "step": 1274 + }, + { + "epoch": 0.03907686649503494, + "grad_norm": 1.8628611755572, + "learning_rate": 1.9995683796198196e-05, + "loss": 0.9052, + "step": 1275 + }, + { + "epoch": 0.039107515017776144, + "grad_norm": 0.8193388620342852, + "learning_rate": 1.999565458551214e-05, + "loss": 0.6316, + "step": 1276 + }, + { + "epoch": 0.03913816354051735, + "grad_norm": 2.202209014638835, + "learning_rate": 1.9995625276336338e-05, + "loss": 0.8862, + "step": 1277 + }, + { + "epoch": 0.03916881206325855, + "grad_norm": 2.028740441833287, + "learning_rate": 1.9995595868671083e-05, + "loss": 1.0094, + "step": 1278 + }, + { + "epoch": 0.039199460585999754, + "grad_norm": 2.0505249414389857, + "learning_rate": 1.999556636251667e-05, + "loss": 0.8148, + "step": 1279 + }, + { + "epoch": 0.03923010910874096, + "grad_norm": 2.04441379411349, + "learning_rate": 1.999553675787338e-05, + "loss": 0.9068, + "step": 1280 + }, + { + "epoch": 0.039260757631482165, + "grad_norm": 2.0572565007684696, + "learning_rate": 1.999550705474151e-05, + "loss": 0.8446, + "step": 1281 + }, + { + "epoch": 0.039291406154223364, + "grad_norm": 1.8759507651828633, + "learning_rate": 1.999547725312135e-05, + "loss": 0.8081, + "step": 1282 + }, + { + "epoch": 0.03932205467696457, + "grad_norm": 2.145878365543344, + "learning_rate": 1.99954473530132e-05, + "loss": 0.9163, + "step": 1283 + }, + { + "epoch": 0.039352703199705776, + "grad_norm": 2.2551636746660653, + "learning_rate": 1.999541735441734e-05, + "loss": 0.8585, + "step": 1284 + }, + { + "epoch": 0.03938335172244698, + "grad_norm": 2.210782609604146, + "learning_rate": 1.9995387257334084e-05, + "loss": 0.8669, + "step": 1285 + }, + { + "epoch": 0.03941400024518818, + "grad_norm": 2.122410638770755, + "learning_rate": 1.9995357061763715e-05, + "loss": 0.7419, + "step": 1286 + }, + { + "epoch": 0.039444648767929386, + "grad_norm": 1.9753736607386059, + "learning_rate": 1.999532676770654e-05, + "loss": 0.9448, + "step": 1287 + }, + { + "epoch": 0.03947529729067059, + "grad_norm": 1.897384398434589, + "learning_rate": 1.999529637516285e-05, + "loss": 1.0288, + "step": 1288 + }, + { + "epoch": 0.0395059458134118, + "grad_norm": 0.9470115491969611, + "learning_rate": 1.9995265884132945e-05, + "loss": 0.6496, + "step": 1289 + }, + { + "epoch": 0.039536594336152996, + "grad_norm": 1.940632687557836, + "learning_rate": 1.999523529461713e-05, + "loss": 0.8216, + "step": 1290 + }, + { + "epoch": 0.0395672428588942, + "grad_norm": 2.0255133229853484, + "learning_rate": 1.999520460661571e-05, + "loss": 0.7805, + "step": 1291 + }, + { + "epoch": 0.03959789138163541, + "grad_norm": 1.8101187595457289, + "learning_rate": 1.9995173820128976e-05, + "loss": 0.8597, + "step": 1292 + }, + { + "epoch": 0.039628539904376606, + "grad_norm": 1.960176777346034, + "learning_rate": 1.9995142935157235e-05, + "loss": 0.7821, + "step": 1293 + }, + { + "epoch": 0.03965918842711781, + "grad_norm": 2.417457714732655, + "learning_rate": 1.9995111951700796e-05, + "loss": 0.7855, + "step": 1294 + }, + { + "epoch": 0.03968983694985902, + "grad_norm": 2.114409649367152, + "learning_rate": 1.9995080869759962e-05, + "loss": 0.9187, + "step": 1295 + }, + { + "epoch": 0.03972048547260022, + "grad_norm": 1.8711853298060555, + "learning_rate": 1.9995049689335038e-05, + "loss": 0.8548, + "step": 1296 + }, + { + "epoch": 0.03975113399534142, + "grad_norm": 1.8958346629079659, + "learning_rate": 1.999501841042633e-05, + "loss": 0.716, + "step": 1297 + }, + { + "epoch": 0.03978178251808263, + "grad_norm": 2.3327825909224953, + "learning_rate": 1.999498703303415e-05, + "loss": 0.9595, + "step": 1298 + }, + { + "epoch": 0.03981243104082383, + "grad_norm": 2.340223595764678, + "learning_rate": 1.999495555715881e-05, + "loss": 0.8716, + "step": 1299 + }, + { + "epoch": 0.03984307956356504, + "grad_norm": 2.073020329269152, + "learning_rate": 1.9994923982800613e-05, + "loss": 0.9134, + "step": 1300 + }, + { + "epoch": 0.03987372808630624, + "grad_norm": 1.8795513009176164, + "learning_rate": 1.999489230995987e-05, + "loss": 0.9661, + "step": 1301 + }, + { + "epoch": 0.03990437660904744, + "grad_norm": 0.9244307723629918, + "learning_rate": 1.99948605386369e-05, + "loss": 0.6908, + "step": 1302 + }, + { + "epoch": 0.03993502513178865, + "grad_norm": 1.986028847545782, + "learning_rate": 1.9994828668832005e-05, + "loss": 0.9273, + "step": 1303 + }, + { + "epoch": 0.039965673654529854, + "grad_norm": 2.1774028805388057, + "learning_rate": 1.999479670054551e-05, + "loss": 0.9486, + "step": 1304 + }, + { + "epoch": 0.03999632217727105, + "grad_norm": 1.9855851128671076, + "learning_rate": 1.9994764633777727e-05, + "loss": 0.8272, + "step": 1305 + }, + { + "epoch": 0.04002697070001226, + "grad_norm": 2.2254932214317185, + "learning_rate": 1.9994732468528968e-05, + "loss": 0.9265, + "step": 1306 + }, + { + "epoch": 0.040057619222753464, + "grad_norm": 2.324086331426406, + "learning_rate": 1.9994700204799553e-05, + "loss": 0.851, + "step": 1307 + }, + { + "epoch": 0.04008826774549467, + "grad_norm": 1.9896111162712449, + "learning_rate": 1.9994667842589802e-05, + "loss": 0.8054, + "step": 1308 + }, + { + "epoch": 0.04011891626823587, + "grad_norm": 0.8655959409777965, + "learning_rate": 1.999463538190003e-05, + "loss": 0.6323, + "step": 1309 + }, + { + "epoch": 0.040149564790977074, + "grad_norm": 2.1903980870840773, + "learning_rate": 1.9994602822730558e-05, + "loss": 0.9273, + "step": 1310 + }, + { + "epoch": 0.04018021331371828, + "grad_norm": 1.8375917893500644, + "learning_rate": 1.9994570165081708e-05, + "loss": 0.9281, + "step": 1311 + }, + { + "epoch": 0.040210861836459486, + "grad_norm": 2.2063438414704097, + "learning_rate": 1.99945374089538e-05, + "loss": 0.9767, + "step": 1312 + }, + { + "epoch": 0.040241510359200684, + "grad_norm": 1.8268273736623097, + "learning_rate": 1.9994504554347157e-05, + "loss": 0.8654, + "step": 1313 + }, + { + "epoch": 0.04027215888194189, + "grad_norm": 1.9809725493359671, + "learning_rate": 1.9994471601262106e-05, + "loss": 0.8756, + "step": 1314 + }, + { + "epoch": 0.040302807404683096, + "grad_norm": 2.024143758555477, + "learning_rate": 1.9994438549698965e-05, + "loss": 0.9173, + "step": 1315 + }, + { + "epoch": 0.0403334559274243, + "grad_norm": 2.175386032615549, + "learning_rate": 1.999440539965807e-05, + "loss": 0.938, + "step": 1316 + }, + { + "epoch": 0.0403641044501655, + "grad_norm": 2.2285110484505943, + "learning_rate": 1.9994372151139737e-05, + "loss": 0.8604, + "step": 1317 + }, + { + "epoch": 0.040394752972906706, + "grad_norm": 1.9436727980668242, + "learning_rate": 1.99943388041443e-05, + "loss": 0.9514, + "step": 1318 + }, + { + "epoch": 0.04042540149564791, + "grad_norm": 2.133951241754595, + "learning_rate": 1.9994305358672083e-05, + "loss": 0.9337, + "step": 1319 + }, + { + "epoch": 0.04045605001838912, + "grad_norm": 2.1112051230355577, + "learning_rate": 1.999427181472342e-05, + "loss": 0.8883, + "step": 1320 + }, + { + "epoch": 0.040486698541130316, + "grad_norm": 1.9306602583666101, + "learning_rate": 1.999423817229864e-05, + "loss": 0.9209, + "step": 1321 + }, + { + "epoch": 0.04051734706387152, + "grad_norm": 1.8911212203821814, + "learning_rate": 1.9994204431398075e-05, + "loss": 0.9658, + "step": 1322 + }, + { + "epoch": 0.04054799558661273, + "grad_norm": 1.045893281104883, + "learning_rate": 1.9994170592022054e-05, + "loss": 0.6779, + "step": 1323 + }, + { + "epoch": 0.040578644109353926, + "grad_norm": 0.8389426367521865, + "learning_rate": 1.9994136654170915e-05, + "loss": 0.6323, + "step": 1324 + }, + { + "epoch": 0.04060929263209513, + "grad_norm": 2.063575227203874, + "learning_rate": 1.999410261784499e-05, + "loss": 0.983, + "step": 1325 + }, + { + "epoch": 0.04063994115483634, + "grad_norm": 0.8475208058131543, + "learning_rate": 1.9994068483044616e-05, + "loss": 0.6567, + "step": 1326 + }, + { + "epoch": 0.04067058967757754, + "grad_norm": 2.0195694511929796, + "learning_rate": 1.9994034249770126e-05, + "loss": 0.8092, + "step": 1327 + }, + { + "epoch": 0.04070123820031874, + "grad_norm": 2.0104905264686654, + "learning_rate": 1.999399991802186e-05, + "loss": 0.7762, + "step": 1328 + }, + { + "epoch": 0.04073188672305995, + "grad_norm": 2.06288688691356, + "learning_rate": 1.9993965487800155e-05, + "loss": 0.8857, + "step": 1329 + }, + { + "epoch": 0.04076253524580115, + "grad_norm": 2.036290143803117, + "learning_rate": 1.999393095910535e-05, + "loss": 1.0113, + "step": 1330 + }, + { + "epoch": 0.04079318376854236, + "grad_norm": 1.8147166873282135, + "learning_rate": 1.9993896331937793e-05, + "loss": 0.8389, + "step": 1331 + }, + { + "epoch": 0.04082383229128356, + "grad_norm": 1.8194313910130957, + "learning_rate": 1.999386160629781e-05, + "loss": 0.8027, + "step": 1332 + }, + { + "epoch": 0.04085448081402476, + "grad_norm": 1.9465124804168308, + "learning_rate": 1.9993826782185754e-05, + "loss": 0.9076, + "step": 1333 + }, + { + "epoch": 0.04088512933676597, + "grad_norm": 1.948862735846738, + "learning_rate": 1.999379185960197e-05, + "loss": 0.9842, + "step": 1334 + }, + { + "epoch": 0.040915777859507174, + "grad_norm": 1.9003192596429732, + "learning_rate": 1.9993756838546793e-05, + "loss": 0.9156, + "step": 1335 + }, + { + "epoch": 0.04094642638224837, + "grad_norm": 1.8906285690222453, + "learning_rate": 1.9993721719020572e-05, + "loss": 0.9039, + "step": 1336 + }, + { + "epoch": 0.04097707490498958, + "grad_norm": 1.040639510128624, + "learning_rate": 1.999368650102366e-05, + "loss": 0.6818, + "step": 1337 + }, + { + "epoch": 0.041007723427730784, + "grad_norm": 1.926197858398859, + "learning_rate": 1.9993651184556394e-05, + "loss": 0.8516, + "step": 1338 + }, + { + "epoch": 0.04103837195047199, + "grad_norm": 1.9036508127495215, + "learning_rate": 1.9993615769619125e-05, + "loss": 0.8854, + "step": 1339 + }, + { + "epoch": 0.04106902047321319, + "grad_norm": 1.819865192526941, + "learning_rate": 1.9993580256212203e-05, + "loss": 0.7901, + "step": 1340 + }, + { + "epoch": 0.041099668995954394, + "grad_norm": 1.9100232324696802, + "learning_rate": 1.999354464433598e-05, + "loss": 0.7948, + "step": 1341 + }, + { + "epoch": 0.0411303175186956, + "grad_norm": 1.9437696004524558, + "learning_rate": 1.9993508933990803e-05, + "loss": 0.8087, + "step": 1342 + }, + { + "epoch": 0.041160966041436806, + "grad_norm": 1.9493803295283372, + "learning_rate": 1.9993473125177026e-05, + "loss": 0.9497, + "step": 1343 + }, + { + "epoch": 0.041191614564178004, + "grad_norm": 2.0449815334644232, + "learning_rate": 1.9993437217895e-05, + "loss": 0.8679, + "step": 1344 + }, + { + "epoch": 0.04122226308691921, + "grad_norm": 1.874745698054617, + "learning_rate": 1.9993401212145084e-05, + "loss": 0.8638, + "step": 1345 + }, + { + "epoch": 0.041252911609660416, + "grad_norm": 2.1305410663828726, + "learning_rate": 1.9993365107927625e-05, + "loss": 0.9527, + "step": 1346 + }, + { + "epoch": 0.04128356013240162, + "grad_norm": 0.9087359826723349, + "learning_rate": 1.9993328905242983e-05, + "loss": 0.6507, + "step": 1347 + }, + { + "epoch": 0.04131420865514282, + "grad_norm": 2.0713544766053436, + "learning_rate": 1.9993292604091516e-05, + "loss": 0.9699, + "step": 1348 + }, + { + "epoch": 0.041344857177884026, + "grad_norm": 2.155274156472617, + "learning_rate": 1.9993256204473577e-05, + "loss": 0.9171, + "step": 1349 + }, + { + "epoch": 0.04137550570062523, + "grad_norm": 2.166478209573226, + "learning_rate": 1.9993219706389532e-05, + "loss": 0.9402, + "step": 1350 + }, + { + "epoch": 0.04140615422336644, + "grad_norm": 1.8617541657773484, + "learning_rate": 1.9993183109839736e-05, + "loss": 0.8849, + "step": 1351 + }, + { + "epoch": 0.041436802746107636, + "grad_norm": 1.9022309720463335, + "learning_rate": 1.999314641482455e-05, + "loss": 1.0095, + "step": 1352 + }, + { + "epoch": 0.04146745126884884, + "grad_norm": 2.0089597881159262, + "learning_rate": 1.999310962134433e-05, + "loss": 0.8741, + "step": 1353 + }, + { + "epoch": 0.04149809979159005, + "grad_norm": 1.8477254547092345, + "learning_rate": 1.999307272939945e-05, + "loss": 0.8937, + "step": 1354 + }, + { + "epoch": 0.041528748314331246, + "grad_norm": 1.8440055714656083, + "learning_rate": 1.9993035738990265e-05, + "loss": 0.8918, + "step": 1355 + }, + { + "epoch": 0.04155939683707245, + "grad_norm": 2.0443723028716714, + "learning_rate": 1.9992998650117144e-05, + "loss": 0.7922, + "step": 1356 + }, + { + "epoch": 0.04159004535981366, + "grad_norm": 2.1339060206582965, + "learning_rate": 1.999296146278045e-05, + "loss": 0.9017, + "step": 1357 + }, + { + "epoch": 0.04162069388255486, + "grad_norm": 1.6386511964981119, + "learning_rate": 1.9992924176980547e-05, + "loss": 0.8041, + "step": 1358 + }, + { + "epoch": 0.04165134240529606, + "grad_norm": 1.8906075368973196, + "learning_rate": 1.9992886792717808e-05, + "loss": 0.791, + "step": 1359 + }, + { + "epoch": 0.04168199092803727, + "grad_norm": 1.8820118351424493, + "learning_rate": 1.99928493099926e-05, + "loss": 0.7767, + "step": 1360 + }, + { + "epoch": 0.04171263945077847, + "grad_norm": 2.218164007632604, + "learning_rate": 1.9992811728805287e-05, + "loss": 0.9204, + "step": 1361 + }, + { + "epoch": 0.04174328797351968, + "grad_norm": 1.7239234464459867, + "learning_rate": 1.9992774049156244e-05, + "loss": 0.8797, + "step": 1362 + }, + { + "epoch": 0.04177393649626088, + "grad_norm": 1.8845874907777977, + "learning_rate": 1.9992736271045845e-05, + "loss": 0.8621, + "step": 1363 + }, + { + "epoch": 0.04180458501900208, + "grad_norm": 0.9482324925699895, + "learning_rate": 1.9992698394474455e-05, + "loss": 0.6723, + "step": 1364 + }, + { + "epoch": 0.04183523354174329, + "grad_norm": 2.178835074262474, + "learning_rate": 1.999266041944245e-05, + "loss": 0.9313, + "step": 1365 + }, + { + "epoch": 0.041865882064484494, + "grad_norm": 1.9332034066650818, + "learning_rate": 1.999262234595021e-05, + "loss": 0.8354, + "step": 1366 + }, + { + "epoch": 0.04189653058722569, + "grad_norm": 1.915858554163932, + "learning_rate": 1.9992584173998103e-05, + "loss": 0.8406, + "step": 1367 + }, + { + "epoch": 0.0419271791099669, + "grad_norm": 2.092728529237827, + "learning_rate": 1.9992545903586507e-05, + "loss": 0.833, + "step": 1368 + }, + { + "epoch": 0.041957827632708104, + "grad_norm": 1.895557280414513, + "learning_rate": 1.99925075347158e-05, + "loss": 0.8076, + "step": 1369 + }, + { + "epoch": 0.04198847615544931, + "grad_norm": 2.098887479352501, + "learning_rate": 1.999246906738636e-05, + "loss": 0.8843, + "step": 1370 + }, + { + "epoch": 0.04201912467819051, + "grad_norm": 2.0245174313418857, + "learning_rate": 1.9992430501598563e-05, + "loss": 0.9362, + "step": 1371 + }, + { + "epoch": 0.042049773200931714, + "grad_norm": 0.9224813200217155, + "learning_rate": 1.9992391837352794e-05, + "loss": 0.671, + "step": 1372 + }, + { + "epoch": 0.04208042172367292, + "grad_norm": 0.8260915330462941, + "learning_rate": 1.999235307464943e-05, + "loss": 0.6412, + "step": 1373 + }, + { + "epoch": 0.042111070246414126, + "grad_norm": 2.0938026557918152, + "learning_rate": 1.9992314213488857e-05, + "loss": 0.8078, + "step": 1374 + }, + { + "epoch": 0.042141718769155324, + "grad_norm": 2.0790595002049472, + "learning_rate": 1.9992275253871455e-05, + "loss": 0.8533, + "step": 1375 + }, + { + "epoch": 0.04217236729189653, + "grad_norm": 1.8547301966805463, + "learning_rate": 1.999223619579761e-05, + "loss": 0.8913, + "step": 1376 + }, + { + "epoch": 0.042203015814637736, + "grad_norm": 1.9065065571245634, + "learning_rate": 1.99921970392677e-05, + "loss": 0.8058, + "step": 1377 + }, + { + "epoch": 0.04223366433737894, + "grad_norm": 2.2126239494950313, + "learning_rate": 1.9992157784282118e-05, + "loss": 0.7583, + "step": 1378 + }, + { + "epoch": 0.04226431286012014, + "grad_norm": 1.0222530186164145, + "learning_rate": 1.999211843084125e-05, + "loss": 0.6584, + "step": 1379 + }, + { + "epoch": 0.042294961382861346, + "grad_norm": 2.1426518268932906, + "learning_rate": 1.9992078978945482e-05, + "loss": 0.7743, + "step": 1380 + }, + { + "epoch": 0.04232560990560255, + "grad_norm": 2.0432650985309264, + "learning_rate": 1.9992039428595203e-05, + "loss": 0.9209, + "step": 1381 + }, + { + "epoch": 0.04235625842834376, + "grad_norm": 2.0193753658325404, + "learning_rate": 1.99919997797908e-05, + "loss": 0.8524, + "step": 1382 + }, + { + "epoch": 0.042386906951084956, + "grad_norm": 1.9887208810143848, + "learning_rate": 1.999196003253267e-05, + "loss": 0.7565, + "step": 1383 + }, + { + "epoch": 0.04241755547382616, + "grad_norm": 1.9308572652878333, + "learning_rate": 1.9991920186821203e-05, + "loss": 0.9096, + "step": 1384 + }, + { + "epoch": 0.04244820399656737, + "grad_norm": 0.8786404683976297, + "learning_rate": 1.999188024265679e-05, + "loss": 0.6683, + "step": 1385 + }, + { + "epoch": 0.042478852519308566, + "grad_norm": 2.09505733766922, + "learning_rate": 1.9991840200039817e-05, + "loss": 0.9122, + "step": 1386 + }, + { + "epoch": 0.04250950104204977, + "grad_norm": 2.225361482183209, + "learning_rate": 1.9991800058970695e-05, + "loss": 0.8049, + "step": 1387 + }, + { + "epoch": 0.04254014956479098, + "grad_norm": 2.1480077302897924, + "learning_rate": 1.9991759819449806e-05, + "loss": 0.8729, + "step": 1388 + }, + { + "epoch": 0.04257079808753218, + "grad_norm": 2.1037056237191756, + "learning_rate": 1.999171948147755e-05, + "loss": 0.9499, + "step": 1389 + }, + { + "epoch": 0.04260144661027338, + "grad_norm": 1.885004432946538, + "learning_rate": 1.999167904505433e-05, + "loss": 0.9247, + "step": 1390 + }, + { + "epoch": 0.04263209513301459, + "grad_norm": 0.9103795247995786, + "learning_rate": 1.9991638510180532e-05, + "loss": 0.6526, + "step": 1391 + }, + { + "epoch": 0.04266274365575579, + "grad_norm": 0.8708734929232858, + "learning_rate": 1.999159787685657e-05, + "loss": 0.6519, + "step": 1392 + }, + { + "epoch": 0.042693392178497, + "grad_norm": 1.9781486819124543, + "learning_rate": 1.9991557145082838e-05, + "loss": 0.8939, + "step": 1393 + }, + { + "epoch": 0.0427240407012382, + "grad_norm": 2.2344176208681246, + "learning_rate": 1.9991516314859735e-05, + "loss": 0.8762, + "step": 1394 + }, + { + "epoch": 0.0427546892239794, + "grad_norm": 1.9993625673856184, + "learning_rate": 1.9991475386187665e-05, + "loss": 0.9061, + "step": 1395 + }, + { + "epoch": 0.04278533774672061, + "grad_norm": 1.7832461448550272, + "learning_rate": 1.999143435906703e-05, + "loss": 0.8041, + "step": 1396 + }, + { + "epoch": 0.042815986269461814, + "grad_norm": 1.990024780550857, + "learning_rate": 1.999139323349824e-05, + "loss": 0.8793, + "step": 1397 + }, + { + "epoch": 0.04284663479220301, + "grad_norm": 2.2070732363843337, + "learning_rate": 1.9991352009481692e-05, + "loss": 0.9968, + "step": 1398 + }, + { + "epoch": 0.04287728331494422, + "grad_norm": 1.8761964823578878, + "learning_rate": 1.99913106870178e-05, + "loss": 0.9113, + "step": 1399 + }, + { + "epoch": 0.042907931837685424, + "grad_norm": 1.8588722126984505, + "learning_rate": 1.9991269266106962e-05, + "loss": 0.9277, + "step": 1400 + }, + { + "epoch": 0.04293858036042663, + "grad_norm": 2.0499902989836696, + "learning_rate": 1.9991227746749596e-05, + "loss": 0.9303, + "step": 1401 + }, + { + "epoch": 0.04296922888316783, + "grad_norm": 2.1596836877962957, + "learning_rate": 1.9991186128946107e-05, + "loss": 0.8628, + "step": 1402 + }, + { + "epoch": 0.042999877405909034, + "grad_norm": 1.929943287167392, + "learning_rate": 1.99911444126969e-05, + "loss": 0.7463, + "step": 1403 + }, + { + "epoch": 0.04303052592865024, + "grad_norm": 1.7525527384295416, + "learning_rate": 1.9991102598002396e-05, + "loss": 0.7042, + "step": 1404 + }, + { + "epoch": 0.043061174451391446, + "grad_norm": 2.5365844937274624, + "learning_rate": 1.9991060684863e-05, + "loss": 0.9067, + "step": 1405 + }, + { + "epoch": 0.043091822974132644, + "grad_norm": 2.065547454977718, + "learning_rate": 1.9991018673279125e-05, + "loss": 0.9311, + "step": 1406 + }, + { + "epoch": 0.04312247149687385, + "grad_norm": 1.9197784426417646, + "learning_rate": 1.9990976563251187e-05, + "loss": 0.7885, + "step": 1407 + }, + { + "epoch": 0.043153120019615056, + "grad_norm": 1.899521628703033, + "learning_rate": 1.9990934354779603e-05, + "loss": 0.789, + "step": 1408 + }, + { + "epoch": 0.04318376854235626, + "grad_norm": 2.0182898427821843, + "learning_rate": 1.999089204786479e-05, + "loss": 0.8776, + "step": 1409 + }, + { + "epoch": 0.04321441706509746, + "grad_norm": 1.9059967683075012, + "learning_rate": 1.9990849642507155e-05, + "loss": 0.9646, + "step": 1410 + }, + { + "epoch": 0.043245065587838666, + "grad_norm": 1.752303191345889, + "learning_rate": 1.999080713870712e-05, + "loss": 0.8133, + "step": 1411 + }, + { + "epoch": 0.04327571411057987, + "grad_norm": 2.299164563695799, + "learning_rate": 1.9990764536465112e-05, + "loss": 0.9881, + "step": 1412 + }, + { + "epoch": 0.04330636263332108, + "grad_norm": 1.9451059497458574, + "learning_rate": 1.999072183578154e-05, + "loss": 0.8396, + "step": 1413 + }, + { + "epoch": 0.043337011156062276, + "grad_norm": 1.9235948410491401, + "learning_rate": 1.9990679036656836e-05, + "loss": 0.9054, + "step": 1414 + }, + { + "epoch": 0.04336765967880348, + "grad_norm": 1.906397661362485, + "learning_rate": 1.9990636139091412e-05, + "loss": 0.9332, + "step": 1415 + }, + { + "epoch": 0.04339830820154469, + "grad_norm": 1.6855061871737793, + "learning_rate": 1.999059314308569e-05, + "loss": 0.6674, + "step": 1416 + }, + { + "epoch": 0.04342895672428589, + "grad_norm": 2.0532068193639783, + "learning_rate": 1.9990550048640103e-05, + "loss": 0.8996, + "step": 1417 + }, + { + "epoch": 0.04345960524702709, + "grad_norm": 1.798798964747075, + "learning_rate": 1.9990506855755067e-05, + "loss": 0.8663, + "step": 1418 + }, + { + "epoch": 0.0434902537697683, + "grad_norm": 2.110163371286434, + "learning_rate": 1.9990463564431013e-05, + "loss": 0.8618, + "step": 1419 + }, + { + "epoch": 0.0435209022925095, + "grad_norm": 0.9128392868173173, + "learning_rate": 1.9990420174668364e-05, + "loss": 0.6496, + "step": 1420 + }, + { + "epoch": 0.0435515508152507, + "grad_norm": 2.2118923167980453, + "learning_rate": 1.999037668646755e-05, + "loss": 0.8153, + "step": 1421 + }, + { + "epoch": 0.04358219933799191, + "grad_norm": 2.0195694503138992, + "learning_rate": 1.9990333099828997e-05, + "loss": 0.8126, + "step": 1422 + }, + { + "epoch": 0.04361284786073311, + "grad_norm": 2.1686287594874187, + "learning_rate": 1.9990289414753136e-05, + "loss": 0.975, + "step": 1423 + }, + { + "epoch": 0.04364349638347432, + "grad_norm": 1.8701678369524988, + "learning_rate": 1.9990245631240398e-05, + "loss": 0.8206, + "step": 1424 + }, + { + "epoch": 0.04367414490621552, + "grad_norm": 2.030572027551448, + "learning_rate": 1.999020174929121e-05, + "loss": 0.9326, + "step": 1425 + }, + { + "epoch": 0.04370479342895672, + "grad_norm": 1.1683792764007, + "learning_rate": 1.9990157768906012e-05, + "loss": 0.6493, + "step": 1426 + }, + { + "epoch": 0.04373544195169793, + "grad_norm": 2.065810945746208, + "learning_rate": 1.9990113690085232e-05, + "loss": 0.8636, + "step": 1427 + }, + { + "epoch": 0.043766090474439134, + "grad_norm": 2.1245353602670862, + "learning_rate": 1.999006951282931e-05, + "loss": 0.9533, + "step": 1428 + }, + { + "epoch": 0.04379673899718033, + "grad_norm": 1.912509256277853, + "learning_rate": 1.999002523713867e-05, + "loss": 0.9032, + "step": 1429 + }, + { + "epoch": 0.04382738751992154, + "grad_norm": 1.960218362465226, + "learning_rate": 1.998998086301376e-05, + "loss": 0.8837, + "step": 1430 + }, + { + "epoch": 0.043858036042662744, + "grad_norm": 1.865432914100228, + "learning_rate": 1.998993639045501e-05, + "loss": 0.9005, + "step": 1431 + }, + { + "epoch": 0.04388868456540395, + "grad_norm": 2.0017940941718333, + "learning_rate": 1.9989891819462864e-05, + "loss": 0.8877, + "step": 1432 + }, + { + "epoch": 0.04391933308814515, + "grad_norm": 0.9867072587547248, + "learning_rate": 1.9989847150037756e-05, + "loss": 0.681, + "step": 1433 + }, + { + "epoch": 0.043949981610886354, + "grad_norm": 2.1825492984835657, + "learning_rate": 1.9989802382180126e-05, + "loss": 0.9598, + "step": 1434 + }, + { + "epoch": 0.04398063013362756, + "grad_norm": 0.8654480014981554, + "learning_rate": 1.998975751589042e-05, + "loss": 0.6628, + "step": 1435 + }, + { + "epoch": 0.044011278656368766, + "grad_norm": 1.970964220250001, + "learning_rate": 1.9989712551169074e-05, + "loss": 0.9444, + "step": 1436 + }, + { + "epoch": 0.044041927179109965, + "grad_norm": 1.7300900369417869, + "learning_rate": 1.998966748801654e-05, + "loss": 0.7801, + "step": 1437 + }, + { + "epoch": 0.04407257570185117, + "grad_norm": 1.9170286256362117, + "learning_rate": 1.998962232643325e-05, + "loss": 0.8146, + "step": 1438 + }, + { + "epoch": 0.044103224224592376, + "grad_norm": 2.2377489754946036, + "learning_rate": 1.9989577066419658e-05, + "loss": 0.8827, + "step": 1439 + }, + { + "epoch": 0.04413387274733358, + "grad_norm": 1.9857127843445532, + "learning_rate": 1.998953170797621e-05, + "loss": 0.9854, + "step": 1440 + }, + { + "epoch": 0.04416452127007478, + "grad_norm": 1.8013188906266138, + "learning_rate": 1.9989486251103345e-05, + "loss": 1.0245, + "step": 1441 + }, + { + "epoch": 0.044195169792815986, + "grad_norm": 1.7576379517170138, + "learning_rate": 1.9989440695801518e-05, + "loss": 0.843, + "step": 1442 + }, + { + "epoch": 0.04422581831555719, + "grad_norm": 1.8173048812359736, + "learning_rate": 1.9989395042071176e-05, + "loss": 0.8588, + "step": 1443 + }, + { + "epoch": 0.0442564668382984, + "grad_norm": 2.0905850432791877, + "learning_rate": 1.998934928991277e-05, + "loss": 0.8878, + "step": 1444 + }, + { + "epoch": 0.044287115361039596, + "grad_norm": 1.881766238099579, + "learning_rate": 1.9989303439326747e-05, + "loss": 0.7414, + "step": 1445 + }, + { + "epoch": 0.0443177638837808, + "grad_norm": 1.9556947336366215, + "learning_rate": 1.9989257490313564e-05, + "loss": 0.8878, + "step": 1446 + }, + { + "epoch": 0.04434841240652201, + "grad_norm": 1.833550626729478, + "learning_rate": 1.9989211442873672e-05, + "loss": 0.7755, + "step": 1447 + }, + { + "epoch": 0.04437906092926321, + "grad_norm": 1.7765006281412363, + "learning_rate": 1.998916529700752e-05, + "loss": 0.7721, + "step": 1448 + }, + { + "epoch": 0.04440970945200441, + "grad_norm": 1.7244347692583408, + "learning_rate": 1.998911905271557e-05, + "loss": 0.8221, + "step": 1449 + }, + { + "epoch": 0.04444035797474562, + "grad_norm": 2.116981110182825, + "learning_rate": 1.998907270999827e-05, + "loss": 0.8348, + "step": 1450 + }, + { + "epoch": 0.04447100649748682, + "grad_norm": 1.8209958547163159, + "learning_rate": 1.9989026268856083e-05, + "loss": 0.7867, + "step": 1451 + }, + { + "epoch": 0.04450165502022802, + "grad_norm": 2.153270941468729, + "learning_rate": 1.9988979729289466e-05, + "loss": 0.9406, + "step": 1452 + }, + { + "epoch": 0.04453230354296923, + "grad_norm": 1.6745446175376115, + "learning_rate": 1.9988933091298874e-05, + "loss": 0.8429, + "step": 1453 + }, + { + "epoch": 0.04456295206571043, + "grad_norm": 2.031381127157169, + "learning_rate": 1.998888635488477e-05, + "loss": 0.7564, + "step": 1454 + }, + { + "epoch": 0.04459360058845164, + "grad_norm": 1.8655219194963055, + "learning_rate": 1.9988839520047612e-05, + "loss": 0.9263, + "step": 1455 + }, + { + "epoch": 0.04462424911119284, + "grad_norm": 1.8329412329291588, + "learning_rate": 1.9988792586787863e-05, + "loss": 0.8559, + "step": 1456 + }, + { + "epoch": 0.04465489763393404, + "grad_norm": 1.7842999645557522, + "learning_rate": 1.9988745555105983e-05, + "loss": 0.8849, + "step": 1457 + }, + { + "epoch": 0.04468554615667525, + "grad_norm": 1.4947854326993073, + "learning_rate": 1.998869842500244e-05, + "loss": 0.6872, + "step": 1458 + }, + { + "epoch": 0.044716194679416454, + "grad_norm": 1.8610496258384923, + "learning_rate": 1.9988651196477695e-05, + "loss": 0.8653, + "step": 1459 + }, + { + "epoch": 0.04474684320215765, + "grad_norm": 1.981373985470605, + "learning_rate": 1.998860386953221e-05, + "loss": 0.8385, + "step": 1460 + }, + { + "epoch": 0.04477749172489886, + "grad_norm": 2.0822243851374616, + "learning_rate": 1.998855644416646e-05, + "loss": 0.8071, + "step": 1461 + }, + { + "epoch": 0.044808140247640064, + "grad_norm": 1.913840622313211, + "learning_rate": 1.9988508920380907e-05, + "loss": 0.852, + "step": 1462 + }, + { + "epoch": 0.04483878877038127, + "grad_norm": 2.0989864028228147, + "learning_rate": 1.998846129817602e-05, + "loss": 0.9125, + "step": 1463 + }, + { + "epoch": 0.04486943729312247, + "grad_norm": 2.196407067133192, + "learning_rate": 1.9988413577552267e-05, + "loss": 0.9602, + "step": 1464 + }, + { + "epoch": 0.044900085815863675, + "grad_norm": 2.0786944744021234, + "learning_rate": 1.998836575851012e-05, + "loss": 0.8577, + "step": 1465 + }, + { + "epoch": 0.04493073433860488, + "grad_norm": 2.1521096924743865, + "learning_rate": 1.9988317841050048e-05, + "loss": 0.897, + "step": 1466 + }, + { + "epoch": 0.044961382861346086, + "grad_norm": 2.3506540932668827, + "learning_rate": 1.998826982517253e-05, + "loss": 0.8218, + "step": 1467 + }, + { + "epoch": 0.044992031384087285, + "grad_norm": 2.0520670853873155, + "learning_rate": 1.998822171087803e-05, + "loss": 0.8252, + "step": 1468 + }, + { + "epoch": 0.04502267990682849, + "grad_norm": 2.1547360686815913, + "learning_rate": 1.9988173498167024e-05, + "loss": 0.9221, + "step": 1469 + }, + { + "epoch": 0.045053328429569696, + "grad_norm": 1.381185794857828, + "learning_rate": 1.998812518703999e-05, + "loss": 0.6552, + "step": 1470 + }, + { + "epoch": 0.0450839769523109, + "grad_norm": 2.535169310947535, + "learning_rate": 1.9988076777497404e-05, + "loss": 0.9668, + "step": 1471 + }, + { + "epoch": 0.0451146254750521, + "grad_norm": 1.911858460024626, + "learning_rate": 1.9988028269539744e-05, + "loss": 0.7974, + "step": 1472 + }, + { + "epoch": 0.045145273997793306, + "grad_norm": 1.9413484760445294, + "learning_rate": 1.9987979663167483e-05, + "loss": 0.9909, + "step": 1473 + }, + { + "epoch": 0.04517592252053451, + "grad_norm": 1.9262625892700813, + "learning_rate": 1.99879309583811e-05, + "loss": 0.8216, + "step": 1474 + }, + { + "epoch": 0.04520657104327572, + "grad_norm": 2.000012523747253, + "learning_rate": 1.998788215518108e-05, + "loss": 0.8729, + "step": 1475 + }, + { + "epoch": 0.045237219566016916, + "grad_norm": 1.8707315346326017, + "learning_rate": 1.9987833253567904e-05, + "loss": 0.8572, + "step": 1476 + }, + { + "epoch": 0.04526786808875812, + "grad_norm": 2.1260863992604038, + "learning_rate": 1.9987784253542052e-05, + "loss": 1.0475, + "step": 1477 + }, + { + "epoch": 0.04529851661149933, + "grad_norm": 1.8676198029745812, + "learning_rate": 1.9987735155104005e-05, + "loss": 0.8047, + "step": 1478 + }, + { + "epoch": 0.04532916513424053, + "grad_norm": 1.9992210795154162, + "learning_rate": 1.998768595825425e-05, + "loss": 0.9243, + "step": 1479 + }, + { + "epoch": 0.04535981365698173, + "grad_norm": 1.87111898739041, + "learning_rate": 1.9987636662993264e-05, + "loss": 0.8733, + "step": 1480 + }, + { + "epoch": 0.04539046217972294, + "grad_norm": 2.010217886946911, + "learning_rate": 1.998758726932154e-05, + "loss": 0.8872, + "step": 1481 + }, + { + "epoch": 0.04542111070246414, + "grad_norm": 2.1592072587064894, + "learning_rate": 1.9987537777239566e-05, + "loss": 0.7885, + "step": 1482 + }, + { + "epoch": 0.04545175922520534, + "grad_norm": 1.7182583596445333, + "learning_rate": 1.998748818674783e-05, + "loss": 0.8152, + "step": 1483 + }, + { + "epoch": 0.04548240774794655, + "grad_norm": 1.0865685448214373, + "learning_rate": 1.998743849784681e-05, + "loss": 0.6674, + "step": 1484 + }, + { + "epoch": 0.04551305627068775, + "grad_norm": 2.054399238726033, + "learning_rate": 1.9987388710537008e-05, + "loss": 0.804, + "step": 1485 + }, + { + "epoch": 0.04554370479342896, + "grad_norm": 2.1315769535858466, + "learning_rate": 1.998733882481891e-05, + "loss": 0.8185, + "step": 1486 + }, + { + "epoch": 0.04557435331617016, + "grad_norm": 1.9489690076062045, + "learning_rate": 1.9987288840693005e-05, + "loss": 0.9966, + "step": 1487 + }, + { + "epoch": 0.04560500183891136, + "grad_norm": 1.771413425532532, + "learning_rate": 1.9987238758159785e-05, + "loss": 0.8563, + "step": 1488 + }, + { + "epoch": 0.04563565036165257, + "grad_norm": 1.7246616416179466, + "learning_rate": 1.998718857721975e-05, + "loss": 0.8619, + "step": 1489 + }, + { + "epoch": 0.045666298884393774, + "grad_norm": 1.992126413326241, + "learning_rate": 1.998713829787339e-05, + "loss": 0.8521, + "step": 1490 + }, + { + "epoch": 0.04569694740713497, + "grad_norm": 1.7792489913694138, + "learning_rate": 1.9987087920121203e-05, + "loss": 0.8071, + "step": 1491 + }, + { + "epoch": 0.04572759592987618, + "grad_norm": 1.942668455713046, + "learning_rate": 1.998703744396368e-05, + "loss": 0.8943, + "step": 1492 + }, + { + "epoch": 0.045758244452617385, + "grad_norm": 1.9861723433872873, + "learning_rate": 1.998698686940132e-05, + "loss": 0.9593, + "step": 1493 + }, + { + "epoch": 0.04578889297535859, + "grad_norm": 1.842803949221948, + "learning_rate": 1.9986936196434627e-05, + "loss": 0.9987, + "step": 1494 + }, + { + "epoch": 0.04581954149809979, + "grad_norm": 1.9395481651592197, + "learning_rate": 1.9986885425064097e-05, + "loss": 0.8796, + "step": 1495 + }, + { + "epoch": 0.045850190020840995, + "grad_norm": 2.0203242206336762, + "learning_rate": 1.998683455529023e-05, + "loss": 0.8124, + "step": 1496 + }, + { + "epoch": 0.0458808385435822, + "grad_norm": 1.8965503793066156, + "learning_rate": 1.998678358711352e-05, + "loss": 0.8326, + "step": 1497 + }, + { + "epoch": 0.045911487066323406, + "grad_norm": 2.0528899454079625, + "learning_rate": 1.9986732520534486e-05, + "loss": 0.8936, + "step": 1498 + }, + { + "epoch": 0.045942135589064605, + "grad_norm": 1.8319929328571702, + "learning_rate": 1.9986681355553617e-05, + "loss": 0.7518, + "step": 1499 + }, + { + "epoch": 0.04597278411180581, + "grad_norm": 1.1841947430809094, + "learning_rate": 1.998663009217142e-05, + "loss": 0.6447, + "step": 1500 + }, + { + "epoch": 0.046003432634547016, + "grad_norm": 2.199005701938297, + "learning_rate": 1.9986578730388402e-05, + "loss": 0.8941, + "step": 1501 + }, + { + "epoch": 0.04603408115728822, + "grad_norm": 1.9226371156825566, + "learning_rate": 1.998652727020507e-05, + "loss": 0.9334, + "step": 1502 + }, + { + "epoch": 0.04606472968002942, + "grad_norm": 0.7872558875887598, + "learning_rate": 1.9986475711621928e-05, + "loss": 0.653, + "step": 1503 + }, + { + "epoch": 0.046095378202770626, + "grad_norm": 2.24280033431283, + "learning_rate": 1.9986424054639484e-05, + "loss": 0.9591, + "step": 1504 + }, + { + "epoch": 0.04612602672551183, + "grad_norm": 0.8283083371469863, + "learning_rate": 1.9986372299258254e-05, + "loss": 0.6437, + "step": 1505 + }, + { + "epoch": 0.04615667524825304, + "grad_norm": 1.7746939638233272, + "learning_rate": 1.9986320445478737e-05, + "loss": 0.887, + "step": 1506 + }, + { + "epoch": 0.046187323770994236, + "grad_norm": 1.8451286654136048, + "learning_rate": 1.9986268493301453e-05, + "loss": 0.8192, + "step": 1507 + }, + { + "epoch": 0.04621797229373544, + "grad_norm": 1.8701511195566272, + "learning_rate": 1.998621644272691e-05, + "loss": 0.8536, + "step": 1508 + }, + { + "epoch": 0.04624862081647665, + "grad_norm": 2.074086030323145, + "learning_rate": 1.998616429375562e-05, + "loss": 1.0767, + "step": 1509 + }, + { + "epoch": 0.04627926933921785, + "grad_norm": 0.8516942413666989, + "learning_rate": 1.99861120463881e-05, + "loss": 0.6809, + "step": 1510 + }, + { + "epoch": 0.04630991786195905, + "grad_norm": 1.8800037385453279, + "learning_rate": 1.998605970062486e-05, + "loss": 0.8287, + "step": 1511 + }, + { + "epoch": 0.04634056638470026, + "grad_norm": 1.8177228656702304, + "learning_rate": 1.9986007256466422e-05, + "loss": 0.8041, + "step": 1512 + }, + { + "epoch": 0.04637121490744146, + "grad_norm": 2.0370046247087084, + "learning_rate": 1.99859547139133e-05, + "loss": 0.9541, + "step": 1513 + }, + { + "epoch": 0.04640186343018266, + "grad_norm": 1.9219793515723251, + "learning_rate": 1.9985902072966007e-05, + "loss": 0.8291, + "step": 1514 + }, + { + "epoch": 0.04643251195292387, + "grad_norm": 0.8617696660861449, + "learning_rate": 1.9985849333625067e-05, + "loss": 0.6585, + "step": 1515 + }, + { + "epoch": 0.04646316047566507, + "grad_norm": 1.9649778140256544, + "learning_rate": 1.9985796495891e-05, + "loss": 0.87, + "step": 1516 + }, + { + "epoch": 0.04649380899840628, + "grad_norm": 1.8456667726228222, + "learning_rate": 1.9985743559764327e-05, + "loss": 0.8044, + "step": 1517 + }, + { + "epoch": 0.04652445752114748, + "grad_norm": 1.9880774743262433, + "learning_rate": 1.9985690525245564e-05, + "loss": 0.8191, + "step": 1518 + }, + { + "epoch": 0.04655510604388868, + "grad_norm": 0.8033183378676644, + "learning_rate": 1.998563739233524e-05, + "loss": 0.6662, + "step": 1519 + }, + { + "epoch": 0.04658575456662989, + "grad_norm": 2.5770601008734073, + "learning_rate": 1.9985584161033876e-05, + "loss": 0.9283, + "step": 1520 + }, + { + "epoch": 0.046616403089371095, + "grad_norm": 0.7957726911043185, + "learning_rate": 1.9985530831341996e-05, + "loss": 0.6672, + "step": 1521 + }, + { + "epoch": 0.04664705161211229, + "grad_norm": 1.8287439467119708, + "learning_rate": 1.9985477403260122e-05, + "loss": 0.8178, + "step": 1522 + }, + { + "epoch": 0.0466777001348535, + "grad_norm": 0.7529569556443899, + "learning_rate": 1.9985423876788787e-05, + "loss": 0.6365, + "step": 1523 + }, + { + "epoch": 0.046708348657594705, + "grad_norm": 1.9600363619503616, + "learning_rate": 1.9985370251928518e-05, + "loss": 0.88, + "step": 1524 + }, + { + "epoch": 0.04673899718033591, + "grad_norm": 2.0847836465232295, + "learning_rate": 1.9985316528679836e-05, + "loss": 0.9439, + "step": 1525 + }, + { + "epoch": 0.04676964570307711, + "grad_norm": 2.131803780699138, + "learning_rate": 1.998526270704328e-05, + "loss": 0.9344, + "step": 1526 + }, + { + "epoch": 0.046800294225818315, + "grad_norm": 0.8418333310086065, + "learning_rate": 1.9985208787019374e-05, + "loss": 0.631, + "step": 1527 + }, + { + "epoch": 0.04683094274855952, + "grad_norm": 0.7874506523926668, + "learning_rate": 1.998515476860865e-05, + "loss": 0.6352, + "step": 1528 + }, + { + "epoch": 0.046861591271300726, + "grad_norm": 1.932352026616718, + "learning_rate": 1.9985100651811642e-05, + "loss": 0.7848, + "step": 1529 + }, + { + "epoch": 0.046892239794041925, + "grad_norm": 1.7992622597607963, + "learning_rate": 1.9985046436628884e-05, + "loss": 0.8422, + "step": 1530 + }, + { + "epoch": 0.04692288831678313, + "grad_norm": 0.8428834148814575, + "learning_rate": 1.9984992123060908e-05, + "loss": 0.6899, + "step": 1531 + }, + { + "epoch": 0.046953536839524336, + "grad_norm": 1.9860481348006234, + "learning_rate": 1.998493771110825e-05, + "loss": 0.9487, + "step": 1532 + }, + { + "epoch": 0.04698418536226554, + "grad_norm": 1.767334135880165, + "learning_rate": 1.9984883200771443e-05, + "loss": 0.9086, + "step": 1533 + }, + { + "epoch": 0.04701483388500674, + "grad_norm": 0.7958720413069237, + "learning_rate": 1.9984828592051028e-05, + "loss": 0.6596, + "step": 1534 + }, + { + "epoch": 0.047045482407747946, + "grad_norm": 2.009999220249621, + "learning_rate": 1.9984773884947546e-05, + "loss": 0.8536, + "step": 1535 + }, + { + "epoch": 0.04707613093048915, + "grad_norm": 1.7538674995993064, + "learning_rate": 1.9984719079461527e-05, + "loss": 0.9235, + "step": 1536 + }, + { + "epoch": 0.04710677945323036, + "grad_norm": 1.7841193430764202, + "learning_rate": 1.998466417559352e-05, + "loss": 0.8398, + "step": 1537 + }, + { + "epoch": 0.047137427975971556, + "grad_norm": 0.8559008545568837, + "learning_rate": 1.998460917334406e-05, + "loss": 0.6575, + "step": 1538 + }, + { + "epoch": 0.04716807649871276, + "grad_norm": 1.9968169038058146, + "learning_rate": 1.998455407271369e-05, + "loss": 0.9179, + "step": 1539 + }, + { + "epoch": 0.04719872502145397, + "grad_norm": 1.7291774813142153, + "learning_rate": 1.998449887370296e-05, + "loss": 0.8433, + "step": 1540 + }, + { + "epoch": 0.04722937354419517, + "grad_norm": 1.972462056554641, + "learning_rate": 1.9984443576312404e-05, + "loss": 0.7733, + "step": 1541 + }, + { + "epoch": 0.04726002206693637, + "grad_norm": 1.950895675354389, + "learning_rate": 1.998438818054257e-05, + "loss": 0.821, + "step": 1542 + }, + { + "epoch": 0.04729067058967758, + "grad_norm": 1.9880223565930395, + "learning_rate": 1.9984332686394005e-05, + "loss": 0.8386, + "step": 1543 + }, + { + "epoch": 0.04732131911241878, + "grad_norm": 1.8510885390398635, + "learning_rate": 1.9984277093867258e-05, + "loss": 0.8159, + "step": 1544 + }, + { + "epoch": 0.04735196763515998, + "grad_norm": 1.8778239603729738, + "learning_rate": 1.9984221402962872e-05, + "loss": 0.8581, + "step": 1545 + }, + { + "epoch": 0.04738261615790119, + "grad_norm": 1.9190366846334403, + "learning_rate": 1.99841656136814e-05, + "loss": 0.919, + "step": 1546 + }, + { + "epoch": 0.04741326468064239, + "grad_norm": 2.226866166377881, + "learning_rate": 1.9984109726023386e-05, + "loss": 0.7683, + "step": 1547 + }, + { + "epoch": 0.0474439132033836, + "grad_norm": 1.7759585584307964, + "learning_rate": 1.9984053739989388e-05, + "loss": 0.8269, + "step": 1548 + }, + { + "epoch": 0.0474745617261248, + "grad_norm": 1.960455702040046, + "learning_rate": 1.998399765557995e-05, + "loss": 0.8526, + "step": 1549 + }, + { + "epoch": 0.047505210248866, + "grad_norm": 1.6998311171190832, + "learning_rate": 1.9983941472795633e-05, + "loss": 0.765, + "step": 1550 + }, + { + "epoch": 0.04753585877160721, + "grad_norm": 1.8310873990385552, + "learning_rate": 1.9983885191636982e-05, + "loss": 0.771, + "step": 1551 + }, + { + "epoch": 0.047566507294348415, + "grad_norm": 1.8330881263834868, + "learning_rate": 1.9983828812104558e-05, + "loss": 0.9268, + "step": 1552 + }, + { + "epoch": 0.04759715581708961, + "grad_norm": 0.8326490951491142, + "learning_rate": 1.9983772334198913e-05, + "loss": 0.6502, + "step": 1553 + }, + { + "epoch": 0.04762780433983082, + "grad_norm": 1.9095917304679924, + "learning_rate": 1.9983715757920606e-05, + "loss": 0.7994, + "step": 1554 + }, + { + "epoch": 0.047658452862572025, + "grad_norm": 0.8258749806290246, + "learning_rate": 1.9983659083270194e-05, + "loss": 0.6847, + "step": 1555 + }, + { + "epoch": 0.04768910138531323, + "grad_norm": 0.783250640132174, + "learning_rate": 1.998360231024823e-05, + "loss": 0.6417, + "step": 1556 + }, + { + "epoch": 0.04771974990805443, + "grad_norm": 0.8224911042480978, + "learning_rate": 1.9983545438855284e-05, + "loss": 0.6681, + "step": 1557 + }, + { + "epoch": 0.047750398430795635, + "grad_norm": 2.0172302794218804, + "learning_rate": 1.9983488469091905e-05, + "loss": 0.9221, + "step": 1558 + }, + { + "epoch": 0.04778104695353684, + "grad_norm": 2.0055578762233752, + "learning_rate": 1.9983431400958665e-05, + "loss": 0.9255, + "step": 1559 + }, + { + "epoch": 0.047811695476278046, + "grad_norm": 1.8314950064131892, + "learning_rate": 1.998337423445612e-05, + "loss": 0.9326, + "step": 1560 + }, + { + "epoch": 0.047842343999019245, + "grad_norm": 1.9756573469790195, + "learning_rate": 1.998331696958483e-05, + "loss": 0.8553, + "step": 1561 + }, + { + "epoch": 0.04787299252176045, + "grad_norm": 1.9484002218259695, + "learning_rate": 1.9983259606345367e-05, + "loss": 0.8761, + "step": 1562 + }, + { + "epoch": 0.047903641044501656, + "grad_norm": 1.8100694641416477, + "learning_rate": 1.998320214473829e-05, + "loss": 0.8064, + "step": 1563 + }, + { + "epoch": 0.04793428956724286, + "grad_norm": 0.9609567603213119, + "learning_rate": 1.9983144584764173e-05, + "loss": 0.644, + "step": 1564 + }, + { + "epoch": 0.04796493808998406, + "grad_norm": 1.9265014795167024, + "learning_rate": 1.9983086926423577e-05, + "loss": 0.8524, + "step": 1565 + }, + { + "epoch": 0.047995586612725266, + "grad_norm": 1.6341824712923085, + "learning_rate": 1.998302916971707e-05, + "loss": 0.8123, + "step": 1566 + }, + { + "epoch": 0.04802623513546647, + "grad_norm": 1.867816255177254, + "learning_rate": 1.9982971314645217e-05, + "loss": 0.8263, + "step": 1567 + }, + { + "epoch": 0.04805688365820768, + "grad_norm": 0.845814654930799, + "learning_rate": 1.99829133612086e-05, + "loss": 0.6637, + "step": 1568 + }, + { + "epoch": 0.048087532180948876, + "grad_norm": 2.0011197635370572, + "learning_rate": 1.998285530940778e-05, + "loss": 0.8725, + "step": 1569 + }, + { + "epoch": 0.04811818070369008, + "grad_norm": 1.7443312974244818, + "learning_rate": 1.9982797159243336e-05, + "loss": 0.7632, + "step": 1570 + }, + { + "epoch": 0.04814882922643129, + "grad_norm": 0.8760861801826424, + "learning_rate": 1.9982738910715837e-05, + "loss": 0.6326, + "step": 1571 + }, + { + "epoch": 0.04817947774917249, + "grad_norm": 0.8892007220368546, + "learning_rate": 1.9982680563825855e-05, + "loss": 0.6591, + "step": 1572 + }, + { + "epoch": 0.04821012627191369, + "grad_norm": 2.1170591907159286, + "learning_rate": 1.9982622118573968e-05, + "loss": 0.8227, + "step": 1573 + }, + { + "epoch": 0.0482407747946549, + "grad_norm": 1.657757199943213, + "learning_rate": 1.9982563574960753e-05, + "loss": 0.8764, + "step": 1574 + }, + { + "epoch": 0.0482714233173961, + "grad_norm": 0.7782037737569107, + "learning_rate": 1.9982504932986783e-05, + "loss": 0.6413, + "step": 1575 + }, + { + "epoch": 0.0483020718401373, + "grad_norm": 1.946260054820625, + "learning_rate": 1.9982446192652632e-05, + "loss": 0.923, + "step": 1576 + }, + { + "epoch": 0.04833272036287851, + "grad_norm": 1.8406367969507482, + "learning_rate": 1.9982387353958895e-05, + "loss": 0.7852, + "step": 1577 + }, + { + "epoch": 0.04836336888561971, + "grad_norm": 2.001492348434039, + "learning_rate": 1.9982328416906137e-05, + "loss": 0.9139, + "step": 1578 + }, + { + "epoch": 0.04839401740836092, + "grad_norm": 1.9651686853073551, + "learning_rate": 1.998226938149494e-05, + "loss": 0.9299, + "step": 1579 + }, + { + "epoch": 0.04842466593110212, + "grad_norm": 1.8870261187224588, + "learning_rate": 1.998221024772589e-05, + "loss": 0.7936, + "step": 1580 + }, + { + "epoch": 0.04845531445384332, + "grad_norm": 1.7853237391586232, + "learning_rate": 1.998215101559957e-05, + "loss": 0.7984, + "step": 1581 + }, + { + "epoch": 0.04848596297658453, + "grad_norm": 2.038338395370242, + "learning_rate": 1.9982091685116563e-05, + "loss": 0.9885, + "step": 1582 + }, + { + "epoch": 0.048516611499325735, + "grad_norm": 2.2323117718299064, + "learning_rate": 1.9982032256277452e-05, + "loss": 0.9487, + "step": 1583 + }, + { + "epoch": 0.04854726002206693, + "grad_norm": 2.0264751501602674, + "learning_rate": 1.9981972729082823e-05, + "loss": 0.9503, + "step": 1584 + }, + { + "epoch": 0.04857790854480814, + "grad_norm": 1.778531311712035, + "learning_rate": 1.9981913103533262e-05, + "loss": 0.8447, + "step": 1585 + }, + { + "epoch": 0.048608557067549345, + "grad_norm": 2.0409173271864542, + "learning_rate": 1.9981853379629356e-05, + "loss": 0.8389, + "step": 1586 + }, + { + "epoch": 0.04863920559029055, + "grad_norm": 1.8847228421514697, + "learning_rate": 1.9981793557371694e-05, + "loss": 0.8849, + "step": 1587 + }, + { + "epoch": 0.04866985411303175, + "grad_norm": 2.1691322475902663, + "learning_rate": 1.9981733636760873e-05, + "loss": 0.8663, + "step": 1588 + }, + { + "epoch": 0.048700502635772955, + "grad_norm": 2.1702755329930836, + "learning_rate": 1.998167361779747e-05, + "loss": 0.9426, + "step": 1589 + }, + { + "epoch": 0.04873115115851416, + "grad_norm": 1.6852360969114932, + "learning_rate": 1.9981613500482086e-05, + "loss": 0.7609, + "step": 1590 + }, + { + "epoch": 0.048761799681255366, + "grad_norm": 1.888792626555846, + "learning_rate": 1.9981553284815306e-05, + "loss": 0.8219, + "step": 1591 + }, + { + "epoch": 0.048792448203996565, + "grad_norm": 2.0063512411261186, + "learning_rate": 1.9981492970797732e-05, + "loss": 0.8435, + "step": 1592 + }, + { + "epoch": 0.04882309672673777, + "grad_norm": 2.0210237777651137, + "learning_rate": 1.9981432558429953e-05, + "loss": 0.8944, + "step": 1593 + }, + { + "epoch": 0.048853745249478976, + "grad_norm": 1.9516091430433988, + "learning_rate": 1.9981372047712565e-05, + "loss": 0.9532, + "step": 1594 + }, + { + "epoch": 0.04888439377222018, + "grad_norm": 2.224163219381348, + "learning_rate": 1.9981311438646164e-05, + "loss": 0.9613, + "step": 1595 + }, + { + "epoch": 0.04891504229496138, + "grad_norm": 1.6888984007479588, + "learning_rate": 1.9981250731231347e-05, + "loss": 0.8102, + "step": 1596 + }, + { + "epoch": 0.048945690817702586, + "grad_norm": 1.8729323352834726, + "learning_rate": 1.9981189925468714e-05, + "loss": 0.7128, + "step": 1597 + }, + { + "epoch": 0.04897633934044379, + "grad_norm": 1.8288228133333826, + "learning_rate": 1.998112902135886e-05, + "loss": 0.9573, + "step": 1598 + }, + { + "epoch": 0.049006987863185, + "grad_norm": 2.3740255628100733, + "learning_rate": 1.998106801890239e-05, + "loss": 0.9812, + "step": 1599 + }, + { + "epoch": 0.049037636385926196, + "grad_norm": 1.8101788625816893, + "learning_rate": 1.9981006918099903e-05, + "loss": 0.8702, + "step": 1600 + }, + { + "epoch": 0.0490682849086674, + "grad_norm": 2.01514275358991, + "learning_rate": 1.9980945718952004e-05, + "loss": 0.7872, + "step": 1601 + }, + { + "epoch": 0.04909893343140861, + "grad_norm": 1.9586458603865304, + "learning_rate": 1.998088442145929e-05, + "loss": 0.8006, + "step": 1602 + }, + { + "epoch": 0.04912958195414981, + "grad_norm": 1.8339656769244368, + "learning_rate": 1.998082302562237e-05, + "loss": 0.9469, + "step": 1603 + }, + { + "epoch": 0.04916023047689101, + "grad_norm": 1.7249511787908176, + "learning_rate": 1.9980761531441844e-05, + "loss": 0.8094, + "step": 1604 + }, + { + "epoch": 0.04919087899963222, + "grad_norm": 1.9724327927993106, + "learning_rate": 1.9980699938918323e-05, + "loss": 0.917, + "step": 1605 + }, + { + "epoch": 0.04922152752237342, + "grad_norm": 1.0651503467176808, + "learning_rate": 1.998063824805241e-05, + "loss": 0.6664, + "step": 1606 + }, + { + "epoch": 0.04925217604511462, + "grad_norm": 1.979926995020696, + "learning_rate": 1.9980576458844714e-05, + "loss": 0.8883, + "step": 1607 + }, + { + "epoch": 0.04928282456785583, + "grad_norm": 2.027619231530878, + "learning_rate": 1.9980514571295847e-05, + "loss": 0.9263, + "step": 1608 + }, + { + "epoch": 0.04931347309059703, + "grad_norm": 2.0394263378511903, + "learning_rate": 1.9980452585406416e-05, + "loss": 0.8599, + "step": 1609 + }, + { + "epoch": 0.04934412161333824, + "grad_norm": 1.9221481932174092, + "learning_rate": 1.998039050117703e-05, + "loss": 0.9723, + "step": 1610 + }, + { + "epoch": 0.04937477013607944, + "grad_norm": 0.8167954531903108, + "learning_rate": 1.9980328318608305e-05, + "loss": 0.6636, + "step": 1611 + }, + { + "epoch": 0.04940541865882064, + "grad_norm": 1.8263225975757806, + "learning_rate": 1.9980266037700853e-05, + "loss": 0.8813, + "step": 1612 + }, + { + "epoch": 0.04943606718156185, + "grad_norm": 1.7977240210049248, + "learning_rate": 1.9980203658455285e-05, + "loss": 0.8874, + "step": 1613 + }, + { + "epoch": 0.049466715704303055, + "grad_norm": 2.0990883122498842, + "learning_rate": 1.9980141180872215e-05, + "loss": 0.984, + "step": 1614 + }, + { + "epoch": 0.049497364227044253, + "grad_norm": 2.040035324928879, + "learning_rate": 1.998007860495226e-05, + "loss": 0.7781, + "step": 1615 + }, + { + "epoch": 0.04952801274978546, + "grad_norm": 1.9264832762995463, + "learning_rate": 1.998001593069604e-05, + "loss": 0.797, + "step": 1616 + }, + { + "epoch": 0.049558661272526665, + "grad_norm": 1.9846660285337727, + "learning_rate": 1.9979953158104165e-05, + "loss": 0.9138, + "step": 1617 + }, + { + "epoch": 0.04958930979526787, + "grad_norm": 1.7336178323154545, + "learning_rate": 1.9979890287177265e-05, + "loss": 0.8367, + "step": 1618 + }, + { + "epoch": 0.04961995831800907, + "grad_norm": 2.059118854816179, + "learning_rate": 1.9979827317915946e-05, + "loss": 0.8325, + "step": 1619 + }, + { + "epoch": 0.049650606840750275, + "grad_norm": 1.691612241996755, + "learning_rate": 1.9979764250320838e-05, + "loss": 0.8517, + "step": 1620 + }, + { + "epoch": 0.04968125536349148, + "grad_norm": 1.9013243085457419, + "learning_rate": 1.997970108439256e-05, + "loss": 0.8675, + "step": 1621 + }, + { + "epoch": 0.049711903886232686, + "grad_norm": 1.8860425702523949, + "learning_rate": 1.9979637820131735e-05, + "loss": 0.9811, + "step": 1622 + }, + { + "epoch": 0.049742552408973885, + "grad_norm": 1.9876165146046751, + "learning_rate": 1.9979574457538978e-05, + "loss": 0.7829, + "step": 1623 + }, + { + "epoch": 0.04977320093171509, + "grad_norm": 2.133465158390725, + "learning_rate": 1.997951099661493e-05, + "loss": 0.9652, + "step": 1624 + }, + { + "epoch": 0.049803849454456296, + "grad_norm": 1.980542289684164, + "learning_rate": 1.99794474373602e-05, + "loss": 0.8626, + "step": 1625 + }, + { + "epoch": 0.0498344979771975, + "grad_norm": 2.060393998744041, + "learning_rate": 1.997938377977542e-05, + "loss": 0.8685, + "step": 1626 + }, + { + "epoch": 0.0498651464999387, + "grad_norm": 2.528801421015868, + "learning_rate": 1.9979320023861225e-05, + "loss": 0.8976, + "step": 1627 + }, + { + "epoch": 0.049895795022679906, + "grad_norm": 1.7146199600697085, + "learning_rate": 1.9979256169618232e-05, + "loss": 0.8943, + "step": 1628 + }, + { + "epoch": 0.04992644354542111, + "grad_norm": 2.072571971961808, + "learning_rate": 1.9979192217047075e-05, + "loss": 0.8869, + "step": 1629 + }, + { + "epoch": 0.04995709206816232, + "grad_norm": 1.943193912971721, + "learning_rate": 1.9979128166148386e-05, + "loss": 0.9203, + "step": 1630 + }, + { + "epoch": 0.049987740590903516, + "grad_norm": 1.810915462947598, + "learning_rate": 1.997906401692279e-05, + "loss": 0.901, + "step": 1631 + }, + { + "epoch": 0.05001838911364472, + "grad_norm": 0.9138780633294042, + "learning_rate": 1.997899976937093e-05, + "loss": 0.6572, + "step": 1632 + }, + { + "epoch": 0.05004903763638593, + "grad_norm": 1.7181491816137673, + "learning_rate": 1.9978935423493423e-05, + "loss": 0.8869, + "step": 1633 + }, + { + "epoch": 0.05007968615912713, + "grad_norm": 2.06292127145423, + "learning_rate": 1.997887097929092e-05, + "loss": 0.9033, + "step": 1634 + }, + { + "epoch": 0.05011033468186833, + "grad_norm": 0.799320222664414, + "learning_rate": 1.997880643676404e-05, + "loss": 0.6573, + "step": 1635 + }, + { + "epoch": 0.05014098320460954, + "grad_norm": 2.0810284561958112, + "learning_rate": 1.9978741795913436e-05, + "loss": 0.8846, + "step": 1636 + }, + { + "epoch": 0.05017163172735074, + "grad_norm": 1.829824567592679, + "learning_rate": 1.997867705673973e-05, + "loss": 0.8997, + "step": 1637 + }, + { + "epoch": 0.05020228025009194, + "grad_norm": 2.054947375604793, + "learning_rate": 1.9978612219243567e-05, + "loss": 0.8626, + "step": 1638 + }, + { + "epoch": 0.05023292877283315, + "grad_norm": 1.8631462711595992, + "learning_rate": 1.9978547283425583e-05, + "loss": 0.8393, + "step": 1639 + }, + { + "epoch": 0.05026357729557435, + "grad_norm": 0.9431711330186716, + "learning_rate": 1.9978482249286424e-05, + "loss": 0.6626, + "step": 1640 + }, + { + "epoch": 0.05029422581831556, + "grad_norm": 0.8247257856487304, + "learning_rate": 1.9978417116826723e-05, + "loss": 0.662, + "step": 1641 + }, + { + "epoch": 0.05032487434105676, + "grad_norm": 2.1574331771734183, + "learning_rate": 1.9978351886047127e-05, + "loss": 0.9008, + "step": 1642 + }, + { + "epoch": 0.050355522863797963, + "grad_norm": 2.148238096516187, + "learning_rate": 1.9978286556948273e-05, + "loss": 0.8466, + "step": 1643 + }, + { + "epoch": 0.05038617138653917, + "grad_norm": 1.9989930017882378, + "learning_rate": 1.997822112953081e-05, + "loss": 0.9163, + "step": 1644 + }, + { + "epoch": 0.050416819909280375, + "grad_norm": 1.7040435371593396, + "learning_rate": 1.9978155603795383e-05, + "loss": 0.8701, + "step": 1645 + }, + { + "epoch": 0.050447468432021574, + "grad_norm": 1.9305129475029004, + "learning_rate": 1.9978089979742635e-05, + "loss": 0.8724, + "step": 1646 + }, + { + "epoch": 0.05047811695476278, + "grad_norm": 2.021038008282111, + "learning_rate": 1.9978024257373217e-05, + "loss": 1.0223, + "step": 1647 + }, + { + "epoch": 0.050508765477503985, + "grad_norm": 1.7953413807553271, + "learning_rate": 1.9977958436687767e-05, + "loss": 0.827, + "step": 1648 + }, + { + "epoch": 0.05053941400024519, + "grad_norm": 2.1987827346171662, + "learning_rate": 1.9977892517686942e-05, + "loss": 0.8984, + "step": 1649 + }, + { + "epoch": 0.05057006252298639, + "grad_norm": 1.9093544985168112, + "learning_rate": 1.997782650037139e-05, + "loss": 0.927, + "step": 1650 + }, + { + "epoch": 0.050600711045727595, + "grad_norm": 1.744416317264958, + "learning_rate": 1.997776038474176e-05, + "loss": 0.7852, + "step": 1651 + }, + { + "epoch": 0.0506313595684688, + "grad_norm": 1.8606368403376101, + "learning_rate": 1.9977694170798702e-05, + "loss": 0.9137, + "step": 1652 + }, + { + "epoch": 0.050662008091210006, + "grad_norm": 1.9008536909919085, + "learning_rate": 1.9977627858542875e-05, + "loss": 0.8916, + "step": 1653 + }, + { + "epoch": 0.050692656613951205, + "grad_norm": 1.994780784625029, + "learning_rate": 1.9977561447974923e-05, + "loss": 0.8451, + "step": 1654 + }, + { + "epoch": 0.05072330513669241, + "grad_norm": 1.066391152899057, + "learning_rate": 1.9977494939095505e-05, + "loss": 0.6382, + "step": 1655 + }, + { + "epoch": 0.050753953659433616, + "grad_norm": 2.1612122524507535, + "learning_rate": 1.997742833190528e-05, + "loss": 0.8531, + "step": 1656 + }, + { + "epoch": 0.05078460218217482, + "grad_norm": 1.979531243552526, + "learning_rate": 1.99773616264049e-05, + "loss": 0.8299, + "step": 1657 + }, + { + "epoch": 0.05081525070491602, + "grad_norm": 2.0207106180896406, + "learning_rate": 1.9977294822595023e-05, + "loss": 0.9592, + "step": 1658 + }, + { + "epoch": 0.050845899227657226, + "grad_norm": 2.1558704859712856, + "learning_rate": 1.9977227920476304e-05, + "loss": 0.8597, + "step": 1659 + }, + { + "epoch": 0.05087654775039843, + "grad_norm": 1.7792872197770144, + "learning_rate": 1.997716092004941e-05, + "loss": 0.8241, + "step": 1660 + }, + { + "epoch": 0.05090719627313964, + "grad_norm": 2.140566978757261, + "learning_rate": 1.9977093821314994e-05, + "loss": 0.8958, + "step": 1661 + }, + { + "epoch": 0.050937844795880836, + "grad_norm": 1.9655029876776526, + "learning_rate": 1.997702662427372e-05, + "loss": 0.842, + "step": 1662 + }, + { + "epoch": 0.05096849331862204, + "grad_norm": 1.7226461427399726, + "learning_rate": 1.9976959328926254e-05, + "loss": 0.7894, + "step": 1663 + }, + { + "epoch": 0.05099914184136325, + "grad_norm": 1.9130164333349327, + "learning_rate": 1.997689193527325e-05, + "loss": 0.7563, + "step": 1664 + }, + { + "epoch": 0.05102979036410445, + "grad_norm": 0.9613804527692623, + "learning_rate": 1.9976824443315378e-05, + "loss": 0.6683, + "step": 1665 + }, + { + "epoch": 0.05106043888684565, + "grad_norm": 2.147090343364041, + "learning_rate": 1.9976756853053306e-05, + "loss": 0.8984, + "step": 1666 + }, + { + "epoch": 0.05109108740958686, + "grad_norm": 2.0941528406651027, + "learning_rate": 1.997668916448769e-05, + "loss": 0.758, + "step": 1667 + }, + { + "epoch": 0.05112173593232806, + "grad_norm": 1.6887269435540258, + "learning_rate": 1.9976621377619206e-05, + "loss": 0.825, + "step": 1668 + }, + { + "epoch": 0.05115238445506927, + "grad_norm": 2.0941338166701318, + "learning_rate": 1.997655349244852e-05, + "loss": 0.8118, + "step": 1669 + }, + { + "epoch": 0.05118303297781047, + "grad_norm": 2.003312811405014, + "learning_rate": 1.9976485508976297e-05, + "loss": 0.8511, + "step": 1670 + }, + { + "epoch": 0.051213681500551674, + "grad_norm": 1.9065464932040317, + "learning_rate": 1.9976417427203212e-05, + "loss": 0.7628, + "step": 1671 + }, + { + "epoch": 0.05124433002329288, + "grad_norm": 0.927609447615263, + "learning_rate": 1.9976349247129934e-05, + "loss": 0.6644, + "step": 1672 + }, + { + "epoch": 0.05127497854603408, + "grad_norm": 1.9781319181761208, + "learning_rate": 1.9976280968757134e-05, + "loss": 0.7923, + "step": 1673 + }, + { + "epoch": 0.051305627068775284, + "grad_norm": 1.8620829658553446, + "learning_rate": 1.9976212592085483e-05, + "loss": 0.7771, + "step": 1674 + }, + { + "epoch": 0.05133627559151649, + "grad_norm": 1.8332674300935439, + "learning_rate": 1.9976144117115658e-05, + "loss": 0.8365, + "step": 1675 + }, + { + "epoch": 0.051366924114257695, + "grad_norm": 2.008431245272282, + "learning_rate": 1.9976075543848334e-05, + "loss": 0.8568, + "step": 1676 + }, + { + "epoch": 0.051397572636998894, + "grad_norm": 2.038318800366828, + "learning_rate": 1.997600687228418e-05, + "loss": 0.9217, + "step": 1677 + }, + { + "epoch": 0.0514282211597401, + "grad_norm": 0.7802682490881006, + "learning_rate": 1.9975938102423885e-05, + "loss": 0.6468, + "step": 1678 + }, + { + "epoch": 0.051458869682481305, + "grad_norm": 2.1200404366858656, + "learning_rate": 1.997586923426812e-05, + "loss": 0.7854, + "step": 1679 + }, + { + "epoch": 0.05148951820522251, + "grad_norm": 2.207258313658508, + "learning_rate": 1.9975800267817553e-05, + "loss": 0.8903, + "step": 1680 + }, + { + "epoch": 0.05152016672796371, + "grad_norm": 2.0961074233804045, + "learning_rate": 1.997573120307288e-05, + "loss": 0.8535, + "step": 1681 + }, + { + "epoch": 0.051550815250704915, + "grad_norm": 1.8788782266602193, + "learning_rate": 1.9975662040034777e-05, + "loss": 0.8814, + "step": 1682 + }, + { + "epoch": 0.05158146377344612, + "grad_norm": 1.970556439189275, + "learning_rate": 1.997559277870392e-05, + "loss": 0.8782, + "step": 1683 + }, + { + "epoch": 0.051612112296187326, + "grad_norm": 1.8604370610036614, + "learning_rate": 1.9975523419080994e-05, + "loss": 0.9369, + "step": 1684 + }, + { + "epoch": 0.051642760818928525, + "grad_norm": 1.7179238664446626, + "learning_rate": 1.9975453961166687e-05, + "loss": 0.854, + "step": 1685 + }, + { + "epoch": 0.05167340934166973, + "grad_norm": 1.6700328327149685, + "learning_rate": 1.997538440496168e-05, + "loss": 0.8469, + "step": 1686 + }, + { + "epoch": 0.051704057864410936, + "grad_norm": 1.8591631012677583, + "learning_rate": 1.9975314750466658e-05, + "loss": 0.9048, + "step": 1687 + }, + { + "epoch": 0.05173470638715214, + "grad_norm": 2.3071393575643837, + "learning_rate": 1.9975244997682302e-05, + "loss": 0.8527, + "step": 1688 + }, + { + "epoch": 0.05176535490989334, + "grad_norm": 1.8126069239102622, + "learning_rate": 1.997517514660931e-05, + "loss": 0.8418, + "step": 1689 + }, + { + "epoch": 0.051796003432634546, + "grad_norm": 1.8683587570238687, + "learning_rate": 1.9975105197248364e-05, + "loss": 0.9353, + "step": 1690 + }, + { + "epoch": 0.05182665195537575, + "grad_norm": 1.7229564891796882, + "learning_rate": 1.9975035149600154e-05, + "loss": 0.8315, + "step": 1691 + }, + { + "epoch": 0.05185730047811696, + "grad_norm": 1.7562012333853336, + "learning_rate": 1.997496500366537e-05, + "loss": 0.8568, + "step": 1692 + }, + { + "epoch": 0.051887949000858156, + "grad_norm": 1.7686758406198908, + "learning_rate": 1.9974894759444707e-05, + "loss": 0.8502, + "step": 1693 + }, + { + "epoch": 0.05191859752359936, + "grad_norm": 1.839407432662043, + "learning_rate": 1.997482441693885e-05, + "loss": 0.8635, + "step": 1694 + }, + { + "epoch": 0.05194924604634057, + "grad_norm": 2.049254403167384, + "learning_rate": 1.9974753976148496e-05, + "loss": 0.8098, + "step": 1695 + }, + { + "epoch": 0.051979894569081773, + "grad_norm": 1.685500267571574, + "learning_rate": 1.9974683437074338e-05, + "loss": 0.7988, + "step": 1696 + }, + { + "epoch": 0.05201054309182297, + "grad_norm": 1.9438790561117336, + "learning_rate": 1.9974612799717073e-05, + "loss": 0.8379, + "step": 1697 + }, + { + "epoch": 0.05204119161456418, + "grad_norm": 1.9340618121821014, + "learning_rate": 1.9974542064077397e-05, + "loss": 0.9375, + "step": 1698 + }, + { + "epoch": 0.052071840137305384, + "grad_norm": 1.7606859367076706, + "learning_rate": 1.9974471230156006e-05, + "loss": 0.8468, + "step": 1699 + }, + { + "epoch": 0.05210248866004659, + "grad_norm": 2.1011974834108305, + "learning_rate": 1.9974400297953597e-05, + "loss": 0.8003, + "step": 1700 + }, + { + "epoch": 0.05213313718278779, + "grad_norm": 1.9360075308451405, + "learning_rate": 1.9974329267470872e-05, + "loss": 0.8268, + "step": 1701 + }, + { + "epoch": 0.052163785705528994, + "grad_norm": 1.8028748908276468, + "learning_rate": 1.9974258138708528e-05, + "loss": 0.7739, + "step": 1702 + }, + { + "epoch": 0.0521944342282702, + "grad_norm": 0.9237307824904681, + "learning_rate": 1.9974186911667264e-05, + "loss": 0.6528, + "step": 1703 + }, + { + "epoch": 0.0522250827510114, + "grad_norm": 0.8615871478063933, + "learning_rate": 1.9974115586347787e-05, + "loss": 0.655, + "step": 1704 + }, + { + "epoch": 0.052255731273752604, + "grad_norm": 2.2212875555040457, + "learning_rate": 1.9974044162750793e-05, + "loss": 0.94, + "step": 1705 + }, + { + "epoch": 0.05228637979649381, + "grad_norm": 2.024926695880339, + "learning_rate": 1.9973972640876992e-05, + "loss": 0.8562, + "step": 1706 + }, + { + "epoch": 0.052317028319235015, + "grad_norm": 1.899121652969162, + "learning_rate": 1.9973901020727087e-05, + "loss": 0.7918, + "step": 1707 + }, + { + "epoch": 0.052347676841976214, + "grad_norm": 1.9799049323353224, + "learning_rate": 1.9973829302301788e-05, + "loss": 0.9142, + "step": 1708 + }, + { + "epoch": 0.05237832536471742, + "grad_norm": 1.8612743449497382, + "learning_rate": 1.997375748560179e-05, + "loss": 0.7764, + "step": 1709 + }, + { + "epoch": 0.052408973887458625, + "grad_norm": 1.294564534236753, + "learning_rate": 1.997368557062781e-05, + "loss": 0.661, + "step": 1710 + }, + { + "epoch": 0.05243962241019983, + "grad_norm": 1.0321567932023716, + "learning_rate": 1.9973613557380555e-05, + "loss": 0.6412, + "step": 1711 + }, + { + "epoch": 0.05247027093294103, + "grad_norm": 2.229322858549619, + "learning_rate": 1.9973541445860735e-05, + "loss": 0.8419, + "step": 1712 + }, + { + "epoch": 0.052500919455682235, + "grad_norm": 2.0562699209400934, + "learning_rate": 1.9973469236069058e-05, + "loss": 0.8119, + "step": 1713 + }, + { + "epoch": 0.05253156797842344, + "grad_norm": 1.9293774952076985, + "learning_rate": 1.9973396928006234e-05, + "loss": 0.8457, + "step": 1714 + }, + { + "epoch": 0.052562216501164646, + "grad_norm": 1.6838014315498049, + "learning_rate": 1.9973324521672982e-05, + "loss": 0.8008, + "step": 1715 + }, + { + "epoch": 0.052592865023905845, + "grad_norm": 1.2757634875973514, + "learning_rate": 1.997325201707001e-05, + "loss": 0.6886, + "step": 1716 + }, + { + "epoch": 0.05262351354664705, + "grad_norm": 2.0458550816001444, + "learning_rate": 1.9973179414198033e-05, + "loss": 0.8724, + "step": 1717 + }, + { + "epoch": 0.052654162069388256, + "grad_norm": 2.105687316708107, + "learning_rate": 1.997310671305777e-05, + "loss": 0.8315, + "step": 1718 + }, + { + "epoch": 0.05268481059212946, + "grad_norm": 1.6948229539001252, + "learning_rate": 1.9973033913649934e-05, + "loss": 0.6715, + "step": 1719 + }, + { + "epoch": 0.05271545911487066, + "grad_norm": 0.8590206843415867, + "learning_rate": 1.997296101597524e-05, + "loss": 0.684, + "step": 1720 + }, + { + "epoch": 0.052746107637611866, + "grad_norm": 1.8995843061661275, + "learning_rate": 1.9972888020034413e-05, + "loss": 0.8954, + "step": 1721 + }, + { + "epoch": 0.05277675616035307, + "grad_norm": 0.8336597863945738, + "learning_rate": 1.997281492582817e-05, + "loss": 0.6264, + "step": 1722 + }, + { + "epoch": 0.05280740468309428, + "grad_norm": 0.8121173920081193, + "learning_rate": 1.9972741733357228e-05, + "loss": 0.646, + "step": 1723 + }, + { + "epoch": 0.05283805320583548, + "grad_norm": 2.104710963596649, + "learning_rate": 1.997266844262231e-05, + "loss": 0.9215, + "step": 1724 + }, + { + "epoch": 0.05286870172857668, + "grad_norm": 1.7246353980517681, + "learning_rate": 1.9972595053624137e-05, + "loss": 0.9407, + "step": 1725 + }, + { + "epoch": 0.05289935025131789, + "grad_norm": 2.0459561533015878, + "learning_rate": 1.9972521566363437e-05, + "loss": 0.9175, + "step": 1726 + }, + { + "epoch": 0.052929998774059094, + "grad_norm": 1.0007501559294847, + "learning_rate": 1.9972447980840925e-05, + "loss": 0.6485, + "step": 1727 + }, + { + "epoch": 0.05296064729680029, + "grad_norm": 0.8997908868510044, + "learning_rate": 1.9972374297057335e-05, + "loss": 0.5956, + "step": 1728 + }, + { + "epoch": 0.0529912958195415, + "grad_norm": 1.7483686476476292, + "learning_rate": 1.997230051501339e-05, + "loss": 0.7936, + "step": 1729 + }, + { + "epoch": 0.053021944342282704, + "grad_norm": 1.8955750696831433, + "learning_rate": 1.9972226634709813e-05, + "loss": 0.9088, + "step": 1730 + }, + { + "epoch": 0.05305259286502391, + "grad_norm": 0.8167715058425545, + "learning_rate": 1.9972152656147337e-05, + "loss": 0.6401, + "step": 1731 + }, + { + "epoch": 0.05308324138776511, + "grad_norm": 1.888868162039913, + "learning_rate": 1.997207857932669e-05, + "loss": 0.8982, + "step": 1732 + }, + { + "epoch": 0.053113889910506314, + "grad_norm": 0.8467591632194634, + "learning_rate": 1.9972004404248604e-05, + "loss": 0.6052, + "step": 1733 + }, + { + "epoch": 0.05314453843324752, + "grad_norm": 0.8607169266218069, + "learning_rate": 1.9971930130913804e-05, + "loss": 0.6394, + "step": 1734 + }, + { + "epoch": 0.05317518695598872, + "grad_norm": 2.009656150049846, + "learning_rate": 1.9971855759323026e-05, + "loss": 0.8691, + "step": 1735 + }, + { + "epoch": 0.053205835478729924, + "grad_norm": 2.0987867312293558, + "learning_rate": 1.9971781289477e-05, + "loss": 0.8781, + "step": 1736 + }, + { + "epoch": 0.05323648400147113, + "grad_norm": 0.7984264084561947, + "learning_rate": 1.9971706721376464e-05, + "loss": 0.6374, + "step": 1737 + }, + { + "epoch": 0.053267132524212335, + "grad_norm": 1.784586791365223, + "learning_rate": 1.997163205502215e-05, + "loss": 0.8763, + "step": 1738 + }, + { + "epoch": 0.053297781046953534, + "grad_norm": 1.7774063307861376, + "learning_rate": 1.9971557290414793e-05, + "loss": 0.8571, + "step": 1739 + }, + { + "epoch": 0.05332842956969474, + "grad_norm": 2.294201094501177, + "learning_rate": 1.997148242755513e-05, + "loss": 0.9632, + "step": 1740 + }, + { + "epoch": 0.053359078092435945, + "grad_norm": 1.7399076850607433, + "learning_rate": 1.9971407466443903e-05, + "loss": 0.7885, + "step": 1741 + }, + { + "epoch": 0.05338972661517715, + "grad_norm": 1.8062073249083919, + "learning_rate": 1.9971332407081846e-05, + "loss": 0.8482, + "step": 1742 + }, + { + "epoch": 0.05342037513791835, + "grad_norm": 2.0714052326764376, + "learning_rate": 1.9971257249469694e-05, + "loss": 0.8704, + "step": 1743 + }, + { + "epoch": 0.053451023660659555, + "grad_norm": 1.8446111553047153, + "learning_rate": 1.9971181993608198e-05, + "loss": 0.9038, + "step": 1744 + }, + { + "epoch": 0.05348167218340076, + "grad_norm": 0.9589885796156226, + "learning_rate": 1.9971106639498094e-05, + "loss": 0.64, + "step": 1745 + }, + { + "epoch": 0.053512320706141966, + "grad_norm": 0.8076736081933992, + "learning_rate": 1.9971031187140123e-05, + "loss": 0.655, + "step": 1746 + }, + { + "epoch": 0.053542969228883165, + "grad_norm": 2.4694510062721977, + "learning_rate": 1.9970955636535034e-05, + "loss": 0.8298, + "step": 1747 + }, + { + "epoch": 0.05357361775162437, + "grad_norm": 2.173104075547811, + "learning_rate": 1.9970879987683566e-05, + "loss": 0.9117, + "step": 1748 + }, + { + "epoch": 0.053604266274365577, + "grad_norm": 2.0196947019789038, + "learning_rate": 1.9970804240586464e-05, + "loss": 0.8462, + "step": 1749 + }, + { + "epoch": 0.05363491479710678, + "grad_norm": 2.1810203480807004, + "learning_rate": 1.997072839524448e-05, + "loss": 0.8972, + "step": 1750 + }, + { + "epoch": 0.05366556331984798, + "grad_norm": 0.8732671002198422, + "learning_rate": 1.9970652451658358e-05, + "loss": 0.6045, + "step": 1751 + }, + { + "epoch": 0.05369621184258919, + "grad_norm": 2.114255356378297, + "learning_rate": 1.9970576409828847e-05, + "loss": 0.8485, + "step": 1752 + }, + { + "epoch": 0.05372686036533039, + "grad_norm": 2.086961145234, + "learning_rate": 1.997050026975669e-05, + "loss": 0.9009, + "step": 1753 + }, + { + "epoch": 0.0537575088880716, + "grad_norm": 2.479687020490568, + "learning_rate": 1.997042403144265e-05, + "loss": 0.8568, + "step": 1754 + }, + { + "epoch": 0.0537881574108128, + "grad_norm": 2.0427909597570957, + "learning_rate": 1.9970347694887466e-05, + "loss": 0.9393, + "step": 1755 + }, + { + "epoch": 0.053818805933554, + "grad_norm": 1.6580574097784408, + "learning_rate": 1.9970271260091897e-05, + "loss": 0.8504, + "step": 1756 + }, + { + "epoch": 0.05384945445629521, + "grad_norm": 1.7884306240188774, + "learning_rate": 1.9970194727056694e-05, + "loss": 0.7582, + "step": 1757 + }, + { + "epoch": 0.053880102979036414, + "grad_norm": 1.730074693366334, + "learning_rate": 1.997011809578261e-05, + "loss": 0.6799, + "step": 1758 + }, + { + "epoch": 0.05391075150177761, + "grad_norm": 2.2270412770692336, + "learning_rate": 1.99700413662704e-05, + "loss": 0.8674, + "step": 1759 + }, + { + "epoch": 0.05394140002451882, + "grad_norm": 2.1781407240788764, + "learning_rate": 1.996996453852083e-05, + "loss": 0.8654, + "step": 1760 + }, + { + "epoch": 0.053972048547260024, + "grad_norm": 1.9915212493401953, + "learning_rate": 1.9969887612534638e-05, + "loss": 0.8631, + "step": 1761 + }, + { + "epoch": 0.05400269707000123, + "grad_norm": 1.8460493517335321, + "learning_rate": 1.99698105883126e-05, + "loss": 0.8774, + "step": 1762 + }, + { + "epoch": 0.05403334559274243, + "grad_norm": 1.7798141280848394, + "learning_rate": 1.9969733465855463e-05, + "loss": 0.8229, + "step": 1763 + }, + { + "epoch": 0.054063994115483634, + "grad_norm": 0.8273609128039442, + "learning_rate": 1.9969656245163996e-05, + "loss": 0.6456, + "step": 1764 + }, + { + "epoch": 0.05409464263822484, + "grad_norm": 2.2559734815473185, + "learning_rate": 1.996957892623895e-05, + "loss": 0.8702, + "step": 1765 + }, + { + "epoch": 0.05412529116096604, + "grad_norm": 2.039349831498708, + "learning_rate": 1.9969501509081094e-05, + "loss": 0.8894, + "step": 1766 + }, + { + "epoch": 0.054155939683707244, + "grad_norm": 2.0490638997368893, + "learning_rate": 1.996942399369119e-05, + "loss": 0.9655, + "step": 1767 + }, + { + "epoch": 0.05418658820644845, + "grad_norm": 0.8069640569380975, + "learning_rate": 1.9969346380069997e-05, + "loss": 0.6633, + "step": 1768 + }, + { + "epoch": 0.054217236729189655, + "grad_norm": 2.14868232339563, + "learning_rate": 1.9969268668218286e-05, + "loss": 0.7824, + "step": 1769 + }, + { + "epoch": 0.054247885251930854, + "grad_norm": 2.141232595906718, + "learning_rate": 1.9969190858136822e-05, + "loss": 0.7843, + "step": 1770 + }, + { + "epoch": 0.05427853377467206, + "grad_norm": 0.7885660736224838, + "learning_rate": 1.9969112949826366e-05, + "loss": 0.6183, + "step": 1771 + }, + { + "epoch": 0.054309182297413265, + "grad_norm": 1.9240956851851982, + "learning_rate": 1.9969034943287692e-05, + "loss": 0.7976, + "step": 1772 + }, + { + "epoch": 0.05433983082015447, + "grad_norm": 2.211101950831757, + "learning_rate": 1.9968956838521565e-05, + "loss": 0.823, + "step": 1773 + }, + { + "epoch": 0.05437047934289567, + "grad_norm": 2.130583860927606, + "learning_rate": 1.9968878635528757e-05, + "loss": 0.8684, + "step": 1774 + }, + { + "epoch": 0.054401127865636875, + "grad_norm": 0.8710305926861808, + "learning_rate": 1.9968800334310034e-05, + "loss": 0.6399, + "step": 1775 + }, + { + "epoch": 0.05443177638837808, + "grad_norm": 1.9178005006744079, + "learning_rate": 1.9968721934866173e-05, + "loss": 0.8233, + "step": 1776 + }, + { + "epoch": 0.054462424911119287, + "grad_norm": 1.9244541056683488, + "learning_rate": 1.9968643437197944e-05, + "loss": 0.8793, + "step": 1777 + }, + { + "epoch": 0.054493073433860485, + "grad_norm": 1.8837123740803094, + "learning_rate": 1.996856484130612e-05, + "loss": 0.8805, + "step": 1778 + }, + { + "epoch": 0.05452372195660169, + "grad_norm": 1.8886552596839397, + "learning_rate": 1.996848614719148e-05, + "loss": 0.834, + "step": 1779 + }, + { + "epoch": 0.0545543704793429, + "grad_norm": 1.749872966140432, + "learning_rate": 1.9968407354854786e-05, + "loss": 0.8122, + "step": 1780 + }, + { + "epoch": 0.0545850190020841, + "grad_norm": 1.756200711314567, + "learning_rate": 1.996832846429683e-05, + "loss": 0.7216, + "step": 1781 + }, + { + "epoch": 0.0546156675248253, + "grad_norm": 2.096914616492033, + "learning_rate": 1.9968249475518385e-05, + "loss": 0.7685, + "step": 1782 + }, + { + "epoch": 0.05464631604756651, + "grad_norm": 2.0063569964904677, + "learning_rate": 1.9968170388520224e-05, + "loss": 0.8401, + "step": 1783 + }, + { + "epoch": 0.05467696457030771, + "grad_norm": 1.751200025921344, + "learning_rate": 1.9968091203303132e-05, + "loss": 0.9045, + "step": 1784 + }, + { + "epoch": 0.05470761309304892, + "grad_norm": 1.904209865526494, + "learning_rate": 1.9968011919867883e-05, + "loss": 0.8578, + "step": 1785 + }, + { + "epoch": 0.05473826161579012, + "grad_norm": 2.037330590774876, + "learning_rate": 1.9967932538215268e-05, + "loss": 0.8631, + "step": 1786 + }, + { + "epoch": 0.05476891013853132, + "grad_norm": 0.8080454138297782, + "learning_rate": 1.996785305834606e-05, + "loss": 0.6518, + "step": 1787 + }, + { + "epoch": 0.05479955866127253, + "grad_norm": 1.7707398432926051, + "learning_rate": 1.9967773480261042e-05, + "loss": 0.8797, + "step": 1788 + }, + { + "epoch": 0.054830207184013734, + "grad_norm": 2.041874282479388, + "learning_rate": 1.996769380396101e-05, + "loss": 0.8167, + "step": 1789 + }, + { + "epoch": 0.05486085570675493, + "grad_norm": 1.7876092968308666, + "learning_rate": 1.9967614029446735e-05, + "loss": 0.8399, + "step": 1790 + }, + { + "epoch": 0.05489150422949614, + "grad_norm": 1.8724137714326226, + "learning_rate": 1.996753415671901e-05, + "loss": 0.9182, + "step": 1791 + }, + { + "epoch": 0.054922152752237344, + "grad_norm": 1.907445327655583, + "learning_rate": 1.9967454185778617e-05, + "loss": 0.837, + "step": 1792 + }, + { + "epoch": 0.05495280127497855, + "grad_norm": 2.0088082826931712, + "learning_rate": 1.9967374116626354e-05, + "loss": 0.7398, + "step": 1793 + }, + { + "epoch": 0.05498344979771975, + "grad_norm": 2.0195796643340365, + "learning_rate": 1.9967293949263e-05, + "loss": 0.9267, + "step": 1794 + }, + { + "epoch": 0.055014098320460954, + "grad_norm": 1.6672047006567343, + "learning_rate": 1.9967213683689345e-05, + "loss": 0.8336, + "step": 1795 + }, + { + "epoch": 0.05504474684320216, + "grad_norm": 1.810281070354763, + "learning_rate": 1.9967133319906188e-05, + "loss": 0.8131, + "step": 1796 + }, + { + "epoch": 0.05507539536594336, + "grad_norm": 0.8210563686013262, + "learning_rate": 1.9967052857914315e-05, + "loss": 0.6363, + "step": 1797 + }, + { + "epoch": 0.055106043888684564, + "grad_norm": 2.0501460517839374, + "learning_rate": 1.996697229771452e-05, + "loss": 0.8654, + "step": 1798 + }, + { + "epoch": 0.05513669241142577, + "grad_norm": 2.0547776823498087, + "learning_rate": 1.9966891639307596e-05, + "loss": 0.9295, + "step": 1799 + }, + { + "epoch": 0.055167340934166975, + "grad_norm": 2.1911243751484437, + "learning_rate": 1.9966810882694343e-05, + "loss": 0.783, + "step": 1800 + }, + { + "epoch": 0.055197989456908174, + "grad_norm": 1.8932440477235615, + "learning_rate": 1.9966730027875548e-05, + "loss": 0.8642, + "step": 1801 + }, + { + "epoch": 0.05522863797964938, + "grad_norm": 2.0357636762841205, + "learning_rate": 1.9966649074852014e-05, + "loss": 0.8813, + "step": 1802 + }, + { + "epoch": 0.055259286502390585, + "grad_norm": 1.7664738860746878, + "learning_rate": 1.9966568023624534e-05, + "loss": 0.7756, + "step": 1803 + }, + { + "epoch": 0.05528993502513179, + "grad_norm": 2.0986246465899794, + "learning_rate": 1.996648687419391e-05, + "loss": 0.8438, + "step": 1804 + }, + { + "epoch": 0.05532058354787299, + "grad_norm": 1.903262945000118, + "learning_rate": 1.9966405626560943e-05, + "loss": 0.8754, + "step": 1805 + }, + { + "epoch": 0.055351232070614195, + "grad_norm": 1.9398966536994395, + "learning_rate": 1.996632428072643e-05, + "loss": 0.6589, + "step": 1806 + }, + { + "epoch": 0.0553818805933554, + "grad_norm": 1.8178088404673771, + "learning_rate": 1.9966242836691173e-05, + "loss": 0.786, + "step": 1807 + }, + { + "epoch": 0.05541252911609661, + "grad_norm": 0.8420093550960261, + "learning_rate": 1.9966161294455973e-05, + "loss": 0.6289, + "step": 1808 + }, + { + "epoch": 0.055443177638837805, + "grad_norm": 1.936734797199582, + "learning_rate": 1.996607965402164e-05, + "loss": 0.9133, + "step": 1809 + }, + { + "epoch": 0.05547382616157901, + "grad_norm": 2.249048189173313, + "learning_rate": 1.9965997915388974e-05, + "loss": 0.8378, + "step": 1810 + }, + { + "epoch": 0.05550447468432022, + "grad_norm": 0.7478907709163908, + "learning_rate": 1.996591607855878e-05, + "loss": 0.6804, + "step": 1811 + }, + { + "epoch": 0.05553512320706142, + "grad_norm": 1.8855950982065524, + "learning_rate": 1.9965834143531865e-05, + "loss": 0.9727, + "step": 1812 + }, + { + "epoch": 0.05556577172980262, + "grad_norm": 1.7320339733601904, + "learning_rate": 1.9965752110309036e-05, + "loss": 0.9305, + "step": 1813 + }, + { + "epoch": 0.05559642025254383, + "grad_norm": 0.7587308082899376, + "learning_rate": 1.99656699788911e-05, + "loss": 0.6613, + "step": 1814 + }, + { + "epoch": 0.05562706877528503, + "grad_norm": 2.3286261182356194, + "learning_rate": 1.9965587749278872e-05, + "loss": 0.9017, + "step": 1815 + }, + { + "epoch": 0.05565771729802624, + "grad_norm": 2.091646103536228, + "learning_rate": 1.9965505421473153e-05, + "loss": 0.9877, + "step": 1816 + }, + { + "epoch": 0.05568836582076744, + "grad_norm": 1.7191269654155912, + "learning_rate": 1.9965422995474764e-05, + "loss": 0.8325, + "step": 1817 + }, + { + "epoch": 0.05571901434350864, + "grad_norm": 0.7635410299714326, + "learning_rate": 1.996534047128451e-05, + "loss": 0.5998, + "step": 1818 + }, + { + "epoch": 0.05574966286624985, + "grad_norm": 1.884260527150571, + "learning_rate": 1.9965257848903205e-05, + "loss": 0.821, + "step": 1819 + }, + { + "epoch": 0.055780311388991054, + "grad_norm": 1.8458809911900895, + "learning_rate": 1.996517512833167e-05, + "loss": 0.8631, + "step": 1820 + }, + { + "epoch": 0.05581095991173225, + "grad_norm": 0.7803763636075675, + "learning_rate": 1.996509230957071e-05, + "loss": 0.6614, + "step": 1821 + }, + { + "epoch": 0.05584160843447346, + "grad_norm": 2.092181571614468, + "learning_rate": 1.9965009392621148e-05, + "loss": 0.855, + "step": 1822 + }, + { + "epoch": 0.055872256957214664, + "grad_norm": 1.9372765645193717, + "learning_rate": 1.9964926377483794e-05, + "loss": 0.831, + "step": 1823 + }, + { + "epoch": 0.05590290547995587, + "grad_norm": 0.7961024775344425, + "learning_rate": 1.9964843264159476e-05, + "loss": 0.6542, + "step": 1824 + }, + { + "epoch": 0.05593355400269707, + "grad_norm": 1.9368158248512592, + "learning_rate": 1.996476005264901e-05, + "loss": 0.8565, + "step": 1825 + }, + { + "epoch": 0.055964202525438274, + "grad_norm": 0.7865293262070903, + "learning_rate": 1.9964676742953208e-05, + "loss": 0.676, + "step": 1826 + }, + { + "epoch": 0.05599485104817948, + "grad_norm": 1.6398942326768138, + "learning_rate": 1.9964593335072898e-05, + "loss": 0.8471, + "step": 1827 + }, + { + "epoch": 0.05602549957092068, + "grad_norm": 1.8203965044596582, + "learning_rate": 1.99645098290089e-05, + "loss": 0.8121, + "step": 1828 + }, + { + "epoch": 0.056056148093661884, + "grad_norm": 1.9789521101087832, + "learning_rate": 1.996442622476204e-05, + "loss": 0.9081, + "step": 1829 + }, + { + "epoch": 0.05608679661640309, + "grad_norm": 1.9456630087558329, + "learning_rate": 1.9964342522333136e-05, + "loss": 0.8003, + "step": 1830 + }, + { + "epoch": 0.056117445139144295, + "grad_norm": 1.8298303397293691, + "learning_rate": 1.9964258721723015e-05, + "loss": 0.9178, + "step": 1831 + }, + { + "epoch": 0.056148093661885494, + "grad_norm": 1.991035188494951, + "learning_rate": 1.9964174822932505e-05, + "loss": 0.8682, + "step": 1832 + }, + { + "epoch": 0.0561787421846267, + "grad_norm": 1.8221545275146322, + "learning_rate": 1.9964090825962434e-05, + "loss": 0.9616, + "step": 1833 + }, + { + "epoch": 0.056209390707367905, + "grad_norm": 1.8730751732427717, + "learning_rate": 1.9964006730813624e-05, + "loss": 0.7929, + "step": 1834 + }, + { + "epoch": 0.05624003923010911, + "grad_norm": 1.939806532501072, + "learning_rate": 1.9963922537486905e-05, + "loss": 0.7892, + "step": 1835 + }, + { + "epoch": 0.05627068775285031, + "grad_norm": 1.8353510359442773, + "learning_rate": 1.996383824598311e-05, + "loss": 0.7553, + "step": 1836 + }, + { + "epoch": 0.056301336275591515, + "grad_norm": 1.9302020758757386, + "learning_rate": 1.9963753856303064e-05, + "loss": 0.9891, + "step": 1837 + }, + { + "epoch": 0.05633198479833272, + "grad_norm": 1.9233481762532578, + "learning_rate": 1.99636693684476e-05, + "loss": 0.9474, + "step": 1838 + }, + { + "epoch": 0.05636263332107393, + "grad_norm": 1.9428020431710324, + "learning_rate": 1.996358478241756e-05, + "loss": 0.8664, + "step": 1839 + }, + { + "epoch": 0.056393281843815125, + "grad_norm": 1.7536282598483706, + "learning_rate": 1.9963500098213765e-05, + "loss": 0.7928, + "step": 1840 + }, + { + "epoch": 0.05642393036655633, + "grad_norm": 1.8912317693676666, + "learning_rate": 1.9963415315837058e-05, + "loss": 0.8416, + "step": 1841 + }, + { + "epoch": 0.05645457888929754, + "grad_norm": 1.9294814207179278, + "learning_rate": 1.9963330435288268e-05, + "loss": 0.8279, + "step": 1842 + }, + { + "epoch": 0.05648522741203874, + "grad_norm": 1.819454975861544, + "learning_rate": 1.9963245456568233e-05, + "loss": 0.8042, + "step": 1843 + }, + { + "epoch": 0.05651587593477994, + "grad_norm": 2.008586009964697, + "learning_rate": 1.996316037967779e-05, + "loss": 0.8763, + "step": 1844 + }, + { + "epoch": 0.05654652445752115, + "grad_norm": 1.9234521473197077, + "learning_rate": 1.9963075204617783e-05, + "loss": 1.0239, + "step": 1845 + }, + { + "epoch": 0.05657717298026235, + "grad_norm": 1.972725709552968, + "learning_rate": 1.9962989931389045e-05, + "loss": 0.8496, + "step": 1846 + }, + { + "epoch": 0.05660782150300356, + "grad_norm": 2.0854415639617425, + "learning_rate": 1.9962904559992417e-05, + "loss": 0.9339, + "step": 1847 + }, + { + "epoch": 0.05663847002574476, + "grad_norm": 1.7869125650471758, + "learning_rate": 1.9962819090428743e-05, + "loss": 0.8714, + "step": 1848 + }, + { + "epoch": 0.05666911854848596, + "grad_norm": 1.9682686984081923, + "learning_rate": 1.9962733522698863e-05, + "loss": 0.9492, + "step": 1849 + }, + { + "epoch": 0.05669976707122717, + "grad_norm": 1.8149971974402868, + "learning_rate": 1.9962647856803617e-05, + "loss": 0.7445, + "step": 1850 + }, + { + "epoch": 0.056730415593968374, + "grad_norm": 1.8844867010607298, + "learning_rate": 1.9962562092743857e-05, + "loss": 0.8845, + "step": 1851 + }, + { + "epoch": 0.05676106411670957, + "grad_norm": 1.8944454811314093, + "learning_rate": 1.9962476230520425e-05, + "loss": 0.8204, + "step": 1852 + }, + { + "epoch": 0.05679171263945078, + "grad_norm": 2.0819549265792077, + "learning_rate": 1.9962390270134162e-05, + "loss": 0.9619, + "step": 1853 + }, + { + "epoch": 0.056822361162191984, + "grad_norm": 1.9964361079498048, + "learning_rate": 1.9962304211585918e-05, + "loss": 0.7868, + "step": 1854 + }, + { + "epoch": 0.05685300968493319, + "grad_norm": 2.1567948285186347, + "learning_rate": 1.9962218054876547e-05, + "loss": 0.8719, + "step": 1855 + }, + { + "epoch": 0.05688365820767439, + "grad_norm": 1.841456520129195, + "learning_rate": 1.996213180000689e-05, + "loss": 0.8165, + "step": 1856 + }, + { + "epoch": 0.056914306730415594, + "grad_norm": 2.080506593102429, + "learning_rate": 1.9962045446977795e-05, + "loss": 0.9186, + "step": 1857 + }, + { + "epoch": 0.0569449552531568, + "grad_norm": 1.7398596739777992, + "learning_rate": 1.9961958995790122e-05, + "loss": 0.8615, + "step": 1858 + }, + { + "epoch": 0.056975603775898, + "grad_norm": 1.7322442449722202, + "learning_rate": 1.9961872446444716e-05, + "loss": 0.7207, + "step": 1859 + }, + { + "epoch": 0.057006252298639204, + "grad_norm": 1.827949762999373, + "learning_rate": 1.9961785798942433e-05, + "loss": 0.815, + "step": 1860 + }, + { + "epoch": 0.05703690082138041, + "grad_norm": 1.9581275435004504, + "learning_rate": 1.9961699053284125e-05, + "loss": 0.8188, + "step": 1861 + }, + { + "epoch": 0.057067549344121615, + "grad_norm": 2.0064486101483623, + "learning_rate": 1.996161220947065e-05, + "loss": 0.7429, + "step": 1862 + }, + { + "epoch": 0.057098197866862814, + "grad_norm": 1.881091170673388, + "learning_rate": 1.9961525267502858e-05, + "loss": 0.7791, + "step": 1863 + }, + { + "epoch": 0.05712884638960402, + "grad_norm": 1.8866247059445618, + "learning_rate": 1.996143822738161e-05, + "loss": 0.8292, + "step": 1864 + }, + { + "epoch": 0.057159494912345225, + "grad_norm": 0.9259620099474811, + "learning_rate": 1.9961351089107762e-05, + "loss": 0.6437, + "step": 1865 + }, + { + "epoch": 0.05719014343508643, + "grad_norm": 1.8218874291477025, + "learning_rate": 1.9961263852682173e-05, + "loss": 0.8024, + "step": 1866 + }, + { + "epoch": 0.05722079195782763, + "grad_norm": 1.9139431492756263, + "learning_rate": 1.9961176518105706e-05, + "loss": 0.8609, + "step": 1867 + }, + { + "epoch": 0.057251440480568835, + "grad_norm": 2.1915673990721256, + "learning_rate": 1.996108908537921e-05, + "loss": 0.8684, + "step": 1868 + }, + { + "epoch": 0.05728208900331004, + "grad_norm": 1.68217448891668, + "learning_rate": 1.996100155450356e-05, + "loss": 0.7278, + "step": 1869 + }, + { + "epoch": 0.05731273752605125, + "grad_norm": 1.6301656880639093, + "learning_rate": 1.9960913925479616e-05, + "loss": 0.8237, + "step": 1870 + }, + { + "epoch": 0.057343386048792445, + "grad_norm": 1.8837643558673172, + "learning_rate": 1.9960826198308233e-05, + "loss": 0.9717, + "step": 1871 + }, + { + "epoch": 0.05737403457153365, + "grad_norm": 2.0963983347209463, + "learning_rate": 1.996073837299028e-05, + "loss": 0.8302, + "step": 1872 + }, + { + "epoch": 0.05740468309427486, + "grad_norm": 1.9997246741383545, + "learning_rate": 1.996065044952663e-05, + "loss": 0.9555, + "step": 1873 + }, + { + "epoch": 0.05743533161701606, + "grad_norm": 32.91339270146312, + "learning_rate": 1.9960562427918137e-05, + "loss": 1.2025, + "step": 1874 + }, + { + "epoch": 0.05746598013975726, + "grad_norm": 1.8991162651590994, + "learning_rate": 1.9960474308165676e-05, + "loss": 0.9128, + "step": 1875 + }, + { + "epoch": 0.05749662866249847, + "grad_norm": 27.374895076213203, + "learning_rate": 1.996038609027011e-05, + "loss": 0.9843, + "step": 1876 + }, + { + "epoch": 0.05752727718523967, + "grad_norm": 1.777588698933224, + "learning_rate": 1.9960297774232316e-05, + "loss": 0.8839, + "step": 1877 + }, + { + "epoch": 0.05755792570798088, + "grad_norm": 79.37199677570939, + "learning_rate": 1.9960209360053157e-05, + "loss": 0.9652, + "step": 1878 + }, + { + "epoch": 0.05758857423072208, + "grad_norm": 2.685375321847394, + "learning_rate": 1.996012084773351e-05, + "loss": 0.8756, + "step": 1879 + }, + { + "epoch": 0.05761922275346328, + "grad_norm": 1.990533484868836, + "learning_rate": 1.996003223727424e-05, + "loss": 0.7868, + "step": 1880 + }, + { + "epoch": 0.05764987127620449, + "grad_norm": 2.023022866771795, + "learning_rate": 1.9959943528676223e-05, + "loss": 0.8801, + "step": 1881 + }, + { + "epoch": 0.057680519798945694, + "grad_norm": 2.3993673842275505, + "learning_rate": 1.995985472194034e-05, + "loss": 0.85, + "step": 1882 + }, + { + "epoch": 0.05771116832168689, + "grad_norm": 3.043436352024141, + "learning_rate": 1.9959765817067455e-05, + "loss": 0.9944, + "step": 1883 + }, + { + "epoch": 0.0577418168444281, + "grad_norm": 4.194743644944136, + "learning_rate": 1.995967681405845e-05, + "loss": 0.9043, + "step": 1884 + }, + { + "epoch": 0.057772465367169304, + "grad_norm": 18.731551825978073, + "learning_rate": 1.99595877129142e-05, + "loss": 1.0978, + "step": 1885 + }, + { + "epoch": 0.05780311388991051, + "grad_norm": 3.163345413635302, + "learning_rate": 1.9959498513635587e-05, + "loss": 0.8117, + "step": 1886 + }, + { + "epoch": 0.05783376241265171, + "grad_norm": 1.9552793002275954, + "learning_rate": 1.9959409216223485e-05, + "loss": 0.8063, + "step": 1887 + }, + { + "epoch": 0.057864410935392914, + "grad_norm": 1.0620765821319869, + "learning_rate": 1.995931982067878e-05, + "loss": 0.6624, + "step": 1888 + }, + { + "epoch": 0.05789505945813412, + "grad_norm": 2.1615681280971066, + "learning_rate": 1.9959230327002344e-05, + "loss": 0.8812, + "step": 1889 + }, + { + "epoch": 0.05792570798087532, + "grad_norm": 2.278788038124208, + "learning_rate": 1.9959140735195063e-05, + "loss": 0.7969, + "step": 1890 + }, + { + "epoch": 0.057956356503616524, + "grad_norm": 2.1235469546778125, + "learning_rate": 1.995905104525782e-05, + "loss": 0.8994, + "step": 1891 + }, + { + "epoch": 0.05798700502635773, + "grad_norm": 0.8967360774326991, + "learning_rate": 1.99589612571915e-05, + "loss": 0.6512, + "step": 1892 + }, + { + "epoch": 0.058017653549098935, + "grad_norm": 1.967261676380148, + "learning_rate": 1.9958871370996992e-05, + "loss": 0.7746, + "step": 1893 + }, + { + "epoch": 0.058048302071840134, + "grad_norm": 2.0472681639574613, + "learning_rate": 1.995878138667517e-05, + "loss": 0.8504, + "step": 1894 + }, + { + "epoch": 0.05807895059458134, + "grad_norm": 1.9403521826241474, + "learning_rate": 1.9958691304226928e-05, + "loss": 0.8011, + "step": 1895 + }, + { + "epoch": 0.058109599117322545, + "grad_norm": 2.082373983824349, + "learning_rate": 1.995860112365315e-05, + "loss": 0.8336, + "step": 1896 + }, + { + "epoch": 0.05814024764006375, + "grad_norm": 1.7597249124347387, + "learning_rate": 1.995851084495473e-05, + "loss": 0.8251, + "step": 1897 + }, + { + "epoch": 0.05817089616280495, + "grad_norm": 2.0494348499795514, + "learning_rate": 1.9958420468132555e-05, + "loss": 1.0205, + "step": 1898 + }, + { + "epoch": 0.058201544685546155, + "grad_norm": 0.9300324808937178, + "learning_rate": 1.9958329993187514e-05, + "loss": 0.6656, + "step": 1899 + }, + { + "epoch": 0.05823219320828736, + "grad_norm": 1.8724469879675771, + "learning_rate": 1.9958239420120503e-05, + "loss": 0.9091, + "step": 1900 + }, + { + "epoch": 0.05826284173102857, + "grad_norm": 2.127289636595053, + "learning_rate": 1.9958148748932406e-05, + "loss": 0.8769, + "step": 1901 + }, + { + "epoch": 0.058293490253769766, + "grad_norm": 2.5615515376560154, + "learning_rate": 1.9958057979624127e-05, + "loss": 0.8124, + "step": 1902 + }, + { + "epoch": 0.05832413877651097, + "grad_norm": 1.8618770531456308, + "learning_rate": 1.995796711219655e-05, + "loss": 0.9376, + "step": 1903 + }, + { + "epoch": 0.05835478729925218, + "grad_norm": 0.9315736243972755, + "learning_rate": 1.9957876146650577e-05, + "loss": 0.6964, + "step": 1904 + }, + { + "epoch": 0.05838543582199338, + "grad_norm": 2.020148960716303, + "learning_rate": 1.99577850829871e-05, + "loss": 0.9066, + "step": 1905 + }, + { + "epoch": 0.05841608434473458, + "grad_norm": 2.0730565459651586, + "learning_rate": 1.9957693921207024e-05, + "loss": 0.8522, + "step": 1906 + }, + { + "epoch": 0.05844673286747579, + "grad_norm": 2.0504453696853466, + "learning_rate": 1.995760266131124e-05, + "loss": 0.8219, + "step": 1907 + }, + { + "epoch": 0.05847738139021699, + "grad_norm": 1.8294964994637812, + "learning_rate": 1.9957511303300645e-05, + "loss": 0.9641, + "step": 1908 + }, + { + "epoch": 0.0585080299129582, + "grad_norm": 1.7575994859445596, + "learning_rate": 1.995741984717615e-05, + "loss": 0.8429, + "step": 1909 + }, + { + "epoch": 0.0585386784356994, + "grad_norm": 1.8400994656127838, + "learning_rate": 1.9957328292938646e-05, + "loss": 0.89, + "step": 1910 + }, + { + "epoch": 0.0585693269584406, + "grad_norm": 1.9727432472992719, + "learning_rate": 1.9957236640589037e-05, + "loss": 0.8605, + "step": 1911 + }, + { + "epoch": 0.05859997548118181, + "grad_norm": 1.970871672762004, + "learning_rate": 1.9957144890128228e-05, + "loss": 0.8735, + "step": 1912 + }, + { + "epoch": 0.058630624003923014, + "grad_norm": 2.0471561137164453, + "learning_rate": 1.9957053041557128e-05, + "loss": 0.9441, + "step": 1913 + }, + { + "epoch": 0.05866127252666421, + "grad_norm": 1.7108581411296448, + "learning_rate": 1.9956961094876634e-05, + "loss": 0.8206, + "step": 1914 + }, + { + "epoch": 0.05869192104940542, + "grad_norm": 1.9689644675723865, + "learning_rate": 1.995686905008765e-05, + "loss": 0.8445, + "step": 1915 + }, + { + "epoch": 0.058722569572146624, + "grad_norm": 1.7826062345161617, + "learning_rate": 1.9956776907191093e-05, + "loss": 0.9377, + "step": 1916 + }, + { + "epoch": 0.05875321809488783, + "grad_norm": 1.7499480304300643, + "learning_rate": 1.9956684666187863e-05, + "loss": 0.7487, + "step": 1917 + }, + { + "epoch": 0.05878386661762903, + "grad_norm": 1.8658437480078396, + "learning_rate": 1.9956592327078872e-05, + "loss": 0.8451, + "step": 1918 + }, + { + "epoch": 0.058814515140370234, + "grad_norm": 1.94231298585685, + "learning_rate": 1.995649988986503e-05, + "loss": 0.8679, + "step": 1919 + }, + { + "epoch": 0.05884516366311144, + "grad_norm": 1.1421944402293784, + "learning_rate": 1.9956407354547246e-05, + "loss": 0.6815, + "step": 1920 + }, + { + "epoch": 0.058875812185852645, + "grad_norm": 1.9804276660589244, + "learning_rate": 1.995631472112644e-05, + "loss": 0.9004, + "step": 1921 + }, + { + "epoch": 0.058906460708593844, + "grad_norm": 2.0288387972225093, + "learning_rate": 1.9956221989603508e-05, + "loss": 0.8882, + "step": 1922 + }, + { + "epoch": 0.05893710923133505, + "grad_norm": 1.7388853733579925, + "learning_rate": 1.9956129159979377e-05, + "loss": 0.9204, + "step": 1923 + }, + { + "epoch": 0.058967757754076255, + "grad_norm": 1.7807713747040943, + "learning_rate": 1.995603623225496e-05, + "loss": 0.9238, + "step": 1924 + }, + { + "epoch": 0.058998406276817454, + "grad_norm": 1.9815955282780016, + "learning_rate": 1.9955943206431166e-05, + "loss": 0.7954, + "step": 1925 + }, + { + "epoch": 0.05902905479955866, + "grad_norm": 2.198197763912837, + "learning_rate": 1.995585008250892e-05, + "loss": 0.9664, + "step": 1926 + }, + { + "epoch": 0.059059703322299865, + "grad_norm": 1.9085582963561434, + "learning_rate": 1.9955756860489132e-05, + "loss": 0.9044, + "step": 1927 + }, + { + "epoch": 0.05909035184504107, + "grad_norm": 1.1433462345843246, + "learning_rate": 1.9955663540372727e-05, + "loss": 0.6876, + "step": 1928 + }, + { + "epoch": 0.05912100036778227, + "grad_norm": 0.8957984612836971, + "learning_rate": 1.9955570122160624e-05, + "loss": 0.637, + "step": 1929 + }, + { + "epoch": 0.059151648890523476, + "grad_norm": 0.7664211238247511, + "learning_rate": 1.995547660585374e-05, + "loss": 0.6582, + "step": 1930 + }, + { + "epoch": 0.05918229741326468, + "grad_norm": 2.098605428497463, + "learning_rate": 1.9955382991452996e-05, + "loss": 0.9468, + "step": 1931 + }, + { + "epoch": 0.05921294593600589, + "grad_norm": 2.0667133355554363, + "learning_rate": 1.9955289278959315e-05, + "loss": 0.8793, + "step": 1932 + }, + { + "epoch": 0.059243594458747086, + "grad_norm": 2.0447959827017734, + "learning_rate": 1.9955195468373625e-05, + "loss": 1.0006, + "step": 1933 + }, + { + "epoch": 0.05927424298148829, + "grad_norm": 2.069438300510292, + "learning_rate": 1.9955101559696845e-05, + "loss": 1.0179, + "step": 1934 + }, + { + "epoch": 0.0593048915042295, + "grad_norm": 1.8317471121616955, + "learning_rate": 1.9955007552929905e-05, + "loss": 0.8443, + "step": 1935 + }, + { + "epoch": 0.0593355400269707, + "grad_norm": 1.881385342783108, + "learning_rate": 1.9954913448073724e-05, + "loss": 0.8508, + "step": 1936 + }, + { + "epoch": 0.0593661885497119, + "grad_norm": 1.8239226855106219, + "learning_rate": 1.9954819245129237e-05, + "loss": 0.8791, + "step": 1937 + }, + { + "epoch": 0.05939683707245311, + "grad_norm": 1.8105255889277065, + "learning_rate": 1.995472494409737e-05, + "loss": 0.8564, + "step": 1938 + }, + { + "epoch": 0.05942748559519431, + "grad_norm": 1.9454584580335463, + "learning_rate": 1.9954630544979046e-05, + "loss": 0.9764, + "step": 1939 + }, + { + "epoch": 0.05945813411793552, + "grad_norm": 1.714632338352169, + "learning_rate": 1.99545360477752e-05, + "loss": 0.6952, + "step": 1940 + }, + { + "epoch": 0.05948878264067672, + "grad_norm": 2.0035493571283873, + "learning_rate": 1.9954441452486768e-05, + "loss": 0.8996, + "step": 1941 + }, + { + "epoch": 0.05951943116341792, + "grad_norm": 1.7521080600598606, + "learning_rate": 1.9954346759114677e-05, + "loss": 0.8667, + "step": 1942 + }, + { + "epoch": 0.05955007968615913, + "grad_norm": 1.7425701774530598, + "learning_rate": 1.995425196765986e-05, + "loss": 0.7719, + "step": 1943 + }, + { + "epoch": 0.059580728208900334, + "grad_norm": 1.9493018529609554, + "learning_rate": 1.995415707812325e-05, + "loss": 0.8214, + "step": 1944 + }, + { + "epoch": 0.05961137673164153, + "grad_norm": 1.8450368127915557, + "learning_rate": 1.9954062090505784e-05, + "loss": 0.8234, + "step": 1945 + }, + { + "epoch": 0.05964202525438274, + "grad_norm": 1.735154133697394, + "learning_rate": 1.9953967004808398e-05, + "loss": 0.9586, + "step": 1946 + }, + { + "epoch": 0.059672673777123944, + "grad_norm": 1.687275603408903, + "learning_rate": 1.995387182103203e-05, + "loss": 0.7834, + "step": 1947 + }, + { + "epoch": 0.05970332229986515, + "grad_norm": 1.7625081566964782, + "learning_rate": 1.9953776539177613e-05, + "loss": 0.7784, + "step": 1948 + }, + { + "epoch": 0.05973397082260635, + "grad_norm": 1.846327306139802, + "learning_rate": 1.995368115924609e-05, + "loss": 0.9757, + "step": 1949 + }, + { + "epoch": 0.059764619345347554, + "grad_norm": 1.7177525266100175, + "learning_rate": 1.99535856812384e-05, + "loss": 0.8255, + "step": 1950 + }, + { + "epoch": 0.05979526786808876, + "grad_norm": 1.673973844588314, + "learning_rate": 1.9953490105155482e-05, + "loss": 0.8554, + "step": 1951 + }, + { + "epoch": 0.059825916390829965, + "grad_norm": 1.5838834978664977, + "learning_rate": 1.995339443099828e-05, + "loss": 0.7235, + "step": 1952 + }, + { + "epoch": 0.059856564913571164, + "grad_norm": 2.0156877864640035, + "learning_rate": 1.995329865876774e-05, + "loss": 0.8052, + "step": 1953 + }, + { + "epoch": 0.05988721343631237, + "grad_norm": 1.957757140400976, + "learning_rate": 1.99532027884648e-05, + "loss": 0.857, + "step": 1954 + }, + { + "epoch": 0.059917861959053575, + "grad_norm": 2.277125062314613, + "learning_rate": 1.99531068200904e-05, + "loss": 0.8343, + "step": 1955 + }, + { + "epoch": 0.059948510481794774, + "grad_norm": 0.8604158150916105, + "learning_rate": 1.99530107536455e-05, + "loss": 0.6595, + "step": 1956 + }, + { + "epoch": 0.05997915900453598, + "grad_norm": 1.9467432812573728, + "learning_rate": 1.995291458913103e-05, + "loss": 0.9327, + "step": 1957 + }, + { + "epoch": 0.060009807527277186, + "grad_norm": 0.8860769465086773, + "learning_rate": 1.9952818326547954e-05, + "loss": 0.6488, + "step": 1958 + }, + { + "epoch": 0.06004045605001839, + "grad_norm": 1.8310947297842508, + "learning_rate": 1.995272196589721e-05, + "loss": 0.8802, + "step": 1959 + }, + { + "epoch": 0.06007110457275959, + "grad_norm": 1.9045578388839899, + "learning_rate": 1.995262550717975e-05, + "loss": 0.9452, + "step": 1960 + }, + { + "epoch": 0.060101753095500796, + "grad_norm": 0.8530473046788996, + "learning_rate": 1.9952528950396523e-05, + "loss": 0.6584, + "step": 1961 + }, + { + "epoch": 0.060132401618242, + "grad_norm": 1.7441854655096132, + "learning_rate": 1.9952432295548484e-05, + "loss": 0.8903, + "step": 1962 + }, + { + "epoch": 0.06016305014098321, + "grad_norm": 0.8707913635926962, + "learning_rate": 1.995233554263658e-05, + "loss": 0.6884, + "step": 1963 + }, + { + "epoch": 0.060193698663724406, + "grad_norm": 1.9792004995166157, + "learning_rate": 1.995223869166177e-05, + "loss": 0.9316, + "step": 1964 + }, + { + "epoch": 0.06022434718646561, + "grad_norm": 1.9233401446988139, + "learning_rate": 1.9952141742625004e-05, + "loss": 0.836, + "step": 1965 + }, + { + "epoch": 0.06025499570920682, + "grad_norm": 2.202913124309388, + "learning_rate": 1.9952044695527242e-05, + "loss": 0.9366, + "step": 1966 + }, + { + "epoch": 0.06028564423194802, + "grad_norm": 1.867763324977625, + "learning_rate": 1.9951947550369435e-05, + "loss": 0.9048, + "step": 1967 + }, + { + "epoch": 0.06031629275468922, + "grad_norm": 1.9028152881476261, + "learning_rate": 1.9951850307152542e-05, + "loss": 0.9853, + "step": 1968 + }, + { + "epoch": 0.06034694127743043, + "grad_norm": 1.8210445005655194, + "learning_rate": 1.995175296587752e-05, + "loss": 0.8316, + "step": 1969 + }, + { + "epoch": 0.06037758980017163, + "grad_norm": 2.2172499988575134, + "learning_rate": 1.9951655526545334e-05, + "loss": 0.9101, + "step": 1970 + }, + { + "epoch": 0.06040823832291284, + "grad_norm": 2.0602842164296153, + "learning_rate": 1.9951557989156937e-05, + "loss": 0.9067, + "step": 1971 + }, + { + "epoch": 0.06043888684565404, + "grad_norm": 1.935751000173471, + "learning_rate": 1.9951460353713296e-05, + "loss": 0.7954, + "step": 1972 + }, + { + "epoch": 0.06046953536839524, + "grad_norm": 1.8660398582540756, + "learning_rate": 1.9951362620215365e-05, + "loss": 0.8338, + "step": 1973 + }, + { + "epoch": 0.06050018389113645, + "grad_norm": 1.0565696377971867, + "learning_rate": 1.9951264788664115e-05, + "loss": 0.6581, + "step": 1974 + }, + { + "epoch": 0.060530832413877654, + "grad_norm": 1.8018710775377011, + "learning_rate": 1.995116685906051e-05, + "loss": 0.8895, + "step": 1975 + }, + { + "epoch": 0.06056148093661885, + "grad_norm": 1.941407119036879, + "learning_rate": 1.9951068831405506e-05, + "loss": 0.8834, + "step": 1976 + }, + { + "epoch": 0.06059212945936006, + "grad_norm": 1.7582116732271493, + "learning_rate": 1.995097070570008e-05, + "loss": 0.9617, + "step": 1977 + }, + { + "epoch": 0.060622777982101264, + "grad_norm": 1.9149831605016399, + "learning_rate": 1.9950872481945188e-05, + "loss": 0.8773, + "step": 1978 + }, + { + "epoch": 0.06065342650484247, + "grad_norm": 2.069741493355811, + "learning_rate": 1.995077416014181e-05, + "loss": 0.7856, + "step": 1979 + }, + { + "epoch": 0.06068407502758367, + "grad_norm": 0.8031097260917004, + "learning_rate": 1.9950675740290902e-05, + "loss": 0.6579, + "step": 1980 + }, + { + "epoch": 0.060714723550324874, + "grad_norm": 1.9296278739436183, + "learning_rate": 1.9950577222393442e-05, + "loss": 0.8973, + "step": 1981 + }, + { + "epoch": 0.06074537207306608, + "grad_norm": 1.8973507002846826, + "learning_rate": 1.9950478606450397e-05, + "loss": 0.9124, + "step": 1982 + }, + { + "epoch": 0.060776020595807285, + "grad_norm": 0.8347887549050136, + "learning_rate": 1.9950379892462743e-05, + "loss": 0.6443, + "step": 1983 + }, + { + "epoch": 0.060806669118548484, + "grad_norm": 1.8548132530555488, + "learning_rate": 1.995028108043145e-05, + "loss": 0.8319, + "step": 1984 + }, + { + "epoch": 0.06083731764128969, + "grad_norm": 1.8319609341494654, + "learning_rate": 1.995018217035749e-05, + "loss": 0.9357, + "step": 1985 + }, + { + "epoch": 0.060867966164030896, + "grad_norm": 1.888395717642766, + "learning_rate": 1.9950083162241843e-05, + "loss": 0.8052, + "step": 1986 + }, + { + "epoch": 0.060898614686772094, + "grad_norm": 1.9253060703965368, + "learning_rate": 1.9949984056085477e-05, + "loss": 0.8657, + "step": 1987 + }, + { + "epoch": 0.0609292632095133, + "grad_norm": 0.780167570271539, + "learning_rate": 1.9949884851889373e-05, + "loss": 0.643, + "step": 1988 + }, + { + "epoch": 0.060959911732254506, + "grad_norm": 1.9666060374922611, + "learning_rate": 1.9949785549654508e-05, + "loss": 0.8253, + "step": 1989 + }, + { + "epoch": 0.06099056025499571, + "grad_norm": 1.861891663107093, + "learning_rate": 1.9949686149381858e-05, + "loss": 0.9631, + "step": 1990 + }, + { + "epoch": 0.06102120877773691, + "grad_norm": 1.7305567225701388, + "learning_rate": 1.994958665107241e-05, + "loss": 0.8433, + "step": 1991 + }, + { + "epoch": 0.061051857300478116, + "grad_norm": 1.878105727312511, + "learning_rate": 1.9949487054727138e-05, + "loss": 0.9841, + "step": 1992 + }, + { + "epoch": 0.06108250582321932, + "grad_norm": 1.8698876036731027, + "learning_rate": 1.994938736034702e-05, + "loss": 0.9999, + "step": 1993 + }, + { + "epoch": 0.06111315434596053, + "grad_norm": 1.9291249834282498, + "learning_rate": 1.9949287567933043e-05, + "loss": 0.8124, + "step": 1994 + }, + { + "epoch": 0.061143802868701726, + "grad_norm": 1.7955238068534503, + "learning_rate": 1.9949187677486194e-05, + "loss": 0.8531, + "step": 1995 + }, + { + "epoch": 0.06117445139144293, + "grad_norm": 1.8997217196219782, + "learning_rate": 1.994908768900745e-05, + "loss": 0.8759, + "step": 1996 + }, + { + "epoch": 0.06120509991418414, + "grad_norm": 1.787873487474987, + "learning_rate": 1.99489876024978e-05, + "loss": 0.8498, + "step": 1997 + }, + { + "epoch": 0.06123574843692534, + "grad_norm": 1.8538859001663222, + "learning_rate": 1.9948887417958232e-05, + "loss": 0.7798, + "step": 1998 + }, + { + "epoch": 0.06126639695966654, + "grad_norm": 0.9358951582304911, + "learning_rate": 1.9948787135389728e-05, + "loss": 0.6809, + "step": 1999 + }, + { + "epoch": 0.06129704548240775, + "grad_norm": 2.0753698954970803, + "learning_rate": 1.994868675479328e-05, + "loss": 0.9271, + "step": 2000 + }, + { + "epoch": 0.06132769400514895, + "grad_norm": 2.1910415037350432, + "learning_rate": 1.9948586276169877e-05, + "loss": 0.8119, + "step": 2001 + }, + { + "epoch": 0.06135834252789016, + "grad_norm": 1.954940833507461, + "learning_rate": 1.9948485699520506e-05, + "loss": 0.7982, + "step": 2002 + }, + { + "epoch": 0.06138899105063136, + "grad_norm": 1.7756692626780965, + "learning_rate": 1.994838502484616e-05, + "loss": 0.7977, + "step": 2003 + }, + { + "epoch": 0.06141963957337256, + "grad_norm": 1.6496021500514015, + "learning_rate": 1.994828425214783e-05, + "loss": 0.8211, + "step": 2004 + }, + { + "epoch": 0.06145028809611377, + "grad_norm": 1.9014088281271, + "learning_rate": 1.9948183381426512e-05, + "loss": 0.9198, + "step": 2005 + }, + { + "epoch": 0.061480936618854974, + "grad_norm": 1.8060282109223253, + "learning_rate": 1.9948082412683197e-05, + "loss": 0.845, + "step": 2006 + }, + { + "epoch": 0.06151158514159617, + "grad_norm": 0.8327356262515986, + "learning_rate": 1.994798134591888e-05, + "loss": 0.6618, + "step": 2007 + }, + { + "epoch": 0.06154223366433738, + "grad_norm": 1.7987599997931412, + "learning_rate": 1.994788018113456e-05, + "loss": 0.8901, + "step": 2008 + }, + { + "epoch": 0.061572882187078584, + "grad_norm": 2.196820270107557, + "learning_rate": 1.994777891833123e-05, + "loss": 0.8812, + "step": 2009 + }, + { + "epoch": 0.06160353070981979, + "grad_norm": 1.827327332741674, + "learning_rate": 1.994767755750989e-05, + "loss": 0.8398, + "step": 2010 + }, + { + "epoch": 0.06163417923256099, + "grad_norm": 1.8763285107399594, + "learning_rate": 1.9947576098671535e-05, + "loss": 0.9722, + "step": 2011 + }, + { + "epoch": 0.061664827755302194, + "grad_norm": 1.9039931689778653, + "learning_rate": 1.9947474541817168e-05, + "loss": 0.9042, + "step": 2012 + }, + { + "epoch": 0.0616954762780434, + "grad_norm": 1.7668648300348375, + "learning_rate": 1.994737288694779e-05, + "loss": 0.7916, + "step": 2013 + }, + { + "epoch": 0.061726124800784606, + "grad_norm": 1.6915582893063459, + "learning_rate": 1.9947271134064403e-05, + "loss": 0.7728, + "step": 2014 + }, + { + "epoch": 0.061756773323525804, + "grad_norm": 0.8007417817641587, + "learning_rate": 1.994716928316801e-05, + "loss": 0.6522, + "step": 2015 + }, + { + "epoch": 0.06178742184626701, + "grad_norm": 0.7817176410190196, + "learning_rate": 1.9947067334259608e-05, + "loss": 0.6426, + "step": 2016 + }, + { + "epoch": 0.061818070369008216, + "grad_norm": 1.7988234547037054, + "learning_rate": 1.994696528734021e-05, + "loss": 0.8731, + "step": 2017 + }, + { + "epoch": 0.061848718891749414, + "grad_norm": 1.762675366699225, + "learning_rate": 1.9946863142410815e-05, + "loss": 0.8029, + "step": 2018 + }, + { + "epoch": 0.06187936741449062, + "grad_norm": 1.7610831649264453, + "learning_rate": 1.9946760899472436e-05, + "loss": 0.9173, + "step": 2019 + }, + { + "epoch": 0.061910015937231826, + "grad_norm": 1.856745869869656, + "learning_rate": 1.9946658558526077e-05, + "loss": 0.8241, + "step": 2020 + }, + { + "epoch": 0.06194066445997303, + "grad_norm": 1.772123373458529, + "learning_rate": 1.994655611957274e-05, + "loss": 0.912, + "step": 2021 + }, + { + "epoch": 0.06197131298271423, + "grad_norm": 1.7448705965597116, + "learning_rate": 1.9946453582613447e-05, + "loss": 0.7855, + "step": 2022 + }, + { + "epoch": 0.062001961505455436, + "grad_norm": 2.3217820428892444, + "learning_rate": 1.99463509476492e-05, + "loss": 1.0543, + "step": 2023 + }, + { + "epoch": 0.06203261002819664, + "grad_norm": 2.368583772911222, + "learning_rate": 1.9946248214681012e-05, + "loss": 0.8439, + "step": 2024 + }, + { + "epoch": 0.06206325855093785, + "grad_norm": 1.6371730900234456, + "learning_rate": 1.9946145383709898e-05, + "loss": 0.8523, + "step": 2025 + }, + { + "epoch": 0.062093907073679046, + "grad_norm": 1.8776048687696567, + "learning_rate": 1.994604245473686e-05, + "loss": 0.927, + "step": 2026 + }, + { + "epoch": 0.06212455559642025, + "grad_norm": 1.944996072362145, + "learning_rate": 1.9945939427762933e-05, + "loss": 0.9235, + "step": 2027 + }, + { + "epoch": 0.06215520411916146, + "grad_norm": 1.7310718475288869, + "learning_rate": 1.994583630278911e-05, + "loss": 0.8057, + "step": 2028 + }, + { + "epoch": 0.06218585264190266, + "grad_norm": 1.9850075951961752, + "learning_rate": 1.9945733079816424e-05, + "loss": 0.8847, + "step": 2029 + }, + { + "epoch": 0.06221650116464386, + "grad_norm": 1.092249149537227, + "learning_rate": 1.994562975884588e-05, + "loss": 0.6665, + "step": 2030 + }, + { + "epoch": 0.06224714968738507, + "grad_norm": 1.6147085775545387, + "learning_rate": 1.9945526339878504e-05, + "loss": 0.8122, + "step": 2031 + }, + { + "epoch": 0.06227779821012627, + "grad_norm": 1.7752183962503185, + "learning_rate": 1.9945422822915314e-05, + "loss": 0.8442, + "step": 2032 + }, + { + "epoch": 0.06230844673286748, + "grad_norm": 1.7670003589517356, + "learning_rate": 1.9945319207957328e-05, + "loss": 0.8085, + "step": 2033 + }, + { + "epoch": 0.06233909525560868, + "grad_norm": 0.9212076248866616, + "learning_rate": 1.9945215495005564e-05, + "loss": 0.6799, + "step": 2034 + }, + { + "epoch": 0.06236974377834988, + "grad_norm": 1.810272102479429, + "learning_rate": 1.994511168406105e-05, + "loss": 0.8817, + "step": 2035 + }, + { + "epoch": 0.06240039230109109, + "grad_norm": 1.722561607740317, + "learning_rate": 1.9945007775124806e-05, + "loss": 0.7315, + "step": 2036 + }, + { + "epoch": 0.062431040823832294, + "grad_norm": 2.0900425369662914, + "learning_rate": 1.9944903768197854e-05, + "loss": 0.9298, + "step": 2037 + }, + { + "epoch": 0.06246168934657349, + "grad_norm": 0.8292032275693986, + "learning_rate": 1.994479966328122e-05, + "loss": 0.6509, + "step": 2038 + }, + { + "epoch": 0.0624923378693147, + "grad_norm": 1.584726866562812, + "learning_rate": 1.9944695460375934e-05, + "loss": 0.8835, + "step": 2039 + }, + { + "epoch": 0.0625229863920559, + "grad_norm": 1.802031285114454, + "learning_rate": 1.9944591159483017e-05, + "loss": 0.8074, + "step": 2040 + }, + { + "epoch": 0.06255363491479711, + "grad_norm": 1.6708182193250916, + "learning_rate": 1.9944486760603498e-05, + "loss": 0.8498, + "step": 2041 + }, + { + "epoch": 0.06258428343753832, + "grad_norm": 1.9004996328495583, + "learning_rate": 1.994438226373841e-05, + "loss": 0.9722, + "step": 2042 + }, + { + "epoch": 0.06261493196027952, + "grad_norm": 1.824414897555305, + "learning_rate": 1.9944277668888774e-05, + "loss": 0.849, + "step": 2043 + }, + { + "epoch": 0.06264558048302071, + "grad_norm": 1.9478120401611967, + "learning_rate": 1.994417297605563e-05, + "loss": 0.8683, + "step": 2044 + }, + { + "epoch": 0.06267622900576192, + "grad_norm": 2.0867651570472043, + "learning_rate": 1.994406818524e-05, + "loss": 0.8196, + "step": 2045 + }, + { + "epoch": 0.06270687752850312, + "grad_norm": 1.9506648837270695, + "learning_rate": 1.9943963296442927e-05, + "loss": 0.7943, + "step": 2046 + }, + { + "epoch": 0.06273752605124433, + "grad_norm": 1.9271072891351504, + "learning_rate": 1.994385830966544e-05, + "loss": 0.8667, + "step": 2047 + }, + { + "epoch": 0.06276817457398554, + "grad_norm": 1.7598440425975397, + "learning_rate": 1.994375322490857e-05, + "loss": 0.8822, + "step": 2048 + }, + { + "epoch": 0.06279882309672674, + "grad_norm": 1.0090198632221246, + "learning_rate": 1.9943648042173355e-05, + "loss": 0.6583, + "step": 2049 + }, + { + "epoch": 0.06282947161946795, + "grad_norm": 1.910872847233016, + "learning_rate": 1.9943542761460835e-05, + "loss": 0.9075, + "step": 2050 + }, + { + "epoch": 0.06286012014220914, + "grad_norm": 1.941338147789598, + "learning_rate": 1.994343738277204e-05, + "loss": 0.7771, + "step": 2051 + }, + { + "epoch": 0.06289076866495034, + "grad_norm": 1.7544354197634322, + "learning_rate": 1.9943331906108014e-05, + "loss": 0.923, + "step": 2052 + }, + { + "epoch": 0.06292141718769155, + "grad_norm": 1.8946725228151327, + "learning_rate": 1.9943226331469793e-05, + "loss": 0.8567, + "step": 2053 + }, + { + "epoch": 0.06295206571043276, + "grad_norm": 1.6696084695077722, + "learning_rate": 1.9943120658858422e-05, + "loss": 0.7699, + "step": 2054 + }, + { + "epoch": 0.06298271423317396, + "grad_norm": 1.9140161186671643, + "learning_rate": 1.9943014888274938e-05, + "loss": 0.8307, + "step": 2055 + }, + { + "epoch": 0.06301336275591517, + "grad_norm": 1.8010222441582626, + "learning_rate": 1.9942909019720384e-05, + "loss": 0.8738, + "step": 2056 + }, + { + "epoch": 0.06304401127865637, + "grad_norm": 1.9774538816419025, + "learning_rate": 1.9942803053195803e-05, + "loss": 0.8617, + "step": 2057 + }, + { + "epoch": 0.06307465980139758, + "grad_norm": 2.021656832197124, + "learning_rate": 1.994269698870224e-05, + "loss": 0.8263, + "step": 2058 + }, + { + "epoch": 0.06310530832413877, + "grad_norm": 1.0027599647248433, + "learning_rate": 1.994259082624074e-05, + "loss": 0.6623, + "step": 2059 + }, + { + "epoch": 0.06313595684687998, + "grad_norm": 1.9152471262271424, + "learning_rate": 1.9942484565812348e-05, + "loss": 0.7952, + "step": 2060 + }, + { + "epoch": 0.06316660536962118, + "grad_norm": 1.7749108237393603, + "learning_rate": 1.994237820741811e-05, + "loss": 0.874, + "step": 2061 + }, + { + "epoch": 0.06319725389236239, + "grad_norm": 1.7587682729035774, + "learning_rate": 1.994227175105908e-05, + "loss": 0.9492, + "step": 2062 + }, + { + "epoch": 0.06322790241510359, + "grad_norm": 1.7116688104602975, + "learning_rate": 1.99421651967363e-05, + "loss": 0.9029, + "step": 2063 + }, + { + "epoch": 0.0632585509378448, + "grad_norm": 1.7148899617275601, + "learning_rate": 1.9942058544450822e-05, + "loss": 0.8463, + "step": 2064 + }, + { + "epoch": 0.063289199460586, + "grad_norm": 1.7667197690397227, + "learning_rate": 1.9941951794203698e-05, + "loss": 0.8965, + "step": 2065 + }, + { + "epoch": 0.06331984798332721, + "grad_norm": 1.7093903712834373, + "learning_rate": 1.994184494599598e-05, + "loss": 0.9129, + "step": 2066 + }, + { + "epoch": 0.0633504965060684, + "grad_norm": 1.979259455701072, + "learning_rate": 1.994173799982872e-05, + "loss": 0.8803, + "step": 2067 + }, + { + "epoch": 0.06338114502880961, + "grad_norm": 1.9200200671950127, + "learning_rate": 1.994163095570297e-05, + "loss": 0.9143, + "step": 2068 + }, + { + "epoch": 0.06341179355155081, + "grad_norm": 1.6343653436554952, + "learning_rate": 1.9941523813619786e-05, + "loss": 0.7827, + "step": 2069 + }, + { + "epoch": 0.06344244207429202, + "grad_norm": 1.8692353659360117, + "learning_rate": 1.9941416573580228e-05, + "loss": 0.8187, + "step": 2070 + }, + { + "epoch": 0.06347309059703322, + "grad_norm": 0.8809810522856308, + "learning_rate": 1.9941309235585344e-05, + "loss": 0.6547, + "step": 2071 + }, + { + "epoch": 0.06350373911977443, + "grad_norm": 0.822250241993723, + "learning_rate": 1.99412017996362e-05, + "loss": 0.6599, + "step": 2072 + }, + { + "epoch": 0.06353438764251564, + "grad_norm": 2.0076788102590326, + "learning_rate": 1.994109426573385e-05, + "loss": 0.8758, + "step": 2073 + }, + { + "epoch": 0.06356503616525684, + "grad_norm": 0.7815611164474032, + "learning_rate": 1.9940986633879355e-05, + "loss": 0.6309, + "step": 2074 + }, + { + "epoch": 0.06359568468799803, + "grad_norm": 2.1656457730605476, + "learning_rate": 1.9940878904073776e-05, + "loss": 0.9587, + "step": 2075 + }, + { + "epoch": 0.06362633321073924, + "grad_norm": 1.495161547891731, + "learning_rate": 1.994077107631817e-05, + "loss": 0.7952, + "step": 2076 + }, + { + "epoch": 0.06365698173348044, + "grad_norm": 1.9201038365464709, + "learning_rate": 1.9940663150613607e-05, + "loss": 0.9004, + "step": 2077 + }, + { + "epoch": 0.06368763025622165, + "grad_norm": 1.846037565443491, + "learning_rate": 1.9940555126961145e-05, + "loss": 0.7983, + "step": 2078 + }, + { + "epoch": 0.06371827877896286, + "grad_norm": 2.0228828011033504, + "learning_rate": 1.9940447005361852e-05, + "loss": 0.9456, + "step": 2079 + }, + { + "epoch": 0.06374892730170406, + "grad_norm": 1.8832691493382887, + "learning_rate": 1.9940338785816792e-05, + "loss": 0.9746, + "step": 2080 + }, + { + "epoch": 0.06377957582444527, + "grad_norm": 2.014133231421653, + "learning_rate": 1.9940230468327025e-05, + "loss": 0.8098, + "step": 2081 + }, + { + "epoch": 0.06381022434718646, + "grad_norm": 1.0909244603527144, + "learning_rate": 1.9940122052893626e-05, + "loss": 0.6676, + "step": 2082 + }, + { + "epoch": 0.06384087286992766, + "grad_norm": 1.976664489319905, + "learning_rate": 1.9940013539517664e-05, + "loss": 0.7912, + "step": 2083 + }, + { + "epoch": 0.06387152139266887, + "grad_norm": 1.8427000620265523, + "learning_rate": 1.9939904928200204e-05, + "loss": 0.908, + "step": 2084 + }, + { + "epoch": 0.06390216991541008, + "grad_norm": 1.8006215604164646, + "learning_rate": 1.9939796218942317e-05, + "loss": 0.7556, + "step": 2085 + }, + { + "epoch": 0.06393281843815128, + "grad_norm": 0.8704666077230504, + "learning_rate": 1.9939687411745073e-05, + "loss": 0.6825, + "step": 2086 + }, + { + "epoch": 0.06396346696089249, + "grad_norm": 1.8149810544823735, + "learning_rate": 1.993957850660955e-05, + "loss": 0.9161, + "step": 2087 + }, + { + "epoch": 0.06399411548363369, + "grad_norm": 1.9636720131247, + "learning_rate": 1.9939469503536814e-05, + "loss": 0.8894, + "step": 2088 + }, + { + "epoch": 0.0640247640063749, + "grad_norm": 1.8996712632230932, + "learning_rate": 1.9939360402527944e-05, + "loss": 0.8724, + "step": 2089 + }, + { + "epoch": 0.06405541252911609, + "grad_norm": 1.9116316567727136, + "learning_rate": 1.993925120358401e-05, + "loss": 0.8281, + "step": 2090 + }, + { + "epoch": 0.0640860610518573, + "grad_norm": 1.7908711319653974, + "learning_rate": 1.993914190670609e-05, + "loss": 0.8096, + "step": 2091 + }, + { + "epoch": 0.0641167095745985, + "grad_norm": 2.201779776624591, + "learning_rate": 1.9939032511895265e-05, + "loss": 0.8925, + "step": 2092 + }, + { + "epoch": 0.06414735809733971, + "grad_norm": 1.8640797875400117, + "learning_rate": 1.9938923019152607e-05, + "loss": 0.7614, + "step": 2093 + }, + { + "epoch": 0.06417800662008091, + "grad_norm": 0.8393805548558168, + "learning_rate": 1.99388134284792e-05, + "loss": 0.6438, + "step": 2094 + }, + { + "epoch": 0.06420865514282212, + "grad_norm": 1.7467117669604222, + "learning_rate": 1.993870373987612e-05, + "loss": 0.9627, + "step": 2095 + }, + { + "epoch": 0.06423930366556332, + "grad_norm": 1.8407470071340244, + "learning_rate": 1.993859395334445e-05, + "loss": 0.9425, + "step": 2096 + }, + { + "epoch": 0.06426995218830453, + "grad_norm": 2.0471798593030055, + "learning_rate": 1.9938484068885268e-05, + "loss": 0.9303, + "step": 2097 + }, + { + "epoch": 0.06430060071104572, + "grad_norm": 1.7597467321964895, + "learning_rate": 1.993837408649966e-05, + "loss": 0.8295, + "step": 2098 + }, + { + "epoch": 0.06433124923378693, + "grad_norm": 1.8646798688806925, + "learning_rate": 1.9938264006188714e-05, + "loss": 0.845, + "step": 2099 + }, + { + "epoch": 0.06436189775652813, + "grad_norm": 1.8089294555176831, + "learning_rate": 1.993815382795351e-05, + "loss": 0.9546, + "step": 2100 + }, + { + "epoch": 0.06439254627926934, + "grad_norm": 1.8926576510253115, + "learning_rate": 1.9938043551795126e-05, + "loss": 0.8602, + "step": 2101 + }, + { + "epoch": 0.06442319480201054, + "grad_norm": 1.80909734860991, + "learning_rate": 1.9937933177714663e-05, + "loss": 0.9075, + "step": 2102 + }, + { + "epoch": 0.06445384332475175, + "grad_norm": 1.9530466621836173, + "learning_rate": 1.9937822705713195e-05, + "loss": 0.8501, + "step": 2103 + }, + { + "epoch": 0.06448449184749296, + "grad_norm": 1.9473250341257713, + "learning_rate": 1.9937712135791826e-05, + "loss": 0.8306, + "step": 2104 + }, + { + "epoch": 0.06451514037023416, + "grad_norm": 1.6818021441698257, + "learning_rate": 1.9937601467951632e-05, + "loss": 0.7793, + "step": 2105 + }, + { + "epoch": 0.06454578889297535, + "grad_norm": 1.9807848647293662, + "learning_rate": 1.9937490702193708e-05, + "loss": 0.8359, + "step": 2106 + }, + { + "epoch": 0.06457643741571656, + "grad_norm": 1.7726297520850596, + "learning_rate": 1.9937379838519144e-05, + "loss": 0.8449, + "step": 2107 + }, + { + "epoch": 0.06460708593845776, + "grad_norm": 1.984426564242373, + "learning_rate": 1.9937268876929035e-05, + "loss": 0.8604, + "step": 2108 + }, + { + "epoch": 0.06463773446119897, + "grad_norm": 1.8947503151368124, + "learning_rate": 1.9937157817424472e-05, + "loss": 0.7972, + "step": 2109 + }, + { + "epoch": 0.06466838298394018, + "grad_norm": 1.8349353849913526, + "learning_rate": 1.9937046660006553e-05, + "loss": 0.9007, + "step": 2110 + }, + { + "epoch": 0.06469903150668138, + "grad_norm": 1.7434242983412795, + "learning_rate": 1.993693540467637e-05, + "loss": 0.8509, + "step": 2111 + }, + { + "epoch": 0.06472968002942259, + "grad_norm": 1.8095796048365127, + "learning_rate": 1.9936824051435023e-05, + "loss": 0.8976, + "step": 2112 + }, + { + "epoch": 0.06476032855216378, + "grad_norm": 1.751435416477481, + "learning_rate": 1.9936712600283604e-05, + "loss": 0.8426, + "step": 2113 + }, + { + "epoch": 0.06479097707490498, + "grad_norm": 0.8578334329131384, + "learning_rate": 1.993660105122321e-05, + "loss": 0.6456, + "step": 2114 + }, + { + "epoch": 0.06482162559764619, + "grad_norm": 1.592000763820724, + "learning_rate": 1.9936489404254946e-05, + "loss": 0.9413, + "step": 2115 + }, + { + "epoch": 0.0648522741203874, + "grad_norm": 1.9072167633779749, + "learning_rate": 1.993637765937991e-05, + "loss": 0.896, + "step": 2116 + }, + { + "epoch": 0.0648829226431286, + "grad_norm": 0.7310247513967613, + "learning_rate": 1.9936265816599204e-05, + "loss": 0.5882, + "step": 2117 + }, + { + "epoch": 0.06491357116586981, + "grad_norm": 1.793727566080527, + "learning_rate": 1.9936153875913923e-05, + "loss": 0.8212, + "step": 2118 + }, + { + "epoch": 0.06494421968861101, + "grad_norm": 1.7378883924836377, + "learning_rate": 1.9936041837325183e-05, + "loss": 0.8353, + "step": 2119 + }, + { + "epoch": 0.06497486821135222, + "grad_norm": 1.814280643301158, + "learning_rate": 1.9935929700834077e-05, + "loss": 0.9196, + "step": 2120 + }, + { + "epoch": 0.06500551673409341, + "grad_norm": 1.7033533143507922, + "learning_rate": 1.9935817466441708e-05, + "loss": 0.8758, + "step": 2121 + }, + { + "epoch": 0.06503616525683462, + "grad_norm": 1.8731368633149392, + "learning_rate": 1.9935705134149195e-05, + "loss": 0.8802, + "step": 2122 + }, + { + "epoch": 0.06506681377957582, + "grad_norm": 1.7418692013091135, + "learning_rate": 1.9935592703957635e-05, + "loss": 0.8907, + "step": 2123 + }, + { + "epoch": 0.06509746230231703, + "grad_norm": 2.0077263414361237, + "learning_rate": 1.9935480175868137e-05, + "loss": 0.9228, + "step": 2124 + }, + { + "epoch": 0.06512811082505823, + "grad_norm": 0.9315531448921347, + "learning_rate": 1.993536754988181e-05, + "loss": 0.6161, + "step": 2125 + }, + { + "epoch": 0.06515875934779944, + "grad_norm": 1.7441945917022135, + "learning_rate": 1.9935254825999767e-05, + "loss": 0.8413, + "step": 2126 + }, + { + "epoch": 0.06518940787054064, + "grad_norm": 1.7452078283507033, + "learning_rate": 1.9935142004223116e-05, + "loss": 0.8158, + "step": 2127 + }, + { + "epoch": 0.06522005639328185, + "grad_norm": 1.8751228143705183, + "learning_rate": 1.9935029084552967e-05, + "loss": 0.8315, + "step": 2128 + }, + { + "epoch": 0.06525070491602304, + "grad_norm": 0.8208721515549651, + "learning_rate": 1.9934916066990438e-05, + "loss": 0.6622, + "step": 2129 + }, + { + "epoch": 0.06528135343876425, + "grad_norm": 2.1205193796568245, + "learning_rate": 1.9934802951536633e-05, + "loss": 0.967, + "step": 2130 + }, + { + "epoch": 0.06531200196150545, + "grad_norm": 2.013417658646281, + "learning_rate": 1.9934689738192677e-05, + "loss": 0.7969, + "step": 2131 + }, + { + "epoch": 0.06534265048424666, + "grad_norm": 1.8674142138307464, + "learning_rate": 1.993457642695968e-05, + "loss": 0.9809, + "step": 2132 + }, + { + "epoch": 0.06537329900698786, + "grad_norm": 1.9671629206625931, + "learning_rate": 1.993446301783876e-05, + "loss": 0.7235, + "step": 2133 + }, + { + "epoch": 0.06540394752972907, + "grad_norm": 1.8864235266466705, + "learning_rate": 1.9934349510831033e-05, + "loss": 0.9547, + "step": 2134 + }, + { + "epoch": 0.06543459605247028, + "grad_norm": 1.7431845035998883, + "learning_rate": 1.993423590593762e-05, + "loss": 0.9219, + "step": 2135 + }, + { + "epoch": 0.06546524457521148, + "grad_norm": 1.7526046285513333, + "learning_rate": 1.9934122203159636e-05, + "loss": 0.8441, + "step": 2136 + }, + { + "epoch": 0.06549589309795267, + "grad_norm": 1.7605597958619854, + "learning_rate": 1.993400840249821e-05, + "loss": 0.8406, + "step": 2137 + }, + { + "epoch": 0.06552654162069388, + "grad_norm": 1.937202737638947, + "learning_rate": 1.9933894503954452e-05, + "loss": 0.8121, + "step": 2138 + }, + { + "epoch": 0.06555719014343508, + "grad_norm": 1.7300737496837622, + "learning_rate": 1.993378050752949e-05, + "loss": 0.836, + "step": 2139 + }, + { + "epoch": 0.06558783866617629, + "grad_norm": 1.7043091564026023, + "learning_rate": 1.993366641322445e-05, + "loss": 0.8419, + "step": 2140 + }, + { + "epoch": 0.0656184871889175, + "grad_norm": 1.8767285379047591, + "learning_rate": 1.9933552221040448e-05, + "loss": 0.8515, + "step": 2141 + }, + { + "epoch": 0.0656491357116587, + "grad_norm": 1.807714025195433, + "learning_rate": 1.993343793097862e-05, + "loss": 0.8963, + "step": 2142 + }, + { + "epoch": 0.06567978423439991, + "grad_norm": 1.786281566373513, + "learning_rate": 1.9933323543040084e-05, + "loss": 0.916, + "step": 2143 + }, + { + "epoch": 0.0657104327571411, + "grad_norm": 1.7944126484106033, + "learning_rate": 1.993320905722597e-05, + "loss": 0.8185, + "step": 2144 + }, + { + "epoch": 0.0657410812798823, + "grad_norm": 1.6604376074454366, + "learning_rate": 1.9933094473537406e-05, + "loss": 0.833, + "step": 2145 + }, + { + "epoch": 0.06577172980262351, + "grad_norm": 1.8061219674826612, + "learning_rate": 1.993297979197552e-05, + "loss": 0.9132, + "step": 2146 + }, + { + "epoch": 0.06580237832536472, + "grad_norm": 1.9769547940775225, + "learning_rate": 1.9932865012541445e-05, + "loss": 0.8746, + "step": 2147 + }, + { + "epoch": 0.06583302684810592, + "grad_norm": 1.8251611768592535, + "learning_rate": 1.9932750135236304e-05, + "loss": 0.8787, + "step": 2148 + }, + { + "epoch": 0.06586367537084713, + "grad_norm": 2.0955448146748394, + "learning_rate": 1.993263516006124e-05, + "loss": 0.8836, + "step": 2149 + }, + { + "epoch": 0.06589432389358833, + "grad_norm": 1.9738999316797485, + "learning_rate": 1.9932520087017376e-05, + "loss": 0.8894, + "step": 2150 + }, + { + "epoch": 0.06592497241632954, + "grad_norm": 1.6992443197412013, + "learning_rate": 1.9932404916105855e-05, + "loss": 0.9228, + "step": 2151 + }, + { + "epoch": 0.06595562093907073, + "grad_norm": 1.834974251958576, + "learning_rate": 1.9932289647327805e-05, + "loss": 0.8671, + "step": 2152 + }, + { + "epoch": 0.06598626946181194, + "grad_norm": 0.9483497321170251, + "learning_rate": 1.9932174280684365e-05, + "loss": 0.6666, + "step": 2153 + }, + { + "epoch": 0.06601691798455314, + "grad_norm": 2.165232125446464, + "learning_rate": 1.9932058816176665e-05, + "loss": 0.9325, + "step": 2154 + }, + { + "epoch": 0.06604756650729435, + "grad_norm": 2.1283970000947994, + "learning_rate": 1.9931943253805856e-05, + "loss": 0.7263, + "step": 2155 + }, + { + "epoch": 0.06607821503003555, + "grad_norm": 2.0517736196816667, + "learning_rate": 1.9931827593573064e-05, + "loss": 0.974, + "step": 2156 + }, + { + "epoch": 0.06610886355277676, + "grad_norm": 1.8215548434932376, + "learning_rate": 1.9931711835479436e-05, + "loss": 0.9493, + "step": 2157 + }, + { + "epoch": 0.06613951207551796, + "grad_norm": 0.8938286232431193, + "learning_rate": 1.9931595979526106e-05, + "loss": 0.6218, + "step": 2158 + }, + { + "epoch": 0.06617016059825917, + "grad_norm": 1.8128323791140823, + "learning_rate": 1.9931480025714225e-05, + "loss": 0.853, + "step": 2159 + }, + { + "epoch": 0.06620080912100036, + "grad_norm": 1.762477578582916, + "learning_rate": 1.9931363974044927e-05, + "loss": 0.8578, + "step": 2160 + }, + { + "epoch": 0.06623145764374157, + "grad_norm": 1.8353598598489822, + "learning_rate": 1.993124782451936e-05, + "loss": 0.8534, + "step": 2161 + }, + { + "epoch": 0.06626210616648277, + "grad_norm": 1.8328012189913938, + "learning_rate": 1.9931131577138666e-05, + "loss": 0.7869, + "step": 2162 + }, + { + "epoch": 0.06629275468922398, + "grad_norm": 1.7049431235069696, + "learning_rate": 1.9931015231903994e-05, + "loss": 0.9144, + "step": 2163 + }, + { + "epoch": 0.06632340321196518, + "grad_norm": 2.0435112923424437, + "learning_rate": 1.9930898788816485e-05, + "loss": 0.9875, + "step": 2164 + }, + { + "epoch": 0.06635405173470639, + "grad_norm": 1.9246507256877876, + "learning_rate": 1.993078224787729e-05, + "loss": 0.8941, + "step": 2165 + }, + { + "epoch": 0.0663847002574476, + "grad_norm": 0.8302337843514913, + "learning_rate": 1.993066560908756e-05, + "loss": 0.6542, + "step": 2166 + }, + { + "epoch": 0.0664153487801888, + "grad_norm": 1.9487619595414667, + "learning_rate": 1.9930548872448435e-05, + "loss": 0.902, + "step": 2167 + }, + { + "epoch": 0.06644599730293, + "grad_norm": 1.881266284151641, + "learning_rate": 1.9930432037961075e-05, + "loss": 0.8478, + "step": 2168 + }, + { + "epoch": 0.0664766458256712, + "grad_norm": 1.9972620524146456, + "learning_rate": 1.9930315105626627e-05, + "loss": 0.8684, + "step": 2169 + }, + { + "epoch": 0.0665072943484124, + "grad_norm": 1.6921627188308068, + "learning_rate": 1.993019807544624e-05, + "loss": 0.8776, + "step": 2170 + }, + { + "epoch": 0.06653794287115361, + "grad_norm": 1.7066643944856181, + "learning_rate": 1.993008094742108e-05, + "loss": 0.8568, + "step": 2171 + }, + { + "epoch": 0.06656859139389482, + "grad_norm": 0.7784895564108943, + "learning_rate": 1.992996372155228e-05, + "loss": 0.6472, + "step": 2172 + }, + { + "epoch": 0.06659923991663602, + "grad_norm": 1.8528059272158814, + "learning_rate": 1.9929846397841014e-05, + "loss": 0.8721, + "step": 2173 + }, + { + "epoch": 0.06662988843937723, + "grad_norm": 1.9486963507569552, + "learning_rate": 1.992972897628843e-05, + "loss": 0.8708, + "step": 2174 + }, + { + "epoch": 0.06666053696211842, + "grad_norm": 1.830420599093482, + "learning_rate": 1.9929611456895684e-05, + "loss": 0.8534, + "step": 2175 + }, + { + "epoch": 0.06669118548485962, + "grad_norm": 1.9696701721900625, + "learning_rate": 1.9929493839663937e-05, + "loss": 0.7787, + "step": 2176 + }, + { + "epoch": 0.06672183400760083, + "grad_norm": 1.8055621014257353, + "learning_rate": 1.9929376124594346e-05, + "loss": 0.8602, + "step": 2177 + }, + { + "epoch": 0.06675248253034204, + "grad_norm": 1.8783157177970176, + "learning_rate": 1.9929258311688068e-05, + "loss": 0.8927, + "step": 2178 + }, + { + "epoch": 0.06678313105308324, + "grad_norm": 1.578416370767629, + "learning_rate": 1.9929140400946273e-05, + "loss": 0.8867, + "step": 2179 + }, + { + "epoch": 0.06681377957582445, + "grad_norm": 1.5475133818971798, + "learning_rate": 1.9929022392370115e-05, + "loss": 0.696, + "step": 2180 + }, + { + "epoch": 0.06684442809856565, + "grad_norm": 0.8963017597174101, + "learning_rate": 1.9928904285960758e-05, + "loss": 0.67, + "step": 2181 + }, + { + "epoch": 0.06687507662130686, + "grad_norm": 1.7187440595268468, + "learning_rate": 1.9928786081719367e-05, + "loss": 0.8851, + "step": 2182 + }, + { + "epoch": 0.06690572514404805, + "grad_norm": 1.7225838321084441, + "learning_rate": 1.992866777964711e-05, + "loss": 0.7431, + "step": 2183 + }, + { + "epoch": 0.06693637366678926, + "grad_norm": 1.6944097977234798, + "learning_rate": 1.9928549379745143e-05, + "loss": 0.7811, + "step": 2184 + }, + { + "epoch": 0.06696702218953046, + "grad_norm": 1.717803882552261, + "learning_rate": 1.992843088201464e-05, + "loss": 0.8088, + "step": 2185 + }, + { + "epoch": 0.06699767071227167, + "grad_norm": 1.8207609840943508, + "learning_rate": 1.992831228645677e-05, + "loss": 0.8363, + "step": 2186 + }, + { + "epoch": 0.06702831923501287, + "grad_norm": 1.6345944627164275, + "learning_rate": 1.9928193593072697e-05, + "loss": 0.906, + "step": 2187 + }, + { + "epoch": 0.06705896775775408, + "grad_norm": 1.8842266338090135, + "learning_rate": 1.9928074801863596e-05, + "loss": 0.9815, + "step": 2188 + }, + { + "epoch": 0.06708961628049528, + "grad_norm": 1.8455095225425098, + "learning_rate": 1.992795591283063e-05, + "loss": 0.8977, + "step": 2189 + }, + { + "epoch": 0.06712026480323649, + "grad_norm": 1.7403061328010971, + "learning_rate": 1.9927836925974976e-05, + "loss": 0.8196, + "step": 2190 + }, + { + "epoch": 0.06715091332597768, + "grad_norm": 1.5558265072520856, + "learning_rate": 1.99277178412978e-05, + "loss": 0.7233, + "step": 2191 + }, + { + "epoch": 0.06718156184871889, + "grad_norm": 1.7097456035660468, + "learning_rate": 1.9927598658800285e-05, + "loss": 0.7614, + "step": 2192 + }, + { + "epoch": 0.0672122103714601, + "grad_norm": 1.6958502436485123, + "learning_rate": 1.99274793784836e-05, + "loss": 0.8564, + "step": 2193 + }, + { + "epoch": 0.0672428588942013, + "grad_norm": 1.7788311632107296, + "learning_rate": 1.9927360000348915e-05, + "loss": 0.8024, + "step": 2194 + }, + { + "epoch": 0.0672735074169425, + "grad_norm": 1.8250364669499113, + "learning_rate": 1.992724052439742e-05, + "loss": 0.8091, + "step": 2195 + }, + { + "epoch": 0.06730415593968371, + "grad_norm": 1.9506161634944394, + "learning_rate": 1.992712095063028e-05, + "loss": 0.8301, + "step": 2196 + }, + { + "epoch": 0.06733480446242492, + "grad_norm": 1.6944669907237775, + "learning_rate": 1.992700127904868e-05, + "loss": 0.8315, + "step": 2197 + }, + { + "epoch": 0.06736545298516612, + "grad_norm": 1.7513686063511678, + "learning_rate": 1.9926881509653794e-05, + "loss": 0.7999, + "step": 2198 + }, + { + "epoch": 0.06739610150790731, + "grad_norm": 2.0568679266217043, + "learning_rate": 1.99267616424468e-05, + "loss": 0.9023, + "step": 2199 + }, + { + "epoch": 0.06742675003064852, + "grad_norm": 1.039230981370825, + "learning_rate": 1.992664167742889e-05, + "loss": 0.6674, + "step": 2200 + }, + { + "epoch": 0.06745739855338972, + "grad_norm": 1.814626264704855, + "learning_rate": 1.992652161460124e-05, + "loss": 0.9358, + "step": 2201 + }, + { + "epoch": 0.06748804707613093, + "grad_norm": 1.789331756496065, + "learning_rate": 1.992640145396503e-05, + "loss": 0.8766, + "step": 2202 + }, + { + "epoch": 0.06751869559887214, + "grad_norm": 0.7710869066491602, + "learning_rate": 1.9926281195521446e-05, + "loss": 0.6577, + "step": 2203 + }, + { + "epoch": 0.06754934412161334, + "grad_norm": 2.1070852526234463, + "learning_rate": 1.9926160839271675e-05, + "loss": 0.8418, + "step": 2204 + }, + { + "epoch": 0.06757999264435455, + "grad_norm": 1.6777360244761343, + "learning_rate": 1.9926040385216905e-05, + "loss": 0.8631, + "step": 2205 + }, + { + "epoch": 0.06761064116709574, + "grad_norm": 1.7428846722587994, + "learning_rate": 1.9925919833358316e-05, + "loss": 0.9428, + "step": 2206 + }, + { + "epoch": 0.06764128968983694, + "grad_norm": 1.8176923361109185, + "learning_rate": 1.9925799183697098e-05, + "loss": 0.8601, + "step": 2207 + }, + { + "epoch": 0.06767193821257815, + "grad_norm": 2.056131340414714, + "learning_rate": 1.9925678436234444e-05, + "loss": 0.8724, + "step": 2208 + }, + { + "epoch": 0.06770258673531936, + "grad_norm": 1.888032013653572, + "learning_rate": 1.9925557590971537e-05, + "loss": 0.7753, + "step": 2209 + }, + { + "epoch": 0.06773323525806056, + "grad_norm": 1.8125998401739087, + "learning_rate": 1.9925436647909575e-05, + "loss": 0.7472, + "step": 2210 + }, + { + "epoch": 0.06776388378080177, + "grad_norm": 1.046205281296468, + "learning_rate": 1.992531560704974e-05, + "loss": 0.6491, + "step": 2211 + }, + { + "epoch": 0.06779453230354297, + "grad_norm": 1.7394159013538977, + "learning_rate": 1.9925194468393242e-05, + "loss": 0.7798, + "step": 2212 + }, + { + "epoch": 0.06782518082628418, + "grad_norm": 0.8418791427769531, + "learning_rate": 1.9925073231941253e-05, + "loss": 0.6348, + "step": 2213 + }, + { + "epoch": 0.06785582934902537, + "grad_norm": 2.1103640817307134, + "learning_rate": 1.9924951897694983e-05, + "loss": 0.9139, + "step": 2214 + }, + { + "epoch": 0.06788647787176658, + "grad_norm": 2.122262350030493, + "learning_rate": 1.9924830465655622e-05, + "loss": 0.7972, + "step": 2215 + }, + { + "epoch": 0.06791712639450778, + "grad_norm": 1.8878091432390132, + "learning_rate": 1.9924708935824366e-05, + "loss": 0.7725, + "step": 2216 + }, + { + "epoch": 0.06794777491724899, + "grad_norm": 1.9500817219490216, + "learning_rate": 1.9924587308202415e-05, + "loss": 0.9253, + "step": 2217 + }, + { + "epoch": 0.0679784234399902, + "grad_norm": 1.9991609948073348, + "learning_rate": 1.9924465582790966e-05, + "loss": 0.9285, + "step": 2218 + }, + { + "epoch": 0.0680090719627314, + "grad_norm": 1.9715010580178252, + "learning_rate": 1.9924343759591215e-05, + "loss": 0.8601, + "step": 2219 + }, + { + "epoch": 0.0680397204854726, + "grad_norm": 1.1275881261578582, + "learning_rate": 1.992422183860437e-05, + "loss": 0.6329, + "step": 2220 + }, + { + "epoch": 0.06807036900821381, + "grad_norm": 1.8834492413298054, + "learning_rate": 1.9924099819831624e-05, + "loss": 0.9077, + "step": 2221 + }, + { + "epoch": 0.068101017530955, + "grad_norm": 1.9867661681672586, + "learning_rate": 1.9923977703274188e-05, + "loss": 0.7757, + "step": 2222 + }, + { + "epoch": 0.06813166605369621, + "grad_norm": 1.9152239494297072, + "learning_rate": 1.9923855488933256e-05, + "loss": 0.9268, + "step": 2223 + }, + { + "epoch": 0.06816231457643741, + "grad_norm": 1.8952161916615942, + "learning_rate": 1.992373317681004e-05, + "loss": 0.904, + "step": 2224 + }, + { + "epoch": 0.06819296309917862, + "grad_norm": 1.809204516105464, + "learning_rate": 1.992361076690574e-05, + "loss": 0.8486, + "step": 2225 + }, + { + "epoch": 0.06822361162191982, + "grad_norm": 1.7279530565263217, + "learning_rate": 1.9923488259221562e-05, + "loss": 0.8643, + "step": 2226 + }, + { + "epoch": 0.06825426014466103, + "grad_norm": 0.895914083140305, + "learning_rate": 1.9923365653758718e-05, + "loss": 0.6611, + "step": 2227 + }, + { + "epoch": 0.06828490866740224, + "grad_norm": 1.944526771256258, + "learning_rate": 1.9923242950518416e-05, + "loss": 0.8002, + "step": 2228 + }, + { + "epoch": 0.06831555719014344, + "grad_norm": 1.864805155254437, + "learning_rate": 1.9923120149501858e-05, + "loss": 0.8593, + "step": 2229 + }, + { + "epoch": 0.06834620571288463, + "grad_norm": 1.9023322470502142, + "learning_rate": 1.992299725071026e-05, + "loss": 0.7787, + "step": 2230 + }, + { + "epoch": 0.06837685423562584, + "grad_norm": 1.9012052454033486, + "learning_rate": 1.992287425414483e-05, + "loss": 0.8464, + "step": 2231 + }, + { + "epoch": 0.06840750275836704, + "grad_norm": 2.095820316458464, + "learning_rate": 1.9922751159806783e-05, + "loss": 0.9166, + "step": 2232 + }, + { + "epoch": 0.06843815128110825, + "grad_norm": 1.8855990371121532, + "learning_rate": 1.992262796769733e-05, + "loss": 0.8517, + "step": 2233 + }, + { + "epoch": 0.06846879980384946, + "grad_norm": 1.7572433265478238, + "learning_rate": 1.992250467781768e-05, + "loss": 0.846, + "step": 2234 + }, + { + "epoch": 0.06849944832659066, + "grad_norm": 1.9346765196822147, + "learning_rate": 1.9922381290169058e-05, + "loss": 0.839, + "step": 2235 + }, + { + "epoch": 0.06853009684933187, + "grad_norm": 1.832235700749645, + "learning_rate": 1.992225780475267e-05, + "loss": 0.8239, + "step": 2236 + }, + { + "epoch": 0.06856074537207306, + "grad_norm": 1.6493750700717842, + "learning_rate": 1.992213422156974e-05, + "loss": 0.9907, + "step": 2237 + }, + { + "epoch": 0.06859139389481426, + "grad_norm": 1.7058944426284477, + "learning_rate": 1.9922010540621483e-05, + "loss": 0.7471, + "step": 2238 + }, + { + "epoch": 0.06862204241755547, + "grad_norm": 1.9512208705019605, + "learning_rate": 1.992188676190912e-05, + "loss": 0.8115, + "step": 2239 + }, + { + "epoch": 0.06865269094029668, + "grad_norm": 1.787550297347746, + "learning_rate": 1.9921762885433862e-05, + "loss": 0.9415, + "step": 2240 + }, + { + "epoch": 0.06868333946303788, + "grad_norm": 1.7734979823000565, + "learning_rate": 1.992163891119694e-05, + "loss": 0.8638, + "step": 2241 + }, + { + "epoch": 0.06871398798577909, + "grad_norm": 1.8576967337132064, + "learning_rate": 1.992151483919957e-05, + "loss": 0.8901, + "step": 2242 + }, + { + "epoch": 0.0687446365085203, + "grad_norm": 1.6836954506756658, + "learning_rate": 1.9921390669442977e-05, + "loss": 0.788, + "step": 2243 + }, + { + "epoch": 0.0687752850312615, + "grad_norm": 1.7740306837713173, + "learning_rate": 1.9921266401928384e-05, + "loss": 0.8573, + "step": 2244 + }, + { + "epoch": 0.06880593355400269, + "grad_norm": 1.6731361818787736, + "learning_rate": 1.992114203665701e-05, + "loss": 0.8794, + "step": 2245 + }, + { + "epoch": 0.0688365820767439, + "grad_norm": 2.0831076370426715, + "learning_rate": 1.992101757363009e-05, + "loss": 0.7829, + "step": 2246 + }, + { + "epoch": 0.0688672305994851, + "grad_norm": 1.7747240383381873, + "learning_rate": 1.992089301284884e-05, + "loss": 0.9159, + "step": 2247 + }, + { + "epoch": 0.06889787912222631, + "grad_norm": 1.670315305163272, + "learning_rate": 1.99207683543145e-05, + "loss": 0.8354, + "step": 2248 + }, + { + "epoch": 0.06892852764496751, + "grad_norm": 1.9548051810045883, + "learning_rate": 1.9920643598028284e-05, + "loss": 0.8618, + "step": 2249 + }, + { + "epoch": 0.06895917616770872, + "grad_norm": 1.714593449477667, + "learning_rate": 1.992051874399143e-05, + "loss": 0.7816, + "step": 2250 + }, + { + "epoch": 0.06898982469044992, + "grad_norm": 1.6829821270852667, + "learning_rate": 1.992039379220517e-05, + "loss": 0.8436, + "step": 2251 + }, + { + "epoch": 0.06902047321319113, + "grad_norm": 0.9048023629623101, + "learning_rate": 1.9920268742670728e-05, + "loss": 0.6705, + "step": 2252 + }, + { + "epoch": 0.06905112173593232, + "grad_norm": 2.07302302049258, + "learning_rate": 1.9920143595389342e-05, + "loss": 0.8174, + "step": 2253 + }, + { + "epoch": 0.06908177025867353, + "grad_norm": 1.8448504792200329, + "learning_rate": 1.9920018350362244e-05, + "loss": 0.8884, + "step": 2254 + }, + { + "epoch": 0.06911241878141473, + "grad_norm": 1.7746517125769743, + "learning_rate": 1.9919893007590665e-05, + "loss": 0.7215, + "step": 2255 + }, + { + "epoch": 0.06914306730415594, + "grad_norm": 1.8280721773353195, + "learning_rate": 1.9919767567075844e-05, + "loss": 0.8176, + "step": 2256 + }, + { + "epoch": 0.06917371582689714, + "grad_norm": 0.8092276601019838, + "learning_rate": 1.991964202881901e-05, + "loss": 0.6487, + "step": 2257 + }, + { + "epoch": 0.06920436434963835, + "grad_norm": 1.6881827933917184, + "learning_rate": 1.991951639282141e-05, + "loss": 0.8315, + "step": 2258 + }, + { + "epoch": 0.06923501287237956, + "grad_norm": 1.664274907482683, + "learning_rate": 1.9919390659084275e-05, + "loss": 0.8818, + "step": 2259 + }, + { + "epoch": 0.06926566139512076, + "grad_norm": 1.8032036677470755, + "learning_rate": 1.9919264827608848e-05, + "loss": 0.9455, + "step": 2260 + }, + { + "epoch": 0.06929630991786195, + "grad_norm": 2.1280530209081427, + "learning_rate": 1.9919138898396366e-05, + "loss": 0.9471, + "step": 2261 + }, + { + "epoch": 0.06932695844060316, + "grad_norm": 1.99284307809036, + "learning_rate": 1.9919012871448072e-05, + "loss": 0.8606, + "step": 2262 + }, + { + "epoch": 0.06935760696334436, + "grad_norm": 1.6949634617385057, + "learning_rate": 1.9918886746765204e-05, + "loss": 0.8283, + "step": 2263 + }, + { + "epoch": 0.06938825548608557, + "grad_norm": 1.822110955644134, + "learning_rate": 1.9918760524349004e-05, + "loss": 0.8826, + "step": 2264 + }, + { + "epoch": 0.06941890400882678, + "grad_norm": 1.7496705837481428, + "learning_rate": 1.9918634204200723e-05, + "loss": 0.809, + "step": 2265 + }, + { + "epoch": 0.06944955253156798, + "grad_norm": 2.0586570060645015, + "learning_rate": 1.99185077863216e-05, + "loss": 0.8519, + "step": 2266 + }, + { + "epoch": 0.06948020105430919, + "grad_norm": 1.7892370716661583, + "learning_rate": 1.9918381270712882e-05, + "loss": 0.9596, + "step": 2267 + }, + { + "epoch": 0.06951084957705038, + "grad_norm": 1.6802863781341764, + "learning_rate": 1.9918254657375815e-05, + "loss": 0.8149, + "step": 2268 + }, + { + "epoch": 0.06954149809979158, + "grad_norm": 1.8412594051636042, + "learning_rate": 1.9918127946311648e-05, + "loss": 0.8872, + "step": 2269 + }, + { + "epoch": 0.06957214662253279, + "grad_norm": 1.6484232559449956, + "learning_rate": 1.991800113752163e-05, + "loss": 0.8129, + "step": 2270 + }, + { + "epoch": 0.069602795145274, + "grad_norm": 1.718029860907854, + "learning_rate": 1.991787423100701e-05, + "loss": 0.8909, + "step": 2271 + }, + { + "epoch": 0.0696334436680152, + "grad_norm": 0.8742486721092615, + "learning_rate": 1.9917747226769032e-05, + "loss": 0.6504, + "step": 2272 + }, + { + "epoch": 0.06966409219075641, + "grad_norm": 1.6639689842009406, + "learning_rate": 1.9917620124808958e-05, + "loss": 0.8398, + "step": 2273 + }, + { + "epoch": 0.06969474071349761, + "grad_norm": 1.5246036310947597, + "learning_rate": 1.9917492925128035e-05, + "loss": 0.8095, + "step": 2274 + }, + { + "epoch": 0.06972538923623882, + "grad_norm": 1.6377343033480527, + "learning_rate": 1.9917365627727516e-05, + "loss": 0.8207, + "step": 2275 + }, + { + "epoch": 0.06975603775898001, + "grad_norm": 1.7695664094639099, + "learning_rate": 1.9917238232608654e-05, + "loss": 0.8821, + "step": 2276 + }, + { + "epoch": 0.06978668628172122, + "grad_norm": 1.663633522955562, + "learning_rate": 1.9917110739772708e-05, + "loss": 0.7812, + "step": 2277 + }, + { + "epoch": 0.06981733480446242, + "grad_norm": 1.9244284226807729, + "learning_rate": 1.9916983149220933e-05, + "loss": 0.9817, + "step": 2278 + }, + { + "epoch": 0.06984798332720363, + "grad_norm": 1.8176901160597219, + "learning_rate": 1.9916855460954584e-05, + "loss": 0.7436, + "step": 2279 + }, + { + "epoch": 0.06987863184994483, + "grad_norm": 1.5501985863843633, + "learning_rate": 1.9916727674974924e-05, + "loss": 0.8082, + "step": 2280 + }, + { + "epoch": 0.06990928037268604, + "grad_norm": 0.9197012035022283, + "learning_rate": 1.9916599791283206e-05, + "loss": 0.6507, + "step": 2281 + }, + { + "epoch": 0.06993992889542724, + "grad_norm": 1.742654373078265, + "learning_rate": 1.9916471809880692e-05, + "loss": 0.9641, + "step": 2282 + }, + { + "epoch": 0.06997057741816845, + "grad_norm": 0.7950636925567216, + "learning_rate": 1.9916343730768645e-05, + "loss": 0.6531, + "step": 2283 + }, + { + "epoch": 0.07000122594090964, + "grad_norm": 1.8016965599190442, + "learning_rate": 1.9916215553948328e-05, + "loss": 0.9536, + "step": 2284 + }, + { + "epoch": 0.07003187446365085, + "grad_norm": 0.8111557153718437, + "learning_rate": 1.9916087279421002e-05, + "loss": 0.6496, + "step": 2285 + }, + { + "epoch": 0.07006252298639205, + "grad_norm": 1.7064639844216318, + "learning_rate": 1.991595890718793e-05, + "loss": 0.7794, + "step": 2286 + }, + { + "epoch": 0.07009317150913326, + "grad_norm": 1.8976569456063137, + "learning_rate": 1.9915830437250376e-05, + "loss": 1.0315, + "step": 2287 + }, + { + "epoch": 0.07012382003187446, + "grad_norm": 1.861427888805745, + "learning_rate": 1.991570186960961e-05, + "loss": 0.8583, + "step": 2288 + }, + { + "epoch": 0.07015446855461567, + "grad_norm": 1.889030102871121, + "learning_rate": 1.9915573204266897e-05, + "loss": 0.7465, + "step": 2289 + }, + { + "epoch": 0.07018511707735688, + "grad_norm": 1.5911252062701398, + "learning_rate": 1.99154444412235e-05, + "loss": 0.8484, + "step": 2290 + }, + { + "epoch": 0.07021576560009808, + "grad_norm": 0.8989062708604343, + "learning_rate": 1.9915315580480694e-05, + "loss": 0.6538, + "step": 2291 + }, + { + "epoch": 0.07024641412283927, + "grad_norm": 1.820671029830544, + "learning_rate": 1.991518662203975e-05, + "loss": 0.8803, + "step": 2292 + }, + { + "epoch": 0.07027706264558048, + "grad_norm": 1.5888313701728256, + "learning_rate": 1.991505756590193e-05, + "loss": 0.7053, + "step": 2293 + }, + { + "epoch": 0.07030771116832168, + "grad_norm": 1.9661818756895932, + "learning_rate": 1.9914928412068516e-05, + "loss": 0.9462, + "step": 2294 + }, + { + "epoch": 0.07033835969106289, + "grad_norm": 1.6084078226131766, + "learning_rate": 1.9914799160540772e-05, + "loss": 0.807, + "step": 2295 + }, + { + "epoch": 0.0703690082138041, + "grad_norm": 1.8767728738369078, + "learning_rate": 1.9914669811319974e-05, + "loss": 0.9295, + "step": 2296 + }, + { + "epoch": 0.0703996567365453, + "grad_norm": 1.8986828607115909, + "learning_rate": 1.99145403644074e-05, + "loss": 0.8563, + "step": 2297 + }, + { + "epoch": 0.07043030525928651, + "grad_norm": 1.927904240941028, + "learning_rate": 1.9914410819804325e-05, + "loss": 0.9, + "step": 2298 + }, + { + "epoch": 0.07046095378202771, + "grad_norm": 1.5922680349777656, + "learning_rate": 1.991428117751202e-05, + "loss": 0.8214, + "step": 2299 + }, + { + "epoch": 0.0704916023047689, + "grad_norm": 0.8948255314742168, + "learning_rate": 1.9914151437531765e-05, + "loss": 0.6699, + "step": 2300 + }, + { + "epoch": 0.07052225082751011, + "grad_norm": 1.931439921951308, + "learning_rate": 1.991402159986484e-05, + "loss": 0.9337, + "step": 2301 + }, + { + "epoch": 0.07055289935025132, + "grad_norm": 1.9585448494300983, + "learning_rate": 1.9913891664512527e-05, + "loss": 0.8446, + "step": 2302 + }, + { + "epoch": 0.07058354787299252, + "grad_norm": 0.7880582889851525, + "learning_rate": 1.9913761631476102e-05, + "loss": 0.6429, + "step": 2303 + }, + { + "epoch": 0.07061419639573373, + "grad_norm": 1.8290289891137732, + "learning_rate": 1.9913631500756846e-05, + "loss": 0.8392, + "step": 2304 + }, + { + "epoch": 0.07064484491847493, + "grad_norm": 1.9743318504397533, + "learning_rate": 1.9913501272356042e-05, + "loss": 0.8792, + "step": 2305 + }, + { + "epoch": 0.07067549344121614, + "grad_norm": 1.958053608374243, + "learning_rate": 1.9913370946274972e-05, + "loss": 0.9193, + "step": 2306 + }, + { + "epoch": 0.07070614196395733, + "grad_norm": 1.9211036447429588, + "learning_rate": 1.9913240522514924e-05, + "loss": 0.8816, + "step": 2307 + }, + { + "epoch": 0.07073679048669854, + "grad_norm": 0.8603682857015379, + "learning_rate": 1.991311000107718e-05, + "loss": 0.666, + "step": 2308 + }, + { + "epoch": 0.07076743900943974, + "grad_norm": 1.7773271920235785, + "learning_rate": 1.9912979381963026e-05, + "loss": 0.884, + "step": 2309 + }, + { + "epoch": 0.07079808753218095, + "grad_norm": 1.7699859320894422, + "learning_rate": 1.9912848665173752e-05, + "loss": 0.8817, + "step": 2310 + }, + { + "epoch": 0.07082873605492215, + "grad_norm": 1.8012056374978822, + "learning_rate": 1.9912717850710642e-05, + "loss": 0.7815, + "step": 2311 + }, + { + "epoch": 0.07085938457766336, + "grad_norm": 1.8616966090072413, + "learning_rate": 1.9912586938574988e-05, + "loss": 0.9332, + "step": 2312 + }, + { + "epoch": 0.07089003310040456, + "grad_norm": 0.7543633548590926, + "learning_rate": 1.9912455928768076e-05, + "loss": 0.6481, + "step": 2313 + }, + { + "epoch": 0.07092068162314577, + "grad_norm": 0.803939610864604, + "learning_rate": 1.99123248212912e-05, + "loss": 0.657, + "step": 2314 + }, + { + "epoch": 0.07095133014588696, + "grad_norm": 1.9502373594675297, + "learning_rate": 1.9912193616145654e-05, + "loss": 0.8214, + "step": 2315 + }, + { + "epoch": 0.07098197866862817, + "grad_norm": 1.9675012386968254, + "learning_rate": 1.991206231333273e-05, + "loss": 0.8343, + "step": 2316 + }, + { + "epoch": 0.07101262719136937, + "grad_norm": 0.7501866654528762, + "learning_rate": 1.9911930912853713e-05, + "loss": 0.64, + "step": 2317 + }, + { + "epoch": 0.07104327571411058, + "grad_norm": 1.7505331172185432, + "learning_rate": 1.9911799414709908e-05, + "loss": 1.0231, + "step": 2318 + }, + { + "epoch": 0.07107392423685178, + "grad_norm": 1.9808038994502817, + "learning_rate": 1.9911667818902608e-05, + "loss": 0.9953, + "step": 2319 + }, + { + "epoch": 0.07110457275959299, + "grad_norm": 1.7691906876687422, + "learning_rate": 1.9911536125433107e-05, + "loss": 0.8736, + "step": 2320 + }, + { + "epoch": 0.0711352212823342, + "grad_norm": 1.9055859402208277, + "learning_rate": 1.991140433430271e-05, + "loss": 0.8721, + "step": 2321 + }, + { + "epoch": 0.0711658698050754, + "grad_norm": 2.1145340749625983, + "learning_rate": 1.9911272445512707e-05, + "loss": 0.9232, + "step": 2322 + }, + { + "epoch": 0.0711965183278166, + "grad_norm": 1.6629376014641435, + "learning_rate": 1.9911140459064396e-05, + "loss": 0.7862, + "step": 2323 + }, + { + "epoch": 0.0712271668505578, + "grad_norm": 1.6646500526817558, + "learning_rate": 1.9911008374959085e-05, + "loss": 0.7764, + "step": 2324 + }, + { + "epoch": 0.071257815373299, + "grad_norm": 1.7722787538954716, + "learning_rate": 1.9910876193198075e-05, + "loss": 0.8768, + "step": 2325 + }, + { + "epoch": 0.07128846389604021, + "grad_norm": 1.957068974287921, + "learning_rate": 1.9910743913782667e-05, + "loss": 0.7883, + "step": 2326 + }, + { + "epoch": 0.07131911241878142, + "grad_norm": 2.0060829313664224, + "learning_rate": 1.991061153671416e-05, + "loss": 0.9077, + "step": 2327 + }, + { + "epoch": 0.07134976094152262, + "grad_norm": 1.9355147557526382, + "learning_rate": 1.9910479061993865e-05, + "loss": 0.8658, + "step": 2328 + }, + { + "epoch": 0.07138040946426383, + "grad_norm": 1.8736212987365617, + "learning_rate": 1.991034648962308e-05, + "loss": 0.9096, + "step": 2329 + }, + { + "epoch": 0.07141105798700503, + "grad_norm": 1.6390261792790133, + "learning_rate": 1.991021381960312e-05, + "loss": 0.7496, + "step": 2330 + }, + { + "epoch": 0.07144170650974623, + "grad_norm": 0.8662155372890042, + "learning_rate": 1.9910081051935285e-05, + "loss": 0.6314, + "step": 2331 + }, + { + "epoch": 0.07147235503248743, + "grad_norm": 1.6871775816097943, + "learning_rate": 1.9909948186620886e-05, + "loss": 0.8209, + "step": 2332 + }, + { + "epoch": 0.07150300355522864, + "grad_norm": 1.921511401388808, + "learning_rate": 1.990981522366123e-05, + "loss": 0.9805, + "step": 2333 + }, + { + "epoch": 0.07153365207796984, + "grad_norm": 2.1669740774371515, + "learning_rate": 1.9909682163057635e-05, + "loss": 0.8944, + "step": 2334 + }, + { + "epoch": 0.07156430060071105, + "grad_norm": 1.8072035415757035, + "learning_rate": 1.9909549004811398e-05, + "loss": 0.9781, + "step": 2335 + }, + { + "epoch": 0.07159494912345225, + "grad_norm": 2.1387326194512406, + "learning_rate": 1.9909415748923842e-05, + "loss": 0.8573, + "step": 2336 + }, + { + "epoch": 0.07162559764619346, + "grad_norm": 2.0274739824359056, + "learning_rate": 1.990928239539628e-05, + "loss": 0.8313, + "step": 2337 + }, + { + "epoch": 0.07165624616893465, + "grad_norm": 1.7572277834710395, + "learning_rate": 1.9909148944230022e-05, + "loss": 0.8572, + "step": 2338 + }, + { + "epoch": 0.07168689469167586, + "grad_norm": 1.6894971499265725, + "learning_rate": 1.9909015395426384e-05, + "loss": 0.8657, + "step": 2339 + }, + { + "epoch": 0.07171754321441706, + "grad_norm": 1.6927189375051008, + "learning_rate": 1.990888174898668e-05, + "loss": 0.8311, + "step": 2340 + }, + { + "epoch": 0.07174819173715827, + "grad_norm": 1.5581429646931444, + "learning_rate": 1.990874800491223e-05, + "loss": 0.8236, + "step": 2341 + }, + { + "epoch": 0.07177884025989947, + "grad_norm": 1.6453121180936485, + "learning_rate": 1.9908614163204353e-05, + "loss": 0.7585, + "step": 2342 + }, + { + "epoch": 0.07180948878264068, + "grad_norm": 1.8380839360455292, + "learning_rate": 1.9908480223864363e-05, + "loss": 0.8771, + "step": 2343 + }, + { + "epoch": 0.07184013730538188, + "grad_norm": 1.9731374217219215, + "learning_rate": 1.9908346186893584e-05, + "loss": 0.9388, + "step": 2344 + }, + { + "epoch": 0.07187078582812309, + "grad_norm": 1.666305848721761, + "learning_rate": 1.9908212052293334e-05, + "loss": 0.7591, + "step": 2345 + }, + { + "epoch": 0.07190143435086428, + "grad_norm": 1.8028016748109736, + "learning_rate": 1.9908077820064937e-05, + "loss": 0.8532, + "step": 2346 + }, + { + "epoch": 0.07193208287360549, + "grad_norm": 1.8304668206976378, + "learning_rate": 1.990794349020971e-05, + "loss": 0.8804, + "step": 2347 + }, + { + "epoch": 0.0719627313963467, + "grad_norm": 1.8352447340701072, + "learning_rate": 1.990780906272898e-05, + "loss": 0.8661, + "step": 2348 + }, + { + "epoch": 0.0719933799190879, + "grad_norm": 1.8457343500969199, + "learning_rate": 1.9907674537624078e-05, + "loss": 0.7084, + "step": 2349 + }, + { + "epoch": 0.0720240284418291, + "grad_norm": 1.8365306359699538, + "learning_rate": 1.990753991489632e-05, + "loss": 0.8168, + "step": 2350 + }, + { + "epoch": 0.07205467696457031, + "grad_norm": 1.8699306295713254, + "learning_rate": 1.990740519454704e-05, + "loss": 0.9001, + "step": 2351 + }, + { + "epoch": 0.07208532548731152, + "grad_norm": 1.8800948176265806, + "learning_rate": 1.990727037657756e-05, + "loss": 0.8395, + "step": 2352 + }, + { + "epoch": 0.07211597401005272, + "grad_norm": 0.8237618312773441, + "learning_rate": 1.9907135460989208e-05, + "loss": 0.6505, + "step": 2353 + }, + { + "epoch": 0.07214662253279391, + "grad_norm": 1.6377604412096785, + "learning_rate": 1.9907000447783315e-05, + "loss": 0.7554, + "step": 2354 + }, + { + "epoch": 0.07217727105553512, + "grad_norm": 0.7640392641410257, + "learning_rate": 1.9906865336961214e-05, + "loss": 0.627, + "step": 2355 + }, + { + "epoch": 0.07220791957827633, + "grad_norm": 1.9816890832546012, + "learning_rate": 1.9906730128524235e-05, + "loss": 0.89, + "step": 2356 + }, + { + "epoch": 0.07223856810101753, + "grad_norm": 1.9098851552999296, + "learning_rate": 1.9906594822473705e-05, + "loss": 0.8397, + "step": 2357 + }, + { + "epoch": 0.07226921662375874, + "grad_norm": 1.8682401781110585, + "learning_rate": 1.9906459418810966e-05, + "loss": 0.8622, + "step": 2358 + }, + { + "epoch": 0.07229986514649994, + "grad_norm": 1.7012707008151702, + "learning_rate": 1.9906323917537346e-05, + "loss": 0.7672, + "step": 2359 + }, + { + "epoch": 0.07233051366924115, + "grad_norm": 0.859681015275222, + "learning_rate": 1.990618831865418e-05, + "loss": 0.6653, + "step": 2360 + }, + { + "epoch": 0.07236116219198235, + "grad_norm": 1.7538833193602954, + "learning_rate": 1.9906052622162808e-05, + "loss": 0.8197, + "step": 2361 + }, + { + "epoch": 0.07239181071472355, + "grad_norm": 2.023449087228927, + "learning_rate": 1.9905916828064565e-05, + "loss": 0.8834, + "step": 2362 + }, + { + "epoch": 0.07242245923746475, + "grad_norm": 1.6209752401988016, + "learning_rate": 1.990578093636079e-05, + "loss": 0.8599, + "step": 2363 + }, + { + "epoch": 0.07245310776020596, + "grad_norm": 1.8434204386495563, + "learning_rate": 1.990564494705282e-05, + "loss": 0.9141, + "step": 2364 + }, + { + "epoch": 0.07248375628294716, + "grad_norm": 1.5975474523428113, + "learning_rate": 1.9905508860141995e-05, + "loss": 0.6814, + "step": 2365 + }, + { + "epoch": 0.07251440480568837, + "grad_norm": 1.5766438518727282, + "learning_rate": 1.9905372675629655e-05, + "loss": 0.9261, + "step": 2366 + }, + { + "epoch": 0.07254505332842957, + "grad_norm": 1.589907571853619, + "learning_rate": 1.9905236393517147e-05, + "loss": 0.8076, + "step": 2367 + }, + { + "epoch": 0.07257570185117078, + "grad_norm": 2.081626104018939, + "learning_rate": 1.990510001380581e-05, + "loss": 0.7388, + "step": 2368 + }, + { + "epoch": 0.07260635037391197, + "grad_norm": 2.00349004809341, + "learning_rate": 1.990496353649699e-05, + "loss": 0.9134, + "step": 2369 + }, + { + "epoch": 0.07263699889665318, + "grad_norm": 1.662746060648275, + "learning_rate": 1.9904826961592026e-05, + "loss": 0.8324, + "step": 2370 + }, + { + "epoch": 0.07266764741939438, + "grad_norm": 1.904824507103621, + "learning_rate": 1.990469028909227e-05, + "loss": 0.8867, + "step": 2371 + }, + { + "epoch": 0.07269829594213559, + "grad_norm": 1.9644353751828627, + "learning_rate": 1.9904553518999063e-05, + "loss": 0.8771, + "step": 2372 + }, + { + "epoch": 0.0727289444648768, + "grad_norm": 1.8336297431180917, + "learning_rate": 1.990441665131376e-05, + "loss": 0.8674, + "step": 2373 + }, + { + "epoch": 0.072759592987618, + "grad_norm": 0.8882889475047704, + "learning_rate": 1.99042796860377e-05, + "loss": 0.655, + "step": 2374 + }, + { + "epoch": 0.0727902415103592, + "grad_norm": 0.8740690533551535, + "learning_rate": 1.9904142623172246e-05, + "loss": 0.6543, + "step": 2375 + }, + { + "epoch": 0.07282089003310041, + "grad_norm": 1.790808822726886, + "learning_rate": 1.9904005462718735e-05, + "loss": 0.7902, + "step": 2376 + }, + { + "epoch": 0.0728515385558416, + "grad_norm": 1.6939324241922669, + "learning_rate": 1.9903868204678525e-05, + "loss": 0.8112, + "step": 2377 + }, + { + "epoch": 0.07288218707858281, + "grad_norm": 2.012086193587947, + "learning_rate": 1.990373084905297e-05, + "loss": 0.8897, + "step": 2378 + }, + { + "epoch": 0.07291283560132401, + "grad_norm": 1.9415549499634523, + "learning_rate": 1.990359339584342e-05, + "loss": 0.9061, + "step": 2379 + }, + { + "epoch": 0.07294348412406522, + "grad_norm": 0.9428624825387015, + "learning_rate": 1.990345584505123e-05, + "loss": 0.6496, + "step": 2380 + }, + { + "epoch": 0.07297413264680642, + "grad_norm": 1.683913834142391, + "learning_rate": 1.9903318196677756e-05, + "loss": 0.8056, + "step": 2381 + }, + { + "epoch": 0.07300478116954763, + "grad_norm": 1.7248349783921553, + "learning_rate": 1.9903180450724352e-05, + "loss": 0.9075, + "step": 2382 + }, + { + "epoch": 0.07303542969228884, + "grad_norm": 1.8198509316815965, + "learning_rate": 1.990304260719238e-05, + "loss": 0.9404, + "step": 2383 + }, + { + "epoch": 0.07306607821503004, + "grad_norm": 2.1029497312711065, + "learning_rate": 1.9902904666083192e-05, + "loss": 0.7935, + "step": 2384 + }, + { + "epoch": 0.07309672673777123, + "grad_norm": 0.8076880160902877, + "learning_rate": 1.9902766627398153e-05, + "loss": 0.6407, + "step": 2385 + }, + { + "epoch": 0.07312737526051244, + "grad_norm": 2.015578612302033, + "learning_rate": 1.9902628491138622e-05, + "loss": 0.9346, + "step": 2386 + }, + { + "epoch": 0.07315802378325365, + "grad_norm": 1.7131983670352797, + "learning_rate": 1.9902490257305957e-05, + "loss": 0.8897, + "step": 2387 + }, + { + "epoch": 0.07318867230599485, + "grad_norm": 1.6985243867911335, + "learning_rate": 1.990235192590152e-05, + "loss": 0.7836, + "step": 2388 + }, + { + "epoch": 0.07321932082873606, + "grad_norm": 1.6811003566119802, + "learning_rate": 1.9902213496926677e-05, + "loss": 0.8475, + "step": 2389 + }, + { + "epoch": 0.07324996935147726, + "grad_norm": 1.7738557947592326, + "learning_rate": 1.990207497038279e-05, + "loss": 0.8937, + "step": 2390 + }, + { + "epoch": 0.07328061787421847, + "grad_norm": 0.8081789071580144, + "learning_rate": 1.9901936346271228e-05, + "loss": 0.6638, + "step": 2391 + }, + { + "epoch": 0.07331126639695967, + "grad_norm": 1.6130152299372513, + "learning_rate": 1.9901797624593352e-05, + "loss": 0.8069, + "step": 2392 + }, + { + "epoch": 0.07334191491970087, + "grad_norm": 1.787433521900859, + "learning_rate": 1.990165880535053e-05, + "loss": 0.8883, + "step": 2393 + }, + { + "epoch": 0.07337256344244207, + "grad_norm": 0.7613281627353173, + "learning_rate": 1.9901519888544132e-05, + "loss": 0.6473, + "step": 2394 + }, + { + "epoch": 0.07340321196518328, + "grad_norm": 1.722120745114926, + "learning_rate": 1.9901380874175526e-05, + "loss": 0.8279, + "step": 2395 + }, + { + "epoch": 0.07343386048792448, + "grad_norm": 1.8546751527496923, + "learning_rate": 1.9901241762246078e-05, + "loss": 0.9707, + "step": 2396 + }, + { + "epoch": 0.07346450901066569, + "grad_norm": 1.7971491904602976, + "learning_rate": 1.9901102552757158e-05, + "loss": 0.7925, + "step": 2397 + }, + { + "epoch": 0.0734951575334069, + "grad_norm": 1.8459771831066507, + "learning_rate": 1.9900963245710147e-05, + "loss": 0.915, + "step": 2398 + }, + { + "epoch": 0.0735258060561481, + "grad_norm": 1.930081639289611, + "learning_rate": 1.990082384110641e-05, + "loss": 0.8838, + "step": 2399 + }, + { + "epoch": 0.07355645457888929, + "grad_norm": 1.74025758511631, + "learning_rate": 1.9900684338947322e-05, + "loss": 0.8827, + "step": 2400 + }, + { + "epoch": 0.0735871031016305, + "grad_norm": 1.8722913512830113, + "learning_rate": 1.9900544739234263e-05, + "loss": 0.7501, + "step": 2401 + }, + { + "epoch": 0.0736177516243717, + "grad_norm": 0.7982916659309409, + "learning_rate": 1.99004050419686e-05, + "loss": 0.6367, + "step": 2402 + }, + { + "epoch": 0.07364840014711291, + "grad_norm": 1.7182342427466721, + "learning_rate": 1.990026524715171e-05, + "loss": 0.8868, + "step": 2403 + }, + { + "epoch": 0.07367904866985411, + "grad_norm": 2.0567984250801508, + "learning_rate": 1.9900125354784976e-05, + "loss": 0.8497, + "step": 2404 + }, + { + "epoch": 0.07370969719259532, + "grad_norm": 1.6386582878913545, + "learning_rate": 1.9899985364869774e-05, + "loss": 0.8145, + "step": 2405 + }, + { + "epoch": 0.07374034571533652, + "grad_norm": 0.7292802446295621, + "learning_rate": 1.9899845277407483e-05, + "loss": 0.6383, + "step": 2406 + }, + { + "epoch": 0.07377099423807773, + "grad_norm": 1.7949069786660983, + "learning_rate": 1.9899705092399482e-05, + "loss": 0.9666, + "step": 2407 + }, + { + "epoch": 0.07380164276081892, + "grad_norm": 1.8123506694582054, + "learning_rate": 1.9899564809847157e-05, + "loss": 0.8771, + "step": 2408 + }, + { + "epoch": 0.07383229128356013, + "grad_norm": 1.746559227916125, + "learning_rate": 1.9899424429751883e-05, + "loss": 0.9279, + "step": 2409 + }, + { + "epoch": 0.07386293980630133, + "grad_norm": 1.7121578636764956, + "learning_rate": 1.989928395211505e-05, + "loss": 0.8339, + "step": 2410 + }, + { + "epoch": 0.07389358832904254, + "grad_norm": 1.7870319474361418, + "learning_rate": 1.989914337693804e-05, + "loss": 1.0159, + "step": 2411 + }, + { + "epoch": 0.07392423685178375, + "grad_norm": 1.5990727851880822, + "learning_rate": 1.9899002704222236e-05, + "loss": 0.8773, + "step": 2412 + }, + { + "epoch": 0.07395488537452495, + "grad_norm": 1.5477395726496965, + "learning_rate": 1.9898861933969024e-05, + "loss": 0.8247, + "step": 2413 + }, + { + "epoch": 0.07398553389726616, + "grad_norm": 1.82876929244765, + "learning_rate": 1.9898721066179796e-05, + "loss": 0.9951, + "step": 2414 + }, + { + "epoch": 0.07401618242000736, + "grad_norm": 1.7502060969165383, + "learning_rate": 1.9898580100855933e-05, + "loss": 0.8962, + "step": 2415 + }, + { + "epoch": 0.07404683094274855, + "grad_norm": 0.929908371980143, + "learning_rate": 1.9898439037998832e-05, + "loss": 0.6829, + "step": 2416 + }, + { + "epoch": 0.07407747946548976, + "grad_norm": 1.9676086015713927, + "learning_rate": 1.9898297877609875e-05, + "loss": 0.8564, + "step": 2417 + }, + { + "epoch": 0.07410812798823097, + "grad_norm": 1.7062721169894948, + "learning_rate": 1.9898156619690458e-05, + "loss": 0.866, + "step": 2418 + }, + { + "epoch": 0.07413877651097217, + "grad_norm": 1.7451612961359015, + "learning_rate": 1.9898015264241973e-05, + "loss": 0.7935, + "step": 2419 + }, + { + "epoch": 0.07416942503371338, + "grad_norm": 1.8685588876891568, + "learning_rate": 1.989787381126581e-05, + "loss": 0.8778, + "step": 2420 + }, + { + "epoch": 0.07420007355645458, + "grad_norm": 1.7371387604927133, + "learning_rate": 1.989773226076336e-05, + "loss": 0.8221, + "step": 2421 + }, + { + "epoch": 0.07423072207919579, + "grad_norm": 1.6385101471859558, + "learning_rate": 1.9897590612736024e-05, + "loss": 0.7119, + "step": 2422 + }, + { + "epoch": 0.074261370601937, + "grad_norm": 1.8124798940683389, + "learning_rate": 1.9897448867185198e-05, + "loss": 0.7817, + "step": 2423 + }, + { + "epoch": 0.07429201912467819, + "grad_norm": 1.6898641558827843, + "learning_rate": 1.9897307024112273e-05, + "loss": 0.8358, + "step": 2424 + }, + { + "epoch": 0.07432266764741939, + "grad_norm": 1.641736045504124, + "learning_rate": 1.989716508351865e-05, + "loss": 0.8192, + "step": 2425 + }, + { + "epoch": 0.0743533161701606, + "grad_norm": 0.9952609392240214, + "learning_rate": 1.989702304540573e-05, + "loss": 0.6621, + "step": 2426 + }, + { + "epoch": 0.0743839646929018, + "grad_norm": 1.97618293559088, + "learning_rate": 1.98968809097749e-05, + "loss": 0.9825, + "step": 2427 + }, + { + "epoch": 0.07441461321564301, + "grad_norm": 1.6794238250604774, + "learning_rate": 1.989673867662758e-05, + "loss": 0.821, + "step": 2428 + }, + { + "epoch": 0.07444526173838421, + "grad_norm": 2.2027236127619494, + "learning_rate": 1.989659634596516e-05, + "loss": 0.8934, + "step": 2429 + }, + { + "epoch": 0.07447591026112542, + "grad_norm": 1.8120686606802425, + "learning_rate": 1.989645391778904e-05, + "loss": 1.079, + "step": 2430 + }, + { + "epoch": 0.07450655878386661, + "grad_norm": 1.7034102051996427, + "learning_rate": 1.9896311392100633e-05, + "loss": 0.8925, + "step": 2431 + }, + { + "epoch": 0.07453720730660782, + "grad_norm": 2.0393223247780354, + "learning_rate": 1.9896168768901334e-05, + "loss": 0.9731, + "step": 2432 + }, + { + "epoch": 0.07456785582934902, + "grad_norm": 1.817560487120759, + "learning_rate": 1.9896026048192555e-05, + "loss": 0.9439, + "step": 2433 + }, + { + "epoch": 0.07459850435209023, + "grad_norm": 1.750485439268033, + "learning_rate": 1.9895883229975697e-05, + "loss": 0.8931, + "step": 2434 + }, + { + "epoch": 0.07462915287483143, + "grad_norm": 2.1146165364642764, + "learning_rate": 1.989574031425217e-05, + "loss": 0.8262, + "step": 2435 + }, + { + "epoch": 0.07465980139757264, + "grad_norm": 1.9601008500029709, + "learning_rate": 1.989559730102338e-05, + "loss": 0.8586, + "step": 2436 + }, + { + "epoch": 0.07469044992031385, + "grad_norm": 0.9828839746796185, + "learning_rate": 1.9895454190290743e-05, + "loss": 0.6488, + "step": 2437 + }, + { + "epoch": 0.07472109844305505, + "grad_norm": 1.6849778342026944, + "learning_rate": 1.989531098205566e-05, + "loss": 0.8543, + "step": 2438 + }, + { + "epoch": 0.07475174696579624, + "grad_norm": 1.927800206752633, + "learning_rate": 1.9895167676319547e-05, + "loss": 0.86, + "step": 2439 + }, + { + "epoch": 0.07478239548853745, + "grad_norm": 1.7398603216047288, + "learning_rate": 1.9895024273083813e-05, + "loss": 0.8244, + "step": 2440 + }, + { + "epoch": 0.07481304401127865, + "grad_norm": 1.6190500187838335, + "learning_rate": 1.9894880772349876e-05, + "loss": 0.7835, + "step": 2441 + }, + { + "epoch": 0.07484369253401986, + "grad_norm": 1.8594822999841554, + "learning_rate": 1.9894737174119148e-05, + "loss": 0.8655, + "step": 2442 + }, + { + "epoch": 0.07487434105676107, + "grad_norm": 1.7613935559920537, + "learning_rate": 1.989459347839304e-05, + "loss": 1.0519, + "step": 2443 + }, + { + "epoch": 0.07490498957950227, + "grad_norm": 1.7702520984960242, + "learning_rate": 1.989444968517297e-05, + "loss": 0.7634, + "step": 2444 + }, + { + "epoch": 0.07493563810224348, + "grad_norm": 1.7145313284266768, + "learning_rate": 1.9894305794460357e-05, + "loss": 0.8388, + "step": 2445 + }, + { + "epoch": 0.07496628662498468, + "grad_norm": 1.8759710812807147, + "learning_rate": 1.989416180625662e-05, + "loss": 0.8595, + "step": 2446 + }, + { + "epoch": 0.07499693514772587, + "grad_norm": 1.9166626529932438, + "learning_rate": 1.9894017720563172e-05, + "loss": 0.7444, + "step": 2447 + }, + { + "epoch": 0.07502758367046708, + "grad_norm": 1.9165465135565087, + "learning_rate": 1.9893873537381438e-05, + "loss": 0.7492, + "step": 2448 + }, + { + "epoch": 0.07505823219320829, + "grad_norm": 2.079859761251981, + "learning_rate": 1.9893729256712835e-05, + "loss": 0.7817, + "step": 2449 + }, + { + "epoch": 0.07508888071594949, + "grad_norm": 1.650366191698141, + "learning_rate": 1.9893584878558787e-05, + "loss": 0.779, + "step": 2450 + }, + { + "epoch": 0.0751195292386907, + "grad_norm": 1.7684182218588866, + "learning_rate": 1.9893440402920716e-05, + "loss": 0.7782, + "step": 2451 + }, + { + "epoch": 0.0751501777614319, + "grad_norm": 0.9382234239676522, + "learning_rate": 1.9893295829800046e-05, + "loss": 0.6277, + "step": 2452 + }, + { + "epoch": 0.07518082628417311, + "grad_norm": 1.911099906016548, + "learning_rate": 1.9893151159198196e-05, + "loss": 0.9527, + "step": 2453 + }, + { + "epoch": 0.07521147480691431, + "grad_norm": 0.8205493563944722, + "learning_rate": 1.9893006391116603e-05, + "loss": 0.628, + "step": 2454 + }, + { + "epoch": 0.0752421233296555, + "grad_norm": 1.786102863528785, + "learning_rate": 1.989286152555668e-05, + "loss": 0.9306, + "step": 2455 + }, + { + "epoch": 0.07527277185239671, + "grad_norm": 1.7430720665271062, + "learning_rate": 1.9892716562519866e-05, + "loss": 0.8418, + "step": 2456 + }, + { + "epoch": 0.07530342037513792, + "grad_norm": 1.6346707614132103, + "learning_rate": 1.9892571502007585e-05, + "loss": 0.851, + "step": 2457 + }, + { + "epoch": 0.07533406889787912, + "grad_norm": 2.0038116767524903, + "learning_rate": 1.9892426344021267e-05, + "loss": 0.8914, + "step": 2458 + }, + { + "epoch": 0.07536471742062033, + "grad_norm": 2.0939848187942265, + "learning_rate": 1.9892281088562337e-05, + "loss": 0.855, + "step": 2459 + }, + { + "epoch": 0.07539536594336153, + "grad_norm": 1.1527292945469805, + "learning_rate": 1.9892135735632232e-05, + "loss": 0.6789, + "step": 2460 + }, + { + "epoch": 0.07542601446610274, + "grad_norm": 1.6820449952136014, + "learning_rate": 1.9891990285232383e-05, + "loss": 0.8577, + "step": 2461 + }, + { + "epoch": 0.07545666298884393, + "grad_norm": 0.8231867213585057, + "learning_rate": 1.9891844737364222e-05, + "loss": 0.6276, + "step": 2462 + }, + { + "epoch": 0.07548731151158514, + "grad_norm": 1.595058474384754, + "learning_rate": 1.9891699092029183e-05, + "loss": 0.7766, + "step": 2463 + }, + { + "epoch": 0.07551796003432634, + "grad_norm": 1.7449481031231255, + "learning_rate": 1.9891553349228705e-05, + "loss": 0.8392, + "step": 2464 + }, + { + "epoch": 0.07554860855706755, + "grad_norm": 0.7765805433054745, + "learning_rate": 1.989140750896422e-05, + "loss": 0.6308, + "step": 2465 + }, + { + "epoch": 0.07557925707980875, + "grad_norm": 1.760092908039798, + "learning_rate": 1.9891261571237167e-05, + "loss": 0.8817, + "step": 2466 + }, + { + "epoch": 0.07560990560254996, + "grad_norm": 1.9121899969119787, + "learning_rate": 1.989111553604898e-05, + "loss": 0.8932, + "step": 2467 + }, + { + "epoch": 0.07564055412529117, + "grad_norm": 0.9265749082455165, + "learning_rate": 1.98909694034011e-05, + "loss": 0.6694, + "step": 2468 + }, + { + "epoch": 0.07567120264803237, + "grad_norm": 1.8045788202308284, + "learning_rate": 1.989082317329497e-05, + "loss": 0.8226, + "step": 2469 + }, + { + "epoch": 0.07570185117077356, + "grad_norm": 1.7581363134518528, + "learning_rate": 1.989067684573203e-05, + "loss": 0.9241, + "step": 2470 + }, + { + "epoch": 0.07573249969351477, + "grad_norm": 1.725719107670488, + "learning_rate": 1.9890530420713717e-05, + "loss": 0.9261, + "step": 2471 + }, + { + "epoch": 0.07576314821625597, + "grad_norm": 1.6469539895282386, + "learning_rate": 1.989038389824148e-05, + "loss": 0.8353, + "step": 2472 + }, + { + "epoch": 0.07579379673899718, + "grad_norm": 1.8012517982692555, + "learning_rate": 1.989023727831676e-05, + "loss": 0.8103, + "step": 2473 + }, + { + "epoch": 0.07582444526173839, + "grad_norm": 1.8917613143785572, + "learning_rate": 1.9890090560941e-05, + "loss": 0.9262, + "step": 2474 + }, + { + "epoch": 0.07585509378447959, + "grad_norm": 1.7982079043632346, + "learning_rate": 1.9889943746115645e-05, + "loss": 0.8935, + "step": 2475 + }, + { + "epoch": 0.0758857423072208, + "grad_norm": 2.0238039265614147, + "learning_rate": 1.9889796833842147e-05, + "loss": 0.8566, + "step": 2476 + }, + { + "epoch": 0.075916390829962, + "grad_norm": 1.6823013116348133, + "learning_rate": 1.988964982412195e-05, + "loss": 0.8526, + "step": 2477 + }, + { + "epoch": 0.0759470393527032, + "grad_norm": 1.7192689685533413, + "learning_rate": 1.9889502716956505e-05, + "loss": 0.8422, + "step": 2478 + }, + { + "epoch": 0.0759776878754444, + "grad_norm": 1.8386753092131334, + "learning_rate": 1.9889355512347258e-05, + "loss": 0.8212, + "step": 2479 + }, + { + "epoch": 0.0760083363981856, + "grad_norm": 1.7968598190806684, + "learning_rate": 1.9889208210295656e-05, + "loss": 0.8662, + "step": 2480 + }, + { + "epoch": 0.07603898492092681, + "grad_norm": 1.8183243373998734, + "learning_rate": 1.988906081080316e-05, + "loss": 0.7715, + "step": 2481 + }, + { + "epoch": 0.07606963344366802, + "grad_norm": 0.9069628422993984, + "learning_rate": 1.9888913313871217e-05, + "loss": 0.6384, + "step": 2482 + }, + { + "epoch": 0.07610028196640922, + "grad_norm": 0.8434579430095562, + "learning_rate": 1.988876571950128e-05, + "loss": 0.6433, + "step": 2483 + }, + { + "epoch": 0.07613093048915043, + "grad_norm": 1.752417339302922, + "learning_rate": 1.9888618027694807e-05, + "loss": 0.8998, + "step": 2484 + }, + { + "epoch": 0.07616157901189163, + "grad_norm": 1.9094785058358028, + "learning_rate": 1.9888470238453248e-05, + "loss": 0.866, + "step": 2485 + }, + { + "epoch": 0.07619222753463283, + "grad_norm": 2.267180801346473, + "learning_rate": 1.9888322351778063e-05, + "loss": 0.8438, + "step": 2486 + }, + { + "epoch": 0.07622287605737403, + "grad_norm": 1.6857382214952574, + "learning_rate": 1.9888174367670706e-05, + "loss": 0.8236, + "step": 2487 + }, + { + "epoch": 0.07625352458011524, + "grad_norm": 1.8904347366404635, + "learning_rate": 1.9888026286132637e-05, + "loss": 0.8948, + "step": 2488 + }, + { + "epoch": 0.07628417310285644, + "grad_norm": 1.1727576892660037, + "learning_rate": 1.9887878107165317e-05, + "loss": 0.6806, + "step": 2489 + }, + { + "epoch": 0.07631482162559765, + "grad_norm": 1.0491521262925083, + "learning_rate": 1.9887729830770205e-05, + "loss": 0.6716, + "step": 2490 + }, + { + "epoch": 0.07634547014833885, + "grad_norm": 1.802367213502348, + "learning_rate": 1.9887581456948756e-05, + "loss": 0.8735, + "step": 2491 + }, + { + "epoch": 0.07637611867108006, + "grad_norm": 0.8700432168683809, + "learning_rate": 1.9887432985702442e-05, + "loss": 0.675, + "step": 2492 + }, + { + "epoch": 0.07640676719382125, + "grad_norm": 1.8164135934207393, + "learning_rate": 1.988728441703272e-05, + "loss": 0.8065, + "step": 2493 + }, + { + "epoch": 0.07643741571656246, + "grad_norm": 1.7569106075524283, + "learning_rate": 1.988713575094105e-05, + "loss": 0.8894, + "step": 2494 + }, + { + "epoch": 0.07646806423930366, + "grad_norm": 1.8375802825551169, + "learning_rate": 1.9886986987428905e-05, + "loss": 0.9198, + "step": 2495 + }, + { + "epoch": 0.07649871276204487, + "grad_norm": 1.275440977057264, + "learning_rate": 1.988683812649775e-05, + "loss": 0.6289, + "step": 2496 + }, + { + "epoch": 0.07652936128478607, + "grad_norm": 1.855997482155228, + "learning_rate": 1.988668916814905e-05, + "loss": 0.9198, + "step": 2497 + }, + { + "epoch": 0.07656000980752728, + "grad_norm": 1.5821983531460933, + "learning_rate": 1.9886540112384267e-05, + "loss": 0.8143, + "step": 2498 + }, + { + "epoch": 0.07659065833026849, + "grad_norm": 1.6590923500448638, + "learning_rate": 1.988639095920488e-05, + "loss": 0.7579, + "step": 2499 + }, + { + "epoch": 0.07662130685300969, + "grad_norm": 1.9401584055305559, + "learning_rate": 1.988624170861235e-05, + "loss": 0.7092, + "step": 2500 + }, + { + "epoch": 0.07665195537575088, + "grad_norm": 1.8152631185927726, + "learning_rate": 1.988609236060815e-05, + "loss": 0.8484, + "step": 2501 + }, + { + "epoch": 0.07668260389849209, + "grad_norm": 0.7968256615740729, + "learning_rate": 1.9885942915193753e-05, + "loss": 0.6464, + "step": 2502 + }, + { + "epoch": 0.0767132524212333, + "grad_norm": 1.7609574303309086, + "learning_rate": 1.9885793372370635e-05, + "loss": 0.7983, + "step": 2503 + }, + { + "epoch": 0.0767439009439745, + "grad_norm": 1.848063242760385, + "learning_rate": 1.9885643732140262e-05, + "loss": 0.8449, + "step": 2504 + }, + { + "epoch": 0.0767745494667157, + "grad_norm": 1.8514235801093188, + "learning_rate": 1.9885493994504113e-05, + "loss": 0.8316, + "step": 2505 + }, + { + "epoch": 0.07680519798945691, + "grad_norm": 0.9103791277737336, + "learning_rate": 1.988534415946366e-05, + "loss": 0.6461, + "step": 2506 + }, + { + "epoch": 0.07683584651219812, + "grad_norm": 1.8724916740531676, + "learning_rate": 1.9885194227020386e-05, + "loss": 0.7807, + "step": 2507 + }, + { + "epoch": 0.07686649503493932, + "grad_norm": 2.2754909033824506, + "learning_rate": 1.988504419717576e-05, + "loss": 0.8655, + "step": 2508 + }, + { + "epoch": 0.07689714355768051, + "grad_norm": 1.816318200650202, + "learning_rate": 1.9884894069931267e-05, + "loss": 0.8602, + "step": 2509 + }, + { + "epoch": 0.07692779208042172, + "grad_norm": 1.8510188344982805, + "learning_rate": 1.9884743845288382e-05, + "loss": 0.7807, + "step": 2510 + }, + { + "epoch": 0.07695844060316293, + "grad_norm": 1.7815914988934878, + "learning_rate": 1.9884593523248586e-05, + "loss": 0.9234, + "step": 2511 + }, + { + "epoch": 0.07698908912590413, + "grad_norm": 1.6274378974458845, + "learning_rate": 1.988444310381336e-05, + "loss": 0.6643, + "step": 2512 + }, + { + "epoch": 0.07701973764864534, + "grad_norm": 1.852189672649901, + "learning_rate": 1.9884292586984193e-05, + "loss": 0.8182, + "step": 2513 + }, + { + "epoch": 0.07705038617138654, + "grad_norm": 1.916875232622127, + "learning_rate": 1.9884141972762558e-05, + "loss": 0.794, + "step": 2514 + }, + { + "epoch": 0.07708103469412775, + "grad_norm": 1.6957143955289584, + "learning_rate": 1.9883991261149944e-05, + "loss": 0.7544, + "step": 2515 + }, + { + "epoch": 0.07711168321686895, + "grad_norm": 1.7272466232437595, + "learning_rate": 1.9883840452147834e-05, + "loss": 0.8716, + "step": 2516 + }, + { + "epoch": 0.07714233173961015, + "grad_norm": 1.615695847127961, + "learning_rate": 1.9883689545757715e-05, + "loss": 0.8179, + "step": 2517 + }, + { + "epoch": 0.07717298026235135, + "grad_norm": 1.532647187590265, + "learning_rate": 1.9883538541981076e-05, + "loss": 0.7935, + "step": 2518 + }, + { + "epoch": 0.07720362878509256, + "grad_norm": 1.547039292740707, + "learning_rate": 1.9883387440819403e-05, + "loss": 0.8474, + "step": 2519 + }, + { + "epoch": 0.07723427730783376, + "grad_norm": 1.7471405531360467, + "learning_rate": 1.9883236242274182e-05, + "loss": 0.9114, + "step": 2520 + }, + { + "epoch": 0.07726492583057497, + "grad_norm": 1.6498248147954255, + "learning_rate": 1.988308494634691e-05, + "loss": 0.8018, + "step": 2521 + }, + { + "epoch": 0.07729557435331617, + "grad_norm": 1.8660682372945863, + "learning_rate": 1.988293355303907e-05, + "loss": 0.8605, + "step": 2522 + }, + { + "epoch": 0.07732622287605738, + "grad_norm": 1.9112259143950863, + "learning_rate": 1.9882782062352155e-05, + "loss": 0.9145, + "step": 2523 + }, + { + "epoch": 0.07735687139879857, + "grad_norm": 1.8487606590948982, + "learning_rate": 1.9882630474287663e-05, + "loss": 0.8483, + "step": 2524 + }, + { + "epoch": 0.07738751992153978, + "grad_norm": 0.9230691391274533, + "learning_rate": 1.9882478788847088e-05, + "loss": 0.6393, + "step": 2525 + }, + { + "epoch": 0.07741816844428098, + "grad_norm": 2.6324107529353427, + "learning_rate": 1.9882327006031913e-05, + "loss": 0.8212, + "step": 2526 + }, + { + "epoch": 0.07744881696702219, + "grad_norm": 1.94392694525661, + "learning_rate": 1.9882175125843647e-05, + "loss": 0.8447, + "step": 2527 + }, + { + "epoch": 0.0774794654897634, + "grad_norm": 1.6424824744754905, + "learning_rate": 1.9882023148283776e-05, + "loss": 0.8886, + "step": 2528 + }, + { + "epoch": 0.0775101140125046, + "grad_norm": 1.6567222300293163, + "learning_rate": 1.9881871073353806e-05, + "loss": 0.6733, + "step": 2529 + }, + { + "epoch": 0.0775407625352458, + "grad_norm": 1.7919981608638407, + "learning_rate": 1.988171890105523e-05, + "loss": 0.8356, + "step": 2530 + }, + { + "epoch": 0.07757141105798701, + "grad_norm": 0.8462439058588069, + "learning_rate": 1.9881566631389557e-05, + "loss": 0.6399, + "step": 2531 + }, + { + "epoch": 0.0776020595807282, + "grad_norm": 0.8032573840472812, + "learning_rate": 1.988141426435827e-05, + "loss": 0.623, + "step": 2532 + }, + { + "epoch": 0.07763270810346941, + "grad_norm": 1.956654029917886, + "learning_rate": 1.9881261799962885e-05, + "loss": 0.7382, + "step": 2533 + }, + { + "epoch": 0.07766335662621061, + "grad_norm": 1.8943014303449253, + "learning_rate": 1.9881109238204896e-05, + "loss": 0.859, + "step": 2534 + }, + { + "epoch": 0.07769400514895182, + "grad_norm": 1.803982322377293, + "learning_rate": 1.9880956579085812e-05, + "loss": 0.7778, + "step": 2535 + }, + { + "epoch": 0.07772465367169303, + "grad_norm": 1.7714763212059526, + "learning_rate": 1.9880803822607135e-05, + "loss": 0.8246, + "step": 2536 + }, + { + "epoch": 0.07775530219443423, + "grad_norm": 1.9634002991200883, + "learning_rate": 1.988065096877037e-05, + "loss": 0.9553, + "step": 2537 + }, + { + "epoch": 0.07778595071717544, + "grad_norm": 1.778666209614606, + "learning_rate": 1.988049801757702e-05, + "loss": 0.9129, + "step": 2538 + }, + { + "epoch": 0.07781659923991664, + "grad_norm": 1.7138130310053108, + "learning_rate": 1.98803449690286e-05, + "loss": 0.8454, + "step": 2539 + }, + { + "epoch": 0.07784724776265783, + "grad_norm": 1.83020657040761, + "learning_rate": 1.9880191823126606e-05, + "loss": 0.9049, + "step": 2540 + }, + { + "epoch": 0.07787789628539904, + "grad_norm": 1.6900291784634942, + "learning_rate": 1.988003857987256e-05, + "loss": 0.8871, + "step": 2541 + }, + { + "epoch": 0.07790854480814025, + "grad_norm": 1.9089493966655715, + "learning_rate": 1.987988523926796e-05, + "loss": 0.8503, + "step": 2542 + }, + { + "epoch": 0.07793919333088145, + "grad_norm": 1.9100013592199478, + "learning_rate": 1.9879731801314327e-05, + "loss": 0.8293, + "step": 2543 + }, + { + "epoch": 0.07796984185362266, + "grad_norm": 1.8648741043773207, + "learning_rate": 1.9879578266013172e-05, + "loss": 0.8589, + "step": 2544 + }, + { + "epoch": 0.07800049037636386, + "grad_norm": 2.2099335635014996, + "learning_rate": 1.9879424633365997e-05, + "loss": 0.8141, + "step": 2545 + }, + { + "epoch": 0.07803113889910507, + "grad_norm": 1.9100972431599894, + "learning_rate": 1.987927090337433e-05, + "loss": 0.9235, + "step": 2546 + }, + { + "epoch": 0.07806178742184627, + "grad_norm": 1.8083947839412506, + "learning_rate": 1.9879117076039676e-05, + "loss": 0.8743, + "step": 2547 + }, + { + "epoch": 0.07809243594458747, + "grad_norm": 1.8365498958196644, + "learning_rate": 1.9878963151363554e-05, + "loss": 0.8188, + "step": 2548 + }, + { + "epoch": 0.07812308446732867, + "grad_norm": 1.6131221319653193, + "learning_rate": 1.987880912934748e-05, + "loss": 0.8406, + "step": 2549 + }, + { + "epoch": 0.07815373299006988, + "grad_norm": 2.0039252192492136, + "learning_rate": 1.987865500999297e-05, + "loss": 0.785, + "step": 2550 + }, + { + "epoch": 0.07818438151281108, + "grad_norm": 2.0599183643580186, + "learning_rate": 1.987850079330155e-05, + "loss": 0.947, + "step": 2551 + }, + { + "epoch": 0.07821503003555229, + "grad_norm": 1.8438660107964517, + "learning_rate": 1.987834647927473e-05, + "loss": 0.8787, + "step": 2552 + }, + { + "epoch": 0.0782456785582935, + "grad_norm": 1.8838681574943716, + "learning_rate": 1.9878192067914038e-05, + "loss": 0.8601, + "step": 2553 + }, + { + "epoch": 0.0782763270810347, + "grad_norm": 1.6795917181051816, + "learning_rate": 1.9878037559220987e-05, + "loss": 0.8053, + "step": 2554 + }, + { + "epoch": 0.07830697560377589, + "grad_norm": 1.8005334936667428, + "learning_rate": 1.9877882953197108e-05, + "loss": 0.8047, + "step": 2555 + }, + { + "epoch": 0.0783376241265171, + "grad_norm": 1.6446497131542612, + "learning_rate": 1.9877728249843922e-05, + "loss": 0.7714, + "step": 2556 + }, + { + "epoch": 0.0783682726492583, + "grad_norm": 2.048806747409211, + "learning_rate": 1.987757344916295e-05, + "loss": 0.841, + "step": 2557 + }, + { + "epoch": 0.07839892117199951, + "grad_norm": 1.9686253126480866, + "learning_rate": 1.987741855115572e-05, + "loss": 0.832, + "step": 2558 + }, + { + "epoch": 0.07842956969474071, + "grad_norm": 1.8177997959937895, + "learning_rate": 1.987726355582376e-05, + "loss": 0.8001, + "step": 2559 + }, + { + "epoch": 0.07846021821748192, + "grad_norm": 1.8565155144194954, + "learning_rate": 1.987710846316859e-05, + "loss": 0.9352, + "step": 2560 + }, + { + "epoch": 0.07849086674022313, + "grad_norm": 1.82252187361971, + "learning_rate": 1.987695327319175e-05, + "loss": 0.9114, + "step": 2561 + }, + { + "epoch": 0.07852151526296433, + "grad_norm": 1.827745490093865, + "learning_rate": 1.9876797985894757e-05, + "loss": 0.8494, + "step": 2562 + }, + { + "epoch": 0.07855216378570552, + "grad_norm": 1.8404183999654748, + "learning_rate": 1.987664260127915e-05, + "loss": 0.9052, + "step": 2563 + }, + { + "epoch": 0.07858281230844673, + "grad_norm": 1.907020043541447, + "learning_rate": 1.9876487119346454e-05, + "loss": 0.9001, + "step": 2564 + }, + { + "epoch": 0.07861346083118793, + "grad_norm": 1.184526100803246, + "learning_rate": 1.9876331540098202e-05, + "loss": 0.6617, + "step": 2565 + }, + { + "epoch": 0.07864410935392914, + "grad_norm": 1.816407467317629, + "learning_rate": 1.987617586353593e-05, + "loss": 0.829, + "step": 2566 + }, + { + "epoch": 0.07867475787667035, + "grad_norm": 0.7897837594675855, + "learning_rate": 1.987602008966117e-05, + "loss": 0.6492, + "step": 2567 + }, + { + "epoch": 0.07870540639941155, + "grad_norm": 1.7860385413616053, + "learning_rate": 1.9875864218475458e-05, + "loss": 0.8854, + "step": 2568 + }, + { + "epoch": 0.07873605492215276, + "grad_norm": 0.8598864996152233, + "learning_rate": 1.9875708249980326e-05, + "loss": 0.6394, + "step": 2569 + }, + { + "epoch": 0.07876670344489396, + "grad_norm": 1.936841129325341, + "learning_rate": 1.9875552184177318e-05, + "loss": 0.8341, + "step": 2570 + }, + { + "epoch": 0.07879735196763515, + "grad_norm": 1.685684133014567, + "learning_rate": 1.9875396021067964e-05, + "loss": 0.7904, + "step": 2571 + }, + { + "epoch": 0.07882800049037636, + "grad_norm": 1.830535940838247, + "learning_rate": 1.987523976065381e-05, + "loss": 0.9214, + "step": 2572 + }, + { + "epoch": 0.07885864901311757, + "grad_norm": 1.732405498904504, + "learning_rate": 1.9875083402936388e-05, + "loss": 0.9533, + "step": 2573 + }, + { + "epoch": 0.07888929753585877, + "grad_norm": 1.6266335974924477, + "learning_rate": 1.9874926947917247e-05, + "loss": 0.9141, + "step": 2574 + }, + { + "epoch": 0.07891994605859998, + "grad_norm": 0.9179653580042485, + "learning_rate": 1.987477039559792e-05, + "loss": 0.6423, + "step": 2575 + }, + { + "epoch": 0.07895059458134118, + "grad_norm": 1.8314726762222633, + "learning_rate": 1.9874613745979955e-05, + "loss": 0.845, + "step": 2576 + }, + { + "epoch": 0.07898124310408239, + "grad_norm": 1.981436219781489, + "learning_rate": 1.9874456999064896e-05, + "loss": 0.9269, + "step": 2577 + }, + { + "epoch": 0.0790118916268236, + "grad_norm": 1.8551479722287838, + "learning_rate": 1.9874300154854286e-05, + "loss": 0.911, + "step": 2578 + }, + { + "epoch": 0.07904254014956479, + "grad_norm": 1.848089237237402, + "learning_rate": 1.9874143213349667e-05, + "loss": 0.8375, + "step": 2579 + }, + { + "epoch": 0.07907318867230599, + "grad_norm": 1.7806633061236994, + "learning_rate": 1.9873986174552587e-05, + "loss": 0.8427, + "step": 2580 + }, + { + "epoch": 0.0791038371950472, + "grad_norm": 1.6178882426147831, + "learning_rate": 1.98738290384646e-05, + "loss": 0.8361, + "step": 2581 + }, + { + "epoch": 0.0791344857177884, + "grad_norm": 1.629027723125878, + "learning_rate": 1.987367180508725e-05, + "loss": 0.749, + "step": 2582 + }, + { + "epoch": 0.07916513424052961, + "grad_norm": 0.8187539425008467, + "learning_rate": 1.987351447442208e-05, + "loss": 0.6196, + "step": 2583 + }, + { + "epoch": 0.07919578276327081, + "grad_norm": 1.8271416356324441, + "learning_rate": 1.9873357046470648e-05, + "loss": 0.8033, + "step": 2584 + }, + { + "epoch": 0.07922643128601202, + "grad_norm": 1.896775147924495, + "learning_rate": 1.9873199521234503e-05, + "loss": 0.9516, + "step": 2585 + }, + { + "epoch": 0.07925707980875321, + "grad_norm": 2.0604821412848606, + "learning_rate": 1.9873041898715198e-05, + "loss": 0.8506, + "step": 2586 + }, + { + "epoch": 0.07928772833149442, + "grad_norm": 0.7488749890705083, + "learning_rate": 1.9872884178914284e-05, + "loss": 0.6464, + "step": 2587 + }, + { + "epoch": 0.07931837685423562, + "grad_norm": 1.7866042482933213, + "learning_rate": 1.9872726361833313e-05, + "loss": 0.8322, + "step": 2588 + }, + { + "epoch": 0.07934902537697683, + "grad_norm": 1.639050863759968, + "learning_rate": 1.9872568447473848e-05, + "loss": 0.8052, + "step": 2589 + }, + { + "epoch": 0.07937967389971803, + "grad_norm": 1.764209683688381, + "learning_rate": 1.987241043583744e-05, + "loss": 0.7597, + "step": 2590 + }, + { + "epoch": 0.07941032242245924, + "grad_norm": 1.744291662989436, + "learning_rate": 1.9872252326925642e-05, + "loss": 0.9027, + "step": 2591 + }, + { + "epoch": 0.07944097094520045, + "grad_norm": 1.6747723021265803, + "learning_rate": 1.9872094120740016e-05, + "loss": 0.9387, + "step": 2592 + }, + { + "epoch": 0.07947161946794165, + "grad_norm": 1.7938385944606097, + "learning_rate": 1.9871935817282126e-05, + "loss": 0.7769, + "step": 2593 + }, + { + "epoch": 0.07950226799068284, + "grad_norm": 1.8475385286189738, + "learning_rate": 1.9871777416553523e-05, + "loss": 0.8399, + "step": 2594 + }, + { + "epoch": 0.07953291651342405, + "grad_norm": 0.8887958859013824, + "learning_rate": 1.987161891855577e-05, + "loss": 0.652, + "step": 2595 + }, + { + "epoch": 0.07956356503616525, + "grad_norm": 1.568244868698133, + "learning_rate": 1.987146032329043e-05, + "loss": 0.838, + "step": 2596 + }, + { + "epoch": 0.07959421355890646, + "grad_norm": 1.7537666408062433, + "learning_rate": 1.9871301630759073e-05, + "loss": 0.9096, + "step": 2597 + }, + { + "epoch": 0.07962486208164767, + "grad_norm": 1.8099285012043143, + "learning_rate": 1.987114284096325e-05, + "loss": 0.9216, + "step": 2598 + }, + { + "epoch": 0.07965551060438887, + "grad_norm": 1.867475152096149, + "learning_rate": 1.987098395390453e-05, + "loss": 0.8972, + "step": 2599 + }, + { + "epoch": 0.07968615912713008, + "grad_norm": 1.5611289735204974, + "learning_rate": 1.9870824969584478e-05, + "loss": 0.8058, + "step": 2600 + }, + { + "epoch": 0.07971680764987128, + "grad_norm": 1.6931145244548256, + "learning_rate": 1.9870665888004666e-05, + "loss": 0.8216, + "step": 2601 + }, + { + "epoch": 0.07974745617261247, + "grad_norm": 0.8657724322686491, + "learning_rate": 1.9870506709166655e-05, + "loss": 0.6567, + "step": 2602 + }, + { + "epoch": 0.07977810469535368, + "grad_norm": 1.6555242791972649, + "learning_rate": 1.9870347433072015e-05, + "loss": 0.7869, + "step": 2603 + }, + { + "epoch": 0.07980875321809489, + "grad_norm": 1.86098957565478, + "learning_rate": 1.987018805972232e-05, + "loss": 0.9013, + "step": 2604 + }, + { + "epoch": 0.07983940174083609, + "grad_norm": 2.047580086552434, + "learning_rate": 1.9870028589119134e-05, + "loss": 0.9446, + "step": 2605 + }, + { + "epoch": 0.0798700502635773, + "grad_norm": 1.6745962121927485, + "learning_rate": 1.9869869021264033e-05, + "loss": 0.8378, + "step": 2606 + }, + { + "epoch": 0.0799006987863185, + "grad_norm": 1.8299591510694007, + "learning_rate": 1.9869709356158586e-05, + "loss": 0.8688, + "step": 2607 + }, + { + "epoch": 0.07993134730905971, + "grad_norm": 1.8282928109483088, + "learning_rate": 1.9869549593804364e-05, + "loss": 0.8754, + "step": 2608 + }, + { + "epoch": 0.07996199583180091, + "grad_norm": 1.7345632288432369, + "learning_rate": 1.986938973420295e-05, + "loss": 0.7842, + "step": 2609 + }, + { + "epoch": 0.0799926443545421, + "grad_norm": 1.9813440055837932, + "learning_rate": 1.986922977735591e-05, + "loss": 0.8177, + "step": 2610 + }, + { + "epoch": 0.08002329287728331, + "grad_norm": 1.5922356216295561, + "learning_rate": 1.9869069723264826e-05, + "loss": 0.8767, + "step": 2611 + }, + { + "epoch": 0.08005394140002452, + "grad_norm": 1.6208084509406855, + "learning_rate": 1.9868909571931273e-05, + "loss": 0.8721, + "step": 2612 + }, + { + "epoch": 0.08008458992276572, + "grad_norm": 1.8531140206584493, + "learning_rate": 1.986874932335683e-05, + "loss": 0.8475, + "step": 2613 + }, + { + "epoch": 0.08011523844550693, + "grad_norm": 1.6019790961064695, + "learning_rate": 1.9868588977543074e-05, + "loss": 0.8275, + "step": 2614 + }, + { + "epoch": 0.08014588696824813, + "grad_norm": 0.9617895463458178, + "learning_rate": 1.9868428534491584e-05, + "loss": 0.6208, + "step": 2615 + }, + { + "epoch": 0.08017653549098934, + "grad_norm": 0.8557249611472658, + "learning_rate": 1.9868267994203943e-05, + "loss": 0.6635, + "step": 2616 + }, + { + "epoch": 0.08020718401373053, + "grad_norm": 2.0885597356418906, + "learning_rate": 1.9868107356681735e-05, + "loss": 0.9688, + "step": 2617 + }, + { + "epoch": 0.08023783253647174, + "grad_norm": 1.982653900683305, + "learning_rate": 1.9867946621926538e-05, + "loss": 0.8051, + "step": 2618 + }, + { + "epoch": 0.08026848105921294, + "grad_norm": 1.7070409835271345, + "learning_rate": 1.9867785789939937e-05, + "loss": 0.7327, + "step": 2619 + }, + { + "epoch": 0.08029912958195415, + "grad_norm": 1.0968411963080107, + "learning_rate": 1.986762486072352e-05, + "loss": 0.6693, + "step": 2620 + }, + { + "epoch": 0.08032977810469535, + "grad_norm": 1.8647872252929134, + "learning_rate": 1.9867463834278872e-05, + "loss": 0.9063, + "step": 2621 + }, + { + "epoch": 0.08036042662743656, + "grad_norm": 1.8065842672642547, + "learning_rate": 1.9867302710607575e-05, + "loss": 0.8228, + "step": 2622 + }, + { + "epoch": 0.08039107515017777, + "grad_norm": 1.9645922831057394, + "learning_rate": 1.9867141489711218e-05, + "loss": 0.8094, + "step": 2623 + }, + { + "epoch": 0.08042172367291897, + "grad_norm": 1.9584231982962528, + "learning_rate": 1.9866980171591396e-05, + "loss": 0.8654, + "step": 2624 + }, + { + "epoch": 0.08045237219566016, + "grad_norm": 1.8442248972473665, + "learning_rate": 1.9866818756249694e-05, + "loss": 0.9694, + "step": 2625 + }, + { + "epoch": 0.08048302071840137, + "grad_norm": 1.6393560776857226, + "learning_rate": 1.98666572436877e-05, + "loss": 0.8488, + "step": 2626 + }, + { + "epoch": 0.08051366924114257, + "grad_norm": 1.669641679201406, + "learning_rate": 1.986649563390701e-05, + "loss": 0.8957, + "step": 2627 + }, + { + "epoch": 0.08054431776388378, + "grad_norm": 1.7984401806358403, + "learning_rate": 1.9866333926909208e-05, + "loss": 0.9641, + "step": 2628 + }, + { + "epoch": 0.08057496628662499, + "grad_norm": 1.9884410743248946, + "learning_rate": 1.98661721226959e-05, + "loss": 0.9072, + "step": 2629 + }, + { + "epoch": 0.08060561480936619, + "grad_norm": 1.73970420546303, + "learning_rate": 1.986601022126867e-05, + "loss": 0.7679, + "step": 2630 + }, + { + "epoch": 0.0806362633321074, + "grad_norm": 1.7903954555186488, + "learning_rate": 1.986584822262912e-05, + "loss": 0.9827, + "step": 2631 + }, + { + "epoch": 0.0806669118548486, + "grad_norm": 1.8267974324842569, + "learning_rate": 1.9865686126778837e-05, + "loss": 0.9057, + "step": 2632 + }, + { + "epoch": 0.0806975603775898, + "grad_norm": 1.8040050171398303, + "learning_rate": 1.9865523933719432e-05, + "loss": 0.8409, + "step": 2633 + }, + { + "epoch": 0.080728208900331, + "grad_norm": 1.8397046148621305, + "learning_rate": 1.9865361643452493e-05, + "loss": 0.7234, + "step": 2634 + }, + { + "epoch": 0.0807588574230722, + "grad_norm": 1.6241473116089362, + "learning_rate": 1.986519925597962e-05, + "loss": 0.8761, + "step": 2635 + }, + { + "epoch": 0.08078950594581341, + "grad_norm": 1.7871893088635973, + "learning_rate": 1.9865036771302414e-05, + "loss": 0.9836, + "step": 2636 + }, + { + "epoch": 0.08082015446855462, + "grad_norm": 2.044358406586379, + "learning_rate": 1.9864874189422475e-05, + "loss": 0.7888, + "step": 2637 + }, + { + "epoch": 0.08085080299129582, + "grad_norm": 1.664048701191586, + "learning_rate": 1.9864711510341413e-05, + "loss": 0.8398, + "step": 2638 + }, + { + "epoch": 0.08088145151403703, + "grad_norm": 1.8989960139482436, + "learning_rate": 1.986454873406082e-05, + "loss": 0.7587, + "step": 2639 + }, + { + "epoch": 0.08091210003677823, + "grad_norm": 1.892069688968932, + "learning_rate": 1.9864385860582305e-05, + "loss": 0.8447, + "step": 2640 + }, + { + "epoch": 0.08094274855951943, + "grad_norm": 2.1323109476846467, + "learning_rate": 1.986422288990747e-05, + "loss": 0.8215, + "step": 2641 + }, + { + "epoch": 0.08097339708226063, + "grad_norm": 0.8092169079221678, + "learning_rate": 1.986405982203793e-05, + "loss": 0.6207, + "step": 2642 + }, + { + "epoch": 0.08100404560500184, + "grad_norm": 1.6770355888880077, + "learning_rate": 1.9863896656975278e-05, + "loss": 0.7892, + "step": 2643 + }, + { + "epoch": 0.08103469412774304, + "grad_norm": 1.8989930712561995, + "learning_rate": 1.9863733394721132e-05, + "loss": 0.773, + "step": 2644 + }, + { + "epoch": 0.08106534265048425, + "grad_norm": 1.7515675201954826, + "learning_rate": 1.9863570035277095e-05, + "loss": 0.9318, + "step": 2645 + }, + { + "epoch": 0.08109599117322545, + "grad_norm": 1.690861931343803, + "learning_rate": 1.9863406578644778e-05, + "loss": 0.8286, + "step": 2646 + }, + { + "epoch": 0.08112663969596666, + "grad_norm": 1.6418525997526185, + "learning_rate": 1.9863243024825794e-05, + "loss": 0.8101, + "step": 2647 + }, + { + "epoch": 0.08115728821870785, + "grad_norm": 1.5563461273160277, + "learning_rate": 1.9863079373821754e-05, + "loss": 0.7497, + "step": 2648 + }, + { + "epoch": 0.08118793674144906, + "grad_norm": 1.7910733502910936, + "learning_rate": 1.986291562563427e-05, + "loss": 0.822, + "step": 2649 + }, + { + "epoch": 0.08121858526419026, + "grad_norm": 1.8250913099558965, + "learning_rate": 1.986275178026495e-05, + "loss": 0.8384, + "step": 2650 + }, + { + "epoch": 0.08124923378693147, + "grad_norm": 1.7285108895939107, + "learning_rate": 1.986258783771542e-05, + "loss": 0.8701, + "step": 2651 + }, + { + "epoch": 0.08127988230967267, + "grad_norm": 1.797324607705682, + "learning_rate": 1.986242379798728e-05, + "loss": 0.8365, + "step": 2652 + }, + { + "epoch": 0.08131053083241388, + "grad_norm": 0.874281142711668, + "learning_rate": 1.9862259661082164e-05, + "loss": 0.6722, + "step": 2653 + }, + { + "epoch": 0.08134117935515509, + "grad_norm": 1.9217386480641607, + "learning_rate": 1.9862095427001672e-05, + "loss": 0.9015, + "step": 2654 + }, + { + "epoch": 0.08137182787789629, + "grad_norm": 1.763761043363255, + "learning_rate": 1.9861931095747437e-05, + "loss": 0.8615, + "step": 2655 + }, + { + "epoch": 0.08140247640063748, + "grad_norm": 1.963892252416117, + "learning_rate": 1.986176666732107e-05, + "loss": 0.853, + "step": 2656 + }, + { + "epoch": 0.08143312492337869, + "grad_norm": 1.8901145002172324, + "learning_rate": 1.986160214172419e-05, + "loss": 0.8897, + "step": 2657 + }, + { + "epoch": 0.0814637734461199, + "grad_norm": 1.8104136430982192, + "learning_rate": 1.9861437518958425e-05, + "loss": 0.7666, + "step": 2658 + }, + { + "epoch": 0.0814944219688611, + "grad_norm": 1.830542608764918, + "learning_rate": 1.986127279902539e-05, + "loss": 0.8083, + "step": 2659 + }, + { + "epoch": 0.0815250704916023, + "grad_norm": 1.7417061193194632, + "learning_rate": 1.986110798192671e-05, + "loss": 0.8812, + "step": 2660 + }, + { + "epoch": 0.08155571901434351, + "grad_norm": 1.8538470245207181, + "learning_rate": 1.986094306766401e-05, + "loss": 0.8769, + "step": 2661 + }, + { + "epoch": 0.08158636753708472, + "grad_norm": 1.878149081815696, + "learning_rate": 1.9860778056238916e-05, + "loss": 0.9028, + "step": 2662 + }, + { + "epoch": 0.08161701605982592, + "grad_norm": 1.7161415783442657, + "learning_rate": 1.9860612947653055e-05, + "loss": 0.8395, + "step": 2663 + }, + { + "epoch": 0.08164766458256711, + "grad_norm": 1.763412391633573, + "learning_rate": 1.986044774190805e-05, + "loss": 0.7421, + "step": 2664 + }, + { + "epoch": 0.08167831310530832, + "grad_norm": 1.91781792003776, + "learning_rate": 1.986028243900553e-05, + "loss": 0.8438, + "step": 2665 + }, + { + "epoch": 0.08170896162804953, + "grad_norm": 1.8998093934077254, + "learning_rate": 1.9860117038947123e-05, + "loss": 0.8248, + "step": 2666 + }, + { + "epoch": 0.08173961015079073, + "grad_norm": 1.618952840515163, + "learning_rate": 1.9859951541734462e-05, + "loss": 0.9233, + "step": 2667 + }, + { + "epoch": 0.08177025867353194, + "grad_norm": 1.8190666247505733, + "learning_rate": 1.985978594736918e-05, + "loss": 0.8125, + "step": 2668 + }, + { + "epoch": 0.08180090719627314, + "grad_norm": 1.708873803620966, + "learning_rate": 1.98596202558529e-05, + "loss": 0.8743, + "step": 2669 + }, + { + "epoch": 0.08183155571901435, + "grad_norm": 1.8447739940596166, + "learning_rate": 1.9859454467187256e-05, + "loss": 0.8706, + "step": 2670 + }, + { + "epoch": 0.08186220424175555, + "grad_norm": 1.9251862527108476, + "learning_rate": 1.985928858137389e-05, + "loss": 0.9312, + "step": 2671 + }, + { + "epoch": 0.08189285276449675, + "grad_norm": 1.9232467161785745, + "learning_rate": 1.9859122598414426e-05, + "loss": 0.9142, + "step": 2672 + }, + { + "epoch": 0.08192350128723795, + "grad_norm": 1.000351968336466, + "learning_rate": 1.9858956518310506e-05, + "loss": 0.6433, + "step": 2673 + }, + { + "epoch": 0.08195414980997916, + "grad_norm": 1.6156680927302984, + "learning_rate": 1.9858790341063765e-05, + "loss": 0.6595, + "step": 2674 + }, + { + "epoch": 0.08198479833272036, + "grad_norm": 1.716382846730798, + "learning_rate": 1.9858624066675844e-05, + "loss": 0.8199, + "step": 2675 + }, + { + "epoch": 0.08201544685546157, + "grad_norm": 2.0298099287501135, + "learning_rate": 1.985845769514837e-05, + "loss": 0.8068, + "step": 2676 + }, + { + "epoch": 0.08204609537820277, + "grad_norm": 1.877515392966515, + "learning_rate": 1.9858291226482995e-05, + "loss": 0.8093, + "step": 2677 + }, + { + "epoch": 0.08207674390094398, + "grad_norm": 1.8168923496758669, + "learning_rate": 1.9858124660681356e-05, + "loss": 0.8245, + "step": 2678 + }, + { + "epoch": 0.08210739242368517, + "grad_norm": 1.7289957424047846, + "learning_rate": 1.9857957997745087e-05, + "loss": 0.9016, + "step": 2679 + }, + { + "epoch": 0.08213804094642638, + "grad_norm": 2.011752914548485, + "learning_rate": 1.985779123767584e-05, + "loss": 0.8597, + "step": 2680 + }, + { + "epoch": 0.08216868946916758, + "grad_norm": 0.8502447590567411, + "learning_rate": 1.9857624380475253e-05, + "loss": 0.6258, + "step": 2681 + }, + { + "epoch": 0.08219933799190879, + "grad_norm": 1.757377621903472, + "learning_rate": 1.985745742614497e-05, + "loss": 0.8041, + "step": 2682 + }, + { + "epoch": 0.08222998651465, + "grad_norm": 1.9961110946008238, + "learning_rate": 1.9857290374686637e-05, + "loss": 0.8001, + "step": 2683 + }, + { + "epoch": 0.0822606350373912, + "grad_norm": 1.7092657395085256, + "learning_rate": 1.98571232261019e-05, + "loss": 0.8746, + "step": 2684 + }, + { + "epoch": 0.0822912835601324, + "grad_norm": 2.0177372430982996, + "learning_rate": 1.9856955980392403e-05, + "loss": 0.7561, + "step": 2685 + }, + { + "epoch": 0.08232193208287361, + "grad_norm": 1.704332355052846, + "learning_rate": 1.9856788637559802e-05, + "loss": 0.916, + "step": 2686 + }, + { + "epoch": 0.0823525806056148, + "grad_norm": 0.8004262896989683, + "learning_rate": 1.9856621197605736e-05, + "loss": 0.6256, + "step": 2687 + }, + { + "epoch": 0.08238322912835601, + "grad_norm": 1.6894955102948648, + "learning_rate": 1.985645366053186e-05, + "loss": 0.7482, + "step": 2688 + }, + { + "epoch": 0.08241387765109721, + "grad_norm": 1.7496428632867709, + "learning_rate": 1.9856286026339824e-05, + "loss": 0.8428, + "step": 2689 + }, + { + "epoch": 0.08244452617383842, + "grad_norm": 1.8519064712618607, + "learning_rate": 1.9856118295031286e-05, + "loss": 0.859, + "step": 2690 + }, + { + "epoch": 0.08247517469657963, + "grad_norm": 1.5825947142542978, + "learning_rate": 1.9855950466607887e-05, + "loss": 0.7279, + "step": 2691 + }, + { + "epoch": 0.08250582321932083, + "grad_norm": 1.785609093817189, + "learning_rate": 1.9855782541071285e-05, + "loss": 0.7868, + "step": 2692 + }, + { + "epoch": 0.08253647174206204, + "grad_norm": 1.6254709713519462, + "learning_rate": 1.985561451842314e-05, + "loss": 0.7691, + "step": 2693 + }, + { + "epoch": 0.08256712026480324, + "grad_norm": 1.688770154415389, + "learning_rate": 1.9855446398665102e-05, + "loss": 1.028, + "step": 2694 + }, + { + "epoch": 0.08259776878754443, + "grad_norm": 1.830208362549769, + "learning_rate": 1.985527818179883e-05, + "loss": 0.7338, + "step": 2695 + }, + { + "epoch": 0.08262841731028564, + "grad_norm": 1.625538157992708, + "learning_rate": 1.9855109867825977e-05, + "loss": 0.8693, + "step": 2696 + }, + { + "epoch": 0.08265906583302685, + "grad_norm": 2.026653463202492, + "learning_rate": 1.985494145674821e-05, + "loss": 0.8883, + "step": 2697 + }, + { + "epoch": 0.08268971435576805, + "grad_norm": 1.5788399240971174, + "learning_rate": 1.9854772948567178e-05, + "loss": 0.6661, + "step": 2698 + }, + { + "epoch": 0.08272036287850926, + "grad_norm": 1.798260485352521, + "learning_rate": 1.985460434328455e-05, + "loss": 0.9064, + "step": 2699 + }, + { + "epoch": 0.08275101140125046, + "grad_norm": 1.86372127190441, + "learning_rate": 1.9854435640901984e-05, + "loss": 0.7918, + "step": 2700 + }, + { + "epoch": 0.08278165992399167, + "grad_norm": 1.766926109394524, + "learning_rate": 1.985426684142114e-05, + "loss": 0.7909, + "step": 2701 + }, + { + "epoch": 0.08281230844673287, + "grad_norm": 1.9962967914755918, + "learning_rate": 1.9854097944843686e-05, + "loss": 0.7665, + "step": 2702 + }, + { + "epoch": 0.08284295696947407, + "grad_norm": 1.7400065955492088, + "learning_rate": 1.985392895117128e-05, + "loss": 0.816, + "step": 2703 + }, + { + "epoch": 0.08287360549221527, + "grad_norm": 1.9535827949539089, + "learning_rate": 1.9853759860405596e-05, + "loss": 0.8699, + "step": 2704 + }, + { + "epoch": 0.08290425401495648, + "grad_norm": 1.6094588981795286, + "learning_rate": 1.9853590672548292e-05, + "loss": 0.7986, + "step": 2705 + }, + { + "epoch": 0.08293490253769768, + "grad_norm": 0.9012006286116963, + "learning_rate": 1.9853421387601036e-05, + "loss": 0.6448, + "step": 2706 + }, + { + "epoch": 0.08296555106043889, + "grad_norm": 1.7803889964634967, + "learning_rate": 1.98532520055655e-05, + "loss": 0.8121, + "step": 2707 + }, + { + "epoch": 0.0829961995831801, + "grad_norm": 1.8026368235042811, + "learning_rate": 1.9853082526443352e-05, + "loss": 0.9176, + "step": 2708 + }, + { + "epoch": 0.0830268481059213, + "grad_norm": 1.738447047434475, + "learning_rate": 1.9852912950236262e-05, + "loss": 0.8332, + "step": 2709 + }, + { + "epoch": 0.08305749662866249, + "grad_norm": 0.800213826223912, + "learning_rate": 1.9852743276945897e-05, + "loss": 0.6585, + "step": 2710 + }, + { + "epoch": 0.0830881451514037, + "grad_norm": 1.726717046506566, + "learning_rate": 1.9852573506573932e-05, + "loss": 0.9263, + "step": 2711 + }, + { + "epoch": 0.0831187936741449, + "grad_norm": 1.8631283478806617, + "learning_rate": 1.985240363912204e-05, + "loss": 1.0348, + "step": 2712 + }, + { + "epoch": 0.08314944219688611, + "grad_norm": 0.7701244549333659, + "learning_rate": 1.9852233674591896e-05, + "loss": 0.6529, + "step": 2713 + }, + { + "epoch": 0.08318009071962731, + "grad_norm": 1.7211077911061514, + "learning_rate": 1.985206361298517e-05, + "loss": 0.8249, + "step": 2714 + }, + { + "epoch": 0.08321073924236852, + "grad_norm": 1.6963962691009593, + "learning_rate": 1.985189345430354e-05, + "loss": 0.8838, + "step": 2715 + }, + { + "epoch": 0.08324138776510973, + "grad_norm": 1.6575559089173195, + "learning_rate": 1.9851723198548685e-05, + "loss": 0.8261, + "step": 2716 + }, + { + "epoch": 0.08327203628785093, + "grad_norm": 1.8109802461105282, + "learning_rate": 1.985155284572228e-05, + "loss": 0.7912, + "step": 2717 + }, + { + "epoch": 0.08330268481059212, + "grad_norm": 1.7268690567017098, + "learning_rate": 1.9851382395826003e-05, + "loss": 0.9018, + "step": 2718 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.9713874460721976, + "learning_rate": 1.9851211848861536e-05, + "loss": 0.9112, + "step": 2719 + }, + { + "epoch": 0.08336398185607453, + "grad_norm": 1.8602271148151563, + "learning_rate": 1.9851041204830557e-05, + "loss": 0.9882, + "step": 2720 + }, + { + "epoch": 0.08339463037881574, + "grad_norm": 1.8218286683894975, + "learning_rate": 1.9850870463734748e-05, + "loss": 0.8696, + "step": 2721 + }, + { + "epoch": 0.08342527890155695, + "grad_norm": 0.8630288001049967, + "learning_rate": 1.9850699625575794e-05, + "loss": 0.6629, + "step": 2722 + }, + { + "epoch": 0.08345592742429815, + "grad_norm": 1.6786245881800461, + "learning_rate": 1.9850528690355375e-05, + "loss": 0.7892, + "step": 2723 + }, + { + "epoch": 0.08348657594703936, + "grad_norm": 1.7616196279166223, + "learning_rate": 1.9850357658075175e-05, + "loss": 0.8652, + "step": 2724 + }, + { + "epoch": 0.08351722446978056, + "grad_norm": 1.9647928379594939, + "learning_rate": 1.9850186528736882e-05, + "loss": 0.9886, + "step": 2725 + }, + { + "epoch": 0.08354787299252175, + "grad_norm": 1.7523548684296788, + "learning_rate": 1.9850015302342182e-05, + "loss": 0.8185, + "step": 2726 + }, + { + "epoch": 0.08357852151526296, + "grad_norm": 1.8595129587299033, + "learning_rate": 1.9849843978892764e-05, + "loss": 0.9279, + "step": 2727 + }, + { + "epoch": 0.08360917003800417, + "grad_norm": 2.0233582360115983, + "learning_rate": 1.9849672558390306e-05, + "loss": 0.8489, + "step": 2728 + }, + { + "epoch": 0.08363981856074537, + "grad_norm": 1.8943458565900233, + "learning_rate": 1.984950104083651e-05, + "loss": 0.7782, + "step": 2729 + }, + { + "epoch": 0.08367046708348658, + "grad_norm": 1.783353498177745, + "learning_rate": 1.9849329426233057e-05, + "loss": 0.8563, + "step": 2730 + }, + { + "epoch": 0.08370111560622778, + "grad_norm": 1.7964287237459908, + "learning_rate": 1.984915771458164e-05, + "loss": 0.8818, + "step": 2731 + }, + { + "epoch": 0.08373176412896899, + "grad_norm": 1.6803842804755, + "learning_rate": 1.9848985905883954e-05, + "loss": 0.8349, + "step": 2732 + }, + { + "epoch": 0.0837624126517102, + "grad_norm": 1.8635316616405613, + "learning_rate": 1.984881400014169e-05, + "loss": 0.846, + "step": 2733 + }, + { + "epoch": 0.08379306117445139, + "grad_norm": 1.8651839547227793, + "learning_rate": 1.9848641997356542e-05, + "loss": 0.8664, + "step": 2734 + }, + { + "epoch": 0.08382370969719259, + "grad_norm": 1.7655922977186869, + "learning_rate": 1.9848469897530202e-05, + "loss": 0.9002, + "step": 2735 + }, + { + "epoch": 0.0838543582199338, + "grad_norm": 1.7976662977555846, + "learning_rate": 1.984829770066437e-05, + "loss": 0.8159, + "step": 2736 + }, + { + "epoch": 0.083885006742675, + "grad_norm": 1.8252912230806346, + "learning_rate": 1.984812540676074e-05, + "loss": 0.7702, + "step": 2737 + }, + { + "epoch": 0.08391565526541621, + "grad_norm": 1.7568508690397318, + "learning_rate": 1.9847953015821012e-05, + "loss": 0.8776, + "step": 2738 + }, + { + "epoch": 0.08394630378815741, + "grad_norm": 1.7749353993596444, + "learning_rate": 1.9847780527846886e-05, + "loss": 0.9706, + "step": 2739 + }, + { + "epoch": 0.08397695231089862, + "grad_norm": 2.145313916362374, + "learning_rate": 1.9847607942840053e-05, + "loss": 0.9166, + "step": 2740 + }, + { + "epoch": 0.08400760083363981, + "grad_norm": 1.645578746419574, + "learning_rate": 1.9847435260802225e-05, + "loss": 0.8833, + "step": 2741 + }, + { + "epoch": 0.08403824935638102, + "grad_norm": 1.504286563303107, + "learning_rate": 1.9847262481735093e-05, + "loss": 0.7862, + "step": 2742 + }, + { + "epoch": 0.08406889787912222, + "grad_norm": 1.6092643504484447, + "learning_rate": 1.9847089605640367e-05, + "loss": 0.7632, + "step": 2743 + }, + { + "epoch": 0.08409954640186343, + "grad_norm": 1.7984922727201307, + "learning_rate": 1.9846916632519744e-05, + "loss": 0.8693, + "step": 2744 + }, + { + "epoch": 0.08413019492460463, + "grad_norm": 1.8895699850124386, + "learning_rate": 1.9846743562374937e-05, + "loss": 0.8305, + "step": 2745 + }, + { + "epoch": 0.08416084344734584, + "grad_norm": 1.7555047656599876, + "learning_rate": 1.9846570395207645e-05, + "loss": 0.8357, + "step": 2746 + }, + { + "epoch": 0.08419149197008705, + "grad_norm": 1.6619379496602495, + "learning_rate": 1.9846397131019573e-05, + "loss": 0.8786, + "step": 2747 + }, + { + "epoch": 0.08422214049282825, + "grad_norm": 1.834969810607623, + "learning_rate": 1.9846223769812432e-05, + "loss": 0.8169, + "step": 2748 + }, + { + "epoch": 0.08425278901556944, + "grad_norm": 1.7465483127953023, + "learning_rate": 1.984605031158793e-05, + "loss": 0.8674, + "step": 2749 + }, + { + "epoch": 0.08428343753831065, + "grad_norm": 1.641302304190422, + "learning_rate": 1.9845876756347774e-05, + "loss": 0.8181, + "step": 2750 + }, + { + "epoch": 0.08431408606105185, + "grad_norm": 1.8565056293726718, + "learning_rate": 1.9845703104093675e-05, + "loss": 0.9316, + "step": 2751 + }, + { + "epoch": 0.08434473458379306, + "grad_norm": 1.7934470298519087, + "learning_rate": 1.9845529354827344e-05, + "loss": 0.8179, + "step": 2752 + }, + { + "epoch": 0.08437538310653427, + "grad_norm": 1.8721544783906006, + "learning_rate": 1.9845355508550496e-05, + "loss": 0.8202, + "step": 2753 + }, + { + "epoch": 0.08440603162927547, + "grad_norm": 1.956215542576693, + "learning_rate": 1.9845181565264838e-05, + "loss": 0.9554, + "step": 2754 + }, + { + "epoch": 0.08443668015201668, + "grad_norm": 1.6031302411115635, + "learning_rate": 1.9845007524972088e-05, + "loss": 0.9106, + "step": 2755 + }, + { + "epoch": 0.08446732867475788, + "grad_norm": 1.880851223896434, + "learning_rate": 1.984483338767396e-05, + "loss": 0.8873, + "step": 2756 + }, + { + "epoch": 0.08449797719749907, + "grad_norm": 1.7687920580627148, + "learning_rate": 1.984465915337217e-05, + "loss": 0.8432, + "step": 2757 + }, + { + "epoch": 0.08452862572024028, + "grad_norm": 1.7818154395653425, + "learning_rate": 1.9844484822068432e-05, + "loss": 0.8027, + "step": 2758 + }, + { + "epoch": 0.08455927424298149, + "grad_norm": 1.6681682896896892, + "learning_rate": 1.9844310393764468e-05, + "loss": 0.7978, + "step": 2759 + }, + { + "epoch": 0.08458992276572269, + "grad_norm": 0.9443645596861574, + "learning_rate": 1.9844135868461998e-05, + "loss": 0.6681, + "step": 2760 + }, + { + "epoch": 0.0846205712884639, + "grad_norm": 1.7453257687072117, + "learning_rate": 1.984396124616273e-05, + "loss": 0.7614, + "step": 2761 + }, + { + "epoch": 0.0846512198112051, + "grad_norm": 1.6268684495505277, + "learning_rate": 1.98437865268684e-05, + "loss": 0.773, + "step": 2762 + }, + { + "epoch": 0.08468186833394631, + "grad_norm": 1.6224074061811493, + "learning_rate": 1.9843611710580724e-05, + "loss": 0.7425, + "step": 2763 + }, + { + "epoch": 0.08471251685668751, + "grad_norm": 1.5822583241956762, + "learning_rate": 1.984343679730142e-05, + "loss": 0.7682, + "step": 2764 + }, + { + "epoch": 0.0847431653794287, + "grad_norm": 1.6606303611120186, + "learning_rate": 1.9843261787032217e-05, + "loss": 0.8374, + "step": 2765 + }, + { + "epoch": 0.08477381390216991, + "grad_norm": 1.6981045804533055, + "learning_rate": 1.9843086679774838e-05, + "loss": 0.8266, + "step": 2766 + }, + { + "epoch": 0.08480446242491112, + "grad_norm": 0.9526603021471701, + "learning_rate": 1.9842911475531005e-05, + "loss": 0.6747, + "step": 2767 + }, + { + "epoch": 0.08483511094765232, + "grad_norm": 1.849941273432567, + "learning_rate": 1.9842736174302444e-05, + "loss": 0.764, + "step": 2768 + }, + { + "epoch": 0.08486575947039353, + "grad_norm": 1.7866557545642507, + "learning_rate": 1.9842560776090888e-05, + "loss": 0.9559, + "step": 2769 + }, + { + "epoch": 0.08489640799313473, + "grad_norm": 1.7127165343647197, + "learning_rate": 1.984238528089806e-05, + "loss": 0.8125, + "step": 2770 + }, + { + "epoch": 0.08492705651587594, + "grad_norm": 1.9663755319705878, + "learning_rate": 1.9842209688725697e-05, + "loss": 0.8499, + "step": 2771 + }, + { + "epoch": 0.08495770503861713, + "grad_norm": 1.8724479863593964, + "learning_rate": 1.9842033999575522e-05, + "loss": 0.8532, + "step": 2772 + }, + { + "epoch": 0.08498835356135834, + "grad_norm": 1.7750729542036539, + "learning_rate": 1.9841858213449266e-05, + "loss": 0.7601, + "step": 2773 + }, + { + "epoch": 0.08501900208409954, + "grad_norm": 2.016474785785976, + "learning_rate": 1.984168233034866e-05, + "loss": 0.9619, + "step": 2774 + }, + { + "epoch": 0.08504965060684075, + "grad_norm": 0.866355090541299, + "learning_rate": 1.9841506350275445e-05, + "loss": 0.6609, + "step": 2775 + }, + { + "epoch": 0.08508029912958195, + "grad_norm": 1.7667827925691701, + "learning_rate": 1.984133027323135e-05, + "loss": 0.8548, + "step": 2776 + }, + { + "epoch": 0.08511094765232316, + "grad_norm": 1.7008406790631116, + "learning_rate": 1.984115409921811e-05, + "loss": 0.7809, + "step": 2777 + }, + { + "epoch": 0.08514159617506437, + "grad_norm": 0.7638175434702001, + "learning_rate": 1.9840977828237455e-05, + "loss": 0.6507, + "step": 2778 + }, + { + "epoch": 0.08517224469780557, + "grad_norm": 1.9193596967579052, + "learning_rate": 1.984080146029113e-05, + "loss": 0.8496, + "step": 2779 + }, + { + "epoch": 0.08520289322054676, + "grad_norm": 0.8124711022650898, + "learning_rate": 1.984062499538087e-05, + "loss": 0.6673, + "step": 2780 + }, + { + "epoch": 0.08523354174328797, + "grad_norm": 1.9233067833032185, + "learning_rate": 1.984044843350842e-05, + "loss": 0.8737, + "step": 2781 + }, + { + "epoch": 0.08526419026602917, + "grad_norm": 1.757919083599449, + "learning_rate": 1.9840271774675508e-05, + "loss": 0.9719, + "step": 2782 + }, + { + "epoch": 0.08529483878877038, + "grad_norm": 1.6808669210999763, + "learning_rate": 1.984009501888388e-05, + "loss": 0.8004, + "step": 2783 + }, + { + "epoch": 0.08532548731151159, + "grad_norm": 1.6602244499240144, + "learning_rate": 1.9839918166135283e-05, + "loss": 0.6992, + "step": 2784 + }, + { + "epoch": 0.08535613583425279, + "grad_norm": 0.8816123877061903, + "learning_rate": 1.983974121643145e-05, + "loss": 0.6419, + "step": 2785 + }, + { + "epoch": 0.085386784356994, + "grad_norm": 1.911619293277552, + "learning_rate": 1.983956416977413e-05, + "loss": 0.8603, + "step": 2786 + }, + { + "epoch": 0.0854174328797352, + "grad_norm": 1.97956461090372, + "learning_rate": 1.9839387026165068e-05, + "loss": 0.9109, + "step": 2787 + }, + { + "epoch": 0.0854480814024764, + "grad_norm": 1.7505780266865028, + "learning_rate": 1.9839209785606005e-05, + "loss": 0.8042, + "step": 2788 + }, + { + "epoch": 0.0854787299252176, + "grad_norm": 1.6211245639274823, + "learning_rate": 1.9839032448098696e-05, + "loss": 0.7972, + "step": 2789 + }, + { + "epoch": 0.0855093784479588, + "grad_norm": 1.6277137133264723, + "learning_rate": 1.983885501364488e-05, + "loss": 0.7824, + "step": 2790 + }, + { + "epoch": 0.08554002697070001, + "grad_norm": 1.8439248244860698, + "learning_rate": 1.983867748224631e-05, + "loss": 0.7366, + "step": 2791 + }, + { + "epoch": 0.08557067549344122, + "grad_norm": 2.1103535525231965, + "learning_rate": 1.983849985390473e-05, + "loss": 0.8405, + "step": 2792 + }, + { + "epoch": 0.08560132401618242, + "grad_norm": 1.633750238116683, + "learning_rate": 1.9838322128621895e-05, + "loss": 0.8724, + "step": 2793 + }, + { + "epoch": 0.08563197253892363, + "grad_norm": 1.6821563042261916, + "learning_rate": 1.9838144306399555e-05, + "loss": 0.8433, + "step": 2794 + }, + { + "epoch": 0.08566262106166483, + "grad_norm": 0.8683050186404029, + "learning_rate": 1.983796638723946e-05, + "loss": 0.6481, + "step": 2795 + }, + { + "epoch": 0.08569326958440603, + "grad_norm": 1.7897480864128832, + "learning_rate": 1.9837788371143368e-05, + "loss": 0.8434, + "step": 2796 + }, + { + "epoch": 0.08572391810714723, + "grad_norm": 1.5448589890541273, + "learning_rate": 1.9837610258113028e-05, + "loss": 0.8322, + "step": 2797 + }, + { + "epoch": 0.08575456662988844, + "grad_norm": 1.924188354623055, + "learning_rate": 1.98374320481502e-05, + "loss": 0.8722, + "step": 2798 + }, + { + "epoch": 0.08578521515262964, + "grad_norm": 1.6814009512920618, + "learning_rate": 1.9837253741256634e-05, + "loss": 0.7696, + "step": 2799 + }, + { + "epoch": 0.08581586367537085, + "grad_norm": 1.6279142659365242, + "learning_rate": 1.983707533743409e-05, + "loss": 0.8464, + "step": 2800 + }, + { + "epoch": 0.08584651219811205, + "grad_norm": 1.7299741196447431, + "learning_rate": 1.9836896836684328e-05, + "loss": 0.906, + "step": 2801 + }, + { + "epoch": 0.08587716072085326, + "grad_norm": 1.6290493055984052, + "learning_rate": 1.9836718239009105e-05, + "loss": 0.9047, + "step": 2802 + }, + { + "epoch": 0.08590780924359445, + "grad_norm": 1.8559537412319078, + "learning_rate": 1.9836539544410178e-05, + "loss": 0.7881, + "step": 2803 + }, + { + "epoch": 0.08593845776633566, + "grad_norm": 1.812046412055808, + "learning_rate": 1.983636075288931e-05, + "loss": 0.82, + "step": 2804 + }, + { + "epoch": 0.08596910628907686, + "grad_norm": 1.8018289070145534, + "learning_rate": 1.9836181864448263e-05, + "loss": 0.6735, + "step": 2805 + }, + { + "epoch": 0.08599975481181807, + "grad_norm": 1.880376738068484, + "learning_rate": 1.98360028790888e-05, + "loss": 0.9231, + "step": 2806 + }, + { + "epoch": 0.08603040333455927, + "grad_norm": 1.6147525249458252, + "learning_rate": 1.9835823796812686e-05, + "loss": 0.8619, + "step": 2807 + }, + { + "epoch": 0.08606105185730048, + "grad_norm": 1.9849287520506895, + "learning_rate": 1.983564461762168e-05, + "loss": 0.841, + "step": 2808 + }, + { + "epoch": 0.08609170038004169, + "grad_norm": 2.0311148925558933, + "learning_rate": 1.9835465341517553e-05, + "loss": 0.7616, + "step": 2809 + }, + { + "epoch": 0.08612234890278289, + "grad_norm": 1.7752230721398847, + "learning_rate": 1.9835285968502068e-05, + "loss": 0.829, + "step": 2810 + }, + { + "epoch": 0.08615299742552408, + "grad_norm": 2.090175716828157, + "learning_rate": 1.9835106498577e-05, + "loss": 0.9132, + "step": 2811 + }, + { + "epoch": 0.08618364594826529, + "grad_norm": 0.9658195637417007, + "learning_rate": 1.9834926931744102e-05, + "loss": 0.6528, + "step": 2812 + }, + { + "epoch": 0.0862142944710065, + "grad_norm": 1.7395415453612812, + "learning_rate": 1.983474726800516e-05, + "loss": 0.8243, + "step": 2813 + }, + { + "epoch": 0.0862449429937477, + "grad_norm": 1.8035896374902776, + "learning_rate": 1.9834567507361932e-05, + "loss": 0.9081, + "step": 2814 + }, + { + "epoch": 0.0862755915164889, + "grad_norm": 0.7713775022572577, + "learning_rate": 1.9834387649816195e-05, + "loss": 0.6631, + "step": 2815 + }, + { + "epoch": 0.08630624003923011, + "grad_norm": 1.7441732099527578, + "learning_rate": 1.983420769536972e-05, + "loss": 0.92, + "step": 2816 + }, + { + "epoch": 0.08633688856197132, + "grad_norm": 1.7606532728775, + "learning_rate": 1.983402764402428e-05, + "loss": 0.931, + "step": 2817 + }, + { + "epoch": 0.08636753708471252, + "grad_norm": 1.7605003707161482, + "learning_rate": 1.983384749578165e-05, + "loss": 0.7582, + "step": 2818 + }, + { + "epoch": 0.08639818560745371, + "grad_norm": 1.743712716985263, + "learning_rate": 1.9833667250643608e-05, + "loss": 0.9207, + "step": 2819 + }, + { + "epoch": 0.08642883413019492, + "grad_norm": 1.800682066973335, + "learning_rate": 1.983348690861192e-05, + "loss": 0.9402, + "step": 2820 + }, + { + "epoch": 0.08645948265293613, + "grad_norm": 1.941949297026481, + "learning_rate": 1.9833306469688377e-05, + "loss": 0.8592, + "step": 2821 + }, + { + "epoch": 0.08649013117567733, + "grad_norm": 1.559336658700195, + "learning_rate": 1.9833125933874743e-05, + "loss": 0.8053, + "step": 2822 + }, + { + "epoch": 0.08652077969841854, + "grad_norm": 1.9578645175609497, + "learning_rate": 1.9832945301172808e-05, + "loss": 0.7944, + "step": 2823 + }, + { + "epoch": 0.08655142822115974, + "grad_norm": 1.0272550159492226, + "learning_rate": 1.9832764571584344e-05, + "loss": 0.6574, + "step": 2824 + }, + { + "epoch": 0.08658207674390095, + "grad_norm": 0.9037869657925446, + "learning_rate": 1.9832583745111137e-05, + "loss": 0.6794, + "step": 2825 + }, + { + "epoch": 0.08661272526664215, + "grad_norm": 1.7455950944294851, + "learning_rate": 1.9832402821754962e-05, + "loss": 0.8139, + "step": 2826 + }, + { + "epoch": 0.08664337378938335, + "grad_norm": 1.863010611385687, + "learning_rate": 1.9832221801517612e-05, + "loss": 0.8417, + "step": 2827 + }, + { + "epoch": 0.08667402231212455, + "grad_norm": 1.7943553602773097, + "learning_rate": 1.9832040684400865e-05, + "loss": 0.891, + "step": 2828 + }, + { + "epoch": 0.08670467083486576, + "grad_norm": 1.5550460544185745, + "learning_rate": 1.9831859470406503e-05, + "loss": 0.7173, + "step": 2829 + }, + { + "epoch": 0.08673531935760696, + "grad_norm": 1.6063275688525855, + "learning_rate": 1.9831678159536313e-05, + "loss": 0.806, + "step": 2830 + }, + { + "epoch": 0.08676596788034817, + "grad_norm": 1.6441214941298876, + "learning_rate": 1.9831496751792082e-05, + "loss": 0.8516, + "step": 2831 + }, + { + "epoch": 0.08679661640308937, + "grad_norm": 1.668793744286885, + "learning_rate": 1.98313152471756e-05, + "loss": 0.8921, + "step": 2832 + }, + { + "epoch": 0.08682726492583058, + "grad_norm": 1.5882408336981217, + "learning_rate": 1.9831133645688653e-05, + "loss": 0.8408, + "step": 2833 + }, + { + "epoch": 0.08685791344857179, + "grad_norm": 1.414837381315083, + "learning_rate": 1.9830951947333032e-05, + "loss": 0.6738, + "step": 2834 + }, + { + "epoch": 0.08688856197131298, + "grad_norm": 1.7142669806383741, + "learning_rate": 1.9830770152110523e-05, + "loss": 0.8872, + "step": 2835 + }, + { + "epoch": 0.08691921049405418, + "grad_norm": 2.0969342021921653, + "learning_rate": 1.9830588260022923e-05, + "loss": 0.831, + "step": 2836 + }, + { + "epoch": 0.08694985901679539, + "grad_norm": 1.80257528418417, + "learning_rate": 1.9830406271072023e-05, + "loss": 0.8386, + "step": 2837 + }, + { + "epoch": 0.0869805075395366, + "grad_norm": 1.6902832858576615, + "learning_rate": 1.983022418525961e-05, + "loss": 0.81, + "step": 2838 + }, + { + "epoch": 0.0870111560622778, + "grad_norm": 1.6875986080936114, + "learning_rate": 1.9830042002587486e-05, + "loss": 0.7378, + "step": 2839 + }, + { + "epoch": 0.087041804585019, + "grad_norm": 1.7875948958213213, + "learning_rate": 1.9829859723057443e-05, + "loss": 0.8683, + "step": 2840 + }, + { + "epoch": 0.08707245310776021, + "grad_norm": 1.6802274500616334, + "learning_rate": 1.9829677346671278e-05, + "loss": 0.8168, + "step": 2841 + }, + { + "epoch": 0.0871031016305014, + "grad_norm": 2.0074373874649867, + "learning_rate": 1.9829494873430787e-05, + "loss": 0.9104, + "step": 2842 + }, + { + "epoch": 0.08713375015324261, + "grad_norm": 1.7583640944719103, + "learning_rate": 1.9829312303337768e-05, + "loss": 0.9157, + "step": 2843 + }, + { + "epoch": 0.08716439867598381, + "grad_norm": 1.6965528685568276, + "learning_rate": 1.9829129636394016e-05, + "loss": 0.8146, + "step": 2844 + }, + { + "epoch": 0.08719504719872502, + "grad_norm": 1.9099131667584062, + "learning_rate": 1.9828946872601336e-05, + "loss": 0.8731, + "step": 2845 + }, + { + "epoch": 0.08722569572146623, + "grad_norm": 1.6931450017877192, + "learning_rate": 1.9828764011961532e-05, + "loss": 0.7516, + "step": 2846 + }, + { + "epoch": 0.08725634424420743, + "grad_norm": 2.1742055858013694, + "learning_rate": 1.98285810544764e-05, + "loss": 0.8201, + "step": 2847 + }, + { + "epoch": 0.08728699276694864, + "grad_norm": 1.7752156671739507, + "learning_rate": 1.9828398000147742e-05, + "loss": 0.8986, + "step": 2848 + }, + { + "epoch": 0.08731764128968984, + "grad_norm": 1.060952395212955, + "learning_rate": 1.982821484897736e-05, + "loss": 0.6634, + "step": 2849 + }, + { + "epoch": 0.08734828981243103, + "grad_norm": 1.6301181981493866, + "learning_rate": 1.9828031600967073e-05, + "loss": 0.8148, + "step": 2850 + }, + { + "epoch": 0.08737893833517224, + "grad_norm": 1.675544369237903, + "learning_rate": 1.982784825611867e-05, + "loss": 0.7393, + "step": 2851 + }, + { + "epoch": 0.08740958685791345, + "grad_norm": 1.923036925517845, + "learning_rate": 1.982766481443396e-05, + "loss": 0.8745, + "step": 2852 + }, + { + "epoch": 0.08744023538065465, + "grad_norm": 1.7315005577645848, + "learning_rate": 1.982748127591476e-05, + "loss": 0.8768, + "step": 2853 + }, + { + "epoch": 0.08747088390339586, + "grad_norm": 1.9446771494018562, + "learning_rate": 1.982729764056287e-05, + "loss": 0.775, + "step": 2854 + }, + { + "epoch": 0.08750153242613706, + "grad_norm": 0.8304465987216777, + "learning_rate": 1.9827113908380102e-05, + "loss": 0.6652, + "step": 2855 + }, + { + "epoch": 0.08753218094887827, + "grad_norm": 1.6530084110853962, + "learning_rate": 1.9826930079368268e-05, + "loss": 0.8883, + "step": 2856 + }, + { + "epoch": 0.08756282947161947, + "grad_norm": 1.608263942678079, + "learning_rate": 1.9826746153529174e-05, + "loss": 0.7277, + "step": 2857 + }, + { + "epoch": 0.08759347799436067, + "grad_norm": 1.832045927744177, + "learning_rate": 1.9826562130864636e-05, + "loss": 0.8583, + "step": 2858 + }, + { + "epoch": 0.08762412651710187, + "grad_norm": 1.7605192548670818, + "learning_rate": 1.982637801137647e-05, + "loss": 0.935, + "step": 2859 + }, + { + "epoch": 0.08765477503984308, + "grad_norm": 2.113343944639964, + "learning_rate": 1.9826193795066487e-05, + "loss": 0.9536, + "step": 2860 + }, + { + "epoch": 0.08768542356258428, + "grad_norm": 1.8151706542844077, + "learning_rate": 1.9826009481936503e-05, + "loss": 0.8988, + "step": 2861 + }, + { + "epoch": 0.08771607208532549, + "grad_norm": 1.6724816584674693, + "learning_rate": 1.982582507198833e-05, + "loss": 0.8454, + "step": 2862 + }, + { + "epoch": 0.0877467206080667, + "grad_norm": 1.9518882052407627, + "learning_rate": 1.9825640565223793e-05, + "loss": 0.8651, + "step": 2863 + }, + { + "epoch": 0.0877773691308079, + "grad_norm": 1.7524958746768111, + "learning_rate": 1.9825455961644703e-05, + "loss": 0.8845, + "step": 2864 + }, + { + "epoch": 0.0878080176535491, + "grad_norm": 1.736861204673834, + "learning_rate": 1.982527126125288e-05, + "loss": 0.9101, + "step": 2865 + }, + { + "epoch": 0.0878386661762903, + "grad_norm": 0.8319688268668863, + "learning_rate": 1.9825086464050147e-05, + "loss": 0.642, + "step": 2866 + }, + { + "epoch": 0.0878693146990315, + "grad_norm": 1.5642410614055615, + "learning_rate": 1.9824901570038323e-05, + "loss": 0.8247, + "step": 2867 + }, + { + "epoch": 0.08789996322177271, + "grad_norm": 1.663995851206741, + "learning_rate": 1.9824716579219233e-05, + "loss": 0.8538, + "step": 2868 + }, + { + "epoch": 0.08793061174451391, + "grad_norm": 0.770467871302813, + "learning_rate": 1.9824531491594695e-05, + "loss": 0.6219, + "step": 2869 + }, + { + "epoch": 0.08796126026725512, + "grad_norm": 1.687467431183974, + "learning_rate": 1.9824346307166532e-05, + "loss": 0.8806, + "step": 2870 + }, + { + "epoch": 0.08799190878999633, + "grad_norm": 1.7032808144004787, + "learning_rate": 1.982416102593657e-05, + "loss": 0.8629, + "step": 2871 + }, + { + "epoch": 0.08802255731273753, + "grad_norm": 1.5439838182244197, + "learning_rate": 1.982397564790664e-05, + "loss": 0.887, + "step": 2872 + }, + { + "epoch": 0.08805320583547872, + "grad_norm": 0.7893472532574626, + "learning_rate": 1.9823790173078563e-05, + "loss": 0.6415, + "step": 2873 + }, + { + "epoch": 0.08808385435821993, + "grad_norm": 1.6925974177770842, + "learning_rate": 1.9823604601454168e-05, + "loss": 0.8649, + "step": 2874 + }, + { + "epoch": 0.08811450288096113, + "grad_norm": 1.7289969004524781, + "learning_rate": 1.9823418933035282e-05, + "loss": 0.8685, + "step": 2875 + }, + { + "epoch": 0.08814515140370234, + "grad_norm": 1.6455968687731102, + "learning_rate": 1.982323316782374e-05, + "loss": 0.7891, + "step": 2876 + }, + { + "epoch": 0.08817579992644355, + "grad_norm": 1.8647404520240298, + "learning_rate": 1.9823047305821363e-05, + "loss": 0.8766, + "step": 2877 + }, + { + "epoch": 0.08820644844918475, + "grad_norm": 1.7790258977658084, + "learning_rate": 1.9822861347029988e-05, + "loss": 0.8711, + "step": 2878 + }, + { + "epoch": 0.08823709697192596, + "grad_norm": 0.7911653802419092, + "learning_rate": 1.982267529145145e-05, + "loss": 0.6597, + "step": 2879 + }, + { + "epoch": 0.08826774549466716, + "grad_norm": 1.7529909653146305, + "learning_rate": 1.982248913908758e-05, + "loss": 0.7588, + "step": 2880 + }, + { + "epoch": 0.08829839401740835, + "grad_norm": 1.646213180419671, + "learning_rate": 1.9822302889940208e-05, + "loss": 0.9262, + "step": 2881 + }, + { + "epoch": 0.08832904254014956, + "grad_norm": 1.6256001553076023, + "learning_rate": 1.9822116544011174e-05, + "loss": 0.7226, + "step": 2882 + }, + { + "epoch": 0.08835969106289077, + "grad_norm": 1.676461705119337, + "learning_rate": 1.982193010130231e-05, + "loss": 0.8992, + "step": 2883 + }, + { + "epoch": 0.08839033958563197, + "grad_norm": 1.6293143180799587, + "learning_rate": 1.9821743561815458e-05, + "loss": 0.8587, + "step": 2884 + }, + { + "epoch": 0.08842098810837318, + "grad_norm": 1.624552522711737, + "learning_rate": 1.9821556925552454e-05, + "loss": 0.8827, + "step": 2885 + }, + { + "epoch": 0.08845163663111438, + "grad_norm": 1.6282465341580503, + "learning_rate": 1.9821370192515137e-05, + "loss": 0.7548, + "step": 2886 + }, + { + "epoch": 0.08848228515385559, + "grad_norm": 1.6002529182162588, + "learning_rate": 1.9821183362705345e-05, + "loss": 0.7903, + "step": 2887 + }, + { + "epoch": 0.0885129336765968, + "grad_norm": 0.7734005593381451, + "learning_rate": 1.9820996436124923e-05, + "loss": 0.655, + "step": 2888 + }, + { + "epoch": 0.08854358219933799, + "grad_norm": 1.879079826303225, + "learning_rate": 1.9820809412775706e-05, + "loss": 0.8051, + "step": 2889 + }, + { + "epoch": 0.08857423072207919, + "grad_norm": 1.7590384650955802, + "learning_rate": 1.9820622292659544e-05, + "loss": 0.8663, + "step": 2890 + }, + { + "epoch": 0.0886048792448204, + "grad_norm": 1.773703369713342, + "learning_rate": 1.9820435075778278e-05, + "loss": 0.8127, + "step": 2891 + }, + { + "epoch": 0.0886355277675616, + "grad_norm": 1.6758462570049026, + "learning_rate": 1.982024776213375e-05, + "loss": 0.7049, + "step": 2892 + }, + { + "epoch": 0.08866617629030281, + "grad_norm": 0.7568972415959387, + "learning_rate": 1.982006035172781e-05, + "loss": 0.6605, + "step": 2893 + }, + { + "epoch": 0.08869682481304401, + "grad_norm": 1.7953457822545318, + "learning_rate": 1.9819872844562304e-05, + "loss": 0.881, + "step": 2894 + }, + { + "epoch": 0.08872747333578522, + "grad_norm": 1.6714500585084386, + "learning_rate": 1.9819685240639077e-05, + "loss": 0.8847, + "step": 2895 + }, + { + "epoch": 0.08875812185852643, + "grad_norm": 0.7278945590187605, + "learning_rate": 1.981949753995998e-05, + "loss": 0.6557, + "step": 2896 + }, + { + "epoch": 0.08878877038126762, + "grad_norm": 1.7226618630016666, + "learning_rate": 1.981930974252686e-05, + "loss": 0.8221, + "step": 2897 + }, + { + "epoch": 0.08881941890400882, + "grad_norm": 1.6614962677254668, + "learning_rate": 1.9819121848341568e-05, + "loss": 0.8439, + "step": 2898 + }, + { + "epoch": 0.08885006742675003, + "grad_norm": 1.7011088025176615, + "learning_rate": 1.981893385740596e-05, + "loss": 0.8152, + "step": 2899 + }, + { + "epoch": 0.08888071594949123, + "grad_norm": 1.7550718568018517, + "learning_rate": 1.981874576972188e-05, + "loss": 0.8787, + "step": 2900 + }, + { + "epoch": 0.08891136447223244, + "grad_norm": 0.791313414375316, + "learning_rate": 1.9818557585291187e-05, + "loss": 0.6418, + "step": 2901 + }, + { + "epoch": 0.08894201299497365, + "grad_norm": 0.8032847220966459, + "learning_rate": 1.9818369304115733e-05, + "loss": 0.675, + "step": 2902 + }, + { + "epoch": 0.08897266151771485, + "grad_norm": 1.7488223920410677, + "learning_rate": 1.9818180926197376e-05, + "loss": 0.8015, + "step": 2903 + }, + { + "epoch": 0.08900331004045604, + "grad_norm": 1.6535561589712309, + "learning_rate": 1.981799245153797e-05, + "loss": 0.7341, + "step": 2904 + }, + { + "epoch": 0.08903395856319725, + "grad_norm": 0.7805646012606445, + "learning_rate": 1.9817803880139372e-05, + "loss": 0.6487, + "step": 2905 + }, + { + "epoch": 0.08906460708593845, + "grad_norm": 2.030680617712876, + "learning_rate": 1.9817615212003442e-05, + "loss": 0.9822, + "step": 2906 + }, + { + "epoch": 0.08909525560867966, + "grad_norm": 1.8222789833357225, + "learning_rate": 1.9817426447132036e-05, + "loss": 0.9552, + "step": 2907 + }, + { + "epoch": 0.08912590413142087, + "grad_norm": 0.8262296780009601, + "learning_rate": 1.9817237585527014e-05, + "loss": 0.6374, + "step": 2908 + }, + { + "epoch": 0.08915655265416207, + "grad_norm": 1.661876762966768, + "learning_rate": 1.981704862719024e-05, + "loss": 0.7762, + "step": 2909 + }, + { + "epoch": 0.08918720117690328, + "grad_norm": 1.873227411610658, + "learning_rate": 1.9816859572123574e-05, + "loss": 0.8353, + "step": 2910 + }, + { + "epoch": 0.08921784969964448, + "grad_norm": 1.845217655352414, + "learning_rate": 1.9816670420328876e-05, + "loss": 0.7816, + "step": 2911 + }, + { + "epoch": 0.08924849822238567, + "grad_norm": 1.723514430498787, + "learning_rate": 1.9816481171808016e-05, + "loss": 0.8082, + "step": 2912 + }, + { + "epoch": 0.08927914674512688, + "grad_norm": 1.5459852769051259, + "learning_rate": 1.9816291826562852e-05, + "loss": 0.7692, + "step": 2913 + }, + { + "epoch": 0.08930979526786809, + "grad_norm": 1.8451230027640915, + "learning_rate": 1.9816102384595256e-05, + "loss": 0.8863, + "step": 2914 + }, + { + "epoch": 0.08934044379060929, + "grad_norm": 1.6921262906601475, + "learning_rate": 1.9815912845907092e-05, + "loss": 0.7857, + "step": 2915 + }, + { + "epoch": 0.0893710923133505, + "grad_norm": 1.8364253731495408, + "learning_rate": 1.9815723210500227e-05, + "loss": 0.8296, + "step": 2916 + }, + { + "epoch": 0.0894017408360917, + "grad_norm": 1.7021430357851304, + "learning_rate": 1.9815533478376528e-05, + "loss": 0.8828, + "step": 2917 + }, + { + "epoch": 0.08943238935883291, + "grad_norm": 1.7325718911803683, + "learning_rate": 1.9815343649537865e-05, + "loss": 0.894, + "step": 2918 + }, + { + "epoch": 0.08946303788157411, + "grad_norm": 0.8177910518307584, + "learning_rate": 1.9815153723986112e-05, + "loss": 0.6481, + "step": 2919 + }, + { + "epoch": 0.0894936864043153, + "grad_norm": 1.8297206830100077, + "learning_rate": 1.981496370172314e-05, + "loss": 0.7964, + "step": 2920 + }, + { + "epoch": 0.08952433492705651, + "grad_norm": 1.7498471769270403, + "learning_rate": 1.9814773582750816e-05, + "loss": 0.9086, + "step": 2921 + }, + { + "epoch": 0.08955498344979772, + "grad_norm": 1.8223817946021903, + "learning_rate": 1.981458336707102e-05, + "loss": 0.9134, + "step": 2922 + }, + { + "epoch": 0.08958563197253892, + "grad_norm": 1.8070269873290792, + "learning_rate": 1.9814393054685618e-05, + "loss": 0.7973, + "step": 2923 + }, + { + "epoch": 0.08961628049528013, + "grad_norm": 1.6205574321296519, + "learning_rate": 1.9814202645596494e-05, + "loss": 0.8769, + "step": 2924 + }, + { + "epoch": 0.08964692901802133, + "grad_norm": 1.588124013556332, + "learning_rate": 1.981401213980552e-05, + "loss": 0.803, + "step": 2925 + }, + { + "epoch": 0.08967757754076254, + "grad_norm": 1.9892431785175027, + "learning_rate": 1.981382153731457e-05, + "loss": 0.8412, + "step": 2926 + }, + { + "epoch": 0.08970822606350375, + "grad_norm": 1.8281414644708747, + "learning_rate": 1.9813630838125527e-05, + "loss": 0.9073, + "step": 2927 + }, + { + "epoch": 0.08973887458624494, + "grad_norm": 0.8321584160787464, + "learning_rate": 1.981344004224027e-05, + "loss": 0.6381, + "step": 2928 + }, + { + "epoch": 0.08976952310898614, + "grad_norm": 1.8679617144155316, + "learning_rate": 1.981324914966068e-05, + "loss": 0.8795, + "step": 2929 + }, + { + "epoch": 0.08980017163172735, + "grad_norm": 1.5222135750827135, + "learning_rate": 1.981305816038863e-05, + "loss": 0.8181, + "step": 2930 + }, + { + "epoch": 0.08983082015446855, + "grad_norm": 1.3876718930350782, + "learning_rate": 1.981286707442601e-05, + "loss": 0.8051, + "step": 2931 + }, + { + "epoch": 0.08986146867720976, + "grad_norm": 1.6559692500317915, + "learning_rate": 1.98126758917747e-05, + "loss": 0.8196, + "step": 2932 + }, + { + "epoch": 0.08989211719995097, + "grad_norm": 1.6784983208929334, + "learning_rate": 1.981248461243658e-05, + "loss": 0.8208, + "step": 2933 + }, + { + "epoch": 0.08992276572269217, + "grad_norm": 0.7670665458044867, + "learning_rate": 1.9812293236413544e-05, + "loss": 0.6302, + "step": 2934 + }, + { + "epoch": 0.08995341424543336, + "grad_norm": 0.809632898973439, + "learning_rate": 1.981210176370747e-05, + "loss": 0.6311, + "step": 2935 + }, + { + "epoch": 0.08998406276817457, + "grad_norm": 1.7814791223242956, + "learning_rate": 1.9811910194320244e-05, + "loss": 0.787, + "step": 2936 + }, + { + "epoch": 0.09001471129091577, + "grad_norm": 1.6909327588222798, + "learning_rate": 1.981171852825376e-05, + "loss": 0.9157, + "step": 2937 + }, + { + "epoch": 0.09004535981365698, + "grad_norm": 0.823827980206362, + "learning_rate": 1.98115267655099e-05, + "loss": 0.6433, + "step": 2938 + }, + { + "epoch": 0.09007600833639819, + "grad_norm": 1.5846074255962164, + "learning_rate": 1.981133490609056e-05, + "loss": 0.8417, + "step": 2939 + }, + { + "epoch": 0.09010665685913939, + "grad_norm": 1.7388023351027273, + "learning_rate": 1.9811142949997624e-05, + "loss": 0.8446, + "step": 2940 + }, + { + "epoch": 0.0901373053818806, + "grad_norm": 1.5735690836249605, + "learning_rate": 1.9810950897232986e-05, + "loss": 0.8056, + "step": 2941 + }, + { + "epoch": 0.0901679539046218, + "grad_norm": 1.722713959408043, + "learning_rate": 1.981075874779854e-05, + "loss": 0.8667, + "step": 2942 + }, + { + "epoch": 0.090198602427363, + "grad_norm": 1.8020697377615003, + "learning_rate": 1.9810566501696178e-05, + "loss": 0.9265, + "step": 2943 + }, + { + "epoch": 0.0902292509501042, + "grad_norm": 1.5944591250529752, + "learning_rate": 1.981037415892779e-05, + "loss": 0.8044, + "step": 2944 + }, + { + "epoch": 0.0902598994728454, + "grad_norm": 0.7941263748619376, + "learning_rate": 1.981018171949528e-05, + "loss": 0.6494, + "step": 2945 + }, + { + "epoch": 0.09029054799558661, + "grad_norm": 1.6795272044126774, + "learning_rate": 1.980998918340054e-05, + "loss": 0.814, + "step": 2946 + }, + { + "epoch": 0.09032119651832782, + "grad_norm": 0.7653195570310171, + "learning_rate": 1.9809796550645467e-05, + "loss": 0.6424, + "step": 2947 + }, + { + "epoch": 0.09035184504106902, + "grad_norm": 1.8740088815565028, + "learning_rate": 1.9809603821231957e-05, + "loss": 0.7565, + "step": 2948 + }, + { + "epoch": 0.09038249356381023, + "grad_norm": 2.145276013290805, + "learning_rate": 1.9809410995161908e-05, + "loss": 0.8456, + "step": 2949 + }, + { + "epoch": 0.09041314208655143, + "grad_norm": 1.847813918112985, + "learning_rate": 1.9809218072437227e-05, + "loss": 0.917, + "step": 2950 + }, + { + "epoch": 0.09044379060929263, + "grad_norm": 1.638435519047731, + "learning_rate": 1.980902505305981e-05, + "loss": 0.9615, + "step": 2951 + }, + { + "epoch": 0.09047443913203383, + "grad_norm": 1.70361271140783, + "learning_rate": 1.9808831937031554e-05, + "loss": 0.6773, + "step": 2952 + }, + { + "epoch": 0.09050508765477504, + "grad_norm": 1.6088984690520023, + "learning_rate": 1.9808638724354373e-05, + "loss": 0.7797, + "step": 2953 + }, + { + "epoch": 0.09053573617751624, + "grad_norm": 1.9292245827962973, + "learning_rate": 1.980844541503016e-05, + "loss": 0.7858, + "step": 2954 + }, + { + "epoch": 0.09056638470025745, + "grad_norm": 1.9439877998893265, + "learning_rate": 1.980825200906083e-05, + "loss": 0.8419, + "step": 2955 + }, + { + "epoch": 0.09059703322299865, + "grad_norm": 1.830352070451023, + "learning_rate": 1.9808058506448283e-05, + "loss": 0.8262, + "step": 2956 + }, + { + "epoch": 0.09062768174573986, + "grad_norm": 1.8104609912176424, + "learning_rate": 1.9807864907194423e-05, + "loss": 0.8379, + "step": 2957 + }, + { + "epoch": 0.09065833026848107, + "grad_norm": 1.9362784359641352, + "learning_rate": 1.980767121130116e-05, + "loss": 0.8039, + "step": 2958 + }, + { + "epoch": 0.09068897879122226, + "grad_norm": 1.765668116064676, + "learning_rate": 1.9807477418770406e-05, + "loss": 0.7628, + "step": 2959 + }, + { + "epoch": 0.09071962731396346, + "grad_norm": 1.8360861740637247, + "learning_rate": 1.9807283529604067e-05, + "loss": 0.7962, + "step": 2960 + }, + { + "epoch": 0.09075027583670467, + "grad_norm": 1.1037430624388176, + "learning_rate": 1.9807089543804055e-05, + "loss": 0.634, + "step": 2961 + }, + { + "epoch": 0.09078092435944587, + "grad_norm": 1.6572685424681999, + "learning_rate": 1.9806895461372278e-05, + "loss": 0.855, + "step": 2962 + }, + { + "epoch": 0.09081157288218708, + "grad_norm": 1.7125937657544361, + "learning_rate": 1.980670128231065e-05, + "loss": 0.8846, + "step": 2963 + }, + { + "epoch": 0.09084222140492829, + "grad_norm": 0.8014950046110815, + "learning_rate": 1.9806507006621087e-05, + "loss": 0.6682, + "step": 2964 + }, + { + "epoch": 0.09087286992766949, + "grad_norm": 1.8368166347871833, + "learning_rate": 1.98063126343055e-05, + "loss": 0.9747, + "step": 2965 + }, + { + "epoch": 0.09090351845041068, + "grad_norm": 0.8075764603057127, + "learning_rate": 1.980611816536581e-05, + "loss": 0.6269, + "step": 2966 + }, + { + "epoch": 0.09093416697315189, + "grad_norm": 1.6089388744910398, + "learning_rate": 1.9805923599803928e-05, + "loss": 0.8028, + "step": 2967 + }, + { + "epoch": 0.0909648154958931, + "grad_norm": 1.7994816299765117, + "learning_rate": 1.9805728937621768e-05, + "loss": 0.8799, + "step": 2968 + }, + { + "epoch": 0.0909954640186343, + "grad_norm": 1.606329249041863, + "learning_rate": 1.9805534178821254e-05, + "loss": 0.8783, + "step": 2969 + }, + { + "epoch": 0.0910261125413755, + "grad_norm": 0.8065446908918923, + "learning_rate": 1.9805339323404303e-05, + "loss": 0.6372, + "step": 2970 + }, + { + "epoch": 0.09105676106411671, + "grad_norm": 1.866451745908191, + "learning_rate": 1.9805144371372832e-05, + "loss": 0.9776, + "step": 2971 + }, + { + "epoch": 0.09108740958685792, + "grad_norm": 1.883847428177028, + "learning_rate": 1.9804949322728767e-05, + "loss": 0.7829, + "step": 2972 + }, + { + "epoch": 0.09111805810959912, + "grad_norm": 1.7417039555114686, + "learning_rate": 1.9804754177474027e-05, + "loss": 0.8161, + "step": 2973 + }, + { + "epoch": 0.09114870663234032, + "grad_norm": 0.7693034310084542, + "learning_rate": 1.980455893561054e-05, + "loss": 0.6414, + "step": 2974 + }, + { + "epoch": 0.09117935515508152, + "grad_norm": 1.6045415933431457, + "learning_rate": 1.980436359714022e-05, + "loss": 0.817, + "step": 2975 + }, + { + "epoch": 0.09121000367782273, + "grad_norm": 1.7328944224378473, + "learning_rate": 1.9804168162064997e-05, + "loss": 0.8779, + "step": 2976 + }, + { + "epoch": 0.09124065220056393, + "grad_norm": 1.7699375872142407, + "learning_rate": 1.9803972630386797e-05, + "loss": 0.8119, + "step": 2977 + }, + { + "epoch": 0.09127130072330514, + "grad_norm": 1.9904221261771444, + "learning_rate": 1.9803777002107545e-05, + "loss": 0.8562, + "step": 2978 + }, + { + "epoch": 0.09130194924604634, + "grad_norm": 0.7841050233274532, + "learning_rate": 1.9803581277229177e-05, + "loss": 0.6528, + "step": 2979 + }, + { + "epoch": 0.09133259776878755, + "grad_norm": 0.7483098246929883, + "learning_rate": 1.980338545575361e-05, + "loss": 0.6305, + "step": 2980 + }, + { + "epoch": 0.09136324629152875, + "grad_norm": 1.8386761602227786, + "learning_rate": 1.9803189537682773e-05, + "loss": 0.671, + "step": 2981 + }, + { + "epoch": 0.09139389481426995, + "grad_norm": 1.5447789298243364, + "learning_rate": 1.9802993523018607e-05, + "loss": 0.8586, + "step": 2982 + }, + { + "epoch": 0.09142454333701115, + "grad_norm": 0.7775997755176779, + "learning_rate": 1.9802797411763036e-05, + "loss": 0.6641, + "step": 2983 + }, + { + "epoch": 0.09145519185975236, + "grad_norm": 1.691703764704918, + "learning_rate": 1.9802601203917993e-05, + "loss": 0.8564, + "step": 2984 + }, + { + "epoch": 0.09148584038249356, + "grad_norm": 1.6829218458687303, + "learning_rate": 1.980240489948541e-05, + "loss": 0.8547, + "step": 2985 + }, + { + "epoch": 0.09151648890523477, + "grad_norm": 1.7267354475539545, + "learning_rate": 1.9802208498467228e-05, + "loss": 0.8854, + "step": 2986 + }, + { + "epoch": 0.09154713742797597, + "grad_norm": 1.6836833730253395, + "learning_rate": 1.9802012000865377e-05, + "loss": 0.8057, + "step": 2987 + }, + { + "epoch": 0.09157778595071718, + "grad_norm": 1.5389524988416643, + "learning_rate": 1.9801815406681794e-05, + "loss": 0.7737, + "step": 2988 + }, + { + "epoch": 0.09160843447345839, + "grad_norm": 1.8652880720599243, + "learning_rate": 1.9801618715918413e-05, + "loss": 0.7715, + "step": 2989 + }, + { + "epoch": 0.09163908299619958, + "grad_norm": 0.8750319008115791, + "learning_rate": 1.9801421928577176e-05, + "loss": 0.6547, + "step": 2990 + }, + { + "epoch": 0.09166973151894078, + "grad_norm": 1.609743611870145, + "learning_rate": 1.9801225044660023e-05, + "loss": 0.7955, + "step": 2991 + }, + { + "epoch": 0.09170038004168199, + "grad_norm": 1.5597897923325148, + "learning_rate": 1.980102806416889e-05, + "loss": 0.8293, + "step": 2992 + }, + { + "epoch": 0.0917310285644232, + "grad_norm": 1.5561660310970669, + "learning_rate": 1.980083098710572e-05, + "loss": 0.8816, + "step": 2993 + }, + { + "epoch": 0.0917616770871644, + "grad_norm": 1.8667823995037909, + "learning_rate": 1.9800633813472453e-05, + "loss": 0.8409, + "step": 2994 + }, + { + "epoch": 0.0917923256099056, + "grad_norm": 1.7461399956929764, + "learning_rate": 1.9800436543271035e-05, + "loss": 0.8532, + "step": 2995 + }, + { + "epoch": 0.09182297413264681, + "grad_norm": 1.655231959763987, + "learning_rate": 1.980023917650341e-05, + "loss": 0.8111, + "step": 2996 + }, + { + "epoch": 0.091853622655388, + "grad_norm": 1.6718102672921151, + "learning_rate": 1.980004171317152e-05, + "loss": 0.8583, + "step": 2997 + }, + { + "epoch": 0.09188427117812921, + "grad_norm": 1.6750608352119458, + "learning_rate": 1.979984415327731e-05, + "loss": 0.8265, + "step": 2998 + }, + { + "epoch": 0.09191491970087041, + "grad_norm": 1.7023818744197114, + "learning_rate": 1.979964649682273e-05, + "loss": 0.8025, + "step": 2999 + }, + { + "epoch": 0.09194556822361162, + "grad_norm": 1.666577610398157, + "learning_rate": 1.9799448743809725e-05, + "loss": 0.786, + "step": 3000 + }, + { + "epoch": 0.09197621674635283, + "grad_norm": 1.7693108827345037, + "learning_rate": 1.9799250894240243e-05, + "loss": 0.8475, + "step": 3001 + }, + { + "epoch": 0.09200686526909403, + "grad_norm": 1.8755576356128827, + "learning_rate": 1.9799052948116237e-05, + "loss": 0.8947, + "step": 3002 + }, + { + "epoch": 0.09203751379183524, + "grad_norm": 0.8522742730929002, + "learning_rate": 1.9798854905439652e-05, + "loss": 0.642, + "step": 3003 + }, + { + "epoch": 0.09206816231457644, + "grad_norm": 1.7520062936121832, + "learning_rate": 1.979865676621245e-05, + "loss": 0.8228, + "step": 3004 + }, + { + "epoch": 0.09209881083731764, + "grad_norm": 1.9174077974431882, + "learning_rate": 1.9798458530436567e-05, + "loss": 0.8838, + "step": 3005 + }, + { + "epoch": 0.09212945936005884, + "grad_norm": 2.194203982360426, + "learning_rate": 1.9798260198113966e-05, + "loss": 0.8449, + "step": 3006 + }, + { + "epoch": 0.09216010788280005, + "grad_norm": 1.7180189863992026, + "learning_rate": 1.9798061769246604e-05, + "loss": 0.9327, + "step": 3007 + }, + { + "epoch": 0.09219075640554125, + "grad_norm": 1.7177497642500312, + "learning_rate": 1.979786324383643e-05, + "loss": 0.8032, + "step": 3008 + }, + { + "epoch": 0.09222140492828246, + "grad_norm": 1.5795674435920406, + "learning_rate": 1.9797664621885403e-05, + "loss": 0.7826, + "step": 3009 + }, + { + "epoch": 0.09225205345102366, + "grad_norm": 1.7541568236431886, + "learning_rate": 1.979746590339548e-05, + "loss": 0.8719, + "step": 3010 + }, + { + "epoch": 0.09228270197376487, + "grad_norm": 1.6522118910420847, + "learning_rate": 1.979726708836862e-05, + "loss": 0.7232, + "step": 3011 + }, + { + "epoch": 0.09231335049650607, + "grad_norm": 1.8529749211347177, + "learning_rate": 1.979706817680678e-05, + "loss": 0.7991, + "step": 3012 + }, + { + "epoch": 0.09234399901924727, + "grad_norm": 1.748278832634218, + "learning_rate": 1.979686916871192e-05, + "loss": 0.9212, + "step": 3013 + }, + { + "epoch": 0.09237464754198847, + "grad_norm": 1.950690565490035, + "learning_rate": 1.9796670064086002e-05, + "loss": 0.9192, + "step": 3014 + }, + { + "epoch": 0.09240529606472968, + "grad_norm": 1.6475928495068757, + "learning_rate": 1.9796470862930984e-05, + "loss": 0.8143, + "step": 3015 + }, + { + "epoch": 0.09243594458747088, + "grad_norm": 1.5466469402821696, + "learning_rate": 1.9796271565248836e-05, + "loss": 0.7537, + "step": 3016 + }, + { + "epoch": 0.09246659311021209, + "grad_norm": 0.8710906131458559, + "learning_rate": 1.9796072171041517e-05, + "loss": 0.6672, + "step": 3017 + }, + { + "epoch": 0.0924972416329533, + "grad_norm": 1.8081741417143167, + "learning_rate": 1.9795872680310993e-05, + "loss": 0.8837, + "step": 3018 + }, + { + "epoch": 0.0925278901556945, + "grad_norm": 0.7944847868205533, + "learning_rate": 1.9795673093059228e-05, + "loss": 0.647, + "step": 3019 + }, + { + "epoch": 0.0925585386784357, + "grad_norm": 0.7496987621193865, + "learning_rate": 1.9795473409288187e-05, + "loss": 0.6049, + "step": 3020 + }, + { + "epoch": 0.0925891872011769, + "grad_norm": 1.710727426353926, + "learning_rate": 1.9795273628999846e-05, + "loss": 0.8075, + "step": 3021 + }, + { + "epoch": 0.0926198357239181, + "grad_norm": 1.6525611901422959, + "learning_rate": 1.9795073752196163e-05, + "loss": 0.7785, + "step": 3022 + }, + { + "epoch": 0.09265048424665931, + "grad_norm": 0.8726665777096032, + "learning_rate": 1.9794873778879116e-05, + "loss": 0.6764, + "step": 3023 + }, + { + "epoch": 0.09268113276940051, + "grad_norm": 0.8028737859336209, + "learning_rate": 1.979467370905067e-05, + "loss": 0.657, + "step": 3024 + }, + { + "epoch": 0.09271178129214172, + "grad_norm": 2.043169627835846, + "learning_rate": 1.9794473542712794e-05, + "loss": 0.9386, + "step": 3025 + }, + { + "epoch": 0.09274242981488293, + "grad_norm": 1.7016300943772322, + "learning_rate": 1.979427327986747e-05, + "loss": 0.7664, + "step": 3026 + }, + { + "epoch": 0.09277307833762413, + "grad_norm": 1.887220721367264, + "learning_rate": 1.979407292051666e-05, + "loss": 1.0487, + "step": 3027 + }, + { + "epoch": 0.09280372686036532, + "grad_norm": 1.6439257275020722, + "learning_rate": 1.9793872464662346e-05, + "loss": 0.7197, + "step": 3028 + }, + { + "epoch": 0.09283437538310653, + "grad_norm": 1.7373146408971196, + "learning_rate": 1.9793671912306503e-05, + "loss": 0.7571, + "step": 3029 + }, + { + "epoch": 0.09286502390584774, + "grad_norm": 1.5281162311559218, + "learning_rate": 1.9793471263451103e-05, + "loss": 0.7835, + "step": 3030 + }, + { + "epoch": 0.09289567242858894, + "grad_norm": 1.6664923684613373, + "learning_rate": 1.9793270518098124e-05, + "loss": 0.7741, + "step": 3031 + }, + { + "epoch": 0.09292632095133015, + "grad_norm": 1.6465286515875852, + "learning_rate": 1.9793069676249547e-05, + "loss": 0.8111, + "step": 3032 + }, + { + "epoch": 0.09295696947407135, + "grad_norm": 1.8683408714002527, + "learning_rate": 1.9792868737907345e-05, + "loss": 0.7943, + "step": 3033 + }, + { + "epoch": 0.09298761799681256, + "grad_norm": 1.6822968478873577, + "learning_rate": 1.9792667703073505e-05, + "loss": 0.922, + "step": 3034 + }, + { + "epoch": 0.09301826651955376, + "grad_norm": 1.70551742555647, + "learning_rate": 1.9792466571750005e-05, + "loss": 0.8584, + "step": 3035 + }, + { + "epoch": 0.09304891504229496, + "grad_norm": 1.7380403525655965, + "learning_rate": 1.9792265343938824e-05, + "loss": 0.8187, + "step": 3036 + }, + { + "epoch": 0.09307956356503616, + "grad_norm": 1.6084754467451068, + "learning_rate": 1.979206401964195e-05, + "loss": 0.8035, + "step": 3037 + }, + { + "epoch": 0.09311021208777737, + "grad_norm": 1.6809119171898066, + "learning_rate": 1.9791862598861362e-05, + "loss": 0.8071, + "step": 3038 + }, + { + "epoch": 0.09314086061051857, + "grad_norm": 1.6463327766076858, + "learning_rate": 1.9791661081599047e-05, + "loss": 0.7901, + "step": 3039 + }, + { + "epoch": 0.09317150913325978, + "grad_norm": 1.4712886581371363, + "learning_rate": 1.9791459467856988e-05, + "loss": 0.7494, + "step": 3040 + }, + { + "epoch": 0.09320215765600098, + "grad_norm": 1.9408575668705734, + "learning_rate": 1.9791257757637175e-05, + "loss": 0.7392, + "step": 3041 + }, + { + "epoch": 0.09323280617874219, + "grad_norm": 1.7892577954356261, + "learning_rate": 1.9791055950941597e-05, + "loss": 0.8658, + "step": 3042 + }, + { + "epoch": 0.0932634547014834, + "grad_norm": 1.8298645412305796, + "learning_rate": 1.9790854047772236e-05, + "loss": 0.8529, + "step": 3043 + }, + { + "epoch": 0.09329410322422459, + "grad_norm": 2.049445936511851, + "learning_rate": 1.9790652048131084e-05, + "loss": 0.8093, + "step": 3044 + }, + { + "epoch": 0.09332475174696579, + "grad_norm": 1.639631513128736, + "learning_rate": 1.9790449952020133e-05, + "loss": 0.8075, + "step": 3045 + }, + { + "epoch": 0.093355400269707, + "grad_norm": 1.8148862645230306, + "learning_rate": 1.9790247759441376e-05, + "loss": 0.8217, + "step": 3046 + }, + { + "epoch": 0.0933860487924482, + "grad_norm": 1.7499909468883765, + "learning_rate": 1.97900454703968e-05, + "loss": 0.8156, + "step": 3047 + }, + { + "epoch": 0.09341669731518941, + "grad_norm": 1.7049916189782308, + "learning_rate": 1.9789843084888404e-05, + "loss": 0.7509, + "step": 3048 + }, + { + "epoch": 0.09344734583793061, + "grad_norm": 1.926449366936717, + "learning_rate": 1.9789640602918178e-05, + "loss": 0.8832, + "step": 3049 + }, + { + "epoch": 0.09347799436067182, + "grad_norm": 1.7350150966328644, + "learning_rate": 1.978943802448812e-05, + "loss": 0.9208, + "step": 3050 + }, + { + "epoch": 0.09350864288341303, + "grad_norm": 1.8926321748990496, + "learning_rate": 1.978923534960022e-05, + "loss": 0.8325, + "step": 3051 + }, + { + "epoch": 0.09353929140615422, + "grad_norm": 1.709539770239621, + "learning_rate": 1.9789032578256485e-05, + "loss": 0.7967, + "step": 3052 + }, + { + "epoch": 0.09356993992889542, + "grad_norm": 1.6495841802161364, + "learning_rate": 1.9788829710458905e-05, + "loss": 0.8352, + "step": 3053 + }, + { + "epoch": 0.09360058845163663, + "grad_norm": 1.4619647139503977, + "learning_rate": 1.978862674620948e-05, + "loss": 0.8082, + "step": 3054 + }, + { + "epoch": 0.09363123697437783, + "grad_norm": 1.8235386221245309, + "learning_rate": 1.9788423685510213e-05, + "loss": 0.8554, + "step": 3055 + }, + { + "epoch": 0.09366188549711904, + "grad_norm": 1.230562929518348, + "learning_rate": 1.9788220528363102e-05, + "loss": 0.6856, + "step": 3056 + }, + { + "epoch": 0.09369253401986025, + "grad_norm": 1.6536892270356163, + "learning_rate": 1.978801727477015e-05, + "loss": 0.7508, + "step": 3057 + }, + { + "epoch": 0.09372318254260145, + "grad_norm": 0.7677835176621867, + "learning_rate": 1.978781392473336e-05, + "loss": 0.6266, + "step": 3058 + }, + { + "epoch": 0.09375383106534264, + "grad_norm": 1.9117912796140066, + "learning_rate": 1.9787610478254732e-05, + "loss": 0.7451, + "step": 3059 + }, + { + "epoch": 0.09378447958808385, + "grad_norm": 1.963216816214504, + "learning_rate": 1.9787406935336277e-05, + "loss": 0.8524, + "step": 3060 + }, + { + "epoch": 0.09381512811082506, + "grad_norm": 1.927678824916391, + "learning_rate": 1.9787203295979994e-05, + "loss": 0.8602, + "step": 3061 + }, + { + "epoch": 0.09384577663356626, + "grad_norm": 1.1074946327932484, + "learning_rate": 1.9786999560187895e-05, + "loss": 0.6578, + "step": 3062 + }, + { + "epoch": 0.09387642515630747, + "grad_norm": 1.667176404852728, + "learning_rate": 1.9786795727961987e-05, + "loss": 0.8203, + "step": 3063 + }, + { + "epoch": 0.09390707367904867, + "grad_norm": 1.8698486848849551, + "learning_rate": 1.9786591799304274e-05, + "loss": 1.0297, + "step": 3064 + }, + { + "epoch": 0.09393772220178988, + "grad_norm": 0.87039567429885, + "learning_rate": 1.978638777421677e-05, + "loss": 0.6653, + "step": 3065 + }, + { + "epoch": 0.09396837072453108, + "grad_norm": 1.7939730065506294, + "learning_rate": 1.9786183652701482e-05, + "loss": 0.9157, + "step": 3066 + }, + { + "epoch": 0.09399901924727228, + "grad_norm": 2.1596223531058043, + "learning_rate": 1.9785979434760422e-05, + "loss": 0.9649, + "step": 3067 + }, + { + "epoch": 0.09402966777001348, + "grad_norm": 2.0504372871154852, + "learning_rate": 1.9785775120395604e-05, + "loss": 0.8324, + "step": 3068 + }, + { + "epoch": 0.09406031629275469, + "grad_norm": 0.8127703353405464, + "learning_rate": 1.9785570709609038e-05, + "loss": 0.6367, + "step": 3069 + }, + { + "epoch": 0.09409096481549589, + "grad_norm": 1.9302221509987, + "learning_rate": 1.978536620240274e-05, + "loss": 0.8836, + "step": 3070 + }, + { + "epoch": 0.0941216133382371, + "grad_norm": 1.8262999155494555, + "learning_rate": 1.978516159877873e-05, + "loss": 0.9738, + "step": 3071 + }, + { + "epoch": 0.0941522618609783, + "grad_norm": 2.049352062744661, + "learning_rate": 1.9784956898739014e-05, + "loss": 0.9729, + "step": 3072 + }, + { + "epoch": 0.09418291038371951, + "grad_norm": 0.7917122388485412, + "learning_rate": 1.9784752102285614e-05, + "loss": 0.6655, + "step": 3073 + }, + { + "epoch": 0.09421355890646071, + "grad_norm": 1.9157252317643194, + "learning_rate": 1.978454720942055e-05, + "loss": 0.9131, + "step": 3074 + }, + { + "epoch": 0.0942442074292019, + "grad_norm": 1.5619433186820741, + "learning_rate": 1.978434222014584e-05, + "loss": 0.7494, + "step": 3075 + }, + { + "epoch": 0.09427485595194311, + "grad_norm": 1.6152261643265815, + "learning_rate": 1.97841371344635e-05, + "loss": 0.8835, + "step": 3076 + }, + { + "epoch": 0.09430550447468432, + "grad_norm": 1.5722161428855794, + "learning_rate": 1.9783931952375555e-05, + "loss": 0.9147, + "step": 3077 + }, + { + "epoch": 0.09433615299742552, + "grad_norm": 1.6645235597306733, + "learning_rate": 1.9783726673884023e-05, + "loss": 0.7776, + "step": 3078 + }, + { + "epoch": 0.09436680152016673, + "grad_norm": 1.6145407686671496, + "learning_rate": 1.978352129899093e-05, + "loss": 0.8392, + "step": 3079 + }, + { + "epoch": 0.09439745004290793, + "grad_norm": 1.6237376892216588, + "learning_rate": 1.97833158276983e-05, + "loss": 0.873, + "step": 3080 + }, + { + "epoch": 0.09442809856564914, + "grad_norm": 1.8147207430616077, + "learning_rate": 1.9783110260008155e-05, + "loss": 0.8959, + "step": 3081 + }, + { + "epoch": 0.09445874708839035, + "grad_norm": 1.8378616319237886, + "learning_rate": 1.9782904595922523e-05, + "loss": 0.8632, + "step": 3082 + }, + { + "epoch": 0.09448939561113154, + "grad_norm": 1.5474255910941845, + "learning_rate": 1.9782698835443426e-05, + "loss": 0.7983, + "step": 3083 + }, + { + "epoch": 0.09452004413387274, + "grad_norm": 0.8245638716252407, + "learning_rate": 1.9782492978572895e-05, + "loss": 0.6453, + "step": 3084 + }, + { + "epoch": 0.09455069265661395, + "grad_norm": 1.903310988251372, + "learning_rate": 1.978228702531296e-05, + "loss": 0.9459, + "step": 3085 + }, + { + "epoch": 0.09458134117935516, + "grad_norm": 1.828880628012261, + "learning_rate": 1.9782080975665648e-05, + "loss": 0.8414, + "step": 3086 + }, + { + "epoch": 0.09461198970209636, + "grad_norm": 1.7405326261429956, + "learning_rate": 1.9781874829632986e-05, + "loss": 0.8764, + "step": 3087 + }, + { + "epoch": 0.09464263822483757, + "grad_norm": 1.6448863573250958, + "learning_rate": 1.9781668587217012e-05, + "loss": 0.8167, + "step": 3088 + }, + { + "epoch": 0.09467328674757877, + "grad_norm": 1.699062114578056, + "learning_rate": 1.978146224841975e-05, + "loss": 0.8261, + "step": 3089 + }, + { + "epoch": 0.09470393527031996, + "grad_norm": 1.813543566025014, + "learning_rate": 1.9781255813243245e-05, + "loss": 0.8697, + "step": 3090 + }, + { + "epoch": 0.09473458379306117, + "grad_norm": 1.7194387124349932, + "learning_rate": 1.9781049281689517e-05, + "loss": 0.8785, + "step": 3091 + }, + { + "epoch": 0.09476523231580238, + "grad_norm": 1.7559260328610462, + "learning_rate": 1.9780842653760612e-05, + "loss": 0.751, + "step": 3092 + }, + { + "epoch": 0.09479588083854358, + "grad_norm": 1.5516899744761783, + "learning_rate": 1.978063592945856e-05, + "loss": 0.7413, + "step": 3093 + }, + { + "epoch": 0.09482652936128479, + "grad_norm": 1.6489952184432515, + "learning_rate": 1.97804291087854e-05, + "loss": 0.8505, + "step": 3094 + }, + { + "epoch": 0.09485717788402599, + "grad_norm": 0.8449542026973562, + "learning_rate": 1.9780222191743168e-05, + "loss": 0.6429, + "step": 3095 + }, + { + "epoch": 0.0948878264067672, + "grad_norm": 1.82147383805836, + "learning_rate": 1.9780015178333908e-05, + "loss": 0.7461, + "step": 3096 + }, + { + "epoch": 0.0949184749295084, + "grad_norm": 1.6682088620942255, + "learning_rate": 1.9779808068559655e-05, + "loss": 0.8485, + "step": 3097 + }, + { + "epoch": 0.0949491234522496, + "grad_norm": 1.5874192771625815, + "learning_rate": 1.9779600862422448e-05, + "loss": 0.8793, + "step": 3098 + }, + { + "epoch": 0.0949797719749908, + "grad_norm": 1.6213477323169025, + "learning_rate": 1.9779393559924333e-05, + "loss": 0.6744, + "step": 3099 + }, + { + "epoch": 0.095010420497732, + "grad_norm": 1.794437711558659, + "learning_rate": 1.977918616106735e-05, + "loss": 0.8699, + "step": 3100 + }, + { + "epoch": 0.09504106902047321, + "grad_norm": 0.8051680678747666, + "learning_rate": 1.9778978665853546e-05, + "loss": 0.6715, + "step": 3101 + }, + { + "epoch": 0.09507171754321442, + "grad_norm": 1.7063505589366614, + "learning_rate": 1.9778771074284964e-05, + "loss": 0.8274, + "step": 3102 + }, + { + "epoch": 0.09510236606595562, + "grad_norm": 0.733126686629217, + "learning_rate": 1.9778563386363646e-05, + "loss": 0.6217, + "step": 3103 + }, + { + "epoch": 0.09513301458869683, + "grad_norm": 1.6782412927701813, + "learning_rate": 1.9778355602091643e-05, + "loss": 0.9142, + "step": 3104 + }, + { + "epoch": 0.09516366311143803, + "grad_norm": 1.8111549187884202, + "learning_rate": 1.9778147721470997e-05, + "loss": 0.7889, + "step": 3105 + }, + { + "epoch": 0.09519431163417923, + "grad_norm": 1.5978069034204767, + "learning_rate": 1.9777939744503762e-05, + "loss": 0.8168, + "step": 3106 + }, + { + "epoch": 0.09522496015692043, + "grad_norm": 0.8138729321688962, + "learning_rate": 1.9777731671191987e-05, + "loss": 0.6253, + "step": 3107 + }, + { + "epoch": 0.09525560867966164, + "grad_norm": 1.7121571217748863, + "learning_rate": 1.9777523501537716e-05, + "loss": 0.819, + "step": 3108 + }, + { + "epoch": 0.09528625720240284, + "grad_norm": 1.7117831365342362, + "learning_rate": 1.9777315235543006e-05, + "loss": 0.9117, + "step": 3109 + }, + { + "epoch": 0.09531690572514405, + "grad_norm": 0.754282471032501, + "learning_rate": 1.9777106873209908e-05, + "loss": 0.6459, + "step": 3110 + }, + { + "epoch": 0.09534755424788526, + "grad_norm": 0.7881757113086256, + "learning_rate": 1.9776898414540474e-05, + "loss": 0.6523, + "step": 3111 + }, + { + "epoch": 0.09537820277062646, + "grad_norm": 1.610549220271681, + "learning_rate": 1.9776689859536756e-05, + "loss": 0.7353, + "step": 3112 + }, + { + "epoch": 0.09540885129336767, + "grad_norm": 2.019854963003305, + "learning_rate": 1.9776481208200814e-05, + "loss": 0.886, + "step": 3113 + }, + { + "epoch": 0.09543949981610886, + "grad_norm": 1.7391677586830454, + "learning_rate": 1.9776272460534703e-05, + "loss": 0.9152, + "step": 3114 + }, + { + "epoch": 0.09547014833885006, + "grad_norm": 1.841996269454991, + "learning_rate": 1.9776063616540474e-05, + "loss": 0.6823, + "step": 3115 + }, + { + "epoch": 0.09550079686159127, + "grad_norm": 1.5333580554716795, + "learning_rate": 1.977585467622019e-05, + "loss": 0.7931, + "step": 3116 + }, + { + "epoch": 0.09553144538433248, + "grad_norm": 0.8381021701230599, + "learning_rate": 1.977564563957591e-05, + "loss": 0.6518, + "step": 3117 + }, + { + "epoch": 0.09556209390707368, + "grad_norm": 1.8974540208541895, + "learning_rate": 1.9775436506609693e-05, + "loss": 0.7503, + "step": 3118 + }, + { + "epoch": 0.09559274242981489, + "grad_norm": 1.7374681716778864, + "learning_rate": 1.97752272773236e-05, + "loss": 0.8348, + "step": 3119 + }, + { + "epoch": 0.09562339095255609, + "grad_norm": 1.6020852736193196, + "learning_rate": 1.9775017951719687e-05, + "loss": 0.7963, + "step": 3120 + }, + { + "epoch": 0.09565403947529728, + "grad_norm": 0.7503285976989225, + "learning_rate": 1.9774808529800024e-05, + "loss": 0.624, + "step": 3121 + }, + { + "epoch": 0.09568468799803849, + "grad_norm": 1.7094876282044542, + "learning_rate": 1.9774599011566668e-05, + "loss": 0.7769, + "step": 3122 + }, + { + "epoch": 0.0957153365207797, + "grad_norm": 1.7935551279231354, + "learning_rate": 1.977438939702169e-05, + "loss": 0.792, + "step": 3123 + }, + { + "epoch": 0.0957459850435209, + "grad_norm": 1.5638498646005843, + "learning_rate": 1.9774179686167154e-05, + "loss": 0.8242, + "step": 3124 + }, + { + "epoch": 0.0957766335662621, + "grad_norm": 1.8535673056271949, + "learning_rate": 1.9773969879005123e-05, + "loss": 0.8424, + "step": 3125 + }, + { + "epoch": 0.09580728208900331, + "grad_norm": 1.5459512165334033, + "learning_rate": 1.9773759975537666e-05, + "loss": 0.797, + "step": 3126 + }, + { + "epoch": 0.09583793061174452, + "grad_norm": 1.839490192493102, + "learning_rate": 1.977354997576685e-05, + "loss": 0.8531, + "step": 3127 + }, + { + "epoch": 0.09586857913448572, + "grad_norm": 1.973917646442644, + "learning_rate": 1.9773339879694747e-05, + "loss": 0.841, + "step": 3128 + }, + { + "epoch": 0.09589922765722692, + "grad_norm": 1.789471670516314, + "learning_rate": 1.9773129687323426e-05, + "loss": 0.8403, + "step": 3129 + }, + { + "epoch": 0.09592987617996812, + "grad_norm": 0.8676666766020786, + "learning_rate": 1.9772919398654956e-05, + "loss": 0.6236, + "step": 3130 + }, + { + "epoch": 0.09596052470270933, + "grad_norm": 1.7750703169364637, + "learning_rate": 1.9772709013691413e-05, + "loss": 0.8136, + "step": 3131 + }, + { + "epoch": 0.09599117322545053, + "grad_norm": 1.7250513451926885, + "learning_rate": 1.9772498532434864e-05, + "loss": 0.8201, + "step": 3132 + }, + { + "epoch": 0.09602182174819174, + "grad_norm": 1.741179751692021, + "learning_rate": 1.977228795488739e-05, + "loss": 0.8565, + "step": 3133 + }, + { + "epoch": 0.09605247027093294, + "grad_norm": 1.71529492784598, + "learning_rate": 1.9772077281051062e-05, + "loss": 0.8452, + "step": 3134 + }, + { + "epoch": 0.09608311879367415, + "grad_norm": 1.6737952723926497, + "learning_rate": 1.9771866510927956e-05, + "loss": 0.8371, + "step": 3135 + }, + { + "epoch": 0.09611376731641535, + "grad_norm": 1.7604378618793515, + "learning_rate": 1.9771655644520146e-05, + "loss": 0.8514, + "step": 3136 + }, + { + "epoch": 0.09614441583915655, + "grad_norm": 1.8949952512433337, + "learning_rate": 1.9771444681829714e-05, + "loss": 0.8544, + "step": 3137 + }, + { + "epoch": 0.09617506436189775, + "grad_norm": 0.7974016449181529, + "learning_rate": 1.977123362285874e-05, + "loss": 0.6622, + "step": 3138 + }, + { + "epoch": 0.09620571288463896, + "grad_norm": 1.9744278663831778, + "learning_rate": 1.97710224676093e-05, + "loss": 0.8567, + "step": 3139 + }, + { + "epoch": 0.09623636140738016, + "grad_norm": 1.7035147029857947, + "learning_rate": 1.9770811216083476e-05, + "loss": 0.9955, + "step": 3140 + }, + { + "epoch": 0.09626700993012137, + "grad_norm": 1.6031760309185275, + "learning_rate": 1.9770599868283348e-05, + "loss": 0.9215, + "step": 3141 + }, + { + "epoch": 0.09629765845286258, + "grad_norm": 0.7408284771100146, + "learning_rate": 1.9770388424210997e-05, + "loss": 0.6347, + "step": 3142 + }, + { + "epoch": 0.09632830697560378, + "grad_norm": 1.653953999443104, + "learning_rate": 1.9770176883868513e-05, + "loss": 0.8209, + "step": 3143 + }, + { + "epoch": 0.09635895549834499, + "grad_norm": 1.6366823725281425, + "learning_rate": 1.9769965247257973e-05, + "loss": 0.7892, + "step": 3144 + }, + { + "epoch": 0.09638960402108618, + "grad_norm": 1.732612627738722, + "learning_rate": 1.9769753514381472e-05, + "loss": 0.8511, + "step": 3145 + }, + { + "epoch": 0.09642025254382738, + "grad_norm": 1.9130857075746361, + "learning_rate": 1.9769541685241082e-05, + "loss": 0.8748, + "step": 3146 + }, + { + "epoch": 0.09645090106656859, + "grad_norm": 0.7934747441626271, + "learning_rate": 1.9769329759838905e-05, + "loss": 0.6445, + "step": 3147 + }, + { + "epoch": 0.0964815495893098, + "grad_norm": 1.8485842127573036, + "learning_rate": 1.976911773817702e-05, + "loss": 0.8609, + "step": 3148 + }, + { + "epoch": 0.096512198112051, + "grad_norm": 1.6722861122275137, + "learning_rate": 1.9768905620257514e-05, + "loss": 0.6318, + "step": 3149 + }, + { + "epoch": 0.0965428466347922, + "grad_norm": 1.7087483925438245, + "learning_rate": 1.9768693406082486e-05, + "loss": 0.7596, + "step": 3150 + }, + { + "epoch": 0.09657349515753341, + "grad_norm": 1.7311631619573362, + "learning_rate": 1.976848109565402e-05, + "loss": 0.877, + "step": 3151 + }, + { + "epoch": 0.0966041436802746, + "grad_norm": 1.8016342046446898, + "learning_rate": 1.976826868897421e-05, + "loss": 0.8724, + "step": 3152 + }, + { + "epoch": 0.09663479220301581, + "grad_norm": 1.7682407762741157, + "learning_rate": 1.9768056186045153e-05, + "loss": 0.8107, + "step": 3153 + }, + { + "epoch": 0.09666544072575702, + "grad_norm": 1.684486390190067, + "learning_rate": 1.976784358686894e-05, + "loss": 0.7328, + "step": 3154 + }, + { + "epoch": 0.09669608924849822, + "grad_norm": 1.6219416646564888, + "learning_rate": 1.976763089144766e-05, + "loss": 0.8607, + "step": 3155 + }, + { + "epoch": 0.09672673777123943, + "grad_norm": 0.7971385664497789, + "learning_rate": 1.9767418099783418e-05, + "loss": 0.6408, + "step": 3156 + }, + { + "epoch": 0.09675738629398063, + "grad_norm": 1.6563694441044705, + "learning_rate": 1.9767205211878302e-05, + "loss": 0.7792, + "step": 3157 + }, + { + "epoch": 0.09678803481672184, + "grad_norm": 1.9150563305978812, + "learning_rate": 1.9766992227734417e-05, + "loss": 0.9009, + "step": 3158 + }, + { + "epoch": 0.09681868333946304, + "grad_norm": 1.5387145365074626, + "learning_rate": 1.9766779147353857e-05, + "loss": 0.8989, + "step": 3159 + }, + { + "epoch": 0.09684933186220424, + "grad_norm": 1.4743980777742196, + "learning_rate": 1.9766565970738723e-05, + "loss": 0.7478, + "step": 3160 + }, + { + "epoch": 0.09687998038494544, + "grad_norm": 1.5994385292898943, + "learning_rate": 1.976635269789112e-05, + "loss": 0.8102, + "step": 3161 + }, + { + "epoch": 0.09691062890768665, + "grad_norm": 1.733300205900833, + "learning_rate": 1.9766139328813142e-05, + "loss": 0.7937, + "step": 3162 + }, + { + "epoch": 0.09694127743042785, + "grad_norm": 0.7966063195729579, + "learning_rate": 1.9765925863506893e-05, + "loss": 0.63, + "step": 3163 + }, + { + "epoch": 0.09697192595316906, + "grad_norm": 1.7593771601319694, + "learning_rate": 1.976571230197448e-05, + "loss": 0.8204, + "step": 3164 + }, + { + "epoch": 0.09700257447591026, + "grad_norm": 1.6144101255835592, + "learning_rate": 1.9765498644218003e-05, + "loss": 0.8737, + "step": 3165 + }, + { + "epoch": 0.09703322299865147, + "grad_norm": 1.7717076112545032, + "learning_rate": 1.9765284890239568e-05, + "loss": 0.7492, + "step": 3166 + }, + { + "epoch": 0.09706387152139268, + "grad_norm": 1.7943660665773333, + "learning_rate": 1.9765071040041283e-05, + "loss": 0.9874, + "step": 3167 + }, + { + "epoch": 0.09709452004413387, + "grad_norm": 1.6618369136887359, + "learning_rate": 1.976485709362526e-05, + "loss": 0.869, + "step": 3168 + }, + { + "epoch": 0.09712516856687507, + "grad_norm": 1.672516640567002, + "learning_rate": 1.9764643050993597e-05, + "loss": 0.7793, + "step": 3169 + }, + { + "epoch": 0.09715581708961628, + "grad_norm": 1.6377277759504005, + "learning_rate": 1.976442891214841e-05, + "loss": 0.8876, + "step": 3170 + }, + { + "epoch": 0.09718646561235748, + "grad_norm": 0.7995525921953068, + "learning_rate": 1.9764214677091803e-05, + "loss": 0.6637, + "step": 3171 + }, + { + "epoch": 0.09721711413509869, + "grad_norm": 1.7045151143657056, + "learning_rate": 1.9764000345825893e-05, + "loss": 0.8608, + "step": 3172 + }, + { + "epoch": 0.0972477626578399, + "grad_norm": 0.7567212370979276, + "learning_rate": 1.9763785918352787e-05, + "loss": 0.6374, + "step": 3173 + }, + { + "epoch": 0.0972784111805811, + "grad_norm": 1.608148663475905, + "learning_rate": 1.97635713946746e-05, + "loss": 0.8454, + "step": 3174 + }, + { + "epoch": 0.0973090597033223, + "grad_norm": 1.551154454744598, + "learning_rate": 1.976335677479345e-05, + "loss": 0.78, + "step": 3175 + }, + { + "epoch": 0.0973397082260635, + "grad_norm": 1.9926377592016695, + "learning_rate": 1.9763142058711447e-05, + "loss": 0.9074, + "step": 3176 + }, + { + "epoch": 0.0973703567488047, + "grad_norm": 1.5948218221367694, + "learning_rate": 1.9762927246430704e-05, + "loss": 0.8139, + "step": 3177 + }, + { + "epoch": 0.09740100527154591, + "grad_norm": 1.753010808667041, + "learning_rate": 1.976271233795334e-05, + "loss": 0.845, + "step": 3178 + }, + { + "epoch": 0.09743165379428712, + "grad_norm": 2.0524659634130797, + "learning_rate": 1.976249733328148e-05, + "loss": 0.9294, + "step": 3179 + }, + { + "epoch": 0.09746230231702832, + "grad_norm": 1.7880067004977978, + "learning_rate": 1.9762282232417228e-05, + "loss": 0.8544, + "step": 3180 + }, + { + "epoch": 0.09749295083976953, + "grad_norm": 1.5248513064673264, + "learning_rate": 1.976206703536272e-05, + "loss": 0.7635, + "step": 3181 + }, + { + "epoch": 0.09752359936251073, + "grad_norm": 1.6466770333337064, + "learning_rate": 1.976185174212006e-05, + "loss": 0.8999, + "step": 3182 + }, + { + "epoch": 0.09755424788525192, + "grad_norm": 1.8332762177603095, + "learning_rate": 1.976163635269138e-05, + "loss": 0.7825, + "step": 3183 + }, + { + "epoch": 0.09758489640799313, + "grad_norm": 1.8357192143801886, + "learning_rate": 1.97614208670788e-05, + "loss": 0.7492, + "step": 3184 + }, + { + "epoch": 0.09761554493073434, + "grad_norm": 1.7128547354130965, + "learning_rate": 1.976120528528444e-05, + "loss": 0.8644, + "step": 3185 + }, + { + "epoch": 0.09764619345347554, + "grad_norm": 1.8004832922559262, + "learning_rate": 1.9760989607310432e-05, + "loss": 0.787, + "step": 3186 + }, + { + "epoch": 0.09767684197621675, + "grad_norm": 1.61329188144435, + "learning_rate": 1.976077383315889e-05, + "loss": 0.7914, + "step": 3187 + }, + { + "epoch": 0.09770749049895795, + "grad_norm": 1.5422348483321948, + "learning_rate": 1.976055796283195e-05, + "loss": 0.8301, + "step": 3188 + }, + { + "epoch": 0.09773813902169916, + "grad_norm": 1.9137751007998816, + "learning_rate": 1.9760341996331737e-05, + "loss": 0.8851, + "step": 3189 + }, + { + "epoch": 0.09776878754444036, + "grad_norm": 1.760842308882397, + "learning_rate": 1.976012593366037e-05, + "loss": 0.9375, + "step": 3190 + }, + { + "epoch": 0.09779943606718156, + "grad_norm": 1.7285976194847141, + "learning_rate": 1.9759909774819992e-05, + "loss": 0.8583, + "step": 3191 + }, + { + "epoch": 0.09783008458992276, + "grad_norm": 1.7477595618677557, + "learning_rate": 1.9759693519812723e-05, + "loss": 0.7463, + "step": 3192 + }, + { + "epoch": 0.09786073311266397, + "grad_norm": 1.4643681110196183, + "learning_rate": 1.97594771686407e-05, + "loss": 0.7848, + "step": 3193 + }, + { + "epoch": 0.09789138163540517, + "grad_norm": 1.6841562412409077, + "learning_rate": 1.9759260721306044e-05, + "loss": 0.8697, + "step": 3194 + }, + { + "epoch": 0.09792203015814638, + "grad_norm": 1.0015233383810864, + "learning_rate": 1.9759044177810897e-05, + "loss": 0.6514, + "step": 3195 + }, + { + "epoch": 0.09795267868088758, + "grad_norm": 1.9172851412545646, + "learning_rate": 1.9758827538157394e-05, + "loss": 0.8732, + "step": 3196 + }, + { + "epoch": 0.09798332720362879, + "grad_norm": 1.7946542864088, + "learning_rate": 1.9758610802347665e-05, + "loss": 0.8173, + "step": 3197 + }, + { + "epoch": 0.09801397572637, + "grad_norm": 0.7855351054592813, + "learning_rate": 1.9758393970383846e-05, + "loss": 0.667, + "step": 3198 + }, + { + "epoch": 0.09804462424911119, + "grad_norm": 1.8530119825410716, + "learning_rate": 1.975817704226808e-05, + "loss": 0.9094, + "step": 3199 + }, + { + "epoch": 0.09807527277185239, + "grad_norm": 1.6373610642227954, + "learning_rate": 1.975796001800249e-05, + "loss": 0.8029, + "step": 3200 + }, + { + "epoch": 0.0981059212945936, + "grad_norm": 1.646694401259506, + "learning_rate": 1.975774289758923e-05, + "loss": 0.8446, + "step": 3201 + }, + { + "epoch": 0.0981365698173348, + "grad_norm": 0.9798023647689565, + "learning_rate": 1.975752568103043e-05, + "loss": 0.6417, + "step": 3202 + }, + { + "epoch": 0.09816721834007601, + "grad_norm": 1.8002952089050697, + "learning_rate": 1.975730836832823e-05, + "loss": 0.8381, + "step": 3203 + }, + { + "epoch": 0.09819786686281722, + "grad_norm": 0.7595384941854041, + "learning_rate": 1.975709095948478e-05, + "loss": 0.649, + "step": 3204 + }, + { + "epoch": 0.09822851538555842, + "grad_norm": 1.6937560275836308, + "learning_rate": 1.9756873454502213e-05, + "loss": 0.7965, + "step": 3205 + }, + { + "epoch": 0.09825916390829963, + "grad_norm": 0.7563905950325996, + "learning_rate": 1.9756655853382676e-05, + "loss": 0.6409, + "step": 3206 + }, + { + "epoch": 0.09828981243104082, + "grad_norm": 1.9467677910990244, + "learning_rate": 1.975643815612831e-05, + "loss": 0.8419, + "step": 3207 + }, + { + "epoch": 0.09832046095378202, + "grad_norm": 1.753191745290337, + "learning_rate": 1.9756220362741267e-05, + "loss": 0.7956, + "step": 3208 + }, + { + "epoch": 0.09835110947652323, + "grad_norm": 1.6742831884020175, + "learning_rate": 1.9756002473223685e-05, + "loss": 0.8142, + "step": 3209 + }, + { + "epoch": 0.09838175799926444, + "grad_norm": 1.4857249619983484, + "learning_rate": 1.9755784487577715e-05, + "loss": 0.8517, + "step": 3210 + }, + { + "epoch": 0.09841240652200564, + "grad_norm": 1.6274722773555275, + "learning_rate": 1.9755566405805507e-05, + "loss": 0.8411, + "step": 3211 + }, + { + "epoch": 0.09844305504474685, + "grad_norm": 1.7908563427420494, + "learning_rate": 1.9755348227909205e-05, + "loss": 0.7819, + "step": 3212 + }, + { + "epoch": 0.09847370356748805, + "grad_norm": 0.9239977804161471, + "learning_rate": 1.9755129953890964e-05, + "loss": 0.6479, + "step": 3213 + }, + { + "epoch": 0.09850435209022924, + "grad_norm": 1.8131482128954572, + "learning_rate": 1.9754911583752928e-05, + "loss": 0.8616, + "step": 3214 + }, + { + "epoch": 0.09853500061297045, + "grad_norm": 1.727844035196873, + "learning_rate": 1.9754693117497253e-05, + "loss": 0.7223, + "step": 3215 + }, + { + "epoch": 0.09856564913571166, + "grad_norm": 1.7071487616189105, + "learning_rate": 1.9754474555126092e-05, + "loss": 0.815, + "step": 3216 + }, + { + "epoch": 0.09859629765845286, + "grad_norm": 1.6732287300555566, + "learning_rate": 1.9754255896641595e-05, + "loss": 0.8038, + "step": 3217 + }, + { + "epoch": 0.09862694618119407, + "grad_norm": 1.7260648317074239, + "learning_rate": 1.975403714204592e-05, + "loss": 0.8813, + "step": 3218 + }, + { + "epoch": 0.09865759470393527, + "grad_norm": 0.7840630511455599, + "learning_rate": 1.9753818291341224e-05, + "loss": 0.6321, + "step": 3219 + }, + { + "epoch": 0.09868824322667648, + "grad_norm": 1.534531648434237, + "learning_rate": 1.9753599344529656e-05, + "loss": 0.7724, + "step": 3220 + }, + { + "epoch": 0.09871889174941768, + "grad_norm": 1.8130568564518854, + "learning_rate": 1.9753380301613384e-05, + "loss": 1.0312, + "step": 3221 + }, + { + "epoch": 0.09874954027215888, + "grad_norm": 1.815947960024572, + "learning_rate": 1.9753161162594553e-05, + "loss": 0.8365, + "step": 3222 + }, + { + "epoch": 0.09878018879490008, + "grad_norm": 1.7736050438162505, + "learning_rate": 1.9752941927475335e-05, + "loss": 0.862, + "step": 3223 + }, + { + "epoch": 0.09881083731764129, + "grad_norm": 1.533733043477314, + "learning_rate": 1.9752722596257884e-05, + "loss": 0.7915, + "step": 3224 + }, + { + "epoch": 0.09884148584038249, + "grad_norm": 0.8516503045585282, + "learning_rate": 1.9752503168944363e-05, + "loss": 0.6371, + "step": 3225 + }, + { + "epoch": 0.0988721343631237, + "grad_norm": 0.7731099657991974, + "learning_rate": 1.975228364553693e-05, + "loss": 0.6463, + "step": 3226 + }, + { + "epoch": 0.0989027828858649, + "grad_norm": 1.601395551333172, + "learning_rate": 1.975206402603775e-05, + "loss": 0.7456, + "step": 3227 + }, + { + "epoch": 0.09893343140860611, + "grad_norm": 1.9534325830438455, + "learning_rate": 1.975184431044899e-05, + "loss": 0.9562, + "step": 3228 + }, + { + "epoch": 0.09896407993134732, + "grad_norm": 1.8689760119730896, + "learning_rate": 1.9751624498772815e-05, + "loss": 0.8595, + "step": 3229 + }, + { + "epoch": 0.09899472845408851, + "grad_norm": 1.906808608538717, + "learning_rate": 1.9751404591011387e-05, + "loss": 0.8903, + "step": 3230 + }, + { + "epoch": 0.09902537697682971, + "grad_norm": 1.73903050903759, + "learning_rate": 1.9751184587166876e-05, + "loss": 0.8424, + "step": 3231 + }, + { + "epoch": 0.09905602549957092, + "grad_norm": 1.977058310186585, + "learning_rate": 1.9750964487241445e-05, + "loss": 0.8381, + "step": 3232 + }, + { + "epoch": 0.09908667402231212, + "grad_norm": 1.853593837384588, + "learning_rate": 1.9750744291237267e-05, + "loss": 0.7851, + "step": 3233 + }, + { + "epoch": 0.09911732254505333, + "grad_norm": 1.542855153456081, + "learning_rate": 1.9750523999156513e-05, + "loss": 0.8204, + "step": 3234 + }, + { + "epoch": 0.09914797106779454, + "grad_norm": 1.0541134983242864, + "learning_rate": 1.975030361100135e-05, + "loss": 0.6444, + "step": 3235 + }, + { + "epoch": 0.09917861959053574, + "grad_norm": 0.8816342579228992, + "learning_rate": 1.975008312677395e-05, + "loss": 0.6474, + "step": 3236 + }, + { + "epoch": 0.09920926811327695, + "grad_norm": 1.8686108967484323, + "learning_rate": 1.9749862546476487e-05, + "loss": 0.9116, + "step": 3237 + }, + { + "epoch": 0.09923991663601814, + "grad_norm": 1.70632210596712, + "learning_rate": 1.9749641870111133e-05, + "loss": 0.8661, + "step": 3238 + }, + { + "epoch": 0.09927056515875934, + "grad_norm": 1.8000756148497066, + "learning_rate": 1.9749421097680065e-05, + "loss": 0.773, + "step": 3239 + }, + { + "epoch": 0.09930121368150055, + "grad_norm": 1.7601964542267365, + "learning_rate": 1.9749200229185456e-05, + "loss": 0.9132, + "step": 3240 + }, + { + "epoch": 0.09933186220424176, + "grad_norm": 1.595173793367592, + "learning_rate": 1.974897926462948e-05, + "loss": 0.9313, + "step": 3241 + }, + { + "epoch": 0.09936251072698296, + "grad_norm": 1.6569522095709386, + "learning_rate": 1.9748758204014318e-05, + "loss": 0.7935, + "step": 3242 + }, + { + "epoch": 0.09939315924972417, + "grad_norm": 1.8564905337142583, + "learning_rate": 1.974853704734215e-05, + "loss": 0.8059, + "step": 3243 + }, + { + "epoch": 0.09942380777246537, + "grad_norm": 1.776024783218951, + "learning_rate": 1.974831579461515e-05, + "loss": 0.8097, + "step": 3244 + }, + { + "epoch": 0.09945445629520656, + "grad_norm": 1.7910820080798564, + "learning_rate": 1.97480944458355e-05, + "loss": 0.7078, + "step": 3245 + }, + { + "epoch": 0.09948510481794777, + "grad_norm": 1.9747464116273747, + "learning_rate": 1.974787300100538e-05, + "loss": 0.8237, + "step": 3246 + }, + { + "epoch": 0.09951575334068898, + "grad_norm": 1.7881882297283274, + "learning_rate": 1.9747651460126976e-05, + "loss": 0.8676, + "step": 3247 + }, + { + "epoch": 0.09954640186343018, + "grad_norm": 0.8252192208072243, + "learning_rate": 1.9747429823202467e-05, + "loss": 0.6425, + "step": 3248 + }, + { + "epoch": 0.09957705038617139, + "grad_norm": 1.763610326269001, + "learning_rate": 1.9747208090234035e-05, + "loss": 0.7757, + "step": 3249 + }, + { + "epoch": 0.09960769890891259, + "grad_norm": 1.6522513613855443, + "learning_rate": 1.9746986261223874e-05, + "loss": 0.814, + "step": 3250 + }, + { + "epoch": 0.0996383474316538, + "grad_norm": 1.6506915511819618, + "learning_rate": 1.974676433617416e-05, + "loss": 0.9035, + "step": 3251 + }, + { + "epoch": 0.099668995954395, + "grad_norm": 1.953782958561705, + "learning_rate": 1.974654231508708e-05, + "loss": 0.9479, + "step": 3252 + }, + { + "epoch": 0.0996996444771362, + "grad_norm": 1.762807268225378, + "learning_rate": 1.974632019796483e-05, + "loss": 0.9168, + "step": 3253 + }, + { + "epoch": 0.0997302929998774, + "grad_norm": 1.7271198515662458, + "learning_rate": 1.974609798480959e-05, + "loss": 0.8436, + "step": 3254 + }, + { + "epoch": 0.0997609415226186, + "grad_norm": 1.6318398159672407, + "learning_rate": 1.9745875675623557e-05, + "loss": 0.6874, + "step": 3255 + }, + { + "epoch": 0.09979159004535981, + "grad_norm": 2.002012954634069, + "learning_rate": 1.9745653270408913e-05, + "loss": 0.877, + "step": 3256 + }, + { + "epoch": 0.09982223856810102, + "grad_norm": 1.890686185863082, + "learning_rate": 1.9745430769167856e-05, + "loss": 0.8134, + "step": 3257 + }, + { + "epoch": 0.09985288709084222, + "grad_norm": 1.8283610824232401, + "learning_rate": 1.9745208171902576e-05, + "loss": 0.9938, + "step": 3258 + }, + { + "epoch": 0.09988353561358343, + "grad_norm": 1.7144471616875143, + "learning_rate": 1.9744985478615266e-05, + "loss": 0.8447, + "step": 3259 + }, + { + "epoch": 0.09991418413632464, + "grad_norm": 1.9308374511576654, + "learning_rate": 1.974476268930812e-05, + "loss": 0.7731, + "step": 3260 + }, + { + "epoch": 0.09994483265906583, + "grad_norm": 1.5887583380388712, + "learning_rate": 1.9744539803983335e-05, + "loss": 0.8456, + "step": 3261 + }, + { + "epoch": 0.09997548118180703, + "grad_norm": 1.979516139137804, + "learning_rate": 1.9744316822643105e-05, + "loss": 0.9633, + "step": 3262 + }, + { + "epoch": 0.10000612970454824, + "grad_norm": 1.6132131511316052, + "learning_rate": 1.974409374528963e-05, + "loss": 0.854, + "step": 3263 + }, + { + "epoch": 0.10003677822728944, + "grad_norm": 1.8030137391271286, + "learning_rate": 1.9743870571925107e-05, + "loss": 0.7676, + "step": 3264 + }, + { + "epoch": 0.10006742675003065, + "grad_norm": 1.7225434672679616, + "learning_rate": 1.974364730255173e-05, + "loss": 0.8246, + "step": 3265 + }, + { + "epoch": 0.10009807527277186, + "grad_norm": 1.9065930354428757, + "learning_rate": 1.9743423937171708e-05, + "loss": 0.8206, + "step": 3266 + }, + { + "epoch": 0.10012872379551306, + "grad_norm": 1.7919918966142185, + "learning_rate": 1.9743200475787234e-05, + "loss": 0.9462, + "step": 3267 + }, + { + "epoch": 0.10015937231825427, + "grad_norm": 1.7847889366573357, + "learning_rate": 1.9742976918400513e-05, + "loss": 0.7465, + "step": 3268 + }, + { + "epoch": 0.10019002084099546, + "grad_norm": 1.696239871578549, + "learning_rate": 1.9742753265013744e-05, + "loss": 0.8473, + "step": 3269 + }, + { + "epoch": 0.10022066936373666, + "grad_norm": 1.6617616819487684, + "learning_rate": 1.974252951562914e-05, + "loss": 0.8114, + "step": 3270 + }, + { + "epoch": 0.10025131788647787, + "grad_norm": 1.3952500839410376, + "learning_rate": 1.9742305670248897e-05, + "loss": 0.699, + "step": 3271 + }, + { + "epoch": 0.10028196640921908, + "grad_norm": 1.7470469287708092, + "learning_rate": 1.9742081728875226e-05, + "loss": 0.9075, + "step": 3272 + }, + { + "epoch": 0.10031261493196028, + "grad_norm": 1.9873781423085657, + "learning_rate": 1.974185769151033e-05, + "loss": 0.9793, + "step": 3273 + }, + { + "epoch": 0.10034326345470149, + "grad_norm": 1.6715721759274145, + "learning_rate": 1.9741633558156417e-05, + "loss": 0.7891, + "step": 3274 + }, + { + "epoch": 0.10037391197744269, + "grad_norm": 1.7739432368802694, + "learning_rate": 1.9741409328815698e-05, + "loss": 0.8843, + "step": 3275 + }, + { + "epoch": 0.10040456050018388, + "grad_norm": 1.7852155166253894, + "learning_rate": 1.9741185003490378e-05, + "loss": 0.9458, + "step": 3276 + }, + { + "epoch": 0.10043520902292509, + "grad_norm": 1.7298987819324123, + "learning_rate": 1.9740960582182672e-05, + "loss": 0.8461, + "step": 3277 + }, + { + "epoch": 0.1004658575456663, + "grad_norm": 0.8735940287056916, + "learning_rate": 1.9740736064894786e-05, + "loss": 0.6711, + "step": 3278 + }, + { + "epoch": 0.1004965060684075, + "grad_norm": 1.6102410995707643, + "learning_rate": 1.9740511451628937e-05, + "loss": 0.8308, + "step": 3279 + }, + { + "epoch": 0.1005271545911487, + "grad_norm": 2.0184526593999736, + "learning_rate": 1.9740286742387336e-05, + "loss": 0.8785, + "step": 3280 + }, + { + "epoch": 0.10055780311388991, + "grad_norm": 1.7014681286732316, + "learning_rate": 1.97400619371722e-05, + "loss": 0.8546, + "step": 3281 + }, + { + "epoch": 0.10058845163663112, + "grad_norm": 1.8950241321281447, + "learning_rate": 1.973983703598574e-05, + "loss": 0.7793, + "step": 3282 + }, + { + "epoch": 0.10061910015937232, + "grad_norm": 0.8210363152520306, + "learning_rate": 1.973961203883017e-05, + "loss": 0.6344, + "step": 3283 + }, + { + "epoch": 0.10064974868211352, + "grad_norm": 1.598543327761737, + "learning_rate": 1.9739386945707716e-05, + "loss": 0.78, + "step": 3284 + }, + { + "epoch": 0.10068039720485472, + "grad_norm": 1.8305155067142609, + "learning_rate": 1.973916175662059e-05, + "loss": 0.7969, + "step": 3285 + }, + { + "epoch": 0.10071104572759593, + "grad_norm": 1.8372358318260595, + "learning_rate": 1.9738936471571008e-05, + "loss": 0.8702, + "step": 3286 + }, + { + "epoch": 0.10074169425033713, + "grad_norm": 1.6623065414449767, + "learning_rate": 1.9738711090561193e-05, + "loss": 0.8317, + "step": 3287 + }, + { + "epoch": 0.10077234277307834, + "grad_norm": 1.5132178274626564, + "learning_rate": 1.9738485613593367e-05, + "loss": 0.7032, + "step": 3288 + }, + { + "epoch": 0.10080299129581954, + "grad_norm": 1.849315670314211, + "learning_rate": 1.9738260040669753e-05, + "loss": 0.8571, + "step": 3289 + }, + { + "epoch": 0.10083363981856075, + "grad_norm": 1.7920685787379098, + "learning_rate": 1.973803437179257e-05, + "loss": 0.831, + "step": 3290 + }, + { + "epoch": 0.10086428834130196, + "grad_norm": 1.4776063957125871, + "learning_rate": 1.973780860696404e-05, + "loss": 0.9877, + "step": 3291 + }, + { + "epoch": 0.10089493686404315, + "grad_norm": 1.6491610290951197, + "learning_rate": 1.9737582746186393e-05, + "loss": 0.8442, + "step": 3292 + }, + { + "epoch": 0.10092558538678435, + "grad_norm": 1.8867427841578985, + "learning_rate": 1.973735678946185e-05, + "loss": 0.8487, + "step": 3293 + }, + { + "epoch": 0.10095623390952556, + "grad_norm": 1.8183762330536117, + "learning_rate": 1.9737130736792642e-05, + "loss": 0.7089, + "step": 3294 + }, + { + "epoch": 0.10098688243226676, + "grad_norm": 1.6274736927471787, + "learning_rate": 1.973690458818099e-05, + "loss": 0.7613, + "step": 3295 + }, + { + "epoch": 0.10101753095500797, + "grad_norm": 1.695052715703391, + "learning_rate": 1.973667834362913e-05, + "loss": 0.9125, + "step": 3296 + }, + { + "epoch": 0.10104817947774918, + "grad_norm": 1.545247379458761, + "learning_rate": 1.9736452003139286e-05, + "loss": 0.8447, + "step": 3297 + }, + { + "epoch": 0.10107882800049038, + "grad_norm": 1.61664046123921, + "learning_rate": 1.9736225566713686e-05, + "loss": 0.7658, + "step": 3298 + }, + { + "epoch": 0.10110947652323159, + "grad_norm": 1.8399622768122927, + "learning_rate": 1.9735999034354568e-05, + "loss": 0.9233, + "step": 3299 + }, + { + "epoch": 0.10114012504597278, + "grad_norm": 1.7492523439545318, + "learning_rate": 1.9735772406064158e-05, + "loss": 0.8566, + "step": 3300 + }, + { + "epoch": 0.10117077356871398, + "grad_norm": 1.7499660346114434, + "learning_rate": 1.973554568184469e-05, + "loss": 0.8577, + "step": 3301 + }, + { + "epoch": 0.10120142209145519, + "grad_norm": 1.756413857139216, + "learning_rate": 1.97353188616984e-05, + "loss": 0.8987, + "step": 3302 + }, + { + "epoch": 0.1012320706141964, + "grad_norm": 1.8069510100420993, + "learning_rate": 1.9735091945627527e-05, + "loss": 0.8513, + "step": 3303 + }, + { + "epoch": 0.1012627191369376, + "grad_norm": 1.7614326802981106, + "learning_rate": 1.9734864933634302e-05, + "loss": 0.8848, + "step": 3304 + }, + { + "epoch": 0.1012933676596788, + "grad_norm": 1.6550085820605842, + "learning_rate": 1.9734637825720958e-05, + "loss": 0.7181, + "step": 3305 + }, + { + "epoch": 0.10132401618242001, + "grad_norm": 2.266387051790587, + "learning_rate": 1.9734410621889736e-05, + "loss": 0.846, + "step": 3306 + }, + { + "epoch": 0.1013546647051612, + "grad_norm": 1.9898395799701507, + "learning_rate": 1.9734183322142878e-05, + "loss": 0.9372, + "step": 3307 + }, + { + "epoch": 0.10138531322790241, + "grad_norm": 1.5037424583568841, + "learning_rate": 1.9733955926482623e-05, + "loss": 0.7788, + "step": 3308 + }, + { + "epoch": 0.10141596175064362, + "grad_norm": 1.5918288959968754, + "learning_rate": 1.9733728434911205e-05, + "loss": 0.7984, + "step": 3309 + }, + { + "epoch": 0.10144661027338482, + "grad_norm": 1.7173455162219387, + "learning_rate": 1.9733500847430873e-05, + "loss": 0.8664, + "step": 3310 + }, + { + "epoch": 0.10147725879612603, + "grad_norm": 1.7096664455919586, + "learning_rate": 1.9733273164043867e-05, + "loss": 0.9046, + "step": 3311 + }, + { + "epoch": 0.10150790731886723, + "grad_norm": 1.7356356715241552, + "learning_rate": 1.973304538475243e-05, + "loss": 0.8707, + "step": 3312 + }, + { + "epoch": 0.10153855584160844, + "grad_norm": 0.9392875436826514, + "learning_rate": 1.9732817509558804e-05, + "loss": 0.6634, + "step": 3313 + }, + { + "epoch": 0.10156920436434964, + "grad_norm": 0.8010643761645206, + "learning_rate": 1.9732589538465243e-05, + "loss": 0.6432, + "step": 3314 + }, + { + "epoch": 0.10159985288709084, + "grad_norm": 1.9533769764799016, + "learning_rate": 1.973236147147398e-05, + "loss": 0.8917, + "step": 3315 + }, + { + "epoch": 0.10163050140983204, + "grad_norm": 0.7409229412632563, + "learning_rate": 1.973213330858727e-05, + "loss": 0.6608, + "step": 3316 + }, + { + "epoch": 0.10166114993257325, + "grad_norm": 1.6060016148504412, + "learning_rate": 1.9731905049807364e-05, + "loss": 0.7184, + "step": 3317 + }, + { + "epoch": 0.10169179845531445, + "grad_norm": 0.7777332266594283, + "learning_rate": 1.9731676695136505e-05, + "loss": 0.6699, + "step": 3318 + }, + { + "epoch": 0.10172244697805566, + "grad_norm": 1.6462947722272854, + "learning_rate": 1.973144824457695e-05, + "loss": 0.9292, + "step": 3319 + }, + { + "epoch": 0.10175309550079686, + "grad_norm": 1.6674808067418778, + "learning_rate": 1.9731219698130942e-05, + "loss": 0.8641, + "step": 3320 + }, + { + "epoch": 0.10178374402353807, + "grad_norm": 2.0138593806716583, + "learning_rate": 1.9730991055800738e-05, + "loss": 0.9609, + "step": 3321 + }, + { + "epoch": 0.10181439254627928, + "grad_norm": 1.5381404910235792, + "learning_rate": 1.9730762317588587e-05, + "loss": 0.814, + "step": 3322 + }, + { + "epoch": 0.10184504106902047, + "grad_norm": 1.6951918023332568, + "learning_rate": 1.9730533483496744e-05, + "loss": 0.8638, + "step": 3323 + }, + { + "epoch": 0.10187568959176167, + "grad_norm": 1.687066865991896, + "learning_rate": 1.973030455352747e-05, + "loss": 0.8183, + "step": 3324 + }, + { + "epoch": 0.10190633811450288, + "grad_norm": 1.7894012211402397, + "learning_rate": 1.973007552768301e-05, + "loss": 0.7105, + "step": 3325 + }, + { + "epoch": 0.10193698663724408, + "grad_norm": 0.8898188761767745, + "learning_rate": 1.972984640596563e-05, + "loss": 0.6496, + "step": 3326 + }, + { + "epoch": 0.10196763515998529, + "grad_norm": 1.5400931706942216, + "learning_rate": 1.9729617188377584e-05, + "loss": 0.8657, + "step": 3327 + }, + { + "epoch": 0.1019982836827265, + "grad_norm": 1.7826412500562256, + "learning_rate": 1.972938787492113e-05, + "loss": 0.809, + "step": 3328 + }, + { + "epoch": 0.1020289322054677, + "grad_norm": 1.682144135035354, + "learning_rate": 1.9729158465598527e-05, + "loss": 0.9191, + "step": 3329 + }, + { + "epoch": 0.1020595807282089, + "grad_norm": 1.5284248417666273, + "learning_rate": 1.9728928960412034e-05, + "loss": 0.8229, + "step": 3330 + }, + { + "epoch": 0.1020902292509501, + "grad_norm": 1.8033038700495785, + "learning_rate": 1.972869935936392e-05, + "loss": 0.9017, + "step": 3331 + }, + { + "epoch": 0.1021208777736913, + "grad_norm": 1.5476254014399893, + "learning_rate": 1.9728469662456436e-05, + "loss": 0.8468, + "step": 3332 + }, + { + "epoch": 0.10215152629643251, + "grad_norm": 1.644178008387025, + "learning_rate": 1.9728239869691856e-05, + "loss": 0.822, + "step": 3333 + }, + { + "epoch": 0.10218217481917372, + "grad_norm": 0.8088509948309779, + "learning_rate": 1.972800998107244e-05, + "loss": 0.6648, + "step": 3334 + }, + { + "epoch": 0.10221282334191492, + "grad_norm": 0.7443209347004371, + "learning_rate": 1.972777999660045e-05, + "loss": 0.6289, + "step": 3335 + }, + { + "epoch": 0.10224347186465613, + "grad_norm": 1.7622353075910453, + "learning_rate": 1.9727549916278156e-05, + "loss": 0.8202, + "step": 3336 + }, + { + "epoch": 0.10227412038739733, + "grad_norm": 1.5992141216774982, + "learning_rate": 1.9727319740107823e-05, + "loss": 0.9297, + "step": 3337 + }, + { + "epoch": 0.10230476891013854, + "grad_norm": 1.7382299125347336, + "learning_rate": 1.972708946809172e-05, + "loss": 0.8887, + "step": 3338 + }, + { + "epoch": 0.10233541743287973, + "grad_norm": 1.841424114897134, + "learning_rate": 1.9726859100232113e-05, + "loss": 0.865, + "step": 3339 + }, + { + "epoch": 0.10236606595562094, + "grad_norm": 1.6888554653860313, + "learning_rate": 1.972662863653128e-05, + "loss": 0.8978, + "step": 3340 + }, + { + "epoch": 0.10239671447836214, + "grad_norm": 1.6950032102875043, + "learning_rate": 1.9726398076991482e-05, + "loss": 0.8296, + "step": 3341 + }, + { + "epoch": 0.10242736300110335, + "grad_norm": 1.5353175665322139, + "learning_rate": 1.9726167421615e-05, + "loss": 0.7449, + "step": 3342 + }, + { + "epoch": 0.10245801152384455, + "grad_norm": 1.07109143922048, + "learning_rate": 1.9725936670404096e-05, + "loss": 0.6876, + "step": 3343 + }, + { + "epoch": 0.10248866004658576, + "grad_norm": 1.644325622093274, + "learning_rate": 1.9725705823361053e-05, + "loss": 0.8783, + "step": 3344 + }, + { + "epoch": 0.10251930856932696, + "grad_norm": 1.6783679788957675, + "learning_rate": 1.972547488048814e-05, + "loss": 0.8375, + "step": 3345 + }, + { + "epoch": 0.10254995709206816, + "grad_norm": 1.9076749317261033, + "learning_rate": 1.9725243841787634e-05, + "loss": 0.7606, + "step": 3346 + }, + { + "epoch": 0.10258060561480936, + "grad_norm": 1.518654234778627, + "learning_rate": 1.9725012707261816e-05, + "loss": 0.7797, + "step": 3347 + }, + { + "epoch": 0.10261125413755057, + "grad_norm": 1.6324078875062198, + "learning_rate": 1.972478147691296e-05, + "loss": 0.822, + "step": 3348 + }, + { + "epoch": 0.10264190266029177, + "grad_norm": 1.4931388662726628, + "learning_rate": 1.972455015074334e-05, + "loss": 0.8468, + "step": 3349 + }, + { + "epoch": 0.10267255118303298, + "grad_norm": 0.7817307974241746, + "learning_rate": 1.972431872875524e-05, + "loss": 0.6447, + "step": 3350 + }, + { + "epoch": 0.10270319970577418, + "grad_norm": 1.756636431839389, + "learning_rate": 1.972408721095094e-05, + "loss": 0.8027, + "step": 3351 + }, + { + "epoch": 0.10273384822851539, + "grad_norm": 1.6515648135397405, + "learning_rate": 1.9723855597332723e-05, + "loss": 0.8906, + "step": 3352 + }, + { + "epoch": 0.1027644967512566, + "grad_norm": 1.5889429770871997, + "learning_rate": 1.9723623887902865e-05, + "loss": 0.8356, + "step": 3353 + }, + { + "epoch": 0.10279514527399779, + "grad_norm": 1.722694384055293, + "learning_rate": 1.9723392082663656e-05, + "loss": 0.8161, + "step": 3354 + }, + { + "epoch": 0.10282579379673899, + "grad_norm": 1.7315777628860214, + "learning_rate": 1.9723160181617374e-05, + "loss": 0.8506, + "step": 3355 + }, + { + "epoch": 0.1028564423194802, + "grad_norm": 1.87964450960875, + "learning_rate": 1.972292818476631e-05, + "loss": 0.933, + "step": 3356 + }, + { + "epoch": 0.1028870908422214, + "grad_norm": 0.7760685633220308, + "learning_rate": 1.972269609211274e-05, + "loss": 0.6549, + "step": 3357 + }, + { + "epoch": 0.10291773936496261, + "grad_norm": 1.7676486345890334, + "learning_rate": 1.972246390365897e-05, + "loss": 0.8962, + "step": 3358 + }, + { + "epoch": 0.10294838788770382, + "grad_norm": 1.446437277474743, + "learning_rate": 1.9722231619407266e-05, + "loss": 0.8049, + "step": 3359 + }, + { + "epoch": 0.10297903641044502, + "grad_norm": 1.6222300405611743, + "learning_rate": 1.972199923935993e-05, + "loss": 0.7596, + "step": 3360 + }, + { + "epoch": 0.10300968493318623, + "grad_norm": 1.7589388549569565, + "learning_rate": 1.9721766763519244e-05, + "loss": 0.9465, + "step": 3361 + }, + { + "epoch": 0.10304033345592742, + "grad_norm": 1.6445600705605905, + "learning_rate": 1.972153419188751e-05, + "loss": 0.7836, + "step": 3362 + }, + { + "epoch": 0.10307098197866862, + "grad_norm": 1.6667598318299417, + "learning_rate": 1.972130152446701e-05, + "loss": 0.8142, + "step": 3363 + }, + { + "epoch": 0.10310163050140983, + "grad_norm": 1.8070423825930546, + "learning_rate": 1.9721068761260032e-05, + "loss": 0.9338, + "step": 3364 + }, + { + "epoch": 0.10313227902415104, + "grad_norm": 1.769615524801431, + "learning_rate": 1.9720835902268882e-05, + "loss": 1.0066, + "step": 3365 + }, + { + "epoch": 0.10316292754689224, + "grad_norm": 0.7848392474098578, + "learning_rate": 1.9720602947495847e-05, + "loss": 0.6076, + "step": 3366 + }, + { + "epoch": 0.10319357606963345, + "grad_norm": 1.6775854068593203, + "learning_rate": 1.9720369896943226e-05, + "loss": 0.8747, + "step": 3367 + }, + { + "epoch": 0.10322422459237465, + "grad_norm": 1.5507338756068894, + "learning_rate": 1.972013675061331e-05, + "loss": 0.7501, + "step": 3368 + }, + { + "epoch": 0.10325487311511586, + "grad_norm": 1.6859666628078591, + "learning_rate": 1.9719903508508406e-05, + "loss": 0.812, + "step": 3369 + }, + { + "epoch": 0.10328552163785705, + "grad_norm": 1.6315173822861562, + "learning_rate": 1.9719670170630798e-05, + "loss": 0.8936, + "step": 3370 + }, + { + "epoch": 0.10331617016059826, + "grad_norm": 1.878359797971761, + "learning_rate": 1.9719436736982796e-05, + "loss": 0.9214, + "step": 3371 + }, + { + "epoch": 0.10334681868333946, + "grad_norm": 1.6414292305904405, + "learning_rate": 1.97192032075667e-05, + "loss": 0.8135, + "step": 3372 + }, + { + "epoch": 0.10337746720608067, + "grad_norm": 1.812397325928558, + "learning_rate": 1.9718969582384805e-05, + "loss": 0.8846, + "step": 3373 + }, + { + "epoch": 0.10340811572882187, + "grad_norm": 1.6797142771240035, + "learning_rate": 1.9718735861439416e-05, + "loss": 0.7997, + "step": 3374 + }, + { + "epoch": 0.10343876425156308, + "grad_norm": 1.7286652758600065, + "learning_rate": 1.9718502044732836e-05, + "loss": 0.9038, + "step": 3375 + }, + { + "epoch": 0.10346941277430428, + "grad_norm": 0.8118458018871273, + "learning_rate": 1.971826813226737e-05, + "loss": 0.662, + "step": 3376 + }, + { + "epoch": 0.10350006129704548, + "grad_norm": 1.6594105131226395, + "learning_rate": 1.971803412404532e-05, + "loss": 0.8283, + "step": 3377 + }, + { + "epoch": 0.10353070981978668, + "grad_norm": 1.8038600865747265, + "learning_rate": 1.9717800020068995e-05, + "loss": 0.8379, + "step": 3378 + }, + { + "epoch": 0.10356135834252789, + "grad_norm": 1.690588856050908, + "learning_rate": 1.9717565820340696e-05, + "loss": 0.8522, + "step": 3379 + }, + { + "epoch": 0.10359200686526909, + "grad_norm": 1.6072096591468399, + "learning_rate": 1.971733152486274e-05, + "loss": 0.7272, + "step": 3380 + }, + { + "epoch": 0.1036226553880103, + "grad_norm": 1.6837384846021326, + "learning_rate": 1.9717097133637425e-05, + "loss": 0.8476, + "step": 3381 + }, + { + "epoch": 0.1036533039107515, + "grad_norm": 1.455085448237781, + "learning_rate": 1.971686264666707e-05, + "loss": 0.7881, + "step": 3382 + }, + { + "epoch": 0.10368395243349271, + "grad_norm": 1.8088105378054642, + "learning_rate": 1.971662806395398e-05, + "loss": 0.8542, + "step": 3383 + }, + { + "epoch": 0.10371460095623392, + "grad_norm": 1.677220987834206, + "learning_rate": 1.9716393385500467e-05, + "loss": 0.7816, + "step": 3384 + }, + { + "epoch": 0.10374524947897511, + "grad_norm": 1.8821396793151892, + "learning_rate": 1.9716158611308843e-05, + "loss": 0.9162, + "step": 3385 + }, + { + "epoch": 0.10377589800171631, + "grad_norm": 1.306986998989371, + "learning_rate": 1.9715923741381425e-05, + "loss": 0.7284, + "step": 3386 + }, + { + "epoch": 0.10380654652445752, + "grad_norm": 1.7586764354076658, + "learning_rate": 1.971568877572052e-05, + "loss": 0.8772, + "step": 3387 + }, + { + "epoch": 0.10383719504719872, + "grad_norm": 1.566914912608522, + "learning_rate": 1.9715453714328454e-05, + "loss": 0.7458, + "step": 3388 + }, + { + "epoch": 0.10386784356993993, + "grad_norm": 1.6493405324350168, + "learning_rate": 1.9715218557207532e-05, + "loss": 0.8277, + "step": 3389 + }, + { + "epoch": 0.10389849209268114, + "grad_norm": 1.642096863209476, + "learning_rate": 1.9714983304360077e-05, + "loss": 0.8076, + "step": 3390 + }, + { + "epoch": 0.10392914061542234, + "grad_norm": 1.637441891127324, + "learning_rate": 1.9714747955788405e-05, + "loss": 0.788, + "step": 3391 + }, + { + "epoch": 0.10395978913816355, + "grad_norm": 1.665138273501712, + "learning_rate": 1.9714512511494837e-05, + "loss": 0.9447, + "step": 3392 + }, + { + "epoch": 0.10399043766090474, + "grad_norm": 1.4730473853773254, + "learning_rate": 1.971427697148169e-05, + "loss": 0.7811, + "step": 3393 + }, + { + "epoch": 0.10402108618364594, + "grad_norm": 1.7236106442577739, + "learning_rate": 1.971404133575129e-05, + "loss": 0.7937, + "step": 3394 + }, + { + "epoch": 0.10405173470638715, + "grad_norm": 1.612282770894919, + "learning_rate": 1.9713805604305954e-05, + "loss": 0.8946, + "step": 3395 + }, + { + "epoch": 0.10408238322912836, + "grad_norm": 1.6804015115709403, + "learning_rate": 1.9713569777148e-05, + "loss": 0.775, + "step": 3396 + }, + { + "epoch": 0.10411303175186956, + "grad_norm": 1.5159165608640568, + "learning_rate": 1.9713333854279765e-05, + "loss": 0.8557, + "step": 3397 + }, + { + "epoch": 0.10414368027461077, + "grad_norm": 1.6301807152642613, + "learning_rate": 1.9713097835703564e-05, + "loss": 0.7507, + "step": 3398 + }, + { + "epoch": 0.10417432879735197, + "grad_norm": 1.455404775564919, + "learning_rate": 1.9712861721421724e-05, + "loss": 0.8083, + "step": 3399 + }, + { + "epoch": 0.10420497732009318, + "grad_norm": 1.5497861398518216, + "learning_rate": 1.9712625511436572e-05, + "loss": 0.8611, + "step": 3400 + }, + { + "epoch": 0.10423562584283437, + "grad_norm": 1.5838729659144888, + "learning_rate": 1.971238920575044e-05, + "loss": 0.7908, + "step": 3401 + }, + { + "epoch": 0.10426627436557558, + "grad_norm": 1.6580836366982807, + "learning_rate": 1.9712152804365647e-05, + "loss": 0.8014, + "step": 3402 + }, + { + "epoch": 0.10429692288831678, + "grad_norm": 1.7498660720404693, + "learning_rate": 1.971191630728453e-05, + "loss": 0.8665, + "step": 3403 + }, + { + "epoch": 0.10432757141105799, + "grad_norm": 1.5690746562820908, + "learning_rate": 1.9711679714509417e-05, + "loss": 0.6903, + "step": 3404 + }, + { + "epoch": 0.10435821993379919, + "grad_norm": 1.5437255341983467, + "learning_rate": 1.9711443026042638e-05, + "loss": 0.7997, + "step": 3405 + }, + { + "epoch": 0.1043888684565404, + "grad_norm": 1.8216492962431394, + "learning_rate": 1.9711206241886526e-05, + "loss": 0.8388, + "step": 3406 + }, + { + "epoch": 0.1044195169792816, + "grad_norm": 1.6019828462202244, + "learning_rate": 1.9710969362043417e-05, + "loss": 0.9199, + "step": 3407 + }, + { + "epoch": 0.1044501655020228, + "grad_norm": 1.7138688068073948, + "learning_rate": 1.9710732386515637e-05, + "loss": 0.8976, + "step": 3408 + }, + { + "epoch": 0.104480814024764, + "grad_norm": 1.7415679443884151, + "learning_rate": 1.9710495315305528e-05, + "loss": 0.9709, + "step": 3409 + }, + { + "epoch": 0.10451146254750521, + "grad_norm": 1.7220840867461489, + "learning_rate": 1.9710258148415428e-05, + "loss": 0.7891, + "step": 3410 + }, + { + "epoch": 0.10454211107024641, + "grad_norm": 0.7593405565149188, + "learning_rate": 1.9710020885847664e-05, + "loss": 0.6444, + "step": 3411 + }, + { + "epoch": 0.10457275959298762, + "grad_norm": 2.011163711417915, + "learning_rate": 1.9709783527604584e-05, + "loss": 0.9884, + "step": 3412 + }, + { + "epoch": 0.10460340811572882, + "grad_norm": 1.7217770084061628, + "learning_rate": 1.970954607368852e-05, + "loss": 0.7682, + "step": 3413 + }, + { + "epoch": 0.10463405663847003, + "grad_norm": 0.7342220553728936, + "learning_rate": 1.970930852410182e-05, + "loss": 0.6494, + "step": 3414 + }, + { + "epoch": 0.10466470516121124, + "grad_norm": 1.6712654403260072, + "learning_rate": 1.970907087884681e-05, + "loss": 0.9344, + "step": 3415 + }, + { + "epoch": 0.10469535368395243, + "grad_norm": 1.771518101717739, + "learning_rate": 1.9708833137925848e-05, + "loss": 0.8485, + "step": 3416 + }, + { + "epoch": 0.10472600220669363, + "grad_norm": 0.7658805547334894, + "learning_rate": 1.9708595301341264e-05, + "loss": 0.6554, + "step": 3417 + }, + { + "epoch": 0.10475665072943484, + "grad_norm": 1.6969641284295771, + "learning_rate": 1.9708357369095408e-05, + "loss": 0.8155, + "step": 3418 + }, + { + "epoch": 0.10478729925217604, + "grad_norm": 1.5833727899788252, + "learning_rate": 1.970811934119062e-05, + "loss": 0.9141, + "step": 3419 + }, + { + "epoch": 0.10481794777491725, + "grad_norm": 1.6436358084237446, + "learning_rate": 1.9707881217629255e-05, + "loss": 0.8795, + "step": 3420 + }, + { + "epoch": 0.10484859629765846, + "grad_norm": 1.6432821203574477, + "learning_rate": 1.9707642998413648e-05, + "loss": 0.8338, + "step": 3421 + }, + { + "epoch": 0.10487924482039966, + "grad_norm": 1.6283393556918317, + "learning_rate": 1.970740468354615e-05, + "loss": 0.8713, + "step": 3422 + }, + { + "epoch": 0.10490989334314087, + "grad_norm": 1.6841301963658708, + "learning_rate": 1.9707166273029114e-05, + "loss": 0.86, + "step": 3423 + }, + { + "epoch": 0.10494054186588206, + "grad_norm": 1.7654442145845575, + "learning_rate": 1.970692776686488e-05, + "loss": 0.8067, + "step": 3424 + }, + { + "epoch": 0.10497119038862326, + "grad_norm": 0.876076710999654, + "learning_rate": 1.9706689165055807e-05, + "loss": 0.6294, + "step": 3425 + }, + { + "epoch": 0.10500183891136447, + "grad_norm": 1.9969236926329506, + "learning_rate": 1.970645046760424e-05, + "loss": 0.8904, + "step": 3426 + }, + { + "epoch": 0.10503248743410568, + "grad_norm": 1.5489226516087025, + "learning_rate": 1.9706211674512534e-05, + "loss": 0.9538, + "step": 3427 + }, + { + "epoch": 0.10506313595684688, + "grad_norm": 1.7138787245194347, + "learning_rate": 1.970597278578304e-05, + "loss": 0.9049, + "step": 3428 + }, + { + "epoch": 0.10509378447958809, + "grad_norm": 1.6981736061883281, + "learning_rate": 1.9705733801418116e-05, + "loss": 0.7971, + "step": 3429 + }, + { + "epoch": 0.10512443300232929, + "grad_norm": 1.7560644016939582, + "learning_rate": 1.970549472142011e-05, + "loss": 0.8557, + "step": 3430 + }, + { + "epoch": 0.1051550815250705, + "grad_norm": 1.5759454306812914, + "learning_rate": 1.970525554579138e-05, + "loss": 0.8439, + "step": 3431 + }, + { + "epoch": 0.10518573004781169, + "grad_norm": 1.7662434439461465, + "learning_rate": 1.9705016274534287e-05, + "loss": 0.8167, + "step": 3432 + }, + { + "epoch": 0.1052163785705529, + "grad_norm": 1.5725935801361466, + "learning_rate": 1.9704776907651185e-05, + "loss": 0.8448, + "step": 3433 + }, + { + "epoch": 0.1052470270932941, + "grad_norm": 1.6617294334683288, + "learning_rate": 1.9704537445144432e-05, + "loss": 0.7998, + "step": 3434 + }, + { + "epoch": 0.10527767561603531, + "grad_norm": 0.8359873272363417, + "learning_rate": 1.970429788701639e-05, + "loss": 0.6476, + "step": 3435 + }, + { + "epoch": 0.10530832413877651, + "grad_norm": 1.8871786010586782, + "learning_rate": 1.9704058233269416e-05, + "loss": 0.8342, + "step": 3436 + }, + { + "epoch": 0.10533897266151772, + "grad_norm": 1.8437707111949562, + "learning_rate": 1.9703818483905876e-05, + "loss": 0.8248, + "step": 3437 + }, + { + "epoch": 0.10536962118425892, + "grad_norm": 0.7839585089708753, + "learning_rate": 1.9703578638928128e-05, + "loss": 0.6497, + "step": 3438 + }, + { + "epoch": 0.10540026970700012, + "grad_norm": 1.8028267017491018, + "learning_rate": 1.9703338698338538e-05, + "loss": 0.8893, + "step": 3439 + }, + { + "epoch": 0.10543091822974132, + "grad_norm": 1.7568047018939845, + "learning_rate": 1.9703098662139467e-05, + "loss": 0.8825, + "step": 3440 + }, + { + "epoch": 0.10546156675248253, + "grad_norm": 1.5450728968000746, + "learning_rate": 1.970285853033328e-05, + "loss": 0.7813, + "step": 3441 + }, + { + "epoch": 0.10549221527522373, + "grad_norm": 0.7469461671591318, + "learning_rate": 1.9702618302922353e-05, + "loss": 0.6652, + "step": 3442 + }, + { + "epoch": 0.10552286379796494, + "grad_norm": 1.7194651887493655, + "learning_rate": 1.970237797990904e-05, + "loss": 0.8591, + "step": 3443 + }, + { + "epoch": 0.10555351232070614, + "grad_norm": 1.7282586903271941, + "learning_rate": 1.9702137561295714e-05, + "loss": 0.879, + "step": 3444 + }, + { + "epoch": 0.10558416084344735, + "grad_norm": 1.5439182606777526, + "learning_rate": 1.9701897047084746e-05, + "loss": 0.7848, + "step": 3445 + }, + { + "epoch": 0.10561480936618856, + "grad_norm": 1.6511449997331937, + "learning_rate": 1.9701656437278504e-05, + "loss": 0.7538, + "step": 3446 + }, + { + "epoch": 0.10564545788892975, + "grad_norm": 0.7768592579479676, + "learning_rate": 1.970141573187936e-05, + "loss": 0.6146, + "step": 3447 + }, + { + "epoch": 0.10567610641167095, + "grad_norm": 1.6860875804645843, + "learning_rate": 1.9701174930889683e-05, + "loss": 0.8603, + "step": 3448 + }, + { + "epoch": 0.10570675493441216, + "grad_norm": 1.7485710820692888, + "learning_rate": 1.9700934034311844e-05, + "loss": 0.6613, + "step": 3449 + }, + { + "epoch": 0.10573740345715336, + "grad_norm": 1.4611776113658157, + "learning_rate": 1.9700693042148224e-05, + "loss": 0.7582, + "step": 3450 + }, + { + "epoch": 0.10576805197989457, + "grad_norm": 1.6653991356382136, + "learning_rate": 1.9700451954401192e-05, + "loss": 0.9214, + "step": 3451 + }, + { + "epoch": 0.10579870050263578, + "grad_norm": 1.8634489566482932, + "learning_rate": 1.970021077107313e-05, + "loss": 0.8445, + "step": 3452 + }, + { + "epoch": 0.10582934902537698, + "grad_norm": 1.7792187681015776, + "learning_rate": 1.9699969492166403e-05, + "loss": 0.8331, + "step": 3453 + }, + { + "epoch": 0.10585999754811819, + "grad_norm": 1.7122685262465156, + "learning_rate": 1.9699728117683397e-05, + "loss": 0.8795, + "step": 3454 + }, + { + "epoch": 0.10589064607085938, + "grad_norm": 1.7733689203355925, + "learning_rate": 1.9699486647626485e-05, + "loss": 0.8147, + "step": 3455 + }, + { + "epoch": 0.10592129459360058, + "grad_norm": 1.5935981384973987, + "learning_rate": 1.9699245081998054e-05, + "loss": 0.8367, + "step": 3456 + }, + { + "epoch": 0.10595194311634179, + "grad_norm": 0.7708612946745127, + "learning_rate": 1.9699003420800477e-05, + "loss": 0.6437, + "step": 3457 + }, + { + "epoch": 0.105982591639083, + "grad_norm": 2.090949469745762, + "learning_rate": 1.969876166403614e-05, + "loss": 0.8991, + "step": 3458 + }, + { + "epoch": 0.1060132401618242, + "grad_norm": 1.9031676737603016, + "learning_rate": 1.969851981170742e-05, + "loss": 0.9107, + "step": 3459 + }, + { + "epoch": 0.10604388868456541, + "grad_norm": 1.928489145106651, + "learning_rate": 1.9698277863816703e-05, + "loss": 0.825, + "step": 3460 + }, + { + "epoch": 0.10607453720730661, + "grad_norm": 1.5902439682576852, + "learning_rate": 1.9698035820366374e-05, + "loss": 0.8328, + "step": 3461 + }, + { + "epoch": 0.10610518573004782, + "grad_norm": 1.902851800450486, + "learning_rate": 1.9697793681358816e-05, + "loss": 0.7597, + "step": 3462 + }, + { + "epoch": 0.10613583425278901, + "grad_norm": 1.721638650610405, + "learning_rate": 1.9697551446796414e-05, + "loss": 0.953, + "step": 3463 + }, + { + "epoch": 0.10616648277553022, + "grad_norm": 1.520951228494169, + "learning_rate": 1.969730911668156e-05, + "loss": 0.8097, + "step": 3464 + }, + { + "epoch": 0.10619713129827142, + "grad_norm": 2.8190258771481442, + "learning_rate": 1.9697066691016636e-05, + "loss": 0.82, + "step": 3465 + }, + { + "epoch": 0.10622777982101263, + "grad_norm": 1.8555323681444473, + "learning_rate": 1.9696824169804034e-05, + "loss": 1.044, + "step": 3466 + }, + { + "epoch": 0.10625842834375383, + "grad_norm": 0.7711939631020182, + "learning_rate": 1.9696581553046143e-05, + "loss": 0.6262, + "step": 3467 + }, + { + "epoch": 0.10628907686649504, + "grad_norm": 1.5793840124575786, + "learning_rate": 1.969633884074535e-05, + "loss": 0.8471, + "step": 3468 + }, + { + "epoch": 0.10631972538923624, + "grad_norm": 1.7286319325293076, + "learning_rate": 1.969609603290405e-05, + "loss": 0.8165, + "step": 3469 + }, + { + "epoch": 0.10635037391197744, + "grad_norm": 1.530863991813516, + "learning_rate": 1.9695853129524636e-05, + "loss": 0.6762, + "step": 3470 + }, + { + "epoch": 0.10638102243471864, + "grad_norm": 1.6523633222747254, + "learning_rate": 1.96956101306095e-05, + "loss": 0.7428, + "step": 3471 + }, + { + "epoch": 0.10641167095745985, + "grad_norm": 1.8088928875402739, + "learning_rate": 1.969536703616104e-05, + "loss": 0.9221, + "step": 3472 + }, + { + "epoch": 0.10644231948020105, + "grad_norm": 1.6783539195133923, + "learning_rate": 1.9695123846181645e-05, + "loss": 0.8935, + "step": 3473 + }, + { + "epoch": 0.10647296800294226, + "grad_norm": 1.713259140811692, + "learning_rate": 1.9694880560673712e-05, + "loss": 0.9432, + "step": 3474 + }, + { + "epoch": 0.10650361652568346, + "grad_norm": 1.7630333937068345, + "learning_rate": 1.9694637179639643e-05, + "loss": 0.8368, + "step": 3475 + }, + { + "epoch": 0.10653426504842467, + "grad_norm": 1.7683828775516128, + "learning_rate": 1.9694393703081832e-05, + "loss": 0.8252, + "step": 3476 + }, + { + "epoch": 0.10656491357116588, + "grad_norm": 1.6470548013321746, + "learning_rate": 1.969415013100268e-05, + "loss": 0.8149, + "step": 3477 + }, + { + "epoch": 0.10659556209390707, + "grad_norm": 1.8071139729149994, + "learning_rate": 1.9693906463404588e-05, + "loss": 0.8923, + "step": 3478 + }, + { + "epoch": 0.10662621061664827, + "grad_norm": 1.5979744761654326, + "learning_rate": 1.9693662700289954e-05, + "loss": 0.9021, + "step": 3479 + }, + { + "epoch": 0.10665685913938948, + "grad_norm": 2.1129497158431034, + "learning_rate": 1.969341884166118e-05, + "loss": 0.9127, + "step": 3480 + }, + { + "epoch": 0.10668750766213068, + "grad_norm": 1.7062429109761312, + "learning_rate": 1.9693174887520674e-05, + "loss": 0.9644, + "step": 3481 + }, + { + "epoch": 0.10671815618487189, + "grad_norm": 1.7596471933000934, + "learning_rate": 1.969293083787083e-05, + "loss": 0.777, + "step": 3482 + }, + { + "epoch": 0.1067488047076131, + "grad_norm": 1.7889422195183733, + "learning_rate": 1.969268669271406e-05, + "loss": 0.8763, + "step": 3483 + }, + { + "epoch": 0.1067794532303543, + "grad_norm": 1.7415051662804202, + "learning_rate": 1.9692442452052773e-05, + "loss": 0.9144, + "step": 3484 + }, + { + "epoch": 0.10681010175309551, + "grad_norm": 1.659620475446881, + "learning_rate": 1.9692198115889366e-05, + "loss": 0.8286, + "step": 3485 + }, + { + "epoch": 0.1068407502758367, + "grad_norm": 1.9073830787716701, + "learning_rate": 1.969195368422625e-05, + "loss": 0.7842, + "step": 3486 + }, + { + "epoch": 0.1068713987985779, + "grad_norm": 1.6309382107734338, + "learning_rate": 1.9691709157065836e-05, + "loss": 0.7802, + "step": 3487 + }, + { + "epoch": 0.10690204732131911, + "grad_norm": 1.5220398516698455, + "learning_rate": 1.9691464534410533e-05, + "loss": 0.8438, + "step": 3488 + }, + { + "epoch": 0.10693269584406032, + "grad_norm": 1.621477761433377, + "learning_rate": 1.9691219816262748e-05, + "loss": 0.8176, + "step": 3489 + }, + { + "epoch": 0.10696334436680152, + "grad_norm": 1.574280561106144, + "learning_rate": 1.9690975002624897e-05, + "loss": 0.8473, + "step": 3490 + }, + { + "epoch": 0.10699399288954273, + "grad_norm": 2.036925141646682, + "learning_rate": 1.969073009349939e-05, + "loss": 0.9232, + "step": 3491 + }, + { + "epoch": 0.10702464141228393, + "grad_norm": 1.6632342580500337, + "learning_rate": 1.969048508888864e-05, + "loss": 0.8241, + "step": 3492 + }, + { + "epoch": 0.10705528993502514, + "grad_norm": 1.791110635988498, + "learning_rate": 1.9690239988795058e-05, + "loss": 0.9068, + "step": 3493 + }, + { + "epoch": 0.10708593845776633, + "grad_norm": 1.7379736127386516, + "learning_rate": 1.968999479322107e-05, + "loss": 0.9531, + "step": 3494 + }, + { + "epoch": 0.10711658698050754, + "grad_norm": 1.8003928835134404, + "learning_rate": 1.9689749502169073e-05, + "loss": 0.758, + "step": 3495 + }, + { + "epoch": 0.10714723550324874, + "grad_norm": 1.7952296581839327, + "learning_rate": 1.96895041156415e-05, + "loss": 0.876, + "step": 3496 + }, + { + "epoch": 0.10717788402598995, + "grad_norm": 1.6546210909841637, + "learning_rate": 1.9689258633640763e-05, + "loss": 0.9072, + "step": 3497 + }, + { + "epoch": 0.10720853254873115, + "grad_norm": 1.8195061565715778, + "learning_rate": 1.9689013056169285e-05, + "loss": 0.9844, + "step": 3498 + }, + { + "epoch": 0.10723918107147236, + "grad_norm": 1.6778352130176537, + "learning_rate": 1.968876738322948e-05, + "loss": 0.7612, + "step": 3499 + }, + { + "epoch": 0.10726982959421356, + "grad_norm": 1.8460325117047267, + "learning_rate": 1.968852161482377e-05, + "loss": 0.8779, + "step": 3500 + }, + { + "epoch": 0.10730047811695476, + "grad_norm": 1.5788600429167148, + "learning_rate": 1.9688275750954577e-05, + "loss": 0.8605, + "step": 3501 + }, + { + "epoch": 0.10733112663969596, + "grad_norm": 1.8543122020939855, + "learning_rate": 1.9688029791624326e-05, + "loss": 0.8199, + "step": 3502 + }, + { + "epoch": 0.10736177516243717, + "grad_norm": 1.8644608371284108, + "learning_rate": 1.9687783736835436e-05, + "loss": 0.7887, + "step": 3503 + }, + { + "epoch": 0.10739242368517837, + "grad_norm": 1.790544607765597, + "learning_rate": 1.968753758659034e-05, + "loss": 0.79, + "step": 3504 + }, + { + "epoch": 0.10742307220791958, + "grad_norm": 0.8596781631108055, + "learning_rate": 1.968729134089145e-05, + "loss": 0.6591, + "step": 3505 + }, + { + "epoch": 0.10745372073066078, + "grad_norm": 1.5655085014648777, + "learning_rate": 1.9687044999741204e-05, + "loss": 0.9063, + "step": 3506 + }, + { + "epoch": 0.10748436925340199, + "grad_norm": 1.7509504960706606, + "learning_rate": 1.9686798563142026e-05, + "loss": 0.8588, + "step": 3507 + }, + { + "epoch": 0.1075150177761432, + "grad_norm": 1.6065743474214003, + "learning_rate": 1.9686552031096342e-05, + "loss": 0.7538, + "step": 3508 + }, + { + "epoch": 0.10754566629888439, + "grad_norm": 1.7303265751134298, + "learning_rate": 1.9686305403606583e-05, + "loss": 0.7757, + "step": 3509 + }, + { + "epoch": 0.1075763148216256, + "grad_norm": 1.8152003323442252, + "learning_rate": 1.9686058680675178e-05, + "loss": 0.7759, + "step": 3510 + }, + { + "epoch": 0.1076069633443668, + "grad_norm": 1.6457079677141941, + "learning_rate": 1.968581186230456e-05, + "loss": 0.8864, + "step": 3511 + }, + { + "epoch": 0.107637611867108, + "grad_norm": 1.5430086640130027, + "learning_rate": 1.9685564948497155e-05, + "loss": 0.8108, + "step": 3512 + }, + { + "epoch": 0.10766826038984921, + "grad_norm": 1.6631782720276078, + "learning_rate": 1.9685317939255403e-05, + "loss": 0.786, + "step": 3513 + }, + { + "epoch": 0.10769890891259042, + "grad_norm": 1.9503362364269698, + "learning_rate": 1.9685070834581736e-05, + "loss": 0.8394, + "step": 3514 + }, + { + "epoch": 0.10772955743533162, + "grad_norm": 1.5627463053402704, + "learning_rate": 1.9684823634478585e-05, + "loss": 0.8036, + "step": 3515 + }, + { + "epoch": 0.10776020595807283, + "grad_norm": 1.8093680990327747, + "learning_rate": 1.9684576338948395e-05, + "loss": 0.9223, + "step": 3516 + }, + { + "epoch": 0.10779085448081402, + "grad_norm": 1.7543612968713655, + "learning_rate": 1.9684328947993593e-05, + "loss": 0.9336, + "step": 3517 + }, + { + "epoch": 0.10782150300355522, + "grad_norm": 1.8100090683917365, + "learning_rate": 1.9684081461616617e-05, + "loss": 0.8075, + "step": 3518 + }, + { + "epoch": 0.10785215152629643, + "grad_norm": 1.8623300035945913, + "learning_rate": 1.9683833879819912e-05, + "loss": 0.7909, + "step": 3519 + }, + { + "epoch": 0.10788280004903764, + "grad_norm": 1.5620973218737304, + "learning_rate": 1.9683586202605914e-05, + "loss": 0.846, + "step": 3520 + }, + { + "epoch": 0.10791344857177884, + "grad_norm": 1.5845496507079655, + "learning_rate": 1.9683338429977064e-05, + "loss": 0.7907, + "step": 3521 + }, + { + "epoch": 0.10794409709452005, + "grad_norm": 1.4688311183549665, + "learning_rate": 1.96830905619358e-05, + "loss": 0.8602, + "step": 3522 + }, + { + "epoch": 0.10797474561726125, + "grad_norm": 1.645080626572241, + "learning_rate": 1.968284259848457e-05, + "loss": 0.7859, + "step": 3523 + }, + { + "epoch": 0.10800539414000246, + "grad_norm": 1.6001574133670091, + "learning_rate": 1.9682594539625813e-05, + "loss": 0.8843, + "step": 3524 + }, + { + "epoch": 0.10803604266274365, + "grad_norm": 1.506563633803289, + "learning_rate": 1.9682346385361975e-05, + "loss": 0.7813, + "step": 3525 + }, + { + "epoch": 0.10806669118548486, + "grad_norm": 1.8817881355165915, + "learning_rate": 1.96820981356955e-05, + "loss": 0.794, + "step": 3526 + }, + { + "epoch": 0.10809733970822606, + "grad_norm": 1.5935644916099854, + "learning_rate": 1.9681849790628836e-05, + "loss": 0.8396, + "step": 3527 + }, + { + "epoch": 0.10812798823096727, + "grad_norm": 1.713887732859326, + "learning_rate": 1.9681601350164427e-05, + "loss": 1.0087, + "step": 3528 + }, + { + "epoch": 0.10815863675370847, + "grad_norm": 1.5897721917490077, + "learning_rate": 1.9681352814304725e-05, + "loss": 0.8332, + "step": 3529 + }, + { + "epoch": 0.10818928527644968, + "grad_norm": 1.9058150857251739, + "learning_rate": 1.9681104183052176e-05, + "loss": 0.8389, + "step": 3530 + }, + { + "epoch": 0.10821993379919088, + "grad_norm": 1.6823554194758896, + "learning_rate": 1.968085545640923e-05, + "loss": 0.9331, + "step": 3531 + }, + { + "epoch": 0.10825058232193208, + "grad_norm": 1.4410722161403329, + "learning_rate": 1.968060663437834e-05, + "loss": 0.7187, + "step": 3532 + }, + { + "epoch": 0.10828123084467328, + "grad_norm": 1.8076440669378389, + "learning_rate": 1.9680357716961952e-05, + "loss": 0.8087, + "step": 3533 + }, + { + "epoch": 0.10831187936741449, + "grad_norm": 1.9276166585089618, + "learning_rate": 1.9680108704162525e-05, + "loss": 0.9631, + "step": 3534 + }, + { + "epoch": 0.1083425278901557, + "grad_norm": 1.5260027103574996, + "learning_rate": 1.967985959598251e-05, + "loss": 0.8559, + "step": 3535 + }, + { + "epoch": 0.1083731764128969, + "grad_norm": 1.6877050172964612, + "learning_rate": 1.9679610392424365e-05, + "loss": 0.8435, + "step": 3536 + }, + { + "epoch": 0.1084038249356381, + "grad_norm": 1.6521863100329108, + "learning_rate": 1.9679361093490536e-05, + "loss": 0.9841, + "step": 3537 + }, + { + "epoch": 0.10843447345837931, + "grad_norm": 1.4605309428902495, + "learning_rate": 1.9679111699183488e-05, + "loss": 0.805, + "step": 3538 + }, + { + "epoch": 0.10846512198112052, + "grad_norm": 1.7380426463178933, + "learning_rate": 1.967886220950568e-05, + "loss": 0.8499, + "step": 3539 + }, + { + "epoch": 0.10849577050386171, + "grad_norm": 1.6060901840852926, + "learning_rate": 1.9678612624459558e-05, + "loss": 0.8619, + "step": 3540 + }, + { + "epoch": 0.10852641902660291, + "grad_norm": 1.5913090196854132, + "learning_rate": 1.9678362944047595e-05, + "loss": 0.7617, + "step": 3541 + }, + { + "epoch": 0.10855706754934412, + "grad_norm": 1.7659888118235896, + "learning_rate": 1.9678113168272246e-05, + "loss": 0.758, + "step": 3542 + }, + { + "epoch": 0.10858771607208532, + "grad_norm": 1.6366179970613892, + "learning_rate": 1.9677863297135972e-05, + "loss": 0.8642, + "step": 3543 + }, + { + "epoch": 0.10861836459482653, + "grad_norm": 1.7022329057566175, + "learning_rate": 1.967761333064123e-05, + "loss": 0.9278, + "step": 3544 + }, + { + "epoch": 0.10864901311756774, + "grad_norm": 1.7854190160754042, + "learning_rate": 1.967736326879049e-05, + "loss": 0.7548, + "step": 3545 + }, + { + "epoch": 0.10867966164030894, + "grad_norm": 1.8105673901310293, + "learning_rate": 1.9677113111586217e-05, + "loss": 0.7159, + "step": 3546 + }, + { + "epoch": 0.10871031016305015, + "grad_norm": 1.8233162537857817, + "learning_rate": 1.9676862859030868e-05, + "loss": 0.8013, + "step": 3547 + }, + { + "epoch": 0.10874095868579134, + "grad_norm": 1.9173161721950247, + "learning_rate": 1.9676612511126918e-05, + "loss": 0.9252, + "step": 3548 + }, + { + "epoch": 0.10877160720853254, + "grad_norm": 1.7986113128976617, + "learning_rate": 1.9676362067876826e-05, + "loss": 0.7764, + "step": 3549 + }, + { + "epoch": 0.10880225573127375, + "grad_norm": 1.6216576798868017, + "learning_rate": 1.9676111529283065e-05, + "loss": 0.8319, + "step": 3550 + }, + { + "epoch": 0.10883290425401496, + "grad_norm": 0.901949107705063, + "learning_rate": 1.96758608953481e-05, + "loss": 0.66, + "step": 3551 + }, + { + "epoch": 0.10886355277675616, + "grad_norm": 1.5798999867798502, + "learning_rate": 1.9675610166074398e-05, + "loss": 0.8684, + "step": 3552 + }, + { + "epoch": 0.10889420129949737, + "grad_norm": 1.6860580992607648, + "learning_rate": 1.967535934146444e-05, + "loss": 0.8628, + "step": 3553 + }, + { + "epoch": 0.10892484982223857, + "grad_norm": 0.7472036054960509, + "learning_rate": 1.9675108421520687e-05, + "loss": 0.6712, + "step": 3554 + }, + { + "epoch": 0.10895549834497978, + "grad_norm": 1.6995279182353067, + "learning_rate": 1.9674857406245613e-05, + "loss": 0.8275, + "step": 3555 + }, + { + "epoch": 0.10898614686772097, + "grad_norm": 1.6374073219831247, + "learning_rate": 1.9674606295641698e-05, + "loss": 0.8268, + "step": 3556 + }, + { + "epoch": 0.10901679539046218, + "grad_norm": 1.7643354296974934, + "learning_rate": 1.967435508971141e-05, + "loss": 0.7733, + "step": 3557 + }, + { + "epoch": 0.10904744391320338, + "grad_norm": 1.7069043782162772, + "learning_rate": 1.9674103788457228e-05, + "loss": 0.9155, + "step": 3558 + }, + { + "epoch": 0.10907809243594459, + "grad_norm": 0.884442105265182, + "learning_rate": 1.9673852391881623e-05, + "loss": 0.6753, + "step": 3559 + }, + { + "epoch": 0.1091087409586858, + "grad_norm": 1.6186599129296073, + "learning_rate": 1.9673600899987076e-05, + "loss": 0.8198, + "step": 3560 + }, + { + "epoch": 0.109139389481427, + "grad_norm": 1.757805271087934, + "learning_rate": 1.9673349312776065e-05, + "loss": 0.8502, + "step": 3561 + }, + { + "epoch": 0.1091700380041682, + "grad_norm": 0.7351532431315783, + "learning_rate": 1.967309763025107e-05, + "loss": 0.6812, + "step": 3562 + }, + { + "epoch": 0.1092006865269094, + "grad_norm": 1.6650105608111108, + "learning_rate": 1.9672845852414567e-05, + "loss": 0.657, + "step": 3563 + }, + { + "epoch": 0.1092313350496506, + "grad_norm": 1.817065296841196, + "learning_rate": 1.967259397926904e-05, + "loss": 0.796, + "step": 3564 + }, + { + "epoch": 0.10926198357239181, + "grad_norm": 1.7651052866035004, + "learning_rate": 1.9672342010816966e-05, + "loss": 0.7145, + "step": 3565 + }, + { + "epoch": 0.10929263209513301, + "grad_norm": 1.7250929005814704, + "learning_rate": 1.9672089947060834e-05, + "loss": 0.8205, + "step": 3566 + }, + { + "epoch": 0.10932328061787422, + "grad_norm": 1.7001466119449398, + "learning_rate": 1.9671837788003128e-05, + "loss": 0.856, + "step": 3567 + }, + { + "epoch": 0.10935392914061542, + "grad_norm": 0.8209702857479382, + "learning_rate": 1.9671585533646324e-05, + "loss": 0.6677, + "step": 3568 + }, + { + "epoch": 0.10938457766335663, + "grad_norm": 1.8189505048440004, + "learning_rate": 1.9671333183992916e-05, + "loss": 0.8403, + "step": 3569 + }, + { + "epoch": 0.10941522618609784, + "grad_norm": 1.5444271628595296, + "learning_rate": 1.967108073904539e-05, + "loss": 0.8235, + "step": 3570 + }, + { + "epoch": 0.10944587470883903, + "grad_norm": 1.629737001140791, + "learning_rate": 1.9670828198806227e-05, + "loss": 0.8365, + "step": 3571 + }, + { + "epoch": 0.10947652323158023, + "grad_norm": 1.5936670269635733, + "learning_rate": 1.967057556327792e-05, + "loss": 0.8526, + "step": 3572 + }, + { + "epoch": 0.10950717175432144, + "grad_norm": 1.8327824933635266, + "learning_rate": 1.967032283246296e-05, + "loss": 0.8521, + "step": 3573 + }, + { + "epoch": 0.10953782027706264, + "grad_norm": 0.7781900459616418, + "learning_rate": 1.9670070006363834e-05, + "loss": 0.6738, + "step": 3574 + }, + { + "epoch": 0.10956846879980385, + "grad_norm": 1.7236729719344883, + "learning_rate": 1.9669817084983035e-05, + "loss": 0.7809, + "step": 3575 + }, + { + "epoch": 0.10959911732254506, + "grad_norm": 1.6269597421785034, + "learning_rate": 1.966956406832305e-05, + "loss": 0.8662, + "step": 3576 + }, + { + "epoch": 0.10962976584528626, + "grad_norm": 1.6221111998409923, + "learning_rate": 1.9669310956386377e-05, + "loss": 0.8113, + "step": 3577 + }, + { + "epoch": 0.10966041436802747, + "grad_norm": 1.5825077950342474, + "learning_rate": 1.9669057749175512e-05, + "loss": 0.8018, + "step": 3578 + }, + { + "epoch": 0.10969106289076866, + "grad_norm": 1.8356226828199527, + "learning_rate": 1.966880444669295e-05, + "loss": 0.8397, + "step": 3579 + }, + { + "epoch": 0.10972171141350986, + "grad_norm": 1.8548849145715531, + "learning_rate": 1.9668551048941177e-05, + "loss": 1.0202, + "step": 3580 + }, + { + "epoch": 0.10975235993625107, + "grad_norm": 0.7746758339122676, + "learning_rate": 1.96682975559227e-05, + "loss": 0.6411, + "step": 3581 + }, + { + "epoch": 0.10978300845899228, + "grad_norm": 0.8067861059061875, + "learning_rate": 1.9668043967640013e-05, + "loss": 0.6819, + "step": 3582 + }, + { + "epoch": 0.10981365698173348, + "grad_norm": 1.7006415044145342, + "learning_rate": 1.9667790284095617e-05, + "loss": 0.8353, + "step": 3583 + }, + { + "epoch": 0.10984430550447469, + "grad_norm": 1.7383169993795489, + "learning_rate": 1.9667536505292005e-05, + "loss": 0.9257, + "step": 3584 + }, + { + "epoch": 0.1098749540272159, + "grad_norm": 1.8625297188052006, + "learning_rate": 1.9667282631231688e-05, + "loss": 0.8494, + "step": 3585 + }, + { + "epoch": 0.1099056025499571, + "grad_norm": 1.7184164783551892, + "learning_rate": 1.966702866191716e-05, + "loss": 0.8008, + "step": 3586 + }, + { + "epoch": 0.10993625107269829, + "grad_norm": 1.5983767212284825, + "learning_rate": 1.9666774597350923e-05, + "loss": 0.7808, + "step": 3587 + }, + { + "epoch": 0.1099668995954395, + "grad_norm": 1.7361079896890168, + "learning_rate": 1.9666520437535482e-05, + "loss": 0.7794, + "step": 3588 + }, + { + "epoch": 0.1099975481181807, + "grad_norm": 1.90290711406948, + "learning_rate": 1.9666266182473345e-05, + "loss": 0.8299, + "step": 3589 + }, + { + "epoch": 0.11002819664092191, + "grad_norm": 1.6193578888423654, + "learning_rate": 1.9666011832167012e-05, + "loss": 0.8193, + "step": 3590 + }, + { + "epoch": 0.11005884516366311, + "grad_norm": 1.7298731163209982, + "learning_rate": 1.966575738661899e-05, + "loss": 0.8808, + "step": 3591 + }, + { + "epoch": 0.11008949368640432, + "grad_norm": 1.6616630496888118, + "learning_rate": 1.9665502845831792e-05, + "loss": 0.7923, + "step": 3592 + }, + { + "epoch": 0.11012014220914552, + "grad_norm": 1.9658564173850688, + "learning_rate": 1.966524820980792e-05, + "loss": 0.9236, + "step": 3593 + }, + { + "epoch": 0.11015079073188672, + "grad_norm": 1.5673595185054885, + "learning_rate": 1.9664993478549885e-05, + "loss": 0.7515, + "step": 3594 + }, + { + "epoch": 0.11018143925462792, + "grad_norm": 1.6639877135134318, + "learning_rate": 1.9664738652060192e-05, + "loss": 0.7743, + "step": 3595 + }, + { + "epoch": 0.11021208777736913, + "grad_norm": 1.5944812695058317, + "learning_rate": 1.966448373034136e-05, + "loss": 0.7545, + "step": 3596 + }, + { + "epoch": 0.11024273630011033, + "grad_norm": 1.789407408551719, + "learning_rate": 1.9664228713395896e-05, + "loss": 0.7556, + "step": 3597 + }, + { + "epoch": 0.11027338482285154, + "grad_norm": 1.0109821203171088, + "learning_rate": 1.9663973601226313e-05, + "loss": 0.6614, + "step": 3598 + }, + { + "epoch": 0.11030403334559274, + "grad_norm": 1.665060577438612, + "learning_rate": 1.9663718393835127e-05, + "loss": 0.8293, + "step": 3599 + }, + { + "epoch": 0.11033468186833395, + "grad_norm": 1.6371479976522658, + "learning_rate": 1.9663463091224847e-05, + "loss": 0.7646, + "step": 3600 + }, + { + "epoch": 0.11036533039107516, + "grad_norm": 1.7624674110444354, + "learning_rate": 1.9663207693397997e-05, + "loss": 0.8985, + "step": 3601 + }, + { + "epoch": 0.11039597891381635, + "grad_norm": 1.682901691211659, + "learning_rate": 1.9662952200357085e-05, + "loss": 0.8252, + "step": 3602 + }, + { + "epoch": 0.11042662743655755, + "grad_norm": 1.7633141495168776, + "learning_rate": 1.966269661210464e-05, + "loss": 0.8101, + "step": 3603 + }, + { + "epoch": 0.11045727595929876, + "grad_norm": 1.7817422165767, + "learning_rate": 1.966244092864317e-05, + "loss": 0.7857, + "step": 3604 + }, + { + "epoch": 0.11048792448203996, + "grad_norm": 1.4094302683847098, + "learning_rate": 1.966218514997519e-05, + "loss": 0.7364, + "step": 3605 + }, + { + "epoch": 0.11051857300478117, + "grad_norm": 1.9984854839782653, + "learning_rate": 1.9661929276103235e-05, + "loss": 0.7958, + "step": 3606 + }, + { + "epoch": 0.11054922152752238, + "grad_norm": 1.4748611600718646, + "learning_rate": 1.9661673307029817e-05, + "loss": 0.7903, + "step": 3607 + }, + { + "epoch": 0.11057987005026358, + "grad_norm": 1.656698818041676, + "learning_rate": 1.9661417242757462e-05, + "loss": 0.8355, + "step": 3608 + }, + { + "epoch": 0.11061051857300479, + "grad_norm": 1.5486201429673476, + "learning_rate": 1.9661161083288686e-05, + "loss": 0.8945, + "step": 3609 + }, + { + "epoch": 0.11064116709574598, + "grad_norm": 0.9044483384805351, + "learning_rate": 1.9660904828626025e-05, + "loss": 0.6955, + "step": 3610 + }, + { + "epoch": 0.11067181561848718, + "grad_norm": 1.6155707380420432, + "learning_rate": 1.966064847877199e-05, + "loss": 0.8526, + "step": 3611 + }, + { + "epoch": 0.11070246414122839, + "grad_norm": 1.6172170707132525, + "learning_rate": 1.9660392033729117e-05, + "loss": 0.9409, + "step": 3612 + }, + { + "epoch": 0.1107331126639696, + "grad_norm": 1.6739164635126658, + "learning_rate": 1.966013549349993e-05, + "loss": 0.8844, + "step": 3613 + }, + { + "epoch": 0.1107637611867108, + "grad_norm": 1.6621387394942688, + "learning_rate": 1.9659878858086955e-05, + "loss": 0.8553, + "step": 3614 + }, + { + "epoch": 0.11079440970945201, + "grad_norm": 1.6948067130668092, + "learning_rate": 1.965962212749272e-05, + "loss": 0.802, + "step": 3615 + }, + { + "epoch": 0.11082505823219321, + "grad_norm": 1.7567507190318103, + "learning_rate": 1.965936530171976e-05, + "loss": 0.8614, + "step": 3616 + }, + { + "epoch": 0.11085570675493442, + "grad_norm": 1.7070686750224568, + "learning_rate": 1.96591083807706e-05, + "loss": 0.855, + "step": 3617 + }, + { + "epoch": 0.11088635527767561, + "grad_norm": 1.6035332027458848, + "learning_rate": 1.9658851364647777e-05, + "loss": 0.8888, + "step": 3618 + }, + { + "epoch": 0.11091700380041682, + "grad_norm": 1.523089489349907, + "learning_rate": 1.9658594253353818e-05, + "loss": 0.7745, + "step": 3619 + }, + { + "epoch": 0.11094765232315802, + "grad_norm": 1.5418294545451854, + "learning_rate": 1.965833704689126e-05, + "loss": 0.7613, + "step": 3620 + }, + { + "epoch": 0.11097830084589923, + "grad_norm": 1.5918558250296866, + "learning_rate": 1.9658079745262633e-05, + "loss": 0.8435, + "step": 3621 + }, + { + "epoch": 0.11100894936864043, + "grad_norm": 0.8348465062139677, + "learning_rate": 1.9657822348470476e-05, + "loss": 0.6488, + "step": 3622 + }, + { + "epoch": 0.11103959789138164, + "grad_norm": 1.5109664447924296, + "learning_rate": 1.9657564856517325e-05, + "loss": 0.8771, + "step": 3623 + }, + { + "epoch": 0.11107024641412284, + "grad_norm": 1.7929425508164163, + "learning_rate": 1.9657307269405715e-05, + "loss": 0.8485, + "step": 3624 + }, + { + "epoch": 0.11110089493686404, + "grad_norm": 1.6131627904598649, + "learning_rate": 1.965704958713819e-05, + "loss": 0.8345, + "step": 3625 + }, + { + "epoch": 0.11113154345960524, + "grad_norm": 1.716052164847165, + "learning_rate": 1.965679180971728e-05, + "loss": 0.8043, + "step": 3626 + }, + { + "epoch": 0.11116219198234645, + "grad_norm": 1.713791399415057, + "learning_rate": 1.965653393714553e-05, + "loss": 0.8669, + "step": 3627 + }, + { + "epoch": 0.11119284050508765, + "grad_norm": 1.8457172317210762, + "learning_rate": 1.9656275969425483e-05, + "loss": 0.8072, + "step": 3628 + }, + { + "epoch": 0.11122348902782886, + "grad_norm": 1.5834155480983532, + "learning_rate": 1.9656017906559678e-05, + "loss": 0.7426, + "step": 3629 + }, + { + "epoch": 0.11125413755057006, + "grad_norm": 1.697382895867819, + "learning_rate": 1.9655759748550656e-05, + "loss": 0.9017, + "step": 3630 + }, + { + "epoch": 0.11128478607331127, + "grad_norm": 1.7929132528383291, + "learning_rate": 1.9655501495400963e-05, + "loss": 0.8734, + "step": 3631 + }, + { + "epoch": 0.11131543459605248, + "grad_norm": 0.7838211702588163, + "learning_rate": 1.9655243147113145e-05, + "loss": 0.702, + "step": 3632 + }, + { + "epoch": 0.11134608311879367, + "grad_norm": 1.684130908862264, + "learning_rate": 1.9654984703689745e-05, + "loss": 0.875, + "step": 3633 + }, + { + "epoch": 0.11137673164153487, + "grad_norm": 0.753945931129832, + "learning_rate": 1.965472616513331e-05, + "loss": 0.6502, + "step": 3634 + }, + { + "epoch": 0.11140738016427608, + "grad_norm": 1.3858543187891803, + "learning_rate": 1.9654467531446387e-05, + "loss": 0.7999, + "step": 3635 + }, + { + "epoch": 0.11143802868701728, + "grad_norm": 1.5133368418708613, + "learning_rate": 1.965420880263153e-05, + "loss": 0.7955, + "step": 3636 + }, + { + "epoch": 0.11146867720975849, + "grad_norm": 1.7001669961196968, + "learning_rate": 1.965394997869128e-05, + "loss": 0.7671, + "step": 3637 + }, + { + "epoch": 0.1114993257324997, + "grad_norm": 1.5918396455231487, + "learning_rate": 1.9653691059628185e-05, + "loss": 0.842, + "step": 3638 + }, + { + "epoch": 0.1115299742552409, + "grad_norm": 1.6265014312323554, + "learning_rate": 1.965343204544481e-05, + "loss": 0.8133, + "step": 3639 + }, + { + "epoch": 0.11156062277798211, + "grad_norm": 1.8821736411009833, + "learning_rate": 1.9653172936143697e-05, + "loss": 0.8963, + "step": 3640 + }, + { + "epoch": 0.1115912713007233, + "grad_norm": 1.5191225506957002, + "learning_rate": 1.9652913731727397e-05, + "loss": 0.9092, + "step": 3641 + }, + { + "epoch": 0.1116219198234645, + "grad_norm": 1.5544573052794348, + "learning_rate": 1.9652654432198473e-05, + "loss": 0.7804, + "step": 3642 + }, + { + "epoch": 0.11165256834620571, + "grad_norm": 1.8509589304390177, + "learning_rate": 1.9652395037559475e-05, + "loss": 0.9953, + "step": 3643 + }, + { + "epoch": 0.11168321686894692, + "grad_norm": 1.899065384202616, + "learning_rate": 1.9652135547812958e-05, + "loss": 0.9393, + "step": 3644 + }, + { + "epoch": 0.11171386539168812, + "grad_norm": 1.6278295925091817, + "learning_rate": 1.965187596296148e-05, + "loss": 0.8548, + "step": 3645 + }, + { + "epoch": 0.11174451391442933, + "grad_norm": 1.5253683618274165, + "learning_rate": 1.9651616283007596e-05, + "loss": 0.8288, + "step": 3646 + }, + { + "epoch": 0.11177516243717053, + "grad_norm": 1.8274569707909598, + "learning_rate": 1.965135650795387e-05, + "loss": 0.8318, + "step": 3647 + }, + { + "epoch": 0.11180581095991174, + "grad_norm": 1.4569317560703898, + "learning_rate": 1.965109663780286e-05, + "loss": 0.801, + "step": 3648 + }, + { + "epoch": 0.11183645948265293, + "grad_norm": 1.6120676118378665, + "learning_rate": 1.9650836672557127e-05, + "loss": 0.7629, + "step": 3649 + }, + { + "epoch": 0.11186710800539414, + "grad_norm": 1.784363119928321, + "learning_rate": 1.9650576612219225e-05, + "loss": 0.8051, + "step": 3650 + }, + { + "epoch": 0.11189775652813534, + "grad_norm": 1.8721054007428655, + "learning_rate": 1.9650316456791727e-05, + "loss": 0.9331, + "step": 3651 + }, + { + "epoch": 0.11192840505087655, + "grad_norm": 1.7598065986694953, + "learning_rate": 1.965005620627719e-05, + "loss": 0.8895, + "step": 3652 + }, + { + "epoch": 0.11195905357361775, + "grad_norm": 1.711995907956079, + "learning_rate": 1.964979586067818e-05, + "loss": 0.7907, + "step": 3653 + }, + { + "epoch": 0.11198970209635896, + "grad_norm": 1.7847258566954483, + "learning_rate": 1.9649535419997263e-05, + "loss": 0.7282, + "step": 3654 + }, + { + "epoch": 0.11202035061910016, + "grad_norm": 1.6575414559852095, + "learning_rate": 1.9649274884237007e-05, + "loss": 0.8255, + "step": 3655 + }, + { + "epoch": 0.11205099914184136, + "grad_norm": 1.4400601458183313, + "learning_rate": 1.964901425339997e-05, + "loss": 0.7573, + "step": 3656 + }, + { + "epoch": 0.11208164766458256, + "grad_norm": 1.7925186436622804, + "learning_rate": 1.9648753527488733e-05, + "loss": 0.8184, + "step": 3657 + }, + { + "epoch": 0.11211229618732377, + "grad_norm": 1.9463273151766516, + "learning_rate": 1.9648492706505856e-05, + "loss": 0.8597, + "step": 3658 + }, + { + "epoch": 0.11214294471006497, + "grad_norm": 1.7622923768513485, + "learning_rate": 1.9648231790453912e-05, + "loss": 0.7854, + "step": 3659 + }, + { + "epoch": 0.11217359323280618, + "grad_norm": 0.9390684073956036, + "learning_rate": 1.9647970779335472e-05, + "loss": 0.6635, + "step": 3660 + }, + { + "epoch": 0.11220424175554738, + "grad_norm": 1.9846691379302017, + "learning_rate": 1.964770967315311e-05, + "loss": 0.8201, + "step": 3661 + }, + { + "epoch": 0.11223489027828859, + "grad_norm": 1.71818162679185, + "learning_rate": 1.9647448471909393e-05, + "loss": 0.8667, + "step": 3662 + }, + { + "epoch": 0.1122655388010298, + "grad_norm": 1.6432707845089058, + "learning_rate": 1.9647187175606896e-05, + "loss": 0.8675, + "step": 3663 + }, + { + "epoch": 0.11229618732377099, + "grad_norm": 1.6983816508173697, + "learning_rate": 1.9646925784248197e-05, + "loss": 0.8929, + "step": 3664 + }, + { + "epoch": 0.1123268358465122, + "grad_norm": 1.6438322372473406, + "learning_rate": 1.9646664297835874e-05, + "loss": 0.8425, + "step": 3665 + }, + { + "epoch": 0.1123574843692534, + "grad_norm": 1.6473844742295307, + "learning_rate": 1.9646402716372495e-05, + "loss": 0.8165, + "step": 3666 + }, + { + "epoch": 0.1123881328919946, + "grad_norm": 1.8545306421392205, + "learning_rate": 1.9646141039860642e-05, + "loss": 0.8803, + "step": 3667 + }, + { + "epoch": 0.11241878141473581, + "grad_norm": 1.613606089509567, + "learning_rate": 1.9645879268302894e-05, + "loss": 0.8386, + "step": 3668 + }, + { + "epoch": 0.11244942993747702, + "grad_norm": 1.920323243657795, + "learning_rate": 1.9645617401701828e-05, + "loss": 0.7817, + "step": 3669 + }, + { + "epoch": 0.11248007846021822, + "grad_norm": 1.7887115113515772, + "learning_rate": 1.964535544006003e-05, + "loss": 0.7368, + "step": 3670 + }, + { + "epoch": 0.11251072698295943, + "grad_norm": 0.8889549354162244, + "learning_rate": 1.9645093383380075e-05, + "loss": 0.6425, + "step": 3671 + }, + { + "epoch": 0.11254137550570062, + "grad_norm": 1.5116879185170364, + "learning_rate": 1.9644831231664545e-05, + "loss": 0.7857, + "step": 3672 + }, + { + "epoch": 0.11257202402844182, + "grad_norm": 2.082676840638323, + "learning_rate": 1.9644568984916027e-05, + "loss": 0.8566, + "step": 3673 + }, + { + "epoch": 0.11260267255118303, + "grad_norm": 1.6933403647667091, + "learning_rate": 1.9644306643137105e-05, + "loss": 0.8392, + "step": 3674 + }, + { + "epoch": 0.11263332107392424, + "grad_norm": 1.886748067018734, + "learning_rate": 1.9644044206330355e-05, + "loss": 0.9016, + "step": 3675 + }, + { + "epoch": 0.11266396959666544, + "grad_norm": 1.8377107202104783, + "learning_rate": 1.9643781674498378e-05, + "loss": 0.871, + "step": 3676 + }, + { + "epoch": 0.11269461811940665, + "grad_norm": 1.6655038796838408, + "learning_rate": 1.9643519047643748e-05, + "loss": 0.789, + "step": 3677 + }, + { + "epoch": 0.11272526664214785, + "grad_norm": 1.533963559252832, + "learning_rate": 1.9643256325769063e-05, + "loss": 0.7701, + "step": 3678 + }, + { + "epoch": 0.11275591516488906, + "grad_norm": 1.7059122952394425, + "learning_rate": 1.96429935088769e-05, + "loss": 0.8849, + "step": 3679 + }, + { + "epoch": 0.11278656368763025, + "grad_norm": 0.748803638322228, + "learning_rate": 1.9642730596969857e-05, + "loss": 0.6419, + "step": 3680 + }, + { + "epoch": 0.11281721221037146, + "grad_norm": 1.6261928845973983, + "learning_rate": 1.964246759005052e-05, + "loss": 0.8416, + "step": 3681 + }, + { + "epoch": 0.11284786073311266, + "grad_norm": 1.767188504322435, + "learning_rate": 1.9642204488121486e-05, + "loss": 0.8481, + "step": 3682 + }, + { + "epoch": 0.11287850925585387, + "grad_norm": 1.6060071046800306, + "learning_rate": 1.964194129118534e-05, + "loss": 0.7437, + "step": 3683 + }, + { + "epoch": 0.11290915777859507, + "grad_norm": 1.5146749093602943, + "learning_rate": 1.9641677999244682e-05, + "loss": 0.8175, + "step": 3684 + }, + { + "epoch": 0.11293980630133628, + "grad_norm": 1.8052988693821568, + "learning_rate": 1.964141461230211e-05, + "loss": 0.8983, + "step": 3685 + }, + { + "epoch": 0.11297045482407748, + "grad_norm": 1.7150954427344351, + "learning_rate": 1.9641151130360204e-05, + "loss": 0.8866, + "step": 3686 + }, + { + "epoch": 0.11300110334681868, + "grad_norm": 1.8023966796445743, + "learning_rate": 1.964088755342157e-05, + "loss": 0.9825, + "step": 3687 + }, + { + "epoch": 0.11303175186955988, + "grad_norm": 1.6159138625104823, + "learning_rate": 1.964062388148881e-05, + "loss": 0.8403, + "step": 3688 + }, + { + "epoch": 0.11306240039230109, + "grad_norm": 1.64463618227209, + "learning_rate": 1.9640360114564513e-05, + "loss": 0.7715, + "step": 3689 + }, + { + "epoch": 0.1130930489150423, + "grad_norm": 1.7527444688537437, + "learning_rate": 1.964009625265128e-05, + "loss": 0.8225, + "step": 3690 + }, + { + "epoch": 0.1131236974377835, + "grad_norm": 1.6142022720306293, + "learning_rate": 1.9639832295751713e-05, + "loss": 0.8971, + "step": 3691 + }, + { + "epoch": 0.1131543459605247, + "grad_norm": 1.7854654627770752, + "learning_rate": 1.9639568243868412e-05, + "loss": 1.0097, + "step": 3692 + }, + { + "epoch": 0.11318499448326591, + "grad_norm": 1.5777771633279059, + "learning_rate": 1.9639304097003976e-05, + "loss": 0.8019, + "step": 3693 + }, + { + "epoch": 0.11321564300600712, + "grad_norm": 1.7064472062102594, + "learning_rate": 1.9639039855161014e-05, + "loss": 0.8284, + "step": 3694 + }, + { + "epoch": 0.11324629152874831, + "grad_norm": 1.7258645417576264, + "learning_rate": 1.9638775518342124e-05, + "loss": 0.8033, + "step": 3695 + }, + { + "epoch": 0.11327694005148951, + "grad_norm": 1.74472981469425, + "learning_rate": 1.9638511086549913e-05, + "loss": 0.7989, + "step": 3696 + }, + { + "epoch": 0.11330758857423072, + "grad_norm": 0.8168343342086212, + "learning_rate": 1.963824655978699e-05, + "loss": 0.6555, + "step": 3697 + }, + { + "epoch": 0.11333823709697192, + "grad_norm": 1.595858202325842, + "learning_rate": 1.9637981938055953e-05, + "loss": 0.8331, + "step": 3698 + }, + { + "epoch": 0.11336888561971313, + "grad_norm": 1.5859067112182132, + "learning_rate": 1.9637717221359413e-05, + "loss": 0.8414, + "step": 3699 + }, + { + "epoch": 0.11339953414245434, + "grad_norm": 1.5884182355902299, + "learning_rate": 1.9637452409699982e-05, + "loss": 0.9478, + "step": 3700 + }, + { + "epoch": 0.11343018266519554, + "grad_norm": 0.7218859672040947, + "learning_rate": 1.9637187503080267e-05, + "loss": 0.6567, + "step": 3701 + }, + { + "epoch": 0.11346083118793675, + "grad_norm": 0.7512034289848577, + "learning_rate": 1.9636922501502877e-05, + "loss": 0.6639, + "step": 3702 + }, + { + "epoch": 0.11349147971067794, + "grad_norm": 1.7192200867078167, + "learning_rate": 1.9636657404970423e-05, + "loss": 0.9531, + "step": 3703 + }, + { + "epoch": 0.11352212823341915, + "grad_norm": 0.7033316283456735, + "learning_rate": 1.963639221348552e-05, + "loss": 0.6545, + "step": 3704 + }, + { + "epoch": 0.11355277675616035, + "grad_norm": 1.69300476949227, + "learning_rate": 1.963612692705078e-05, + "loss": 0.9174, + "step": 3705 + }, + { + "epoch": 0.11358342527890156, + "grad_norm": 0.7225477572303267, + "learning_rate": 1.9635861545668812e-05, + "loss": 0.6414, + "step": 3706 + }, + { + "epoch": 0.11361407380164276, + "grad_norm": 1.5749579480080056, + "learning_rate": 1.9635596069342235e-05, + "loss": 0.9162, + "step": 3707 + }, + { + "epoch": 0.11364472232438397, + "grad_norm": 1.7971077804961408, + "learning_rate": 1.9635330498073666e-05, + "loss": 0.9304, + "step": 3708 + }, + { + "epoch": 0.11367537084712517, + "grad_norm": 2.7557682456638815, + "learning_rate": 1.963506483186572e-05, + "loss": 0.7247, + "step": 3709 + }, + { + "epoch": 0.11370601936986638, + "grad_norm": 0.751326615062294, + "learning_rate": 1.9634799070721015e-05, + "loss": 0.6281, + "step": 3710 + }, + { + "epoch": 0.11373666789260757, + "grad_norm": 1.7578001267363956, + "learning_rate": 1.963453321464217e-05, + "loss": 0.9467, + "step": 3711 + }, + { + "epoch": 0.11376731641534878, + "grad_norm": 0.7726418921246222, + "learning_rate": 1.9634267263631804e-05, + "loss": 0.6617, + "step": 3712 + }, + { + "epoch": 0.11379796493808998, + "grad_norm": 0.7244099757470486, + "learning_rate": 1.9634001217692538e-05, + "loss": 0.6486, + "step": 3713 + }, + { + "epoch": 0.11382861346083119, + "grad_norm": 0.7427105228222164, + "learning_rate": 1.9633735076826993e-05, + "loss": 0.6288, + "step": 3714 + }, + { + "epoch": 0.1138592619835724, + "grad_norm": 1.6275951354062805, + "learning_rate": 1.963346884103779e-05, + "loss": 0.8735, + "step": 3715 + }, + { + "epoch": 0.1138899105063136, + "grad_norm": 1.6346978181871117, + "learning_rate": 1.9633202510327555e-05, + "loss": 0.9493, + "step": 3716 + }, + { + "epoch": 0.1139205590290548, + "grad_norm": 1.7025402009296189, + "learning_rate": 1.9632936084698912e-05, + "loss": 0.9311, + "step": 3717 + }, + { + "epoch": 0.113951207551796, + "grad_norm": 1.6072969627041622, + "learning_rate": 1.963266956415448e-05, + "loss": 0.8463, + "step": 3718 + }, + { + "epoch": 0.1139818560745372, + "grad_norm": 1.6666380551430082, + "learning_rate": 1.9632402948696895e-05, + "loss": 0.8137, + "step": 3719 + }, + { + "epoch": 0.11401250459727841, + "grad_norm": 1.6472385487089816, + "learning_rate": 1.963213623832878e-05, + "loss": 0.7742, + "step": 3720 + }, + { + "epoch": 0.11404315312001961, + "grad_norm": 1.5570596615663677, + "learning_rate": 1.9631869433052756e-05, + "loss": 0.814, + "step": 3721 + }, + { + "epoch": 0.11407380164276082, + "grad_norm": 1.6341904778620953, + "learning_rate": 1.9631602532871462e-05, + "loss": 0.8757, + "step": 3722 + }, + { + "epoch": 0.11410445016550202, + "grad_norm": 1.7125295911696807, + "learning_rate": 1.9631335537787526e-05, + "loss": 0.9512, + "step": 3723 + }, + { + "epoch": 0.11413509868824323, + "grad_norm": 1.6553364484510484, + "learning_rate": 1.9631068447803576e-05, + "loss": 0.833, + "step": 3724 + }, + { + "epoch": 0.11416574721098444, + "grad_norm": 1.6348533379212336, + "learning_rate": 1.963080126292224e-05, + "loss": 0.8188, + "step": 3725 + }, + { + "epoch": 0.11419639573372563, + "grad_norm": 1.7966627629137502, + "learning_rate": 1.963053398314616e-05, + "loss": 0.7938, + "step": 3726 + }, + { + "epoch": 0.11422704425646683, + "grad_norm": 1.5723098239608213, + "learning_rate": 1.9630266608477964e-05, + "loss": 0.6804, + "step": 3727 + }, + { + "epoch": 0.11425769277920804, + "grad_norm": 1.8851244289276554, + "learning_rate": 1.9629999138920285e-05, + "loss": 0.8231, + "step": 3728 + }, + { + "epoch": 0.11428834130194925, + "grad_norm": 1.6443458230606407, + "learning_rate": 1.962973157447576e-05, + "loss": 0.8567, + "step": 3729 + }, + { + "epoch": 0.11431898982469045, + "grad_norm": 1.522462766428482, + "learning_rate": 1.962946391514703e-05, + "loss": 0.7756, + "step": 3730 + }, + { + "epoch": 0.11434963834743166, + "grad_norm": 1.6205216105539915, + "learning_rate": 1.962919616093672e-05, + "loss": 0.8386, + "step": 3731 + }, + { + "epoch": 0.11438028687017286, + "grad_norm": 1.1283237572771636, + "learning_rate": 1.9628928311847483e-05, + "loss": 0.6808, + "step": 3732 + }, + { + "epoch": 0.11441093539291407, + "grad_norm": 1.5776413725395295, + "learning_rate": 1.962866036788195e-05, + "loss": 0.7695, + "step": 3733 + }, + { + "epoch": 0.11444158391565526, + "grad_norm": 1.7576297367813167, + "learning_rate": 1.9628392329042767e-05, + "loss": 0.8435, + "step": 3734 + }, + { + "epoch": 0.11447223243839647, + "grad_norm": 1.8763349555631206, + "learning_rate": 1.962812419533257e-05, + "loss": 0.801, + "step": 3735 + }, + { + "epoch": 0.11450288096113767, + "grad_norm": 0.7490499455361379, + "learning_rate": 1.9627855966753996e-05, + "loss": 0.6279, + "step": 3736 + }, + { + "epoch": 0.11453352948387888, + "grad_norm": 1.7274333443755632, + "learning_rate": 1.9627587643309698e-05, + "loss": 0.9743, + "step": 3737 + }, + { + "epoch": 0.11456417800662008, + "grad_norm": 1.5135727473953666, + "learning_rate": 1.962731922500231e-05, + "loss": 0.876, + "step": 3738 + }, + { + "epoch": 0.11459482652936129, + "grad_norm": 1.6872394369509005, + "learning_rate": 1.962705071183449e-05, + "loss": 0.7454, + "step": 3739 + }, + { + "epoch": 0.1146254750521025, + "grad_norm": 1.9116331074322814, + "learning_rate": 1.9626782103808872e-05, + "loss": 0.9351, + "step": 3740 + }, + { + "epoch": 0.1146561235748437, + "grad_norm": 1.7773307525358881, + "learning_rate": 1.962651340092811e-05, + "loss": 0.7963, + "step": 3741 + }, + { + "epoch": 0.11468677209758489, + "grad_norm": 1.6473031945673728, + "learning_rate": 1.9626244603194844e-05, + "loss": 0.8971, + "step": 3742 + }, + { + "epoch": 0.1147174206203261, + "grad_norm": 1.7912287005242993, + "learning_rate": 1.9625975710611728e-05, + "loss": 0.9042, + "step": 3743 + }, + { + "epoch": 0.1147480691430673, + "grad_norm": 1.6567956647020432, + "learning_rate": 1.9625706723181413e-05, + "loss": 0.8475, + "step": 3744 + }, + { + "epoch": 0.11477871766580851, + "grad_norm": 1.7583783329634102, + "learning_rate": 1.9625437640906546e-05, + "loss": 0.7746, + "step": 3745 + }, + { + "epoch": 0.11480936618854971, + "grad_norm": 1.9377266164278344, + "learning_rate": 1.962516846378978e-05, + "loss": 0.8263, + "step": 3746 + }, + { + "epoch": 0.11484001471129092, + "grad_norm": 1.65275012602743, + "learning_rate": 1.9624899191833765e-05, + "loss": 0.7798, + "step": 3747 + }, + { + "epoch": 0.11487066323403212, + "grad_norm": 1.6290783068726589, + "learning_rate": 1.9624629825041154e-05, + "loss": 0.8189, + "step": 3748 + }, + { + "epoch": 0.11490131175677332, + "grad_norm": 1.6576327818952146, + "learning_rate": 1.9624360363414606e-05, + "loss": 0.9399, + "step": 3749 + }, + { + "epoch": 0.11493196027951452, + "grad_norm": 1.7770173110652616, + "learning_rate": 1.962409080695677e-05, + "loss": 0.7677, + "step": 3750 + }, + { + "epoch": 0.11496260880225573, + "grad_norm": 1.7488765404838054, + "learning_rate": 1.9623821155670308e-05, + "loss": 0.838, + "step": 3751 + }, + { + "epoch": 0.11499325732499693, + "grad_norm": 1.0313673756778647, + "learning_rate": 1.9623551409557868e-05, + "loss": 0.6719, + "step": 3752 + }, + { + "epoch": 0.11502390584773814, + "grad_norm": 1.687715788918347, + "learning_rate": 1.962328156862212e-05, + "loss": 0.7247, + "step": 3753 + }, + { + "epoch": 0.11505455437047934, + "grad_norm": 1.882658764103032, + "learning_rate": 1.9623011632865713e-05, + "loss": 0.898, + "step": 3754 + }, + { + "epoch": 0.11508520289322055, + "grad_norm": 1.592796569126842, + "learning_rate": 1.962274160229131e-05, + "loss": 0.7854, + "step": 3755 + }, + { + "epoch": 0.11511585141596176, + "grad_norm": 1.8239910269434674, + "learning_rate": 1.9622471476901573e-05, + "loss": 0.8544, + "step": 3756 + }, + { + "epoch": 0.11514649993870295, + "grad_norm": 1.6519630780114647, + "learning_rate": 1.9622201256699165e-05, + "loss": 0.9077, + "step": 3757 + }, + { + "epoch": 0.11517714846144415, + "grad_norm": 1.6755064007926919, + "learning_rate": 1.9621930941686746e-05, + "loss": 0.7969, + "step": 3758 + }, + { + "epoch": 0.11520779698418536, + "grad_norm": 1.1480593304609013, + "learning_rate": 1.9621660531866976e-05, + "loss": 0.679, + "step": 3759 + }, + { + "epoch": 0.11523844550692657, + "grad_norm": 1.669730513178115, + "learning_rate": 1.9621390027242522e-05, + "loss": 0.8671, + "step": 3760 + }, + { + "epoch": 0.11526909402966777, + "grad_norm": 1.593398188420862, + "learning_rate": 1.9621119427816053e-05, + "loss": 0.7745, + "step": 3761 + }, + { + "epoch": 0.11529974255240898, + "grad_norm": 1.685381658103244, + "learning_rate": 1.9620848733590233e-05, + "loss": 0.9462, + "step": 3762 + }, + { + "epoch": 0.11533039107515018, + "grad_norm": 1.6523867666121832, + "learning_rate": 1.9620577944567727e-05, + "loss": 0.872, + "step": 3763 + }, + { + "epoch": 0.11536103959789139, + "grad_norm": 1.4819524157807098, + "learning_rate": 1.9620307060751207e-05, + "loss": 0.8383, + "step": 3764 + }, + { + "epoch": 0.11539168812063258, + "grad_norm": 1.8104103828527365, + "learning_rate": 1.9620036082143338e-05, + "loss": 0.8646, + "step": 3765 + }, + { + "epoch": 0.11542233664337379, + "grad_norm": 1.8544402139911509, + "learning_rate": 1.9619765008746793e-05, + "loss": 0.8104, + "step": 3766 + }, + { + "epoch": 0.11545298516611499, + "grad_norm": 0.8392022988819929, + "learning_rate": 1.9619493840564243e-05, + "loss": 0.6485, + "step": 3767 + }, + { + "epoch": 0.1154836336888562, + "grad_norm": 1.5230422971419053, + "learning_rate": 1.9619222577598357e-05, + "loss": 0.8016, + "step": 3768 + }, + { + "epoch": 0.1155142822115974, + "grad_norm": 1.552390391555086, + "learning_rate": 1.9618951219851815e-05, + "loss": 0.8118, + "step": 3769 + }, + { + "epoch": 0.11554493073433861, + "grad_norm": 0.7662954352640364, + "learning_rate": 1.961867976732728e-05, + "loss": 0.6605, + "step": 3770 + }, + { + "epoch": 0.11557557925707981, + "grad_norm": 1.7810690179250492, + "learning_rate": 1.9618408220027434e-05, + "loss": 0.8638, + "step": 3771 + }, + { + "epoch": 0.11560622777982102, + "grad_norm": 1.614398849173816, + "learning_rate": 1.961813657795495e-05, + "loss": 0.8495, + "step": 3772 + }, + { + "epoch": 0.11563687630256221, + "grad_norm": 1.585896550188545, + "learning_rate": 1.9617864841112504e-05, + "loss": 0.7767, + "step": 3773 + }, + { + "epoch": 0.11566752482530342, + "grad_norm": 1.946185348335209, + "learning_rate": 1.961759300950278e-05, + "loss": 0.8579, + "step": 3774 + }, + { + "epoch": 0.11569817334804462, + "grad_norm": 0.7678124458443979, + "learning_rate": 1.9617321083128447e-05, + "loss": 0.6466, + "step": 3775 + }, + { + "epoch": 0.11572882187078583, + "grad_norm": 0.7527693586174565, + "learning_rate": 1.961704906199219e-05, + "loss": 0.655, + "step": 3776 + }, + { + "epoch": 0.11575947039352703, + "grad_norm": 0.7495104550221778, + "learning_rate": 1.9616776946096685e-05, + "loss": 0.6693, + "step": 3777 + }, + { + "epoch": 0.11579011891626824, + "grad_norm": 1.463645647006644, + "learning_rate": 1.9616504735444622e-05, + "loss": 0.8, + "step": 3778 + }, + { + "epoch": 0.11582076743900944, + "grad_norm": 1.756361351877873, + "learning_rate": 1.9616232430038673e-05, + "loss": 0.8073, + "step": 3779 + }, + { + "epoch": 0.11585141596175064, + "grad_norm": 0.7895180344754434, + "learning_rate": 1.9615960029881527e-05, + "loss": 0.6349, + "step": 3780 + }, + { + "epoch": 0.11588206448449184, + "grad_norm": 1.4913567993992032, + "learning_rate": 1.9615687534975866e-05, + "loss": 0.8422, + "step": 3781 + }, + { + "epoch": 0.11591271300723305, + "grad_norm": 0.7459014081826544, + "learning_rate": 1.9615414945324375e-05, + "loss": 0.6303, + "step": 3782 + }, + { + "epoch": 0.11594336152997425, + "grad_norm": 1.9117797012021622, + "learning_rate": 1.961514226092974e-05, + "loss": 0.8586, + "step": 3783 + }, + { + "epoch": 0.11597401005271546, + "grad_norm": 0.7153958845406864, + "learning_rate": 1.961486948179465e-05, + "loss": 0.6174, + "step": 3784 + }, + { + "epoch": 0.11600465857545667, + "grad_norm": 1.9376563815059726, + "learning_rate": 1.961459660792179e-05, + "loss": 0.8067, + "step": 3785 + }, + { + "epoch": 0.11603530709819787, + "grad_norm": 1.6010152796708759, + "learning_rate": 1.961432363931385e-05, + "loss": 0.8304, + "step": 3786 + }, + { + "epoch": 0.11606595562093908, + "grad_norm": 1.6288543191974603, + "learning_rate": 1.961405057597352e-05, + "loss": 0.8121, + "step": 3787 + }, + { + "epoch": 0.11609660414368027, + "grad_norm": 1.669479197515086, + "learning_rate": 1.9613777417903487e-05, + "loss": 0.9062, + "step": 3788 + }, + { + "epoch": 0.11612725266642147, + "grad_norm": 1.801102881351048, + "learning_rate": 1.9613504165106446e-05, + "loss": 0.7811, + "step": 3789 + }, + { + "epoch": 0.11615790118916268, + "grad_norm": 1.597648376143386, + "learning_rate": 1.961323081758509e-05, + "loss": 0.7967, + "step": 3790 + }, + { + "epoch": 0.11618854971190389, + "grad_norm": 1.7300930308693665, + "learning_rate": 1.961295737534211e-05, + "loss": 0.8219, + "step": 3791 + }, + { + "epoch": 0.11621919823464509, + "grad_norm": 1.811071310971127, + "learning_rate": 1.9612683838380204e-05, + "loss": 0.8165, + "step": 3792 + }, + { + "epoch": 0.1162498467573863, + "grad_norm": 1.6190399713318298, + "learning_rate": 1.9612410206702063e-05, + "loss": 0.8715, + "step": 3793 + }, + { + "epoch": 0.1162804952801275, + "grad_norm": 1.677976727877862, + "learning_rate": 1.9612136480310385e-05, + "loss": 0.8124, + "step": 3794 + }, + { + "epoch": 0.11631114380286871, + "grad_norm": 0.8779495363861951, + "learning_rate": 1.9611862659207864e-05, + "loss": 0.6675, + "step": 3795 + }, + { + "epoch": 0.1163417923256099, + "grad_norm": 1.7950197009699007, + "learning_rate": 1.9611588743397207e-05, + "loss": 0.8832, + "step": 3796 + }, + { + "epoch": 0.1163724408483511, + "grad_norm": 1.6862069733813818, + "learning_rate": 1.96113147328811e-05, + "loss": 0.9496, + "step": 3797 + }, + { + "epoch": 0.11640308937109231, + "grad_norm": 1.4707159028721057, + "learning_rate": 1.9611040627662254e-05, + "loss": 0.8125, + "step": 3798 + }, + { + "epoch": 0.11643373789383352, + "grad_norm": 2.1159069642414043, + "learning_rate": 1.9610766427743367e-05, + "loss": 0.9358, + "step": 3799 + }, + { + "epoch": 0.11646438641657472, + "grad_norm": 0.7686041324339088, + "learning_rate": 1.9610492133127138e-05, + "loss": 0.6262, + "step": 3800 + }, + { + "epoch": 0.11649503493931593, + "grad_norm": 1.598812573853565, + "learning_rate": 1.9610217743816267e-05, + "loss": 0.8294, + "step": 3801 + }, + { + "epoch": 0.11652568346205713, + "grad_norm": 1.6424706575813703, + "learning_rate": 1.9609943259813466e-05, + "loss": 0.7944, + "step": 3802 + }, + { + "epoch": 0.11655633198479834, + "grad_norm": 1.778545835676026, + "learning_rate": 1.9609668681121435e-05, + "loss": 0.8712, + "step": 3803 + }, + { + "epoch": 0.11658698050753953, + "grad_norm": 0.7825879862707075, + "learning_rate": 1.960939400774288e-05, + "loss": 0.6435, + "step": 3804 + }, + { + "epoch": 0.11661762903028074, + "grad_norm": 1.6331097802985872, + "learning_rate": 1.9609119239680505e-05, + "loss": 0.8331, + "step": 3805 + }, + { + "epoch": 0.11664827755302194, + "grad_norm": 1.6670050356481068, + "learning_rate": 1.960884437693702e-05, + "loss": 0.7922, + "step": 3806 + }, + { + "epoch": 0.11667892607576315, + "grad_norm": 1.7796287656620284, + "learning_rate": 1.9608569419515133e-05, + "loss": 0.7954, + "step": 3807 + }, + { + "epoch": 0.11670957459850435, + "grad_norm": 0.7526164495982542, + "learning_rate": 1.9608294367417553e-05, + "loss": 0.6361, + "step": 3808 + }, + { + "epoch": 0.11674022312124556, + "grad_norm": 1.8158722855106644, + "learning_rate": 1.9608019220646992e-05, + "loss": 0.808, + "step": 3809 + }, + { + "epoch": 0.11677087164398676, + "grad_norm": 1.756708414327064, + "learning_rate": 1.9607743979206157e-05, + "loss": 0.8822, + "step": 3810 + }, + { + "epoch": 0.11680152016672796, + "grad_norm": 1.7211052436851357, + "learning_rate": 1.9607468643097765e-05, + "loss": 0.8534, + "step": 3811 + }, + { + "epoch": 0.11683216868946916, + "grad_norm": 0.736390606624157, + "learning_rate": 1.9607193212324524e-05, + "loss": 0.6421, + "step": 3812 + }, + { + "epoch": 0.11686281721221037, + "grad_norm": 1.6214213272774731, + "learning_rate": 1.960691768688915e-05, + "loss": 0.8328, + "step": 3813 + }, + { + "epoch": 0.11689346573495157, + "grad_norm": 1.692163851024461, + "learning_rate": 1.960664206679436e-05, + "loss": 0.7851, + "step": 3814 + }, + { + "epoch": 0.11692411425769278, + "grad_norm": 1.458596194151739, + "learning_rate": 1.9606366352042867e-05, + "loss": 0.7047, + "step": 3815 + }, + { + "epoch": 0.11695476278043399, + "grad_norm": 1.652223209639812, + "learning_rate": 1.9606090542637388e-05, + "loss": 0.8094, + "step": 3816 + }, + { + "epoch": 0.11698541130317519, + "grad_norm": 1.6230365280025738, + "learning_rate": 1.960581463858064e-05, + "loss": 0.8615, + "step": 3817 + }, + { + "epoch": 0.1170160598259164, + "grad_norm": 1.6962512261766487, + "learning_rate": 1.9605538639875344e-05, + "loss": 0.9068, + "step": 3818 + }, + { + "epoch": 0.11704670834865759, + "grad_norm": 1.7080354623553051, + "learning_rate": 1.9605262546524217e-05, + "loss": 0.759, + "step": 3819 + }, + { + "epoch": 0.1170773568713988, + "grad_norm": 1.8816881034977047, + "learning_rate": 1.9604986358529983e-05, + "loss": 0.8509, + "step": 3820 + }, + { + "epoch": 0.11710800539414, + "grad_norm": 1.809449942559249, + "learning_rate": 1.9604710075895358e-05, + "loss": 0.8721, + "step": 3821 + }, + { + "epoch": 0.1171386539168812, + "grad_norm": 1.4220902846259271, + "learning_rate": 1.960443369862307e-05, + "loss": 0.7705, + "step": 3822 + }, + { + "epoch": 0.11716930243962241, + "grad_norm": 1.7196009319155638, + "learning_rate": 1.9604157226715833e-05, + "loss": 0.8143, + "step": 3823 + }, + { + "epoch": 0.11719995096236362, + "grad_norm": 0.7917750938623699, + "learning_rate": 1.9603880660176384e-05, + "loss": 0.632, + "step": 3824 + }, + { + "epoch": 0.11723059948510482, + "grad_norm": 1.5297064516646826, + "learning_rate": 1.9603603999007437e-05, + "loss": 0.8511, + "step": 3825 + }, + { + "epoch": 0.11726124800784603, + "grad_norm": 1.4300659174953485, + "learning_rate": 1.9603327243211728e-05, + "loss": 0.7855, + "step": 3826 + }, + { + "epoch": 0.11729189653058722, + "grad_norm": 1.9485301801235773, + "learning_rate": 1.9603050392791975e-05, + "loss": 0.8972, + "step": 3827 + }, + { + "epoch": 0.11732254505332843, + "grad_norm": 1.576004815828137, + "learning_rate": 1.960277344775091e-05, + "loss": 0.992, + "step": 3828 + }, + { + "epoch": 0.11735319357606963, + "grad_norm": 1.9335246369525554, + "learning_rate": 1.960249640809126e-05, + "loss": 0.868, + "step": 3829 + }, + { + "epoch": 0.11738384209881084, + "grad_norm": 0.7663586589300528, + "learning_rate": 1.9602219273815762e-05, + "loss": 0.6754, + "step": 3830 + }, + { + "epoch": 0.11741449062155204, + "grad_norm": 1.8718229132113982, + "learning_rate": 1.9601942044927137e-05, + "loss": 0.827, + "step": 3831 + }, + { + "epoch": 0.11744513914429325, + "grad_norm": 1.62127628459694, + "learning_rate": 1.9601664721428116e-05, + "loss": 0.7544, + "step": 3832 + }, + { + "epoch": 0.11747578766703445, + "grad_norm": 0.7701228515305101, + "learning_rate": 1.960138730332144e-05, + "loss": 0.6447, + "step": 3833 + }, + { + "epoch": 0.11750643618977566, + "grad_norm": 1.73394680514172, + "learning_rate": 1.960110979060984e-05, + "loss": 0.9001, + "step": 3834 + }, + { + "epoch": 0.11753708471251685, + "grad_norm": 1.5438452586410534, + "learning_rate": 1.9600832183296044e-05, + "loss": 0.8136, + "step": 3835 + }, + { + "epoch": 0.11756773323525806, + "grad_norm": 0.7336432771585149, + "learning_rate": 1.9600554481382797e-05, + "loss": 0.6708, + "step": 3836 + }, + { + "epoch": 0.11759838175799926, + "grad_norm": 0.7123548901182194, + "learning_rate": 1.9600276684872827e-05, + "loss": 0.6444, + "step": 3837 + }, + { + "epoch": 0.11762903028074047, + "grad_norm": 0.7172039512700822, + "learning_rate": 1.9599998793768873e-05, + "loss": 0.6514, + "step": 3838 + }, + { + "epoch": 0.11765967880348167, + "grad_norm": 1.7602168925844701, + "learning_rate": 1.9599720808073676e-05, + "loss": 0.8688, + "step": 3839 + }, + { + "epoch": 0.11769032732622288, + "grad_norm": 0.740831941147452, + "learning_rate": 1.9599442727789976e-05, + "loss": 0.6478, + "step": 3840 + }, + { + "epoch": 0.11772097584896409, + "grad_norm": 1.4513289852034394, + "learning_rate": 1.9599164552920508e-05, + "loss": 0.7726, + "step": 3841 + }, + { + "epoch": 0.11775162437170529, + "grad_norm": 1.7052674712723892, + "learning_rate": 1.9598886283468017e-05, + "loss": 0.9231, + "step": 3842 + }, + { + "epoch": 0.11778227289444648, + "grad_norm": 1.823270021245965, + "learning_rate": 1.959860791943524e-05, + "loss": 0.8254, + "step": 3843 + }, + { + "epoch": 0.11781292141718769, + "grad_norm": 1.5924437619044494, + "learning_rate": 1.959832946082493e-05, + "loss": 0.8162, + "step": 3844 + }, + { + "epoch": 0.1178435699399289, + "grad_norm": 1.6027489682329137, + "learning_rate": 1.959805090763982e-05, + "loss": 0.8964, + "step": 3845 + }, + { + "epoch": 0.1178742184626701, + "grad_norm": 1.648919579134315, + "learning_rate": 1.959777225988266e-05, + "loss": 0.7369, + "step": 3846 + }, + { + "epoch": 0.1179048669854113, + "grad_norm": 1.8137762500690993, + "learning_rate": 1.9597493517556193e-05, + "loss": 0.9797, + "step": 3847 + }, + { + "epoch": 0.11793551550815251, + "grad_norm": 1.8977705972895256, + "learning_rate": 1.9597214680663165e-05, + "loss": 0.867, + "step": 3848 + }, + { + "epoch": 0.11796616403089372, + "grad_norm": 0.9138828080895283, + "learning_rate": 1.9596935749206328e-05, + "loss": 0.648, + "step": 3849 + }, + { + "epoch": 0.11799681255363491, + "grad_norm": 0.801640465031305, + "learning_rate": 1.9596656723188427e-05, + "loss": 0.6325, + "step": 3850 + }, + { + "epoch": 0.11802746107637611, + "grad_norm": 1.7919684572456347, + "learning_rate": 1.959637760261221e-05, + "loss": 0.7856, + "step": 3851 + }, + { + "epoch": 0.11805810959911732, + "grad_norm": 1.752748231368698, + "learning_rate": 1.959609838748043e-05, + "loss": 0.8192, + "step": 3852 + }, + { + "epoch": 0.11808875812185853, + "grad_norm": 1.8555738964378674, + "learning_rate": 1.959581907779584e-05, + "loss": 0.8848, + "step": 3853 + }, + { + "epoch": 0.11811940664459973, + "grad_norm": 1.5837984361289803, + "learning_rate": 1.9595539673561188e-05, + "loss": 0.8741, + "step": 3854 + }, + { + "epoch": 0.11815005516734094, + "grad_norm": 1.610837275248335, + "learning_rate": 1.9595260174779227e-05, + "loss": 0.733, + "step": 3855 + }, + { + "epoch": 0.11818070369008214, + "grad_norm": 1.6903035586589505, + "learning_rate": 1.9594980581452712e-05, + "loss": 0.7732, + "step": 3856 + }, + { + "epoch": 0.11821135221282335, + "grad_norm": 2.142576803146924, + "learning_rate": 1.9594700893584405e-05, + "loss": 0.9852, + "step": 3857 + }, + { + "epoch": 0.11824200073556454, + "grad_norm": 1.6230527781416046, + "learning_rate": 1.9594421111177046e-05, + "loss": 0.8579, + "step": 3858 + }, + { + "epoch": 0.11827264925830575, + "grad_norm": 1.578023683924311, + "learning_rate": 1.9594141234233407e-05, + "loss": 0.7956, + "step": 3859 + }, + { + "epoch": 0.11830329778104695, + "grad_norm": 1.6115615679778705, + "learning_rate": 1.9593861262756236e-05, + "loss": 0.8716, + "step": 3860 + }, + { + "epoch": 0.11833394630378816, + "grad_norm": 1.8175481732391339, + "learning_rate": 1.9593581196748298e-05, + "loss": 0.9552, + "step": 3861 + }, + { + "epoch": 0.11836459482652936, + "grad_norm": 1.7230372011212505, + "learning_rate": 1.959330103621235e-05, + "loss": 0.8822, + "step": 3862 + }, + { + "epoch": 0.11839524334927057, + "grad_norm": 1.595334527736837, + "learning_rate": 1.959302078115115e-05, + "loss": 0.9564, + "step": 3863 + }, + { + "epoch": 0.11842589187201177, + "grad_norm": 1.7238286678424937, + "learning_rate": 1.9592740431567463e-05, + "loss": 0.8789, + "step": 3864 + }, + { + "epoch": 0.11845654039475298, + "grad_norm": 1.6553675082109827, + "learning_rate": 1.959245998746405e-05, + "loss": 0.8588, + "step": 3865 + }, + { + "epoch": 0.11848718891749417, + "grad_norm": 1.6942425840905453, + "learning_rate": 1.9592179448843675e-05, + "loss": 0.8807, + "step": 3866 + }, + { + "epoch": 0.11851783744023538, + "grad_norm": 1.6308150406181525, + "learning_rate": 1.9591898815709102e-05, + "loss": 0.8301, + "step": 3867 + }, + { + "epoch": 0.11854848596297658, + "grad_norm": 1.7521641521971874, + "learning_rate": 1.959161808806309e-05, + "loss": 0.885, + "step": 3868 + }, + { + "epoch": 0.11857913448571779, + "grad_norm": 1.672013480911285, + "learning_rate": 1.9591337265908417e-05, + "loss": 0.756, + "step": 3869 + }, + { + "epoch": 0.118609783008459, + "grad_norm": 1.6220681033718924, + "learning_rate": 1.9591056349247845e-05, + "loss": 0.761, + "step": 3870 + }, + { + "epoch": 0.1186404315312002, + "grad_norm": 2.0058502305996773, + "learning_rate": 1.9590775338084138e-05, + "loss": 0.8781, + "step": 3871 + }, + { + "epoch": 0.1186710800539414, + "grad_norm": 1.712792523626485, + "learning_rate": 1.959049423242007e-05, + "loss": 0.7805, + "step": 3872 + }, + { + "epoch": 0.11870172857668261, + "grad_norm": 1.6299127148270305, + "learning_rate": 1.9590213032258406e-05, + "loss": 0.8979, + "step": 3873 + }, + { + "epoch": 0.1187323770994238, + "grad_norm": 1.6194258640516381, + "learning_rate": 1.9589931737601917e-05, + "loss": 0.819, + "step": 3874 + }, + { + "epoch": 0.11876302562216501, + "grad_norm": 1.7692192886438733, + "learning_rate": 1.958965034845338e-05, + "loss": 0.8661, + "step": 3875 + }, + { + "epoch": 0.11879367414490621, + "grad_norm": 1.8059703519653423, + "learning_rate": 1.9589368864815562e-05, + "loss": 0.8488, + "step": 3876 + }, + { + "epoch": 0.11882432266764742, + "grad_norm": 1.5635965382595902, + "learning_rate": 1.9589087286691243e-05, + "loss": 0.898, + "step": 3877 + }, + { + "epoch": 0.11885497119038863, + "grad_norm": 1.4517934635012884, + "learning_rate": 1.958880561408319e-05, + "loss": 0.8866, + "step": 3878 + }, + { + "epoch": 0.11888561971312983, + "grad_norm": 1.6699271582056605, + "learning_rate": 1.9588523846994184e-05, + "loss": 0.8426, + "step": 3879 + }, + { + "epoch": 0.11891626823587104, + "grad_norm": 1.9706161282625678, + "learning_rate": 1.9588241985427e-05, + "loss": 0.8769, + "step": 3880 + }, + { + "epoch": 0.11894691675861223, + "grad_norm": 1.7668513692514332, + "learning_rate": 1.9587960029384413e-05, + "loss": 0.7131, + "step": 3881 + }, + { + "epoch": 0.11897756528135343, + "grad_norm": 1.0540828693313111, + "learning_rate": 1.9587677978869203e-05, + "loss": 0.6579, + "step": 3882 + }, + { + "epoch": 0.11900821380409464, + "grad_norm": 1.615998352468902, + "learning_rate": 1.9587395833884148e-05, + "loss": 0.8621, + "step": 3883 + }, + { + "epoch": 0.11903886232683585, + "grad_norm": 1.5538775312273692, + "learning_rate": 1.9587113594432032e-05, + "loss": 0.8066, + "step": 3884 + }, + { + "epoch": 0.11906951084957705, + "grad_norm": 1.542779701562174, + "learning_rate": 1.958683126051563e-05, + "loss": 0.8069, + "step": 3885 + }, + { + "epoch": 0.11910015937231826, + "grad_norm": 1.7219558496717606, + "learning_rate": 1.9586548832137725e-05, + "loss": 0.8167, + "step": 3886 + }, + { + "epoch": 0.11913080789505946, + "grad_norm": 1.5948094501836327, + "learning_rate": 1.9586266309301104e-05, + "loss": 0.7631, + "step": 3887 + }, + { + "epoch": 0.11916145641780067, + "grad_norm": 1.6435237849977664, + "learning_rate": 1.958598369200855e-05, + "loss": 0.839, + "step": 3888 + }, + { + "epoch": 0.11919210494054186, + "grad_norm": 1.6597321634642008, + "learning_rate": 1.9585700980262842e-05, + "loss": 0.7826, + "step": 3889 + }, + { + "epoch": 0.11922275346328307, + "grad_norm": 0.8897754220478203, + "learning_rate": 1.958541817406677e-05, + "loss": 0.6556, + "step": 3890 + }, + { + "epoch": 0.11925340198602427, + "grad_norm": 1.917711908405607, + "learning_rate": 1.9585135273423122e-05, + "loss": 0.7644, + "step": 3891 + }, + { + "epoch": 0.11928405050876548, + "grad_norm": 1.8036290804416295, + "learning_rate": 1.9584852278334682e-05, + "loss": 0.9687, + "step": 3892 + }, + { + "epoch": 0.11931469903150668, + "grad_norm": 0.8041478420915747, + "learning_rate": 1.9584569188804244e-05, + "loss": 0.6523, + "step": 3893 + }, + { + "epoch": 0.11934534755424789, + "grad_norm": 1.645608388155791, + "learning_rate": 1.958428600483459e-05, + "loss": 0.7913, + "step": 3894 + }, + { + "epoch": 0.1193759960769891, + "grad_norm": 1.7798395786865953, + "learning_rate": 1.9584002726428513e-05, + "loss": 0.8194, + "step": 3895 + }, + { + "epoch": 0.1194066445997303, + "grad_norm": 0.7464205318664519, + "learning_rate": 1.9583719353588807e-05, + "loss": 0.644, + "step": 3896 + }, + { + "epoch": 0.11943729312247149, + "grad_norm": 1.62476166916437, + "learning_rate": 1.9583435886318263e-05, + "loss": 0.8699, + "step": 3897 + }, + { + "epoch": 0.1194679416452127, + "grad_norm": 1.6389763419998151, + "learning_rate": 1.958315232461967e-05, + "loss": 0.8253, + "step": 3898 + }, + { + "epoch": 0.1194985901679539, + "grad_norm": 1.685857765481343, + "learning_rate": 1.9582868668495828e-05, + "loss": 0.8577, + "step": 3899 + }, + { + "epoch": 0.11952923869069511, + "grad_norm": 0.7542142074308855, + "learning_rate": 1.9582584917949528e-05, + "loss": 0.6442, + "step": 3900 + }, + { + "epoch": 0.11955988721343631, + "grad_norm": 1.4895899906578585, + "learning_rate": 1.9582301072983567e-05, + "loss": 0.7208, + "step": 3901 + }, + { + "epoch": 0.11959053573617752, + "grad_norm": 1.5575829964853243, + "learning_rate": 1.958201713360074e-05, + "loss": 0.8385, + "step": 3902 + }, + { + "epoch": 0.11962118425891873, + "grad_norm": 1.733966445129016, + "learning_rate": 1.958173309980385e-05, + "loss": 0.829, + "step": 3903 + }, + { + "epoch": 0.11965183278165993, + "grad_norm": 1.6720228019116528, + "learning_rate": 1.958144897159569e-05, + "loss": 0.8703, + "step": 3904 + }, + { + "epoch": 0.11968248130440112, + "grad_norm": 1.6462937270750766, + "learning_rate": 1.9581164748979064e-05, + "loss": 0.8519, + "step": 3905 + }, + { + "epoch": 0.11971312982714233, + "grad_norm": 1.7279220849033952, + "learning_rate": 1.9580880431956767e-05, + "loss": 0.8071, + "step": 3906 + }, + { + "epoch": 0.11974377834988353, + "grad_norm": 1.608390636000211, + "learning_rate": 1.9580596020531607e-05, + "loss": 0.7494, + "step": 3907 + }, + { + "epoch": 0.11977442687262474, + "grad_norm": 0.8305812408926104, + "learning_rate": 1.958031151470638e-05, + "loss": 0.627, + "step": 3908 + }, + { + "epoch": 0.11980507539536595, + "grad_norm": 1.5991251040793282, + "learning_rate": 1.9580026914483895e-05, + "loss": 0.8624, + "step": 3909 + }, + { + "epoch": 0.11983572391810715, + "grad_norm": 1.6929014811660157, + "learning_rate": 1.9579742219866954e-05, + "loss": 0.8827, + "step": 3910 + }, + { + "epoch": 0.11986637244084836, + "grad_norm": 1.6276747251958945, + "learning_rate": 1.957945743085836e-05, + "loss": 0.8424, + "step": 3911 + }, + { + "epoch": 0.11989702096358955, + "grad_norm": 1.6746644296657853, + "learning_rate": 1.957917254746092e-05, + "loss": 0.8596, + "step": 3912 + }, + { + "epoch": 0.11992766948633075, + "grad_norm": 1.8554187759366527, + "learning_rate": 1.9578887569677444e-05, + "loss": 0.8616, + "step": 3913 + }, + { + "epoch": 0.11995831800907196, + "grad_norm": 0.7597335294251707, + "learning_rate": 1.9578602497510736e-05, + "loss": 0.6527, + "step": 3914 + }, + { + "epoch": 0.11998896653181317, + "grad_norm": 1.5419225946173039, + "learning_rate": 1.9578317330963608e-05, + "loss": 0.8578, + "step": 3915 + }, + { + "epoch": 0.12001961505455437, + "grad_norm": 1.4341659530154054, + "learning_rate": 1.957803207003887e-05, + "loss": 0.7968, + "step": 3916 + }, + { + "epoch": 0.12005026357729558, + "grad_norm": 1.5729020902579378, + "learning_rate": 1.957774671473933e-05, + "loss": 0.8051, + "step": 3917 + }, + { + "epoch": 0.12008091210003678, + "grad_norm": 1.8627072950270938, + "learning_rate": 1.95774612650678e-05, + "loss": 0.8778, + "step": 3918 + }, + { + "epoch": 0.12011156062277799, + "grad_norm": 1.700206371398941, + "learning_rate": 1.9577175721027094e-05, + "loss": 0.7287, + "step": 3919 + }, + { + "epoch": 0.12014220914551918, + "grad_norm": 1.6754727596864025, + "learning_rate": 1.9576890082620026e-05, + "loss": 0.7504, + "step": 3920 + }, + { + "epoch": 0.12017285766826039, + "grad_norm": 1.5546644783900665, + "learning_rate": 1.957660434984941e-05, + "loss": 0.9296, + "step": 3921 + }, + { + "epoch": 0.12020350619100159, + "grad_norm": 1.597408065342042, + "learning_rate": 1.9576318522718062e-05, + "loss": 0.871, + "step": 3922 + }, + { + "epoch": 0.1202341547137428, + "grad_norm": 0.7572571596559752, + "learning_rate": 1.9576032601228795e-05, + "loss": 0.6587, + "step": 3923 + }, + { + "epoch": 0.120264803236484, + "grad_norm": 0.7640424173602013, + "learning_rate": 1.957574658538443e-05, + "loss": 0.6476, + "step": 3924 + }, + { + "epoch": 0.12029545175922521, + "grad_norm": 0.7047213037159353, + "learning_rate": 1.957546047518778e-05, + "loss": 0.622, + "step": 3925 + }, + { + "epoch": 0.12032610028196641, + "grad_norm": 1.6526583706110078, + "learning_rate": 1.9575174270641674e-05, + "loss": 0.8407, + "step": 3926 + }, + { + "epoch": 0.12035674880470762, + "grad_norm": 1.6732193751425901, + "learning_rate": 1.9574887971748925e-05, + "loss": 0.8507, + "step": 3927 + }, + { + "epoch": 0.12038739732744881, + "grad_norm": 1.6107502601818902, + "learning_rate": 1.9574601578512353e-05, + "loss": 0.899, + "step": 3928 + }, + { + "epoch": 0.12041804585019002, + "grad_norm": 1.6657132863358044, + "learning_rate": 1.9574315090934785e-05, + "loss": 0.8499, + "step": 3929 + }, + { + "epoch": 0.12044869437293122, + "grad_norm": 1.5907725476751815, + "learning_rate": 1.9574028509019035e-05, + "loss": 0.8505, + "step": 3930 + }, + { + "epoch": 0.12047934289567243, + "grad_norm": 1.5746608773088508, + "learning_rate": 1.9573741832767937e-05, + "loss": 0.8106, + "step": 3931 + }, + { + "epoch": 0.12050999141841363, + "grad_norm": 1.424126112440188, + "learning_rate": 1.957345506218431e-05, + "loss": 0.7527, + "step": 3932 + }, + { + "epoch": 0.12054063994115484, + "grad_norm": 1.6118608548455233, + "learning_rate": 1.957316819727098e-05, + "loss": 0.8101, + "step": 3933 + }, + { + "epoch": 0.12057128846389605, + "grad_norm": 1.622543337412442, + "learning_rate": 1.9572881238030775e-05, + "loss": 0.8547, + "step": 3934 + }, + { + "epoch": 0.12060193698663725, + "grad_norm": 1.5858294391098062, + "learning_rate": 1.957259418446652e-05, + "loss": 0.8232, + "step": 3935 + }, + { + "epoch": 0.12063258550937844, + "grad_norm": 0.9115417370433972, + "learning_rate": 1.9572307036581047e-05, + "loss": 0.6666, + "step": 3936 + }, + { + "epoch": 0.12066323403211965, + "grad_norm": 1.505262088467889, + "learning_rate": 1.957201979437718e-05, + "loss": 0.7564, + "step": 3937 + }, + { + "epoch": 0.12069388255486085, + "grad_norm": 1.8453558748705594, + "learning_rate": 1.957173245785776e-05, + "loss": 0.912, + "step": 3938 + }, + { + "epoch": 0.12072453107760206, + "grad_norm": 1.8287756329264797, + "learning_rate": 1.9571445027025606e-05, + "loss": 0.878, + "step": 3939 + }, + { + "epoch": 0.12075517960034327, + "grad_norm": 1.7149520618275271, + "learning_rate": 1.9571157501883558e-05, + "loss": 0.8971, + "step": 3940 + }, + { + "epoch": 0.12078582812308447, + "grad_norm": 1.6796796764148458, + "learning_rate": 1.9570869882434443e-05, + "loss": 0.8158, + "step": 3941 + }, + { + "epoch": 0.12081647664582568, + "grad_norm": 1.6661970690117176, + "learning_rate": 1.9570582168681102e-05, + "loss": 0.9185, + "step": 3942 + }, + { + "epoch": 0.12084712516856687, + "grad_norm": 1.8347555577900123, + "learning_rate": 1.9570294360626363e-05, + "loss": 0.834, + "step": 3943 + }, + { + "epoch": 0.12087777369130807, + "grad_norm": 0.8103582743647365, + "learning_rate": 1.957000645827307e-05, + "loss": 0.6434, + "step": 3944 + }, + { + "epoch": 0.12090842221404928, + "grad_norm": 1.6097239648245762, + "learning_rate": 1.9569718461624048e-05, + "loss": 0.8322, + "step": 3945 + }, + { + "epoch": 0.12093907073679049, + "grad_norm": 1.5820738898592828, + "learning_rate": 1.9569430370682144e-05, + "loss": 0.7697, + "step": 3946 + }, + { + "epoch": 0.12096971925953169, + "grad_norm": 1.4033595881368064, + "learning_rate": 1.9569142185450193e-05, + "loss": 0.8122, + "step": 3947 + }, + { + "epoch": 0.1210003677822729, + "grad_norm": 1.7400906245836036, + "learning_rate": 1.956885390593104e-05, + "loss": 0.858, + "step": 3948 + }, + { + "epoch": 0.1210310163050141, + "grad_norm": 0.7330436209240243, + "learning_rate": 1.9568565532127516e-05, + "loss": 0.5988, + "step": 3949 + }, + { + "epoch": 0.12106166482775531, + "grad_norm": 1.6335377519061998, + "learning_rate": 1.956827706404247e-05, + "loss": 0.8692, + "step": 3950 + }, + { + "epoch": 0.1210923133504965, + "grad_norm": 1.5615201286269003, + "learning_rate": 1.9567988501678743e-05, + "loss": 0.738, + "step": 3951 + }, + { + "epoch": 0.1211229618732377, + "grad_norm": 0.7662503932143239, + "learning_rate": 1.9567699845039177e-05, + "loss": 0.6545, + "step": 3952 + }, + { + "epoch": 0.12115361039597891, + "grad_norm": 1.6360600206528868, + "learning_rate": 1.9567411094126613e-05, + "loss": 0.912, + "step": 3953 + }, + { + "epoch": 0.12118425891872012, + "grad_norm": 1.65843403475946, + "learning_rate": 1.9567122248943903e-05, + "loss": 0.8284, + "step": 3954 + }, + { + "epoch": 0.12121490744146132, + "grad_norm": 1.5552784581519445, + "learning_rate": 1.956683330949389e-05, + "loss": 0.7551, + "step": 3955 + }, + { + "epoch": 0.12124555596420253, + "grad_norm": 1.7425817309139022, + "learning_rate": 1.956654427577942e-05, + "loss": 0.8499, + "step": 3956 + }, + { + "epoch": 0.12127620448694373, + "grad_norm": 1.5899593117987005, + "learning_rate": 1.956625514780334e-05, + "loss": 0.8449, + "step": 3957 + }, + { + "epoch": 0.12130685300968494, + "grad_norm": 1.642374476988826, + "learning_rate": 1.9565965925568503e-05, + "loss": 0.8082, + "step": 3958 + }, + { + "epoch": 0.12133750153242613, + "grad_norm": 1.8609364521948504, + "learning_rate": 1.9565676609077756e-05, + "loss": 0.8925, + "step": 3959 + }, + { + "epoch": 0.12136815005516734, + "grad_norm": 0.8684336410508582, + "learning_rate": 1.9565387198333946e-05, + "loss": 0.6635, + "step": 3960 + }, + { + "epoch": 0.12139879857790854, + "grad_norm": 1.8230379720300167, + "learning_rate": 1.9565097693339932e-05, + "loss": 0.8444, + "step": 3961 + }, + { + "epoch": 0.12142944710064975, + "grad_norm": 1.7532363058903608, + "learning_rate": 1.9564808094098562e-05, + "loss": 0.8344, + "step": 3962 + }, + { + "epoch": 0.12146009562339095, + "grad_norm": 1.726144614819362, + "learning_rate": 1.956451840061269e-05, + "loss": 0.7908, + "step": 3963 + }, + { + "epoch": 0.12149074414613216, + "grad_norm": 1.8931811041006692, + "learning_rate": 1.956422861288517e-05, + "loss": 0.793, + "step": 3964 + }, + { + "epoch": 0.12152139266887337, + "grad_norm": 1.482266899489212, + "learning_rate": 1.956393873091886e-05, + "loss": 0.7172, + "step": 3965 + }, + { + "epoch": 0.12155204119161457, + "grad_norm": 1.5473302278305672, + "learning_rate": 1.9563648754716617e-05, + "loss": 0.8116, + "step": 3966 + }, + { + "epoch": 0.12158268971435576, + "grad_norm": 1.6309876791937874, + "learning_rate": 1.9563358684281294e-05, + "loss": 0.8209, + "step": 3967 + }, + { + "epoch": 0.12161333823709697, + "grad_norm": 1.6840080287707926, + "learning_rate": 1.9563068519615748e-05, + "loss": 0.8346, + "step": 3968 + }, + { + "epoch": 0.12164398675983817, + "grad_norm": 1.5688431582982425, + "learning_rate": 1.9562778260722845e-05, + "loss": 0.7976, + "step": 3969 + }, + { + "epoch": 0.12167463528257938, + "grad_norm": 1.7000645166437227, + "learning_rate": 1.9562487907605438e-05, + "loss": 0.9284, + "step": 3970 + }, + { + "epoch": 0.12170528380532059, + "grad_norm": 0.9000475343810727, + "learning_rate": 1.9562197460266393e-05, + "loss": 0.6693, + "step": 3971 + }, + { + "epoch": 0.12173593232806179, + "grad_norm": 0.8036520215151639, + "learning_rate": 1.956190691870857e-05, + "loss": 0.6823, + "step": 3972 + }, + { + "epoch": 0.121766580850803, + "grad_norm": 1.5073817072468763, + "learning_rate": 1.956161628293483e-05, + "loss": 0.7528, + "step": 3973 + }, + { + "epoch": 0.12179722937354419, + "grad_norm": 1.6296679991326553, + "learning_rate": 1.956132555294804e-05, + "loss": 0.8089, + "step": 3974 + }, + { + "epoch": 0.1218278778962854, + "grad_norm": 1.912541343569682, + "learning_rate": 1.9561034728751062e-05, + "loss": 0.9815, + "step": 3975 + }, + { + "epoch": 0.1218585264190266, + "grad_norm": 1.0405832673508475, + "learning_rate": 1.9560743810346763e-05, + "loss": 0.6802, + "step": 3976 + }, + { + "epoch": 0.1218891749417678, + "grad_norm": 1.6681780638876877, + "learning_rate": 1.9560452797738007e-05, + "loss": 0.8516, + "step": 3977 + }, + { + "epoch": 0.12191982346450901, + "grad_norm": 1.6773806092162769, + "learning_rate": 1.9560161690927665e-05, + "loss": 0.9311, + "step": 3978 + }, + { + "epoch": 0.12195047198725022, + "grad_norm": 1.924579888755924, + "learning_rate": 1.9559870489918605e-05, + "loss": 0.8987, + "step": 3979 + }, + { + "epoch": 0.12198112050999142, + "grad_norm": 1.652300528425033, + "learning_rate": 1.9559579194713695e-05, + "loss": 0.8152, + "step": 3980 + }, + { + "epoch": 0.12201176903273263, + "grad_norm": 1.7660351151589286, + "learning_rate": 1.9559287805315804e-05, + "loss": 0.9494, + "step": 3981 + }, + { + "epoch": 0.12204241755547382, + "grad_norm": 1.5186738438858203, + "learning_rate": 1.9558996321727805e-05, + "loss": 0.8436, + "step": 3982 + }, + { + "epoch": 0.12207306607821503, + "grad_norm": 1.5255339260690655, + "learning_rate": 1.955870474395257e-05, + "loss": 0.7469, + "step": 3983 + }, + { + "epoch": 0.12210371460095623, + "grad_norm": 0.7585109331414743, + "learning_rate": 1.9558413071992974e-05, + "loss": 0.6526, + "step": 3984 + }, + { + "epoch": 0.12213436312369744, + "grad_norm": 1.5590497767616363, + "learning_rate": 1.955812130585188e-05, + "loss": 0.7333, + "step": 3985 + }, + { + "epoch": 0.12216501164643864, + "grad_norm": 1.8223640224222097, + "learning_rate": 1.9557829445532178e-05, + "loss": 0.8118, + "step": 3986 + }, + { + "epoch": 0.12219566016917985, + "grad_norm": 1.7568118719833528, + "learning_rate": 1.9557537491036734e-05, + "loss": 0.9652, + "step": 3987 + }, + { + "epoch": 0.12222630869192105, + "grad_norm": 1.658253828109768, + "learning_rate": 1.955724544236843e-05, + "loss": 0.8803, + "step": 3988 + }, + { + "epoch": 0.12225695721466226, + "grad_norm": 1.7293180692426484, + "learning_rate": 1.9556953299530143e-05, + "loss": 0.903, + "step": 3989 + }, + { + "epoch": 0.12228760573740345, + "grad_norm": 1.675309649950571, + "learning_rate": 1.9556661062524745e-05, + "loss": 0.8851, + "step": 3990 + }, + { + "epoch": 0.12231825426014466, + "grad_norm": 1.4784364308382016, + "learning_rate": 1.9556368731355122e-05, + "loss": 0.6863, + "step": 3991 + }, + { + "epoch": 0.12234890278288586, + "grad_norm": 0.8159679785215405, + "learning_rate": 1.9556076306024156e-05, + "loss": 0.6488, + "step": 3992 + }, + { + "epoch": 0.12237955130562707, + "grad_norm": 1.4361276440557982, + "learning_rate": 1.955578378653472e-05, + "loss": 0.8078, + "step": 3993 + }, + { + "epoch": 0.12241019982836827, + "grad_norm": 1.56061811041557, + "learning_rate": 1.9555491172889706e-05, + "loss": 0.836, + "step": 3994 + }, + { + "epoch": 0.12244084835110948, + "grad_norm": 2.006402698639306, + "learning_rate": 1.9555198465091988e-05, + "loss": 0.9138, + "step": 3995 + }, + { + "epoch": 0.12247149687385069, + "grad_norm": 1.8078619558236766, + "learning_rate": 1.9554905663144458e-05, + "loss": 0.7776, + "step": 3996 + }, + { + "epoch": 0.12250214539659189, + "grad_norm": 1.5547089212666974, + "learning_rate": 1.9554612767049998e-05, + "loss": 0.8624, + "step": 3997 + }, + { + "epoch": 0.12253279391933308, + "grad_norm": 1.621377618267059, + "learning_rate": 1.9554319776811492e-05, + "loss": 0.7957, + "step": 3998 + }, + { + "epoch": 0.12256344244207429, + "grad_norm": 1.7544459402378767, + "learning_rate": 1.955402669243183e-05, + "loss": 0.8172, + "step": 3999 + }, + { + "epoch": 0.1225940909648155, + "grad_norm": 1.540403955957636, + "learning_rate": 1.9553733513913896e-05, + "loss": 0.8017, + "step": 4000 + }, + { + "epoch": 0.1226247394875567, + "grad_norm": 1.4923144785346387, + "learning_rate": 1.9553440241260585e-05, + "loss": 0.7833, + "step": 4001 + }, + { + "epoch": 0.1226553880102979, + "grad_norm": 1.5680116718740589, + "learning_rate": 1.9553146874474782e-05, + "loss": 0.7864, + "step": 4002 + }, + { + "epoch": 0.12268603653303911, + "grad_norm": 1.7514255770730989, + "learning_rate": 1.9552853413559376e-05, + "loss": 0.8987, + "step": 4003 + }, + { + "epoch": 0.12271668505578032, + "grad_norm": 1.8779660453290121, + "learning_rate": 1.9552559858517265e-05, + "loss": 0.8026, + "step": 4004 + }, + { + "epoch": 0.12274733357852151, + "grad_norm": 1.7476089128871886, + "learning_rate": 1.9552266209351335e-05, + "loss": 0.8566, + "step": 4005 + }, + { + "epoch": 0.12277798210126271, + "grad_norm": 1.6398998377938927, + "learning_rate": 1.9551972466064482e-05, + "loss": 0.817, + "step": 4006 + }, + { + "epoch": 0.12280863062400392, + "grad_norm": 1.632800660278307, + "learning_rate": 1.95516786286596e-05, + "loss": 0.8496, + "step": 4007 + }, + { + "epoch": 0.12283927914674513, + "grad_norm": 0.83023662756855, + "learning_rate": 1.9551384697139585e-05, + "loss": 0.6168, + "step": 4008 + }, + { + "epoch": 0.12286992766948633, + "grad_norm": 1.710807556694041, + "learning_rate": 1.9551090671507333e-05, + "loss": 0.9088, + "step": 4009 + }, + { + "epoch": 0.12290057619222754, + "grad_norm": 1.426681737649576, + "learning_rate": 1.955079655176574e-05, + "loss": 0.7753, + "step": 4010 + }, + { + "epoch": 0.12293122471496874, + "grad_norm": 1.7376454610859984, + "learning_rate": 1.9550502337917707e-05, + "loss": 0.771, + "step": 4011 + }, + { + "epoch": 0.12296187323770995, + "grad_norm": 1.6328601463042556, + "learning_rate": 1.955020802996613e-05, + "loss": 0.7909, + "step": 4012 + }, + { + "epoch": 0.12299252176045114, + "grad_norm": 1.6180178304069586, + "learning_rate": 1.954991362791391e-05, + "loss": 0.8149, + "step": 4013 + }, + { + "epoch": 0.12302317028319235, + "grad_norm": 0.7765344761929401, + "learning_rate": 1.9549619131763946e-05, + "loss": 0.6665, + "step": 4014 + }, + { + "epoch": 0.12305381880593355, + "grad_norm": 1.8763815154437942, + "learning_rate": 1.9549324541519142e-05, + "loss": 0.8462, + "step": 4015 + }, + { + "epoch": 0.12308446732867476, + "grad_norm": 1.9116703712844354, + "learning_rate": 1.95490298571824e-05, + "loss": 0.8905, + "step": 4016 + }, + { + "epoch": 0.12311511585141596, + "grad_norm": 1.6819806816315168, + "learning_rate": 1.9548735078756626e-05, + "loss": 0.7643, + "step": 4017 + }, + { + "epoch": 0.12314576437415717, + "grad_norm": 1.7940076589393574, + "learning_rate": 1.954844020624472e-05, + "loss": 0.8691, + "step": 4018 + }, + { + "epoch": 0.12317641289689837, + "grad_norm": 1.6014694129393616, + "learning_rate": 1.9548145239649588e-05, + "loss": 0.8108, + "step": 4019 + }, + { + "epoch": 0.12320706141963958, + "grad_norm": 1.6389088805504386, + "learning_rate": 1.9547850178974138e-05, + "loss": 0.7484, + "step": 4020 + }, + { + "epoch": 0.12323770994238077, + "grad_norm": 1.7805921193016538, + "learning_rate": 1.9547555024221282e-05, + "loss": 0.8095, + "step": 4021 + }, + { + "epoch": 0.12326835846512198, + "grad_norm": 1.827674777779938, + "learning_rate": 1.954725977539392e-05, + "loss": 0.7633, + "step": 4022 + }, + { + "epoch": 0.12329900698786318, + "grad_norm": 1.714319871693845, + "learning_rate": 1.9546964432494964e-05, + "loss": 0.8677, + "step": 4023 + }, + { + "epoch": 0.12332965551060439, + "grad_norm": 1.6349098653340817, + "learning_rate": 1.9546668995527326e-05, + "loss": 0.9073, + "step": 4024 + }, + { + "epoch": 0.1233603040333456, + "grad_norm": 0.7589077433925878, + "learning_rate": 1.9546373464493914e-05, + "loss": 0.6588, + "step": 4025 + }, + { + "epoch": 0.1233909525560868, + "grad_norm": 1.7231689819630756, + "learning_rate": 1.9546077839397643e-05, + "loss": 0.7809, + "step": 4026 + }, + { + "epoch": 0.123421601078828, + "grad_norm": 1.6658206669746496, + "learning_rate": 1.9545782120241425e-05, + "loss": 0.7295, + "step": 4027 + }, + { + "epoch": 0.12345224960156921, + "grad_norm": 1.7152773002230215, + "learning_rate": 1.9545486307028176e-05, + "loss": 0.7854, + "step": 4028 + }, + { + "epoch": 0.1234828981243104, + "grad_norm": 0.69194568131816, + "learning_rate": 1.9545190399760804e-05, + "loss": 0.5895, + "step": 4029 + }, + { + "epoch": 0.12351354664705161, + "grad_norm": 1.6247777270091879, + "learning_rate": 1.954489439844223e-05, + "loss": 0.7955, + "step": 4030 + }, + { + "epoch": 0.12354419516979281, + "grad_norm": 1.6797510154722126, + "learning_rate": 1.954459830307537e-05, + "loss": 0.8229, + "step": 4031 + }, + { + "epoch": 0.12357484369253402, + "grad_norm": 1.7005820095661712, + "learning_rate": 1.954430211366314e-05, + "loss": 0.8353, + "step": 4032 + }, + { + "epoch": 0.12360549221527523, + "grad_norm": 0.7359170884123694, + "learning_rate": 1.9544005830208455e-05, + "loss": 0.6164, + "step": 4033 + }, + { + "epoch": 0.12363614073801643, + "grad_norm": 1.7148643607344392, + "learning_rate": 1.9543709452714247e-05, + "loss": 0.8207, + "step": 4034 + }, + { + "epoch": 0.12366678926075764, + "grad_norm": 1.4952359593125868, + "learning_rate": 1.9543412981183423e-05, + "loss": 0.9229, + "step": 4035 + }, + { + "epoch": 0.12369743778349883, + "grad_norm": 1.6077496136905711, + "learning_rate": 1.954311641561891e-05, + "loss": 0.7855, + "step": 4036 + }, + { + "epoch": 0.12372808630624003, + "grad_norm": 1.5369479372761214, + "learning_rate": 1.954281975602363e-05, + "loss": 0.8646, + "step": 4037 + }, + { + "epoch": 0.12375873482898124, + "grad_norm": 1.468067495623188, + "learning_rate": 1.9542523002400502e-05, + "loss": 0.6916, + "step": 4038 + }, + { + "epoch": 0.12378938335172245, + "grad_norm": 1.6148043478307164, + "learning_rate": 1.9542226154752457e-05, + "loss": 0.8924, + "step": 4039 + }, + { + "epoch": 0.12382003187446365, + "grad_norm": 1.6127827624257138, + "learning_rate": 1.9541929213082416e-05, + "loss": 0.8464, + "step": 4040 + }, + { + "epoch": 0.12385068039720486, + "grad_norm": 1.6597931890380637, + "learning_rate": 1.9541632177393304e-05, + "loss": 0.9376, + "step": 4041 + }, + { + "epoch": 0.12388132891994606, + "grad_norm": 1.4083408435689622, + "learning_rate": 1.9541335047688048e-05, + "loss": 0.7874, + "step": 4042 + }, + { + "epoch": 0.12391197744268727, + "grad_norm": 1.532088093866763, + "learning_rate": 1.954103782396958e-05, + "loss": 0.8708, + "step": 4043 + }, + { + "epoch": 0.12394262596542846, + "grad_norm": 1.672090545401422, + "learning_rate": 1.9540740506240822e-05, + "loss": 0.7249, + "step": 4044 + }, + { + "epoch": 0.12397327448816967, + "grad_norm": 1.8138431059450792, + "learning_rate": 1.9540443094504707e-05, + "loss": 0.8547, + "step": 4045 + }, + { + "epoch": 0.12400392301091087, + "grad_norm": 1.7358173240492272, + "learning_rate": 1.9540145588764164e-05, + "loss": 0.9122, + "step": 4046 + }, + { + "epoch": 0.12403457153365208, + "grad_norm": 1.4453772720130142, + "learning_rate": 1.9539847989022128e-05, + "loss": 0.7872, + "step": 4047 + }, + { + "epoch": 0.12406522005639328, + "grad_norm": 1.41036823549344, + "learning_rate": 1.9539550295281525e-05, + "loss": 0.8096, + "step": 4048 + }, + { + "epoch": 0.12409586857913449, + "grad_norm": 2.104402529747835, + "learning_rate": 1.9539252507545296e-05, + "loss": 0.8603, + "step": 4049 + }, + { + "epoch": 0.1241265171018757, + "grad_norm": 1.75914968652853, + "learning_rate": 1.9538954625816373e-05, + "loss": 0.8093, + "step": 4050 + }, + { + "epoch": 0.1241571656246169, + "grad_norm": 1.837802788231522, + "learning_rate": 1.9538656650097688e-05, + "loss": 0.7769, + "step": 4051 + }, + { + "epoch": 0.12418781414735809, + "grad_norm": 1.862513897529821, + "learning_rate": 1.9538358580392177e-05, + "loss": 0.8817, + "step": 4052 + }, + { + "epoch": 0.1242184626700993, + "grad_norm": 1.6090991426354269, + "learning_rate": 1.9538060416702777e-05, + "loss": 0.7742, + "step": 4053 + }, + { + "epoch": 0.1242491111928405, + "grad_norm": 1.7133802632989301, + "learning_rate": 1.953776215903243e-05, + "loss": 0.9023, + "step": 4054 + }, + { + "epoch": 0.12427975971558171, + "grad_norm": 0.81436497025808, + "learning_rate": 1.953746380738407e-05, + "loss": 0.6387, + "step": 4055 + }, + { + "epoch": 0.12431040823832291, + "grad_norm": 1.7281094447141938, + "learning_rate": 1.953716536176064e-05, + "loss": 0.8375, + "step": 4056 + }, + { + "epoch": 0.12434105676106412, + "grad_norm": 1.7959418964194214, + "learning_rate": 1.953686682216508e-05, + "loss": 0.9202, + "step": 4057 + }, + { + "epoch": 0.12437170528380533, + "grad_norm": 1.8893814901844648, + "learning_rate": 1.953656818860033e-05, + "loss": 0.8737, + "step": 4058 + }, + { + "epoch": 0.12440235380654653, + "grad_norm": 1.6749941737675518, + "learning_rate": 1.9536269461069334e-05, + "loss": 0.8051, + "step": 4059 + }, + { + "epoch": 0.12443300232928772, + "grad_norm": 1.5630185264826884, + "learning_rate": 1.9535970639575038e-05, + "loss": 0.8189, + "step": 4060 + }, + { + "epoch": 0.12446365085202893, + "grad_norm": 1.6981907786164476, + "learning_rate": 1.9535671724120376e-05, + "loss": 0.8307, + "step": 4061 + }, + { + "epoch": 0.12449429937477013, + "grad_norm": 1.5089863434459443, + "learning_rate": 1.9535372714708308e-05, + "loss": 0.9002, + "step": 4062 + }, + { + "epoch": 0.12452494789751134, + "grad_norm": 1.8564478592797247, + "learning_rate": 1.953507361134177e-05, + "loss": 0.8884, + "step": 4063 + }, + { + "epoch": 0.12455559642025255, + "grad_norm": 0.8015896304776964, + "learning_rate": 1.953477441402371e-05, + "loss": 0.665, + "step": 4064 + }, + { + "epoch": 0.12458624494299375, + "grad_norm": 1.916132455368016, + "learning_rate": 1.9534475122757082e-05, + "loss": 0.8438, + "step": 4065 + }, + { + "epoch": 0.12461689346573496, + "grad_norm": 1.6459486967311716, + "learning_rate": 1.953417573754483e-05, + "loss": 0.7771, + "step": 4066 + }, + { + "epoch": 0.12464754198847615, + "grad_norm": 1.9980824078635904, + "learning_rate": 1.9533876258389905e-05, + "loss": 0.8686, + "step": 4067 + }, + { + "epoch": 0.12467819051121735, + "grad_norm": 1.5221814438963168, + "learning_rate": 1.953357668529526e-05, + "loss": 0.9107, + "step": 4068 + }, + { + "epoch": 0.12470883903395856, + "grad_norm": 0.7410695791689245, + "learning_rate": 1.9533277018263838e-05, + "loss": 0.6481, + "step": 4069 + }, + { + "epoch": 0.12473948755669977, + "grad_norm": 1.6734099110457838, + "learning_rate": 1.9532977257298605e-05, + "loss": 0.7995, + "step": 4070 + }, + { + "epoch": 0.12477013607944097, + "grad_norm": 1.8062052955160266, + "learning_rate": 1.9532677402402504e-05, + "loss": 0.8768, + "step": 4071 + }, + { + "epoch": 0.12480078460218218, + "grad_norm": 1.8039357741615238, + "learning_rate": 1.9532377453578496e-05, + "loss": 0.853, + "step": 4072 + }, + { + "epoch": 0.12483143312492338, + "grad_norm": 1.5837601477443977, + "learning_rate": 1.9532077410829532e-05, + "loss": 0.8219, + "step": 4073 + }, + { + "epoch": 0.12486208164766459, + "grad_norm": 1.644974321276689, + "learning_rate": 1.9531777274158573e-05, + "loss": 0.7705, + "step": 4074 + }, + { + "epoch": 0.12489273017040578, + "grad_norm": 1.5037925791557574, + "learning_rate": 1.953147704356857e-05, + "loss": 0.7995, + "step": 4075 + }, + { + "epoch": 0.12492337869314699, + "grad_norm": 1.5405064980071437, + "learning_rate": 1.9531176719062486e-05, + "loss": 0.7797, + "step": 4076 + }, + { + "epoch": 0.12495402721588819, + "grad_norm": 1.7551689505994659, + "learning_rate": 1.953087630064328e-05, + "loss": 0.8567, + "step": 4077 + }, + { + "epoch": 0.1249846757386294, + "grad_norm": 1.627500435134118, + "learning_rate": 1.9530575788313913e-05, + "loss": 0.7251, + "step": 4078 + }, + { + "epoch": 0.1250153242613706, + "grad_norm": 1.536748008770103, + "learning_rate": 1.9530275182077342e-05, + "loss": 0.7222, + "step": 4079 + }, + { + "epoch": 0.1250459727841118, + "grad_norm": 1.6331199579721931, + "learning_rate": 1.9529974481936532e-05, + "loss": 0.8027, + "step": 4080 + }, + { + "epoch": 0.12507662130685301, + "grad_norm": 0.8169037262908496, + "learning_rate": 1.9529673687894443e-05, + "loss": 0.6411, + "step": 4081 + }, + { + "epoch": 0.12510726982959422, + "grad_norm": 1.5836809350984875, + "learning_rate": 1.9529372799954043e-05, + "loss": 0.931, + "step": 4082 + }, + { + "epoch": 0.12513791835233543, + "grad_norm": 1.5412035238441757, + "learning_rate": 1.9529071818118295e-05, + "loss": 0.8921, + "step": 4083 + }, + { + "epoch": 0.12516856687507663, + "grad_norm": 1.624174563684798, + "learning_rate": 1.9528770742390165e-05, + "loss": 0.8642, + "step": 4084 + }, + { + "epoch": 0.12519921539781784, + "grad_norm": 1.546547974754482, + "learning_rate": 1.9528469572772616e-05, + "loss": 0.8105, + "step": 4085 + }, + { + "epoch": 0.12522986392055904, + "grad_norm": 1.5516781739497802, + "learning_rate": 1.9528168309268622e-05, + "loss": 0.8234, + "step": 4086 + }, + { + "epoch": 0.12526051244330022, + "grad_norm": 1.8390497646473916, + "learning_rate": 1.9527866951881142e-05, + "loss": 0.8569, + "step": 4087 + }, + { + "epoch": 0.12529116096604143, + "grad_norm": 1.7122438426834057, + "learning_rate": 1.9527565500613155e-05, + "loss": 0.8916, + "step": 4088 + }, + { + "epoch": 0.12532180948878263, + "grad_norm": 1.5911626161126222, + "learning_rate": 1.952726395546763e-05, + "loss": 0.8212, + "step": 4089 + }, + { + "epoch": 0.12535245801152384, + "grad_norm": 1.5930338606495007, + "learning_rate": 1.952696231644753e-05, + "loss": 0.8405, + "step": 4090 + }, + { + "epoch": 0.12538310653426504, + "grad_norm": 1.598477489818102, + "learning_rate": 1.9526660583555835e-05, + "loss": 0.6965, + "step": 4091 + }, + { + "epoch": 0.12541375505700625, + "grad_norm": 1.4613751988404864, + "learning_rate": 1.9526358756795517e-05, + "loss": 0.7564, + "step": 4092 + }, + { + "epoch": 0.12544440357974745, + "grad_norm": 1.5219907224948017, + "learning_rate": 1.9526056836169545e-05, + "loss": 0.8606, + "step": 4093 + }, + { + "epoch": 0.12547505210248866, + "grad_norm": 1.607942853460475, + "learning_rate": 1.95257548216809e-05, + "loss": 0.8976, + "step": 4094 + }, + { + "epoch": 0.12550570062522987, + "grad_norm": 1.6746871558851664, + "learning_rate": 1.9525452713332557e-05, + "loss": 0.8961, + "step": 4095 + }, + { + "epoch": 0.12553634914797107, + "grad_norm": 1.8893571201327253, + "learning_rate": 1.9525150511127494e-05, + "loss": 0.8736, + "step": 4096 + }, + { + "epoch": 0.12556699767071228, + "grad_norm": 1.63686388004072, + "learning_rate": 1.952484821506868e-05, + "loss": 0.8602, + "step": 4097 + }, + { + "epoch": 0.12559764619345348, + "grad_norm": 1.7281179361868961, + "learning_rate": 1.9524545825159103e-05, + "loss": 0.7446, + "step": 4098 + }, + { + "epoch": 0.1256282947161947, + "grad_norm": 0.8547732985941849, + "learning_rate": 1.9524243341401735e-05, + "loss": 0.6773, + "step": 4099 + }, + { + "epoch": 0.1256589432389359, + "grad_norm": 1.667415235601382, + "learning_rate": 1.9523940763799564e-05, + "loss": 0.8892, + "step": 4100 + }, + { + "epoch": 0.1256895917616771, + "grad_norm": 1.5023000299809461, + "learning_rate": 1.9523638092355564e-05, + "loss": 0.7652, + "step": 4101 + }, + { + "epoch": 0.12572024028441828, + "grad_norm": 1.8612647678146106, + "learning_rate": 1.9523335327072725e-05, + "loss": 0.8947, + "step": 4102 + }, + { + "epoch": 0.12575088880715948, + "grad_norm": 1.7618360286364445, + "learning_rate": 1.9523032467954028e-05, + "loss": 0.7701, + "step": 4103 + }, + { + "epoch": 0.1257815373299007, + "grad_norm": 1.6855340872587976, + "learning_rate": 1.9522729515002454e-05, + "loss": 0.8241, + "step": 4104 + }, + { + "epoch": 0.1258121858526419, + "grad_norm": 1.8061364841750873, + "learning_rate": 1.9522426468220988e-05, + "loss": 0.7377, + "step": 4105 + }, + { + "epoch": 0.1258428343753831, + "grad_norm": 1.7116818165934808, + "learning_rate": 1.9522123327612615e-05, + "loss": 0.7506, + "step": 4106 + }, + { + "epoch": 0.1258734828981243, + "grad_norm": 1.6927657229325581, + "learning_rate": 1.9521820093180327e-05, + "loss": 0.8775, + "step": 4107 + }, + { + "epoch": 0.1259041314208655, + "grad_norm": 1.5567744076831307, + "learning_rate": 1.952151676492711e-05, + "loss": 0.7946, + "step": 4108 + }, + { + "epoch": 0.12593477994360672, + "grad_norm": 1.7069494393117304, + "learning_rate": 1.9521213342855953e-05, + "loss": 0.7568, + "step": 4109 + }, + { + "epoch": 0.12596542846634792, + "grad_norm": 1.7390245605311843, + "learning_rate": 1.9520909826969846e-05, + "loss": 0.7496, + "step": 4110 + }, + { + "epoch": 0.12599607698908913, + "grad_norm": 0.9495225240930092, + "learning_rate": 1.9520606217271775e-05, + "loss": 0.655, + "step": 4111 + }, + { + "epoch": 0.12602672551183033, + "grad_norm": 0.7840147806733866, + "learning_rate": 1.9520302513764736e-05, + "loss": 0.6411, + "step": 4112 + }, + { + "epoch": 0.12605737403457154, + "grad_norm": 1.7931502936524986, + "learning_rate": 1.9519998716451723e-05, + "loss": 0.8847, + "step": 4113 + }, + { + "epoch": 0.12608802255731275, + "grad_norm": 1.6628634319627882, + "learning_rate": 1.9519694825335723e-05, + "loss": 0.8664, + "step": 4114 + }, + { + "epoch": 0.12611867108005395, + "grad_norm": 1.7670459446173277, + "learning_rate": 1.9519390840419735e-05, + "loss": 0.8735, + "step": 4115 + }, + { + "epoch": 0.12614931960279516, + "grad_norm": 1.9315257627967337, + "learning_rate": 1.9519086761706757e-05, + "loss": 0.7331, + "step": 4116 + }, + { + "epoch": 0.12617996812553636, + "grad_norm": 1.578538156583294, + "learning_rate": 1.9518782589199778e-05, + "loss": 0.802, + "step": 4117 + }, + { + "epoch": 0.12621061664827754, + "grad_norm": 1.6601767386696478, + "learning_rate": 1.95184783229018e-05, + "loss": 0.7894, + "step": 4118 + }, + { + "epoch": 0.12624126517101875, + "grad_norm": 1.8065327548321897, + "learning_rate": 1.9518173962815817e-05, + "loss": 0.9007, + "step": 4119 + }, + { + "epoch": 0.12627191369375995, + "grad_norm": 1.7414671730950089, + "learning_rate": 1.9517869508944835e-05, + "loss": 0.775, + "step": 4120 + }, + { + "epoch": 0.12630256221650116, + "grad_norm": 1.8791820167883095, + "learning_rate": 1.9517564961291846e-05, + "loss": 0.8496, + "step": 4121 + }, + { + "epoch": 0.12633321073924236, + "grad_norm": 1.7627021478258644, + "learning_rate": 1.9517260319859855e-05, + "loss": 0.7929, + "step": 4122 + }, + { + "epoch": 0.12636385926198357, + "grad_norm": 1.630490828173011, + "learning_rate": 1.9516955584651864e-05, + "loss": 0.9072, + "step": 4123 + }, + { + "epoch": 0.12639450778472477, + "grad_norm": 1.6350801897447835, + "learning_rate": 1.9516650755670875e-05, + "loss": 0.7905, + "step": 4124 + }, + { + "epoch": 0.12642515630746598, + "grad_norm": 2.0223378609001266, + "learning_rate": 1.951634583291989e-05, + "loss": 0.9512, + "step": 4125 + }, + { + "epoch": 0.12645580483020719, + "grad_norm": 1.6593294821239681, + "learning_rate": 1.9516040816401912e-05, + "loss": 0.6969, + "step": 4126 + }, + { + "epoch": 0.1264864533529484, + "grad_norm": 1.6151891877112712, + "learning_rate": 1.9515735706119952e-05, + "loss": 0.7491, + "step": 4127 + }, + { + "epoch": 0.1265171018756896, + "grad_norm": 1.5862123754393913, + "learning_rate": 1.9515430502077016e-05, + "loss": 0.7958, + "step": 4128 + }, + { + "epoch": 0.1265477503984308, + "grad_norm": 1.456032557101382, + "learning_rate": 1.9515125204276107e-05, + "loss": 0.8272, + "step": 4129 + }, + { + "epoch": 0.126578398921172, + "grad_norm": 1.520734040971708, + "learning_rate": 1.9514819812720232e-05, + "loss": 0.7911, + "step": 4130 + }, + { + "epoch": 0.12660904744391321, + "grad_norm": 1.5673796665134618, + "learning_rate": 1.9514514327412406e-05, + "loss": 0.9236, + "step": 4131 + }, + { + "epoch": 0.12663969596665442, + "grad_norm": 1.6648617987608623, + "learning_rate": 1.9514208748355634e-05, + "loss": 0.8609, + "step": 4132 + }, + { + "epoch": 0.1266703444893956, + "grad_norm": 1.4864642226485898, + "learning_rate": 1.9513903075552928e-05, + "loss": 0.8088, + "step": 4133 + }, + { + "epoch": 0.1267009930121368, + "grad_norm": 1.6388334318094109, + "learning_rate": 1.9513597309007303e-05, + "loss": 0.7973, + "step": 4134 + }, + { + "epoch": 0.126731641534878, + "grad_norm": 1.6724903555932362, + "learning_rate": 1.951329144872177e-05, + "loss": 0.7331, + "step": 4135 + }, + { + "epoch": 0.12676229005761921, + "grad_norm": 1.4592389079686212, + "learning_rate": 1.951298549469934e-05, + "loss": 0.7715, + "step": 4136 + }, + { + "epoch": 0.12679293858036042, + "grad_norm": 1.6377262704052782, + "learning_rate": 1.9512679446943033e-05, + "loss": 0.8781, + "step": 4137 + }, + { + "epoch": 0.12682358710310163, + "grad_norm": 1.4780580737910762, + "learning_rate": 1.951237330545586e-05, + "loss": 0.7159, + "step": 4138 + }, + { + "epoch": 0.12685423562584283, + "grad_norm": 1.7870373413272636, + "learning_rate": 1.951206707024084e-05, + "loss": 0.8475, + "step": 4139 + }, + { + "epoch": 0.12688488414858404, + "grad_norm": 1.7891948294381144, + "learning_rate": 1.9511760741300985e-05, + "loss": 0.7479, + "step": 4140 + }, + { + "epoch": 0.12691553267132524, + "grad_norm": 1.6174342199359182, + "learning_rate": 1.9511454318639323e-05, + "loss": 0.8596, + "step": 4141 + }, + { + "epoch": 0.12694618119406645, + "grad_norm": 1.6863177156388767, + "learning_rate": 1.9511147802258862e-05, + "loss": 0.7832, + "step": 4142 + }, + { + "epoch": 0.12697682971680765, + "grad_norm": 1.69648888820073, + "learning_rate": 1.9510841192162633e-05, + "loss": 0.8646, + "step": 4143 + }, + { + "epoch": 0.12700747823954886, + "grad_norm": 1.5704200379900255, + "learning_rate": 1.9510534488353653e-05, + "loss": 0.895, + "step": 4144 + }, + { + "epoch": 0.12703812676229007, + "grad_norm": 1.7455188103717372, + "learning_rate": 1.951022769083494e-05, + "loss": 0.8842, + "step": 4145 + }, + { + "epoch": 0.12706877528503127, + "grad_norm": 1.3770584154750942, + "learning_rate": 1.950992079960952e-05, + "loss": 0.7836, + "step": 4146 + }, + { + "epoch": 0.12709942380777248, + "grad_norm": 1.8486069696756007, + "learning_rate": 1.950961381468042e-05, + "loss": 0.8359, + "step": 4147 + }, + { + "epoch": 0.12713007233051368, + "grad_norm": 1.6605360199363153, + "learning_rate": 1.950930673605066e-05, + "loss": 0.7495, + "step": 4148 + }, + { + "epoch": 0.12716072085325486, + "grad_norm": 1.78801135723447, + "learning_rate": 1.950899956372327e-05, + "loss": 0.8028, + "step": 4149 + }, + { + "epoch": 0.12719136937599607, + "grad_norm": 1.5533832757746677, + "learning_rate": 1.950869229770127e-05, + "loss": 0.8537, + "step": 4150 + }, + { + "epoch": 0.12722201789873727, + "grad_norm": 1.6811249006230486, + "learning_rate": 1.9508384937987698e-05, + "loss": 0.8274, + "step": 4151 + }, + { + "epoch": 0.12725266642147848, + "grad_norm": 1.5698302199585807, + "learning_rate": 1.950807748458557e-05, + "loss": 0.8617, + "step": 4152 + }, + { + "epoch": 0.12728331494421968, + "grad_norm": 1.8346557841318103, + "learning_rate": 1.9507769937497928e-05, + "loss": 0.8851, + "step": 4153 + }, + { + "epoch": 0.1273139634669609, + "grad_norm": 1.83574266260754, + "learning_rate": 1.9507462296727793e-05, + "loss": 0.8285, + "step": 4154 + }, + { + "epoch": 0.1273446119897021, + "grad_norm": 1.7611013913340248, + "learning_rate": 1.95071545622782e-05, + "loss": 0.9048, + "step": 4155 + }, + { + "epoch": 0.1273752605124433, + "grad_norm": 1.608582555830635, + "learning_rate": 1.9506846734152177e-05, + "loss": 0.7694, + "step": 4156 + }, + { + "epoch": 0.1274059090351845, + "grad_norm": 1.7386075940262729, + "learning_rate": 1.9506538812352763e-05, + "loss": 0.8779, + "step": 4157 + }, + { + "epoch": 0.1274365575579257, + "grad_norm": 1.5786661132690032, + "learning_rate": 1.950623079688299e-05, + "loss": 0.8153, + "step": 4158 + }, + { + "epoch": 0.12746720608066692, + "grad_norm": 1.5862189343383477, + "learning_rate": 1.9505922687745894e-05, + "loss": 0.8234, + "step": 4159 + }, + { + "epoch": 0.12749785460340812, + "grad_norm": 1.518178795394562, + "learning_rate": 1.950561448494451e-05, + "loss": 0.8174, + "step": 4160 + }, + { + "epoch": 0.12752850312614933, + "grad_norm": 1.4453929726069448, + "learning_rate": 1.950530618848187e-05, + "loss": 0.7255, + "step": 4161 + }, + { + "epoch": 0.12755915164889053, + "grad_norm": 0.8985342549254299, + "learning_rate": 1.9504997798361024e-05, + "loss": 0.6728, + "step": 4162 + }, + { + "epoch": 0.12758980017163174, + "grad_norm": 1.5597092516822701, + "learning_rate": 1.9504689314584994e-05, + "loss": 0.7905, + "step": 4163 + }, + { + "epoch": 0.12762044869437292, + "grad_norm": 0.7421218478216611, + "learning_rate": 1.950438073715683e-05, + "loss": 0.6601, + "step": 4164 + }, + { + "epoch": 0.12765109721711412, + "grad_norm": 1.8385873018632457, + "learning_rate": 1.9504072066079576e-05, + "loss": 0.8577, + "step": 4165 + }, + { + "epoch": 0.12768174573985533, + "grad_norm": 0.7622212181586402, + "learning_rate": 1.9503763301356264e-05, + "loss": 0.6408, + "step": 4166 + }, + { + "epoch": 0.12771239426259653, + "grad_norm": 1.8207314558005452, + "learning_rate": 1.9503454442989942e-05, + "loss": 0.8666, + "step": 4167 + }, + { + "epoch": 0.12774304278533774, + "grad_norm": 1.8583089853996764, + "learning_rate": 1.9503145490983654e-05, + "loss": 0.8963, + "step": 4168 + }, + { + "epoch": 0.12777369130807895, + "grad_norm": 0.8176034272204614, + "learning_rate": 1.9502836445340438e-05, + "loss": 0.6697, + "step": 4169 + }, + { + "epoch": 0.12780433983082015, + "grad_norm": 1.8210024806724625, + "learning_rate": 1.9502527306063347e-05, + "loss": 0.9334, + "step": 4170 + }, + { + "epoch": 0.12783498835356136, + "grad_norm": 1.7629375722541751, + "learning_rate": 1.9502218073155417e-05, + "loss": 0.8148, + "step": 4171 + }, + { + "epoch": 0.12786563687630256, + "grad_norm": 1.6362964213990834, + "learning_rate": 1.9501908746619708e-05, + "loss": 0.8918, + "step": 4172 + }, + { + "epoch": 0.12789628539904377, + "grad_norm": 1.6962608392575305, + "learning_rate": 1.9501599326459255e-05, + "loss": 0.8462, + "step": 4173 + }, + { + "epoch": 0.12792693392178497, + "grad_norm": 1.7631433374714902, + "learning_rate": 1.9501289812677117e-05, + "loss": 0.8706, + "step": 4174 + }, + { + "epoch": 0.12795758244452618, + "grad_norm": 1.7719499071636855, + "learning_rate": 1.9500980205276338e-05, + "loss": 0.8109, + "step": 4175 + }, + { + "epoch": 0.12798823096726739, + "grad_norm": 1.7438281782626865, + "learning_rate": 1.950067050425997e-05, + "loss": 0.7829, + "step": 4176 + }, + { + "epoch": 0.1280188794900086, + "grad_norm": 1.5386539280527485, + "learning_rate": 1.9500360709631062e-05, + "loss": 0.7944, + "step": 4177 + }, + { + "epoch": 0.1280495280127498, + "grad_norm": 1.5428866114917328, + "learning_rate": 1.9500050821392674e-05, + "loss": 0.8704, + "step": 4178 + }, + { + "epoch": 0.128080176535491, + "grad_norm": 1.545130772905461, + "learning_rate": 1.949974083954785e-05, + "loss": 0.8008, + "step": 4179 + }, + { + "epoch": 0.12811082505823218, + "grad_norm": 1.647933626879715, + "learning_rate": 1.9499430764099654e-05, + "loss": 0.7352, + "step": 4180 + }, + { + "epoch": 0.1281414735809734, + "grad_norm": 1.7123948613688043, + "learning_rate": 1.9499120595051134e-05, + "loss": 0.8468, + "step": 4181 + }, + { + "epoch": 0.1281721221037146, + "grad_norm": 1.6580488341619835, + "learning_rate": 1.9498810332405345e-05, + "loss": 0.8076, + "step": 4182 + }, + { + "epoch": 0.1282027706264558, + "grad_norm": 1.6274029291960392, + "learning_rate": 1.9498499976165353e-05, + "loss": 0.8721, + "step": 4183 + }, + { + "epoch": 0.128233419149197, + "grad_norm": 0.8169269781520785, + "learning_rate": 1.9498189526334207e-05, + "loss": 0.639, + "step": 4184 + }, + { + "epoch": 0.1282640676719382, + "grad_norm": 1.40415921267325, + "learning_rate": 1.949787898291497e-05, + "loss": 0.7694, + "step": 4185 + }, + { + "epoch": 0.12829471619467941, + "grad_norm": 1.5793149738093322, + "learning_rate": 1.94975683459107e-05, + "loss": 0.765, + "step": 4186 + }, + { + "epoch": 0.12832536471742062, + "grad_norm": 1.645070057199503, + "learning_rate": 1.949725761532446e-05, + "loss": 0.9252, + "step": 4187 + }, + { + "epoch": 0.12835601324016183, + "grad_norm": 0.744574657024824, + "learning_rate": 1.9496946791159312e-05, + "loss": 0.6391, + "step": 4188 + }, + { + "epoch": 0.12838666176290303, + "grad_norm": 1.347904205716357, + "learning_rate": 1.9496635873418316e-05, + "loss": 0.7865, + "step": 4189 + }, + { + "epoch": 0.12841731028564424, + "grad_norm": 1.661020715027584, + "learning_rate": 1.9496324862104537e-05, + "loss": 0.8803, + "step": 4190 + }, + { + "epoch": 0.12844795880838544, + "grad_norm": 1.6577230577323578, + "learning_rate": 1.949601375722104e-05, + "loss": 0.8673, + "step": 4191 + }, + { + "epoch": 0.12847860733112665, + "grad_norm": 0.7363988210493431, + "learning_rate": 1.949570255877089e-05, + "loss": 0.6585, + "step": 4192 + }, + { + "epoch": 0.12850925585386785, + "grad_norm": 1.5628584973810202, + "learning_rate": 1.9495391266757152e-05, + "loss": 0.8271, + "step": 4193 + }, + { + "epoch": 0.12853990437660906, + "grad_norm": 1.8075203371854731, + "learning_rate": 1.9495079881182898e-05, + "loss": 0.8239, + "step": 4194 + }, + { + "epoch": 0.12857055289935024, + "grad_norm": 1.5248481582750073, + "learning_rate": 1.9494768402051186e-05, + "loss": 0.8045, + "step": 4195 + }, + { + "epoch": 0.12860120142209144, + "grad_norm": 1.4190370085426067, + "learning_rate": 1.9494456829365094e-05, + "loss": 0.8039, + "step": 4196 + }, + { + "epoch": 0.12863184994483265, + "grad_norm": 1.546171038328423, + "learning_rate": 1.949414516312769e-05, + "loss": 0.7639, + "step": 4197 + }, + { + "epoch": 0.12866249846757385, + "grad_norm": 0.8162476789892285, + "learning_rate": 1.9493833403342046e-05, + "loss": 0.6438, + "step": 4198 + }, + { + "epoch": 0.12869314699031506, + "grad_norm": 0.7793265230769381, + "learning_rate": 1.9493521550011235e-05, + "loss": 0.6619, + "step": 4199 + }, + { + "epoch": 0.12872379551305627, + "grad_norm": 1.724442975067804, + "learning_rate": 1.9493209603138324e-05, + "loss": 0.8561, + "step": 4200 + }, + { + "epoch": 0.12875444403579747, + "grad_norm": 1.6647151132722973, + "learning_rate": 1.949289756272639e-05, + "loss": 0.7992, + "step": 4201 + }, + { + "epoch": 0.12878509255853868, + "grad_norm": 1.6141501131515377, + "learning_rate": 1.9492585428778502e-05, + "loss": 0.8772, + "step": 4202 + }, + { + "epoch": 0.12881574108127988, + "grad_norm": 1.674529491778565, + "learning_rate": 1.949227320129775e-05, + "loss": 0.9102, + "step": 4203 + }, + { + "epoch": 0.1288463896040211, + "grad_norm": 1.7041544855959165, + "learning_rate": 1.9491960880287196e-05, + "loss": 0.802, + "step": 4204 + }, + { + "epoch": 0.1288770381267623, + "grad_norm": 0.8953725588355158, + "learning_rate": 1.9491648465749926e-05, + "loss": 0.644, + "step": 4205 + }, + { + "epoch": 0.1289076866495035, + "grad_norm": 1.585068908711665, + "learning_rate": 1.9491335957689013e-05, + "loss": 0.8043, + "step": 4206 + }, + { + "epoch": 0.1289383351722447, + "grad_norm": 1.689975197076884, + "learning_rate": 1.9491023356107538e-05, + "loss": 0.9383, + "step": 4207 + }, + { + "epoch": 0.1289689836949859, + "grad_norm": 2.068903017896252, + "learning_rate": 1.949071066100858e-05, + "loss": 0.8889, + "step": 4208 + }, + { + "epoch": 0.12899963221772712, + "grad_norm": 0.7298369793786237, + "learning_rate": 1.9490397872395225e-05, + "loss": 0.6075, + "step": 4209 + }, + { + "epoch": 0.12903028074046832, + "grad_norm": 0.7490380979552261, + "learning_rate": 1.949008499027055e-05, + "loss": 0.6501, + "step": 4210 + }, + { + "epoch": 0.1290609292632095, + "grad_norm": 1.7715802533277893, + "learning_rate": 1.9489772014637642e-05, + "loss": 0.9554, + "step": 4211 + }, + { + "epoch": 0.1290915777859507, + "grad_norm": 1.7536421197569396, + "learning_rate": 1.948945894549958e-05, + "loss": 0.9499, + "step": 4212 + }, + { + "epoch": 0.1291222263086919, + "grad_norm": 1.7664530305420323, + "learning_rate": 1.948914578285945e-05, + "loss": 0.9637, + "step": 4213 + }, + { + "epoch": 0.12915287483143312, + "grad_norm": 1.6168463277393457, + "learning_rate": 1.948883252672034e-05, + "loss": 0.8184, + "step": 4214 + }, + { + "epoch": 0.12918352335417432, + "grad_norm": 1.6884624869795977, + "learning_rate": 1.9488519177085333e-05, + "loss": 0.9088, + "step": 4215 + }, + { + "epoch": 0.12921417187691553, + "grad_norm": 1.723060067204662, + "learning_rate": 1.9488205733957523e-05, + "loss": 0.8327, + "step": 4216 + }, + { + "epoch": 0.12924482039965673, + "grad_norm": 1.4572563708054773, + "learning_rate": 1.9487892197339993e-05, + "loss": 0.7356, + "step": 4217 + }, + { + "epoch": 0.12927546892239794, + "grad_norm": 1.6224269485530323, + "learning_rate": 1.948757856723583e-05, + "loss": 0.823, + "step": 4218 + }, + { + "epoch": 0.12930611744513915, + "grad_norm": 1.640169956522226, + "learning_rate": 1.948726484364813e-05, + "loss": 0.8392, + "step": 4219 + }, + { + "epoch": 0.12933676596788035, + "grad_norm": 1.6657058466637493, + "learning_rate": 1.9486951026579986e-05, + "loss": 0.8085, + "step": 4220 + }, + { + "epoch": 0.12936741449062156, + "grad_norm": 0.85099276498136, + "learning_rate": 1.9486637116034483e-05, + "loss": 0.6295, + "step": 4221 + }, + { + "epoch": 0.12939806301336276, + "grad_norm": 1.6888897415677366, + "learning_rate": 1.9486323112014716e-05, + "loss": 0.8324, + "step": 4222 + }, + { + "epoch": 0.12942871153610397, + "grad_norm": 1.6329226812727138, + "learning_rate": 1.948600901452378e-05, + "loss": 0.8391, + "step": 4223 + }, + { + "epoch": 0.12945936005884517, + "grad_norm": 1.7784043445849123, + "learning_rate": 1.948569482356477e-05, + "loss": 0.8092, + "step": 4224 + }, + { + "epoch": 0.12949000858158638, + "grad_norm": 1.6028503232647877, + "learning_rate": 1.9485380539140784e-05, + "loss": 0.7959, + "step": 4225 + }, + { + "epoch": 0.12952065710432756, + "grad_norm": 1.5717700462704618, + "learning_rate": 1.948506616125492e-05, + "loss": 0.8303, + "step": 4226 + }, + { + "epoch": 0.12955130562706876, + "grad_norm": 1.5893402355018185, + "learning_rate": 1.9484751689910263e-05, + "loss": 0.8501, + "step": 4227 + }, + { + "epoch": 0.12958195414980997, + "grad_norm": 1.4438798630168779, + "learning_rate": 1.9484437125109928e-05, + "loss": 0.7922, + "step": 4228 + }, + { + "epoch": 0.12961260267255117, + "grad_norm": 0.8401754287525293, + "learning_rate": 1.9484122466857004e-05, + "loss": 0.6377, + "step": 4229 + }, + { + "epoch": 0.12964325119529238, + "grad_norm": 1.6118000945171038, + "learning_rate": 1.9483807715154597e-05, + "loss": 0.8745, + "step": 4230 + }, + { + "epoch": 0.1296738997180336, + "grad_norm": 1.75805389776501, + "learning_rate": 1.9483492870005808e-05, + "loss": 0.8547, + "step": 4231 + }, + { + "epoch": 0.1297045482407748, + "grad_norm": 0.7085462910082986, + "learning_rate": 1.948317793141373e-05, + "loss": 0.6309, + "step": 4232 + }, + { + "epoch": 0.129735196763516, + "grad_norm": 1.4894321387190101, + "learning_rate": 1.948286289938148e-05, + "loss": 0.82, + "step": 4233 + }, + { + "epoch": 0.1297658452862572, + "grad_norm": 1.6805310569427125, + "learning_rate": 1.9482547773912154e-05, + "loss": 0.8064, + "step": 4234 + }, + { + "epoch": 0.1297964938089984, + "grad_norm": 1.543844848187351, + "learning_rate": 1.9482232555008854e-05, + "loss": 0.77, + "step": 4235 + }, + { + "epoch": 0.12982714233173961, + "grad_norm": 0.7816373218685523, + "learning_rate": 1.9481917242674696e-05, + "loss": 0.6747, + "step": 4236 + }, + { + "epoch": 0.12985779085448082, + "grad_norm": 0.752388900258064, + "learning_rate": 1.948160183691278e-05, + "loss": 0.6611, + "step": 4237 + }, + { + "epoch": 0.12988843937722203, + "grad_norm": 1.9865404081488607, + "learning_rate": 1.9481286337726216e-05, + "loss": 0.9034, + "step": 4238 + }, + { + "epoch": 0.12991908789996323, + "grad_norm": 1.7777514556971903, + "learning_rate": 1.9480970745118112e-05, + "loss": 0.9011, + "step": 4239 + }, + { + "epoch": 0.12994973642270444, + "grad_norm": 1.670500415347567, + "learning_rate": 1.9480655059091575e-05, + "loss": 0.7431, + "step": 4240 + }, + { + "epoch": 0.12998038494544564, + "grad_norm": 0.7767737911483241, + "learning_rate": 1.9480339279649717e-05, + "loss": 0.667, + "step": 4241 + }, + { + "epoch": 0.13001103346818682, + "grad_norm": 1.6089694379531982, + "learning_rate": 1.9480023406795653e-05, + "loss": 0.7758, + "step": 4242 + }, + { + "epoch": 0.13004168199092803, + "grad_norm": 1.6486366364606677, + "learning_rate": 1.9479707440532493e-05, + "loss": 0.9101, + "step": 4243 + }, + { + "epoch": 0.13007233051366923, + "grad_norm": 1.9685784222817413, + "learning_rate": 1.9479391380863348e-05, + "loss": 0.8435, + "step": 4244 + }, + { + "epoch": 0.13010297903641044, + "grad_norm": 1.6402104985414032, + "learning_rate": 1.9479075227791337e-05, + "loss": 0.877, + "step": 4245 + }, + { + "epoch": 0.13013362755915164, + "grad_norm": 1.6261241793969368, + "learning_rate": 1.947875898131957e-05, + "loss": 0.761, + "step": 4246 + }, + { + "epoch": 0.13016427608189285, + "grad_norm": 1.7093685694827174, + "learning_rate": 1.947844264145117e-05, + "loss": 0.8601, + "step": 4247 + }, + { + "epoch": 0.13019492460463405, + "grad_norm": 1.4916641777800195, + "learning_rate": 1.9478126208189243e-05, + "loss": 0.8055, + "step": 4248 + }, + { + "epoch": 0.13022557312737526, + "grad_norm": 1.8369289537656786, + "learning_rate": 1.947780968153692e-05, + "loss": 0.8935, + "step": 4249 + }, + { + "epoch": 0.13025622165011647, + "grad_norm": 1.4552816430565618, + "learning_rate": 1.9477493061497308e-05, + "loss": 0.8408, + "step": 4250 + }, + { + "epoch": 0.13028687017285767, + "grad_norm": 1.53550693271888, + "learning_rate": 1.9477176348073534e-05, + "loss": 0.7771, + "step": 4251 + }, + { + "epoch": 0.13031751869559888, + "grad_norm": 1.6409210746184146, + "learning_rate": 1.9476859541268718e-05, + "loss": 0.8966, + "step": 4252 + }, + { + "epoch": 0.13034816721834008, + "grad_norm": 1.7679217821203368, + "learning_rate": 1.9476542641085977e-05, + "loss": 0.8139, + "step": 4253 + }, + { + "epoch": 0.1303788157410813, + "grad_norm": 1.5000012670255811, + "learning_rate": 1.9476225647528438e-05, + "loss": 0.7363, + "step": 4254 + }, + { + "epoch": 0.1304094642638225, + "grad_norm": 1.540380208438582, + "learning_rate": 1.9475908560599225e-05, + "loss": 0.7601, + "step": 4255 + }, + { + "epoch": 0.1304401127865637, + "grad_norm": 1.5651336654580252, + "learning_rate": 1.9475591380301458e-05, + "loss": 0.7561, + "step": 4256 + }, + { + "epoch": 0.13047076130930488, + "grad_norm": 1.6139881019965838, + "learning_rate": 1.9475274106638265e-05, + "loss": 0.8988, + "step": 4257 + }, + { + "epoch": 0.13050140983204608, + "grad_norm": 1.7027361408139685, + "learning_rate": 1.9474956739612773e-05, + "loss": 0.8155, + "step": 4258 + }, + { + "epoch": 0.1305320583547873, + "grad_norm": 1.5983247402885041, + "learning_rate": 1.9474639279228106e-05, + "loss": 0.8499, + "step": 4259 + }, + { + "epoch": 0.1305627068775285, + "grad_norm": 1.7458994867541098, + "learning_rate": 1.9474321725487394e-05, + "loss": 0.8816, + "step": 4260 + }, + { + "epoch": 0.1305933554002697, + "grad_norm": 1.6125551120044122, + "learning_rate": 1.9474004078393768e-05, + "loss": 0.9499, + "step": 4261 + }, + { + "epoch": 0.1306240039230109, + "grad_norm": 1.5439807285448728, + "learning_rate": 1.947368633795036e-05, + "loss": 0.8615, + "step": 4262 + }, + { + "epoch": 0.1306546524457521, + "grad_norm": 1.6166972953791483, + "learning_rate": 1.947336850416029e-05, + "loss": 0.8889, + "step": 4263 + }, + { + "epoch": 0.13068530096849332, + "grad_norm": 1.5308977047340682, + "learning_rate": 1.9473050577026696e-05, + "loss": 0.9177, + "step": 4264 + }, + { + "epoch": 0.13071594949123452, + "grad_norm": 1.553547506188059, + "learning_rate": 1.947273255655271e-05, + "loss": 0.7654, + "step": 4265 + }, + { + "epoch": 0.13074659801397573, + "grad_norm": 1.5712913391770809, + "learning_rate": 1.947241444274147e-05, + "loss": 0.8887, + "step": 4266 + }, + { + "epoch": 0.13077724653671693, + "grad_norm": 0.7789323848839179, + "learning_rate": 1.9472096235596107e-05, + "loss": 0.6449, + "step": 4267 + }, + { + "epoch": 0.13080789505945814, + "grad_norm": 1.8780769582535393, + "learning_rate": 1.9471777935119755e-05, + "loss": 0.9356, + "step": 4268 + }, + { + "epoch": 0.13083854358219935, + "grad_norm": 1.6919139052169716, + "learning_rate": 1.947145954131555e-05, + "loss": 0.8262, + "step": 4269 + }, + { + "epoch": 0.13086919210494055, + "grad_norm": 0.809127267916493, + "learning_rate": 1.9471141054186632e-05, + "loss": 0.6458, + "step": 4270 + }, + { + "epoch": 0.13089984062768176, + "grad_norm": 1.5966971799401617, + "learning_rate": 1.9470822473736142e-05, + "loss": 0.7728, + "step": 4271 + }, + { + "epoch": 0.13093048915042296, + "grad_norm": 1.71602419091072, + "learning_rate": 1.947050379996721e-05, + "loss": 0.7323, + "step": 4272 + }, + { + "epoch": 0.13096113767316414, + "grad_norm": 1.614999857956843, + "learning_rate": 1.9470185032882982e-05, + "loss": 0.8912, + "step": 4273 + }, + { + "epoch": 0.13099178619590535, + "grad_norm": 0.7235133379688438, + "learning_rate": 1.94698661724866e-05, + "loss": 0.6361, + "step": 4274 + }, + { + "epoch": 0.13102243471864655, + "grad_norm": 1.6734122073405966, + "learning_rate": 1.94695472187812e-05, + "loss": 0.8386, + "step": 4275 + }, + { + "epoch": 0.13105308324138776, + "grad_norm": 1.5452931421307028, + "learning_rate": 1.9469228171769933e-05, + "loss": 0.804, + "step": 4276 + }, + { + "epoch": 0.13108373176412896, + "grad_norm": 1.9040929996139675, + "learning_rate": 1.9468909031455934e-05, + "loss": 0.8603, + "step": 4277 + }, + { + "epoch": 0.13111438028687017, + "grad_norm": 1.591344263533975, + "learning_rate": 1.946858979784235e-05, + "loss": 0.7141, + "step": 4278 + }, + { + "epoch": 0.13114502880961137, + "grad_norm": 1.7095144443849883, + "learning_rate": 1.9468270470932334e-05, + "loss": 0.8816, + "step": 4279 + }, + { + "epoch": 0.13117567733235258, + "grad_norm": 1.5999012186410615, + "learning_rate": 1.946795105072902e-05, + "loss": 0.719, + "step": 4280 + }, + { + "epoch": 0.13120632585509379, + "grad_norm": 1.9159426614626058, + "learning_rate": 1.9467631537235568e-05, + "loss": 0.8554, + "step": 4281 + }, + { + "epoch": 0.131236974377835, + "grad_norm": 0.7722110322593949, + "learning_rate": 1.9467311930455114e-05, + "loss": 0.6349, + "step": 4282 + }, + { + "epoch": 0.1312676229005762, + "grad_norm": 1.6944415418569503, + "learning_rate": 1.9466992230390817e-05, + "loss": 0.9045, + "step": 4283 + }, + { + "epoch": 0.1312982714233174, + "grad_norm": 1.7238778295281518, + "learning_rate": 1.9466672437045827e-05, + "loss": 0.8244, + "step": 4284 + }, + { + "epoch": 0.1313289199460586, + "grad_norm": 1.7184149234284354, + "learning_rate": 1.9466352550423286e-05, + "loss": 0.8528, + "step": 4285 + }, + { + "epoch": 0.13135956846879981, + "grad_norm": 1.6134965867500874, + "learning_rate": 1.946603257052635e-05, + "loss": 0.7171, + "step": 4286 + }, + { + "epoch": 0.13139021699154102, + "grad_norm": 1.6418516982350297, + "learning_rate": 1.9465712497358175e-05, + "loss": 0.8641, + "step": 4287 + }, + { + "epoch": 0.1314208655142822, + "grad_norm": 1.8226500981883862, + "learning_rate": 1.9465392330921915e-05, + "loss": 0.744, + "step": 4288 + }, + { + "epoch": 0.1314515140370234, + "grad_norm": 1.7488454308413297, + "learning_rate": 1.946507207122072e-05, + "loss": 0.985, + "step": 4289 + }, + { + "epoch": 0.1314821625597646, + "grad_norm": 1.7780964905238896, + "learning_rate": 1.9464751718257752e-05, + "loss": 0.7826, + "step": 4290 + }, + { + "epoch": 0.13151281108250581, + "grad_norm": 1.776290979037374, + "learning_rate": 1.946443127203616e-05, + "loss": 0.8341, + "step": 4291 + }, + { + "epoch": 0.13154345960524702, + "grad_norm": 1.5138850642944999, + "learning_rate": 1.9464110732559104e-05, + "loss": 0.7281, + "step": 4292 + }, + { + "epoch": 0.13157410812798823, + "grad_norm": 1.7034598705834967, + "learning_rate": 1.9463790099829746e-05, + "loss": 0.8617, + "step": 4293 + }, + { + "epoch": 0.13160475665072943, + "grad_norm": 1.704237788627676, + "learning_rate": 1.946346937385124e-05, + "loss": 0.8635, + "step": 4294 + }, + { + "epoch": 0.13163540517347064, + "grad_norm": 1.5902192726227098, + "learning_rate": 1.9463148554626753e-05, + "loss": 0.8887, + "step": 4295 + }, + { + "epoch": 0.13166605369621184, + "grad_norm": 0.7739002703111858, + "learning_rate": 1.9462827642159438e-05, + "loss": 0.5943, + "step": 4296 + }, + { + "epoch": 0.13169670221895305, + "grad_norm": 1.5912588799929812, + "learning_rate": 1.9462506636452464e-05, + "loss": 0.7904, + "step": 4297 + }, + { + "epoch": 0.13172735074169425, + "grad_norm": 1.6756590011498087, + "learning_rate": 1.9462185537508992e-05, + "loss": 0.7589, + "step": 4298 + }, + { + "epoch": 0.13175799926443546, + "grad_norm": 1.828736122605805, + "learning_rate": 1.946186434533218e-05, + "loss": 0.8292, + "step": 4299 + }, + { + "epoch": 0.13178864778717667, + "grad_norm": 1.9346897884592673, + "learning_rate": 1.94615430599252e-05, + "loss": 0.9287, + "step": 4300 + }, + { + "epoch": 0.13181929630991787, + "grad_norm": 1.652310360524421, + "learning_rate": 1.946122168129122e-05, + "loss": 0.8893, + "step": 4301 + }, + { + "epoch": 0.13184994483265908, + "grad_norm": 1.6594497976582243, + "learning_rate": 1.9460900209433394e-05, + "loss": 0.8618, + "step": 4302 + }, + { + "epoch": 0.13188059335540028, + "grad_norm": 1.7142314047148335, + "learning_rate": 1.94605786443549e-05, + "loss": 0.7373, + "step": 4303 + }, + { + "epoch": 0.13191124187814146, + "grad_norm": 0.7901408102380598, + "learning_rate": 1.9460256986058908e-05, + "loss": 0.6693, + "step": 4304 + }, + { + "epoch": 0.13194189040088267, + "grad_norm": 1.6610159819250276, + "learning_rate": 1.9459935234548582e-05, + "loss": 0.7433, + "step": 4305 + }, + { + "epoch": 0.13197253892362387, + "grad_norm": 1.5876459616407612, + "learning_rate": 1.945961338982709e-05, + "loss": 0.9125, + "step": 4306 + }, + { + "epoch": 0.13200318744636508, + "grad_norm": 1.5489043877047264, + "learning_rate": 1.9459291451897614e-05, + "loss": 0.8433, + "step": 4307 + }, + { + "epoch": 0.13203383596910628, + "grad_norm": 1.5514249441552526, + "learning_rate": 1.945896942076331e-05, + "loss": 0.7639, + "step": 4308 + }, + { + "epoch": 0.1320644844918475, + "grad_norm": 1.5741372737885444, + "learning_rate": 1.9458647296427368e-05, + "loss": 0.8311, + "step": 4309 + }, + { + "epoch": 0.1320951330145887, + "grad_norm": 1.7741422975874084, + "learning_rate": 1.945832507889295e-05, + "loss": 0.849, + "step": 4310 + }, + { + "epoch": 0.1321257815373299, + "grad_norm": 1.7294398051093258, + "learning_rate": 1.9458002768163234e-05, + "loss": 0.8902, + "step": 4311 + }, + { + "epoch": 0.1321564300600711, + "grad_norm": 1.636247533145816, + "learning_rate": 1.94576803642414e-05, + "loss": 0.8649, + "step": 4312 + }, + { + "epoch": 0.1321870785828123, + "grad_norm": 1.7891253295766385, + "learning_rate": 1.945735786713062e-05, + "loss": 0.7954, + "step": 4313 + }, + { + "epoch": 0.13221772710555352, + "grad_norm": 0.7767684537346988, + "learning_rate": 1.9457035276834073e-05, + "loss": 0.6568, + "step": 4314 + }, + { + "epoch": 0.13224837562829472, + "grad_norm": 0.7440010256266395, + "learning_rate": 1.9456712593354937e-05, + "loss": 0.6499, + "step": 4315 + }, + { + "epoch": 0.13227902415103593, + "grad_norm": 1.549536712709779, + "learning_rate": 1.9456389816696393e-05, + "loss": 0.6648, + "step": 4316 + }, + { + "epoch": 0.13230967267377713, + "grad_norm": 1.4535307559912096, + "learning_rate": 1.9456066946861623e-05, + "loss": 0.8363, + "step": 4317 + }, + { + "epoch": 0.13234032119651834, + "grad_norm": 1.629544629956031, + "learning_rate": 1.94557439838538e-05, + "loss": 0.9006, + "step": 4318 + }, + { + "epoch": 0.13237096971925952, + "grad_norm": 1.6268340679212814, + "learning_rate": 1.945542092767612e-05, + "loss": 0.8971, + "step": 4319 + }, + { + "epoch": 0.13240161824200072, + "grad_norm": 1.5893583211540736, + "learning_rate": 1.9455097778331753e-05, + "loss": 0.7975, + "step": 4320 + }, + { + "epoch": 0.13243226676474193, + "grad_norm": 1.6568473834755402, + "learning_rate": 1.945477453582389e-05, + "loss": 0.9246, + "step": 4321 + }, + { + "epoch": 0.13246291528748314, + "grad_norm": 1.7454407359701656, + "learning_rate": 1.9454451200155712e-05, + "loss": 0.7303, + "step": 4322 + }, + { + "epoch": 0.13249356381022434, + "grad_norm": 1.4407231221467718, + "learning_rate": 1.9454127771330412e-05, + "loss": 0.801, + "step": 4323 + }, + { + "epoch": 0.13252421233296555, + "grad_norm": 1.7293473870412428, + "learning_rate": 1.945380424935117e-05, + "loss": 0.9243, + "step": 4324 + }, + { + "epoch": 0.13255486085570675, + "grad_norm": 1.8003115223076267, + "learning_rate": 1.9453480634221176e-05, + "loss": 0.9342, + "step": 4325 + }, + { + "epoch": 0.13258550937844796, + "grad_norm": 1.5325500813476787, + "learning_rate": 1.9453156925943616e-05, + "loss": 0.8468, + "step": 4326 + }, + { + "epoch": 0.13261615790118916, + "grad_norm": 1.487864077050345, + "learning_rate": 1.9452833124521685e-05, + "loss": 0.8272, + "step": 4327 + }, + { + "epoch": 0.13264680642393037, + "grad_norm": 1.4135019359558563, + "learning_rate": 1.9452509229958568e-05, + "loss": 0.8126, + "step": 4328 + }, + { + "epoch": 0.13267745494667157, + "grad_norm": 1.658846432370556, + "learning_rate": 1.9452185242257463e-05, + "loss": 0.9019, + "step": 4329 + }, + { + "epoch": 0.13270810346941278, + "grad_norm": 1.708514803136392, + "learning_rate": 1.9451861161421555e-05, + "loss": 0.8492, + "step": 4330 + }, + { + "epoch": 0.13273875199215399, + "grad_norm": 1.6491531421616685, + "learning_rate": 1.9451536987454042e-05, + "loss": 0.7811, + "step": 4331 + }, + { + "epoch": 0.1327694005148952, + "grad_norm": 1.582549459237195, + "learning_rate": 1.9451212720358117e-05, + "loss": 0.8158, + "step": 4332 + }, + { + "epoch": 0.1328000490376364, + "grad_norm": 1.6165425346370499, + "learning_rate": 1.9450888360136973e-05, + "loss": 0.7652, + "step": 4333 + }, + { + "epoch": 0.1328306975603776, + "grad_norm": 1.5871530073370668, + "learning_rate": 1.945056390679381e-05, + "loss": 0.7583, + "step": 4334 + }, + { + "epoch": 0.13286134608311878, + "grad_norm": 1.6525356842478183, + "learning_rate": 1.9450239360331823e-05, + "loss": 0.7741, + "step": 4335 + }, + { + "epoch": 0.13289199460586, + "grad_norm": 1.8105608877067791, + "learning_rate": 1.9449914720754206e-05, + "loss": 0.8001, + "step": 4336 + }, + { + "epoch": 0.1329226431286012, + "grad_norm": 1.4753579716141814, + "learning_rate": 1.9449589988064164e-05, + "loss": 0.8238, + "step": 4337 + }, + { + "epoch": 0.1329532916513424, + "grad_norm": 1.6228305105609582, + "learning_rate": 1.9449265162264893e-05, + "loss": 0.6595, + "step": 4338 + }, + { + "epoch": 0.1329839401740836, + "grad_norm": 1.7204872389415684, + "learning_rate": 1.94489402433596e-05, + "loss": 0.8922, + "step": 4339 + }, + { + "epoch": 0.1330145886968248, + "grad_norm": 1.6494978380069996, + "learning_rate": 1.9448615231351474e-05, + "loss": 0.7075, + "step": 4340 + }, + { + "epoch": 0.13304523721956601, + "grad_norm": 1.523263338790505, + "learning_rate": 1.9448290126243726e-05, + "loss": 0.7893, + "step": 4341 + }, + { + "epoch": 0.13307588574230722, + "grad_norm": 1.5705876584773526, + "learning_rate": 1.9447964928039562e-05, + "loss": 0.7977, + "step": 4342 + }, + { + "epoch": 0.13310653426504843, + "grad_norm": 1.668807270113115, + "learning_rate": 1.9447639636742178e-05, + "loss": 0.8485, + "step": 4343 + }, + { + "epoch": 0.13313718278778963, + "grad_norm": 1.5644391716152173, + "learning_rate": 1.9447314252354785e-05, + "loss": 0.8291, + "step": 4344 + }, + { + "epoch": 0.13316783131053084, + "grad_norm": 1.7838079810648675, + "learning_rate": 1.944698877488059e-05, + "loss": 0.7951, + "step": 4345 + }, + { + "epoch": 0.13319847983327204, + "grad_norm": 1.5147724915279077, + "learning_rate": 1.9446663204322792e-05, + "loss": 0.7476, + "step": 4346 + }, + { + "epoch": 0.13322912835601325, + "grad_norm": 1.6810319841667005, + "learning_rate": 1.944633754068461e-05, + "loss": 0.9541, + "step": 4347 + }, + { + "epoch": 0.13325977687875445, + "grad_norm": 1.5432217023938355, + "learning_rate": 1.9446011783969244e-05, + "loss": 0.7705, + "step": 4348 + }, + { + "epoch": 0.13329042540149566, + "grad_norm": 1.7151308536570977, + "learning_rate": 1.9445685934179906e-05, + "loss": 0.872, + "step": 4349 + }, + { + "epoch": 0.13332107392423684, + "grad_norm": 1.4708610927658425, + "learning_rate": 1.944535999131981e-05, + "loss": 0.8452, + "step": 4350 + }, + { + "epoch": 0.13335172244697804, + "grad_norm": 1.818557822633676, + "learning_rate": 1.9445033955392166e-05, + "loss": 0.8317, + "step": 4351 + }, + { + "epoch": 0.13338237096971925, + "grad_norm": 1.552953529525742, + "learning_rate": 1.9444707826400183e-05, + "loss": 0.8835, + "step": 4352 + }, + { + "epoch": 0.13341301949246046, + "grad_norm": 1.6648594299032276, + "learning_rate": 1.944438160434708e-05, + "loss": 0.911, + "step": 4353 + }, + { + "epoch": 0.13344366801520166, + "grad_norm": 1.4766627190047115, + "learning_rate": 1.9444055289236065e-05, + "loss": 0.821, + "step": 4354 + }, + { + "epoch": 0.13347431653794287, + "grad_norm": 0.9744622275111138, + "learning_rate": 1.944372888107036e-05, + "loss": 0.6477, + "step": 4355 + }, + { + "epoch": 0.13350496506068407, + "grad_norm": 1.763911552438854, + "learning_rate": 1.944340237985318e-05, + "loss": 0.9055, + "step": 4356 + }, + { + "epoch": 0.13353561358342528, + "grad_norm": 1.7485187765481032, + "learning_rate": 1.9443075785587736e-05, + "loss": 0.855, + "step": 4357 + }, + { + "epoch": 0.13356626210616648, + "grad_norm": 1.5183606364585334, + "learning_rate": 1.9442749098277252e-05, + "loss": 0.8111, + "step": 4358 + }, + { + "epoch": 0.1335969106289077, + "grad_norm": 1.3277635457141619, + "learning_rate": 1.9442422317924942e-05, + "loss": 0.8083, + "step": 4359 + }, + { + "epoch": 0.1336275591516489, + "grad_norm": 1.632474563839407, + "learning_rate": 1.9442095444534032e-05, + "loss": 0.8106, + "step": 4360 + }, + { + "epoch": 0.1336582076743901, + "grad_norm": 2.4149615195625524, + "learning_rate": 1.944176847810774e-05, + "loss": 0.888, + "step": 4361 + }, + { + "epoch": 0.1336888561971313, + "grad_norm": 1.4520900976662874, + "learning_rate": 1.944144141864929e-05, + "loss": 0.757, + "step": 4362 + }, + { + "epoch": 0.1337195047198725, + "grad_norm": 1.7963305167421906, + "learning_rate": 1.9441114266161897e-05, + "loss": 0.8521, + "step": 4363 + }, + { + "epoch": 0.13375015324261372, + "grad_norm": 1.761801237091915, + "learning_rate": 1.944078702064879e-05, + "loss": 0.8235, + "step": 4364 + }, + { + "epoch": 0.13378080176535492, + "grad_norm": 1.790837109972004, + "learning_rate": 1.9440459682113195e-05, + "loss": 0.9197, + "step": 4365 + }, + { + "epoch": 0.1338114502880961, + "grad_norm": 1.570294965859869, + "learning_rate": 1.9440132250558334e-05, + "loss": 0.8466, + "step": 4366 + }, + { + "epoch": 0.1338420988108373, + "grad_norm": 1.541727439932406, + "learning_rate": 1.9439804725987437e-05, + "loss": 0.7641, + "step": 4367 + }, + { + "epoch": 0.1338727473335785, + "grad_norm": 1.278634529848554, + "learning_rate": 1.9439477108403727e-05, + "loss": 0.6921, + "step": 4368 + }, + { + "epoch": 0.13390339585631972, + "grad_norm": 1.8458023676347726, + "learning_rate": 1.9439149397810432e-05, + "loss": 0.9198, + "step": 4369 + }, + { + "epoch": 0.13393404437906092, + "grad_norm": 0.8195779976736419, + "learning_rate": 1.9438821594210785e-05, + "loss": 0.6578, + "step": 4370 + }, + { + "epoch": 0.13396469290180213, + "grad_norm": 1.9154928042522361, + "learning_rate": 1.9438493697608015e-05, + "loss": 0.9216, + "step": 4371 + }, + { + "epoch": 0.13399534142454333, + "grad_norm": 1.5719856089081683, + "learning_rate": 1.943816570800535e-05, + "loss": 0.7518, + "step": 4372 + }, + { + "epoch": 0.13402598994728454, + "grad_norm": 0.8521572414968746, + "learning_rate": 1.943783762540602e-05, + "loss": 0.6456, + "step": 4373 + }, + { + "epoch": 0.13405663847002575, + "grad_norm": 1.59796755455439, + "learning_rate": 1.9437509449813268e-05, + "loss": 0.8842, + "step": 4374 + }, + { + "epoch": 0.13408728699276695, + "grad_norm": 0.9349640494248351, + "learning_rate": 1.9437181181230314e-05, + "loss": 0.641, + "step": 4375 + }, + { + "epoch": 0.13411793551550816, + "grad_norm": 1.72139462128562, + "learning_rate": 1.9436852819660402e-05, + "loss": 0.8879, + "step": 4376 + }, + { + "epoch": 0.13414858403824936, + "grad_norm": 1.4250155354046488, + "learning_rate": 1.9436524365106767e-05, + "loss": 0.8019, + "step": 4377 + }, + { + "epoch": 0.13417923256099057, + "grad_norm": 0.7676029133489817, + "learning_rate": 1.943619581757264e-05, + "loss": 0.6333, + "step": 4378 + }, + { + "epoch": 0.13420988108373177, + "grad_norm": 1.7014524667578128, + "learning_rate": 1.9435867177061265e-05, + "loss": 0.7743, + "step": 4379 + }, + { + "epoch": 0.13424052960647298, + "grad_norm": 1.4793978838015134, + "learning_rate": 1.9435538443575872e-05, + "loss": 0.7646, + "step": 4380 + }, + { + "epoch": 0.13427117812921416, + "grad_norm": 1.6275203027049017, + "learning_rate": 1.943520961711971e-05, + "loss": 0.7665, + "step": 4381 + }, + { + "epoch": 0.13430182665195536, + "grad_norm": 1.621244134762956, + "learning_rate": 1.943488069769601e-05, + "loss": 0.9271, + "step": 4382 + }, + { + "epoch": 0.13433247517469657, + "grad_norm": 1.6489944734034203, + "learning_rate": 1.9434551685308013e-05, + "loss": 0.7443, + "step": 4383 + }, + { + "epoch": 0.13436312369743778, + "grad_norm": 1.6589934135150857, + "learning_rate": 1.9434222579958968e-05, + "loss": 0.7295, + "step": 4384 + }, + { + "epoch": 0.13439377222017898, + "grad_norm": 0.9176146993731088, + "learning_rate": 1.9433893381652112e-05, + "loss": 0.6686, + "step": 4385 + }, + { + "epoch": 0.1344244207429202, + "grad_norm": 1.6922838916689777, + "learning_rate": 1.9433564090390695e-05, + "loss": 0.8596, + "step": 4386 + }, + { + "epoch": 0.1344550692656614, + "grad_norm": 1.7415045365095694, + "learning_rate": 1.9433234706177953e-05, + "loss": 0.9966, + "step": 4387 + }, + { + "epoch": 0.1344857177884026, + "grad_norm": 1.6234261848536327, + "learning_rate": 1.9432905229017138e-05, + "loss": 0.9308, + "step": 4388 + }, + { + "epoch": 0.1345163663111438, + "grad_norm": 1.7476128658346828, + "learning_rate": 1.9432575658911496e-05, + "loss": 0.8207, + "step": 4389 + }, + { + "epoch": 0.134547014833885, + "grad_norm": 0.7163240252067622, + "learning_rate": 1.943224599586427e-05, + "loss": 0.6518, + "step": 4390 + }, + { + "epoch": 0.13457766335662621, + "grad_norm": 0.7103105970764388, + "learning_rate": 1.943191623987871e-05, + "loss": 0.6273, + "step": 4391 + }, + { + "epoch": 0.13460831187936742, + "grad_norm": 1.6824259703707578, + "learning_rate": 1.943158639095807e-05, + "loss": 0.8164, + "step": 4392 + }, + { + "epoch": 0.13463896040210863, + "grad_norm": 1.6943023632438134, + "learning_rate": 1.943125644910559e-05, + "loss": 0.8868, + "step": 4393 + }, + { + "epoch": 0.13466960892484983, + "grad_norm": 1.8371222026230845, + "learning_rate": 1.9430926414324535e-05, + "loss": 0.82, + "step": 4394 + }, + { + "epoch": 0.13470025744759104, + "grad_norm": 1.698283326212963, + "learning_rate": 1.943059628661814e-05, + "loss": 0.8127, + "step": 4395 + }, + { + "epoch": 0.13473090597033224, + "grad_norm": 1.547144297406473, + "learning_rate": 1.9430266065989673e-05, + "loss": 0.7804, + "step": 4396 + }, + { + "epoch": 0.13476155449307342, + "grad_norm": 0.7925462858483779, + "learning_rate": 1.942993575244238e-05, + "loss": 0.6169, + "step": 4397 + }, + { + "epoch": 0.13479220301581463, + "grad_norm": 1.6040282597910676, + "learning_rate": 1.942960534597952e-05, + "loss": 0.8256, + "step": 4398 + }, + { + "epoch": 0.13482285153855583, + "grad_norm": 1.7350649797613151, + "learning_rate": 1.942927484660434e-05, + "loss": 0.812, + "step": 4399 + }, + { + "epoch": 0.13485350006129704, + "grad_norm": 0.7648365546104187, + "learning_rate": 1.9428944254320108e-05, + "loss": 0.6781, + "step": 4400 + }, + { + "epoch": 0.13488414858403824, + "grad_norm": 1.6959667728988597, + "learning_rate": 1.9428613569130075e-05, + "loss": 0.7669, + "step": 4401 + }, + { + "epoch": 0.13491479710677945, + "grad_norm": 1.6949158660735562, + "learning_rate": 1.9428282791037496e-05, + "loss": 0.8294, + "step": 4402 + }, + { + "epoch": 0.13494544562952066, + "grad_norm": 1.5683901684358763, + "learning_rate": 1.942795192004564e-05, + "loss": 0.7787, + "step": 4403 + }, + { + "epoch": 0.13497609415226186, + "grad_norm": 1.7835455303777707, + "learning_rate": 1.9427620956157755e-05, + "loss": 0.8856, + "step": 4404 + }, + { + "epoch": 0.13500674267500307, + "grad_norm": 2.0197118128451845, + "learning_rate": 1.9427289899377113e-05, + "loss": 0.8341, + "step": 4405 + }, + { + "epoch": 0.13503739119774427, + "grad_norm": 1.5020930683156246, + "learning_rate": 1.942695874970697e-05, + "loss": 0.7466, + "step": 4406 + }, + { + "epoch": 0.13506803972048548, + "grad_norm": 1.4320500729909749, + "learning_rate": 1.942662750715059e-05, + "loss": 0.7575, + "step": 4407 + }, + { + "epoch": 0.13509868824322668, + "grad_norm": 1.6706076083558346, + "learning_rate": 1.9426296171711237e-05, + "loss": 0.7931, + "step": 4408 + }, + { + "epoch": 0.1351293367659679, + "grad_norm": 0.8435216423388909, + "learning_rate": 1.942596474339218e-05, + "loss": 0.646, + "step": 4409 + }, + { + "epoch": 0.1351599852887091, + "grad_norm": 1.6760628574486298, + "learning_rate": 1.9425633222196677e-05, + "loss": 0.7588, + "step": 4410 + }, + { + "epoch": 0.1351906338114503, + "grad_norm": 0.7698132103141931, + "learning_rate": 1.9425301608128e-05, + "loss": 0.6266, + "step": 4411 + }, + { + "epoch": 0.13522128233419148, + "grad_norm": 0.7316327066253567, + "learning_rate": 1.9424969901189415e-05, + "loss": 0.6407, + "step": 4412 + }, + { + "epoch": 0.13525193085693268, + "grad_norm": 1.718601803569185, + "learning_rate": 1.9424638101384187e-05, + "loss": 0.8987, + "step": 4413 + }, + { + "epoch": 0.1352825793796739, + "grad_norm": 1.3262582905958535, + "learning_rate": 1.9424306208715592e-05, + "loss": 0.7059, + "step": 4414 + }, + { + "epoch": 0.1353132279024151, + "grad_norm": 1.6975122231237239, + "learning_rate": 1.942397422318689e-05, + "loss": 0.8901, + "step": 4415 + }, + { + "epoch": 0.1353438764251563, + "grad_norm": 1.5796148684941111, + "learning_rate": 1.9423642144801366e-05, + "loss": 0.9007, + "step": 4416 + }, + { + "epoch": 0.1353745249478975, + "grad_norm": 1.4137585757094868, + "learning_rate": 1.9423309973562284e-05, + "loss": 0.748, + "step": 4417 + }, + { + "epoch": 0.1354051734706387, + "grad_norm": 1.6708179481605554, + "learning_rate": 1.9422977709472913e-05, + "loss": 0.7729, + "step": 4418 + }, + { + "epoch": 0.13543582199337992, + "grad_norm": 1.705895709551708, + "learning_rate": 1.9422645352536538e-05, + "loss": 0.8629, + "step": 4419 + }, + { + "epoch": 0.13546647051612112, + "grad_norm": 1.506879729580842, + "learning_rate": 1.9422312902756424e-05, + "loss": 0.8374, + "step": 4420 + }, + { + "epoch": 0.13549711903886233, + "grad_norm": 1.6080672198732917, + "learning_rate": 1.942198036013585e-05, + "loss": 0.8154, + "step": 4421 + }, + { + "epoch": 0.13552776756160353, + "grad_norm": 1.6248512924425682, + "learning_rate": 1.9421647724678095e-05, + "loss": 0.8327, + "step": 4422 + }, + { + "epoch": 0.13555841608434474, + "grad_norm": 1.5630848163425444, + "learning_rate": 1.942131499638643e-05, + "loss": 0.8639, + "step": 4423 + }, + { + "epoch": 0.13558906460708595, + "grad_norm": 1.4088136737181138, + "learning_rate": 1.942098217526414e-05, + "loss": 0.8104, + "step": 4424 + }, + { + "epoch": 0.13561971312982715, + "grad_norm": 1.5827909617658407, + "learning_rate": 1.9420649261314505e-05, + "loss": 0.7608, + "step": 4425 + }, + { + "epoch": 0.13565036165256836, + "grad_norm": 1.6669071324518565, + "learning_rate": 1.94203162545408e-05, + "loss": 0.838, + "step": 4426 + }, + { + "epoch": 0.13568101017530956, + "grad_norm": 1.5800997804703238, + "learning_rate": 1.9419983154946308e-05, + "loss": 0.8615, + "step": 4427 + }, + { + "epoch": 0.13571165869805074, + "grad_norm": 1.8641162312240642, + "learning_rate": 1.941964996253431e-05, + "loss": 0.894, + "step": 4428 + }, + { + "epoch": 0.13574230722079195, + "grad_norm": 1.5348758192749237, + "learning_rate": 1.9419316677308093e-05, + "loss": 0.868, + "step": 4429 + }, + { + "epoch": 0.13577295574353315, + "grad_norm": 1.657792250670833, + "learning_rate": 1.941898329927094e-05, + "loss": 0.7976, + "step": 4430 + }, + { + "epoch": 0.13580360426627436, + "grad_norm": 1.7024145072989216, + "learning_rate": 1.9418649828426135e-05, + "loss": 0.8451, + "step": 4431 + }, + { + "epoch": 0.13583425278901556, + "grad_norm": 2.235272052351857, + "learning_rate": 1.941831626477696e-05, + "loss": 0.8926, + "step": 4432 + }, + { + "epoch": 0.13586490131175677, + "grad_norm": 1.5051263021018217, + "learning_rate": 1.9417982608326706e-05, + "loss": 0.858, + "step": 4433 + }, + { + "epoch": 0.13589554983449798, + "grad_norm": 1.7029636710470166, + "learning_rate": 1.9417648859078664e-05, + "loss": 0.7339, + "step": 4434 + }, + { + "epoch": 0.13592619835723918, + "grad_norm": 1.557176866333986, + "learning_rate": 1.9417315017036112e-05, + "loss": 0.786, + "step": 4435 + }, + { + "epoch": 0.1359568468799804, + "grad_norm": 1.6327607585231065, + "learning_rate": 1.9416981082202347e-05, + "loss": 0.9455, + "step": 4436 + }, + { + "epoch": 0.1359874954027216, + "grad_norm": 1.8580902953551193, + "learning_rate": 1.941664705458066e-05, + "loss": 0.8123, + "step": 4437 + }, + { + "epoch": 0.1360181439254628, + "grad_norm": 1.7872806095117246, + "learning_rate": 1.941631293417434e-05, + "loss": 0.843, + "step": 4438 + }, + { + "epoch": 0.136048792448204, + "grad_norm": 1.879998655161473, + "learning_rate": 1.9415978720986677e-05, + "loss": 0.7958, + "step": 4439 + }, + { + "epoch": 0.1360794409709452, + "grad_norm": 1.1917196615323522, + "learning_rate": 1.941564441502097e-05, + "loss": 0.6673, + "step": 4440 + }, + { + "epoch": 0.13611008949368641, + "grad_norm": 1.7567081661339903, + "learning_rate": 1.941531001628051e-05, + "loss": 0.7872, + "step": 4441 + }, + { + "epoch": 0.13614073801642762, + "grad_norm": 0.797169518451092, + "learning_rate": 1.9414975524768588e-05, + "loss": 0.6696, + "step": 4442 + }, + { + "epoch": 0.1361713865391688, + "grad_norm": 1.7608947144942921, + "learning_rate": 1.9414640940488506e-05, + "loss": 0.7618, + "step": 4443 + }, + { + "epoch": 0.13620203506191, + "grad_norm": 0.8924078673189267, + "learning_rate": 1.9414306263443555e-05, + "loss": 0.6623, + "step": 4444 + }, + { + "epoch": 0.1362326835846512, + "grad_norm": 1.8821530666916946, + "learning_rate": 1.9413971493637037e-05, + "loss": 0.8858, + "step": 4445 + }, + { + "epoch": 0.13626333210739242, + "grad_norm": 1.743226042872469, + "learning_rate": 1.9413636631072253e-05, + "loss": 0.7686, + "step": 4446 + }, + { + "epoch": 0.13629398063013362, + "grad_norm": 0.9442625640844817, + "learning_rate": 1.9413301675752493e-05, + "loss": 0.6853, + "step": 4447 + }, + { + "epoch": 0.13632462915287483, + "grad_norm": 1.681929497151345, + "learning_rate": 1.9412966627681066e-05, + "loss": 0.8592, + "step": 4448 + }, + { + "epoch": 0.13635527767561603, + "grad_norm": 1.5643918760160638, + "learning_rate": 1.941263148686127e-05, + "loss": 0.8261, + "step": 4449 + }, + { + "epoch": 0.13638592619835724, + "grad_norm": 1.5595122901194465, + "learning_rate": 1.9412296253296407e-05, + "loss": 0.7261, + "step": 4450 + }, + { + "epoch": 0.13641657472109844, + "grad_norm": 0.8736242471626441, + "learning_rate": 1.941196092698978e-05, + "loss": 0.6467, + "step": 4451 + }, + { + "epoch": 0.13644722324383965, + "grad_norm": 1.670201698927051, + "learning_rate": 1.9411625507944697e-05, + "loss": 0.7754, + "step": 4452 + }, + { + "epoch": 0.13647787176658085, + "grad_norm": 1.7811033431335948, + "learning_rate": 1.9411289996164456e-05, + "loss": 0.9222, + "step": 4453 + }, + { + "epoch": 0.13650852028932206, + "grad_norm": 1.5779365335273354, + "learning_rate": 1.9410954391652367e-05, + "loss": 0.8103, + "step": 4454 + }, + { + "epoch": 0.13653916881206327, + "grad_norm": 1.7578809597714289, + "learning_rate": 1.9410618694411738e-05, + "loss": 0.8872, + "step": 4455 + }, + { + "epoch": 0.13656981733480447, + "grad_norm": 1.7666810818454521, + "learning_rate": 1.9410282904445874e-05, + "loss": 0.7784, + "step": 4456 + }, + { + "epoch": 0.13660046585754568, + "grad_norm": 1.6903420397107354, + "learning_rate": 1.9409947021758088e-05, + "loss": 0.8775, + "step": 4457 + }, + { + "epoch": 0.13663111438028688, + "grad_norm": 0.7900206213371769, + "learning_rate": 1.9409611046351684e-05, + "loss": 0.6352, + "step": 4458 + }, + { + "epoch": 0.13666176290302806, + "grad_norm": 2.0563526500566183, + "learning_rate": 1.9409274978229975e-05, + "loss": 0.7251, + "step": 4459 + }, + { + "epoch": 0.13669241142576927, + "grad_norm": 1.5459627439942525, + "learning_rate": 1.940893881739627e-05, + "loss": 0.8864, + "step": 4460 + }, + { + "epoch": 0.13672305994851047, + "grad_norm": 1.660534992020281, + "learning_rate": 1.9408602563853886e-05, + "loss": 0.6885, + "step": 4461 + }, + { + "epoch": 0.13675370847125168, + "grad_norm": 1.634205630349022, + "learning_rate": 1.9408266217606134e-05, + "loss": 0.8687, + "step": 4462 + }, + { + "epoch": 0.13678435699399288, + "grad_norm": 1.4359023859343953, + "learning_rate": 1.9407929778656328e-05, + "loss": 0.7102, + "step": 4463 + }, + { + "epoch": 0.1368150055167341, + "grad_norm": 1.5463990417448428, + "learning_rate": 1.9407593247007782e-05, + "loss": 0.8459, + "step": 4464 + }, + { + "epoch": 0.1368456540394753, + "grad_norm": 1.399623835626327, + "learning_rate": 1.940725662266381e-05, + "loss": 0.8586, + "step": 4465 + }, + { + "epoch": 0.1368763025622165, + "grad_norm": 0.7199758128561767, + "learning_rate": 1.9406919905627736e-05, + "loss": 0.6405, + "step": 4466 + }, + { + "epoch": 0.1369069510849577, + "grad_norm": 1.524812439819397, + "learning_rate": 1.9406583095902868e-05, + "loss": 0.7017, + "step": 4467 + }, + { + "epoch": 0.1369375996076989, + "grad_norm": 1.807223776594614, + "learning_rate": 1.9406246193492534e-05, + "loss": 0.8696, + "step": 4468 + }, + { + "epoch": 0.13696824813044012, + "grad_norm": 1.5409901876354914, + "learning_rate": 1.940590919840005e-05, + "loss": 0.8267, + "step": 4469 + }, + { + "epoch": 0.13699889665318132, + "grad_norm": 1.446738876516149, + "learning_rate": 1.9405572110628736e-05, + "loss": 0.7192, + "step": 4470 + }, + { + "epoch": 0.13702954517592253, + "grad_norm": 1.4657614204059115, + "learning_rate": 1.940523493018191e-05, + "loss": 0.7674, + "step": 4471 + }, + { + "epoch": 0.13706019369866373, + "grad_norm": 1.6381748844255744, + "learning_rate": 1.94048976570629e-05, + "loss": 0.838, + "step": 4472 + }, + { + "epoch": 0.13709084222140494, + "grad_norm": 1.6618422903435794, + "learning_rate": 1.940456029127503e-05, + "loss": 0.8126, + "step": 4473 + }, + { + "epoch": 0.13712149074414612, + "grad_norm": 1.9439207212913494, + "learning_rate": 1.9404222832821618e-05, + "loss": 0.8299, + "step": 4474 + }, + { + "epoch": 0.13715213926688732, + "grad_norm": 1.9105541952812646, + "learning_rate": 1.9403885281705992e-05, + "loss": 0.9275, + "step": 4475 + }, + { + "epoch": 0.13718278778962853, + "grad_norm": 1.696610881515403, + "learning_rate": 1.940354763793148e-05, + "loss": 0.8069, + "step": 4476 + }, + { + "epoch": 0.13721343631236974, + "grad_norm": 1.7583905404399325, + "learning_rate": 1.9403209901501406e-05, + "loss": 0.8714, + "step": 4477 + }, + { + "epoch": 0.13724408483511094, + "grad_norm": 1.5515543302390404, + "learning_rate": 1.9402872072419098e-05, + "loss": 0.8245, + "step": 4478 + }, + { + "epoch": 0.13727473335785215, + "grad_norm": 1.8353488237824793, + "learning_rate": 1.9402534150687885e-05, + "loss": 0.8454, + "step": 4479 + }, + { + "epoch": 0.13730538188059335, + "grad_norm": 1.56625920966875, + "learning_rate": 1.94021961363111e-05, + "loss": 0.8948, + "step": 4480 + }, + { + "epoch": 0.13733603040333456, + "grad_norm": 1.5230707707948339, + "learning_rate": 1.940185802929207e-05, + "loss": 0.7391, + "step": 4481 + }, + { + "epoch": 0.13736667892607576, + "grad_norm": 1.5985237487866295, + "learning_rate": 1.9401519829634127e-05, + "loss": 0.7612, + "step": 4482 + }, + { + "epoch": 0.13739732744881697, + "grad_norm": 0.7689394369180964, + "learning_rate": 1.9401181537340603e-05, + "loss": 0.6458, + "step": 4483 + }, + { + "epoch": 0.13742797597155818, + "grad_norm": 1.4805535664114564, + "learning_rate": 1.9400843152414834e-05, + "loss": 0.7176, + "step": 4484 + }, + { + "epoch": 0.13745862449429938, + "grad_norm": 1.7489959490870934, + "learning_rate": 1.940050467486015e-05, + "loss": 0.8678, + "step": 4485 + }, + { + "epoch": 0.1374892730170406, + "grad_norm": 0.7252668064967506, + "learning_rate": 1.9400166104679887e-05, + "loss": 0.638, + "step": 4486 + }, + { + "epoch": 0.1375199215397818, + "grad_norm": 1.5096613620705484, + "learning_rate": 1.9399827441877387e-05, + "loss": 0.7844, + "step": 4487 + }, + { + "epoch": 0.137550570062523, + "grad_norm": 1.505450425261222, + "learning_rate": 1.939948868645598e-05, + "loss": 0.7232, + "step": 4488 + }, + { + "epoch": 0.1375812185852642, + "grad_norm": 1.4989527113452636, + "learning_rate": 1.9399149838419004e-05, + "loss": 0.7896, + "step": 4489 + }, + { + "epoch": 0.13761186710800538, + "grad_norm": 1.7835106567467378, + "learning_rate": 1.93988108977698e-05, + "loss": 0.9021, + "step": 4490 + }, + { + "epoch": 0.1376425156307466, + "grad_norm": 1.5082050655162436, + "learning_rate": 1.939847186451171e-05, + "loss": 0.8028, + "step": 4491 + }, + { + "epoch": 0.1376731641534878, + "grad_norm": 1.918379326622245, + "learning_rate": 1.939813273864807e-05, + "loss": 0.7073, + "step": 4492 + }, + { + "epoch": 0.137703812676229, + "grad_norm": 1.7236571385654016, + "learning_rate": 1.9397793520182225e-05, + "loss": 0.8487, + "step": 4493 + }, + { + "epoch": 0.1377344611989702, + "grad_norm": 1.6334822538341618, + "learning_rate": 1.9397454209117513e-05, + "loss": 0.8649, + "step": 4494 + }, + { + "epoch": 0.1377651097217114, + "grad_norm": 1.4709196305386298, + "learning_rate": 1.9397114805457283e-05, + "loss": 0.8738, + "step": 4495 + }, + { + "epoch": 0.13779575824445262, + "grad_norm": 1.7064809370727088, + "learning_rate": 1.9396775309204873e-05, + "loss": 0.8512, + "step": 4496 + }, + { + "epoch": 0.13782640676719382, + "grad_norm": 0.8252719343403184, + "learning_rate": 1.9396435720363634e-05, + "loss": 0.6797, + "step": 4497 + }, + { + "epoch": 0.13785705528993503, + "grad_norm": 1.712243516830548, + "learning_rate": 1.939609603893691e-05, + "loss": 0.897, + "step": 4498 + }, + { + "epoch": 0.13788770381267623, + "grad_norm": 1.6374805923322957, + "learning_rate": 1.9395756264928048e-05, + "loss": 0.8166, + "step": 4499 + }, + { + "epoch": 0.13791835233541744, + "grad_norm": 1.6660425667005991, + "learning_rate": 1.9395416398340396e-05, + "loss": 0.8742, + "step": 4500 + }, + { + "epoch": 0.13794900085815864, + "grad_norm": 1.4207949892686182, + "learning_rate": 1.9395076439177304e-05, + "loss": 0.9105, + "step": 4501 + }, + { + "epoch": 0.13797964938089985, + "grad_norm": 1.5537409612315751, + "learning_rate": 1.9394736387442114e-05, + "loss": 0.7993, + "step": 4502 + }, + { + "epoch": 0.13801029790364105, + "grad_norm": 0.7017515038086034, + "learning_rate": 1.9394396243138186e-05, + "loss": 0.6365, + "step": 4503 + }, + { + "epoch": 0.13804094642638226, + "grad_norm": 1.44538304282127, + "learning_rate": 1.9394056006268868e-05, + "loss": 0.9028, + "step": 4504 + }, + { + "epoch": 0.13807159494912344, + "grad_norm": 1.6406805452396631, + "learning_rate": 1.939371567683751e-05, + "loss": 0.7597, + "step": 4505 + }, + { + "epoch": 0.13810224347186464, + "grad_norm": 0.7561511731254041, + "learning_rate": 1.9393375254847475e-05, + "loss": 0.6809, + "step": 4506 + }, + { + "epoch": 0.13813289199460585, + "grad_norm": 1.6959856525989998, + "learning_rate": 1.9393034740302105e-05, + "loss": 0.8363, + "step": 4507 + }, + { + "epoch": 0.13816354051734706, + "grad_norm": 1.7787630145757587, + "learning_rate": 1.9392694133204762e-05, + "loss": 0.8849, + "step": 4508 + }, + { + "epoch": 0.13819418904008826, + "grad_norm": 1.4566186167762907, + "learning_rate": 1.93923534335588e-05, + "loss": 0.7806, + "step": 4509 + }, + { + "epoch": 0.13822483756282947, + "grad_norm": 1.5916102136333108, + "learning_rate": 1.9392012641367574e-05, + "loss": 0.8792, + "step": 4510 + }, + { + "epoch": 0.13825548608557067, + "grad_norm": 1.6515894613311377, + "learning_rate": 1.9391671756634447e-05, + "loss": 0.854, + "step": 4511 + }, + { + "epoch": 0.13828613460831188, + "grad_norm": 1.5882974163301526, + "learning_rate": 1.9391330779362778e-05, + "loss": 0.7991, + "step": 4512 + }, + { + "epoch": 0.13831678313105308, + "grad_norm": 1.7820418280127643, + "learning_rate": 1.939098970955592e-05, + "loss": 0.7338, + "step": 4513 + }, + { + "epoch": 0.1383474316537943, + "grad_norm": 1.5572010096515942, + "learning_rate": 1.9390648547217238e-05, + "loss": 0.8023, + "step": 4514 + }, + { + "epoch": 0.1383780801765355, + "grad_norm": 1.5157370410647628, + "learning_rate": 1.9390307292350093e-05, + "loss": 0.8567, + "step": 4515 + }, + { + "epoch": 0.1384087286992767, + "grad_norm": 0.8207141638239296, + "learning_rate": 1.9389965944957847e-05, + "loss": 0.6783, + "step": 4516 + }, + { + "epoch": 0.1384393772220179, + "grad_norm": 1.499994116412983, + "learning_rate": 1.9389624505043866e-05, + "loss": 0.8078, + "step": 4517 + }, + { + "epoch": 0.1384700257447591, + "grad_norm": 1.625763764331914, + "learning_rate": 1.9389282972611513e-05, + "loss": 0.8506, + "step": 4518 + }, + { + "epoch": 0.13850067426750032, + "grad_norm": 1.6476552637225654, + "learning_rate": 1.938894134766415e-05, + "loss": 0.9114, + "step": 4519 + }, + { + "epoch": 0.13853132279024152, + "grad_norm": 1.7951747081590201, + "learning_rate": 1.9388599630205144e-05, + "loss": 0.766, + "step": 4520 + }, + { + "epoch": 0.1385619713129827, + "grad_norm": 1.616016386125651, + "learning_rate": 1.9388257820237867e-05, + "loss": 0.8132, + "step": 4521 + }, + { + "epoch": 0.1385926198357239, + "grad_norm": 1.5761321377601745, + "learning_rate": 1.9387915917765686e-05, + "loss": 0.8063, + "step": 4522 + }, + { + "epoch": 0.1386232683584651, + "grad_norm": 0.7961198306222166, + "learning_rate": 1.9387573922791964e-05, + "loss": 0.6433, + "step": 4523 + }, + { + "epoch": 0.13865391688120632, + "grad_norm": 1.7730185738060562, + "learning_rate": 1.9387231835320072e-05, + "loss": 0.8033, + "step": 4524 + }, + { + "epoch": 0.13868456540394752, + "grad_norm": 1.5059375337835423, + "learning_rate": 1.9386889655353388e-05, + "loss": 0.7978, + "step": 4525 + }, + { + "epoch": 0.13871521392668873, + "grad_norm": 1.730286741727709, + "learning_rate": 1.9386547382895274e-05, + "loss": 0.8822, + "step": 4526 + }, + { + "epoch": 0.13874586244942994, + "grad_norm": 0.7186719054786308, + "learning_rate": 1.938620501794911e-05, + "loss": 0.6002, + "step": 4527 + }, + { + "epoch": 0.13877651097217114, + "grad_norm": 0.7243265504360052, + "learning_rate": 1.9385862560518265e-05, + "loss": 0.6452, + "step": 4528 + }, + { + "epoch": 0.13880715949491235, + "grad_norm": 1.5799792103527086, + "learning_rate": 1.9385520010606114e-05, + "loss": 0.7887, + "step": 4529 + }, + { + "epoch": 0.13883780801765355, + "grad_norm": 1.8253167066077938, + "learning_rate": 1.9385177368216036e-05, + "loss": 0.9729, + "step": 4530 + }, + { + "epoch": 0.13886845654039476, + "grad_norm": 0.6871942336981182, + "learning_rate": 1.93848346333514e-05, + "loss": 0.6159, + "step": 4531 + }, + { + "epoch": 0.13889910506313596, + "grad_norm": 0.7558095588299558, + "learning_rate": 1.938449180601559e-05, + "loss": 0.6574, + "step": 4532 + }, + { + "epoch": 0.13892975358587717, + "grad_norm": 1.6191036691079992, + "learning_rate": 1.938414888621198e-05, + "loss": 0.7654, + "step": 4533 + }, + { + "epoch": 0.13896040210861837, + "grad_norm": 1.7856184222146674, + "learning_rate": 1.938380587394395e-05, + "loss": 0.8587, + "step": 4534 + }, + { + "epoch": 0.13899105063135958, + "grad_norm": 1.4410517431775876, + "learning_rate": 1.9383462769214883e-05, + "loss": 0.7432, + "step": 4535 + }, + { + "epoch": 0.13902169915410076, + "grad_norm": 1.503541740037732, + "learning_rate": 1.9383119572028152e-05, + "loss": 0.8288, + "step": 4536 + }, + { + "epoch": 0.13905234767684196, + "grad_norm": 1.794867380467939, + "learning_rate": 1.9382776282387142e-05, + "loss": 0.8465, + "step": 4537 + }, + { + "epoch": 0.13908299619958317, + "grad_norm": 1.7333252022166104, + "learning_rate": 1.9382432900295243e-05, + "loss": 0.8422, + "step": 4538 + }, + { + "epoch": 0.13911364472232438, + "grad_norm": 0.8474342842521525, + "learning_rate": 1.9382089425755827e-05, + "loss": 0.6641, + "step": 4539 + }, + { + "epoch": 0.13914429324506558, + "grad_norm": 1.6564682016670336, + "learning_rate": 1.9381745858772286e-05, + "loss": 0.8503, + "step": 4540 + }, + { + "epoch": 0.1391749417678068, + "grad_norm": 1.3863193349019653, + "learning_rate": 1.9381402199348e-05, + "loss": 0.861, + "step": 4541 + }, + { + "epoch": 0.139205590290548, + "grad_norm": 1.5556430840656565, + "learning_rate": 1.938105844748636e-05, + "loss": 0.7354, + "step": 4542 + }, + { + "epoch": 0.1392362388132892, + "grad_norm": 0.7323459694545853, + "learning_rate": 1.938071460319075e-05, + "loss": 0.6585, + "step": 4543 + }, + { + "epoch": 0.1392668873360304, + "grad_norm": 1.4306905200761408, + "learning_rate": 1.9380370666464557e-05, + "loss": 0.9131, + "step": 4544 + }, + { + "epoch": 0.1392975358587716, + "grad_norm": 1.667216239172317, + "learning_rate": 1.9380026637311176e-05, + "loss": 0.8678, + "step": 4545 + }, + { + "epoch": 0.13932818438151282, + "grad_norm": 1.5543861997251407, + "learning_rate": 1.9379682515733988e-05, + "loss": 0.88, + "step": 4546 + }, + { + "epoch": 0.13935883290425402, + "grad_norm": 0.7267300617006721, + "learning_rate": 1.9379338301736392e-05, + "loss": 0.6035, + "step": 4547 + }, + { + "epoch": 0.13938948142699523, + "grad_norm": 1.815566541122814, + "learning_rate": 1.9378993995321775e-05, + "loss": 0.8095, + "step": 4548 + }, + { + "epoch": 0.13942012994973643, + "grad_norm": 2.016128347430253, + "learning_rate": 1.937864959649353e-05, + "loss": 0.8912, + "step": 4549 + }, + { + "epoch": 0.13945077847247764, + "grad_norm": 1.6229119837689734, + "learning_rate": 1.9378305105255052e-05, + "loss": 0.8044, + "step": 4550 + }, + { + "epoch": 0.13948142699521884, + "grad_norm": 1.4879747752485475, + "learning_rate": 1.937796052160973e-05, + "loss": 0.7251, + "step": 4551 + }, + { + "epoch": 0.13951207551796002, + "grad_norm": 0.7403931642613458, + "learning_rate": 1.9377615845560967e-05, + "loss": 0.6243, + "step": 4552 + }, + { + "epoch": 0.13954272404070123, + "grad_norm": 1.4413987012353173, + "learning_rate": 1.9377271077112157e-05, + "loss": 0.6831, + "step": 4553 + }, + { + "epoch": 0.13957337256344243, + "grad_norm": 1.7419156179769053, + "learning_rate": 1.937692621626669e-05, + "loss": 0.8637, + "step": 4554 + }, + { + "epoch": 0.13960402108618364, + "grad_norm": 1.5953474583629899, + "learning_rate": 1.9376581263027977e-05, + "loss": 0.8482, + "step": 4555 + }, + { + "epoch": 0.13963466960892484, + "grad_norm": 0.7359750583393896, + "learning_rate": 1.9376236217399406e-05, + "loss": 0.6641, + "step": 4556 + }, + { + "epoch": 0.13966531813166605, + "grad_norm": 1.651137168964798, + "learning_rate": 1.9375891079384382e-05, + "loss": 0.7618, + "step": 4557 + }, + { + "epoch": 0.13969596665440726, + "grad_norm": 0.7414389923274393, + "learning_rate": 1.93755458489863e-05, + "loss": 0.672, + "step": 4558 + }, + { + "epoch": 0.13972661517714846, + "grad_norm": 0.7093112668291063, + "learning_rate": 1.9375200526208573e-05, + "loss": 0.6444, + "step": 4559 + }, + { + "epoch": 0.13975726369988967, + "grad_norm": 1.718495929164219, + "learning_rate": 1.937485511105459e-05, + "loss": 0.8451, + "step": 4560 + }, + { + "epoch": 0.13978791222263087, + "grad_norm": 1.6395448420727585, + "learning_rate": 1.9374509603527767e-05, + "loss": 0.7675, + "step": 4561 + }, + { + "epoch": 0.13981856074537208, + "grad_norm": 1.639726530554299, + "learning_rate": 1.9374164003631498e-05, + "loss": 0.8269, + "step": 4562 + }, + { + "epoch": 0.13984920926811328, + "grad_norm": 1.4108422476859153, + "learning_rate": 1.9373818311369193e-05, + "loss": 0.8586, + "step": 4563 + }, + { + "epoch": 0.1398798577908545, + "grad_norm": 1.6259445097332905, + "learning_rate": 1.937347252674426e-05, + "loss": 0.8196, + "step": 4564 + }, + { + "epoch": 0.1399105063135957, + "grad_norm": 0.775995160810439, + "learning_rate": 1.93731266497601e-05, + "loss": 0.6567, + "step": 4565 + }, + { + "epoch": 0.1399411548363369, + "grad_norm": 1.6234086601748807, + "learning_rate": 1.9372780680420127e-05, + "loss": 0.8616, + "step": 4566 + }, + { + "epoch": 0.13997180335907808, + "grad_norm": 1.6645541938122024, + "learning_rate": 1.937243461872775e-05, + "loss": 0.8502, + "step": 4567 + }, + { + "epoch": 0.14000245188181928, + "grad_norm": 1.495867453210824, + "learning_rate": 1.9372088464686372e-05, + "loss": 0.8454, + "step": 4568 + }, + { + "epoch": 0.1400331004045605, + "grad_norm": 1.5723031512080712, + "learning_rate": 1.9371742218299413e-05, + "loss": 0.8759, + "step": 4569 + }, + { + "epoch": 0.1400637489273017, + "grad_norm": 1.5527633737845243, + "learning_rate": 1.937139587957028e-05, + "loss": 0.7568, + "step": 4570 + }, + { + "epoch": 0.1400943974500429, + "grad_norm": 1.6261538791286712, + "learning_rate": 1.937104944850238e-05, + "loss": 0.8039, + "step": 4571 + }, + { + "epoch": 0.1401250459727841, + "grad_norm": 1.7046718387607926, + "learning_rate": 1.9370702925099135e-05, + "loss": 0.9506, + "step": 4572 + }, + { + "epoch": 0.1401556944955253, + "grad_norm": 1.5670880815782984, + "learning_rate": 1.937035630936396e-05, + "loss": 0.7831, + "step": 4573 + }, + { + "epoch": 0.14018634301826652, + "grad_norm": 0.7840436601121229, + "learning_rate": 1.937000960130026e-05, + "loss": 0.649, + "step": 4574 + }, + { + "epoch": 0.14021699154100772, + "grad_norm": 1.6333196776743557, + "learning_rate": 1.9369662800911462e-05, + "loss": 0.7049, + "step": 4575 + }, + { + "epoch": 0.14024764006374893, + "grad_norm": 1.812037682252762, + "learning_rate": 1.9369315908200983e-05, + "loss": 0.8535, + "step": 4576 + }, + { + "epoch": 0.14027828858649014, + "grad_norm": 1.628476980600891, + "learning_rate": 1.9368968923172234e-05, + "loss": 0.6595, + "step": 4577 + }, + { + "epoch": 0.14030893710923134, + "grad_norm": 1.6717646944609277, + "learning_rate": 1.9368621845828636e-05, + "loss": 0.6791, + "step": 4578 + }, + { + "epoch": 0.14033958563197255, + "grad_norm": 1.5367056101248442, + "learning_rate": 1.936827467617361e-05, + "loss": 0.8195, + "step": 4579 + }, + { + "epoch": 0.14037023415471375, + "grad_norm": 1.8444341968864137, + "learning_rate": 1.9367927414210575e-05, + "loss": 0.8128, + "step": 4580 + }, + { + "epoch": 0.14040088267745496, + "grad_norm": 1.533718710250124, + "learning_rate": 1.9367580059942956e-05, + "loss": 0.7264, + "step": 4581 + }, + { + "epoch": 0.14043153120019616, + "grad_norm": 1.6559070935627982, + "learning_rate": 1.9367232613374173e-05, + "loss": 0.8601, + "step": 4582 + }, + { + "epoch": 0.14046217972293734, + "grad_norm": 0.8153989445355095, + "learning_rate": 1.9366885074507652e-05, + "loss": 0.6698, + "step": 4583 + }, + { + "epoch": 0.14049282824567855, + "grad_norm": 1.768319083020537, + "learning_rate": 1.9366537443346815e-05, + "loss": 0.8359, + "step": 4584 + }, + { + "epoch": 0.14052347676841975, + "grad_norm": 0.7381805909235001, + "learning_rate": 1.936618971989509e-05, + "loss": 0.6401, + "step": 4585 + }, + { + "epoch": 0.14055412529116096, + "grad_norm": 1.681493849363511, + "learning_rate": 1.93658419041559e-05, + "loss": 0.8493, + "step": 4586 + }, + { + "epoch": 0.14058477381390216, + "grad_norm": 1.8403350737813342, + "learning_rate": 1.936549399613267e-05, + "loss": 0.8933, + "step": 4587 + }, + { + "epoch": 0.14061542233664337, + "grad_norm": 0.8215945792654404, + "learning_rate": 1.9365145995828835e-05, + "loss": 0.6354, + "step": 4588 + }, + { + "epoch": 0.14064607085938458, + "grad_norm": 1.7103054084814153, + "learning_rate": 1.936479790324782e-05, + "loss": 0.9205, + "step": 4589 + }, + { + "epoch": 0.14067671938212578, + "grad_norm": 1.5954255535806434, + "learning_rate": 1.9364449718393055e-05, + "loss": 0.7395, + "step": 4590 + }, + { + "epoch": 0.140707367904867, + "grad_norm": 1.8698982812644116, + "learning_rate": 1.936410144126797e-05, + "loss": 0.7458, + "step": 4591 + }, + { + "epoch": 0.1407380164276082, + "grad_norm": 1.6578780230921197, + "learning_rate": 1.9363753071875998e-05, + "loss": 0.8265, + "step": 4592 + }, + { + "epoch": 0.1407686649503494, + "grad_norm": 1.7066922606823227, + "learning_rate": 1.936340461022057e-05, + "loss": 0.7628, + "step": 4593 + }, + { + "epoch": 0.1407993134730906, + "grad_norm": 1.5911174338233272, + "learning_rate": 1.9363056056305123e-05, + "loss": 0.8681, + "step": 4594 + }, + { + "epoch": 0.1408299619958318, + "grad_norm": 1.6209909836474148, + "learning_rate": 1.9362707410133088e-05, + "loss": 0.8609, + "step": 4595 + }, + { + "epoch": 0.14086061051857302, + "grad_norm": 1.6962769927583297, + "learning_rate": 1.9362358671707903e-05, + "loss": 0.7472, + "step": 4596 + }, + { + "epoch": 0.14089125904131422, + "grad_norm": 0.8036693399759184, + "learning_rate": 1.9362009841033e-05, + "loss": 0.6681, + "step": 4597 + }, + { + "epoch": 0.14092190756405543, + "grad_norm": 1.5021540359990104, + "learning_rate": 1.936166091811182e-05, + "loss": 0.8854, + "step": 4598 + }, + { + "epoch": 0.1409525560867966, + "grad_norm": 1.5946404641159597, + "learning_rate": 1.93613119029478e-05, + "loss": 0.7071, + "step": 4599 + }, + { + "epoch": 0.1409832046095378, + "grad_norm": 0.7341919268578919, + "learning_rate": 1.9360962795544375e-05, + "loss": 0.6222, + "step": 4600 + }, + { + "epoch": 0.14101385313227902, + "grad_norm": 1.5300831373413688, + "learning_rate": 1.9360613595904993e-05, + "loss": 0.87, + "step": 4601 + }, + { + "epoch": 0.14104450165502022, + "grad_norm": 1.5402450881780798, + "learning_rate": 1.9360264304033088e-05, + "loss": 0.8863, + "step": 4602 + }, + { + "epoch": 0.14107515017776143, + "grad_norm": 1.4274009284299123, + "learning_rate": 1.9359914919932105e-05, + "loss": 0.7446, + "step": 4603 + }, + { + "epoch": 0.14110579870050263, + "grad_norm": 1.700826564199087, + "learning_rate": 1.9359565443605484e-05, + "loss": 0.8168, + "step": 4604 + }, + { + "epoch": 0.14113644722324384, + "grad_norm": 1.6610961539330402, + "learning_rate": 1.935921587505667e-05, + "loss": 0.7622, + "step": 4605 + }, + { + "epoch": 0.14116709574598504, + "grad_norm": 1.5494165217729192, + "learning_rate": 1.935886621428911e-05, + "loss": 0.7733, + "step": 4606 + }, + { + "epoch": 0.14119774426872625, + "grad_norm": 1.679261301001962, + "learning_rate": 1.9358516461306244e-05, + "loss": 0.8986, + "step": 4607 + }, + { + "epoch": 0.14122839279146746, + "grad_norm": 1.7862662721161813, + "learning_rate": 1.9358166616111523e-05, + "loss": 0.8289, + "step": 4608 + }, + { + "epoch": 0.14125904131420866, + "grad_norm": 1.616940349412265, + "learning_rate": 1.9357816678708388e-05, + "loss": 0.7753, + "step": 4609 + }, + { + "epoch": 0.14128968983694987, + "grad_norm": 1.6189340095323728, + "learning_rate": 1.9357466649100293e-05, + "loss": 0.7919, + "step": 4610 + }, + { + "epoch": 0.14132033835969107, + "grad_norm": 0.8071475717005336, + "learning_rate": 1.9357116527290687e-05, + "loss": 0.6368, + "step": 4611 + }, + { + "epoch": 0.14135098688243228, + "grad_norm": 1.6897251385081307, + "learning_rate": 1.9356766313283014e-05, + "loss": 0.8984, + "step": 4612 + }, + { + "epoch": 0.14138163540517348, + "grad_norm": 1.5664521040195336, + "learning_rate": 1.935641600708073e-05, + "loss": 0.787, + "step": 4613 + }, + { + "epoch": 0.14141228392791466, + "grad_norm": 0.7077441402008681, + "learning_rate": 1.9356065608687288e-05, + "loss": 0.6202, + "step": 4614 + }, + { + "epoch": 0.14144293245065587, + "grad_norm": 1.3622294995862045, + "learning_rate": 1.9355715118106137e-05, + "loss": 0.7681, + "step": 4615 + }, + { + "epoch": 0.14147358097339707, + "grad_norm": 1.5887965942783526, + "learning_rate": 1.935536453534073e-05, + "loss": 0.8522, + "step": 4616 + }, + { + "epoch": 0.14150422949613828, + "grad_norm": 1.616889192164199, + "learning_rate": 1.9355013860394522e-05, + "loss": 0.7966, + "step": 4617 + }, + { + "epoch": 0.14153487801887948, + "grad_norm": 0.7673021156544023, + "learning_rate": 1.9354663093270967e-05, + "loss": 0.6353, + "step": 4618 + }, + { + "epoch": 0.1415655265416207, + "grad_norm": 1.527175277453592, + "learning_rate": 1.9354312233973527e-05, + "loss": 0.7698, + "step": 4619 + }, + { + "epoch": 0.1415961750643619, + "grad_norm": 1.7856182703860604, + "learning_rate": 1.9353961282505652e-05, + "loss": 0.7969, + "step": 4620 + }, + { + "epoch": 0.1416268235871031, + "grad_norm": 1.57273559669226, + "learning_rate": 1.9353610238870804e-05, + "loss": 0.709, + "step": 4621 + }, + { + "epoch": 0.1416574721098443, + "grad_norm": 0.7146426503140904, + "learning_rate": 1.9353259103072442e-05, + "loss": 0.6299, + "step": 4622 + }, + { + "epoch": 0.1416881206325855, + "grad_norm": 1.7950919120094666, + "learning_rate": 1.9352907875114026e-05, + "loss": 0.8074, + "step": 4623 + }, + { + "epoch": 0.14171876915532672, + "grad_norm": 1.6745058279586233, + "learning_rate": 1.9352556554999014e-05, + "loss": 0.7836, + "step": 4624 + }, + { + "epoch": 0.14174941767806792, + "grad_norm": 1.6391634396128334, + "learning_rate": 1.9352205142730867e-05, + "loss": 0.8687, + "step": 4625 + }, + { + "epoch": 0.14178006620080913, + "grad_norm": 0.7572511617824805, + "learning_rate": 1.9351853638313053e-05, + "loss": 0.6601, + "step": 4626 + }, + { + "epoch": 0.14181071472355034, + "grad_norm": 1.5525379346329142, + "learning_rate": 1.9351502041749032e-05, + "loss": 0.7479, + "step": 4627 + }, + { + "epoch": 0.14184136324629154, + "grad_norm": 1.7557661673170786, + "learning_rate": 1.9351150353042267e-05, + "loss": 0.8347, + "step": 4628 + }, + { + "epoch": 0.14187201176903275, + "grad_norm": 1.688122385098841, + "learning_rate": 1.9350798572196227e-05, + "loss": 0.8143, + "step": 4629 + }, + { + "epoch": 0.14190266029177392, + "grad_norm": 1.7484952802585731, + "learning_rate": 1.9350446699214374e-05, + "loss": 0.8236, + "step": 4630 + }, + { + "epoch": 0.14193330881451513, + "grad_norm": 1.7717468191587995, + "learning_rate": 1.935009473410018e-05, + "loss": 0.9814, + "step": 4631 + }, + { + "epoch": 0.14196395733725634, + "grad_norm": 1.5482689228874555, + "learning_rate": 1.934974267685711e-05, + "loss": 0.8095, + "step": 4632 + }, + { + "epoch": 0.14199460585999754, + "grad_norm": 1.4204689176952217, + "learning_rate": 1.934939052748863e-05, + "loss": 0.799, + "step": 4633 + }, + { + "epoch": 0.14202525438273875, + "grad_norm": 1.7029854785289673, + "learning_rate": 1.9349038285998216e-05, + "loss": 0.6999, + "step": 4634 + }, + { + "epoch": 0.14205590290547995, + "grad_norm": 1.5912860336386143, + "learning_rate": 1.9348685952389333e-05, + "loss": 0.8008, + "step": 4635 + }, + { + "epoch": 0.14208655142822116, + "grad_norm": 1.5658016016927288, + "learning_rate": 1.934833352666546e-05, + "loss": 0.8103, + "step": 4636 + }, + { + "epoch": 0.14211719995096236, + "grad_norm": 1.5829079590464283, + "learning_rate": 1.934798100883006e-05, + "loss": 0.7389, + "step": 4637 + }, + { + "epoch": 0.14214784847370357, + "grad_norm": 1.3527912978048227, + "learning_rate": 1.9347628398886616e-05, + "loss": 0.7699, + "step": 4638 + }, + { + "epoch": 0.14217849699644478, + "grad_norm": 1.8841867201182847, + "learning_rate": 1.9347275696838595e-05, + "loss": 1.0118, + "step": 4639 + }, + { + "epoch": 0.14220914551918598, + "grad_norm": 1.670200725128001, + "learning_rate": 1.9346922902689473e-05, + "loss": 0.8103, + "step": 4640 + }, + { + "epoch": 0.1422397940419272, + "grad_norm": 0.7957169392972987, + "learning_rate": 1.934657001644273e-05, + "loss": 0.6316, + "step": 4641 + }, + { + "epoch": 0.1422704425646684, + "grad_norm": 1.5979024324640965, + "learning_rate": 1.9346217038101844e-05, + "loss": 0.7814, + "step": 4642 + }, + { + "epoch": 0.1423010910874096, + "grad_norm": 1.6440962170422846, + "learning_rate": 1.9345863967670286e-05, + "loss": 0.7598, + "step": 4643 + }, + { + "epoch": 0.1423317396101508, + "grad_norm": 1.6257744055221242, + "learning_rate": 1.9345510805151542e-05, + "loss": 0.7007, + "step": 4644 + }, + { + "epoch": 0.14236238813289198, + "grad_norm": 1.5094010549279533, + "learning_rate": 1.9345157550549086e-05, + "loss": 0.8581, + "step": 4645 + }, + { + "epoch": 0.1423930366556332, + "grad_norm": 1.7325953603595257, + "learning_rate": 1.9344804203866403e-05, + "loss": 0.8934, + "step": 4646 + }, + { + "epoch": 0.1424236851783744, + "grad_norm": 1.6516489929497178, + "learning_rate": 1.9344450765106973e-05, + "loss": 0.7901, + "step": 4647 + }, + { + "epoch": 0.1424543337011156, + "grad_norm": 1.448973429643027, + "learning_rate": 1.934409723427428e-05, + "loss": 0.8262, + "step": 4648 + }, + { + "epoch": 0.1424849822238568, + "grad_norm": 1.7647224501808605, + "learning_rate": 1.9343743611371803e-05, + "loss": 0.8162, + "step": 4649 + }, + { + "epoch": 0.142515630746598, + "grad_norm": 1.4465478648223515, + "learning_rate": 1.9343389896403033e-05, + "loss": 0.7665, + "step": 4650 + }, + { + "epoch": 0.14254627926933922, + "grad_norm": 1.5797285169328994, + "learning_rate": 1.9343036089371452e-05, + "loss": 0.8655, + "step": 4651 + }, + { + "epoch": 0.14257692779208042, + "grad_norm": 1.6043159375498453, + "learning_rate": 1.9342682190280545e-05, + "loss": 0.7953, + "step": 4652 + }, + { + "epoch": 0.14260757631482163, + "grad_norm": 1.929956592329003, + "learning_rate": 1.9342328199133796e-05, + "loss": 0.9217, + "step": 4653 + }, + { + "epoch": 0.14263822483756283, + "grad_norm": 1.9243668869080224, + "learning_rate": 1.93419741159347e-05, + "loss": 0.8203, + "step": 4654 + }, + { + "epoch": 0.14266887336030404, + "grad_norm": 1.7494207547496043, + "learning_rate": 1.9341619940686744e-05, + "loss": 0.8379, + "step": 4655 + }, + { + "epoch": 0.14269952188304524, + "grad_norm": 1.5767938528114014, + "learning_rate": 1.9341265673393414e-05, + "loss": 0.857, + "step": 4656 + }, + { + "epoch": 0.14273017040578645, + "grad_norm": 1.7641391494147134, + "learning_rate": 1.9340911314058207e-05, + "loss": 0.8729, + "step": 4657 + }, + { + "epoch": 0.14276081892852766, + "grad_norm": 1.5635488833639857, + "learning_rate": 1.9340556862684607e-05, + "loss": 0.7483, + "step": 4658 + }, + { + "epoch": 0.14279146745126886, + "grad_norm": 1.464883329968278, + "learning_rate": 1.9340202319276114e-05, + "loss": 0.9752, + "step": 4659 + }, + { + "epoch": 0.14282211597401007, + "grad_norm": 1.468330072871465, + "learning_rate": 1.9339847683836213e-05, + "loss": 0.6853, + "step": 4660 + }, + { + "epoch": 0.14285276449675124, + "grad_norm": 1.6802470593157532, + "learning_rate": 1.933949295636841e-05, + "loss": 0.8631, + "step": 4661 + }, + { + "epoch": 0.14288341301949245, + "grad_norm": 1.7853703105432168, + "learning_rate": 1.9339138136876187e-05, + "loss": 0.907, + "step": 4662 + }, + { + "epoch": 0.14291406154223366, + "grad_norm": 1.402283827057932, + "learning_rate": 1.933878322536305e-05, + "loss": 0.7205, + "step": 4663 + }, + { + "epoch": 0.14294471006497486, + "grad_norm": 1.4430898078727858, + "learning_rate": 1.9338428221832492e-05, + "loss": 0.7391, + "step": 4664 + }, + { + "epoch": 0.14297535858771607, + "grad_norm": 1.6105017413457885, + "learning_rate": 1.9338073126288008e-05, + "loss": 0.867, + "step": 4665 + }, + { + "epoch": 0.14300600711045727, + "grad_norm": 1.800617076826055, + "learning_rate": 1.9337717938733103e-05, + "loss": 0.9082, + "step": 4666 + }, + { + "epoch": 0.14303665563319848, + "grad_norm": 1.652277377290411, + "learning_rate": 1.9337362659171273e-05, + "loss": 0.9365, + "step": 4667 + }, + { + "epoch": 0.14306730415593968, + "grad_norm": 1.6040267036830764, + "learning_rate": 1.9337007287606023e-05, + "loss": 0.8115, + "step": 4668 + }, + { + "epoch": 0.1430979526786809, + "grad_norm": 1.6154356495647424, + "learning_rate": 1.9336651824040848e-05, + "loss": 0.7109, + "step": 4669 + }, + { + "epoch": 0.1431286012014221, + "grad_norm": 1.5360693051641316, + "learning_rate": 1.933629626847925e-05, + "loss": 0.8246, + "step": 4670 + }, + { + "epoch": 0.1431592497241633, + "grad_norm": 1.4170943216796348, + "learning_rate": 1.933594062092474e-05, + "loss": 0.6378, + "step": 4671 + }, + { + "epoch": 0.1431898982469045, + "grad_norm": 1.5617201798628253, + "learning_rate": 1.9335584881380822e-05, + "loss": 0.7249, + "step": 4672 + }, + { + "epoch": 0.1432205467696457, + "grad_norm": 0.9516029736994835, + "learning_rate": 1.9335229049850993e-05, + "loss": 0.6532, + "step": 4673 + }, + { + "epoch": 0.14325119529238692, + "grad_norm": 1.5680916885919003, + "learning_rate": 1.9334873126338765e-05, + "loss": 0.8332, + "step": 4674 + }, + { + "epoch": 0.14328184381512812, + "grad_norm": 1.6462538668474458, + "learning_rate": 1.9334517110847643e-05, + "loss": 0.7819, + "step": 4675 + }, + { + "epoch": 0.1433124923378693, + "grad_norm": 1.5760416766715792, + "learning_rate": 1.9334161003381137e-05, + "loss": 0.844, + "step": 4676 + }, + { + "epoch": 0.1433431408606105, + "grad_norm": 1.904623210887562, + "learning_rate": 1.9333804803942754e-05, + "loss": 0.8255, + "step": 4677 + }, + { + "epoch": 0.1433737893833517, + "grad_norm": 1.5355216298017138, + "learning_rate": 1.9333448512536003e-05, + "loss": 0.933, + "step": 4678 + }, + { + "epoch": 0.14340443790609292, + "grad_norm": 0.8605180112547182, + "learning_rate": 1.9333092129164397e-05, + "loss": 0.6472, + "step": 4679 + }, + { + "epoch": 0.14343508642883412, + "grad_norm": 1.6908954500844624, + "learning_rate": 1.9332735653831445e-05, + "loss": 0.7645, + "step": 4680 + }, + { + "epoch": 0.14346573495157533, + "grad_norm": 1.5483300658837678, + "learning_rate": 1.933237908654066e-05, + "loss": 0.8472, + "step": 4681 + }, + { + "epoch": 0.14349638347431654, + "grad_norm": 1.6126400138436814, + "learning_rate": 1.933202242729556e-05, + "loss": 0.7372, + "step": 4682 + }, + { + "epoch": 0.14352703199705774, + "grad_norm": 1.6616498562438642, + "learning_rate": 1.9331665676099653e-05, + "loss": 0.8087, + "step": 4683 + }, + { + "epoch": 0.14355768051979895, + "grad_norm": 1.6581016342162294, + "learning_rate": 1.9331308832956453e-05, + "loss": 0.8313, + "step": 4684 + }, + { + "epoch": 0.14358832904254015, + "grad_norm": 1.664444654990043, + "learning_rate": 1.9330951897869484e-05, + "loss": 0.8693, + "step": 4685 + }, + { + "epoch": 0.14361897756528136, + "grad_norm": 1.8083349048003514, + "learning_rate": 1.9330594870842255e-05, + "loss": 0.8672, + "step": 4686 + }, + { + "epoch": 0.14364962608802256, + "grad_norm": 1.7122946947090827, + "learning_rate": 1.933023775187829e-05, + "loss": 0.8489, + "step": 4687 + }, + { + "epoch": 0.14368027461076377, + "grad_norm": 1.7369098520571575, + "learning_rate": 1.9329880540981107e-05, + "loss": 0.9125, + "step": 4688 + }, + { + "epoch": 0.14371092313350498, + "grad_norm": 1.61756941821102, + "learning_rate": 1.932952323815422e-05, + "loss": 0.8862, + "step": 4689 + }, + { + "epoch": 0.14374157165624618, + "grad_norm": 1.4646131638913626, + "learning_rate": 1.9329165843401157e-05, + "loss": 0.7162, + "step": 4690 + }, + { + "epoch": 0.1437722201789874, + "grad_norm": 1.6485559260185214, + "learning_rate": 1.932880835672543e-05, + "loss": 0.8524, + "step": 4691 + }, + { + "epoch": 0.14380286870172856, + "grad_norm": 1.6672834329044004, + "learning_rate": 1.9328450778130574e-05, + "loss": 0.8336, + "step": 4692 + }, + { + "epoch": 0.14383351722446977, + "grad_norm": 1.68559017793303, + "learning_rate": 1.9328093107620103e-05, + "loss": 0.8028, + "step": 4693 + }, + { + "epoch": 0.14386416574721098, + "grad_norm": 1.6466080780712173, + "learning_rate": 1.9327735345197544e-05, + "loss": 0.7964, + "step": 4694 + }, + { + "epoch": 0.14389481426995218, + "grad_norm": 1.710114045530265, + "learning_rate": 1.9327377490866422e-05, + "loss": 0.9646, + "step": 4695 + }, + { + "epoch": 0.1439254627926934, + "grad_norm": 0.8571412887858405, + "learning_rate": 1.9327019544630264e-05, + "loss": 0.6481, + "step": 4696 + }, + { + "epoch": 0.1439561113154346, + "grad_norm": 1.8095678936336428, + "learning_rate": 1.9326661506492596e-05, + "loss": 0.878, + "step": 4697 + }, + { + "epoch": 0.1439867598381758, + "grad_norm": 1.8069982158751996, + "learning_rate": 1.9326303376456946e-05, + "loss": 0.8418, + "step": 4698 + }, + { + "epoch": 0.144017408360917, + "grad_norm": 0.7356013358184788, + "learning_rate": 1.932594515452684e-05, + "loss": 0.6694, + "step": 4699 + }, + { + "epoch": 0.1440480568836582, + "grad_norm": 1.4805670204392403, + "learning_rate": 1.9325586840705813e-05, + "loss": 0.79, + "step": 4700 + }, + { + "epoch": 0.14407870540639942, + "grad_norm": 1.5178839469496224, + "learning_rate": 1.932522843499739e-05, + "loss": 0.8275, + "step": 4701 + }, + { + "epoch": 0.14410935392914062, + "grad_norm": 0.7506070689674139, + "learning_rate": 1.932486993740511e-05, + "loss": 0.6617, + "step": 4702 + }, + { + "epoch": 0.14414000245188183, + "grad_norm": 1.5940648950283116, + "learning_rate": 1.9324511347932496e-05, + "loss": 0.9213, + "step": 4703 + }, + { + "epoch": 0.14417065097462303, + "grad_norm": 1.5434179164258868, + "learning_rate": 1.9324152666583087e-05, + "loss": 0.8359, + "step": 4704 + }, + { + "epoch": 0.14420129949736424, + "grad_norm": 1.5600174237337914, + "learning_rate": 1.932379389336042e-05, + "loss": 0.7245, + "step": 4705 + }, + { + "epoch": 0.14423194802010544, + "grad_norm": 1.5124861374156366, + "learning_rate": 1.932343502826802e-05, + "loss": 0.8766, + "step": 4706 + }, + { + "epoch": 0.14426259654284662, + "grad_norm": 1.7494427252332916, + "learning_rate": 1.9323076071309433e-05, + "loss": 0.8168, + "step": 4707 + }, + { + "epoch": 0.14429324506558783, + "grad_norm": 1.4268595165013718, + "learning_rate": 1.932271702248819e-05, + "loss": 0.5907, + "step": 4708 + }, + { + "epoch": 0.14432389358832903, + "grad_norm": 1.6147728410201887, + "learning_rate": 1.9322357881807833e-05, + "loss": 0.7743, + "step": 4709 + }, + { + "epoch": 0.14435454211107024, + "grad_norm": 0.8943879088066061, + "learning_rate": 1.9321998649271896e-05, + "loss": 0.6639, + "step": 4710 + }, + { + "epoch": 0.14438519063381144, + "grad_norm": 1.7608536288726186, + "learning_rate": 1.9321639324883923e-05, + "loss": 0.7393, + "step": 4711 + }, + { + "epoch": 0.14441583915655265, + "grad_norm": 1.5123209107950801, + "learning_rate": 1.9321279908647452e-05, + "loss": 0.7532, + "step": 4712 + }, + { + "epoch": 0.14444648767929386, + "grad_norm": 1.3663029202248616, + "learning_rate": 1.9320920400566026e-05, + "loss": 0.6998, + "step": 4713 + }, + { + "epoch": 0.14447713620203506, + "grad_norm": 1.5875399280056999, + "learning_rate": 1.9320560800643185e-05, + "loss": 0.9074, + "step": 4714 + }, + { + "epoch": 0.14450778472477627, + "grad_norm": 1.5046704185789936, + "learning_rate": 1.9320201108882475e-05, + "loss": 0.7232, + "step": 4715 + }, + { + "epoch": 0.14453843324751747, + "grad_norm": 1.6326976073106625, + "learning_rate": 1.9319841325287438e-05, + "loss": 0.8138, + "step": 4716 + }, + { + "epoch": 0.14456908177025868, + "grad_norm": 1.6721216190911168, + "learning_rate": 1.9319481449861622e-05, + "loss": 0.8939, + "step": 4717 + }, + { + "epoch": 0.14459973029299988, + "grad_norm": 1.7223294716108488, + "learning_rate": 1.931912148260857e-05, + "loss": 0.8442, + "step": 4718 + }, + { + "epoch": 0.1446303788157411, + "grad_norm": 1.5455676675820715, + "learning_rate": 1.9318761423531827e-05, + "loss": 0.7769, + "step": 4719 + }, + { + "epoch": 0.1446610273384823, + "grad_norm": 1.5110019561608363, + "learning_rate": 1.9318401272634943e-05, + "loss": 0.8326, + "step": 4720 + }, + { + "epoch": 0.1446916758612235, + "grad_norm": 1.7532224405140797, + "learning_rate": 1.9318041029921472e-05, + "loss": 0.8964, + "step": 4721 + }, + { + "epoch": 0.1447223243839647, + "grad_norm": 1.499506534584166, + "learning_rate": 1.9317680695394953e-05, + "loss": 0.8215, + "step": 4722 + }, + { + "epoch": 0.14475297290670588, + "grad_norm": 1.8239653758423833, + "learning_rate": 1.9317320269058945e-05, + "loss": 0.7701, + "step": 4723 + }, + { + "epoch": 0.1447836214294471, + "grad_norm": 1.4664428014223403, + "learning_rate": 1.9316959750916994e-05, + "loss": 0.8082, + "step": 4724 + }, + { + "epoch": 0.1448142699521883, + "grad_norm": 0.7993235033893484, + "learning_rate": 1.9316599140972657e-05, + "loss": 0.6407, + "step": 4725 + }, + { + "epoch": 0.1448449184749295, + "grad_norm": 1.3929986647592718, + "learning_rate": 1.9316238439229488e-05, + "loss": 0.7095, + "step": 4726 + }, + { + "epoch": 0.1448755669976707, + "grad_norm": 1.3744065620937382, + "learning_rate": 1.9315877645691033e-05, + "loss": 0.8787, + "step": 4727 + }, + { + "epoch": 0.1449062155204119, + "grad_norm": 1.4941981950264223, + "learning_rate": 1.9315516760360852e-05, + "loss": 0.8322, + "step": 4728 + }, + { + "epoch": 0.14493686404315312, + "grad_norm": 1.6581727766525578, + "learning_rate": 1.9315155783242504e-05, + "loss": 0.8665, + "step": 4729 + }, + { + "epoch": 0.14496751256589432, + "grad_norm": 0.7724930053892893, + "learning_rate": 1.931479471433954e-05, + "loss": 0.6512, + "step": 4730 + }, + { + "epoch": 0.14499816108863553, + "grad_norm": 1.4265913162095392, + "learning_rate": 1.9314433553655527e-05, + "loss": 0.732, + "step": 4731 + }, + { + "epoch": 0.14502880961137674, + "grad_norm": 1.6929840076141498, + "learning_rate": 1.931407230119401e-05, + "loss": 0.7736, + "step": 4732 + }, + { + "epoch": 0.14505945813411794, + "grad_norm": 1.4525470700302812, + "learning_rate": 1.9313710956958557e-05, + "loss": 0.8154, + "step": 4733 + }, + { + "epoch": 0.14509010665685915, + "grad_norm": 0.7625876205263424, + "learning_rate": 1.9313349520952728e-05, + "loss": 0.6223, + "step": 4734 + }, + { + "epoch": 0.14512075517960035, + "grad_norm": 1.6722174328562307, + "learning_rate": 1.931298799318008e-05, + "loss": 0.9005, + "step": 4735 + }, + { + "epoch": 0.14515140370234156, + "grad_norm": 1.4773005433838526, + "learning_rate": 1.931262637364418e-05, + "loss": 0.8685, + "step": 4736 + }, + { + "epoch": 0.14518205222508276, + "grad_norm": 1.4290775477516884, + "learning_rate": 1.9312264662348594e-05, + "loss": 0.8565, + "step": 4737 + }, + { + "epoch": 0.14521270074782394, + "grad_norm": 1.5433711791826956, + "learning_rate": 1.9311902859296876e-05, + "loss": 0.7773, + "step": 4738 + }, + { + "epoch": 0.14524334927056515, + "grad_norm": 1.7881396137963619, + "learning_rate": 1.9311540964492598e-05, + "loss": 0.9195, + "step": 4739 + }, + { + "epoch": 0.14527399779330635, + "grad_norm": 1.4632161181659487, + "learning_rate": 1.9311178977939327e-05, + "loss": 0.7157, + "step": 4740 + }, + { + "epoch": 0.14530464631604756, + "grad_norm": 1.6713452099769888, + "learning_rate": 1.9310816899640624e-05, + "loss": 0.7617, + "step": 4741 + }, + { + "epoch": 0.14533529483878876, + "grad_norm": 1.6277445027955895, + "learning_rate": 1.931045472960006e-05, + "loss": 0.8336, + "step": 4742 + }, + { + "epoch": 0.14536594336152997, + "grad_norm": 1.5952030203696121, + "learning_rate": 1.9310092467821208e-05, + "loss": 0.763, + "step": 4743 + }, + { + "epoch": 0.14539659188427118, + "grad_norm": 1.5669507780100564, + "learning_rate": 1.9309730114307626e-05, + "loss": 0.8272, + "step": 4744 + }, + { + "epoch": 0.14542724040701238, + "grad_norm": 1.4858176256169306, + "learning_rate": 1.9309367669062894e-05, + "loss": 0.7443, + "step": 4745 + }, + { + "epoch": 0.1454578889297536, + "grad_norm": 1.6497502084478362, + "learning_rate": 1.9309005132090585e-05, + "loss": 0.8198, + "step": 4746 + }, + { + "epoch": 0.1454885374524948, + "grad_norm": 1.4209675442747316, + "learning_rate": 1.930864250339426e-05, + "loss": 0.7608, + "step": 4747 + }, + { + "epoch": 0.145519185975236, + "grad_norm": 1.4984750204274344, + "learning_rate": 1.9308279782977502e-05, + "loss": 0.7968, + "step": 4748 + }, + { + "epoch": 0.1455498344979772, + "grad_norm": 1.5745055750438977, + "learning_rate": 1.9307916970843878e-05, + "loss": 0.857, + "step": 4749 + }, + { + "epoch": 0.1455804830207184, + "grad_norm": 0.8643702790031709, + "learning_rate": 1.930755406699697e-05, + "loss": 0.6699, + "step": 4750 + }, + { + "epoch": 0.14561113154345962, + "grad_norm": 0.8131239072108367, + "learning_rate": 1.930719107144035e-05, + "loss": 0.6445, + "step": 4751 + }, + { + "epoch": 0.14564178006620082, + "grad_norm": 0.7381152508760276, + "learning_rate": 1.9306827984177597e-05, + "loss": 0.6675, + "step": 4752 + }, + { + "epoch": 0.14567242858894203, + "grad_norm": 1.6147593247911645, + "learning_rate": 1.9306464805212285e-05, + "loss": 0.8375, + "step": 4753 + }, + { + "epoch": 0.1457030771116832, + "grad_norm": 0.8759823583048352, + "learning_rate": 1.9306101534547993e-05, + "loss": 0.6428, + "step": 4754 + }, + { + "epoch": 0.1457337256344244, + "grad_norm": 1.8433903583237676, + "learning_rate": 1.93057381721883e-05, + "loss": 0.7897, + "step": 4755 + }, + { + "epoch": 0.14576437415716562, + "grad_norm": 1.727348422873667, + "learning_rate": 1.930537471813679e-05, + "loss": 0.9672, + "step": 4756 + }, + { + "epoch": 0.14579502267990682, + "grad_norm": 1.5495647204978185, + "learning_rate": 1.9305011172397045e-05, + "loss": 0.816, + "step": 4757 + }, + { + "epoch": 0.14582567120264803, + "grad_norm": 1.5767095393354587, + "learning_rate": 1.930464753497264e-05, + "loss": 0.748, + "step": 4758 + }, + { + "epoch": 0.14585631972538923, + "grad_norm": 1.4636842241540735, + "learning_rate": 1.930428380586716e-05, + "loss": 0.8924, + "step": 4759 + }, + { + "epoch": 0.14588696824813044, + "grad_norm": 1.5343693740038784, + "learning_rate": 1.9303919985084195e-05, + "loss": 0.8336, + "step": 4760 + }, + { + "epoch": 0.14591761677087164, + "grad_norm": 1.464404067798457, + "learning_rate": 1.9303556072627328e-05, + "loss": 0.7654, + "step": 4761 + }, + { + "epoch": 0.14594826529361285, + "grad_norm": 1.4945406784507422, + "learning_rate": 1.9303192068500137e-05, + "loss": 0.9093, + "step": 4762 + }, + { + "epoch": 0.14597891381635406, + "grad_norm": 1.630493445067252, + "learning_rate": 1.9302827972706217e-05, + "loss": 0.8708, + "step": 4763 + }, + { + "epoch": 0.14600956233909526, + "grad_norm": 0.8811391886276767, + "learning_rate": 1.9302463785249154e-05, + "loss": 0.6631, + "step": 4764 + }, + { + "epoch": 0.14604021086183647, + "grad_norm": 1.6642420971430507, + "learning_rate": 1.9302099506132533e-05, + "loss": 0.7895, + "step": 4765 + }, + { + "epoch": 0.14607085938457767, + "grad_norm": 1.830014949637293, + "learning_rate": 1.9301735135359945e-05, + "loss": 0.8308, + "step": 4766 + }, + { + "epoch": 0.14610150790731888, + "grad_norm": 1.7582347942351737, + "learning_rate": 1.9301370672934984e-05, + "loss": 0.8525, + "step": 4767 + }, + { + "epoch": 0.14613215643006008, + "grad_norm": 1.6984377195850726, + "learning_rate": 1.9301006118861237e-05, + "loss": 0.8654, + "step": 4768 + }, + { + "epoch": 0.14616280495280126, + "grad_norm": 1.4986950380037825, + "learning_rate": 1.9300641473142293e-05, + "loss": 0.7522, + "step": 4769 + }, + { + "epoch": 0.14619345347554247, + "grad_norm": 1.4308882486867, + "learning_rate": 1.9300276735781753e-05, + "loss": 0.7485, + "step": 4770 + }, + { + "epoch": 0.14622410199828367, + "grad_norm": 1.3561947939049226, + "learning_rate": 1.9299911906783205e-05, + "loss": 0.7145, + "step": 4771 + }, + { + "epoch": 0.14625475052102488, + "grad_norm": 1.7523288485962736, + "learning_rate": 1.9299546986150245e-05, + "loss": 0.8456, + "step": 4772 + }, + { + "epoch": 0.14628539904376608, + "grad_norm": 1.513319887296233, + "learning_rate": 1.9299181973886473e-05, + "loss": 0.8247, + "step": 4773 + }, + { + "epoch": 0.1463160475665073, + "grad_norm": 1.6861509897448903, + "learning_rate": 1.9298816869995478e-05, + "loss": 0.7825, + "step": 4774 + }, + { + "epoch": 0.1463466960892485, + "grad_norm": 1.537299614435785, + "learning_rate": 1.929845167448086e-05, + "loss": 0.8735, + "step": 4775 + }, + { + "epoch": 0.1463773446119897, + "grad_norm": 1.4887848655821092, + "learning_rate": 1.929808638734622e-05, + "loss": 0.7453, + "step": 4776 + }, + { + "epoch": 0.1464079931347309, + "grad_norm": 1.5852009447207907, + "learning_rate": 1.929772100859516e-05, + "loss": 0.82, + "step": 4777 + }, + { + "epoch": 0.1464386416574721, + "grad_norm": 1.7338970615072398, + "learning_rate": 1.9297355538231273e-05, + "loss": 0.7834, + "step": 4778 + }, + { + "epoch": 0.14646929018021332, + "grad_norm": 1.6169145467339672, + "learning_rate": 1.929698997625816e-05, + "loss": 0.7625, + "step": 4779 + }, + { + "epoch": 0.14649993870295452, + "grad_norm": 1.7331219660440293, + "learning_rate": 1.929662432267943e-05, + "loss": 0.7666, + "step": 4780 + }, + { + "epoch": 0.14653058722569573, + "grad_norm": 1.603443834869241, + "learning_rate": 1.929625857749868e-05, + "loss": 0.7347, + "step": 4781 + }, + { + "epoch": 0.14656123574843694, + "grad_norm": 1.5334579449382946, + "learning_rate": 1.929589274071952e-05, + "loss": 0.7806, + "step": 4782 + }, + { + "epoch": 0.14659188427117814, + "grad_norm": 1.5108380057281388, + "learning_rate": 1.9295526812345545e-05, + "loss": 0.8485, + "step": 4783 + }, + { + "epoch": 0.14662253279391935, + "grad_norm": 1.5929562860339492, + "learning_rate": 1.9295160792380367e-05, + "loss": 0.8689, + "step": 4784 + }, + { + "epoch": 0.14665318131666052, + "grad_norm": 1.5990801768207472, + "learning_rate": 1.929479468082759e-05, + "loss": 0.8183, + "step": 4785 + }, + { + "epoch": 0.14668382983940173, + "grad_norm": 1.546481455800273, + "learning_rate": 1.9294428477690827e-05, + "loss": 0.7969, + "step": 4786 + }, + { + "epoch": 0.14671447836214294, + "grad_norm": 1.7332896294759552, + "learning_rate": 1.9294062182973677e-05, + "loss": 0.7202, + "step": 4787 + }, + { + "epoch": 0.14674512688488414, + "grad_norm": 1.5260518414329907, + "learning_rate": 1.929369579667976e-05, + "loss": 0.9264, + "step": 4788 + }, + { + "epoch": 0.14677577540762535, + "grad_norm": 1.6650669156043425, + "learning_rate": 1.9293329318812678e-05, + "loss": 0.8521, + "step": 4789 + }, + { + "epoch": 0.14680642393036655, + "grad_norm": 1.6875246333736091, + "learning_rate": 1.9292962749376045e-05, + "loss": 0.8562, + "step": 4790 + }, + { + "epoch": 0.14683707245310776, + "grad_norm": 1.495706403017004, + "learning_rate": 1.929259608837347e-05, + "loss": 0.881, + "step": 4791 + }, + { + "epoch": 0.14686772097584896, + "grad_norm": 1.8416247011157623, + "learning_rate": 1.929222933580857e-05, + "loss": 0.7928, + "step": 4792 + }, + { + "epoch": 0.14689836949859017, + "grad_norm": 1.5218134335628644, + "learning_rate": 1.9291862491684954e-05, + "loss": 0.7756, + "step": 4793 + }, + { + "epoch": 0.14692901802133138, + "grad_norm": 1.4918164991667664, + "learning_rate": 1.929149555600624e-05, + "loss": 0.8205, + "step": 4794 + }, + { + "epoch": 0.14695966654407258, + "grad_norm": 1.5787481642521302, + "learning_rate": 1.9291128528776047e-05, + "loss": 0.7131, + "step": 4795 + }, + { + "epoch": 0.1469903150668138, + "grad_norm": 1.7068615722707945, + "learning_rate": 1.9290761409997985e-05, + "loss": 0.815, + "step": 4796 + }, + { + "epoch": 0.147020963589555, + "grad_norm": 1.7618640167261117, + "learning_rate": 1.9290394199675675e-05, + "loss": 0.8775, + "step": 4797 + }, + { + "epoch": 0.1470516121122962, + "grad_norm": 1.713601106253648, + "learning_rate": 1.9290026897812733e-05, + "loss": 0.8079, + "step": 4798 + }, + { + "epoch": 0.1470822606350374, + "grad_norm": 1.6899150801592495, + "learning_rate": 1.9289659504412776e-05, + "loss": 0.8988, + "step": 4799 + }, + { + "epoch": 0.14711290915777858, + "grad_norm": 1.6600652875079667, + "learning_rate": 1.928929201947943e-05, + "loss": 0.9783, + "step": 4800 + }, + { + "epoch": 0.1471435576805198, + "grad_norm": 1.8338262398237322, + "learning_rate": 1.9288924443016314e-05, + "loss": 0.9618, + "step": 4801 + }, + { + "epoch": 0.147174206203261, + "grad_norm": 0.7826265346091934, + "learning_rate": 1.928855677502705e-05, + "loss": 0.6855, + "step": 4802 + }, + { + "epoch": 0.1472048547260022, + "grad_norm": 0.8029234169993018, + "learning_rate": 1.9288189015515254e-05, + "loss": 0.6747, + "step": 4803 + }, + { + "epoch": 0.1472355032487434, + "grad_norm": 1.5995252165123484, + "learning_rate": 1.9287821164484558e-05, + "loss": 0.8262, + "step": 4804 + }, + { + "epoch": 0.1472661517714846, + "grad_norm": 1.6509032127928294, + "learning_rate": 1.9287453221938586e-05, + "loss": 0.899, + "step": 4805 + }, + { + "epoch": 0.14729680029422582, + "grad_norm": 1.9735867288549052, + "learning_rate": 1.9287085187880957e-05, + "loss": 0.8529, + "step": 4806 + }, + { + "epoch": 0.14732744881696702, + "grad_norm": 1.5120462981380671, + "learning_rate": 1.9286717062315302e-05, + "loss": 0.8967, + "step": 4807 + }, + { + "epoch": 0.14735809733970823, + "grad_norm": 0.7398611038417189, + "learning_rate": 1.928634884524525e-05, + "loss": 0.6364, + "step": 4808 + }, + { + "epoch": 0.14738874586244943, + "grad_norm": 1.6832466358195148, + "learning_rate": 1.9285980536674427e-05, + "loss": 0.921, + "step": 4809 + }, + { + "epoch": 0.14741939438519064, + "grad_norm": 0.7607886248340879, + "learning_rate": 1.9285612136606458e-05, + "loss": 0.664, + "step": 4810 + }, + { + "epoch": 0.14745004290793184, + "grad_norm": 1.6602061990522154, + "learning_rate": 1.9285243645044982e-05, + "loss": 0.7978, + "step": 4811 + }, + { + "epoch": 0.14748069143067305, + "grad_norm": 1.7045885709199213, + "learning_rate": 1.9284875061993624e-05, + "loss": 0.8195, + "step": 4812 + }, + { + "epoch": 0.14751133995341426, + "grad_norm": 0.7502838427246851, + "learning_rate": 1.9284506387456012e-05, + "loss": 0.6369, + "step": 4813 + }, + { + "epoch": 0.14754198847615546, + "grad_norm": 0.7265019170574761, + "learning_rate": 1.9284137621435786e-05, + "loss": 0.6331, + "step": 4814 + }, + { + "epoch": 0.14757263699889667, + "grad_norm": 1.9113775441944272, + "learning_rate": 1.9283768763936578e-05, + "loss": 0.859, + "step": 4815 + }, + { + "epoch": 0.14760328552163784, + "grad_norm": 1.6598829686288745, + "learning_rate": 1.9283399814962016e-05, + "loss": 0.8443, + "step": 4816 + }, + { + "epoch": 0.14763393404437905, + "grad_norm": 1.8602836308718589, + "learning_rate": 1.9283030774515746e-05, + "loss": 0.8176, + "step": 4817 + }, + { + "epoch": 0.14766458256712026, + "grad_norm": 1.5880179623332806, + "learning_rate": 1.9282661642601394e-05, + "loss": 0.882, + "step": 4818 + }, + { + "epoch": 0.14769523108986146, + "grad_norm": 1.7482112340408902, + "learning_rate": 1.9282292419222604e-05, + "loss": 0.8988, + "step": 4819 + }, + { + "epoch": 0.14772587961260267, + "grad_norm": 0.9182378587335082, + "learning_rate": 1.928192310438301e-05, + "loss": 0.6484, + "step": 4820 + }, + { + "epoch": 0.14775652813534387, + "grad_norm": 1.5141638212752342, + "learning_rate": 1.9281553698086256e-05, + "loss": 0.7948, + "step": 4821 + }, + { + "epoch": 0.14778717665808508, + "grad_norm": 1.74946973339341, + "learning_rate": 1.9281184200335978e-05, + "loss": 0.7491, + "step": 4822 + }, + { + "epoch": 0.14781782518082628, + "grad_norm": 1.5138371703762652, + "learning_rate": 1.9280814611135815e-05, + "loss": 0.8046, + "step": 4823 + }, + { + "epoch": 0.1478484737035675, + "grad_norm": 1.7270106609366593, + "learning_rate": 1.9280444930489412e-05, + "loss": 0.8379, + "step": 4824 + }, + { + "epoch": 0.1478791222263087, + "grad_norm": 1.6361287601078152, + "learning_rate": 1.9280075158400413e-05, + "loss": 0.8198, + "step": 4825 + }, + { + "epoch": 0.1479097707490499, + "grad_norm": 1.4986975140392693, + "learning_rate": 1.927970529487246e-05, + "loss": 0.8191, + "step": 4826 + }, + { + "epoch": 0.1479404192717911, + "grad_norm": 1.6429792918858994, + "learning_rate": 1.927933533990919e-05, + "loss": 0.7297, + "step": 4827 + }, + { + "epoch": 0.1479710677945323, + "grad_norm": 1.612319957105199, + "learning_rate": 1.927896529351426e-05, + "loss": 0.9002, + "step": 4828 + }, + { + "epoch": 0.14800171631727352, + "grad_norm": 1.5946392632586335, + "learning_rate": 1.927859515569131e-05, + "loss": 0.817, + "step": 4829 + }, + { + "epoch": 0.14803236484001472, + "grad_norm": 1.4992485038332848, + "learning_rate": 1.9278224926443987e-05, + "loss": 0.7048, + "step": 4830 + }, + { + "epoch": 0.1480630133627559, + "grad_norm": 1.5210702311734678, + "learning_rate": 1.9277854605775936e-05, + "loss": 0.8812, + "step": 4831 + }, + { + "epoch": 0.1480936618854971, + "grad_norm": 1.549039835064813, + "learning_rate": 1.9277484193690817e-05, + "loss": 0.8048, + "step": 4832 + }, + { + "epoch": 0.1481243104082383, + "grad_norm": 1.5279430113588233, + "learning_rate": 1.927711369019227e-05, + "loss": 0.8047, + "step": 4833 + }, + { + "epoch": 0.14815495893097952, + "grad_norm": 1.5406374152752038, + "learning_rate": 1.9276743095283945e-05, + "loss": 0.785, + "step": 4834 + }, + { + "epoch": 0.14818560745372072, + "grad_norm": 1.7134236769939077, + "learning_rate": 1.92763724089695e-05, + "loss": 0.953, + "step": 4835 + }, + { + "epoch": 0.14821625597646193, + "grad_norm": 1.7767686200293715, + "learning_rate": 1.9276001631252584e-05, + "loss": 0.8645, + "step": 4836 + }, + { + "epoch": 0.14824690449920314, + "grad_norm": 1.6986889830445326, + "learning_rate": 1.927563076213685e-05, + "loss": 0.8587, + "step": 4837 + }, + { + "epoch": 0.14827755302194434, + "grad_norm": 1.5724369607205197, + "learning_rate": 1.927525980162595e-05, + "loss": 0.76, + "step": 4838 + }, + { + "epoch": 0.14830820154468555, + "grad_norm": 1.562452178582513, + "learning_rate": 1.9274888749723547e-05, + "loss": 0.9129, + "step": 4839 + }, + { + "epoch": 0.14833885006742675, + "grad_norm": 1.470089463550271, + "learning_rate": 1.9274517606433286e-05, + "loss": 0.7907, + "step": 4840 + }, + { + "epoch": 0.14836949859016796, + "grad_norm": 0.9314225446773922, + "learning_rate": 1.9274146371758835e-05, + "loss": 0.6879, + "step": 4841 + }, + { + "epoch": 0.14840014711290916, + "grad_norm": 0.805754723053185, + "learning_rate": 1.9273775045703848e-05, + "loss": 0.6889, + "step": 4842 + }, + { + "epoch": 0.14843079563565037, + "grad_norm": 1.6525563240093109, + "learning_rate": 1.927340362827198e-05, + "loss": 0.8257, + "step": 4843 + }, + { + "epoch": 0.14846144415839158, + "grad_norm": 1.5956665819700275, + "learning_rate": 1.927303211946689e-05, + "loss": 0.8078, + "step": 4844 + }, + { + "epoch": 0.14849209268113278, + "grad_norm": 1.7301941999961343, + "learning_rate": 1.9272660519292244e-05, + "loss": 0.765, + "step": 4845 + }, + { + "epoch": 0.148522741203874, + "grad_norm": 1.7585545763241648, + "learning_rate": 1.9272288827751705e-05, + "loss": 0.7444, + "step": 4846 + }, + { + "epoch": 0.14855338972661516, + "grad_norm": 1.5049570550456546, + "learning_rate": 1.9271917044848928e-05, + "loss": 0.8225, + "step": 4847 + }, + { + "epoch": 0.14858403824935637, + "grad_norm": 1.482557265134304, + "learning_rate": 1.9271545170587584e-05, + "loss": 0.738, + "step": 4848 + }, + { + "epoch": 0.14861468677209758, + "grad_norm": 1.1055510145589897, + "learning_rate": 1.927117320497133e-05, + "loss": 0.6682, + "step": 4849 + }, + { + "epoch": 0.14864533529483878, + "grad_norm": 0.9048397240383078, + "learning_rate": 1.9270801148003837e-05, + "loss": 0.6703, + "step": 4850 + }, + { + "epoch": 0.14867598381758, + "grad_norm": 0.705713957794488, + "learning_rate": 1.9270428999688767e-05, + "loss": 0.626, + "step": 4851 + }, + { + "epoch": 0.1487066323403212, + "grad_norm": 2.0115831969070634, + "learning_rate": 1.9270056760029785e-05, + "loss": 0.876, + "step": 4852 + }, + { + "epoch": 0.1487372808630624, + "grad_norm": 2.213411934216318, + "learning_rate": 1.9269684429030566e-05, + "loss": 0.8925, + "step": 4853 + }, + { + "epoch": 0.1487679293858036, + "grad_norm": 1.6921425627105096, + "learning_rate": 1.9269312006694774e-05, + "loss": 0.7319, + "step": 4854 + }, + { + "epoch": 0.1487985779085448, + "grad_norm": 1.2083323824036398, + "learning_rate": 1.9268939493026075e-05, + "loss": 0.6787, + "step": 4855 + }, + { + "epoch": 0.14882922643128602, + "grad_norm": 1.5173196949380572, + "learning_rate": 1.926856688802815e-05, + "loss": 0.7275, + "step": 4856 + }, + { + "epoch": 0.14885987495402722, + "grad_norm": 1.6467986584192602, + "learning_rate": 1.926819419170466e-05, + "loss": 0.7894, + "step": 4857 + }, + { + "epoch": 0.14889052347676843, + "grad_norm": 1.8559682881590434, + "learning_rate": 1.9267821404059283e-05, + "loss": 0.9403, + "step": 4858 + }, + { + "epoch": 0.14892117199950963, + "grad_norm": 1.7470055060066205, + "learning_rate": 1.9267448525095686e-05, + "loss": 0.7975, + "step": 4859 + }, + { + "epoch": 0.14895182052225084, + "grad_norm": 1.5710368321469848, + "learning_rate": 1.9267075554817553e-05, + "loss": 0.7516, + "step": 4860 + }, + { + "epoch": 0.14898246904499204, + "grad_norm": 0.8205264493951818, + "learning_rate": 1.926670249322855e-05, + "loss": 0.6612, + "step": 4861 + }, + { + "epoch": 0.14901311756773322, + "grad_norm": 1.6256510576040268, + "learning_rate": 1.9266329340332358e-05, + "loss": 0.7853, + "step": 4862 + }, + { + "epoch": 0.14904376609047443, + "grad_norm": 1.7521508826965888, + "learning_rate": 1.926595609613265e-05, + "loss": 0.7929, + "step": 4863 + }, + { + "epoch": 0.14907441461321563, + "grad_norm": 1.85484740707158, + "learning_rate": 1.926558276063311e-05, + "loss": 0.8288, + "step": 4864 + }, + { + "epoch": 0.14910506313595684, + "grad_norm": 1.436880636613079, + "learning_rate": 1.926520933383741e-05, + "loss": 0.8878, + "step": 4865 + }, + { + "epoch": 0.14913571165869804, + "grad_norm": 1.6255599453912302, + "learning_rate": 1.9264835815749233e-05, + "loss": 0.8078, + "step": 4866 + }, + { + "epoch": 0.14916636018143925, + "grad_norm": 1.5123897943534466, + "learning_rate": 1.9264462206372257e-05, + "loss": 0.8681, + "step": 4867 + }, + { + "epoch": 0.14919700870418046, + "grad_norm": 1.5131873308127113, + "learning_rate": 1.9264088505710163e-05, + "loss": 0.8442, + "step": 4868 + }, + { + "epoch": 0.14922765722692166, + "grad_norm": 1.5911376758648956, + "learning_rate": 1.9263714713766636e-05, + "loss": 0.7304, + "step": 4869 + }, + { + "epoch": 0.14925830574966287, + "grad_norm": 1.6311643337812327, + "learning_rate": 1.9263340830545358e-05, + "loss": 0.9206, + "step": 4870 + }, + { + "epoch": 0.14928895427240407, + "grad_norm": 1.4220968474690578, + "learning_rate": 1.9262966856050015e-05, + "loss": 0.7823, + "step": 4871 + }, + { + "epoch": 0.14931960279514528, + "grad_norm": 1.6425447617239948, + "learning_rate": 1.9262592790284283e-05, + "loss": 0.8421, + "step": 4872 + }, + { + "epoch": 0.14935025131788648, + "grad_norm": 1.5979900736260353, + "learning_rate": 1.9262218633251863e-05, + "loss": 0.8532, + "step": 4873 + }, + { + "epoch": 0.1493808998406277, + "grad_norm": 1.3523621239040855, + "learning_rate": 1.9261844384956426e-05, + "loss": 0.7517, + "step": 4874 + }, + { + "epoch": 0.1494115483633689, + "grad_norm": 1.8359623205629787, + "learning_rate": 1.926147004540167e-05, + "loss": 0.8504, + "step": 4875 + }, + { + "epoch": 0.1494421968861101, + "grad_norm": 1.5586191138724526, + "learning_rate": 1.9261095614591278e-05, + "loss": 0.787, + "step": 4876 + }, + { + "epoch": 0.1494728454088513, + "grad_norm": 1.5255460259662614, + "learning_rate": 1.926072109252894e-05, + "loss": 0.8299, + "step": 4877 + }, + { + "epoch": 0.14950349393159248, + "grad_norm": 1.8498761917987543, + "learning_rate": 1.926034647921835e-05, + "loss": 0.8742, + "step": 4878 + }, + { + "epoch": 0.1495341424543337, + "grad_norm": 1.6458353545261353, + "learning_rate": 1.9259971774663197e-05, + "loss": 0.7978, + "step": 4879 + }, + { + "epoch": 0.1495647909770749, + "grad_norm": 1.6307115354633241, + "learning_rate": 1.925959697886717e-05, + "loss": 0.8597, + "step": 4880 + }, + { + "epoch": 0.1495954394998161, + "grad_norm": 1.6681391330696251, + "learning_rate": 1.925922209183397e-05, + "loss": 0.7846, + "step": 4881 + }, + { + "epoch": 0.1496260880225573, + "grad_norm": 1.5877774094702704, + "learning_rate": 1.9258847113567282e-05, + "loss": 0.8101, + "step": 4882 + }, + { + "epoch": 0.1496567365452985, + "grad_norm": 1.5644121565749007, + "learning_rate": 1.9258472044070808e-05, + "loss": 0.8901, + "step": 4883 + }, + { + "epoch": 0.14968738506803972, + "grad_norm": 1.531084650438699, + "learning_rate": 1.9258096883348235e-05, + "loss": 0.8951, + "step": 4884 + }, + { + "epoch": 0.14971803359078092, + "grad_norm": 1.5182258585406903, + "learning_rate": 1.925772163140327e-05, + "loss": 0.7737, + "step": 4885 + }, + { + "epoch": 0.14974868211352213, + "grad_norm": 1.6971148088501973, + "learning_rate": 1.92573462882396e-05, + "loss": 0.7336, + "step": 4886 + }, + { + "epoch": 0.14977933063626334, + "grad_norm": 1.7291948055796702, + "learning_rate": 1.925697085386093e-05, + "loss": 0.7493, + "step": 4887 + }, + { + "epoch": 0.14980997915900454, + "grad_norm": 1.6389260274192163, + "learning_rate": 1.925659532827096e-05, + "loss": 0.8235, + "step": 4888 + }, + { + "epoch": 0.14984062768174575, + "grad_norm": 1.7929905062214018, + "learning_rate": 1.9256219711473383e-05, + "loss": 0.7775, + "step": 4889 + }, + { + "epoch": 0.14987127620448695, + "grad_norm": 1.7062467714093064, + "learning_rate": 1.925584400347191e-05, + "loss": 0.8742, + "step": 4890 + }, + { + "epoch": 0.14990192472722816, + "grad_norm": 1.5180084702092782, + "learning_rate": 1.9255468204270237e-05, + "loss": 0.8395, + "step": 4891 + }, + { + "epoch": 0.14993257324996936, + "grad_norm": 1.5385625265168135, + "learning_rate": 1.9255092313872066e-05, + "loss": 0.8523, + "step": 4892 + }, + { + "epoch": 0.14996322177271054, + "grad_norm": 1.8282825206228828, + "learning_rate": 1.9254716332281102e-05, + "loss": 0.8174, + "step": 4893 + }, + { + "epoch": 0.14999387029545175, + "grad_norm": 1.6536886303788094, + "learning_rate": 1.925434025950105e-05, + "loss": 0.8338, + "step": 4894 + }, + { + "epoch": 0.15002451881819295, + "grad_norm": 1.6274624005738023, + "learning_rate": 1.9253964095535617e-05, + "loss": 0.9004, + "step": 4895 + }, + { + "epoch": 0.15005516734093416, + "grad_norm": 1.4594264056293924, + "learning_rate": 1.925358784038851e-05, + "loss": 0.6998, + "step": 4896 + }, + { + "epoch": 0.15008581586367536, + "grad_norm": 1.6709970353824668, + "learning_rate": 1.925321149406343e-05, + "loss": 0.797, + "step": 4897 + }, + { + "epoch": 0.15011646438641657, + "grad_norm": 1.584112782848213, + "learning_rate": 1.9252835056564093e-05, + "loss": 0.8214, + "step": 4898 + }, + { + "epoch": 0.15014711290915778, + "grad_norm": 1.510094912789151, + "learning_rate": 1.92524585278942e-05, + "loss": 0.7772, + "step": 4899 + }, + { + "epoch": 0.15017776143189898, + "grad_norm": 1.6455039845042105, + "learning_rate": 1.925208190805747e-05, + "loss": 0.7194, + "step": 4900 + }, + { + "epoch": 0.1502084099546402, + "grad_norm": 1.5954392531448272, + "learning_rate": 1.925170519705761e-05, + "loss": 0.8493, + "step": 4901 + }, + { + "epoch": 0.1502390584773814, + "grad_norm": 0.924055522717169, + "learning_rate": 1.925132839489833e-05, + "loss": 0.6501, + "step": 4902 + }, + { + "epoch": 0.1502697070001226, + "grad_norm": 1.4280530406389766, + "learning_rate": 1.9250951501583345e-05, + "loss": 0.7707, + "step": 4903 + }, + { + "epoch": 0.1503003555228638, + "grad_norm": 0.8152425194595502, + "learning_rate": 1.9250574517116366e-05, + "loss": 0.6371, + "step": 4904 + }, + { + "epoch": 0.150331004045605, + "grad_norm": 1.8800368593724852, + "learning_rate": 1.9250197441501113e-05, + "loss": 0.7539, + "step": 4905 + }, + { + "epoch": 0.15036165256834622, + "grad_norm": 1.6371858246362574, + "learning_rate": 1.9249820274741294e-05, + "loss": 0.8568, + "step": 4906 + }, + { + "epoch": 0.15039230109108742, + "grad_norm": 1.4007856433075232, + "learning_rate": 1.924944301684063e-05, + "loss": 0.7674, + "step": 4907 + }, + { + "epoch": 0.15042294961382863, + "grad_norm": 1.6968196493691885, + "learning_rate": 1.9249065667802838e-05, + "loss": 0.8011, + "step": 4908 + }, + { + "epoch": 0.1504535981365698, + "grad_norm": 1.5862478564990121, + "learning_rate": 1.9248688227631636e-05, + "loss": 0.836, + "step": 4909 + }, + { + "epoch": 0.150484246659311, + "grad_norm": 1.688174608015913, + "learning_rate": 1.9248310696330743e-05, + "loss": 0.7907, + "step": 4910 + }, + { + "epoch": 0.15051489518205222, + "grad_norm": 1.524893037921265, + "learning_rate": 1.9247933073903878e-05, + "loss": 0.8751, + "step": 4911 + }, + { + "epoch": 0.15054554370479342, + "grad_norm": 1.6246312524149533, + "learning_rate": 1.924755536035476e-05, + "loss": 0.8129, + "step": 4912 + }, + { + "epoch": 0.15057619222753463, + "grad_norm": 1.6065420057478623, + "learning_rate": 1.9247177555687117e-05, + "loss": 0.8427, + "step": 4913 + }, + { + "epoch": 0.15060684075027583, + "grad_norm": 1.5390361396753087, + "learning_rate": 1.9246799659904664e-05, + "loss": 0.7353, + "step": 4914 + }, + { + "epoch": 0.15063748927301704, + "grad_norm": 1.7912807787261529, + "learning_rate": 1.924642167301113e-05, + "loss": 0.8783, + "step": 4915 + }, + { + "epoch": 0.15066813779575824, + "grad_norm": 1.8356230527505342, + "learning_rate": 1.9246043595010236e-05, + "loss": 0.8843, + "step": 4916 + }, + { + "epoch": 0.15069878631849945, + "grad_norm": 1.8089945843407336, + "learning_rate": 1.924566542590571e-05, + "loss": 0.7476, + "step": 4917 + }, + { + "epoch": 0.15072943484124066, + "grad_norm": 1.6763973795362104, + "learning_rate": 1.924528716570128e-05, + "loss": 0.9988, + "step": 4918 + }, + { + "epoch": 0.15076008336398186, + "grad_norm": 1.7507471363510017, + "learning_rate": 1.9244908814400665e-05, + "loss": 0.8419, + "step": 4919 + }, + { + "epoch": 0.15079073188672307, + "grad_norm": 1.770037428223811, + "learning_rate": 1.9244530372007598e-05, + "loss": 0.8301, + "step": 4920 + }, + { + "epoch": 0.15082138040946427, + "grad_norm": 1.8252805397745198, + "learning_rate": 1.924415183852581e-05, + "loss": 0.8059, + "step": 4921 + }, + { + "epoch": 0.15085202893220548, + "grad_norm": 1.7217695630196277, + "learning_rate": 1.9243773213959028e-05, + "loss": 0.8485, + "step": 4922 + }, + { + "epoch": 0.15088267745494668, + "grad_norm": 1.0674236902560885, + "learning_rate": 1.9243394498310987e-05, + "loss": 0.6453, + "step": 4923 + }, + { + "epoch": 0.15091332597768786, + "grad_norm": 0.8896265569466045, + "learning_rate": 1.924301569158541e-05, + "loss": 0.6383, + "step": 4924 + }, + { + "epoch": 0.15094397450042907, + "grad_norm": 1.6493768337701942, + "learning_rate": 1.9242636793786037e-05, + "loss": 0.8691, + "step": 4925 + }, + { + "epoch": 0.15097462302317027, + "grad_norm": 1.7465283684841488, + "learning_rate": 1.9242257804916598e-05, + "loss": 0.8956, + "step": 4926 + }, + { + "epoch": 0.15100527154591148, + "grad_norm": 1.997097167135846, + "learning_rate": 1.924187872498083e-05, + "loss": 0.9088, + "step": 4927 + }, + { + "epoch": 0.15103592006865268, + "grad_norm": 0.9807689953695482, + "learning_rate": 1.924149955398246e-05, + "loss": 0.6304, + "step": 4928 + }, + { + "epoch": 0.1510665685913939, + "grad_norm": 1.0263754010504207, + "learning_rate": 1.9241120291925236e-05, + "loss": 0.657, + "step": 4929 + }, + { + "epoch": 0.1510972171141351, + "grad_norm": 1.6542648294205664, + "learning_rate": 1.9240740938812887e-05, + "loss": 0.7902, + "step": 4930 + }, + { + "epoch": 0.1511278656368763, + "grad_norm": 2.2001950841373863, + "learning_rate": 1.9240361494649155e-05, + "loss": 0.7314, + "step": 4931 + }, + { + "epoch": 0.1511585141596175, + "grad_norm": 1.7487930862232735, + "learning_rate": 1.9239981959437777e-05, + "loss": 0.7944, + "step": 4932 + }, + { + "epoch": 0.1511891626823587, + "grad_norm": 1.9416200254736002, + "learning_rate": 1.9239602333182494e-05, + "loss": 0.8865, + "step": 4933 + }, + { + "epoch": 0.15121981120509992, + "grad_norm": 1.5005189559571934, + "learning_rate": 1.923922261588704e-05, + "loss": 0.7384, + "step": 4934 + }, + { + "epoch": 0.15125045972784112, + "grad_norm": 1.8380428044210542, + "learning_rate": 1.9238842807555165e-05, + "loss": 0.9477, + "step": 4935 + }, + { + "epoch": 0.15128110825058233, + "grad_norm": 1.7413082043163781, + "learning_rate": 1.9238462908190608e-05, + "loss": 0.8296, + "step": 4936 + }, + { + "epoch": 0.15131175677332354, + "grad_norm": 1.4682374104977731, + "learning_rate": 1.9238082917797114e-05, + "loss": 0.7201, + "step": 4937 + }, + { + "epoch": 0.15134240529606474, + "grad_norm": 0.9859476339784666, + "learning_rate": 1.923770283637842e-05, + "loss": 0.6543, + "step": 4938 + }, + { + "epoch": 0.15137305381880595, + "grad_norm": 0.8446347223163857, + "learning_rate": 1.923732266393828e-05, + "loss": 0.6415, + "step": 4939 + }, + { + "epoch": 0.15140370234154713, + "grad_norm": 1.840024841688997, + "learning_rate": 1.9236942400480437e-05, + "loss": 0.7941, + "step": 4940 + }, + { + "epoch": 0.15143435086428833, + "grad_norm": 1.7501891851083717, + "learning_rate": 1.9236562046008635e-05, + "loss": 0.8703, + "step": 4941 + }, + { + "epoch": 0.15146499938702954, + "grad_norm": 1.763312908973194, + "learning_rate": 1.9236181600526626e-05, + "loss": 0.9001, + "step": 4942 + }, + { + "epoch": 0.15149564790977074, + "grad_norm": 1.6898081815811699, + "learning_rate": 1.9235801064038156e-05, + "loss": 0.8483, + "step": 4943 + }, + { + "epoch": 0.15152629643251195, + "grad_norm": 1.509855548569608, + "learning_rate": 1.923542043654697e-05, + "loss": 0.8509, + "step": 4944 + }, + { + "epoch": 0.15155694495525315, + "grad_norm": 1.6344931641246356, + "learning_rate": 1.923503971805683e-05, + "loss": 0.7397, + "step": 4945 + }, + { + "epoch": 0.15158759347799436, + "grad_norm": 1.8932083297171776, + "learning_rate": 1.923465890857148e-05, + "loss": 0.8376, + "step": 4946 + }, + { + "epoch": 0.15161824200073556, + "grad_norm": 1.713079497512696, + "learning_rate": 1.923427800809467e-05, + "loss": 0.8577, + "step": 4947 + }, + { + "epoch": 0.15164889052347677, + "grad_norm": 1.7816306247234253, + "learning_rate": 1.923389701663016e-05, + "loss": 0.928, + "step": 4948 + }, + { + "epoch": 0.15167953904621798, + "grad_norm": 1.7357098824691528, + "learning_rate": 1.9233515934181696e-05, + "loss": 0.8167, + "step": 4949 + }, + { + "epoch": 0.15171018756895918, + "grad_norm": 1.5204110096695238, + "learning_rate": 1.923313476075304e-05, + "loss": 0.8553, + "step": 4950 + }, + { + "epoch": 0.1517408360917004, + "grad_norm": 1.251480035560472, + "learning_rate": 1.9232753496347946e-05, + "loss": 0.6563, + "step": 4951 + }, + { + "epoch": 0.1517714846144416, + "grad_norm": 1.554047965709987, + "learning_rate": 1.9232372140970164e-05, + "loss": 0.8092, + "step": 4952 + }, + { + "epoch": 0.1518021331371828, + "grad_norm": 1.7052788829252916, + "learning_rate": 1.923199069462346e-05, + "loss": 0.8036, + "step": 4953 + }, + { + "epoch": 0.151832781659924, + "grad_norm": 0.8215527421173139, + "learning_rate": 1.923160915731159e-05, + "loss": 0.6584, + "step": 4954 + }, + { + "epoch": 0.15186343018266518, + "grad_norm": 1.739587297480919, + "learning_rate": 1.923122752903831e-05, + "loss": 0.8616, + "step": 4955 + }, + { + "epoch": 0.1518940787054064, + "grad_norm": 1.5948126163621512, + "learning_rate": 1.923084580980739e-05, + "loss": 0.8676, + "step": 4956 + }, + { + "epoch": 0.1519247272281476, + "grad_norm": 1.8920493271088703, + "learning_rate": 1.923046399962258e-05, + "loss": 0.9327, + "step": 4957 + }, + { + "epoch": 0.1519553757508888, + "grad_norm": 1.41553935525317, + "learning_rate": 1.923008209848765e-05, + "loss": 0.7599, + "step": 4958 + }, + { + "epoch": 0.15198602427363, + "grad_norm": 1.6492750066881487, + "learning_rate": 1.9229700106406356e-05, + "loss": 0.8463, + "step": 4959 + }, + { + "epoch": 0.1520166727963712, + "grad_norm": 1.6457762372123017, + "learning_rate": 1.9229318023382465e-05, + "loss": 0.8192, + "step": 4960 + }, + { + "epoch": 0.15204732131911242, + "grad_norm": 1.5938347610988508, + "learning_rate": 1.922893584941974e-05, + "loss": 0.7987, + "step": 4961 + }, + { + "epoch": 0.15207796984185362, + "grad_norm": 1.693135447207513, + "learning_rate": 1.9228553584521955e-05, + "loss": 0.7486, + "step": 4962 + }, + { + "epoch": 0.15210861836459483, + "grad_norm": 1.4124500569455851, + "learning_rate": 1.9228171228692866e-05, + "loss": 0.7981, + "step": 4963 + }, + { + "epoch": 0.15213926688733603, + "grad_norm": 0.9856350407702928, + "learning_rate": 1.9227788781936242e-05, + "loss": 0.6746, + "step": 4964 + }, + { + "epoch": 0.15216991541007724, + "grad_norm": 1.7154187607418268, + "learning_rate": 1.922740624425586e-05, + "loss": 0.88, + "step": 4965 + }, + { + "epoch": 0.15220056393281844, + "grad_norm": 1.5446314081631418, + "learning_rate": 1.922702361565548e-05, + "loss": 0.9047, + "step": 4966 + }, + { + "epoch": 0.15223121245555965, + "grad_norm": 0.7517329797869927, + "learning_rate": 1.922664089613888e-05, + "loss": 0.6468, + "step": 4967 + }, + { + "epoch": 0.15226186097830086, + "grad_norm": 1.6615759125251637, + "learning_rate": 1.922625808570982e-05, + "loss": 0.8386, + "step": 4968 + }, + { + "epoch": 0.15229250950104206, + "grad_norm": 1.5289350146403249, + "learning_rate": 1.9225875184372083e-05, + "loss": 0.6991, + "step": 4969 + }, + { + "epoch": 0.15232315802378327, + "grad_norm": 1.814923007240072, + "learning_rate": 1.9225492192129436e-05, + "loss": 0.7501, + "step": 4970 + }, + { + "epoch": 0.15235380654652445, + "grad_norm": 0.7563006192047637, + "learning_rate": 1.922510910898565e-05, + "loss": 0.6336, + "step": 4971 + }, + { + "epoch": 0.15238445506926565, + "grad_norm": 1.5847691895407305, + "learning_rate": 1.922472593494451e-05, + "loss": 0.8412, + "step": 4972 + }, + { + "epoch": 0.15241510359200686, + "grad_norm": 0.7455504373585369, + "learning_rate": 1.9224342670009783e-05, + "loss": 0.6182, + "step": 4973 + }, + { + "epoch": 0.15244575211474806, + "grad_norm": 1.5888245821428584, + "learning_rate": 1.9223959314185244e-05, + "loss": 0.9125, + "step": 4974 + }, + { + "epoch": 0.15247640063748927, + "grad_norm": 1.578328581036795, + "learning_rate": 1.922357586747468e-05, + "loss": 0.851, + "step": 4975 + }, + { + "epoch": 0.15250704916023047, + "grad_norm": 0.7370363741977001, + "learning_rate": 1.9223192329881857e-05, + "loss": 0.6591, + "step": 4976 + }, + { + "epoch": 0.15253769768297168, + "grad_norm": 1.7565825693178512, + "learning_rate": 1.9222808701410565e-05, + "loss": 0.7927, + "step": 4977 + }, + { + "epoch": 0.15256834620571288, + "grad_norm": 1.5926620715745679, + "learning_rate": 1.9222424982064578e-05, + "loss": 0.8041, + "step": 4978 + }, + { + "epoch": 0.1525989947284541, + "grad_norm": 1.5956008174332332, + "learning_rate": 1.9222041171847676e-05, + "loss": 0.8738, + "step": 4979 + }, + { + "epoch": 0.1526296432511953, + "grad_norm": 1.6483386394817585, + "learning_rate": 1.9221657270763645e-05, + "loss": 0.8683, + "step": 4980 + }, + { + "epoch": 0.1526602917739365, + "grad_norm": 1.6558156557048231, + "learning_rate": 1.9221273278816264e-05, + "loss": 0.7968, + "step": 4981 + }, + { + "epoch": 0.1526909402966777, + "grad_norm": 1.5179721735698726, + "learning_rate": 1.9220889196009317e-05, + "loss": 0.8232, + "step": 4982 + }, + { + "epoch": 0.1527215888194189, + "grad_norm": 0.7814584414086977, + "learning_rate": 1.9220505022346593e-05, + "loss": 0.6614, + "step": 4983 + }, + { + "epoch": 0.15275223734216012, + "grad_norm": 1.8316005474552344, + "learning_rate": 1.922012075783187e-05, + "loss": 0.7364, + "step": 4984 + }, + { + "epoch": 0.15278288586490132, + "grad_norm": 2.103769778310911, + "learning_rate": 1.921973640246894e-05, + "loss": 0.8069, + "step": 4985 + }, + { + "epoch": 0.1528135343876425, + "grad_norm": 1.6103613747181669, + "learning_rate": 1.921935195626159e-05, + "loss": 0.8244, + "step": 4986 + }, + { + "epoch": 0.1528441829103837, + "grad_norm": 1.6149578181166726, + "learning_rate": 1.9218967419213604e-05, + "loss": 0.9015, + "step": 4987 + }, + { + "epoch": 0.1528748314331249, + "grad_norm": 1.6492948127909703, + "learning_rate": 1.9218582791328774e-05, + "loss": 0.8091, + "step": 4988 + }, + { + "epoch": 0.15290547995586612, + "grad_norm": 1.5802252274752984, + "learning_rate": 1.9218198072610886e-05, + "loss": 0.8876, + "step": 4989 + }, + { + "epoch": 0.15293612847860732, + "grad_norm": 1.5443546727478612, + "learning_rate": 1.9217813263063737e-05, + "loss": 0.7157, + "step": 4990 + }, + { + "epoch": 0.15296677700134853, + "grad_norm": 1.5460344333782448, + "learning_rate": 1.9217428362691116e-05, + "loss": 0.8578, + "step": 4991 + }, + { + "epoch": 0.15299742552408974, + "grad_norm": 1.7898523034806741, + "learning_rate": 1.9217043371496813e-05, + "loss": 1.0222, + "step": 4992 + }, + { + "epoch": 0.15302807404683094, + "grad_norm": 1.4557867917277494, + "learning_rate": 1.9216658289484623e-05, + "loss": 0.7866, + "step": 4993 + }, + { + "epoch": 0.15305872256957215, + "grad_norm": 1.7538218663671639, + "learning_rate": 1.9216273116658345e-05, + "loss": 0.8839, + "step": 4994 + }, + { + "epoch": 0.15308937109231335, + "grad_norm": 0.7648625974274897, + "learning_rate": 1.9215887853021766e-05, + "loss": 0.651, + "step": 4995 + }, + { + "epoch": 0.15312001961505456, + "grad_norm": 1.428665229250883, + "learning_rate": 1.9215502498578685e-05, + "loss": 0.9373, + "step": 4996 + }, + { + "epoch": 0.15315066813779576, + "grad_norm": 1.7158859715309038, + "learning_rate": 1.9215117053332903e-05, + "loss": 0.7964, + "step": 4997 + }, + { + "epoch": 0.15318131666053697, + "grad_norm": 1.5052369816056557, + "learning_rate": 1.9214731517288214e-05, + "loss": 0.7387, + "step": 4998 + }, + { + "epoch": 0.15321196518327818, + "grad_norm": 1.432069942845907, + "learning_rate": 1.9214345890448417e-05, + "loss": 0.7046, + "step": 4999 + }, + { + "epoch": 0.15324261370601938, + "grad_norm": 1.9433306509087098, + "learning_rate": 1.9213960172817313e-05, + "loss": 0.7922, + "step": 5000 + }, + { + "epoch": 0.1532732622287606, + "grad_norm": 1.6103106326239067, + "learning_rate": 1.92135743643987e-05, + "loss": 0.8271, + "step": 5001 + }, + { + "epoch": 0.15330391075150177, + "grad_norm": 1.6323251002502024, + "learning_rate": 1.9213188465196385e-05, + "loss": 0.8914, + "step": 5002 + }, + { + "epoch": 0.15333455927424297, + "grad_norm": 1.4834493909555482, + "learning_rate": 1.9212802475214163e-05, + "loss": 0.827, + "step": 5003 + }, + { + "epoch": 0.15336520779698418, + "grad_norm": 1.6829976959950455, + "learning_rate": 1.9212416394455844e-05, + "loss": 0.9184, + "step": 5004 + }, + { + "epoch": 0.15339585631972538, + "grad_norm": 0.8213267146706151, + "learning_rate": 1.9212030222925228e-05, + "loss": 0.6621, + "step": 5005 + }, + { + "epoch": 0.1534265048424666, + "grad_norm": 1.5795809677921822, + "learning_rate": 1.9211643960626122e-05, + "loss": 0.872, + "step": 5006 + }, + { + "epoch": 0.1534571533652078, + "grad_norm": 1.588586673015372, + "learning_rate": 1.921125760756233e-05, + "loss": 0.8756, + "step": 5007 + }, + { + "epoch": 0.153487801887949, + "grad_norm": 1.6275968048635239, + "learning_rate": 1.921087116373766e-05, + "loss": 0.708, + "step": 5008 + }, + { + "epoch": 0.1535184504106902, + "grad_norm": 1.7771753157220898, + "learning_rate": 1.9210484629155922e-05, + "loss": 0.8111, + "step": 5009 + }, + { + "epoch": 0.1535490989334314, + "grad_norm": 1.6233878171081815, + "learning_rate": 1.9210098003820917e-05, + "loss": 0.9451, + "step": 5010 + }, + { + "epoch": 0.15357974745617262, + "grad_norm": 1.5789373590658615, + "learning_rate": 1.9209711287736462e-05, + "loss": 0.8648, + "step": 5011 + }, + { + "epoch": 0.15361039597891382, + "grad_norm": 1.5872688213147665, + "learning_rate": 1.920932448090637e-05, + "loss": 0.8884, + "step": 5012 + }, + { + "epoch": 0.15364104450165503, + "grad_norm": 1.6637419430027345, + "learning_rate": 1.9208937583334443e-05, + "loss": 0.7975, + "step": 5013 + }, + { + "epoch": 0.15367169302439623, + "grad_norm": 1.5124305413338808, + "learning_rate": 1.92085505950245e-05, + "loss": 0.7343, + "step": 5014 + }, + { + "epoch": 0.15370234154713744, + "grad_norm": 1.4394051841902193, + "learning_rate": 1.920816351598035e-05, + "loss": 0.8586, + "step": 5015 + }, + { + "epoch": 0.15373299006987864, + "grad_norm": 1.765108748094218, + "learning_rate": 1.920777634620581e-05, + "loss": 0.8283, + "step": 5016 + }, + { + "epoch": 0.15376363859261982, + "grad_norm": 1.7260569749239623, + "learning_rate": 1.9207389085704693e-05, + "loss": 0.8969, + "step": 5017 + }, + { + "epoch": 0.15379428711536103, + "grad_norm": 1.760548114413794, + "learning_rate": 1.9207001734480816e-05, + "loss": 0.8975, + "step": 5018 + }, + { + "epoch": 0.15382493563810223, + "grad_norm": 1.5489567795939663, + "learning_rate": 1.9206614292537995e-05, + "loss": 0.8098, + "step": 5019 + }, + { + "epoch": 0.15385558416084344, + "grad_norm": 1.6696632544914933, + "learning_rate": 1.9206226759880047e-05, + "loss": 0.8068, + "step": 5020 + }, + { + "epoch": 0.15388623268358465, + "grad_norm": 1.5250996503221224, + "learning_rate": 1.9205839136510793e-05, + "loss": 0.7296, + "step": 5021 + }, + { + "epoch": 0.15391688120632585, + "grad_norm": 1.663984314411687, + "learning_rate": 1.920545142243405e-05, + "loss": 0.8014, + "step": 5022 + }, + { + "epoch": 0.15394752972906706, + "grad_norm": 1.7292710388460781, + "learning_rate": 1.920506361765364e-05, + "loss": 0.9952, + "step": 5023 + }, + { + "epoch": 0.15397817825180826, + "grad_norm": 1.6543112166984106, + "learning_rate": 1.920467572217338e-05, + "loss": 0.8224, + "step": 5024 + }, + { + "epoch": 0.15400882677454947, + "grad_norm": 1.6228289860462297, + "learning_rate": 1.9204287735997095e-05, + "loss": 0.8051, + "step": 5025 + }, + { + "epoch": 0.15403947529729067, + "grad_norm": 1.8922179201968874, + "learning_rate": 1.920389965912861e-05, + "loss": 0.9256, + "step": 5026 + }, + { + "epoch": 0.15407012382003188, + "grad_norm": 1.523763597268442, + "learning_rate": 1.9203511491571746e-05, + "loss": 0.8021, + "step": 5027 + }, + { + "epoch": 0.15410077234277308, + "grad_norm": 1.5219505225008616, + "learning_rate": 1.920312323333033e-05, + "loss": 0.8554, + "step": 5028 + }, + { + "epoch": 0.1541314208655143, + "grad_norm": 1.6709654164495515, + "learning_rate": 1.9202734884408186e-05, + "loss": 0.7951, + "step": 5029 + }, + { + "epoch": 0.1541620693882555, + "grad_norm": 1.550802020697651, + "learning_rate": 1.9202346444809137e-05, + "loss": 0.7642, + "step": 5030 + }, + { + "epoch": 0.1541927179109967, + "grad_norm": 0.9500526634572722, + "learning_rate": 1.9201957914537017e-05, + "loss": 0.647, + "step": 5031 + }, + { + "epoch": 0.1542233664337379, + "grad_norm": 1.606517105186001, + "learning_rate": 1.920156929359565e-05, + "loss": 0.8928, + "step": 5032 + }, + { + "epoch": 0.15425401495647909, + "grad_norm": 1.589382882779845, + "learning_rate": 1.9201180581988868e-05, + "loss": 0.8935, + "step": 5033 + }, + { + "epoch": 0.1542846634792203, + "grad_norm": 1.738147824787427, + "learning_rate": 1.9200791779720496e-05, + "loss": 0.7879, + "step": 5034 + }, + { + "epoch": 0.1543153120019615, + "grad_norm": 1.5330062531503768, + "learning_rate": 1.920040288679437e-05, + "loss": 0.8817, + "step": 5035 + }, + { + "epoch": 0.1543459605247027, + "grad_norm": 1.777818115136174, + "learning_rate": 1.9200013903214323e-05, + "loss": 0.8156, + "step": 5036 + }, + { + "epoch": 0.1543766090474439, + "grad_norm": 1.6177887222376304, + "learning_rate": 1.9199624828984183e-05, + "loss": 0.8108, + "step": 5037 + }, + { + "epoch": 0.1544072575701851, + "grad_norm": 1.6175863530764207, + "learning_rate": 1.9199235664107786e-05, + "loss": 0.7639, + "step": 5038 + }, + { + "epoch": 0.15443790609292632, + "grad_norm": 1.514709537005453, + "learning_rate": 1.9198846408588967e-05, + "loss": 0.8842, + "step": 5039 + }, + { + "epoch": 0.15446855461566752, + "grad_norm": 1.6085607997692104, + "learning_rate": 1.9198457062431558e-05, + "loss": 0.8066, + "step": 5040 + }, + { + "epoch": 0.15449920313840873, + "grad_norm": 1.6774978006408812, + "learning_rate": 1.91980676256394e-05, + "loss": 0.7613, + "step": 5041 + }, + { + "epoch": 0.15452985166114994, + "grad_norm": 1.7163155439287061, + "learning_rate": 1.9197678098216327e-05, + "loss": 0.8434, + "step": 5042 + }, + { + "epoch": 0.15456050018389114, + "grad_norm": 1.6760479898849383, + "learning_rate": 1.919728848016618e-05, + "loss": 0.8854, + "step": 5043 + }, + { + "epoch": 0.15459114870663235, + "grad_norm": 0.8877066893207847, + "learning_rate": 1.9196898771492798e-05, + "loss": 0.6735, + "step": 5044 + }, + { + "epoch": 0.15462179722937355, + "grad_norm": 1.644145178620968, + "learning_rate": 1.919650897220002e-05, + "loss": 0.8443, + "step": 5045 + }, + { + "epoch": 0.15465244575211476, + "grad_norm": 1.5796936602144238, + "learning_rate": 1.9196119082291683e-05, + "loss": 0.8227, + "step": 5046 + }, + { + "epoch": 0.15468309427485596, + "grad_norm": 1.4406414253454531, + "learning_rate": 1.919572910177163e-05, + "loss": 0.8714, + "step": 5047 + }, + { + "epoch": 0.15471374279759714, + "grad_norm": 1.6731115471062952, + "learning_rate": 1.9195339030643706e-05, + "loss": 0.9575, + "step": 5048 + }, + { + "epoch": 0.15474439132033835, + "grad_norm": 1.7023136395115048, + "learning_rate": 1.9194948868911757e-05, + "loss": 0.8637, + "step": 5049 + }, + { + "epoch": 0.15477503984307955, + "grad_norm": 0.7261539941332287, + "learning_rate": 1.9194558616579622e-05, + "loss": 0.6818, + "step": 5050 + }, + { + "epoch": 0.15480568836582076, + "grad_norm": 1.4491257380800975, + "learning_rate": 1.9194168273651147e-05, + "loss": 0.81, + "step": 5051 + }, + { + "epoch": 0.15483633688856197, + "grad_norm": 1.6012231272930733, + "learning_rate": 1.919377784013018e-05, + "loss": 0.8728, + "step": 5052 + }, + { + "epoch": 0.15486698541130317, + "grad_norm": 1.6786344127617758, + "learning_rate": 1.9193387316020572e-05, + "loss": 0.8224, + "step": 5053 + }, + { + "epoch": 0.15489763393404438, + "grad_norm": 1.5908345852109762, + "learning_rate": 1.9192996701326163e-05, + "loss": 0.8651, + "step": 5054 + }, + { + "epoch": 0.15492828245678558, + "grad_norm": 1.6135792352484986, + "learning_rate": 1.9192605996050807e-05, + "loss": 0.7445, + "step": 5055 + }, + { + "epoch": 0.1549589309795268, + "grad_norm": 1.682048821106842, + "learning_rate": 1.919221520019835e-05, + "loss": 0.8164, + "step": 5056 + }, + { + "epoch": 0.154989579502268, + "grad_norm": 1.6100738688971206, + "learning_rate": 1.9191824313772646e-05, + "loss": 0.878, + "step": 5057 + }, + { + "epoch": 0.1550202280250092, + "grad_norm": 1.600419116044567, + "learning_rate": 1.9191433336777546e-05, + "loss": 0.7671, + "step": 5058 + }, + { + "epoch": 0.1550508765477504, + "grad_norm": 1.5089644204737882, + "learning_rate": 1.91910422692169e-05, + "loss": 0.8675, + "step": 5059 + }, + { + "epoch": 0.1550815250704916, + "grad_norm": 1.552619269335001, + "learning_rate": 1.9190651111094563e-05, + "loss": 0.7707, + "step": 5060 + }, + { + "epoch": 0.15511217359323282, + "grad_norm": 0.7929902907654803, + "learning_rate": 1.9190259862414387e-05, + "loss": 0.6693, + "step": 5061 + }, + { + "epoch": 0.15514282211597402, + "grad_norm": 1.5156492458678115, + "learning_rate": 1.9189868523180233e-05, + "loss": 0.7672, + "step": 5062 + }, + { + "epoch": 0.15517347063871523, + "grad_norm": 0.7391616423690258, + "learning_rate": 1.9189477093395954e-05, + "loss": 0.6557, + "step": 5063 + }, + { + "epoch": 0.1552041191614564, + "grad_norm": 0.7132771689949002, + "learning_rate": 1.9189085573065404e-05, + "loss": 0.6431, + "step": 5064 + }, + { + "epoch": 0.1552347676841976, + "grad_norm": 1.676139253346534, + "learning_rate": 1.9188693962192442e-05, + "loss": 0.9157, + "step": 5065 + }, + { + "epoch": 0.15526541620693882, + "grad_norm": 1.8438014281431008, + "learning_rate": 1.9188302260780925e-05, + "loss": 0.7984, + "step": 5066 + }, + { + "epoch": 0.15529606472968002, + "grad_norm": 0.7600033548414066, + "learning_rate": 1.9187910468834722e-05, + "loss": 0.677, + "step": 5067 + }, + { + "epoch": 0.15532671325242123, + "grad_norm": 1.6337197318012346, + "learning_rate": 1.9187518586357678e-05, + "loss": 0.8287, + "step": 5068 + }, + { + "epoch": 0.15535736177516243, + "grad_norm": 1.5019761127494924, + "learning_rate": 1.918712661335367e-05, + "loss": 0.765, + "step": 5069 + }, + { + "epoch": 0.15538801029790364, + "grad_norm": 1.8217802983596636, + "learning_rate": 1.918673454982655e-05, + "loss": 0.939, + "step": 5070 + }, + { + "epoch": 0.15541865882064484, + "grad_norm": 1.519416764582471, + "learning_rate": 1.918634239578018e-05, + "loss": 0.8297, + "step": 5071 + }, + { + "epoch": 0.15544930734338605, + "grad_norm": 1.6684705301713323, + "learning_rate": 1.9185950151218433e-05, + "loss": 0.8212, + "step": 5072 + }, + { + "epoch": 0.15547995586612726, + "grad_norm": 1.363516353688186, + "learning_rate": 1.9185557816145166e-05, + "loss": 0.7721, + "step": 5073 + }, + { + "epoch": 0.15551060438886846, + "grad_norm": 1.6148757210460543, + "learning_rate": 1.9185165390564247e-05, + "loss": 0.9121, + "step": 5074 + }, + { + "epoch": 0.15554125291160967, + "grad_norm": 1.6207932943387835, + "learning_rate": 1.9184772874479545e-05, + "loss": 0.6913, + "step": 5075 + }, + { + "epoch": 0.15557190143435087, + "grad_norm": 0.7587834061685441, + "learning_rate": 1.918438026789493e-05, + "loss": 0.6827, + "step": 5076 + }, + { + "epoch": 0.15560254995709208, + "grad_norm": 1.6588923701518523, + "learning_rate": 1.918398757081426e-05, + "loss": 0.8441, + "step": 5077 + }, + { + "epoch": 0.15563319847983328, + "grad_norm": 1.4876663262135852, + "learning_rate": 1.9183594783241416e-05, + "loss": 0.7234, + "step": 5078 + }, + { + "epoch": 0.15566384700257446, + "grad_norm": 1.5089823455598101, + "learning_rate": 1.9183201905180257e-05, + "loss": 0.7226, + "step": 5079 + }, + { + "epoch": 0.15569449552531567, + "grad_norm": 0.7331073852380723, + "learning_rate": 1.918280893663466e-05, + "loss": 0.6501, + "step": 5080 + }, + { + "epoch": 0.15572514404805687, + "grad_norm": 1.7828105678518045, + "learning_rate": 1.9182415877608504e-05, + "loss": 0.8846, + "step": 5081 + }, + { + "epoch": 0.15575579257079808, + "grad_norm": 1.4278887043269848, + "learning_rate": 1.918202272810565e-05, + "loss": 0.8454, + "step": 5082 + }, + { + "epoch": 0.15578644109353929, + "grad_norm": 1.5771555239520123, + "learning_rate": 1.918162948812998e-05, + "loss": 0.7694, + "step": 5083 + }, + { + "epoch": 0.1558170896162805, + "grad_norm": 1.5345601014141916, + "learning_rate": 1.9181236157685358e-05, + "loss": 0.8647, + "step": 5084 + }, + { + "epoch": 0.1558477381390217, + "grad_norm": 1.5151125039621385, + "learning_rate": 1.9180842736775674e-05, + "loss": 0.801, + "step": 5085 + }, + { + "epoch": 0.1558783866617629, + "grad_norm": 1.54319832239195, + "learning_rate": 1.9180449225404796e-05, + "loss": 0.8055, + "step": 5086 + }, + { + "epoch": 0.1559090351845041, + "grad_norm": 1.599297428750897, + "learning_rate": 1.9180055623576602e-05, + "loss": 0.9579, + "step": 5087 + }, + { + "epoch": 0.1559396837072453, + "grad_norm": 1.496545694604111, + "learning_rate": 1.9179661931294974e-05, + "loss": 0.8395, + "step": 5088 + }, + { + "epoch": 0.15597033222998652, + "grad_norm": 1.6233934922901367, + "learning_rate": 1.9179268148563782e-05, + "loss": 0.942, + "step": 5089 + }, + { + "epoch": 0.15600098075272772, + "grad_norm": 0.8053047357694029, + "learning_rate": 1.9178874275386917e-05, + "loss": 0.6555, + "step": 5090 + }, + { + "epoch": 0.15603162927546893, + "grad_norm": 1.532193611441567, + "learning_rate": 1.9178480311768255e-05, + "loss": 0.7846, + "step": 5091 + }, + { + "epoch": 0.15606227779821014, + "grad_norm": 1.5347563796599628, + "learning_rate": 1.9178086257711675e-05, + "loss": 0.7927, + "step": 5092 + }, + { + "epoch": 0.15609292632095134, + "grad_norm": 1.5043226373376388, + "learning_rate": 1.9177692113221067e-05, + "loss": 0.8713, + "step": 5093 + }, + { + "epoch": 0.15612357484369255, + "grad_norm": 1.4468567838232103, + "learning_rate": 1.9177297878300307e-05, + "loss": 0.7868, + "step": 5094 + }, + { + "epoch": 0.15615422336643373, + "grad_norm": 1.7284751021041462, + "learning_rate": 1.9176903552953287e-05, + "loss": 0.8773, + "step": 5095 + }, + { + "epoch": 0.15618487188917493, + "grad_norm": 0.6896127955043775, + "learning_rate": 1.9176509137183884e-05, + "loss": 0.6545, + "step": 5096 + }, + { + "epoch": 0.15621552041191614, + "grad_norm": 1.4688878288087623, + "learning_rate": 1.917611463099599e-05, + "loss": 0.8381, + "step": 5097 + }, + { + "epoch": 0.15624616893465734, + "grad_norm": 1.5304757316407072, + "learning_rate": 1.9175720034393493e-05, + "loss": 0.808, + "step": 5098 + }, + { + "epoch": 0.15627681745739855, + "grad_norm": 1.5290555592325636, + "learning_rate": 1.9175325347380274e-05, + "loss": 0.6665, + "step": 5099 + }, + { + "epoch": 0.15630746598013975, + "grad_norm": 1.6397534762044135, + "learning_rate": 1.917493056996023e-05, + "loss": 0.7478, + "step": 5100 + }, + { + "epoch": 0.15633811450288096, + "grad_norm": 1.524462390972737, + "learning_rate": 1.9174535702137248e-05, + "loss": 0.7414, + "step": 5101 + }, + { + "epoch": 0.15636876302562217, + "grad_norm": 1.6305331690405496, + "learning_rate": 1.9174140743915217e-05, + "loss": 0.7602, + "step": 5102 + }, + { + "epoch": 0.15639941154836337, + "grad_norm": 1.7393709586581696, + "learning_rate": 1.9173745695298032e-05, + "loss": 0.8402, + "step": 5103 + }, + { + "epoch": 0.15643006007110458, + "grad_norm": 1.622608167660338, + "learning_rate": 1.917335055628958e-05, + "loss": 0.9708, + "step": 5104 + }, + { + "epoch": 0.15646070859384578, + "grad_norm": 1.6166154244299822, + "learning_rate": 1.917295532689376e-05, + "loss": 0.8007, + "step": 5105 + }, + { + "epoch": 0.156491357116587, + "grad_norm": 1.5757636853347328, + "learning_rate": 1.917256000711446e-05, + "loss": 0.8029, + "step": 5106 + }, + { + "epoch": 0.1565220056393282, + "grad_norm": 1.7493601666312908, + "learning_rate": 1.9172164596955588e-05, + "loss": 0.6967, + "step": 5107 + }, + { + "epoch": 0.1565526541620694, + "grad_norm": 1.6070514669738851, + "learning_rate": 1.9171769096421027e-05, + "loss": 0.7707, + "step": 5108 + }, + { + "epoch": 0.1565833026848106, + "grad_norm": 0.8071756454889307, + "learning_rate": 1.9171373505514677e-05, + "loss": 0.6582, + "step": 5109 + }, + { + "epoch": 0.15661395120755178, + "grad_norm": 1.54048965486175, + "learning_rate": 1.917097782424044e-05, + "loss": 1.0054, + "step": 5110 + }, + { + "epoch": 0.156644599730293, + "grad_norm": 1.542262122710341, + "learning_rate": 1.917058205260221e-05, + "loss": 0.8958, + "step": 5111 + }, + { + "epoch": 0.1566752482530342, + "grad_norm": 1.538880165365101, + "learning_rate": 1.9170186190603887e-05, + "loss": 0.8654, + "step": 5112 + }, + { + "epoch": 0.1567058967757754, + "grad_norm": 1.3071906705530965, + "learning_rate": 1.9169790238249375e-05, + "loss": 0.6799, + "step": 5113 + }, + { + "epoch": 0.1567365452985166, + "grad_norm": 1.4841087561498956, + "learning_rate": 1.9169394195542574e-05, + "loss": 0.7337, + "step": 5114 + }, + { + "epoch": 0.1567671938212578, + "grad_norm": 1.5710322308744102, + "learning_rate": 1.9168998062487386e-05, + "loss": 0.8682, + "step": 5115 + }, + { + "epoch": 0.15679784234399902, + "grad_norm": 1.6029642365101557, + "learning_rate": 1.916860183908771e-05, + "loss": 0.8078, + "step": 5116 + }, + { + "epoch": 0.15682849086674022, + "grad_norm": 1.3907461880560685, + "learning_rate": 1.916820552534746e-05, + "loss": 0.7947, + "step": 5117 + }, + { + "epoch": 0.15685913938948143, + "grad_norm": 1.5496268210481694, + "learning_rate": 1.9167809121270535e-05, + "loss": 0.9435, + "step": 5118 + }, + { + "epoch": 0.15688978791222263, + "grad_norm": 0.807528975794235, + "learning_rate": 1.9167412626860836e-05, + "loss": 0.6793, + "step": 5119 + }, + { + "epoch": 0.15692043643496384, + "grad_norm": 1.7467277254764775, + "learning_rate": 1.9167016042122283e-05, + "loss": 0.7086, + "step": 5120 + }, + { + "epoch": 0.15695108495770504, + "grad_norm": 1.8128257035420947, + "learning_rate": 1.916661936705877e-05, + "loss": 0.9065, + "step": 5121 + }, + { + "epoch": 0.15698173348044625, + "grad_norm": 1.58889267475611, + "learning_rate": 1.916622260167421e-05, + "loss": 0.8328, + "step": 5122 + }, + { + "epoch": 0.15701238200318746, + "grad_norm": 1.6529797273294733, + "learning_rate": 1.916582574597251e-05, + "loss": 0.8785, + "step": 5123 + }, + { + "epoch": 0.15704303052592866, + "grad_norm": 1.6926420556211517, + "learning_rate": 1.916542879995759e-05, + "loss": 0.8667, + "step": 5124 + }, + { + "epoch": 0.15707367904866987, + "grad_norm": 1.523521962052407, + "learning_rate": 1.9165031763633357e-05, + "loss": 0.875, + "step": 5125 + }, + { + "epoch": 0.15710432757141105, + "grad_norm": 0.7703498199708425, + "learning_rate": 1.9164634637003717e-05, + "loss": 0.6774, + "step": 5126 + }, + { + "epoch": 0.15713497609415225, + "grad_norm": 1.7649753082077422, + "learning_rate": 1.9164237420072587e-05, + "loss": 0.7419, + "step": 5127 + }, + { + "epoch": 0.15716562461689346, + "grad_norm": 1.575149861555293, + "learning_rate": 1.916384011284388e-05, + "loss": 0.7399, + "step": 5128 + }, + { + "epoch": 0.15719627313963466, + "grad_norm": 0.6908462863356528, + "learning_rate": 1.9163442715321514e-05, + "loss": 0.6283, + "step": 5129 + }, + { + "epoch": 0.15722692166237587, + "grad_norm": 1.3347039527390698, + "learning_rate": 1.9163045227509403e-05, + "loss": 0.5775, + "step": 5130 + }, + { + "epoch": 0.15725757018511707, + "grad_norm": 0.6708236368271442, + "learning_rate": 1.916264764941146e-05, + "loss": 0.6349, + "step": 5131 + }, + { + "epoch": 0.15728821870785828, + "grad_norm": 1.7162705792335722, + "learning_rate": 1.916224998103161e-05, + "loss": 0.8257, + "step": 5132 + }, + { + "epoch": 0.15731886723059949, + "grad_norm": 1.737300076766061, + "learning_rate": 1.916185222237376e-05, + "loss": 0.8255, + "step": 5133 + }, + { + "epoch": 0.1573495157533407, + "grad_norm": 1.6120578123307996, + "learning_rate": 1.9161454373441838e-05, + "loss": 0.8909, + "step": 5134 + }, + { + "epoch": 0.1573801642760819, + "grad_norm": 1.738333572153351, + "learning_rate": 1.9161056434239763e-05, + "loss": 0.85, + "step": 5135 + }, + { + "epoch": 0.1574108127988231, + "grad_norm": 1.4389197809925067, + "learning_rate": 1.9160658404771458e-05, + "loss": 0.8055, + "step": 5136 + }, + { + "epoch": 0.1574414613215643, + "grad_norm": 1.6898403295934128, + "learning_rate": 1.9160260285040838e-05, + "loss": 0.8447, + "step": 5137 + }, + { + "epoch": 0.1574721098443055, + "grad_norm": 1.5107338320451162, + "learning_rate": 1.915986207505183e-05, + "loss": 0.7408, + "step": 5138 + }, + { + "epoch": 0.15750275836704672, + "grad_norm": 1.4161027719758128, + "learning_rate": 1.915946377480836e-05, + "loss": 0.8263, + "step": 5139 + }, + { + "epoch": 0.15753340688978792, + "grad_norm": 1.6677287721167764, + "learning_rate": 1.9159065384314347e-05, + "loss": 0.861, + "step": 5140 + }, + { + "epoch": 0.1575640554125291, + "grad_norm": 1.5225740985602725, + "learning_rate": 1.915866690357372e-05, + "loss": 0.9288, + "step": 5141 + }, + { + "epoch": 0.1575947039352703, + "grad_norm": 1.5514146007759775, + "learning_rate": 1.9158268332590406e-05, + "loss": 0.8173, + "step": 5142 + }, + { + "epoch": 0.15762535245801151, + "grad_norm": 1.7028339150551637, + "learning_rate": 1.9157869671368333e-05, + "loss": 0.708, + "step": 5143 + }, + { + "epoch": 0.15765600098075272, + "grad_norm": 1.3459890716389988, + "learning_rate": 1.915747091991142e-05, + "loss": 0.7717, + "step": 5144 + }, + { + "epoch": 0.15768664950349393, + "grad_norm": 1.585842982576892, + "learning_rate": 1.915707207822361e-05, + "loss": 0.7918, + "step": 5145 + }, + { + "epoch": 0.15771729802623513, + "grad_norm": 1.5456960527776797, + "learning_rate": 1.9156673146308823e-05, + "loss": 0.7972, + "step": 5146 + }, + { + "epoch": 0.15774794654897634, + "grad_norm": 1.6673113736042202, + "learning_rate": 1.9156274124170992e-05, + "loss": 0.9595, + "step": 5147 + }, + { + "epoch": 0.15777859507171754, + "grad_norm": 1.6095070031835936, + "learning_rate": 1.915587501181405e-05, + "loss": 0.7678, + "step": 5148 + }, + { + "epoch": 0.15780924359445875, + "grad_norm": 1.5216795295928864, + "learning_rate": 1.9155475809241927e-05, + "loss": 0.783, + "step": 5149 + }, + { + "epoch": 0.15783989211719995, + "grad_norm": 1.549919207842649, + "learning_rate": 1.915507651645856e-05, + "loss": 0.825, + "step": 5150 + }, + { + "epoch": 0.15787054063994116, + "grad_norm": 1.774375382285615, + "learning_rate": 1.915467713346788e-05, + "loss": 0.8045, + "step": 5151 + }, + { + "epoch": 0.15790118916268236, + "grad_norm": 0.8103992281093648, + "learning_rate": 1.915427766027383e-05, + "loss": 0.6494, + "step": 5152 + }, + { + "epoch": 0.15793183768542357, + "grad_norm": 1.4331927542472007, + "learning_rate": 1.9153878096880335e-05, + "loss": 0.7956, + "step": 5153 + }, + { + "epoch": 0.15796248620816478, + "grad_norm": 1.6056480359089136, + "learning_rate": 1.9153478443291337e-05, + "loss": 0.8638, + "step": 5154 + }, + { + "epoch": 0.15799313473090598, + "grad_norm": 1.6597475788563802, + "learning_rate": 1.9153078699510773e-05, + "loss": 0.8877, + "step": 5155 + }, + { + "epoch": 0.1580237832536472, + "grad_norm": 1.5050727146609786, + "learning_rate": 1.9152678865542586e-05, + "loss": 0.8955, + "step": 5156 + }, + { + "epoch": 0.15805443177638837, + "grad_norm": 1.8344817881707205, + "learning_rate": 1.9152278941390706e-05, + "loss": 0.9114, + "step": 5157 + }, + { + "epoch": 0.15808508029912957, + "grad_norm": 1.4969094290667633, + "learning_rate": 1.9151878927059087e-05, + "loss": 0.8632, + "step": 5158 + }, + { + "epoch": 0.15811572882187078, + "grad_norm": 1.6115270937533526, + "learning_rate": 1.915147882255166e-05, + "loss": 0.7888, + "step": 5159 + }, + { + "epoch": 0.15814637734461198, + "grad_norm": 1.392936930223985, + "learning_rate": 1.915107862787237e-05, + "loss": 0.7104, + "step": 5160 + }, + { + "epoch": 0.1581770258673532, + "grad_norm": 1.6549991869349219, + "learning_rate": 1.9150678343025165e-05, + "loss": 0.8772, + "step": 5161 + }, + { + "epoch": 0.1582076743900944, + "grad_norm": 1.4132141055453031, + "learning_rate": 1.915027796801398e-05, + "loss": 0.8184, + "step": 5162 + }, + { + "epoch": 0.1582383229128356, + "grad_norm": 1.5220713228658886, + "learning_rate": 1.9149877502842767e-05, + "loss": 0.8425, + "step": 5163 + }, + { + "epoch": 0.1582689714355768, + "grad_norm": 1.5426436452863064, + "learning_rate": 1.9149476947515474e-05, + "loss": 0.8145, + "step": 5164 + }, + { + "epoch": 0.158299619958318, + "grad_norm": 0.7920543092666943, + "learning_rate": 1.9149076302036035e-05, + "loss": 0.6294, + "step": 5165 + }, + { + "epoch": 0.15833026848105922, + "grad_norm": 1.7354361065790669, + "learning_rate": 1.914867556640841e-05, + "loss": 0.9092, + "step": 5166 + }, + { + "epoch": 0.15836091700380042, + "grad_norm": 1.5373548899506746, + "learning_rate": 1.914827474063655e-05, + "loss": 0.8073, + "step": 5167 + }, + { + "epoch": 0.15839156552654163, + "grad_norm": 1.4388432577919155, + "learning_rate": 1.914787382472439e-05, + "loss": 0.8591, + "step": 5168 + }, + { + "epoch": 0.15842221404928283, + "grad_norm": 1.5441703941396714, + "learning_rate": 1.9147472818675893e-05, + "loss": 0.7672, + "step": 5169 + }, + { + "epoch": 0.15845286257202404, + "grad_norm": 1.610773579841234, + "learning_rate": 1.9147071722495003e-05, + "loss": 0.8335, + "step": 5170 + }, + { + "epoch": 0.15848351109476524, + "grad_norm": 0.7261291732387355, + "learning_rate": 1.9146670536185678e-05, + "loss": 0.6399, + "step": 5171 + }, + { + "epoch": 0.15851415961750642, + "grad_norm": 1.6756208944833435, + "learning_rate": 1.9146269259751867e-05, + "loss": 0.6953, + "step": 5172 + }, + { + "epoch": 0.15854480814024763, + "grad_norm": 1.6889915248079006, + "learning_rate": 1.9145867893197522e-05, + "loss": 0.8059, + "step": 5173 + }, + { + "epoch": 0.15857545666298883, + "grad_norm": 1.4171992425981583, + "learning_rate": 1.9145466436526603e-05, + "loss": 0.815, + "step": 5174 + }, + { + "epoch": 0.15860610518573004, + "grad_norm": 1.5043484296768634, + "learning_rate": 1.9145064889743065e-05, + "loss": 0.9288, + "step": 5175 + }, + { + "epoch": 0.15863675370847125, + "grad_norm": 1.602633306768678, + "learning_rate": 1.914466325285086e-05, + "loss": 0.8793, + "step": 5176 + }, + { + "epoch": 0.15866740223121245, + "grad_norm": 2.7313641338619994, + "learning_rate": 1.914426152585395e-05, + "loss": 0.8722, + "step": 5177 + }, + { + "epoch": 0.15869805075395366, + "grad_norm": 1.6340324875255823, + "learning_rate": 1.914385970875629e-05, + "loss": 0.8704, + "step": 5178 + }, + { + "epoch": 0.15872869927669486, + "grad_norm": 1.6007534124625176, + "learning_rate": 1.914345780156184e-05, + "loss": 0.8513, + "step": 5179 + }, + { + "epoch": 0.15875934779943607, + "grad_norm": 1.6554223379061137, + "learning_rate": 1.914305580427456e-05, + "loss": 0.8235, + "step": 5180 + }, + { + "epoch": 0.15878999632217727, + "grad_norm": 1.6406775682163963, + "learning_rate": 1.9142653716898417e-05, + "loss": 0.8177, + "step": 5181 + }, + { + "epoch": 0.15882064484491848, + "grad_norm": 1.558875628744716, + "learning_rate": 1.914225153943736e-05, + "loss": 0.8838, + "step": 5182 + }, + { + "epoch": 0.15885129336765968, + "grad_norm": 1.815878019481897, + "learning_rate": 1.9141849271895365e-05, + "loss": 0.947, + "step": 5183 + }, + { + "epoch": 0.1588819418904009, + "grad_norm": 1.5075583158099668, + "learning_rate": 1.914144691427639e-05, + "loss": 0.7099, + "step": 5184 + }, + { + "epoch": 0.1589125904131421, + "grad_norm": 1.4892122766121418, + "learning_rate": 1.91410444665844e-05, + "loss": 0.8725, + "step": 5185 + }, + { + "epoch": 0.1589432389358833, + "grad_norm": 1.9433282022027056, + "learning_rate": 1.9140641928823356e-05, + "loss": 0.8464, + "step": 5186 + }, + { + "epoch": 0.1589738874586245, + "grad_norm": 0.7504708876180143, + "learning_rate": 1.9140239300997234e-05, + "loss": 0.6488, + "step": 5187 + }, + { + "epoch": 0.15900453598136569, + "grad_norm": 1.409409896570993, + "learning_rate": 1.913983658310999e-05, + "loss": 0.728, + "step": 5188 + }, + { + "epoch": 0.1590351845041069, + "grad_norm": 1.5814682885796005, + "learning_rate": 1.9139433775165602e-05, + "loss": 0.8996, + "step": 5189 + }, + { + "epoch": 0.1590658330268481, + "grad_norm": 1.4141768153584742, + "learning_rate": 1.913903087716803e-05, + "loss": 0.8092, + "step": 5190 + }, + { + "epoch": 0.1590964815495893, + "grad_norm": 1.6575370522292412, + "learning_rate": 1.9138627889121256e-05, + "loss": 0.939, + "step": 5191 + }, + { + "epoch": 0.1591271300723305, + "grad_norm": 0.7169834382851791, + "learning_rate": 1.9138224811029237e-05, + "loss": 0.6467, + "step": 5192 + }, + { + "epoch": 0.15915777859507171, + "grad_norm": 1.6069191099454054, + "learning_rate": 1.9137821642895953e-05, + "loss": 0.8886, + "step": 5193 + }, + { + "epoch": 0.15918842711781292, + "grad_norm": 1.5970181887509196, + "learning_rate": 1.9137418384725373e-05, + "loss": 0.8234, + "step": 5194 + }, + { + "epoch": 0.15921907564055413, + "grad_norm": 1.7192062126418945, + "learning_rate": 1.9137015036521473e-05, + "loss": 0.8306, + "step": 5195 + }, + { + "epoch": 0.15924972416329533, + "grad_norm": 1.5646799323646992, + "learning_rate": 1.9136611598288223e-05, + "loss": 0.844, + "step": 5196 + }, + { + "epoch": 0.15928037268603654, + "grad_norm": 1.5343785790737596, + "learning_rate": 1.9136208070029604e-05, + "loss": 0.7499, + "step": 5197 + }, + { + "epoch": 0.15931102120877774, + "grad_norm": 1.6515325446468723, + "learning_rate": 1.9135804451749588e-05, + "loss": 0.8136, + "step": 5198 + }, + { + "epoch": 0.15934166973151895, + "grad_norm": 1.540394819760821, + "learning_rate": 1.9135400743452158e-05, + "loss": 0.8489, + "step": 5199 + }, + { + "epoch": 0.15937231825426015, + "grad_norm": 1.80613864480972, + "learning_rate": 1.913499694514128e-05, + "loss": 0.8363, + "step": 5200 + }, + { + "epoch": 0.15940296677700136, + "grad_norm": 1.4714734163428758, + "learning_rate": 1.9134593056820944e-05, + "loss": 0.7902, + "step": 5201 + }, + { + "epoch": 0.15943361529974256, + "grad_norm": 1.5919083287086966, + "learning_rate": 1.9134189078495123e-05, + "loss": 0.829, + "step": 5202 + }, + { + "epoch": 0.15946426382248374, + "grad_norm": 1.6089356384009053, + "learning_rate": 1.9133785010167806e-05, + "loss": 0.7801, + "step": 5203 + }, + { + "epoch": 0.15949491234522495, + "grad_norm": 1.5348286368031485, + "learning_rate": 1.9133380851842964e-05, + "loss": 0.7654, + "step": 5204 + }, + { + "epoch": 0.15952556086796615, + "grad_norm": 1.6528381008412771, + "learning_rate": 1.913297660352458e-05, + "loss": 0.8277, + "step": 5205 + }, + { + "epoch": 0.15955620939070736, + "grad_norm": 0.7303479033160825, + "learning_rate": 1.9132572265216645e-05, + "loss": 0.647, + "step": 5206 + }, + { + "epoch": 0.15958685791344857, + "grad_norm": 1.5295284907184763, + "learning_rate": 1.9132167836923137e-05, + "loss": 0.7922, + "step": 5207 + }, + { + "epoch": 0.15961750643618977, + "grad_norm": 1.9881149231869537, + "learning_rate": 1.9131763318648043e-05, + "loss": 0.8069, + "step": 5208 + }, + { + "epoch": 0.15964815495893098, + "grad_norm": 1.5190562630391304, + "learning_rate": 1.9131358710395348e-05, + "loss": 0.9011, + "step": 5209 + }, + { + "epoch": 0.15967880348167218, + "grad_norm": 1.4172770315296561, + "learning_rate": 1.9130954012169042e-05, + "loss": 0.7299, + "step": 5210 + }, + { + "epoch": 0.1597094520044134, + "grad_norm": 1.4237130233338133, + "learning_rate": 1.913054922397311e-05, + "loss": 0.6849, + "step": 5211 + }, + { + "epoch": 0.1597401005271546, + "grad_norm": 1.5054462049355868, + "learning_rate": 1.9130144345811537e-05, + "loss": 0.7805, + "step": 5212 + }, + { + "epoch": 0.1597707490498958, + "grad_norm": 1.4962739741652649, + "learning_rate": 1.9129739377688316e-05, + "loss": 0.7474, + "step": 5213 + }, + { + "epoch": 0.159801397572637, + "grad_norm": 1.3825827728140625, + "learning_rate": 1.9129334319607438e-05, + "loss": 0.6965, + "step": 5214 + }, + { + "epoch": 0.1598320460953782, + "grad_norm": 1.5285812034701456, + "learning_rate": 1.9128929171572895e-05, + "loss": 0.8386, + "step": 5215 + }, + { + "epoch": 0.15986269461811942, + "grad_norm": 1.6035973998224007, + "learning_rate": 1.9128523933588674e-05, + "loss": 0.7789, + "step": 5216 + }, + { + "epoch": 0.15989334314086062, + "grad_norm": 1.7453454221093319, + "learning_rate": 1.9128118605658773e-05, + "loss": 0.757, + "step": 5217 + }, + { + "epoch": 0.15992399166360183, + "grad_norm": 1.7481950028108424, + "learning_rate": 1.9127713187787186e-05, + "loss": 0.8195, + "step": 5218 + }, + { + "epoch": 0.159954640186343, + "grad_norm": 1.7442094940380526, + "learning_rate": 1.9127307679977902e-05, + "loss": 0.823, + "step": 5219 + }, + { + "epoch": 0.1599852887090842, + "grad_norm": 1.4643336788400938, + "learning_rate": 1.912690208223492e-05, + "loss": 0.7993, + "step": 5220 + }, + { + "epoch": 0.16001593723182542, + "grad_norm": 1.5632826924637617, + "learning_rate": 1.9126496394562238e-05, + "loss": 0.868, + "step": 5221 + }, + { + "epoch": 0.16004658575456662, + "grad_norm": 1.6757265181798788, + "learning_rate": 1.9126090616963853e-05, + "loss": 0.8787, + "step": 5222 + }, + { + "epoch": 0.16007723427730783, + "grad_norm": 1.6613174258406052, + "learning_rate": 1.912568474944376e-05, + "loss": 0.8481, + "step": 5223 + }, + { + "epoch": 0.16010788280004903, + "grad_norm": 1.4875637810872757, + "learning_rate": 1.9125278792005958e-05, + "loss": 0.8482, + "step": 5224 + }, + { + "epoch": 0.16013853132279024, + "grad_norm": 1.4943241319799718, + "learning_rate": 1.9124872744654454e-05, + "loss": 0.7325, + "step": 5225 + }, + { + "epoch": 0.16016917984553145, + "grad_norm": 1.4237283956531896, + "learning_rate": 1.9124466607393245e-05, + "loss": 0.8385, + "step": 5226 + }, + { + "epoch": 0.16019982836827265, + "grad_norm": 1.6810358514988606, + "learning_rate": 1.9124060380226327e-05, + "loss": 0.7869, + "step": 5227 + }, + { + "epoch": 0.16023047689101386, + "grad_norm": 1.5812899521995998, + "learning_rate": 1.912365406315771e-05, + "loss": 0.8422, + "step": 5228 + }, + { + "epoch": 0.16026112541375506, + "grad_norm": 1.4618262096930514, + "learning_rate": 1.9123247656191395e-05, + "loss": 0.729, + "step": 5229 + }, + { + "epoch": 0.16029177393649627, + "grad_norm": 1.5454835174400734, + "learning_rate": 1.9122841159331385e-05, + "loss": 0.8402, + "step": 5230 + }, + { + "epoch": 0.16032242245923747, + "grad_norm": 0.7824587708691497, + "learning_rate": 1.912243457258169e-05, + "loss": 0.6215, + "step": 5231 + }, + { + "epoch": 0.16035307098197868, + "grad_norm": 0.7573282584399044, + "learning_rate": 1.912202789594631e-05, + "loss": 0.6697, + "step": 5232 + }, + { + "epoch": 0.16038371950471988, + "grad_norm": 1.7134624830635472, + "learning_rate": 1.9121621129429258e-05, + "loss": 0.7031, + "step": 5233 + }, + { + "epoch": 0.16041436802746106, + "grad_norm": 1.40008523338381, + "learning_rate": 1.9121214273034536e-05, + "loss": 0.7059, + "step": 5234 + }, + { + "epoch": 0.16044501655020227, + "grad_norm": 0.7345290560684321, + "learning_rate": 1.912080732676616e-05, + "loss": 0.6563, + "step": 5235 + }, + { + "epoch": 0.16047566507294347, + "grad_norm": 1.7599922718418803, + "learning_rate": 1.9120400290628135e-05, + "loss": 0.8347, + "step": 5236 + }, + { + "epoch": 0.16050631359568468, + "grad_norm": 1.6294007711891767, + "learning_rate": 1.911999316462447e-05, + "loss": 0.8792, + "step": 5237 + }, + { + "epoch": 0.16053696211842589, + "grad_norm": 1.6657402401594106, + "learning_rate": 1.911958594875918e-05, + "loss": 0.6947, + "step": 5238 + }, + { + "epoch": 0.1605676106411671, + "grad_norm": 1.7575178224572336, + "learning_rate": 1.9119178643036275e-05, + "loss": 0.8973, + "step": 5239 + }, + { + "epoch": 0.1605982591639083, + "grad_norm": 0.7720269324331942, + "learning_rate": 1.9118771247459772e-05, + "loss": 0.6313, + "step": 5240 + }, + { + "epoch": 0.1606289076866495, + "grad_norm": 1.8329764617824174, + "learning_rate": 1.911836376203368e-05, + "loss": 0.9374, + "step": 5241 + }, + { + "epoch": 0.1606595562093907, + "grad_norm": 1.581075063787054, + "learning_rate": 1.9117956186762015e-05, + "loss": 0.7862, + "step": 5242 + }, + { + "epoch": 0.1606902047321319, + "grad_norm": 1.5602876736825066, + "learning_rate": 1.91175485216488e-05, + "loss": 0.7857, + "step": 5243 + }, + { + "epoch": 0.16072085325487312, + "grad_norm": 1.5989486382925946, + "learning_rate": 1.9117140766698045e-05, + "loss": 0.8488, + "step": 5244 + }, + { + "epoch": 0.16075150177761433, + "grad_norm": 1.5430984676546686, + "learning_rate": 1.911673292191377e-05, + "loss": 0.8665, + "step": 5245 + }, + { + "epoch": 0.16078215030035553, + "grad_norm": 1.7255976331865395, + "learning_rate": 1.911632498729999e-05, + "loss": 0.8349, + "step": 5246 + }, + { + "epoch": 0.16081279882309674, + "grad_norm": 1.3951566716019541, + "learning_rate": 1.911591696286073e-05, + "loss": 0.7227, + "step": 5247 + }, + { + "epoch": 0.16084344734583794, + "grad_norm": 1.635009744345717, + "learning_rate": 1.9115508848600008e-05, + "loss": 0.8731, + "step": 5248 + }, + { + "epoch": 0.16087409586857915, + "grad_norm": 1.5036748530473223, + "learning_rate": 1.9115100644521843e-05, + "loss": 0.7605, + "step": 5249 + }, + { + "epoch": 0.16090474439132033, + "grad_norm": 1.9568849572755738, + "learning_rate": 1.911469235063026e-05, + "loss": 0.8123, + "step": 5250 + }, + { + "epoch": 0.16093539291406153, + "grad_norm": 0.7700866623022041, + "learning_rate": 1.9114283966929283e-05, + "loss": 0.6282, + "step": 5251 + }, + { + "epoch": 0.16096604143680274, + "grad_norm": 1.5676228264671421, + "learning_rate": 1.911387549342293e-05, + "loss": 0.6317, + "step": 5252 + }, + { + "epoch": 0.16099668995954394, + "grad_norm": 1.461934715189104, + "learning_rate": 1.9113466930115234e-05, + "loss": 0.8526, + "step": 5253 + }, + { + "epoch": 0.16102733848228515, + "grad_norm": 1.575265417120717, + "learning_rate": 1.9113058277010216e-05, + "loss": 0.8488, + "step": 5254 + }, + { + "epoch": 0.16105798700502635, + "grad_norm": 1.6024647962152596, + "learning_rate": 1.9112649534111903e-05, + "loss": 0.7891, + "step": 5255 + }, + { + "epoch": 0.16108863552776756, + "grad_norm": 1.716654978164932, + "learning_rate": 1.9112240701424317e-05, + "loss": 0.8388, + "step": 5256 + }, + { + "epoch": 0.16111928405050877, + "grad_norm": 1.56571917244321, + "learning_rate": 1.91118317789515e-05, + "loss": 0.9116, + "step": 5257 + }, + { + "epoch": 0.16114993257324997, + "grad_norm": 1.6195525052917539, + "learning_rate": 1.9111422766697468e-05, + "loss": 0.7548, + "step": 5258 + }, + { + "epoch": 0.16118058109599118, + "grad_norm": 1.6508769121208484, + "learning_rate": 1.9111013664666262e-05, + "loss": 0.6666, + "step": 5259 + }, + { + "epoch": 0.16121122961873238, + "grad_norm": 1.621994673943618, + "learning_rate": 1.91106044728619e-05, + "loss": 0.8878, + "step": 5260 + }, + { + "epoch": 0.1612418781414736, + "grad_norm": 1.521911635358387, + "learning_rate": 1.9110195191288424e-05, + "loss": 0.8791, + "step": 5261 + }, + { + "epoch": 0.1612725266642148, + "grad_norm": 1.6087416956762646, + "learning_rate": 1.9109785819949865e-05, + "loss": 0.7431, + "step": 5262 + }, + { + "epoch": 0.161303175186956, + "grad_norm": 1.4574229665984864, + "learning_rate": 1.9109376358850253e-05, + "loss": 0.8973, + "step": 5263 + }, + { + "epoch": 0.1613338237096972, + "grad_norm": 1.4885483968193773, + "learning_rate": 1.9108966807993625e-05, + "loss": 0.859, + "step": 5264 + }, + { + "epoch": 0.16136447223243838, + "grad_norm": 1.8001034109383203, + "learning_rate": 1.9108557167384018e-05, + "loss": 0.868, + "step": 5265 + }, + { + "epoch": 0.1613951207551796, + "grad_norm": 1.5360178525837032, + "learning_rate": 1.910814743702547e-05, + "loss": 0.8787, + "step": 5266 + }, + { + "epoch": 0.1614257692779208, + "grad_norm": 1.6569267260363005, + "learning_rate": 1.9107737616922008e-05, + "loss": 0.7884, + "step": 5267 + }, + { + "epoch": 0.161456417800662, + "grad_norm": 1.668624191615966, + "learning_rate": 1.9107327707077683e-05, + "loss": 0.7787, + "step": 5268 + }, + { + "epoch": 0.1614870663234032, + "grad_norm": 1.6082490773487559, + "learning_rate": 1.9106917707496526e-05, + "loss": 0.825, + "step": 5269 + }, + { + "epoch": 0.1615177148461444, + "grad_norm": 1.6976990802529646, + "learning_rate": 1.9106507618182575e-05, + "loss": 0.8365, + "step": 5270 + }, + { + "epoch": 0.16154836336888562, + "grad_norm": 1.553762333363891, + "learning_rate": 1.910609743913988e-05, + "loss": 0.8316, + "step": 5271 + }, + { + "epoch": 0.16157901189162682, + "grad_norm": 1.551030184352816, + "learning_rate": 1.9105687170372475e-05, + "loss": 0.748, + "step": 5272 + }, + { + "epoch": 0.16160966041436803, + "grad_norm": 1.548632713974195, + "learning_rate": 1.9105276811884403e-05, + "loss": 0.7869, + "step": 5273 + }, + { + "epoch": 0.16164030893710923, + "grad_norm": 1.6442769062878695, + "learning_rate": 1.910486636367971e-05, + "loss": 0.7716, + "step": 5274 + }, + { + "epoch": 0.16167095745985044, + "grad_norm": 1.4970842104042477, + "learning_rate": 1.910445582576244e-05, + "loss": 0.7913, + "step": 5275 + }, + { + "epoch": 0.16170160598259165, + "grad_norm": 1.5627307010695433, + "learning_rate": 1.9104045198136634e-05, + "loss": 0.8338, + "step": 5276 + }, + { + "epoch": 0.16173225450533285, + "grad_norm": 1.5432772327580517, + "learning_rate": 1.9103634480806344e-05, + "loss": 0.8112, + "step": 5277 + }, + { + "epoch": 0.16176290302807406, + "grad_norm": 1.6481695534102807, + "learning_rate": 1.9103223673775614e-05, + "loss": 0.7847, + "step": 5278 + }, + { + "epoch": 0.16179355155081526, + "grad_norm": 1.6560718596705575, + "learning_rate": 1.910281277704849e-05, + "loss": 0.8427, + "step": 5279 + }, + { + "epoch": 0.16182420007355647, + "grad_norm": 1.773966092166637, + "learning_rate": 1.9102401790629025e-05, + "loss": 0.8825, + "step": 5280 + }, + { + "epoch": 0.16185484859629765, + "grad_norm": 0.8096960464470291, + "learning_rate": 1.9101990714521267e-05, + "loss": 0.6441, + "step": 5281 + }, + { + "epoch": 0.16188549711903885, + "grad_norm": 1.739531990695974, + "learning_rate": 1.9101579548729264e-05, + "loss": 0.7728, + "step": 5282 + }, + { + "epoch": 0.16191614564178006, + "grad_norm": 1.6060283918391822, + "learning_rate": 1.910116829325707e-05, + "loss": 0.7431, + "step": 5283 + }, + { + "epoch": 0.16194679416452126, + "grad_norm": 1.6187690288187293, + "learning_rate": 1.9100756948108733e-05, + "loss": 0.804, + "step": 5284 + }, + { + "epoch": 0.16197744268726247, + "grad_norm": 1.7503022802371144, + "learning_rate": 1.9100345513288312e-05, + "loss": 0.836, + "step": 5285 + }, + { + "epoch": 0.16200809121000367, + "grad_norm": 1.5704920018809432, + "learning_rate": 1.9099933988799856e-05, + "loss": 0.7453, + "step": 5286 + }, + { + "epoch": 0.16203873973274488, + "grad_norm": 1.593649063819816, + "learning_rate": 1.909952237464743e-05, + "loss": 0.9219, + "step": 5287 + }, + { + "epoch": 0.16206938825548609, + "grad_norm": 1.6421346323685877, + "learning_rate": 1.909911067083507e-05, + "loss": 0.7847, + "step": 5288 + }, + { + "epoch": 0.1621000367782273, + "grad_norm": 1.517640620873075, + "learning_rate": 1.9098698877366852e-05, + "loss": 0.8984, + "step": 5289 + }, + { + "epoch": 0.1621306853009685, + "grad_norm": 1.4745092844716554, + "learning_rate": 1.9098286994246824e-05, + "loss": 0.7816, + "step": 5290 + }, + { + "epoch": 0.1621613338237097, + "grad_norm": 1.976922464566483, + "learning_rate": 1.909787502147905e-05, + "loss": 0.7954, + "step": 5291 + }, + { + "epoch": 0.1621919823464509, + "grad_norm": 1.650201027880312, + "learning_rate": 1.909746295906758e-05, + "loss": 0.8407, + "step": 5292 + }, + { + "epoch": 0.1622226308691921, + "grad_norm": 1.4161396586960115, + "learning_rate": 1.9097050807016482e-05, + "loss": 0.7588, + "step": 5293 + }, + { + "epoch": 0.16225327939193332, + "grad_norm": 0.8223249213902758, + "learning_rate": 1.9096638565329813e-05, + "loss": 0.6319, + "step": 5294 + }, + { + "epoch": 0.16228392791467453, + "grad_norm": 1.635907966499567, + "learning_rate": 1.909622623401164e-05, + "loss": 0.8383, + "step": 5295 + }, + { + "epoch": 0.1623145764374157, + "grad_norm": 1.7815589952146607, + "learning_rate": 1.909581381306602e-05, + "loss": 0.8718, + "step": 5296 + }, + { + "epoch": 0.1623452249601569, + "grad_norm": 1.8548917864176688, + "learning_rate": 1.909540130249702e-05, + "loss": 0.8338, + "step": 5297 + }, + { + "epoch": 0.16237587348289811, + "grad_norm": 1.5863127902095093, + "learning_rate": 1.9094988702308705e-05, + "loss": 0.9865, + "step": 5298 + }, + { + "epoch": 0.16240652200563932, + "grad_norm": 1.362297712692512, + "learning_rate": 1.9094576012505136e-05, + "loss": 0.6909, + "step": 5299 + }, + { + "epoch": 0.16243717052838053, + "grad_norm": 0.8589155349024253, + "learning_rate": 1.9094163233090385e-05, + "loss": 0.6609, + "step": 5300 + }, + { + "epoch": 0.16246781905112173, + "grad_norm": 1.5304030197103387, + "learning_rate": 1.909375036406852e-05, + "loss": 0.7857, + "step": 5301 + }, + { + "epoch": 0.16249846757386294, + "grad_norm": 1.955133401561455, + "learning_rate": 1.9093337405443603e-05, + "loss": 0.7419, + "step": 5302 + }, + { + "epoch": 0.16252911609660414, + "grad_norm": 1.6258875047624681, + "learning_rate": 1.9092924357219703e-05, + "loss": 0.7117, + "step": 5303 + }, + { + "epoch": 0.16255976461934535, + "grad_norm": 1.5511701854597244, + "learning_rate": 1.9092511219400894e-05, + "loss": 0.8925, + "step": 5304 + }, + { + "epoch": 0.16259041314208655, + "grad_norm": 1.6147895492083157, + "learning_rate": 1.909209799199125e-05, + "loss": 0.8646, + "step": 5305 + }, + { + "epoch": 0.16262106166482776, + "grad_norm": 0.8021253627173749, + "learning_rate": 1.9091684674994835e-05, + "loss": 0.6661, + "step": 5306 + }, + { + "epoch": 0.16265171018756897, + "grad_norm": 1.4908782636795246, + "learning_rate": 1.9091271268415724e-05, + "loss": 0.8744, + "step": 5307 + }, + { + "epoch": 0.16268235871031017, + "grad_norm": 1.476341799486095, + "learning_rate": 1.9090857772257993e-05, + "loss": 0.8358, + "step": 5308 + }, + { + "epoch": 0.16271300723305138, + "grad_norm": 1.6419462915034058, + "learning_rate": 1.909044418652571e-05, + "loss": 0.749, + "step": 5309 + }, + { + "epoch": 0.16274365575579258, + "grad_norm": 1.5512215722323777, + "learning_rate": 1.909003051122296e-05, + "loss": 0.9193, + "step": 5310 + }, + { + "epoch": 0.1627743042785338, + "grad_norm": 1.7877112211393642, + "learning_rate": 1.9089616746353813e-05, + "loss": 0.8586, + "step": 5311 + }, + { + "epoch": 0.16280495280127497, + "grad_norm": 0.7567230195330327, + "learning_rate": 1.9089202891922345e-05, + "loss": 0.6634, + "step": 5312 + }, + { + "epoch": 0.16283560132401617, + "grad_norm": 1.419241097285398, + "learning_rate": 1.9088788947932633e-05, + "loss": 0.6725, + "step": 5313 + }, + { + "epoch": 0.16286624984675738, + "grad_norm": 1.6736700733797518, + "learning_rate": 1.908837491438876e-05, + "loss": 0.8102, + "step": 5314 + }, + { + "epoch": 0.16289689836949858, + "grad_norm": 1.8041923199389904, + "learning_rate": 1.9087960791294806e-05, + "loss": 0.8642, + "step": 5315 + }, + { + "epoch": 0.1629275468922398, + "grad_norm": 1.506853672395787, + "learning_rate": 1.9087546578654846e-05, + "loss": 0.7235, + "step": 5316 + }, + { + "epoch": 0.162958195414981, + "grad_norm": 0.726294216850842, + "learning_rate": 1.9087132276472967e-05, + "loss": 0.6491, + "step": 5317 + }, + { + "epoch": 0.1629888439377222, + "grad_norm": 1.8309539805220139, + "learning_rate": 1.9086717884753247e-05, + "loss": 0.8243, + "step": 5318 + }, + { + "epoch": 0.1630194924604634, + "grad_norm": 0.752528862986031, + "learning_rate": 1.908630340349977e-05, + "loss": 0.6578, + "step": 5319 + }, + { + "epoch": 0.1630501409832046, + "grad_norm": 1.8867631498229327, + "learning_rate": 1.908588883271662e-05, + "loss": 0.8375, + "step": 5320 + }, + { + "epoch": 0.16308078950594582, + "grad_norm": 1.4701586505351183, + "learning_rate": 1.9085474172407886e-05, + "loss": 0.8039, + "step": 5321 + }, + { + "epoch": 0.16311143802868702, + "grad_norm": 1.5350951642408508, + "learning_rate": 1.908505942257765e-05, + "loss": 0.7261, + "step": 5322 + }, + { + "epoch": 0.16314208655142823, + "grad_norm": 1.5454784785253455, + "learning_rate": 1.9084644583229998e-05, + "loss": 0.8442, + "step": 5323 + }, + { + "epoch": 0.16317273507416943, + "grad_norm": 1.852194390541488, + "learning_rate": 1.9084229654369014e-05, + "loss": 0.8604, + "step": 5324 + }, + { + "epoch": 0.16320338359691064, + "grad_norm": 1.7177656914358614, + "learning_rate": 1.9083814635998795e-05, + "loss": 0.9639, + "step": 5325 + }, + { + "epoch": 0.16323403211965185, + "grad_norm": 1.5599554694766569, + "learning_rate": 1.9083399528123428e-05, + "loss": 0.7751, + "step": 5326 + }, + { + "epoch": 0.16326468064239302, + "grad_norm": 1.6399087781627353, + "learning_rate": 1.9082984330747e-05, + "loss": 0.8504, + "step": 5327 + }, + { + "epoch": 0.16329532916513423, + "grad_norm": 1.4689065082293602, + "learning_rate": 1.90825690438736e-05, + "loss": 0.8065, + "step": 5328 + }, + { + "epoch": 0.16332597768787543, + "grad_norm": 1.676941546566475, + "learning_rate": 1.908215366750733e-05, + "loss": 0.8515, + "step": 5329 + }, + { + "epoch": 0.16335662621061664, + "grad_norm": 1.5900638341299047, + "learning_rate": 1.908173820165227e-05, + "loss": 0.8557, + "step": 5330 + }, + { + "epoch": 0.16338727473335785, + "grad_norm": 1.9069599814001186, + "learning_rate": 1.9081322646312522e-05, + "loss": 0.8888, + "step": 5331 + }, + { + "epoch": 0.16341792325609905, + "grad_norm": 0.8065106883065695, + "learning_rate": 1.908090700149218e-05, + "loss": 0.6215, + "step": 5332 + }, + { + "epoch": 0.16344857177884026, + "grad_norm": 1.8337787222340745, + "learning_rate": 1.9080491267195334e-05, + "loss": 0.8467, + "step": 5333 + }, + { + "epoch": 0.16347922030158146, + "grad_norm": 0.7514048374232586, + "learning_rate": 1.908007544342609e-05, + "loss": 0.6401, + "step": 5334 + }, + { + "epoch": 0.16350986882432267, + "grad_norm": 1.7726125859124522, + "learning_rate": 1.907965953018853e-05, + "loss": 1.0099, + "step": 5335 + }, + { + "epoch": 0.16354051734706387, + "grad_norm": 1.5952528324534094, + "learning_rate": 1.907924352748677e-05, + "loss": 0.782, + "step": 5336 + }, + { + "epoch": 0.16357116586980508, + "grad_norm": 1.4694124040911862, + "learning_rate": 1.9078827435324897e-05, + "loss": 0.7701, + "step": 5337 + }, + { + "epoch": 0.16360181439254629, + "grad_norm": 1.783676766980234, + "learning_rate": 1.907841125370702e-05, + "loss": 0.8355, + "step": 5338 + }, + { + "epoch": 0.1636324629152875, + "grad_norm": 1.5463146081915125, + "learning_rate": 1.9077994982637226e-05, + "loss": 0.9019, + "step": 5339 + }, + { + "epoch": 0.1636631114380287, + "grad_norm": 2.1534881424279675, + "learning_rate": 1.907757862211963e-05, + "loss": 0.8235, + "step": 5340 + }, + { + "epoch": 0.1636937599607699, + "grad_norm": 1.641056822395852, + "learning_rate": 1.907716217215833e-05, + "loss": 0.7807, + "step": 5341 + }, + { + "epoch": 0.1637244084835111, + "grad_norm": 1.4703058240424676, + "learning_rate": 1.9076745632757423e-05, + "loss": 0.8662, + "step": 5342 + }, + { + "epoch": 0.16375505700625229, + "grad_norm": 1.6810389758529567, + "learning_rate": 1.9076329003921022e-05, + "loss": 0.8417, + "step": 5343 + }, + { + "epoch": 0.1637857055289935, + "grad_norm": 0.8900840611041028, + "learning_rate": 1.907591228565323e-05, + "loss": 0.6512, + "step": 5344 + }, + { + "epoch": 0.1638163540517347, + "grad_norm": 1.3606689191617136, + "learning_rate": 1.907549547795815e-05, + "loss": 0.6971, + "step": 5345 + }, + { + "epoch": 0.1638470025744759, + "grad_norm": 0.7438477933508294, + "learning_rate": 1.907507858083989e-05, + "loss": 0.6165, + "step": 5346 + }, + { + "epoch": 0.1638776510972171, + "grad_norm": 1.4955144383180916, + "learning_rate": 1.9074661594302563e-05, + "loss": 0.7439, + "step": 5347 + }, + { + "epoch": 0.16390829961995831, + "grad_norm": 1.70066176344445, + "learning_rate": 1.907424451835027e-05, + "loss": 0.8831, + "step": 5348 + }, + { + "epoch": 0.16393894814269952, + "grad_norm": 1.4188512235822774, + "learning_rate": 1.9073827352987127e-05, + "loss": 0.7426, + "step": 5349 + }, + { + "epoch": 0.16396959666544073, + "grad_norm": 0.9377775248353928, + "learning_rate": 1.907341009821724e-05, + "loss": 0.677, + "step": 5350 + }, + { + "epoch": 0.16400024518818193, + "grad_norm": 1.6074880572593657, + "learning_rate": 1.9072992754044725e-05, + "loss": 0.8938, + "step": 5351 + }, + { + "epoch": 0.16403089371092314, + "grad_norm": 1.4955901016809066, + "learning_rate": 1.9072575320473685e-05, + "loss": 0.795, + "step": 5352 + }, + { + "epoch": 0.16406154223366434, + "grad_norm": 1.7587979803570033, + "learning_rate": 1.907215779750824e-05, + "loss": 0.8041, + "step": 5353 + }, + { + "epoch": 0.16409219075640555, + "grad_norm": 1.5323501917954363, + "learning_rate": 1.9071740185152507e-05, + "loss": 0.7184, + "step": 5354 + }, + { + "epoch": 0.16412283927914675, + "grad_norm": 1.6443885111870082, + "learning_rate": 1.9071322483410592e-05, + "loss": 0.9033, + "step": 5355 + }, + { + "epoch": 0.16415348780188796, + "grad_norm": 1.6301633462713883, + "learning_rate": 1.907090469228662e-05, + "loss": 0.7254, + "step": 5356 + }, + { + "epoch": 0.16418413632462917, + "grad_norm": 1.295001117978646, + "learning_rate": 1.90704868117847e-05, + "loss": 0.7229, + "step": 5357 + }, + { + "epoch": 0.16421478484737034, + "grad_norm": 1.4256178772881147, + "learning_rate": 1.907006884190895e-05, + "loss": 0.85, + "step": 5358 + }, + { + "epoch": 0.16424543337011155, + "grad_norm": 1.5700046222128512, + "learning_rate": 1.90696507826635e-05, + "loss": 0.8151, + "step": 5359 + }, + { + "epoch": 0.16427608189285275, + "grad_norm": 1.650925020472758, + "learning_rate": 1.9069232634052453e-05, + "loss": 0.8004, + "step": 5360 + }, + { + "epoch": 0.16430673041559396, + "grad_norm": 1.6197087527707281, + "learning_rate": 1.906881439607994e-05, + "loss": 0.7635, + "step": 5361 + }, + { + "epoch": 0.16433737893833517, + "grad_norm": 1.4978543301712322, + "learning_rate": 1.9068396068750077e-05, + "loss": 0.7611, + "step": 5362 + }, + { + "epoch": 0.16436802746107637, + "grad_norm": 1.7273599283596004, + "learning_rate": 1.9067977652066988e-05, + "loss": 0.7868, + "step": 5363 + }, + { + "epoch": 0.16439867598381758, + "grad_norm": 1.5501336609996996, + "learning_rate": 1.9067559146034794e-05, + "loss": 0.7747, + "step": 5364 + }, + { + "epoch": 0.16442932450655878, + "grad_norm": 1.5173994223016816, + "learning_rate": 1.906714055065762e-05, + "loss": 0.7517, + "step": 5365 + }, + { + "epoch": 0.1644599730293, + "grad_norm": 1.608499106656506, + "learning_rate": 1.906672186593959e-05, + "loss": 0.8925, + "step": 5366 + }, + { + "epoch": 0.1644906215520412, + "grad_norm": 1.6602530720617452, + "learning_rate": 1.906630309188483e-05, + "loss": 0.8547, + "step": 5367 + }, + { + "epoch": 0.1645212700747824, + "grad_norm": 1.4567314035142438, + "learning_rate": 1.9065884228497467e-05, + "loss": 0.777, + "step": 5368 + }, + { + "epoch": 0.1645519185975236, + "grad_norm": 0.8282280895865404, + "learning_rate": 1.9065465275781625e-05, + "loss": 0.6473, + "step": 5369 + }, + { + "epoch": 0.1645825671202648, + "grad_norm": 1.5904361195303789, + "learning_rate": 1.9065046233741436e-05, + "loss": 0.8183, + "step": 5370 + }, + { + "epoch": 0.16461321564300602, + "grad_norm": 1.4288035257597875, + "learning_rate": 1.9064627102381026e-05, + "loss": 0.7452, + "step": 5371 + }, + { + "epoch": 0.16464386416574722, + "grad_norm": 1.6429017503375785, + "learning_rate": 1.9064207881704525e-05, + "loss": 0.8842, + "step": 5372 + }, + { + "epoch": 0.16467451268848843, + "grad_norm": 1.6761883391777888, + "learning_rate": 1.9063788571716064e-05, + "loss": 0.7793, + "step": 5373 + }, + { + "epoch": 0.1647051612112296, + "grad_norm": 1.767277140792455, + "learning_rate": 1.906336917241978e-05, + "loss": 0.8374, + "step": 5374 + }, + { + "epoch": 0.1647358097339708, + "grad_norm": 1.5809466554661142, + "learning_rate": 1.9062949683819796e-05, + "loss": 0.7796, + "step": 5375 + }, + { + "epoch": 0.16476645825671202, + "grad_norm": 0.7521511278186818, + "learning_rate": 1.906253010592025e-05, + "loss": 0.6242, + "step": 5376 + }, + { + "epoch": 0.16479710677945322, + "grad_norm": 1.613090288849604, + "learning_rate": 1.9062110438725278e-05, + "loss": 0.8709, + "step": 5377 + }, + { + "epoch": 0.16482775530219443, + "grad_norm": 1.8298052782776386, + "learning_rate": 1.906169068223901e-05, + "loss": 0.997, + "step": 5378 + }, + { + "epoch": 0.16485840382493563, + "grad_norm": 0.7017573411329184, + "learning_rate": 1.906127083646559e-05, + "loss": 0.6547, + "step": 5379 + }, + { + "epoch": 0.16488905234767684, + "grad_norm": 1.6291587845630409, + "learning_rate": 1.9060850901409148e-05, + "loss": 0.8451, + "step": 5380 + }, + { + "epoch": 0.16491970087041805, + "grad_norm": 1.5829954219767906, + "learning_rate": 1.9060430877073825e-05, + "loss": 0.8159, + "step": 5381 + }, + { + "epoch": 0.16495034939315925, + "grad_norm": 1.5407023250515641, + "learning_rate": 1.9060010763463753e-05, + "loss": 0.8149, + "step": 5382 + }, + { + "epoch": 0.16498099791590046, + "grad_norm": 0.7443484587940874, + "learning_rate": 1.9059590560583083e-05, + "loss": 0.6431, + "step": 5383 + }, + { + "epoch": 0.16501164643864166, + "grad_norm": 1.3489445504627118, + "learning_rate": 1.9059170268435946e-05, + "loss": 0.7779, + "step": 5384 + }, + { + "epoch": 0.16504229496138287, + "grad_norm": 1.5776307403258554, + "learning_rate": 1.9058749887026487e-05, + "loss": 0.8158, + "step": 5385 + }, + { + "epoch": 0.16507294348412407, + "grad_norm": 1.591749269043332, + "learning_rate": 1.9058329416358848e-05, + "loss": 0.8016, + "step": 5386 + }, + { + "epoch": 0.16510359200686528, + "grad_norm": 1.574591553994428, + "learning_rate": 1.9057908856437172e-05, + "loss": 0.8446, + "step": 5387 + }, + { + "epoch": 0.16513424052960649, + "grad_norm": 1.5257240116165474, + "learning_rate": 1.9057488207265603e-05, + "loss": 0.6009, + "step": 5388 + }, + { + "epoch": 0.16516488905234766, + "grad_norm": 1.4784838222799508, + "learning_rate": 1.905706746884828e-05, + "loss": 0.7272, + "step": 5389 + }, + { + "epoch": 0.16519553757508887, + "grad_norm": 1.6237947077949468, + "learning_rate": 1.905664664118936e-05, + "loss": 0.8058, + "step": 5390 + }, + { + "epoch": 0.16522618609783007, + "grad_norm": 1.429635500163255, + "learning_rate": 1.9056225724292985e-05, + "loss": 0.7605, + "step": 5391 + }, + { + "epoch": 0.16525683462057128, + "grad_norm": 1.546505082434372, + "learning_rate": 1.9055804718163297e-05, + "loss": 0.893, + "step": 5392 + }, + { + "epoch": 0.16528748314331249, + "grad_norm": 1.50209195069001, + "learning_rate": 1.9055383622804448e-05, + "loss": 0.786, + "step": 5393 + }, + { + "epoch": 0.1653181316660537, + "grad_norm": 1.611646134205748, + "learning_rate": 1.9054962438220585e-05, + "loss": 0.7474, + "step": 5394 + }, + { + "epoch": 0.1653487801887949, + "grad_norm": 1.4698552884689589, + "learning_rate": 1.9054541164415865e-05, + "loss": 0.8067, + "step": 5395 + }, + { + "epoch": 0.1653794287115361, + "grad_norm": 1.6211132343120267, + "learning_rate": 1.9054119801394432e-05, + "loss": 0.7955, + "step": 5396 + }, + { + "epoch": 0.1654100772342773, + "grad_norm": 1.5351562715613385, + "learning_rate": 1.905369834916044e-05, + "loss": 0.7935, + "step": 5397 + }, + { + "epoch": 0.16544072575701851, + "grad_norm": 1.5068709008256929, + "learning_rate": 1.9053276807718042e-05, + "loss": 0.8376, + "step": 5398 + }, + { + "epoch": 0.16547137427975972, + "grad_norm": 1.6598575255482235, + "learning_rate": 1.9052855177071393e-05, + "loss": 0.8615, + "step": 5399 + }, + { + "epoch": 0.16550202280250093, + "grad_norm": 1.6265951748664154, + "learning_rate": 1.9052433457224642e-05, + "loss": 0.763, + "step": 5400 + }, + { + "epoch": 0.16553267132524213, + "grad_norm": 1.729700450536871, + "learning_rate": 1.905201164818195e-05, + "loss": 0.949, + "step": 5401 + }, + { + "epoch": 0.16556331984798334, + "grad_norm": 1.6750048954932828, + "learning_rate": 1.905158974994747e-05, + "loss": 0.7924, + "step": 5402 + }, + { + "epoch": 0.16559396837072454, + "grad_norm": 1.5189175971766036, + "learning_rate": 1.9051167762525362e-05, + "loss": 0.9106, + "step": 5403 + }, + { + "epoch": 0.16562461689346575, + "grad_norm": 1.4848723729704143, + "learning_rate": 1.905074568591978e-05, + "loss": 0.849, + "step": 5404 + }, + { + "epoch": 0.16565526541620693, + "grad_norm": 1.5675760884798027, + "learning_rate": 1.9050323520134885e-05, + "loss": 0.8317, + "step": 5405 + }, + { + "epoch": 0.16568591393894813, + "grad_norm": 0.8256345059308252, + "learning_rate": 1.904990126517484e-05, + "loss": 0.6793, + "step": 5406 + }, + { + "epoch": 0.16571656246168934, + "grad_norm": 1.746358899951088, + "learning_rate": 1.90494789210438e-05, + "loss": 0.8809, + "step": 5407 + }, + { + "epoch": 0.16574721098443054, + "grad_norm": 1.3979628581899328, + "learning_rate": 1.9049056487745928e-05, + "loss": 0.6913, + "step": 5408 + }, + { + "epoch": 0.16577785950717175, + "grad_norm": 1.4428164629634608, + "learning_rate": 1.9048633965285387e-05, + "loss": 0.8364, + "step": 5409 + }, + { + "epoch": 0.16580850802991295, + "grad_norm": 0.7526696153700009, + "learning_rate": 1.9048211353666344e-05, + "loss": 0.6361, + "step": 5410 + }, + { + "epoch": 0.16583915655265416, + "grad_norm": 1.4214286047837412, + "learning_rate": 1.9047788652892956e-05, + "loss": 0.7692, + "step": 5411 + }, + { + "epoch": 0.16586980507539537, + "grad_norm": 1.5869910927411635, + "learning_rate": 1.9047365862969392e-05, + "loss": 0.6867, + "step": 5412 + }, + { + "epoch": 0.16590045359813657, + "grad_norm": 1.7209067612640059, + "learning_rate": 1.9046942983899818e-05, + "loss": 0.7372, + "step": 5413 + }, + { + "epoch": 0.16593110212087778, + "grad_norm": 1.654770826622081, + "learning_rate": 1.90465200156884e-05, + "loss": 0.807, + "step": 5414 + }, + { + "epoch": 0.16596175064361898, + "grad_norm": 1.39617059194234, + "learning_rate": 1.9046096958339307e-05, + "loss": 0.849, + "step": 5415 + }, + { + "epoch": 0.1659923991663602, + "grad_norm": 1.488995946053501, + "learning_rate": 1.9045673811856705e-05, + "loss": 0.7458, + "step": 5416 + }, + { + "epoch": 0.1660230476891014, + "grad_norm": 1.5978996977104407, + "learning_rate": 1.9045250576244763e-05, + "loss": 0.8267, + "step": 5417 + }, + { + "epoch": 0.1660536962118426, + "grad_norm": 1.411968242328229, + "learning_rate": 1.9044827251507655e-05, + "loss": 0.8454, + "step": 5418 + }, + { + "epoch": 0.1660843447345838, + "grad_norm": 1.5946352646393251, + "learning_rate": 1.904440383764955e-05, + "loss": 0.8832, + "step": 5419 + }, + { + "epoch": 0.16611499325732498, + "grad_norm": 1.5517279802830233, + "learning_rate": 1.9043980334674618e-05, + "loss": 0.8637, + "step": 5420 + }, + { + "epoch": 0.1661456417800662, + "grad_norm": 1.4717316684686947, + "learning_rate": 1.9043556742587034e-05, + "loss": 0.8735, + "step": 5421 + }, + { + "epoch": 0.1661762903028074, + "grad_norm": 1.4232546705353704, + "learning_rate": 1.904313306139097e-05, + "loss": 0.8149, + "step": 5422 + }, + { + "epoch": 0.1662069388255486, + "grad_norm": 1.4280527922630073, + "learning_rate": 1.9042709291090605e-05, + "loss": 0.8389, + "step": 5423 + }, + { + "epoch": 0.1662375873482898, + "grad_norm": 1.4471270168081818, + "learning_rate": 1.904228543169011e-05, + "loss": 0.7172, + "step": 5424 + }, + { + "epoch": 0.166268235871031, + "grad_norm": 1.450432464651844, + "learning_rate": 1.9041861483193663e-05, + "loss": 0.7696, + "step": 5425 + }, + { + "epoch": 0.16629888439377222, + "grad_norm": 0.807677431046659, + "learning_rate": 1.9041437445605444e-05, + "loss": 0.6449, + "step": 5426 + }, + { + "epoch": 0.16632953291651342, + "grad_norm": 1.6981734725901212, + "learning_rate": 1.9041013318929624e-05, + "loss": 0.7825, + "step": 5427 + }, + { + "epoch": 0.16636018143925463, + "grad_norm": 1.6111528668205228, + "learning_rate": 1.904058910317039e-05, + "loss": 0.9141, + "step": 5428 + }, + { + "epoch": 0.16639082996199583, + "grad_norm": 1.615850389084039, + "learning_rate": 1.9040164798331916e-05, + "loss": 0.7277, + "step": 5429 + }, + { + "epoch": 0.16642147848473704, + "grad_norm": 1.6120650056591406, + "learning_rate": 1.9039740404418387e-05, + "loss": 0.7882, + "step": 5430 + }, + { + "epoch": 0.16645212700747825, + "grad_norm": 1.4723871195174918, + "learning_rate": 1.9039315921433984e-05, + "loss": 0.8342, + "step": 5431 + }, + { + "epoch": 0.16648277553021945, + "grad_norm": 1.6812650100723923, + "learning_rate": 1.9038891349382887e-05, + "loss": 0.808, + "step": 5432 + }, + { + "epoch": 0.16651342405296066, + "grad_norm": 1.4657670339119842, + "learning_rate": 1.903846668826928e-05, + "loss": 0.8177, + "step": 5433 + }, + { + "epoch": 0.16654407257570186, + "grad_norm": 1.4788568635923744, + "learning_rate": 1.9038041938097353e-05, + "loss": 0.7555, + "step": 5434 + }, + { + "epoch": 0.16657472109844307, + "grad_norm": 1.5154604906950246, + "learning_rate": 1.9037617098871278e-05, + "loss": 0.8423, + "step": 5435 + }, + { + "epoch": 0.16660536962118425, + "grad_norm": 1.578025987099843, + "learning_rate": 1.9037192170595254e-05, + "loss": 0.7838, + "step": 5436 + }, + { + "epoch": 0.16663601814392545, + "grad_norm": 1.3553720004335268, + "learning_rate": 1.9036767153273465e-05, + "loss": 0.7588, + "step": 5437 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.510931698942422, + "learning_rate": 1.9036342046910095e-05, + "loss": 0.8077, + "step": 5438 + }, + { + "epoch": 0.16669731518940786, + "grad_norm": 0.806115128593979, + "learning_rate": 1.9035916851509336e-05, + "loss": 0.6639, + "step": 5439 + }, + { + "epoch": 0.16672796371214907, + "grad_norm": 1.5279044246369085, + "learning_rate": 1.903549156707537e-05, + "loss": 0.8525, + "step": 5440 + }, + { + "epoch": 0.16675861223489027, + "grad_norm": 1.6245926135144697, + "learning_rate": 1.9035066193612403e-05, + "loss": 0.8866, + "step": 5441 + }, + { + "epoch": 0.16678926075763148, + "grad_norm": 1.547141372250907, + "learning_rate": 1.903464073112461e-05, + "loss": 0.825, + "step": 5442 + }, + { + "epoch": 0.16681990928037269, + "grad_norm": 1.6637568045742603, + "learning_rate": 1.9034215179616195e-05, + "loss": 0.8136, + "step": 5443 + }, + { + "epoch": 0.1668505578031139, + "grad_norm": 1.477578638798913, + "learning_rate": 1.9033789539091345e-05, + "loss": 0.8193, + "step": 5444 + }, + { + "epoch": 0.1668812063258551, + "grad_norm": 1.647612179073431, + "learning_rate": 1.9033363809554255e-05, + "loss": 0.8216, + "step": 5445 + }, + { + "epoch": 0.1669118548485963, + "grad_norm": 1.6060902192581454, + "learning_rate": 1.903293799100912e-05, + "loss": 0.864, + "step": 5446 + }, + { + "epoch": 0.1669425033713375, + "grad_norm": 1.8235908309927558, + "learning_rate": 1.9032512083460136e-05, + "loss": 0.8207, + "step": 5447 + }, + { + "epoch": 0.16697315189407871, + "grad_norm": 1.7985288326283675, + "learning_rate": 1.9032086086911498e-05, + "loss": 0.9537, + "step": 5448 + }, + { + "epoch": 0.16700380041681992, + "grad_norm": 1.6522492472836257, + "learning_rate": 1.9031660001367406e-05, + "loss": 0.828, + "step": 5449 + }, + { + "epoch": 0.16703444893956113, + "grad_norm": 1.5300212064236403, + "learning_rate": 1.9031233826832057e-05, + "loss": 0.7475, + "step": 5450 + }, + { + "epoch": 0.1670650974623023, + "grad_norm": 1.700315366199764, + "learning_rate": 1.903080756330965e-05, + "loss": 0.8325, + "step": 5451 + }, + { + "epoch": 0.1670957459850435, + "grad_norm": 1.5854567316616666, + "learning_rate": 1.9030381210804388e-05, + "loss": 0.8166, + "step": 5452 + }, + { + "epoch": 0.16712639450778471, + "grad_norm": 1.6617646467437128, + "learning_rate": 1.9029954769320466e-05, + "loss": 0.9042, + "step": 5453 + }, + { + "epoch": 0.16715704303052592, + "grad_norm": 1.4526685419666767, + "learning_rate": 1.9029528238862093e-05, + "loss": 0.9055, + "step": 5454 + }, + { + "epoch": 0.16718769155326713, + "grad_norm": 0.830489814672091, + "learning_rate": 1.9029101619433463e-05, + "loss": 0.6175, + "step": 5455 + }, + { + "epoch": 0.16721834007600833, + "grad_norm": 1.6930774264351418, + "learning_rate": 1.9028674911038787e-05, + "loss": 0.8459, + "step": 5456 + }, + { + "epoch": 0.16724898859874954, + "grad_norm": 1.7062880015458195, + "learning_rate": 1.9028248113682267e-05, + "loss": 0.8424, + "step": 5457 + }, + { + "epoch": 0.16727963712149074, + "grad_norm": 1.4053299903209093, + "learning_rate": 1.9027821227368107e-05, + "loss": 0.8031, + "step": 5458 + }, + { + "epoch": 0.16731028564423195, + "grad_norm": 1.8190445006814084, + "learning_rate": 1.9027394252100516e-05, + "loss": 0.8364, + "step": 5459 + }, + { + "epoch": 0.16734093416697315, + "grad_norm": 1.6148609723891272, + "learning_rate": 1.90269671878837e-05, + "loss": 0.81, + "step": 5460 + }, + { + "epoch": 0.16737158268971436, + "grad_norm": 1.6921529489482823, + "learning_rate": 1.9026540034721867e-05, + "loss": 0.6892, + "step": 5461 + }, + { + "epoch": 0.16740223121245557, + "grad_norm": 1.641778970004283, + "learning_rate": 1.9026112792619226e-05, + "loss": 0.7677, + "step": 5462 + }, + { + "epoch": 0.16743287973519677, + "grad_norm": 1.503084869204859, + "learning_rate": 1.9025685461579985e-05, + "loss": 0.7994, + "step": 5463 + }, + { + "epoch": 0.16746352825793798, + "grad_norm": 1.5231870676080355, + "learning_rate": 1.9025258041608353e-05, + "loss": 0.8491, + "step": 5464 + }, + { + "epoch": 0.16749417678067918, + "grad_norm": 1.6537261788011597, + "learning_rate": 1.9024830532708548e-05, + "loss": 0.8888, + "step": 5465 + }, + { + "epoch": 0.1675248253034204, + "grad_norm": 1.5246131004392764, + "learning_rate": 1.9024402934884778e-05, + "loss": 0.7479, + "step": 5466 + }, + { + "epoch": 0.16755547382616157, + "grad_norm": 1.517537419361947, + "learning_rate": 1.9023975248141257e-05, + "loss": 0.8608, + "step": 5467 + }, + { + "epoch": 0.16758612234890277, + "grad_norm": 1.6889441234839757, + "learning_rate": 1.90235474724822e-05, + "loss": 0.9183, + "step": 5468 + }, + { + "epoch": 0.16761677087164398, + "grad_norm": 0.9721742960360344, + "learning_rate": 1.902311960791182e-05, + "loss": 0.6485, + "step": 5469 + }, + { + "epoch": 0.16764741939438518, + "grad_norm": 1.5143676065203902, + "learning_rate": 1.9022691654434334e-05, + "loss": 0.831, + "step": 5470 + }, + { + "epoch": 0.1676780679171264, + "grad_norm": 1.8823383363772506, + "learning_rate": 1.9022263612053957e-05, + "loss": 0.7855, + "step": 5471 + }, + { + "epoch": 0.1677087164398676, + "grad_norm": 1.7493203521701535, + "learning_rate": 1.9021835480774912e-05, + "loss": 0.8238, + "step": 5472 + }, + { + "epoch": 0.1677393649626088, + "grad_norm": 1.6209321286939617, + "learning_rate": 1.902140726060141e-05, + "loss": 0.9163, + "step": 5473 + }, + { + "epoch": 0.16777001348535, + "grad_norm": 1.6802346399542682, + "learning_rate": 1.9020978951537673e-05, + "loss": 0.8634, + "step": 5474 + }, + { + "epoch": 0.1678006620080912, + "grad_norm": 1.5450712740766601, + "learning_rate": 1.9020550553587926e-05, + "loss": 0.8247, + "step": 5475 + }, + { + "epoch": 0.16783131053083242, + "grad_norm": 1.5414712422109926, + "learning_rate": 1.9020122066756382e-05, + "loss": 0.7231, + "step": 5476 + }, + { + "epoch": 0.16786195905357362, + "grad_norm": 1.9547656421370543, + "learning_rate": 1.901969349104727e-05, + "loss": 0.8981, + "step": 5477 + }, + { + "epoch": 0.16789260757631483, + "grad_norm": 1.539623253092208, + "learning_rate": 1.9019264826464813e-05, + "loss": 0.7704, + "step": 5478 + }, + { + "epoch": 0.16792325609905603, + "grad_norm": 1.7732429391052618, + "learning_rate": 1.9018836073013227e-05, + "loss": 0.7911, + "step": 5479 + }, + { + "epoch": 0.16795390462179724, + "grad_norm": 1.4573137168776982, + "learning_rate": 1.9018407230696745e-05, + "loss": 0.7286, + "step": 5480 + }, + { + "epoch": 0.16798455314453845, + "grad_norm": 1.6428965768196742, + "learning_rate": 1.9017978299519584e-05, + "loss": 0.8356, + "step": 5481 + }, + { + "epoch": 0.16801520166727962, + "grad_norm": 1.6189423604578763, + "learning_rate": 1.9017549279485984e-05, + "loss": 0.7834, + "step": 5482 + }, + { + "epoch": 0.16804585019002083, + "grad_norm": 1.456425519383744, + "learning_rate": 1.9017120170600156e-05, + "loss": 0.794, + "step": 5483 + }, + { + "epoch": 0.16807649871276203, + "grad_norm": 1.4310713086227422, + "learning_rate": 1.9016690972866342e-05, + "loss": 0.8106, + "step": 5484 + }, + { + "epoch": 0.16810714723550324, + "grad_norm": 1.7720665062878143, + "learning_rate": 1.9016261686288763e-05, + "loss": 0.8167, + "step": 5485 + }, + { + "epoch": 0.16813779575824445, + "grad_norm": 1.6069354567770884, + "learning_rate": 1.901583231087165e-05, + "loss": 0.8019, + "step": 5486 + }, + { + "epoch": 0.16816844428098565, + "grad_norm": 1.734988113860796, + "learning_rate": 1.9015402846619232e-05, + "loss": 0.7756, + "step": 5487 + }, + { + "epoch": 0.16819909280372686, + "grad_norm": 1.6433409366332643, + "learning_rate": 1.9014973293535744e-05, + "loss": 0.8865, + "step": 5488 + }, + { + "epoch": 0.16822974132646806, + "grad_norm": 1.660515192950757, + "learning_rate": 1.9014543651625418e-05, + "loss": 0.8622, + "step": 5489 + }, + { + "epoch": 0.16826038984920927, + "grad_norm": 1.4252977661692232, + "learning_rate": 1.9014113920892486e-05, + "loss": 0.7707, + "step": 5490 + }, + { + "epoch": 0.16829103837195047, + "grad_norm": 1.5977771044498208, + "learning_rate": 1.9013684101341187e-05, + "loss": 0.8469, + "step": 5491 + }, + { + "epoch": 0.16832168689469168, + "grad_norm": 1.5244853919127157, + "learning_rate": 1.901325419297575e-05, + "loss": 0.8144, + "step": 5492 + }, + { + "epoch": 0.16835233541743289, + "grad_norm": 1.6555560891654373, + "learning_rate": 1.901282419580041e-05, + "loss": 0.8901, + "step": 5493 + }, + { + "epoch": 0.1683829839401741, + "grad_norm": 1.6028861586369811, + "learning_rate": 1.9012394109819415e-05, + "loss": 0.8135, + "step": 5494 + }, + { + "epoch": 0.1684136324629153, + "grad_norm": 1.64115311010093, + "learning_rate": 1.9011963935036986e-05, + "loss": 0.7601, + "step": 5495 + }, + { + "epoch": 0.1684442809856565, + "grad_norm": 1.58739548611843, + "learning_rate": 1.901153367145738e-05, + "loss": 0.7987, + "step": 5496 + }, + { + "epoch": 0.1684749295083977, + "grad_norm": 1.4809116375987752, + "learning_rate": 1.901110331908482e-05, + "loss": 0.8107, + "step": 5497 + }, + { + "epoch": 0.1685055780311389, + "grad_norm": 1.643640414539237, + "learning_rate": 1.9010672877923555e-05, + "loss": 0.8752, + "step": 5498 + }, + { + "epoch": 0.1685362265538801, + "grad_norm": 1.5528884546193433, + "learning_rate": 1.9010242347977826e-05, + "loss": 0.8482, + "step": 5499 + }, + { + "epoch": 0.1685668750766213, + "grad_norm": 1.6334033321520203, + "learning_rate": 1.900981172925187e-05, + "loss": 0.953, + "step": 5500 + }, + { + "epoch": 0.1685975235993625, + "grad_norm": 1.6737634871188178, + "learning_rate": 1.900938102174994e-05, + "loss": 0.8648, + "step": 5501 + }, + { + "epoch": 0.1686281721221037, + "grad_norm": 1.6081416335519165, + "learning_rate": 1.9008950225476268e-05, + "loss": 0.8266, + "step": 5502 + }, + { + "epoch": 0.16865882064484491, + "grad_norm": 1.6284874205179427, + "learning_rate": 1.9008519340435106e-05, + "loss": 0.7718, + "step": 5503 + }, + { + "epoch": 0.16868946916758612, + "grad_norm": 0.9015697625254583, + "learning_rate": 1.90080883666307e-05, + "loss": 0.6628, + "step": 5504 + }, + { + "epoch": 0.16872011769032733, + "grad_norm": 0.8101746858947897, + "learning_rate": 1.9007657304067294e-05, + "loss": 0.6659, + "step": 5505 + }, + { + "epoch": 0.16875076621306853, + "grad_norm": 2.1060139116671386, + "learning_rate": 1.9007226152749135e-05, + "loss": 0.876, + "step": 5506 + }, + { + "epoch": 0.16878141473580974, + "grad_norm": 0.7205737091455732, + "learning_rate": 1.900679491268047e-05, + "loss": 0.6503, + "step": 5507 + }, + { + "epoch": 0.16881206325855094, + "grad_norm": 1.6568370416800782, + "learning_rate": 1.9006363583865554e-05, + "loss": 0.7769, + "step": 5508 + }, + { + "epoch": 0.16884271178129215, + "grad_norm": 0.8678503607722826, + "learning_rate": 1.900593216630863e-05, + "loss": 0.6695, + "step": 5509 + }, + { + "epoch": 0.16887336030403335, + "grad_norm": 1.5932573911284689, + "learning_rate": 1.9005500660013954e-05, + "loss": 0.7876, + "step": 5510 + }, + { + "epoch": 0.16890400882677456, + "grad_norm": 1.7027231805945109, + "learning_rate": 1.9005069064985778e-05, + "loss": 0.8174, + "step": 5511 + }, + { + "epoch": 0.16893465734951577, + "grad_norm": 1.6534777588642602, + "learning_rate": 1.900463738122835e-05, + "loss": 0.7729, + "step": 5512 + }, + { + "epoch": 0.16896530587225694, + "grad_norm": 0.7554220637085975, + "learning_rate": 1.9004205608745924e-05, + "loss": 0.6609, + "step": 5513 + }, + { + "epoch": 0.16899595439499815, + "grad_norm": 1.6641966816159033, + "learning_rate": 1.9003773747542756e-05, + "loss": 0.7841, + "step": 5514 + }, + { + "epoch": 0.16902660291773935, + "grad_norm": 1.6866383467474195, + "learning_rate": 1.9003341797623103e-05, + "loss": 0.778, + "step": 5515 + }, + { + "epoch": 0.16905725144048056, + "grad_norm": 1.716147490082983, + "learning_rate": 1.900290975899122e-05, + "loss": 0.8082, + "step": 5516 + }, + { + "epoch": 0.16908789996322177, + "grad_norm": 1.5365607014415665, + "learning_rate": 1.9002477631651368e-05, + "loss": 0.8253, + "step": 5517 + }, + { + "epoch": 0.16911854848596297, + "grad_norm": 0.7224744484357093, + "learning_rate": 1.9002045415607797e-05, + "loss": 0.6511, + "step": 5518 + }, + { + "epoch": 0.16914919700870418, + "grad_norm": 1.3567294977444861, + "learning_rate": 1.9001613110864768e-05, + "loss": 0.7878, + "step": 5519 + }, + { + "epoch": 0.16917984553144538, + "grad_norm": 1.639872320037874, + "learning_rate": 1.900118071742654e-05, + "loss": 0.7673, + "step": 5520 + }, + { + "epoch": 0.1692104940541866, + "grad_norm": 1.7129161119539325, + "learning_rate": 1.9000748235297378e-05, + "loss": 0.7577, + "step": 5521 + }, + { + "epoch": 0.1692411425769278, + "grad_norm": 1.5045773682878703, + "learning_rate": 1.9000315664481544e-05, + "loss": 0.7264, + "step": 5522 + }, + { + "epoch": 0.169271791099669, + "grad_norm": 1.4442317521375052, + "learning_rate": 1.8999883004983292e-05, + "loss": 0.7968, + "step": 5523 + }, + { + "epoch": 0.1693024396224102, + "grad_norm": 1.7116919331819391, + "learning_rate": 1.899945025680689e-05, + "loss": 0.7776, + "step": 5524 + }, + { + "epoch": 0.1693330881451514, + "grad_norm": 1.5965903933052146, + "learning_rate": 1.8999017419956606e-05, + "loss": 0.8117, + "step": 5525 + }, + { + "epoch": 0.16936373666789262, + "grad_norm": 1.5587866063850628, + "learning_rate": 1.8998584494436697e-05, + "loss": 0.8439, + "step": 5526 + }, + { + "epoch": 0.16939438519063382, + "grad_norm": 1.605199049706091, + "learning_rate": 1.8998151480251438e-05, + "loss": 0.8766, + "step": 5527 + }, + { + "epoch": 0.16942503371337503, + "grad_norm": 1.516551845821237, + "learning_rate": 1.8997718377405083e-05, + "loss": 0.7974, + "step": 5528 + }, + { + "epoch": 0.1694556822361162, + "grad_norm": 1.4617043497822455, + "learning_rate": 1.899728518590191e-05, + "loss": 0.743, + "step": 5529 + }, + { + "epoch": 0.1694863307588574, + "grad_norm": 2.1103959409119306, + "learning_rate": 1.8996851905746185e-05, + "loss": 0.7384, + "step": 5530 + }, + { + "epoch": 0.16951697928159862, + "grad_norm": 1.7630137050695656, + "learning_rate": 1.8996418536942177e-05, + "loss": 0.8401, + "step": 5531 + }, + { + "epoch": 0.16954762780433982, + "grad_norm": 1.6546191035731632, + "learning_rate": 1.8995985079494152e-05, + "loss": 0.7952, + "step": 5532 + }, + { + "epoch": 0.16957827632708103, + "grad_norm": 1.5580029565596956, + "learning_rate": 1.8995551533406385e-05, + "loss": 0.8252, + "step": 5533 + }, + { + "epoch": 0.16960892484982223, + "grad_norm": 0.8384917181776674, + "learning_rate": 1.899511789868315e-05, + "loss": 0.6585, + "step": 5534 + }, + { + "epoch": 0.16963957337256344, + "grad_norm": 1.675589586629327, + "learning_rate": 1.899468417532871e-05, + "loss": 0.8686, + "step": 5535 + }, + { + "epoch": 0.16967022189530465, + "grad_norm": 1.6905413591591538, + "learning_rate": 1.899425036334735e-05, + "loss": 0.9352, + "step": 5536 + }, + { + "epoch": 0.16970087041804585, + "grad_norm": 1.555572755984012, + "learning_rate": 1.8993816462743343e-05, + "loss": 0.856, + "step": 5537 + }, + { + "epoch": 0.16973151894078706, + "grad_norm": 0.7753926320886779, + "learning_rate": 1.899338247352096e-05, + "loss": 0.6895, + "step": 5538 + }, + { + "epoch": 0.16976216746352826, + "grad_norm": 1.4648360725981557, + "learning_rate": 1.8992948395684476e-05, + "loss": 0.7569, + "step": 5539 + }, + { + "epoch": 0.16979281598626947, + "grad_norm": 1.504605444659428, + "learning_rate": 1.899251422923817e-05, + "loss": 0.81, + "step": 5540 + }, + { + "epoch": 0.16982346450901067, + "grad_norm": 0.7197856150159566, + "learning_rate": 1.8992079974186325e-05, + "loss": 0.6263, + "step": 5541 + }, + { + "epoch": 0.16985411303175188, + "grad_norm": 1.5046454863216885, + "learning_rate": 1.899164563053321e-05, + "loss": 0.7284, + "step": 5542 + }, + { + "epoch": 0.16988476155449309, + "grad_norm": 1.5619643098170009, + "learning_rate": 1.899121119828311e-05, + "loss": 0.8933, + "step": 5543 + }, + { + "epoch": 0.16991541007723426, + "grad_norm": 1.60515686000317, + "learning_rate": 1.899077667744031e-05, + "loss": 0.7254, + "step": 5544 + }, + { + "epoch": 0.16994605859997547, + "grad_norm": 1.570639363044094, + "learning_rate": 1.8990342068009083e-05, + "loss": 0.8261, + "step": 5545 + }, + { + "epoch": 0.16997670712271667, + "grad_norm": 0.8242617963658165, + "learning_rate": 1.8989907369993717e-05, + "loss": 0.6402, + "step": 5546 + }, + { + "epoch": 0.17000735564545788, + "grad_norm": 1.7092832178367188, + "learning_rate": 1.8989472583398494e-05, + "loss": 0.8126, + "step": 5547 + }, + { + "epoch": 0.1700380041681991, + "grad_norm": 1.5398211798458068, + "learning_rate": 1.89890377082277e-05, + "loss": 0.8403, + "step": 5548 + }, + { + "epoch": 0.1700686526909403, + "grad_norm": 1.5335392976030187, + "learning_rate": 1.8988602744485615e-05, + "loss": 0.8607, + "step": 5549 + }, + { + "epoch": 0.1700993012136815, + "grad_norm": 1.4814640786799955, + "learning_rate": 1.8988167692176526e-05, + "loss": 0.8525, + "step": 5550 + }, + { + "epoch": 0.1701299497364227, + "grad_norm": 1.7490809608683282, + "learning_rate": 1.8987732551304718e-05, + "loss": 0.756, + "step": 5551 + }, + { + "epoch": 0.1701605982591639, + "grad_norm": 1.8261479461149808, + "learning_rate": 1.8987297321874487e-05, + "loss": 0.8761, + "step": 5552 + }, + { + "epoch": 0.17019124678190511, + "grad_norm": 1.5729712474791269, + "learning_rate": 1.8986862003890113e-05, + "loss": 0.7886, + "step": 5553 + }, + { + "epoch": 0.17022189530464632, + "grad_norm": 0.7999797013956792, + "learning_rate": 1.898642659735589e-05, + "loss": 0.646, + "step": 5554 + }, + { + "epoch": 0.17025254382738753, + "grad_norm": 1.6663838978572332, + "learning_rate": 1.8985991102276107e-05, + "loss": 0.8334, + "step": 5555 + }, + { + "epoch": 0.17028319235012873, + "grad_norm": 1.7753410223391985, + "learning_rate": 1.8985555518655055e-05, + "loss": 0.855, + "step": 5556 + }, + { + "epoch": 0.17031384087286994, + "grad_norm": 0.712499617077553, + "learning_rate": 1.8985119846497024e-05, + "loss": 0.6656, + "step": 5557 + }, + { + "epoch": 0.17034448939561114, + "grad_norm": 1.6766990521482965, + "learning_rate": 1.8984684085806305e-05, + "loss": 0.8767, + "step": 5558 + }, + { + "epoch": 0.17037513791835235, + "grad_norm": 1.523965235588371, + "learning_rate": 1.89842482365872e-05, + "loss": 0.8778, + "step": 5559 + }, + { + "epoch": 0.17040578644109353, + "grad_norm": 1.576908535150252, + "learning_rate": 1.8983812298843997e-05, + "loss": 0.9226, + "step": 5560 + }, + { + "epoch": 0.17043643496383473, + "grad_norm": 1.5303047623616113, + "learning_rate": 1.8983376272580992e-05, + "loss": 0.6997, + "step": 5561 + }, + { + "epoch": 0.17046708348657594, + "grad_norm": 0.7436433496750735, + "learning_rate": 1.8982940157802482e-05, + "loss": 0.6235, + "step": 5562 + }, + { + "epoch": 0.17049773200931714, + "grad_norm": 0.7172410809598859, + "learning_rate": 1.8982503954512766e-05, + "loss": 0.6562, + "step": 5563 + }, + { + "epoch": 0.17052838053205835, + "grad_norm": 1.689606667628948, + "learning_rate": 1.898206766271614e-05, + "loss": 0.8492, + "step": 5564 + }, + { + "epoch": 0.17055902905479955, + "grad_norm": 1.4092364141907836, + "learning_rate": 1.89816312824169e-05, + "loss": 0.7554, + "step": 5565 + }, + { + "epoch": 0.17058967757754076, + "grad_norm": 0.708875412525519, + "learning_rate": 1.898119481361935e-05, + "loss": 0.6299, + "step": 5566 + }, + { + "epoch": 0.17062032610028197, + "grad_norm": 1.5396167769622775, + "learning_rate": 1.8980758256327794e-05, + "loss": 0.6877, + "step": 5567 + }, + { + "epoch": 0.17065097462302317, + "grad_norm": 1.7103916929474312, + "learning_rate": 1.8980321610546525e-05, + "loss": 0.8238, + "step": 5568 + }, + { + "epoch": 0.17068162314576438, + "grad_norm": 1.7905942684790925, + "learning_rate": 1.897988487627985e-05, + "loss": 0.8516, + "step": 5569 + }, + { + "epoch": 0.17071227166850558, + "grad_norm": 1.7112376352297836, + "learning_rate": 1.8979448053532074e-05, + "loss": 0.9154, + "step": 5570 + }, + { + "epoch": 0.1707429201912468, + "grad_norm": 1.6592503148934548, + "learning_rate": 1.8979011142307494e-05, + "loss": 0.922, + "step": 5571 + }, + { + "epoch": 0.170773568713988, + "grad_norm": 0.8447970009055947, + "learning_rate": 1.8978574142610425e-05, + "loss": 0.6564, + "step": 5572 + }, + { + "epoch": 0.1708042172367292, + "grad_norm": 1.523898873362287, + "learning_rate": 1.8978137054445165e-05, + "loss": 0.878, + "step": 5573 + }, + { + "epoch": 0.1708348657594704, + "grad_norm": 1.5172703272515482, + "learning_rate": 1.8977699877816022e-05, + "loss": 0.7224, + "step": 5574 + }, + { + "epoch": 0.17086551428221158, + "grad_norm": 0.707672176070718, + "learning_rate": 1.8977262612727308e-05, + "loss": 0.648, + "step": 5575 + }, + { + "epoch": 0.1708961628049528, + "grad_norm": 1.4904617939758988, + "learning_rate": 1.8976825259183326e-05, + "loss": 0.8466, + "step": 5576 + }, + { + "epoch": 0.170926811327694, + "grad_norm": 1.4839997829911566, + "learning_rate": 1.897638781718839e-05, + "loss": 0.7317, + "step": 5577 + }, + { + "epoch": 0.1709574598504352, + "grad_norm": 1.697276445957093, + "learning_rate": 1.8975950286746808e-05, + "loss": 0.8306, + "step": 5578 + }, + { + "epoch": 0.1709881083731764, + "grad_norm": 1.5465888182573295, + "learning_rate": 1.897551266786289e-05, + "loss": 0.8133, + "step": 5579 + }, + { + "epoch": 0.1710187568959176, + "grad_norm": 1.5637271813105729, + "learning_rate": 1.897507496054095e-05, + "loss": 0.6926, + "step": 5580 + }, + { + "epoch": 0.17104940541865882, + "grad_norm": 1.4114150608529896, + "learning_rate": 1.89746371647853e-05, + "loss": 0.8137, + "step": 5581 + }, + { + "epoch": 0.17108005394140002, + "grad_norm": 1.6628666454530072, + "learning_rate": 1.8974199280600253e-05, + "loss": 0.8364, + "step": 5582 + }, + { + "epoch": 0.17111070246414123, + "grad_norm": 1.794641338363539, + "learning_rate": 1.8973761307990125e-05, + "loss": 0.9409, + "step": 5583 + }, + { + "epoch": 0.17114135098688243, + "grad_norm": 0.7487361352580151, + "learning_rate": 1.8973323246959232e-05, + "loss": 0.6392, + "step": 5584 + }, + { + "epoch": 0.17117199950962364, + "grad_norm": 1.711336884798996, + "learning_rate": 1.8972885097511885e-05, + "loss": 0.8974, + "step": 5585 + }, + { + "epoch": 0.17120264803236485, + "grad_norm": 1.6233563767385137, + "learning_rate": 1.897244685965241e-05, + "loss": 0.7319, + "step": 5586 + }, + { + "epoch": 0.17123329655510605, + "grad_norm": 1.5027975890262661, + "learning_rate": 1.8972008533385116e-05, + "loss": 0.8367, + "step": 5587 + }, + { + "epoch": 0.17126394507784726, + "grad_norm": 1.837276931358921, + "learning_rate": 1.897157011871433e-05, + "loss": 0.8211, + "step": 5588 + }, + { + "epoch": 0.17129459360058846, + "grad_norm": 1.426816751338734, + "learning_rate": 1.8971131615644366e-05, + "loss": 0.7274, + "step": 5589 + }, + { + "epoch": 0.17132524212332967, + "grad_norm": 1.5308432668020218, + "learning_rate": 1.897069302417955e-05, + "loss": 0.8891, + "step": 5590 + }, + { + "epoch": 0.17135589064607085, + "grad_norm": 1.3986706295377414, + "learning_rate": 1.8970254344324197e-05, + "loss": 0.6581, + "step": 5591 + }, + { + "epoch": 0.17138653916881205, + "grad_norm": 1.5571779450596217, + "learning_rate": 1.8969815576082635e-05, + "loss": 0.952, + "step": 5592 + }, + { + "epoch": 0.17141718769155326, + "grad_norm": 1.6794825078234477, + "learning_rate": 1.8969376719459183e-05, + "loss": 0.7625, + "step": 5593 + }, + { + "epoch": 0.17144783621429446, + "grad_norm": 1.5391432070479327, + "learning_rate": 1.896893777445817e-05, + "loss": 0.7699, + "step": 5594 + }, + { + "epoch": 0.17147848473703567, + "grad_norm": 1.5512700166059494, + "learning_rate": 1.8968498741083916e-05, + "loss": 0.809, + "step": 5595 + }, + { + "epoch": 0.17150913325977687, + "grad_norm": 1.7689188389106367, + "learning_rate": 1.8968059619340754e-05, + "loss": 0.8259, + "step": 5596 + }, + { + "epoch": 0.17153978178251808, + "grad_norm": 1.3225901131400541, + "learning_rate": 1.8967620409232997e-05, + "loss": 0.6972, + "step": 5597 + }, + { + "epoch": 0.17157043030525929, + "grad_norm": 1.465286774458542, + "learning_rate": 1.8967181110764986e-05, + "loss": 0.796, + "step": 5598 + }, + { + "epoch": 0.1716010788280005, + "grad_norm": 1.5385802727870355, + "learning_rate": 1.896674172394105e-05, + "loss": 0.8201, + "step": 5599 + }, + { + "epoch": 0.1716317273507417, + "grad_norm": 1.3556665084939026, + "learning_rate": 1.896630224876551e-05, + "loss": 0.8351, + "step": 5600 + }, + { + "epoch": 0.1716623758734829, + "grad_norm": 1.420244748024456, + "learning_rate": 1.89658626852427e-05, + "loss": 0.7125, + "step": 5601 + }, + { + "epoch": 0.1716930243962241, + "grad_norm": 1.575280128647612, + "learning_rate": 1.896542303337695e-05, + "loss": 0.8873, + "step": 5602 + }, + { + "epoch": 0.17172367291896531, + "grad_norm": 1.5541372979853807, + "learning_rate": 1.8964983293172593e-05, + "loss": 0.8803, + "step": 5603 + }, + { + "epoch": 0.17175432144170652, + "grad_norm": 1.7852006498378716, + "learning_rate": 1.896454346463396e-05, + "loss": 0.833, + "step": 5604 + }, + { + "epoch": 0.17178496996444773, + "grad_norm": 1.4730702626208145, + "learning_rate": 1.896410354776539e-05, + "loss": 0.7618, + "step": 5605 + }, + { + "epoch": 0.1718156184871889, + "grad_norm": 1.5800605368347125, + "learning_rate": 1.896366354257121e-05, + "loss": 0.7924, + "step": 5606 + }, + { + "epoch": 0.1718462670099301, + "grad_norm": 0.8951162971541547, + "learning_rate": 1.896322344905576e-05, + "loss": 0.6926, + "step": 5607 + }, + { + "epoch": 0.17187691553267131, + "grad_norm": 0.7640806296610878, + "learning_rate": 1.8962783267223378e-05, + "loss": 0.6424, + "step": 5608 + }, + { + "epoch": 0.17190756405541252, + "grad_norm": 1.5716006992653917, + "learning_rate": 1.89623429970784e-05, + "loss": 0.7224, + "step": 5609 + }, + { + "epoch": 0.17193821257815373, + "grad_norm": 1.677743044814173, + "learning_rate": 1.8961902638625164e-05, + "loss": 0.8152, + "step": 5610 + }, + { + "epoch": 0.17196886110089493, + "grad_norm": 1.6039964640575683, + "learning_rate": 1.8961462191868007e-05, + "loss": 0.8022, + "step": 5611 + }, + { + "epoch": 0.17199950962363614, + "grad_norm": 1.5767791635683874, + "learning_rate": 1.8961021656811273e-05, + "loss": 0.7807, + "step": 5612 + }, + { + "epoch": 0.17203015814637734, + "grad_norm": 1.510878035319736, + "learning_rate": 1.8960581033459296e-05, + "loss": 0.8271, + "step": 5613 + }, + { + "epoch": 0.17206080666911855, + "grad_norm": 1.6216171363982848, + "learning_rate": 1.8960140321816424e-05, + "loss": 0.8125, + "step": 5614 + }, + { + "epoch": 0.17209145519185975, + "grad_norm": 1.6492492641051537, + "learning_rate": 1.8959699521886995e-05, + "loss": 0.8319, + "step": 5615 + }, + { + "epoch": 0.17212210371460096, + "grad_norm": 1.6270093121282578, + "learning_rate": 1.895925863367535e-05, + "loss": 0.8075, + "step": 5616 + }, + { + "epoch": 0.17215275223734217, + "grad_norm": 1.4796025137362794, + "learning_rate": 1.8958817657185845e-05, + "loss": 0.8363, + "step": 5617 + }, + { + "epoch": 0.17218340076008337, + "grad_norm": 1.622557073003092, + "learning_rate": 1.8958376592422815e-05, + "loss": 0.8748, + "step": 5618 + }, + { + "epoch": 0.17221404928282458, + "grad_norm": 1.810597930736382, + "learning_rate": 1.8957935439390606e-05, + "loss": 0.7777, + "step": 5619 + }, + { + "epoch": 0.17224469780556578, + "grad_norm": 1.5327381657231682, + "learning_rate": 1.8957494198093572e-05, + "loss": 0.7448, + "step": 5620 + }, + { + "epoch": 0.172275346328307, + "grad_norm": 0.9560629590340878, + "learning_rate": 1.895705286853605e-05, + "loss": 0.6746, + "step": 5621 + }, + { + "epoch": 0.17230599485104817, + "grad_norm": 1.5442551780921328, + "learning_rate": 1.8956611450722397e-05, + "loss": 0.7527, + "step": 5622 + }, + { + "epoch": 0.17233664337378937, + "grad_norm": 0.8805537980914926, + "learning_rate": 1.8956169944656962e-05, + "loss": 0.6584, + "step": 5623 + }, + { + "epoch": 0.17236729189653058, + "grad_norm": 1.7909441095371126, + "learning_rate": 1.8955728350344088e-05, + "loss": 0.8351, + "step": 5624 + }, + { + "epoch": 0.17239794041927178, + "grad_norm": 1.3708042018877193, + "learning_rate": 1.8955286667788134e-05, + "loss": 0.8271, + "step": 5625 + }, + { + "epoch": 0.172428588942013, + "grad_norm": 1.5557563465792283, + "learning_rate": 1.8954844896993448e-05, + "loss": 0.854, + "step": 5626 + }, + { + "epoch": 0.1724592374647542, + "grad_norm": 0.7782357314876454, + "learning_rate": 1.8954403037964387e-05, + "loss": 0.6662, + "step": 5627 + }, + { + "epoch": 0.1724898859874954, + "grad_norm": 1.7789863691447512, + "learning_rate": 1.89539610907053e-05, + "loss": 0.8133, + "step": 5628 + }, + { + "epoch": 0.1725205345102366, + "grad_norm": 1.4551165256087857, + "learning_rate": 1.895351905522054e-05, + "loss": 0.799, + "step": 5629 + }, + { + "epoch": 0.1725511830329778, + "grad_norm": 1.5882033570855563, + "learning_rate": 1.8953076931514473e-05, + "loss": 0.7389, + "step": 5630 + }, + { + "epoch": 0.17258183155571902, + "grad_norm": 1.5341330512176177, + "learning_rate": 1.895263471959144e-05, + "loss": 0.8594, + "step": 5631 + }, + { + "epoch": 0.17261248007846022, + "grad_norm": 1.5742424967823458, + "learning_rate": 1.8952192419455814e-05, + "loss": 0.7769, + "step": 5632 + }, + { + "epoch": 0.17264312860120143, + "grad_norm": 0.9248865475676729, + "learning_rate": 1.895175003111194e-05, + "loss": 0.6831, + "step": 5633 + }, + { + "epoch": 0.17267377712394263, + "grad_norm": 0.8015050702513912, + "learning_rate": 1.8951307554564185e-05, + "loss": 0.6398, + "step": 5634 + }, + { + "epoch": 0.17270442564668384, + "grad_norm": 1.5124897491464055, + "learning_rate": 1.8950864989816908e-05, + "loss": 0.7487, + "step": 5635 + }, + { + "epoch": 0.17273507416942505, + "grad_norm": 1.7909875360252694, + "learning_rate": 1.8950422336874467e-05, + "loss": 0.8556, + "step": 5636 + }, + { + "epoch": 0.17276572269216625, + "grad_norm": 1.4534047778683972, + "learning_rate": 1.8949979595741222e-05, + "loss": 0.6193, + "step": 5637 + }, + { + "epoch": 0.17279637121490743, + "grad_norm": 1.5055226169369964, + "learning_rate": 1.894953676642154e-05, + "loss": 0.8121, + "step": 5638 + }, + { + "epoch": 0.17282701973764864, + "grad_norm": 1.61120274212269, + "learning_rate": 1.8949093848919783e-05, + "loss": 0.8542, + "step": 5639 + }, + { + "epoch": 0.17285766826038984, + "grad_norm": 1.874224978174178, + "learning_rate": 1.8948650843240317e-05, + "loss": 0.8817, + "step": 5640 + }, + { + "epoch": 0.17288831678313105, + "grad_norm": 1.4734800382496454, + "learning_rate": 1.89482077493875e-05, + "loss": 0.7703, + "step": 5641 + }, + { + "epoch": 0.17291896530587225, + "grad_norm": 1.567507086344462, + "learning_rate": 1.8947764567365704e-05, + "loss": 1.0177, + "step": 5642 + }, + { + "epoch": 0.17294961382861346, + "grad_norm": 1.4876701933001044, + "learning_rate": 1.8947321297179295e-05, + "loss": 0.7961, + "step": 5643 + }, + { + "epoch": 0.17298026235135466, + "grad_norm": 1.5411979942838494, + "learning_rate": 1.894687793883264e-05, + "loss": 0.8444, + "step": 5644 + }, + { + "epoch": 0.17301091087409587, + "grad_norm": 1.5996821191190596, + "learning_rate": 1.894643449233011e-05, + "loss": 0.853, + "step": 5645 + }, + { + "epoch": 0.17304155939683707, + "grad_norm": 1.5467262760751055, + "learning_rate": 1.8945990957676067e-05, + "loss": 0.8444, + "step": 5646 + }, + { + "epoch": 0.17307220791957828, + "grad_norm": 1.5475260261972879, + "learning_rate": 1.8945547334874888e-05, + "loss": 0.8275, + "step": 5647 + }, + { + "epoch": 0.17310285644231949, + "grad_norm": 1.5558659196023343, + "learning_rate": 1.894510362393094e-05, + "loss": 0.8228, + "step": 5648 + }, + { + "epoch": 0.1731335049650607, + "grad_norm": 1.6219528447690277, + "learning_rate": 1.89446598248486e-05, + "loss": 0.8775, + "step": 5649 + }, + { + "epoch": 0.1731641534878019, + "grad_norm": 1.623248560084726, + "learning_rate": 1.894421593763224e-05, + "loss": 0.8358, + "step": 5650 + }, + { + "epoch": 0.1731948020105431, + "grad_norm": 1.5548797884115169, + "learning_rate": 1.8943771962286227e-05, + "loss": 0.8139, + "step": 5651 + }, + { + "epoch": 0.1732254505332843, + "grad_norm": 1.1765765973578806, + "learning_rate": 1.8943327898814944e-05, + "loss": 0.6812, + "step": 5652 + }, + { + "epoch": 0.1732560990560255, + "grad_norm": 1.4158604215492154, + "learning_rate": 1.8942883747222764e-05, + "loss": 0.7642, + "step": 5653 + }, + { + "epoch": 0.1732867475787667, + "grad_norm": 1.7576318216373463, + "learning_rate": 1.894243950751406e-05, + "loss": 0.8439, + "step": 5654 + }, + { + "epoch": 0.1733173961015079, + "grad_norm": 1.5422875628473356, + "learning_rate": 1.8941995179693214e-05, + "loss": 0.907, + "step": 5655 + }, + { + "epoch": 0.1733480446242491, + "grad_norm": 1.7026276426541846, + "learning_rate": 1.89415507637646e-05, + "loss": 0.7918, + "step": 5656 + }, + { + "epoch": 0.1733786931469903, + "grad_norm": 1.7016892777498578, + "learning_rate": 1.8941106259732594e-05, + "loss": 0.8556, + "step": 5657 + }, + { + "epoch": 0.17340934166973151, + "grad_norm": 1.4250544386868027, + "learning_rate": 1.8940661667601587e-05, + "loss": 0.7972, + "step": 5658 + }, + { + "epoch": 0.17343999019247272, + "grad_norm": 0.7861440300117235, + "learning_rate": 1.894021698737595e-05, + "loss": 0.6613, + "step": 5659 + }, + { + "epoch": 0.17347063871521393, + "grad_norm": 1.5019742300853305, + "learning_rate": 1.893977221906007e-05, + "loss": 0.7737, + "step": 5660 + }, + { + "epoch": 0.17350128723795513, + "grad_norm": 1.5546124677099233, + "learning_rate": 1.8939327362658323e-05, + "loss": 0.8029, + "step": 5661 + }, + { + "epoch": 0.17353193576069634, + "grad_norm": 1.6206331476265576, + "learning_rate": 1.8938882418175097e-05, + "loss": 0.8334, + "step": 5662 + }, + { + "epoch": 0.17356258428343754, + "grad_norm": 1.9372800860714814, + "learning_rate": 1.8938437385614778e-05, + "loss": 0.8559, + "step": 5663 + }, + { + "epoch": 0.17359323280617875, + "grad_norm": 1.6407523934289416, + "learning_rate": 1.8937992264981747e-05, + "loss": 0.8866, + "step": 5664 + }, + { + "epoch": 0.17362388132891995, + "grad_norm": 1.5242876698447183, + "learning_rate": 1.893754705628039e-05, + "loss": 0.7864, + "step": 5665 + }, + { + "epoch": 0.17365452985166116, + "grad_norm": 0.8340878271224749, + "learning_rate": 1.89371017595151e-05, + "loss": 0.6484, + "step": 5666 + }, + { + "epoch": 0.17368517837440237, + "grad_norm": 0.7487168500790322, + "learning_rate": 1.8936656374690256e-05, + "loss": 0.6681, + "step": 5667 + }, + { + "epoch": 0.17371582689714357, + "grad_norm": 1.5967237958123057, + "learning_rate": 1.893621090181025e-05, + "loss": 0.8007, + "step": 5668 + }, + { + "epoch": 0.17374647541988475, + "grad_norm": 1.7003796403895208, + "learning_rate": 1.8935765340879472e-05, + "loss": 0.8502, + "step": 5669 + }, + { + "epoch": 0.17377712394262596, + "grad_norm": 1.4649209183301326, + "learning_rate": 1.8935319691902312e-05, + "loss": 0.7077, + "step": 5670 + }, + { + "epoch": 0.17380777246536716, + "grad_norm": 1.7266965466741038, + "learning_rate": 1.893487395488316e-05, + "loss": 0.8854, + "step": 5671 + }, + { + "epoch": 0.17383842098810837, + "grad_norm": 2.80492506905198, + "learning_rate": 1.893442812982641e-05, + "loss": 0.7759, + "step": 5672 + }, + { + "epoch": 0.17386906951084957, + "grad_norm": 1.5310059649564964, + "learning_rate": 1.8933982216736452e-05, + "loss": 0.844, + "step": 5673 + }, + { + "epoch": 0.17389971803359078, + "grad_norm": 1.6133539965913255, + "learning_rate": 1.8933536215617684e-05, + "loss": 0.7867, + "step": 5674 + }, + { + "epoch": 0.17393036655633198, + "grad_norm": 1.8058927504313702, + "learning_rate": 1.8933090126474497e-05, + "loss": 0.8289, + "step": 5675 + }, + { + "epoch": 0.1739610150790732, + "grad_norm": 1.6434715572612049, + "learning_rate": 1.8932643949311288e-05, + "loss": 0.8279, + "step": 5676 + }, + { + "epoch": 0.1739916636018144, + "grad_norm": 1.5609555141509432, + "learning_rate": 1.8932197684132448e-05, + "loss": 0.8571, + "step": 5677 + }, + { + "epoch": 0.1740223121245556, + "grad_norm": 1.6831958233648692, + "learning_rate": 1.8931751330942386e-05, + "loss": 0.8318, + "step": 5678 + }, + { + "epoch": 0.1740529606472968, + "grad_norm": 1.807556954756948, + "learning_rate": 1.893130488974549e-05, + "loss": 0.7722, + "step": 5679 + }, + { + "epoch": 0.174083609170038, + "grad_norm": 1.63172335884433, + "learning_rate": 1.893085836054616e-05, + "loss": 0.7637, + "step": 5680 + }, + { + "epoch": 0.17411425769277922, + "grad_norm": 1.5966622081417334, + "learning_rate": 1.8930411743348797e-05, + "loss": 0.7358, + "step": 5681 + }, + { + "epoch": 0.17414490621552042, + "grad_norm": 1.3572215571486033, + "learning_rate": 1.8929965038157805e-05, + "loss": 0.7833, + "step": 5682 + }, + { + "epoch": 0.17417555473826163, + "grad_norm": 1.1526163276084747, + "learning_rate": 1.892951824497758e-05, + "loss": 0.6479, + "step": 5683 + }, + { + "epoch": 0.1742062032610028, + "grad_norm": 1.7619768807661353, + "learning_rate": 1.892907136381253e-05, + "loss": 0.8539, + "step": 5684 + }, + { + "epoch": 0.174236851783744, + "grad_norm": 1.6788201084497414, + "learning_rate": 1.8928624394667053e-05, + "loss": 0.8021, + "step": 5685 + }, + { + "epoch": 0.17426750030648522, + "grad_norm": 0.7643322976996723, + "learning_rate": 1.8928177337545553e-05, + "loss": 0.6396, + "step": 5686 + }, + { + "epoch": 0.17429814882922642, + "grad_norm": 1.625934613282519, + "learning_rate": 1.8927730192452442e-05, + "loss": 0.8402, + "step": 5687 + }, + { + "epoch": 0.17432879735196763, + "grad_norm": 1.4464326644252006, + "learning_rate": 1.892728295939212e-05, + "loss": 0.8052, + "step": 5688 + }, + { + "epoch": 0.17435944587470883, + "grad_norm": 1.637543904152227, + "learning_rate": 1.8926835638368995e-05, + "loss": 0.8397, + "step": 5689 + }, + { + "epoch": 0.17439009439745004, + "grad_norm": 1.533234932170309, + "learning_rate": 1.8926388229387472e-05, + "loss": 0.7793, + "step": 5690 + }, + { + "epoch": 0.17442074292019125, + "grad_norm": 1.0469360977299338, + "learning_rate": 1.8925940732451965e-05, + "loss": 0.671, + "step": 5691 + }, + { + "epoch": 0.17445139144293245, + "grad_norm": 1.5184918918695336, + "learning_rate": 1.892549314756688e-05, + "loss": 0.8305, + "step": 5692 + }, + { + "epoch": 0.17448203996567366, + "grad_norm": 1.6636284115573368, + "learning_rate": 1.8925045474736623e-05, + "loss": 0.7735, + "step": 5693 + }, + { + "epoch": 0.17451268848841486, + "grad_norm": 1.7463162645402301, + "learning_rate": 1.8924597713965616e-05, + "loss": 0.7186, + "step": 5694 + }, + { + "epoch": 0.17454333701115607, + "grad_norm": 1.467553041226979, + "learning_rate": 1.892414986525826e-05, + "loss": 0.7507, + "step": 5695 + }, + { + "epoch": 0.17457398553389727, + "grad_norm": 1.6199160462122904, + "learning_rate": 1.892370192861897e-05, + "loss": 0.8946, + "step": 5696 + }, + { + "epoch": 0.17460463405663848, + "grad_norm": 1.5280640423046772, + "learning_rate": 1.8923253904052166e-05, + "loss": 0.7572, + "step": 5697 + }, + { + "epoch": 0.17463528257937969, + "grad_norm": 1.5349012340877959, + "learning_rate": 1.892280579156226e-05, + "loss": 0.8536, + "step": 5698 + }, + { + "epoch": 0.1746659311021209, + "grad_norm": 1.7538450748877858, + "learning_rate": 1.8922357591153658e-05, + "loss": 0.8364, + "step": 5699 + }, + { + "epoch": 0.17469657962486207, + "grad_norm": 1.7142814093703296, + "learning_rate": 1.892190930283079e-05, + "loss": 0.7151, + "step": 5700 + }, + { + "epoch": 0.17472722814760328, + "grad_norm": 1.5455716155453836, + "learning_rate": 1.8921460926598064e-05, + "loss": 0.8271, + "step": 5701 + }, + { + "epoch": 0.17475787667034448, + "grad_norm": 1.551213225308075, + "learning_rate": 1.89210124624599e-05, + "loss": 0.7948, + "step": 5702 + }, + { + "epoch": 0.1747885251930857, + "grad_norm": 1.7572266361549267, + "learning_rate": 1.892056391042072e-05, + "loss": 0.9246, + "step": 5703 + }, + { + "epoch": 0.1748191737158269, + "grad_norm": 1.6923599844369213, + "learning_rate": 1.892011527048494e-05, + "loss": 0.743, + "step": 5704 + }, + { + "epoch": 0.1748498222385681, + "grad_norm": 1.484119348296454, + "learning_rate": 1.8919666542656982e-05, + "loss": 0.8312, + "step": 5705 + }, + { + "epoch": 0.1748804707613093, + "grad_norm": 1.6587914266747021, + "learning_rate": 1.891921772694127e-05, + "loss": 0.9077, + "step": 5706 + }, + { + "epoch": 0.1749111192840505, + "grad_norm": 0.9129209830625671, + "learning_rate": 1.891876882334222e-05, + "loss": 0.6562, + "step": 5707 + }, + { + "epoch": 0.17494176780679171, + "grad_norm": 1.750538873292984, + "learning_rate": 1.891831983186426e-05, + "loss": 0.8595, + "step": 5708 + }, + { + "epoch": 0.17497241632953292, + "grad_norm": 1.6757570808309599, + "learning_rate": 1.8917870752511814e-05, + "loss": 0.8556, + "step": 5709 + }, + { + "epoch": 0.17500306485227413, + "grad_norm": 1.443324180481984, + "learning_rate": 1.8917421585289304e-05, + "loss": 0.8077, + "step": 5710 + }, + { + "epoch": 0.17503371337501533, + "grad_norm": 1.457165826314517, + "learning_rate": 1.891697233020116e-05, + "loss": 0.6374, + "step": 5711 + }, + { + "epoch": 0.17506436189775654, + "grad_norm": 1.593077440777418, + "learning_rate": 1.8916522987251806e-05, + "loss": 0.7967, + "step": 5712 + }, + { + "epoch": 0.17509501042049774, + "grad_norm": 0.7728103303386569, + "learning_rate": 1.8916073556445667e-05, + "loss": 0.6631, + "step": 5713 + }, + { + "epoch": 0.17512565894323895, + "grad_norm": 1.6968628978140636, + "learning_rate": 1.8915624037787174e-05, + "loss": 0.7295, + "step": 5714 + }, + { + "epoch": 0.17515630746598013, + "grad_norm": 1.7489718232697695, + "learning_rate": 1.8915174431280757e-05, + "loss": 0.867, + "step": 5715 + }, + { + "epoch": 0.17518695598872133, + "grad_norm": 1.5609108908564924, + "learning_rate": 1.8914724736930847e-05, + "loss": 0.8286, + "step": 5716 + }, + { + "epoch": 0.17521760451146254, + "grad_norm": 0.7295003804442504, + "learning_rate": 1.8914274954741872e-05, + "loss": 0.6731, + "step": 5717 + }, + { + "epoch": 0.17524825303420374, + "grad_norm": 1.5205431130826292, + "learning_rate": 1.8913825084718264e-05, + "loss": 0.8746, + "step": 5718 + }, + { + "epoch": 0.17527890155694495, + "grad_norm": 1.660049047517276, + "learning_rate": 1.891337512686446e-05, + "loss": 0.7961, + "step": 5719 + }, + { + "epoch": 0.17530955007968615, + "grad_norm": 1.393515610997588, + "learning_rate": 1.8912925081184884e-05, + "loss": 0.6992, + "step": 5720 + }, + { + "epoch": 0.17534019860242736, + "grad_norm": 1.5966673063528378, + "learning_rate": 1.8912474947683983e-05, + "loss": 0.8229, + "step": 5721 + }, + { + "epoch": 0.17537084712516857, + "grad_norm": 1.6274554462574284, + "learning_rate": 1.8912024726366182e-05, + "loss": 0.8799, + "step": 5722 + }, + { + "epoch": 0.17540149564790977, + "grad_norm": 1.6446113327726115, + "learning_rate": 1.8911574417235923e-05, + "loss": 0.8292, + "step": 5723 + }, + { + "epoch": 0.17543214417065098, + "grad_norm": 1.6692064540402034, + "learning_rate": 1.8911124020297642e-05, + "loss": 0.8792, + "step": 5724 + }, + { + "epoch": 0.17546279269339218, + "grad_norm": 1.486098106257976, + "learning_rate": 1.8910673535555776e-05, + "loss": 0.729, + "step": 5725 + }, + { + "epoch": 0.1754934412161334, + "grad_norm": 1.5341059322184336, + "learning_rate": 1.891022296301476e-05, + "loss": 0.7596, + "step": 5726 + }, + { + "epoch": 0.1755240897388746, + "grad_norm": 0.7756977310302333, + "learning_rate": 1.890977230267904e-05, + "loss": 0.6457, + "step": 5727 + }, + { + "epoch": 0.1755547382616158, + "grad_norm": 1.6024042715869116, + "learning_rate": 1.8909321554553056e-05, + "loss": 0.8108, + "step": 5728 + }, + { + "epoch": 0.175585386784357, + "grad_norm": 1.5357237346490893, + "learning_rate": 1.8908870718641244e-05, + "loss": 0.7929, + "step": 5729 + }, + { + "epoch": 0.1756160353070982, + "grad_norm": 0.706204413933074, + "learning_rate": 1.890841979494805e-05, + "loss": 0.6573, + "step": 5730 + }, + { + "epoch": 0.1756466838298394, + "grad_norm": 0.7258075699910762, + "learning_rate": 1.890796878347792e-05, + "loss": 0.6503, + "step": 5731 + }, + { + "epoch": 0.1756773323525806, + "grad_norm": 1.4892801347945457, + "learning_rate": 1.890751768423529e-05, + "loss": 0.8218, + "step": 5732 + }, + { + "epoch": 0.1757079808753218, + "grad_norm": 0.6891801403466356, + "learning_rate": 1.890706649722461e-05, + "loss": 0.6383, + "step": 5733 + }, + { + "epoch": 0.175738629398063, + "grad_norm": 1.5775928490402575, + "learning_rate": 1.8906615222450324e-05, + "loss": 0.828, + "step": 5734 + }, + { + "epoch": 0.1757692779208042, + "grad_norm": 1.35466205386352, + "learning_rate": 1.890616385991688e-05, + "loss": 0.8112, + "step": 5735 + }, + { + "epoch": 0.17579992644354542, + "grad_norm": 1.915087426709428, + "learning_rate": 1.890571240962873e-05, + "loss": 0.7957, + "step": 5736 + }, + { + "epoch": 0.17583057496628662, + "grad_norm": 1.5412034522590499, + "learning_rate": 1.890526087159031e-05, + "loss": 0.7875, + "step": 5737 + }, + { + "epoch": 0.17586122348902783, + "grad_norm": 1.3876229349849347, + "learning_rate": 1.8904809245806078e-05, + "loss": 0.763, + "step": 5738 + }, + { + "epoch": 0.17589187201176903, + "grad_norm": 0.8747815602104688, + "learning_rate": 1.8904357532280482e-05, + "loss": 0.6612, + "step": 5739 + }, + { + "epoch": 0.17592252053451024, + "grad_norm": 1.6098014520427668, + "learning_rate": 1.8903905731017972e-05, + "loss": 0.8261, + "step": 5740 + }, + { + "epoch": 0.17595316905725145, + "grad_norm": 1.4602387412306799, + "learning_rate": 1.8903453842023002e-05, + "loss": 0.8299, + "step": 5741 + }, + { + "epoch": 0.17598381757999265, + "grad_norm": 1.6451134645867027, + "learning_rate": 1.8903001865300027e-05, + "loss": 0.8919, + "step": 5742 + }, + { + "epoch": 0.17601446610273386, + "grad_norm": 1.6103933441394056, + "learning_rate": 1.890254980085349e-05, + "loss": 0.7883, + "step": 5743 + }, + { + "epoch": 0.17604511462547506, + "grad_norm": 1.5027327999969402, + "learning_rate": 1.8902097648687858e-05, + "loss": 0.7559, + "step": 5744 + }, + { + "epoch": 0.17607576314821627, + "grad_norm": 1.5192753332788627, + "learning_rate": 1.8901645408807576e-05, + "loss": 0.857, + "step": 5745 + }, + { + "epoch": 0.17610641167095745, + "grad_norm": 1.3917449619326945, + "learning_rate": 1.8901193081217106e-05, + "loss": 0.7148, + "step": 5746 + }, + { + "epoch": 0.17613706019369865, + "grad_norm": 1.5269224318046464, + "learning_rate": 1.8900740665920904e-05, + "loss": 0.7826, + "step": 5747 + }, + { + "epoch": 0.17616770871643986, + "grad_norm": 0.867548753177454, + "learning_rate": 1.8900288162923423e-05, + "loss": 0.6602, + "step": 5748 + }, + { + "epoch": 0.17619835723918106, + "grad_norm": 1.5766736438118836, + "learning_rate": 1.8899835572229127e-05, + "loss": 0.7407, + "step": 5749 + }, + { + "epoch": 0.17622900576192227, + "grad_norm": 1.6741173637171098, + "learning_rate": 1.8899382893842476e-05, + "loss": 0.7834, + "step": 5750 + }, + { + "epoch": 0.17625965428466348, + "grad_norm": 0.7145398966605475, + "learning_rate": 1.889893012776793e-05, + "loss": 0.6411, + "step": 5751 + }, + { + "epoch": 0.17629030280740468, + "grad_norm": 1.6976345266676538, + "learning_rate": 1.8898477274009947e-05, + "loss": 0.8469, + "step": 5752 + }, + { + "epoch": 0.1763209513301459, + "grad_norm": 1.471043803855469, + "learning_rate": 1.8898024332572986e-05, + "loss": 0.7522, + "step": 5753 + }, + { + "epoch": 0.1763515998528871, + "grad_norm": 1.67916173559053, + "learning_rate": 1.889757130346152e-05, + "loss": 0.8029, + "step": 5754 + }, + { + "epoch": 0.1763822483756283, + "grad_norm": 1.6290384865824972, + "learning_rate": 1.8897118186680005e-05, + "loss": 0.7653, + "step": 5755 + }, + { + "epoch": 0.1764128968983695, + "grad_norm": 1.658444368755087, + "learning_rate": 1.8896664982232907e-05, + "loss": 0.8974, + "step": 5756 + }, + { + "epoch": 0.1764435454211107, + "grad_norm": 1.6997854409365865, + "learning_rate": 1.8896211690124695e-05, + "loss": 0.7936, + "step": 5757 + }, + { + "epoch": 0.17647419394385191, + "grad_norm": 1.426613828085146, + "learning_rate": 1.8895758310359832e-05, + "loss": 0.8484, + "step": 5758 + }, + { + "epoch": 0.17650484246659312, + "grad_norm": 1.685911803521168, + "learning_rate": 1.8895304842942787e-05, + "loss": 0.7297, + "step": 5759 + }, + { + "epoch": 0.17653549098933433, + "grad_norm": 1.5399571327417145, + "learning_rate": 1.889485128787803e-05, + "loss": 0.8744, + "step": 5760 + }, + { + "epoch": 0.17656613951207553, + "grad_norm": 1.586844482522745, + "learning_rate": 1.8894397645170022e-05, + "loss": 0.7924, + "step": 5761 + }, + { + "epoch": 0.1765967880348167, + "grad_norm": 1.592333355209012, + "learning_rate": 1.889394391482324e-05, + "loss": 0.8516, + "step": 5762 + }, + { + "epoch": 0.17662743655755792, + "grad_norm": 0.8397274952027242, + "learning_rate": 1.8893490096842155e-05, + "loss": 0.6359, + "step": 5763 + }, + { + "epoch": 0.17665808508029912, + "grad_norm": 1.5663788147699589, + "learning_rate": 1.8893036191231236e-05, + "loss": 0.8501, + "step": 5764 + }, + { + "epoch": 0.17668873360304033, + "grad_norm": 1.5628185801213004, + "learning_rate": 1.8892582197994954e-05, + "loss": 0.8527, + "step": 5765 + }, + { + "epoch": 0.17671938212578153, + "grad_norm": 0.7295510937648586, + "learning_rate": 1.8892128117137787e-05, + "loss": 0.6349, + "step": 5766 + }, + { + "epoch": 0.17675003064852274, + "grad_norm": 1.4570081042967604, + "learning_rate": 1.8891673948664206e-05, + "loss": 0.8025, + "step": 5767 + }, + { + "epoch": 0.17678067917126394, + "grad_norm": 1.6021026854479303, + "learning_rate": 1.8891219692578683e-05, + "loss": 0.9551, + "step": 5768 + }, + { + "epoch": 0.17681132769400515, + "grad_norm": 1.827150199189224, + "learning_rate": 1.88907653488857e-05, + "loss": 0.8717, + "step": 5769 + }, + { + "epoch": 0.17684197621674635, + "grad_norm": 1.543604761630931, + "learning_rate": 1.8890310917589733e-05, + "loss": 0.8375, + "step": 5770 + }, + { + "epoch": 0.17687262473948756, + "grad_norm": 1.4013160962274913, + "learning_rate": 1.8889856398695254e-05, + "loss": 0.8835, + "step": 5771 + }, + { + "epoch": 0.17690327326222877, + "grad_norm": 1.476404861403678, + "learning_rate": 1.8889401792206746e-05, + "loss": 0.8852, + "step": 5772 + }, + { + "epoch": 0.17693392178496997, + "grad_norm": 1.6446609949822082, + "learning_rate": 1.8888947098128692e-05, + "loss": 0.7774, + "step": 5773 + }, + { + "epoch": 0.17696457030771118, + "grad_norm": 1.3889916684432788, + "learning_rate": 1.8888492316465565e-05, + "loss": 0.7567, + "step": 5774 + }, + { + "epoch": 0.17699521883045238, + "grad_norm": 1.513380037824673, + "learning_rate": 1.888803744722185e-05, + "loss": 0.8324, + "step": 5775 + }, + { + "epoch": 0.1770258673531936, + "grad_norm": 1.5087752093582982, + "learning_rate": 1.8887582490402026e-05, + "loss": 0.7801, + "step": 5776 + }, + { + "epoch": 0.17705651587593477, + "grad_norm": 1.528861182385365, + "learning_rate": 1.8887127446010577e-05, + "loss": 0.8353, + "step": 5777 + }, + { + "epoch": 0.17708716439867597, + "grad_norm": 1.6963926445440156, + "learning_rate": 1.888667231405199e-05, + "loss": 0.684, + "step": 5778 + }, + { + "epoch": 0.17711781292141718, + "grad_norm": 1.7548555456474264, + "learning_rate": 1.888621709453075e-05, + "loss": 0.7882, + "step": 5779 + }, + { + "epoch": 0.17714846144415838, + "grad_norm": 1.6922698775395921, + "learning_rate": 1.8885761787451333e-05, + "loss": 0.808, + "step": 5780 + }, + { + "epoch": 0.1771791099668996, + "grad_norm": 1.798509306317055, + "learning_rate": 1.8885306392818234e-05, + "loss": 0.7044, + "step": 5781 + }, + { + "epoch": 0.1772097584896408, + "grad_norm": 1.6492455803768518, + "learning_rate": 1.888485091063594e-05, + "loss": 0.8983, + "step": 5782 + }, + { + "epoch": 0.177240407012382, + "grad_norm": 1.5684259408163255, + "learning_rate": 1.8884395340908933e-05, + "loss": 0.8322, + "step": 5783 + }, + { + "epoch": 0.1772710555351232, + "grad_norm": 1.4925845335318109, + "learning_rate": 1.8883939683641705e-05, + "loss": 0.7733, + "step": 5784 + }, + { + "epoch": 0.1773017040578644, + "grad_norm": 1.43162102948086, + "learning_rate": 1.888348393883875e-05, + "loss": 0.8356, + "step": 5785 + }, + { + "epoch": 0.17733235258060562, + "grad_norm": 1.5306633181771365, + "learning_rate": 1.8883028106504553e-05, + "loss": 0.8546, + "step": 5786 + }, + { + "epoch": 0.17736300110334682, + "grad_norm": 1.590212974041362, + "learning_rate": 1.8882572186643606e-05, + "loss": 0.8076, + "step": 5787 + }, + { + "epoch": 0.17739364962608803, + "grad_norm": 1.5124342476125334, + "learning_rate": 1.8882116179260402e-05, + "loss": 0.837, + "step": 5788 + }, + { + "epoch": 0.17742429814882923, + "grad_norm": 1.508013523777641, + "learning_rate": 1.888166008435944e-05, + "loss": 0.7836, + "step": 5789 + }, + { + "epoch": 0.17745494667157044, + "grad_norm": 1.4654040633901713, + "learning_rate": 1.8881203901945205e-05, + "loss": 0.7806, + "step": 5790 + }, + { + "epoch": 0.17748559519431165, + "grad_norm": 1.4624326850471525, + "learning_rate": 1.8880747632022194e-05, + "loss": 0.8045, + "step": 5791 + }, + { + "epoch": 0.17751624371705285, + "grad_norm": 1.6858817220193205, + "learning_rate": 1.8880291274594907e-05, + "loss": 0.8413, + "step": 5792 + }, + { + "epoch": 0.17754689223979403, + "grad_norm": 1.4477364120990786, + "learning_rate": 1.8879834829667838e-05, + "loss": 0.837, + "step": 5793 + }, + { + "epoch": 0.17757754076253524, + "grad_norm": 1.6778591208085343, + "learning_rate": 1.887937829724548e-05, + "loss": 0.8543, + "step": 5794 + }, + { + "epoch": 0.17760818928527644, + "grad_norm": 1.408910686711939, + "learning_rate": 1.8878921677332343e-05, + "loss": 0.8199, + "step": 5795 + }, + { + "epoch": 0.17763883780801765, + "grad_norm": 1.6016495708727343, + "learning_rate": 1.8878464969932915e-05, + "loss": 0.8434, + "step": 5796 + }, + { + "epoch": 0.17766948633075885, + "grad_norm": 1.568355945990585, + "learning_rate": 1.8878008175051698e-05, + "loss": 0.7849, + "step": 5797 + }, + { + "epoch": 0.17770013485350006, + "grad_norm": 0.960354505272288, + "learning_rate": 1.88775512926932e-05, + "loss": 0.6432, + "step": 5798 + }, + { + "epoch": 0.17773078337624126, + "grad_norm": 0.8327079567793162, + "learning_rate": 1.8877094322861915e-05, + "loss": 0.6441, + "step": 5799 + }, + { + "epoch": 0.17776143189898247, + "grad_norm": 0.7012777731392302, + "learning_rate": 1.887663726556235e-05, + "loss": 0.6629, + "step": 5800 + }, + { + "epoch": 0.17779208042172367, + "grad_norm": 1.4419454327526005, + "learning_rate": 1.8876180120799e-05, + "loss": 0.8174, + "step": 5801 + }, + { + "epoch": 0.17782272894446488, + "grad_norm": 1.723106988942295, + "learning_rate": 1.8875722888576386e-05, + "loss": 0.8968, + "step": 5802 + }, + { + "epoch": 0.1778533774672061, + "grad_norm": 1.5515058046364005, + "learning_rate": 1.8875265568898996e-05, + "loss": 0.8021, + "step": 5803 + }, + { + "epoch": 0.1778840259899473, + "grad_norm": 1.7211708630889346, + "learning_rate": 1.8874808161771346e-05, + "loss": 0.8069, + "step": 5804 + }, + { + "epoch": 0.1779146745126885, + "grad_norm": 1.667245617493241, + "learning_rate": 1.8874350667197942e-05, + "loss": 0.8081, + "step": 5805 + }, + { + "epoch": 0.1779453230354297, + "grad_norm": 1.4820405896022089, + "learning_rate": 1.8873893085183288e-05, + "loss": 0.8081, + "step": 5806 + }, + { + "epoch": 0.1779759715581709, + "grad_norm": 1.5304408987950167, + "learning_rate": 1.8873435415731896e-05, + "loss": 0.9511, + "step": 5807 + }, + { + "epoch": 0.1780066200809121, + "grad_norm": 1.7803543097494603, + "learning_rate": 1.8872977658848275e-05, + "loss": 0.7572, + "step": 5808 + }, + { + "epoch": 0.1780372686036533, + "grad_norm": 1.637891344551609, + "learning_rate": 1.8872519814536933e-05, + "loss": 0.8228, + "step": 5809 + }, + { + "epoch": 0.1780679171263945, + "grad_norm": 1.761398996441892, + "learning_rate": 1.8872061882802385e-05, + "loss": 0.7559, + "step": 5810 + }, + { + "epoch": 0.1780985656491357, + "grad_norm": 1.4392587772133654, + "learning_rate": 1.887160386364914e-05, + "loss": 0.7112, + "step": 5811 + }, + { + "epoch": 0.1781292141718769, + "grad_norm": 1.5827183282353847, + "learning_rate": 1.8871145757081714e-05, + "loss": 0.7256, + "step": 5812 + }, + { + "epoch": 0.17815986269461812, + "grad_norm": 1.6552996387540186, + "learning_rate": 1.8870687563104617e-05, + "loss": 0.7116, + "step": 5813 + }, + { + "epoch": 0.17819051121735932, + "grad_norm": 1.6500622952226975, + "learning_rate": 1.8870229281722366e-05, + "loss": 0.8697, + "step": 5814 + }, + { + "epoch": 0.17822115974010053, + "grad_norm": 1.6367692411117174, + "learning_rate": 1.8869770912939478e-05, + "loss": 0.7664, + "step": 5815 + }, + { + "epoch": 0.17825180826284173, + "grad_norm": 1.5417116483308801, + "learning_rate": 1.8869312456760466e-05, + "loss": 0.8636, + "step": 5816 + }, + { + "epoch": 0.17828245678558294, + "grad_norm": 1.5825585865863052, + "learning_rate": 1.8868853913189852e-05, + "loss": 0.8265, + "step": 5817 + }, + { + "epoch": 0.17831310530832414, + "grad_norm": 1.7368706247913814, + "learning_rate": 1.8868395282232147e-05, + "loss": 0.9213, + "step": 5818 + }, + { + "epoch": 0.17834375383106535, + "grad_norm": 1.4049377677857684, + "learning_rate": 1.8867936563891877e-05, + "loss": 0.932, + "step": 5819 + }, + { + "epoch": 0.17837440235380655, + "grad_norm": 1.5945421617015667, + "learning_rate": 1.886747775817356e-05, + "loss": 0.8603, + "step": 5820 + }, + { + "epoch": 0.17840505087654776, + "grad_norm": 1.5163247472445613, + "learning_rate": 1.886701886508171e-05, + "loss": 0.9559, + "step": 5821 + }, + { + "epoch": 0.17843569939928897, + "grad_norm": 1.5373149018974708, + "learning_rate": 1.8866559884620862e-05, + "loss": 0.8712, + "step": 5822 + }, + { + "epoch": 0.17846634792203017, + "grad_norm": 1.4028518573965898, + "learning_rate": 1.8866100816795527e-05, + "loss": 0.6694, + "step": 5823 + }, + { + "epoch": 0.17849699644477135, + "grad_norm": 1.4707154777757592, + "learning_rate": 1.8865641661610232e-05, + "loss": 0.8862, + "step": 5824 + }, + { + "epoch": 0.17852764496751256, + "grad_norm": 1.495454421755336, + "learning_rate": 1.8865182419069504e-05, + "loss": 0.6806, + "step": 5825 + }, + { + "epoch": 0.17855829349025376, + "grad_norm": 1.5681826056878472, + "learning_rate": 1.886472308917786e-05, + "loss": 0.8151, + "step": 5826 + }, + { + "epoch": 0.17858894201299497, + "grad_norm": 1.5106632284366812, + "learning_rate": 1.8864263671939836e-05, + "loss": 0.8646, + "step": 5827 + }, + { + "epoch": 0.17861959053573617, + "grad_norm": 1.67436492970033, + "learning_rate": 1.8863804167359953e-05, + "loss": 0.8685, + "step": 5828 + }, + { + "epoch": 0.17865023905847738, + "grad_norm": 1.252588035211272, + "learning_rate": 1.886334457544274e-05, + "loss": 0.6536, + "step": 5829 + }, + { + "epoch": 0.17868088758121858, + "grad_norm": 0.8213349300807341, + "learning_rate": 1.8862884896192725e-05, + "loss": 0.6591, + "step": 5830 + }, + { + "epoch": 0.1787115361039598, + "grad_norm": 0.8517348285101998, + "learning_rate": 1.8862425129614434e-05, + "loss": 0.6582, + "step": 5831 + }, + { + "epoch": 0.178742184626701, + "grad_norm": 1.6717088033881145, + "learning_rate": 1.8861965275712403e-05, + "loss": 0.9051, + "step": 5832 + }, + { + "epoch": 0.1787728331494422, + "grad_norm": 1.5292256879849204, + "learning_rate": 1.8861505334491162e-05, + "loss": 0.6992, + "step": 5833 + }, + { + "epoch": 0.1788034816721834, + "grad_norm": 1.5286148785234783, + "learning_rate": 1.886104530595524e-05, + "loss": 0.7661, + "step": 5834 + }, + { + "epoch": 0.1788341301949246, + "grad_norm": 1.5225774103318443, + "learning_rate": 1.8860585190109172e-05, + "loss": 0.8383, + "step": 5835 + }, + { + "epoch": 0.17886477871766582, + "grad_norm": 1.444706748488983, + "learning_rate": 1.8860124986957493e-05, + "loss": 0.7918, + "step": 5836 + }, + { + "epoch": 0.17889542724040702, + "grad_norm": 1.456287054718809, + "learning_rate": 1.885966469650473e-05, + "loss": 0.8312, + "step": 5837 + }, + { + "epoch": 0.17892607576314823, + "grad_norm": 1.5189543897292244, + "learning_rate": 1.885920431875543e-05, + "loss": 0.8764, + "step": 5838 + }, + { + "epoch": 0.1789567242858894, + "grad_norm": 1.505626067931459, + "learning_rate": 1.885874385371412e-05, + "loss": 0.7485, + "step": 5839 + }, + { + "epoch": 0.1789873728086306, + "grad_norm": 0.8976863708828253, + "learning_rate": 1.885828330138534e-05, + "loss": 0.6735, + "step": 5840 + }, + { + "epoch": 0.17901802133137182, + "grad_norm": 1.6233723785595766, + "learning_rate": 1.8857822661773632e-05, + "loss": 0.8418, + "step": 5841 + }, + { + "epoch": 0.17904866985411302, + "grad_norm": 1.5171145556042982, + "learning_rate": 1.885736193488353e-05, + "loss": 0.8155, + "step": 5842 + }, + { + "epoch": 0.17907931837685423, + "grad_norm": 1.472928506518677, + "learning_rate": 1.885690112071957e-05, + "loss": 0.7914, + "step": 5843 + }, + { + "epoch": 0.17910996689959544, + "grad_norm": 0.7062603524600232, + "learning_rate": 1.8856440219286297e-05, + "loss": 0.6194, + "step": 5844 + }, + { + "epoch": 0.17914061542233664, + "grad_norm": 1.3498788530172956, + "learning_rate": 1.8855979230588257e-05, + "loss": 0.7597, + "step": 5845 + }, + { + "epoch": 0.17917126394507785, + "grad_norm": 1.6555239637673078, + "learning_rate": 1.8855518154629986e-05, + "loss": 0.8001, + "step": 5846 + }, + { + "epoch": 0.17920191246781905, + "grad_norm": 1.433172778638674, + "learning_rate": 1.885505699141603e-05, + "loss": 0.7465, + "step": 5847 + }, + { + "epoch": 0.17923256099056026, + "grad_norm": 1.6789947610392864, + "learning_rate": 1.885459574095093e-05, + "loss": 0.8725, + "step": 5848 + }, + { + "epoch": 0.17926320951330146, + "grad_norm": 1.46836860448795, + "learning_rate": 1.8854134403239236e-05, + "loss": 0.7386, + "step": 5849 + }, + { + "epoch": 0.17929385803604267, + "grad_norm": 1.5438121309785653, + "learning_rate": 1.8853672978285485e-05, + "loss": 0.8967, + "step": 5850 + }, + { + "epoch": 0.17932450655878387, + "grad_norm": 1.74914285744102, + "learning_rate": 1.8853211466094232e-05, + "loss": 0.7773, + "step": 5851 + }, + { + "epoch": 0.17935515508152508, + "grad_norm": 1.4065914112220586, + "learning_rate": 1.8852749866670018e-05, + "loss": 0.7018, + "step": 5852 + }, + { + "epoch": 0.1793858036042663, + "grad_norm": 1.473844079842726, + "learning_rate": 1.88522881800174e-05, + "loss": 0.6484, + "step": 5853 + }, + { + "epoch": 0.1794164521270075, + "grad_norm": 1.3555505529458158, + "learning_rate": 1.885182640614092e-05, + "loss": 0.7124, + "step": 5854 + }, + { + "epoch": 0.17944710064974867, + "grad_norm": 1.6436956032544718, + "learning_rate": 1.8851364545045124e-05, + "loss": 0.8659, + "step": 5855 + }, + { + "epoch": 0.17947774917248988, + "grad_norm": 1.651037816593375, + "learning_rate": 1.8850902596734574e-05, + "loss": 0.7987, + "step": 5856 + }, + { + "epoch": 0.17950839769523108, + "grad_norm": 0.8764346047967281, + "learning_rate": 1.8850440561213817e-05, + "loss": 0.6663, + "step": 5857 + }, + { + "epoch": 0.1795390462179723, + "grad_norm": 1.5036462454795705, + "learning_rate": 1.8849978438487402e-05, + "loss": 0.7737, + "step": 5858 + }, + { + "epoch": 0.1795696947407135, + "grad_norm": 1.526740297323631, + "learning_rate": 1.8849516228559884e-05, + "loss": 0.7829, + "step": 5859 + }, + { + "epoch": 0.1796003432634547, + "grad_norm": 1.5339845679102393, + "learning_rate": 1.884905393143582e-05, + "loss": 0.7453, + "step": 5860 + }, + { + "epoch": 0.1796309917861959, + "grad_norm": 0.7003006442460435, + "learning_rate": 1.8848591547119763e-05, + "loss": 0.6341, + "step": 5861 + }, + { + "epoch": 0.1796616403089371, + "grad_norm": 1.7173809789691945, + "learning_rate": 1.884812907561627e-05, + "loss": 0.8924, + "step": 5862 + }, + { + "epoch": 0.17969228883167832, + "grad_norm": 1.5588879241769602, + "learning_rate": 1.88476665169299e-05, + "loss": 0.8587, + "step": 5863 + }, + { + "epoch": 0.17972293735441952, + "grad_norm": 1.6772333284405163, + "learning_rate": 1.8847203871065206e-05, + "loss": 0.8708, + "step": 5864 + }, + { + "epoch": 0.17975358587716073, + "grad_norm": 1.4460872873789103, + "learning_rate": 1.8846741138026745e-05, + "loss": 0.8607, + "step": 5865 + }, + { + "epoch": 0.17978423439990193, + "grad_norm": 0.802249073468794, + "learning_rate": 1.8846278317819084e-05, + "loss": 0.6444, + "step": 5866 + }, + { + "epoch": 0.17981488292264314, + "grad_norm": 1.615382594988058, + "learning_rate": 1.884581541044678e-05, + "loss": 0.7935, + "step": 5867 + }, + { + "epoch": 0.17984553144538434, + "grad_norm": 1.6011821283577472, + "learning_rate": 1.884535241591439e-05, + "loss": 0.8513, + "step": 5868 + }, + { + "epoch": 0.17987617996812555, + "grad_norm": 1.4780136628619647, + "learning_rate": 1.8844889334226478e-05, + "loss": 0.8148, + "step": 5869 + }, + { + "epoch": 0.17990682849086673, + "grad_norm": 1.5428516792525295, + "learning_rate": 1.8844426165387614e-05, + "loss": 0.8448, + "step": 5870 + }, + { + "epoch": 0.17993747701360793, + "grad_norm": 0.7486009769384683, + "learning_rate": 1.8843962909402352e-05, + "loss": 0.6617, + "step": 5871 + }, + { + "epoch": 0.17996812553634914, + "grad_norm": 1.5992849647616498, + "learning_rate": 1.8843499566275265e-05, + "loss": 0.8757, + "step": 5872 + }, + { + "epoch": 0.17999877405909034, + "grad_norm": 1.3654083714341105, + "learning_rate": 1.884303613601091e-05, + "loss": 0.7257, + "step": 5873 + }, + { + "epoch": 0.18002942258183155, + "grad_norm": 0.6883397171049609, + "learning_rate": 1.884257261861386e-05, + "loss": 0.6319, + "step": 5874 + }, + { + "epoch": 0.18006007110457276, + "grad_norm": 1.432682845170391, + "learning_rate": 1.8842109014088677e-05, + "loss": 0.6852, + "step": 5875 + }, + { + "epoch": 0.18009071962731396, + "grad_norm": 1.5518204242024136, + "learning_rate": 1.8841645322439933e-05, + "loss": 0.8815, + "step": 5876 + }, + { + "epoch": 0.18012136815005517, + "grad_norm": 1.7359699860212092, + "learning_rate": 1.8841181543672197e-05, + "loss": 0.9066, + "step": 5877 + }, + { + "epoch": 0.18015201667279637, + "grad_norm": 1.4868980255351927, + "learning_rate": 1.8840717677790032e-05, + "loss": 0.8441, + "step": 5878 + }, + { + "epoch": 0.18018266519553758, + "grad_norm": 1.4575362241914258, + "learning_rate": 1.8840253724798017e-05, + "loss": 0.7015, + "step": 5879 + }, + { + "epoch": 0.18021331371827878, + "grad_norm": 1.500488970958786, + "learning_rate": 1.883978968470072e-05, + "loss": 0.8503, + "step": 5880 + }, + { + "epoch": 0.18024396224102, + "grad_norm": 1.5528473567094818, + "learning_rate": 1.8839325557502713e-05, + "loss": 0.7284, + "step": 5881 + }, + { + "epoch": 0.1802746107637612, + "grad_norm": 2.1555747130470744, + "learning_rate": 1.8838861343208572e-05, + "loss": 0.8583, + "step": 5882 + }, + { + "epoch": 0.1803052592865024, + "grad_norm": 1.6403408801164354, + "learning_rate": 1.8838397041822866e-05, + "loss": 0.854, + "step": 5883 + }, + { + "epoch": 0.1803359078092436, + "grad_norm": 1.619436743833245, + "learning_rate": 1.8837932653350176e-05, + "loss": 0.8665, + "step": 5884 + }, + { + "epoch": 0.1803665563319848, + "grad_norm": 1.6761604957130996, + "learning_rate": 1.8837468177795068e-05, + "loss": 0.88, + "step": 5885 + }, + { + "epoch": 0.180397204854726, + "grad_norm": 1.6188491462215915, + "learning_rate": 1.883700361516213e-05, + "loss": 0.8349, + "step": 5886 + }, + { + "epoch": 0.1804278533774672, + "grad_norm": 0.7886765164432642, + "learning_rate": 1.883653896545593e-05, + "loss": 0.6456, + "step": 5887 + }, + { + "epoch": 0.1804585019002084, + "grad_norm": 0.7427367742405714, + "learning_rate": 1.8836074228681057e-05, + "loss": 0.6004, + "step": 5888 + }, + { + "epoch": 0.1804891504229496, + "grad_norm": 1.5111151764587774, + "learning_rate": 1.883560940484208e-05, + "loss": 0.8081, + "step": 5889 + }, + { + "epoch": 0.1805197989456908, + "grad_norm": 1.418197581747516, + "learning_rate": 1.8835144493943583e-05, + "loss": 0.8628, + "step": 5890 + }, + { + "epoch": 0.18055044746843202, + "grad_norm": 1.4953705836209943, + "learning_rate": 1.8834679495990148e-05, + "loss": 0.7805, + "step": 5891 + }, + { + "epoch": 0.18058109599117322, + "grad_norm": 1.4859405675467015, + "learning_rate": 1.8834214410986354e-05, + "loss": 0.8622, + "step": 5892 + }, + { + "epoch": 0.18061174451391443, + "grad_norm": 1.573284568142862, + "learning_rate": 1.8833749238936786e-05, + "loss": 0.712, + "step": 5893 + }, + { + "epoch": 0.18064239303665564, + "grad_norm": 0.9218196673403458, + "learning_rate": 1.8833283979846024e-05, + "loss": 0.6415, + "step": 5894 + }, + { + "epoch": 0.18067304155939684, + "grad_norm": 1.466238824004496, + "learning_rate": 1.883281863371866e-05, + "loss": 0.7903, + "step": 5895 + }, + { + "epoch": 0.18070369008213805, + "grad_norm": 1.5723236269208019, + "learning_rate": 1.883235320055927e-05, + "loss": 0.7539, + "step": 5896 + }, + { + "epoch": 0.18073433860487925, + "grad_norm": 0.7576360791184917, + "learning_rate": 1.883188768037244e-05, + "loss": 0.625, + "step": 5897 + }, + { + "epoch": 0.18076498712762046, + "grad_norm": 0.6977302935206737, + "learning_rate": 1.883142207316277e-05, + "loss": 0.6053, + "step": 5898 + }, + { + "epoch": 0.18079563565036166, + "grad_norm": 1.4893892440121845, + "learning_rate": 1.8830956378934835e-05, + "loss": 0.7853, + "step": 5899 + }, + { + "epoch": 0.18082628417310287, + "grad_norm": 1.5875333387944448, + "learning_rate": 1.883049059769323e-05, + "loss": 0.7829, + "step": 5900 + }, + { + "epoch": 0.18085693269584405, + "grad_norm": 1.750849180091746, + "learning_rate": 1.8830024729442534e-05, + "loss": 0.9127, + "step": 5901 + }, + { + "epoch": 0.18088758121858525, + "grad_norm": 0.8493721868582423, + "learning_rate": 1.882955877418735e-05, + "loss": 0.6405, + "step": 5902 + }, + { + "epoch": 0.18091822974132646, + "grad_norm": 1.4815823967628503, + "learning_rate": 1.8829092731932266e-05, + "loss": 0.7702, + "step": 5903 + }, + { + "epoch": 0.18094887826406766, + "grad_norm": 1.7225958737627882, + "learning_rate": 1.882862660268187e-05, + "loss": 0.9235, + "step": 5904 + }, + { + "epoch": 0.18097952678680887, + "grad_norm": 0.7403983097308495, + "learning_rate": 1.882816038644076e-05, + "loss": 0.6315, + "step": 5905 + }, + { + "epoch": 0.18101017530955008, + "grad_norm": 1.7988582518304115, + "learning_rate": 1.8827694083213523e-05, + "loss": 0.877, + "step": 5906 + }, + { + "epoch": 0.18104082383229128, + "grad_norm": 1.41210576710745, + "learning_rate": 1.8827227693004758e-05, + "loss": 0.8175, + "step": 5907 + }, + { + "epoch": 0.1810714723550325, + "grad_norm": 1.6693719288421625, + "learning_rate": 1.882676121581906e-05, + "loss": 0.7694, + "step": 5908 + }, + { + "epoch": 0.1811021208777737, + "grad_norm": 1.349327576049126, + "learning_rate": 1.8826294651661027e-05, + "loss": 0.6407, + "step": 5909 + }, + { + "epoch": 0.1811327694005149, + "grad_norm": 1.4181181898240527, + "learning_rate": 1.8825828000535252e-05, + "loss": 0.8292, + "step": 5910 + }, + { + "epoch": 0.1811634179232561, + "grad_norm": 1.6988341184035691, + "learning_rate": 1.882536126244634e-05, + "loss": 0.9604, + "step": 5911 + }, + { + "epoch": 0.1811940664459973, + "grad_norm": 0.7803421777915244, + "learning_rate": 1.8824894437398883e-05, + "loss": 0.6769, + "step": 5912 + }, + { + "epoch": 0.18122471496873852, + "grad_norm": 1.5019332547155908, + "learning_rate": 1.882442752539748e-05, + "loss": 0.9597, + "step": 5913 + }, + { + "epoch": 0.18125536349147972, + "grad_norm": 1.4344231279612194, + "learning_rate": 1.882396052644674e-05, + "loss": 0.7754, + "step": 5914 + }, + { + "epoch": 0.18128601201422093, + "grad_norm": 0.69290419596004, + "learning_rate": 1.8823493440551256e-05, + "loss": 0.6527, + "step": 5915 + }, + { + "epoch": 0.18131666053696213, + "grad_norm": 1.5689681246694533, + "learning_rate": 1.8823026267715632e-05, + "loss": 0.7563, + "step": 5916 + }, + { + "epoch": 0.1813473090597033, + "grad_norm": 1.7369456998999675, + "learning_rate": 1.8822559007944477e-05, + "loss": 0.8601, + "step": 5917 + }, + { + "epoch": 0.18137795758244452, + "grad_norm": 1.4961305706292314, + "learning_rate": 1.882209166124239e-05, + "loss": 0.8373, + "step": 5918 + }, + { + "epoch": 0.18140860610518572, + "grad_norm": 1.4995577410285519, + "learning_rate": 1.8821624227613974e-05, + "loss": 0.8317, + "step": 5919 + }, + { + "epoch": 0.18143925462792693, + "grad_norm": 1.488727594207774, + "learning_rate": 1.882115670706384e-05, + "loss": 0.743, + "step": 5920 + }, + { + "epoch": 0.18146990315066813, + "grad_norm": 1.604048275107538, + "learning_rate": 1.882068909959659e-05, + "loss": 0.8428, + "step": 5921 + }, + { + "epoch": 0.18150055167340934, + "grad_norm": 1.5099561349578887, + "learning_rate": 1.8820221405216836e-05, + "loss": 0.8743, + "step": 5922 + }, + { + "epoch": 0.18153120019615054, + "grad_norm": 1.6016199796451636, + "learning_rate": 1.8819753623929182e-05, + "loss": 0.8602, + "step": 5923 + }, + { + "epoch": 0.18156184871889175, + "grad_norm": 1.6307499273771646, + "learning_rate": 1.8819285755738235e-05, + "loss": 0.8261, + "step": 5924 + }, + { + "epoch": 0.18159249724163296, + "grad_norm": 1.6757031621070877, + "learning_rate": 1.8818817800648617e-05, + "loss": 0.7638, + "step": 5925 + }, + { + "epoch": 0.18162314576437416, + "grad_norm": 1.6440852809266389, + "learning_rate": 1.8818349758664927e-05, + "loss": 0.8204, + "step": 5926 + }, + { + "epoch": 0.18165379428711537, + "grad_norm": 1.8059573164220644, + "learning_rate": 1.8817881629791778e-05, + "loss": 0.7913, + "step": 5927 + }, + { + "epoch": 0.18168444280985657, + "grad_norm": 1.4729766564689306, + "learning_rate": 1.881741341403379e-05, + "loss": 0.8246, + "step": 5928 + }, + { + "epoch": 0.18171509133259778, + "grad_norm": 1.4324678052702868, + "learning_rate": 1.8816945111395565e-05, + "loss": 0.8217, + "step": 5929 + }, + { + "epoch": 0.18174573985533898, + "grad_norm": 1.4235613566550085, + "learning_rate": 1.8816476721881728e-05, + "loss": 0.8366, + "step": 5930 + }, + { + "epoch": 0.1817763883780802, + "grad_norm": 1.421718534209015, + "learning_rate": 1.8816008245496893e-05, + "loss": 0.8149, + "step": 5931 + }, + { + "epoch": 0.18180703690082137, + "grad_norm": 0.8066727382142772, + "learning_rate": 1.881553968224567e-05, + "loss": 0.6562, + "step": 5932 + }, + { + "epoch": 0.18183768542356257, + "grad_norm": 0.8052057899925182, + "learning_rate": 1.881507103213268e-05, + "loss": 0.6484, + "step": 5933 + }, + { + "epoch": 0.18186833394630378, + "grad_norm": 0.7202293120039608, + "learning_rate": 1.881460229516254e-05, + "loss": 0.6596, + "step": 5934 + }, + { + "epoch": 0.18189898246904498, + "grad_norm": 1.515591701293109, + "learning_rate": 1.8814133471339863e-05, + "loss": 0.8295, + "step": 5935 + }, + { + "epoch": 0.1819296309917862, + "grad_norm": 1.5289819007496672, + "learning_rate": 1.881366456066928e-05, + "loss": 0.8609, + "step": 5936 + }, + { + "epoch": 0.1819602795145274, + "grad_norm": 1.6269835621309772, + "learning_rate": 1.88131955631554e-05, + "loss": 0.7921, + "step": 5937 + }, + { + "epoch": 0.1819909280372686, + "grad_norm": 1.7450331443664067, + "learning_rate": 1.8812726478802854e-05, + "loss": 0.865, + "step": 5938 + }, + { + "epoch": 0.1820215765600098, + "grad_norm": 1.795827885934758, + "learning_rate": 1.8812257307616256e-05, + "loss": 0.8291, + "step": 5939 + }, + { + "epoch": 0.182052225082751, + "grad_norm": 1.6151324492602162, + "learning_rate": 1.8811788049600236e-05, + "loss": 0.8164, + "step": 5940 + }, + { + "epoch": 0.18208287360549222, + "grad_norm": 1.4765238233263636, + "learning_rate": 1.8811318704759408e-05, + "loss": 0.7286, + "step": 5941 + }, + { + "epoch": 0.18211352212823342, + "grad_norm": 1.5897953788573533, + "learning_rate": 1.8810849273098405e-05, + "loss": 0.7062, + "step": 5942 + }, + { + "epoch": 0.18214417065097463, + "grad_norm": 1.0414097301771166, + "learning_rate": 1.881037975462185e-05, + "loss": 0.6675, + "step": 5943 + }, + { + "epoch": 0.18217481917371584, + "grad_norm": 1.6503579495845035, + "learning_rate": 1.880991014933437e-05, + "loss": 0.9145, + "step": 5944 + }, + { + "epoch": 0.18220546769645704, + "grad_norm": 0.7991720023101032, + "learning_rate": 1.8809440457240588e-05, + "loss": 0.6255, + "step": 5945 + }, + { + "epoch": 0.18223611621919825, + "grad_norm": 0.6966437406418504, + "learning_rate": 1.8808970678345137e-05, + "loss": 0.6417, + "step": 5946 + }, + { + "epoch": 0.18226676474193945, + "grad_norm": 1.6901182195973645, + "learning_rate": 1.8808500812652647e-05, + "loss": 0.7213, + "step": 5947 + }, + { + "epoch": 0.18229741326468063, + "grad_norm": 1.596705509321903, + "learning_rate": 1.880803086016774e-05, + "loss": 0.8854, + "step": 5948 + }, + { + "epoch": 0.18232806178742184, + "grad_norm": 1.555247145153309, + "learning_rate": 1.8807560820895055e-05, + "loss": 0.7715, + "step": 5949 + }, + { + "epoch": 0.18235871031016304, + "grad_norm": 1.417813482999968, + "learning_rate": 1.880709069483922e-05, + "loss": 0.8194, + "step": 5950 + }, + { + "epoch": 0.18238935883290425, + "grad_norm": 1.668582018919436, + "learning_rate": 1.8806620482004866e-05, + "loss": 0.826, + "step": 5951 + }, + { + "epoch": 0.18242000735564545, + "grad_norm": 1.5581835031653846, + "learning_rate": 1.8806150182396622e-05, + "loss": 0.777, + "step": 5952 + }, + { + "epoch": 0.18245065587838666, + "grad_norm": 1.5516778882953883, + "learning_rate": 1.8805679796019132e-05, + "loss": 0.7788, + "step": 5953 + }, + { + "epoch": 0.18248130440112786, + "grad_norm": 1.566533114858632, + "learning_rate": 1.8805209322877025e-05, + "loss": 0.744, + "step": 5954 + }, + { + "epoch": 0.18251195292386907, + "grad_norm": 1.634741390524747, + "learning_rate": 1.880473876297494e-05, + "loss": 0.7995, + "step": 5955 + }, + { + "epoch": 0.18254260144661028, + "grad_norm": 1.6672821202806258, + "learning_rate": 1.8804268116317507e-05, + "loss": 0.8293, + "step": 5956 + }, + { + "epoch": 0.18257324996935148, + "grad_norm": 1.4393430177847675, + "learning_rate": 1.880379738290937e-05, + "loss": 0.7745, + "step": 5957 + }, + { + "epoch": 0.1826038984920927, + "grad_norm": 1.4155124752304424, + "learning_rate": 1.8803326562755166e-05, + "loss": 0.7714, + "step": 5958 + }, + { + "epoch": 0.1826345470148339, + "grad_norm": 1.5862062291977526, + "learning_rate": 1.880285565585953e-05, + "loss": 0.8131, + "step": 5959 + }, + { + "epoch": 0.1826651955375751, + "grad_norm": 1.6361359292753164, + "learning_rate": 1.8802384662227107e-05, + "loss": 0.9545, + "step": 5960 + }, + { + "epoch": 0.1826958440603163, + "grad_norm": 1.587042914977944, + "learning_rate": 1.8801913581862537e-05, + "loss": 0.8295, + "step": 5961 + }, + { + "epoch": 0.1827264925830575, + "grad_norm": 1.6946517168782824, + "learning_rate": 1.8801442414770456e-05, + "loss": 0.8542, + "step": 5962 + }, + { + "epoch": 0.1827571411057987, + "grad_norm": 1.620845076794187, + "learning_rate": 1.8800971160955514e-05, + "loss": 0.8741, + "step": 5963 + }, + { + "epoch": 0.1827877896285399, + "grad_norm": 1.5530510797476837, + "learning_rate": 1.880049982042235e-05, + "loss": 0.7815, + "step": 5964 + }, + { + "epoch": 0.1828184381512811, + "grad_norm": 1.5479664347655038, + "learning_rate": 1.880002839317561e-05, + "loss": 0.8482, + "step": 5965 + }, + { + "epoch": 0.1828490866740223, + "grad_norm": 1.6219063870817831, + "learning_rate": 1.879955687921994e-05, + "loss": 0.9076, + "step": 5966 + }, + { + "epoch": 0.1828797351967635, + "grad_norm": 1.3947964307037997, + "learning_rate": 1.8799085278559985e-05, + "loss": 0.6302, + "step": 5967 + }, + { + "epoch": 0.18291038371950472, + "grad_norm": 1.459779301561759, + "learning_rate": 1.8798613591200387e-05, + "loss": 0.8603, + "step": 5968 + }, + { + "epoch": 0.18294103224224592, + "grad_norm": 1.4505329696527138, + "learning_rate": 1.8798141817145804e-05, + "loss": 0.8159, + "step": 5969 + }, + { + "epoch": 0.18297168076498713, + "grad_norm": 1.5815693590437707, + "learning_rate": 1.8797669956400876e-05, + "loss": 0.8323, + "step": 5970 + }, + { + "epoch": 0.18300232928772833, + "grad_norm": 1.4318818174741033, + "learning_rate": 1.8797198008970253e-05, + "loss": 0.6852, + "step": 5971 + }, + { + "epoch": 0.18303297781046954, + "grad_norm": 1.6769412219339517, + "learning_rate": 1.879672597485859e-05, + "loss": 0.9203, + "step": 5972 + }, + { + "epoch": 0.18306362633321074, + "grad_norm": 1.47128062620871, + "learning_rate": 1.8796253854070534e-05, + "loss": 0.6903, + "step": 5973 + }, + { + "epoch": 0.18309427485595195, + "grad_norm": 1.4828525440192966, + "learning_rate": 1.8795781646610737e-05, + "loss": 0.8573, + "step": 5974 + }, + { + "epoch": 0.18312492337869316, + "grad_norm": 1.4075880656463116, + "learning_rate": 1.8795309352483854e-05, + "loss": 0.7589, + "step": 5975 + }, + { + "epoch": 0.18315557190143436, + "grad_norm": 1.5482289289238218, + "learning_rate": 1.879483697169454e-05, + "loss": 0.8519, + "step": 5976 + }, + { + "epoch": 0.18318622042417557, + "grad_norm": 1.5827827892094741, + "learning_rate": 1.8794364504247444e-05, + "loss": 0.9134, + "step": 5977 + }, + { + "epoch": 0.18321686894691677, + "grad_norm": 1.3359679253396142, + "learning_rate": 1.8793891950147227e-05, + "loss": 0.7891, + "step": 5978 + }, + { + "epoch": 0.18324751746965795, + "grad_norm": 1.3638503091391836, + "learning_rate": 1.879341930939854e-05, + "loss": 0.7461, + "step": 5979 + }, + { + "epoch": 0.18327816599239916, + "grad_norm": 1.4640867560562874, + "learning_rate": 1.8792946582006042e-05, + "loss": 0.8254, + "step": 5980 + }, + { + "epoch": 0.18330881451514036, + "grad_norm": 1.5597624587155645, + "learning_rate": 1.879247376797439e-05, + "loss": 0.9787, + "step": 5981 + }, + { + "epoch": 0.18333946303788157, + "grad_norm": 1.5711622276377044, + "learning_rate": 1.879200086730825e-05, + "loss": 0.7834, + "step": 5982 + }, + { + "epoch": 0.18337011156062277, + "grad_norm": 1.399438122380322, + "learning_rate": 1.8791527880012272e-05, + "loss": 0.7814, + "step": 5983 + }, + { + "epoch": 0.18340076008336398, + "grad_norm": 1.0918775242107106, + "learning_rate": 1.8791054806091123e-05, + "loss": 0.6842, + "step": 5984 + }, + { + "epoch": 0.18343140860610518, + "grad_norm": 1.775462416722149, + "learning_rate": 1.8790581645549458e-05, + "loss": 0.9417, + "step": 5985 + }, + { + "epoch": 0.1834620571288464, + "grad_norm": 1.6494538847816604, + "learning_rate": 1.879010839839195e-05, + "loss": 0.8458, + "step": 5986 + }, + { + "epoch": 0.1834927056515876, + "grad_norm": 0.7507470874489589, + "learning_rate": 1.878963506462325e-05, + "loss": 0.6443, + "step": 5987 + }, + { + "epoch": 0.1835233541743288, + "grad_norm": 1.6815080257664172, + "learning_rate": 1.8789161644248025e-05, + "loss": 0.9109, + "step": 5988 + }, + { + "epoch": 0.18355400269707, + "grad_norm": 0.7497874877447593, + "learning_rate": 1.878868813727094e-05, + "loss": 0.6558, + "step": 5989 + }, + { + "epoch": 0.1835846512198112, + "grad_norm": 1.5094901414113115, + "learning_rate": 1.878821454369667e-05, + "loss": 0.7274, + "step": 5990 + }, + { + "epoch": 0.18361529974255242, + "grad_norm": 1.6149151075071908, + "learning_rate": 1.8787740863529865e-05, + "loss": 0.7999, + "step": 5991 + }, + { + "epoch": 0.18364594826529362, + "grad_norm": 0.7862080673067028, + "learning_rate": 1.8787267096775207e-05, + "loss": 0.644, + "step": 5992 + }, + { + "epoch": 0.18367659678803483, + "grad_norm": 0.8222623943699373, + "learning_rate": 1.8786793243437356e-05, + "loss": 0.6231, + "step": 5993 + }, + { + "epoch": 0.183707245310776, + "grad_norm": 1.652474153809251, + "learning_rate": 1.878631930352098e-05, + "loss": 0.9448, + "step": 5994 + }, + { + "epoch": 0.1837378938335172, + "grad_norm": 1.544612890260223, + "learning_rate": 1.8785845277030757e-05, + "loss": 0.8461, + "step": 5995 + }, + { + "epoch": 0.18376854235625842, + "grad_norm": 1.5505319252940186, + "learning_rate": 1.8785371163971347e-05, + "loss": 0.7869, + "step": 5996 + }, + { + "epoch": 0.18379919087899962, + "grad_norm": 1.8281653385971088, + "learning_rate": 1.8784896964347433e-05, + "loss": 0.8562, + "step": 5997 + }, + { + "epoch": 0.18382983940174083, + "grad_norm": 1.5125558353664026, + "learning_rate": 1.8784422678163678e-05, + "loss": 0.6849, + "step": 5998 + }, + { + "epoch": 0.18386048792448204, + "grad_norm": 1.721304463412255, + "learning_rate": 1.878394830542476e-05, + "loss": 0.8121, + "step": 5999 + }, + { + "epoch": 0.18389113644722324, + "grad_norm": 1.7246235417505065, + "learning_rate": 1.878347384613535e-05, + "loss": 0.7322, + "step": 6000 + }, + { + "epoch": 0.18392178496996445, + "grad_norm": 1.5699755650279519, + "learning_rate": 1.878299930030013e-05, + "loss": 0.8128, + "step": 6001 + }, + { + "epoch": 0.18395243349270565, + "grad_norm": 0.8582854075355432, + "learning_rate": 1.8782524667923766e-05, + "loss": 0.6761, + "step": 6002 + }, + { + "epoch": 0.18398308201544686, + "grad_norm": 1.5507094109936295, + "learning_rate": 1.878204994901094e-05, + "loss": 0.6702, + "step": 6003 + }, + { + "epoch": 0.18401373053818806, + "grad_norm": 0.7410714754211368, + "learning_rate": 1.878157514356633e-05, + "loss": 0.6473, + "step": 6004 + }, + { + "epoch": 0.18404437906092927, + "grad_norm": 1.7521682461426096, + "learning_rate": 1.8781100251594612e-05, + "loss": 0.7789, + "step": 6005 + }, + { + "epoch": 0.18407502758367048, + "grad_norm": 1.734157753357512, + "learning_rate": 1.8780625273100464e-05, + "loss": 0.8649, + "step": 6006 + }, + { + "epoch": 0.18410567610641168, + "grad_norm": 0.750132001967345, + "learning_rate": 1.8780150208088572e-05, + "loss": 0.6364, + "step": 6007 + }, + { + "epoch": 0.1841363246291529, + "grad_norm": 1.528732389854902, + "learning_rate": 1.8779675056563614e-05, + "loss": 0.7915, + "step": 6008 + }, + { + "epoch": 0.1841669731518941, + "grad_norm": 1.5816218805848217, + "learning_rate": 1.877919981853027e-05, + "loss": 0.7939, + "step": 6009 + }, + { + "epoch": 0.18419762167463527, + "grad_norm": 1.5744766567443866, + "learning_rate": 1.8778724493993222e-05, + "loss": 0.839, + "step": 6010 + }, + { + "epoch": 0.18422827019737648, + "grad_norm": 1.4883406094666496, + "learning_rate": 1.877824908295716e-05, + "loss": 0.8287, + "step": 6011 + }, + { + "epoch": 0.18425891872011768, + "grad_norm": 1.493456468022087, + "learning_rate": 1.877777358542676e-05, + "loss": 0.8579, + "step": 6012 + }, + { + "epoch": 0.1842895672428589, + "grad_norm": 1.6053601330751701, + "learning_rate": 1.8777298001406713e-05, + "loss": 0.7955, + "step": 6013 + }, + { + "epoch": 0.1843202157656001, + "grad_norm": 1.7180583392891944, + "learning_rate": 1.87768223309017e-05, + "loss": 0.8008, + "step": 6014 + }, + { + "epoch": 0.1843508642883413, + "grad_norm": 1.5655697881613007, + "learning_rate": 1.8776346573916414e-05, + "loss": 0.7881, + "step": 6015 + }, + { + "epoch": 0.1843815128110825, + "grad_norm": 1.671088846122993, + "learning_rate": 1.8775870730455537e-05, + "loss": 0.8405, + "step": 6016 + }, + { + "epoch": 0.1844121613338237, + "grad_norm": 1.3570999453550554, + "learning_rate": 1.8775394800523764e-05, + "loss": 0.7474, + "step": 6017 + }, + { + "epoch": 0.18444280985656492, + "grad_norm": 1.6439544569894682, + "learning_rate": 1.877491878412578e-05, + "loss": 0.8237, + "step": 6018 + }, + { + "epoch": 0.18447345837930612, + "grad_norm": 1.5671768117710614, + "learning_rate": 1.8774442681266274e-05, + "loss": 0.8253, + "step": 6019 + }, + { + "epoch": 0.18450410690204733, + "grad_norm": 1.6714556458037597, + "learning_rate": 1.8773966491949943e-05, + "loss": 0.7606, + "step": 6020 + }, + { + "epoch": 0.18453475542478853, + "grad_norm": 1.316869808649188, + "learning_rate": 1.8773490216181472e-05, + "loss": 0.8558, + "step": 6021 + }, + { + "epoch": 0.18456540394752974, + "grad_norm": 0.8924196501891067, + "learning_rate": 1.877301385396556e-05, + "loss": 0.6348, + "step": 6022 + }, + { + "epoch": 0.18459605247027094, + "grad_norm": 1.5658447660734098, + "learning_rate": 1.8772537405306893e-05, + "loss": 0.7616, + "step": 6023 + }, + { + "epoch": 0.18462670099301215, + "grad_norm": 1.6053443873764068, + "learning_rate": 1.877206087021017e-05, + "loss": 0.8062, + "step": 6024 + }, + { + "epoch": 0.18465734951575333, + "grad_norm": 1.4597582647031246, + "learning_rate": 1.877158424868009e-05, + "loss": 0.7725, + "step": 6025 + }, + { + "epoch": 0.18468799803849453, + "grad_norm": 1.7057989886289546, + "learning_rate": 1.8771107540721347e-05, + "loss": 0.7706, + "step": 6026 + }, + { + "epoch": 0.18471864656123574, + "grad_norm": 1.4916997019994234, + "learning_rate": 1.8770630746338638e-05, + "loss": 0.8286, + "step": 6027 + }, + { + "epoch": 0.18474929508397694, + "grad_norm": 1.4856534882857506, + "learning_rate": 1.8770153865536656e-05, + "loss": 0.8621, + "step": 6028 + }, + { + "epoch": 0.18477994360671815, + "grad_norm": 0.9051087187115229, + "learning_rate": 1.876967689832011e-05, + "loss": 0.6397, + "step": 6029 + }, + { + "epoch": 0.18481059212945936, + "grad_norm": 1.501235044825296, + "learning_rate": 1.8769199844693687e-05, + "loss": 0.8209, + "step": 6030 + }, + { + "epoch": 0.18484124065220056, + "grad_norm": 1.7089278092179732, + "learning_rate": 1.8768722704662097e-05, + "loss": 0.7951, + "step": 6031 + }, + { + "epoch": 0.18487188917494177, + "grad_norm": 1.5027486681545894, + "learning_rate": 1.876824547823004e-05, + "loss": 0.7936, + "step": 6032 + }, + { + "epoch": 0.18490253769768297, + "grad_norm": 1.5122508631566665, + "learning_rate": 1.8767768165402213e-05, + "loss": 0.8658, + "step": 6033 + }, + { + "epoch": 0.18493318622042418, + "grad_norm": 0.7422215908986334, + "learning_rate": 1.8767290766183326e-05, + "loss": 0.6571, + "step": 6034 + }, + { + "epoch": 0.18496383474316538, + "grad_norm": 0.6873874874748496, + "learning_rate": 1.8766813280578082e-05, + "loss": 0.6435, + "step": 6035 + }, + { + "epoch": 0.1849944832659066, + "grad_norm": 1.7092262325566694, + "learning_rate": 1.8766335708591178e-05, + "loss": 0.9436, + "step": 6036 + }, + { + "epoch": 0.1850251317886478, + "grad_norm": 1.4014890007591527, + "learning_rate": 1.876585805022733e-05, + "loss": 0.8329, + "step": 6037 + }, + { + "epoch": 0.185055780311389, + "grad_norm": 1.4165824384977137, + "learning_rate": 1.876538030549124e-05, + "loss": 0.7485, + "step": 6038 + }, + { + "epoch": 0.1850864288341302, + "grad_norm": 1.6314337272253834, + "learning_rate": 1.876490247438761e-05, + "loss": 0.7243, + "step": 6039 + }, + { + "epoch": 0.1851170773568714, + "grad_norm": 1.5211403742149139, + "learning_rate": 1.8764424556921156e-05, + "loss": 0.8055, + "step": 6040 + }, + { + "epoch": 0.1851477258796126, + "grad_norm": 0.7963805552268708, + "learning_rate": 1.8763946553096584e-05, + "loss": 0.642, + "step": 6041 + }, + { + "epoch": 0.1851783744023538, + "grad_norm": 1.5418511416091298, + "learning_rate": 1.8763468462918607e-05, + "loss": 0.8029, + "step": 6042 + }, + { + "epoch": 0.185209022925095, + "grad_norm": 1.7212879943845287, + "learning_rate": 1.8762990286391932e-05, + "loss": 0.8064, + "step": 6043 + }, + { + "epoch": 0.1852396714478362, + "grad_norm": 0.6924741618748737, + "learning_rate": 1.876251202352127e-05, + "loss": 0.6448, + "step": 6044 + }, + { + "epoch": 0.1852703199705774, + "grad_norm": 1.701050588876999, + "learning_rate": 1.8762033674311336e-05, + "loss": 0.8155, + "step": 6045 + }, + { + "epoch": 0.18530096849331862, + "grad_norm": 1.6001716362808613, + "learning_rate": 1.876155523876684e-05, + "loss": 0.8538, + "step": 6046 + }, + { + "epoch": 0.18533161701605982, + "grad_norm": 1.489768467711857, + "learning_rate": 1.8761076716892505e-05, + "loss": 0.8184, + "step": 6047 + }, + { + "epoch": 0.18536226553880103, + "grad_norm": 1.4030435376749555, + "learning_rate": 1.8760598108693032e-05, + "loss": 0.7679, + "step": 6048 + }, + { + "epoch": 0.18539291406154224, + "grad_norm": 1.5945125499988806, + "learning_rate": 1.8760119414173147e-05, + "loss": 0.8573, + "step": 6049 + }, + { + "epoch": 0.18542356258428344, + "grad_norm": 1.4103191742368353, + "learning_rate": 1.8759640633337565e-05, + "loss": 0.843, + "step": 6050 + }, + { + "epoch": 0.18545421110702465, + "grad_norm": 1.4977305891066177, + "learning_rate": 1.8759161766191003e-05, + "loss": 0.7429, + "step": 6051 + }, + { + "epoch": 0.18548485962976585, + "grad_norm": 1.5706192081436476, + "learning_rate": 1.8758682812738177e-05, + "loss": 0.7761, + "step": 6052 + }, + { + "epoch": 0.18551550815250706, + "grad_norm": 1.6612397679943676, + "learning_rate": 1.8758203772983813e-05, + "loss": 0.7586, + "step": 6053 + }, + { + "epoch": 0.18554615667524826, + "grad_norm": 1.4669098424003078, + "learning_rate": 1.875772464693262e-05, + "loss": 0.686, + "step": 6054 + }, + { + "epoch": 0.18557680519798947, + "grad_norm": 1.9745953646386145, + "learning_rate": 1.875724543458933e-05, + "loss": 0.8864, + "step": 6055 + }, + { + "epoch": 0.18560745372073065, + "grad_norm": 1.7473986142291744, + "learning_rate": 1.8756766135958658e-05, + "loss": 0.8307, + "step": 6056 + }, + { + "epoch": 0.18563810224347185, + "grad_norm": 1.5777447758699927, + "learning_rate": 1.8756286751045327e-05, + "loss": 0.7936, + "step": 6057 + }, + { + "epoch": 0.18566875076621306, + "grad_norm": 1.41941652090009, + "learning_rate": 1.8755807279854065e-05, + "loss": 0.7972, + "step": 6058 + }, + { + "epoch": 0.18569939928895426, + "grad_norm": 1.5932471298807833, + "learning_rate": 1.875532772238959e-05, + "loss": 0.9128, + "step": 6059 + }, + { + "epoch": 0.18573004781169547, + "grad_norm": 1.5377445500858622, + "learning_rate": 1.8754848078656635e-05, + "loss": 0.7707, + "step": 6060 + }, + { + "epoch": 0.18576069633443668, + "grad_norm": 1.3341890125292686, + "learning_rate": 1.875436834865992e-05, + "loss": 0.8363, + "step": 6061 + }, + { + "epoch": 0.18579134485717788, + "grad_norm": 1.4393499248735724, + "learning_rate": 1.8753888532404176e-05, + "loss": 0.8111, + "step": 6062 + }, + { + "epoch": 0.1858219933799191, + "grad_norm": 1.295229138715293, + "learning_rate": 1.8753408629894124e-05, + "loss": 0.7848, + "step": 6063 + }, + { + "epoch": 0.1858526419026603, + "grad_norm": 1.536056304488782, + "learning_rate": 1.8752928641134503e-05, + "loss": 0.8804, + "step": 6064 + }, + { + "epoch": 0.1858832904254015, + "grad_norm": 1.7212006680804268, + "learning_rate": 1.8752448566130034e-05, + "loss": 0.776, + "step": 6065 + }, + { + "epoch": 0.1859139389481427, + "grad_norm": 1.4202191491966254, + "learning_rate": 1.8751968404885447e-05, + "loss": 0.868, + "step": 6066 + }, + { + "epoch": 0.1859445874708839, + "grad_norm": 1.5615029551624695, + "learning_rate": 1.875148815740548e-05, + "loss": 0.8219, + "step": 6067 + }, + { + "epoch": 0.18597523599362512, + "grad_norm": 0.8217637213425838, + "learning_rate": 1.8751007823694855e-05, + "loss": 0.6412, + "step": 6068 + }, + { + "epoch": 0.18600588451636632, + "grad_norm": 1.4471044823118089, + "learning_rate": 1.8750527403758315e-05, + "loss": 0.7875, + "step": 6069 + }, + { + "epoch": 0.18603653303910753, + "grad_norm": 1.57554636908864, + "learning_rate": 1.875004689760059e-05, + "loss": 0.8402, + "step": 6070 + }, + { + "epoch": 0.18606718156184873, + "grad_norm": 1.3966675495977616, + "learning_rate": 1.8749566305226413e-05, + "loss": 0.8546, + "step": 6071 + }, + { + "epoch": 0.1860978300845899, + "grad_norm": 1.389912212058463, + "learning_rate": 1.8749085626640523e-05, + "loss": 0.7316, + "step": 6072 + }, + { + "epoch": 0.18612847860733112, + "grad_norm": 1.5292074468724115, + "learning_rate": 1.8748604861847655e-05, + "loss": 0.8282, + "step": 6073 + }, + { + "epoch": 0.18615912713007232, + "grad_norm": 1.5220539555979082, + "learning_rate": 1.874812401085254e-05, + "loss": 0.8753, + "step": 6074 + }, + { + "epoch": 0.18618977565281353, + "grad_norm": 1.4643338960451522, + "learning_rate": 1.8747643073659924e-05, + "loss": 0.8888, + "step": 6075 + }, + { + "epoch": 0.18622042417555473, + "grad_norm": 1.489822729506161, + "learning_rate": 1.874716205027454e-05, + "loss": 0.8608, + "step": 6076 + }, + { + "epoch": 0.18625107269829594, + "grad_norm": 1.5431327389351395, + "learning_rate": 1.8746680940701134e-05, + "loss": 0.8327, + "step": 6077 + }, + { + "epoch": 0.18628172122103714, + "grad_norm": 1.6615654825477633, + "learning_rate": 1.8746199744944438e-05, + "loss": 0.9018, + "step": 6078 + }, + { + "epoch": 0.18631236974377835, + "grad_norm": 1.4861766743688103, + "learning_rate": 1.87457184630092e-05, + "loss": 0.803, + "step": 6079 + }, + { + "epoch": 0.18634301826651956, + "grad_norm": 0.7463038160495202, + "learning_rate": 1.874523709490016e-05, + "loss": 0.6226, + "step": 6080 + }, + { + "epoch": 0.18637366678926076, + "grad_norm": 1.5207485465527641, + "learning_rate": 1.8744755640622064e-05, + "loss": 0.8852, + "step": 6081 + }, + { + "epoch": 0.18640431531200197, + "grad_norm": 1.6967377341439145, + "learning_rate": 1.8744274100179652e-05, + "loss": 0.8836, + "step": 6082 + }, + { + "epoch": 0.18643496383474317, + "grad_norm": 1.4753506271660237, + "learning_rate": 1.874379247357767e-05, + "loss": 0.8572, + "step": 6083 + }, + { + "epoch": 0.18646561235748438, + "grad_norm": 1.584106896024581, + "learning_rate": 1.874331076082086e-05, + "loss": 0.7635, + "step": 6084 + }, + { + "epoch": 0.18649626088022558, + "grad_norm": 1.5222361189396612, + "learning_rate": 1.8742828961913976e-05, + "loss": 0.8286, + "step": 6085 + }, + { + "epoch": 0.1865269094029668, + "grad_norm": 1.4552896055639881, + "learning_rate": 1.874234707686176e-05, + "loss": 0.7965, + "step": 6086 + }, + { + "epoch": 0.18655755792570797, + "grad_norm": 1.677181266816031, + "learning_rate": 1.874186510566896e-05, + "loss": 0.7394, + "step": 6087 + }, + { + "epoch": 0.18658820644844917, + "grad_norm": 1.6451715914668579, + "learning_rate": 1.8741383048340333e-05, + "loss": 0.9068, + "step": 6088 + }, + { + "epoch": 0.18661885497119038, + "grad_norm": 1.579247868223976, + "learning_rate": 1.8740900904880614e-05, + "loss": 0.7486, + "step": 6089 + }, + { + "epoch": 0.18664950349393158, + "grad_norm": 1.4193456883588211, + "learning_rate": 1.8740418675294564e-05, + "loss": 0.7342, + "step": 6090 + }, + { + "epoch": 0.1866801520166728, + "grad_norm": 1.6564202027986799, + "learning_rate": 1.8739936359586935e-05, + "loss": 0.8746, + "step": 6091 + }, + { + "epoch": 0.186710800539414, + "grad_norm": 1.355283379389839, + "learning_rate": 1.8739453957762475e-05, + "loss": 0.764, + "step": 6092 + }, + { + "epoch": 0.1867414490621552, + "grad_norm": 1.6733323290895388, + "learning_rate": 1.8738971469825942e-05, + "loss": 0.8851, + "step": 6093 + }, + { + "epoch": 0.1867720975848964, + "grad_norm": 1.501487694082779, + "learning_rate": 1.8738488895782083e-05, + "loss": 0.7287, + "step": 6094 + }, + { + "epoch": 0.1868027461076376, + "grad_norm": 1.650313699694387, + "learning_rate": 1.873800623563566e-05, + "loss": 0.8491, + "step": 6095 + }, + { + "epoch": 0.18683339463037882, + "grad_norm": 1.4712761984836766, + "learning_rate": 1.8737523489391423e-05, + "loss": 0.6995, + "step": 6096 + }, + { + "epoch": 0.18686404315312002, + "grad_norm": 1.591345950034545, + "learning_rate": 1.8737040657054133e-05, + "loss": 0.7457, + "step": 6097 + }, + { + "epoch": 0.18689469167586123, + "grad_norm": 1.4132115448299838, + "learning_rate": 1.8736557738628548e-05, + "loss": 0.8324, + "step": 6098 + }, + { + "epoch": 0.18692534019860244, + "grad_norm": 1.7535015939349254, + "learning_rate": 1.873607473411942e-05, + "loss": 0.8801, + "step": 6099 + }, + { + "epoch": 0.18695598872134364, + "grad_norm": 1.498555543414946, + "learning_rate": 1.8735591643531516e-05, + "loss": 0.8548, + "step": 6100 + }, + { + "epoch": 0.18698663724408485, + "grad_norm": 1.597323777733508, + "learning_rate": 1.873510846686959e-05, + "loss": 0.8091, + "step": 6101 + }, + { + "epoch": 0.18701728576682605, + "grad_norm": 1.535681649358949, + "learning_rate": 1.8734625204138407e-05, + "loss": 0.8814, + "step": 6102 + }, + { + "epoch": 0.18704793428956723, + "grad_norm": 1.5373373211450738, + "learning_rate": 1.8734141855342723e-05, + "loss": 0.8082, + "step": 6103 + }, + { + "epoch": 0.18707858281230844, + "grad_norm": 1.5415385734021374, + "learning_rate": 1.873365842048731e-05, + "loss": 0.8576, + "step": 6104 + }, + { + "epoch": 0.18710923133504964, + "grad_norm": 0.788631210733079, + "learning_rate": 1.8733174899576926e-05, + "loss": 0.6617, + "step": 6105 + }, + { + "epoch": 0.18713987985779085, + "grad_norm": 1.5389759281828916, + "learning_rate": 1.873269129261633e-05, + "loss": 0.8615, + "step": 6106 + }, + { + "epoch": 0.18717052838053205, + "grad_norm": 1.6160497226356394, + "learning_rate": 1.8732207599610296e-05, + "loss": 0.8987, + "step": 6107 + }, + { + "epoch": 0.18720117690327326, + "grad_norm": 1.450258048789, + "learning_rate": 1.873172382056359e-05, + "loss": 0.7253, + "step": 6108 + }, + { + "epoch": 0.18723182542601446, + "grad_norm": 1.4562485456955876, + "learning_rate": 1.873123995548097e-05, + "loss": 0.8006, + "step": 6109 + }, + { + "epoch": 0.18726247394875567, + "grad_norm": 1.667202217829702, + "learning_rate": 1.873075600436721e-05, + "loss": 0.8117, + "step": 6110 + }, + { + "epoch": 0.18729312247149688, + "grad_norm": 1.4637549060075825, + "learning_rate": 1.8730271967227075e-05, + "loss": 0.8239, + "step": 6111 + }, + { + "epoch": 0.18732377099423808, + "grad_norm": 0.7657192671010923, + "learning_rate": 1.872978784406534e-05, + "loss": 0.6689, + "step": 6112 + }, + { + "epoch": 0.1873544195169793, + "grad_norm": 1.6397988004161896, + "learning_rate": 1.8729303634886768e-05, + "loss": 0.904, + "step": 6113 + }, + { + "epoch": 0.1873850680397205, + "grad_norm": 1.6806843535689193, + "learning_rate": 1.8728819339696138e-05, + "loss": 0.7772, + "step": 6114 + }, + { + "epoch": 0.1874157165624617, + "grad_norm": 1.5856261049698914, + "learning_rate": 1.8728334958498215e-05, + "loss": 0.7827, + "step": 6115 + }, + { + "epoch": 0.1874463650852029, + "grad_norm": 0.7083126817751986, + "learning_rate": 1.8727850491297775e-05, + "loss": 0.6352, + "step": 6116 + }, + { + "epoch": 0.1874770136079441, + "grad_norm": 0.7316610498239154, + "learning_rate": 1.8727365938099595e-05, + "loss": 0.6754, + "step": 6117 + }, + { + "epoch": 0.1875076621306853, + "grad_norm": 1.6505909331429225, + "learning_rate": 1.8726881298908437e-05, + "loss": 0.8278, + "step": 6118 + }, + { + "epoch": 0.1875383106534265, + "grad_norm": 1.4777806649173877, + "learning_rate": 1.872639657372909e-05, + "loss": 0.7951, + "step": 6119 + }, + { + "epoch": 0.1875689591761677, + "grad_norm": 0.7105736437229934, + "learning_rate": 1.8725911762566324e-05, + "loss": 0.6225, + "step": 6120 + }, + { + "epoch": 0.1875996076989089, + "grad_norm": 0.714167012403645, + "learning_rate": 1.872542686542492e-05, + "loss": 0.6526, + "step": 6121 + }, + { + "epoch": 0.1876302562216501, + "grad_norm": 1.4483668875746893, + "learning_rate": 1.872494188230965e-05, + "loss": 0.8878, + "step": 6122 + }, + { + "epoch": 0.18766090474439132, + "grad_norm": 1.4512372056939773, + "learning_rate": 1.872445681322529e-05, + "loss": 0.9001, + "step": 6123 + }, + { + "epoch": 0.18769155326713252, + "grad_norm": 1.775541478695213, + "learning_rate": 1.872397165817663e-05, + "loss": 0.8996, + "step": 6124 + }, + { + "epoch": 0.18772220178987373, + "grad_norm": 1.401360885306399, + "learning_rate": 1.8723486417168446e-05, + "loss": 0.7511, + "step": 6125 + }, + { + "epoch": 0.18775285031261493, + "grad_norm": 1.5817113127025413, + "learning_rate": 1.872300109020552e-05, + "loss": 0.7744, + "step": 6126 + }, + { + "epoch": 0.18778349883535614, + "grad_norm": 1.4959097858839105, + "learning_rate": 1.8722515677292627e-05, + "loss": 0.7678, + "step": 6127 + }, + { + "epoch": 0.18781414735809734, + "grad_norm": 1.722252624945616, + "learning_rate": 1.8722030178434555e-05, + "loss": 0.901, + "step": 6128 + }, + { + "epoch": 0.18784479588083855, + "grad_norm": 1.4123967826057684, + "learning_rate": 1.8721544593636093e-05, + "loss": 0.7872, + "step": 6129 + }, + { + "epoch": 0.18787544440357976, + "grad_norm": 1.410308196330376, + "learning_rate": 1.8721058922902018e-05, + "loss": 0.8155, + "step": 6130 + }, + { + "epoch": 0.18790609292632096, + "grad_norm": 1.5209528078383137, + "learning_rate": 1.872057316623712e-05, + "loss": 0.766, + "step": 6131 + }, + { + "epoch": 0.18793674144906217, + "grad_norm": 1.7190108641998478, + "learning_rate": 1.8720087323646178e-05, + "loss": 0.8492, + "step": 6132 + }, + { + "epoch": 0.18796738997180337, + "grad_norm": 1.6146941740575886, + "learning_rate": 1.8719601395133987e-05, + "loss": 0.8576, + "step": 6133 + }, + { + "epoch": 0.18799803849454455, + "grad_norm": 1.7212165833370026, + "learning_rate": 1.8719115380705334e-05, + "loss": 0.8456, + "step": 6134 + }, + { + "epoch": 0.18802868701728576, + "grad_norm": 1.7628831576616186, + "learning_rate": 1.871862928036501e-05, + "loss": 0.7845, + "step": 6135 + }, + { + "epoch": 0.18805933554002696, + "grad_norm": 1.4638857440846529, + "learning_rate": 1.8718143094117795e-05, + "loss": 0.8131, + "step": 6136 + }, + { + "epoch": 0.18808998406276817, + "grad_norm": 2.1285427443425013, + "learning_rate": 1.871765682196849e-05, + "loss": 0.752, + "step": 6137 + }, + { + "epoch": 0.18812063258550937, + "grad_norm": 1.4026646186098004, + "learning_rate": 1.8717170463921875e-05, + "loss": 0.7145, + "step": 6138 + }, + { + "epoch": 0.18815128110825058, + "grad_norm": 1.5449487693324524, + "learning_rate": 1.8716684019982753e-05, + "loss": 0.8525, + "step": 6139 + }, + { + "epoch": 0.18818192963099178, + "grad_norm": 1.4745290738099373, + "learning_rate": 1.8716197490155914e-05, + "loss": 0.7625, + "step": 6140 + }, + { + "epoch": 0.188212578153733, + "grad_norm": 1.5782713154189612, + "learning_rate": 1.871571087444615e-05, + "loss": 0.8174, + "step": 6141 + }, + { + "epoch": 0.1882432266764742, + "grad_norm": 1.4595357224237926, + "learning_rate": 1.8715224172858258e-05, + "loss": 0.7323, + "step": 6142 + }, + { + "epoch": 0.1882738751992154, + "grad_norm": 1.6277263968336664, + "learning_rate": 1.871473738539703e-05, + "loss": 0.8273, + "step": 6143 + }, + { + "epoch": 0.1883045237219566, + "grad_norm": 1.490735902330965, + "learning_rate": 1.8714250512067268e-05, + "loss": 0.7524, + "step": 6144 + }, + { + "epoch": 0.1883351722446978, + "grad_norm": 1.4240707100860133, + "learning_rate": 1.8713763552873762e-05, + "loss": 0.814, + "step": 6145 + }, + { + "epoch": 0.18836582076743902, + "grad_norm": 1.7281171059337863, + "learning_rate": 1.8713276507821318e-05, + "loss": 0.817, + "step": 6146 + }, + { + "epoch": 0.18839646929018022, + "grad_norm": 1.544150544196614, + "learning_rate": 1.8712789376914728e-05, + "loss": 0.9082, + "step": 6147 + }, + { + "epoch": 0.18842711781292143, + "grad_norm": 1.413256976651073, + "learning_rate": 1.8712302160158798e-05, + "loss": 0.757, + "step": 6148 + }, + { + "epoch": 0.1884577663356626, + "grad_norm": 1.543328213000046, + "learning_rate": 1.8711814857558325e-05, + "loss": 0.8801, + "step": 6149 + }, + { + "epoch": 0.1884884148584038, + "grad_norm": 0.8160829078397153, + "learning_rate": 1.871132746911811e-05, + "loss": 0.6507, + "step": 6150 + }, + { + "epoch": 0.18851906338114502, + "grad_norm": 1.5682966067652817, + "learning_rate": 1.8710839994842955e-05, + "loss": 0.729, + "step": 6151 + }, + { + "epoch": 0.18854971190388622, + "grad_norm": 0.7240262589653761, + "learning_rate": 1.8710352434737666e-05, + "loss": 0.639, + "step": 6152 + }, + { + "epoch": 0.18858036042662743, + "grad_norm": 0.7229152062185622, + "learning_rate": 1.870986478880705e-05, + "loss": 0.6491, + "step": 6153 + }, + { + "epoch": 0.18861100894936864, + "grad_norm": 1.6621599157686964, + "learning_rate": 1.8709377057055903e-05, + "loss": 0.7423, + "step": 6154 + }, + { + "epoch": 0.18864165747210984, + "grad_norm": 1.7276331813725923, + "learning_rate": 1.8708889239489038e-05, + "loss": 0.8459, + "step": 6155 + }, + { + "epoch": 0.18867230599485105, + "grad_norm": 1.6961102906678094, + "learning_rate": 1.8708401336111257e-05, + "loss": 0.8618, + "step": 6156 + }, + { + "epoch": 0.18870295451759225, + "grad_norm": 1.4873085807806734, + "learning_rate": 1.8707913346927368e-05, + "loss": 0.7439, + "step": 6157 + }, + { + "epoch": 0.18873360304033346, + "grad_norm": 0.7735372376024022, + "learning_rate": 1.8707425271942186e-05, + "loss": 0.632, + "step": 6158 + }, + { + "epoch": 0.18876425156307466, + "grad_norm": 1.6207110807373781, + "learning_rate": 1.870693711116051e-05, + "loss": 0.7331, + "step": 6159 + }, + { + "epoch": 0.18879490008581587, + "grad_norm": 1.817879630244349, + "learning_rate": 1.8706448864587155e-05, + "loss": 0.7454, + "step": 6160 + }, + { + "epoch": 0.18882554860855708, + "grad_norm": 0.7293316864901875, + "learning_rate": 1.8705960532226936e-05, + "loss": 0.6542, + "step": 6161 + }, + { + "epoch": 0.18885619713129828, + "grad_norm": 1.5373809561189398, + "learning_rate": 1.8705472114084658e-05, + "loss": 0.8449, + "step": 6162 + }, + { + "epoch": 0.1888868456540395, + "grad_norm": 1.4799020215764709, + "learning_rate": 1.8704983610165135e-05, + "loss": 0.807, + "step": 6163 + }, + { + "epoch": 0.1889174941767807, + "grad_norm": 1.370098972654055, + "learning_rate": 1.8704495020473183e-05, + "loss": 0.8366, + "step": 6164 + }, + { + "epoch": 0.18894814269952187, + "grad_norm": 1.5395357370022378, + "learning_rate": 1.8704006345013615e-05, + "loss": 0.8316, + "step": 6165 + }, + { + "epoch": 0.18897879122226308, + "grad_norm": 1.509873292921037, + "learning_rate": 1.8703517583791243e-05, + "loss": 0.787, + "step": 6166 + }, + { + "epoch": 0.18900943974500428, + "grad_norm": 1.6090470642486554, + "learning_rate": 1.8703028736810885e-05, + "loss": 0.8542, + "step": 6167 + }, + { + "epoch": 0.1890400882677455, + "grad_norm": 1.3660815873201646, + "learning_rate": 1.870253980407736e-05, + "loss": 0.7945, + "step": 6168 + }, + { + "epoch": 0.1890707367904867, + "grad_norm": 1.488339805200447, + "learning_rate": 1.870205078559548e-05, + "loss": 0.7286, + "step": 6169 + }, + { + "epoch": 0.1891013853132279, + "grad_norm": 1.4109248490008854, + "learning_rate": 1.870156168137007e-05, + "loss": 0.8254, + "step": 6170 + }, + { + "epoch": 0.1891320338359691, + "grad_norm": 1.5247643298500182, + "learning_rate": 1.870107249140595e-05, + "loss": 0.8759, + "step": 6171 + }, + { + "epoch": 0.1891626823587103, + "grad_norm": 1.5535596536590064, + "learning_rate": 1.870058321570793e-05, + "loss": 0.9658, + "step": 6172 + }, + { + "epoch": 0.18919333088145152, + "grad_norm": 1.554901694887896, + "learning_rate": 1.8700093854280844e-05, + "loss": 0.846, + "step": 6173 + }, + { + "epoch": 0.18922397940419272, + "grad_norm": 1.5082682906033245, + "learning_rate": 1.86996044071295e-05, + "loss": 0.8154, + "step": 6174 + }, + { + "epoch": 0.18925462792693393, + "grad_norm": 1.6739518075948043, + "learning_rate": 1.869911487425873e-05, + "loss": 0.8249, + "step": 6175 + }, + { + "epoch": 0.18928527644967513, + "grad_norm": 0.8805160040822502, + "learning_rate": 1.869862525567336e-05, + "loss": 0.6597, + "step": 6176 + }, + { + "epoch": 0.18931592497241634, + "grad_norm": 1.4257235718626464, + "learning_rate": 1.8698135551378203e-05, + "loss": 0.8429, + "step": 6177 + }, + { + "epoch": 0.18934657349515754, + "grad_norm": 1.5515257977974344, + "learning_rate": 1.8697645761378098e-05, + "loss": 0.8219, + "step": 6178 + }, + { + "epoch": 0.18937722201789875, + "grad_norm": 1.433656704493542, + "learning_rate": 1.869715588567786e-05, + "loss": 0.7725, + "step": 6179 + }, + { + "epoch": 0.18940787054063993, + "grad_norm": 1.5320440269655615, + "learning_rate": 1.869666592428232e-05, + "loss": 0.9188, + "step": 6180 + }, + { + "epoch": 0.18943851906338113, + "grad_norm": 1.4537399555751556, + "learning_rate": 1.8696175877196306e-05, + "loss": 0.8446, + "step": 6181 + }, + { + "epoch": 0.18946916758612234, + "grad_norm": 0.7754500437608821, + "learning_rate": 1.8695685744424647e-05, + "loss": 0.6507, + "step": 6182 + }, + { + "epoch": 0.18949981610886354, + "grad_norm": 1.4805581526736364, + "learning_rate": 1.869519552597217e-05, + "loss": 0.8088, + "step": 6183 + }, + { + "epoch": 0.18953046463160475, + "grad_norm": 1.512032202648826, + "learning_rate": 1.8694705221843705e-05, + "loss": 0.7394, + "step": 6184 + }, + { + "epoch": 0.18956111315434596, + "grad_norm": 0.6984885540890394, + "learning_rate": 1.8694214832044086e-05, + "loss": 0.6274, + "step": 6185 + }, + { + "epoch": 0.18959176167708716, + "grad_norm": 1.5263203511838788, + "learning_rate": 1.8693724356578146e-05, + "loss": 0.7941, + "step": 6186 + }, + { + "epoch": 0.18962241019982837, + "grad_norm": 1.584882456044342, + "learning_rate": 1.8693233795450714e-05, + "loss": 0.8043, + "step": 6187 + }, + { + "epoch": 0.18965305872256957, + "grad_norm": 1.762704536356256, + "learning_rate": 1.8692743148666624e-05, + "loss": 0.9217, + "step": 6188 + }, + { + "epoch": 0.18968370724531078, + "grad_norm": 1.5779149497500775, + "learning_rate": 1.8692252416230716e-05, + "loss": 0.8637, + "step": 6189 + }, + { + "epoch": 0.18971435576805198, + "grad_norm": 1.6723689764265368, + "learning_rate": 1.8691761598147816e-05, + "loss": 0.7872, + "step": 6190 + }, + { + "epoch": 0.1897450042907932, + "grad_norm": 0.7204177690507335, + "learning_rate": 1.8691270694422767e-05, + "loss": 0.6596, + "step": 6191 + }, + { + "epoch": 0.1897756528135344, + "grad_norm": 1.40314483977292, + "learning_rate": 1.8690779705060403e-05, + "loss": 0.8396, + "step": 6192 + }, + { + "epoch": 0.1898063013362756, + "grad_norm": 1.530918601664687, + "learning_rate": 1.8690288630065566e-05, + "loss": 0.9144, + "step": 6193 + }, + { + "epoch": 0.1898369498590168, + "grad_norm": 1.638053253645153, + "learning_rate": 1.8689797469443088e-05, + "loss": 0.8191, + "step": 6194 + }, + { + "epoch": 0.189867598381758, + "grad_norm": 1.457660119749235, + "learning_rate": 1.8689306223197814e-05, + "loss": 0.8033, + "step": 6195 + }, + { + "epoch": 0.1898982469044992, + "grad_norm": 1.773500445239643, + "learning_rate": 1.8688814891334584e-05, + "loss": 0.9183, + "step": 6196 + }, + { + "epoch": 0.1899288954272404, + "grad_norm": 0.7360449518548828, + "learning_rate": 1.8688323473858232e-05, + "loss": 0.6524, + "step": 6197 + }, + { + "epoch": 0.1899595439499816, + "grad_norm": 0.759722740628928, + "learning_rate": 1.868783197077361e-05, + "loss": 0.6673, + "step": 6198 + }, + { + "epoch": 0.1899901924727228, + "grad_norm": 1.719276527736545, + "learning_rate": 1.868734038208556e-05, + "loss": 0.8019, + "step": 6199 + }, + { + "epoch": 0.190020840995464, + "grad_norm": 0.6889600636683265, + "learning_rate": 1.8686848707798918e-05, + "loss": 0.6336, + "step": 6200 + }, + { + "epoch": 0.19005148951820522, + "grad_norm": 1.692339740705628, + "learning_rate": 1.8686356947918533e-05, + "loss": 0.8037, + "step": 6201 + }, + { + "epoch": 0.19008213804094642, + "grad_norm": 1.479012551270903, + "learning_rate": 1.8685865102449253e-05, + "loss": 0.7831, + "step": 6202 + }, + { + "epoch": 0.19011278656368763, + "grad_norm": 1.5971377387865984, + "learning_rate": 1.868537317139592e-05, + "loss": 0.8142, + "step": 6203 + }, + { + "epoch": 0.19014343508642884, + "grad_norm": 0.7872183607416268, + "learning_rate": 1.868488115476338e-05, + "loss": 0.6358, + "step": 6204 + }, + { + "epoch": 0.19017408360917004, + "grad_norm": 1.5658156973583817, + "learning_rate": 1.8684389052556487e-05, + "loss": 0.8257, + "step": 6205 + }, + { + "epoch": 0.19020473213191125, + "grad_norm": 1.808000937535614, + "learning_rate": 1.8683896864780088e-05, + "loss": 0.7938, + "step": 6206 + }, + { + "epoch": 0.19023538065465245, + "grad_norm": 1.3619047769659962, + "learning_rate": 1.868340459143903e-05, + "loss": 0.8189, + "step": 6207 + }, + { + "epoch": 0.19026602917739366, + "grad_norm": 1.5966749904018849, + "learning_rate": 1.8682912232538167e-05, + "loss": 0.8437, + "step": 6208 + }, + { + "epoch": 0.19029667770013486, + "grad_norm": 1.499758094513442, + "learning_rate": 1.8682419788082345e-05, + "loss": 0.8272, + "step": 6209 + }, + { + "epoch": 0.19032732622287607, + "grad_norm": 1.4708188807259512, + "learning_rate": 1.8681927258076416e-05, + "loss": 0.7651, + "step": 6210 + }, + { + "epoch": 0.19035797474561725, + "grad_norm": 1.611680842080057, + "learning_rate": 1.8681434642525245e-05, + "loss": 0.9011, + "step": 6211 + }, + { + "epoch": 0.19038862326835845, + "grad_norm": 1.7395262419730917, + "learning_rate": 1.8680941941433673e-05, + "loss": 0.732, + "step": 6212 + }, + { + "epoch": 0.19041927179109966, + "grad_norm": 1.5195175488647057, + "learning_rate": 1.8680449154806556e-05, + "loss": 0.8123, + "step": 6213 + }, + { + "epoch": 0.19044992031384086, + "grad_norm": 1.501537951558701, + "learning_rate": 1.8679956282648756e-05, + "loss": 0.778, + "step": 6214 + }, + { + "epoch": 0.19048056883658207, + "grad_norm": 1.5391882559810401, + "learning_rate": 1.8679463324965127e-05, + "loss": 0.7774, + "step": 6215 + }, + { + "epoch": 0.19051121735932328, + "grad_norm": 1.9792600319751785, + "learning_rate": 1.8678970281760522e-05, + "loss": 0.9285, + "step": 6216 + }, + { + "epoch": 0.19054186588206448, + "grad_norm": 1.48551596030967, + "learning_rate": 1.8678477153039803e-05, + "loss": 0.7598, + "step": 6217 + }, + { + "epoch": 0.1905725144048057, + "grad_norm": 1.5036796355863744, + "learning_rate": 1.867798393880783e-05, + "loss": 0.892, + "step": 6218 + }, + { + "epoch": 0.1906031629275469, + "grad_norm": 1.362615707669389, + "learning_rate": 1.867749063906946e-05, + "loss": 0.7057, + "step": 6219 + }, + { + "epoch": 0.1906338114502881, + "grad_norm": 1.4561343318519162, + "learning_rate": 1.8676997253829553e-05, + "loss": 0.8629, + "step": 6220 + }, + { + "epoch": 0.1906644599730293, + "grad_norm": 1.6285445098145115, + "learning_rate": 1.8676503783092973e-05, + "loss": 0.8716, + "step": 6221 + }, + { + "epoch": 0.1906951084957705, + "grad_norm": 1.3119819402325195, + "learning_rate": 1.867601022686458e-05, + "loss": 0.7641, + "step": 6222 + }, + { + "epoch": 0.19072575701851172, + "grad_norm": 1.4508049095204494, + "learning_rate": 1.8675516585149243e-05, + "loss": 0.8673, + "step": 6223 + }, + { + "epoch": 0.19075640554125292, + "grad_norm": 1.5126244904579775, + "learning_rate": 1.8675022857951815e-05, + "loss": 0.8746, + "step": 6224 + }, + { + "epoch": 0.19078705406399413, + "grad_norm": 1.5091179811722988, + "learning_rate": 1.867452904527717e-05, + "loss": 0.9495, + "step": 6225 + }, + { + "epoch": 0.19081770258673533, + "grad_norm": 1.5177160591994037, + "learning_rate": 1.8674035147130172e-05, + "loss": 0.84, + "step": 6226 + }, + { + "epoch": 0.1908483511094765, + "grad_norm": 1.6488197216254563, + "learning_rate": 1.8673541163515688e-05, + "loss": 0.8292, + "step": 6227 + }, + { + "epoch": 0.19087899963221772, + "grad_norm": 1.6875517477751314, + "learning_rate": 1.8673047094438577e-05, + "loss": 0.8509, + "step": 6228 + }, + { + "epoch": 0.19090964815495892, + "grad_norm": 1.6579938172275779, + "learning_rate": 1.867255293990372e-05, + "loss": 0.8734, + "step": 6229 + }, + { + "epoch": 0.19094029667770013, + "grad_norm": 0.8332324866957017, + "learning_rate": 1.8672058699915978e-05, + "loss": 0.6567, + "step": 6230 + }, + { + "epoch": 0.19097094520044133, + "grad_norm": 1.5665897168867156, + "learning_rate": 1.8671564374480223e-05, + "loss": 0.7365, + "step": 6231 + }, + { + "epoch": 0.19100159372318254, + "grad_norm": 1.484968745359962, + "learning_rate": 1.8671069963601323e-05, + "loss": 0.8481, + "step": 6232 + }, + { + "epoch": 0.19103224224592374, + "grad_norm": 1.5311298426898954, + "learning_rate": 1.8670575467284155e-05, + "loss": 0.8486, + "step": 6233 + }, + { + "epoch": 0.19106289076866495, + "grad_norm": 1.775236331669694, + "learning_rate": 1.8670080885533588e-05, + "loss": 0.7443, + "step": 6234 + }, + { + "epoch": 0.19109353929140616, + "grad_norm": 1.4992275215804853, + "learning_rate": 1.8669586218354496e-05, + "loss": 0.8127, + "step": 6235 + }, + { + "epoch": 0.19112418781414736, + "grad_norm": 1.6685890933233003, + "learning_rate": 1.866909146575175e-05, + "loss": 0.7768, + "step": 6236 + }, + { + "epoch": 0.19115483633688857, + "grad_norm": 1.6191739071991598, + "learning_rate": 1.866859662773023e-05, + "loss": 0.8496, + "step": 6237 + }, + { + "epoch": 0.19118548485962977, + "grad_norm": 1.5532591720384241, + "learning_rate": 1.866810170429481e-05, + "loss": 0.792, + "step": 6238 + }, + { + "epoch": 0.19121613338237098, + "grad_norm": 1.5296790128112294, + "learning_rate": 1.8667606695450367e-05, + "loss": 0.7646, + "step": 6239 + }, + { + "epoch": 0.19124678190511218, + "grad_norm": 1.7693049582408147, + "learning_rate": 1.8667111601201776e-05, + "loss": 0.8377, + "step": 6240 + }, + { + "epoch": 0.1912774304278534, + "grad_norm": 1.3853922942404444, + "learning_rate": 1.8666616421553918e-05, + "loss": 0.9696, + "step": 6241 + }, + { + "epoch": 0.19130807895059457, + "grad_norm": 1.5102895444718982, + "learning_rate": 1.8666121156511666e-05, + "loss": 0.8045, + "step": 6242 + }, + { + "epoch": 0.19133872747333577, + "grad_norm": 1.5275500557541282, + "learning_rate": 1.866562580607991e-05, + "loss": 0.8164, + "step": 6243 + }, + { + "epoch": 0.19136937599607698, + "grad_norm": 1.4744551002463762, + "learning_rate": 1.8665130370263523e-05, + "loss": 0.7783, + "step": 6244 + }, + { + "epoch": 0.19140002451881818, + "grad_norm": 1.4443210523048715, + "learning_rate": 1.8664634849067392e-05, + "loss": 0.6839, + "step": 6245 + }, + { + "epoch": 0.1914306730415594, + "grad_norm": 1.6981686267086489, + "learning_rate": 1.8664139242496398e-05, + "loss": 0.752, + "step": 6246 + }, + { + "epoch": 0.1914613215643006, + "grad_norm": 1.5480058014752223, + "learning_rate": 1.866364355055542e-05, + "loss": 0.8685, + "step": 6247 + }, + { + "epoch": 0.1914919700870418, + "grad_norm": 1.5248577673791537, + "learning_rate": 1.8663147773249343e-05, + "loss": 0.707, + "step": 6248 + }, + { + "epoch": 0.191522618609783, + "grad_norm": 1.5330128795443274, + "learning_rate": 1.866265191058306e-05, + "loss": 0.7376, + "step": 6249 + }, + { + "epoch": 0.1915532671325242, + "grad_norm": 1.5388217145282994, + "learning_rate": 1.8662155962561447e-05, + "loss": 0.8814, + "step": 6250 + }, + { + "epoch": 0.19158391565526542, + "grad_norm": 1.48606099358123, + "learning_rate": 1.8661659929189396e-05, + "loss": 0.7821, + "step": 6251 + }, + { + "epoch": 0.19161456417800662, + "grad_norm": 1.535091603077522, + "learning_rate": 1.8661163810471796e-05, + "loss": 0.7487, + "step": 6252 + }, + { + "epoch": 0.19164521270074783, + "grad_norm": 0.8652848717742779, + "learning_rate": 1.8660667606413532e-05, + "loss": 0.6501, + "step": 6253 + }, + { + "epoch": 0.19167586122348904, + "grad_norm": 1.5645066264414769, + "learning_rate": 1.8660171317019494e-05, + "loss": 0.7806, + "step": 6254 + }, + { + "epoch": 0.19170650974623024, + "grad_norm": 1.6314806371568764, + "learning_rate": 1.865967494229457e-05, + "loss": 0.7942, + "step": 6255 + }, + { + "epoch": 0.19173715826897145, + "grad_norm": 1.3860284239848506, + "learning_rate": 1.8659178482243655e-05, + "loss": 0.8055, + "step": 6256 + }, + { + "epoch": 0.19176780679171265, + "grad_norm": 1.6252012251831889, + "learning_rate": 1.865868193687164e-05, + "loss": 0.8642, + "step": 6257 + }, + { + "epoch": 0.19179845531445383, + "grad_norm": 1.4055270829219577, + "learning_rate": 1.8658185306183416e-05, + "loss": 0.7922, + "step": 6258 + }, + { + "epoch": 0.19182910383719504, + "grad_norm": 1.409061908424822, + "learning_rate": 1.8657688590183877e-05, + "loss": 0.7821, + "step": 6259 + }, + { + "epoch": 0.19185975235993624, + "grad_norm": 0.7430690763168931, + "learning_rate": 1.8657191788877915e-05, + "loss": 0.6186, + "step": 6260 + }, + { + "epoch": 0.19189040088267745, + "grad_norm": 1.4875537199053201, + "learning_rate": 1.8656694902270426e-05, + "loss": 0.6792, + "step": 6261 + }, + { + "epoch": 0.19192104940541865, + "grad_norm": 1.5840780284321172, + "learning_rate": 1.8656197930366313e-05, + "loss": 0.8272, + "step": 6262 + }, + { + "epoch": 0.19195169792815986, + "grad_norm": 1.2944922745829006, + "learning_rate": 1.865570087317046e-05, + "loss": 0.7219, + "step": 6263 + }, + { + "epoch": 0.19198234645090106, + "grad_norm": 1.485072630692989, + "learning_rate": 1.865520373068778e-05, + "loss": 0.893, + "step": 6264 + }, + { + "epoch": 0.19201299497364227, + "grad_norm": 0.7082942675599938, + "learning_rate": 1.8654706502923155e-05, + "loss": 0.6251, + "step": 6265 + }, + { + "epoch": 0.19204364349638348, + "grad_norm": 1.44898697031819, + "learning_rate": 1.8654209189881496e-05, + "loss": 0.7895, + "step": 6266 + }, + { + "epoch": 0.19207429201912468, + "grad_norm": 1.5362056306157297, + "learning_rate": 1.8653711791567703e-05, + "loss": 0.8049, + "step": 6267 + }, + { + "epoch": 0.1921049405418659, + "grad_norm": 1.477794122413037, + "learning_rate": 1.865321430798667e-05, + "loss": 0.7726, + "step": 6268 + }, + { + "epoch": 0.1921355890646071, + "grad_norm": 1.4857171281500234, + "learning_rate": 1.86527167391433e-05, + "loss": 0.718, + "step": 6269 + }, + { + "epoch": 0.1921662375873483, + "grad_norm": 1.4849811782829752, + "learning_rate": 1.8652219085042504e-05, + "loss": 0.7373, + "step": 6270 + }, + { + "epoch": 0.1921968861100895, + "grad_norm": 1.302888211340875, + "learning_rate": 1.8651721345689173e-05, + "loss": 0.7643, + "step": 6271 + }, + { + "epoch": 0.1922275346328307, + "grad_norm": 1.479327261562878, + "learning_rate": 1.8651223521088223e-05, + "loss": 0.8326, + "step": 6272 + }, + { + "epoch": 0.1922581831555719, + "grad_norm": 1.6472949558474723, + "learning_rate": 1.865072561124455e-05, + "loss": 0.8356, + "step": 6273 + }, + { + "epoch": 0.1922888316783131, + "grad_norm": 1.5424143177873455, + "learning_rate": 1.865022761616307e-05, + "loss": 0.8275, + "step": 6274 + }, + { + "epoch": 0.1923194802010543, + "grad_norm": 1.4079320750661006, + "learning_rate": 1.864972953584868e-05, + "loss": 0.7482, + "step": 6275 + }, + { + "epoch": 0.1923501287237955, + "grad_norm": 1.5353663500214978, + "learning_rate": 1.864923137030629e-05, + "loss": 0.8003, + "step": 6276 + }, + { + "epoch": 0.1923807772465367, + "grad_norm": 1.3731331767793444, + "learning_rate": 1.864873311954081e-05, + "loss": 0.746, + "step": 6277 + }, + { + "epoch": 0.19241142576927792, + "grad_norm": 1.5150882892324744, + "learning_rate": 1.8648234783557154e-05, + "loss": 0.8909, + "step": 6278 + }, + { + "epoch": 0.19244207429201912, + "grad_norm": 1.5932757748703759, + "learning_rate": 1.8647736362360227e-05, + "loss": 0.8013, + "step": 6279 + }, + { + "epoch": 0.19247272281476033, + "grad_norm": 1.428929155637921, + "learning_rate": 1.864723785595494e-05, + "loss": 0.8774, + "step": 6280 + }, + { + "epoch": 0.19250337133750153, + "grad_norm": 1.6175845033210003, + "learning_rate": 1.8646739264346205e-05, + "loss": 0.7346, + "step": 6281 + }, + { + "epoch": 0.19253401986024274, + "grad_norm": 1.463286512351673, + "learning_rate": 1.8646240587538936e-05, + "loss": 0.8043, + "step": 6282 + }, + { + "epoch": 0.19256466838298394, + "grad_norm": 1.5216121216363554, + "learning_rate": 1.864574182553805e-05, + "loss": 0.8097, + "step": 6283 + }, + { + "epoch": 0.19259531690572515, + "grad_norm": 1.538337719922975, + "learning_rate": 1.8645242978348452e-05, + "loss": 0.9179, + "step": 6284 + }, + { + "epoch": 0.19262596542846636, + "grad_norm": 1.4780628613509181, + "learning_rate": 1.8644744045975066e-05, + "loss": 0.804, + "step": 6285 + }, + { + "epoch": 0.19265661395120756, + "grad_norm": 1.3983733756397907, + "learning_rate": 1.8644245028422804e-05, + "loss": 0.7433, + "step": 6286 + }, + { + "epoch": 0.19268726247394877, + "grad_norm": 1.4520815510916116, + "learning_rate": 1.8643745925696584e-05, + "loss": 0.7236, + "step": 6287 + }, + { + "epoch": 0.19271791099668997, + "grad_norm": 1.6620163076062489, + "learning_rate": 1.8643246737801327e-05, + "loss": 0.8683, + "step": 6288 + }, + { + "epoch": 0.19274855951943115, + "grad_norm": 1.471073675816872, + "learning_rate": 1.8642747464741945e-05, + "loss": 0.8298, + "step": 6289 + }, + { + "epoch": 0.19277920804217236, + "grad_norm": 1.5524200026043253, + "learning_rate": 1.8642248106523362e-05, + "loss": 0.7501, + "step": 6290 + }, + { + "epoch": 0.19280985656491356, + "grad_norm": 1.4557226463133448, + "learning_rate": 1.86417486631505e-05, + "loss": 0.7849, + "step": 6291 + }, + { + "epoch": 0.19284050508765477, + "grad_norm": 1.5240093164803856, + "learning_rate": 1.864124913462827e-05, + "loss": 0.8239, + "step": 6292 + }, + { + "epoch": 0.19287115361039597, + "grad_norm": 1.443693055249993, + "learning_rate": 1.8640749520961607e-05, + "loss": 0.7585, + "step": 6293 + }, + { + "epoch": 0.19290180213313718, + "grad_norm": 1.3835618784822044, + "learning_rate": 1.8640249822155426e-05, + "loss": 0.6939, + "step": 6294 + }, + { + "epoch": 0.19293245065587838, + "grad_norm": 1.5118658240761895, + "learning_rate": 1.8639750038214654e-05, + "loss": 0.7344, + "step": 6295 + }, + { + "epoch": 0.1929630991786196, + "grad_norm": 1.4730438514292756, + "learning_rate": 1.8639250169144215e-05, + "loss": 0.8672, + "step": 6296 + }, + { + "epoch": 0.1929937477013608, + "grad_norm": 1.569765923738297, + "learning_rate": 1.8638750214949032e-05, + "loss": 0.7761, + "step": 6297 + }, + { + "epoch": 0.193024396224102, + "grad_norm": 1.408399363463487, + "learning_rate": 1.8638250175634034e-05, + "loss": 0.8272, + "step": 6298 + }, + { + "epoch": 0.1930550447468432, + "grad_norm": 1.4946647123278758, + "learning_rate": 1.8637750051204144e-05, + "loss": 0.8019, + "step": 6299 + }, + { + "epoch": 0.1930856932695844, + "grad_norm": 1.600960786796982, + "learning_rate": 1.86372498416643e-05, + "loss": 0.8582, + "step": 6300 + }, + { + "epoch": 0.19311634179232562, + "grad_norm": 1.6850307149340706, + "learning_rate": 1.8636749547019415e-05, + "loss": 0.9011, + "step": 6301 + }, + { + "epoch": 0.19314699031506682, + "grad_norm": 1.5134116358855845, + "learning_rate": 1.863624916727443e-05, + "loss": 0.7511, + "step": 6302 + }, + { + "epoch": 0.19317763883780803, + "grad_norm": 1.6188021855561425, + "learning_rate": 1.8635748702434272e-05, + "loss": 0.8042, + "step": 6303 + }, + { + "epoch": 0.1932082873605492, + "grad_norm": 1.5795616168234403, + "learning_rate": 1.8635248152503873e-05, + "loss": 0.818, + "step": 6304 + }, + { + "epoch": 0.1932389358832904, + "grad_norm": 1.574587488476516, + "learning_rate": 1.8634747517488164e-05, + "loss": 0.839, + "step": 6305 + }, + { + "epoch": 0.19326958440603162, + "grad_norm": 1.517403708288456, + "learning_rate": 1.8634246797392078e-05, + "loss": 0.8176, + "step": 6306 + }, + { + "epoch": 0.19330023292877282, + "grad_norm": 1.3193007533171817, + "learning_rate": 1.863374599222055e-05, + "loss": 0.6792, + "step": 6307 + }, + { + "epoch": 0.19333088145151403, + "grad_norm": 1.4159674665613877, + "learning_rate": 1.8633245101978518e-05, + "loss": 0.8191, + "step": 6308 + }, + { + "epoch": 0.19336152997425524, + "grad_norm": 1.6467527708456027, + "learning_rate": 1.8632744126670907e-05, + "loss": 0.8361, + "step": 6309 + }, + { + "epoch": 0.19339217849699644, + "grad_norm": 0.755489234145436, + "learning_rate": 1.863224306630266e-05, + "loss": 0.6609, + "step": 6310 + }, + { + "epoch": 0.19342282701973765, + "grad_norm": 1.5420819694748682, + "learning_rate": 1.8631741920878715e-05, + "loss": 0.8135, + "step": 6311 + }, + { + "epoch": 0.19345347554247885, + "grad_norm": 1.703425391234535, + "learning_rate": 1.8631240690404007e-05, + "loss": 0.8775, + "step": 6312 + }, + { + "epoch": 0.19348412406522006, + "grad_norm": 1.5378666845320492, + "learning_rate": 1.863073937488348e-05, + "loss": 0.7879, + "step": 6313 + }, + { + "epoch": 0.19351477258796126, + "grad_norm": 1.6366436742639814, + "learning_rate": 1.863023797432206e-05, + "loss": 0.8654, + "step": 6314 + }, + { + "epoch": 0.19354542111070247, + "grad_norm": 1.3713304345254909, + "learning_rate": 1.862973648872471e-05, + "loss": 0.7841, + "step": 6315 + }, + { + "epoch": 0.19357606963344368, + "grad_norm": 1.683811549309995, + "learning_rate": 1.862923491809635e-05, + "loss": 0.727, + "step": 6316 + }, + { + "epoch": 0.19360671815618488, + "grad_norm": 1.4877437769393953, + "learning_rate": 1.862873326244193e-05, + "loss": 0.7368, + "step": 6317 + }, + { + "epoch": 0.1936373666789261, + "grad_norm": 1.6613729887830413, + "learning_rate": 1.8628231521766397e-05, + "loss": 0.7494, + "step": 6318 + }, + { + "epoch": 0.1936680152016673, + "grad_norm": 1.644332779190127, + "learning_rate": 1.8627729696074692e-05, + "loss": 0.9305, + "step": 6319 + }, + { + "epoch": 0.19369866372440847, + "grad_norm": 0.7087004140904507, + "learning_rate": 1.8627227785371755e-05, + "loss": 0.6221, + "step": 6320 + }, + { + "epoch": 0.19372931224714968, + "grad_norm": 0.7191890418898967, + "learning_rate": 1.862672578966254e-05, + "loss": 0.6352, + "step": 6321 + }, + { + "epoch": 0.19375996076989088, + "grad_norm": 1.6509906973816824, + "learning_rate": 1.8626223708951982e-05, + "loss": 0.8493, + "step": 6322 + }, + { + "epoch": 0.1937906092926321, + "grad_norm": 0.6976365382805579, + "learning_rate": 1.8625721543245043e-05, + "loss": 0.609, + "step": 6323 + }, + { + "epoch": 0.1938212578153733, + "grad_norm": 1.4955789398515247, + "learning_rate": 1.8625219292546655e-05, + "loss": 0.8611, + "step": 6324 + }, + { + "epoch": 0.1938519063381145, + "grad_norm": 0.6895203635574773, + "learning_rate": 1.862471695686178e-05, + "loss": 0.6361, + "step": 6325 + }, + { + "epoch": 0.1938825548608557, + "grad_norm": 1.8049659535157423, + "learning_rate": 1.8624214536195358e-05, + "loss": 0.7771, + "step": 6326 + }, + { + "epoch": 0.1939132033835969, + "grad_norm": 1.607536670451343, + "learning_rate": 1.8623712030552345e-05, + "loss": 0.7385, + "step": 6327 + }, + { + "epoch": 0.19394385190633812, + "grad_norm": 1.5096422765248128, + "learning_rate": 1.862320943993769e-05, + "loss": 0.7765, + "step": 6328 + }, + { + "epoch": 0.19397450042907932, + "grad_norm": 1.4913405676565268, + "learning_rate": 1.862270676435635e-05, + "loss": 0.8898, + "step": 6329 + }, + { + "epoch": 0.19400514895182053, + "grad_norm": 1.5199176608486302, + "learning_rate": 1.8622204003813268e-05, + "loss": 0.8451, + "step": 6330 + }, + { + "epoch": 0.19403579747456173, + "grad_norm": 0.7872479936154878, + "learning_rate": 1.8621701158313407e-05, + "loss": 0.6434, + "step": 6331 + }, + { + "epoch": 0.19406644599730294, + "grad_norm": 1.5405208618412356, + "learning_rate": 1.862119822786172e-05, + "loss": 0.8421, + "step": 6332 + }, + { + "epoch": 0.19409709452004414, + "grad_norm": 1.571708287286843, + "learning_rate": 1.862069521246316e-05, + "loss": 0.8233, + "step": 6333 + }, + { + "epoch": 0.19412774304278535, + "grad_norm": 1.4513051781867536, + "learning_rate": 1.8620192112122683e-05, + "loss": 0.7453, + "step": 6334 + }, + { + "epoch": 0.19415839156552653, + "grad_norm": 1.5914815483189344, + "learning_rate": 1.8619688926845248e-05, + "loss": 0.7522, + "step": 6335 + }, + { + "epoch": 0.19418904008826773, + "grad_norm": 1.6867279381878992, + "learning_rate": 1.8619185656635813e-05, + "loss": 0.9167, + "step": 6336 + }, + { + "epoch": 0.19421968861100894, + "grad_norm": 1.431494056330554, + "learning_rate": 1.8618682301499337e-05, + "loss": 0.7836, + "step": 6337 + }, + { + "epoch": 0.19425033713375014, + "grad_norm": 1.5984494515209628, + "learning_rate": 1.861817886144078e-05, + "loss": 0.8607, + "step": 6338 + }, + { + "epoch": 0.19428098565649135, + "grad_norm": 0.7055928086772112, + "learning_rate": 1.8617675336465096e-05, + "loss": 0.6192, + "step": 6339 + }, + { + "epoch": 0.19431163417923256, + "grad_norm": 1.3886272091783023, + "learning_rate": 1.861717172657726e-05, + "loss": 0.7223, + "step": 6340 + }, + { + "epoch": 0.19434228270197376, + "grad_norm": 0.7058898161399594, + "learning_rate": 1.861666803178222e-05, + "loss": 0.6507, + "step": 6341 + }, + { + "epoch": 0.19437293122471497, + "grad_norm": 1.2919114402080845, + "learning_rate": 1.8616164252084948e-05, + "loss": 0.7865, + "step": 6342 + }, + { + "epoch": 0.19440357974745617, + "grad_norm": 1.3865396492085165, + "learning_rate": 1.8615660387490407e-05, + "loss": 0.8043, + "step": 6343 + }, + { + "epoch": 0.19443422827019738, + "grad_norm": 1.530239808227328, + "learning_rate": 1.8615156438003557e-05, + "loss": 0.7508, + "step": 6344 + }, + { + "epoch": 0.19446487679293858, + "grad_norm": 0.746838906565962, + "learning_rate": 1.861465240362937e-05, + "loss": 0.6511, + "step": 6345 + }, + { + "epoch": 0.1944955253156798, + "grad_norm": 1.6999947114371685, + "learning_rate": 1.8614148284372803e-05, + "loss": 0.8165, + "step": 6346 + }, + { + "epoch": 0.194526173838421, + "grad_norm": 0.7179335684559011, + "learning_rate": 1.861364408023883e-05, + "loss": 0.6283, + "step": 6347 + }, + { + "epoch": 0.1945568223611622, + "grad_norm": 1.4934497294659703, + "learning_rate": 1.861313979123242e-05, + "loss": 0.7418, + "step": 6348 + }, + { + "epoch": 0.1945874708839034, + "grad_norm": 1.692378178717857, + "learning_rate": 1.861263541735854e-05, + "loss": 0.7552, + "step": 6349 + }, + { + "epoch": 0.1946181194066446, + "grad_norm": 1.4099498699473176, + "learning_rate": 1.861213095862216e-05, + "loss": 0.7365, + "step": 6350 + }, + { + "epoch": 0.1946487679293858, + "grad_norm": 1.6686527062407337, + "learning_rate": 1.8611626415028246e-05, + "loss": 0.8362, + "step": 6351 + }, + { + "epoch": 0.194679416452127, + "grad_norm": 1.408231077248974, + "learning_rate": 1.8611121786581777e-05, + "loss": 0.7041, + "step": 6352 + }, + { + "epoch": 0.1947100649748682, + "grad_norm": 1.675690156329507, + "learning_rate": 1.861061707328772e-05, + "loss": 0.8399, + "step": 6353 + }, + { + "epoch": 0.1947407134976094, + "grad_norm": 1.628481917694341, + "learning_rate": 1.8610112275151053e-05, + "loss": 0.8361, + "step": 6354 + }, + { + "epoch": 0.1947713620203506, + "grad_norm": 1.6835840060835934, + "learning_rate": 1.8609607392176744e-05, + "loss": 0.7474, + "step": 6355 + }, + { + "epoch": 0.19480201054309182, + "grad_norm": 0.8213133232174815, + "learning_rate": 1.8609102424369775e-05, + "loss": 0.644, + "step": 6356 + }, + { + "epoch": 0.19483265906583302, + "grad_norm": 1.8329196529646057, + "learning_rate": 1.8608597371735112e-05, + "loss": 0.8066, + "step": 6357 + }, + { + "epoch": 0.19486330758857423, + "grad_norm": 1.4053422865248926, + "learning_rate": 1.8608092234277736e-05, + "loss": 0.8207, + "step": 6358 + }, + { + "epoch": 0.19489395611131544, + "grad_norm": 1.4853388048748035, + "learning_rate": 1.860758701200263e-05, + "loss": 0.8103, + "step": 6359 + }, + { + "epoch": 0.19492460463405664, + "grad_norm": 1.5115028145531224, + "learning_rate": 1.860708170491476e-05, + "loss": 0.7305, + "step": 6360 + }, + { + "epoch": 0.19495525315679785, + "grad_norm": 0.7103971043080406, + "learning_rate": 1.8606576313019115e-05, + "loss": 0.6517, + "step": 6361 + }, + { + "epoch": 0.19498590167953905, + "grad_norm": 1.5984329533630566, + "learning_rate": 1.8606070836320673e-05, + "loss": 0.791, + "step": 6362 + }, + { + "epoch": 0.19501655020228026, + "grad_norm": 1.6568415051312146, + "learning_rate": 1.860556527482441e-05, + "loss": 0.8058, + "step": 6363 + }, + { + "epoch": 0.19504719872502146, + "grad_norm": 1.516048098167845, + "learning_rate": 1.8605059628535317e-05, + "loss": 0.9375, + "step": 6364 + }, + { + "epoch": 0.19507784724776267, + "grad_norm": 1.409072617689014, + "learning_rate": 1.8604553897458363e-05, + "loss": 0.8281, + "step": 6365 + }, + { + "epoch": 0.19510849577050385, + "grad_norm": 0.7360439976392597, + "learning_rate": 1.860404808159854e-05, + "loss": 0.6479, + "step": 6366 + }, + { + "epoch": 0.19513914429324505, + "grad_norm": 0.7184863666222107, + "learning_rate": 1.860354218096083e-05, + "loss": 0.6845, + "step": 6367 + }, + { + "epoch": 0.19516979281598626, + "grad_norm": 1.500372695436385, + "learning_rate": 1.8603036195550217e-05, + "loss": 0.7683, + "step": 6368 + }, + { + "epoch": 0.19520044133872747, + "grad_norm": 1.5086139395108735, + "learning_rate": 1.860253012537169e-05, + "loss": 0.8337, + "step": 6369 + }, + { + "epoch": 0.19523108986146867, + "grad_norm": 0.7475999035251971, + "learning_rate": 1.8602023970430227e-05, + "loss": 0.667, + "step": 6370 + }, + { + "epoch": 0.19526173838420988, + "grad_norm": 1.7287821888220167, + "learning_rate": 1.8601517730730825e-05, + "loss": 0.809, + "step": 6371 + }, + { + "epoch": 0.19529238690695108, + "grad_norm": 1.5170921817324454, + "learning_rate": 1.860101140627847e-05, + "loss": 0.7559, + "step": 6372 + }, + { + "epoch": 0.1953230354296923, + "grad_norm": 1.5258662834776797, + "learning_rate": 1.8600504997078146e-05, + "loss": 0.8234, + "step": 6373 + }, + { + "epoch": 0.1953536839524335, + "grad_norm": 1.4931860817447444, + "learning_rate": 1.8599998503134843e-05, + "loss": 0.7463, + "step": 6374 + }, + { + "epoch": 0.1953843324751747, + "grad_norm": 1.620890802809672, + "learning_rate": 1.859949192445356e-05, + "loss": 0.7484, + "step": 6375 + }, + { + "epoch": 0.1954149809979159, + "grad_norm": 1.3597482030078474, + "learning_rate": 1.859898526103928e-05, + "loss": 0.8448, + "step": 6376 + }, + { + "epoch": 0.1954456295206571, + "grad_norm": 1.7946378983532236, + "learning_rate": 1.8598478512896994e-05, + "loss": 0.8305, + "step": 6377 + }, + { + "epoch": 0.19547627804339832, + "grad_norm": 1.5693622879080789, + "learning_rate": 1.8597971680031706e-05, + "loss": 0.7782, + "step": 6378 + }, + { + "epoch": 0.19550692656613952, + "grad_norm": 1.5244432875173235, + "learning_rate": 1.85974647624484e-05, + "loss": 0.7948, + "step": 6379 + }, + { + "epoch": 0.19553757508888073, + "grad_norm": 1.7725948207345426, + "learning_rate": 1.8596957760152074e-05, + "loss": 0.7941, + "step": 6380 + }, + { + "epoch": 0.19556822361162193, + "grad_norm": 0.79338575223078, + "learning_rate": 1.8596450673147726e-05, + "loss": 0.6335, + "step": 6381 + }, + { + "epoch": 0.1955988721343631, + "grad_norm": 1.2996072781629988, + "learning_rate": 1.8595943501440347e-05, + "loss": 0.7554, + "step": 6382 + }, + { + "epoch": 0.19562952065710432, + "grad_norm": 1.5160376811198828, + "learning_rate": 1.859543624503494e-05, + "loss": 0.8642, + "step": 6383 + }, + { + "epoch": 0.19566016917984552, + "grad_norm": 1.9071324867976163, + "learning_rate": 1.8594928903936496e-05, + "loss": 0.9482, + "step": 6384 + }, + { + "epoch": 0.19569081770258673, + "grad_norm": 1.5135549079384223, + "learning_rate": 1.859442147815002e-05, + "loss": 0.8159, + "step": 6385 + }, + { + "epoch": 0.19572146622532793, + "grad_norm": 1.488235235413121, + "learning_rate": 1.8593913967680516e-05, + "loss": 0.7859, + "step": 6386 + }, + { + "epoch": 0.19575211474806914, + "grad_norm": 1.6089475857787152, + "learning_rate": 1.859340637253297e-05, + "loss": 0.6999, + "step": 6387 + }, + { + "epoch": 0.19578276327081034, + "grad_norm": 1.5424967814704953, + "learning_rate": 1.8592898692712398e-05, + "loss": 0.8239, + "step": 6388 + }, + { + "epoch": 0.19581341179355155, + "grad_norm": 1.3884735377998563, + "learning_rate": 1.8592390928223797e-05, + "loss": 0.891, + "step": 6389 + }, + { + "epoch": 0.19584406031629276, + "grad_norm": 1.6927558891171297, + "learning_rate": 1.8591883079072166e-05, + "loss": 0.9126, + "step": 6390 + }, + { + "epoch": 0.19587470883903396, + "grad_norm": 1.4971377909938668, + "learning_rate": 1.8591375145262516e-05, + "loss": 0.8312, + "step": 6391 + }, + { + "epoch": 0.19590535736177517, + "grad_norm": 0.8391881925233342, + "learning_rate": 1.8590867126799844e-05, + "loss": 0.6246, + "step": 6392 + }, + { + "epoch": 0.19593600588451637, + "grad_norm": 1.632369520232284, + "learning_rate": 1.8590359023689166e-05, + "loss": 0.822, + "step": 6393 + }, + { + "epoch": 0.19596665440725758, + "grad_norm": 0.7333556342807448, + "learning_rate": 1.858985083593548e-05, + "loss": 0.6811, + "step": 6394 + }, + { + "epoch": 0.19599730292999878, + "grad_norm": 1.4966022783805166, + "learning_rate": 1.8589342563543793e-05, + "loss": 0.764, + "step": 6395 + }, + { + "epoch": 0.19602795145274, + "grad_norm": 0.693524669921343, + "learning_rate": 1.858883420651912e-05, + "loss": 0.6341, + "step": 6396 + }, + { + "epoch": 0.19605859997548117, + "grad_norm": 1.536474897412656, + "learning_rate": 1.8588325764866467e-05, + "loss": 0.7842, + "step": 6397 + }, + { + "epoch": 0.19608924849822237, + "grad_norm": 0.7259575982500853, + "learning_rate": 1.858781723859084e-05, + "loss": 0.6376, + "step": 6398 + }, + { + "epoch": 0.19611989702096358, + "grad_norm": 0.7197731901856872, + "learning_rate": 1.858730862769725e-05, + "loss": 0.6494, + "step": 6399 + }, + { + "epoch": 0.19615054554370479, + "grad_norm": 0.7090052748902869, + "learning_rate": 1.8586799932190716e-05, + "loss": 0.6216, + "step": 6400 + }, + { + "epoch": 0.196181194066446, + "grad_norm": 1.8138018563899854, + "learning_rate": 1.8586291152076242e-05, + "loss": 0.8839, + "step": 6401 + }, + { + "epoch": 0.1962118425891872, + "grad_norm": 1.6213017136235826, + "learning_rate": 1.8585782287358846e-05, + "loss": 0.7337, + "step": 6402 + }, + { + "epoch": 0.1962424911119284, + "grad_norm": 0.7517626919496349, + "learning_rate": 1.858527333804354e-05, + "loss": 0.6351, + "step": 6403 + }, + { + "epoch": 0.1962731396346696, + "grad_norm": 1.4829379339501956, + "learning_rate": 1.858476430413534e-05, + "loss": 0.8388, + "step": 6404 + }, + { + "epoch": 0.1963037881574108, + "grad_norm": 1.5117834024702619, + "learning_rate": 1.858425518563926e-05, + "loss": 0.8834, + "step": 6405 + }, + { + "epoch": 0.19633443668015202, + "grad_norm": 1.3775550499521905, + "learning_rate": 1.8583745982560315e-05, + "loss": 0.7903, + "step": 6406 + }, + { + "epoch": 0.19636508520289322, + "grad_norm": 1.5845771884706215, + "learning_rate": 1.8583236694903526e-05, + "loss": 0.8486, + "step": 6407 + }, + { + "epoch": 0.19639573372563443, + "grad_norm": 1.4020007830871217, + "learning_rate": 1.8582727322673913e-05, + "loss": 0.7966, + "step": 6408 + }, + { + "epoch": 0.19642638224837564, + "grad_norm": 1.585942373335946, + "learning_rate": 1.858221786587649e-05, + "loss": 0.8934, + "step": 6409 + }, + { + "epoch": 0.19645703077111684, + "grad_norm": 0.7954491286595572, + "learning_rate": 1.8581708324516276e-05, + "loss": 0.6485, + "step": 6410 + }, + { + "epoch": 0.19648767929385805, + "grad_norm": 1.38831829254458, + "learning_rate": 1.8581198698598296e-05, + "loss": 0.8264, + "step": 6411 + }, + { + "epoch": 0.19651832781659925, + "grad_norm": 1.5808162224659814, + "learning_rate": 1.858068898812757e-05, + "loss": 0.6884, + "step": 6412 + }, + { + "epoch": 0.19654897633934043, + "grad_norm": 1.4226231737627861, + "learning_rate": 1.8580179193109117e-05, + "loss": 0.7786, + "step": 6413 + }, + { + "epoch": 0.19657962486208164, + "grad_norm": 1.4943083365639842, + "learning_rate": 1.8579669313547968e-05, + "loss": 0.8302, + "step": 6414 + }, + { + "epoch": 0.19661027338482284, + "grad_norm": 1.600536993898089, + "learning_rate": 1.857915934944914e-05, + "loss": 0.8052, + "step": 6415 + }, + { + "epoch": 0.19664092190756405, + "grad_norm": 1.3897523163739045, + "learning_rate": 1.857864930081766e-05, + "loss": 0.7087, + "step": 6416 + }, + { + "epoch": 0.19667157043030525, + "grad_norm": 1.5573469553598687, + "learning_rate": 1.857813916765855e-05, + "loss": 0.7899, + "step": 6417 + }, + { + "epoch": 0.19670221895304646, + "grad_norm": 1.7212877863538343, + "learning_rate": 1.8577628949976842e-05, + "loss": 0.879, + "step": 6418 + }, + { + "epoch": 0.19673286747578766, + "grad_norm": 1.7577572285107026, + "learning_rate": 1.8577118647777562e-05, + "loss": 0.8887, + "step": 6419 + }, + { + "epoch": 0.19676351599852887, + "grad_norm": 1.531866619784211, + "learning_rate": 1.857660826106574e-05, + "loss": 0.8373, + "step": 6420 + }, + { + "epoch": 0.19679416452127008, + "grad_norm": 1.7204422518775144, + "learning_rate": 1.85760977898464e-05, + "loss": 0.7884, + "step": 6421 + }, + { + "epoch": 0.19682481304401128, + "grad_norm": 1.6136200116020438, + "learning_rate": 1.8575587234124572e-05, + "loss": 0.7824, + "step": 6422 + }, + { + "epoch": 0.1968554615667525, + "grad_norm": 1.5985442764006383, + "learning_rate": 1.857507659390529e-05, + "loss": 0.8065, + "step": 6423 + }, + { + "epoch": 0.1968861100894937, + "grad_norm": 1.6860527624767034, + "learning_rate": 1.8574565869193587e-05, + "loss": 0.741, + "step": 6424 + }, + { + "epoch": 0.1969167586122349, + "grad_norm": 1.5784317541966113, + "learning_rate": 1.8574055059994492e-05, + "loss": 0.7542, + "step": 6425 + }, + { + "epoch": 0.1969474071349761, + "grad_norm": 1.652318679747324, + "learning_rate": 1.8573544166313037e-05, + "loss": 0.8778, + "step": 6426 + }, + { + "epoch": 0.1969780556577173, + "grad_norm": 0.722863865907621, + "learning_rate": 1.8573033188154258e-05, + "loss": 0.6247, + "step": 6427 + }, + { + "epoch": 0.1970087041804585, + "grad_norm": 1.5788929899060422, + "learning_rate": 1.857252212552319e-05, + "loss": 0.829, + "step": 6428 + }, + { + "epoch": 0.1970393527031997, + "grad_norm": 1.502960524082615, + "learning_rate": 1.8572010978424866e-05, + "loss": 0.7134, + "step": 6429 + }, + { + "epoch": 0.1970700012259409, + "grad_norm": 0.7833024148636915, + "learning_rate": 1.857149974686433e-05, + "loss": 0.6306, + "step": 6430 + }, + { + "epoch": 0.1971006497486821, + "grad_norm": 0.6967445603650818, + "learning_rate": 1.8570988430846608e-05, + "loss": 0.6164, + "step": 6431 + }, + { + "epoch": 0.1971312982714233, + "grad_norm": 1.6627425103217741, + "learning_rate": 1.8570477030376744e-05, + "loss": 0.8647, + "step": 6432 + }, + { + "epoch": 0.19716194679416452, + "grad_norm": 1.691179829723219, + "learning_rate": 1.8569965545459783e-05, + "loss": 0.8083, + "step": 6433 + }, + { + "epoch": 0.19719259531690572, + "grad_norm": 1.7267099406633617, + "learning_rate": 1.8569453976100752e-05, + "loss": 0.7973, + "step": 6434 + }, + { + "epoch": 0.19722324383964693, + "grad_norm": 1.4919967291975487, + "learning_rate": 1.8568942322304703e-05, + "loss": 0.8049, + "step": 6435 + }, + { + "epoch": 0.19725389236238813, + "grad_norm": 1.6993250292606485, + "learning_rate": 1.856843058407667e-05, + "loss": 0.8821, + "step": 6436 + }, + { + "epoch": 0.19728454088512934, + "grad_norm": 1.49586799729343, + "learning_rate": 1.85679187614217e-05, + "loss": 0.7765, + "step": 6437 + }, + { + "epoch": 0.19731518940787054, + "grad_norm": 1.4291294975528592, + "learning_rate": 1.8567406854344835e-05, + "loss": 0.8267, + "step": 6438 + }, + { + "epoch": 0.19734583793061175, + "grad_norm": 1.4924333029355894, + "learning_rate": 1.856689486285112e-05, + "loss": 0.8248, + "step": 6439 + }, + { + "epoch": 0.19737648645335296, + "grad_norm": 1.584833110485376, + "learning_rate": 1.8566382786945592e-05, + "loss": 0.7735, + "step": 6440 + }, + { + "epoch": 0.19740713497609416, + "grad_norm": 1.722588856689879, + "learning_rate": 1.8565870626633303e-05, + "loss": 0.8594, + "step": 6441 + }, + { + "epoch": 0.19743778349883537, + "grad_norm": 1.3095602298572553, + "learning_rate": 1.8565358381919304e-05, + "loss": 0.671, + "step": 6442 + }, + { + "epoch": 0.19746843202157657, + "grad_norm": 1.4047876608659158, + "learning_rate": 1.8564846052808633e-05, + "loss": 0.8133, + "step": 6443 + }, + { + "epoch": 0.19749908054431775, + "grad_norm": 1.5695130202153822, + "learning_rate": 1.8564333639306345e-05, + "loss": 0.8346, + "step": 6444 + }, + { + "epoch": 0.19752972906705896, + "grad_norm": 1.6811858598551093, + "learning_rate": 1.8563821141417488e-05, + "loss": 0.9104, + "step": 6445 + }, + { + "epoch": 0.19756037758980016, + "grad_norm": 1.7455468923766144, + "learning_rate": 1.8563308559147107e-05, + "loss": 0.8316, + "step": 6446 + }, + { + "epoch": 0.19759102611254137, + "grad_norm": 1.4328844314105245, + "learning_rate": 1.8562795892500257e-05, + "loss": 0.7035, + "step": 6447 + }, + { + "epoch": 0.19762167463528257, + "grad_norm": 1.4277611522660383, + "learning_rate": 1.8562283141481984e-05, + "loss": 0.7398, + "step": 6448 + }, + { + "epoch": 0.19765232315802378, + "grad_norm": 1.4191469073670635, + "learning_rate": 1.856177030609735e-05, + "loss": 0.7556, + "step": 6449 + }, + { + "epoch": 0.19768297168076499, + "grad_norm": 1.4992720208030315, + "learning_rate": 1.85612573863514e-05, + "loss": 0.6995, + "step": 6450 + }, + { + "epoch": 0.1977136202035062, + "grad_norm": 0.809392444433868, + "learning_rate": 1.856074438224919e-05, + "loss": 0.6228, + "step": 6451 + }, + { + "epoch": 0.1977442687262474, + "grad_norm": 1.3961648939982425, + "learning_rate": 1.8560231293795777e-05, + "loss": 0.7537, + "step": 6452 + }, + { + "epoch": 0.1977749172489886, + "grad_norm": 1.7086804809634442, + "learning_rate": 1.8559718120996214e-05, + "loss": 0.8222, + "step": 6453 + }, + { + "epoch": 0.1978055657717298, + "grad_norm": 1.6289838555856049, + "learning_rate": 1.855920486385556e-05, + "loss": 0.8938, + "step": 6454 + }, + { + "epoch": 0.197836214294471, + "grad_norm": 0.7121140514209631, + "learning_rate": 1.855869152237887e-05, + "loss": 0.6453, + "step": 6455 + }, + { + "epoch": 0.19786686281721222, + "grad_norm": 1.7148708870440763, + "learning_rate": 1.85581780965712e-05, + "loss": 0.7632, + "step": 6456 + }, + { + "epoch": 0.19789751133995342, + "grad_norm": 1.922623004227976, + "learning_rate": 1.8557664586437615e-05, + "loss": 0.7591, + "step": 6457 + }, + { + "epoch": 0.19792815986269463, + "grad_norm": 1.4234834212493133, + "learning_rate": 1.8557150991983167e-05, + "loss": 0.6991, + "step": 6458 + }, + { + "epoch": 0.1979588083854358, + "grad_norm": 1.6155859714834908, + "learning_rate": 1.8556637313212925e-05, + "loss": 0.7431, + "step": 6459 + }, + { + "epoch": 0.19798945690817701, + "grad_norm": 0.7392034898162532, + "learning_rate": 1.8556123550131944e-05, + "loss": 0.6408, + "step": 6460 + }, + { + "epoch": 0.19802010543091822, + "grad_norm": 1.6444902130238896, + "learning_rate": 1.8555609702745286e-05, + "loss": 0.8807, + "step": 6461 + }, + { + "epoch": 0.19805075395365943, + "grad_norm": 1.607285480756324, + "learning_rate": 1.855509577105802e-05, + "loss": 0.7012, + "step": 6462 + }, + { + "epoch": 0.19808140247640063, + "grad_norm": 1.4321081461039438, + "learning_rate": 1.8554581755075207e-05, + "loss": 0.7185, + "step": 6463 + }, + { + "epoch": 0.19811205099914184, + "grad_norm": 1.425059550157436, + "learning_rate": 1.8554067654801912e-05, + "loss": 0.8601, + "step": 6464 + }, + { + "epoch": 0.19814269952188304, + "grad_norm": 1.313819518020178, + "learning_rate": 1.8553553470243195e-05, + "loss": 0.7502, + "step": 6465 + }, + { + "epoch": 0.19817334804462425, + "grad_norm": 0.7181830050534402, + "learning_rate": 1.855303920140413e-05, + "loss": 0.6584, + "step": 6466 + }, + { + "epoch": 0.19820399656736545, + "grad_norm": 1.536919281454209, + "learning_rate": 1.8552524848289783e-05, + "loss": 0.752, + "step": 6467 + }, + { + "epoch": 0.19823464509010666, + "grad_norm": 1.5398135940732245, + "learning_rate": 1.855201041090522e-05, + "loss": 0.8791, + "step": 6468 + }, + { + "epoch": 0.19826529361284786, + "grad_norm": 1.5987106628113656, + "learning_rate": 1.8551495889255507e-05, + "loss": 0.798, + "step": 6469 + }, + { + "epoch": 0.19829594213558907, + "grad_norm": 1.4370914190066355, + "learning_rate": 1.8550981283345718e-05, + "loss": 0.755, + "step": 6470 + }, + { + "epoch": 0.19832659065833028, + "grad_norm": 1.5372417272458851, + "learning_rate": 1.8550466593180925e-05, + "loss": 0.7338, + "step": 6471 + }, + { + "epoch": 0.19835723918107148, + "grad_norm": 1.532571053852574, + "learning_rate": 1.8549951818766194e-05, + "loss": 0.7581, + "step": 6472 + }, + { + "epoch": 0.1983878877038127, + "grad_norm": 1.4035934536688663, + "learning_rate": 1.8549436960106605e-05, + "loss": 0.7386, + "step": 6473 + }, + { + "epoch": 0.1984185362265539, + "grad_norm": 1.4867979653098542, + "learning_rate": 1.854892201720722e-05, + "loss": 0.7897, + "step": 6474 + }, + { + "epoch": 0.19844918474929507, + "grad_norm": 1.3390868251096806, + "learning_rate": 1.8548406990073126e-05, + "loss": 0.7278, + "step": 6475 + }, + { + "epoch": 0.19847983327203628, + "grad_norm": 1.4809068748013707, + "learning_rate": 1.8547891878709382e-05, + "loss": 0.7534, + "step": 6476 + }, + { + "epoch": 0.19851048179477748, + "grad_norm": 1.6678081957567894, + "learning_rate": 1.854737668312108e-05, + "loss": 0.8427, + "step": 6477 + }, + { + "epoch": 0.1985411303175187, + "grad_norm": 1.4875835177740038, + "learning_rate": 1.8546861403313285e-05, + "loss": 0.7895, + "step": 6478 + }, + { + "epoch": 0.1985717788402599, + "grad_norm": 1.7011427378599508, + "learning_rate": 1.8546346039291078e-05, + "loss": 0.8344, + "step": 6479 + }, + { + "epoch": 0.1986024273630011, + "grad_norm": 1.6256948724714764, + "learning_rate": 1.8545830591059536e-05, + "loss": 0.7872, + "step": 6480 + }, + { + "epoch": 0.1986330758857423, + "grad_norm": 1.4675890700394878, + "learning_rate": 1.854531505862374e-05, + "loss": 0.7651, + "step": 6481 + }, + { + "epoch": 0.1986637244084835, + "grad_norm": 0.7628100112099365, + "learning_rate": 1.8544799441988768e-05, + "loss": 0.6286, + "step": 6482 + }, + { + "epoch": 0.19869437293122472, + "grad_norm": 1.4457592852230874, + "learning_rate": 1.8544283741159702e-05, + "loss": 0.795, + "step": 6483 + }, + { + "epoch": 0.19872502145396592, + "grad_norm": 0.700356559334669, + "learning_rate": 1.854376795614162e-05, + "loss": 0.6498, + "step": 6484 + }, + { + "epoch": 0.19875566997670713, + "grad_norm": 1.4386682877759993, + "learning_rate": 1.854325208693961e-05, + "loss": 0.8073, + "step": 6485 + }, + { + "epoch": 0.19878631849944833, + "grad_norm": 1.5441723657427584, + "learning_rate": 1.8542736133558745e-05, + "loss": 0.8066, + "step": 6486 + }, + { + "epoch": 0.19881696702218954, + "grad_norm": 1.4571746146218534, + "learning_rate": 1.854222009600412e-05, + "loss": 0.777, + "step": 6487 + }, + { + "epoch": 0.19884761554493074, + "grad_norm": 1.6462498843005373, + "learning_rate": 1.854170397428081e-05, + "loss": 0.8169, + "step": 6488 + }, + { + "epoch": 0.19887826406767195, + "grad_norm": 0.8200197440981973, + "learning_rate": 1.8541187768393913e-05, + "loss": 0.6588, + "step": 6489 + }, + { + "epoch": 0.19890891259041313, + "grad_norm": 1.5046044997462313, + "learning_rate": 1.8540671478348502e-05, + "loss": 0.7487, + "step": 6490 + }, + { + "epoch": 0.19893956111315433, + "grad_norm": 1.3790095201609611, + "learning_rate": 1.854015510414967e-05, + "loss": 0.7222, + "step": 6491 + }, + { + "epoch": 0.19897020963589554, + "grad_norm": 1.7477062322504022, + "learning_rate": 1.853963864580251e-05, + "loss": 0.8229, + "step": 6492 + }, + { + "epoch": 0.19900085815863675, + "grad_norm": 1.4759735957565812, + "learning_rate": 1.8539122103312097e-05, + "loss": 0.7836, + "step": 6493 + }, + { + "epoch": 0.19903150668137795, + "grad_norm": 1.5697051244057134, + "learning_rate": 1.853860547668353e-05, + "loss": 0.7907, + "step": 6494 + }, + { + "epoch": 0.19906215520411916, + "grad_norm": 1.5060047625865334, + "learning_rate": 1.8538088765921904e-05, + "loss": 0.766, + "step": 6495 + }, + { + "epoch": 0.19909280372686036, + "grad_norm": 1.5500849176727989, + "learning_rate": 1.8537571971032304e-05, + "loss": 0.7473, + "step": 6496 + }, + { + "epoch": 0.19912345224960157, + "grad_norm": 1.6437091447057564, + "learning_rate": 1.8537055092019822e-05, + "loss": 0.8699, + "step": 6497 + }, + { + "epoch": 0.19915410077234277, + "grad_norm": 1.787730421993754, + "learning_rate": 1.853653812888955e-05, + "loss": 0.8946, + "step": 6498 + }, + { + "epoch": 0.19918474929508398, + "grad_norm": 1.6142505890486119, + "learning_rate": 1.8536021081646587e-05, + "loss": 0.7723, + "step": 6499 + }, + { + "epoch": 0.19921539781782518, + "grad_norm": 1.6744816250980088, + "learning_rate": 1.8535503950296022e-05, + "loss": 0.8332, + "step": 6500 + }, + { + "epoch": 0.1992460463405664, + "grad_norm": 1.4884007954688752, + "learning_rate": 1.8534986734842952e-05, + "loss": 0.8213, + "step": 6501 + }, + { + "epoch": 0.1992766948633076, + "grad_norm": 1.7721950859626303, + "learning_rate": 1.8534469435292473e-05, + "loss": 0.819, + "step": 6502 + }, + { + "epoch": 0.1993073433860488, + "grad_norm": 1.6683761818108385, + "learning_rate": 1.8533952051649685e-05, + "loss": 0.7335, + "step": 6503 + }, + { + "epoch": 0.19933799190879, + "grad_norm": 1.6511238850184835, + "learning_rate": 1.8533434583919686e-05, + "loss": 0.8578, + "step": 6504 + }, + { + "epoch": 0.1993686404315312, + "grad_norm": 1.632136581686399, + "learning_rate": 1.853291703210757e-05, + "loss": 0.829, + "step": 6505 + }, + { + "epoch": 0.1993992889542724, + "grad_norm": 1.7790707704277324, + "learning_rate": 1.8532399396218438e-05, + "loss": 0.8515, + "step": 6506 + }, + { + "epoch": 0.1994299374770136, + "grad_norm": 1.5310213048553454, + "learning_rate": 1.8531881676257396e-05, + "loss": 0.8105, + "step": 6507 + }, + { + "epoch": 0.1994605859997548, + "grad_norm": 1.509138442966394, + "learning_rate": 1.8531363872229537e-05, + "loss": 0.7996, + "step": 6508 + }, + { + "epoch": 0.199491234522496, + "grad_norm": 1.5537904309725121, + "learning_rate": 1.853084598413997e-05, + "loss": 0.8096, + "step": 6509 + }, + { + "epoch": 0.1995218830452372, + "grad_norm": 1.6102100153990861, + "learning_rate": 1.853032801199379e-05, + "loss": 0.747, + "step": 6510 + }, + { + "epoch": 0.19955253156797842, + "grad_norm": 1.4470574273135002, + "learning_rate": 1.852980995579611e-05, + "loss": 0.7235, + "step": 6511 + }, + { + "epoch": 0.19958318009071963, + "grad_norm": 1.4087591841878504, + "learning_rate": 1.8529291815552027e-05, + "loss": 0.8598, + "step": 6512 + }, + { + "epoch": 0.19961382861346083, + "grad_norm": 0.9103345721557937, + "learning_rate": 1.8528773591266654e-05, + "loss": 0.6597, + "step": 6513 + }, + { + "epoch": 0.19964447713620204, + "grad_norm": 1.5141356975143285, + "learning_rate": 1.852825528294509e-05, + "loss": 0.8099, + "step": 6514 + }, + { + "epoch": 0.19967512565894324, + "grad_norm": 0.7292993394298954, + "learning_rate": 1.8527736890592444e-05, + "loss": 0.6537, + "step": 6515 + }, + { + "epoch": 0.19970577418168445, + "grad_norm": 1.7257113692866815, + "learning_rate": 1.8527218414213823e-05, + "loss": 0.8344, + "step": 6516 + }, + { + "epoch": 0.19973642270442565, + "grad_norm": 1.4855416782472406, + "learning_rate": 1.852669985381434e-05, + "loss": 0.7211, + "step": 6517 + }, + { + "epoch": 0.19976707122716686, + "grad_norm": 1.513879305022202, + "learning_rate": 1.8526181209399098e-05, + "loss": 0.7855, + "step": 6518 + }, + { + "epoch": 0.19979771974990806, + "grad_norm": 1.5835449187928063, + "learning_rate": 1.8525662480973216e-05, + "loss": 0.7059, + "step": 6519 + }, + { + "epoch": 0.19982836827264927, + "grad_norm": 1.7409781824441133, + "learning_rate": 1.8525143668541798e-05, + "loss": 0.7422, + "step": 6520 + }, + { + "epoch": 0.19985901679539045, + "grad_norm": 0.8322518800465445, + "learning_rate": 1.8524624772109957e-05, + "loss": 0.661, + "step": 6521 + }, + { + "epoch": 0.19988966531813165, + "grad_norm": 1.460547543893841, + "learning_rate": 1.8524105791682808e-05, + "loss": 0.8409, + "step": 6522 + }, + { + "epoch": 0.19992031384087286, + "grad_norm": 0.7381400868148464, + "learning_rate": 1.8523586727265465e-05, + "loss": 0.6414, + "step": 6523 + }, + { + "epoch": 0.19995096236361407, + "grad_norm": 1.986903931965722, + "learning_rate": 1.852306757886304e-05, + "loss": 0.754, + "step": 6524 + }, + { + "epoch": 0.19998161088635527, + "grad_norm": 2.21496034450257, + "learning_rate": 1.852254834648065e-05, + "loss": 0.8376, + "step": 6525 + }, + { + "epoch": 0.20001225940909648, + "grad_norm": 1.944812528586957, + "learning_rate": 1.8522029030123408e-05, + "loss": 0.8059, + "step": 6526 + }, + { + "epoch": 0.20004290793183768, + "grad_norm": 1.6243574760086743, + "learning_rate": 1.8521509629796433e-05, + "loss": 0.7689, + "step": 6527 + }, + { + "epoch": 0.2000735564545789, + "grad_norm": 1.7341430949539482, + "learning_rate": 1.8520990145504848e-05, + "loss": 0.8801, + "step": 6528 + }, + { + "epoch": 0.2001042049773201, + "grad_norm": 1.5526115748343088, + "learning_rate": 1.8520470577253765e-05, + "loss": 0.6917, + "step": 6529 + }, + { + "epoch": 0.2001348535000613, + "grad_norm": 1.8251492351099592, + "learning_rate": 1.8519950925048302e-05, + "loss": 0.8406, + "step": 6530 + }, + { + "epoch": 0.2001655020228025, + "grad_norm": 2.154236584065206, + "learning_rate": 1.8519431188893588e-05, + "loss": 0.9013, + "step": 6531 + }, + { + "epoch": 0.2001961505455437, + "grad_norm": 1.5860568546468194, + "learning_rate": 1.8518911368794733e-05, + "loss": 0.7698, + "step": 6532 + }, + { + "epoch": 0.20022679906828492, + "grad_norm": 1.8077314733249215, + "learning_rate": 1.8518391464756872e-05, + "loss": 0.8481, + "step": 6533 + }, + { + "epoch": 0.20025744759102612, + "grad_norm": 0.9251708506483687, + "learning_rate": 1.8517871476785114e-05, + "loss": 0.6373, + "step": 6534 + }, + { + "epoch": 0.20028809611376733, + "grad_norm": 1.5371538438282093, + "learning_rate": 1.851735140488459e-05, + "loss": 0.7587, + "step": 6535 + }, + { + "epoch": 0.20031874463650853, + "grad_norm": 1.5941798563734912, + "learning_rate": 1.8516831249060426e-05, + "loss": 0.8416, + "step": 6536 + }, + { + "epoch": 0.2003493931592497, + "grad_norm": 1.4673341811537746, + "learning_rate": 1.8516311009317743e-05, + "loss": 0.8488, + "step": 6537 + }, + { + "epoch": 0.20038004168199092, + "grad_norm": 1.5528289369210342, + "learning_rate": 1.8515790685661667e-05, + "loss": 0.7304, + "step": 6538 + }, + { + "epoch": 0.20041069020473212, + "grad_norm": 1.5639331418836036, + "learning_rate": 1.851527027809733e-05, + "loss": 0.7279, + "step": 6539 + }, + { + "epoch": 0.20044133872747333, + "grad_norm": 1.3443909922067812, + "learning_rate": 1.8514749786629857e-05, + "loss": 0.8339, + "step": 6540 + }, + { + "epoch": 0.20047198725021453, + "grad_norm": 1.4530899656740415, + "learning_rate": 1.8514229211264368e-05, + "loss": 0.8796, + "step": 6541 + }, + { + "epoch": 0.20050263577295574, + "grad_norm": 1.7390423845099126, + "learning_rate": 1.851370855200601e-05, + "loss": 0.8369, + "step": 6542 + }, + { + "epoch": 0.20053328429569695, + "grad_norm": 1.3182212505678834, + "learning_rate": 1.8513187808859895e-05, + "loss": 0.8047, + "step": 6543 + }, + { + "epoch": 0.20056393281843815, + "grad_norm": 1.485149989357819, + "learning_rate": 1.8512666981831167e-05, + "loss": 0.9132, + "step": 6544 + }, + { + "epoch": 0.20059458134117936, + "grad_norm": 1.5687082398142784, + "learning_rate": 1.8512146070924953e-05, + "loss": 0.7849, + "step": 6545 + }, + { + "epoch": 0.20062522986392056, + "grad_norm": 1.6141949688913282, + "learning_rate": 1.8511625076146384e-05, + "loss": 0.875, + "step": 6546 + }, + { + "epoch": 0.20065587838666177, + "grad_norm": 1.5428094524265534, + "learning_rate": 1.8511103997500596e-05, + "loss": 0.8327, + "step": 6547 + }, + { + "epoch": 0.20068652690940297, + "grad_norm": 1.573799278934854, + "learning_rate": 1.8510582834992722e-05, + "loss": 0.7555, + "step": 6548 + }, + { + "epoch": 0.20071717543214418, + "grad_norm": 1.7236162107739506, + "learning_rate": 1.8510061588627902e-05, + "loss": 0.8213, + "step": 6549 + }, + { + "epoch": 0.20074782395488538, + "grad_norm": 1.501695597019011, + "learning_rate": 1.8509540258411262e-05, + "loss": 0.9005, + "step": 6550 + }, + { + "epoch": 0.2007784724776266, + "grad_norm": 0.9405050730392069, + "learning_rate": 1.850901884434795e-05, + "loss": 0.6754, + "step": 6551 + }, + { + "epoch": 0.20080912100036777, + "grad_norm": 1.5620802545058223, + "learning_rate": 1.850849734644309e-05, + "loss": 0.8477, + "step": 6552 + }, + { + "epoch": 0.20083976952310897, + "grad_norm": 1.580699014005134, + "learning_rate": 1.8507975764701837e-05, + "loss": 0.8111, + "step": 6553 + }, + { + "epoch": 0.20087041804585018, + "grad_norm": 1.7748706789275681, + "learning_rate": 1.850745409912932e-05, + "loss": 0.8161, + "step": 6554 + }, + { + "epoch": 0.20090106656859139, + "grad_norm": 1.76832637742052, + "learning_rate": 1.850693234973068e-05, + "loss": 0.8739, + "step": 6555 + }, + { + "epoch": 0.2009317150913326, + "grad_norm": 0.7742817668624561, + "learning_rate": 1.850641051651106e-05, + "loss": 0.6562, + "step": 6556 + }, + { + "epoch": 0.2009623636140738, + "grad_norm": 1.6117570527109564, + "learning_rate": 1.8505888599475597e-05, + "loss": 0.7769, + "step": 6557 + }, + { + "epoch": 0.200993012136815, + "grad_norm": 1.6610607934172572, + "learning_rate": 1.850536659862944e-05, + "loss": 0.8276, + "step": 6558 + }, + { + "epoch": 0.2010236606595562, + "grad_norm": 1.4508513626358175, + "learning_rate": 1.850484451397773e-05, + "loss": 0.7526, + "step": 6559 + }, + { + "epoch": 0.2010543091822974, + "grad_norm": 1.549090788570718, + "learning_rate": 1.8504322345525612e-05, + "loss": 0.7621, + "step": 6560 + }, + { + "epoch": 0.20108495770503862, + "grad_norm": 1.4508022387551822, + "learning_rate": 1.8503800093278227e-05, + "loss": 0.7434, + "step": 6561 + }, + { + "epoch": 0.20111560622777983, + "grad_norm": 1.455347523091075, + "learning_rate": 1.8503277757240726e-05, + "loss": 0.8748, + "step": 6562 + }, + { + "epoch": 0.20114625475052103, + "grad_norm": 1.5617793105076991, + "learning_rate": 1.8502755337418253e-05, + "loss": 0.8276, + "step": 6563 + }, + { + "epoch": 0.20117690327326224, + "grad_norm": 1.4990745145279258, + "learning_rate": 1.8502232833815955e-05, + "loss": 0.8171, + "step": 6564 + }, + { + "epoch": 0.20120755179600344, + "grad_norm": 1.5265686440920556, + "learning_rate": 1.850171024643898e-05, + "loss": 0.78, + "step": 6565 + }, + { + "epoch": 0.20123820031874465, + "grad_norm": 1.469879709607594, + "learning_rate": 1.8501187575292485e-05, + "loss": 0.7351, + "step": 6566 + }, + { + "epoch": 0.20126884884148585, + "grad_norm": 1.6478103312361785, + "learning_rate": 1.850066482038161e-05, + "loss": 0.7334, + "step": 6567 + }, + { + "epoch": 0.20129949736422703, + "grad_norm": 1.5922437418794346, + "learning_rate": 1.850014198171151e-05, + "loss": 0.7607, + "step": 6568 + }, + { + "epoch": 0.20133014588696824, + "grad_norm": 1.5207115950969234, + "learning_rate": 1.8499619059287336e-05, + "loss": 0.8241, + "step": 6569 + }, + { + "epoch": 0.20136079440970944, + "grad_norm": 1.4017804487351386, + "learning_rate": 1.849909605311424e-05, + "loss": 0.8227, + "step": 6570 + }, + { + "epoch": 0.20139144293245065, + "grad_norm": 1.4149271133167747, + "learning_rate": 1.8498572963197373e-05, + "loss": 0.7565, + "step": 6571 + }, + { + "epoch": 0.20142209145519185, + "grad_norm": 1.4271777905707599, + "learning_rate": 1.84980497895419e-05, + "loss": 0.8156, + "step": 6572 + }, + { + "epoch": 0.20145273997793306, + "grad_norm": 1.6027736384088271, + "learning_rate": 1.8497526532152964e-05, + "loss": 0.8392, + "step": 6573 + }, + { + "epoch": 0.20148338850067427, + "grad_norm": 1.465591029872732, + "learning_rate": 1.8497003191035722e-05, + "loss": 0.7321, + "step": 6574 + }, + { + "epoch": 0.20151403702341547, + "grad_norm": 1.4308094847925081, + "learning_rate": 1.8496479766195335e-05, + "loss": 0.7361, + "step": 6575 + }, + { + "epoch": 0.20154468554615668, + "grad_norm": 1.4457879117700367, + "learning_rate": 1.8495956257636963e-05, + "loss": 0.7563, + "step": 6576 + }, + { + "epoch": 0.20157533406889788, + "grad_norm": 1.5366155015326755, + "learning_rate": 1.849543266536576e-05, + "loss": 0.7765, + "step": 6577 + }, + { + "epoch": 0.2016059825916391, + "grad_norm": 1.5662081778695889, + "learning_rate": 1.849490898938688e-05, + "loss": 0.816, + "step": 6578 + }, + { + "epoch": 0.2016366311143803, + "grad_norm": 1.5159093540051547, + "learning_rate": 1.849438522970549e-05, + "loss": 0.7595, + "step": 6579 + }, + { + "epoch": 0.2016672796371215, + "grad_norm": 1.4830394073254778, + "learning_rate": 1.849386138632675e-05, + "loss": 0.8148, + "step": 6580 + }, + { + "epoch": 0.2016979281598627, + "grad_norm": 0.8031044725007467, + "learning_rate": 1.8493337459255822e-05, + "loss": 0.6445, + "step": 6581 + }, + { + "epoch": 0.2017285766826039, + "grad_norm": 0.8077757588031599, + "learning_rate": 1.8492813448497863e-05, + "loss": 0.655, + "step": 6582 + }, + { + "epoch": 0.2017592252053451, + "grad_norm": 1.48603449388236, + "learning_rate": 1.8492289354058043e-05, + "loss": 0.7548, + "step": 6583 + }, + { + "epoch": 0.2017898737280863, + "grad_norm": 1.4882633473393305, + "learning_rate": 1.8491765175941522e-05, + "loss": 0.8632, + "step": 6584 + }, + { + "epoch": 0.2018205222508275, + "grad_norm": 1.5808546844231195, + "learning_rate": 1.8491240914153464e-05, + "loss": 0.8051, + "step": 6585 + }, + { + "epoch": 0.2018511707735687, + "grad_norm": 1.5689705515662273, + "learning_rate": 1.849071656869904e-05, + "loss": 0.862, + "step": 6586 + }, + { + "epoch": 0.2018818192963099, + "grad_norm": 0.8400989958053431, + "learning_rate": 1.8490192139583413e-05, + "loss": 0.6532, + "step": 6587 + }, + { + "epoch": 0.20191246781905112, + "grad_norm": 1.4681227754250092, + "learning_rate": 1.848966762681175e-05, + "loss": 0.7512, + "step": 6588 + }, + { + "epoch": 0.20194311634179232, + "grad_norm": 1.681362972472169, + "learning_rate": 1.8489143030389218e-05, + "loss": 0.7887, + "step": 6589 + }, + { + "epoch": 0.20197376486453353, + "grad_norm": 1.7173995067651437, + "learning_rate": 1.848861835032099e-05, + "loss": 0.8557, + "step": 6590 + }, + { + "epoch": 0.20200441338727473, + "grad_norm": 1.5238190227343946, + "learning_rate": 1.848809358661223e-05, + "loss": 0.843, + "step": 6591 + }, + { + "epoch": 0.20203506191001594, + "grad_norm": 0.7590505749475436, + "learning_rate": 1.8487568739268118e-05, + "loss": 0.6029, + "step": 6592 + }, + { + "epoch": 0.20206571043275715, + "grad_norm": 1.5496185827169737, + "learning_rate": 1.8487043808293816e-05, + "loss": 0.8883, + "step": 6593 + }, + { + "epoch": 0.20209635895549835, + "grad_norm": 1.4415449469544965, + "learning_rate": 1.8486518793694502e-05, + "loss": 0.8663, + "step": 6594 + }, + { + "epoch": 0.20212700747823956, + "grad_norm": 1.6001072918483887, + "learning_rate": 1.8485993695475344e-05, + "loss": 0.7935, + "step": 6595 + }, + { + "epoch": 0.20215765600098076, + "grad_norm": 1.607358899324076, + "learning_rate": 1.848546851364152e-05, + "loss": 0.8257, + "step": 6596 + }, + { + "epoch": 0.20218830452372197, + "grad_norm": 1.5185728552254538, + "learning_rate": 1.8484943248198205e-05, + "loss": 0.8316, + "step": 6597 + }, + { + "epoch": 0.20221895304646317, + "grad_norm": 1.5341628004953207, + "learning_rate": 1.848441789915057e-05, + "loss": 0.7386, + "step": 6598 + }, + { + "epoch": 0.20224960156920435, + "grad_norm": 1.3186402894792804, + "learning_rate": 1.8483892466503798e-05, + "loss": 0.6948, + "step": 6599 + }, + { + "epoch": 0.20228025009194556, + "grad_norm": 1.4060179371488344, + "learning_rate": 1.8483366950263062e-05, + "loss": 0.7608, + "step": 6600 + }, + { + "epoch": 0.20231089861468676, + "grad_norm": 1.7386181861781598, + "learning_rate": 1.848284135043354e-05, + "loss": 0.8317, + "step": 6601 + }, + { + "epoch": 0.20234154713742797, + "grad_norm": 1.4092609000026668, + "learning_rate": 1.8482315667020413e-05, + "loss": 0.8475, + "step": 6602 + }, + { + "epoch": 0.20237219566016917, + "grad_norm": 1.4461304740699543, + "learning_rate": 1.8481789900028858e-05, + "loss": 0.7328, + "step": 6603 + }, + { + "epoch": 0.20240284418291038, + "grad_norm": 1.4752309010370095, + "learning_rate": 1.8481264049464055e-05, + "loss": 0.8633, + "step": 6604 + }, + { + "epoch": 0.20243349270565159, + "grad_norm": 1.6107836335553012, + "learning_rate": 1.848073811533119e-05, + "loss": 0.8025, + "step": 6605 + }, + { + "epoch": 0.2024641412283928, + "grad_norm": 1.7883304463798582, + "learning_rate": 1.848021209763544e-05, + "loss": 0.7426, + "step": 6606 + }, + { + "epoch": 0.202494789751134, + "grad_norm": 1.7065801728662546, + "learning_rate": 1.8479685996381994e-05, + "loss": 0.9236, + "step": 6607 + }, + { + "epoch": 0.2025254382738752, + "grad_norm": 1.6144996050615759, + "learning_rate": 1.847915981157603e-05, + "loss": 0.8676, + "step": 6608 + }, + { + "epoch": 0.2025560867966164, + "grad_norm": 0.8064570497295654, + "learning_rate": 1.8478633543222737e-05, + "loss": 0.6425, + "step": 6609 + }, + { + "epoch": 0.2025867353193576, + "grad_norm": 1.5133113626464414, + "learning_rate": 1.8478107191327298e-05, + "loss": 0.8941, + "step": 6610 + }, + { + "epoch": 0.20261738384209882, + "grad_norm": 1.5701502830312342, + "learning_rate": 1.84775807558949e-05, + "loss": 0.8845, + "step": 6611 + }, + { + "epoch": 0.20264803236484003, + "grad_norm": 0.7034924800107968, + "learning_rate": 1.847705423693073e-05, + "loss": 0.6253, + "step": 6612 + }, + { + "epoch": 0.20267868088758123, + "grad_norm": 1.6119213728137105, + "learning_rate": 1.8476527634439972e-05, + "loss": 0.8965, + "step": 6613 + }, + { + "epoch": 0.2027093294103224, + "grad_norm": 1.4713588919978784, + "learning_rate": 1.847600094842782e-05, + "loss": 0.8423, + "step": 6614 + }, + { + "epoch": 0.20273997793306361, + "grad_norm": 1.7637597528889712, + "learning_rate": 1.8475474178899462e-05, + "loss": 0.8591, + "step": 6615 + }, + { + "epoch": 0.20277062645580482, + "grad_norm": 1.5301580890189845, + "learning_rate": 1.847494732586009e-05, + "loss": 0.8312, + "step": 6616 + }, + { + "epoch": 0.20280127497854603, + "grad_norm": 1.490165435550064, + "learning_rate": 1.8474420389314895e-05, + "loss": 0.7281, + "step": 6617 + }, + { + "epoch": 0.20283192350128723, + "grad_norm": 1.4703967634593178, + "learning_rate": 1.8473893369269062e-05, + "loss": 0.8801, + "step": 6618 + }, + { + "epoch": 0.20286257202402844, + "grad_norm": 1.5122875727076492, + "learning_rate": 1.8473366265727794e-05, + "loss": 0.7722, + "step": 6619 + }, + { + "epoch": 0.20289322054676964, + "grad_norm": 1.5535713136776832, + "learning_rate": 1.8472839078696276e-05, + "loss": 0.6967, + "step": 6620 + }, + { + "epoch": 0.20292386906951085, + "grad_norm": 1.4081253683889927, + "learning_rate": 1.847231180817971e-05, + "loss": 0.8202, + "step": 6621 + }, + { + "epoch": 0.20295451759225205, + "grad_norm": 1.6112973563531254, + "learning_rate": 1.847178445418329e-05, + "loss": 0.8179, + "step": 6622 + }, + { + "epoch": 0.20298516611499326, + "grad_norm": 1.3626504338938121, + "learning_rate": 1.8471257016712204e-05, + "loss": 0.7031, + "step": 6623 + }, + { + "epoch": 0.20301581463773447, + "grad_norm": 1.3503965616155376, + "learning_rate": 1.8470729495771662e-05, + "loss": 0.7553, + "step": 6624 + }, + { + "epoch": 0.20304646316047567, + "grad_norm": 1.431044196626148, + "learning_rate": 1.847020189136685e-05, + "loss": 0.8608, + "step": 6625 + }, + { + "epoch": 0.20307711168321688, + "grad_norm": 1.4985939773028454, + "learning_rate": 1.846967420350297e-05, + "loss": 0.7886, + "step": 6626 + }, + { + "epoch": 0.20310776020595808, + "grad_norm": 1.3943127513301206, + "learning_rate": 1.846914643218523e-05, + "loss": 0.7506, + "step": 6627 + }, + { + "epoch": 0.2031384087286993, + "grad_norm": 1.4514743918021098, + "learning_rate": 1.846861857741882e-05, + "loss": 0.7757, + "step": 6628 + }, + { + "epoch": 0.2031690572514405, + "grad_norm": 1.5647387047698869, + "learning_rate": 1.8468090639208944e-05, + "loss": 0.7186, + "step": 6629 + }, + { + "epoch": 0.20319970577418167, + "grad_norm": 0.9356490388669089, + "learning_rate": 1.8467562617560804e-05, + "loss": 0.644, + "step": 6630 + }, + { + "epoch": 0.20323035429692288, + "grad_norm": 1.4141887500990813, + "learning_rate": 1.8467034512479603e-05, + "loss": 0.8361, + "step": 6631 + }, + { + "epoch": 0.20326100281966408, + "grad_norm": 1.5740940215824846, + "learning_rate": 1.8466506323970543e-05, + "loss": 0.852, + "step": 6632 + }, + { + "epoch": 0.2032916513424053, + "grad_norm": 1.6860098700888309, + "learning_rate": 1.8465978052038833e-05, + "loss": 0.8255, + "step": 6633 + }, + { + "epoch": 0.2033222998651465, + "grad_norm": 1.5942614204382686, + "learning_rate": 1.8465449696689673e-05, + "loss": 0.8677, + "step": 6634 + }, + { + "epoch": 0.2033529483878877, + "grad_norm": 1.715511434873686, + "learning_rate": 1.8464921257928276e-05, + "loss": 0.8365, + "step": 6635 + }, + { + "epoch": 0.2033835969106289, + "grad_norm": 1.3761287590354176, + "learning_rate": 1.846439273575984e-05, + "loss": 0.863, + "step": 6636 + }, + { + "epoch": 0.2034142454333701, + "grad_norm": 1.6023668750249518, + "learning_rate": 1.8463864130189573e-05, + "loss": 0.736, + "step": 6637 + }, + { + "epoch": 0.20344489395611132, + "grad_norm": 1.5463546580516558, + "learning_rate": 1.846333544122269e-05, + "loss": 0.9072, + "step": 6638 + }, + { + "epoch": 0.20347554247885252, + "grad_norm": 1.470758302917987, + "learning_rate": 1.84628066688644e-05, + "loss": 0.7875, + "step": 6639 + }, + { + "epoch": 0.20350619100159373, + "grad_norm": 1.6238689585256256, + "learning_rate": 1.846227781311991e-05, + "loss": 0.8669, + "step": 6640 + }, + { + "epoch": 0.20353683952433493, + "grad_norm": 1.55951474439527, + "learning_rate": 1.846174887399443e-05, + "loss": 0.8171, + "step": 6641 + }, + { + "epoch": 0.20356748804707614, + "grad_norm": 1.6284282392771505, + "learning_rate": 1.8461219851493176e-05, + "loss": 0.8411, + "step": 6642 + }, + { + "epoch": 0.20359813656981735, + "grad_norm": 0.9994939855417857, + "learning_rate": 1.8460690745621352e-05, + "loss": 0.6638, + "step": 6643 + }, + { + "epoch": 0.20362878509255855, + "grad_norm": 0.8658284399643036, + "learning_rate": 1.8460161556384183e-05, + "loss": 0.6229, + "step": 6644 + }, + { + "epoch": 0.20365943361529976, + "grad_norm": 1.7037646284673207, + "learning_rate": 1.8459632283786876e-05, + "loss": 0.8054, + "step": 6645 + }, + { + "epoch": 0.20369008213804093, + "grad_norm": 1.6684899244335687, + "learning_rate": 1.8459102927834645e-05, + "loss": 0.8612, + "step": 6646 + }, + { + "epoch": 0.20372073066078214, + "grad_norm": 1.4988876083083398, + "learning_rate": 1.8458573488532713e-05, + "loss": 0.8536, + "step": 6647 + }, + { + "epoch": 0.20375137918352335, + "grad_norm": 1.580674673562361, + "learning_rate": 1.845804396588629e-05, + "loss": 0.8215, + "step": 6648 + }, + { + "epoch": 0.20378202770626455, + "grad_norm": 1.562018922542238, + "learning_rate": 1.8457514359900595e-05, + "loss": 0.8528, + "step": 6649 + }, + { + "epoch": 0.20381267622900576, + "grad_norm": 1.6071329112746147, + "learning_rate": 1.8456984670580845e-05, + "loss": 0.729, + "step": 6650 + }, + { + "epoch": 0.20384332475174696, + "grad_norm": 1.052631312908418, + "learning_rate": 1.8456454897932264e-05, + "loss": 0.6536, + "step": 6651 + }, + { + "epoch": 0.20387397327448817, + "grad_norm": 0.931851132379166, + "learning_rate": 1.8455925041960073e-05, + "loss": 0.6535, + "step": 6652 + }, + { + "epoch": 0.20390462179722937, + "grad_norm": 1.9075370454661615, + "learning_rate": 1.8455395102669483e-05, + "loss": 0.7827, + "step": 6653 + }, + { + "epoch": 0.20393527031997058, + "grad_norm": 1.8374345611663192, + "learning_rate": 1.8454865080065724e-05, + "loss": 0.7649, + "step": 6654 + }, + { + "epoch": 0.20396591884271179, + "grad_norm": 1.5872720245905352, + "learning_rate": 1.8454334974154016e-05, + "loss": 0.8217, + "step": 6655 + }, + { + "epoch": 0.203996567365453, + "grad_norm": 1.6880378714126583, + "learning_rate": 1.8453804784939585e-05, + "loss": 0.9259, + "step": 6656 + }, + { + "epoch": 0.2040272158881942, + "grad_norm": 1.0060588961598251, + "learning_rate": 1.845327451242765e-05, + "loss": 0.6598, + "step": 6657 + }, + { + "epoch": 0.2040578644109354, + "grad_norm": 1.5892264662609037, + "learning_rate": 1.8452744156623437e-05, + "loss": 0.9018, + "step": 6658 + }, + { + "epoch": 0.2040885129336766, + "grad_norm": 1.4544146501981579, + "learning_rate": 1.8452213717532172e-05, + "loss": 0.697, + "step": 6659 + }, + { + "epoch": 0.2041191614564178, + "grad_norm": 1.5556634802827731, + "learning_rate": 1.8451683195159086e-05, + "loss": 0.7575, + "step": 6660 + }, + { + "epoch": 0.204149809979159, + "grad_norm": 1.7504065141573024, + "learning_rate": 1.84511525895094e-05, + "loss": 0.8171, + "step": 6661 + }, + { + "epoch": 0.2041804585019002, + "grad_norm": 1.5514325441302952, + "learning_rate": 1.8450621900588347e-05, + "loss": 0.7243, + "step": 6662 + }, + { + "epoch": 0.2042111070246414, + "grad_norm": 1.4795904517804557, + "learning_rate": 1.8450091128401155e-05, + "loss": 0.7913, + "step": 6663 + }, + { + "epoch": 0.2042417555473826, + "grad_norm": 1.5216087531583444, + "learning_rate": 1.844956027295305e-05, + "loss": 0.7179, + "step": 6664 + }, + { + "epoch": 0.20427240407012381, + "grad_norm": 1.5152338960957363, + "learning_rate": 1.8449029334249272e-05, + "loss": 0.8482, + "step": 6665 + }, + { + "epoch": 0.20430305259286502, + "grad_norm": 0.7692381261787363, + "learning_rate": 1.844849831229504e-05, + "loss": 0.6477, + "step": 6666 + }, + { + "epoch": 0.20433370111560623, + "grad_norm": 1.3998292917222384, + "learning_rate": 1.8447967207095595e-05, + "loss": 0.8429, + "step": 6667 + }, + { + "epoch": 0.20436434963834743, + "grad_norm": 1.847912030905475, + "learning_rate": 1.844743601865617e-05, + "loss": 0.8158, + "step": 6668 + }, + { + "epoch": 0.20439499816108864, + "grad_norm": 1.558728934781341, + "learning_rate": 1.844690474698199e-05, + "loss": 0.8454, + "step": 6669 + }, + { + "epoch": 0.20442564668382984, + "grad_norm": 1.591674497969109, + "learning_rate": 1.84463733920783e-05, + "loss": 0.8377, + "step": 6670 + }, + { + "epoch": 0.20445629520657105, + "grad_norm": 1.6073267364879822, + "learning_rate": 1.8445841953950333e-05, + "loss": 0.8434, + "step": 6671 + }, + { + "epoch": 0.20448694372931225, + "grad_norm": 1.6520396756575388, + "learning_rate": 1.8445310432603326e-05, + "loss": 0.8799, + "step": 6672 + }, + { + "epoch": 0.20451759225205346, + "grad_norm": 1.5511378071333648, + "learning_rate": 1.8444778828042512e-05, + "loss": 0.8292, + "step": 6673 + }, + { + "epoch": 0.20454824077479467, + "grad_norm": 1.6955953502892593, + "learning_rate": 1.844424714027313e-05, + "loss": 0.7854, + "step": 6674 + }, + { + "epoch": 0.20457888929753587, + "grad_norm": 1.480779708248123, + "learning_rate": 1.844371536930042e-05, + "loss": 0.8232, + "step": 6675 + }, + { + "epoch": 0.20460953782027708, + "grad_norm": 1.4808783380014041, + "learning_rate": 1.8443183515129623e-05, + "loss": 0.7889, + "step": 6676 + }, + { + "epoch": 0.20464018634301825, + "grad_norm": 1.690349947104166, + "learning_rate": 1.8442651577765983e-05, + "loss": 0.7912, + "step": 6677 + }, + { + "epoch": 0.20467083486575946, + "grad_norm": 1.533006015205151, + "learning_rate": 1.8442119557214732e-05, + "loss": 0.7447, + "step": 6678 + }, + { + "epoch": 0.20470148338850067, + "grad_norm": 1.5916383841178345, + "learning_rate": 1.8441587453481115e-05, + "loss": 0.8859, + "step": 6679 + }, + { + "epoch": 0.20473213191124187, + "grad_norm": 1.4199795186839876, + "learning_rate": 1.844105526657038e-05, + "loss": 0.72, + "step": 6680 + }, + { + "epoch": 0.20476278043398308, + "grad_norm": 0.752934578340933, + "learning_rate": 1.844052299648777e-05, + "loss": 0.6773, + "step": 6681 + }, + { + "epoch": 0.20479342895672428, + "grad_norm": 1.2658610114741389, + "learning_rate": 1.8439990643238527e-05, + "loss": 0.6478, + "step": 6682 + }, + { + "epoch": 0.2048240774794655, + "grad_norm": 1.4916059852787713, + "learning_rate": 1.8439458206827892e-05, + "loss": 0.7341, + "step": 6683 + }, + { + "epoch": 0.2048547260022067, + "grad_norm": 1.6991635403858532, + "learning_rate": 1.843892568726112e-05, + "loss": 0.7762, + "step": 6684 + }, + { + "epoch": 0.2048853745249479, + "grad_norm": 1.5180272253989227, + "learning_rate": 1.8438393084543453e-05, + "loss": 0.7675, + "step": 6685 + }, + { + "epoch": 0.2049160230476891, + "grad_norm": 1.5502604315991548, + "learning_rate": 1.8437860398680142e-05, + "loss": 0.6574, + "step": 6686 + }, + { + "epoch": 0.2049466715704303, + "grad_norm": 1.5672324180986101, + "learning_rate": 1.843732762967643e-05, + "loss": 0.8362, + "step": 6687 + }, + { + "epoch": 0.20497732009317152, + "grad_norm": 1.7404739591091498, + "learning_rate": 1.843679477753757e-05, + "loss": 0.8503, + "step": 6688 + }, + { + "epoch": 0.20500796861591272, + "grad_norm": 1.4261532776123063, + "learning_rate": 1.8436261842268815e-05, + "loss": 0.8307, + "step": 6689 + }, + { + "epoch": 0.20503861713865393, + "grad_norm": 1.5794683769783164, + "learning_rate": 1.843572882387541e-05, + "loss": 0.8321, + "step": 6690 + }, + { + "epoch": 0.20506926566139513, + "grad_norm": 1.3879449822973533, + "learning_rate": 1.8435195722362612e-05, + "loss": 0.6775, + "step": 6691 + }, + { + "epoch": 0.2050999141841363, + "grad_norm": 1.4085044744815076, + "learning_rate": 1.8434662537735676e-05, + "loss": 0.7185, + "step": 6692 + }, + { + "epoch": 0.20513056270687752, + "grad_norm": 0.7236604633771059, + "learning_rate": 1.843412926999985e-05, + "loss": 0.6457, + "step": 6693 + }, + { + "epoch": 0.20516121122961872, + "grad_norm": 1.3997624798470951, + "learning_rate": 1.8433595919160387e-05, + "loss": 0.8035, + "step": 6694 + }, + { + "epoch": 0.20519185975235993, + "grad_norm": 1.611816144668045, + "learning_rate": 1.843306248522255e-05, + "loss": 0.9509, + "step": 6695 + }, + { + "epoch": 0.20522250827510113, + "grad_norm": 1.3880174369534268, + "learning_rate": 1.8432528968191588e-05, + "loss": 0.635, + "step": 6696 + }, + { + "epoch": 0.20525315679784234, + "grad_norm": 1.8292996189292066, + "learning_rate": 1.843199536807276e-05, + "loss": 0.781, + "step": 6697 + }, + { + "epoch": 0.20528380532058355, + "grad_norm": 1.6142190541340387, + "learning_rate": 1.8431461684871327e-05, + "loss": 0.8855, + "step": 6698 + }, + { + "epoch": 0.20531445384332475, + "grad_norm": 3.0831341533350813, + "learning_rate": 1.8430927918592544e-05, + "loss": 0.8787, + "step": 6699 + }, + { + "epoch": 0.20534510236606596, + "grad_norm": 1.5643034816481867, + "learning_rate": 1.843039406924167e-05, + "loss": 0.8811, + "step": 6700 + }, + { + "epoch": 0.20537575088880716, + "grad_norm": 1.5199189258876316, + "learning_rate": 1.8429860136823965e-05, + "loss": 0.8901, + "step": 6701 + }, + { + "epoch": 0.20540639941154837, + "grad_norm": 0.6924253516994546, + "learning_rate": 1.8429326121344694e-05, + "loss": 0.6117, + "step": 6702 + }, + { + "epoch": 0.20543704793428957, + "grad_norm": 1.381204253884153, + "learning_rate": 1.8428792022809114e-05, + "loss": 0.7894, + "step": 6703 + }, + { + "epoch": 0.20546769645703078, + "grad_norm": 1.4386658135371306, + "learning_rate": 1.842825784122249e-05, + "loss": 0.7236, + "step": 6704 + }, + { + "epoch": 0.20549834497977199, + "grad_norm": 1.3928471484970009, + "learning_rate": 1.8427723576590085e-05, + "loss": 0.831, + "step": 6705 + }, + { + "epoch": 0.2055289935025132, + "grad_norm": 1.6014873498078415, + "learning_rate": 1.842718922891716e-05, + "loss": 0.8876, + "step": 6706 + }, + { + "epoch": 0.2055596420252544, + "grad_norm": 0.7006351064276858, + "learning_rate": 1.842665479820899e-05, + "loss": 0.6435, + "step": 6707 + }, + { + "epoch": 0.20559029054799557, + "grad_norm": 1.5637380273430923, + "learning_rate": 1.842612028447083e-05, + "loss": 0.9025, + "step": 6708 + }, + { + "epoch": 0.20562093907073678, + "grad_norm": 1.6436153217475937, + "learning_rate": 1.842558568770795e-05, + "loss": 0.7789, + "step": 6709 + }, + { + "epoch": 0.20565158759347799, + "grad_norm": 1.5706529865479342, + "learning_rate": 1.8425051007925623e-05, + "loss": 0.8088, + "step": 6710 + }, + { + "epoch": 0.2056822361162192, + "grad_norm": 1.4885820217476622, + "learning_rate": 1.842451624512911e-05, + "loss": 0.7895, + "step": 6711 + }, + { + "epoch": 0.2057128846389604, + "grad_norm": 1.6405016060781838, + "learning_rate": 1.842398139932368e-05, + "loss": 0.7426, + "step": 6712 + }, + { + "epoch": 0.2057435331617016, + "grad_norm": 1.4860830138733945, + "learning_rate": 1.842344647051461e-05, + "loss": 0.7407, + "step": 6713 + }, + { + "epoch": 0.2057741816844428, + "grad_norm": 1.4599808090565998, + "learning_rate": 1.842291145870717e-05, + "loss": 0.6595, + "step": 6714 + }, + { + "epoch": 0.20580483020718401, + "grad_norm": 0.6972685058011313, + "learning_rate": 1.842237636390662e-05, + "loss": 0.622, + "step": 6715 + }, + { + "epoch": 0.20583547872992522, + "grad_norm": 1.55733390531832, + "learning_rate": 1.8421841186118247e-05, + "loss": 0.7956, + "step": 6716 + }, + { + "epoch": 0.20586612725266643, + "grad_norm": 1.735937119136563, + "learning_rate": 1.8421305925347316e-05, + "loss": 0.7824, + "step": 6717 + }, + { + "epoch": 0.20589677577540763, + "grad_norm": 1.5097489332822362, + "learning_rate": 1.8420770581599103e-05, + "loss": 0.7119, + "step": 6718 + }, + { + "epoch": 0.20592742429814884, + "grad_norm": 1.6618311874255274, + "learning_rate": 1.8420235154878883e-05, + "loss": 0.9356, + "step": 6719 + }, + { + "epoch": 0.20595807282089004, + "grad_norm": 0.7578885367907393, + "learning_rate": 1.8419699645191928e-05, + "loss": 0.6632, + "step": 6720 + }, + { + "epoch": 0.20598872134363125, + "grad_norm": 1.550542496505959, + "learning_rate": 1.8419164052543523e-05, + "loss": 0.7905, + "step": 6721 + }, + { + "epoch": 0.20601936986637245, + "grad_norm": 1.7536440974636611, + "learning_rate": 1.8418628376938938e-05, + "loss": 0.841, + "step": 6722 + }, + { + "epoch": 0.20605001838911363, + "grad_norm": 1.631343633781871, + "learning_rate": 1.8418092618383454e-05, + "loss": 0.8798, + "step": 6723 + }, + { + "epoch": 0.20608066691185484, + "grad_norm": 1.453471952704601, + "learning_rate": 1.841755677688235e-05, + "loss": 0.7886, + "step": 6724 + }, + { + "epoch": 0.20611131543459604, + "grad_norm": 1.441884161185926, + "learning_rate": 1.841702085244091e-05, + "loss": 0.657, + "step": 6725 + }, + { + "epoch": 0.20614196395733725, + "grad_norm": 1.5944365530029827, + "learning_rate": 1.84164848450644e-05, + "loss": 0.7422, + "step": 6726 + }, + { + "epoch": 0.20617261248007845, + "grad_norm": 0.7081562674702466, + "learning_rate": 1.841594875475812e-05, + "loss": 0.6246, + "step": 6727 + }, + { + "epoch": 0.20620326100281966, + "grad_norm": 1.5725291387427722, + "learning_rate": 1.841541258152734e-05, + "loss": 0.8657, + "step": 6728 + }, + { + "epoch": 0.20623390952556087, + "grad_norm": 1.5099252785394601, + "learning_rate": 1.8414876325377346e-05, + "loss": 0.8671, + "step": 6729 + }, + { + "epoch": 0.20626455804830207, + "grad_norm": 1.429722298963631, + "learning_rate": 1.8414339986313425e-05, + "loss": 0.8821, + "step": 6730 + }, + { + "epoch": 0.20629520657104328, + "grad_norm": 1.4254474790168465, + "learning_rate": 1.8413803564340856e-05, + "loss": 0.8005, + "step": 6731 + }, + { + "epoch": 0.20632585509378448, + "grad_norm": 1.611735987717679, + "learning_rate": 1.841326705946493e-05, + "loss": 0.8652, + "step": 6732 + }, + { + "epoch": 0.2063565036165257, + "grad_norm": 1.4639489805900399, + "learning_rate": 1.841273047169093e-05, + "loss": 0.8074, + "step": 6733 + }, + { + "epoch": 0.2063871521392669, + "grad_norm": 1.5006025757411101, + "learning_rate": 1.8412193801024144e-05, + "loss": 0.7316, + "step": 6734 + }, + { + "epoch": 0.2064178006620081, + "grad_norm": 1.541599092769299, + "learning_rate": 1.8411657047469862e-05, + "loss": 0.7266, + "step": 6735 + }, + { + "epoch": 0.2064484491847493, + "grad_norm": 1.3350705992572585, + "learning_rate": 1.841112021103337e-05, + "loss": 0.7988, + "step": 6736 + }, + { + "epoch": 0.2064790977074905, + "grad_norm": 1.4990836003734596, + "learning_rate": 1.841058329171996e-05, + "loss": 0.8455, + "step": 6737 + }, + { + "epoch": 0.20650974623023172, + "grad_norm": 1.59385892115334, + "learning_rate": 1.8410046289534914e-05, + "loss": 0.8186, + "step": 6738 + }, + { + "epoch": 0.2065403947529729, + "grad_norm": 1.3185724136543207, + "learning_rate": 1.840950920448354e-05, + "loss": 0.8274, + "step": 6739 + }, + { + "epoch": 0.2065710432757141, + "grad_norm": 1.3657923114746755, + "learning_rate": 1.8408972036571115e-05, + "loss": 0.8154, + "step": 6740 + }, + { + "epoch": 0.2066016917984553, + "grad_norm": 1.4297399913416433, + "learning_rate": 1.8408434785802936e-05, + "loss": 0.7457, + "step": 6741 + }, + { + "epoch": 0.2066323403211965, + "grad_norm": 0.7609645786913892, + "learning_rate": 1.84078974521843e-05, + "loss": 0.64, + "step": 6742 + }, + { + "epoch": 0.20666298884393772, + "grad_norm": 1.5664769862927987, + "learning_rate": 1.8407360035720497e-05, + "loss": 0.8527, + "step": 6743 + }, + { + "epoch": 0.20669363736667892, + "grad_norm": 1.8581760270946204, + "learning_rate": 1.8406822536416826e-05, + "loss": 0.7135, + "step": 6744 + }, + { + "epoch": 0.20672428588942013, + "grad_norm": 1.4628770530742452, + "learning_rate": 1.840628495427858e-05, + "loss": 0.7829, + "step": 6745 + }, + { + "epoch": 0.20675493441216133, + "grad_norm": 1.480821624609566, + "learning_rate": 1.840574728931106e-05, + "loss": 0.8281, + "step": 6746 + }, + { + "epoch": 0.20678558293490254, + "grad_norm": 1.7154408061238167, + "learning_rate": 1.840520954151956e-05, + "loss": 0.7755, + "step": 6747 + }, + { + "epoch": 0.20681623145764375, + "grad_norm": 0.7333194161192862, + "learning_rate": 1.840467171090938e-05, + "loss": 0.6434, + "step": 6748 + }, + { + "epoch": 0.20684687998038495, + "grad_norm": 1.5928280367568488, + "learning_rate": 1.840413379748582e-05, + "loss": 0.777, + "step": 6749 + }, + { + "epoch": 0.20687752850312616, + "grad_norm": 0.6520229338292736, + "learning_rate": 1.8403595801254175e-05, + "loss": 0.614, + "step": 6750 + }, + { + "epoch": 0.20690817702586736, + "grad_norm": 1.455366341932173, + "learning_rate": 1.8403057722219755e-05, + "loss": 0.7725, + "step": 6751 + }, + { + "epoch": 0.20693882554860857, + "grad_norm": 0.6798187941176941, + "learning_rate": 1.8402519560387854e-05, + "loss": 0.6247, + "step": 6752 + }, + { + "epoch": 0.20696947407134977, + "grad_norm": 1.5541378590518764, + "learning_rate": 1.8401981315763782e-05, + "loss": 0.7642, + "step": 6753 + }, + { + "epoch": 0.20700012259409095, + "grad_norm": 1.5876107681057723, + "learning_rate": 1.8401442988352837e-05, + "loss": 0.827, + "step": 6754 + }, + { + "epoch": 0.20703077111683216, + "grad_norm": 1.6020307928932644, + "learning_rate": 1.8400904578160322e-05, + "loss": 0.8253, + "step": 6755 + }, + { + "epoch": 0.20706141963957336, + "grad_norm": 1.5504536129013093, + "learning_rate": 1.840036608519155e-05, + "loss": 0.8519, + "step": 6756 + }, + { + "epoch": 0.20709206816231457, + "grad_norm": 1.5490171609691938, + "learning_rate": 1.8399827509451815e-05, + "loss": 0.8421, + "step": 6757 + }, + { + "epoch": 0.20712271668505577, + "grad_norm": 1.3838563907621488, + "learning_rate": 1.8399288850946435e-05, + "loss": 0.7669, + "step": 6758 + }, + { + "epoch": 0.20715336520779698, + "grad_norm": 1.5862062590913113, + "learning_rate": 1.839875010968071e-05, + "loss": 0.7462, + "step": 6759 + }, + { + "epoch": 0.20718401373053819, + "grad_norm": 1.559738128053471, + "learning_rate": 1.8398211285659953e-05, + "loss": 0.8509, + "step": 6760 + }, + { + "epoch": 0.2072146622532794, + "grad_norm": 1.4636739194676427, + "learning_rate": 1.839767237888947e-05, + "loss": 0.74, + "step": 6761 + }, + { + "epoch": 0.2072453107760206, + "grad_norm": 1.6251032158286054, + "learning_rate": 1.8397133389374575e-05, + "loss": 0.7647, + "step": 6762 + }, + { + "epoch": 0.2072759592987618, + "grad_norm": 1.721545366081228, + "learning_rate": 1.8396594317120577e-05, + "loss": 0.7858, + "step": 6763 + }, + { + "epoch": 0.207306607821503, + "grad_norm": 1.7982292027800302, + "learning_rate": 1.839605516213278e-05, + "loss": 0.8837, + "step": 6764 + }, + { + "epoch": 0.20733725634424421, + "grad_norm": 1.2823504630034361, + "learning_rate": 1.8395515924416513e-05, + "loss": 0.7106, + "step": 6765 + }, + { + "epoch": 0.20736790486698542, + "grad_norm": 1.4268711739458844, + "learning_rate": 1.839497660397707e-05, + "loss": 0.8276, + "step": 6766 + }, + { + "epoch": 0.20739855338972663, + "grad_norm": 1.6147817533106728, + "learning_rate": 1.8394437200819778e-05, + "loss": 0.8699, + "step": 6767 + }, + { + "epoch": 0.20742920191246783, + "grad_norm": 1.4241977129376806, + "learning_rate": 1.8393897714949952e-05, + "loss": 0.7391, + "step": 6768 + }, + { + "epoch": 0.20745985043520904, + "grad_norm": 1.5788369745908464, + "learning_rate": 1.83933581463729e-05, + "loss": 0.8172, + "step": 6769 + }, + { + "epoch": 0.20749049895795021, + "grad_norm": 1.6316046940177005, + "learning_rate": 1.8392818495093946e-05, + "loss": 0.8238, + "step": 6770 + }, + { + "epoch": 0.20752114748069142, + "grad_norm": 1.348483409540127, + "learning_rate": 1.8392278761118402e-05, + "loss": 0.7458, + "step": 6771 + }, + { + "epoch": 0.20755179600343263, + "grad_norm": 0.9238713152649054, + "learning_rate": 1.8391738944451588e-05, + "loss": 0.6583, + "step": 6772 + }, + { + "epoch": 0.20758244452617383, + "grad_norm": 1.5985931106457898, + "learning_rate": 1.8391199045098824e-05, + "loss": 0.8661, + "step": 6773 + }, + { + "epoch": 0.20761309304891504, + "grad_norm": 1.6244186222882628, + "learning_rate": 1.839065906306543e-05, + "loss": 0.8797, + "step": 6774 + }, + { + "epoch": 0.20764374157165624, + "grad_norm": 1.5109839751399377, + "learning_rate": 1.839011899835672e-05, + "loss": 0.8993, + "step": 6775 + }, + { + "epoch": 0.20767439009439745, + "grad_norm": 1.3320564367761187, + "learning_rate": 1.8389578850978024e-05, + "loss": 0.6499, + "step": 6776 + }, + { + "epoch": 0.20770503861713865, + "grad_norm": 1.383508051720782, + "learning_rate": 1.8389038620934663e-05, + "loss": 0.7333, + "step": 6777 + }, + { + "epoch": 0.20773568713987986, + "grad_norm": 1.5566244143274248, + "learning_rate": 1.8388498308231955e-05, + "loss": 0.9087, + "step": 6778 + }, + { + "epoch": 0.20776633566262107, + "grad_norm": 1.5328770630903354, + "learning_rate": 1.838795791287523e-05, + "loss": 0.7848, + "step": 6779 + }, + { + "epoch": 0.20779698418536227, + "grad_norm": 1.4993820957129131, + "learning_rate": 1.8387417434869808e-05, + "loss": 0.706, + "step": 6780 + }, + { + "epoch": 0.20782763270810348, + "grad_norm": 1.4942856601208143, + "learning_rate": 1.8386876874221017e-05, + "loss": 0.8087, + "step": 6781 + }, + { + "epoch": 0.20785828123084468, + "grad_norm": 1.5814099894472229, + "learning_rate": 1.838633623093418e-05, + "loss": 0.8339, + "step": 6782 + }, + { + "epoch": 0.2078889297535859, + "grad_norm": 2.2794282802066648, + "learning_rate": 1.838579550501463e-05, + "loss": 0.8033, + "step": 6783 + }, + { + "epoch": 0.2079195782763271, + "grad_norm": 1.6984988735026827, + "learning_rate": 1.8385254696467683e-05, + "loss": 0.8692, + "step": 6784 + }, + { + "epoch": 0.20795022679906827, + "grad_norm": 1.5543136151269856, + "learning_rate": 1.8384713805298684e-05, + "loss": 0.7741, + "step": 6785 + }, + { + "epoch": 0.20798087532180948, + "grad_norm": 1.4825398734245783, + "learning_rate": 1.838417283151295e-05, + "loss": 0.8637, + "step": 6786 + }, + { + "epoch": 0.20801152384455068, + "grad_norm": 1.414885940816512, + "learning_rate": 1.838363177511582e-05, + "loss": 0.6638, + "step": 6787 + }, + { + "epoch": 0.2080421723672919, + "grad_norm": 1.4070128052218596, + "learning_rate": 1.838309063611262e-05, + "loss": 0.8102, + "step": 6788 + }, + { + "epoch": 0.2080728208900331, + "grad_norm": 1.4941205579800454, + "learning_rate": 1.8382549414508684e-05, + "loss": 0.7031, + "step": 6789 + }, + { + "epoch": 0.2081034694127743, + "grad_norm": 0.9867703122332204, + "learning_rate": 1.838200811030934e-05, + "loss": 0.6267, + "step": 6790 + }, + { + "epoch": 0.2081341179355155, + "grad_norm": 1.6501283040967762, + "learning_rate": 1.8381466723519928e-05, + "loss": 0.9232, + "step": 6791 + }, + { + "epoch": 0.2081647664582567, + "grad_norm": 1.5235864423149599, + "learning_rate": 1.8380925254145782e-05, + "loss": 0.7259, + "step": 6792 + }, + { + "epoch": 0.20819541498099792, + "grad_norm": 1.3356642587825611, + "learning_rate": 1.8380383702192232e-05, + "loss": 0.6916, + "step": 6793 + }, + { + "epoch": 0.20822606350373912, + "grad_norm": 1.7041696644818485, + "learning_rate": 1.837984206766462e-05, + "loss": 0.762, + "step": 6794 + }, + { + "epoch": 0.20825671202648033, + "grad_norm": 1.7199253664847411, + "learning_rate": 1.8379300350568277e-05, + "loss": 0.8447, + "step": 6795 + }, + { + "epoch": 0.20828736054922153, + "grad_norm": 1.4052312986694444, + "learning_rate": 1.837875855090854e-05, + "loss": 0.7662, + "step": 6796 + }, + { + "epoch": 0.20831800907196274, + "grad_norm": 1.5120699158132929, + "learning_rate": 1.837821666869076e-05, + "loss": 0.8496, + "step": 6797 + }, + { + "epoch": 0.20834865759470395, + "grad_norm": 0.7613400921770378, + "learning_rate": 1.8377674703920264e-05, + "loss": 0.6559, + "step": 6798 + }, + { + "epoch": 0.20837930611744515, + "grad_norm": 1.3882726630451987, + "learning_rate": 1.8377132656602392e-05, + "loss": 0.7007, + "step": 6799 + }, + { + "epoch": 0.20840995464018636, + "grad_norm": 1.7284274509552784, + "learning_rate": 1.8376590526742494e-05, + "loss": 0.9005, + "step": 6800 + }, + { + "epoch": 0.20844060316292753, + "grad_norm": 0.7095388079332565, + "learning_rate": 1.8376048314345903e-05, + "loss": 0.6448, + "step": 6801 + }, + { + "epoch": 0.20847125168566874, + "grad_norm": 1.5347147634362945, + "learning_rate": 1.8375506019417966e-05, + "loss": 0.7073, + "step": 6802 + }, + { + "epoch": 0.20850190020840995, + "grad_norm": 1.50808904067118, + "learning_rate": 1.8374963641964023e-05, + "loss": 0.8164, + "step": 6803 + }, + { + "epoch": 0.20853254873115115, + "grad_norm": 1.4002429799132905, + "learning_rate": 1.8374421181989422e-05, + "loss": 0.8564, + "step": 6804 + }, + { + "epoch": 0.20856319725389236, + "grad_norm": 1.5552903569644816, + "learning_rate": 1.837387863949951e-05, + "loss": 0.8771, + "step": 6805 + }, + { + "epoch": 0.20859384577663356, + "grad_norm": 1.536966046037944, + "learning_rate": 1.8373336014499626e-05, + "loss": 0.8416, + "step": 6806 + }, + { + "epoch": 0.20862449429937477, + "grad_norm": 1.5613456145930318, + "learning_rate": 1.837279330699512e-05, + "loss": 0.7476, + "step": 6807 + }, + { + "epoch": 0.20865514282211597, + "grad_norm": 1.5261375371204424, + "learning_rate": 1.8372250516991337e-05, + "loss": 0.7835, + "step": 6808 + }, + { + "epoch": 0.20868579134485718, + "grad_norm": 0.8127158269103457, + "learning_rate": 1.837170764449363e-05, + "loss": 0.6261, + "step": 6809 + }, + { + "epoch": 0.20871643986759839, + "grad_norm": 0.792509226331124, + "learning_rate": 1.8371164689507346e-05, + "loss": 0.6375, + "step": 6810 + }, + { + "epoch": 0.2087470883903396, + "grad_norm": 1.6094084143173755, + "learning_rate": 1.8370621652037832e-05, + "loss": 0.8989, + "step": 6811 + }, + { + "epoch": 0.2087777369130808, + "grad_norm": 1.5096645737733976, + "learning_rate": 1.8370078532090443e-05, + "loss": 0.7323, + "step": 6812 + }, + { + "epoch": 0.208808385435822, + "grad_norm": 1.3210500607863032, + "learning_rate": 1.836953532967053e-05, + "loss": 0.7887, + "step": 6813 + }, + { + "epoch": 0.2088390339585632, + "grad_norm": 1.3695595350117697, + "learning_rate": 1.836899204478344e-05, + "loss": 0.759, + "step": 6814 + }, + { + "epoch": 0.20886968248130441, + "grad_norm": 1.5419140101342568, + "learning_rate": 1.8368448677434535e-05, + "loss": 0.846, + "step": 6815 + }, + { + "epoch": 0.2089003310040456, + "grad_norm": 1.492717687414154, + "learning_rate": 1.836790522762916e-05, + "loss": 0.6835, + "step": 6816 + }, + { + "epoch": 0.2089309795267868, + "grad_norm": 1.5184795294322317, + "learning_rate": 1.8367361695372677e-05, + "loss": 0.8962, + "step": 6817 + }, + { + "epoch": 0.208961628049528, + "grad_norm": 1.5517277988340292, + "learning_rate": 1.8366818080670436e-05, + "loss": 0.8415, + "step": 6818 + }, + { + "epoch": 0.2089922765722692, + "grad_norm": 1.3758648724144291, + "learning_rate": 1.8366274383527797e-05, + "loss": 0.7108, + "step": 6819 + }, + { + "epoch": 0.20902292509501041, + "grad_norm": 1.396479550206979, + "learning_rate": 1.8365730603950112e-05, + "loss": 0.8197, + "step": 6820 + }, + { + "epoch": 0.20905357361775162, + "grad_norm": 1.5534472842287408, + "learning_rate": 1.8365186741942745e-05, + "loss": 0.8487, + "step": 6821 + }, + { + "epoch": 0.20908422214049283, + "grad_norm": 1.740524940546155, + "learning_rate": 1.836464279751106e-05, + "loss": 0.8787, + "step": 6822 + }, + { + "epoch": 0.20911487066323403, + "grad_norm": 1.5430670592706828, + "learning_rate": 1.83640987706604e-05, + "loss": 0.7104, + "step": 6823 + }, + { + "epoch": 0.20914551918597524, + "grad_norm": 1.4217644181859388, + "learning_rate": 1.8363554661396138e-05, + "loss": 0.7896, + "step": 6824 + }, + { + "epoch": 0.20917616770871644, + "grad_norm": 1.5245936305709773, + "learning_rate": 1.8363010469723633e-05, + "loss": 0.8354, + "step": 6825 + }, + { + "epoch": 0.20920681623145765, + "grad_norm": 1.3436777581013064, + "learning_rate": 1.8362466195648246e-05, + "loss": 0.7189, + "step": 6826 + }, + { + "epoch": 0.20923746475419885, + "grad_norm": 1.5153022396590579, + "learning_rate": 1.836192183917534e-05, + "loss": 0.8289, + "step": 6827 + }, + { + "epoch": 0.20926811327694006, + "grad_norm": 1.6589128121537386, + "learning_rate": 1.8361377400310275e-05, + "loss": 0.7952, + "step": 6828 + }, + { + "epoch": 0.20929876179968127, + "grad_norm": 1.5216693500414276, + "learning_rate": 1.8360832879058422e-05, + "loss": 0.8628, + "step": 6829 + }, + { + "epoch": 0.20932941032242247, + "grad_norm": 1.4870629566841376, + "learning_rate": 1.836028827542514e-05, + "loss": 0.7895, + "step": 6830 + }, + { + "epoch": 0.20936005884516368, + "grad_norm": 1.6008542977872657, + "learning_rate": 1.8359743589415805e-05, + "loss": 0.9375, + "step": 6831 + }, + { + "epoch": 0.20939070736790485, + "grad_norm": 1.638701946515804, + "learning_rate": 1.8359198821035775e-05, + "loss": 0.8321, + "step": 6832 + }, + { + "epoch": 0.20942135589064606, + "grad_norm": 1.642083107695209, + "learning_rate": 1.835865397029042e-05, + "loss": 0.8831, + "step": 6833 + }, + { + "epoch": 0.20945200441338727, + "grad_norm": 1.6338200328053039, + "learning_rate": 1.8358109037185106e-05, + "loss": 0.8487, + "step": 6834 + }, + { + "epoch": 0.20948265293612847, + "grad_norm": 1.4057787471542091, + "learning_rate": 1.8357564021725206e-05, + "loss": 0.8227, + "step": 6835 + }, + { + "epoch": 0.20951330145886968, + "grad_norm": 1.7138507554837206, + "learning_rate": 1.835701892391609e-05, + "loss": 0.8588, + "step": 6836 + }, + { + "epoch": 0.20954394998161088, + "grad_norm": 1.579998779171467, + "learning_rate": 1.835647374376313e-05, + "loss": 0.882, + "step": 6837 + }, + { + "epoch": 0.2095745985043521, + "grad_norm": 1.495295209014348, + "learning_rate": 1.8355928481271698e-05, + "loss": 0.7788, + "step": 6838 + }, + { + "epoch": 0.2096052470270933, + "grad_norm": 1.6051740679183153, + "learning_rate": 1.835538313644716e-05, + "loss": 0.7749, + "step": 6839 + }, + { + "epoch": 0.2096358955498345, + "grad_norm": 1.4595707540620046, + "learning_rate": 1.8354837709294894e-05, + "loss": 0.7518, + "step": 6840 + }, + { + "epoch": 0.2096665440725757, + "grad_norm": 0.9008313356143156, + "learning_rate": 1.835429219982028e-05, + "loss": 0.6571, + "step": 6841 + }, + { + "epoch": 0.2096971925953169, + "grad_norm": 0.8425872789762705, + "learning_rate": 1.835374660802868e-05, + "loss": 0.65, + "step": 6842 + }, + { + "epoch": 0.20972784111805812, + "grad_norm": 1.5184301639303324, + "learning_rate": 1.8353200933925482e-05, + "loss": 0.8621, + "step": 6843 + }, + { + "epoch": 0.20975848964079932, + "grad_norm": 1.7187098289062028, + "learning_rate": 1.8352655177516057e-05, + "loss": 0.8998, + "step": 6844 + }, + { + "epoch": 0.20978913816354053, + "grad_norm": 1.3947808701180964, + "learning_rate": 1.8352109338805784e-05, + "loss": 0.756, + "step": 6845 + }, + { + "epoch": 0.20981978668628173, + "grad_norm": 1.5178498798275215, + "learning_rate": 1.835156341780004e-05, + "loss": 0.9036, + "step": 6846 + }, + { + "epoch": 0.2098504352090229, + "grad_norm": 1.5083271406030487, + "learning_rate": 1.8351017414504203e-05, + "loss": 0.8906, + "step": 6847 + }, + { + "epoch": 0.20988108373176412, + "grad_norm": 1.7126666978824965, + "learning_rate": 1.8350471328923656e-05, + "loss": 0.8327, + "step": 6848 + }, + { + "epoch": 0.20991173225450532, + "grad_norm": 1.7482681873902142, + "learning_rate": 1.834992516106378e-05, + "loss": 0.865, + "step": 6849 + }, + { + "epoch": 0.20994238077724653, + "grad_norm": 1.4868988877082658, + "learning_rate": 1.8349378910929956e-05, + "loss": 0.8845, + "step": 6850 + }, + { + "epoch": 0.20997302929998773, + "grad_norm": 1.8619910240636044, + "learning_rate": 1.8348832578527562e-05, + "loss": 0.8827, + "step": 6851 + }, + { + "epoch": 0.21000367782272894, + "grad_norm": 1.6709556810628785, + "learning_rate": 1.8348286163861987e-05, + "loss": 0.7783, + "step": 6852 + }, + { + "epoch": 0.21003432634547015, + "grad_norm": 1.4829991960340885, + "learning_rate": 1.834773966693861e-05, + "loss": 0.6926, + "step": 6853 + }, + { + "epoch": 0.21006497486821135, + "grad_norm": 1.4812659461978661, + "learning_rate": 1.834719308776282e-05, + "loss": 0.7898, + "step": 6854 + }, + { + "epoch": 0.21009562339095256, + "grad_norm": 1.5092639062114293, + "learning_rate": 1.834664642634e-05, + "loss": 0.8034, + "step": 6855 + }, + { + "epoch": 0.21012627191369376, + "grad_norm": 1.5049294187930193, + "learning_rate": 1.8346099682675536e-05, + "loss": 0.8123, + "step": 6856 + }, + { + "epoch": 0.21015692043643497, + "grad_norm": 1.6628576548392002, + "learning_rate": 1.8345552856774817e-05, + "loss": 0.8321, + "step": 6857 + }, + { + "epoch": 0.21018756895917617, + "grad_norm": 1.4545127315485207, + "learning_rate": 1.834500594864323e-05, + "loss": 0.8256, + "step": 6858 + }, + { + "epoch": 0.21021821748191738, + "grad_norm": 1.186539674378323, + "learning_rate": 1.834445895828617e-05, + "loss": 0.6533, + "step": 6859 + }, + { + "epoch": 0.21024886600465859, + "grad_norm": 1.7562907130184924, + "learning_rate": 1.8343911885709013e-05, + "loss": 0.8403, + "step": 6860 + }, + { + "epoch": 0.2102795145273998, + "grad_norm": 1.5999985325187824, + "learning_rate": 1.834336473091716e-05, + "loss": 0.8301, + "step": 6861 + }, + { + "epoch": 0.210310163050141, + "grad_norm": 1.4896899317017498, + "learning_rate": 1.8342817493916e-05, + "loss": 0.7658, + "step": 6862 + }, + { + "epoch": 0.21034081157288217, + "grad_norm": 1.5487404139285186, + "learning_rate": 1.8342270174710927e-05, + "loss": 0.8779, + "step": 6863 + }, + { + "epoch": 0.21037146009562338, + "grad_norm": 1.4803540718975106, + "learning_rate": 1.834172277330733e-05, + "loss": 0.8414, + "step": 6864 + }, + { + "epoch": 0.21040210861836459, + "grad_norm": 1.7163023021126824, + "learning_rate": 1.83411752897106e-05, + "loss": 0.8014, + "step": 6865 + }, + { + "epoch": 0.2104327571411058, + "grad_norm": 0.8843582720994928, + "learning_rate": 1.834062772392614e-05, + "loss": 0.6609, + "step": 6866 + }, + { + "epoch": 0.210463405663847, + "grad_norm": 1.6012941626619355, + "learning_rate": 1.8340080075959343e-05, + "loss": 0.8316, + "step": 6867 + }, + { + "epoch": 0.2104940541865882, + "grad_norm": 1.4462690916127185, + "learning_rate": 1.8339532345815597e-05, + "loss": 0.8641, + "step": 6868 + }, + { + "epoch": 0.2105247027093294, + "grad_norm": 1.7853656616546896, + "learning_rate": 1.8338984533500308e-05, + "loss": 0.7731, + "step": 6869 + }, + { + "epoch": 0.21055535123207061, + "grad_norm": 1.5613052473209277, + "learning_rate": 1.8338436639018873e-05, + "loss": 0.805, + "step": 6870 + }, + { + "epoch": 0.21058599975481182, + "grad_norm": 0.72691036449426, + "learning_rate": 1.8337888662376685e-05, + "loss": 0.673, + "step": 6871 + }, + { + "epoch": 0.21061664827755303, + "grad_norm": 1.6804635159198047, + "learning_rate": 1.833734060357915e-05, + "loss": 0.8772, + "step": 6872 + }, + { + "epoch": 0.21064729680029423, + "grad_norm": 1.6060519304803618, + "learning_rate": 1.833679246263166e-05, + "loss": 0.8304, + "step": 6873 + }, + { + "epoch": 0.21067794532303544, + "grad_norm": 1.6127063315294858, + "learning_rate": 1.8336244239539626e-05, + "loss": 0.9119, + "step": 6874 + }, + { + "epoch": 0.21070859384577664, + "grad_norm": 1.6495714589459318, + "learning_rate": 1.8335695934308438e-05, + "loss": 0.7615, + "step": 6875 + }, + { + "epoch": 0.21073924236851785, + "grad_norm": 1.3491909367759132, + "learning_rate": 1.833514754694351e-05, + "loss": 0.6885, + "step": 6876 + }, + { + "epoch": 0.21076989089125905, + "grad_norm": 1.499544471973429, + "learning_rate": 1.8334599077450243e-05, + "loss": 0.7549, + "step": 6877 + }, + { + "epoch": 0.21080053941400023, + "grad_norm": 1.469510097105331, + "learning_rate": 1.8334050525834036e-05, + "loss": 0.7767, + "step": 6878 + }, + { + "epoch": 0.21083118793674144, + "grad_norm": 1.347397909138933, + "learning_rate": 1.8333501892100293e-05, + "loss": 0.7431, + "step": 6879 + }, + { + "epoch": 0.21086183645948264, + "grad_norm": 1.5681673660702165, + "learning_rate": 1.833295317625443e-05, + "loss": 0.8014, + "step": 6880 + }, + { + "epoch": 0.21089248498222385, + "grad_norm": 1.4696144379073106, + "learning_rate": 1.8332404378301843e-05, + "loss": 0.8433, + "step": 6881 + }, + { + "epoch": 0.21092313350496505, + "grad_norm": 1.6518907291378748, + "learning_rate": 1.8331855498247944e-05, + "loss": 0.9191, + "step": 6882 + }, + { + "epoch": 0.21095378202770626, + "grad_norm": 1.4393574021764317, + "learning_rate": 1.8331306536098145e-05, + "loss": 0.753, + "step": 6883 + }, + { + "epoch": 0.21098443055044747, + "grad_norm": 1.4617531191386788, + "learning_rate": 1.8330757491857846e-05, + "loss": 0.7873, + "step": 6884 + }, + { + "epoch": 0.21101507907318867, + "grad_norm": 1.5439017239263162, + "learning_rate": 1.8330208365532465e-05, + "loss": 0.7955, + "step": 6885 + }, + { + "epoch": 0.21104572759592988, + "grad_norm": 1.4681532043389636, + "learning_rate": 1.832965915712741e-05, + "loss": 0.7143, + "step": 6886 + }, + { + "epoch": 0.21107637611867108, + "grad_norm": 0.7865540407909373, + "learning_rate": 1.832910986664809e-05, + "loss": 0.6416, + "step": 6887 + }, + { + "epoch": 0.2111070246414123, + "grad_norm": 1.360422725234303, + "learning_rate": 1.8328560494099922e-05, + "loss": 0.7625, + "step": 6888 + }, + { + "epoch": 0.2111376731641535, + "grad_norm": 1.4012867191475291, + "learning_rate": 1.8328011039488315e-05, + "loss": 0.6828, + "step": 6889 + }, + { + "epoch": 0.2111683216868947, + "grad_norm": 0.6661203073928726, + "learning_rate": 1.8327461502818683e-05, + "loss": 0.6163, + "step": 6890 + }, + { + "epoch": 0.2111989702096359, + "grad_norm": 1.4686871427417176, + "learning_rate": 1.832691188409644e-05, + "loss": 0.8722, + "step": 6891 + }, + { + "epoch": 0.2112296187323771, + "grad_norm": 1.5235005346097346, + "learning_rate": 1.8326362183327007e-05, + "loss": 0.8547, + "step": 6892 + }, + { + "epoch": 0.21126026725511832, + "grad_norm": 1.4944846854524596, + "learning_rate": 1.8325812400515798e-05, + "loss": 0.7662, + "step": 6893 + }, + { + "epoch": 0.2112909157778595, + "grad_norm": 0.7501610228412583, + "learning_rate": 1.832526253566823e-05, + "loss": 0.6228, + "step": 6894 + }, + { + "epoch": 0.2113215643006007, + "grad_norm": 1.485310054956468, + "learning_rate": 1.8324712588789715e-05, + "loss": 0.865, + "step": 6895 + }, + { + "epoch": 0.2113522128233419, + "grad_norm": 1.5123388650557288, + "learning_rate": 1.832416255988568e-05, + "loss": 0.7704, + "step": 6896 + }, + { + "epoch": 0.2113828613460831, + "grad_norm": 1.5549798248563556, + "learning_rate": 1.8323612448961545e-05, + "loss": 0.7386, + "step": 6897 + }, + { + "epoch": 0.21141350986882432, + "grad_norm": 1.70628454684224, + "learning_rate": 1.8323062256022722e-05, + "loss": 0.8094, + "step": 6898 + }, + { + "epoch": 0.21144415839156552, + "grad_norm": 1.6303512918431993, + "learning_rate": 1.8322511981074637e-05, + "loss": 0.8388, + "step": 6899 + }, + { + "epoch": 0.21147480691430673, + "grad_norm": 1.6084499455039138, + "learning_rate": 1.8321961624122714e-05, + "loss": 0.8526, + "step": 6900 + }, + { + "epoch": 0.21150545543704793, + "grad_norm": 1.6041285893246777, + "learning_rate": 1.8321411185172374e-05, + "loss": 0.8039, + "step": 6901 + }, + { + "epoch": 0.21153610395978914, + "grad_norm": 1.5255807386334863, + "learning_rate": 1.832086066422904e-05, + "loss": 0.7649, + "step": 6902 + }, + { + "epoch": 0.21156675248253035, + "grad_norm": 1.4396475475995454, + "learning_rate": 1.832031006129814e-05, + "loss": 0.7776, + "step": 6903 + }, + { + "epoch": 0.21159740100527155, + "grad_norm": 1.57832480020469, + "learning_rate": 1.8319759376385092e-05, + "loss": 0.8816, + "step": 6904 + }, + { + "epoch": 0.21162804952801276, + "grad_norm": 0.7454910663655587, + "learning_rate": 1.8319208609495325e-05, + "loss": 0.6562, + "step": 6905 + }, + { + "epoch": 0.21165869805075396, + "grad_norm": 1.6116817757724051, + "learning_rate": 1.8318657760634272e-05, + "loss": 0.7505, + "step": 6906 + }, + { + "epoch": 0.21168934657349517, + "grad_norm": 0.7396550404742046, + "learning_rate": 1.8318106829807353e-05, + "loss": 0.6761, + "step": 6907 + }, + { + "epoch": 0.21171999509623637, + "grad_norm": 1.5156315326682934, + "learning_rate": 1.8317555817019997e-05, + "loss": 0.7673, + "step": 6908 + }, + { + "epoch": 0.21175064361897755, + "grad_norm": 1.5170655865397984, + "learning_rate": 1.8317004722277637e-05, + "loss": 0.7923, + "step": 6909 + }, + { + "epoch": 0.21178129214171876, + "grad_norm": 0.7248286930181582, + "learning_rate": 1.8316453545585703e-05, + "loss": 0.6338, + "step": 6910 + }, + { + "epoch": 0.21181194066445996, + "grad_norm": 1.8095823310248136, + "learning_rate": 1.831590228694962e-05, + "loss": 0.8081, + "step": 6911 + }, + { + "epoch": 0.21184258918720117, + "grad_norm": 1.4740904372276842, + "learning_rate": 1.831535094637483e-05, + "loss": 0.8077, + "step": 6912 + }, + { + "epoch": 0.21187323770994237, + "grad_norm": 1.5152649563802538, + "learning_rate": 1.8314799523866754e-05, + "loss": 0.8024, + "step": 6913 + }, + { + "epoch": 0.21190388623268358, + "grad_norm": 1.3557122641269579, + "learning_rate": 1.8314248019430834e-05, + "loss": 0.8239, + "step": 6914 + }, + { + "epoch": 0.21193453475542479, + "grad_norm": 1.480599669677041, + "learning_rate": 1.8313696433072502e-05, + "loss": 0.7169, + "step": 6915 + }, + { + "epoch": 0.211965183278166, + "grad_norm": 1.467566048239492, + "learning_rate": 1.8313144764797188e-05, + "loss": 0.7813, + "step": 6916 + }, + { + "epoch": 0.2119958318009072, + "grad_norm": 1.4777009610721916, + "learning_rate": 1.8312593014610335e-05, + "loss": 0.7297, + "step": 6917 + }, + { + "epoch": 0.2120264803236484, + "grad_norm": 1.6684562832914094, + "learning_rate": 1.8312041182517374e-05, + "loss": 0.8053, + "step": 6918 + }, + { + "epoch": 0.2120571288463896, + "grad_norm": 0.8102408969987468, + "learning_rate": 1.8311489268523748e-05, + "loss": 0.6393, + "step": 6919 + }, + { + "epoch": 0.21208777736913081, + "grad_norm": 1.5891828855749643, + "learning_rate": 1.8310937272634887e-05, + "loss": 0.9339, + "step": 6920 + }, + { + "epoch": 0.21211842589187202, + "grad_norm": 1.4363828409949864, + "learning_rate": 1.831038519485624e-05, + "loss": 0.7943, + "step": 6921 + }, + { + "epoch": 0.21214907441461323, + "grad_norm": 1.5271919580628208, + "learning_rate": 1.830983303519324e-05, + "loss": 0.847, + "step": 6922 + }, + { + "epoch": 0.21217972293735443, + "grad_norm": 1.5195202408283113, + "learning_rate": 1.8309280793651325e-05, + "loss": 0.7996, + "step": 6923 + }, + { + "epoch": 0.21221037146009564, + "grad_norm": 0.7307761951424616, + "learning_rate": 1.830872847023594e-05, + "loss": 0.6398, + "step": 6924 + }, + { + "epoch": 0.21224101998283681, + "grad_norm": 1.5260099451194415, + "learning_rate": 1.8308176064952532e-05, + "loss": 0.8498, + "step": 6925 + }, + { + "epoch": 0.21227166850557802, + "grad_norm": 1.616975203678236, + "learning_rate": 1.8307623577806537e-05, + "loss": 0.7532, + "step": 6926 + }, + { + "epoch": 0.21230231702831923, + "grad_norm": 1.9214348000725996, + "learning_rate": 1.83070710088034e-05, + "loss": 0.8082, + "step": 6927 + }, + { + "epoch": 0.21233296555106043, + "grad_norm": 1.3202883516907158, + "learning_rate": 1.8306518357948572e-05, + "loss": 0.7276, + "step": 6928 + }, + { + "epoch": 0.21236361407380164, + "grad_norm": 1.3270357213053994, + "learning_rate": 1.8305965625247492e-05, + "loss": 0.7367, + "step": 6929 + }, + { + "epoch": 0.21239426259654284, + "grad_norm": 1.5709132073235248, + "learning_rate": 1.8305412810705604e-05, + "loss": 0.8201, + "step": 6930 + }, + { + "epoch": 0.21242491111928405, + "grad_norm": 1.45448897216373, + "learning_rate": 1.830485991432836e-05, + "loss": 0.7415, + "step": 6931 + }, + { + "epoch": 0.21245555964202525, + "grad_norm": 0.7451535451299756, + "learning_rate": 1.8304306936121206e-05, + "loss": 0.6475, + "step": 6932 + }, + { + "epoch": 0.21248620816476646, + "grad_norm": 1.5116777244648663, + "learning_rate": 1.830375387608959e-05, + "loss": 0.7559, + "step": 6933 + }, + { + "epoch": 0.21251685668750767, + "grad_norm": 1.5483271860679841, + "learning_rate": 1.8303200734238965e-05, + "loss": 0.8355, + "step": 6934 + }, + { + "epoch": 0.21254750521024887, + "grad_norm": 1.537658369235163, + "learning_rate": 1.830264751057478e-05, + "loss": 0.7974, + "step": 6935 + }, + { + "epoch": 0.21257815373299008, + "grad_norm": 0.7091929113954132, + "learning_rate": 1.830209420510248e-05, + "loss": 0.6195, + "step": 6936 + }, + { + "epoch": 0.21260880225573128, + "grad_norm": 1.4693759754980675, + "learning_rate": 1.8301540817827526e-05, + "loss": 0.8118, + "step": 6937 + }, + { + "epoch": 0.2126394507784725, + "grad_norm": 1.7014407283595052, + "learning_rate": 1.830098734875536e-05, + "loss": 0.8218, + "step": 6938 + }, + { + "epoch": 0.2126700993012137, + "grad_norm": 1.575477453615353, + "learning_rate": 1.830043379789145e-05, + "loss": 0.8316, + "step": 6939 + }, + { + "epoch": 0.21270074782395487, + "grad_norm": 1.603677265800151, + "learning_rate": 1.8299880165241237e-05, + "loss": 0.8502, + "step": 6940 + }, + { + "epoch": 0.21273139634669608, + "grad_norm": 1.5934489143249941, + "learning_rate": 1.8299326450810183e-05, + "loss": 0.8578, + "step": 6941 + }, + { + "epoch": 0.21276204486943728, + "grad_norm": 1.4549858892867658, + "learning_rate": 1.829877265460374e-05, + "loss": 0.7524, + "step": 6942 + }, + { + "epoch": 0.2127926933921785, + "grad_norm": 1.2664205555577779, + "learning_rate": 1.829821877662737e-05, + "loss": 0.7894, + "step": 6943 + }, + { + "epoch": 0.2128233419149197, + "grad_norm": 1.431249674695553, + "learning_rate": 1.8297664816886524e-05, + "loss": 0.8255, + "step": 6944 + }, + { + "epoch": 0.2128539904376609, + "grad_norm": 0.7272018966940994, + "learning_rate": 1.8297110775386664e-05, + "loss": 0.6383, + "step": 6945 + }, + { + "epoch": 0.2128846389604021, + "grad_norm": 1.6414728594789825, + "learning_rate": 1.8296556652133248e-05, + "loss": 0.7477, + "step": 6946 + }, + { + "epoch": 0.2129152874831433, + "grad_norm": 1.6544550684201424, + "learning_rate": 1.829600244713174e-05, + "loss": 0.8364, + "step": 6947 + }, + { + "epoch": 0.21294593600588452, + "grad_norm": 0.7183489217332915, + "learning_rate": 1.8295448160387595e-05, + "loss": 0.6505, + "step": 6948 + }, + { + "epoch": 0.21297658452862572, + "grad_norm": 1.5423138888968584, + "learning_rate": 1.8294893791906275e-05, + "loss": 0.6887, + "step": 6949 + }, + { + "epoch": 0.21300723305136693, + "grad_norm": 0.6769635209784242, + "learning_rate": 1.8294339341693245e-05, + "loss": 0.6515, + "step": 6950 + }, + { + "epoch": 0.21303788157410813, + "grad_norm": 1.3591958831911863, + "learning_rate": 1.829378480975397e-05, + "loss": 0.7865, + "step": 6951 + }, + { + "epoch": 0.21306853009684934, + "grad_norm": 0.703390081693175, + "learning_rate": 1.8293230196093906e-05, + "loss": 0.6114, + "step": 6952 + }, + { + "epoch": 0.21309917861959055, + "grad_norm": 1.581677253243345, + "learning_rate": 1.829267550071853e-05, + "loss": 0.8047, + "step": 6953 + }, + { + "epoch": 0.21312982714233175, + "grad_norm": 1.6102769385148183, + "learning_rate": 1.8292120723633297e-05, + "loss": 0.837, + "step": 6954 + }, + { + "epoch": 0.21316047566507296, + "grad_norm": 1.5941157462603026, + "learning_rate": 1.8291565864843675e-05, + "loss": 0.8517, + "step": 6955 + }, + { + "epoch": 0.21319112418781413, + "grad_norm": 1.7052912888892129, + "learning_rate": 1.8291010924355138e-05, + "loss": 0.8851, + "step": 6956 + }, + { + "epoch": 0.21322177271055534, + "grad_norm": 1.6676061918857836, + "learning_rate": 1.8290455902173146e-05, + "loss": 0.7673, + "step": 6957 + }, + { + "epoch": 0.21325242123329655, + "grad_norm": 1.599021672663712, + "learning_rate": 1.8289900798303168e-05, + "loss": 0.9402, + "step": 6958 + }, + { + "epoch": 0.21328306975603775, + "grad_norm": 1.478064385506272, + "learning_rate": 1.8289345612750682e-05, + "loss": 0.7995, + "step": 6959 + }, + { + "epoch": 0.21331371827877896, + "grad_norm": 1.785291549523051, + "learning_rate": 1.8288790345521147e-05, + "loss": 0.7797, + "step": 6960 + }, + { + "epoch": 0.21334436680152016, + "grad_norm": 1.5811195383438028, + "learning_rate": 1.8288234996620045e-05, + "loss": 0.891, + "step": 6961 + }, + { + "epoch": 0.21337501532426137, + "grad_norm": 1.5786297847093493, + "learning_rate": 1.828767956605284e-05, + "loss": 0.7754, + "step": 6962 + }, + { + "epoch": 0.21340566384700257, + "grad_norm": 1.796132119015451, + "learning_rate": 1.828712405382501e-05, + "loss": 0.8884, + "step": 6963 + }, + { + "epoch": 0.21343631236974378, + "grad_norm": 1.4764247933214616, + "learning_rate": 1.8286568459942022e-05, + "loss": 0.7927, + "step": 6964 + }, + { + "epoch": 0.21346696089248499, + "grad_norm": 1.6743724804973568, + "learning_rate": 1.8286012784409355e-05, + "loss": 0.7765, + "step": 6965 + }, + { + "epoch": 0.2134976094152262, + "grad_norm": 1.4525311543132082, + "learning_rate": 1.828545702723249e-05, + "loss": 0.7639, + "step": 6966 + }, + { + "epoch": 0.2135282579379674, + "grad_norm": 1.5160420349692056, + "learning_rate": 1.8284901188416893e-05, + "loss": 0.8739, + "step": 6967 + }, + { + "epoch": 0.2135589064607086, + "grad_norm": 1.5018283159166415, + "learning_rate": 1.8284345267968048e-05, + "loss": 0.7852, + "step": 6968 + }, + { + "epoch": 0.2135895549834498, + "grad_norm": 1.681778575790218, + "learning_rate": 1.8283789265891424e-05, + "loss": 0.8444, + "step": 6969 + }, + { + "epoch": 0.21362020350619101, + "grad_norm": 1.6091851531605816, + "learning_rate": 1.828323318219251e-05, + "loss": 0.8034, + "step": 6970 + }, + { + "epoch": 0.2136508520289322, + "grad_norm": 1.3972189976581784, + "learning_rate": 1.8282677016876776e-05, + "loss": 0.7071, + "step": 6971 + }, + { + "epoch": 0.2136815005516734, + "grad_norm": 1.7547676220928399, + "learning_rate": 1.8282120769949707e-05, + "loss": 0.8064, + "step": 6972 + }, + { + "epoch": 0.2137121490744146, + "grad_norm": 1.6109151131256705, + "learning_rate": 1.8281564441416786e-05, + "loss": 0.8301, + "step": 6973 + }, + { + "epoch": 0.2137427975971558, + "grad_norm": 0.7184270209347873, + "learning_rate": 1.828100803128349e-05, + "loss": 0.6454, + "step": 6974 + }, + { + "epoch": 0.21377344611989701, + "grad_norm": 1.53693974808477, + "learning_rate": 1.8280451539555303e-05, + "loss": 0.7301, + "step": 6975 + }, + { + "epoch": 0.21380409464263822, + "grad_norm": 1.5011309932985049, + "learning_rate": 1.8279894966237704e-05, + "loss": 0.8174, + "step": 6976 + }, + { + "epoch": 0.21383474316537943, + "grad_norm": 1.454249672100161, + "learning_rate": 1.827933831133619e-05, + "loss": 0.8281, + "step": 6977 + }, + { + "epoch": 0.21386539168812063, + "grad_norm": 1.7805834076098672, + "learning_rate": 1.827878157485623e-05, + "loss": 0.7804, + "step": 6978 + }, + { + "epoch": 0.21389604021086184, + "grad_norm": 1.5037878850756525, + "learning_rate": 1.8278224756803318e-05, + "loss": 0.765, + "step": 6979 + }, + { + "epoch": 0.21392668873360304, + "grad_norm": 1.4378804184957095, + "learning_rate": 1.8277667857182942e-05, + "loss": 0.669, + "step": 6980 + }, + { + "epoch": 0.21395733725634425, + "grad_norm": 0.7064227500104487, + "learning_rate": 1.8277110876000582e-05, + "loss": 0.6286, + "step": 6981 + }, + { + "epoch": 0.21398798577908545, + "grad_norm": 1.5992414633497534, + "learning_rate": 1.8276553813261735e-05, + "loss": 0.8617, + "step": 6982 + }, + { + "epoch": 0.21401863430182666, + "grad_norm": 1.4251019867147332, + "learning_rate": 1.827599666897189e-05, + "loss": 0.8007, + "step": 6983 + }, + { + "epoch": 0.21404928282456787, + "grad_norm": 1.5425555956726464, + "learning_rate": 1.8275439443136526e-05, + "loss": 0.8212, + "step": 6984 + }, + { + "epoch": 0.21407993134730907, + "grad_norm": 1.6236000587717645, + "learning_rate": 1.827488213576114e-05, + "loss": 0.8116, + "step": 6985 + }, + { + "epoch": 0.21411057987005028, + "grad_norm": 1.7052963793311275, + "learning_rate": 1.8274324746851224e-05, + "loss": 0.8358, + "step": 6986 + }, + { + "epoch": 0.21414122839279146, + "grad_norm": 1.3840169483768978, + "learning_rate": 1.827376727641227e-05, + "loss": 0.7686, + "step": 6987 + }, + { + "epoch": 0.21417187691553266, + "grad_norm": 1.5682524058324623, + "learning_rate": 1.827320972444977e-05, + "loss": 0.826, + "step": 6988 + }, + { + "epoch": 0.21420252543827387, + "grad_norm": 1.641572915769126, + "learning_rate": 1.8272652090969215e-05, + "loss": 0.9602, + "step": 6989 + }, + { + "epoch": 0.21423317396101507, + "grad_norm": 0.7209816138212073, + "learning_rate": 1.8272094375976107e-05, + "loss": 0.6601, + "step": 6990 + }, + { + "epoch": 0.21426382248375628, + "grad_norm": 1.7559532809017044, + "learning_rate": 1.8271536579475932e-05, + "loss": 0.8332, + "step": 6991 + }, + { + "epoch": 0.21429447100649748, + "grad_norm": 1.4824221130493223, + "learning_rate": 1.8270978701474193e-05, + "loss": 0.8703, + "step": 6992 + }, + { + "epoch": 0.2143251195292387, + "grad_norm": 1.577452630664633, + "learning_rate": 1.8270420741976384e-05, + "loss": 0.8224, + "step": 6993 + }, + { + "epoch": 0.2143557680519799, + "grad_norm": 1.5391334862630754, + "learning_rate": 1.8269862700988003e-05, + "loss": 0.8448, + "step": 6994 + }, + { + "epoch": 0.2143864165747211, + "grad_norm": 1.7474663466519853, + "learning_rate": 1.826930457851455e-05, + "loss": 0.7084, + "step": 6995 + }, + { + "epoch": 0.2144170650974623, + "grad_norm": 1.4545798816338895, + "learning_rate": 1.8268746374561523e-05, + "loss": 0.7229, + "step": 6996 + }, + { + "epoch": 0.2144477136202035, + "grad_norm": 1.497367870770027, + "learning_rate": 1.8268188089134425e-05, + "loss": 0.7575, + "step": 6997 + }, + { + "epoch": 0.21447836214294472, + "grad_norm": 0.7128328166312792, + "learning_rate": 1.826762972223875e-05, + "loss": 0.6433, + "step": 6998 + }, + { + "epoch": 0.21450901066568592, + "grad_norm": 1.4469061889262573, + "learning_rate": 1.8267071273880007e-05, + "loss": 0.8009, + "step": 6999 + }, + { + "epoch": 0.21453965918842713, + "grad_norm": 1.5852309038943573, + "learning_rate": 1.826651274406369e-05, + "loss": 0.7917, + "step": 7000 + }, + { + "epoch": 0.21457030771116833, + "grad_norm": 0.7055747148655365, + "learning_rate": 1.8265954132795313e-05, + "loss": 0.6214, + "step": 7001 + }, + { + "epoch": 0.2146009562339095, + "grad_norm": 0.7078512296723272, + "learning_rate": 1.8265395440080375e-05, + "loss": 0.6569, + "step": 7002 + }, + { + "epoch": 0.21463160475665072, + "grad_norm": 1.6249778115407862, + "learning_rate": 1.8264836665924378e-05, + "loss": 0.7419, + "step": 7003 + }, + { + "epoch": 0.21466225327939192, + "grad_norm": 1.5266994961776867, + "learning_rate": 1.8264277810332834e-05, + "loss": 0.774, + "step": 7004 + }, + { + "epoch": 0.21469290180213313, + "grad_norm": 1.494054775346256, + "learning_rate": 1.8263718873311242e-05, + "loss": 0.7864, + "step": 7005 + }, + { + "epoch": 0.21472355032487433, + "grad_norm": 1.2941478727661806, + "learning_rate": 1.8263159854865118e-05, + "loss": 0.7263, + "step": 7006 + }, + { + "epoch": 0.21475419884761554, + "grad_norm": 1.3659846039583001, + "learning_rate": 1.8262600754999965e-05, + "loss": 0.7058, + "step": 7007 + }, + { + "epoch": 0.21478484737035675, + "grad_norm": 1.4235585014479417, + "learning_rate": 1.8262041573721288e-05, + "loss": 0.757, + "step": 7008 + }, + { + "epoch": 0.21481549589309795, + "grad_norm": 1.561268404843337, + "learning_rate": 1.826148231103461e-05, + "loss": 0.8358, + "step": 7009 + }, + { + "epoch": 0.21484614441583916, + "grad_norm": 0.8048490575850745, + "learning_rate": 1.8260922966945423e-05, + "loss": 0.6278, + "step": 7010 + }, + { + "epoch": 0.21487679293858036, + "grad_norm": 1.5199887632601619, + "learning_rate": 1.8260363541459256e-05, + "loss": 0.7141, + "step": 7011 + }, + { + "epoch": 0.21490744146132157, + "grad_norm": 1.5563286784075803, + "learning_rate": 1.8259804034581613e-05, + "loss": 0.7969, + "step": 7012 + }, + { + "epoch": 0.21493808998406277, + "grad_norm": 1.757941197679233, + "learning_rate": 1.8259244446318004e-05, + "loss": 0.8161, + "step": 7013 + }, + { + "epoch": 0.21496873850680398, + "grad_norm": 1.4721629953592885, + "learning_rate": 1.8258684776673947e-05, + "loss": 0.7954, + "step": 7014 + }, + { + "epoch": 0.21499938702954519, + "grad_norm": 1.4519182198316176, + "learning_rate": 1.8258125025654957e-05, + "loss": 0.7103, + "step": 7015 + }, + { + "epoch": 0.2150300355522864, + "grad_norm": 1.715343667905014, + "learning_rate": 1.825756519326655e-05, + "loss": 0.7767, + "step": 7016 + }, + { + "epoch": 0.2150606840750276, + "grad_norm": 0.713205643225468, + "learning_rate": 1.8257005279514234e-05, + "loss": 0.6389, + "step": 7017 + }, + { + "epoch": 0.21509133259776878, + "grad_norm": 1.4673327124324849, + "learning_rate": 1.825644528440354e-05, + "loss": 0.7494, + "step": 7018 + }, + { + "epoch": 0.21512198112050998, + "grad_norm": 1.5314584607206323, + "learning_rate": 1.8255885207939973e-05, + "loss": 0.8481, + "step": 7019 + }, + { + "epoch": 0.2151526296432512, + "grad_norm": 1.4466257391769073, + "learning_rate": 1.825532505012906e-05, + "loss": 0.7893, + "step": 7020 + }, + { + "epoch": 0.2151832781659924, + "grad_norm": 1.6019779985529616, + "learning_rate": 1.825476481097631e-05, + "loss": 0.7861, + "step": 7021 + }, + { + "epoch": 0.2152139266887336, + "grad_norm": 1.5293811814103437, + "learning_rate": 1.825420449048726e-05, + "loss": 0.7555, + "step": 7022 + }, + { + "epoch": 0.2152445752114748, + "grad_norm": 1.7130056529144042, + "learning_rate": 1.8253644088667414e-05, + "loss": 0.7205, + "step": 7023 + }, + { + "epoch": 0.215275223734216, + "grad_norm": 1.4086136659513293, + "learning_rate": 1.8253083605522305e-05, + "loss": 0.7195, + "step": 7024 + }, + { + "epoch": 0.21530587225695721, + "grad_norm": 1.604981378080538, + "learning_rate": 1.825252304105745e-05, + "loss": 0.8528, + "step": 7025 + }, + { + "epoch": 0.21533652077969842, + "grad_norm": 1.4206945055838993, + "learning_rate": 1.8251962395278374e-05, + "loss": 0.8104, + "step": 7026 + }, + { + "epoch": 0.21536716930243963, + "grad_norm": 1.4541979408557886, + "learning_rate": 1.8251401668190603e-05, + "loss": 0.8884, + "step": 7027 + }, + { + "epoch": 0.21539781782518083, + "grad_norm": 1.6504749712418425, + "learning_rate": 1.825084085979966e-05, + "loss": 0.8411, + "step": 7028 + }, + { + "epoch": 0.21542846634792204, + "grad_norm": 1.563410626805854, + "learning_rate": 1.8250279970111066e-05, + "loss": 0.7161, + "step": 7029 + }, + { + "epoch": 0.21545911487066324, + "grad_norm": 1.4195954269918432, + "learning_rate": 1.8249718999130356e-05, + "loss": 0.8007, + "step": 7030 + }, + { + "epoch": 0.21548976339340445, + "grad_norm": 0.7255826958221749, + "learning_rate": 1.8249157946863055e-05, + "loss": 0.6074, + "step": 7031 + }, + { + "epoch": 0.21552041191614565, + "grad_norm": 1.542809150144142, + "learning_rate": 1.824859681331469e-05, + "loss": 0.8555, + "step": 7032 + }, + { + "epoch": 0.21555106043888683, + "grad_norm": 1.3865304238295275, + "learning_rate": 1.824803559849079e-05, + "loss": 0.8148, + "step": 7033 + }, + { + "epoch": 0.21558170896162804, + "grad_norm": 1.4634617567324006, + "learning_rate": 1.8247474302396884e-05, + "loss": 0.7242, + "step": 7034 + }, + { + "epoch": 0.21561235748436924, + "grad_norm": 1.4273503711553166, + "learning_rate": 1.82469129250385e-05, + "loss": 0.6929, + "step": 7035 + }, + { + "epoch": 0.21564300600711045, + "grad_norm": 1.445139791173363, + "learning_rate": 1.824635146642118e-05, + "loss": 0.771, + "step": 7036 + }, + { + "epoch": 0.21567365452985165, + "grad_norm": 1.6181128359948849, + "learning_rate": 1.8245789926550443e-05, + "loss": 0.9612, + "step": 7037 + }, + { + "epoch": 0.21570430305259286, + "grad_norm": 1.4689856669631927, + "learning_rate": 1.8245228305431833e-05, + "loss": 0.8118, + "step": 7038 + }, + { + "epoch": 0.21573495157533407, + "grad_norm": 1.599704362174743, + "learning_rate": 1.8244666603070876e-05, + "loss": 0.883, + "step": 7039 + }, + { + "epoch": 0.21576560009807527, + "grad_norm": 0.7321167955657234, + "learning_rate": 1.824410481947311e-05, + "loss": 0.6326, + "step": 7040 + }, + { + "epoch": 0.21579624862081648, + "grad_norm": 1.494427624471454, + "learning_rate": 1.824354295464407e-05, + "loss": 0.7711, + "step": 7041 + }, + { + "epoch": 0.21582689714355768, + "grad_norm": 1.5849215586389633, + "learning_rate": 1.824298100858929e-05, + "loss": 0.9059, + "step": 7042 + }, + { + "epoch": 0.2158575456662989, + "grad_norm": 1.8493390580548064, + "learning_rate": 1.8242418981314313e-05, + "loss": 0.7806, + "step": 7043 + }, + { + "epoch": 0.2158881941890401, + "grad_norm": 1.825826526974057, + "learning_rate": 1.824185687282467e-05, + "loss": 0.8036, + "step": 7044 + }, + { + "epoch": 0.2159188427117813, + "grad_norm": 1.4597142491309296, + "learning_rate": 1.8241294683125903e-05, + "loss": 0.7747, + "step": 7045 + }, + { + "epoch": 0.2159494912345225, + "grad_norm": 0.7128700077745636, + "learning_rate": 1.8240732412223553e-05, + "loss": 0.613, + "step": 7046 + }, + { + "epoch": 0.2159801397572637, + "grad_norm": 1.2856525948492026, + "learning_rate": 1.8240170060123154e-05, + "loss": 0.6785, + "step": 7047 + }, + { + "epoch": 0.21601078828000492, + "grad_norm": 1.3547887087467432, + "learning_rate": 1.8239607626830253e-05, + "loss": 0.7873, + "step": 7048 + }, + { + "epoch": 0.2160414368027461, + "grad_norm": 1.585006400663066, + "learning_rate": 1.823904511235039e-05, + "loss": 0.8441, + "step": 7049 + }, + { + "epoch": 0.2160720853254873, + "grad_norm": 0.7303726724159368, + "learning_rate": 1.8238482516689108e-05, + "loss": 0.668, + "step": 7050 + }, + { + "epoch": 0.2161027338482285, + "grad_norm": 1.4695896796579149, + "learning_rate": 1.8237919839851953e-05, + "loss": 0.7732, + "step": 7051 + }, + { + "epoch": 0.2161333823709697, + "grad_norm": 1.5194656794585657, + "learning_rate": 1.823735708184446e-05, + "loss": 0.8906, + "step": 7052 + }, + { + "epoch": 0.21616403089371092, + "grad_norm": 1.4344203300880893, + "learning_rate": 1.8236794242672183e-05, + "loss": 0.7568, + "step": 7053 + }, + { + "epoch": 0.21619467941645212, + "grad_norm": 1.4962519989429797, + "learning_rate": 1.8236231322340666e-05, + "loss": 0.7696, + "step": 7054 + }, + { + "epoch": 0.21622532793919333, + "grad_norm": 0.7394599747215008, + "learning_rate": 1.823566832085545e-05, + "loss": 0.6209, + "step": 7055 + }, + { + "epoch": 0.21625597646193453, + "grad_norm": 1.5327193801372931, + "learning_rate": 1.8235105238222092e-05, + "loss": 0.98, + "step": 7056 + }, + { + "epoch": 0.21628662498467574, + "grad_norm": 1.3232561595820092, + "learning_rate": 1.823454207444613e-05, + "loss": 0.7144, + "step": 7057 + }, + { + "epoch": 0.21631727350741695, + "grad_norm": 1.5120865669824544, + "learning_rate": 1.8233978829533123e-05, + "loss": 0.7728, + "step": 7058 + }, + { + "epoch": 0.21634792203015815, + "grad_norm": 1.408536748319078, + "learning_rate": 1.8233415503488613e-05, + "loss": 0.8401, + "step": 7059 + }, + { + "epoch": 0.21637857055289936, + "grad_norm": 1.4905584593701753, + "learning_rate": 1.8232852096318154e-05, + "loss": 0.7826, + "step": 7060 + }, + { + "epoch": 0.21640921907564056, + "grad_norm": 1.3517551943982953, + "learning_rate": 1.8232288608027296e-05, + "loss": 0.7228, + "step": 7061 + }, + { + "epoch": 0.21643986759838177, + "grad_norm": 1.367638349236157, + "learning_rate": 1.8231725038621594e-05, + "loss": 0.8719, + "step": 7062 + }, + { + "epoch": 0.21647051612112297, + "grad_norm": 1.6712885491166805, + "learning_rate": 1.8231161388106596e-05, + "loss": 0.8165, + "step": 7063 + }, + { + "epoch": 0.21650116464386415, + "grad_norm": 1.3203472746093392, + "learning_rate": 1.823059765648786e-05, + "loss": 0.7973, + "step": 7064 + }, + { + "epoch": 0.21653181316660536, + "grad_norm": 1.5248019772104124, + "learning_rate": 1.8230033843770942e-05, + "loss": 0.7947, + "step": 7065 + }, + { + "epoch": 0.21656246168934656, + "grad_norm": 1.295526773554816, + "learning_rate": 1.8229469949961393e-05, + "loss": 0.7603, + "step": 7066 + }, + { + "epoch": 0.21659311021208777, + "grad_norm": 1.429586176193955, + "learning_rate": 1.8228905975064774e-05, + "loss": 0.7656, + "step": 7067 + }, + { + "epoch": 0.21662375873482898, + "grad_norm": 1.5076140019673852, + "learning_rate": 1.8228341919086633e-05, + "loss": 0.7691, + "step": 7068 + }, + { + "epoch": 0.21665440725757018, + "grad_norm": 0.7695972312198149, + "learning_rate": 1.822777778203254e-05, + "loss": 0.6291, + "step": 7069 + }, + { + "epoch": 0.2166850557803114, + "grad_norm": 1.5275036591233784, + "learning_rate": 1.822721356390804e-05, + "loss": 0.8546, + "step": 7070 + }, + { + "epoch": 0.2167157043030526, + "grad_norm": 1.5734624356008338, + "learning_rate": 1.8226649264718704e-05, + "loss": 0.6718, + "step": 7071 + }, + { + "epoch": 0.2167463528257938, + "grad_norm": 1.5049308263669259, + "learning_rate": 1.822608488447009e-05, + "loss": 0.7941, + "step": 7072 + }, + { + "epoch": 0.216777001348535, + "grad_norm": 1.5010860837131326, + "learning_rate": 1.8225520423167755e-05, + "loss": 0.6828, + "step": 7073 + }, + { + "epoch": 0.2168076498712762, + "grad_norm": 0.6989612852406373, + "learning_rate": 1.8224955880817262e-05, + "loss": 0.655, + "step": 7074 + }, + { + "epoch": 0.21683829839401741, + "grad_norm": 1.4976464777757215, + "learning_rate": 1.822439125742417e-05, + "loss": 0.8333, + "step": 7075 + }, + { + "epoch": 0.21686894691675862, + "grad_norm": 1.4680238992861725, + "learning_rate": 1.8223826552994053e-05, + "loss": 0.8768, + "step": 7076 + }, + { + "epoch": 0.21689959543949983, + "grad_norm": 1.4879277629280345, + "learning_rate": 1.8223261767532466e-05, + "loss": 0.7317, + "step": 7077 + }, + { + "epoch": 0.21693024396224103, + "grad_norm": 1.5118704607752869, + "learning_rate": 1.8222696901044982e-05, + "loss": 0.7147, + "step": 7078 + }, + { + "epoch": 0.21696089248498224, + "grad_norm": 1.627553514213872, + "learning_rate": 1.8222131953537157e-05, + "loss": 0.8708, + "step": 7079 + }, + { + "epoch": 0.21699154100772342, + "grad_norm": 1.6410579972211707, + "learning_rate": 1.822156692501456e-05, + "loss": 0.7795, + "step": 7080 + }, + { + "epoch": 0.21702218953046462, + "grad_norm": 1.5805510214189669, + "learning_rate": 1.8221001815482766e-05, + "loss": 0.7642, + "step": 7081 + }, + { + "epoch": 0.21705283805320583, + "grad_norm": 0.7215121440916978, + "learning_rate": 1.8220436624947333e-05, + "loss": 0.6168, + "step": 7082 + }, + { + "epoch": 0.21708348657594703, + "grad_norm": 1.9311885227524082, + "learning_rate": 1.8219871353413837e-05, + "loss": 0.8198, + "step": 7083 + }, + { + "epoch": 0.21711413509868824, + "grad_norm": 1.6941924961170378, + "learning_rate": 1.8219306000887843e-05, + "loss": 0.8555, + "step": 7084 + }, + { + "epoch": 0.21714478362142944, + "grad_norm": 1.4768775316547396, + "learning_rate": 1.8218740567374925e-05, + "loss": 0.7527, + "step": 7085 + }, + { + "epoch": 0.21717543214417065, + "grad_norm": 1.6360093876485071, + "learning_rate": 1.8218175052880656e-05, + "loss": 0.7115, + "step": 7086 + }, + { + "epoch": 0.21720608066691185, + "grad_norm": 1.6050544471146342, + "learning_rate": 1.8217609457410603e-05, + "loss": 0.9374, + "step": 7087 + }, + { + "epoch": 0.21723672918965306, + "grad_norm": 1.644530305745964, + "learning_rate": 1.8217043780970343e-05, + "loss": 0.8418, + "step": 7088 + }, + { + "epoch": 0.21726737771239427, + "grad_norm": 1.3995783678077622, + "learning_rate": 1.8216478023565443e-05, + "loss": 0.853, + "step": 7089 + }, + { + "epoch": 0.21729802623513547, + "grad_norm": 1.655704668452596, + "learning_rate": 1.821591218520149e-05, + "loss": 0.7085, + "step": 7090 + }, + { + "epoch": 0.21732867475787668, + "grad_norm": 0.7474682382305868, + "learning_rate": 1.821534626588405e-05, + "loss": 0.6471, + "step": 7091 + }, + { + "epoch": 0.21735932328061788, + "grad_norm": 0.730303716271887, + "learning_rate": 1.82147802656187e-05, + "loss": 0.679, + "step": 7092 + }, + { + "epoch": 0.2173899718033591, + "grad_norm": 1.6320458072304305, + "learning_rate": 1.821421418441102e-05, + "loss": 0.9056, + "step": 7093 + }, + { + "epoch": 0.2174206203261003, + "grad_norm": 0.7140474380278181, + "learning_rate": 1.821364802226658e-05, + "loss": 0.642, + "step": 7094 + }, + { + "epoch": 0.21745126884884147, + "grad_norm": 1.4955710493024228, + "learning_rate": 1.821308177919097e-05, + "loss": 0.6859, + "step": 7095 + }, + { + "epoch": 0.21748191737158268, + "grad_norm": 1.6095592893175614, + "learning_rate": 1.8212515455189766e-05, + "loss": 0.8196, + "step": 7096 + }, + { + "epoch": 0.21751256589432388, + "grad_norm": 1.495504860392713, + "learning_rate": 1.8211949050268544e-05, + "loss": 0.8021, + "step": 7097 + }, + { + "epoch": 0.2175432144170651, + "grad_norm": 1.710073318055572, + "learning_rate": 1.8211382564432883e-05, + "loss": 0.8543, + "step": 7098 + }, + { + "epoch": 0.2175738629398063, + "grad_norm": 1.545820443204372, + "learning_rate": 1.821081599768837e-05, + "loss": 0.8329, + "step": 7099 + }, + { + "epoch": 0.2176045114625475, + "grad_norm": 1.4012116919535136, + "learning_rate": 1.821024935004059e-05, + "loss": 0.7579, + "step": 7100 + }, + { + "epoch": 0.2176351599852887, + "grad_norm": 1.6788297053502288, + "learning_rate": 1.8209682621495118e-05, + "loss": 0.8355, + "step": 7101 + }, + { + "epoch": 0.2176658085080299, + "grad_norm": 0.8789915184095115, + "learning_rate": 1.8209115812057547e-05, + "loss": 0.6322, + "step": 7102 + }, + { + "epoch": 0.21769645703077112, + "grad_norm": 1.5168773169113008, + "learning_rate": 1.8208548921733452e-05, + "loss": 0.7529, + "step": 7103 + }, + { + "epoch": 0.21772710555351232, + "grad_norm": 0.770291325846055, + "learning_rate": 1.8207981950528427e-05, + "loss": 0.6722, + "step": 7104 + }, + { + "epoch": 0.21775775407625353, + "grad_norm": 1.4000539263101144, + "learning_rate": 1.8207414898448057e-05, + "loss": 0.6939, + "step": 7105 + }, + { + "epoch": 0.21778840259899473, + "grad_norm": 1.6317176017860102, + "learning_rate": 1.8206847765497927e-05, + "loss": 0.8277, + "step": 7106 + }, + { + "epoch": 0.21781905112173594, + "grad_norm": 1.6105415393109845, + "learning_rate": 1.8206280551683625e-05, + "loss": 0.7118, + "step": 7107 + }, + { + "epoch": 0.21784969964447715, + "grad_norm": 1.723471490688282, + "learning_rate": 1.820571325701074e-05, + "loss": 0.8398, + "step": 7108 + }, + { + "epoch": 0.21788034816721835, + "grad_norm": 1.4186156184639658, + "learning_rate": 1.8205145881484867e-05, + "loss": 0.7307, + "step": 7109 + }, + { + "epoch": 0.21791099668995956, + "grad_norm": 1.4449956326256972, + "learning_rate": 1.820457842511159e-05, + "loss": 0.8665, + "step": 7110 + }, + { + "epoch": 0.21794164521270074, + "grad_norm": 1.5848892925304088, + "learning_rate": 1.8204010887896505e-05, + "loss": 0.8024, + "step": 7111 + }, + { + "epoch": 0.21797229373544194, + "grad_norm": 0.9376685076672263, + "learning_rate": 1.82034432698452e-05, + "loss": 0.6408, + "step": 7112 + }, + { + "epoch": 0.21800294225818315, + "grad_norm": 1.5987449819589805, + "learning_rate": 1.8202875570963266e-05, + "loss": 0.7267, + "step": 7113 + }, + { + "epoch": 0.21803359078092435, + "grad_norm": 1.8274573381447972, + "learning_rate": 1.8202307791256305e-05, + "loss": 0.8915, + "step": 7114 + }, + { + "epoch": 0.21806423930366556, + "grad_norm": 1.4615242832503539, + "learning_rate": 1.82017399307299e-05, + "loss": 0.7627, + "step": 7115 + }, + { + "epoch": 0.21809488782640676, + "grad_norm": 1.618100617404626, + "learning_rate": 1.820117198938966e-05, + "loss": 0.8218, + "step": 7116 + }, + { + "epoch": 0.21812553634914797, + "grad_norm": 1.5754431633674137, + "learning_rate": 1.8200603967241174e-05, + "loss": 0.8548, + "step": 7117 + }, + { + "epoch": 0.21815618487188917, + "grad_norm": 1.6309724101117726, + "learning_rate": 1.8200035864290035e-05, + "loss": 0.8155, + "step": 7118 + }, + { + "epoch": 0.21818683339463038, + "grad_norm": 1.6692180418157494, + "learning_rate": 1.8199467680541846e-05, + "loss": 0.7266, + "step": 7119 + }, + { + "epoch": 0.2182174819173716, + "grad_norm": 1.5666217760861336, + "learning_rate": 1.8198899416002204e-05, + "loss": 0.7485, + "step": 7120 + }, + { + "epoch": 0.2182481304401128, + "grad_norm": 1.4013997826338356, + "learning_rate": 1.819833107067671e-05, + "loss": 0.8784, + "step": 7121 + }, + { + "epoch": 0.218278778962854, + "grad_norm": 1.5219559771925355, + "learning_rate": 1.819776264457096e-05, + "loss": 0.7209, + "step": 7122 + }, + { + "epoch": 0.2183094274855952, + "grad_norm": 1.4182041964796996, + "learning_rate": 1.8197194137690558e-05, + "loss": 0.8749, + "step": 7123 + }, + { + "epoch": 0.2183400760083364, + "grad_norm": 1.5343001209260139, + "learning_rate": 1.8196625550041105e-05, + "loss": 0.6925, + "step": 7124 + }, + { + "epoch": 0.21837072453107761, + "grad_norm": 1.4594000536214784, + "learning_rate": 1.8196056881628202e-05, + "loss": 0.7199, + "step": 7125 + }, + { + "epoch": 0.2184013730538188, + "grad_norm": 1.4110263297192298, + "learning_rate": 1.8195488132457456e-05, + "loss": 0.6668, + "step": 7126 + }, + { + "epoch": 0.21843202157656, + "grad_norm": 1.4112787545659242, + "learning_rate": 1.8194919302534466e-05, + "loss": 0.8618, + "step": 7127 + }, + { + "epoch": 0.2184626700993012, + "grad_norm": 1.6479787250793918, + "learning_rate": 1.819435039186484e-05, + "loss": 0.8435, + "step": 7128 + }, + { + "epoch": 0.2184933186220424, + "grad_norm": 1.8834143969092576, + "learning_rate": 1.8193781400454185e-05, + "loss": 0.8957, + "step": 7129 + }, + { + "epoch": 0.21852396714478362, + "grad_norm": 1.5620112328679134, + "learning_rate": 1.8193212328308104e-05, + "loss": 0.6645, + "step": 7130 + }, + { + "epoch": 0.21855461566752482, + "grad_norm": 1.685857220594266, + "learning_rate": 1.8192643175432202e-05, + "loss": 0.8672, + "step": 7131 + }, + { + "epoch": 0.21858526419026603, + "grad_norm": 1.6284962418736304, + "learning_rate": 1.8192073941832096e-05, + "loss": 0.8067, + "step": 7132 + }, + { + "epoch": 0.21861591271300723, + "grad_norm": 1.5778870227443034, + "learning_rate": 1.819150462751339e-05, + "loss": 0.8886, + "step": 7133 + }, + { + "epoch": 0.21864656123574844, + "grad_norm": 1.3649156690025908, + "learning_rate": 1.819093523248169e-05, + "loss": 0.8087, + "step": 7134 + }, + { + "epoch": 0.21867720975848964, + "grad_norm": 1.6699982441229992, + "learning_rate": 1.819036575674261e-05, + "loss": 0.7487, + "step": 7135 + }, + { + "epoch": 0.21870785828123085, + "grad_norm": 1.6592138612914769, + "learning_rate": 1.818979620030176e-05, + "loss": 0.8699, + "step": 7136 + }, + { + "epoch": 0.21873850680397205, + "grad_norm": 1.5820981047229195, + "learning_rate": 1.8189226563164752e-05, + "loss": 0.8171, + "step": 7137 + }, + { + "epoch": 0.21876915532671326, + "grad_norm": 1.4667608465310202, + "learning_rate": 1.81886568453372e-05, + "loss": 0.7989, + "step": 7138 + }, + { + "epoch": 0.21879980384945447, + "grad_norm": 1.5381139778627653, + "learning_rate": 1.8188087046824717e-05, + "loss": 0.7673, + "step": 7139 + }, + { + "epoch": 0.21883045237219567, + "grad_norm": 1.82067367737422, + "learning_rate": 1.8187517167632917e-05, + "loss": 0.7388, + "step": 7140 + }, + { + "epoch": 0.21886110089493688, + "grad_norm": 1.5193975594610072, + "learning_rate": 1.818694720776742e-05, + "loss": 0.8708, + "step": 7141 + }, + { + "epoch": 0.21889174941767806, + "grad_norm": 1.400210810485916, + "learning_rate": 1.8186377167233834e-05, + "loss": 0.7939, + "step": 7142 + }, + { + "epoch": 0.21892239794041926, + "grad_norm": 1.4754340523429266, + "learning_rate": 1.8185807046037776e-05, + "loss": 0.7811, + "step": 7143 + }, + { + "epoch": 0.21895304646316047, + "grad_norm": 0.8096409849698424, + "learning_rate": 1.818523684418487e-05, + "loss": 0.6369, + "step": 7144 + }, + { + "epoch": 0.21898369498590167, + "grad_norm": 1.4904737913577648, + "learning_rate": 1.818466656168073e-05, + "loss": 0.7666, + "step": 7145 + }, + { + "epoch": 0.21901434350864288, + "grad_norm": 1.6024636877647134, + "learning_rate": 1.8184096198530977e-05, + "loss": 0.8416, + "step": 7146 + }, + { + "epoch": 0.21904499203138408, + "grad_norm": 1.629555923932494, + "learning_rate": 1.818352575474123e-05, + "loss": 0.8227, + "step": 7147 + }, + { + "epoch": 0.2190756405541253, + "grad_norm": 1.602127086183239, + "learning_rate": 1.818295523031711e-05, + "loss": 0.865, + "step": 7148 + }, + { + "epoch": 0.2191062890768665, + "grad_norm": 1.313271766562141, + "learning_rate": 1.818238462526424e-05, + "loss": 0.7208, + "step": 7149 + }, + { + "epoch": 0.2191369375996077, + "grad_norm": 1.4542403949378264, + "learning_rate": 1.818181393958824e-05, + "loss": 0.8235, + "step": 7150 + }, + { + "epoch": 0.2191675861223489, + "grad_norm": 1.5771644266275235, + "learning_rate": 1.818124317329473e-05, + "loss": 0.8496, + "step": 7151 + }, + { + "epoch": 0.2191982346450901, + "grad_norm": 1.6895300646714921, + "learning_rate": 1.818067232638934e-05, + "loss": 0.7872, + "step": 7152 + }, + { + "epoch": 0.21922888316783132, + "grad_norm": 1.5239500585864936, + "learning_rate": 1.8180101398877696e-05, + "loss": 0.8747, + "step": 7153 + }, + { + "epoch": 0.21925953169057252, + "grad_norm": 1.3914392345114441, + "learning_rate": 1.8179530390765416e-05, + "loss": 0.744, + "step": 7154 + }, + { + "epoch": 0.21929018021331373, + "grad_norm": 1.5002426338353096, + "learning_rate": 1.817895930205813e-05, + "loss": 0.8107, + "step": 7155 + }, + { + "epoch": 0.21932082873605493, + "grad_norm": 1.473258341596854, + "learning_rate": 1.817838813276147e-05, + "loss": 0.7301, + "step": 7156 + }, + { + "epoch": 0.2193514772587961, + "grad_norm": 1.4668094933875662, + "learning_rate": 1.8177816882881053e-05, + "loss": 0.7558, + "step": 7157 + }, + { + "epoch": 0.21938212578153732, + "grad_norm": 1.349563352087866, + "learning_rate": 1.8177245552422514e-05, + "loss": 0.7443, + "step": 7158 + }, + { + "epoch": 0.21941277430427852, + "grad_norm": 1.7111496882721784, + "learning_rate": 1.8176674141391487e-05, + "loss": 0.8776, + "step": 7159 + }, + { + "epoch": 0.21944342282701973, + "grad_norm": 1.5408760163759636, + "learning_rate": 1.8176102649793596e-05, + "loss": 0.7995, + "step": 7160 + }, + { + "epoch": 0.21947407134976094, + "grad_norm": 1.4041785929408976, + "learning_rate": 1.8175531077634473e-05, + "loss": 0.6931, + "step": 7161 + }, + { + "epoch": 0.21950471987250214, + "grad_norm": 1.4280417446466769, + "learning_rate": 1.8174959424919752e-05, + "loss": 0.8023, + "step": 7162 + }, + { + "epoch": 0.21953536839524335, + "grad_norm": 0.8316609032291863, + "learning_rate": 1.817438769165506e-05, + "loss": 0.6662, + "step": 7163 + }, + { + "epoch": 0.21956601691798455, + "grad_norm": 0.737445839429255, + "learning_rate": 1.817381587784604e-05, + "loss": 0.6254, + "step": 7164 + }, + { + "epoch": 0.21959666544072576, + "grad_norm": 1.4815133564157148, + "learning_rate": 1.817324398349832e-05, + "loss": 0.7636, + "step": 7165 + }, + { + "epoch": 0.21962731396346696, + "grad_norm": 1.8120686768274845, + "learning_rate": 1.8172672008617533e-05, + "loss": 0.8959, + "step": 7166 + }, + { + "epoch": 0.21965796248620817, + "grad_norm": 1.7085075060104529, + "learning_rate": 1.817209995320932e-05, + "loss": 0.89, + "step": 7167 + }, + { + "epoch": 0.21968861100894937, + "grad_norm": 1.3203102840813827, + "learning_rate": 1.8171527817279313e-05, + "loss": 0.7636, + "step": 7168 + }, + { + "epoch": 0.21971925953169058, + "grad_norm": 1.4558377629991388, + "learning_rate": 1.817095560083315e-05, + "loss": 0.7936, + "step": 7169 + }, + { + "epoch": 0.2197499080544318, + "grad_norm": 1.4510832033743128, + "learning_rate": 1.8170383303876476e-05, + "loss": 0.7167, + "step": 7170 + }, + { + "epoch": 0.219780556577173, + "grad_norm": 1.472464925565084, + "learning_rate": 1.816981092641492e-05, + "loss": 0.7182, + "step": 7171 + }, + { + "epoch": 0.2198112050999142, + "grad_norm": 1.5557828460314818, + "learning_rate": 1.8169238468454132e-05, + "loss": 0.7555, + "step": 7172 + }, + { + "epoch": 0.21984185362265538, + "grad_norm": 1.791494444644734, + "learning_rate": 1.8168665929999742e-05, + "loss": 0.724, + "step": 7173 + }, + { + "epoch": 0.21987250214539658, + "grad_norm": 1.564488452259997, + "learning_rate": 1.81680933110574e-05, + "loss": 0.7933, + "step": 7174 + }, + { + "epoch": 0.2199031506681378, + "grad_norm": 1.4068628358788666, + "learning_rate": 1.8167520611632743e-05, + "loss": 0.731, + "step": 7175 + }, + { + "epoch": 0.219933799190879, + "grad_norm": 1.476333983332369, + "learning_rate": 1.8166947831731415e-05, + "loss": 0.8146, + "step": 7176 + }, + { + "epoch": 0.2199644477136202, + "grad_norm": 1.5121719159382392, + "learning_rate": 1.8166374971359063e-05, + "loss": 0.8061, + "step": 7177 + }, + { + "epoch": 0.2199950962363614, + "grad_norm": 1.5361409488730868, + "learning_rate": 1.8165802030521328e-05, + "loss": 0.7486, + "step": 7178 + }, + { + "epoch": 0.2200257447591026, + "grad_norm": 1.4073273200233047, + "learning_rate": 1.8165229009223856e-05, + "loss": 0.7352, + "step": 7179 + }, + { + "epoch": 0.22005639328184382, + "grad_norm": 1.6126837155634102, + "learning_rate": 1.816465590747229e-05, + "loss": 0.8059, + "step": 7180 + }, + { + "epoch": 0.22008704180458502, + "grad_norm": 1.3809856060499974, + "learning_rate": 1.8164082725272285e-05, + "loss": 0.6107, + "step": 7181 + }, + { + "epoch": 0.22011769032732623, + "grad_norm": 1.316393758666243, + "learning_rate": 1.816350946262948e-05, + "loss": 0.7622, + "step": 7182 + }, + { + "epoch": 0.22014833885006743, + "grad_norm": 1.4300574782851203, + "learning_rate": 1.8162936119549533e-05, + "loss": 0.7515, + "step": 7183 + }, + { + "epoch": 0.22017898737280864, + "grad_norm": 0.987556506943064, + "learning_rate": 1.8162362696038083e-05, + "loss": 0.678, + "step": 7184 + }, + { + "epoch": 0.22020963589554984, + "grad_norm": 1.665671597099313, + "learning_rate": 1.8161789192100787e-05, + "loss": 0.7485, + "step": 7185 + }, + { + "epoch": 0.22024028441829105, + "grad_norm": 1.4318787269081972, + "learning_rate": 1.8161215607743293e-05, + "loss": 0.8143, + "step": 7186 + }, + { + "epoch": 0.22027093294103225, + "grad_norm": 1.5812300993298773, + "learning_rate": 1.8160641942971256e-05, + "loss": 0.836, + "step": 7187 + }, + { + "epoch": 0.22030158146377343, + "grad_norm": 1.3491874007607447, + "learning_rate": 1.8160068197790323e-05, + "loss": 0.7998, + "step": 7188 + }, + { + "epoch": 0.22033222998651464, + "grad_norm": 1.90415606068159, + "learning_rate": 1.8159494372206153e-05, + "loss": 0.8062, + "step": 7189 + }, + { + "epoch": 0.22036287850925584, + "grad_norm": 0.7093136213288397, + "learning_rate": 1.815892046622439e-05, + "loss": 0.6599, + "step": 7190 + }, + { + "epoch": 0.22039352703199705, + "grad_norm": 1.5116392171831867, + "learning_rate": 1.8158346479850705e-05, + "loss": 0.6676, + "step": 7191 + }, + { + "epoch": 0.22042417555473826, + "grad_norm": 1.5763838322013715, + "learning_rate": 1.8157772413090742e-05, + "loss": 0.8793, + "step": 7192 + }, + { + "epoch": 0.22045482407747946, + "grad_norm": 1.5064280120606937, + "learning_rate": 1.815719826595016e-05, + "loss": 0.643, + "step": 7193 + }, + { + "epoch": 0.22048547260022067, + "grad_norm": 1.498538075833393, + "learning_rate": 1.8156624038434615e-05, + "loss": 0.8607, + "step": 7194 + }, + { + "epoch": 0.22051612112296187, + "grad_norm": 1.4568823180558597, + "learning_rate": 1.8156049730549767e-05, + "loss": 0.7263, + "step": 7195 + }, + { + "epoch": 0.22054676964570308, + "grad_norm": 1.6699089960267053, + "learning_rate": 1.8155475342301275e-05, + "loss": 0.869, + "step": 7196 + }, + { + "epoch": 0.22057741816844428, + "grad_norm": 1.6504570988674288, + "learning_rate": 1.8154900873694795e-05, + "loss": 0.8312, + "step": 7197 + }, + { + "epoch": 0.2206080666911855, + "grad_norm": 0.8214215118514329, + "learning_rate": 1.8154326324735994e-05, + "loss": 0.6735, + "step": 7198 + }, + { + "epoch": 0.2206387152139267, + "grad_norm": 0.7724029221451163, + "learning_rate": 1.8153751695430524e-05, + "loss": 0.6383, + "step": 7199 + }, + { + "epoch": 0.2206693637366679, + "grad_norm": 1.4181033208626557, + "learning_rate": 1.8153176985784058e-05, + "loss": 0.7368, + "step": 7200 + }, + { + "epoch": 0.2207000122594091, + "grad_norm": 0.6855768492185101, + "learning_rate": 1.8152602195802252e-05, + "loss": 0.6547, + "step": 7201 + }, + { + "epoch": 0.2207306607821503, + "grad_norm": 1.4409805991609006, + "learning_rate": 1.815202732549077e-05, + "loss": 0.7904, + "step": 7202 + }, + { + "epoch": 0.22076130930489152, + "grad_norm": 1.5285622662409875, + "learning_rate": 1.8151452374855277e-05, + "loss": 0.7445, + "step": 7203 + }, + { + "epoch": 0.2207919578276327, + "grad_norm": 1.539517664958689, + "learning_rate": 1.8150877343901438e-05, + "loss": 0.7273, + "step": 7204 + }, + { + "epoch": 0.2208226063503739, + "grad_norm": 1.484233981455133, + "learning_rate": 1.815030223263492e-05, + "loss": 0.8116, + "step": 7205 + }, + { + "epoch": 0.2208532548731151, + "grad_norm": 0.8352890536535338, + "learning_rate": 1.8149727041061383e-05, + "loss": 0.6627, + "step": 7206 + }, + { + "epoch": 0.2208839033958563, + "grad_norm": 0.79404381857629, + "learning_rate": 1.8149151769186504e-05, + "loss": 0.6239, + "step": 7207 + }, + { + "epoch": 0.22091455191859752, + "grad_norm": 1.4199012026290325, + "learning_rate": 1.8148576417015952e-05, + "loss": 0.7488, + "step": 7208 + }, + { + "epoch": 0.22094520044133872, + "grad_norm": 1.4994631185197844, + "learning_rate": 1.814800098455539e-05, + "loss": 0.8643, + "step": 7209 + }, + { + "epoch": 0.22097584896407993, + "grad_norm": 0.7321037628350382, + "learning_rate": 1.8147425471810484e-05, + "loss": 0.6666, + "step": 7210 + }, + { + "epoch": 0.22100649748682114, + "grad_norm": 0.7553740021580883, + "learning_rate": 1.8146849878786916e-05, + "loss": 0.6288, + "step": 7211 + }, + { + "epoch": 0.22103714600956234, + "grad_norm": 1.4330014747854405, + "learning_rate": 1.8146274205490347e-05, + "loss": 0.8029, + "step": 7212 + }, + { + "epoch": 0.22106779453230355, + "grad_norm": 1.61128972484741, + "learning_rate": 1.814569845192646e-05, + "loss": 0.7739, + "step": 7213 + }, + { + "epoch": 0.22109844305504475, + "grad_norm": 0.7363587912771805, + "learning_rate": 1.8145122618100918e-05, + "loss": 0.6517, + "step": 7214 + }, + { + "epoch": 0.22112909157778596, + "grad_norm": 1.588055512920893, + "learning_rate": 1.8144546704019398e-05, + "loss": 0.6919, + "step": 7215 + }, + { + "epoch": 0.22115974010052716, + "grad_norm": 1.3724963931173302, + "learning_rate": 1.8143970709687577e-05, + "loss": 0.7232, + "step": 7216 + }, + { + "epoch": 0.22119038862326837, + "grad_norm": 1.7173418283206185, + "learning_rate": 1.8143394635111128e-05, + "loss": 0.8351, + "step": 7217 + }, + { + "epoch": 0.22122103714600957, + "grad_norm": 1.6404037568388725, + "learning_rate": 1.814281848029573e-05, + "loss": 0.755, + "step": 7218 + }, + { + "epoch": 0.22125168566875075, + "grad_norm": 1.8174509165950754, + "learning_rate": 1.8142242245247055e-05, + "loss": 0.7508, + "step": 7219 + }, + { + "epoch": 0.22128233419149196, + "grad_norm": 1.6211444938368678, + "learning_rate": 1.8141665929970785e-05, + "loss": 0.9123, + "step": 7220 + }, + { + "epoch": 0.22131298271423316, + "grad_norm": 1.3949255031808319, + "learning_rate": 1.81410895344726e-05, + "loss": 0.7286, + "step": 7221 + }, + { + "epoch": 0.22134363123697437, + "grad_norm": 1.4717782106233313, + "learning_rate": 1.8140513058758173e-05, + "loss": 0.7863, + "step": 7222 + }, + { + "epoch": 0.22137427975971558, + "grad_norm": 1.3788582210959266, + "learning_rate": 1.8139936502833192e-05, + "loss": 0.8352, + "step": 7223 + }, + { + "epoch": 0.22140492828245678, + "grad_norm": 1.4178385867817616, + "learning_rate": 1.813935986670333e-05, + "loss": 0.809, + "step": 7224 + }, + { + "epoch": 0.221435576805198, + "grad_norm": 1.6407654255852846, + "learning_rate": 1.8138783150374274e-05, + "loss": 0.7788, + "step": 7225 + }, + { + "epoch": 0.2214662253279392, + "grad_norm": 1.5945986786778996, + "learning_rate": 1.8138206353851705e-05, + "loss": 0.7784, + "step": 7226 + }, + { + "epoch": 0.2214968738506804, + "grad_norm": 1.544243670243021, + "learning_rate": 1.813762947714131e-05, + "loss": 0.9019, + "step": 7227 + }, + { + "epoch": 0.2215275223734216, + "grad_norm": 0.7915657709903589, + "learning_rate": 1.8137052520248766e-05, + "loss": 0.6562, + "step": 7228 + }, + { + "epoch": 0.2215581708961628, + "grad_norm": 1.330968275770306, + "learning_rate": 1.813647548317976e-05, + "loss": 0.8272, + "step": 7229 + }, + { + "epoch": 0.22158881941890402, + "grad_norm": 1.530911481130283, + "learning_rate": 1.8135898365939987e-05, + "loss": 0.9021, + "step": 7230 + }, + { + "epoch": 0.22161946794164522, + "grad_norm": 1.4128584648046705, + "learning_rate": 1.8135321168535118e-05, + "loss": 0.7082, + "step": 7231 + }, + { + "epoch": 0.22165011646438643, + "grad_norm": 1.481464307597741, + "learning_rate": 1.8134743890970852e-05, + "loss": 0.9053, + "step": 7232 + }, + { + "epoch": 0.22168076498712763, + "grad_norm": 0.681994620988412, + "learning_rate": 1.8134166533252872e-05, + "loss": 0.5981, + "step": 7233 + }, + { + "epoch": 0.22171141350986884, + "grad_norm": 1.6687949873514112, + "learning_rate": 1.8133589095386866e-05, + "loss": 0.8469, + "step": 7234 + }, + { + "epoch": 0.22174206203261002, + "grad_norm": 1.2988681187783866, + "learning_rate": 1.813301157737853e-05, + "loss": 0.76, + "step": 7235 + }, + { + "epoch": 0.22177271055535122, + "grad_norm": 1.6116945803404843, + "learning_rate": 1.8132433979233543e-05, + "loss": 0.7496, + "step": 7236 + }, + { + "epoch": 0.22180335907809243, + "grad_norm": 1.7204357138641986, + "learning_rate": 1.8131856300957607e-05, + "loss": 0.798, + "step": 7237 + }, + { + "epoch": 0.22183400760083363, + "grad_norm": 1.4820653116163516, + "learning_rate": 1.813127854255641e-05, + "loss": 0.7776, + "step": 7238 + }, + { + "epoch": 0.22186465612357484, + "grad_norm": 1.519641302248808, + "learning_rate": 1.8130700704035645e-05, + "loss": 0.9293, + "step": 7239 + }, + { + "epoch": 0.22189530464631604, + "grad_norm": 0.722311380365851, + "learning_rate": 1.813012278540101e-05, + "loss": 0.6344, + "step": 7240 + }, + { + "epoch": 0.22192595316905725, + "grad_norm": 1.4710408221028373, + "learning_rate": 1.8129544786658187e-05, + "loss": 0.8234, + "step": 7241 + }, + { + "epoch": 0.22195660169179846, + "grad_norm": 1.5584470137124002, + "learning_rate": 1.8128966707812887e-05, + "loss": 0.7721, + "step": 7242 + }, + { + "epoch": 0.22198725021453966, + "grad_norm": 1.5671424920775834, + "learning_rate": 1.8128388548870792e-05, + "loss": 0.7782, + "step": 7243 + }, + { + "epoch": 0.22201789873728087, + "grad_norm": 1.5236587711635343, + "learning_rate": 1.812781030983761e-05, + "loss": 0.7834, + "step": 7244 + }, + { + "epoch": 0.22204854726002207, + "grad_norm": 0.7129827735433495, + "learning_rate": 1.812723199071903e-05, + "loss": 0.6461, + "step": 7245 + }, + { + "epoch": 0.22207919578276328, + "grad_norm": 1.5495163920349633, + "learning_rate": 1.8126653591520755e-05, + "loss": 0.813, + "step": 7246 + }, + { + "epoch": 0.22210984430550448, + "grad_norm": 1.574503382973923, + "learning_rate": 1.812607511224848e-05, + "loss": 0.7724, + "step": 7247 + }, + { + "epoch": 0.2221404928282457, + "grad_norm": 1.8263189575183971, + "learning_rate": 1.8125496552907912e-05, + "loss": 0.8045, + "step": 7248 + }, + { + "epoch": 0.2221711413509869, + "grad_norm": 1.4374303939644821, + "learning_rate": 1.812491791350475e-05, + "loss": 0.7604, + "step": 7249 + }, + { + "epoch": 0.22220178987372807, + "grad_norm": 1.5694301925253429, + "learning_rate": 1.8124339194044686e-05, + "loss": 0.8976, + "step": 7250 + }, + { + "epoch": 0.22223243839646928, + "grad_norm": 1.4364508151871724, + "learning_rate": 1.812376039453343e-05, + "loss": 0.7689, + "step": 7251 + }, + { + "epoch": 0.22226308691921048, + "grad_norm": 1.552036047312719, + "learning_rate": 1.8123181514976687e-05, + "loss": 0.7913, + "step": 7252 + }, + { + "epoch": 0.2222937354419517, + "grad_norm": 1.3888411538849355, + "learning_rate": 1.8122602555380158e-05, + "loss": 0.6817, + "step": 7253 + }, + { + "epoch": 0.2223243839646929, + "grad_norm": 1.5268328775238384, + "learning_rate": 1.8122023515749546e-05, + "loss": 0.8912, + "step": 7254 + }, + { + "epoch": 0.2223550324874341, + "grad_norm": 1.5856941022724214, + "learning_rate": 1.812144439609056e-05, + "loss": 0.77, + "step": 7255 + }, + { + "epoch": 0.2223856810101753, + "grad_norm": 1.447521673159961, + "learning_rate": 1.8120865196408904e-05, + "loss": 0.761, + "step": 7256 + }, + { + "epoch": 0.2224163295329165, + "grad_norm": 1.4899181707391813, + "learning_rate": 1.8120285916710286e-05, + "loss": 0.8113, + "step": 7257 + }, + { + "epoch": 0.22244697805565772, + "grad_norm": 1.5124418156134238, + "learning_rate": 1.811970655700041e-05, + "loss": 0.7492, + "step": 7258 + }, + { + "epoch": 0.22247762657839892, + "grad_norm": 1.5502874171842518, + "learning_rate": 1.811912711728499e-05, + "loss": 0.8381, + "step": 7259 + }, + { + "epoch": 0.22250827510114013, + "grad_norm": 1.6290682340411835, + "learning_rate": 1.8118547597569735e-05, + "loss": 0.8283, + "step": 7260 + }, + { + "epoch": 0.22253892362388134, + "grad_norm": 1.4917930399471984, + "learning_rate": 1.811796799786035e-05, + "loss": 0.7975, + "step": 7261 + }, + { + "epoch": 0.22256957214662254, + "grad_norm": 1.3798758425145747, + "learning_rate": 1.811738831816255e-05, + "loss": 0.7427, + "step": 7262 + }, + { + "epoch": 0.22260022066936375, + "grad_norm": 1.6316716099647124, + "learning_rate": 1.8116808558482047e-05, + "loss": 0.9173, + "step": 7263 + }, + { + "epoch": 0.22263086919210495, + "grad_norm": 1.5446663866342383, + "learning_rate": 1.8116228718824554e-05, + "loss": 0.7894, + "step": 7264 + }, + { + "epoch": 0.22266151771484616, + "grad_norm": 1.4979393234554963, + "learning_rate": 1.8115648799195784e-05, + "loss": 0.8191, + "step": 7265 + }, + { + "epoch": 0.22269216623758734, + "grad_norm": 1.5559373850342868, + "learning_rate": 1.8115068799601445e-05, + "loss": 0.8373, + "step": 7266 + }, + { + "epoch": 0.22272281476032854, + "grad_norm": 1.6378576404465515, + "learning_rate": 1.811448872004726e-05, + "loss": 0.7692, + "step": 7267 + }, + { + "epoch": 0.22275346328306975, + "grad_norm": 1.4600418401102175, + "learning_rate": 1.811390856053894e-05, + "loss": 0.7682, + "step": 7268 + }, + { + "epoch": 0.22278411180581095, + "grad_norm": 1.5479539793956112, + "learning_rate": 1.81133283210822e-05, + "loss": 0.8687, + "step": 7269 + }, + { + "epoch": 0.22281476032855216, + "grad_norm": 1.4277408341001847, + "learning_rate": 1.811274800168276e-05, + "loss": 0.8054, + "step": 7270 + }, + { + "epoch": 0.22284540885129336, + "grad_norm": 1.533961503915475, + "learning_rate": 1.8112167602346344e-05, + "loss": 0.8042, + "step": 7271 + }, + { + "epoch": 0.22287605737403457, + "grad_norm": 1.3836325325695114, + "learning_rate": 1.8111587123078663e-05, + "loss": 0.7305, + "step": 7272 + }, + { + "epoch": 0.22290670589677578, + "grad_norm": 1.5803044454370299, + "learning_rate": 1.811100656388544e-05, + "loss": 0.8595, + "step": 7273 + }, + { + "epoch": 0.22293735441951698, + "grad_norm": 1.721796404490333, + "learning_rate": 1.811042592477239e-05, + "loss": 0.9277, + "step": 7274 + }, + { + "epoch": 0.2229680029422582, + "grad_norm": 1.563469995912481, + "learning_rate": 1.8109845205745242e-05, + "loss": 0.8933, + "step": 7275 + }, + { + "epoch": 0.2229986514649994, + "grad_norm": 1.602712132462082, + "learning_rate": 1.8109264406809712e-05, + "loss": 0.7813, + "step": 7276 + }, + { + "epoch": 0.2230292999877406, + "grad_norm": 1.7925151734638949, + "learning_rate": 1.8108683527971528e-05, + "loss": 0.742, + "step": 7277 + }, + { + "epoch": 0.2230599485104818, + "grad_norm": 0.7837217077698442, + "learning_rate": 1.810810256923641e-05, + "loss": 0.6176, + "step": 7278 + }, + { + "epoch": 0.223090597033223, + "grad_norm": 1.7518349659805912, + "learning_rate": 1.8107521530610078e-05, + "loss": 0.9047, + "step": 7279 + }, + { + "epoch": 0.22312124555596421, + "grad_norm": 1.6134426273746314, + "learning_rate": 1.8106940412098267e-05, + "loss": 0.8023, + "step": 7280 + }, + { + "epoch": 0.2231518940787054, + "grad_norm": 1.514878835694627, + "learning_rate": 1.810635921370669e-05, + "loss": 0.6835, + "step": 7281 + }, + { + "epoch": 0.2231825426014466, + "grad_norm": 0.678172696056939, + "learning_rate": 1.8105777935441092e-05, + "loss": 0.6297, + "step": 7282 + }, + { + "epoch": 0.2232131911241878, + "grad_norm": 1.4458599108125945, + "learning_rate": 1.8105196577307184e-05, + "loss": 0.8092, + "step": 7283 + }, + { + "epoch": 0.223243839646929, + "grad_norm": 1.6087299092182274, + "learning_rate": 1.8104615139310703e-05, + "loss": 0.7718, + "step": 7284 + }, + { + "epoch": 0.22327448816967022, + "grad_norm": 1.5607257621973607, + "learning_rate": 1.8104033621457372e-05, + "loss": 0.8314, + "step": 7285 + }, + { + "epoch": 0.22330513669241142, + "grad_norm": 1.3905608274864947, + "learning_rate": 1.8103452023752927e-05, + "loss": 0.7117, + "step": 7286 + }, + { + "epoch": 0.22333578521515263, + "grad_norm": 1.6960140777593704, + "learning_rate": 1.8102870346203098e-05, + "loss": 0.6461, + "step": 7287 + }, + { + "epoch": 0.22336643373789383, + "grad_norm": 1.4971744183207951, + "learning_rate": 1.8102288588813606e-05, + "loss": 0.759, + "step": 7288 + }, + { + "epoch": 0.22339708226063504, + "grad_norm": 1.7443045518072897, + "learning_rate": 1.81017067515902e-05, + "loss": 0.8685, + "step": 7289 + }, + { + "epoch": 0.22342773078337624, + "grad_norm": 1.6248336319244105, + "learning_rate": 1.8101124834538602e-05, + "loss": 0.8887, + "step": 7290 + }, + { + "epoch": 0.22345837930611745, + "grad_norm": 0.7807300299442216, + "learning_rate": 1.8100542837664545e-05, + "loss": 0.6193, + "step": 7291 + }, + { + "epoch": 0.22348902782885866, + "grad_norm": 1.5700869764983707, + "learning_rate": 1.8099960760973773e-05, + "loss": 0.703, + "step": 7292 + }, + { + "epoch": 0.22351967635159986, + "grad_norm": 1.831166602609125, + "learning_rate": 1.809937860447201e-05, + "loss": 0.9298, + "step": 7293 + }, + { + "epoch": 0.22355032487434107, + "grad_norm": 1.4983709161153935, + "learning_rate": 1.8098796368164998e-05, + "loss": 0.8147, + "step": 7294 + }, + { + "epoch": 0.22358097339708227, + "grad_norm": 0.6880046262081078, + "learning_rate": 1.8098214052058473e-05, + "loss": 0.6229, + "step": 7295 + }, + { + "epoch": 0.22361162191982348, + "grad_norm": 1.7388613027984696, + "learning_rate": 1.8097631656158175e-05, + "loss": 0.822, + "step": 7296 + }, + { + "epoch": 0.22364227044256466, + "grad_norm": 1.437389069031173, + "learning_rate": 1.809704918046984e-05, + "loss": 0.7834, + "step": 7297 + }, + { + "epoch": 0.22367291896530586, + "grad_norm": 1.5040546761483145, + "learning_rate": 1.8096466624999207e-05, + "loss": 0.8233, + "step": 7298 + }, + { + "epoch": 0.22370356748804707, + "grad_norm": 1.6909847870862629, + "learning_rate": 1.8095883989752016e-05, + "loss": 0.8741, + "step": 7299 + }, + { + "epoch": 0.22373421601078827, + "grad_norm": 0.7134009880745309, + "learning_rate": 1.809530127473401e-05, + "loss": 0.6355, + "step": 7300 + }, + { + "epoch": 0.22376486453352948, + "grad_norm": 1.5694336823913984, + "learning_rate": 1.809471847995093e-05, + "loss": 0.7269, + "step": 7301 + }, + { + "epoch": 0.22379551305627068, + "grad_norm": 1.6169197292966553, + "learning_rate": 1.8094135605408518e-05, + "loss": 0.846, + "step": 7302 + }, + { + "epoch": 0.2238261615790119, + "grad_norm": 1.532134548522396, + "learning_rate": 1.8093552651112513e-05, + "loss": 0.8863, + "step": 7303 + }, + { + "epoch": 0.2238568101017531, + "grad_norm": 1.466406978684512, + "learning_rate": 1.8092969617068665e-05, + "loss": 0.7418, + "step": 7304 + }, + { + "epoch": 0.2238874586244943, + "grad_norm": 1.465142181801521, + "learning_rate": 1.809238650328272e-05, + "loss": 0.8767, + "step": 7305 + }, + { + "epoch": 0.2239181071472355, + "grad_norm": 1.52923700901379, + "learning_rate": 1.8091803309760413e-05, + "loss": 0.7127, + "step": 7306 + }, + { + "epoch": 0.2239487556699767, + "grad_norm": 1.4801916362386336, + "learning_rate": 1.8091220036507505e-05, + "loss": 0.8325, + "step": 7307 + }, + { + "epoch": 0.22397940419271792, + "grad_norm": 0.731193543036685, + "learning_rate": 1.809063668352973e-05, + "loss": 0.6431, + "step": 7308 + }, + { + "epoch": 0.22401005271545912, + "grad_norm": 0.7152957224322216, + "learning_rate": 1.8090053250832845e-05, + "loss": 0.6468, + "step": 7309 + }, + { + "epoch": 0.22404070123820033, + "grad_norm": 0.6670592603559938, + "learning_rate": 1.8089469738422597e-05, + "loss": 0.6162, + "step": 7310 + }, + { + "epoch": 0.22407134976094154, + "grad_norm": 1.4180785016944855, + "learning_rate": 1.808888614630473e-05, + "loss": 0.6507, + "step": 7311 + }, + { + "epoch": 0.2241019982836827, + "grad_norm": 1.5254313023979393, + "learning_rate": 1.8088302474485e-05, + "loss": 0.8626, + "step": 7312 + }, + { + "epoch": 0.22413264680642392, + "grad_norm": 1.6821918811249856, + "learning_rate": 1.8087718722969155e-05, + "loss": 0.8982, + "step": 7313 + }, + { + "epoch": 0.22416329532916512, + "grad_norm": 1.4421613965987332, + "learning_rate": 1.808713489176295e-05, + "loss": 0.8121, + "step": 7314 + }, + { + "epoch": 0.22419394385190633, + "grad_norm": 1.6078424813378498, + "learning_rate": 1.8086550980872136e-05, + "loss": 0.7511, + "step": 7315 + }, + { + "epoch": 0.22422459237464754, + "grad_norm": 1.6473720345500475, + "learning_rate": 1.8085966990302464e-05, + "loss": 0.8633, + "step": 7316 + }, + { + "epoch": 0.22425524089738874, + "grad_norm": 0.7962815127946866, + "learning_rate": 1.808538292005969e-05, + "loss": 0.6323, + "step": 7317 + }, + { + "epoch": 0.22428588942012995, + "grad_norm": 0.7342683203455205, + "learning_rate": 1.808479877014957e-05, + "loss": 0.6353, + "step": 7318 + }, + { + "epoch": 0.22431653794287115, + "grad_norm": 1.5821657237304567, + "learning_rate": 1.8084214540577864e-05, + "loss": 0.8698, + "step": 7319 + }, + { + "epoch": 0.22434718646561236, + "grad_norm": 1.6120127322528923, + "learning_rate": 1.808363023135032e-05, + "loss": 0.8006, + "step": 7320 + }, + { + "epoch": 0.22437783498835356, + "grad_norm": 0.7326579081771618, + "learning_rate": 1.8083045842472694e-05, + "loss": 0.6324, + "step": 7321 + }, + { + "epoch": 0.22440848351109477, + "grad_norm": 1.592364861938043, + "learning_rate": 1.8082461373950753e-05, + "loss": 0.8402, + "step": 7322 + }, + { + "epoch": 0.22443913203383598, + "grad_norm": 1.6949800430538802, + "learning_rate": 1.8081876825790254e-05, + "loss": 0.8643, + "step": 7323 + }, + { + "epoch": 0.22446978055657718, + "grad_norm": 2.837288817076127, + "learning_rate": 1.8081292197996954e-05, + "loss": 0.8222, + "step": 7324 + }, + { + "epoch": 0.2245004290793184, + "grad_norm": 1.26844599054805, + "learning_rate": 1.8080707490576615e-05, + "loss": 0.7164, + "step": 7325 + }, + { + "epoch": 0.2245310776020596, + "grad_norm": 1.4858669448527415, + "learning_rate": 1.8080122703534995e-05, + "loss": 0.8487, + "step": 7326 + }, + { + "epoch": 0.2245617261248008, + "grad_norm": 1.599427206221084, + "learning_rate": 1.8079537836877862e-05, + "loss": 0.8976, + "step": 7327 + }, + { + "epoch": 0.22459237464754198, + "grad_norm": 1.4600109591139827, + "learning_rate": 1.8078952890610973e-05, + "loss": 0.7829, + "step": 7328 + }, + { + "epoch": 0.22462302317028318, + "grad_norm": 1.8757863825438807, + "learning_rate": 1.8078367864740092e-05, + "loss": 1.0247, + "step": 7329 + }, + { + "epoch": 0.2246536716930244, + "grad_norm": 1.6405017225060736, + "learning_rate": 1.807778275927099e-05, + "loss": 0.8167, + "step": 7330 + }, + { + "epoch": 0.2246843202157656, + "grad_norm": 1.8560400693234838, + "learning_rate": 1.8077197574209427e-05, + "loss": 0.7492, + "step": 7331 + }, + { + "epoch": 0.2247149687385068, + "grad_norm": 0.8154456275602252, + "learning_rate": 1.807661230956117e-05, + "loss": 0.6618, + "step": 7332 + }, + { + "epoch": 0.224745617261248, + "grad_norm": 1.5871706715093676, + "learning_rate": 1.807602696533198e-05, + "loss": 0.8102, + "step": 7333 + }, + { + "epoch": 0.2247762657839892, + "grad_norm": 1.6905596696474086, + "learning_rate": 1.8075441541527637e-05, + "loss": 0.9039, + "step": 7334 + }, + { + "epoch": 0.22480691430673042, + "grad_norm": 0.7346496361331064, + "learning_rate": 1.8074856038153896e-05, + "loss": 0.6202, + "step": 7335 + }, + { + "epoch": 0.22483756282947162, + "grad_norm": 1.5329459702867074, + "learning_rate": 1.8074270455216538e-05, + "loss": 0.7704, + "step": 7336 + }, + { + "epoch": 0.22486821135221283, + "grad_norm": 1.7338296674157296, + "learning_rate": 1.8073684792721322e-05, + "loss": 0.8014, + "step": 7337 + }, + { + "epoch": 0.22489885987495403, + "grad_norm": 1.5908032399415113, + "learning_rate": 1.807309905067403e-05, + "loss": 0.8732, + "step": 7338 + }, + { + "epoch": 0.22492950839769524, + "grad_norm": 0.6741256196448195, + "learning_rate": 1.8072513229080422e-05, + "loss": 0.6332, + "step": 7339 + }, + { + "epoch": 0.22496015692043644, + "grad_norm": 1.4890119663308794, + "learning_rate": 1.807192732794628e-05, + "loss": 0.8661, + "step": 7340 + }, + { + "epoch": 0.22499080544317765, + "grad_norm": 1.3854023955291268, + "learning_rate": 1.807134134727737e-05, + "loss": 0.7776, + "step": 7341 + }, + { + "epoch": 0.22502145396591886, + "grad_norm": 1.6132612535289639, + "learning_rate": 1.807075528707947e-05, + "loss": 0.7286, + "step": 7342 + }, + { + "epoch": 0.22505210248866003, + "grad_norm": 0.8352259755027401, + "learning_rate": 1.8070169147358353e-05, + "loss": 0.6662, + "step": 7343 + }, + { + "epoch": 0.22508275101140124, + "grad_norm": 1.8192666994435434, + "learning_rate": 1.8069582928119792e-05, + "loss": 0.7507, + "step": 7344 + }, + { + "epoch": 0.22511339953414244, + "grad_norm": 1.3538605996615396, + "learning_rate": 1.8068996629369568e-05, + "loss": 0.8183, + "step": 7345 + }, + { + "epoch": 0.22514404805688365, + "grad_norm": 1.2587175945828915, + "learning_rate": 1.8068410251113456e-05, + "loss": 0.6693, + "step": 7346 + }, + { + "epoch": 0.22517469657962486, + "grad_norm": 1.6224718834576415, + "learning_rate": 1.8067823793357235e-05, + "loss": 0.8175, + "step": 7347 + }, + { + "epoch": 0.22520534510236606, + "grad_norm": 1.5239638800036865, + "learning_rate": 1.8067237256106676e-05, + "loss": 0.7846, + "step": 7348 + }, + { + "epoch": 0.22523599362510727, + "grad_norm": 1.5937237552694914, + "learning_rate": 1.806665063936757e-05, + "loss": 0.6968, + "step": 7349 + }, + { + "epoch": 0.22526664214784847, + "grad_norm": 1.4486980901206423, + "learning_rate": 1.806606394314569e-05, + "loss": 0.7462, + "step": 7350 + }, + { + "epoch": 0.22529729067058968, + "grad_norm": 1.5628036575739859, + "learning_rate": 1.8065477167446815e-05, + "loss": 0.813, + "step": 7351 + }, + { + "epoch": 0.22532793919333088, + "grad_norm": 1.4030965814778353, + "learning_rate": 1.8064890312276734e-05, + "loss": 0.7654, + "step": 7352 + }, + { + "epoch": 0.2253585877160721, + "grad_norm": 1.4855585971752496, + "learning_rate": 1.8064303377641224e-05, + "loss": 0.8465, + "step": 7353 + }, + { + "epoch": 0.2253892362388133, + "grad_norm": 1.5469197315989058, + "learning_rate": 1.8063716363546068e-05, + "loss": 0.7721, + "step": 7354 + }, + { + "epoch": 0.2254198847615545, + "grad_norm": 1.5432672438381736, + "learning_rate": 1.8063129269997054e-05, + "loss": 0.798, + "step": 7355 + }, + { + "epoch": 0.2254505332842957, + "grad_norm": 1.516103680249808, + "learning_rate": 1.8062542096999964e-05, + "loss": 0.645, + "step": 7356 + }, + { + "epoch": 0.2254811818070369, + "grad_norm": 0.8923685853505401, + "learning_rate": 1.8061954844560582e-05, + "loss": 0.6111, + "step": 7357 + }, + { + "epoch": 0.22551183032977812, + "grad_norm": 1.4576644980517386, + "learning_rate": 1.8061367512684695e-05, + "loss": 0.7173, + "step": 7358 + }, + { + "epoch": 0.2255424788525193, + "grad_norm": 1.37040223305661, + "learning_rate": 1.8060780101378094e-05, + "loss": 0.6722, + "step": 7359 + }, + { + "epoch": 0.2255731273752605, + "grad_norm": 1.7309628027093953, + "learning_rate": 1.8060192610646562e-05, + "loss": 0.8746, + "step": 7360 + }, + { + "epoch": 0.2256037758980017, + "grad_norm": 1.548845147948209, + "learning_rate": 1.8059605040495892e-05, + "loss": 0.7992, + "step": 7361 + }, + { + "epoch": 0.2256344244207429, + "grad_norm": 1.6536331324237727, + "learning_rate": 1.805901739093187e-05, + "loss": 0.7784, + "step": 7362 + }, + { + "epoch": 0.22566507294348412, + "grad_norm": 1.3752988250927265, + "learning_rate": 1.805842966196029e-05, + "loss": 0.6936, + "step": 7363 + }, + { + "epoch": 0.22569572146622532, + "grad_norm": 1.601187444235077, + "learning_rate": 1.8057841853586936e-05, + "loss": 0.8155, + "step": 7364 + }, + { + "epoch": 0.22572636998896653, + "grad_norm": 1.69803760946347, + "learning_rate": 1.805725396581761e-05, + "loss": 0.7807, + "step": 7365 + }, + { + "epoch": 0.22575701851170774, + "grad_norm": 0.7708851635444415, + "learning_rate": 1.8056665998658096e-05, + "loss": 0.6612, + "step": 7366 + }, + { + "epoch": 0.22578766703444894, + "grad_norm": 1.5203026150710974, + "learning_rate": 1.8056077952114193e-05, + "loss": 0.7631, + "step": 7367 + }, + { + "epoch": 0.22581831555719015, + "grad_norm": 1.6810043892137874, + "learning_rate": 1.8055489826191688e-05, + "loss": 0.9303, + "step": 7368 + }, + { + "epoch": 0.22584896407993135, + "grad_norm": 1.6543103835185828, + "learning_rate": 1.8054901620896385e-05, + "loss": 0.8033, + "step": 7369 + }, + { + "epoch": 0.22587961260267256, + "grad_norm": 0.6967068363790138, + "learning_rate": 1.8054313336234072e-05, + "loss": 0.6319, + "step": 7370 + }, + { + "epoch": 0.22591026112541376, + "grad_norm": 1.370905654224328, + "learning_rate": 1.8053724972210555e-05, + "loss": 0.6733, + "step": 7371 + }, + { + "epoch": 0.22594090964815497, + "grad_norm": 1.4975943340296374, + "learning_rate": 1.8053136528831617e-05, + "loss": 0.7708, + "step": 7372 + }, + { + "epoch": 0.22597155817089618, + "grad_norm": 1.3889563279894224, + "learning_rate": 1.805254800610307e-05, + "loss": 0.6278, + "step": 7373 + }, + { + "epoch": 0.22600220669363735, + "grad_norm": 0.6966114402722521, + "learning_rate": 1.8051959404030705e-05, + "loss": 0.6502, + "step": 7374 + }, + { + "epoch": 0.22603285521637856, + "grad_norm": 1.637560045394644, + "learning_rate": 1.8051370722620324e-05, + "loss": 0.7394, + "step": 7375 + }, + { + "epoch": 0.22606350373911976, + "grad_norm": 0.6855528773334252, + "learning_rate": 1.8050781961877728e-05, + "loss": 0.6408, + "step": 7376 + }, + { + "epoch": 0.22609415226186097, + "grad_norm": 1.5242289483342184, + "learning_rate": 1.8050193121808718e-05, + "loss": 0.7601, + "step": 7377 + }, + { + "epoch": 0.22612480078460218, + "grad_norm": 1.6772116939951789, + "learning_rate": 1.8049604202419094e-05, + "loss": 0.7992, + "step": 7378 + }, + { + "epoch": 0.22615544930734338, + "grad_norm": 1.415570370932642, + "learning_rate": 1.804901520371466e-05, + "loss": 0.756, + "step": 7379 + }, + { + "epoch": 0.2261860978300846, + "grad_norm": 1.6197833812479225, + "learning_rate": 1.804842612570122e-05, + "loss": 0.9063, + "step": 7380 + }, + { + "epoch": 0.2262167463528258, + "grad_norm": 1.5123891722736948, + "learning_rate": 1.8047836968384578e-05, + "loss": 0.8177, + "step": 7381 + }, + { + "epoch": 0.226247394875567, + "grad_norm": 0.6775877032943058, + "learning_rate": 1.8047247731770544e-05, + "loss": 0.6061, + "step": 7382 + }, + { + "epoch": 0.2262780433983082, + "grad_norm": 1.7063930685190896, + "learning_rate": 1.8046658415864913e-05, + "loss": 0.8974, + "step": 7383 + }, + { + "epoch": 0.2263086919210494, + "grad_norm": 1.4753887965229875, + "learning_rate": 1.80460690206735e-05, + "loss": 0.8325, + "step": 7384 + }, + { + "epoch": 0.22633934044379062, + "grad_norm": 1.406008935272227, + "learning_rate": 1.804547954620211e-05, + "loss": 0.7696, + "step": 7385 + }, + { + "epoch": 0.22636998896653182, + "grad_norm": 1.4975685496755442, + "learning_rate": 1.804488999245655e-05, + "loss": 0.8263, + "step": 7386 + }, + { + "epoch": 0.22640063748927303, + "grad_norm": 0.6865418002378645, + "learning_rate": 1.8044300359442632e-05, + "loss": 0.561, + "step": 7387 + }, + { + "epoch": 0.22643128601201423, + "grad_norm": 0.7319410724718622, + "learning_rate": 1.8043710647166164e-05, + "loss": 0.629, + "step": 7388 + }, + { + "epoch": 0.22646193453475544, + "grad_norm": 1.6191610790178406, + "learning_rate": 1.804312085563296e-05, + "loss": 0.7871, + "step": 7389 + }, + { + "epoch": 0.22649258305749662, + "grad_norm": 1.6173235008110427, + "learning_rate": 1.8042530984848824e-05, + "loss": 0.796, + "step": 7390 + }, + { + "epoch": 0.22652323158023782, + "grad_norm": 1.4627790592684864, + "learning_rate": 1.8041941034819573e-05, + "loss": 0.8569, + "step": 7391 + }, + { + "epoch": 0.22655388010297903, + "grad_norm": 0.7448828334461088, + "learning_rate": 1.8041351005551023e-05, + "loss": 0.6433, + "step": 7392 + }, + { + "epoch": 0.22658452862572023, + "grad_norm": 0.7111797339534272, + "learning_rate": 1.8040760897048978e-05, + "loss": 0.6248, + "step": 7393 + }, + { + "epoch": 0.22661517714846144, + "grad_norm": 0.706148067447591, + "learning_rate": 1.8040170709319263e-05, + "loss": 0.6058, + "step": 7394 + }, + { + "epoch": 0.22664582567120264, + "grad_norm": 1.3187852260021813, + "learning_rate": 1.8039580442367688e-05, + "loss": 0.6519, + "step": 7395 + }, + { + "epoch": 0.22667647419394385, + "grad_norm": 1.3548696381528234, + "learning_rate": 1.803899009620007e-05, + "loss": 0.7923, + "step": 7396 + }, + { + "epoch": 0.22670712271668506, + "grad_norm": 1.5564458336761093, + "learning_rate": 1.8038399670822224e-05, + "loss": 0.8072, + "step": 7397 + }, + { + "epoch": 0.22673777123942626, + "grad_norm": 1.7032662830811318, + "learning_rate": 1.8037809166239974e-05, + "loss": 0.9175, + "step": 7398 + }, + { + "epoch": 0.22676841976216747, + "grad_norm": 1.493059285730092, + "learning_rate": 1.803721858245913e-05, + "loss": 0.8783, + "step": 7399 + }, + { + "epoch": 0.22679906828490867, + "grad_norm": 1.4884936117303942, + "learning_rate": 1.8036627919485513e-05, + "loss": 0.7858, + "step": 7400 + }, + { + "epoch": 0.22682971680764988, + "grad_norm": 1.670820182593507, + "learning_rate": 1.8036037177324948e-05, + "loss": 0.7926, + "step": 7401 + }, + { + "epoch": 0.22686036533039108, + "grad_norm": 1.4146941409250458, + "learning_rate": 1.8035446355983254e-05, + "loss": 0.7401, + "step": 7402 + }, + { + "epoch": 0.2268910138531323, + "grad_norm": 1.5415014705379744, + "learning_rate": 1.8034855455466247e-05, + "loss": 0.8839, + "step": 7403 + }, + { + "epoch": 0.2269216623758735, + "grad_norm": 1.5148197146898155, + "learning_rate": 1.8034264475779754e-05, + "loss": 0.842, + "step": 7404 + }, + { + "epoch": 0.22695231089861467, + "grad_norm": 1.5595962891254465, + "learning_rate": 1.80336734169296e-05, + "loss": 0.6223, + "step": 7405 + }, + { + "epoch": 0.22698295942135588, + "grad_norm": 1.6757008807560194, + "learning_rate": 1.8033082278921606e-05, + "loss": 0.8108, + "step": 7406 + }, + { + "epoch": 0.22701360794409708, + "grad_norm": 1.5391488181162074, + "learning_rate": 1.8032491061761596e-05, + "loss": 0.8649, + "step": 7407 + }, + { + "epoch": 0.2270442564668383, + "grad_norm": 1.502225392671853, + "learning_rate": 1.8031899765455394e-05, + "loss": 0.8147, + "step": 7408 + }, + { + "epoch": 0.2270749049895795, + "grad_norm": 1.467468435537345, + "learning_rate": 1.8031308390008833e-05, + "loss": 0.7397, + "step": 7409 + }, + { + "epoch": 0.2271055535123207, + "grad_norm": 1.5350038705671543, + "learning_rate": 1.803071693542773e-05, + "loss": 0.8382, + "step": 7410 + }, + { + "epoch": 0.2271362020350619, + "grad_norm": 1.5675742370203416, + "learning_rate": 1.8030125401717925e-05, + "loss": 0.8675, + "step": 7411 + }, + { + "epoch": 0.2271668505578031, + "grad_norm": 1.5847115300204226, + "learning_rate": 1.8029533788885238e-05, + "loss": 0.835, + "step": 7412 + }, + { + "epoch": 0.22719749908054432, + "grad_norm": 1.5640384388204234, + "learning_rate": 1.80289420969355e-05, + "loss": 0.7961, + "step": 7413 + }, + { + "epoch": 0.22722814760328552, + "grad_norm": 1.4395138358619364, + "learning_rate": 1.802835032587454e-05, + "loss": 0.7626, + "step": 7414 + }, + { + "epoch": 0.22725879612602673, + "grad_norm": 1.4583434729111602, + "learning_rate": 1.802775847570819e-05, + "loss": 0.7798, + "step": 7415 + }, + { + "epoch": 0.22728944464876794, + "grad_norm": 0.9310864081192715, + "learning_rate": 1.8027166546442282e-05, + "loss": 0.6386, + "step": 7416 + }, + { + "epoch": 0.22732009317150914, + "grad_norm": 1.4827802912588623, + "learning_rate": 1.8026574538082643e-05, + "loss": 0.8176, + "step": 7417 + }, + { + "epoch": 0.22735074169425035, + "grad_norm": 0.7323495890723607, + "learning_rate": 1.802598245063512e-05, + "loss": 0.6498, + "step": 7418 + }, + { + "epoch": 0.22738139021699155, + "grad_norm": 1.3438252033997276, + "learning_rate": 1.8025390284105535e-05, + "loss": 0.7992, + "step": 7419 + }, + { + "epoch": 0.22741203873973276, + "grad_norm": 1.6730654046004616, + "learning_rate": 1.8024798038499726e-05, + "loss": 0.9409, + "step": 7420 + }, + { + "epoch": 0.22744268726247394, + "grad_norm": 1.6644874420323421, + "learning_rate": 1.8024205713823528e-05, + "loss": 0.8306, + "step": 7421 + }, + { + "epoch": 0.22747333578521514, + "grad_norm": 1.3668147331788567, + "learning_rate": 1.8023613310082777e-05, + "loss": 0.722, + "step": 7422 + }, + { + "epoch": 0.22750398430795635, + "grad_norm": 1.5296229846230271, + "learning_rate": 1.8023020827283315e-05, + "loss": 0.8193, + "step": 7423 + }, + { + "epoch": 0.22753463283069755, + "grad_norm": 1.0184956537799341, + "learning_rate": 1.8022428265430973e-05, + "loss": 0.6639, + "step": 7424 + }, + { + "epoch": 0.22756528135343876, + "grad_norm": 1.6997785312287743, + "learning_rate": 1.802183562453159e-05, + "loss": 0.8732, + "step": 7425 + }, + { + "epoch": 0.22759592987617996, + "grad_norm": 0.7282019028704251, + "learning_rate": 1.8021242904591016e-05, + "loss": 0.6354, + "step": 7426 + }, + { + "epoch": 0.22762657839892117, + "grad_norm": 1.6164008994467431, + "learning_rate": 1.8020650105615076e-05, + "loss": 0.7815, + "step": 7427 + }, + { + "epoch": 0.22765722692166238, + "grad_norm": 1.5613696227779272, + "learning_rate": 1.802005722760962e-05, + "loss": 0.7115, + "step": 7428 + }, + { + "epoch": 0.22768787544440358, + "grad_norm": 1.7590775420394016, + "learning_rate": 1.801946427058049e-05, + "loss": 0.9506, + "step": 7429 + }, + { + "epoch": 0.2277185239671448, + "grad_norm": 1.476893750824802, + "learning_rate": 1.8018871234533528e-05, + "loss": 0.8276, + "step": 7430 + }, + { + "epoch": 0.227749172489886, + "grad_norm": 0.8639520733363459, + "learning_rate": 1.8018278119474573e-05, + "loss": 0.6155, + "step": 7431 + }, + { + "epoch": 0.2277798210126272, + "grad_norm": 1.4821648203453082, + "learning_rate": 1.8017684925409473e-05, + "loss": 0.8075, + "step": 7432 + }, + { + "epoch": 0.2278104695353684, + "grad_norm": 1.7269153057394382, + "learning_rate": 1.8017091652344074e-05, + "loss": 0.81, + "step": 7433 + }, + { + "epoch": 0.2278411180581096, + "grad_norm": 1.3766258669889044, + "learning_rate": 1.801649830028422e-05, + "loss": 0.8137, + "step": 7434 + }, + { + "epoch": 0.22787176658085082, + "grad_norm": 1.4849352511622274, + "learning_rate": 1.8015904869235753e-05, + "loss": 0.7985, + "step": 7435 + }, + { + "epoch": 0.227902415103592, + "grad_norm": 1.5033639898740492, + "learning_rate": 1.8015311359204525e-05, + "loss": 0.7456, + "step": 7436 + }, + { + "epoch": 0.2279330636263332, + "grad_norm": 1.4163037270001975, + "learning_rate": 1.8014717770196385e-05, + "loss": 0.8267, + "step": 7437 + }, + { + "epoch": 0.2279637121490744, + "grad_norm": 1.5499698917448792, + "learning_rate": 1.801412410221718e-05, + "loss": 0.9239, + "step": 7438 + }, + { + "epoch": 0.2279943606718156, + "grad_norm": 1.5022627334602792, + "learning_rate": 1.801353035527276e-05, + "loss": 0.8052, + "step": 7439 + }, + { + "epoch": 0.22802500919455682, + "grad_norm": 1.6409364139822906, + "learning_rate": 1.8012936529368975e-05, + "loss": 0.8206, + "step": 7440 + }, + { + "epoch": 0.22805565771729802, + "grad_norm": 0.7197764570770551, + "learning_rate": 1.8012342624511675e-05, + "loss": 0.6083, + "step": 7441 + }, + { + "epoch": 0.22808630624003923, + "grad_norm": 1.701153691653165, + "learning_rate": 1.8011748640706713e-05, + "loss": 0.8166, + "step": 7442 + }, + { + "epoch": 0.22811695476278043, + "grad_norm": 1.7283013958839353, + "learning_rate": 1.8011154577959944e-05, + "loss": 0.7873, + "step": 7443 + }, + { + "epoch": 0.22814760328552164, + "grad_norm": 1.765512588016929, + "learning_rate": 1.801056043627722e-05, + "loss": 0.8109, + "step": 7444 + }, + { + "epoch": 0.22817825180826284, + "grad_norm": 1.5279889210487567, + "learning_rate": 1.800996621566439e-05, + "loss": 0.823, + "step": 7445 + }, + { + "epoch": 0.22820890033100405, + "grad_norm": 0.7074032211439525, + "learning_rate": 1.8009371916127313e-05, + "loss": 0.6296, + "step": 7446 + }, + { + "epoch": 0.22823954885374526, + "grad_norm": 1.7043801754915544, + "learning_rate": 1.8008777537671853e-05, + "loss": 0.8205, + "step": 7447 + }, + { + "epoch": 0.22827019737648646, + "grad_norm": 1.5136525233575182, + "learning_rate": 1.800818308030385e-05, + "loss": 0.8902, + "step": 7448 + }, + { + "epoch": 0.22830084589922767, + "grad_norm": 1.5582996506407318, + "learning_rate": 1.8007588544029174e-05, + "loss": 0.8613, + "step": 7449 + }, + { + "epoch": 0.22833149442196887, + "grad_norm": 1.3152630393128055, + "learning_rate": 1.8006993928853684e-05, + "loss": 0.8304, + "step": 7450 + }, + { + "epoch": 0.22836214294471008, + "grad_norm": 0.720964696326858, + "learning_rate": 1.8006399234783226e-05, + "loss": 0.6489, + "step": 7451 + }, + { + "epoch": 0.22839279146745126, + "grad_norm": 1.5193414507487273, + "learning_rate": 1.800580446182367e-05, + "loss": 0.7857, + "step": 7452 + }, + { + "epoch": 0.22842343999019246, + "grad_norm": 1.6044661927817805, + "learning_rate": 1.8005209609980876e-05, + "loss": 0.7736, + "step": 7453 + }, + { + "epoch": 0.22845408851293367, + "grad_norm": 1.4080292924579383, + "learning_rate": 1.8004614679260703e-05, + "loss": 0.6655, + "step": 7454 + }, + { + "epoch": 0.22848473703567487, + "grad_norm": 1.6811974071846325, + "learning_rate": 1.8004019669669013e-05, + "loss": 0.8402, + "step": 7455 + }, + { + "epoch": 0.22851538555841608, + "grad_norm": 1.4094538002174717, + "learning_rate": 1.800342458121167e-05, + "loss": 0.8066, + "step": 7456 + }, + { + "epoch": 0.22854603408115728, + "grad_norm": 1.5392774966986735, + "learning_rate": 1.8002829413894538e-05, + "loss": 0.7968, + "step": 7457 + }, + { + "epoch": 0.2285766826038985, + "grad_norm": 1.350998374931002, + "learning_rate": 1.800223416772348e-05, + "loss": 0.7374, + "step": 7458 + }, + { + "epoch": 0.2286073311266397, + "grad_norm": 1.4437881778043897, + "learning_rate": 1.8001638842704356e-05, + "loss": 0.8495, + "step": 7459 + }, + { + "epoch": 0.2286379796493809, + "grad_norm": 1.610285223100029, + "learning_rate": 1.8001043438843044e-05, + "loss": 0.9618, + "step": 7460 + }, + { + "epoch": 0.2286686281721221, + "grad_norm": 1.3225557442477527, + "learning_rate": 1.80004479561454e-05, + "loss": 0.8484, + "step": 7461 + }, + { + "epoch": 0.2286992766948633, + "grad_norm": 0.7963771785856779, + "learning_rate": 1.7999852394617297e-05, + "loss": 0.6328, + "step": 7462 + }, + { + "epoch": 0.22872992521760452, + "grad_norm": 0.7746330783178013, + "learning_rate": 1.7999256754264596e-05, + "loss": 0.6385, + "step": 7463 + }, + { + "epoch": 0.22876057374034572, + "grad_norm": 1.5186835413597077, + "learning_rate": 1.799866103509318e-05, + "loss": 0.7439, + "step": 7464 + }, + { + "epoch": 0.22879122226308693, + "grad_norm": 1.5299768005930863, + "learning_rate": 1.7998065237108907e-05, + "loss": 0.7971, + "step": 7465 + }, + { + "epoch": 0.22882187078582814, + "grad_norm": 1.6481367918546912, + "learning_rate": 1.7997469360317648e-05, + "loss": 0.7898, + "step": 7466 + }, + { + "epoch": 0.2288525193085693, + "grad_norm": 1.4773361580994016, + "learning_rate": 1.799687340472528e-05, + "loss": 0.8558, + "step": 7467 + }, + { + "epoch": 0.22888316783131052, + "grad_norm": 0.779058222677025, + "learning_rate": 1.799627737033767e-05, + "loss": 0.6173, + "step": 7468 + }, + { + "epoch": 0.22891381635405172, + "grad_norm": 1.5703509700510514, + "learning_rate": 1.7995681257160696e-05, + "loss": 0.7785, + "step": 7469 + }, + { + "epoch": 0.22894446487679293, + "grad_norm": 1.5412465563107036, + "learning_rate": 1.7995085065200228e-05, + "loss": 0.8371, + "step": 7470 + }, + { + "epoch": 0.22897511339953414, + "grad_norm": 1.4461463087085091, + "learning_rate": 1.799448879446214e-05, + "loss": 0.7593, + "step": 7471 + }, + { + "epoch": 0.22900576192227534, + "grad_norm": 1.497950789251947, + "learning_rate": 1.799389244495231e-05, + "loss": 0.7705, + "step": 7472 + }, + { + "epoch": 0.22903641044501655, + "grad_norm": 0.7202191476677648, + "learning_rate": 1.7993296016676613e-05, + "loss": 0.6339, + "step": 7473 + }, + { + "epoch": 0.22906705896775775, + "grad_norm": 1.4924782576267255, + "learning_rate": 1.7992699509640922e-05, + "loss": 0.7739, + "step": 7474 + }, + { + "epoch": 0.22909770749049896, + "grad_norm": 1.6919874151374998, + "learning_rate": 1.7992102923851123e-05, + "loss": 0.8177, + "step": 7475 + }, + { + "epoch": 0.22912835601324016, + "grad_norm": 1.5829666919980032, + "learning_rate": 1.7991506259313084e-05, + "loss": 0.7656, + "step": 7476 + }, + { + "epoch": 0.22915900453598137, + "grad_norm": 0.6941857675298367, + "learning_rate": 1.799090951603269e-05, + "loss": 0.6125, + "step": 7477 + }, + { + "epoch": 0.22918965305872258, + "grad_norm": 1.5863988673676486, + "learning_rate": 1.799031269401582e-05, + "loss": 0.7801, + "step": 7478 + }, + { + "epoch": 0.22922030158146378, + "grad_norm": 1.466936843302134, + "learning_rate": 1.7989715793268357e-05, + "loss": 0.6974, + "step": 7479 + }, + { + "epoch": 0.229250950104205, + "grad_norm": 1.4458684478170385, + "learning_rate": 1.7989118813796177e-05, + "loss": 0.7909, + "step": 7480 + }, + { + "epoch": 0.2292815986269462, + "grad_norm": 1.626200024681349, + "learning_rate": 1.798852175560517e-05, + "loss": 0.8119, + "step": 7481 + }, + { + "epoch": 0.2293122471496874, + "grad_norm": 1.4431867453459946, + "learning_rate": 1.798792461870121e-05, + "loss": 0.7888, + "step": 7482 + }, + { + "epoch": 0.22934289567242858, + "grad_norm": 1.5089731258287087, + "learning_rate": 1.7987327403090183e-05, + "loss": 0.7398, + "step": 7483 + }, + { + "epoch": 0.22937354419516978, + "grad_norm": 1.5497720395059689, + "learning_rate": 1.7986730108777977e-05, + "loss": 0.82, + "step": 7484 + }, + { + "epoch": 0.229404192717911, + "grad_norm": 1.6826753929998635, + "learning_rate": 1.798613273577048e-05, + "loss": 0.8183, + "step": 7485 + }, + { + "epoch": 0.2294348412406522, + "grad_norm": 1.4009207168024396, + "learning_rate": 1.798553528407357e-05, + "loss": 0.8377, + "step": 7486 + }, + { + "epoch": 0.2294654897633934, + "grad_norm": 1.7481896897037623, + "learning_rate": 1.7984937753693138e-05, + "loss": 0.8408, + "step": 7487 + }, + { + "epoch": 0.2294961382861346, + "grad_norm": 1.4974181278630936, + "learning_rate": 1.7984340144635073e-05, + "loss": 0.863, + "step": 7488 + }, + { + "epoch": 0.2295267868088758, + "grad_norm": 0.7253759745580175, + "learning_rate": 1.798374245690526e-05, + "loss": 0.6267, + "step": 7489 + }, + { + "epoch": 0.22955743533161702, + "grad_norm": 1.4909068931300962, + "learning_rate": 1.798314469050959e-05, + "loss": 0.8308, + "step": 7490 + }, + { + "epoch": 0.22958808385435822, + "grad_norm": 1.4530189885927918, + "learning_rate": 1.798254684545395e-05, + "loss": 0.8275, + "step": 7491 + }, + { + "epoch": 0.22961873237709943, + "grad_norm": 1.3331238065532633, + "learning_rate": 1.7981948921744238e-05, + "loss": 0.7434, + "step": 7492 + }, + { + "epoch": 0.22964938089984063, + "grad_norm": 1.5448154469589535, + "learning_rate": 1.798135091938634e-05, + "loss": 0.7653, + "step": 7493 + }, + { + "epoch": 0.22968002942258184, + "grad_norm": 1.6042501317967508, + "learning_rate": 1.7980752838386148e-05, + "loss": 0.8222, + "step": 7494 + }, + { + "epoch": 0.22971067794532304, + "grad_norm": 1.5609304882559119, + "learning_rate": 1.7980154678749556e-05, + "loss": 0.7777, + "step": 7495 + }, + { + "epoch": 0.22974132646806425, + "grad_norm": 1.449732480902003, + "learning_rate": 1.797955644048246e-05, + "loss": 0.8646, + "step": 7496 + }, + { + "epoch": 0.22977197499080546, + "grad_norm": 1.5083591649988533, + "learning_rate": 1.7978958123590754e-05, + "loss": 0.8194, + "step": 7497 + }, + { + "epoch": 0.22980262351354663, + "grad_norm": 1.4742310522410427, + "learning_rate": 1.797835972808033e-05, + "loss": 0.8613, + "step": 7498 + }, + { + "epoch": 0.22983327203628784, + "grad_norm": 1.3759982113519735, + "learning_rate": 1.7977761253957085e-05, + "loss": 0.7594, + "step": 7499 + }, + { + "epoch": 0.22986392055902904, + "grad_norm": 1.3982081232195693, + "learning_rate": 1.797716270122692e-05, + "loss": 0.7678, + "step": 7500 + }, + { + "epoch": 0.22989456908177025, + "grad_norm": 1.606391081327373, + "learning_rate": 1.7976564069895727e-05, + "loss": 0.8588, + "step": 7501 + }, + { + "epoch": 0.22992521760451146, + "grad_norm": 0.7453890652967696, + "learning_rate": 1.797596535996941e-05, + "loss": 0.6481, + "step": 7502 + }, + { + "epoch": 0.22995586612725266, + "grad_norm": 0.753741550855622, + "learning_rate": 1.7975366571453862e-05, + "loss": 0.616, + "step": 7503 + }, + { + "epoch": 0.22998651464999387, + "grad_norm": 1.5420267042827263, + "learning_rate": 1.7974767704354993e-05, + "loss": 0.7877, + "step": 7504 + }, + { + "epoch": 0.23001716317273507, + "grad_norm": 1.6447937334171434, + "learning_rate": 1.797416875867869e-05, + "loss": 0.7484, + "step": 7505 + }, + { + "epoch": 0.23004781169547628, + "grad_norm": 1.4896472411714496, + "learning_rate": 1.7973569734430866e-05, + "loss": 0.8313, + "step": 7506 + }, + { + "epoch": 0.23007846021821748, + "grad_norm": 1.4631982329227664, + "learning_rate": 1.797297063161742e-05, + "loss": 0.7912, + "step": 7507 + }, + { + "epoch": 0.2301091087409587, + "grad_norm": 1.7601186358243612, + "learning_rate": 1.797237145024425e-05, + "loss": 0.8405, + "step": 7508 + }, + { + "epoch": 0.2301397572636999, + "grad_norm": 1.367173964685857, + "learning_rate": 1.7971772190317268e-05, + "loss": 0.8136, + "step": 7509 + }, + { + "epoch": 0.2301704057864411, + "grad_norm": 1.4998051869253501, + "learning_rate": 1.7971172851842375e-05, + "loss": 0.7959, + "step": 7510 + }, + { + "epoch": 0.2302010543091823, + "grad_norm": 1.4155683724691521, + "learning_rate": 1.7970573434825475e-05, + "loss": 0.7438, + "step": 7511 + }, + { + "epoch": 0.2302317028319235, + "grad_norm": 1.5873681719210666, + "learning_rate": 1.7969973939272476e-05, + "loss": 0.7181, + "step": 7512 + }, + { + "epoch": 0.23026235135466472, + "grad_norm": 1.6524632844178606, + "learning_rate": 1.7969374365189283e-05, + "loss": 0.8427, + "step": 7513 + }, + { + "epoch": 0.2302929998774059, + "grad_norm": 1.40129421321336, + "learning_rate": 1.796877471258181e-05, + "loss": 0.8476, + "step": 7514 + }, + { + "epoch": 0.2303236484001471, + "grad_norm": 1.6767166339536153, + "learning_rate": 1.7968174981455955e-05, + "loss": 0.734, + "step": 7515 + }, + { + "epoch": 0.2303542969228883, + "grad_norm": 1.4820579907619131, + "learning_rate": 1.7967575171817637e-05, + "loss": 0.7271, + "step": 7516 + }, + { + "epoch": 0.2303849454456295, + "grad_norm": 1.583986636676309, + "learning_rate": 1.796697528367276e-05, + "loss": 0.9507, + "step": 7517 + }, + { + "epoch": 0.23041559396837072, + "grad_norm": 1.640639520199754, + "learning_rate": 1.7966375317027237e-05, + "loss": 0.8413, + "step": 7518 + }, + { + "epoch": 0.23044624249111192, + "grad_norm": 1.4361643820959524, + "learning_rate": 1.7965775271886983e-05, + "loss": 0.7749, + "step": 7519 + }, + { + "epoch": 0.23047689101385313, + "grad_norm": 1.9085007557886713, + "learning_rate": 1.7965175148257905e-05, + "loss": 0.7482, + "step": 7520 + }, + { + "epoch": 0.23050753953659434, + "grad_norm": 1.727561189579622, + "learning_rate": 1.796457494614592e-05, + "loss": 0.7034, + "step": 7521 + }, + { + "epoch": 0.23053818805933554, + "grad_norm": 0.8976247933510304, + "learning_rate": 1.7963974665556936e-05, + "loss": 0.6428, + "step": 7522 + }, + { + "epoch": 0.23056883658207675, + "grad_norm": 1.5645168073569053, + "learning_rate": 1.7963374306496877e-05, + "loss": 0.7911, + "step": 7523 + }, + { + "epoch": 0.23059948510481795, + "grad_norm": 1.3828621520609683, + "learning_rate": 1.796277386897165e-05, + "loss": 0.7836, + "step": 7524 + }, + { + "epoch": 0.23063013362755916, + "grad_norm": 1.6560971377133504, + "learning_rate": 1.796217335298718e-05, + "loss": 0.8684, + "step": 7525 + }, + { + "epoch": 0.23066078215030036, + "grad_norm": 1.5186126621199985, + "learning_rate": 1.796157275854937e-05, + "loss": 0.84, + "step": 7526 + }, + { + "epoch": 0.23069143067304157, + "grad_norm": 1.452966796465441, + "learning_rate": 1.796097208566415e-05, + "loss": 0.7262, + "step": 7527 + }, + { + "epoch": 0.23072207919578278, + "grad_norm": 1.460743925308086, + "learning_rate": 1.796037133433744e-05, + "loss": 0.7398, + "step": 7528 + }, + { + "epoch": 0.23075272771852395, + "grad_norm": 1.5087595918379972, + "learning_rate": 1.795977050457515e-05, + "loss": 0.7459, + "step": 7529 + }, + { + "epoch": 0.23078337624126516, + "grad_norm": 1.5182753388220844, + "learning_rate": 1.79591695963832e-05, + "loss": 0.8229, + "step": 7530 + }, + { + "epoch": 0.23081402476400636, + "grad_norm": 1.5578152215185048, + "learning_rate": 1.7958568609767523e-05, + "loss": 0.7873, + "step": 7531 + }, + { + "epoch": 0.23084467328674757, + "grad_norm": 0.7983122725644693, + "learning_rate": 1.795796754473403e-05, + "loss": 0.5946, + "step": 7532 + }, + { + "epoch": 0.23087532180948878, + "grad_norm": 1.4239109036156468, + "learning_rate": 1.795736640128865e-05, + "loss": 0.7297, + "step": 7533 + }, + { + "epoch": 0.23090597033222998, + "grad_norm": 1.7223531886115002, + "learning_rate": 1.79567651794373e-05, + "loss": 0.7726, + "step": 7534 + }, + { + "epoch": 0.2309366188549712, + "grad_norm": 1.7987149300504612, + "learning_rate": 1.7956163879185906e-05, + "loss": 0.8546, + "step": 7535 + }, + { + "epoch": 0.2309672673777124, + "grad_norm": 1.5682647168240107, + "learning_rate": 1.79555625005404e-05, + "loss": 0.806, + "step": 7536 + }, + { + "epoch": 0.2309979159004536, + "grad_norm": 1.3074758151220642, + "learning_rate": 1.7954961043506692e-05, + "loss": 0.7908, + "step": 7537 + }, + { + "epoch": 0.2310285644231948, + "grad_norm": 1.4970869106095617, + "learning_rate": 1.7954359508090724e-05, + "loss": 0.8396, + "step": 7538 + }, + { + "epoch": 0.231059212945936, + "grad_norm": 1.6328212084145743, + "learning_rate": 1.7953757894298412e-05, + "loss": 0.7855, + "step": 7539 + }, + { + "epoch": 0.23108986146867722, + "grad_norm": 1.5293660354597187, + "learning_rate": 1.795315620213569e-05, + "loss": 0.738, + "step": 7540 + }, + { + "epoch": 0.23112050999141842, + "grad_norm": 1.7137649583968404, + "learning_rate": 1.7952554431608487e-05, + "loss": 0.6869, + "step": 7541 + }, + { + "epoch": 0.23115115851415963, + "grad_norm": 1.4217907142379156, + "learning_rate": 1.795195258272273e-05, + "loss": 0.7838, + "step": 7542 + }, + { + "epoch": 0.23118180703690083, + "grad_norm": 1.5095880809119822, + "learning_rate": 1.7951350655484346e-05, + "loss": 0.762, + "step": 7543 + }, + { + "epoch": 0.23121245555964204, + "grad_norm": 1.4739560561888814, + "learning_rate": 1.7950748649899275e-05, + "loss": 0.6716, + "step": 7544 + }, + { + "epoch": 0.23124310408238322, + "grad_norm": 1.5102215526875127, + "learning_rate": 1.7950146565973438e-05, + "loss": 0.8067, + "step": 7545 + }, + { + "epoch": 0.23127375260512442, + "grad_norm": 1.7171766882539048, + "learning_rate": 1.7949544403712774e-05, + "loss": 0.8347, + "step": 7546 + }, + { + "epoch": 0.23130440112786563, + "grad_norm": 1.7178643056500944, + "learning_rate": 1.7948942163123216e-05, + "loss": 0.8581, + "step": 7547 + }, + { + "epoch": 0.23133504965060683, + "grad_norm": 1.581441708609942, + "learning_rate": 1.79483398442107e-05, + "loss": 0.8633, + "step": 7548 + }, + { + "epoch": 0.23136569817334804, + "grad_norm": 0.8244583384429965, + "learning_rate": 1.7947737446981155e-05, + "loss": 0.6241, + "step": 7549 + }, + { + "epoch": 0.23139634669608924, + "grad_norm": 1.5764828842370502, + "learning_rate": 1.794713497144052e-05, + "loss": 0.8347, + "step": 7550 + }, + { + "epoch": 0.23142699521883045, + "grad_norm": 0.6836281391501862, + "learning_rate": 1.794653241759473e-05, + "loss": 0.613, + "step": 7551 + }, + { + "epoch": 0.23145764374157166, + "grad_norm": 1.5644345387478407, + "learning_rate": 1.7945929785449725e-05, + "loss": 0.7695, + "step": 7552 + }, + { + "epoch": 0.23148829226431286, + "grad_norm": 1.634131921069116, + "learning_rate": 1.794532707501144e-05, + "loss": 0.8707, + "step": 7553 + }, + { + "epoch": 0.23151894078705407, + "grad_norm": 1.4548775551628772, + "learning_rate": 1.794472428628581e-05, + "loss": 0.8, + "step": 7554 + }, + { + "epoch": 0.23154958930979527, + "grad_norm": 0.7417494391602589, + "learning_rate": 1.7944121419278785e-05, + "loss": 0.6375, + "step": 7555 + }, + { + "epoch": 0.23158023783253648, + "grad_norm": 2.3158549755816478, + "learning_rate": 1.7943518473996294e-05, + "loss": 0.8303, + "step": 7556 + }, + { + "epoch": 0.23161088635527768, + "grad_norm": 1.3557248550732837, + "learning_rate": 1.7942915450444286e-05, + "loss": 0.7263, + "step": 7557 + }, + { + "epoch": 0.2316415348780189, + "grad_norm": 1.4166620929201788, + "learning_rate": 1.7942312348628697e-05, + "loss": 0.7673, + "step": 7558 + }, + { + "epoch": 0.2316721834007601, + "grad_norm": 0.6998845936540296, + "learning_rate": 1.7941709168555476e-05, + "loss": 0.6282, + "step": 7559 + }, + { + "epoch": 0.23170283192350127, + "grad_norm": 1.6049079603856726, + "learning_rate": 1.7941105910230564e-05, + "loss": 0.7611, + "step": 7560 + }, + { + "epoch": 0.23173348044624248, + "grad_norm": 1.76411088645169, + "learning_rate": 1.7940502573659898e-05, + "loss": 0.8088, + "step": 7561 + }, + { + "epoch": 0.23176412896898368, + "grad_norm": 1.5620751864817595, + "learning_rate": 1.793989915884943e-05, + "loss": 0.7382, + "step": 7562 + }, + { + "epoch": 0.2317947774917249, + "grad_norm": 1.3877514984716974, + "learning_rate": 1.7939295665805104e-05, + "loss": 0.6983, + "step": 7563 + }, + { + "epoch": 0.2318254260144661, + "grad_norm": 1.4005961723842244, + "learning_rate": 1.793869209453287e-05, + "loss": 0.8125, + "step": 7564 + }, + { + "epoch": 0.2318560745372073, + "grad_norm": 1.5594546841289025, + "learning_rate": 1.7938088445038667e-05, + "loss": 0.7645, + "step": 7565 + }, + { + "epoch": 0.2318867230599485, + "grad_norm": 1.5329704136342004, + "learning_rate": 1.7937484717328454e-05, + "loss": 0.7733, + "step": 7566 + }, + { + "epoch": 0.2319173715826897, + "grad_norm": 1.4419308125844654, + "learning_rate": 1.793688091140817e-05, + "loss": 0.7828, + "step": 7567 + }, + { + "epoch": 0.23194802010543092, + "grad_norm": 1.3742166733135195, + "learning_rate": 1.7936277027283765e-05, + "loss": 0.7156, + "step": 7568 + }, + { + "epoch": 0.23197866862817212, + "grad_norm": 0.7606127468505722, + "learning_rate": 1.793567306496119e-05, + "loss": 0.6566, + "step": 7569 + }, + { + "epoch": 0.23200931715091333, + "grad_norm": 1.518099841421236, + "learning_rate": 1.7935069024446403e-05, + "loss": 0.8186, + "step": 7570 + }, + { + "epoch": 0.23203996567365454, + "grad_norm": 1.4973361875218718, + "learning_rate": 1.7934464905745352e-05, + "loss": 0.768, + "step": 7571 + }, + { + "epoch": 0.23207061419639574, + "grad_norm": 1.6693945232898213, + "learning_rate": 1.7933860708863983e-05, + "loss": 0.8662, + "step": 7572 + }, + { + "epoch": 0.23210126271913695, + "grad_norm": 1.5293642420819007, + "learning_rate": 1.7933256433808255e-05, + "loss": 0.7909, + "step": 7573 + }, + { + "epoch": 0.23213191124187815, + "grad_norm": 1.370853172067656, + "learning_rate": 1.7932652080584123e-05, + "loss": 0.7699, + "step": 7574 + }, + { + "epoch": 0.23216255976461936, + "grad_norm": 0.6992742337767153, + "learning_rate": 1.7932047649197542e-05, + "loss": 0.6508, + "step": 7575 + }, + { + "epoch": 0.23219320828736054, + "grad_norm": 0.706053343882313, + "learning_rate": 1.7931443139654466e-05, + "loss": 0.6363, + "step": 7576 + }, + { + "epoch": 0.23222385681010174, + "grad_norm": 1.434337841770104, + "learning_rate": 1.793083855196085e-05, + "loss": 0.7792, + "step": 7577 + }, + { + "epoch": 0.23225450533284295, + "grad_norm": 1.4076619706545404, + "learning_rate": 1.793023388612265e-05, + "loss": 0.7955, + "step": 7578 + }, + { + "epoch": 0.23228515385558415, + "grad_norm": 1.498319026879124, + "learning_rate": 1.792962914214583e-05, + "loss": 0.8302, + "step": 7579 + }, + { + "epoch": 0.23231580237832536, + "grad_norm": 1.429974421272649, + "learning_rate": 1.7929024320036345e-05, + "loss": 0.7399, + "step": 7580 + }, + { + "epoch": 0.23234645090106656, + "grad_norm": 1.4876453673299461, + "learning_rate": 1.7928419419800155e-05, + "loss": 0.8136, + "step": 7581 + }, + { + "epoch": 0.23237709942380777, + "grad_norm": 1.4784201633025333, + "learning_rate": 1.7927814441443217e-05, + "loss": 0.8255, + "step": 7582 + }, + { + "epoch": 0.23240774794654898, + "grad_norm": 1.3058217031805874, + "learning_rate": 1.7927209384971495e-05, + "loss": 0.7564, + "step": 7583 + }, + { + "epoch": 0.23243839646929018, + "grad_norm": 1.478644807209602, + "learning_rate": 1.7926604250390952e-05, + "loss": 0.7055, + "step": 7584 + }, + { + "epoch": 0.2324690449920314, + "grad_norm": 1.4041115647716074, + "learning_rate": 1.792599903770755e-05, + "loss": 0.8056, + "step": 7585 + }, + { + "epoch": 0.2324996935147726, + "grad_norm": 1.4881520073963854, + "learning_rate": 1.792539374692725e-05, + "loss": 0.843, + "step": 7586 + }, + { + "epoch": 0.2325303420375138, + "grad_norm": 1.4044166281238322, + "learning_rate": 1.792478837805602e-05, + "loss": 0.8457, + "step": 7587 + }, + { + "epoch": 0.232560990560255, + "grad_norm": 1.5461202876438076, + "learning_rate": 1.7924182931099823e-05, + "loss": 0.736, + "step": 7588 + }, + { + "epoch": 0.2325916390829962, + "grad_norm": 0.7645013177284936, + "learning_rate": 1.792357740606462e-05, + "loss": 0.6657, + "step": 7589 + }, + { + "epoch": 0.23262228760573742, + "grad_norm": 1.6706593116381738, + "learning_rate": 1.7922971802956387e-05, + "loss": 0.8313, + "step": 7590 + }, + { + "epoch": 0.2326529361284786, + "grad_norm": 1.4888545675392713, + "learning_rate": 1.792236612178108e-05, + "loss": 0.7961, + "step": 7591 + }, + { + "epoch": 0.2326835846512198, + "grad_norm": 1.6601356745829114, + "learning_rate": 1.7921760362544676e-05, + "loss": 0.8567, + "step": 7592 + }, + { + "epoch": 0.232714233173961, + "grad_norm": 1.3587838243241799, + "learning_rate": 1.7921154525253138e-05, + "loss": 0.7692, + "step": 7593 + }, + { + "epoch": 0.2327448816967022, + "grad_norm": 1.4259783817125091, + "learning_rate": 1.792054860991244e-05, + "loss": 0.7733, + "step": 7594 + }, + { + "epoch": 0.23277553021944342, + "grad_norm": 1.4180051800039128, + "learning_rate": 1.791994261652855e-05, + "loss": 0.7547, + "step": 7595 + }, + { + "epoch": 0.23280617874218462, + "grad_norm": 1.4008014099412962, + "learning_rate": 1.7919336545107435e-05, + "loss": 0.719, + "step": 7596 + }, + { + "epoch": 0.23283682726492583, + "grad_norm": 1.4160925703609317, + "learning_rate": 1.7918730395655074e-05, + "loss": 0.7405, + "step": 7597 + }, + { + "epoch": 0.23286747578766703, + "grad_norm": 0.6930952330119935, + "learning_rate": 1.791812416817744e-05, + "loss": 0.5973, + "step": 7598 + }, + { + "epoch": 0.23289812431040824, + "grad_norm": 1.4945670545664347, + "learning_rate": 1.7917517862680494e-05, + "loss": 0.8568, + "step": 7599 + }, + { + "epoch": 0.23292877283314944, + "grad_norm": 1.4181090665724927, + "learning_rate": 1.7916911479170226e-05, + "loss": 0.712, + "step": 7600 + }, + { + "epoch": 0.23295942135589065, + "grad_norm": 1.3982573654196166, + "learning_rate": 1.7916305017652597e-05, + "loss": 0.813, + "step": 7601 + }, + { + "epoch": 0.23299006987863186, + "grad_norm": 1.461000464122567, + "learning_rate": 1.7915698478133595e-05, + "loss": 0.7186, + "step": 7602 + }, + { + "epoch": 0.23302071840137306, + "grad_norm": 1.430929257150428, + "learning_rate": 1.791509186061919e-05, + "loss": 0.8229, + "step": 7603 + }, + { + "epoch": 0.23305136692411427, + "grad_norm": 1.3640541585558215, + "learning_rate": 1.791448516511536e-05, + "loss": 0.7592, + "step": 7604 + }, + { + "epoch": 0.23308201544685547, + "grad_norm": 1.5246096375818028, + "learning_rate": 1.791387839162808e-05, + "loss": 0.792, + "step": 7605 + }, + { + "epoch": 0.23311266396959668, + "grad_norm": 1.4129856037837896, + "learning_rate": 1.791327154016333e-05, + "loss": 0.7725, + "step": 7606 + }, + { + "epoch": 0.23314331249233786, + "grad_norm": 1.4568251160661752, + "learning_rate": 1.7912664610727093e-05, + "loss": 0.8882, + "step": 7607 + }, + { + "epoch": 0.23317396101507906, + "grad_norm": 1.6403270038816555, + "learning_rate": 1.791205760332535e-05, + "loss": 0.9246, + "step": 7608 + }, + { + "epoch": 0.23320460953782027, + "grad_norm": 0.8110355810762373, + "learning_rate": 1.7911450517964075e-05, + "loss": 0.6364, + "step": 7609 + }, + { + "epoch": 0.23323525806056147, + "grad_norm": 1.4501467181394923, + "learning_rate": 1.7910843354649255e-05, + "loss": 0.7973, + "step": 7610 + }, + { + "epoch": 0.23326590658330268, + "grad_norm": 1.6878136493977318, + "learning_rate": 1.791023611338687e-05, + "loss": 0.826, + "step": 7611 + }, + { + "epoch": 0.23329655510604388, + "grad_norm": 1.3182942856633175, + "learning_rate": 1.7909628794182908e-05, + "loss": 0.8257, + "step": 7612 + }, + { + "epoch": 0.2333272036287851, + "grad_norm": 1.4066373164839836, + "learning_rate": 1.7909021397043348e-05, + "loss": 0.7605, + "step": 7613 + }, + { + "epoch": 0.2333578521515263, + "grad_norm": 1.716532152905031, + "learning_rate": 1.7908413921974175e-05, + "loss": 0.801, + "step": 7614 + }, + { + "epoch": 0.2333885006742675, + "grad_norm": 0.8165024187651336, + "learning_rate": 1.7907806368981377e-05, + "loss": 0.6342, + "step": 7615 + }, + { + "epoch": 0.2334191491970087, + "grad_norm": 1.5706881266175383, + "learning_rate": 1.7907198738070942e-05, + "loss": 0.9074, + "step": 7616 + }, + { + "epoch": 0.2334497977197499, + "grad_norm": 1.644873541619099, + "learning_rate": 1.7906591029248855e-05, + "loss": 0.7164, + "step": 7617 + }, + { + "epoch": 0.23348044624249112, + "grad_norm": 0.6856864488124167, + "learning_rate": 1.79059832425211e-05, + "loss": 0.6264, + "step": 7618 + }, + { + "epoch": 0.23351109476523232, + "grad_norm": 1.6076037784465746, + "learning_rate": 1.790537537789367e-05, + "loss": 0.8519, + "step": 7619 + }, + { + "epoch": 0.23354174328797353, + "grad_norm": 1.592568275110008, + "learning_rate": 1.7904767435372555e-05, + "loss": 0.7687, + "step": 7620 + }, + { + "epoch": 0.23357239181071474, + "grad_norm": 1.4722756113840514, + "learning_rate": 1.7904159414963743e-05, + "loss": 0.6754, + "step": 7621 + }, + { + "epoch": 0.2336030403334559, + "grad_norm": 1.5323318812575688, + "learning_rate": 1.7903551316673223e-05, + "loss": 0.7153, + "step": 7622 + }, + { + "epoch": 0.23363368885619712, + "grad_norm": 1.5080119723025127, + "learning_rate": 1.7902943140506996e-05, + "loss": 0.7732, + "step": 7623 + }, + { + "epoch": 0.23366433737893832, + "grad_norm": 1.4031108091742002, + "learning_rate": 1.7902334886471045e-05, + "loss": 0.7779, + "step": 7624 + }, + { + "epoch": 0.23369498590167953, + "grad_norm": 1.5424293085733376, + "learning_rate": 1.7901726554571366e-05, + "loss": 0.8077, + "step": 7625 + }, + { + "epoch": 0.23372563442442074, + "grad_norm": 1.4068923407231937, + "learning_rate": 1.7901118144813953e-05, + "loss": 0.7672, + "step": 7626 + }, + { + "epoch": 0.23375628294716194, + "grad_norm": 1.4411514102479193, + "learning_rate": 1.7900509657204804e-05, + "loss": 0.7432, + "step": 7627 + }, + { + "epoch": 0.23378693146990315, + "grad_norm": 1.4062389317467914, + "learning_rate": 1.7899901091749908e-05, + "loss": 0.7208, + "step": 7628 + }, + { + "epoch": 0.23381757999264435, + "grad_norm": 1.4440525653621576, + "learning_rate": 1.789929244845527e-05, + "loss": 0.7683, + "step": 7629 + }, + { + "epoch": 0.23384822851538556, + "grad_norm": 1.345695947528265, + "learning_rate": 1.789868372732688e-05, + "loss": 0.7781, + "step": 7630 + }, + { + "epoch": 0.23387887703812676, + "grad_norm": 1.4601554576460711, + "learning_rate": 1.789807492837074e-05, + "loss": 0.8536, + "step": 7631 + }, + { + "epoch": 0.23390952556086797, + "grad_norm": 1.611282427682743, + "learning_rate": 1.789746605159284e-05, + "loss": 0.7974, + "step": 7632 + }, + { + "epoch": 0.23394017408360918, + "grad_norm": 1.6942165329816652, + "learning_rate": 1.7896857096999195e-05, + "loss": 0.8185, + "step": 7633 + }, + { + "epoch": 0.23397082260635038, + "grad_norm": 1.527883450518732, + "learning_rate": 1.7896248064595794e-05, + "loss": 0.7799, + "step": 7634 + }, + { + "epoch": 0.2340014711290916, + "grad_norm": 1.5665423402943297, + "learning_rate": 1.789563895438864e-05, + "loss": 0.7195, + "step": 7635 + }, + { + "epoch": 0.2340321196518328, + "grad_norm": 1.4885269584469225, + "learning_rate": 1.7895029766383735e-05, + "loss": 0.6828, + "step": 7636 + }, + { + "epoch": 0.234062768174574, + "grad_norm": 1.8077963589893977, + "learning_rate": 1.789442050058708e-05, + "loss": 0.7345, + "step": 7637 + }, + { + "epoch": 0.23409341669731518, + "grad_norm": 1.4728382148490244, + "learning_rate": 1.789381115700468e-05, + "loss": 0.8807, + "step": 7638 + }, + { + "epoch": 0.23412406522005638, + "grad_norm": 1.4503474165799146, + "learning_rate": 1.7893201735642544e-05, + "loss": 0.9002, + "step": 7639 + }, + { + "epoch": 0.2341547137427976, + "grad_norm": 0.9052315093222544, + "learning_rate": 1.7892592236506666e-05, + "loss": 0.6732, + "step": 7640 + }, + { + "epoch": 0.2341853622655388, + "grad_norm": 1.5995480261925001, + "learning_rate": 1.7891982659603057e-05, + "loss": 0.9386, + "step": 7641 + }, + { + "epoch": 0.23421601078828, + "grad_norm": 1.6087002616195394, + "learning_rate": 1.789137300493773e-05, + "loss": 0.8586, + "step": 7642 + }, + { + "epoch": 0.2342466593110212, + "grad_norm": 1.4488946600352146, + "learning_rate": 1.789076327251668e-05, + "loss": 0.783, + "step": 7643 + }, + { + "epoch": 0.2342773078337624, + "grad_norm": 1.5932612541817341, + "learning_rate": 1.7890153462345923e-05, + "loss": 0.7023, + "step": 7644 + }, + { + "epoch": 0.23430795635650362, + "grad_norm": 1.5324696019146284, + "learning_rate": 1.7889543574431463e-05, + "loss": 0.7863, + "step": 7645 + }, + { + "epoch": 0.23433860487924482, + "grad_norm": 1.383840913576804, + "learning_rate": 1.7888933608779314e-05, + "loss": 0.842, + "step": 7646 + }, + { + "epoch": 0.23436925340198603, + "grad_norm": 1.4482292442510007, + "learning_rate": 1.788832356539548e-05, + "loss": 0.826, + "step": 7647 + }, + { + "epoch": 0.23439990192472723, + "grad_norm": 1.6793289625660768, + "learning_rate": 1.788771344428598e-05, + "loss": 0.8322, + "step": 7648 + }, + { + "epoch": 0.23443055044746844, + "grad_norm": 1.594819412854339, + "learning_rate": 1.788710324545682e-05, + "loss": 0.805, + "step": 7649 + }, + { + "epoch": 0.23446119897020964, + "grad_norm": 1.543393092098548, + "learning_rate": 1.7886492968914013e-05, + "loss": 0.783, + "step": 7650 + }, + { + "epoch": 0.23449184749295085, + "grad_norm": 1.350340901949107, + "learning_rate": 1.788588261466357e-05, + "loss": 0.6153, + "step": 7651 + }, + { + "epoch": 0.23452249601569206, + "grad_norm": 1.4382796008988596, + "learning_rate": 1.788527218271151e-05, + "loss": 0.7684, + "step": 7652 + }, + { + "epoch": 0.23455314453843326, + "grad_norm": 1.6195382517091441, + "learning_rate": 1.788466167306385e-05, + "loss": 0.8055, + "step": 7653 + }, + { + "epoch": 0.23458379306117444, + "grad_norm": 1.5291506184504082, + "learning_rate": 1.78840510857266e-05, + "loss": 0.7031, + "step": 7654 + }, + { + "epoch": 0.23461444158391564, + "grad_norm": 1.6040230009053824, + "learning_rate": 1.7883440420705773e-05, + "loss": 0.7205, + "step": 7655 + }, + { + "epoch": 0.23464509010665685, + "grad_norm": 1.56076468935441, + "learning_rate": 1.788282967800739e-05, + "loss": 0.7959, + "step": 7656 + }, + { + "epoch": 0.23467573862939806, + "grad_norm": 1.3708972117107836, + "learning_rate": 1.7882218857637473e-05, + "loss": 0.8347, + "step": 7657 + }, + { + "epoch": 0.23470638715213926, + "grad_norm": 1.4613428367519317, + "learning_rate": 1.7881607959602038e-05, + "loss": 0.7523, + "step": 7658 + }, + { + "epoch": 0.23473703567488047, + "grad_norm": 1.5407651061528802, + "learning_rate": 1.7880996983907098e-05, + "loss": 0.8522, + "step": 7659 + }, + { + "epoch": 0.23476768419762167, + "grad_norm": 1.5593124184101919, + "learning_rate": 1.7880385930558685e-05, + "loss": 0.7331, + "step": 7660 + }, + { + "epoch": 0.23479833272036288, + "grad_norm": 1.7605842218844032, + "learning_rate": 1.787977479956281e-05, + "loss": 0.786, + "step": 7661 + }, + { + "epoch": 0.23482898124310408, + "grad_norm": 1.3961512900279796, + "learning_rate": 1.7879163590925494e-05, + "loss": 0.7949, + "step": 7662 + }, + { + "epoch": 0.2348596297658453, + "grad_norm": 1.707824938954592, + "learning_rate": 1.7878552304652768e-05, + "loss": 0.7613, + "step": 7663 + }, + { + "epoch": 0.2348902782885865, + "grad_norm": 0.8474552096236418, + "learning_rate": 1.7877940940750648e-05, + "loss": 0.6821, + "step": 7664 + }, + { + "epoch": 0.2349209268113277, + "grad_norm": 1.6131375755840145, + "learning_rate": 1.787732949922516e-05, + "loss": 0.8385, + "step": 7665 + }, + { + "epoch": 0.2349515753340689, + "grad_norm": 1.6405068982067554, + "learning_rate": 1.787671798008233e-05, + "loss": 0.7732, + "step": 7666 + }, + { + "epoch": 0.2349822238568101, + "grad_norm": 1.3879612242528259, + "learning_rate": 1.7876106383328182e-05, + "loss": 0.6036, + "step": 7667 + }, + { + "epoch": 0.23501287237955132, + "grad_norm": 0.6528741909299517, + "learning_rate": 1.7875494708968744e-05, + "loss": 0.6379, + "step": 7668 + }, + { + "epoch": 0.2350435209022925, + "grad_norm": 1.632765194484259, + "learning_rate": 1.787488295701004e-05, + "loss": 0.7776, + "step": 7669 + }, + { + "epoch": 0.2350741694250337, + "grad_norm": 0.7154992495603955, + "learning_rate": 1.78742711274581e-05, + "loss": 0.6509, + "step": 7670 + }, + { + "epoch": 0.2351048179477749, + "grad_norm": 0.7266805923050513, + "learning_rate": 1.7873659220318954e-05, + "loss": 0.6659, + "step": 7671 + }, + { + "epoch": 0.2351354664705161, + "grad_norm": 0.7305351983335432, + "learning_rate": 1.7873047235598625e-05, + "loss": 0.6367, + "step": 7672 + }, + { + "epoch": 0.23516611499325732, + "grad_norm": 1.4838653909482438, + "learning_rate": 1.787243517330315e-05, + "loss": 0.8335, + "step": 7673 + }, + { + "epoch": 0.23519676351599852, + "grad_norm": 1.369673207012927, + "learning_rate": 1.7871823033438557e-05, + "loss": 0.739, + "step": 7674 + }, + { + "epoch": 0.23522741203873973, + "grad_norm": 1.483352338350083, + "learning_rate": 1.7871210816010874e-05, + "loss": 0.7836, + "step": 7675 + }, + { + "epoch": 0.23525806056148094, + "grad_norm": 1.3618632574225575, + "learning_rate": 1.787059852102614e-05, + "loss": 0.814, + "step": 7676 + }, + { + "epoch": 0.23528870908422214, + "grad_norm": 1.3778796446364214, + "learning_rate": 1.7869986148490386e-05, + "loss": 0.7069, + "step": 7677 + }, + { + "epoch": 0.23531935760696335, + "grad_norm": 1.7164641971558254, + "learning_rate": 1.786937369840964e-05, + "loss": 0.7815, + "step": 7678 + }, + { + "epoch": 0.23535000612970455, + "grad_norm": 1.5532089835367189, + "learning_rate": 1.7868761170789944e-05, + "loss": 0.8503, + "step": 7679 + }, + { + "epoch": 0.23538065465244576, + "grad_norm": 1.6244262100731797, + "learning_rate": 1.7868148565637334e-05, + "loss": 0.8797, + "step": 7680 + }, + { + "epoch": 0.23541130317518696, + "grad_norm": 1.591117958388523, + "learning_rate": 1.786753588295784e-05, + "loss": 0.7189, + "step": 7681 + }, + { + "epoch": 0.23544195169792817, + "grad_norm": 1.4411468001633474, + "learning_rate": 1.7866923122757503e-05, + "loss": 0.6834, + "step": 7682 + }, + { + "epoch": 0.23547260022066938, + "grad_norm": 1.5019106613820028, + "learning_rate": 1.7866310285042358e-05, + "loss": 0.8841, + "step": 7683 + }, + { + "epoch": 0.23550324874341058, + "grad_norm": 1.424008566348621, + "learning_rate": 1.7865697369818446e-05, + "loss": 0.6578, + "step": 7684 + }, + { + "epoch": 0.23553389726615176, + "grad_norm": 1.494919774832374, + "learning_rate": 1.7865084377091806e-05, + "loss": 0.7642, + "step": 7685 + }, + { + "epoch": 0.23556454578889297, + "grad_norm": 1.3429576731944464, + "learning_rate": 1.786447130686848e-05, + "loss": 0.7204, + "step": 7686 + }, + { + "epoch": 0.23559519431163417, + "grad_norm": 1.2180507678378925, + "learning_rate": 1.78638581591545e-05, + "loss": 0.654, + "step": 7687 + }, + { + "epoch": 0.23562584283437538, + "grad_norm": 1.5302528136200264, + "learning_rate": 1.7863244933955918e-05, + "loss": 0.8457, + "step": 7688 + }, + { + "epoch": 0.23565649135711658, + "grad_norm": 1.3898134360052794, + "learning_rate": 1.786263163127877e-05, + "loss": 0.7911, + "step": 7689 + }, + { + "epoch": 0.2356871398798578, + "grad_norm": 1.5147681991194926, + "learning_rate": 1.78620182511291e-05, + "loss": 0.8265, + "step": 7690 + }, + { + "epoch": 0.235717788402599, + "grad_norm": 0.8765280896256846, + "learning_rate": 1.7861404793512953e-05, + "loss": 0.6214, + "step": 7691 + }, + { + "epoch": 0.2357484369253402, + "grad_norm": 1.4891656322122433, + "learning_rate": 1.7860791258436375e-05, + "loss": 0.9389, + "step": 7692 + }, + { + "epoch": 0.2357790854480814, + "grad_norm": 0.7594414830365918, + "learning_rate": 1.7860177645905407e-05, + "loss": 0.6635, + "step": 7693 + }, + { + "epoch": 0.2358097339708226, + "grad_norm": 1.8744216934229407, + "learning_rate": 1.78595639559261e-05, + "loss": 0.8398, + "step": 7694 + }, + { + "epoch": 0.23584038249356382, + "grad_norm": 1.514562177593419, + "learning_rate": 1.78589501885045e-05, + "loss": 0.7324, + "step": 7695 + }, + { + "epoch": 0.23587103101630502, + "grad_norm": 0.7488177816663896, + "learning_rate": 1.7858336343646647e-05, + "loss": 0.6044, + "step": 7696 + }, + { + "epoch": 0.23590167953904623, + "grad_norm": 1.5731534740540816, + "learning_rate": 1.7857722421358597e-05, + "loss": 0.7747, + "step": 7697 + }, + { + "epoch": 0.23593232806178743, + "grad_norm": 1.390341773955477, + "learning_rate": 1.7857108421646402e-05, + "loss": 0.8397, + "step": 7698 + }, + { + "epoch": 0.23596297658452864, + "grad_norm": 1.6463103617443278, + "learning_rate": 1.78564943445161e-05, + "loss": 0.9392, + "step": 7699 + }, + { + "epoch": 0.23599362510726982, + "grad_norm": 1.450806942454734, + "learning_rate": 1.7855880189973757e-05, + "loss": 0.7779, + "step": 7700 + }, + { + "epoch": 0.23602427363001102, + "grad_norm": 1.473292498636259, + "learning_rate": 1.7855265958025413e-05, + "loss": 0.8157, + "step": 7701 + }, + { + "epoch": 0.23605492215275223, + "grad_norm": 0.740529661799796, + "learning_rate": 1.7854651648677123e-05, + "loss": 0.6358, + "step": 7702 + }, + { + "epoch": 0.23608557067549343, + "grad_norm": 0.7431591004518353, + "learning_rate": 1.785403726193494e-05, + "loss": 0.6451, + "step": 7703 + }, + { + "epoch": 0.23611621919823464, + "grad_norm": 1.568639367649875, + "learning_rate": 1.785342279780492e-05, + "loss": 0.7626, + "step": 7704 + }, + { + "epoch": 0.23614686772097584, + "grad_norm": 1.4832046738726805, + "learning_rate": 1.7852808256293116e-05, + "loss": 0.7752, + "step": 7705 + }, + { + "epoch": 0.23617751624371705, + "grad_norm": 1.8043872104836607, + "learning_rate": 1.785219363740558e-05, + "loss": 0.796, + "step": 7706 + }, + { + "epoch": 0.23620816476645826, + "grad_norm": 1.6114258473060112, + "learning_rate": 1.7851578941148374e-05, + "loss": 0.7769, + "step": 7707 + }, + { + "epoch": 0.23623881328919946, + "grad_norm": 1.5538639650337565, + "learning_rate": 1.7850964167527552e-05, + "loss": 0.8462, + "step": 7708 + }, + { + "epoch": 0.23626946181194067, + "grad_norm": 1.61745113336573, + "learning_rate": 1.785034931654917e-05, + "loss": 0.7201, + "step": 7709 + }, + { + "epoch": 0.23630011033468187, + "grad_norm": 1.6460381858418816, + "learning_rate": 1.7849734388219285e-05, + "loss": 0.7259, + "step": 7710 + }, + { + "epoch": 0.23633075885742308, + "grad_norm": 1.6352281508276532, + "learning_rate": 1.7849119382543966e-05, + "loss": 0.831, + "step": 7711 + }, + { + "epoch": 0.23636140738016428, + "grad_norm": 1.6872069786685904, + "learning_rate": 1.784850429952926e-05, + "loss": 0.8239, + "step": 7712 + }, + { + "epoch": 0.2363920559029055, + "grad_norm": 0.8488309032189317, + "learning_rate": 1.7847889139181234e-05, + "loss": 0.5993, + "step": 7713 + }, + { + "epoch": 0.2364227044256467, + "grad_norm": 1.4922719902870751, + "learning_rate": 1.784727390150595e-05, + "loss": 0.6785, + "step": 7714 + }, + { + "epoch": 0.2364533529483879, + "grad_norm": 1.661915391284802, + "learning_rate": 1.7846658586509463e-05, + "loss": 0.8744, + "step": 7715 + }, + { + "epoch": 0.23648400147112908, + "grad_norm": 1.486993401227827, + "learning_rate": 1.7846043194197847e-05, + "loss": 0.7825, + "step": 7716 + }, + { + "epoch": 0.23651464999387029, + "grad_norm": 1.7257189625826899, + "learning_rate": 1.7845427724577158e-05, + "loss": 0.8653, + "step": 7717 + }, + { + "epoch": 0.2365452985166115, + "grad_norm": 1.5087445238061628, + "learning_rate": 1.7844812177653463e-05, + "loss": 0.7237, + "step": 7718 + }, + { + "epoch": 0.2365759470393527, + "grad_norm": 0.6845657752102988, + "learning_rate": 1.7844196553432825e-05, + "loss": 0.6223, + "step": 7719 + }, + { + "epoch": 0.2366065955620939, + "grad_norm": 1.4450231926039248, + "learning_rate": 1.7843580851921315e-05, + "loss": 0.7289, + "step": 7720 + }, + { + "epoch": 0.2366372440848351, + "grad_norm": 1.433069863260977, + "learning_rate": 1.784296507312499e-05, + "loss": 0.7547, + "step": 7721 + }, + { + "epoch": 0.2366678926075763, + "grad_norm": 0.7099006677798194, + "learning_rate": 1.7842349217049927e-05, + "loss": 0.6334, + "step": 7722 + }, + { + "epoch": 0.23669854113031752, + "grad_norm": 0.7281781277240994, + "learning_rate": 1.784173328370219e-05, + "loss": 0.6391, + "step": 7723 + }, + { + "epoch": 0.23672918965305872, + "grad_norm": 1.4635630576059704, + "learning_rate": 1.7841117273087848e-05, + "loss": 0.778, + "step": 7724 + }, + { + "epoch": 0.23675983817579993, + "grad_norm": 1.4582276516879018, + "learning_rate": 1.7840501185212972e-05, + "loss": 0.7998, + "step": 7725 + }, + { + "epoch": 0.23679048669854114, + "grad_norm": 1.5346960991677807, + "learning_rate": 1.7839885020083633e-05, + "loss": 0.7259, + "step": 7726 + }, + { + "epoch": 0.23682113522128234, + "grad_norm": 1.6857691834702944, + "learning_rate": 1.78392687777059e-05, + "loss": 0.925, + "step": 7727 + }, + { + "epoch": 0.23685178374402355, + "grad_norm": 1.5685172509702698, + "learning_rate": 1.7838652458085844e-05, + "loss": 0.78, + "step": 7728 + }, + { + "epoch": 0.23688243226676475, + "grad_norm": 1.4088576305219, + "learning_rate": 1.783803606122954e-05, + "loss": 0.828, + "step": 7729 + }, + { + "epoch": 0.23691308078950596, + "grad_norm": 0.7580231839792743, + "learning_rate": 1.7837419587143064e-05, + "loss": 0.6352, + "step": 7730 + }, + { + "epoch": 0.23694372931224714, + "grad_norm": 1.4076020635715891, + "learning_rate": 1.7836803035832485e-05, + "loss": 0.7328, + "step": 7731 + }, + { + "epoch": 0.23697437783498834, + "grad_norm": 1.5684732103312948, + "learning_rate": 1.7836186407303882e-05, + "loss": 0.867, + "step": 7732 + }, + { + "epoch": 0.23700502635772955, + "grad_norm": 1.486663392422916, + "learning_rate": 1.783556970156333e-05, + "loss": 0.7786, + "step": 7733 + }, + { + "epoch": 0.23703567488047075, + "grad_norm": 1.4607387324332084, + "learning_rate": 1.7834952918616904e-05, + "loss": 0.8089, + "step": 7734 + }, + { + "epoch": 0.23706632340321196, + "grad_norm": 1.5885687376796278, + "learning_rate": 1.7834336058470682e-05, + "loss": 0.8087, + "step": 7735 + }, + { + "epoch": 0.23709697192595316, + "grad_norm": 1.6616483486720153, + "learning_rate": 1.7833719121130743e-05, + "loss": 0.8031, + "step": 7736 + }, + { + "epoch": 0.23712762044869437, + "grad_norm": 1.4784623464061215, + "learning_rate": 1.7833102106603165e-05, + "loss": 0.7782, + "step": 7737 + }, + { + "epoch": 0.23715826897143558, + "grad_norm": 1.5859337658045016, + "learning_rate": 1.7832485014894025e-05, + "loss": 0.7211, + "step": 7738 + }, + { + "epoch": 0.23718891749417678, + "grad_norm": 1.4043522726271789, + "learning_rate": 1.783186784600941e-05, + "loss": 0.8425, + "step": 7739 + }, + { + "epoch": 0.237219566016918, + "grad_norm": 1.50429416630418, + "learning_rate": 1.7831250599955398e-05, + "loss": 0.8541, + "step": 7740 + }, + { + "epoch": 0.2372502145396592, + "grad_norm": 1.494196103278096, + "learning_rate": 1.7830633276738066e-05, + "loss": 0.7244, + "step": 7741 + }, + { + "epoch": 0.2372808630624004, + "grad_norm": 0.7559204126168485, + "learning_rate": 1.7830015876363504e-05, + "loss": 0.6158, + "step": 7742 + }, + { + "epoch": 0.2373115115851416, + "grad_norm": 1.3411013226832331, + "learning_rate": 1.782939839883779e-05, + "loss": 0.8883, + "step": 7743 + }, + { + "epoch": 0.2373421601078828, + "grad_norm": 0.6839606878666985, + "learning_rate": 1.782878084416701e-05, + "loss": 0.6466, + "step": 7744 + }, + { + "epoch": 0.23737280863062402, + "grad_norm": 0.6903288866775399, + "learning_rate": 1.7828163212357254e-05, + "loss": 0.6642, + "step": 7745 + }, + { + "epoch": 0.23740345715336522, + "grad_norm": 1.4190410572803227, + "learning_rate": 1.78275455034146e-05, + "loss": 0.7241, + "step": 7746 + }, + { + "epoch": 0.2374341056761064, + "grad_norm": 1.553842705509902, + "learning_rate": 1.7826927717345133e-05, + "loss": 0.9236, + "step": 7747 + }, + { + "epoch": 0.2374647541988476, + "grad_norm": 1.5952153894178152, + "learning_rate": 1.782630985415495e-05, + "loss": 0.704, + "step": 7748 + }, + { + "epoch": 0.2374954027215888, + "grad_norm": 1.511435698382322, + "learning_rate": 1.7825691913850128e-05, + "loss": 0.7261, + "step": 7749 + }, + { + "epoch": 0.23752605124433002, + "grad_norm": 1.5280532971249075, + "learning_rate": 1.782507389643677e-05, + "loss": 0.7949, + "step": 7750 + }, + { + "epoch": 0.23755669976707122, + "grad_norm": 0.7384804143853914, + "learning_rate": 1.782445580192095e-05, + "loss": 0.6125, + "step": 7751 + }, + { + "epoch": 0.23758734828981243, + "grad_norm": 0.7106499831655404, + "learning_rate": 1.7823837630308768e-05, + "loss": 0.6096, + "step": 7752 + }, + { + "epoch": 0.23761799681255363, + "grad_norm": 1.5225817473512582, + "learning_rate": 1.7823219381606308e-05, + "loss": 0.6829, + "step": 7753 + }, + { + "epoch": 0.23764864533529484, + "grad_norm": 1.7199819287101326, + "learning_rate": 1.782260105581967e-05, + "loss": 0.9247, + "step": 7754 + }, + { + "epoch": 0.23767929385803604, + "grad_norm": 1.5624728118929352, + "learning_rate": 1.782198265295494e-05, + "loss": 0.8771, + "step": 7755 + }, + { + "epoch": 0.23770994238077725, + "grad_norm": 0.7046720574391283, + "learning_rate": 1.7821364173018216e-05, + "loss": 0.6212, + "step": 7756 + }, + { + "epoch": 0.23774059090351846, + "grad_norm": 1.5579348508319868, + "learning_rate": 1.782074561601559e-05, + "loss": 0.832, + "step": 7757 + }, + { + "epoch": 0.23777123942625966, + "grad_norm": 1.5467638128329408, + "learning_rate": 1.7820126981953153e-05, + "loss": 0.7707, + "step": 7758 + }, + { + "epoch": 0.23780188794900087, + "grad_norm": 1.459419020557756, + "learning_rate": 1.7819508270837006e-05, + "loss": 0.797, + "step": 7759 + }, + { + "epoch": 0.23783253647174207, + "grad_norm": 1.610155137957402, + "learning_rate": 1.7818889482673244e-05, + "loss": 0.75, + "step": 7760 + }, + { + "epoch": 0.23786318499448328, + "grad_norm": 1.5671485638923877, + "learning_rate": 1.781827061746796e-05, + "loss": 0.7582, + "step": 7761 + }, + { + "epoch": 0.23789383351722446, + "grad_norm": 1.5024281364121657, + "learning_rate": 1.781765167522726e-05, + "loss": 0.661, + "step": 7762 + }, + { + "epoch": 0.23792448203996566, + "grad_norm": 1.3809309250955322, + "learning_rate": 1.7817032655957236e-05, + "loss": 0.7324, + "step": 7763 + }, + { + "epoch": 0.23795513056270687, + "grad_norm": 0.7304481486158162, + "learning_rate": 1.781641355966399e-05, + "loss": 0.659, + "step": 7764 + }, + { + "epoch": 0.23798577908544807, + "grad_norm": 1.4756895288631355, + "learning_rate": 1.7815794386353618e-05, + "loss": 0.8357, + "step": 7765 + }, + { + "epoch": 0.23801642760818928, + "grad_norm": 1.372447819012768, + "learning_rate": 1.7815175136032224e-05, + "loss": 0.7386, + "step": 7766 + }, + { + "epoch": 0.23804707613093049, + "grad_norm": 1.352025481184312, + "learning_rate": 1.781455580870591e-05, + "loss": 0.7992, + "step": 7767 + }, + { + "epoch": 0.2380777246536717, + "grad_norm": 1.2954736968405272, + "learning_rate": 1.7813936404380784e-05, + "loss": 0.7147, + "step": 7768 + }, + { + "epoch": 0.2381083731764129, + "grad_norm": 1.7481503018728477, + "learning_rate": 1.7813316923062938e-05, + "loss": 0.8808, + "step": 7769 + }, + { + "epoch": 0.2381390216991541, + "grad_norm": 1.4568457832875976, + "learning_rate": 1.781269736475848e-05, + "loss": 0.7615, + "step": 7770 + }, + { + "epoch": 0.2381696702218953, + "grad_norm": 1.5016980722789677, + "learning_rate": 1.781207772947352e-05, + "loss": 0.8497, + "step": 7771 + }, + { + "epoch": 0.2382003187446365, + "grad_norm": 0.7122919808127148, + "learning_rate": 1.7811458017214158e-05, + "loss": 0.6577, + "step": 7772 + }, + { + "epoch": 0.23823096726737772, + "grad_norm": 1.314110069503682, + "learning_rate": 1.7810838227986503e-05, + "loss": 0.8336, + "step": 7773 + }, + { + "epoch": 0.23826161579011892, + "grad_norm": 1.4911715325468693, + "learning_rate": 1.7810218361796656e-05, + "loss": 0.7122, + "step": 7774 + }, + { + "epoch": 0.23829226431286013, + "grad_norm": 1.3450488426207268, + "learning_rate": 1.7809598418650734e-05, + "loss": 0.8675, + "step": 7775 + }, + { + "epoch": 0.23832291283560134, + "grad_norm": 1.6522376000362915, + "learning_rate": 1.7808978398554838e-05, + "loss": 0.7556, + "step": 7776 + }, + { + "epoch": 0.23835356135834254, + "grad_norm": 0.6932402722835773, + "learning_rate": 1.7808358301515078e-05, + "loss": 0.5972, + "step": 7777 + }, + { + "epoch": 0.23838420988108372, + "grad_norm": 1.429301015263568, + "learning_rate": 1.7807738127537567e-05, + "loss": 0.8041, + "step": 7778 + }, + { + "epoch": 0.23841485840382493, + "grad_norm": 1.4926320489410134, + "learning_rate": 1.7807117876628418e-05, + "loss": 0.7639, + "step": 7779 + }, + { + "epoch": 0.23844550692656613, + "grad_norm": 1.5863132190752907, + "learning_rate": 1.780649754879374e-05, + "loss": 0.8587, + "step": 7780 + }, + { + "epoch": 0.23847615544930734, + "grad_norm": 1.5129817341717322, + "learning_rate": 1.780587714403964e-05, + "loss": 0.702, + "step": 7781 + }, + { + "epoch": 0.23850680397204854, + "grad_norm": 0.7022998027882924, + "learning_rate": 1.7805256662372233e-05, + "loss": 0.6266, + "step": 7782 + }, + { + "epoch": 0.23853745249478975, + "grad_norm": 1.628208923042646, + "learning_rate": 1.7804636103797637e-05, + "loss": 0.8335, + "step": 7783 + }, + { + "epoch": 0.23856810101753095, + "grad_norm": 0.6960235596901043, + "learning_rate": 1.780401546832197e-05, + "loss": 0.6583, + "step": 7784 + }, + { + "epoch": 0.23859874954027216, + "grad_norm": 0.6868382143321451, + "learning_rate": 1.780339475595134e-05, + "loss": 0.6646, + "step": 7785 + }, + { + "epoch": 0.23862939806301336, + "grad_norm": 1.410234133219898, + "learning_rate": 1.780277396669186e-05, + "loss": 0.778, + "step": 7786 + }, + { + "epoch": 0.23866004658575457, + "grad_norm": 1.6207270234075688, + "learning_rate": 1.7802153100549653e-05, + "loss": 0.7503, + "step": 7787 + }, + { + "epoch": 0.23869069510849578, + "grad_norm": 1.3817412215781084, + "learning_rate": 1.7801532157530835e-05, + "loss": 0.8544, + "step": 7788 + }, + { + "epoch": 0.23872134363123698, + "grad_norm": 1.5895964451196778, + "learning_rate": 1.7800911137641527e-05, + "loss": 0.807, + "step": 7789 + }, + { + "epoch": 0.2387519921539782, + "grad_norm": 1.441871149272927, + "learning_rate": 1.7800290040887845e-05, + "loss": 0.8325, + "step": 7790 + }, + { + "epoch": 0.2387826406767194, + "grad_norm": 1.3428752559466253, + "learning_rate": 1.779966886727591e-05, + "loss": 0.8064, + "step": 7791 + }, + { + "epoch": 0.2388132891994606, + "grad_norm": 1.5418720199012783, + "learning_rate": 1.779904761681184e-05, + "loss": 0.8248, + "step": 7792 + }, + { + "epoch": 0.23884393772220178, + "grad_norm": 1.412039914948215, + "learning_rate": 1.779842628950176e-05, + "loss": 0.8411, + "step": 7793 + }, + { + "epoch": 0.23887458624494298, + "grad_norm": 1.4102317456902114, + "learning_rate": 1.7797804885351788e-05, + "loss": 0.7819, + "step": 7794 + }, + { + "epoch": 0.2389052347676842, + "grad_norm": 1.3150260190126901, + "learning_rate": 1.7797183404368054e-05, + "loss": 0.8094, + "step": 7795 + }, + { + "epoch": 0.2389358832904254, + "grad_norm": 1.3914353230695886, + "learning_rate": 1.7796561846556672e-05, + "loss": 0.8014, + "step": 7796 + }, + { + "epoch": 0.2389665318131666, + "grad_norm": 0.8542888681979585, + "learning_rate": 1.7795940211923774e-05, + "loss": 0.6466, + "step": 7797 + }, + { + "epoch": 0.2389971803359078, + "grad_norm": 1.5475927404038583, + "learning_rate": 1.7795318500475483e-05, + "loss": 0.7926, + "step": 7798 + }, + { + "epoch": 0.239027828858649, + "grad_norm": 0.7256326370812068, + "learning_rate": 1.7794696712217923e-05, + "loss": 0.6326, + "step": 7799 + }, + { + "epoch": 0.23905847738139022, + "grad_norm": 0.6904860688025357, + "learning_rate": 1.7794074847157222e-05, + "loss": 0.6354, + "step": 7800 + }, + { + "epoch": 0.23908912590413142, + "grad_norm": 1.6228459363038168, + "learning_rate": 1.7793452905299507e-05, + "loss": 0.79, + "step": 7801 + }, + { + "epoch": 0.23911977442687263, + "grad_norm": 1.5703114095161201, + "learning_rate": 1.7792830886650906e-05, + "loss": 0.9226, + "step": 7802 + }, + { + "epoch": 0.23915042294961383, + "grad_norm": 0.7508737352878241, + "learning_rate": 1.779220879121755e-05, + "loss": 0.6052, + "step": 7803 + }, + { + "epoch": 0.23918107147235504, + "grad_norm": 1.5292093804349023, + "learning_rate": 1.7791586619005565e-05, + "loss": 0.8122, + "step": 7804 + }, + { + "epoch": 0.23921171999509624, + "grad_norm": 1.5071213827651715, + "learning_rate": 1.7790964370021086e-05, + "loss": 0.7988, + "step": 7805 + }, + { + "epoch": 0.23924236851783745, + "grad_norm": 1.5628437261999057, + "learning_rate": 1.779034204427024e-05, + "loss": 0.763, + "step": 7806 + }, + { + "epoch": 0.23927301704057866, + "grad_norm": 1.5282644914983623, + "learning_rate": 1.778971964175916e-05, + "loss": 0.6826, + "step": 7807 + }, + { + "epoch": 0.23930366556331986, + "grad_norm": 1.5024539482837558, + "learning_rate": 1.778909716249398e-05, + "loss": 0.6607, + "step": 7808 + }, + { + "epoch": 0.23933431408606104, + "grad_norm": 1.5754801816852377, + "learning_rate": 1.7788474606480835e-05, + "loss": 0.749, + "step": 7809 + }, + { + "epoch": 0.23936496260880225, + "grad_norm": 1.3751381282650117, + "learning_rate": 1.7787851973725856e-05, + "loss": 0.9053, + "step": 7810 + }, + { + "epoch": 0.23939561113154345, + "grad_norm": 1.5276485610402146, + "learning_rate": 1.7787229264235178e-05, + "loss": 0.7509, + "step": 7811 + }, + { + "epoch": 0.23942625965428466, + "grad_norm": 1.5777136400193532, + "learning_rate": 1.7786606478014936e-05, + "loss": 0.7654, + "step": 7812 + }, + { + "epoch": 0.23945690817702586, + "grad_norm": 0.8100203079140056, + "learning_rate": 1.778598361507127e-05, + "loss": 0.6395, + "step": 7813 + }, + { + "epoch": 0.23948755669976707, + "grad_norm": 1.4398765884938722, + "learning_rate": 1.7785360675410314e-05, + "loss": 0.6781, + "step": 7814 + }, + { + "epoch": 0.23951820522250827, + "grad_norm": 1.5150948214336961, + "learning_rate": 1.778473765903821e-05, + "loss": 0.7875, + "step": 7815 + }, + { + "epoch": 0.23954885374524948, + "grad_norm": 1.4045338029738228, + "learning_rate": 1.778411456596109e-05, + "loss": 0.7445, + "step": 7816 + }, + { + "epoch": 0.23957950226799068, + "grad_norm": 1.5146916585279688, + "learning_rate": 1.77834913961851e-05, + "loss": 0.8176, + "step": 7817 + }, + { + "epoch": 0.2396101507907319, + "grad_norm": 1.476773079346792, + "learning_rate": 1.7782868149716378e-05, + "loss": 0.743, + "step": 7818 + }, + { + "epoch": 0.2396407993134731, + "grad_norm": 1.3083137187277374, + "learning_rate": 1.7782244826561067e-05, + "loss": 0.7681, + "step": 7819 + }, + { + "epoch": 0.2396714478362143, + "grad_norm": 1.7582214572419501, + "learning_rate": 1.7781621426725302e-05, + "loss": 0.8651, + "step": 7820 + }, + { + "epoch": 0.2397020963589555, + "grad_norm": 1.5928241086663424, + "learning_rate": 1.778099795021523e-05, + "loss": 0.8629, + "step": 7821 + }, + { + "epoch": 0.2397327448816967, + "grad_norm": 1.4046265169979246, + "learning_rate": 1.7780374397036996e-05, + "loss": 0.783, + "step": 7822 + }, + { + "epoch": 0.23976339340443792, + "grad_norm": 1.2504973741203422, + "learning_rate": 1.7779750767196743e-05, + "loss": 0.7585, + "step": 7823 + }, + { + "epoch": 0.2397940419271791, + "grad_norm": 1.4895990115369557, + "learning_rate": 1.7779127060700615e-05, + "loss": 0.7988, + "step": 7824 + }, + { + "epoch": 0.2398246904499203, + "grad_norm": 1.7205354477739752, + "learning_rate": 1.777850327755476e-05, + "loss": 0.7058, + "step": 7825 + }, + { + "epoch": 0.2398553389726615, + "grad_norm": 1.4361549227042416, + "learning_rate": 1.7777879417765317e-05, + "loss": 0.7739, + "step": 7826 + }, + { + "epoch": 0.2398859874954027, + "grad_norm": 1.3732759806099886, + "learning_rate": 1.7777255481338443e-05, + "loss": 0.8135, + "step": 7827 + }, + { + "epoch": 0.23991663601814392, + "grad_norm": 1.3832534217073302, + "learning_rate": 1.7776631468280278e-05, + "loss": 0.7195, + "step": 7828 + }, + { + "epoch": 0.23994728454088513, + "grad_norm": 1.6909881068780377, + "learning_rate": 1.7776007378596974e-05, + "loss": 0.8242, + "step": 7829 + }, + { + "epoch": 0.23997793306362633, + "grad_norm": 1.5264337195770743, + "learning_rate": 1.777538321229468e-05, + "loss": 0.8606, + "step": 7830 + }, + { + "epoch": 0.24000858158636754, + "grad_norm": 1.3491603019218927, + "learning_rate": 1.7774758969379545e-05, + "loss": 0.7524, + "step": 7831 + }, + { + "epoch": 0.24003923010910874, + "grad_norm": 1.5040789257999312, + "learning_rate": 1.777413464985772e-05, + "loss": 0.8238, + "step": 7832 + }, + { + "epoch": 0.24006987863184995, + "grad_norm": 1.521199051146436, + "learning_rate": 1.777351025373536e-05, + "loss": 0.7349, + "step": 7833 + }, + { + "epoch": 0.24010052715459115, + "grad_norm": 1.892253114361085, + "learning_rate": 1.777288578101861e-05, + "loss": 0.7893, + "step": 7834 + }, + { + "epoch": 0.24013117567733236, + "grad_norm": 1.6661015082823265, + "learning_rate": 1.777226123171363e-05, + "loss": 0.8446, + "step": 7835 + }, + { + "epoch": 0.24016182420007356, + "grad_norm": 1.4640587522346136, + "learning_rate": 1.7771636605826573e-05, + "loss": 0.8928, + "step": 7836 + }, + { + "epoch": 0.24019247272281477, + "grad_norm": 1.5984921905106075, + "learning_rate": 1.777101190336359e-05, + "loss": 0.851, + "step": 7837 + }, + { + "epoch": 0.24022312124555598, + "grad_norm": 1.4488498258741214, + "learning_rate": 1.777038712433084e-05, + "loss": 0.8148, + "step": 7838 + }, + { + "epoch": 0.24025376976829718, + "grad_norm": 0.8218568650549801, + "learning_rate": 1.7769762268734477e-05, + "loss": 0.682, + "step": 7839 + }, + { + "epoch": 0.24028441829103836, + "grad_norm": 0.7769050569720624, + "learning_rate": 1.7769137336580658e-05, + "loss": 0.6664, + "step": 7840 + }, + { + "epoch": 0.24031506681377957, + "grad_norm": 1.2339273110096993, + "learning_rate": 1.776851232787554e-05, + "loss": 0.762, + "step": 7841 + }, + { + "epoch": 0.24034571533652077, + "grad_norm": 1.4112437556888677, + "learning_rate": 1.7767887242625287e-05, + "loss": 0.7193, + "step": 7842 + }, + { + "epoch": 0.24037636385926198, + "grad_norm": 0.8447106968863471, + "learning_rate": 1.776726208083605e-05, + "loss": 0.6542, + "step": 7843 + }, + { + "epoch": 0.24040701238200318, + "grad_norm": 1.5311966188219885, + "learning_rate": 1.7766636842513988e-05, + "loss": 0.7829, + "step": 7844 + }, + { + "epoch": 0.2404376609047444, + "grad_norm": 1.7916480891149988, + "learning_rate": 1.7766011527665272e-05, + "loss": 0.7965, + "step": 7845 + }, + { + "epoch": 0.2404683094274856, + "grad_norm": 1.513921037299857, + "learning_rate": 1.7765386136296054e-05, + "loss": 0.696, + "step": 7846 + }, + { + "epoch": 0.2404989579502268, + "grad_norm": 0.8035848317162141, + "learning_rate": 1.7764760668412503e-05, + "loss": 0.6408, + "step": 7847 + }, + { + "epoch": 0.240529606472968, + "grad_norm": 1.6071545500708502, + "learning_rate": 1.7764135124020776e-05, + "loss": 0.7881, + "step": 7848 + }, + { + "epoch": 0.2405602549957092, + "grad_norm": 1.3954478510096848, + "learning_rate": 1.7763509503127042e-05, + "loss": 0.7054, + "step": 7849 + }, + { + "epoch": 0.24059090351845042, + "grad_norm": 1.552329766441057, + "learning_rate": 1.776288380573746e-05, + "loss": 0.7974, + "step": 7850 + }, + { + "epoch": 0.24062155204119162, + "grad_norm": 0.7120322385745467, + "learning_rate": 1.7762258031858196e-05, + "loss": 0.6522, + "step": 7851 + }, + { + "epoch": 0.24065220056393283, + "grad_norm": 0.7210290816161401, + "learning_rate": 1.776163218149542e-05, + "loss": 0.647, + "step": 7852 + }, + { + "epoch": 0.24068284908667403, + "grad_norm": 1.576300813013233, + "learning_rate": 1.7761006254655297e-05, + "loss": 0.7683, + "step": 7853 + }, + { + "epoch": 0.24071349760941524, + "grad_norm": 1.4647705181454305, + "learning_rate": 1.7760380251343995e-05, + "loss": 0.7296, + "step": 7854 + }, + { + "epoch": 0.24074414613215642, + "grad_norm": 1.5376912139568628, + "learning_rate": 1.7759754171567675e-05, + "loss": 0.6965, + "step": 7855 + }, + { + "epoch": 0.24077479465489762, + "grad_norm": 1.4369255672517232, + "learning_rate": 1.7759128015332513e-05, + "loss": 0.7559, + "step": 7856 + }, + { + "epoch": 0.24080544317763883, + "grad_norm": 1.5533453467783387, + "learning_rate": 1.7758501782644683e-05, + "loss": 0.8749, + "step": 7857 + }, + { + "epoch": 0.24083609170038003, + "grad_norm": 1.5951685523254797, + "learning_rate": 1.7757875473510343e-05, + "loss": 0.7302, + "step": 7858 + }, + { + "epoch": 0.24086674022312124, + "grad_norm": 1.6145812801757253, + "learning_rate": 1.7757249087935675e-05, + "loss": 0.773, + "step": 7859 + }, + { + "epoch": 0.24089738874586245, + "grad_norm": 0.7481091065212571, + "learning_rate": 1.7756622625926847e-05, + "loss": 0.6511, + "step": 7860 + }, + { + "epoch": 0.24092803726860365, + "grad_norm": 1.6736707042057808, + "learning_rate": 1.775599608749003e-05, + "loss": 0.8953, + "step": 7861 + }, + { + "epoch": 0.24095868579134486, + "grad_norm": 1.7110779477474347, + "learning_rate": 1.77553694726314e-05, + "loss": 0.8662, + "step": 7862 + }, + { + "epoch": 0.24098933431408606, + "grad_norm": 1.5465097811891737, + "learning_rate": 1.775474278135713e-05, + "loss": 0.7245, + "step": 7863 + }, + { + "epoch": 0.24101998283682727, + "grad_norm": 1.590857147001962, + "learning_rate": 1.7754116013673396e-05, + "loss": 0.8602, + "step": 7864 + }, + { + "epoch": 0.24105063135956847, + "grad_norm": 0.708524229968536, + "learning_rate": 1.7753489169586372e-05, + "loss": 0.6429, + "step": 7865 + }, + { + "epoch": 0.24108127988230968, + "grad_norm": 1.5840067427525109, + "learning_rate": 1.7752862249102236e-05, + "loss": 0.8548, + "step": 7866 + }, + { + "epoch": 0.24111192840505088, + "grad_norm": 1.3617478156255252, + "learning_rate": 1.7752235252227165e-05, + "loss": 0.7596, + "step": 7867 + }, + { + "epoch": 0.2411425769277921, + "grad_norm": 1.4293674600827073, + "learning_rate": 1.7751608178967338e-05, + "loss": 0.8359, + "step": 7868 + }, + { + "epoch": 0.2411732254505333, + "grad_norm": 1.4094727915425271, + "learning_rate": 1.7750981029328927e-05, + "loss": 0.8211, + "step": 7869 + }, + { + "epoch": 0.2412038739732745, + "grad_norm": 1.4749488314769406, + "learning_rate": 1.7750353803318122e-05, + "loss": 0.7626, + "step": 7870 + }, + { + "epoch": 0.24123452249601568, + "grad_norm": 0.7334768855495967, + "learning_rate": 1.7749726500941094e-05, + "loss": 0.6295, + "step": 7871 + }, + { + "epoch": 0.24126517101875689, + "grad_norm": 0.6781866333210056, + "learning_rate": 1.7749099122204028e-05, + "loss": 0.6313, + "step": 7872 + }, + { + "epoch": 0.2412958195414981, + "grad_norm": 0.6849383583602763, + "learning_rate": 1.774847166711311e-05, + "loss": 0.6356, + "step": 7873 + }, + { + "epoch": 0.2413264680642393, + "grad_norm": 0.6807317892262071, + "learning_rate": 1.7747844135674515e-05, + "loss": 0.6377, + "step": 7874 + }, + { + "epoch": 0.2413571165869805, + "grad_norm": 1.4727551632192375, + "learning_rate": 1.774721652789443e-05, + "loss": 0.7438, + "step": 7875 + }, + { + "epoch": 0.2413877651097217, + "grad_norm": 1.6280497600043748, + "learning_rate": 1.774658884377904e-05, + "loss": 0.8186, + "step": 7876 + }, + { + "epoch": 0.2414184136324629, + "grad_norm": 1.5067555256302085, + "learning_rate": 1.7745961083334523e-05, + "loss": 0.6885, + "step": 7877 + }, + { + "epoch": 0.24144906215520412, + "grad_norm": 1.3909293245088366, + "learning_rate": 1.7745333246567077e-05, + "loss": 0.7948, + "step": 7878 + }, + { + "epoch": 0.24147971067794533, + "grad_norm": 1.7436727270739831, + "learning_rate": 1.7744705333482875e-05, + "loss": 0.8256, + "step": 7879 + }, + { + "epoch": 0.24151035920068653, + "grad_norm": 1.2791236067068654, + "learning_rate": 1.7744077344088113e-05, + "loss": 0.7875, + "step": 7880 + }, + { + "epoch": 0.24154100772342774, + "grad_norm": 1.5414984960506304, + "learning_rate": 1.7743449278388973e-05, + "loss": 0.7657, + "step": 7881 + }, + { + "epoch": 0.24157165624616894, + "grad_norm": 1.6252886666910356, + "learning_rate": 1.7742821136391647e-05, + "loss": 0.8207, + "step": 7882 + }, + { + "epoch": 0.24160230476891015, + "grad_norm": 0.7435436969306781, + "learning_rate": 1.7742192918102324e-05, + "loss": 0.6199, + "step": 7883 + }, + { + "epoch": 0.24163295329165135, + "grad_norm": 1.391332494043931, + "learning_rate": 1.774156462352719e-05, + "loss": 0.7902, + "step": 7884 + }, + { + "epoch": 0.24166360181439256, + "grad_norm": 1.6049937561884455, + "learning_rate": 1.7740936252672442e-05, + "loss": 0.9145, + "step": 7885 + }, + { + "epoch": 0.24169425033713374, + "grad_norm": 1.2941472755928773, + "learning_rate": 1.7740307805544267e-05, + "loss": 0.6871, + "step": 7886 + }, + { + "epoch": 0.24172489885987494, + "grad_norm": 0.7156551378738676, + "learning_rate": 1.773967928214886e-05, + "loss": 0.6316, + "step": 7887 + }, + { + "epoch": 0.24175554738261615, + "grad_norm": 1.5621519300549773, + "learning_rate": 1.7739050682492417e-05, + "loss": 0.7721, + "step": 7888 + }, + { + "epoch": 0.24178619590535735, + "grad_norm": 1.6689174729803584, + "learning_rate": 1.773842200658112e-05, + "loss": 0.6911, + "step": 7889 + }, + { + "epoch": 0.24181684442809856, + "grad_norm": 1.453730793608191, + "learning_rate": 1.7737793254421175e-05, + "loss": 0.8027, + "step": 7890 + }, + { + "epoch": 0.24184749295083977, + "grad_norm": 1.474303695904589, + "learning_rate": 1.7737164426018773e-05, + "loss": 0.8176, + "step": 7891 + }, + { + "epoch": 0.24187814147358097, + "grad_norm": 1.5138705304593665, + "learning_rate": 1.773653552138011e-05, + "loss": 0.7932, + "step": 7892 + }, + { + "epoch": 0.24190878999632218, + "grad_norm": 1.3681511942238234, + "learning_rate": 1.7735906540511382e-05, + "loss": 0.7714, + "step": 7893 + }, + { + "epoch": 0.24193943851906338, + "grad_norm": 1.5327566160828527, + "learning_rate": 1.773527748341879e-05, + "loss": 0.7837, + "step": 7894 + }, + { + "epoch": 0.2419700870418046, + "grad_norm": 1.664353542087075, + "learning_rate": 1.773464835010853e-05, + "loss": 0.917, + "step": 7895 + }, + { + "epoch": 0.2420007355645458, + "grad_norm": 1.341417288773931, + "learning_rate": 1.7734019140586797e-05, + "loss": 0.7417, + "step": 7896 + }, + { + "epoch": 0.242031384087287, + "grad_norm": 1.298603460125358, + "learning_rate": 1.7733389854859795e-05, + "loss": 0.7378, + "step": 7897 + }, + { + "epoch": 0.2420620326100282, + "grad_norm": 1.631547306763157, + "learning_rate": 1.7732760492933725e-05, + "loss": 0.7961, + "step": 7898 + }, + { + "epoch": 0.2420926811327694, + "grad_norm": 1.4314104639885172, + "learning_rate": 1.7732131054814786e-05, + "loss": 0.8477, + "step": 7899 + }, + { + "epoch": 0.24212332965551062, + "grad_norm": 1.4177784367985233, + "learning_rate": 1.7731501540509187e-05, + "loss": 0.8525, + "step": 7900 + }, + { + "epoch": 0.24215397817825182, + "grad_norm": 1.4498606204165947, + "learning_rate": 1.7730871950023118e-05, + "loss": 0.8024, + "step": 7901 + }, + { + "epoch": 0.242184626700993, + "grad_norm": 1.3096492659482692, + "learning_rate": 1.7730242283362794e-05, + "loss": 0.7379, + "step": 7902 + }, + { + "epoch": 0.2422152752237342, + "grad_norm": 0.8203583622859071, + "learning_rate": 1.7729612540534414e-05, + "loss": 0.6268, + "step": 7903 + }, + { + "epoch": 0.2422459237464754, + "grad_norm": 1.5122028974100277, + "learning_rate": 1.7728982721544183e-05, + "loss": 0.8269, + "step": 7904 + }, + { + "epoch": 0.24227657226921662, + "grad_norm": 1.43730494367017, + "learning_rate": 1.772835282639831e-05, + "loss": 0.7428, + "step": 7905 + }, + { + "epoch": 0.24230722079195782, + "grad_norm": 1.63569989441779, + "learning_rate": 1.7727722855103e-05, + "loss": 0.8043, + "step": 7906 + }, + { + "epoch": 0.24233786931469903, + "grad_norm": 1.4249931368853033, + "learning_rate": 1.7727092807664455e-05, + "loss": 0.8, + "step": 7907 + }, + { + "epoch": 0.24236851783744023, + "grad_norm": 0.7229975653406497, + "learning_rate": 1.772646268408889e-05, + "loss": 0.6452, + "step": 7908 + }, + { + "epoch": 0.24239916636018144, + "grad_norm": 0.7302275898608321, + "learning_rate": 1.772583248438251e-05, + "loss": 0.6326, + "step": 7909 + }, + { + "epoch": 0.24242981488292265, + "grad_norm": 1.5977927331819475, + "learning_rate": 1.7725202208551526e-05, + "loss": 0.7741, + "step": 7910 + }, + { + "epoch": 0.24246046340566385, + "grad_norm": 1.5214204403904372, + "learning_rate": 1.772457185660215e-05, + "loss": 0.7709, + "step": 7911 + }, + { + "epoch": 0.24249111192840506, + "grad_norm": 1.485655572554796, + "learning_rate": 1.772394142854059e-05, + "loss": 0.8597, + "step": 7912 + }, + { + "epoch": 0.24252176045114626, + "grad_norm": 1.5735981557337377, + "learning_rate": 1.772331092437306e-05, + "loss": 0.7493, + "step": 7913 + }, + { + "epoch": 0.24255240897388747, + "grad_norm": 1.445625218389991, + "learning_rate": 1.7722680344105767e-05, + "loss": 0.8268, + "step": 7914 + }, + { + "epoch": 0.24258305749662867, + "grad_norm": 0.8188921624771396, + "learning_rate": 1.772204968774493e-05, + "loss": 0.641, + "step": 7915 + }, + { + "epoch": 0.24261370601936988, + "grad_norm": 1.4705893345521208, + "learning_rate": 1.7721418955296767e-05, + "loss": 0.8348, + "step": 7916 + }, + { + "epoch": 0.24264435454211106, + "grad_norm": 1.5350930056377055, + "learning_rate": 1.772078814676748e-05, + "loss": 0.8535, + "step": 7917 + }, + { + "epoch": 0.24267500306485226, + "grad_norm": 1.5232426986362977, + "learning_rate": 1.77201572621633e-05, + "loss": 0.8037, + "step": 7918 + }, + { + "epoch": 0.24270565158759347, + "grad_norm": 1.4209670700659285, + "learning_rate": 1.771952630149043e-05, + "loss": 0.7258, + "step": 7919 + }, + { + "epoch": 0.24273630011033467, + "grad_norm": 1.436863151427939, + "learning_rate": 1.7718895264755093e-05, + "loss": 0.7372, + "step": 7920 + }, + { + "epoch": 0.24276694863307588, + "grad_norm": 1.3569605877376671, + "learning_rate": 1.7718264151963505e-05, + "loss": 0.8595, + "step": 7921 + }, + { + "epoch": 0.24279759715581709, + "grad_norm": 1.6835052429705781, + "learning_rate": 1.7717632963121888e-05, + "loss": 0.8905, + "step": 7922 + }, + { + "epoch": 0.2428282456785583, + "grad_norm": 1.4449043023854968, + "learning_rate": 1.771700169823646e-05, + "loss": 0.7569, + "step": 7923 + }, + { + "epoch": 0.2428588942012995, + "grad_norm": 0.6887653440756093, + "learning_rate": 1.7716370357313435e-05, + "loss": 0.6228, + "step": 7924 + }, + { + "epoch": 0.2428895427240407, + "grad_norm": 1.4887622859677394, + "learning_rate": 1.7715738940359042e-05, + "loss": 0.8207, + "step": 7925 + }, + { + "epoch": 0.2429201912467819, + "grad_norm": 1.4059790533456356, + "learning_rate": 1.7715107447379497e-05, + "loss": 0.8144, + "step": 7926 + }, + { + "epoch": 0.2429508397695231, + "grad_norm": 1.6485446673702193, + "learning_rate": 1.7714475878381026e-05, + "loss": 0.725, + "step": 7927 + }, + { + "epoch": 0.24298148829226432, + "grad_norm": 1.3706607954824264, + "learning_rate": 1.771384423336985e-05, + "loss": 0.8355, + "step": 7928 + }, + { + "epoch": 0.24301213681500552, + "grad_norm": 1.5087709997592362, + "learning_rate": 1.7713212512352193e-05, + "loss": 0.853, + "step": 7929 + }, + { + "epoch": 0.24304278533774673, + "grad_norm": 1.3731405607612766, + "learning_rate": 1.7712580715334278e-05, + "loss": 0.7524, + "step": 7930 + }, + { + "epoch": 0.24307343386048794, + "grad_norm": 1.4528328554000913, + "learning_rate": 1.7711948842322333e-05, + "loss": 0.8155, + "step": 7931 + }, + { + "epoch": 0.24310408238322914, + "grad_norm": 1.4041476155095785, + "learning_rate": 1.7711316893322584e-05, + "loss": 0.7522, + "step": 7932 + }, + { + "epoch": 0.24313473090597032, + "grad_norm": 1.597228963586956, + "learning_rate": 1.7710684868341256e-05, + "loss": 0.8943, + "step": 7933 + }, + { + "epoch": 0.24316537942871153, + "grad_norm": 1.3636054925042518, + "learning_rate": 1.7710052767384576e-05, + "loss": 0.7747, + "step": 7934 + }, + { + "epoch": 0.24319602795145273, + "grad_norm": 1.6696842902280362, + "learning_rate": 1.7709420590458775e-05, + "loss": 0.7626, + "step": 7935 + }, + { + "epoch": 0.24322667647419394, + "grad_norm": 1.5616976817216697, + "learning_rate": 1.7708788337570076e-05, + "loss": 0.6955, + "step": 7936 + }, + { + "epoch": 0.24325732499693514, + "grad_norm": 1.5411638882076781, + "learning_rate": 1.770815600872472e-05, + "loss": 0.698, + "step": 7937 + }, + { + "epoch": 0.24328797351967635, + "grad_norm": 1.348532197668285, + "learning_rate": 1.7707523603928924e-05, + "loss": 0.6842, + "step": 7938 + }, + { + "epoch": 0.24331862204241755, + "grad_norm": 1.5607125568900369, + "learning_rate": 1.770689112318893e-05, + "loss": 0.8993, + "step": 7939 + }, + { + "epoch": 0.24334927056515876, + "grad_norm": 1.5971477833151053, + "learning_rate": 1.770625856651097e-05, + "loss": 0.8267, + "step": 7940 + }, + { + "epoch": 0.24337991908789997, + "grad_norm": 1.4440198520390064, + "learning_rate": 1.7705625933901265e-05, + "loss": 0.7384, + "step": 7941 + }, + { + "epoch": 0.24341056761064117, + "grad_norm": 1.5072258680392419, + "learning_rate": 1.7704993225366056e-05, + "loss": 0.8511, + "step": 7942 + }, + { + "epoch": 0.24344121613338238, + "grad_norm": 1.3626841676466175, + "learning_rate": 1.7704360440911583e-05, + "loss": 0.7361, + "step": 7943 + }, + { + "epoch": 0.24347186465612358, + "grad_norm": 0.7719668365375114, + "learning_rate": 1.770372758054407e-05, + "loss": 0.6161, + "step": 7944 + }, + { + "epoch": 0.2435025131788648, + "grad_norm": 1.4084985310108846, + "learning_rate": 1.7703094644269763e-05, + "loss": 0.8146, + "step": 7945 + }, + { + "epoch": 0.243533161701606, + "grad_norm": 1.5223327295107738, + "learning_rate": 1.770246163209489e-05, + "loss": 0.7133, + "step": 7946 + }, + { + "epoch": 0.2435638102243472, + "grad_norm": 1.4025879556915197, + "learning_rate": 1.770182854402569e-05, + "loss": 0.7486, + "step": 7947 + }, + { + "epoch": 0.24359445874708838, + "grad_norm": 1.6008218532659217, + "learning_rate": 1.770119538006841e-05, + "loss": 0.8127, + "step": 7948 + }, + { + "epoch": 0.24362510726982958, + "grad_norm": 1.576195839324513, + "learning_rate": 1.7700562140229273e-05, + "loss": 0.7667, + "step": 7949 + }, + { + "epoch": 0.2436557557925708, + "grad_norm": 1.56157551944367, + "learning_rate": 1.7699928824514535e-05, + "loss": 0.7824, + "step": 7950 + }, + { + "epoch": 0.243686404315312, + "grad_norm": 1.5141801331853628, + "learning_rate": 1.769929543293042e-05, + "loss": 0.7577, + "step": 7951 + }, + { + "epoch": 0.2437170528380532, + "grad_norm": 1.5658841521326343, + "learning_rate": 1.7698661965483187e-05, + "loss": 0.7704, + "step": 7952 + }, + { + "epoch": 0.2437477013607944, + "grad_norm": 1.4091331840592844, + "learning_rate": 1.7698028422179058e-05, + "loss": 0.8108, + "step": 7953 + }, + { + "epoch": 0.2437783498835356, + "grad_norm": 1.3944844562041179, + "learning_rate": 1.769739480302429e-05, + "loss": 0.8255, + "step": 7954 + }, + { + "epoch": 0.24380899840627682, + "grad_norm": 0.7724931718929271, + "learning_rate": 1.7696761108025123e-05, + "loss": 0.6492, + "step": 7955 + }, + { + "epoch": 0.24383964692901802, + "grad_norm": 1.443320745723797, + "learning_rate": 1.7696127337187796e-05, + "loss": 0.6817, + "step": 7956 + }, + { + "epoch": 0.24387029545175923, + "grad_norm": 1.477572557245164, + "learning_rate": 1.769549349051856e-05, + "loss": 0.7891, + "step": 7957 + }, + { + "epoch": 0.24390094397450043, + "grad_norm": 1.4852611159406044, + "learning_rate": 1.7694859568023656e-05, + "loss": 0.7043, + "step": 7958 + }, + { + "epoch": 0.24393159249724164, + "grad_norm": 1.5033125232829656, + "learning_rate": 1.769422556970933e-05, + "loss": 0.7772, + "step": 7959 + }, + { + "epoch": 0.24396224101998285, + "grad_norm": 1.5569739173745152, + "learning_rate": 1.7693591495581835e-05, + "loss": 0.942, + "step": 7960 + }, + { + "epoch": 0.24399288954272405, + "grad_norm": 1.5214251338268192, + "learning_rate": 1.7692957345647414e-05, + "loss": 0.7095, + "step": 7961 + }, + { + "epoch": 0.24402353806546526, + "grad_norm": 1.5885239853597548, + "learning_rate": 1.7692323119912313e-05, + "loss": 0.7692, + "step": 7962 + }, + { + "epoch": 0.24405418658820646, + "grad_norm": 0.742364675214384, + "learning_rate": 1.7691688818382785e-05, + "loss": 0.6412, + "step": 7963 + }, + { + "epoch": 0.24408483511094764, + "grad_norm": 0.7258227335577602, + "learning_rate": 1.769105444106508e-05, + "loss": 0.6291, + "step": 7964 + }, + { + "epoch": 0.24411548363368885, + "grad_norm": 1.6064892524340815, + "learning_rate": 1.7690419987965448e-05, + "loss": 0.8868, + "step": 7965 + }, + { + "epoch": 0.24414613215643005, + "grad_norm": 1.6547481510631636, + "learning_rate": 1.768978545909014e-05, + "loss": 0.7905, + "step": 7966 + }, + { + "epoch": 0.24417678067917126, + "grad_norm": 0.6789858583165254, + "learning_rate": 1.7689150854445407e-05, + "loss": 0.609, + "step": 7967 + }, + { + "epoch": 0.24420742920191246, + "grad_norm": 1.396779837873921, + "learning_rate": 1.7688516174037507e-05, + "loss": 0.7171, + "step": 7968 + }, + { + "epoch": 0.24423807772465367, + "grad_norm": 1.4437117951343945, + "learning_rate": 1.7687881417872685e-05, + "loss": 0.7834, + "step": 7969 + }, + { + "epoch": 0.24426872624739487, + "grad_norm": 1.6452174980926346, + "learning_rate": 1.7687246585957205e-05, + "loss": 0.8589, + "step": 7970 + }, + { + "epoch": 0.24429937477013608, + "grad_norm": 1.6087495963426417, + "learning_rate": 1.7686611678297314e-05, + "loss": 0.733, + "step": 7971 + }, + { + "epoch": 0.24433002329287729, + "grad_norm": 1.6701016534275974, + "learning_rate": 1.768597669489927e-05, + "loss": 0.8888, + "step": 7972 + }, + { + "epoch": 0.2443606718156185, + "grad_norm": 1.5548267581027662, + "learning_rate": 1.7685341635769337e-05, + "loss": 0.8305, + "step": 7973 + }, + { + "epoch": 0.2443913203383597, + "grad_norm": 1.6825347703078293, + "learning_rate": 1.7684706500913764e-05, + "loss": 0.8159, + "step": 7974 + }, + { + "epoch": 0.2444219688611009, + "grad_norm": 1.523094047331936, + "learning_rate": 1.7684071290338808e-05, + "loss": 0.7894, + "step": 7975 + }, + { + "epoch": 0.2444526173838421, + "grad_norm": 1.4079652100602595, + "learning_rate": 1.7683436004050734e-05, + "loss": 0.7761, + "step": 7976 + }, + { + "epoch": 0.2444832659065833, + "grad_norm": 1.4291108513316693, + "learning_rate": 1.7682800642055798e-05, + "loss": 0.7727, + "step": 7977 + }, + { + "epoch": 0.24451391442932452, + "grad_norm": 1.3869364711601775, + "learning_rate": 1.7682165204360267e-05, + "loss": 0.7101, + "step": 7978 + }, + { + "epoch": 0.2445445629520657, + "grad_norm": 0.8477959726499248, + "learning_rate": 1.7681529690970392e-05, + "loss": 0.6599, + "step": 7979 + }, + { + "epoch": 0.2445752114748069, + "grad_norm": 1.4725459179191247, + "learning_rate": 1.7680894101892438e-05, + "loss": 0.8396, + "step": 7980 + }, + { + "epoch": 0.2446058599975481, + "grad_norm": 1.4050804975270426, + "learning_rate": 1.768025843713267e-05, + "loss": 0.853, + "step": 7981 + }, + { + "epoch": 0.24463650852028931, + "grad_norm": 0.6951353773209158, + "learning_rate": 1.7679622696697355e-05, + "loss": 0.6351, + "step": 7982 + }, + { + "epoch": 0.24466715704303052, + "grad_norm": 0.7371056350616361, + "learning_rate": 1.767898688059275e-05, + "loss": 0.6277, + "step": 7983 + }, + { + "epoch": 0.24469780556577173, + "grad_norm": 1.3334692231016871, + "learning_rate": 1.767835098882512e-05, + "loss": 0.7543, + "step": 7984 + }, + { + "epoch": 0.24472845408851293, + "grad_norm": 0.7069971552654127, + "learning_rate": 1.7677715021400738e-05, + "loss": 0.6457, + "step": 7985 + }, + { + "epoch": 0.24475910261125414, + "grad_norm": 1.5087454549792225, + "learning_rate": 1.767707897832586e-05, + "loss": 0.7539, + "step": 7986 + }, + { + "epoch": 0.24478975113399534, + "grad_norm": 1.6495291242844807, + "learning_rate": 1.7676442859606762e-05, + "loss": 0.7898, + "step": 7987 + }, + { + "epoch": 0.24482039965673655, + "grad_norm": 1.494158109283764, + "learning_rate": 1.767580666524971e-05, + "loss": 0.7653, + "step": 7988 + }, + { + "epoch": 0.24485104817947775, + "grad_norm": 1.604762734238706, + "learning_rate": 1.7675170395260967e-05, + "loss": 0.8362, + "step": 7989 + }, + { + "epoch": 0.24488169670221896, + "grad_norm": 1.7737417195558058, + "learning_rate": 1.7674534049646808e-05, + "loss": 0.7855, + "step": 7990 + }, + { + "epoch": 0.24491234522496017, + "grad_norm": 1.5919669761459276, + "learning_rate": 1.7673897628413502e-05, + "loss": 0.7383, + "step": 7991 + }, + { + "epoch": 0.24494299374770137, + "grad_norm": 1.507625692989689, + "learning_rate": 1.767326113156732e-05, + "loss": 0.7987, + "step": 7992 + }, + { + "epoch": 0.24497364227044258, + "grad_norm": 1.4193769433404966, + "learning_rate": 1.767262455911453e-05, + "loss": 0.7199, + "step": 7993 + }, + { + "epoch": 0.24500429079318378, + "grad_norm": 1.6751464961692035, + "learning_rate": 1.767198791106141e-05, + "loss": 0.8026, + "step": 7994 + }, + { + "epoch": 0.24503493931592496, + "grad_norm": 1.314884869457315, + "learning_rate": 1.7671351187414226e-05, + "loss": 0.7764, + "step": 7995 + }, + { + "epoch": 0.24506558783866617, + "grad_norm": 1.3865275717865642, + "learning_rate": 1.767071438817926e-05, + "loss": 0.8131, + "step": 7996 + }, + { + "epoch": 0.24509623636140737, + "grad_norm": 1.6159880585125197, + "learning_rate": 1.767007751336278e-05, + "loss": 0.7559, + "step": 7997 + }, + { + "epoch": 0.24512688488414858, + "grad_norm": 1.7555244497673888, + "learning_rate": 1.7669440562971067e-05, + "loss": 0.8326, + "step": 7998 + }, + { + "epoch": 0.24515753340688978, + "grad_norm": 1.4841974529108886, + "learning_rate": 1.766880353701039e-05, + "loss": 0.7797, + "step": 7999 + }, + { + "epoch": 0.245188181929631, + "grad_norm": 1.4220495503872734, + "learning_rate": 1.7668166435487033e-05, + "loss": 0.7289, + "step": 8000 + }, + { + "epoch": 0.2452188304523722, + "grad_norm": 1.5895977120084308, + "learning_rate": 1.7667529258407268e-05, + "loss": 0.8196, + "step": 8001 + }, + { + "epoch": 0.2452494789751134, + "grad_norm": 1.5553981716029466, + "learning_rate": 1.7666892005777378e-05, + "loss": 0.7184, + "step": 8002 + }, + { + "epoch": 0.2452801274978546, + "grad_norm": 1.4901132301794833, + "learning_rate": 1.7666254677603635e-05, + "loss": 0.8559, + "step": 8003 + }, + { + "epoch": 0.2453107760205958, + "grad_norm": 1.5347787600768088, + "learning_rate": 1.7665617273892324e-05, + "loss": 0.7239, + "step": 8004 + }, + { + "epoch": 0.24534142454333702, + "grad_norm": 1.383430799563161, + "learning_rate": 1.7664979794649726e-05, + "loss": 0.7298, + "step": 8005 + }, + { + "epoch": 0.24537207306607822, + "grad_norm": 1.5608934774493108, + "learning_rate": 1.766434223988212e-05, + "loss": 0.8723, + "step": 8006 + }, + { + "epoch": 0.24540272158881943, + "grad_norm": 1.3291870355473956, + "learning_rate": 1.766370460959579e-05, + "loss": 0.5992, + "step": 8007 + }, + { + "epoch": 0.24543337011156063, + "grad_norm": 1.4553656338895302, + "learning_rate": 1.7663066903797017e-05, + "loss": 0.8065, + "step": 8008 + }, + { + "epoch": 0.24546401863430184, + "grad_norm": 1.528760209822787, + "learning_rate": 1.766242912249209e-05, + "loss": 0.7819, + "step": 8009 + }, + { + "epoch": 0.24549466715704302, + "grad_norm": 1.6001943446819429, + "learning_rate": 1.7661791265687283e-05, + "loss": 0.7691, + "step": 8010 + }, + { + "epoch": 0.24552531567978422, + "grad_norm": 1.27948507073434, + "learning_rate": 1.7661153333388886e-05, + "loss": 0.7195, + "step": 8011 + }, + { + "epoch": 0.24555596420252543, + "grad_norm": 1.5144873286284213, + "learning_rate": 1.7660515325603188e-05, + "loss": 0.7653, + "step": 8012 + }, + { + "epoch": 0.24558661272526663, + "grad_norm": 1.7512187442778697, + "learning_rate": 1.765987724233647e-05, + "loss": 0.8476, + "step": 8013 + }, + { + "epoch": 0.24561726124800784, + "grad_norm": 0.8339713376476562, + "learning_rate": 1.7659239083595022e-05, + "loss": 0.6506, + "step": 8014 + }, + { + "epoch": 0.24564790977074905, + "grad_norm": 1.4220484039392791, + "learning_rate": 1.765860084938513e-05, + "loss": 0.8082, + "step": 8015 + }, + { + "epoch": 0.24567855829349025, + "grad_norm": 1.3492996275373084, + "learning_rate": 1.7657962539713086e-05, + "loss": 0.7301, + "step": 8016 + }, + { + "epoch": 0.24570920681623146, + "grad_norm": 1.4282975047265998, + "learning_rate": 1.7657324154585177e-05, + "loss": 0.7914, + "step": 8017 + }, + { + "epoch": 0.24573985533897266, + "grad_norm": 1.5099882618177352, + "learning_rate": 1.7656685694007696e-05, + "loss": 0.8901, + "step": 8018 + }, + { + "epoch": 0.24577050386171387, + "grad_norm": 1.4401877832700232, + "learning_rate": 1.7656047157986932e-05, + "loss": 0.7854, + "step": 8019 + }, + { + "epoch": 0.24580115238445507, + "grad_norm": 1.5480745837546128, + "learning_rate": 1.7655408546529177e-05, + "loss": 0.756, + "step": 8020 + }, + { + "epoch": 0.24583180090719628, + "grad_norm": 1.4573620914553502, + "learning_rate": 1.765476985964072e-05, + "loss": 0.7193, + "step": 8021 + }, + { + "epoch": 0.24586244942993749, + "grad_norm": 1.4564108678351284, + "learning_rate": 1.765413109732786e-05, + "loss": 0.8023, + "step": 8022 + }, + { + "epoch": 0.2458930979526787, + "grad_norm": 1.350399657812643, + "learning_rate": 1.7653492259596883e-05, + "loss": 0.7373, + "step": 8023 + }, + { + "epoch": 0.2459237464754199, + "grad_norm": 1.526613231199581, + "learning_rate": 1.7652853346454093e-05, + "loss": 0.798, + "step": 8024 + }, + { + "epoch": 0.2459543949981611, + "grad_norm": 1.3709172618888656, + "learning_rate": 1.7652214357905778e-05, + "loss": 0.7921, + "step": 8025 + }, + { + "epoch": 0.24598504352090228, + "grad_norm": 1.5251033319394578, + "learning_rate": 1.7651575293958238e-05, + "loss": 0.7917, + "step": 8026 + }, + { + "epoch": 0.24601569204364349, + "grad_norm": 1.457439925773553, + "learning_rate": 1.765093615461777e-05, + "loss": 0.8472, + "step": 8027 + }, + { + "epoch": 0.2460463405663847, + "grad_norm": 0.7547980508509945, + "learning_rate": 1.765029693989067e-05, + "loss": 0.626, + "step": 8028 + }, + { + "epoch": 0.2460769890891259, + "grad_norm": 1.611187993984944, + "learning_rate": 1.7649657649783237e-05, + "loss": 0.8133, + "step": 8029 + }, + { + "epoch": 0.2461076376118671, + "grad_norm": 1.5631440902916522, + "learning_rate": 1.764901828430177e-05, + "loss": 0.7949, + "step": 8030 + }, + { + "epoch": 0.2461382861346083, + "grad_norm": 1.6552871541062628, + "learning_rate": 1.7648378843452568e-05, + "loss": 0.7724, + "step": 8031 + }, + { + "epoch": 0.24616893465734951, + "grad_norm": 1.4728211271045977, + "learning_rate": 1.7647739327241933e-05, + "loss": 0.8401, + "step": 8032 + }, + { + "epoch": 0.24619958318009072, + "grad_norm": 1.3694587949515313, + "learning_rate": 1.7647099735676165e-05, + "loss": 0.7841, + "step": 8033 + }, + { + "epoch": 0.24623023170283193, + "grad_norm": 1.3435156841428617, + "learning_rate": 1.7646460068761567e-05, + "loss": 0.688, + "step": 8034 + }, + { + "epoch": 0.24626088022557313, + "grad_norm": 1.5838621382341755, + "learning_rate": 1.7645820326504443e-05, + "loss": 0.7923, + "step": 8035 + }, + { + "epoch": 0.24629152874831434, + "grad_norm": 1.488207601001072, + "learning_rate": 1.7645180508911093e-05, + "loss": 0.9005, + "step": 8036 + }, + { + "epoch": 0.24632217727105554, + "grad_norm": 1.3334253482411393, + "learning_rate": 1.7644540615987824e-05, + "loss": 0.6567, + "step": 8037 + }, + { + "epoch": 0.24635282579379675, + "grad_norm": 1.5892602860095426, + "learning_rate": 1.764390064774094e-05, + "loss": 0.8525, + "step": 8038 + }, + { + "epoch": 0.24638347431653795, + "grad_norm": 1.4119451369491276, + "learning_rate": 1.7643260604176748e-05, + "loss": 0.7, + "step": 8039 + }, + { + "epoch": 0.24641412283927916, + "grad_norm": 1.3761194521848805, + "learning_rate": 1.764262048530155e-05, + "loss": 0.7339, + "step": 8040 + }, + { + "epoch": 0.24644477136202034, + "grad_norm": 1.4233551992967348, + "learning_rate": 1.764198029112166e-05, + "loss": 0.798, + "step": 8041 + }, + { + "epoch": 0.24647541988476154, + "grad_norm": 1.5334391681806245, + "learning_rate": 1.7641340021643385e-05, + "loss": 0.8079, + "step": 8042 + }, + { + "epoch": 0.24650606840750275, + "grad_norm": 1.546490155892837, + "learning_rate": 1.764069967687303e-05, + "loss": 0.8292, + "step": 8043 + }, + { + "epoch": 0.24653671693024395, + "grad_norm": 0.7580580764621587, + "learning_rate": 1.7640059256816905e-05, + "loss": 0.6297, + "step": 8044 + }, + { + "epoch": 0.24656736545298516, + "grad_norm": 1.6134432090480209, + "learning_rate": 1.7639418761481324e-05, + "loss": 0.8081, + "step": 8045 + }, + { + "epoch": 0.24659801397572637, + "grad_norm": 1.9319307594659214, + "learning_rate": 1.763877819087259e-05, + "loss": 0.8189, + "step": 8046 + }, + { + "epoch": 0.24662866249846757, + "grad_norm": 1.4698606613303529, + "learning_rate": 1.763813754499703e-05, + "loss": 0.7698, + "step": 8047 + }, + { + "epoch": 0.24665931102120878, + "grad_norm": 1.3396130680266862, + "learning_rate": 1.7637496823860935e-05, + "loss": 0.7687, + "step": 8048 + }, + { + "epoch": 0.24668995954394998, + "grad_norm": 1.6681458021676308, + "learning_rate": 1.7636856027470637e-05, + "loss": 0.844, + "step": 8049 + }, + { + "epoch": 0.2467206080666912, + "grad_norm": 1.439139955888945, + "learning_rate": 1.763621515583244e-05, + "loss": 0.8104, + "step": 8050 + }, + { + "epoch": 0.2467512565894324, + "grad_norm": 1.4797966766005481, + "learning_rate": 1.763557420895266e-05, + "loss": 0.8024, + "step": 8051 + }, + { + "epoch": 0.2467819051121736, + "grad_norm": 1.482614861111658, + "learning_rate": 1.7634933186837616e-05, + "loss": 0.6883, + "step": 8052 + }, + { + "epoch": 0.2468125536349148, + "grad_norm": 1.6867337589142828, + "learning_rate": 1.7634292089493618e-05, + "loss": 0.7741, + "step": 8053 + }, + { + "epoch": 0.246843202157656, + "grad_norm": 0.7535431295664876, + "learning_rate": 1.7633650916926993e-05, + "loss": 0.6292, + "step": 8054 + }, + { + "epoch": 0.24687385068039722, + "grad_norm": 1.5055176591560904, + "learning_rate": 1.7633009669144048e-05, + "loss": 0.8089, + "step": 8055 + }, + { + "epoch": 0.24690449920313842, + "grad_norm": 0.6935242732804554, + "learning_rate": 1.7632368346151107e-05, + "loss": 0.6527, + "step": 8056 + }, + { + "epoch": 0.2469351477258796, + "grad_norm": 1.5454122455482713, + "learning_rate": 1.7631726947954487e-05, + "loss": 0.7726, + "step": 8057 + }, + { + "epoch": 0.2469657962486208, + "grad_norm": 1.5680905610329543, + "learning_rate": 1.763108547456051e-05, + "loss": 0.9325, + "step": 8058 + }, + { + "epoch": 0.246996444771362, + "grad_norm": 1.5804650333176153, + "learning_rate": 1.7630443925975494e-05, + "loss": 0.8164, + "step": 8059 + }, + { + "epoch": 0.24702709329410322, + "grad_norm": 1.7370114374135006, + "learning_rate": 1.7629802302205764e-05, + "loss": 0.7908, + "step": 8060 + }, + { + "epoch": 0.24705774181684442, + "grad_norm": 1.4113854024398447, + "learning_rate": 1.7629160603257635e-05, + "loss": 0.7939, + "step": 8061 + }, + { + "epoch": 0.24708839033958563, + "grad_norm": 1.392907045604363, + "learning_rate": 1.762851882913744e-05, + "loss": 0.8204, + "step": 8062 + }, + { + "epoch": 0.24711903886232683, + "grad_norm": 0.692344686090162, + "learning_rate": 1.7627876979851493e-05, + "loss": 0.6359, + "step": 8063 + }, + { + "epoch": 0.24714968738506804, + "grad_norm": 1.5960249939378635, + "learning_rate": 1.7627235055406125e-05, + "loss": 0.7133, + "step": 8064 + }, + { + "epoch": 0.24718033590780925, + "grad_norm": 1.6361952100439134, + "learning_rate": 1.7626593055807656e-05, + "loss": 0.85, + "step": 8065 + }, + { + "epoch": 0.24721098443055045, + "grad_norm": 1.495939555528784, + "learning_rate": 1.7625950981062416e-05, + "loss": 0.7142, + "step": 8066 + }, + { + "epoch": 0.24724163295329166, + "grad_norm": 1.3591070489059107, + "learning_rate": 1.7625308831176732e-05, + "loss": 0.8048, + "step": 8067 + }, + { + "epoch": 0.24727228147603286, + "grad_norm": 1.3614797613735605, + "learning_rate": 1.7624666606156924e-05, + "loss": 0.6994, + "step": 8068 + }, + { + "epoch": 0.24730292999877407, + "grad_norm": 1.6449462554916696, + "learning_rate": 1.762402430600933e-05, + "loss": 0.8119, + "step": 8069 + }, + { + "epoch": 0.24733357852151527, + "grad_norm": 1.7057425239532324, + "learning_rate": 1.762338193074027e-05, + "loss": 0.9179, + "step": 8070 + }, + { + "epoch": 0.24736422704425648, + "grad_norm": 0.7523864542630652, + "learning_rate": 1.762273948035608e-05, + "loss": 0.656, + "step": 8071 + }, + { + "epoch": 0.24739487556699766, + "grad_norm": 0.7246652271851063, + "learning_rate": 1.7622096954863085e-05, + "loss": 0.6189, + "step": 8072 + }, + { + "epoch": 0.24742552408973886, + "grad_norm": 1.4909613544365057, + "learning_rate": 1.7621454354267622e-05, + "loss": 0.8842, + "step": 8073 + }, + { + "epoch": 0.24745617261248007, + "grad_norm": 1.6031720047211377, + "learning_rate": 1.7620811678576016e-05, + "loss": 0.7204, + "step": 8074 + }, + { + "epoch": 0.24748682113522127, + "grad_norm": 1.3550236007064356, + "learning_rate": 1.7620168927794605e-05, + "loss": 0.7435, + "step": 8075 + }, + { + "epoch": 0.24751746965796248, + "grad_norm": 1.5263599395598542, + "learning_rate": 1.761952610192972e-05, + "loss": 0.748, + "step": 8076 + }, + { + "epoch": 0.24754811818070369, + "grad_norm": 1.4460060219677435, + "learning_rate": 1.7618883200987693e-05, + "loss": 0.7714, + "step": 8077 + }, + { + "epoch": 0.2475787667034449, + "grad_norm": 0.7946287753053012, + "learning_rate": 1.761824022497486e-05, + "loss": 0.6496, + "step": 8078 + }, + { + "epoch": 0.2476094152261861, + "grad_norm": 1.4767483995841204, + "learning_rate": 1.761759717389756e-05, + "loss": 0.7006, + "step": 8079 + }, + { + "epoch": 0.2476400637489273, + "grad_norm": 1.417100527916903, + "learning_rate": 1.7616954047762123e-05, + "loss": 0.7397, + "step": 8080 + }, + { + "epoch": 0.2476707122716685, + "grad_norm": 0.6832345802421688, + "learning_rate": 1.761631084657489e-05, + "loss": 0.6211, + "step": 8081 + }, + { + "epoch": 0.24770136079440971, + "grad_norm": 1.6387062059137047, + "learning_rate": 1.7615667570342196e-05, + "loss": 0.8469, + "step": 8082 + }, + { + "epoch": 0.24773200931715092, + "grad_norm": 0.6937928473321646, + "learning_rate": 1.7615024219070383e-05, + "loss": 0.6052, + "step": 8083 + }, + { + "epoch": 0.24776265783989213, + "grad_norm": 1.4331549720639936, + "learning_rate": 1.7614380792765786e-05, + "loss": 0.7305, + "step": 8084 + }, + { + "epoch": 0.24779330636263333, + "grad_norm": 1.7770486990428267, + "learning_rate": 1.761373729143475e-05, + "loss": 0.7418, + "step": 8085 + }, + { + "epoch": 0.24782395488537454, + "grad_norm": 1.511388626506577, + "learning_rate": 1.7613093715083608e-05, + "loss": 0.8018, + "step": 8086 + }, + { + "epoch": 0.24785460340811574, + "grad_norm": 1.5425860445471817, + "learning_rate": 1.761245006371871e-05, + "loss": 0.8253, + "step": 8087 + }, + { + "epoch": 0.24788525193085692, + "grad_norm": 1.4353711756282401, + "learning_rate": 1.761180633734639e-05, + "loss": 0.806, + "step": 8088 + }, + { + "epoch": 0.24791590045359813, + "grad_norm": 1.3644200945584628, + "learning_rate": 1.7611162535972997e-05, + "loss": 0.7753, + "step": 8089 + }, + { + "epoch": 0.24794654897633933, + "grad_norm": 1.359432594568296, + "learning_rate": 1.761051865960487e-05, + "loss": 0.7431, + "step": 8090 + }, + { + "epoch": 0.24797719749908054, + "grad_norm": 1.3624799389658269, + "learning_rate": 1.760987470824836e-05, + "loss": 0.7779, + "step": 8091 + }, + { + "epoch": 0.24800784602182174, + "grad_norm": 1.6104877229715515, + "learning_rate": 1.7609230681909803e-05, + "loss": 0.8594, + "step": 8092 + }, + { + "epoch": 0.24803849454456295, + "grad_norm": 1.4204769070049288, + "learning_rate": 1.7608586580595553e-05, + "loss": 0.7787, + "step": 8093 + }, + { + "epoch": 0.24806914306730415, + "grad_norm": 1.3886872440886746, + "learning_rate": 1.760794240431195e-05, + "loss": 0.7299, + "step": 8094 + }, + { + "epoch": 0.24809979159004536, + "grad_norm": 1.4927269253756048, + "learning_rate": 1.7607298153065343e-05, + "loss": 0.8417, + "step": 8095 + }, + { + "epoch": 0.24813044011278657, + "grad_norm": 1.3800967533663868, + "learning_rate": 1.760665382686208e-05, + "loss": 0.7699, + "step": 8096 + }, + { + "epoch": 0.24816108863552777, + "grad_norm": 1.546212968729616, + "learning_rate": 1.7606009425708515e-05, + "loss": 0.723, + "step": 8097 + }, + { + "epoch": 0.24819173715826898, + "grad_norm": 1.5391567701797997, + "learning_rate": 1.760536494961099e-05, + "loss": 0.7991, + "step": 8098 + }, + { + "epoch": 0.24822238568101018, + "grad_norm": 1.4185888680809147, + "learning_rate": 1.760472039857586e-05, + "loss": 0.8389, + "step": 8099 + }, + { + "epoch": 0.2482530342037514, + "grad_norm": 1.5795537289929666, + "learning_rate": 1.7604075772609473e-05, + "loss": 0.6987, + "step": 8100 + }, + { + "epoch": 0.2482836827264926, + "grad_norm": 1.3854127988800797, + "learning_rate": 1.760343107171818e-05, + "loss": 0.7536, + "step": 8101 + }, + { + "epoch": 0.2483143312492338, + "grad_norm": 1.457989956053934, + "learning_rate": 1.760278629590834e-05, + "loss": 0.833, + "step": 8102 + }, + { + "epoch": 0.24834497977197498, + "grad_norm": 1.3152481979517074, + "learning_rate": 1.7602141445186295e-05, + "loss": 0.8289, + "step": 8103 + }, + { + "epoch": 0.24837562829471618, + "grad_norm": 1.3887844723629017, + "learning_rate": 1.7601496519558412e-05, + "loss": 0.7264, + "step": 8104 + }, + { + "epoch": 0.2484062768174574, + "grad_norm": 1.4900173440561086, + "learning_rate": 1.7600851519031035e-05, + "loss": 0.8481, + "step": 8105 + }, + { + "epoch": 0.2484369253401986, + "grad_norm": 1.4441507999648822, + "learning_rate": 1.7600206443610522e-05, + "loss": 0.7653, + "step": 8106 + }, + { + "epoch": 0.2484675738629398, + "grad_norm": 0.8216464826829212, + "learning_rate": 1.7599561293303234e-05, + "loss": 0.6094, + "step": 8107 + }, + { + "epoch": 0.248498222385681, + "grad_norm": 1.689522803393604, + "learning_rate": 1.7598916068115522e-05, + "loss": 0.8787, + "step": 8108 + }, + { + "epoch": 0.2485288709084222, + "grad_norm": 1.6174494847522995, + "learning_rate": 1.7598270768053747e-05, + "loss": 0.7907, + "step": 8109 + }, + { + "epoch": 0.24855951943116342, + "grad_norm": 1.3102581855809694, + "learning_rate": 1.7597625393124265e-05, + "loss": 0.763, + "step": 8110 + }, + { + "epoch": 0.24859016795390462, + "grad_norm": 1.4775897870509838, + "learning_rate": 1.7596979943333435e-05, + "loss": 0.7978, + "step": 8111 + }, + { + "epoch": 0.24862081647664583, + "grad_norm": 1.6111401176690718, + "learning_rate": 1.7596334418687623e-05, + "loss": 0.8025, + "step": 8112 + }, + { + "epoch": 0.24865146499938703, + "grad_norm": 1.5678285683664335, + "learning_rate": 1.759568881919318e-05, + "loss": 0.7393, + "step": 8113 + }, + { + "epoch": 0.24868211352212824, + "grad_norm": 1.5488063372335052, + "learning_rate": 1.759504314485647e-05, + "loss": 0.7889, + "step": 8114 + }, + { + "epoch": 0.24871276204486945, + "grad_norm": 1.3752283199171595, + "learning_rate": 1.759439739568386e-05, + "loss": 0.7315, + "step": 8115 + }, + { + "epoch": 0.24874341056761065, + "grad_norm": 1.3370215370377654, + "learning_rate": 1.7593751571681706e-05, + "loss": 0.7488, + "step": 8116 + }, + { + "epoch": 0.24877405909035186, + "grad_norm": 1.4364863884921195, + "learning_rate": 1.7593105672856376e-05, + "loss": 0.7173, + "step": 8117 + }, + { + "epoch": 0.24880470761309306, + "grad_norm": 1.321747998074865, + "learning_rate": 1.7592459699214232e-05, + "loss": 0.7551, + "step": 8118 + }, + { + "epoch": 0.24883535613583424, + "grad_norm": 1.4153750650586263, + "learning_rate": 1.7591813650761643e-05, + "loss": 0.6819, + "step": 8119 + }, + { + "epoch": 0.24886600465857545, + "grad_norm": 1.4731422348529246, + "learning_rate": 1.7591167527504972e-05, + "loss": 0.7997, + "step": 8120 + }, + { + "epoch": 0.24889665318131665, + "grad_norm": 1.4224270344647012, + "learning_rate": 1.7590521329450583e-05, + "loss": 0.7357, + "step": 8121 + }, + { + "epoch": 0.24892730170405786, + "grad_norm": 1.4507826811660105, + "learning_rate": 1.7589875056604844e-05, + "loss": 0.7655, + "step": 8122 + }, + { + "epoch": 0.24895795022679906, + "grad_norm": 1.4681090833618295, + "learning_rate": 1.7589228708974126e-05, + "loss": 0.7128, + "step": 8123 + }, + { + "epoch": 0.24898859874954027, + "grad_norm": 1.6991514550963829, + "learning_rate": 1.7588582286564796e-05, + "loss": 0.7392, + "step": 8124 + }, + { + "epoch": 0.24901924727228147, + "grad_norm": 1.5819034552810984, + "learning_rate": 1.7587935789383225e-05, + "loss": 0.7767, + "step": 8125 + }, + { + "epoch": 0.24904989579502268, + "grad_norm": 1.438293875800617, + "learning_rate": 1.7587289217435777e-05, + "loss": 0.6989, + "step": 8126 + }, + { + "epoch": 0.24908054431776389, + "grad_norm": 1.3931173894848192, + "learning_rate": 1.758664257072883e-05, + "loss": 0.7585, + "step": 8127 + }, + { + "epoch": 0.2491111928405051, + "grad_norm": 1.5030434360484153, + "learning_rate": 1.7585995849268752e-05, + "loss": 0.7029, + "step": 8128 + }, + { + "epoch": 0.2491418413632463, + "grad_norm": 1.3735979793583684, + "learning_rate": 1.7585349053061915e-05, + "loss": 0.7042, + "step": 8129 + }, + { + "epoch": 0.2491724898859875, + "grad_norm": 1.751344886368114, + "learning_rate": 1.7584702182114696e-05, + "loss": 0.9854, + "step": 8130 + }, + { + "epoch": 0.2492031384087287, + "grad_norm": 1.4620032059649057, + "learning_rate": 1.7584055236433462e-05, + "loss": 0.7388, + "step": 8131 + }, + { + "epoch": 0.24923378693146991, + "grad_norm": 1.460277447091621, + "learning_rate": 1.758340821602459e-05, + "loss": 0.8211, + "step": 8132 + }, + { + "epoch": 0.24926443545421112, + "grad_norm": 1.5587943742684627, + "learning_rate": 1.7582761120894462e-05, + "loss": 0.8312, + "step": 8133 + }, + { + "epoch": 0.2492950839769523, + "grad_norm": 1.6464885836252243, + "learning_rate": 1.7582113951049445e-05, + "loss": 0.8423, + "step": 8134 + }, + { + "epoch": 0.2493257324996935, + "grad_norm": 1.475525376898092, + "learning_rate": 1.758146670649592e-05, + "loss": 0.7932, + "step": 8135 + }, + { + "epoch": 0.2493563810224347, + "grad_norm": 0.7932461073132725, + "learning_rate": 1.7580819387240263e-05, + "loss": 0.6291, + "step": 8136 + }, + { + "epoch": 0.24938702954517591, + "grad_norm": 1.346865712456308, + "learning_rate": 1.758017199328885e-05, + "loss": 0.7801, + "step": 8137 + }, + { + "epoch": 0.24941767806791712, + "grad_norm": 1.5282457454330394, + "learning_rate": 1.757952452464807e-05, + "loss": 0.7999, + "step": 8138 + }, + { + "epoch": 0.24944832659065833, + "grad_norm": 1.6620525170241567, + "learning_rate": 1.757887698132429e-05, + "loss": 0.7546, + "step": 8139 + }, + { + "epoch": 0.24947897511339953, + "grad_norm": 0.7274990121384428, + "learning_rate": 1.75782293633239e-05, + "loss": 0.6141, + "step": 8140 + }, + { + "epoch": 0.24950962363614074, + "grad_norm": 0.6679866193886178, + "learning_rate": 1.7577581670653275e-05, + "loss": 0.6197, + "step": 8141 + }, + { + "epoch": 0.24954027215888194, + "grad_norm": 1.4608526301364089, + "learning_rate": 1.75769339033188e-05, + "loss": 0.7476, + "step": 8142 + }, + { + "epoch": 0.24957092068162315, + "grad_norm": 1.4575319005079157, + "learning_rate": 1.7576286061326854e-05, + "loss": 0.8021, + "step": 8143 + }, + { + "epoch": 0.24960156920436435, + "grad_norm": 1.5357980601688566, + "learning_rate": 1.7575638144683828e-05, + "loss": 0.7175, + "step": 8144 + }, + { + "epoch": 0.24963221772710556, + "grad_norm": 1.5758681190623587, + "learning_rate": 1.7574990153396098e-05, + "loss": 0.756, + "step": 8145 + }, + { + "epoch": 0.24966286624984677, + "grad_norm": 0.7779367876027218, + "learning_rate": 1.757434208747005e-05, + "loss": 0.6244, + "step": 8146 + }, + { + "epoch": 0.24969351477258797, + "grad_norm": 1.6226191192304222, + "learning_rate": 1.7573693946912072e-05, + "loss": 0.8352, + "step": 8147 + }, + { + "epoch": 0.24972416329532918, + "grad_norm": 1.7469449422857974, + "learning_rate": 1.757304573172855e-05, + "loss": 0.8865, + "step": 8148 + }, + { + "epoch": 0.24975481181807038, + "grad_norm": 1.591112143277451, + "learning_rate": 1.7572397441925874e-05, + "loss": 0.8626, + "step": 8149 + }, + { + "epoch": 0.24978546034081156, + "grad_norm": 1.5716788950775575, + "learning_rate": 1.7571749077510424e-05, + "loss": 0.8499, + "step": 8150 + }, + { + "epoch": 0.24981610886355277, + "grad_norm": 1.35210923493557, + "learning_rate": 1.7571100638488596e-05, + "loss": 0.713, + "step": 8151 + }, + { + "epoch": 0.24984675738629397, + "grad_norm": 1.677396932316632, + "learning_rate": 1.7570452124866774e-05, + "loss": 0.7562, + "step": 8152 + }, + { + "epoch": 0.24987740590903518, + "grad_norm": 1.4962247385315695, + "learning_rate": 1.756980353665135e-05, + "loss": 0.8605, + "step": 8153 + }, + { + "epoch": 0.24990805443177638, + "grad_norm": 0.715522626804052, + "learning_rate": 1.7569154873848718e-05, + "loss": 0.6028, + "step": 8154 + }, + { + "epoch": 0.2499387029545176, + "grad_norm": 1.6448785939978239, + "learning_rate": 1.7568506136465267e-05, + "loss": 0.8606, + "step": 8155 + }, + { + "epoch": 0.2499693514772588, + "grad_norm": 1.5455943610860692, + "learning_rate": 1.7567857324507386e-05, + "loss": 0.8191, + "step": 8156 + }, + { + "epoch": 0.25, + "grad_norm": 1.5452749485187538, + "learning_rate": 1.756720843798147e-05, + "loss": 0.7829, + "step": 8157 + }, + { + "epoch": 0.2500306485227412, + "grad_norm": 1.4962266889910283, + "learning_rate": 1.7566559476893915e-05, + "loss": 0.7478, + "step": 8158 + }, + { + "epoch": 0.2500612970454824, + "grad_norm": 1.5390612313945762, + "learning_rate": 1.7565910441251112e-05, + "loss": 0.8103, + "step": 8159 + }, + { + "epoch": 0.2500919455682236, + "grad_norm": 1.4991241107103335, + "learning_rate": 1.756526133105946e-05, + "loss": 0.8097, + "step": 8160 + }, + { + "epoch": 0.2501225940909648, + "grad_norm": 1.4874577012428492, + "learning_rate": 1.756461214632535e-05, + "loss": 0.7268, + "step": 8161 + }, + { + "epoch": 0.25015324261370603, + "grad_norm": 1.628790834368951, + "learning_rate": 1.756396288705518e-05, + "loss": 0.7592, + "step": 8162 + }, + { + "epoch": 0.25018389113644723, + "grad_norm": 1.5007920310003018, + "learning_rate": 1.756331355325535e-05, + "loss": 0.8354, + "step": 8163 + }, + { + "epoch": 0.25021453965918844, + "grad_norm": 1.7252765118881452, + "learning_rate": 1.756266414493226e-05, + "loss": 0.9446, + "step": 8164 + }, + { + "epoch": 0.25024518818192965, + "grad_norm": 1.4823612482168718, + "learning_rate": 1.75620146620923e-05, + "loss": 0.6837, + "step": 8165 + }, + { + "epoch": 0.25027583670467085, + "grad_norm": 1.3966434075404108, + "learning_rate": 1.7561365104741874e-05, + "loss": 0.8385, + "step": 8166 + }, + { + "epoch": 0.25030648522741206, + "grad_norm": 0.7228079774409744, + "learning_rate": 1.7560715472887385e-05, + "loss": 0.6487, + "step": 8167 + }, + { + "epoch": 0.25033713375015326, + "grad_norm": 1.5721109433711338, + "learning_rate": 1.7560065766535235e-05, + "loss": 0.8313, + "step": 8168 + }, + { + "epoch": 0.25036778227289447, + "grad_norm": 0.6907370008553917, + "learning_rate": 1.7559415985691818e-05, + "loss": 0.6241, + "step": 8169 + }, + { + "epoch": 0.2503984307956357, + "grad_norm": 0.6826605570498105, + "learning_rate": 1.7558766130363543e-05, + "loss": 0.6434, + "step": 8170 + }, + { + "epoch": 0.2504290793183769, + "grad_norm": 1.269457923055359, + "learning_rate": 1.7558116200556813e-05, + "loss": 0.7159, + "step": 8171 + }, + { + "epoch": 0.2504597278411181, + "grad_norm": 1.486674212754952, + "learning_rate": 1.7557466196278028e-05, + "loss": 0.8699, + "step": 8172 + }, + { + "epoch": 0.25049037636385924, + "grad_norm": 1.400485684204202, + "learning_rate": 1.7556816117533592e-05, + "loss": 0.7381, + "step": 8173 + }, + { + "epoch": 0.25052102488660044, + "grad_norm": 0.7107110948028547, + "learning_rate": 1.7556165964329918e-05, + "loss": 0.6147, + "step": 8174 + }, + { + "epoch": 0.25055167340934165, + "grad_norm": 1.5214420341489605, + "learning_rate": 1.7555515736673407e-05, + "loss": 0.868, + "step": 8175 + }, + { + "epoch": 0.25058232193208285, + "grad_norm": 1.5232065243293342, + "learning_rate": 1.7554865434570465e-05, + "loss": 0.7497, + "step": 8176 + }, + { + "epoch": 0.25061297045482406, + "grad_norm": 1.6532488129280327, + "learning_rate": 1.7554215058027503e-05, + "loss": 0.8623, + "step": 8177 + }, + { + "epoch": 0.25064361897756526, + "grad_norm": 1.4610669913226784, + "learning_rate": 1.7553564607050924e-05, + "loss": 0.7386, + "step": 8178 + }, + { + "epoch": 0.25067426750030647, + "grad_norm": 1.4905287605479167, + "learning_rate": 1.7552914081647143e-05, + "loss": 0.7791, + "step": 8179 + }, + { + "epoch": 0.2507049160230477, + "grad_norm": 1.6759571802168602, + "learning_rate": 1.7552263481822566e-05, + "loss": 0.846, + "step": 8180 + }, + { + "epoch": 0.2507355645457889, + "grad_norm": 1.4138728643079508, + "learning_rate": 1.7551612807583603e-05, + "loss": 0.749, + "step": 8181 + }, + { + "epoch": 0.2507662130685301, + "grad_norm": 0.6948600842565509, + "learning_rate": 1.755096205893667e-05, + "loss": 0.6157, + "step": 8182 + }, + { + "epoch": 0.2507968615912713, + "grad_norm": 1.5205787614947768, + "learning_rate": 1.7550311235888173e-05, + "loss": 0.7604, + "step": 8183 + }, + { + "epoch": 0.2508275101140125, + "grad_norm": 1.5891875260670227, + "learning_rate": 1.7549660338444526e-05, + "loss": 0.7818, + "step": 8184 + }, + { + "epoch": 0.2508581586367537, + "grad_norm": 1.5194308725022407, + "learning_rate": 1.7549009366612152e-05, + "loss": 0.7907, + "step": 8185 + }, + { + "epoch": 0.2508888071594949, + "grad_norm": 1.7585733937369863, + "learning_rate": 1.754835832039745e-05, + "loss": 0.8237, + "step": 8186 + }, + { + "epoch": 0.2509194556822361, + "grad_norm": 1.3116028995321698, + "learning_rate": 1.7547707199806843e-05, + "loss": 0.7806, + "step": 8187 + }, + { + "epoch": 0.2509501042049773, + "grad_norm": 0.7411644720775376, + "learning_rate": 1.7547056004846746e-05, + "loss": 0.6329, + "step": 8188 + }, + { + "epoch": 0.2509807527277185, + "grad_norm": 1.4009787101721844, + "learning_rate": 1.7546404735523577e-05, + "loss": 0.9088, + "step": 8189 + }, + { + "epoch": 0.25101140125045973, + "grad_norm": 1.3841039919525842, + "learning_rate": 1.754575339184375e-05, + "loss": 0.8761, + "step": 8190 + }, + { + "epoch": 0.25104204977320094, + "grad_norm": 1.5555913466731386, + "learning_rate": 1.7545101973813686e-05, + "loss": 0.7237, + "step": 8191 + }, + { + "epoch": 0.25107269829594214, + "grad_norm": 1.6868856189561285, + "learning_rate": 1.75444504814398e-05, + "loss": 0.7272, + "step": 8192 + }, + { + "epoch": 0.25110334681868335, + "grad_norm": 0.6929670860041501, + "learning_rate": 1.7543798914728512e-05, + "loss": 0.6143, + "step": 8193 + }, + { + "epoch": 0.25113399534142455, + "grad_norm": 1.5730991668900423, + "learning_rate": 1.7543147273686245e-05, + "loss": 0.8157, + "step": 8194 + }, + { + "epoch": 0.25116464386416576, + "grad_norm": 1.351760071370966, + "learning_rate": 1.7542495558319416e-05, + "loss": 0.8113, + "step": 8195 + }, + { + "epoch": 0.25119529238690697, + "grad_norm": 1.5910846412421897, + "learning_rate": 1.754184376863445e-05, + "loss": 0.8314, + "step": 8196 + }, + { + "epoch": 0.25122594090964817, + "grad_norm": 1.5754836324916142, + "learning_rate": 1.754119190463777e-05, + "loss": 0.8281, + "step": 8197 + }, + { + "epoch": 0.2512565894323894, + "grad_norm": 1.5547300220043734, + "learning_rate": 1.7540539966335792e-05, + "loss": 0.867, + "step": 8198 + }, + { + "epoch": 0.2512872379551306, + "grad_norm": 1.6248717224587068, + "learning_rate": 1.7539887953734947e-05, + "loss": 0.7444, + "step": 8199 + }, + { + "epoch": 0.2513178864778718, + "grad_norm": 1.568456249200246, + "learning_rate": 1.753923586684166e-05, + "loss": 0.7854, + "step": 8200 + }, + { + "epoch": 0.251348535000613, + "grad_norm": 1.6047898704240302, + "learning_rate": 1.7538583705662344e-05, + "loss": 0.859, + "step": 8201 + }, + { + "epoch": 0.2513791835233542, + "grad_norm": 0.7231068839717221, + "learning_rate": 1.753793147020344e-05, + "loss": 0.6049, + "step": 8202 + }, + { + "epoch": 0.2514098320460954, + "grad_norm": 0.7514522732500749, + "learning_rate": 1.753727916047137e-05, + "loss": 0.631, + "step": 8203 + }, + { + "epoch": 0.25144048056883656, + "grad_norm": 0.7254922335543287, + "learning_rate": 1.7536626776472557e-05, + "loss": 0.6579, + "step": 8204 + }, + { + "epoch": 0.25147112909157776, + "grad_norm": 1.7377067491660183, + "learning_rate": 1.7535974318213434e-05, + "loss": 0.8847, + "step": 8205 + }, + { + "epoch": 0.25150177761431897, + "grad_norm": 1.5584364277640985, + "learning_rate": 1.753532178570043e-05, + "loss": 0.8677, + "step": 8206 + }, + { + "epoch": 0.25153242613706017, + "grad_norm": 1.344982135589664, + "learning_rate": 1.7534669178939964e-05, + "loss": 0.6347, + "step": 8207 + }, + { + "epoch": 0.2515630746598014, + "grad_norm": 1.2727308029538809, + "learning_rate": 1.753401649793848e-05, + "loss": 0.7381, + "step": 8208 + }, + { + "epoch": 0.2515937231825426, + "grad_norm": 1.5376804119374379, + "learning_rate": 1.7533363742702404e-05, + "loss": 0.813, + "step": 8209 + }, + { + "epoch": 0.2516243717052838, + "grad_norm": 1.3754902003281124, + "learning_rate": 1.753271091323817e-05, + "loss": 0.8066, + "step": 8210 + }, + { + "epoch": 0.251655020228025, + "grad_norm": 1.5941136082224283, + "learning_rate": 1.7532058009552204e-05, + "loss": 0.8191, + "step": 8211 + }, + { + "epoch": 0.2516856687507662, + "grad_norm": 1.6825680857200211, + "learning_rate": 1.7531405031650945e-05, + "loss": 0.7111, + "step": 8212 + }, + { + "epoch": 0.2517163172735074, + "grad_norm": 1.5564284856397606, + "learning_rate": 1.7530751979540824e-05, + "loss": 0.7414, + "step": 8213 + }, + { + "epoch": 0.2517469657962486, + "grad_norm": 0.9356444954309006, + "learning_rate": 1.7530098853228276e-05, + "loss": 0.6208, + "step": 8214 + }, + { + "epoch": 0.2517776143189898, + "grad_norm": 1.744221962829095, + "learning_rate": 1.7529445652719742e-05, + "loss": 0.7474, + "step": 8215 + }, + { + "epoch": 0.251808262841731, + "grad_norm": 1.6622796098989039, + "learning_rate": 1.752879237802165e-05, + "loss": 0.8165, + "step": 8216 + }, + { + "epoch": 0.25183891136447223, + "grad_norm": 1.63774724542641, + "learning_rate": 1.752813902914044e-05, + "loss": 0.8972, + "step": 8217 + }, + { + "epoch": 0.25186955988721343, + "grad_norm": 1.3647768800523505, + "learning_rate": 1.752748560608255e-05, + "loss": 0.7722, + "step": 8218 + }, + { + "epoch": 0.25190020840995464, + "grad_norm": 1.423874711071805, + "learning_rate": 1.752683210885442e-05, + "loss": 0.7573, + "step": 8219 + }, + { + "epoch": 0.25193085693269585, + "grad_norm": 1.4570718256251098, + "learning_rate": 1.7526178537462488e-05, + "loss": 0.8478, + "step": 8220 + }, + { + "epoch": 0.25196150545543705, + "grad_norm": 1.447242319641658, + "learning_rate": 1.752552489191319e-05, + "loss": 0.7619, + "step": 8221 + }, + { + "epoch": 0.25199215397817826, + "grad_norm": 1.4558916970948665, + "learning_rate": 1.7524871172212972e-05, + "loss": 0.7758, + "step": 8222 + }, + { + "epoch": 0.25202280250091946, + "grad_norm": 1.4037975052381588, + "learning_rate": 1.7524217378368273e-05, + "loss": 0.7844, + "step": 8223 + }, + { + "epoch": 0.25205345102366067, + "grad_norm": 1.3982012020637653, + "learning_rate": 1.7523563510385535e-05, + "loss": 0.6591, + "step": 8224 + }, + { + "epoch": 0.2520840995464019, + "grad_norm": 1.4094490949800702, + "learning_rate": 1.75229095682712e-05, + "loss": 0.7703, + "step": 8225 + }, + { + "epoch": 0.2521147480691431, + "grad_norm": 1.4622723781747742, + "learning_rate": 1.7522255552031714e-05, + "loss": 0.7621, + "step": 8226 + }, + { + "epoch": 0.2521453965918843, + "grad_norm": 1.474908154749445, + "learning_rate": 1.7521601461673517e-05, + "loss": 0.7358, + "step": 8227 + }, + { + "epoch": 0.2521760451146255, + "grad_norm": 0.7349610868102527, + "learning_rate": 1.7520947297203057e-05, + "loss": 0.63, + "step": 8228 + }, + { + "epoch": 0.2522066936373667, + "grad_norm": 1.527233589694923, + "learning_rate": 1.752029305862678e-05, + "loss": 0.7606, + "step": 8229 + }, + { + "epoch": 0.2522373421601079, + "grad_norm": 0.7300493220855269, + "learning_rate": 1.751963874595113e-05, + "loss": 0.645, + "step": 8230 + }, + { + "epoch": 0.2522679906828491, + "grad_norm": 1.5219412093876736, + "learning_rate": 1.7518984359182555e-05, + "loss": 0.8441, + "step": 8231 + }, + { + "epoch": 0.2522986392055903, + "grad_norm": 1.550165134649922, + "learning_rate": 1.7518329898327505e-05, + "loss": 0.8267, + "step": 8232 + }, + { + "epoch": 0.2523292877283315, + "grad_norm": 1.4238128956670602, + "learning_rate": 1.7517675363392427e-05, + "loss": 0.7979, + "step": 8233 + }, + { + "epoch": 0.2523599362510727, + "grad_norm": 1.5432342242741701, + "learning_rate": 1.751702075438377e-05, + "loss": 0.7529, + "step": 8234 + }, + { + "epoch": 0.2523905847738139, + "grad_norm": 1.4454514225661503, + "learning_rate": 1.751636607130798e-05, + "loss": 0.7805, + "step": 8235 + }, + { + "epoch": 0.2524212332965551, + "grad_norm": 1.454211424771513, + "learning_rate": 1.7515711314171516e-05, + "loss": 0.8085, + "step": 8236 + }, + { + "epoch": 0.2524518818192963, + "grad_norm": 1.3874980422165122, + "learning_rate": 1.7515056482980827e-05, + "loss": 0.7472, + "step": 8237 + }, + { + "epoch": 0.2524825303420375, + "grad_norm": 1.5390365193040854, + "learning_rate": 1.751440157774236e-05, + "loss": 0.7878, + "step": 8238 + }, + { + "epoch": 0.2525131788647787, + "grad_norm": 1.370688457325573, + "learning_rate": 1.7513746598462574e-05, + "loss": 0.7476, + "step": 8239 + }, + { + "epoch": 0.2525438273875199, + "grad_norm": 1.5312618963727778, + "learning_rate": 1.7513091545147924e-05, + "loss": 0.9321, + "step": 8240 + }, + { + "epoch": 0.2525744759102611, + "grad_norm": 1.5304249043686158, + "learning_rate": 1.7512436417804853e-05, + "loss": 0.7575, + "step": 8241 + }, + { + "epoch": 0.2526051244330023, + "grad_norm": 1.3948340995426753, + "learning_rate": 1.7511781216439827e-05, + "loss": 0.8136, + "step": 8242 + }, + { + "epoch": 0.2526357729557435, + "grad_norm": 1.5136623272750542, + "learning_rate": 1.75111259410593e-05, + "loss": 0.7816, + "step": 8243 + }, + { + "epoch": 0.2526664214784847, + "grad_norm": 1.5640467440201227, + "learning_rate": 1.7510470591669724e-05, + "loss": 0.6549, + "step": 8244 + }, + { + "epoch": 0.25269707000122593, + "grad_norm": 1.5293086290489493, + "learning_rate": 1.7509815168277563e-05, + "loss": 0.7775, + "step": 8245 + }, + { + "epoch": 0.25272771852396714, + "grad_norm": 1.5406461271335985, + "learning_rate": 1.7509159670889267e-05, + "loss": 0.7192, + "step": 8246 + }, + { + "epoch": 0.25275836704670834, + "grad_norm": 1.487024213996429, + "learning_rate": 1.750850409951131e-05, + "loss": 0.6704, + "step": 8247 + }, + { + "epoch": 0.25278901556944955, + "grad_norm": 1.3698903399833409, + "learning_rate": 1.7507848454150128e-05, + "loss": 0.7997, + "step": 8248 + }, + { + "epoch": 0.25281966409219075, + "grad_norm": 1.6077573039752147, + "learning_rate": 1.75071927348122e-05, + "loss": 0.7629, + "step": 8249 + }, + { + "epoch": 0.25285031261493196, + "grad_norm": 1.4164991041211663, + "learning_rate": 1.7506536941503983e-05, + "loss": 0.7376, + "step": 8250 + }, + { + "epoch": 0.25288096113767317, + "grad_norm": 1.4505431163120934, + "learning_rate": 1.7505881074231937e-05, + "loss": 0.7806, + "step": 8251 + }, + { + "epoch": 0.25291160966041437, + "grad_norm": 1.3853122646343035, + "learning_rate": 1.7505225133002518e-05, + "loss": 0.7126, + "step": 8252 + }, + { + "epoch": 0.2529422581831556, + "grad_norm": 1.4790157573106695, + "learning_rate": 1.7504569117822202e-05, + "loss": 0.8624, + "step": 8253 + }, + { + "epoch": 0.2529729067058968, + "grad_norm": 1.4609527329044836, + "learning_rate": 1.7503913028697445e-05, + "loss": 0.7675, + "step": 8254 + }, + { + "epoch": 0.253003555228638, + "grad_norm": 1.4325822393325145, + "learning_rate": 1.750325686563471e-05, + "loss": 0.8504, + "step": 8255 + }, + { + "epoch": 0.2530342037513792, + "grad_norm": 1.4678865470382956, + "learning_rate": 1.7502600628640468e-05, + "loss": 0.7383, + "step": 8256 + }, + { + "epoch": 0.2530648522741204, + "grad_norm": 1.456000920157543, + "learning_rate": 1.7501944317721184e-05, + "loss": 0.7932, + "step": 8257 + }, + { + "epoch": 0.2530955007968616, + "grad_norm": 1.504446212024032, + "learning_rate": 1.750128793288332e-05, + "loss": 0.7069, + "step": 8258 + }, + { + "epoch": 0.2531261493196028, + "grad_norm": 1.4125248473883754, + "learning_rate": 1.7500631474133348e-05, + "loss": 0.8149, + "step": 8259 + }, + { + "epoch": 0.253156797842344, + "grad_norm": 0.8952995857196163, + "learning_rate": 1.7499974941477735e-05, + "loss": 0.6679, + "step": 8260 + }, + { + "epoch": 0.2531874463650852, + "grad_norm": 1.4313267822735933, + "learning_rate": 1.749931833492295e-05, + "loss": 0.7429, + "step": 8261 + }, + { + "epoch": 0.25321809488782643, + "grad_norm": 1.803196772254488, + "learning_rate": 1.7498661654475462e-05, + "loss": 0.7633, + "step": 8262 + }, + { + "epoch": 0.25324874341056763, + "grad_norm": 0.6949478363789433, + "learning_rate": 1.7498004900141742e-05, + "loss": 0.6467, + "step": 8263 + }, + { + "epoch": 0.25327939193330884, + "grad_norm": 1.603427430676692, + "learning_rate": 1.7497348071928263e-05, + "loss": 0.8741, + "step": 8264 + }, + { + "epoch": 0.25331004045605005, + "grad_norm": 1.5257085598731563, + "learning_rate": 1.7496691169841497e-05, + "loss": 0.8087, + "step": 8265 + }, + { + "epoch": 0.2533406889787912, + "grad_norm": 1.339895751254026, + "learning_rate": 1.749603419388791e-05, + "loss": 0.8031, + "step": 8266 + }, + { + "epoch": 0.2533713375015324, + "grad_norm": 1.5651794939971966, + "learning_rate": 1.749537714407398e-05, + "loss": 0.8028, + "step": 8267 + }, + { + "epoch": 0.2534019860242736, + "grad_norm": 1.5103225152836444, + "learning_rate": 1.7494720020406184e-05, + "loss": 0.7906, + "step": 8268 + }, + { + "epoch": 0.2534326345470148, + "grad_norm": 1.5515267362556697, + "learning_rate": 1.7494062822890992e-05, + "loss": 0.796, + "step": 8269 + }, + { + "epoch": 0.253463283069756, + "grad_norm": 1.4646241908251003, + "learning_rate": 1.7493405551534883e-05, + "loss": 0.722, + "step": 8270 + }, + { + "epoch": 0.2534939315924972, + "grad_norm": 1.607415129375504, + "learning_rate": 1.749274820634433e-05, + "loss": 0.8104, + "step": 8271 + }, + { + "epoch": 0.25352458011523843, + "grad_norm": 0.832674041421667, + "learning_rate": 1.7492090787325816e-05, + "loss": 0.645, + "step": 8272 + }, + { + "epoch": 0.25355522863797963, + "grad_norm": 1.3871918261988343, + "learning_rate": 1.749143329448581e-05, + "loss": 0.7338, + "step": 8273 + }, + { + "epoch": 0.25358587716072084, + "grad_norm": 1.3419291614825946, + "learning_rate": 1.74907757278308e-05, + "loss": 0.6958, + "step": 8274 + }, + { + "epoch": 0.25361652568346205, + "grad_norm": 1.5027489991703478, + "learning_rate": 1.7490118087367257e-05, + "loss": 0.816, + "step": 8275 + }, + { + "epoch": 0.25364717420620325, + "grad_norm": 1.4722180754422778, + "learning_rate": 1.7489460373101662e-05, + "loss": 0.8209, + "step": 8276 + }, + { + "epoch": 0.25367782272894446, + "grad_norm": 1.512558982785787, + "learning_rate": 1.74888025850405e-05, + "loss": 0.763, + "step": 8277 + }, + { + "epoch": 0.25370847125168566, + "grad_norm": 1.666355716991812, + "learning_rate": 1.748814472319025e-05, + "loss": 0.8789, + "step": 8278 + }, + { + "epoch": 0.25373911977442687, + "grad_norm": 1.5837436005111694, + "learning_rate": 1.7487486787557394e-05, + "loss": 0.8174, + "step": 8279 + }, + { + "epoch": 0.2537697682971681, + "grad_norm": 1.4825246482926004, + "learning_rate": 1.7486828778148416e-05, + "loss": 0.7625, + "step": 8280 + }, + { + "epoch": 0.2538004168199093, + "grad_norm": 1.664312947062625, + "learning_rate": 1.7486170694969798e-05, + "loss": 0.7477, + "step": 8281 + }, + { + "epoch": 0.2538310653426505, + "grad_norm": 1.5484386427045764, + "learning_rate": 1.7485512538028023e-05, + "loss": 0.8581, + "step": 8282 + }, + { + "epoch": 0.2538617138653917, + "grad_norm": 1.4337145208198139, + "learning_rate": 1.748485430732958e-05, + "loss": 0.8796, + "step": 8283 + }, + { + "epoch": 0.2538923623881329, + "grad_norm": 1.5533758642453745, + "learning_rate": 1.748419600288095e-05, + "loss": 0.9383, + "step": 8284 + }, + { + "epoch": 0.2539230109108741, + "grad_norm": 1.7045828583935037, + "learning_rate": 1.7483537624688622e-05, + "loss": 0.9258, + "step": 8285 + }, + { + "epoch": 0.2539536594336153, + "grad_norm": 1.4995297979763427, + "learning_rate": 1.7482879172759086e-05, + "loss": 0.6782, + "step": 8286 + }, + { + "epoch": 0.2539843079563565, + "grad_norm": 1.5461433628839762, + "learning_rate": 1.748222064709882e-05, + "loss": 0.858, + "step": 8287 + }, + { + "epoch": 0.2540149564790977, + "grad_norm": 1.377201830602742, + "learning_rate": 1.7481562047714326e-05, + "loss": 0.6749, + "step": 8288 + }, + { + "epoch": 0.2540456050018389, + "grad_norm": 1.5074997817525218, + "learning_rate": 1.7480903374612087e-05, + "loss": 0.733, + "step": 8289 + }, + { + "epoch": 0.25407625352458013, + "grad_norm": 1.417282340519639, + "learning_rate": 1.748024462779859e-05, + "loss": 0.7668, + "step": 8290 + }, + { + "epoch": 0.25410690204732134, + "grad_norm": 1.4062110688908465, + "learning_rate": 1.7479585807280333e-05, + "loss": 0.7589, + "step": 8291 + }, + { + "epoch": 0.25413755057006254, + "grad_norm": 1.540862205276347, + "learning_rate": 1.74789269130638e-05, + "loss": 0.721, + "step": 8292 + }, + { + "epoch": 0.25416819909280375, + "grad_norm": 1.567338817990714, + "learning_rate": 1.7478267945155488e-05, + "loss": 0.8469, + "step": 8293 + }, + { + "epoch": 0.25419884761554495, + "grad_norm": 1.3335709966150118, + "learning_rate": 1.7477608903561885e-05, + "loss": 0.6912, + "step": 8294 + }, + { + "epoch": 0.25422949613828616, + "grad_norm": 1.4939597500544242, + "learning_rate": 1.7476949788289494e-05, + "loss": 0.7783, + "step": 8295 + }, + { + "epoch": 0.25426014466102737, + "grad_norm": 1.3893078811432968, + "learning_rate": 1.74762905993448e-05, + "loss": 0.6134, + "step": 8296 + }, + { + "epoch": 0.2542907931837685, + "grad_norm": 1.471184254654752, + "learning_rate": 1.7475631336734303e-05, + "loss": 0.7984, + "step": 8297 + }, + { + "epoch": 0.2543214417065097, + "grad_norm": 1.429979607925186, + "learning_rate": 1.7474972000464494e-05, + "loss": 0.7558, + "step": 8298 + }, + { + "epoch": 0.2543520902292509, + "grad_norm": 1.4945472514763976, + "learning_rate": 1.7474312590541876e-05, + "loss": 0.6806, + "step": 8299 + }, + { + "epoch": 0.25438273875199213, + "grad_norm": 1.836197690583333, + "learning_rate": 1.7473653106972946e-05, + "loss": 0.7462, + "step": 8300 + }, + { + "epoch": 0.25441338727473334, + "grad_norm": 0.7617759642275567, + "learning_rate": 1.7472993549764198e-05, + "loss": 0.6424, + "step": 8301 + }, + { + "epoch": 0.25444403579747454, + "grad_norm": 1.4455387363838716, + "learning_rate": 1.747233391892213e-05, + "loss": 0.7483, + "step": 8302 + }, + { + "epoch": 0.25447468432021575, + "grad_norm": 1.3927031490851294, + "learning_rate": 1.7471674214453248e-05, + "loss": 0.8798, + "step": 8303 + }, + { + "epoch": 0.25450533284295696, + "grad_norm": 1.6598642422564152, + "learning_rate": 1.7471014436364047e-05, + "loss": 0.8907, + "step": 8304 + }, + { + "epoch": 0.25453598136569816, + "grad_norm": 1.6770619839365348, + "learning_rate": 1.7470354584661028e-05, + "loss": 0.8422, + "step": 8305 + }, + { + "epoch": 0.25456662988843937, + "grad_norm": 0.7044379650053411, + "learning_rate": 1.7469694659350697e-05, + "loss": 0.6252, + "step": 8306 + }, + { + "epoch": 0.25459727841118057, + "grad_norm": 1.5011246262315536, + "learning_rate": 1.746903466043955e-05, + "loss": 0.8401, + "step": 8307 + }, + { + "epoch": 0.2546279269339218, + "grad_norm": 1.389823492196016, + "learning_rate": 1.7468374587934092e-05, + "loss": 0.6939, + "step": 8308 + }, + { + "epoch": 0.254658575456663, + "grad_norm": 0.6688895587059466, + "learning_rate": 1.7467714441840832e-05, + "loss": 0.6551, + "step": 8309 + }, + { + "epoch": 0.2546892239794042, + "grad_norm": 1.6015338921643592, + "learning_rate": 1.746705422216627e-05, + "loss": 0.7574, + "step": 8310 + }, + { + "epoch": 0.2547198725021454, + "grad_norm": 1.5754585454510202, + "learning_rate": 1.7466393928916913e-05, + "loss": 0.8133, + "step": 8311 + }, + { + "epoch": 0.2547505210248866, + "grad_norm": 1.6482718429158447, + "learning_rate": 1.7465733562099265e-05, + "loss": 0.7961, + "step": 8312 + }, + { + "epoch": 0.2547811695476278, + "grad_norm": 1.4071879632818995, + "learning_rate": 1.7465073121719833e-05, + "loss": 0.851, + "step": 8313 + }, + { + "epoch": 0.254811818070369, + "grad_norm": 1.399892769769842, + "learning_rate": 1.7464412607785128e-05, + "loss": 0.8252, + "step": 8314 + }, + { + "epoch": 0.2548424665931102, + "grad_norm": 1.5111080448143543, + "learning_rate": 1.7463752020301654e-05, + "loss": 0.8989, + "step": 8315 + }, + { + "epoch": 0.2548731151158514, + "grad_norm": 0.6964726432407178, + "learning_rate": 1.7463091359275924e-05, + "loss": 0.615, + "step": 8316 + }, + { + "epoch": 0.25490376363859263, + "grad_norm": 1.5499445471978264, + "learning_rate": 1.7462430624714442e-05, + "loss": 0.838, + "step": 8317 + }, + { + "epoch": 0.25493441216133383, + "grad_norm": 1.7031572382340106, + "learning_rate": 1.7461769816623724e-05, + "loss": 0.8946, + "step": 8318 + }, + { + "epoch": 0.25496506068407504, + "grad_norm": 1.492421025016256, + "learning_rate": 1.7461108935010278e-05, + "loss": 0.7468, + "step": 8319 + }, + { + "epoch": 0.25499570920681625, + "grad_norm": 1.3298120090800314, + "learning_rate": 1.7460447979880614e-05, + "loss": 0.6773, + "step": 8320 + }, + { + "epoch": 0.25502635772955745, + "grad_norm": 1.374703059333177, + "learning_rate": 1.7459786951241253e-05, + "loss": 0.8016, + "step": 8321 + }, + { + "epoch": 0.25505700625229866, + "grad_norm": 1.2500029650701212, + "learning_rate": 1.7459125849098697e-05, + "loss": 0.6577, + "step": 8322 + }, + { + "epoch": 0.25508765477503986, + "grad_norm": 1.548762655226168, + "learning_rate": 1.7458464673459468e-05, + "loss": 0.8164, + "step": 8323 + }, + { + "epoch": 0.25511830329778107, + "grad_norm": 1.527261613357123, + "learning_rate": 1.7457803424330078e-05, + "loss": 0.8197, + "step": 8324 + }, + { + "epoch": 0.2551489518205223, + "grad_norm": 1.4880386118753266, + "learning_rate": 1.7457142101717043e-05, + "loss": 0.8385, + "step": 8325 + }, + { + "epoch": 0.2551796003432635, + "grad_norm": 1.3175794705382666, + "learning_rate": 1.745648070562688e-05, + "loss": 0.7857, + "step": 8326 + }, + { + "epoch": 0.2552102488660047, + "grad_norm": 1.3871643088813501, + "learning_rate": 1.7455819236066102e-05, + "loss": 0.7492, + "step": 8327 + }, + { + "epoch": 0.25524089738874584, + "grad_norm": 0.6797834272739397, + "learning_rate": 1.745515769304123e-05, + "loss": 0.649, + "step": 8328 + }, + { + "epoch": 0.25527154591148704, + "grad_norm": 0.6733994249470798, + "learning_rate": 1.7454496076558784e-05, + "loss": 0.6613, + "step": 8329 + }, + { + "epoch": 0.25530219443422825, + "grad_norm": 1.7316666709288853, + "learning_rate": 1.745383438662528e-05, + "loss": 0.9134, + "step": 8330 + }, + { + "epoch": 0.25533284295696945, + "grad_norm": 1.4806148072634906, + "learning_rate": 1.745317262324724e-05, + "loss": 0.8203, + "step": 8331 + }, + { + "epoch": 0.25536349147971066, + "grad_norm": 1.397424905397583, + "learning_rate": 1.745251078643118e-05, + "loss": 0.7821, + "step": 8332 + }, + { + "epoch": 0.25539414000245186, + "grad_norm": 1.637224626687801, + "learning_rate": 1.7451848876183626e-05, + "loss": 0.7714, + "step": 8333 + }, + { + "epoch": 0.25542478852519307, + "grad_norm": 0.7167682755965838, + "learning_rate": 1.74511868925111e-05, + "loss": 0.6039, + "step": 8334 + }, + { + "epoch": 0.2554554370479343, + "grad_norm": 1.4308750860755757, + "learning_rate": 1.745052483542012e-05, + "loss": 0.7839, + "step": 8335 + }, + { + "epoch": 0.2554860855706755, + "grad_norm": 1.4926276598035988, + "learning_rate": 1.744986270491721e-05, + "loss": 0.734, + "step": 8336 + }, + { + "epoch": 0.2555167340934167, + "grad_norm": 0.7209751065136856, + "learning_rate": 1.74492005010089e-05, + "loss": 0.6378, + "step": 8337 + }, + { + "epoch": 0.2555473826161579, + "grad_norm": 1.7587283422622753, + "learning_rate": 1.7448538223701714e-05, + "loss": 0.8139, + "step": 8338 + }, + { + "epoch": 0.2555780311388991, + "grad_norm": 1.3368024227909217, + "learning_rate": 1.7447875873002172e-05, + "loss": 0.7796, + "step": 8339 + }, + { + "epoch": 0.2556086796616403, + "grad_norm": 1.6006131101358083, + "learning_rate": 1.7447213448916803e-05, + "loss": 0.7109, + "step": 8340 + }, + { + "epoch": 0.2556393281843815, + "grad_norm": 0.7534066291814165, + "learning_rate": 1.744655095145214e-05, + "loss": 0.6263, + "step": 8341 + }, + { + "epoch": 0.2556699767071227, + "grad_norm": 1.5423342457362037, + "learning_rate": 1.74458883806147e-05, + "loss": 0.7997, + "step": 8342 + }, + { + "epoch": 0.2557006252298639, + "grad_norm": 1.409410516058837, + "learning_rate": 1.744522573641102e-05, + "loss": 0.7506, + "step": 8343 + }, + { + "epoch": 0.2557312737526051, + "grad_norm": 1.5243598991467127, + "learning_rate": 1.744456301884762e-05, + "loss": 0.85, + "step": 8344 + }, + { + "epoch": 0.25576192227534633, + "grad_norm": 1.4856639877536677, + "learning_rate": 1.744390022793104e-05, + "loss": 0.8181, + "step": 8345 + }, + { + "epoch": 0.25579257079808754, + "grad_norm": 1.6166141035945665, + "learning_rate": 1.7443237363667806e-05, + "loss": 0.855, + "step": 8346 + }, + { + "epoch": 0.25582321932082874, + "grad_norm": 1.4852365517010209, + "learning_rate": 1.744257442606445e-05, + "loss": 0.7929, + "step": 8347 + }, + { + "epoch": 0.25585386784356995, + "grad_norm": 1.636092918869636, + "learning_rate": 1.7441911415127503e-05, + "loss": 0.8014, + "step": 8348 + }, + { + "epoch": 0.25588451636631115, + "grad_norm": 1.3508675605826814, + "learning_rate": 1.74412483308635e-05, + "loss": 0.76, + "step": 8349 + }, + { + "epoch": 0.25591516488905236, + "grad_norm": 1.4688934355850367, + "learning_rate": 1.744058517327897e-05, + "loss": 0.7121, + "step": 8350 + }, + { + "epoch": 0.25594581341179357, + "grad_norm": 1.385320806342172, + "learning_rate": 1.7439921942380454e-05, + "loss": 0.8324, + "step": 8351 + }, + { + "epoch": 0.25597646193453477, + "grad_norm": 1.3839510098162957, + "learning_rate": 1.7439258638174483e-05, + "loss": 0.7914, + "step": 8352 + }, + { + "epoch": 0.256007110457276, + "grad_norm": 1.5358656479288544, + "learning_rate": 1.7438595260667592e-05, + "loss": 0.821, + "step": 8353 + }, + { + "epoch": 0.2560377589800172, + "grad_norm": 1.4625209658668377, + "learning_rate": 1.743793180986632e-05, + "loss": 0.8362, + "step": 8354 + }, + { + "epoch": 0.2560684075027584, + "grad_norm": 1.4674839141417126, + "learning_rate": 1.7437268285777203e-05, + "loss": 0.8746, + "step": 8355 + }, + { + "epoch": 0.2560990560254996, + "grad_norm": 1.5038381904791935, + "learning_rate": 1.7436604688406776e-05, + "loss": 0.7446, + "step": 8356 + }, + { + "epoch": 0.2561297045482408, + "grad_norm": 0.7055229343102661, + "learning_rate": 1.7435941017761582e-05, + "loss": 0.6491, + "step": 8357 + }, + { + "epoch": 0.256160353070982, + "grad_norm": 1.4451172655841549, + "learning_rate": 1.743527727384816e-05, + "loss": 0.7893, + "step": 8358 + }, + { + "epoch": 0.25619100159372316, + "grad_norm": 1.5142167154056299, + "learning_rate": 1.7434613456673046e-05, + "loss": 0.7904, + "step": 8359 + }, + { + "epoch": 0.25622165011646436, + "grad_norm": 1.6506790870379362, + "learning_rate": 1.7433949566242786e-05, + "loss": 0.8228, + "step": 8360 + }, + { + "epoch": 0.25625229863920557, + "grad_norm": 1.3575641905503955, + "learning_rate": 1.7433285602563918e-05, + "loss": 0.9419, + "step": 8361 + }, + { + "epoch": 0.2562829471619468, + "grad_norm": 1.2483160909342124, + "learning_rate": 1.7432621565642985e-05, + "loss": 0.7668, + "step": 8362 + }, + { + "epoch": 0.256313595684688, + "grad_norm": 1.41551543483147, + "learning_rate": 1.7431957455486527e-05, + "loss": 0.7337, + "step": 8363 + }, + { + "epoch": 0.2563442442074292, + "grad_norm": 1.5860867864556198, + "learning_rate": 1.7431293272101096e-05, + "loss": 0.8523, + "step": 8364 + }, + { + "epoch": 0.2563748927301704, + "grad_norm": 1.2630824887823089, + "learning_rate": 1.7430629015493227e-05, + "loss": 0.6958, + "step": 8365 + }, + { + "epoch": 0.2564055412529116, + "grad_norm": 1.3103131137106938, + "learning_rate": 1.742996468566947e-05, + "loss": 0.6556, + "step": 8366 + }, + { + "epoch": 0.2564361897756528, + "grad_norm": 1.5925578747772007, + "learning_rate": 1.742930028263637e-05, + "loss": 0.827, + "step": 8367 + }, + { + "epoch": 0.256466838298394, + "grad_norm": 1.4096767410665365, + "learning_rate": 1.7428635806400475e-05, + "loss": 0.7679, + "step": 8368 + }, + { + "epoch": 0.2564974868211352, + "grad_norm": 1.7089925750777852, + "learning_rate": 1.742797125696833e-05, + "loss": 0.8281, + "step": 8369 + }, + { + "epoch": 0.2565281353438764, + "grad_norm": 0.8237883638026797, + "learning_rate": 1.7427306634346482e-05, + "loss": 0.6406, + "step": 8370 + }, + { + "epoch": 0.2565587838666176, + "grad_norm": 1.4630935265020348, + "learning_rate": 1.7426641938541483e-05, + "loss": 0.7912, + "step": 8371 + }, + { + "epoch": 0.25658943238935883, + "grad_norm": 1.4698601403504665, + "learning_rate": 1.742597716955988e-05, + "loss": 0.7073, + "step": 8372 + }, + { + "epoch": 0.25662008091210003, + "grad_norm": 0.6574037021242064, + "learning_rate": 1.7425312327408223e-05, + "loss": 0.6179, + "step": 8373 + }, + { + "epoch": 0.25665072943484124, + "grad_norm": 1.5102520599792626, + "learning_rate": 1.7424647412093067e-05, + "loss": 0.8101, + "step": 8374 + }, + { + "epoch": 0.25668137795758245, + "grad_norm": 1.520059799249208, + "learning_rate": 1.742398242362096e-05, + "loss": 0.814, + "step": 8375 + }, + { + "epoch": 0.25671202648032365, + "grad_norm": 1.489333841325411, + "learning_rate": 1.7423317361998452e-05, + "loss": 0.7077, + "step": 8376 + }, + { + "epoch": 0.25674267500306486, + "grad_norm": 1.502929496872658, + "learning_rate": 1.74226522272321e-05, + "loss": 0.873, + "step": 8377 + }, + { + "epoch": 0.25677332352580606, + "grad_norm": 0.7628198304862124, + "learning_rate": 1.7421987019328453e-05, + "loss": 0.6537, + "step": 8378 + }, + { + "epoch": 0.25680397204854727, + "grad_norm": 1.5307238249994721, + "learning_rate": 1.7421321738294076e-05, + "loss": 0.7952, + "step": 8379 + }, + { + "epoch": 0.2568346205712885, + "grad_norm": 1.5078404397808347, + "learning_rate": 1.7420656384135514e-05, + "loss": 0.8076, + "step": 8380 + }, + { + "epoch": 0.2568652690940297, + "grad_norm": 1.4624903746549962, + "learning_rate": 1.7419990956859322e-05, + "loss": 0.8389, + "step": 8381 + }, + { + "epoch": 0.2568959176167709, + "grad_norm": 1.7657205567422525, + "learning_rate": 1.7419325456472065e-05, + "loss": 0.7959, + "step": 8382 + }, + { + "epoch": 0.2569265661395121, + "grad_norm": 1.4443115938761562, + "learning_rate": 1.7418659882980295e-05, + "loss": 0.8254, + "step": 8383 + }, + { + "epoch": 0.2569572146622533, + "grad_norm": 1.547313487391813, + "learning_rate": 1.7417994236390573e-05, + "loss": 0.7304, + "step": 8384 + }, + { + "epoch": 0.2569878631849945, + "grad_norm": 1.6108074434977047, + "learning_rate": 1.7417328516709454e-05, + "loss": 0.8875, + "step": 8385 + }, + { + "epoch": 0.2570185117077357, + "grad_norm": 1.4858339342662628, + "learning_rate": 1.7416662723943496e-05, + "loss": 0.8151, + "step": 8386 + }, + { + "epoch": 0.2570491602304769, + "grad_norm": 1.3265984842016818, + "learning_rate": 1.7415996858099266e-05, + "loss": 0.7597, + "step": 8387 + }, + { + "epoch": 0.2570798087532181, + "grad_norm": 1.5190211913910043, + "learning_rate": 1.7415330919183323e-05, + "loss": 0.789, + "step": 8388 + }, + { + "epoch": 0.2571104572759593, + "grad_norm": 1.385978465477179, + "learning_rate": 1.7414664907202223e-05, + "loss": 0.7403, + "step": 8389 + }, + { + "epoch": 0.2571411057987005, + "grad_norm": 0.6803938590548385, + "learning_rate": 1.7413998822162536e-05, + "loss": 0.6142, + "step": 8390 + }, + { + "epoch": 0.2571717543214417, + "grad_norm": 1.3906254389843433, + "learning_rate": 1.7413332664070818e-05, + "loss": 0.8351, + "step": 8391 + }, + { + "epoch": 0.2572024028441829, + "grad_norm": 1.5762615659100363, + "learning_rate": 1.741266643293364e-05, + "loss": 0.8795, + "step": 8392 + }, + { + "epoch": 0.2572330513669241, + "grad_norm": 1.4773328993952242, + "learning_rate": 1.741200012875756e-05, + "loss": 0.76, + "step": 8393 + }, + { + "epoch": 0.2572636998896653, + "grad_norm": 1.4033138265778753, + "learning_rate": 1.741133375154915e-05, + "loss": 0.7445, + "step": 8394 + }, + { + "epoch": 0.2572943484124065, + "grad_norm": 1.3507562766397727, + "learning_rate": 1.741066730131497e-05, + "loss": 0.7741, + "step": 8395 + }, + { + "epoch": 0.2573249969351477, + "grad_norm": 1.5834028144518035, + "learning_rate": 1.741000077806159e-05, + "loss": 0.73, + "step": 8396 + }, + { + "epoch": 0.2573556454578889, + "grad_norm": 1.4365905480031789, + "learning_rate": 1.7409334181795574e-05, + "loss": 0.7772, + "step": 8397 + }, + { + "epoch": 0.2573862939806301, + "grad_norm": 0.723545773316225, + "learning_rate": 1.7408667512523497e-05, + "loss": 0.6608, + "step": 8398 + }, + { + "epoch": 0.2574169425033713, + "grad_norm": 1.4124760230242035, + "learning_rate": 1.7408000770251918e-05, + "loss": 0.7761, + "step": 8399 + }, + { + "epoch": 0.25744759102611253, + "grad_norm": 1.475110245207858, + "learning_rate": 1.7407333954987414e-05, + "loss": 0.858, + "step": 8400 + }, + { + "epoch": 0.25747823954885374, + "grad_norm": 1.5912672000435941, + "learning_rate": 1.7406667066736557e-05, + "loss": 0.8142, + "step": 8401 + }, + { + "epoch": 0.25750888807159494, + "grad_norm": 1.3670429293069193, + "learning_rate": 1.7406000105505908e-05, + "loss": 0.8484, + "step": 8402 + }, + { + "epoch": 0.25753953659433615, + "grad_norm": 1.3603461539486081, + "learning_rate": 1.7405333071302052e-05, + "loss": 0.734, + "step": 8403 + }, + { + "epoch": 0.25757018511707735, + "grad_norm": 1.4051292111796865, + "learning_rate": 1.740466596413155e-05, + "loss": 0.7571, + "step": 8404 + }, + { + "epoch": 0.25760083363981856, + "grad_norm": 1.6936752084496547, + "learning_rate": 1.7403998784000983e-05, + "loss": 0.8965, + "step": 8405 + }, + { + "epoch": 0.25763148216255977, + "grad_norm": 1.4232351238698162, + "learning_rate": 1.7403331530916915e-05, + "loss": 0.7695, + "step": 8406 + }, + { + "epoch": 0.25766213068530097, + "grad_norm": 1.3368053535135604, + "learning_rate": 1.7402664204885933e-05, + "loss": 0.7464, + "step": 8407 + }, + { + "epoch": 0.2576927792080422, + "grad_norm": 1.5234473714003491, + "learning_rate": 1.7401996805914606e-05, + "loss": 0.8028, + "step": 8408 + }, + { + "epoch": 0.2577234277307834, + "grad_norm": 1.6068437002247726, + "learning_rate": 1.7401329334009508e-05, + "loss": 0.7467, + "step": 8409 + }, + { + "epoch": 0.2577540762535246, + "grad_norm": 1.6182652991087063, + "learning_rate": 1.7400661789177223e-05, + "loss": 0.7517, + "step": 8410 + }, + { + "epoch": 0.2577847247762658, + "grad_norm": 1.5030722322712162, + "learning_rate": 1.739999417142432e-05, + "loss": 0.9199, + "step": 8411 + }, + { + "epoch": 0.257815373299007, + "grad_norm": 1.5615337915179002, + "learning_rate": 1.739932648075738e-05, + "loss": 0.7481, + "step": 8412 + }, + { + "epoch": 0.2578460218217482, + "grad_norm": 1.590678532632067, + "learning_rate": 1.7398658717182985e-05, + "loss": 0.7621, + "step": 8413 + }, + { + "epoch": 0.2578766703444894, + "grad_norm": 0.7632077181736864, + "learning_rate": 1.7397990880707712e-05, + "loss": 0.6379, + "step": 8414 + }, + { + "epoch": 0.2579073188672306, + "grad_norm": 0.7278876207000902, + "learning_rate": 1.7397322971338143e-05, + "loss": 0.6249, + "step": 8415 + }, + { + "epoch": 0.2579379673899718, + "grad_norm": 0.6781475168314925, + "learning_rate": 1.7396654989080857e-05, + "loss": 0.6295, + "step": 8416 + }, + { + "epoch": 0.25796861591271303, + "grad_norm": 0.7050049818310989, + "learning_rate": 1.739598693394244e-05, + "loss": 0.6303, + "step": 8417 + }, + { + "epoch": 0.25799926443545423, + "grad_norm": 1.6130452699944318, + "learning_rate": 1.7395318805929466e-05, + "loss": 0.8137, + "step": 8418 + }, + { + "epoch": 0.25802991295819544, + "grad_norm": 1.53765820727947, + "learning_rate": 1.7394650605048527e-05, + "loss": 0.8678, + "step": 8419 + }, + { + "epoch": 0.25806056148093665, + "grad_norm": 0.7367445718301321, + "learning_rate": 1.7393982331306204e-05, + "loss": 0.6634, + "step": 8420 + }, + { + "epoch": 0.2580912100036778, + "grad_norm": 1.405336087337923, + "learning_rate": 1.739331398470908e-05, + "loss": 0.6356, + "step": 8421 + }, + { + "epoch": 0.258121858526419, + "grad_norm": 1.4362528562103547, + "learning_rate": 1.739264556526374e-05, + "loss": 0.7923, + "step": 8422 + }, + { + "epoch": 0.2581525070491602, + "grad_norm": 1.7457602069642681, + "learning_rate": 1.7391977072976773e-05, + "loss": 0.8562, + "step": 8423 + }, + { + "epoch": 0.2581831555719014, + "grad_norm": 1.4205594131298722, + "learning_rate": 1.7391308507854768e-05, + "loss": 0.846, + "step": 8424 + }, + { + "epoch": 0.2582138040946426, + "grad_norm": 1.4123609584713224, + "learning_rate": 1.7390639869904303e-05, + "loss": 0.6353, + "step": 8425 + }, + { + "epoch": 0.2582444526173838, + "grad_norm": 1.476652024455576, + "learning_rate": 1.7389971159131977e-05, + "loss": 0.7422, + "step": 8426 + }, + { + "epoch": 0.25827510114012503, + "grad_norm": 1.4896089089735987, + "learning_rate": 1.7389302375544375e-05, + "loss": 0.7336, + "step": 8427 + }, + { + "epoch": 0.25830574966286624, + "grad_norm": 1.4377232644229336, + "learning_rate": 1.7388633519148084e-05, + "loss": 0.6509, + "step": 8428 + }, + { + "epoch": 0.25833639818560744, + "grad_norm": 1.43625488959951, + "learning_rate": 1.7387964589949695e-05, + "loss": 0.8408, + "step": 8429 + }, + { + "epoch": 0.25836704670834865, + "grad_norm": 1.5291019883943173, + "learning_rate": 1.7387295587955803e-05, + "loss": 0.7418, + "step": 8430 + }, + { + "epoch": 0.25839769523108985, + "grad_norm": 1.5057105956661183, + "learning_rate": 1.7386626513172995e-05, + "loss": 0.7598, + "step": 8431 + }, + { + "epoch": 0.25842834375383106, + "grad_norm": 1.602970849677581, + "learning_rate": 1.738595736560787e-05, + "loss": 0.7577, + "step": 8432 + }, + { + "epoch": 0.25845899227657226, + "grad_norm": 1.6802521893267908, + "learning_rate": 1.7385288145267013e-05, + "loss": 0.8011, + "step": 8433 + }, + { + "epoch": 0.25848964079931347, + "grad_norm": 1.3253894675535414, + "learning_rate": 1.7384618852157028e-05, + "loss": 0.7915, + "step": 8434 + }, + { + "epoch": 0.2585202893220547, + "grad_norm": 1.4716946716164954, + "learning_rate": 1.7383949486284497e-05, + "loss": 0.7403, + "step": 8435 + }, + { + "epoch": 0.2585509378447959, + "grad_norm": 0.7599997085321236, + "learning_rate": 1.7383280047656027e-05, + "loss": 0.6338, + "step": 8436 + }, + { + "epoch": 0.2585815863675371, + "grad_norm": 1.5268564669905, + "learning_rate": 1.738261053627821e-05, + "loss": 0.8128, + "step": 8437 + }, + { + "epoch": 0.2586122348902783, + "grad_norm": 1.3857977651514912, + "learning_rate": 1.738194095215764e-05, + "loss": 0.7489, + "step": 8438 + }, + { + "epoch": 0.2586428834130195, + "grad_norm": 1.4318477728580599, + "learning_rate": 1.7381271295300917e-05, + "loss": 0.7957, + "step": 8439 + }, + { + "epoch": 0.2586735319357607, + "grad_norm": 1.3493833482628208, + "learning_rate": 1.7380601565714637e-05, + "loss": 0.6966, + "step": 8440 + }, + { + "epoch": 0.2587041804585019, + "grad_norm": 1.3493269539333348, + "learning_rate": 1.737993176340541e-05, + "loss": 0.8197, + "step": 8441 + }, + { + "epoch": 0.2587348289812431, + "grad_norm": 1.5041645526310714, + "learning_rate": 1.737926188837982e-05, + "loss": 0.7842, + "step": 8442 + }, + { + "epoch": 0.2587654775039843, + "grad_norm": 1.5022798107577016, + "learning_rate": 1.7378591940644476e-05, + "loss": 0.7413, + "step": 8443 + }, + { + "epoch": 0.2587961260267255, + "grad_norm": 1.444217997759969, + "learning_rate": 1.7377921920205975e-05, + "loss": 0.7652, + "step": 8444 + }, + { + "epoch": 0.25882677454946673, + "grad_norm": 1.6661500539811298, + "learning_rate": 1.737725182707093e-05, + "loss": 0.7771, + "step": 8445 + }, + { + "epoch": 0.25885742307220794, + "grad_norm": 1.581624562933262, + "learning_rate": 1.7376581661245927e-05, + "loss": 0.8336, + "step": 8446 + }, + { + "epoch": 0.25888807159494914, + "grad_norm": 1.3921736222125454, + "learning_rate": 1.737591142273758e-05, + "loss": 0.7651, + "step": 8447 + }, + { + "epoch": 0.25891872011769035, + "grad_norm": 0.7438649797830745, + "learning_rate": 1.737524111155249e-05, + "loss": 0.6427, + "step": 8448 + }, + { + "epoch": 0.25894936864043155, + "grad_norm": 0.7191974397766747, + "learning_rate": 1.7374570727697263e-05, + "loss": 0.6238, + "step": 8449 + }, + { + "epoch": 0.25898001716317276, + "grad_norm": 1.4472405889499609, + "learning_rate": 1.7373900271178502e-05, + "loss": 0.8308, + "step": 8450 + }, + { + "epoch": 0.25901066568591397, + "grad_norm": 1.5462580190117452, + "learning_rate": 1.7373229742002818e-05, + "loss": 0.758, + "step": 8451 + }, + { + "epoch": 0.2590413142086551, + "grad_norm": 1.4968675315173696, + "learning_rate": 1.7372559140176816e-05, + "loss": 0.8772, + "step": 8452 + }, + { + "epoch": 0.2590719627313963, + "grad_norm": 0.6999848504968438, + "learning_rate": 1.73718884657071e-05, + "loss": 0.6069, + "step": 8453 + }, + { + "epoch": 0.2591026112541375, + "grad_norm": 1.4374776852401216, + "learning_rate": 1.737121771860028e-05, + "loss": 0.8047, + "step": 8454 + }, + { + "epoch": 0.25913325977687873, + "grad_norm": 1.3970974583709883, + "learning_rate": 1.7370546898862966e-05, + "loss": 0.6835, + "step": 8455 + }, + { + "epoch": 0.25916390829961994, + "grad_norm": 1.452826027571933, + "learning_rate": 1.736987600650177e-05, + "loss": 0.7985, + "step": 8456 + }, + { + "epoch": 0.25919455682236114, + "grad_norm": 1.3628861041531366, + "learning_rate": 1.7369205041523297e-05, + "loss": 0.8987, + "step": 8457 + }, + { + "epoch": 0.25922520534510235, + "grad_norm": 1.3438533654433389, + "learning_rate": 1.7368534003934164e-05, + "loss": 0.7229, + "step": 8458 + }, + { + "epoch": 0.25925585386784356, + "grad_norm": 1.531586139945682, + "learning_rate": 1.7367862893740976e-05, + "loss": 0.7747, + "step": 8459 + }, + { + "epoch": 0.25928650239058476, + "grad_norm": 1.4579210455778495, + "learning_rate": 1.7367191710950352e-05, + "loss": 0.8017, + "step": 8460 + }, + { + "epoch": 0.25931715091332597, + "grad_norm": 1.4510772627717603, + "learning_rate": 1.7366520455568904e-05, + "loss": 0.8043, + "step": 8461 + }, + { + "epoch": 0.2593477994360672, + "grad_norm": 0.7790017769063072, + "learning_rate": 1.7365849127603243e-05, + "loss": 0.6308, + "step": 8462 + }, + { + "epoch": 0.2593784479588084, + "grad_norm": 0.6975290995447421, + "learning_rate": 1.7365177727059988e-05, + "loss": 0.6426, + "step": 8463 + }, + { + "epoch": 0.2594090964815496, + "grad_norm": 1.5092534140815832, + "learning_rate": 1.7364506253945756e-05, + "loss": 0.7819, + "step": 8464 + }, + { + "epoch": 0.2594397450042908, + "grad_norm": 1.5941101726352684, + "learning_rate": 1.7363834708267152e-05, + "loss": 0.8121, + "step": 8465 + }, + { + "epoch": 0.259470393527032, + "grad_norm": 0.7120904035394908, + "learning_rate": 1.7363163090030806e-05, + "loss": 0.6252, + "step": 8466 + }, + { + "epoch": 0.2595010420497732, + "grad_norm": 1.5489035322533118, + "learning_rate": 1.7362491399243325e-05, + "loss": 0.7938, + "step": 8467 + }, + { + "epoch": 0.2595316905725144, + "grad_norm": 0.6951096035377993, + "learning_rate": 1.736181963591134e-05, + "loss": 0.6135, + "step": 8468 + }, + { + "epoch": 0.2595623390952556, + "grad_norm": 1.3967756688459367, + "learning_rate": 1.7361147800041454e-05, + "loss": 0.653, + "step": 8469 + }, + { + "epoch": 0.2595929876179968, + "grad_norm": 1.5045890004535607, + "learning_rate": 1.7360475891640303e-05, + "loss": 0.7016, + "step": 8470 + }, + { + "epoch": 0.259623636140738, + "grad_norm": 1.526232494864078, + "learning_rate": 1.7359803910714495e-05, + "loss": 0.7299, + "step": 8471 + }, + { + "epoch": 0.25965428466347923, + "grad_norm": 1.468207855970897, + "learning_rate": 1.7359131857270658e-05, + "loss": 0.7385, + "step": 8472 + }, + { + "epoch": 0.25968493318622043, + "grad_norm": 1.5970657437518803, + "learning_rate": 1.735845973131541e-05, + "loss": 0.7554, + "step": 8473 + }, + { + "epoch": 0.25971558170896164, + "grad_norm": 1.4568183603577478, + "learning_rate": 1.7357787532855376e-05, + "loss": 0.8817, + "step": 8474 + }, + { + "epoch": 0.25974623023170285, + "grad_norm": 1.5150929174312802, + "learning_rate": 1.735711526189718e-05, + "loss": 0.7105, + "step": 8475 + }, + { + "epoch": 0.25977687875444405, + "grad_norm": 1.4510806253611568, + "learning_rate": 1.7356442918447444e-05, + "loss": 0.7431, + "step": 8476 + }, + { + "epoch": 0.25980752727718526, + "grad_norm": 1.5286910241774343, + "learning_rate": 1.7355770502512794e-05, + "loss": 0.7461, + "step": 8477 + }, + { + "epoch": 0.25983817579992646, + "grad_norm": 1.6852557743018999, + "learning_rate": 1.7355098014099857e-05, + "loss": 0.9154, + "step": 8478 + }, + { + "epoch": 0.25986882432266767, + "grad_norm": 1.5454662166603477, + "learning_rate": 1.7354425453215254e-05, + "loss": 0.7225, + "step": 8479 + }, + { + "epoch": 0.2598994728454089, + "grad_norm": 1.3077840095443236, + "learning_rate": 1.7353752819865618e-05, + "loss": 0.6767, + "step": 8480 + }, + { + "epoch": 0.2599301213681501, + "grad_norm": 1.4245486036759363, + "learning_rate": 1.735308011405757e-05, + "loss": 0.7625, + "step": 8481 + }, + { + "epoch": 0.2599607698908913, + "grad_norm": 0.7618776607920827, + "learning_rate": 1.7352407335797744e-05, + "loss": 0.6515, + "step": 8482 + }, + { + "epoch": 0.25999141841363244, + "grad_norm": 1.4368590355530793, + "learning_rate": 1.7351734485092772e-05, + "loss": 0.8282, + "step": 8483 + }, + { + "epoch": 0.26002206693637364, + "grad_norm": 1.546575755248825, + "learning_rate": 1.7351061561949274e-05, + "loss": 0.8736, + "step": 8484 + }, + { + "epoch": 0.26005271545911485, + "grad_norm": 0.6985052069000542, + "learning_rate": 1.735038856637389e-05, + "loss": 0.6015, + "step": 8485 + }, + { + "epoch": 0.26008336398185605, + "grad_norm": 1.5123260530412515, + "learning_rate": 1.734971549837324e-05, + "loss": 0.875, + "step": 8486 + }, + { + "epoch": 0.26011401250459726, + "grad_norm": 1.423933931945447, + "learning_rate": 1.734904235795397e-05, + "loss": 0.8195, + "step": 8487 + }, + { + "epoch": 0.26014466102733846, + "grad_norm": 1.3789801797688062, + "learning_rate": 1.73483691451227e-05, + "loss": 0.7804, + "step": 8488 + }, + { + "epoch": 0.26017530955007967, + "grad_norm": 1.5201384111025917, + "learning_rate": 1.7347695859886072e-05, + "loss": 0.7884, + "step": 8489 + }, + { + "epoch": 0.2602059580728209, + "grad_norm": 1.6482807114920515, + "learning_rate": 1.7347022502250716e-05, + "loss": 0.6468, + "step": 8490 + }, + { + "epoch": 0.2602366065955621, + "grad_norm": 1.4377485445220264, + "learning_rate": 1.7346349072223265e-05, + "loss": 0.86, + "step": 8491 + }, + { + "epoch": 0.2602672551183033, + "grad_norm": 1.5664270787807937, + "learning_rate": 1.7345675569810357e-05, + "loss": 0.8621, + "step": 8492 + }, + { + "epoch": 0.2602979036410445, + "grad_norm": 1.418213738751314, + "learning_rate": 1.7345001995018633e-05, + "loss": 0.8971, + "step": 8493 + }, + { + "epoch": 0.2603285521637857, + "grad_norm": 1.461520486517398, + "learning_rate": 1.734432834785472e-05, + "loss": 0.7957, + "step": 8494 + }, + { + "epoch": 0.2603592006865269, + "grad_norm": 1.502737144003716, + "learning_rate": 1.734365462832526e-05, + "loss": 0.7856, + "step": 8495 + }, + { + "epoch": 0.2603898492092681, + "grad_norm": 1.482677575513582, + "learning_rate": 1.73429808364369e-05, + "loss": 0.7633, + "step": 8496 + }, + { + "epoch": 0.2604204977320093, + "grad_norm": 1.5330641711169346, + "learning_rate": 1.7342306972196263e-05, + "loss": 0.8245, + "step": 8497 + }, + { + "epoch": 0.2604511462547505, + "grad_norm": 1.47791312222967, + "learning_rate": 1.734163303561e-05, + "loss": 0.7682, + "step": 8498 + }, + { + "epoch": 0.2604817947774917, + "grad_norm": 1.4954722890397598, + "learning_rate": 1.7340959026684746e-05, + "loss": 0.6258, + "step": 8499 + }, + { + "epoch": 0.26051244330023293, + "grad_norm": 0.847757367560167, + "learning_rate": 1.7340284945427147e-05, + "loss": 0.6654, + "step": 8500 + }, + { + "epoch": 0.26054309182297414, + "grad_norm": 1.3764752104504239, + "learning_rate": 1.733961079184384e-05, + "loss": 0.879, + "step": 8501 + }, + { + "epoch": 0.26057374034571534, + "grad_norm": 0.6932822687404547, + "learning_rate": 1.7338936565941472e-05, + "loss": 0.6151, + "step": 8502 + }, + { + "epoch": 0.26060438886845655, + "grad_norm": 1.3605402031249345, + "learning_rate": 1.7338262267726683e-05, + "loss": 0.8112, + "step": 8503 + }, + { + "epoch": 0.26063503739119775, + "grad_norm": 1.5648891135014549, + "learning_rate": 1.733758789720612e-05, + "loss": 0.8732, + "step": 8504 + }, + { + "epoch": 0.26066568591393896, + "grad_norm": 0.690295469593108, + "learning_rate": 1.7336913454386426e-05, + "loss": 0.6042, + "step": 8505 + }, + { + "epoch": 0.26069633443668017, + "grad_norm": 1.4348248782043431, + "learning_rate": 1.7336238939274245e-05, + "loss": 0.7196, + "step": 8506 + }, + { + "epoch": 0.26072698295942137, + "grad_norm": 1.5276253583416681, + "learning_rate": 1.7335564351876225e-05, + "loss": 0.7578, + "step": 8507 + }, + { + "epoch": 0.2607576314821626, + "grad_norm": 1.6096543600464575, + "learning_rate": 1.7334889692199013e-05, + "loss": 0.7942, + "step": 8508 + }, + { + "epoch": 0.2607882800049038, + "grad_norm": 0.7007103150164935, + "learning_rate": 1.7334214960249257e-05, + "loss": 0.6295, + "step": 8509 + }, + { + "epoch": 0.260818928527645, + "grad_norm": 0.6886379412170406, + "learning_rate": 1.73335401560336e-05, + "loss": 0.6411, + "step": 8510 + }, + { + "epoch": 0.2608495770503862, + "grad_norm": 1.441000100394366, + "learning_rate": 1.73328652795587e-05, + "loss": 0.7435, + "step": 8511 + }, + { + "epoch": 0.2608802255731274, + "grad_norm": 1.3465741523403372, + "learning_rate": 1.7332190330831204e-05, + "loss": 0.6967, + "step": 8512 + }, + { + "epoch": 0.2609108740958686, + "grad_norm": 1.648196483535394, + "learning_rate": 1.7331515309857757e-05, + "loss": 0.8481, + "step": 8513 + }, + { + "epoch": 0.26094152261860976, + "grad_norm": 1.2623930296537713, + "learning_rate": 1.7330840216645013e-05, + "loss": 0.6789, + "step": 8514 + }, + { + "epoch": 0.26097217114135096, + "grad_norm": 0.7199008782832953, + "learning_rate": 1.7330165051199625e-05, + "loss": 0.6205, + "step": 8515 + }, + { + "epoch": 0.26100281966409217, + "grad_norm": 1.5867330145746419, + "learning_rate": 1.7329489813528248e-05, + "loss": 0.8593, + "step": 8516 + }, + { + "epoch": 0.2610334681868334, + "grad_norm": 1.4873429707420116, + "learning_rate": 1.732881450363753e-05, + "loss": 0.7207, + "step": 8517 + }, + { + "epoch": 0.2610641167095746, + "grad_norm": 1.5522981857568696, + "learning_rate": 1.7328139121534128e-05, + "loss": 0.8002, + "step": 8518 + }, + { + "epoch": 0.2610947652323158, + "grad_norm": 1.4715884406264275, + "learning_rate": 1.7327463667224697e-05, + "loss": 0.7452, + "step": 8519 + }, + { + "epoch": 0.261125413755057, + "grad_norm": 1.4474117906219757, + "learning_rate": 1.7326788140715895e-05, + "loss": 0.6631, + "step": 8520 + }, + { + "epoch": 0.2611560622777982, + "grad_norm": 1.5319739418154457, + "learning_rate": 1.732611254201437e-05, + "loss": 0.6842, + "step": 8521 + }, + { + "epoch": 0.2611867108005394, + "grad_norm": 1.4383620579343943, + "learning_rate": 1.7325436871126783e-05, + "loss": 0.7214, + "step": 8522 + }, + { + "epoch": 0.2612173593232806, + "grad_norm": 0.7374435544196007, + "learning_rate": 1.7324761128059795e-05, + "loss": 0.6365, + "step": 8523 + }, + { + "epoch": 0.2612480078460218, + "grad_norm": 1.381354461445827, + "learning_rate": 1.732408531282006e-05, + "loss": 0.7691, + "step": 8524 + }, + { + "epoch": 0.261278656368763, + "grad_norm": 1.4226805774433655, + "learning_rate": 1.732340942541424e-05, + "loss": 0.7224, + "step": 8525 + }, + { + "epoch": 0.2613093048915042, + "grad_norm": 1.4421271979989323, + "learning_rate": 1.732273346584899e-05, + "loss": 0.7996, + "step": 8526 + }, + { + "epoch": 0.26133995341424543, + "grad_norm": 1.643212437226561, + "learning_rate": 1.7322057434130976e-05, + "loss": 0.8175, + "step": 8527 + }, + { + "epoch": 0.26137060193698664, + "grad_norm": 1.6541972648026089, + "learning_rate": 1.7321381330266858e-05, + "loss": 0.8085, + "step": 8528 + }, + { + "epoch": 0.26140125045972784, + "grad_norm": 1.441465689288056, + "learning_rate": 1.7320705154263292e-05, + "loss": 0.7398, + "step": 8529 + }, + { + "epoch": 0.26143189898246905, + "grad_norm": 1.5481173250466078, + "learning_rate": 1.732002890612695e-05, + "loss": 0.7749, + "step": 8530 + }, + { + "epoch": 0.26146254750521025, + "grad_norm": 0.6799627047362022, + "learning_rate": 1.7319352585864488e-05, + "loss": 0.6188, + "step": 8531 + }, + { + "epoch": 0.26149319602795146, + "grad_norm": 1.4495682472669298, + "learning_rate": 1.731867619348257e-05, + "loss": 0.7751, + "step": 8532 + }, + { + "epoch": 0.26152384455069266, + "grad_norm": 1.4649394678854433, + "learning_rate": 1.7317999728987867e-05, + "loss": 0.7571, + "step": 8533 + }, + { + "epoch": 0.26155449307343387, + "grad_norm": 1.562447591380963, + "learning_rate": 1.7317323192387038e-05, + "loss": 0.7929, + "step": 8534 + }, + { + "epoch": 0.2615851415961751, + "grad_norm": 1.5045405524095319, + "learning_rate": 1.731664658368675e-05, + "loss": 0.8089, + "step": 8535 + }, + { + "epoch": 0.2616157901189163, + "grad_norm": 1.5031494393027438, + "learning_rate": 1.7315969902893676e-05, + "loss": 0.7863, + "step": 8536 + }, + { + "epoch": 0.2616464386416575, + "grad_norm": 1.3129011304416267, + "learning_rate": 1.7315293150014476e-05, + "loss": 0.8425, + "step": 8537 + }, + { + "epoch": 0.2616770871643987, + "grad_norm": 1.4312314702485416, + "learning_rate": 1.731461632505582e-05, + "loss": 0.8042, + "step": 8538 + }, + { + "epoch": 0.2617077356871399, + "grad_norm": 1.403198285344907, + "learning_rate": 1.731393942802438e-05, + "loss": 0.7981, + "step": 8539 + }, + { + "epoch": 0.2617383842098811, + "grad_norm": 1.5612454695259743, + "learning_rate": 1.731326245892682e-05, + "loss": 0.887, + "step": 8540 + }, + { + "epoch": 0.2617690327326223, + "grad_norm": 1.5380887325079589, + "learning_rate": 1.7312585417769816e-05, + "loss": 0.7836, + "step": 8541 + }, + { + "epoch": 0.2617996812553635, + "grad_norm": 1.5523021593447377, + "learning_rate": 1.731190830456004e-05, + "loss": 0.7107, + "step": 8542 + }, + { + "epoch": 0.2618303297781047, + "grad_norm": 0.7165069966436285, + "learning_rate": 1.7311231119304156e-05, + "loss": 0.6482, + "step": 8543 + }, + { + "epoch": 0.2618609783008459, + "grad_norm": 1.7433230954359002, + "learning_rate": 1.7310553862008843e-05, + "loss": 0.7236, + "step": 8544 + }, + { + "epoch": 0.2618916268235871, + "grad_norm": 1.6818243809027653, + "learning_rate": 1.7309876532680768e-05, + "loss": 0.7023, + "step": 8545 + }, + { + "epoch": 0.2619222753463283, + "grad_norm": 1.6341832208835494, + "learning_rate": 1.7309199131326615e-05, + "loss": 0.8179, + "step": 8546 + }, + { + "epoch": 0.2619529238690695, + "grad_norm": 1.5792073939880265, + "learning_rate": 1.730852165795305e-05, + "loss": 0.7923, + "step": 8547 + }, + { + "epoch": 0.2619835723918107, + "grad_norm": 1.6129057882313456, + "learning_rate": 1.7307844112566753e-05, + "loss": 0.81, + "step": 8548 + }, + { + "epoch": 0.2620142209145519, + "grad_norm": 1.6537087147713976, + "learning_rate": 1.7307166495174397e-05, + "loss": 0.8891, + "step": 8549 + }, + { + "epoch": 0.2620448694372931, + "grad_norm": 1.4929905242415815, + "learning_rate": 1.730648880578266e-05, + "loss": 0.7058, + "step": 8550 + }, + { + "epoch": 0.2620755179600343, + "grad_norm": 1.4556333837975861, + "learning_rate": 1.730581104439822e-05, + "loss": 0.7421, + "step": 8551 + }, + { + "epoch": 0.2621061664827755, + "grad_norm": 1.5327908847083567, + "learning_rate": 1.7305133211027754e-05, + "loss": 0.8346, + "step": 8552 + }, + { + "epoch": 0.2621368150055167, + "grad_norm": 1.4220988822260476, + "learning_rate": 1.730445530567794e-05, + "loss": 0.7959, + "step": 8553 + }, + { + "epoch": 0.2621674635282579, + "grad_norm": 1.401417514177581, + "learning_rate": 1.730377732835546e-05, + "loss": 0.7177, + "step": 8554 + }, + { + "epoch": 0.26219811205099913, + "grad_norm": 1.5444265654332419, + "learning_rate": 1.7303099279066993e-05, + "loss": 0.6728, + "step": 8555 + }, + { + "epoch": 0.26222876057374034, + "grad_norm": 1.3959886003299535, + "learning_rate": 1.730242115781922e-05, + "loss": 0.739, + "step": 8556 + }, + { + "epoch": 0.26225940909648154, + "grad_norm": 1.4946477828855105, + "learning_rate": 1.7301742964618826e-05, + "loss": 0.7882, + "step": 8557 + }, + { + "epoch": 0.26229005761922275, + "grad_norm": 1.537226831136923, + "learning_rate": 1.7301064699472487e-05, + "loss": 0.7988, + "step": 8558 + }, + { + "epoch": 0.26232070614196396, + "grad_norm": 0.7179036995585321, + "learning_rate": 1.7300386362386888e-05, + "loss": 0.6391, + "step": 8559 + }, + { + "epoch": 0.26235135466470516, + "grad_norm": 1.5925392097584383, + "learning_rate": 1.7299707953368717e-05, + "loss": 0.7211, + "step": 8560 + }, + { + "epoch": 0.26238200318744637, + "grad_norm": 1.3952877905950887, + "learning_rate": 1.729902947242466e-05, + "loss": 0.7097, + "step": 8561 + }, + { + "epoch": 0.26241265171018757, + "grad_norm": 1.704362082415151, + "learning_rate": 1.729835091956139e-05, + "loss": 0.8664, + "step": 8562 + }, + { + "epoch": 0.2624433002329288, + "grad_norm": 1.5814212446720706, + "learning_rate": 1.7297672294785605e-05, + "loss": 0.8641, + "step": 8563 + }, + { + "epoch": 0.26247394875567, + "grad_norm": 1.451838359032823, + "learning_rate": 1.729699359810399e-05, + "loss": 0.7701, + "step": 8564 + }, + { + "epoch": 0.2625045972784112, + "grad_norm": 0.6699829170112738, + "learning_rate": 1.7296314829523225e-05, + "loss": 0.6337, + "step": 8565 + }, + { + "epoch": 0.2625352458011524, + "grad_norm": 1.5990376815159193, + "learning_rate": 1.7295635989050005e-05, + "loss": 0.7875, + "step": 8566 + }, + { + "epoch": 0.2625658943238936, + "grad_norm": 1.5532777066082748, + "learning_rate": 1.7294957076691016e-05, + "loss": 0.8672, + "step": 8567 + }, + { + "epoch": 0.2625965428466348, + "grad_norm": 1.4177168315567863, + "learning_rate": 1.7294278092452953e-05, + "loss": 0.8414, + "step": 8568 + }, + { + "epoch": 0.262627191369376, + "grad_norm": 1.4644092091695886, + "learning_rate": 1.7293599036342498e-05, + "loss": 0.958, + "step": 8569 + }, + { + "epoch": 0.2626578398921172, + "grad_norm": 1.3992675811997763, + "learning_rate": 1.7292919908366346e-05, + "loss": 0.7215, + "step": 8570 + }, + { + "epoch": 0.2626884884148584, + "grad_norm": 1.4518968162451888, + "learning_rate": 1.7292240708531188e-05, + "loss": 0.8831, + "step": 8571 + }, + { + "epoch": 0.26271913693759963, + "grad_norm": 1.50177377392645, + "learning_rate": 1.7291561436843716e-05, + "loss": 0.7532, + "step": 8572 + }, + { + "epoch": 0.26274978546034083, + "grad_norm": 0.7319585820939855, + "learning_rate": 1.7290882093310625e-05, + "loss": 0.6306, + "step": 8573 + }, + { + "epoch": 0.26278043398308204, + "grad_norm": 1.5928541290219353, + "learning_rate": 1.7290202677938606e-05, + "loss": 0.6772, + "step": 8574 + }, + { + "epoch": 0.26281108250582325, + "grad_norm": 1.3661536547089808, + "learning_rate": 1.7289523190734355e-05, + "loss": 0.7432, + "step": 8575 + }, + { + "epoch": 0.2628417310285644, + "grad_norm": 1.4277635501426595, + "learning_rate": 1.728884363170457e-05, + "loss": 0.7695, + "step": 8576 + }, + { + "epoch": 0.2628723795513056, + "grad_norm": 1.5264625980460214, + "learning_rate": 1.7288164000855937e-05, + "loss": 0.8275, + "step": 8577 + }, + { + "epoch": 0.2629030280740468, + "grad_norm": 1.5127921867164627, + "learning_rate": 1.7287484298195164e-05, + "loss": 0.8419, + "step": 8578 + }, + { + "epoch": 0.262933676596788, + "grad_norm": 1.5225408478309688, + "learning_rate": 1.728680452372894e-05, + "loss": 0.7619, + "step": 8579 + }, + { + "epoch": 0.2629643251195292, + "grad_norm": 1.497076972302823, + "learning_rate": 1.7286124677463974e-05, + "loss": 0.7813, + "step": 8580 + }, + { + "epoch": 0.2629949736422704, + "grad_norm": 0.7153091360853004, + "learning_rate": 1.7285444759406954e-05, + "loss": 0.6191, + "step": 8581 + }, + { + "epoch": 0.26302562216501163, + "grad_norm": 1.5140656575549578, + "learning_rate": 1.728476476956458e-05, + "loss": 0.9535, + "step": 8582 + }, + { + "epoch": 0.26305627068775284, + "grad_norm": 1.4891453381167745, + "learning_rate": 1.7284084707943557e-05, + "loss": 0.8105, + "step": 8583 + }, + { + "epoch": 0.26308691921049404, + "grad_norm": 1.2820641340441934, + "learning_rate": 1.7283404574550582e-05, + "loss": 0.7223, + "step": 8584 + }, + { + "epoch": 0.26311756773323525, + "grad_norm": 1.3885491378706896, + "learning_rate": 1.7282724369392358e-05, + "loss": 0.7511, + "step": 8585 + }, + { + "epoch": 0.26314821625597645, + "grad_norm": 1.7061244657059844, + "learning_rate": 1.728204409247559e-05, + "loss": 0.8919, + "step": 8586 + }, + { + "epoch": 0.26317886477871766, + "grad_norm": 1.443854234000433, + "learning_rate": 1.7281363743806976e-05, + "loss": 0.8067, + "step": 8587 + }, + { + "epoch": 0.26320951330145886, + "grad_norm": 1.355551806093188, + "learning_rate": 1.7280683323393224e-05, + "loss": 0.8055, + "step": 8588 + }, + { + "epoch": 0.26324016182420007, + "grad_norm": 0.7389692967575062, + "learning_rate": 1.7280002831241037e-05, + "loss": 0.6272, + "step": 8589 + }, + { + "epoch": 0.2632708103469413, + "grad_norm": 1.5842000525212059, + "learning_rate": 1.7279322267357116e-05, + "loss": 0.7775, + "step": 8590 + }, + { + "epoch": 0.2633014588696825, + "grad_norm": 0.6780804209672294, + "learning_rate": 1.7278641631748173e-05, + "loss": 0.6115, + "step": 8591 + }, + { + "epoch": 0.2633321073924237, + "grad_norm": 1.587761367853725, + "learning_rate": 1.727796092442091e-05, + "loss": 0.8326, + "step": 8592 + }, + { + "epoch": 0.2633627559151649, + "grad_norm": 1.4031996522636152, + "learning_rate": 1.7277280145382035e-05, + "loss": 0.7961, + "step": 8593 + }, + { + "epoch": 0.2633934044379061, + "grad_norm": 1.6555524508380228, + "learning_rate": 1.727659929463826e-05, + "loss": 0.7606, + "step": 8594 + }, + { + "epoch": 0.2634240529606473, + "grad_norm": 1.4644512114998365, + "learning_rate": 1.7275918372196287e-05, + "loss": 0.7862, + "step": 8595 + }, + { + "epoch": 0.2634547014833885, + "grad_norm": 1.5009006601365367, + "learning_rate": 1.727523737806283e-05, + "loss": 0.7089, + "step": 8596 + }, + { + "epoch": 0.2634853500061297, + "grad_norm": 1.238249236631434, + "learning_rate": 1.72745563122446e-05, + "loss": 0.67, + "step": 8597 + }, + { + "epoch": 0.2635159985288709, + "grad_norm": 1.5734473974217398, + "learning_rate": 1.7273875174748303e-05, + "loss": 0.6422, + "step": 8598 + }, + { + "epoch": 0.2635466470516121, + "grad_norm": 1.3745892509550617, + "learning_rate": 1.7273193965580653e-05, + "loss": 0.6814, + "step": 8599 + }, + { + "epoch": 0.26357729557435333, + "grad_norm": 1.45945784154098, + "learning_rate": 1.727251268474836e-05, + "loss": 0.8433, + "step": 8600 + }, + { + "epoch": 0.26360794409709454, + "grad_norm": 1.3938395817133844, + "learning_rate": 1.727183133225814e-05, + "loss": 0.7822, + "step": 8601 + }, + { + "epoch": 0.26363859261983574, + "grad_norm": 0.7738134902300632, + "learning_rate": 1.727114990811671e-05, + "loss": 0.6227, + "step": 8602 + }, + { + "epoch": 0.26366924114257695, + "grad_norm": 1.3811619024300332, + "learning_rate": 1.7270468412330773e-05, + "loss": 0.7059, + "step": 8603 + }, + { + "epoch": 0.26369988966531815, + "grad_norm": 1.3819731122083712, + "learning_rate": 1.7269786844907054e-05, + "loss": 0.6746, + "step": 8604 + }, + { + "epoch": 0.26373053818805936, + "grad_norm": 1.3020488426490622, + "learning_rate": 1.7269105205852266e-05, + "loss": 0.6661, + "step": 8605 + }, + { + "epoch": 0.26376118671080057, + "grad_norm": 1.4562257324207044, + "learning_rate": 1.726842349517312e-05, + "loss": 0.6771, + "step": 8606 + }, + { + "epoch": 0.2637918352335417, + "grad_norm": 0.7356097306540593, + "learning_rate": 1.726774171287634e-05, + "loss": 0.6297, + "step": 8607 + }, + { + "epoch": 0.2638224837562829, + "grad_norm": 1.4316247087152718, + "learning_rate": 1.7267059858968645e-05, + "loss": 0.7979, + "step": 8608 + }, + { + "epoch": 0.2638531322790241, + "grad_norm": 1.4278328519015469, + "learning_rate": 1.7266377933456747e-05, + "loss": 0.8043, + "step": 8609 + }, + { + "epoch": 0.26388378080176533, + "grad_norm": 1.5381579566923969, + "learning_rate": 1.7265695936347367e-05, + "loss": 0.8369, + "step": 8610 + }, + { + "epoch": 0.26391442932450654, + "grad_norm": 0.6767879740522433, + "learning_rate": 1.7265013867647226e-05, + "loss": 0.6291, + "step": 8611 + }, + { + "epoch": 0.26394507784724774, + "grad_norm": 1.5679174030993808, + "learning_rate": 1.7264331727363046e-05, + "loss": 0.7559, + "step": 8612 + }, + { + "epoch": 0.26397572636998895, + "grad_norm": 1.4662174027266803, + "learning_rate": 1.7263649515501547e-05, + "loss": 0.7695, + "step": 8613 + }, + { + "epoch": 0.26400637489273016, + "grad_norm": 1.5679938647854776, + "learning_rate": 1.726296723206945e-05, + "loss": 0.7749, + "step": 8614 + }, + { + "epoch": 0.26403702341547136, + "grad_norm": 1.4130391903188346, + "learning_rate": 1.7262284877073478e-05, + "loss": 0.7732, + "step": 8615 + }, + { + "epoch": 0.26406767193821257, + "grad_norm": 1.436089052159237, + "learning_rate": 1.7261602450520355e-05, + "loss": 0.8364, + "step": 8616 + }, + { + "epoch": 0.2640983204609538, + "grad_norm": 1.5176181455878484, + "learning_rate": 1.7260919952416807e-05, + "loss": 0.7282, + "step": 8617 + }, + { + "epoch": 0.264128968983695, + "grad_norm": 1.4566194785357094, + "learning_rate": 1.7260237382769553e-05, + "loss": 0.7066, + "step": 8618 + }, + { + "epoch": 0.2641596175064362, + "grad_norm": 1.3238556419839012, + "learning_rate": 1.7259554741585325e-05, + "loss": 0.6986, + "step": 8619 + }, + { + "epoch": 0.2641902660291774, + "grad_norm": 1.4386865029476237, + "learning_rate": 1.725887202887085e-05, + "loss": 0.7594, + "step": 8620 + }, + { + "epoch": 0.2642209145519186, + "grad_norm": 1.391542469936696, + "learning_rate": 1.7258189244632846e-05, + "loss": 0.7631, + "step": 8621 + }, + { + "epoch": 0.2642515630746598, + "grad_norm": 1.392269488013388, + "learning_rate": 1.725750638887805e-05, + "loss": 0.8572, + "step": 8622 + }, + { + "epoch": 0.264282211597401, + "grad_norm": 1.4362281096964458, + "learning_rate": 1.7256823461613183e-05, + "loss": 0.7027, + "step": 8623 + }, + { + "epoch": 0.2643128601201422, + "grad_norm": 0.7554783425137838, + "learning_rate": 1.725614046284498e-05, + "loss": 0.6026, + "step": 8624 + }, + { + "epoch": 0.2643435086428834, + "grad_norm": 1.3800284581503346, + "learning_rate": 1.7255457392580167e-05, + "loss": 0.8532, + "step": 8625 + }, + { + "epoch": 0.2643741571656246, + "grad_norm": 1.6057759465431167, + "learning_rate": 1.725477425082548e-05, + "loss": 0.742, + "step": 8626 + }, + { + "epoch": 0.26440480568836583, + "grad_norm": 1.5898852624019792, + "learning_rate": 1.7254091037587643e-05, + "loss": 0.6798, + "step": 8627 + }, + { + "epoch": 0.26443545421110703, + "grad_norm": 1.631207051594008, + "learning_rate": 1.725340775287339e-05, + "loss": 0.8606, + "step": 8628 + }, + { + "epoch": 0.26446610273384824, + "grad_norm": 1.5258868316118142, + "learning_rate": 1.7252724396689457e-05, + "loss": 0.7117, + "step": 8629 + }, + { + "epoch": 0.26449675125658945, + "grad_norm": 1.4855958246619092, + "learning_rate": 1.7252040969042574e-05, + "loss": 0.724, + "step": 8630 + }, + { + "epoch": 0.26452739977933065, + "grad_norm": 0.7311613560032645, + "learning_rate": 1.7251357469939474e-05, + "loss": 0.6157, + "step": 8631 + }, + { + "epoch": 0.26455804830207186, + "grad_norm": 1.4672438443234388, + "learning_rate": 1.7250673899386895e-05, + "loss": 0.7913, + "step": 8632 + }, + { + "epoch": 0.26458869682481306, + "grad_norm": 1.4606170781926557, + "learning_rate": 1.724999025739157e-05, + "loss": 0.7501, + "step": 8633 + }, + { + "epoch": 0.26461934534755427, + "grad_norm": 1.4049607351462696, + "learning_rate": 1.724930654396024e-05, + "loss": 0.8022, + "step": 8634 + }, + { + "epoch": 0.2646499938702955, + "grad_norm": 0.7109205513637267, + "learning_rate": 1.7248622759099634e-05, + "loss": 0.6401, + "step": 8635 + }, + { + "epoch": 0.2646806423930367, + "grad_norm": 1.6780465976680141, + "learning_rate": 1.7247938902816496e-05, + "loss": 0.8098, + "step": 8636 + }, + { + "epoch": 0.2647112909157779, + "grad_norm": 1.3421849095616338, + "learning_rate": 1.7247254975117557e-05, + "loss": 0.7546, + "step": 8637 + }, + { + "epoch": 0.26474193943851904, + "grad_norm": 1.4578059573247961, + "learning_rate": 1.7246570976009563e-05, + "loss": 0.8599, + "step": 8638 + }, + { + "epoch": 0.26477258796126024, + "grad_norm": 1.6562555419693081, + "learning_rate": 1.7245886905499253e-05, + "loss": 0.8043, + "step": 8639 + }, + { + "epoch": 0.26480323648400145, + "grad_norm": 1.3074262003963315, + "learning_rate": 1.724520276359336e-05, + "loss": 0.7384, + "step": 8640 + }, + { + "epoch": 0.26483388500674265, + "grad_norm": 1.4651570064146968, + "learning_rate": 1.7244518550298634e-05, + "loss": 0.8079, + "step": 8641 + }, + { + "epoch": 0.26486453352948386, + "grad_norm": 1.5158848041591468, + "learning_rate": 1.7243834265621813e-05, + "loss": 0.9123, + "step": 8642 + }, + { + "epoch": 0.26489518205222506, + "grad_norm": 1.3569505164770852, + "learning_rate": 1.7243149909569642e-05, + "loss": 0.7471, + "step": 8643 + }, + { + "epoch": 0.26492583057496627, + "grad_norm": 1.6433553842040243, + "learning_rate": 1.7242465482148857e-05, + "loss": 0.7075, + "step": 8644 + }, + { + "epoch": 0.2649564790977075, + "grad_norm": 0.7070900248216475, + "learning_rate": 1.724178098336621e-05, + "loss": 0.6338, + "step": 8645 + }, + { + "epoch": 0.2649871276204487, + "grad_norm": 1.756279570300156, + "learning_rate": 1.724109641322844e-05, + "loss": 0.6514, + "step": 8646 + }, + { + "epoch": 0.2650177761431899, + "grad_norm": 1.6745354164026405, + "learning_rate": 1.7240411771742295e-05, + "loss": 0.7473, + "step": 8647 + }, + { + "epoch": 0.2650484246659311, + "grad_norm": 1.573779208612319, + "learning_rate": 1.723972705891452e-05, + "loss": 0.7624, + "step": 8648 + }, + { + "epoch": 0.2650790731886723, + "grad_norm": 1.3887681807028236, + "learning_rate": 1.7239042274751864e-05, + "loss": 0.7217, + "step": 8649 + }, + { + "epoch": 0.2651097217114135, + "grad_norm": 0.7238865927124463, + "learning_rate": 1.723835741926107e-05, + "loss": 0.6134, + "step": 8650 + }, + { + "epoch": 0.2651403702341547, + "grad_norm": 1.4777079133340076, + "learning_rate": 1.723767249244889e-05, + "loss": 0.8459, + "step": 8651 + }, + { + "epoch": 0.2651710187568959, + "grad_norm": 1.3515415639756592, + "learning_rate": 1.7236987494322067e-05, + "loss": 0.7781, + "step": 8652 + }, + { + "epoch": 0.2652016672796371, + "grad_norm": 1.3731277178022483, + "learning_rate": 1.7236302424887358e-05, + "loss": 0.8133, + "step": 8653 + }, + { + "epoch": 0.2652323158023783, + "grad_norm": 1.4122749211608459, + "learning_rate": 1.723561728415151e-05, + "loss": 0.732, + "step": 8654 + }, + { + "epoch": 0.26526296432511953, + "grad_norm": 1.4571204401493851, + "learning_rate": 1.7234932072121275e-05, + "loss": 0.8429, + "step": 8655 + }, + { + "epoch": 0.26529361284786074, + "grad_norm": 1.6050606472752587, + "learning_rate": 1.72342467888034e-05, + "loss": 0.8575, + "step": 8656 + }, + { + "epoch": 0.26532426137060194, + "grad_norm": 1.668710480603356, + "learning_rate": 1.723356143420464e-05, + "loss": 0.8027, + "step": 8657 + }, + { + "epoch": 0.26535490989334315, + "grad_norm": 1.346964132047277, + "learning_rate": 1.723287600833175e-05, + "loss": 0.7096, + "step": 8658 + }, + { + "epoch": 0.26538555841608436, + "grad_norm": 0.7315271843908633, + "learning_rate": 1.7232190511191485e-05, + "loss": 0.6421, + "step": 8659 + }, + { + "epoch": 0.26541620693882556, + "grad_norm": 1.5105016287420368, + "learning_rate": 1.723150494279059e-05, + "loss": 0.8708, + "step": 8660 + }, + { + "epoch": 0.26544685546156677, + "grad_norm": 1.6355986174698423, + "learning_rate": 1.7230819303135832e-05, + "loss": 0.7722, + "step": 8661 + }, + { + "epoch": 0.26547750398430797, + "grad_norm": 1.5806918449052578, + "learning_rate": 1.723013359223396e-05, + "loss": 0.8166, + "step": 8662 + }, + { + "epoch": 0.2655081525070492, + "grad_norm": 1.7938614880202357, + "learning_rate": 1.722944781009173e-05, + "loss": 0.8195, + "step": 8663 + }, + { + "epoch": 0.2655388010297904, + "grad_norm": 1.5434181108414837, + "learning_rate": 1.72287619567159e-05, + "loss": 0.6952, + "step": 8664 + }, + { + "epoch": 0.2655694495525316, + "grad_norm": 1.4201298744347595, + "learning_rate": 1.7228076032113234e-05, + "loss": 0.7495, + "step": 8665 + }, + { + "epoch": 0.2656000980752728, + "grad_norm": 1.5971904533865622, + "learning_rate": 1.7227390036290483e-05, + "loss": 0.8445, + "step": 8666 + }, + { + "epoch": 0.265630746598014, + "grad_norm": 1.5889688418162442, + "learning_rate": 1.7226703969254408e-05, + "loss": 0.8139, + "step": 8667 + }, + { + "epoch": 0.2656613951207552, + "grad_norm": 1.744017260662575, + "learning_rate": 1.722601783101177e-05, + "loss": 0.7676, + "step": 8668 + }, + { + "epoch": 0.26569204364349636, + "grad_norm": 1.82867417272512, + "learning_rate": 1.722533162156933e-05, + "loss": 0.8935, + "step": 8669 + }, + { + "epoch": 0.26572269216623756, + "grad_norm": 1.463420260914325, + "learning_rate": 1.722464534093385e-05, + "loss": 0.7528, + "step": 8670 + }, + { + "epoch": 0.26575334068897877, + "grad_norm": 1.7151014686730854, + "learning_rate": 1.7223958989112087e-05, + "loss": 0.7997, + "step": 8671 + }, + { + "epoch": 0.26578398921172, + "grad_norm": 1.5077062604741276, + "learning_rate": 1.722327256611081e-05, + "loss": 0.8167, + "step": 8672 + }, + { + "epoch": 0.2658146377344612, + "grad_norm": 1.6692573126503119, + "learning_rate": 1.722258607193678e-05, + "loss": 0.7444, + "step": 8673 + }, + { + "epoch": 0.2658452862572024, + "grad_norm": 1.4254881705591829, + "learning_rate": 1.722189950659676e-05, + "loss": 0.7661, + "step": 8674 + }, + { + "epoch": 0.2658759347799436, + "grad_norm": 1.4222004495988465, + "learning_rate": 1.7221212870097522e-05, + "loss": 0.7492, + "step": 8675 + }, + { + "epoch": 0.2659065833026848, + "grad_norm": 1.5265693299334573, + "learning_rate": 1.722052616244582e-05, + "loss": 0.7511, + "step": 8676 + }, + { + "epoch": 0.265937231825426, + "grad_norm": 0.7092594466487816, + "learning_rate": 1.7219839383648426e-05, + "loss": 0.6248, + "step": 8677 + }, + { + "epoch": 0.2659678803481672, + "grad_norm": 1.3892317002469128, + "learning_rate": 1.7219152533712114e-05, + "loss": 0.7303, + "step": 8678 + }, + { + "epoch": 0.2659985288709084, + "grad_norm": 0.7430534561017652, + "learning_rate": 1.721846561264364e-05, + "loss": 0.6614, + "step": 8679 + }, + { + "epoch": 0.2660291773936496, + "grad_norm": 1.4794986221550954, + "learning_rate": 1.7217778620449777e-05, + "loss": 0.7941, + "step": 8680 + }, + { + "epoch": 0.2660598259163908, + "grad_norm": 1.507281229445682, + "learning_rate": 1.7217091557137297e-05, + "loss": 0.763, + "step": 8681 + }, + { + "epoch": 0.26609047443913203, + "grad_norm": 1.4707601767794343, + "learning_rate": 1.7216404422712966e-05, + "loss": 0.783, + "step": 8682 + }, + { + "epoch": 0.26612112296187324, + "grad_norm": 1.6161233560750368, + "learning_rate": 1.7215717217183556e-05, + "loss": 0.7827, + "step": 8683 + }, + { + "epoch": 0.26615177148461444, + "grad_norm": 0.7217013878813019, + "learning_rate": 1.7215029940555833e-05, + "loss": 0.5981, + "step": 8684 + }, + { + "epoch": 0.26618242000735565, + "grad_norm": 1.4443514030245472, + "learning_rate": 1.721434259283658e-05, + "loss": 0.7552, + "step": 8685 + }, + { + "epoch": 0.26621306853009685, + "grad_norm": 1.6564546722398628, + "learning_rate": 1.7213655174032563e-05, + "loss": 0.7701, + "step": 8686 + }, + { + "epoch": 0.26624371705283806, + "grad_norm": 0.664850936352611, + "learning_rate": 1.7212967684150554e-05, + "loss": 0.6167, + "step": 8687 + }, + { + "epoch": 0.26627436557557926, + "grad_norm": 1.3893126799107216, + "learning_rate": 1.721228012319733e-05, + "loss": 0.7621, + "step": 8688 + }, + { + "epoch": 0.26630501409832047, + "grad_norm": 1.5345932308670094, + "learning_rate": 1.7211592491179665e-05, + "loss": 0.9363, + "step": 8689 + }, + { + "epoch": 0.2663356626210617, + "grad_norm": 1.2872443278960495, + "learning_rate": 1.7210904788104336e-05, + "loss": 0.653, + "step": 8690 + }, + { + "epoch": 0.2663663111438029, + "grad_norm": 1.4237017410918293, + "learning_rate": 1.7210217013978114e-05, + "loss": 0.8371, + "step": 8691 + }, + { + "epoch": 0.2663969596665441, + "grad_norm": 1.4110479402956784, + "learning_rate": 1.7209529168807776e-05, + "loss": 0.7896, + "step": 8692 + }, + { + "epoch": 0.2664276081892853, + "grad_norm": 1.4572598590795687, + "learning_rate": 1.7208841252600108e-05, + "loss": 0.7463, + "step": 8693 + }, + { + "epoch": 0.2664582567120265, + "grad_norm": 1.3210698973106711, + "learning_rate": 1.720815326536188e-05, + "loss": 0.7963, + "step": 8694 + }, + { + "epoch": 0.2664889052347677, + "grad_norm": 0.787876831837398, + "learning_rate": 1.7207465207099876e-05, + "loss": 0.6418, + "step": 8695 + }, + { + "epoch": 0.2665195537575089, + "grad_norm": 0.7450226090410951, + "learning_rate": 1.720677707782087e-05, + "loss": 0.6365, + "step": 8696 + }, + { + "epoch": 0.2665502022802501, + "grad_norm": 1.6037387650786463, + "learning_rate": 1.7206088877531648e-05, + "loss": 0.7304, + "step": 8697 + }, + { + "epoch": 0.2665808508029913, + "grad_norm": 1.590167988648825, + "learning_rate": 1.7205400606238986e-05, + "loss": 0.8898, + "step": 8698 + }, + { + "epoch": 0.2666114993257325, + "grad_norm": 1.6104101564792601, + "learning_rate": 1.7204712263949674e-05, + "loss": 0.9151, + "step": 8699 + }, + { + "epoch": 0.2666421478484737, + "grad_norm": 1.3245798683543997, + "learning_rate": 1.7204023850670482e-05, + "loss": 0.6395, + "step": 8700 + }, + { + "epoch": 0.2666727963712149, + "grad_norm": 1.3355532146602571, + "learning_rate": 1.7203335366408202e-05, + "loss": 0.7468, + "step": 8701 + }, + { + "epoch": 0.2667034448939561, + "grad_norm": 1.4397830133752456, + "learning_rate": 1.7202646811169616e-05, + "loss": 0.8388, + "step": 8702 + }, + { + "epoch": 0.2667340934166973, + "grad_norm": 1.6273090279955849, + "learning_rate": 1.720195818496151e-05, + "loss": 0.8365, + "step": 8703 + }, + { + "epoch": 0.2667647419394385, + "grad_norm": 1.5083690166279624, + "learning_rate": 1.7201269487790665e-05, + "loss": 0.7468, + "step": 8704 + }, + { + "epoch": 0.2667953904621797, + "grad_norm": 1.6724727246017788, + "learning_rate": 1.720058071966387e-05, + "loss": 0.8505, + "step": 8705 + }, + { + "epoch": 0.2668260389849209, + "grad_norm": 1.44393384052385, + "learning_rate": 1.719989188058791e-05, + "loss": 0.7438, + "step": 8706 + }, + { + "epoch": 0.2668566875076621, + "grad_norm": 1.4244675129646036, + "learning_rate": 1.7199202970569574e-05, + "loss": 0.7552, + "step": 8707 + }, + { + "epoch": 0.2668873360304033, + "grad_norm": 1.403170263455873, + "learning_rate": 1.7198513989615647e-05, + "loss": 0.7191, + "step": 8708 + }, + { + "epoch": 0.2669179845531445, + "grad_norm": 1.4656228463174334, + "learning_rate": 1.7197824937732922e-05, + "loss": 0.8054, + "step": 8709 + }, + { + "epoch": 0.26694863307588573, + "grad_norm": 1.5661758034895201, + "learning_rate": 1.7197135814928187e-05, + "loss": 0.7654, + "step": 8710 + }, + { + "epoch": 0.26697928159862694, + "grad_norm": 1.4242700744321959, + "learning_rate": 1.719644662120823e-05, + "loss": 0.7595, + "step": 8711 + }, + { + "epoch": 0.26700993012136814, + "grad_norm": 1.2941896798451256, + "learning_rate": 1.7195757356579842e-05, + "loss": 0.8414, + "step": 8712 + }, + { + "epoch": 0.26704057864410935, + "grad_norm": 1.4574477234114669, + "learning_rate": 1.7195068021049816e-05, + "loss": 0.854, + "step": 8713 + }, + { + "epoch": 0.26707122716685056, + "grad_norm": 1.815387030766043, + "learning_rate": 1.7194378614624944e-05, + "loss": 0.8205, + "step": 8714 + }, + { + "epoch": 0.26710187568959176, + "grad_norm": 1.4643180137437781, + "learning_rate": 1.719368913731202e-05, + "loss": 0.7615, + "step": 8715 + }, + { + "epoch": 0.26713252421233297, + "grad_norm": 1.5070965627115687, + "learning_rate": 1.7192999589117835e-05, + "loss": 0.6983, + "step": 8716 + }, + { + "epoch": 0.2671631727350742, + "grad_norm": 1.4755182498747212, + "learning_rate": 1.7192309970049188e-05, + "loss": 0.7584, + "step": 8717 + }, + { + "epoch": 0.2671938212578154, + "grad_norm": 1.534501678760926, + "learning_rate": 1.7191620280112865e-05, + "loss": 0.8707, + "step": 8718 + }, + { + "epoch": 0.2672244697805566, + "grad_norm": 1.5586853768632292, + "learning_rate": 1.7190930519315673e-05, + "loss": 0.7834, + "step": 8719 + }, + { + "epoch": 0.2672551183032978, + "grad_norm": 1.5757114039162519, + "learning_rate": 1.71902406876644e-05, + "loss": 0.7753, + "step": 8720 + }, + { + "epoch": 0.267285766826039, + "grad_norm": 1.5613367892962766, + "learning_rate": 1.7189550785165846e-05, + "loss": 0.7929, + "step": 8721 + }, + { + "epoch": 0.2673164153487802, + "grad_norm": 1.430603402027936, + "learning_rate": 1.7188860811826807e-05, + "loss": 0.8277, + "step": 8722 + }, + { + "epoch": 0.2673470638715214, + "grad_norm": 1.461725603432015, + "learning_rate": 1.7188170767654085e-05, + "loss": 0.7111, + "step": 8723 + }, + { + "epoch": 0.2673777123942626, + "grad_norm": 1.471844801563746, + "learning_rate": 1.7187480652654474e-05, + "loss": 0.7364, + "step": 8724 + }, + { + "epoch": 0.2674083609170038, + "grad_norm": 1.4311620431340508, + "learning_rate": 1.718679046683478e-05, + "loss": 0.79, + "step": 8725 + }, + { + "epoch": 0.267439009439745, + "grad_norm": 1.3324688651016374, + "learning_rate": 1.7186100210201805e-05, + "loss": 0.822, + "step": 8726 + }, + { + "epoch": 0.26746965796248623, + "grad_norm": 1.3906580375737778, + "learning_rate": 1.718540988276234e-05, + "loss": 0.7852, + "step": 8727 + }, + { + "epoch": 0.26750030648522743, + "grad_norm": 1.4859431669804852, + "learning_rate": 1.7184719484523195e-05, + "loss": 0.7754, + "step": 8728 + }, + { + "epoch": 0.26753095500796864, + "grad_norm": 1.4634462778670752, + "learning_rate": 1.718402901549117e-05, + "loss": 0.7192, + "step": 8729 + }, + { + "epoch": 0.26756160353070985, + "grad_norm": 1.2089542868993812, + "learning_rate": 1.718333847567307e-05, + "loss": 0.7461, + "step": 8730 + }, + { + "epoch": 0.267592252053451, + "grad_norm": 1.4551045420695365, + "learning_rate": 1.7182647865075693e-05, + "loss": 0.8081, + "step": 8731 + }, + { + "epoch": 0.2676229005761922, + "grad_norm": 1.6292633232702785, + "learning_rate": 1.7181957183705856e-05, + "loss": 0.8128, + "step": 8732 + }, + { + "epoch": 0.2676535490989334, + "grad_norm": 1.0402634727569844, + "learning_rate": 1.7181266431570356e-05, + "loss": 0.6471, + "step": 8733 + }, + { + "epoch": 0.2676841976216746, + "grad_norm": 0.8731096833646776, + "learning_rate": 1.7180575608675997e-05, + "loss": 0.6438, + "step": 8734 + }, + { + "epoch": 0.2677148461444158, + "grad_norm": 1.5311158127915796, + "learning_rate": 1.7179884715029592e-05, + "loss": 0.6677, + "step": 8735 + }, + { + "epoch": 0.267745494667157, + "grad_norm": 1.2939583458260557, + "learning_rate": 1.7179193750637946e-05, + "loss": 0.725, + "step": 8736 + }, + { + "epoch": 0.26777614318989823, + "grad_norm": 1.5341044157322778, + "learning_rate": 1.7178502715507864e-05, + "loss": 0.7264, + "step": 8737 + }, + { + "epoch": 0.26780679171263944, + "grad_norm": 0.8787059531513146, + "learning_rate": 1.717781160964616e-05, + "loss": 0.6015, + "step": 8738 + }, + { + "epoch": 0.26783744023538064, + "grad_norm": 1.3728613405157262, + "learning_rate": 1.717712043305964e-05, + "loss": 0.8256, + "step": 8739 + }, + { + "epoch": 0.26786808875812185, + "grad_norm": 1.55504897723237, + "learning_rate": 1.7176429185755118e-05, + "loss": 0.7289, + "step": 8740 + }, + { + "epoch": 0.26789873728086305, + "grad_norm": 1.3673150782617787, + "learning_rate": 1.7175737867739406e-05, + "loss": 0.8007, + "step": 8741 + }, + { + "epoch": 0.26792938580360426, + "grad_norm": 1.6071525255140806, + "learning_rate": 1.7175046479019307e-05, + "loss": 0.7752, + "step": 8742 + }, + { + "epoch": 0.26796003432634546, + "grad_norm": 1.5203262273122766, + "learning_rate": 1.7174355019601646e-05, + "loss": 0.7539, + "step": 8743 + }, + { + "epoch": 0.26799068284908667, + "grad_norm": 1.6575486387832485, + "learning_rate": 1.7173663489493222e-05, + "loss": 0.808, + "step": 8744 + }, + { + "epoch": 0.2680213313718279, + "grad_norm": 1.450870357103102, + "learning_rate": 1.7172971888700863e-05, + "loss": 0.722, + "step": 8745 + }, + { + "epoch": 0.2680519798945691, + "grad_norm": 1.487173477981501, + "learning_rate": 1.717228021723137e-05, + "loss": 0.8054, + "step": 8746 + }, + { + "epoch": 0.2680826284173103, + "grad_norm": 1.3940671983447839, + "learning_rate": 1.717158847509157e-05, + "loss": 0.745, + "step": 8747 + }, + { + "epoch": 0.2681132769400515, + "grad_norm": 1.6023978434076027, + "learning_rate": 1.717089666228827e-05, + "loss": 0.7532, + "step": 8748 + }, + { + "epoch": 0.2681439254627927, + "grad_norm": 1.6040218427425381, + "learning_rate": 1.7170204778828294e-05, + "loss": 0.8821, + "step": 8749 + }, + { + "epoch": 0.2681745739855339, + "grad_norm": 1.2814226625038247, + "learning_rate": 1.7169512824718456e-05, + "loss": 0.6022, + "step": 8750 + }, + { + "epoch": 0.2682052225082751, + "grad_norm": 1.4217254353005848, + "learning_rate": 1.716882079996557e-05, + "loss": 0.8484, + "step": 8751 + }, + { + "epoch": 0.2682358710310163, + "grad_norm": 1.6024100770366243, + "learning_rate": 1.716812870457646e-05, + "loss": 0.8614, + "step": 8752 + }, + { + "epoch": 0.2682665195537575, + "grad_norm": 1.7505240519992424, + "learning_rate": 1.7167436538557943e-05, + "loss": 0.8056, + "step": 8753 + }, + { + "epoch": 0.2682971680764987, + "grad_norm": 1.4337075061673425, + "learning_rate": 1.716674430191684e-05, + "loss": 0.7069, + "step": 8754 + }, + { + "epoch": 0.26832781659923993, + "grad_norm": 0.846118836573555, + "learning_rate": 1.7166051994659976e-05, + "loss": 0.6272, + "step": 8755 + }, + { + "epoch": 0.26835846512198114, + "grad_norm": 1.6252554843213027, + "learning_rate": 1.716535961679416e-05, + "loss": 0.7069, + "step": 8756 + }, + { + "epoch": 0.26838911364472234, + "grad_norm": 0.8104923286812724, + "learning_rate": 1.716466716832623e-05, + "loss": 0.6315, + "step": 8757 + }, + { + "epoch": 0.26841976216746355, + "grad_norm": 1.3634318720308984, + "learning_rate": 1.7163974649263e-05, + "loss": 0.7383, + "step": 8758 + }, + { + "epoch": 0.26845041069020475, + "grad_norm": 1.9454243047557114, + "learning_rate": 1.7163282059611292e-05, + "loss": 0.8065, + "step": 8759 + }, + { + "epoch": 0.26848105921294596, + "grad_norm": 1.5724510430225898, + "learning_rate": 1.7162589399377933e-05, + "loss": 0.7923, + "step": 8760 + }, + { + "epoch": 0.26851170773568717, + "grad_norm": 1.495776912219226, + "learning_rate": 1.716189666856975e-05, + "loss": 0.8663, + "step": 8761 + }, + { + "epoch": 0.2685423562584283, + "grad_norm": 1.5940679402909046, + "learning_rate": 1.7161203867193567e-05, + "loss": 0.9077, + "step": 8762 + }, + { + "epoch": 0.2685730047811695, + "grad_norm": 0.7696109252807787, + "learning_rate": 1.716051099525621e-05, + "loss": 0.6498, + "step": 8763 + }, + { + "epoch": 0.2686036533039107, + "grad_norm": 1.5042460797124353, + "learning_rate": 1.7159818052764502e-05, + "loss": 0.8207, + "step": 8764 + }, + { + "epoch": 0.26863430182665193, + "grad_norm": 1.822281571577932, + "learning_rate": 1.715912503972528e-05, + "loss": 0.8216, + "step": 8765 + }, + { + "epoch": 0.26866495034939314, + "grad_norm": 1.7897523908759492, + "learning_rate": 1.7158431956145366e-05, + "loss": 0.7729, + "step": 8766 + }, + { + "epoch": 0.26869559887213434, + "grad_norm": 1.4720799309812966, + "learning_rate": 1.715773880203159e-05, + "loss": 0.7788, + "step": 8767 + }, + { + "epoch": 0.26872624739487555, + "grad_norm": 1.382280229100789, + "learning_rate": 1.715704557739078e-05, + "loss": 0.7195, + "step": 8768 + }, + { + "epoch": 0.26875689591761676, + "grad_norm": 1.5987759990287527, + "learning_rate": 1.715635228222977e-05, + "loss": 0.812, + "step": 8769 + }, + { + "epoch": 0.26878754444035796, + "grad_norm": 1.4038286392450992, + "learning_rate": 1.715565891655539e-05, + "loss": 0.7816, + "step": 8770 + }, + { + "epoch": 0.26881819296309917, + "grad_norm": 1.4861709353421655, + "learning_rate": 1.7154965480374473e-05, + "loss": 0.7185, + "step": 8771 + }, + { + "epoch": 0.2688488414858404, + "grad_norm": 1.6745255006442497, + "learning_rate": 1.715427197369385e-05, + "loss": 0.9157, + "step": 8772 + }, + { + "epoch": 0.2688794900085816, + "grad_norm": 1.5256216804376919, + "learning_rate": 1.7153578396520356e-05, + "loss": 0.7678, + "step": 8773 + }, + { + "epoch": 0.2689101385313228, + "grad_norm": 1.5661279649855495, + "learning_rate": 1.715288474886082e-05, + "loss": 0.8588, + "step": 8774 + }, + { + "epoch": 0.268940787054064, + "grad_norm": 1.5493134504294392, + "learning_rate": 1.7152191030722085e-05, + "loss": 0.7678, + "step": 8775 + }, + { + "epoch": 0.2689714355768052, + "grad_norm": 0.7473812738182791, + "learning_rate": 1.7151497242110977e-05, + "loss": 0.5991, + "step": 8776 + }, + { + "epoch": 0.2690020840995464, + "grad_norm": 1.4469676775146947, + "learning_rate": 1.715080338303434e-05, + "loss": 0.81, + "step": 8777 + }, + { + "epoch": 0.2690327326222876, + "grad_norm": 1.4734029847575718, + "learning_rate": 1.7150109453499006e-05, + "loss": 0.8221, + "step": 8778 + }, + { + "epoch": 0.2690633811450288, + "grad_norm": 1.6056536076729664, + "learning_rate": 1.7149415453511818e-05, + "loss": 0.7409, + "step": 8779 + }, + { + "epoch": 0.26909402966777, + "grad_norm": 0.6759824344547813, + "learning_rate": 1.7148721383079607e-05, + "loss": 0.6292, + "step": 8780 + }, + { + "epoch": 0.2691246781905112, + "grad_norm": 1.7022016035718988, + "learning_rate": 1.7148027242209213e-05, + "loss": 0.8907, + "step": 8781 + }, + { + "epoch": 0.26915532671325243, + "grad_norm": 1.3506050659890956, + "learning_rate": 1.714733303090748e-05, + "loss": 0.703, + "step": 8782 + }, + { + "epoch": 0.26918597523599364, + "grad_norm": 1.3448251436770935, + "learning_rate": 1.7146638749181245e-05, + "loss": 0.7209, + "step": 8783 + }, + { + "epoch": 0.26921662375873484, + "grad_norm": 1.6124108574562805, + "learning_rate": 1.714594439703735e-05, + "loss": 0.8015, + "step": 8784 + }, + { + "epoch": 0.26924727228147605, + "grad_norm": 1.510671300706881, + "learning_rate": 1.714524997448264e-05, + "loss": 0.7057, + "step": 8785 + }, + { + "epoch": 0.26927792080421725, + "grad_norm": 1.3282694559191799, + "learning_rate": 1.714455548152395e-05, + "loss": 0.8458, + "step": 8786 + }, + { + "epoch": 0.26930856932695846, + "grad_norm": 0.7062193058961498, + "learning_rate": 1.714386091816813e-05, + "loss": 0.605, + "step": 8787 + }, + { + "epoch": 0.26933921784969966, + "grad_norm": 0.7074328319951677, + "learning_rate": 1.7143166284422018e-05, + "loss": 0.6139, + "step": 8788 + }, + { + "epoch": 0.26936986637244087, + "grad_norm": 0.6801214780835745, + "learning_rate": 1.714247158029246e-05, + "loss": 0.6481, + "step": 8789 + }, + { + "epoch": 0.2694005148951821, + "grad_norm": 1.319199921881965, + "learning_rate": 1.714177680578631e-05, + "loss": 0.7773, + "step": 8790 + }, + { + "epoch": 0.2694311634179233, + "grad_norm": 1.444704437472173, + "learning_rate": 1.7141081960910393e-05, + "loss": 0.8779, + "step": 8791 + }, + { + "epoch": 0.2694618119406645, + "grad_norm": 1.403391618690986, + "learning_rate": 1.7140387045671577e-05, + "loss": 0.8599, + "step": 8792 + }, + { + "epoch": 0.26949246046340564, + "grad_norm": 1.6090966052239362, + "learning_rate": 1.71396920600767e-05, + "loss": 0.8179, + "step": 8793 + }, + { + "epoch": 0.26952310898614684, + "grad_norm": 1.3391450837992902, + "learning_rate": 1.7138997004132604e-05, + "loss": 0.7061, + "step": 8794 + }, + { + "epoch": 0.26955375750888805, + "grad_norm": 1.5930202944186749, + "learning_rate": 1.7138301877846154e-05, + "loss": 0.8104, + "step": 8795 + }, + { + "epoch": 0.26958440603162925, + "grad_norm": 0.8426457186657001, + "learning_rate": 1.713760668122418e-05, + "loss": 0.6313, + "step": 8796 + }, + { + "epoch": 0.26961505455437046, + "grad_norm": 1.6197388088970754, + "learning_rate": 1.7136911414273547e-05, + "loss": 0.8084, + "step": 8797 + }, + { + "epoch": 0.26964570307711166, + "grad_norm": 1.3340066746703727, + "learning_rate": 1.7136216077001096e-05, + "loss": 0.6036, + "step": 8798 + }, + { + "epoch": 0.26967635159985287, + "grad_norm": 0.7209819843471255, + "learning_rate": 1.7135520669413686e-05, + "loss": 0.6075, + "step": 8799 + }, + { + "epoch": 0.2697070001225941, + "grad_norm": 1.4907619088991235, + "learning_rate": 1.713482519151816e-05, + "loss": 0.7082, + "step": 8800 + }, + { + "epoch": 0.2697376486453353, + "grad_norm": 1.46862837245139, + "learning_rate": 1.713412964332138e-05, + "loss": 0.8182, + "step": 8801 + }, + { + "epoch": 0.2697682971680765, + "grad_norm": 1.798873207412451, + "learning_rate": 1.7133434024830192e-05, + "loss": 0.7848, + "step": 8802 + }, + { + "epoch": 0.2697989456908177, + "grad_norm": 1.5150967029474265, + "learning_rate": 1.713273833605146e-05, + "loss": 0.753, + "step": 8803 + }, + { + "epoch": 0.2698295942135589, + "grad_norm": 1.639797689362444, + "learning_rate": 1.7132042576992026e-05, + "loss": 0.8238, + "step": 8804 + }, + { + "epoch": 0.2698602427363001, + "grad_norm": 1.2990795482980562, + "learning_rate": 1.713134674765875e-05, + "loss": 0.6937, + "step": 8805 + }, + { + "epoch": 0.2698908912590413, + "grad_norm": 1.4452511420210363, + "learning_rate": 1.7130650848058496e-05, + "loss": 0.8461, + "step": 8806 + }, + { + "epoch": 0.2699215397817825, + "grad_norm": 1.6944702875564166, + "learning_rate": 1.7129954878198113e-05, + "loss": 0.7511, + "step": 8807 + }, + { + "epoch": 0.2699521883045237, + "grad_norm": 1.6009857514730435, + "learning_rate": 1.7129258838084455e-05, + "loss": 0.7308, + "step": 8808 + }, + { + "epoch": 0.2699828368272649, + "grad_norm": 1.5252395885210042, + "learning_rate": 1.7128562727724393e-05, + "loss": 0.7339, + "step": 8809 + }, + { + "epoch": 0.27001348535000613, + "grad_norm": 1.614318331677546, + "learning_rate": 1.7127866547124774e-05, + "loss": 0.8178, + "step": 8810 + }, + { + "epoch": 0.27004413387274734, + "grad_norm": 1.4035579283870434, + "learning_rate": 1.7127170296292463e-05, + "loss": 0.7579, + "step": 8811 + }, + { + "epoch": 0.27007478239548854, + "grad_norm": 1.210532131346939, + "learning_rate": 1.712647397523432e-05, + "loss": 0.7118, + "step": 8812 + }, + { + "epoch": 0.27010543091822975, + "grad_norm": 1.5550740884700465, + "learning_rate": 1.7125777583957207e-05, + "loss": 0.7478, + "step": 8813 + }, + { + "epoch": 0.27013607944097096, + "grad_norm": 1.67694543301392, + "learning_rate": 1.7125081122467982e-05, + "loss": 0.6643, + "step": 8814 + }, + { + "epoch": 0.27016672796371216, + "grad_norm": 1.4759185836223099, + "learning_rate": 1.712438459077351e-05, + "loss": 0.7711, + "step": 8815 + }, + { + "epoch": 0.27019737648645337, + "grad_norm": 0.8172925610933998, + "learning_rate": 1.7123687988880653e-05, + "loss": 0.6615, + "step": 8816 + }, + { + "epoch": 0.2702280250091946, + "grad_norm": 1.372720057150666, + "learning_rate": 1.712299131679628e-05, + "loss": 0.773, + "step": 8817 + }, + { + "epoch": 0.2702586735319358, + "grad_norm": 1.504730547219705, + "learning_rate": 1.7122294574527246e-05, + "loss": 0.8006, + "step": 8818 + }, + { + "epoch": 0.270289322054677, + "grad_norm": 1.4983092062712182, + "learning_rate": 1.7121597762080422e-05, + "loss": 0.8214, + "step": 8819 + }, + { + "epoch": 0.2703199705774182, + "grad_norm": 0.7140268994994935, + "learning_rate": 1.7120900879462675e-05, + "loss": 0.6439, + "step": 8820 + }, + { + "epoch": 0.2703506191001594, + "grad_norm": 1.4215312259055277, + "learning_rate": 1.712020392668087e-05, + "loss": 0.6876, + "step": 8821 + }, + { + "epoch": 0.2703812676229006, + "grad_norm": 0.6820952048678843, + "learning_rate": 1.711950690374187e-05, + "loss": 0.6283, + "step": 8822 + }, + { + "epoch": 0.2704119161456418, + "grad_norm": 1.4262846546597523, + "learning_rate": 1.711880981065255e-05, + "loss": 0.8995, + "step": 8823 + }, + { + "epoch": 0.27044256466838296, + "grad_norm": 1.3787601678843897, + "learning_rate": 1.7118112647419778e-05, + "loss": 0.7273, + "step": 8824 + }, + { + "epoch": 0.27047321319112416, + "grad_norm": 1.4960799821739634, + "learning_rate": 1.7117415414050417e-05, + "loss": 0.6971, + "step": 8825 + }, + { + "epoch": 0.27050386171386537, + "grad_norm": 1.3296588246469168, + "learning_rate": 1.7116718110551343e-05, + "loss": 0.6669, + "step": 8826 + }, + { + "epoch": 0.2705345102366066, + "grad_norm": 1.4758850892412296, + "learning_rate": 1.7116020736929423e-05, + "loss": 0.8007, + "step": 8827 + }, + { + "epoch": 0.2705651587593478, + "grad_norm": 1.4210327073649993, + "learning_rate": 1.7115323293191532e-05, + "loss": 0.6871, + "step": 8828 + }, + { + "epoch": 0.270595807282089, + "grad_norm": 1.5762957986703976, + "learning_rate": 1.7114625779344534e-05, + "loss": 0.8594, + "step": 8829 + }, + { + "epoch": 0.2706264558048302, + "grad_norm": 1.467607658380748, + "learning_rate": 1.7113928195395314e-05, + "loss": 0.8961, + "step": 8830 + }, + { + "epoch": 0.2706571043275714, + "grad_norm": 1.359158128027806, + "learning_rate": 1.7113230541350736e-05, + "loss": 0.8233, + "step": 8831 + }, + { + "epoch": 0.2706877528503126, + "grad_norm": 0.7404495366115281, + "learning_rate": 1.711253281721768e-05, + "loss": 0.6009, + "step": 8832 + }, + { + "epoch": 0.2707184013730538, + "grad_norm": 0.7134217337730744, + "learning_rate": 1.7111835023003016e-05, + "loss": 0.5867, + "step": 8833 + }, + { + "epoch": 0.270749049895795, + "grad_norm": 1.4608788283081706, + "learning_rate": 1.7111137158713626e-05, + "loss": 0.7981, + "step": 8834 + }, + { + "epoch": 0.2707796984185362, + "grad_norm": 1.493292092355571, + "learning_rate": 1.711043922435638e-05, + "loss": 0.7954, + "step": 8835 + }, + { + "epoch": 0.2708103469412774, + "grad_norm": 1.7142902838555512, + "learning_rate": 1.7109741219938155e-05, + "loss": 0.9193, + "step": 8836 + }, + { + "epoch": 0.27084099546401863, + "grad_norm": 0.7647632175917494, + "learning_rate": 1.7109043145465833e-05, + "loss": 0.6308, + "step": 8837 + }, + { + "epoch": 0.27087164398675984, + "grad_norm": 1.3679135003572427, + "learning_rate": 1.7108345000946288e-05, + "loss": 0.8595, + "step": 8838 + }, + { + "epoch": 0.27090229250950104, + "grad_norm": 1.3382338699813228, + "learning_rate": 1.7107646786386402e-05, + "loss": 0.8352, + "step": 8839 + }, + { + "epoch": 0.27093294103224225, + "grad_norm": 1.4674132304536296, + "learning_rate": 1.7106948501793053e-05, + "loss": 0.9208, + "step": 8840 + }, + { + "epoch": 0.27096358955498345, + "grad_norm": 1.3696969988316894, + "learning_rate": 1.7106250147173122e-05, + "loss": 0.7763, + "step": 8841 + }, + { + "epoch": 0.27099423807772466, + "grad_norm": 1.4220578675694007, + "learning_rate": 1.710555172253349e-05, + "loss": 0.6786, + "step": 8842 + }, + { + "epoch": 0.27102488660046586, + "grad_norm": 1.4250139256637655, + "learning_rate": 1.7104853227881042e-05, + "loss": 0.6504, + "step": 8843 + }, + { + "epoch": 0.27105553512320707, + "grad_norm": 1.7541266794760368, + "learning_rate": 1.7104154663222653e-05, + "loss": 0.8675, + "step": 8844 + }, + { + "epoch": 0.2710861836459483, + "grad_norm": 1.3856874811856539, + "learning_rate": 1.7103456028565213e-05, + "loss": 0.7892, + "step": 8845 + }, + { + "epoch": 0.2711168321686895, + "grad_norm": 1.5257800589894823, + "learning_rate": 1.71027573239156e-05, + "loss": 0.6681, + "step": 8846 + }, + { + "epoch": 0.2711474806914307, + "grad_norm": 1.3669629525587, + "learning_rate": 1.7102058549280705e-05, + "loss": 0.7334, + "step": 8847 + }, + { + "epoch": 0.2711781292141719, + "grad_norm": 1.6663366196641385, + "learning_rate": 1.710135970466741e-05, + "loss": 0.8368, + "step": 8848 + }, + { + "epoch": 0.2712087777369131, + "grad_norm": 1.4888450357488467, + "learning_rate": 1.71006607900826e-05, + "loss": 0.761, + "step": 8849 + }, + { + "epoch": 0.2712394262596543, + "grad_norm": 1.3644156471952302, + "learning_rate": 1.7099961805533163e-05, + "loss": 0.7041, + "step": 8850 + }, + { + "epoch": 0.2712700747823955, + "grad_norm": 1.4031605992785474, + "learning_rate": 1.7099262751025988e-05, + "loss": 0.7682, + "step": 8851 + }, + { + "epoch": 0.2713007233051367, + "grad_norm": 1.349735816182479, + "learning_rate": 1.7098563626567955e-05, + "loss": 0.8009, + "step": 8852 + }, + { + "epoch": 0.2713313718278779, + "grad_norm": 1.5259943527166155, + "learning_rate": 1.7097864432165963e-05, + "loss": 0.7257, + "step": 8853 + }, + { + "epoch": 0.2713620203506191, + "grad_norm": 1.4241032345299904, + "learning_rate": 1.7097165167826894e-05, + "loss": 0.7486, + "step": 8854 + }, + { + "epoch": 0.2713926688733603, + "grad_norm": 1.473518612521044, + "learning_rate": 1.709646583355764e-05, + "loss": 0.8517, + "step": 8855 + }, + { + "epoch": 0.2714233173961015, + "grad_norm": 1.5329287601009043, + "learning_rate": 1.7095766429365097e-05, + "loss": 0.8763, + "step": 8856 + }, + { + "epoch": 0.2714539659188427, + "grad_norm": 1.7676836767068014, + "learning_rate": 1.7095066955256147e-05, + "loss": 0.8095, + "step": 8857 + }, + { + "epoch": 0.2714846144415839, + "grad_norm": 1.7863880827096077, + "learning_rate": 1.709436741123769e-05, + "loss": 0.7667, + "step": 8858 + }, + { + "epoch": 0.2715152629643251, + "grad_norm": 0.8196045977253115, + "learning_rate": 1.7093667797316618e-05, + "loss": 0.647, + "step": 8859 + }, + { + "epoch": 0.2715459114870663, + "grad_norm": 1.5664116907560033, + "learning_rate": 1.7092968113499816e-05, + "loss": 0.7411, + "step": 8860 + }, + { + "epoch": 0.2715765600098075, + "grad_norm": 1.6219367833393847, + "learning_rate": 1.709226835979419e-05, + "loss": 0.7782, + "step": 8861 + }, + { + "epoch": 0.2716072085325487, + "grad_norm": 1.6398858488569632, + "learning_rate": 1.7091568536206625e-05, + "loss": 0.8976, + "step": 8862 + }, + { + "epoch": 0.2716378570552899, + "grad_norm": 1.3637863689550596, + "learning_rate": 1.709086864274402e-05, + "loss": 0.7304, + "step": 8863 + }, + { + "epoch": 0.2716685055780311, + "grad_norm": 1.5636540369647662, + "learning_rate": 1.7090168679413276e-05, + "loss": 0.7388, + "step": 8864 + }, + { + "epoch": 0.27169915410077233, + "grad_norm": 1.560480692199553, + "learning_rate": 1.7089468646221282e-05, + "loss": 0.8026, + "step": 8865 + }, + { + "epoch": 0.27172980262351354, + "grad_norm": 1.505541730533984, + "learning_rate": 1.708876854317494e-05, + "loss": 0.7344, + "step": 8866 + }, + { + "epoch": 0.27176045114625474, + "grad_norm": 1.6090537139536858, + "learning_rate": 1.7088068370281153e-05, + "loss": 0.7397, + "step": 8867 + }, + { + "epoch": 0.27179109966899595, + "grad_norm": 1.3204231186284516, + "learning_rate": 1.708736812754681e-05, + "loss": 0.7488, + "step": 8868 + }, + { + "epoch": 0.27182174819173716, + "grad_norm": 1.4661601350080087, + "learning_rate": 1.708666781497882e-05, + "loss": 0.7411, + "step": 8869 + }, + { + "epoch": 0.27185239671447836, + "grad_norm": 1.4985234502571532, + "learning_rate": 1.7085967432584075e-05, + "loss": 0.7128, + "step": 8870 + }, + { + "epoch": 0.27188304523721957, + "grad_norm": 1.410303055084258, + "learning_rate": 1.708526698036948e-05, + "loss": 0.7844, + "step": 8871 + }, + { + "epoch": 0.2719136937599608, + "grad_norm": 1.5020811185746847, + "learning_rate": 1.7084566458341934e-05, + "loss": 0.8642, + "step": 8872 + }, + { + "epoch": 0.271944342282702, + "grad_norm": 1.398413091476863, + "learning_rate": 1.7083865866508347e-05, + "loss": 0.8429, + "step": 8873 + }, + { + "epoch": 0.2719749908054432, + "grad_norm": 1.5511519903030608, + "learning_rate": 1.7083165204875617e-05, + "loss": 0.7253, + "step": 8874 + }, + { + "epoch": 0.2720056393281844, + "grad_norm": 1.4073890328586136, + "learning_rate": 1.708246447345064e-05, + "loss": 0.7383, + "step": 8875 + }, + { + "epoch": 0.2720362878509256, + "grad_norm": 1.358572665856911, + "learning_rate": 1.7081763672240338e-05, + "loss": 0.8116, + "step": 8876 + }, + { + "epoch": 0.2720669363736668, + "grad_norm": 1.429961062265147, + "learning_rate": 1.7081062801251603e-05, + "loss": 0.7841, + "step": 8877 + }, + { + "epoch": 0.272097584896408, + "grad_norm": 1.5955492426482938, + "learning_rate": 1.7080361860491342e-05, + "loss": 0.8533, + "step": 8878 + }, + { + "epoch": 0.2721282334191492, + "grad_norm": 1.6521977423021579, + "learning_rate": 1.7079660849966472e-05, + "loss": 0.8264, + "step": 8879 + }, + { + "epoch": 0.2721588819418904, + "grad_norm": 0.7621309335207508, + "learning_rate": 1.7078959769683882e-05, + "loss": 0.6344, + "step": 8880 + }, + { + "epoch": 0.2721895304646316, + "grad_norm": 1.6212674335745074, + "learning_rate": 1.7078258619650497e-05, + "loss": 0.722, + "step": 8881 + }, + { + "epoch": 0.27222017898737283, + "grad_norm": 1.5435696801649255, + "learning_rate": 1.7077557399873216e-05, + "loss": 0.8489, + "step": 8882 + }, + { + "epoch": 0.27225082751011404, + "grad_norm": 1.5745334930662864, + "learning_rate": 1.7076856110358952e-05, + "loss": 0.7512, + "step": 8883 + }, + { + "epoch": 0.27228147603285524, + "grad_norm": 0.6698261141999188, + "learning_rate": 1.7076154751114616e-05, + "loss": 0.6332, + "step": 8884 + }, + { + "epoch": 0.27231212455559645, + "grad_norm": 1.4776096558222622, + "learning_rate": 1.7075453322147112e-05, + "loss": 0.8101, + "step": 8885 + }, + { + "epoch": 0.2723427730783376, + "grad_norm": 1.40367937720038, + "learning_rate": 1.707475182346336e-05, + "loss": 0.6972, + "step": 8886 + }, + { + "epoch": 0.2723734216010788, + "grad_norm": 1.5212567017689083, + "learning_rate": 1.7074050255070263e-05, + "loss": 0.7907, + "step": 8887 + }, + { + "epoch": 0.27240407012382, + "grad_norm": 1.5040535621198716, + "learning_rate": 1.7073348616974746e-05, + "loss": 0.7497, + "step": 8888 + }, + { + "epoch": 0.2724347186465612, + "grad_norm": 1.4025998469624945, + "learning_rate": 1.707264690918371e-05, + "loss": 0.7759, + "step": 8889 + }, + { + "epoch": 0.2724653671693024, + "grad_norm": 0.764565169732655, + "learning_rate": 1.7071945131704077e-05, + "loss": 0.6223, + "step": 8890 + }, + { + "epoch": 0.2724960156920436, + "grad_norm": 1.4583380529158123, + "learning_rate": 1.707124328454276e-05, + "loss": 0.7069, + "step": 8891 + }, + { + "epoch": 0.27252666421478483, + "grad_norm": 1.7043434194248888, + "learning_rate": 1.7070541367706673e-05, + "loss": 0.9144, + "step": 8892 + }, + { + "epoch": 0.27255731273752604, + "grad_norm": 0.6948095198605788, + "learning_rate": 1.706983938120273e-05, + "loss": 0.6096, + "step": 8893 + }, + { + "epoch": 0.27258796126026724, + "grad_norm": 0.683882811020595, + "learning_rate": 1.7069137325037852e-05, + "loss": 0.6216, + "step": 8894 + }, + { + "epoch": 0.27261860978300845, + "grad_norm": 1.3699944001375846, + "learning_rate": 1.7068435199218957e-05, + "loss": 0.7821, + "step": 8895 + }, + { + "epoch": 0.27264925830574965, + "grad_norm": 1.4396408702899652, + "learning_rate": 1.706773300375296e-05, + "loss": 0.9121, + "step": 8896 + }, + { + "epoch": 0.27267990682849086, + "grad_norm": 1.438641076045947, + "learning_rate": 1.706703073864678e-05, + "loss": 0.8594, + "step": 8897 + }, + { + "epoch": 0.27271055535123206, + "grad_norm": 0.7084170717270901, + "learning_rate": 1.7066328403907345e-05, + "loss": 0.6374, + "step": 8898 + }, + { + "epoch": 0.27274120387397327, + "grad_norm": 1.3580623064080983, + "learning_rate": 1.706562599954156e-05, + "loss": 0.7351, + "step": 8899 + }, + { + "epoch": 0.2727718523967145, + "grad_norm": 1.5124608433360431, + "learning_rate": 1.7064923525556357e-05, + "loss": 0.7405, + "step": 8900 + }, + { + "epoch": 0.2728025009194557, + "grad_norm": 1.5383020738158597, + "learning_rate": 1.7064220981958655e-05, + "loss": 0.757, + "step": 8901 + }, + { + "epoch": 0.2728331494421969, + "grad_norm": 1.4934546471564618, + "learning_rate": 1.7063518368755376e-05, + "loss": 0.8045, + "step": 8902 + }, + { + "epoch": 0.2728637979649381, + "grad_norm": 1.4027574734569996, + "learning_rate": 1.7062815685953444e-05, + "loss": 0.8162, + "step": 8903 + }, + { + "epoch": 0.2728944464876793, + "grad_norm": 1.4442475336825746, + "learning_rate": 1.706211293355978e-05, + "loss": 0.6733, + "step": 8904 + }, + { + "epoch": 0.2729250950104205, + "grad_norm": 1.57977388945798, + "learning_rate": 1.706141011158131e-05, + "loss": 0.7778, + "step": 8905 + }, + { + "epoch": 0.2729557435331617, + "grad_norm": 1.419163845776306, + "learning_rate": 1.7060707220024963e-05, + "loss": 0.6415, + "step": 8906 + }, + { + "epoch": 0.2729863920559029, + "grad_norm": 1.511164254160126, + "learning_rate": 1.7060004258897657e-05, + "loss": 0.8265, + "step": 8907 + }, + { + "epoch": 0.2730170405786441, + "grad_norm": 0.7632949142466192, + "learning_rate": 1.7059301228206326e-05, + "loss": 0.6264, + "step": 8908 + }, + { + "epoch": 0.2730476891013853, + "grad_norm": 1.4488521749781103, + "learning_rate": 1.7058598127957894e-05, + "loss": 0.8146, + "step": 8909 + }, + { + "epoch": 0.27307833762412653, + "grad_norm": 1.4057662599737213, + "learning_rate": 1.7057894958159287e-05, + "loss": 0.825, + "step": 8910 + }, + { + "epoch": 0.27310898614686774, + "grad_norm": 1.5489536921646812, + "learning_rate": 1.7057191718817437e-05, + "loss": 0.8192, + "step": 8911 + }, + { + "epoch": 0.27313963466960894, + "grad_norm": 1.854859961873132, + "learning_rate": 1.7056488409939266e-05, + "loss": 0.7583, + "step": 8912 + }, + { + "epoch": 0.27317028319235015, + "grad_norm": 1.6787446721194588, + "learning_rate": 1.7055785031531715e-05, + "loss": 0.81, + "step": 8913 + }, + { + "epoch": 0.27320093171509136, + "grad_norm": 1.6111246237398043, + "learning_rate": 1.7055081583601706e-05, + "loss": 0.7652, + "step": 8914 + }, + { + "epoch": 0.27323158023783256, + "grad_norm": 0.689043162691557, + "learning_rate": 1.7054378066156174e-05, + "loss": 0.6304, + "step": 8915 + }, + { + "epoch": 0.27326222876057377, + "grad_norm": 1.560068132120279, + "learning_rate": 1.705367447920205e-05, + "loss": 0.9328, + "step": 8916 + }, + { + "epoch": 0.2732928772833149, + "grad_norm": 1.491522331485512, + "learning_rate": 1.7052970822746265e-05, + "loss": 0.7642, + "step": 8917 + }, + { + "epoch": 0.2733235258060561, + "grad_norm": 1.380082463373917, + "learning_rate": 1.705226709679576e-05, + "loss": 0.7269, + "step": 8918 + }, + { + "epoch": 0.27335417432879733, + "grad_norm": 1.6787481268942333, + "learning_rate": 1.7051563301357456e-05, + "loss": 0.8774, + "step": 8919 + }, + { + "epoch": 0.27338482285153853, + "grad_norm": 1.58224957343724, + "learning_rate": 1.7050859436438298e-05, + "loss": 0.7943, + "step": 8920 + }, + { + "epoch": 0.27341547137427974, + "grad_norm": 0.6635055985984668, + "learning_rate": 1.7050155502045215e-05, + "loss": 0.6281, + "step": 8921 + }, + { + "epoch": 0.27344611989702095, + "grad_norm": 1.481335024371534, + "learning_rate": 1.704945149818515e-05, + "loss": 0.8573, + "step": 8922 + }, + { + "epoch": 0.27347676841976215, + "grad_norm": 1.4816891839054236, + "learning_rate": 1.704874742486503e-05, + "loss": 0.706, + "step": 8923 + }, + { + "epoch": 0.27350741694250336, + "grad_norm": 1.270614961212966, + "learning_rate": 1.70480432820918e-05, + "loss": 0.7473, + "step": 8924 + }, + { + "epoch": 0.27353806546524456, + "grad_norm": 1.547414935863288, + "learning_rate": 1.70473390698724e-05, + "loss": 0.8172, + "step": 8925 + }, + { + "epoch": 0.27356871398798577, + "grad_norm": 1.5424576047444962, + "learning_rate": 1.7046634788213767e-05, + "loss": 0.6934, + "step": 8926 + }, + { + "epoch": 0.273599362510727, + "grad_norm": 1.3657173334843222, + "learning_rate": 1.7045930437122832e-05, + "loss": 0.6347, + "step": 8927 + }, + { + "epoch": 0.2736300110334682, + "grad_norm": 0.691901548243232, + "learning_rate": 1.7045226016606544e-05, + "loss": 0.6411, + "step": 8928 + }, + { + "epoch": 0.2736606595562094, + "grad_norm": 1.6022865963562711, + "learning_rate": 1.7044521526671842e-05, + "loss": 0.7609, + "step": 8929 + }, + { + "epoch": 0.2736913080789506, + "grad_norm": 1.3802383795891817, + "learning_rate": 1.7043816967325664e-05, + "loss": 0.8562, + "step": 8930 + }, + { + "epoch": 0.2737219566016918, + "grad_norm": 0.6711361151854842, + "learning_rate": 1.704311233857496e-05, + "loss": 0.6324, + "step": 8931 + }, + { + "epoch": 0.273752605124433, + "grad_norm": 1.5597589055820233, + "learning_rate": 1.704240764042666e-05, + "loss": 0.7244, + "step": 8932 + }, + { + "epoch": 0.2737832536471742, + "grad_norm": 1.5239719549723363, + "learning_rate": 1.7041702872887725e-05, + "loss": 0.804, + "step": 8933 + }, + { + "epoch": 0.2738139021699154, + "grad_norm": 1.5126793244665164, + "learning_rate": 1.7040998035965086e-05, + "loss": 0.7514, + "step": 8934 + }, + { + "epoch": 0.2738445506926566, + "grad_norm": 1.3027849909584124, + "learning_rate": 1.7040293129665692e-05, + "loss": 0.6894, + "step": 8935 + }, + { + "epoch": 0.2738751992153978, + "grad_norm": 1.5758717306735939, + "learning_rate": 1.7039588153996488e-05, + "loss": 0.8902, + "step": 8936 + }, + { + "epoch": 0.27390584773813903, + "grad_norm": 1.4034744635291831, + "learning_rate": 1.7038883108964423e-05, + "loss": 0.7034, + "step": 8937 + }, + { + "epoch": 0.27393649626088024, + "grad_norm": 1.2880732970549373, + "learning_rate": 1.703817799457644e-05, + "loss": 0.7364, + "step": 8938 + }, + { + "epoch": 0.27396714478362144, + "grad_norm": 1.3944191943925783, + "learning_rate": 1.7037472810839484e-05, + "loss": 0.7257, + "step": 8939 + }, + { + "epoch": 0.27399779330636265, + "grad_norm": 1.2952701584987054, + "learning_rate": 1.7036767557760515e-05, + "loss": 0.7114, + "step": 8940 + }, + { + "epoch": 0.27402844182910385, + "grad_norm": 1.5647179390232828, + "learning_rate": 1.7036062235346472e-05, + "loss": 0.7049, + "step": 8941 + }, + { + "epoch": 0.27405909035184506, + "grad_norm": 0.7374729713673805, + "learning_rate": 1.7035356843604306e-05, + "loss": 0.629, + "step": 8942 + }, + { + "epoch": 0.27408973887458626, + "grad_norm": 1.5490706680039497, + "learning_rate": 1.703465138254097e-05, + "loss": 0.7742, + "step": 8943 + }, + { + "epoch": 0.27412038739732747, + "grad_norm": 1.5143069850346906, + "learning_rate": 1.7033945852163415e-05, + "loss": 0.8542, + "step": 8944 + }, + { + "epoch": 0.2741510359200687, + "grad_norm": 1.35518055209175, + "learning_rate": 1.7033240252478595e-05, + "loss": 0.7649, + "step": 8945 + }, + { + "epoch": 0.2741816844428099, + "grad_norm": 1.5592620153687957, + "learning_rate": 1.7032534583493455e-05, + "loss": 0.6891, + "step": 8946 + }, + { + "epoch": 0.2742123329655511, + "grad_norm": 1.402228459951746, + "learning_rate": 1.7031828845214952e-05, + "loss": 0.6818, + "step": 8947 + }, + { + "epoch": 0.27424298148829224, + "grad_norm": 1.374232062901449, + "learning_rate": 1.703112303765004e-05, + "loss": 0.7564, + "step": 8948 + }, + { + "epoch": 0.27427363001103344, + "grad_norm": 0.7020786771842688, + "learning_rate": 1.7030417160805677e-05, + "loss": 0.6177, + "step": 8949 + }, + { + "epoch": 0.27430427853377465, + "grad_norm": 1.4568176244054172, + "learning_rate": 1.7029711214688812e-05, + "loss": 0.7727, + "step": 8950 + }, + { + "epoch": 0.27433492705651585, + "grad_norm": 1.314461829010274, + "learning_rate": 1.7029005199306405e-05, + "loss": 0.7086, + "step": 8951 + }, + { + "epoch": 0.27436557557925706, + "grad_norm": 1.5358149114130206, + "learning_rate": 1.702829911466541e-05, + "loss": 0.6752, + "step": 8952 + }, + { + "epoch": 0.27439622410199827, + "grad_norm": 1.5943892336015033, + "learning_rate": 1.7027592960772786e-05, + "loss": 0.7776, + "step": 8953 + }, + { + "epoch": 0.27442687262473947, + "grad_norm": 1.3409773206323357, + "learning_rate": 1.702688673763549e-05, + "loss": 0.657, + "step": 8954 + }, + { + "epoch": 0.2744575211474807, + "grad_norm": 1.55444229518415, + "learning_rate": 1.7026180445260482e-05, + "loss": 0.7622, + "step": 8955 + }, + { + "epoch": 0.2744881696702219, + "grad_norm": 1.4677212034241833, + "learning_rate": 1.702547408365472e-05, + "loss": 0.7929, + "step": 8956 + }, + { + "epoch": 0.2745188181929631, + "grad_norm": 1.5782611003231288, + "learning_rate": 1.7024767652825165e-05, + "loss": 0.8526, + "step": 8957 + }, + { + "epoch": 0.2745494667157043, + "grad_norm": 1.4594449698750922, + "learning_rate": 1.7024061152778776e-05, + "loss": 0.764, + "step": 8958 + }, + { + "epoch": 0.2745801152384455, + "grad_norm": 1.557296712405165, + "learning_rate": 1.7023354583522516e-05, + "loss": 0.7464, + "step": 8959 + }, + { + "epoch": 0.2746107637611867, + "grad_norm": 1.5060544692100226, + "learning_rate": 1.7022647945063347e-05, + "loss": 0.7204, + "step": 8960 + }, + { + "epoch": 0.2746414122839279, + "grad_norm": 1.5994203596402201, + "learning_rate": 1.702194123740823e-05, + "loss": 0.734, + "step": 8961 + }, + { + "epoch": 0.2746720608066691, + "grad_norm": 1.719087376383349, + "learning_rate": 1.7021234460564128e-05, + "loss": 0.7576, + "step": 8962 + }, + { + "epoch": 0.2747027093294103, + "grad_norm": 1.413655308677272, + "learning_rate": 1.7020527614538007e-05, + "loss": 0.7468, + "step": 8963 + }, + { + "epoch": 0.2747333578521515, + "grad_norm": 1.4279951873678673, + "learning_rate": 1.7019820699336836e-05, + "loss": 0.7662, + "step": 8964 + }, + { + "epoch": 0.27476400637489273, + "grad_norm": 1.5314300923819764, + "learning_rate": 1.701911371496757e-05, + "loss": 0.8089, + "step": 8965 + }, + { + "epoch": 0.27479465489763394, + "grad_norm": 1.3940409259725186, + "learning_rate": 1.7018406661437182e-05, + "loss": 0.7716, + "step": 8966 + }, + { + "epoch": 0.27482530342037514, + "grad_norm": 1.5521289214494014, + "learning_rate": 1.7017699538752638e-05, + "loss": 0.732, + "step": 8967 + }, + { + "epoch": 0.27485595194311635, + "grad_norm": 0.7154568005771353, + "learning_rate": 1.7016992346920905e-05, + "loss": 0.6358, + "step": 8968 + }, + { + "epoch": 0.27488660046585756, + "grad_norm": 1.570149274334056, + "learning_rate": 1.7016285085948952e-05, + "loss": 0.9199, + "step": 8969 + }, + { + "epoch": 0.27491724898859876, + "grad_norm": 1.3907238919575808, + "learning_rate": 1.7015577755843746e-05, + "loss": 0.695, + "step": 8970 + }, + { + "epoch": 0.27494789751133997, + "grad_norm": 1.3553414745487349, + "learning_rate": 1.7014870356612255e-05, + "loss": 0.7821, + "step": 8971 + }, + { + "epoch": 0.2749785460340812, + "grad_norm": 1.579374179829399, + "learning_rate": 1.7014162888261453e-05, + "loss": 0.7378, + "step": 8972 + }, + { + "epoch": 0.2750091945568224, + "grad_norm": 1.5234743456349011, + "learning_rate": 1.7013455350798308e-05, + "loss": 0.8087, + "step": 8973 + }, + { + "epoch": 0.2750398430795636, + "grad_norm": 0.6929686376487382, + "learning_rate": 1.7012747744229794e-05, + "loss": 0.625, + "step": 8974 + }, + { + "epoch": 0.2750704916023048, + "grad_norm": 1.3995825286987442, + "learning_rate": 1.7012040068562884e-05, + "loss": 0.8929, + "step": 8975 + }, + { + "epoch": 0.275101140125046, + "grad_norm": 1.4902547003690587, + "learning_rate": 1.701133232380455e-05, + "loss": 0.7783, + "step": 8976 + }, + { + "epoch": 0.2751317886477872, + "grad_norm": 1.5804217868725243, + "learning_rate": 1.701062450996176e-05, + "loss": 0.9057, + "step": 8977 + }, + { + "epoch": 0.2751624371705284, + "grad_norm": 1.4719831285625256, + "learning_rate": 1.7009916627041498e-05, + "loss": 0.6081, + "step": 8978 + }, + { + "epoch": 0.27519308569326956, + "grad_norm": 1.6092452659363417, + "learning_rate": 1.7009208675050732e-05, + "loss": 0.8677, + "step": 8979 + }, + { + "epoch": 0.27522373421601076, + "grad_norm": 1.3533822013025927, + "learning_rate": 1.7008500653996437e-05, + "loss": 0.7622, + "step": 8980 + }, + { + "epoch": 0.27525438273875197, + "grad_norm": 1.632331447725122, + "learning_rate": 1.7007792563885596e-05, + "loss": 0.7902, + "step": 8981 + }, + { + "epoch": 0.2752850312614932, + "grad_norm": 1.4650697660890104, + "learning_rate": 1.7007084404725178e-05, + "loss": 0.7794, + "step": 8982 + }, + { + "epoch": 0.2753156797842344, + "grad_norm": 0.7619898424136542, + "learning_rate": 1.7006376176522166e-05, + "loss": 0.6157, + "step": 8983 + }, + { + "epoch": 0.2753463283069756, + "grad_norm": 1.331897361787234, + "learning_rate": 1.7005667879283536e-05, + "loss": 0.6728, + "step": 8984 + }, + { + "epoch": 0.2753769768297168, + "grad_norm": 1.501809701689891, + "learning_rate": 1.700495951301627e-05, + "loss": 0.77, + "step": 8985 + }, + { + "epoch": 0.275407625352458, + "grad_norm": 1.6574592107227217, + "learning_rate": 1.7004251077727347e-05, + "loss": 0.8135, + "step": 8986 + }, + { + "epoch": 0.2754382738751992, + "grad_norm": 1.5852431548265038, + "learning_rate": 1.700354257342374e-05, + "loss": 0.9091, + "step": 8987 + }, + { + "epoch": 0.2754689223979404, + "grad_norm": 1.5399402931720416, + "learning_rate": 1.700283400011244e-05, + "loss": 0.7414, + "step": 8988 + }, + { + "epoch": 0.2754995709206816, + "grad_norm": 1.598848566285046, + "learning_rate": 1.7002125357800425e-05, + "loss": 0.8347, + "step": 8989 + }, + { + "epoch": 0.2755302194434228, + "grad_norm": 1.5640267927246023, + "learning_rate": 1.7001416646494678e-05, + "loss": 0.7764, + "step": 8990 + }, + { + "epoch": 0.275560867966164, + "grad_norm": 0.8132621871094108, + "learning_rate": 1.700070786620218e-05, + "loss": 0.6122, + "step": 8991 + }, + { + "epoch": 0.27559151648890523, + "grad_norm": 0.7313128248200912, + "learning_rate": 1.6999999016929916e-05, + "loss": 0.6124, + "step": 8992 + }, + { + "epoch": 0.27562216501164644, + "grad_norm": 1.5055874602133041, + "learning_rate": 1.6999290098684872e-05, + "loss": 0.7795, + "step": 8993 + }, + { + "epoch": 0.27565281353438764, + "grad_norm": 1.47537865418157, + "learning_rate": 1.699858111147403e-05, + "loss": 0.7628, + "step": 8994 + }, + { + "epoch": 0.27568346205712885, + "grad_norm": 1.6180396242301163, + "learning_rate": 1.699787205530438e-05, + "loss": 0.8514, + "step": 8995 + }, + { + "epoch": 0.27571411057987005, + "grad_norm": 1.521525320570333, + "learning_rate": 1.6997162930182905e-05, + "loss": 0.6819, + "step": 8996 + }, + { + "epoch": 0.27574475910261126, + "grad_norm": 1.795392130054359, + "learning_rate": 1.6996453736116592e-05, + "loss": 0.8387, + "step": 8997 + }, + { + "epoch": 0.27577540762535246, + "grad_norm": 1.6101583105894937, + "learning_rate": 1.699574447311243e-05, + "loss": 0.7233, + "step": 8998 + }, + { + "epoch": 0.27580605614809367, + "grad_norm": 1.3509728902545404, + "learning_rate": 1.699503514117741e-05, + "loss": 0.714, + "step": 8999 + }, + { + "epoch": 0.2758367046708349, + "grad_norm": 1.4865890572395213, + "learning_rate": 1.6994325740318518e-05, + "loss": 0.7823, + "step": 9000 + }, + { + "epoch": 0.2758673531935761, + "grad_norm": 1.5844519670632515, + "learning_rate": 1.6993616270542747e-05, + "loss": 0.9037, + "step": 9001 + }, + { + "epoch": 0.2758980017163173, + "grad_norm": 1.5379427987560879, + "learning_rate": 1.699290673185708e-05, + "loss": 0.8494, + "step": 9002 + }, + { + "epoch": 0.2759286502390585, + "grad_norm": 1.4268374113622142, + "learning_rate": 1.699219712426852e-05, + "loss": 0.756, + "step": 9003 + }, + { + "epoch": 0.2759592987617997, + "grad_norm": 1.5438340324618618, + "learning_rate": 1.6991487447784048e-05, + "loss": 0.7785, + "step": 9004 + }, + { + "epoch": 0.2759899472845409, + "grad_norm": 1.472679492745676, + "learning_rate": 1.6990777702410664e-05, + "loss": 0.7905, + "step": 9005 + }, + { + "epoch": 0.2760205958072821, + "grad_norm": 1.5437716938483408, + "learning_rate": 1.6990067888155358e-05, + "loss": 0.7732, + "step": 9006 + }, + { + "epoch": 0.2760512443300233, + "grad_norm": 1.635900334496756, + "learning_rate": 1.6989358005025123e-05, + "loss": 0.6549, + "step": 9007 + }, + { + "epoch": 0.2760818928527645, + "grad_norm": 1.2936052912924694, + "learning_rate": 1.698864805302696e-05, + "loss": 0.7721, + "step": 9008 + }, + { + "epoch": 0.2761125413755057, + "grad_norm": 1.6181007930732199, + "learning_rate": 1.6987938032167856e-05, + "loss": 0.8741, + "step": 9009 + }, + { + "epoch": 0.2761431898982469, + "grad_norm": 1.6221677014769063, + "learning_rate": 1.698722794245481e-05, + "loss": 0.6958, + "step": 9010 + }, + { + "epoch": 0.2761738384209881, + "grad_norm": 1.4737289286060564, + "learning_rate": 1.698651778389482e-05, + "loss": 0.7826, + "step": 9011 + }, + { + "epoch": 0.2762044869437293, + "grad_norm": 1.572623345886125, + "learning_rate": 1.698580755649488e-05, + "loss": 0.8006, + "step": 9012 + }, + { + "epoch": 0.2762351354664705, + "grad_norm": 1.0741417872172583, + "learning_rate": 1.6985097260262e-05, + "loss": 0.6439, + "step": 9013 + }, + { + "epoch": 0.2762657839892117, + "grad_norm": 1.3983561497687023, + "learning_rate": 1.698438689520316e-05, + "loss": 0.8745, + "step": 9014 + }, + { + "epoch": 0.2762964325119529, + "grad_norm": 1.4808155876505102, + "learning_rate": 1.698367646132537e-05, + "loss": 0.7121, + "step": 9015 + }, + { + "epoch": 0.2763270810346941, + "grad_norm": 0.6903584196633269, + "learning_rate": 1.6982965958635634e-05, + "loss": 0.6305, + "step": 9016 + }, + { + "epoch": 0.2763577295574353, + "grad_norm": 1.532154364282589, + "learning_rate": 1.6982255387140944e-05, + "loss": 0.7598, + "step": 9017 + }, + { + "epoch": 0.2763883780801765, + "grad_norm": 1.5802134288526835, + "learning_rate": 1.698154474684831e-05, + "loss": 0.8727, + "step": 9018 + }, + { + "epoch": 0.27641902660291773, + "grad_norm": 1.2925660607312994, + "learning_rate": 1.698083403776472e-05, + "loss": 0.667, + "step": 9019 + }, + { + "epoch": 0.27644967512565893, + "grad_norm": 1.3227844426201547, + "learning_rate": 1.6980123259897193e-05, + "loss": 0.6937, + "step": 9020 + }, + { + "epoch": 0.27648032364840014, + "grad_norm": 0.8869405788134123, + "learning_rate": 1.6979412413252726e-05, + "loss": 0.6528, + "step": 9021 + }, + { + "epoch": 0.27651097217114134, + "grad_norm": 1.7274063631333663, + "learning_rate": 1.6978701497838322e-05, + "loss": 0.8639, + "step": 9022 + }, + { + "epoch": 0.27654162069388255, + "grad_norm": 1.5107835687044076, + "learning_rate": 1.6977990513660984e-05, + "loss": 0.7408, + "step": 9023 + }, + { + "epoch": 0.27657226921662376, + "grad_norm": 1.4748006893993524, + "learning_rate": 1.697727946072772e-05, + "loss": 0.6315, + "step": 9024 + }, + { + "epoch": 0.27660291773936496, + "grad_norm": 1.5032801815443735, + "learning_rate": 1.697656833904554e-05, + "loss": 0.8031, + "step": 9025 + }, + { + "epoch": 0.27663356626210617, + "grad_norm": 0.7031201997506782, + "learning_rate": 1.6975857148621445e-05, + "loss": 0.6597, + "step": 9026 + }, + { + "epoch": 0.2766642147848474, + "grad_norm": 1.3743834697486905, + "learning_rate": 1.6975145889462443e-05, + "loss": 0.7291, + "step": 9027 + }, + { + "epoch": 0.2766948633075886, + "grad_norm": 0.6744726438685147, + "learning_rate": 1.6974434561575544e-05, + "loss": 0.6184, + "step": 9028 + }, + { + "epoch": 0.2767255118303298, + "grad_norm": 0.7300054242126497, + "learning_rate": 1.697372316496776e-05, + "loss": 0.6309, + "step": 9029 + }, + { + "epoch": 0.276756160353071, + "grad_norm": 1.4685311298282564, + "learning_rate": 1.6973011699646096e-05, + "loss": 0.7865, + "step": 9030 + }, + { + "epoch": 0.2767868088758122, + "grad_norm": 1.5274123959935402, + "learning_rate": 1.697230016561756e-05, + "loss": 0.7603, + "step": 9031 + }, + { + "epoch": 0.2768174573985534, + "grad_norm": 1.4933805263421127, + "learning_rate": 1.697158856288917e-05, + "loss": 0.7548, + "step": 9032 + }, + { + "epoch": 0.2768481059212946, + "grad_norm": 1.489418890854094, + "learning_rate": 1.6970876891467935e-05, + "loss": 0.7601, + "step": 9033 + }, + { + "epoch": 0.2768787544440358, + "grad_norm": 1.3331613005242322, + "learning_rate": 1.6970165151360864e-05, + "loss": 0.8272, + "step": 9034 + }, + { + "epoch": 0.276909402966777, + "grad_norm": 1.5246486211333405, + "learning_rate": 1.6969453342574973e-05, + "loss": 0.7901, + "step": 9035 + }, + { + "epoch": 0.2769400514895182, + "grad_norm": 1.4110563048294353, + "learning_rate": 1.6968741465117275e-05, + "loss": 0.7131, + "step": 9036 + }, + { + "epoch": 0.27697070001225943, + "grad_norm": 1.4283669894653106, + "learning_rate": 1.6968029518994787e-05, + "loss": 0.7242, + "step": 9037 + }, + { + "epoch": 0.27700134853500064, + "grad_norm": 0.8212929612160031, + "learning_rate": 1.696731750421452e-05, + "loss": 0.6363, + "step": 9038 + }, + { + "epoch": 0.27703199705774184, + "grad_norm": 1.450362793902889, + "learning_rate": 1.696660542078349e-05, + "loss": 0.7312, + "step": 9039 + }, + { + "epoch": 0.27706264558048305, + "grad_norm": 1.627783011048045, + "learning_rate": 1.6965893268708714e-05, + "loss": 0.7336, + "step": 9040 + }, + { + "epoch": 0.2770932941032242, + "grad_norm": 1.4530533364451166, + "learning_rate": 1.696518104799721e-05, + "loss": 0.889, + "step": 9041 + }, + { + "epoch": 0.2771239426259654, + "grad_norm": 1.5510887068100392, + "learning_rate": 1.6964468758655995e-05, + "loss": 0.8647, + "step": 9042 + }, + { + "epoch": 0.2771545911487066, + "grad_norm": 0.6909695222541077, + "learning_rate": 1.6963756400692085e-05, + "loss": 0.6573, + "step": 9043 + }, + { + "epoch": 0.2771852396714478, + "grad_norm": 1.4634721404331927, + "learning_rate": 1.6963043974112502e-05, + "loss": 0.9006, + "step": 9044 + }, + { + "epoch": 0.277215888194189, + "grad_norm": 1.4128111862822357, + "learning_rate": 1.696233147892427e-05, + "loss": 0.6895, + "step": 9045 + }, + { + "epoch": 0.2772465367169302, + "grad_norm": 1.490071529280027, + "learning_rate": 1.6961618915134403e-05, + "loss": 0.7675, + "step": 9046 + }, + { + "epoch": 0.27727718523967143, + "grad_norm": 1.5567380550385377, + "learning_rate": 1.696090628274992e-05, + "loss": 0.8451, + "step": 9047 + }, + { + "epoch": 0.27730783376241264, + "grad_norm": 1.5406782205055392, + "learning_rate": 1.6960193581777846e-05, + "loss": 0.766, + "step": 9048 + }, + { + "epoch": 0.27733848228515384, + "grad_norm": 1.428065706002695, + "learning_rate": 1.695948081222521e-05, + "loss": 0.8062, + "step": 9049 + }, + { + "epoch": 0.27736913080789505, + "grad_norm": 0.7303661276564389, + "learning_rate": 1.6958767974099023e-05, + "loss": 0.6339, + "step": 9050 + }, + { + "epoch": 0.27739977933063625, + "grad_norm": 1.5302458536260861, + "learning_rate": 1.6958055067406316e-05, + "loss": 0.7416, + "step": 9051 + }, + { + "epoch": 0.27743042785337746, + "grad_norm": 0.6889509116058317, + "learning_rate": 1.695734209215411e-05, + "loss": 0.6268, + "step": 9052 + }, + { + "epoch": 0.27746107637611866, + "grad_norm": 0.6809756078713958, + "learning_rate": 1.695662904834944e-05, + "loss": 0.6268, + "step": 9053 + }, + { + "epoch": 0.27749172489885987, + "grad_norm": 1.6843558923253483, + "learning_rate": 1.6955915935999317e-05, + "loss": 0.8137, + "step": 9054 + }, + { + "epoch": 0.2775223734216011, + "grad_norm": 1.3064527399922552, + "learning_rate": 1.695520275511078e-05, + "loss": 0.5898, + "step": 9055 + }, + { + "epoch": 0.2775530219443423, + "grad_norm": 0.7525074947170578, + "learning_rate": 1.6954489505690845e-05, + "loss": 0.621, + "step": 9056 + }, + { + "epoch": 0.2775836704670835, + "grad_norm": 1.5300118705981047, + "learning_rate": 1.6953776187746548e-05, + "loss": 0.7722, + "step": 9057 + }, + { + "epoch": 0.2776143189898247, + "grad_norm": 1.4708080017405785, + "learning_rate": 1.6953062801284913e-05, + "loss": 0.7462, + "step": 9058 + }, + { + "epoch": 0.2776449675125659, + "grad_norm": 0.7202948897358773, + "learning_rate": 1.6952349346312976e-05, + "loss": 0.624, + "step": 9059 + }, + { + "epoch": 0.2776756160353071, + "grad_norm": 1.3153943801807484, + "learning_rate": 1.6951635822837757e-05, + "loss": 0.7254, + "step": 9060 + }, + { + "epoch": 0.2777062645580483, + "grad_norm": 1.310190701899337, + "learning_rate": 1.6950922230866295e-05, + "loss": 0.7111, + "step": 9061 + }, + { + "epoch": 0.2777369130807895, + "grad_norm": 0.7377020199367718, + "learning_rate": 1.6950208570405615e-05, + "loss": 0.6085, + "step": 9062 + }, + { + "epoch": 0.2777675616035307, + "grad_norm": 1.54741966812524, + "learning_rate": 1.6949494841462755e-05, + "loss": 0.8595, + "step": 9063 + }, + { + "epoch": 0.2777982101262719, + "grad_norm": 1.5920894817547344, + "learning_rate": 1.694878104404474e-05, + "loss": 0.7134, + "step": 9064 + }, + { + "epoch": 0.27782885864901313, + "grad_norm": 1.3887988143656078, + "learning_rate": 1.6948067178158613e-05, + "loss": 0.8429, + "step": 9065 + }, + { + "epoch": 0.27785950717175434, + "grad_norm": 1.3253030099343994, + "learning_rate": 1.6947353243811398e-05, + "loss": 0.7774, + "step": 9066 + }, + { + "epoch": 0.27789015569449554, + "grad_norm": 1.4862171789758254, + "learning_rate": 1.6946639241010135e-05, + "loss": 0.8729, + "step": 9067 + }, + { + "epoch": 0.27792080421723675, + "grad_norm": 1.3202739557438345, + "learning_rate": 1.6945925169761857e-05, + "loss": 0.669, + "step": 9068 + }, + { + "epoch": 0.27795145273997796, + "grad_norm": 1.6339184751362599, + "learning_rate": 1.69452110300736e-05, + "loss": 0.7722, + "step": 9069 + }, + { + "epoch": 0.27798210126271916, + "grad_norm": 1.5531957678585215, + "learning_rate": 1.6944496821952406e-05, + "loss": 0.6485, + "step": 9070 + }, + { + "epoch": 0.27801274978546037, + "grad_norm": 1.4829890827205903, + "learning_rate": 1.6943782545405304e-05, + "loss": 0.8571, + "step": 9071 + }, + { + "epoch": 0.2780433983082015, + "grad_norm": 0.7061910245184417, + "learning_rate": 1.6943068200439342e-05, + "loss": 0.6184, + "step": 9072 + }, + { + "epoch": 0.2780740468309427, + "grad_norm": 1.5156060848446227, + "learning_rate": 1.6942353787061548e-05, + "loss": 0.7204, + "step": 9073 + }, + { + "epoch": 0.27810469535368393, + "grad_norm": 1.618967868035124, + "learning_rate": 1.6941639305278966e-05, + "loss": 0.7451, + "step": 9074 + }, + { + "epoch": 0.27813534387642513, + "grad_norm": 1.602244024635431, + "learning_rate": 1.6940924755098635e-05, + "loss": 0.6659, + "step": 9075 + }, + { + "epoch": 0.27816599239916634, + "grad_norm": 1.5656264884786593, + "learning_rate": 1.69402101365276e-05, + "loss": 0.8058, + "step": 9076 + }, + { + "epoch": 0.27819664092190755, + "grad_norm": 1.484119132228725, + "learning_rate": 1.6939495449572897e-05, + "loss": 0.7129, + "step": 9077 + }, + { + "epoch": 0.27822728944464875, + "grad_norm": 1.4258703998279902, + "learning_rate": 1.6938780694241566e-05, + "loss": 0.7424, + "step": 9078 + }, + { + "epoch": 0.27825793796738996, + "grad_norm": 0.7171276646598749, + "learning_rate": 1.693806587054066e-05, + "loss": 0.6242, + "step": 9079 + }, + { + "epoch": 0.27828858649013116, + "grad_norm": 1.3964541261325583, + "learning_rate": 1.693735097847721e-05, + "loss": 0.6923, + "step": 9080 + }, + { + "epoch": 0.27831923501287237, + "grad_norm": 1.5069968923566865, + "learning_rate": 1.693663601805827e-05, + "loss": 0.7773, + "step": 9081 + }, + { + "epoch": 0.2783498835356136, + "grad_norm": 1.5518525425541345, + "learning_rate": 1.693592098929088e-05, + "loss": 0.8268, + "step": 9082 + }, + { + "epoch": 0.2783805320583548, + "grad_norm": 0.6771456380637434, + "learning_rate": 1.6935205892182084e-05, + "loss": 0.5986, + "step": 9083 + }, + { + "epoch": 0.278411180581096, + "grad_norm": 1.5057660366885768, + "learning_rate": 1.6934490726738932e-05, + "loss": 0.808, + "step": 9084 + }, + { + "epoch": 0.2784418291038372, + "grad_norm": 1.5616959330979097, + "learning_rate": 1.6933775492968464e-05, + "loss": 0.8086, + "step": 9085 + }, + { + "epoch": 0.2784724776265784, + "grad_norm": 1.660076915614004, + "learning_rate": 1.6933060190877736e-05, + "loss": 0.7699, + "step": 9086 + }, + { + "epoch": 0.2785031261493196, + "grad_norm": 0.7143128640712081, + "learning_rate": 1.6932344820473793e-05, + "loss": 0.6319, + "step": 9087 + }, + { + "epoch": 0.2785337746720608, + "grad_norm": 1.4567110224182784, + "learning_rate": 1.693162938176368e-05, + "loss": 0.6689, + "step": 9088 + }, + { + "epoch": 0.278564423194802, + "grad_norm": 1.4400595633641606, + "learning_rate": 1.693091387475445e-05, + "loss": 0.7386, + "step": 9089 + }, + { + "epoch": 0.2785950717175432, + "grad_norm": 1.4498334613729247, + "learning_rate": 1.6930198299453154e-05, + "loss": 0.8526, + "step": 9090 + }, + { + "epoch": 0.2786257202402844, + "grad_norm": 1.4567304064765305, + "learning_rate": 1.692948265586684e-05, + "loss": 0.7444, + "step": 9091 + }, + { + "epoch": 0.27865636876302563, + "grad_norm": 1.376872670741112, + "learning_rate": 1.6928766944002556e-05, + "loss": 0.7153, + "step": 9092 + }, + { + "epoch": 0.27868701728576684, + "grad_norm": 1.514166882298145, + "learning_rate": 1.6928051163867364e-05, + "loss": 0.784, + "step": 9093 + }, + { + "epoch": 0.27871766580850804, + "grad_norm": 1.3822528055119032, + "learning_rate": 1.692733531546831e-05, + "loss": 0.7939, + "step": 9094 + }, + { + "epoch": 0.27874831433124925, + "grad_norm": 1.6110201673566298, + "learning_rate": 1.6926619398812446e-05, + "loss": 0.8178, + "step": 9095 + }, + { + "epoch": 0.27877896285399045, + "grad_norm": 1.3970490743269102, + "learning_rate": 1.6925903413906836e-05, + "loss": 0.8059, + "step": 9096 + }, + { + "epoch": 0.27880961137673166, + "grad_norm": 1.487704830934392, + "learning_rate": 1.6925187360758518e-05, + "loss": 0.7619, + "step": 9097 + }, + { + "epoch": 0.27884025989947286, + "grad_norm": 1.3979502157597177, + "learning_rate": 1.692447123937456e-05, + "loss": 0.7989, + "step": 9098 + }, + { + "epoch": 0.27887090842221407, + "grad_norm": 1.6042353929295496, + "learning_rate": 1.692375504976202e-05, + "loss": 0.7171, + "step": 9099 + }, + { + "epoch": 0.2789015569449553, + "grad_norm": 1.4858861170787812, + "learning_rate": 1.6923038791927946e-05, + "loss": 0.7722, + "step": 9100 + }, + { + "epoch": 0.2789322054676965, + "grad_norm": 1.4905685546036502, + "learning_rate": 1.6922322465879403e-05, + "loss": 0.7642, + "step": 9101 + }, + { + "epoch": 0.2789628539904377, + "grad_norm": 1.3242844126817797, + "learning_rate": 1.692160607162344e-05, + "loss": 0.7888, + "step": 9102 + }, + { + "epoch": 0.27899350251317884, + "grad_norm": 1.4368979473606749, + "learning_rate": 1.6920889609167125e-05, + "loss": 0.769, + "step": 9103 + }, + { + "epoch": 0.27902415103592004, + "grad_norm": 1.4329356394768196, + "learning_rate": 1.6920173078517515e-05, + "loss": 0.7767, + "step": 9104 + }, + { + "epoch": 0.27905479955866125, + "grad_norm": 1.8313020997443517, + "learning_rate": 1.6919456479681666e-05, + "loss": 0.768, + "step": 9105 + }, + { + "epoch": 0.27908544808140245, + "grad_norm": 1.5819948526455183, + "learning_rate": 1.6918739812666643e-05, + "loss": 0.8512, + "step": 9106 + }, + { + "epoch": 0.27911609660414366, + "grad_norm": 1.5338663534668997, + "learning_rate": 1.691802307747951e-05, + "loss": 0.8165, + "step": 9107 + }, + { + "epoch": 0.27914674512688487, + "grad_norm": 1.4004266863082502, + "learning_rate": 1.6917306274127317e-05, + "loss": 0.8045, + "step": 9108 + }, + { + "epoch": 0.27917739364962607, + "grad_norm": 1.4235665681444734, + "learning_rate": 1.691658940261714e-05, + "loss": 0.7957, + "step": 9109 + }, + { + "epoch": 0.2792080421723673, + "grad_norm": 1.3743531964635993, + "learning_rate": 1.6915872462956044e-05, + "loss": 0.8003, + "step": 9110 + }, + { + "epoch": 0.2792386906951085, + "grad_norm": 1.5279775362564607, + "learning_rate": 1.691515545515108e-05, + "loss": 0.7985, + "step": 9111 + }, + { + "epoch": 0.2792693392178497, + "grad_norm": 0.7435813219479388, + "learning_rate": 1.691443837920932e-05, + "loss": 0.6091, + "step": 9112 + }, + { + "epoch": 0.2792999877405909, + "grad_norm": 1.3074995882652625, + "learning_rate": 1.691372123513783e-05, + "loss": 0.661, + "step": 9113 + }, + { + "epoch": 0.2793306362633321, + "grad_norm": 1.5400775818021295, + "learning_rate": 1.691300402294368e-05, + "loss": 0.7268, + "step": 9114 + }, + { + "epoch": 0.2793612847860733, + "grad_norm": 1.231400712639508, + "learning_rate": 1.691228674263393e-05, + "loss": 0.6774, + "step": 9115 + }, + { + "epoch": 0.2793919333088145, + "grad_norm": 1.4545620017998153, + "learning_rate": 1.6911569394215647e-05, + "loss": 0.8595, + "step": 9116 + }, + { + "epoch": 0.2794225818315557, + "grad_norm": 1.48521435784177, + "learning_rate": 1.6910851977695904e-05, + "loss": 0.8228, + "step": 9117 + }, + { + "epoch": 0.2794532303542969, + "grad_norm": 1.4397863112069558, + "learning_rate": 1.6910134493081774e-05, + "loss": 0.7626, + "step": 9118 + }, + { + "epoch": 0.2794838788770381, + "grad_norm": 1.6220488086184088, + "learning_rate": 1.6909416940380313e-05, + "loss": 0.7357, + "step": 9119 + }, + { + "epoch": 0.27951452739977933, + "grad_norm": 1.4740701843961268, + "learning_rate": 1.6908699319598603e-05, + "loss": 0.8737, + "step": 9120 + }, + { + "epoch": 0.27954517592252054, + "grad_norm": 0.7392849360517212, + "learning_rate": 1.690798163074371e-05, + "loss": 0.6465, + "step": 9121 + }, + { + "epoch": 0.27957582444526174, + "grad_norm": 1.5069031358280776, + "learning_rate": 1.6907263873822704e-05, + "loss": 0.7677, + "step": 9122 + }, + { + "epoch": 0.27960647296800295, + "grad_norm": 1.3519782738350405, + "learning_rate": 1.690654604884266e-05, + "loss": 0.7145, + "step": 9123 + }, + { + "epoch": 0.27963712149074416, + "grad_norm": 1.5303494703195586, + "learning_rate": 1.6905828155810657e-05, + "loss": 0.7661, + "step": 9124 + }, + { + "epoch": 0.27966777001348536, + "grad_norm": 0.6909994835111862, + "learning_rate": 1.6905110194733758e-05, + "loss": 0.6257, + "step": 9125 + }, + { + "epoch": 0.27969841853622657, + "grad_norm": 1.2656048284852957, + "learning_rate": 1.6904392165619043e-05, + "loss": 0.6364, + "step": 9126 + }, + { + "epoch": 0.2797290670589678, + "grad_norm": 1.379575934081963, + "learning_rate": 1.6903674068473582e-05, + "loss": 0.6977, + "step": 9127 + }, + { + "epoch": 0.279759715581709, + "grad_norm": 1.6096455167456198, + "learning_rate": 1.6902955903304457e-05, + "loss": 0.7587, + "step": 9128 + }, + { + "epoch": 0.2797903641044502, + "grad_norm": 1.6477559776638953, + "learning_rate": 1.690223767011874e-05, + "loss": 0.8877, + "step": 9129 + }, + { + "epoch": 0.2798210126271914, + "grad_norm": 1.3779737401353545, + "learning_rate": 1.6901519368923512e-05, + "loss": 0.7173, + "step": 9130 + }, + { + "epoch": 0.2798516611499326, + "grad_norm": 1.4353979465423667, + "learning_rate": 1.6900800999725845e-05, + "loss": 0.7739, + "step": 9131 + }, + { + "epoch": 0.2798823096726738, + "grad_norm": 1.6050183348286717, + "learning_rate": 1.690008256253282e-05, + "loss": 0.8729, + "step": 9132 + }, + { + "epoch": 0.279912958195415, + "grad_norm": 0.7364717798006372, + "learning_rate": 1.6899364057351518e-05, + "loss": 0.6312, + "step": 9133 + }, + { + "epoch": 0.27994360671815616, + "grad_norm": 1.6770538784644657, + "learning_rate": 1.689864548418901e-05, + "loss": 0.8392, + "step": 9134 + }, + { + "epoch": 0.27997425524089736, + "grad_norm": 0.6878973303724706, + "learning_rate": 1.689792684305239e-05, + "loss": 0.6144, + "step": 9135 + }, + { + "epoch": 0.28000490376363857, + "grad_norm": 1.507876586324382, + "learning_rate": 1.6897208133948733e-05, + "loss": 0.7409, + "step": 9136 + }, + { + "epoch": 0.2800355522863798, + "grad_norm": 1.4187365260819895, + "learning_rate": 1.6896489356885115e-05, + "loss": 0.7759, + "step": 9137 + }, + { + "epoch": 0.280066200809121, + "grad_norm": 1.4818490103977404, + "learning_rate": 1.6895770511868623e-05, + "loss": 0.7858, + "step": 9138 + }, + { + "epoch": 0.2800968493318622, + "grad_norm": 1.506436922604175, + "learning_rate": 1.689505159890634e-05, + "loss": 0.7714, + "step": 9139 + }, + { + "epoch": 0.2801274978546034, + "grad_norm": 1.4588292151281397, + "learning_rate": 1.6894332618005355e-05, + "loss": 0.7809, + "step": 9140 + }, + { + "epoch": 0.2801581463773446, + "grad_norm": 1.5368938663412868, + "learning_rate": 1.689361356917274e-05, + "loss": 0.7826, + "step": 9141 + }, + { + "epoch": 0.2801887949000858, + "grad_norm": 1.3709366006690744, + "learning_rate": 1.689289445241559e-05, + "loss": 0.706, + "step": 9142 + }, + { + "epoch": 0.280219443422827, + "grad_norm": 1.4405053035378084, + "learning_rate": 1.6892175267740984e-05, + "loss": 0.8257, + "step": 9143 + }, + { + "epoch": 0.2802500919455682, + "grad_norm": 1.6133775414808453, + "learning_rate": 1.6891456015156017e-05, + "loss": 0.7483, + "step": 9144 + }, + { + "epoch": 0.2802807404683094, + "grad_norm": 1.537194957123688, + "learning_rate": 1.689073669466777e-05, + "loss": 0.7345, + "step": 9145 + }, + { + "epoch": 0.2803113889910506, + "grad_norm": 0.7115975350304464, + "learning_rate": 1.6890017306283325e-05, + "loss": 0.6047, + "step": 9146 + }, + { + "epoch": 0.28034203751379183, + "grad_norm": 1.4287925945733075, + "learning_rate": 1.688929785000978e-05, + "loss": 0.7158, + "step": 9147 + }, + { + "epoch": 0.28037268603653304, + "grad_norm": 1.497079386112993, + "learning_rate": 1.688857832585422e-05, + "loss": 0.7153, + "step": 9148 + }, + { + "epoch": 0.28040333455927424, + "grad_norm": 1.5740508864335208, + "learning_rate": 1.6887858733823738e-05, + "loss": 0.8346, + "step": 9149 + }, + { + "epoch": 0.28043398308201545, + "grad_norm": 1.4193017799143293, + "learning_rate": 1.688713907392542e-05, + "loss": 0.7349, + "step": 9150 + }, + { + "epoch": 0.28046463160475665, + "grad_norm": 1.4555028032971493, + "learning_rate": 1.6886419346166357e-05, + "loss": 0.9006, + "step": 9151 + }, + { + "epoch": 0.28049528012749786, + "grad_norm": 1.5592690368279534, + "learning_rate": 1.6885699550553646e-05, + "loss": 0.7798, + "step": 9152 + }, + { + "epoch": 0.28052592865023906, + "grad_norm": 1.534540497431319, + "learning_rate": 1.6884979687094375e-05, + "loss": 0.7248, + "step": 9153 + }, + { + "epoch": 0.28055657717298027, + "grad_norm": 1.479253040862668, + "learning_rate": 1.6884259755795635e-05, + "loss": 0.8811, + "step": 9154 + }, + { + "epoch": 0.2805872256957215, + "grad_norm": 1.4476725831998876, + "learning_rate": 1.6883539756664522e-05, + "loss": 0.752, + "step": 9155 + }, + { + "epoch": 0.2806178742184627, + "grad_norm": 0.7305041015533511, + "learning_rate": 1.6882819689708133e-05, + "loss": 0.6174, + "step": 9156 + }, + { + "epoch": 0.2806485227412039, + "grad_norm": 1.4089773039029976, + "learning_rate": 1.6882099554933557e-05, + "loss": 0.6517, + "step": 9157 + }, + { + "epoch": 0.2806791712639451, + "grad_norm": 1.8444526659199645, + "learning_rate": 1.6881379352347895e-05, + "loss": 0.8798, + "step": 9158 + }, + { + "epoch": 0.2807098197866863, + "grad_norm": 0.7114686453650412, + "learning_rate": 1.6880659081958244e-05, + "loss": 0.6444, + "step": 9159 + }, + { + "epoch": 0.2807404683094275, + "grad_norm": 1.5330874960529566, + "learning_rate": 1.6879938743771694e-05, + "loss": 0.686, + "step": 9160 + }, + { + "epoch": 0.2807711168321687, + "grad_norm": 1.5613469305296845, + "learning_rate": 1.6879218337795352e-05, + "loss": 0.7116, + "step": 9161 + }, + { + "epoch": 0.2808017653549099, + "grad_norm": 1.5682892183742094, + "learning_rate": 1.6878497864036307e-05, + "loss": 0.8723, + "step": 9162 + }, + { + "epoch": 0.2808324138776511, + "grad_norm": 1.360873236182108, + "learning_rate": 1.6877777322501666e-05, + "loss": 0.6929, + "step": 9163 + }, + { + "epoch": 0.2808630624003923, + "grad_norm": 1.5192440036739492, + "learning_rate": 1.6877056713198524e-05, + "loss": 0.7984, + "step": 9164 + }, + { + "epoch": 0.2808937109231335, + "grad_norm": 1.4566796283441246, + "learning_rate": 1.6876336036133983e-05, + "loss": 0.7633, + "step": 9165 + }, + { + "epoch": 0.2809243594458747, + "grad_norm": 1.5161457521733674, + "learning_rate": 1.6875615291315145e-05, + "loss": 0.7884, + "step": 9166 + }, + { + "epoch": 0.2809550079686159, + "grad_norm": 1.6360470785217085, + "learning_rate": 1.6874894478749107e-05, + "loss": 0.7537, + "step": 9167 + }, + { + "epoch": 0.2809856564913571, + "grad_norm": 1.3537895842388517, + "learning_rate": 1.6874173598442978e-05, + "loss": 0.7366, + "step": 9168 + }, + { + "epoch": 0.2810163050140983, + "grad_norm": 1.3605577893215395, + "learning_rate": 1.6873452650403853e-05, + "loss": 0.6727, + "step": 9169 + }, + { + "epoch": 0.2810469535368395, + "grad_norm": 1.3656540186795398, + "learning_rate": 1.6872731634638846e-05, + "loss": 0.695, + "step": 9170 + }, + { + "epoch": 0.2810776020595807, + "grad_norm": 1.4263529551358538, + "learning_rate": 1.6872010551155053e-05, + "loss": 0.7342, + "step": 9171 + }, + { + "epoch": 0.2811082505823219, + "grad_norm": 0.760839782908172, + "learning_rate": 1.6871289399959585e-05, + "loss": 0.632, + "step": 9172 + }, + { + "epoch": 0.2811388991050631, + "grad_norm": 1.5167662321101483, + "learning_rate": 1.687056818105954e-05, + "loss": 0.7385, + "step": 9173 + }, + { + "epoch": 0.28116954762780433, + "grad_norm": 1.4043148101856635, + "learning_rate": 1.686984689446203e-05, + "loss": 0.6689, + "step": 9174 + }, + { + "epoch": 0.28120019615054553, + "grad_norm": 1.5169204784401975, + "learning_rate": 1.686912554017416e-05, + "loss": 0.7678, + "step": 9175 + }, + { + "epoch": 0.28123084467328674, + "grad_norm": 1.5888349786121494, + "learning_rate": 1.686840411820304e-05, + "loss": 0.7467, + "step": 9176 + }, + { + "epoch": 0.28126149319602795, + "grad_norm": 0.6901138890098194, + "learning_rate": 1.6867682628555776e-05, + "loss": 0.6416, + "step": 9177 + }, + { + "epoch": 0.28129214171876915, + "grad_norm": 0.6821707955895108, + "learning_rate": 1.6866961071239482e-05, + "loss": 0.5922, + "step": 9178 + }, + { + "epoch": 0.28132279024151036, + "grad_norm": 1.5374593367437739, + "learning_rate": 1.6866239446261258e-05, + "loss": 0.867, + "step": 9179 + }, + { + "epoch": 0.28135343876425156, + "grad_norm": 1.549385135770689, + "learning_rate": 1.686551775362822e-05, + "loss": 0.825, + "step": 9180 + }, + { + "epoch": 0.28138408728699277, + "grad_norm": 1.4815039830901413, + "learning_rate": 1.6864795993347482e-05, + "loss": 0.7762, + "step": 9181 + }, + { + "epoch": 0.281414735809734, + "grad_norm": 1.552561467635152, + "learning_rate": 1.6864074165426154e-05, + "loss": 0.8198, + "step": 9182 + }, + { + "epoch": 0.2814453843324752, + "grad_norm": 1.447475277047424, + "learning_rate": 1.686335226987134e-05, + "loss": 0.8255, + "step": 9183 + }, + { + "epoch": 0.2814760328552164, + "grad_norm": 1.4862903281303586, + "learning_rate": 1.6862630306690168e-05, + "loss": 0.902, + "step": 9184 + }, + { + "epoch": 0.2815066813779576, + "grad_norm": 1.2852801479172857, + "learning_rate": 1.6861908275889738e-05, + "loss": 0.7703, + "step": 9185 + }, + { + "epoch": 0.2815373299006988, + "grad_norm": 1.566158032689612, + "learning_rate": 1.6861186177477172e-05, + "loss": 0.8217, + "step": 9186 + }, + { + "epoch": 0.28156797842344, + "grad_norm": 0.8091300334353152, + "learning_rate": 1.6860464011459584e-05, + "loss": 0.6625, + "step": 9187 + }, + { + "epoch": 0.2815986269461812, + "grad_norm": 1.392319145099529, + "learning_rate": 1.685974177784409e-05, + "loss": 0.8316, + "step": 9188 + }, + { + "epoch": 0.2816292754689224, + "grad_norm": 1.6240528251217738, + "learning_rate": 1.6859019476637804e-05, + "loss": 0.8488, + "step": 9189 + }, + { + "epoch": 0.2816599239916636, + "grad_norm": 1.4263302146047616, + "learning_rate": 1.685829710784784e-05, + "loss": 0.8475, + "step": 9190 + }, + { + "epoch": 0.2816905725144048, + "grad_norm": 1.377480539343844, + "learning_rate": 1.6857574671481325e-05, + "loss": 0.7692, + "step": 9191 + }, + { + "epoch": 0.28172122103714603, + "grad_norm": 1.533754609734449, + "learning_rate": 1.685685216754537e-05, + "loss": 0.7657, + "step": 9192 + }, + { + "epoch": 0.28175186955988724, + "grad_norm": 1.3184759731188236, + "learning_rate": 1.6856129596047096e-05, + "loss": 0.6296, + "step": 9193 + }, + { + "epoch": 0.28178251808262844, + "grad_norm": 1.411955179107455, + "learning_rate": 1.6855406956993624e-05, + "loss": 0.7092, + "step": 9194 + }, + { + "epoch": 0.28181316660536965, + "grad_norm": 1.5054503999950968, + "learning_rate": 1.6854684250392076e-05, + "loss": 0.7725, + "step": 9195 + }, + { + "epoch": 0.28184381512811085, + "grad_norm": 1.4276128823354122, + "learning_rate": 1.6853961476249565e-05, + "loss": 0.8209, + "step": 9196 + }, + { + "epoch": 0.281874463650852, + "grad_norm": 1.409411837819429, + "learning_rate": 1.685323863457322e-05, + "loss": 0.7817, + "step": 9197 + }, + { + "epoch": 0.2819051121735932, + "grad_norm": 1.5781888570870064, + "learning_rate": 1.685251572537016e-05, + "loss": 0.6882, + "step": 9198 + }, + { + "epoch": 0.2819357606963344, + "grad_norm": 1.3485900949525789, + "learning_rate": 1.6851792748647514e-05, + "loss": 0.6112, + "step": 9199 + }, + { + "epoch": 0.2819664092190756, + "grad_norm": 1.4218183157018516, + "learning_rate": 1.6851069704412396e-05, + "loss": 0.8044, + "step": 9200 + }, + { + "epoch": 0.2819970577418168, + "grad_norm": 1.4446958147529212, + "learning_rate": 1.6850346592671934e-05, + "loss": 0.7991, + "step": 9201 + }, + { + "epoch": 0.28202770626455803, + "grad_norm": 1.378747630068705, + "learning_rate": 1.684962341343326e-05, + "loss": 0.7618, + "step": 9202 + }, + { + "epoch": 0.28205835478729924, + "grad_norm": 1.607937141632176, + "learning_rate": 1.684890016670349e-05, + "loss": 0.7299, + "step": 9203 + }, + { + "epoch": 0.28208900331004044, + "grad_norm": 1.3891332214612144, + "learning_rate": 1.6848176852489754e-05, + "loss": 0.7203, + "step": 9204 + }, + { + "epoch": 0.28211965183278165, + "grad_norm": 0.7721849674397478, + "learning_rate": 1.6847453470799183e-05, + "loss": 0.6334, + "step": 9205 + }, + { + "epoch": 0.28215030035552285, + "grad_norm": 1.566728519548027, + "learning_rate": 1.6846730021638898e-05, + "loss": 0.7559, + "step": 9206 + }, + { + "epoch": 0.28218094887826406, + "grad_norm": 1.4316289852157338, + "learning_rate": 1.684600650501603e-05, + "loss": 0.8518, + "step": 9207 + }, + { + "epoch": 0.28221159740100527, + "grad_norm": 1.5532052556785634, + "learning_rate": 1.684528292093771e-05, + "loss": 0.7767, + "step": 9208 + }, + { + "epoch": 0.28224224592374647, + "grad_norm": 0.6803672654233137, + "learning_rate": 1.6844559269411063e-05, + "loss": 0.644, + "step": 9209 + }, + { + "epoch": 0.2822728944464877, + "grad_norm": 1.4507884565227036, + "learning_rate": 1.6843835550443224e-05, + "loss": 0.7142, + "step": 9210 + }, + { + "epoch": 0.2823035429692289, + "grad_norm": 1.3826095907657536, + "learning_rate": 1.6843111764041325e-05, + "loss": 0.7221, + "step": 9211 + }, + { + "epoch": 0.2823341914919701, + "grad_norm": 1.392446863491483, + "learning_rate": 1.684238791021249e-05, + "loss": 0.7604, + "step": 9212 + }, + { + "epoch": 0.2823648400147113, + "grad_norm": 1.466861110158592, + "learning_rate": 1.6841663988963855e-05, + "loss": 0.7502, + "step": 9213 + }, + { + "epoch": 0.2823954885374525, + "grad_norm": 1.3308755701778094, + "learning_rate": 1.6840940000302563e-05, + "loss": 0.7566, + "step": 9214 + }, + { + "epoch": 0.2824261370601937, + "grad_norm": 1.4174173661416862, + "learning_rate": 1.684021594423573e-05, + "loss": 0.7408, + "step": 9215 + }, + { + "epoch": 0.2824567855829349, + "grad_norm": 1.5823395910048648, + "learning_rate": 1.6839491820770507e-05, + "loss": 0.8075, + "step": 9216 + }, + { + "epoch": 0.2824874341056761, + "grad_norm": 1.8343255987389997, + "learning_rate": 1.6838767629914014e-05, + "loss": 0.8226, + "step": 9217 + }, + { + "epoch": 0.2825180826284173, + "grad_norm": 1.6321401136284448, + "learning_rate": 1.6838043371673397e-05, + "loss": 0.7982, + "step": 9218 + }, + { + "epoch": 0.2825487311511585, + "grad_norm": 1.593512519582135, + "learning_rate": 1.6837319046055788e-05, + "loss": 0.8709, + "step": 9219 + }, + { + "epoch": 0.28257937967389973, + "grad_norm": 1.351195037806087, + "learning_rate": 1.683659465306833e-05, + "loss": 0.73, + "step": 9220 + }, + { + "epoch": 0.28261002819664094, + "grad_norm": 1.3056099639037584, + "learning_rate": 1.6835870192718146e-05, + "loss": 0.7603, + "step": 9221 + }, + { + "epoch": 0.28264067671938214, + "grad_norm": 1.5492697635784844, + "learning_rate": 1.683514566501239e-05, + "loss": 0.7436, + "step": 9222 + }, + { + "epoch": 0.28267132524212335, + "grad_norm": 1.3834011790863472, + "learning_rate": 1.6834421069958196e-05, + "loss": 0.7957, + "step": 9223 + }, + { + "epoch": 0.28270197376486456, + "grad_norm": 1.4657100702192913, + "learning_rate": 1.68336964075627e-05, + "loss": 0.8077, + "step": 9224 + }, + { + "epoch": 0.28273262228760576, + "grad_norm": 1.424884626329834, + "learning_rate": 1.6832971677833044e-05, + "loss": 0.8901, + "step": 9225 + }, + { + "epoch": 0.28276327081034697, + "grad_norm": 1.2733364997695422, + "learning_rate": 1.6832246880776375e-05, + "loss": 0.7909, + "step": 9226 + }, + { + "epoch": 0.2827939193330882, + "grad_norm": 1.404040236353309, + "learning_rate": 1.6831522016399825e-05, + "loss": 0.8679, + "step": 9227 + }, + { + "epoch": 0.2828245678558293, + "grad_norm": 1.517845361097867, + "learning_rate": 1.6830797084710542e-05, + "loss": 0.6915, + "step": 9228 + }, + { + "epoch": 0.28285521637857053, + "grad_norm": 1.4056818230196906, + "learning_rate": 1.683007208571567e-05, + "loss": 0.811, + "step": 9229 + }, + { + "epoch": 0.28288586490131173, + "grad_norm": 0.7634003496447258, + "learning_rate": 1.6829347019422344e-05, + "loss": 0.6281, + "step": 9230 + }, + { + "epoch": 0.28291651342405294, + "grad_norm": 1.4561725776960155, + "learning_rate": 1.682862188583772e-05, + "loss": 0.8372, + "step": 9231 + }, + { + "epoch": 0.28294716194679415, + "grad_norm": 1.367998078006335, + "learning_rate": 1.682789668496893e-05, + "loss": 0.7358, + "step": 9232 + }, + { + "epoch": 0.28297781046953535, + "grad_norm": 1.2971051756243654, + "learning_rate": 1.6827171416823132e-05, + "loss": 0.6531, + "step": 9233 + }, + { + "epoch": 0.28300845899227656, + "grad_norm": 1.5511369765440868, + "learning_rate": 1.682644608140747e-05, + "loss": 0.7742, + "step": 9234 + }, + { + "epoch": 0.28303910751501776, + "grad_norm": 1.364148298321899, + "learning_rate": 1.6825720678729085e-05, + "loss": 0.7285, + "step": 9235 + }, + { + "epoch": 0.28306975603775897, + "grad_norm": 1.4087016860991362, + "learning_rate": 1.6824995208795128e-05, + "loss": 0.8639, + "step": 9236 + }, + { + "epoch": 0.2831004045605002, + "grad_norm": 1.4139737429263712, + "learning_rate": 1.682426967161275e-05, + "loss": 0.7114, + "step": 9237 + }, + { + "epoch": 0.2831310530832414, + "grad_norm": 1.5423128981491725, + "learning_rate": 1.682354406718909e-05, + "loss": 0.7983, + "step": 9238 + }, + { + "epoch": 0.2831617016059826, + "grad_norm": 1.502965497253406, + "learning_rate": 1.6822818395531308e-05, + "loss": 0.8316, + "step": 9239 + }, + { + "epoch": 0.2831923501287238, + "grad_norm": 0.7205084454582437, + "learning_rate": 1.6822092656646552e-05, + "loss": 0.6174, + "step": 9240 + }, + { + "epoch": 0.283222998651465, + "grad_norm": 1.3213725401924863, + "learning_rate": 1.6821366850541973e-05, + "loss": 0.7315, + "step": 9241 + }, + { + "epoch": 0.2832536471742062, + "grad_norm": 1.5766853993177719, + "learning_rate": 1.6820640977224716e-05, + "loss": 0.7798, + "step": 9242 + }, + { + "epoch": 0.2832842956969474, + "grad_norm": 1.5918013137763272, + "learning_rate": 1.681991503670194e-05, + "loss": 0.8081, + "step": 9243 + }, + { + "epoch": 0.2833149442196886, + "grad_norm": 1.5319183477714726, + "learning_rate": 1.6819189028980802e-05, + "loss": 0.7119, + "step": 9244 + }, + { + "epoch": 0.2833455927424298, + "grad_norm": 1.3984409891279972, + "learning_rate": 1.6818462954068443e-05, + "loss": 0.7301, + "step": 9245 + }, + { + "epoch": 0.283376241265171, + "grad_norm": 1.4472319522906876, + "learning_rate": 1.6817736811972026e-05, + "loss": 0.6843, + "step": 9246 + }, + { + "epoch": 0.28340688978791223, + "grad_norm": 0.727634304755486, + "learning_rate": 1.6817010602698703e-05, + "loss": 0.652, + "step": 9247 + }, + { + "epoch": 0.28343753831065344, + "grad_norm": 1.476236376312425, + "learning_rate": 1.681628432625563e-05, + "loss": 0.8803, + "step": 9248 + }, + { + "epoch": 0.28346818683339464, + "grad_norm": 1.5305319826536128, + "learning_rate": 1.6815557982649967e-05, + "loss": 0.8068, + "step": 9249 + }, + { + "epoch": 0.28349883535613585, + "grad_norm": 1.4717733065864176, + "learning_rate": 1.6814831571888862e-05, + "loss": 0.8585, + "step": 9250 + }, + { + "epoch": 0.28352948387887705, + "grad_norm": 1.5269356001048648, + "learning_rate": 1.681410509397948e-05, + "loss": 0.7952, + "step": 9251 + }, + { + "epoch": 0.28356013240161826, + "grad_norm": 1.3972287378315278, + "learning_rate": 1.681337854892898e-05, + "loss": 0.8079, + "step": 9252 + }, + { + "epoch": 0.28359078092435946, + "grad_norm": 1.5482929726104528, + "learning_rate": 1.6812651936744516e-05, + "loss": 0.8343, + "step": 9253 + }, + { + "epoch": 0.28362142944710067, + "grad_norm": 0.718900683987908, + "learning_rate": 1.6811925257433248e-05, + "loss": 0.6221, + "step": 9254 + }, + { + "epoch": 0.2836520779698419, + "grad_norm": 0.7209978851129779, + "learning_rate": 1.6811198511002336e-05, + "loss": 0.6164, + "step": 9255 + }, + { + "epoch": 0.2836827264925831, + "grad_norm": 1.4944828829520185, + "learning_rate": 1.6810471697458948e-05, + "loss": 0.797, + "step": 9256 + }, + { + "epoch": 0.2837133750153243, + "grad_norm": 1.5130277602392237, + "learning_rate": 1.6809744816810235e-05, + "loss": 0.7654, + "step": 9257 + }, + { + "epoch": 0.2837440235380655, + "grad_norm": 1.5352462522766501, + "learning_rate": 1.6809017869063367e-05, + "loss": 0.6774, + "step": 9258 + }, + { + "epoch": 0.28377467206080664, + "grad_norm": 0.7204043145594974, + "learning_rate": 1.6808290854225503e-05, + "loss": 0.6139, + "step": 9259 + }, + { + "epoch": 0.28380532058354785, + "grad_norm": 1.43772718128729, + "learning_rate": 1.6807563772303805e-05, + "loss": 0.7188, + "step": 9260 + }, + { + "epoch": 0.28383596910628905, + "grad_norm": 1.4149462909612769, + "learning_rate": 1.6806836623305442e-05, + "loss": 0.7521, + "step": 9261 + }, + { + "epoch": 0.28386661762903026, + "grad_norm": 1.3945720690676289, + "learning_rate": 1.6806109407237574e-05, + "loss": 0.8213, + "step": 9262 + }, + { + "epoch": 0.28389726615177147, + "grad_norm": 1.6068312931310949, + "learning_rate": 1.680538212410737e-05, + "loss": 0.8082, + "step": 9263 + }, + { + "epoch": 0.28392791467451267, + "grad_norm": 1.5372091793753815, + "learning_rate": 1.6804654773921996e-05, + "loss": 0.8195, + "step": 9264 + }, + { + "epoch": 0.2839585631972539, + "grad_norm": 1.6719442888396197, + "learning_rate": 1.6803927356688617e-05, + "loss": 0.7101, + "step": 9265 + }, + { + "epoch": 0.2839892117199951, + "grad_norm": 1.3672569409954034, + "learning_rate": 1.6803199872414397e-05, + "loss": 0.7468, + "step": 9266 + }, + { + "epoch": 0.2840198602427363, + "grad_norm": 1.5061335593072986, + "learning_rate": 1.6802472321106515e-05, + "loss": 0.8376, + "step": 9267 + }, + { + "epoch": 0.2840505087654775, + "grad_norm": 1.352145313957844, + "learning_rate": 1.6801744702772126e-05, + "loss": 0.7869, + "step": 9268 + }, + { + "epoch": 0.2840811572882187, + "grad_norm": 1.466711222802472, + "learning_rate": 1.680101701741841e-05, + "loss": 0.7634, + "step": 9269 + }, + { + "epoch": 0.2841118058109599, + "grad_norm": 0.7207241626930915, + "learning_rate": 1.6800289265052532e-05, + "loss": 0.6262, + "step": 9270 + }, + { + "epoch": 0.2841424543337011, + "grad_norm": 0.722523001406767, + "learning_rate": 1.6799561445681663e-05, + "loss": 0.6208, + "step": 9271 + }, + { + "epoch": 0.2841731028564423, + "grad_norm": 1.425725190402031, + "learning_rate": 1.6798833559312978e-05, + "loss": 0.7686, + "step": 9272 + }, + { + "epoch": 0.2842037513791835, + "grad_norm": 1.6484747011973613, + "learning_rate": 1.6798105605953643e-05, + "loss": 0.9603, + "step": 9273 + }, + { + "epoch": 0.28423439990192473, + "grad_norm": 1.52859973399622, + "learning_rate": 1.6797377585610834e-05, + "loss": 0.7976, + "step": 9274 + }, + { + "epoch": 0.28426504842466593, + "grad_norm": 1.5145273897556357, + "learning_rate": 1.6796649498291727e-05, + "loss": 0.7204, + "step": 9275 + }, + { + "epoch": 0.28429569694740714, + "grad_norm": 0.6989204386268503, + "learning_rate": 1.6795921344003496e-05, + "loss": 0.6284, + "step": 9276 + }, + { + "epoch": 0.28432634547014835, + "grad_norm": 1.5920549315575274, + "learning_rate": 1.6795193122753304e-05, + "loss": 0.715, + "step": 9277 + }, + { + "epoch": 0.28435699399288955, + "grad_norm": 1.5150306882659728, + "learning_rate": 1.6794464834548344e-05, + "loss": 0.8078, + "step": 9278 + }, + { + "epoch": 0.28438764251563076, + "grad_norm": 1.6260093439103747, + "learning_rate": 1.6793736479395783e-05, + "loss": 0.8749, + "step": 9279 + }, + { + "epoch": 0.28441829103837196, + "grad_norm": 1.6051783497657546, + "learning_rate": 1.6793008057302794e-05, + "loss": 0.894, + "step": 9280 + }, + { + "epoch": 0.28444893956111317, + "grad_norm": 1.6256500636448135, + "learning_rate": 1.679227956827656e-05, + "loss": 0.8191, + "step": 9281 + }, + { + "epoch": 0.2844795880838544, + "grad_norm": 1.4122607830232718, + "learning_rate": 1.6791551012324258e-05, + "loss": 0.8319, + "step": 9282 + }, + { + "epoch": 0.2845102366065956, + "grad_norm": 1.621225440996105, + "learning_rate": 1.6790822389453066e-05, + "loss": 0.8296, + "step": 9283 + }, + { + "epoch": 0.2845408851293368, + "grad_norm": 1.408138841944391, + "learning_rate": 1.6790093699670168e-05, + "loss": 0.7769, + "step": 9284 + }, + { + "epoch": 0.284571533652078, + "grad_norm": 0.7012199524201093, + "learning_rate": 1.6789364942982733e-05, + "loss": 0.6154, + "step": 9285 + }, + { + "epoch": 0.2846021821748192, + "grad_norm": 1.3994993063442869, + "learning_rate": 1.6788636119397953e-05, + "loss": 0.7118, + "step": 9286 + }, + { + "epoch": 0.2846328306975604, + "grad_norm": 1.460421326974131, + "learning_rate": 1.6787907228923002e-05, + "loss": 0.7051, + "step": 9287 + }, + { + "epoch": 0.2846634792203016, + "grad_norm": 1.3644601898505277, + "learning_rate": 1.6787178271565062e-05, + "loss": 0.7519, + "step": 9288 + }, + { + "epoch": 0.2846941277430428, + "grad_norm": 1.2901910637401381, + "learning_rate": 1.6786449247331316e-05, + "loss": 0.6988, + "step": 9289 + }, + { + "epoch": 0.28472477626578396, + "grad_norm": 1.38966666189432, + "learning_rate": 1.6785720156228955e-05, + "loss": 0.7906, + "step": 9290 + }, + { + "epoch": 0.28475542478852517, + "grad_norm": 1.6083385768251668, + "learning_rate": 1.678499099826516e-05, + "loss": 0.692, + "step": 9291 + }, + { + "epoch": 0.2847860733112664, + "grad_norm": 1.515927668254316, + "learning_rate": 1.6784261773447107e-05, + "loss": 0.6854, + "step": 9292 + }, + { + "epoch": 0.2848167218340076, + "grad_norm": 1.443231194092367, + "learning_rate": 1.6783532481781987e-05, + "loss": 0.8411, + "step": 9293 + }, + { + "epoch": 0.2848473703567488, + "grad_norm": 1.6740037925378677, + "learning_rate": 1.6782803123276985e-05, + "loss": 0.6974, + "step": 9294 + }, + { + "epoch": 0.28487801887949, + "grad_norm": 1.6485581028152427, + "learning_rate": 1.678207369793929e-05, + "loss": 0.8179, + "step": 9295 + }, + { + "epoch": 0.2849086674022312, + "grad_norm": 1.4508343806106119, + "learning_rate": 1.678134420577609e-05, + "loss": 0.8317, + "step": 9296 + }, + { + "epoch": 0.2849393159249724, + "grad_norm": 1.4585820064330415, + "learning_rate": 1.678061464679457e-05, + "loss": 0.6963, + "step": 9297 + }, + { + "epoch": 0.2849699644477136, + "grad_norm": 1.5738878494816064, + "learning_rate": 1.6779885021001915e-05, + "loss": 0.8074, + "step": 9298 + }, + { + "epoch": 0.2850006129704548, + "grad_norm": 1.331631553753901, + "learning_rate": 1.677915532840532e-05, + "loss": 0.6734, + "step": 9299 + }, + { + "epoch": 0.285031261493196, + "grad_norm": 1.5827665597589515, + "learning_rate": 1.6778425569011974e-05, + "loss": 0.7828, + "step": 9300 + }, + { + "epoch": 0.2850619100159372, + "grad_norm": 1.7668161414094874, + "learning_rate": 1.6777695742829067e-05, + "loss": 0.8397, + "step": 9301 + }, + { + "epoch": 0.28509255853867843, + "grad_norm": 1.5443231522249172, + "learning_rate": 1.6776965849863785e-05, + "loss": 0.8708, + "step": 9302 + }, + { + "epoch": 0.28512320706141964, + "grad_norm": 1.3978102655983602, + "learning_rate": 1.677623589012333e-05, + "loss": 0.7781, + "step": 9303 + }, + { + "epoch": 0.28515385558416084, + "grad_norm": 1.4577780601345125, + "learning_rate": 1.6775505863614884e-05, + "loss": 0.695, + "step": 9304 + }, + { + "epoch": 0.28518450410690205, + "grad_norm": 1.5037223426804334, + "learning_rate": 1.677477577034565e-05, + "loss": 0.7948, + "step": 9305 + }, + { + "epoch": 0.28521515262964325, + "grad_norm": 0.6917592730126554, + "learning_rate": 1.6774045610322816e-05, + "loss": 0.6162, + "step": 9306 + }, + { + "epoch": 0.28524580115238446, + "grad_norm": 1.4549770314451493, + "learning_rate": 1.6773315383553576e-05, + "loss": 0.8744, + "step": 9307 + }, + { + "epoch": 0.28527644967512567, + "grad_norm": 1.4551803060473998, + "learning_rate": 1.6772585090045127e-05, + "loss": 0.7921, + "step": 9308 + }, + { + "epoch": 0.28530709819786687, + "grad_norm": 1.6714394438838385, + "learning_rate": 1.6771854729804663e-05, + "loss": 0.7944, + "step": 9309 + }, + { + "epoch": 0.2853377467206081, + "grad_norm": 1.474964369278572, + "learning_rate": 1.6771124302839386e-05, + "loss": 0.7604, + "step": 9310 + }, + { + "epoch": 0.2853683952433493, + "grad_norm": 1.4474181941248965, + "learning_rate": 1.6770393809156485e-05, + "loss": 0.8746, + "step": 9311 + }, + { + "epoch": 0.2853990437660905, + "grad_norm": 1.399260487575078, + "learning_rate": 1.6769663248763163e-05, + "loss": 0.7578, + "step": 9312 + }, + { + "epoch": 0.2854296922888317, + "grad_norm": 0.7250687577966146, + "learning_rate": 1.6768932621666617e-05, + "loss": 0.6685, + "step": 9313 + }, + { + "epoch": 0.2854603408115729, + "grad_norm": 1.4642012694452793, + "learning_rate": 1.6768201927874045e-05, + "loss": 0.9083, + "step": 9314 + }, + { + "epoch": 0.2854909893343141, + "grad_norm": 1.6608213291925322, + "learning_rate": 1.6767471167392646e-05, + "loss": 0.796, + "step": 9315 + }, + { + "epoch": 0.2855216378570553, + "grad_norm": 1.402202129696458, + "learning_rate": 1.6766740340229624e-05, + "loss": 0.7395, + "step": 9316 + }, + { + "epoch": 0.2855522863797965, + "grad_norm": 1.3251002920047283, + "learning_rate": 1.6766009446392177e-05, + "loss": 0.7638, + "step": 9317 + }, + { + "epoch": 0.2855829349025377, + "grad_norm": 1.6145855031884822, + "learning_rate": 1.676527848588751e-05, + "loss": 0.7396, + "step": 9318 + }, + { + "epoch": 0.2856135834252789, + "grad_norm": 1.531227455636509, + "learning_rate": 1.6764547458722823e-05, + "loss": 0.7672, + "step": 9319 + }, + { + "epoch": 0.28564423194802013, + "grad_norm": 1.430001491486749, + "learning_rate": 1.6763816364905318e-05, + "loss": 0.7638, + "step": 9320 + }, + { + "epoch": 0.2856748804707613, + "grad_norm": 1.5712032844472819, + "learning_rate": 1.67630852044422e-05, + "loss": 0.7451, + "step": 9321 + }, + { + "epoch": 0.2857055289935025, + "grad_norm": 1.60014125682038, + "learning_rate": 1.6762353977340674e-05, + "loss": 0.7829, + "step": 9322 + }, + { + "epoch": 0.2857361775162437, + "grad_norm": 1.6342363168930942, + "learning_rate": 1.676162268360794e-05, + "loss": 0.7356, + "step": 9323 + }, + { + "epoch": 0.2857668260389849, + "grad_norm": 0.7802813089537751, + "learning_rate": 1.676089132325121e-05, + "loss": 0.6387, + "step": 9324 + }, + { + "epoch": 0.2857974745617261, + "grad_norm": 1.5528349032223585, + "learning_rate": 1.6760159896277688e-05, + "loss": 0.7917, + "step": 9325 + }, + { + "epoch": 0.2858281230844673, + "grad_norm": 1.3098457142991533, + "learning_rate": 1.6759428402694582e-05, + "loss": 0.7735, + "step": 9326 + }, + { + "epoch": 0.2858587716072085, + "grad_norm": 1.4733983708644123, + "learning_rate": 1.67586968425091e-05, + "loss": 0.8226, + "step": 9327 + }, + { + "epoch": 0.2858894201299497, + "grad_norm": 1.5297031405115353, + "learning_rate": 1.675796521572845e-05, + "loss": 0.7589, + "step": 9328 + }, + { + "epoch": 0.28592006865269093, + "grad_norm": 1.4812418703466401, + "learning_rate": 1.675723352235983e-05, + "loss": 0.8573, + "step": 9329 + }, + { + "epoch": 0.28595071717543213, + "grad_norm": 1.4505616341166707, + "learning_rate": 1.675650176241047e-05, + "loss": 0.8131, + "step": 9330 + }, + { + "epoch": 0.28598136569817334, + "grad_norm": 0.7562670783114244, + "learning_rate": 1.6755769935887562e-05, + "loss": 0.6398, + "step": 9331 + }, + { + "epoch": 0.28601201422091455, + "grad_norm": 1.412664967101129, + "learning_rate": 1.6755038042798327e-05, + "loss": 0.7789, + "step": 9332 + }, + { + "epoch": 0.28604266274365575, + "grad_norm": 1.6166026098297341, + "learning_rate": 1.675430608314997e-05, + "loss": 0.7805, + "step": 9333 + }, + { + "epoch": 0.28607331126639696, + "grad_norm": 1.6618440433486907, + "learning_rate": 1.6753574056949715e-05, + "loss": 0.88, + "step": 9334 + }, + { + "epoch": 0.28610395978913816, + "grad_norm": 1.3078177905469535, + "learning_rate": 1.6752841964204762e-05, + "loss": 0.9222, + "step": 9335 + }, + { + "epoch": 0.28613460831187937, + "grad_norm": 1.4975058402162593, + "learning_rate": 1.675210980492233e-05, + "loss": 0.8969, + "step": 9336 + }, + { + "epoch": 0.2861652568346206, + "grad_norm": 1.5334844082300358, + "learning_rate": 1.6751377579109634e-05, + "loss": 0.7651, + "step": 9337 + }, + { + "epoch": 0.2861959053573618, + "grad_norm": 0.7740723444634293, + "learning_rate": 1.6750645286773885e-05, + "loss": 0.6057, + "step": 9338 + }, + { + "epoch": 0.286226553880103, + "grad_norm": 1.4996801577295775, + "learning_rate": 1.67499129279223e-05, + "loss": 0.7317, + "step": 9339 + }, + { + "epoch": 0.2862572024028442, + "grad_norm": 1.5140357385175116, + "learning_rate": 1.67491805025621e-05, + "loss": 0.7437, + "step": 9340 + }, + { + "epoch": 0.2862878509255854, + "grad_norm": 1.4311911889537197, + "learning_rate": 1.6748448010700494e-05, + "loss": 0.8412, + "step": 9341 + }, + { + "epoch": 0.2863184994483266, + "grad_norm": 1.4560981698623154, + "learning_rate": 1.6747715452344705e-05, + "loss": 0.7723, + "step": 9342 + }, + { + "epoch": 0.2863491479710678, + "grad_norm": 1.3395989075883221, + "learning_rate": 1.6746982827501948e-05, + "loss": 0.7355, + "step": 9343 + }, + { + "epoch": 0.286379796493809, + "grad_norm": 1.4086468424975527, + "learning_rate": 1.6746250136179444e-05, + "loss": 0.792, + "step": 9344 + }, + { + "epoch": 0.2864104450165502, + "grad_norm": 0.7351953414112702, + "learning_rate": 1.674551737838441e-05, + "loss": 0.628, + "step": 9345 + }, + { + "epoch": 0.2864410935392914, + "grad_norm": 0.7198472731150624, + "learning_rate": 1.6744784554124067e-05, + "loss": 0.6405, + "step": 9346 + }, + { + "epoch": 0.28647174206203263, + "grad_norm": 1.6194581866436693, + "learning_rate": 1.6744051663405637e-05, + "loss": 0.8677, + "step": 9347 + }, + { + "epoch": 0.28650239058477384, + "grad_norm": 1.3603788855024213, + "learning_rate": 1.674331870623634e-05, + "loss": 0.8454, + "step": 9348 + }, + { + "epoch": 0.28653303910751504, + "grad_norm": 1.3620364939140925, + "learning_rate": 1.6742585682623402e-05, + "loss": 0.8067, + "step": 9349 + }, + { + "epoch": 0.28656368763025625, + "grad_norm": 1.5367880378912913, + "learning_rate": 1.6741852592574036e-05, + "loss": 0.7926, + "step": 9350 + }, + { + "epoch": 0.28659433615299745, + "grad_norm": 1.6688906892849138, + "learning_rate": 1.6741119436095475e-05, + "loss": 0.7609, + "step": 9351 + }, + { + "epoch": 0.2866249846757386, + "grad_norm": 1.4437455116175228, + "learning_rate": 1.674038621319494e-05, + "loss": 0.7763, + "step": 9352 + }, + { + "epoch": 0.2866556331984798, + "grad_norm": 1.3002383760597909, + "learning_rate": 1.6739652923879656e-05, + "loss": 0.7811, + "step": 9353 + }, + { + "epoch": 0.286686281721221, + "grad_norm": 1.5154359507699415, + "learning_rate": 1.6738919568156845e-05, + "loss": 0.7556, + "step": 9354 + }, + { + "epoch": 0.2867169302439622, + "grad_norm": 1.5750799018100383, + "learning_rate": 1.6738186146033735e-05, + "loss": 0.9169, + "step": 9355 + }, + { + "epoch": 0.2867475787667034, + "grad_norm": 1.4779063785994104, + "learning_rate": 1.673745265751755e-05, + "loss": 0.7532, + "step": 9356 + }, + { + "epoch": 0.28677822728944463, + "grad_norm": 1.4884285603203986, + "learning_rate": 1.6736719102615525e-05, + "loss": 0.7425, + "step": 9357 + }, + { + "epoch": 0.28680887581218584, + "grad_norm": 1.42592853283934, + "learning_rate": 1.673598548133488e-05, + "loss": 0.8057, + "step": 9358 + }, + { + "epoch": 0.28683952433492704, + "grad_norm": 1.4650954471675495, + "learning_rate": 1.673525179368285e-05, + "loss": 0.8457, + "step": 9359 + }, + { + "epoch": 0.28687017285766825, + "grad_norm": 1.4492283205683345, + "learning_rate": 1.6734518039666658e-05, + "loss": 0.7805, + "step": 9360 + }, + { + "epoch": 0.28690082138040945, + "grad_norm": 0.7978973935241942, + "learning_rate": 1.673378421929354e-05, + "loss": 0.6263, + "step": 9361 + }, + { + "epoch": 0.28693146990315066, + "grad_norm": 1.7271158060506528, + "learning_rate": 1.673305033257072e-05, + "loss": 0.8135, + "step": 9362 + }, + { + "epoch": 0.28696211842589187, + "grad_norm": 1.3713388742464594, + "learning_rate": 1.673231637950543e-05, + "loss": 0.7155, + "step": 9363 + }, + { + "epoch": 0.28699276694863307, + "grad_norm": 1.467628260228804, + "learning_rate": 1.673158236010491e-05, + "loss": 0.7661, + "step": 9364 + }, + { + "epoch": 0.2870234154713743, + "grad_norm": 1.5183326574187437, + "learning_rate": 1.6730848274376385e-05, + "loss": 0.8106, + "step": 9365 + }, + { + "epoch": 0.2870540639941155, + "grad_norm": 1.3613376487257083, + "learning_rate": 1.6730114122327088e-05, + "loss": 0.8035, + "step": 9366 + }, + { + "epoch": 0.2870847125168567, + "grad_norm": 1.5757145131088108, + "learning_rate": 1.6729379903964253e-05, + "loss": 0.6928, + "step": 9367 + }, + { + "epoch": 0.2871153610395979, + "grad_norm": 1.4072040643819614, + "learning_rate": 1.672864561929512e-05, + "loss": 0.7815, + "step": 9368 + }, + { + "epoch": 0.2871460095623391, + "grad_norm": 1.450302429723573, + "learning_rate": 1.672791126832692e-05, + "loss": 0.735, + "step": 9369 + }, + { + "epoch": 0.2871766580850803, + "grad_norm": 1.690095944496037, + "learning_rate": 1.6727176851066883e-05, + "loss": 0.9032, + "step": 9370 + }, + { + "epoch": 0.2872073066078215, + "grad_norm": 0.6803963661370084, + "learning_rate": 1.6726442367522254e-05, + "loss": 0.5606, + "step": 9371 + }, + { + "epoch": 0.2872379551305627, + "grad_norm": 1.4704109477475527, + "learning_rate": 1.672570781770027e-05, + "loss": 0.6612, + "step": 9372 + }, + { + "epoch": 0.2872686036533039, + "grad_norm": 1.6079801298607759, + "learning_rate": 1.6724973201608166e-05, + "loss": 0.824, + "step": 9373 + }, + { + "epoch": 0.28729925217604513, + "grad_norm": 1.4441403291022432, + "learning_rate": 1.672423851925318e-05, + "loss": 0.7094, + "step": 9374 + }, + { + "epoch": 0.28732990069878633, + "grad_norm": 1.4028997274880464, + "learning_rate": 1.6723503770642547e-05, + "loss": 0.6886, + "step": 9375 + }, + { + "epoch": 0.28736054922152754, + "grad_norm": 1.5263873444757259, + "learning_rate": 1.672276895578352e-05, + "loss": 0.8655, + "step": 9376 + }, + { + "epoch": 0.28739119774426874, + "grad_norm": 1.6596721701158315, + "learning_rate": 1.672203407468332e-05, + "loss": 0.8189, + "step": 9377 + }, + { + "epoch": 0.28742184626700995, + "grad_norm": 1.4909957005604744, + "learning_rate": 1.6721299127349207e-05, + "loss": 0.76, + "step": 9378 + }, + { + "epoch": 0.28745249478975116, + "grad_norm": 1.5591509716734968, + "learning_rate": 1.672056411378841e-05, + "loss": 0.8094, + "step": 9379 + }, + { + "epoch": 0.28748314331249236, + "grad_norm": 1.4479187057570602, + "learning_rate": 1.6719829034008178e-05, + "loss": 0.7833, + "step": 9380 + }, + { + "epoch": 0.28751379183523357, + "grad_norm": 1.6537749364255379, + "learning_rate": 1.6719093888015747e-05, + "loss": 0.7905, + "step": 9381 + }, + { + "epoch": 0.2875444403579748, + "grad_norm": 0.7130906785526203, + "learning_rate": 1.6718358675818363e-05, + "loss": 0.6328, + "step": 9382 + }, + { + "epoch": 0.2875750888807159, + "grad_norm": 1.4363363924385677, + "learning_rate": 1.671762339742328e-05, + "loss": 0.7502, + "step": 9383 + }, + { + "epoch": 0.28760573740345713, + "grad_norm": 1.5748957624048323, + "learning_rate": 1.671688805283773e-05, + "loss": 0.8287, + "step": 9384 + }, + { + "epoch": 0.28763638592619833, + "grad_norm": 1.4697369008687713, + "learning_rate": 1.671615264206896e-05, + "loss": 0.9469, + "step": 9385 + }, + { + "epoch": 0.28766703444893954, + "grad_norm": 1.3697758251146215, + "learning_rate": 1.6715417165124227e-05, + "loss": 0.8138, + "step": 9386 + }, + { + "epoch": 0.28769768297168075, + "grad_norm": 1.5187495911383775, + "learning_rate": 1.6714681622010766e-05, + "loss": 0.856, + "step": 9387 + }, + { + "epoch": 0.28772833149442195, + "grad_norm": 1.472029188942332, + "learning_rate": 1.671394601273583e-05, + "loss": 0.7845, + "step": 9388 + }, + { + "epoch": 0.28775898001716316, + "grad_norm": 0.7431086919073595, + "learning_rate": 1.671321033730667e-05, + "loss": 0.6045, + "step": 9389 + }, + { + "epoch": 0.28778962853990436, + "grad_norm": 1.610383775122581, + "learning_rate": 1.6712474595730522e-05, + "loss": 0.8269, + "step": 9390 + }, + { + "epoch": 0.28782027706264557, + "grad_norm": 1.2724299572060194, + "learning_rate": 1.671173878801465e-05, + "loss": 0.7614, + "step": 9391 + }, + { + "epoch": 0.2878509255853868, + "grad_norm": 1.4443381558433408, + "learning_rate": 1.67110029141663e-05, + "loss": 0.7802, + "step": 9392 + }, + { + "epoch": 0.287881574108128, + "grad_norm": 0.6891773117677769, + "learning_rate": 1.6710266974192717e-05, + "loss": 0.61, + "step": 9393 + }, + { + "epoch": 0.2879122226308692, + "grad_norm": 1.492215210588797, + "learning_rate": 1.670953096810116e-05, + "loss": 0.7358, + "step": 9394 + }, + { + "epoch": 0.2879428711536104, + "grad_norm": 0.7222143377975185, + "learning_rate": 1.6708794895898876e-05, + "loss": 0.6607, + "step": 9395 + }, + { + "epoch": 0.2879735196763516, + "grad_norm": 0.6839552043230719, + "learning_rate": 1.670805875759312e-05, + "loss": 0.6144, + "step": 9396 + }, + { + "epoch": 0.2880041681990928, + "grad_norm": 1.2886405121307447, + "learning_rate": 1.670732255319114e-05, + "loss": 0.7488, + "step": 9397 + }, + { + "epoch": 0.288034816721834, + "grad_norm": 1.5379387343193727, + "learning_rate": 1.6706586282700203e-05, + "loss": 0.7528, + "step": 9398 + }, + { + "epoch": 0.2880654652445752, + "grad_norm": 1.2544040159221381, + "learning_rate": 1.670584994612755e-05, + "loss": 0.7298, + "step": 9399 + }, + { + "epoch": 0.2880961137673164, + "grad_norm": 1.3535775396999006, + "learning_rate": 1.670511354348044e-05, + "loss": 0.8132, + "step": 9400 + }, + { + "epoch": 0.2881267622900576, + "grad_norm": 0.7074590482475172, + "learning_rate": 1.6704377074766137e-05, + "loss": 0.6384, + "step": 9401 + }, + { + "epoch": 0.28815741081279883, + "grad_norm": 1.489067978609824, + "learning_rate": 1.670364053999189e-05, + "loss": 0.8714, + "step": 9402 + }, + { + "epoch": 0.28818805933554004, + "grad_norm": 1.5790834685284605, + "learning_rate": 1.6702903939164955e-05, + "loss": 0.774, + "step": 9403 + }, + { + "epoch": 0.28821870785828124, + "grad_norm": 1.5252470645754577, + "learning_rate": 1.6702167272292592e-05, + "loss": 0.7568, + "step": 9404 + }, + { + "epoch": 0.28824935638102245, + "grad_norm": 1.355415063792785, + "learning_rate": 1.670143053938206e-05, + "loss": 0.7331, + "step": 9405 + }, + { + "epoch": 0.28828000490376365, + "grad_norm": 1.644066467556545, + "learning_rate": 1.6700693740440622e-05, + "loss": 0.8816, + "step": 9406 + }, + { + "epoch": 0.28831065342650486, + "grad_norm": 0.6755479216008898, + "learning_rate": 1.669995687547553e-05, + "loss": 0.6085, + "step": 9407 + }, + { + "epoch": 0.28834130194924606, + "grad_norm": 1.7248368543892312, + "learning_rate": 1.6699219944494052e-05, + "loss": 0.7605, + "step": 9408 + }, + { + "epoch": 0.28837195047198727, + "grad_norm": 1.473891461402015, + "learning_rate": 1.6698482947503442e-05, + "loss": 0.8267, + "step": 9409 + }, + { + "epoch": 0.2884025989947285, + "grad_norm": 1.41055887909666, + "learning_rate": 1.6697745884510968e-05, + "loss": 0.7125, + "step": 9410 + }, + { + "epoch": 0.2884332475174697, + "grad_norm": 1.5378488927226628, + "learning_rate": 1.669700875552389e-05, + "loss": 0.6839, + "step": 9411 + }, + { + "epoch": 0.2884638960402109, + "grad_norm": 1.5257481416400698, + "learning_rate": 1.669627156054947e-05, + "loss": 0.8272, + "step": 9412 + }, + { + "epoch": 0.2884945445629521, + "grad_norm": 1.566790271177121, + "learning_rate": 1.6695534299594977e-05, + "loss": 0.7375, + "step": 9413 + }, + { + "epoch": 0.28852519308569324, + "grad_norm": 0.6847699655070832, + "learning_rate": 1.669479697266767e-05, + "loss": 0.61, + "step": 9414 + }, + { + "epoch": 0.28855584160843445, + "grad_norm": 1.7103560414396408, + "learning_rate": 1.6694059579774812e-05, + "loss": 0.6647, + "step": 9415 + }, + { + "epoch": 0.28858649013117565, + "grad_norm": 1.4448629220825497, + "learning_rate": 1.6693322120923676e-05, + "loss": 0.7353, + "step": 9416 + }, + { + "epoch": 0.28861713865391686, + "grad_norm": 0.6776380601081695, + "learning_rate": 1.669258459612152e-05, + "loss": 0.6116, + "step": 9417 + }, + { + "epoch": 0.28864778717665807, + "grad_norm": 1.4335450179546056, + "learning_rate": 1.669184700537562e-05, + "loss": 0.687, + "step": 9418 + }, + { + "epoch": 0.28867843569939927, + "grad_norm": 1.369084432148362, + "learning_rate": 1.6691109348693237e-05, + "loss": 0.7332, + "step": 9419 + }, + { + "epoch": 0.2887090842221405, + "grad_norm": 1.6483287518455503, + "learning_rate": 1.6690371626081644e-05, + "loss": 0.7235, + "step": 9420 + }, + { + "epoch": 0.2887397327448817, + "grad_norm": 0.6961789687212716, + "learning_rate": 1.6689633837548103e-05, + "loss": 0.6421, + "step": 9421 + }, + { + "epoch": 0.2887703812676229, + "grad_norm": 1.3556275379019955, + "learning_rate": 1.6688895983099895e-05, + "loss": 0.8312, + "step": 9422 + }, + { + "epoch": 0.2888010297903641, + "grad_norm": 1.5521029534846893, + "learning_rate": 1.6688158062744276e-05, + "loss": 0.746, + "step": 9423 + }, + { + "epoch": 0.2888316783131053, + "grad_norm": 1.5737856846310125, + "learning_rate": 1.668742007648853e-05, + "loss": 0.8103, + "step": 9424 + }, + { + "epoch": 0.2888623268358465, + "grad_norm": 1.5328764900613971, + "learning_rate": 1.6686682024339917e-05, + "loss": 0.7568, + "step": 9425 + }, + { + "epoch": 0.2888929753585877, + "grad_norm": 1.4192822163936374, + "learning_rate": 1.668594390630572e-05, + "loss": 0.7261, + "step": 9426 + }, + { + "epoch": 0.2889236238813289, + "grad_norm": 1.5233618427086733, + "learning_rate": 1.6685205722393206e-05, + "loss": 0.7428, + "step": 9427 + }, + { + "epoch": 0.2889542724040701, + "grad_norm": 1.3563101022761936, + "learning_rate": 1.668446747260965e-05, + "loss": 0.7388, + "step": 9428 + }, + { + "epoch": 0.28898492092681133, + "grad_norm": 1.3577886442181482, + "learning_rate": 1.6683729156962324e-05, + "loss": 0.7579, + "step": 9429 + }, + { + "epoch": 0.28901556944955253, + "grad_norm": 0.7272292921847779, + "learning_rate": 1.6682990775458506e-05, + "loss": 0.6066, + "step": 9430 + }, + { + "epoch": 0.28904621797229374, + "grad_norm": 0.7253812753001339, + "learning_rate": 1.668225232810547e-05, + "loss": 0.6351, + "step": 9431 + }, + { + "epoch": 0.28907686649503495, + "grad_norm": 0.6954066023143701, + "learning_rate": 1.668151381491049e-05, + "loss": 0.617, + "step": 9432 + }, + { + "epoch": 0.28910751501777615, + "grad_norm": 1.5706306958714962, + "learning_rate": 1.6680775235880847e-05, + "loss": 0.8299, + "step": 9433 + }, + { + "epoch": 0.28913816354051736, + "grad_norm": 0.7039213476856883, + "learning_rate": 1.6680036591023817e-05, + "loss": 0.601, + "step": 9434 + }, + { + "epoch": 0.28916881206325856, + "grad_norm": 0.7199579359342392, + "learning_rate": 1.6679297880346675e-05, + "loss": 0.6232, + "step": 9435 + }, + { + "epoch": 0.28919946058599977, + "grad_norm": 1.4626178164048227, + "learning_rate": 1.6678559103856703e-05, + "loss": 0.6733, + "step": 9436 + }, + { + "epoch": 0.289230109108741, + "grad_norm": 1.4062126348012536, + "learning_rate": 1.6677820261561182e-05, + "loss": 0.7495, + "step": 9437 + }, + { + "epoch": 0.2892607576314822, + "grad_norm": 1.5292989596471, + "learning_rate": 1.6677081353467386e-05, + "loss": 0.8068, + "step": 9438 + }, + { + "epoch": 0.2892914061542234, + "grad_norm": 1.5483471844720085, + "learning_rate": 1.66763423795826e-05, + "loss": 0.7902, + "step": 9439 + }, + { + "epoch": 0.2893220546769646, + "grad_norm": 1.5375766222381608, + "learning_rate": 1.6675603339914103e-05, + "loss": 0.7791, + "step": 9440 + }, + { + "epoch": 0.2893527031997058, + "grad_norm": 1.4286191208465513, + "learning_rate": 1.6674864234469182e-05, + "loss": 0.8024, + "step": 9441 + }, + { + "epoch": 0.289383351722447, + "grad_norm": 1.5676898577881324, + "learning_rate": 1.667412506325511e-05, + "loss": 0.9507, + "step": 9442 + }, + { + "epoch": 0.2894140002451882, + "grad_norm": 1.6675900875397471, + "learning_rate": 1.667338582627918e-05, + "loss": 0.846, + "step": 9443 + }, + { + "epoch": 0.2894446487679294, + "grad_norm": 1.6578070694104416, + "learning_rate": 1.6672646523548672e-05, + "loss": 0.8268, + "step": 9444 + }, + { + "epoch": 0.28947529729067056, + "grad_norm": 1.4891687426034241, + "learning_rate": 1.6671907155070873e-05, + "loss": 0.6986, + "step": 9445 + }, + { + "epoch": 0.28950594581341177, + "grad_norm": 1.3067345533970713, + "learning_rate": 1.667116772085306e-05, + "loss": 0.7219, + "step": 9446 + }, + { + "epoch": 0.289536594336153, + "grad_norm": 1.5075659041681788, + "learning_rate": 1.667042822090253e-05, + "loss": 0.7803, + "step": 9447 + }, + { + "epoch": 0.2895672428588942, + "grad_norm": 1.3423379229508716, + "learning_rate": 1.6669688655226556e-05, + "loss": 0.7437, + "step": 9448 + }, + { + "epoch": 0.2895978913816354, + "grad_norm": 1.5489999650573931, + "learning_rate": 1.666894902383244e-05, + "loss": 0.7896, + "step": 9449 + }, + { + "epoch": 0.2896285399043766, + "grad_norm": 0.756803399978448, + "learning_rate": 1.6668209326727464e-05, + "loss": 0.6306, + "step": 9450 + }, + { + "epoch": 0.2896591884271178, + "grad_norm": 1.4260962504946946, + "learning_rate": 1.666746956391891e-05, + "loss": 0.7919, + "step": 9451 + }, + { + "epoch": 0.289689836949859, + "grad_norm": 0.7079741315006568, + "learning_rate": 1.6666729735414076e-05, + "loss": 0.6208, + "step": 9452 + }, + { + "epoch": 0.2897204854726002, + "grad_norm": 1.5032589504927918, + "learning_rate": 1.666598984122025e-05, + "loss": 0.7467, + "step": 9453 + }, + { + "epoch": 0.2897511339953414, + "grad_norm": 1.508692159746705, + "learning_rate": 1.6665249881344715e-05, + "loss": 0.7993, + "step": 9454 + }, + { + "epoch": 0.2897817825180826, + "grad_norm": 1.3396836293385324, + "learning_rate": 1.6664509855794772e-05, + "loss": 0.6092, + "step": 9455 + }, + { + "epoch": 0.2898124310408238, + "grad_norm": 1.4989669274552848, + "learning_rate": 1.6663769764577705e-05, + "loss": 0.7553, + "step": 9456 + }, + { + "epoch": 0.28984307956356503, + "grad_norm": 1.5253732457329836, + "learning_rate": 1.6663029607700812e-05, + "loss": 0.7786, + "step": 9457 + }, + { + "epoch": 0.28987372808630624, + "grad_norm": 1.4098061498650585, + "learning_rate": 1.666228938517138e-05, + "loss": 0.7957, + "step": 9458 + }, + { + "epoch": 0.28990437660904744, + "grad_norm": 1.6525400888803132, + "learning_rate": 1.666154909699671e-05, + "loss": 0.7598, + "step": 9459 + }, + { + "epoch": 0.28993502513178865, + "grad_norm": 0.7366826425117431, + "learning_rate": 1.6660808743184092e-05, + "loss": 0.5992, + "step": 9460 + }, + { + "epoch": 0.28996567365452985, + "grad_norm": 0.7178592373656998, + "learning_rate": 1.666006832374082e-05, + "loss": 0.6202, + "step": 9461 + }, + { + "epoch": 0.28999632217727106, + "grad_norm": 1.5458022381409997, + "learning_rate": 1.665932783867419e-05, + "loss": 0.756, + "step": 9462 + }, + { + "epoch": 0.29002697070001227, + "grad_norm": 1.3580213754471957, + "learning_rate": 1.66585872879915e-05, + "loss": 0.8074, + "step": 9463 + }, + { + "epoch": 0.29005761922275347, + "grad_norm": 1.527340715290385, + "learning_rate": 1.6657846671700045e-05, + "loss": 0.7857, + "step": 9464 + }, + { + "epoch": 0.2900882677454947, + "grad_norm": 0.8124065388153082, + "learning_rate": 1.6657105989807124e-05, + "loss": 0.6318, + "step": 9465 + }, + { + "epoch": 0.2901189162682359, + "grad_norm": 1.5112546724388058, + "learning_rate": 1.6656365242320036e-05, + "loss": 0.7439, + "step": 9466 + }, + { + "epoch": 0.2901495647909771, + "grad_norm": 1.5313924682902995, + "learning_rate": 1.6655624429246075e-05, + "loss": 0.8053, + "step": 9467 + }, + { + "epoch": 0.2901802133137183, + "grad_norm": 1.6365554758067287, + "learning_rate": 1.6654883550592546e-05, + "loss": 0.814, + "step": 9468 + }, + { + "epoch": 0.2902108618364595, + "grad_norm": 1.5753388230063956, + "learning_rate": 1.665414260636675e-05, + "loss": 0.9047, + "step": 9469 + }, + { + "epoch": 0.2902415103592007, + "grad_norm": 1.5858466366818407, + "learning_rate": 1.6653401596575976e-05, + "loss": 0.785, + "step": 9470 + }, + { + "epoch": 0.2902721588819419, + "grad_norm": 0.7203083745479016, + "learning_rate": 1.6652660521227536e-05, + "loss": 0.5825, + "step": 9471 + }, + { + "epoch": 0.2903028074046831, + "grad_norm": 1.4187616895818695, + "learning_rate": 1.6651919380328735e-05, + "loss": 0.6955, + "step": 9472 + }, + { + "epoch": 0.2903334559274243, + "grad_norm": 0.7202948483439618, + "learning_rate": 1.6651178173886866e-05, + "loss": 0.6417, + "step": 9473 + }, + { + "epoch": 0.2903641044501655, + "grad_norm": 1.5088610144532764, + "learning_rate": 1.6650436901909238e-05, + "loss": 0.7811, + "step": 9474 + }, + { + "epoch": 0.29039475297290673, + "grad_norm": 0.7113351092829243, + "learning_rate": 1.6649695564403153e-05, + "loss": 0.5979, + "step": 9475 + }, + { + "epoch": 0.2904254014956479, + "grad_norm": 1.6495499477132578, + "learning_rate": 1.6648954161375918e-05, + "loss": 0.8388, + "step": 9476 + }, + { + "epoch": 0.2904560500183891, + "grad_norm": 1.606084494227805, + "learning_rate": 1.664821269283483e-05, + "loss": 0.7911, + "step": 9477 + }, + { + "epoch": 0.2904866985411303, + "grad_norm": 1.5831521013185286, + "learning_rate": 1.664747115878721e-05, + "loss": 0.7938, + "step": 9478 + }, + { + "epoch": 0.2905173470638715, + "grad_norm": 1.584319608741167, + "learning_rate": 1.664672955924035e-05, + "loss": 0.7865, + "step": 9479 + }, + { + "epoch": 0.2905479955866127, + "grad_norm": 1.3914473649263075, + "learning_rate": 1.6645987894201567e-05, + "loss": 0.8025, + "step": 9480 + }, + { + "epoch": 0.2905786441093539, + "grad_norm": 0.7889732086014801, + "learning_rate": 1.664524616367816e-05, + "loss": 0.6306, + "step": 9481 + }, + { + "epoch": 0.2906092926320951, + "grad_norm": 1.5548318094118458, + "learning_rate": 1.6644504367677447e-05, + "loss": 0.8453, + "step": 9482 + }, + { + "epoch": 0.2906399411548363, + "grad_norm": 1.707648542882308, + "learning_rate": 1.664376250620673e-05, + "loss": 0.8177, + "step": 9483 + }, + { + "epoch": 0.29067058967757753, + "grad_norm": 1.5646071912470774, + "learning_rate": 1.664302057927332e-05, + "loss": 0.8572, + "step": 9484 + }, + { + "epoch": 0.29070123820031873, + "grad_norm": 1.385959470206506, + "learning_rate": 1.6642278586884533e-05, + "loss": 0.8097, + "step": 9485 + }, + { + "epoch": 0.29073188672305994, + "grad_norm": 1.695815702204147, + "learning_rate": 1.664153652904767e-05, + "loss": 0.9012, + "step": 9486 + }, + { + "epoch": 0.29076253524580115, + "grad_norm": 1.5536067127516475, + "learning_rate": 1.6640794405770055e-05, + "loss": 0.7853, + "step": 9487 + }, + { + "epoch": 0.29079318376854235, + "grad_norm": 1.439050529916514, + "learning_rate": 1.6640052217058988e-05, + "loss": 0.7547, + "step": 9488 + }, + { + "epoch": 0.29082383229128356, + "grad_norm": 1.4051948809717223, + "learning_rate": 1.663930996292179e-05, + "loss": 0.7473, + "step": 9489 + }, + { + "epoch": 0.29085448081402476, + "grad_norm": 1.6990043857377242, + "learning_rate": 1.663856764336577e-05, + "loss": 0.8021, + "step": 9490 + }, + { + "epoch": 0.29088512933676597, + "grad_norm": 1.3917265586576504, + "learning_rate": 1.6637825258398246e-05, + "loss": 0.6991, + "step": 9491 + }, + { + "epoch": 0.2909157778595072, + "grad_norm": 1.3572544493250822, + "learning_rate": 1.663708280802653e-05, + "loss": 0.7647, + "step": 9492 + }, + { + "epoch": 0.2909464263822484, + "grad_norm": 1.4889905844793767, + "learning_rate": 1.663634029225794e-05, + "loss": 0.8368, + "step": 9493 + }, + { + "epoch": 0.2909770749049896, + "grad_norm": 0.663459932825506, + "learning_rate": 1.6635597711099794e-05, + "loss": 0.6143, + "step": 9494 + }, + { + "epoch": 0.2910077234277308, + "grad_norm": 0.6724907103815051, + "learning_rate": 1.6634855064559404e-05, + "loss": 0.6182, + "step": 9495 + }, + { + "epoch": 0.291038371950472, + "grad_norm": 1.6069248377106717, + "learning_rate": 1.663411235264409e-05, + "loss": 0.8243, + "step": 9496 + }, + { + "epoch": 0.2910690204732132, + "grad_norm": 0.7020793826399423, + "learning_rate": 1.6633369575361164e-05, + "loss": 0.6292, + "step": 9497 + }, + { + "epoch": 0.2910996689959544, + "grad_norm": 1.5605463066305645, + "learning_rate": 1.6632626732717955e-05, + "loss": 0.8634, + "step": 9498 + }, + { + "epoch": 0.2911303175186956, + "grad_norm": 1.4428605142761508, + "learning_rate": 1.663188382472178e-05, + "loss": 0.7451, + "step": 9499 + }, + { + "epoch": 0.2911609660414368, + "grad_norm": 1.5679405169983542, + "learning_rate": 1.663114085137995e-05, + "loss": 0.854, + "step": 9500 + }, + { + "epoch": 0.291191614564178, + "grad_norm": 1.6097409259698006, + "learning_rate": 1.66303978126998e-05, + "loss": 0.8191, + "step": 9501 + }, + { + "epoch": 0.29122226308691923, + "grad_norm": 1.5290237304086474, + "learning_rate": 1.6629654708688637e-05, + "loss": 0.8162, + "step": 9502 + }, + { + "epoch": 0.29125291160966044, + "grad_norm": 1.6140181516384111, + "learning_rate": 1.66289115393538e-05, + "loss": 0.8948, + "step": 9503 + }, + { + "epoch": 0.29128356013240164, + "grad_norm": 1.7215687773938138, + "learning_rate": 1.6628168304702593e-05, + "loss": 0.923, + "step": 9504 + }, + { + "epoch": 0.29131420865514285, + "grad_norm": 1.5735091183165135, + "learning_rate": 1.662742500474235e-05, + "loss": 0.8889, + "step": 9505 + }, + { + "epoch": 0.29134485717788405, + "grad_norm": 1.606599564500514, + "learning_rate": 1.662668163948039e-05, + "loss": 0.7996, + "step": 9506 + }, + { + "epoch": 0.2913755057006252, + "grad_norm": 1.5262035301292693, + "learning_rate": 1.6625938208924048e-05, + "loss": 0.7864, + "step": 9507 + }, + { + "epoch": 0.2914061542233664, + "grad_norm": 0.7450045005191461, + "learning_rate": 1.662519471308063e-05, + "loss": 0.6228, + "step": 9508 + }, + { + "epoch": 0.2914368027461076, + "grad_norm": 0.7267193649814094, + "learning_rate": 1.6624451151957483e-05, + "loss": 0.6594, + "step": 9509 + }, + { + "epoch": 0.2914674512688488, + "grad_norm": 1.5497911029276104, + "learning_rate": 1.6623707525561918e-05, + "loss": 0.7039, + "step": 9510 + }, + { + "epoch": 0.29149809979159, + "grad_norm": 1.4527853481586213, + "learning_rate": 1.6622963833901272e-05, + "loss": 0.6766, + "step": 9511 + }, + { + "epoch": 0.29152874831433123, + "grad_norm": 0.6888395520812493, + "learning_rate": 1.662222007698287e-05, + "loss": 0.6172, + "step": 9512 + }, + { + "epoch": 0.29155939683707244, + "grad_norm": 1.3977379165365302, + "learning_rate": 1.6621476254814034e-05, + "loss": 0.6741, + "step": 9513 + }, + { + "epoch": 0.29159004535981364, + "grad_norm": 1.426438772289555, + "learning_rate": 1.6620732367402102e-05, + "loss": 0.7374, + "step": 9514 + }, + { + "epoch": 0.29162069388255485, + "grad_norm": 1.516213723837711, + "learning_rate": 1.66199884147544e-05, + "loss": 0.7925, + "step": 9515 + }, + { + "epoch": 0.29165134240529605, + "grad_norm": 1.4200196339418079, + "learning_rate": 1.661924439687826e-05, + "loss": 0.6978, + "step": 9516 + }, + { + "epoch": 0.29168199092803726, + "grad_norm": 0.7932656492515832, + "learning_rate": 1.6618500313781004e-05, + "loss": 0.6368, + "step": 9517 + }, + { + "epoch": 0.29171263945077847, + "grad_norm": 1.5884687641643136, + "learning_rate": 1.6617756165469975e-05, + "loss": 0.6997, + "step": 9518 + }, + { + "epoch": 0.29174328797351967, + "grad_norm": 1.3819778338827757, + "learning_rate": 1.6617011951952503e-05, + "loss": 0.7795, + "step": 9519 + }, + { + "epoch": 0.2917739364962609, + "grad_norm": 1.5591205337870961, + "learning_rate": 1.661626767323592e-05, + "loss": 0.822, + "step": 9520 + }, + { + "epoch": 0.2918045850190021, + "grad_norm": 1.596222477929443, + "learning_rate": 1.6615523329327555e-05, + "loss": 0.8334, + "step": 9521 + }, + { + "epoch": 0.2918352335417433, + "grad_norm": 1.3423651057486519, + "learning_rate": 1.661477892023475e-05, + "loss": 0.7226, + "step": 9522 + }, + { + "epoch": 0.2918658820644845, + "grad_norm": 1.3847452059086385, + "learning_rate": 1.6614034445964832e-05, + "loss": 0.7659, + "step": 9523 + }, + { + "epoch": 0.2918965305872257, + "grad_norm": 0.7134789547495869, + "learning_rate": 1.6613289906525142e-05, + "loss": 0.6323, + "step": 9524 + }, + { + "epoch": 0.2919271791099669, + "grad_norm": 1.4936469405200474, + "learning_rate": 1.6612545301923014e-05, + "loss": 0.8353, + "step": 9525 + }, + { + "epoch": 0.2919578276327081, + "grad_norm": 1.4357120841214774, + "learning_rate": 1.6611800632165787e-05, + "loss": 0.7474, + "step": 9526 + }, + { + "epoch": 0.2919884761554493, + "grad_norm": 1.365755176822062, + "learning_rate": 1.6611055897260796e-05, + "loss": 0.7471, + "step": 9527 + }, + { + "epoch": 0.2920191246781905, + "grad_norm": 1.3234370043804526, + "learning_rate": 1.6610311097215377e-05, + "loss": 0.738, + "step": 9528 + }, + { + "epoch": 0.29204977320093173, + "grad_norm": 1.5639298987502641, + "learning_rate": 1.6609566232036874e-05, + "loss": 0.8264, + "step": 9529 + }, + { + "epoch": 0.29208042172367293, + "grad_norm": 1.371228663573225, + "learning_rate": 1.6608821301732624e-05, + "loss": 0.7949, + "step": 9530 + }, + { + "epoch": 0.29211107024641414, + "grad_norm": 1.4750173437593994, + "learning_rate": 1.6608076306309965e-05, + "loss": 0.7005, + "step": 9531 + }, + { + "epoch": 0.29214171876915535, + "grad_norm": 1.5823985108593384, + "learning_rate": 1.6607331245776243e-05, + "loss": 0.7838, + "step": 9532 + }, + { + "epoch": 0.29217236729189655, + "grad_norm": 1.5527540049155046, + "learning_rate": 1.660658612013879e-05, + "loss": 0.7743, + "step": 9533 + }, + { + "epoch": 0.29220301581463776, + "grad_norm": 1.3843284035823296, + "learning_rate": 1.660584092940496e-05, + "loss": 0.7406, + "step": 9534 + }, + { + "epoch": 0.29223366433737896, + "grad_norm": 1.3866851011834267, + "learning_rate": 1.6605095673582085e-05, + "loss": 0.6775, + "step": 9535 + }, + { + "epoch": 0.29226431286012017, + "grad_norm": 0.6974104871937492, + "learning_rate": 1.6604350352677512e-05, + "loss": 0.6202, + "step": 9536 + }, + { + "epoch": 0.2922949613828614, + "grad_norm": 1.3600826039581282, + "learning_rate": 1.6603604966698586e-05, + "loss": 0.6398, + "step": 9537 + }, + { + "epoch": 0.2923256099056025, + "grad_norm": 1.6052221402986435, + "learning_rate": 1.6602859515652653e-05, + "loss": 0.8177, + "step": 9538 + }, + { + "epoch": 0.29235625842834373, + "grad_norm": 0.7146243181846436, + "learning_rate": 1.6602113999547054e-05, + "loss": 0.6347, + "step": 9539 + }, + { + "epoch": 0.29238690695108494, + "grad_norm": 1.3832874837631957, + "learning_rate": 1.6601368418389135e-05, + "loss": 0.6626, + "step": 9540 + }, + { + "epoch": 0.29241755547382614, + "grad_norm": 1.4176884620027057, + "learning_rate": 1.6600622772186245e-05, + "loss": 0.6751, + "step": 9541 + }, + { + "epoch": 0.29244820399656735, + "grad_norm": 1.5699362975800046, + "learning_rate": 1.6599877060945732e-05, + "loss": 0.7602, + "step": 9542 + }, + { + "epoch": 0.29247885251930855, + "grad_norm": 0.7484819694201053, + "learning_rate": 1.659913128467494e-05, + "loss": 0.635, + "step": 9543 + }, + { + "epoch": 0.29250950104204976, + "grad_norm": 1.5366250900490508, + "learning_rate": 1.6598385443381218e-05, + "loss": 0.721, + "step": 9544 + }, + { + "epoch": 0.29254014956479096, + "grad_norm": 1.4885513851989138, + "learning_rate": 1.6597639537071918e-05, + "loss": 0.8303, + "step": 9545 + }, + { + "epoch": 0.29257079808753217, + "grad_norm": 1.4935289923274109, + "learning_rate": 1.6596893565754388e-05, + "loss": 0.7225, + "step": 9546 + }, + { + "epoch": 0.2926014466102734, + "grad_norm": 1.720784458716337, + "learning_rate": 1.6596147529435976e-05, + "loss": 0.7465, + "step": 9547 + }, + { + "epoch": 0.2926320951330146, + "grad_norm": 1.4451271817130242, + "learning_rate": 1.6595401428124034e-05, + "loss": 0.7526, + "step": 9548 + }, + { + "epoch": 0.2926627436557558, + "grad_norm": 1.4769625873762413, + "learning_rate": 1.6594655261825916e-05, + "loss": 0.7848, + "step": 9549 + }, + { + "epoch": 0.292693392178497, + "grad_norm": 0.7336674126814438, + "learning_rate": 1.659390903054897e-05, + "loss": 0.6082, + "step": 9550 + }, + { + "epoch": 0.2927240407012382, + "grad_norm": 1.4206043511293815, + "learning_rate": 1.6593162734300555e-05, + "loss": 0.7969, + "step": 9551 + }, + { + "epoch": 0.2927546892239794, + "grad_norm": 1.7358241706360253, + "learning_rate": 1.6592416373088016e-05, + "loss": 0.7466, + "step": 9552 + }, + { + "epoch": 0.2927853377467206, + "grad_norm": 0.7089146395560392, + "learning_rate": 1.6591669946918716e-05, + "loss": 0.6102, + "step": 9553 + }, + { + "epoch": 0.2928159862694618, + "grad_norm": 1.445933334914931, + "learning_rate": 1.6590923455800006e-05, + "loss": 0.7643, + "step": 9554 + }, + { + "epoch": 0.292846634792203, + "grad_norm": 1.471878620338887, + "learning_rate": 1.6590176899739237e-05, + "loss": 0.7586, + "step": 9555 + }, + { + "epoch": 0.2928772833149442, + "grad_norm": 1.495095546516767, + "learning_rate": 1.658943027874377e-05, + "loss": 0.7617, + "step": 9556 + }, + { + "epoch": 0.29290793183768543, + "grad_norm": 1.4738490852512516, + "learning_rate": 1.658868359282096e-05, + "loss": 0.7529, + "step": 9557 + }, + { + "epoch": 0.29293858036042664, + "grad_norm": 1.4952253735946364, + "learning_rate": 1.6587936841978166e-05, + "loss": 0.8005, + "step": 9558 + }, + { + "epoch": 0.29296922888316784, + "grad_norm": 1.5450208606292954, + "learning_rate": 1.6587190026222746e-05, + "loss": 0.7668, + "step": 9559 + }, + { + "epoch": 0.29299987740590905, + "grad_norm": 1.50732526412005, + "learning_rate": 1.6586443145562055e-05, + "loss": 0.7281, + "step": 9560 + }, + { + "epoch": 0.29303052592865025, + "grad_norm": 1.5443857421322422, + "learning_rate": 1.6585696200003454e-05, + "loss": 0.7961, + "step": 9561 + }, + { + "epoch": 0.29306117445139146, + "grad_norm": 1.658630253252065, + "learning_rate": 1.6584949189554303e-05, + "loss": 0.7295, + "step": 9562 + }, + { + "epoch": 0.29309182297413267, + "grad_norm": 1.3090664066988262, + "learning_rate": 1.6584202114221964e-05, + "loss": 0.6588, + "step": 9563 + }, + { + "epoch": 0.29312247149687387, + "grad_norm": 1.453762318190904, + "learning_rate": 1.65834549740138e-05, + "loss": 0.7408, + "step": 9564 + }, + { + "epoch": 0.2931531200196151, + "grad_norm": 1.4155439751683145, + "learning_rate": 1.6582707768937166e-05, + "loss": 0.7452, + "step": 9565 + }, + { + "epoch": 0.2931837685423563, + "grad_norm": 1.3734065549434935, + "learning_rate": 1.6581960498999427e-05, + "loss": 0.6506, + "step": 9566 + }, + { + "epoch": 0.2932144170650975, + "grad_norm": 1.4433852257666264, + "learning_rate": 1.658121316420795e-05, + "loss": 0.8398, + "step": 9567 + }, + { + "epoch": 0.2932450655878387, + "grad_norm": 1.3136002849473831, + "learning_rate": 1.6580465764570094e-05, + "loss": 0.7058, + "step": 9568 + }, + { + "epoch": 0.29327571411057984, + "grad_norm": 1.6075957151715685, + "learning_rate": 1.657971830009323e-05, + "loss": 0.8065, + "step": 9569 + }, + { + "epoch": 0.29330636263332105, + "grad_norm": 1.6497075624278474, + "learning_rate": 1.657897077078471e-05, + "loss": 0.7958, + "step": 9570 + }, + { + "epoch": 0.29333701115606226, + "grad_norm": 1.3992652038039761, + "learning_rate": 1.6578223176651912e-05, + "loss": 0.7338, + "step": 9571 + }, + { + "epoch": 0.29336765967880346, + "grad_norm": 1.4723968230910298, + "learning_rate": 1.65774755177022e-05, + "loss": 0.7293, + "step": 9572 + }, + { + "epoch": 0.29339830820154467, + "grad_norm": 0.8700964578632439, + "learning_rate": 1.6576727793942935e-05, + "loss": 0.6537, + "step": 9573 + }, + { + "epoch": 0.29342895672428587, + "grad_norm": 1.5728087026447342, + "learning_rate": 1.6575980005381492e-05, + "loss": 0.7461, + "step": 9574 + }, + { + "epoch": 0.2934596052470271, + "grad_norm": 0.7505948399571744, + "learning_rate": 1.6575232152025234e-05, + "loss": 0.6534, + "step": 9575 + }, + { + "epoch": 0.2934902537697683, + "grad_norm": 1.4840946435278333, + "learning_rate": 1.657448423388153e-05, + "loss": 0.7536, + "step": 9576 + }, + { + "epoch": 0.2935209022925095, + "grad_norm": 2.0614495017072683, + "learning_rate": 1.657373625095775e-05, + "loss": 0.8174, + "step": 9577 + }, + { + "epoch": 0.2935515508152507, + "grad_norm": 1.5443605668330473, + "learning_rate": 1.6572988203261266e-05, + "loss": 0.6962, + "step": 9578 + }, + { + "epoch": 0.2935821993379919, + "grad_norm": 0.7747118665679059, + "learning_rate": 1.6572240090799448e-05, + "loss": 0.6249, + "step": 9579 + }, + { + "epoch": 0.2936128478607331, + "grad_norm": 1.3866863115180816, + "learning_rate": 1.6571491913579665e-05, + "loss": 0.75, + "step": 9580 + }, + { + "epoch": 0.2936434963834743, + "grad_norm": 1.4748925210082178, + "learning_rate": 1.657074367160929e-05, + "loss": 0.7595, + "step": 9581 + }, + { + "epoch": 0.2936741449062155, + "grad_norm": 0.8250991899177126, + "learning_rate": 1.65699953648957e-05, + "loss": 0.6374, + "step": 9582 + }, + { + "epoch": 0.2937047934289567, + "grad_norm": 0.690682658171365, + "learning_rate": 1.6569246993446265e-05, + "loss": 0.5932, + "step": 9583 + }, + { + "epoch": 0.29373544195169793, + "grad_norm": 1.5680397483622075, + "learning_rate": 1.6568498557268357e-05, + "loss": 0.8002, + "step": 9584 + }, + { + "epoch": 0.29376609047443913, + "grad_norm": 1.2793838479876887, + "learning_rate": 1.6567750056369352e-05, + "loss": 0.823, + "step": 9585 + }, + { + "epoch": 0.29379673899718034, + "grad_norm": 1.3650625501318459, + "learning_rate": 1.6567001490756624e-05, + "loss": 0.6534, + "step": 9586 + }, + { + "epoch": 0.29382738751992155, + "grad_norm": 1.6017341887607155, + "learning_rate": 1.656625286043755e-05, + "loss": 0.7342, + "step": 9587 + }, + { + "epoch": 0.29385803604266275, + "grad_norm": 1.4404251786328441, + "learning_rate": 1.656550416541951e-05, + "loss": 0.6893, + "step": 9588 + }, + { + "epoch": 0.29388868456540396, + "grad_norm": 1.4369726068785953, + "learning_rate": 1.6564755405709874e-05, + "loss": 0.7062, + "step": 9589 + }, + { + "epoch": 0.29391933308814516, + "grad_norm": 1.4304419090985943, + "learning_rate": 1.6564006581316024e-05, + "loss": 0.7386, + "step": 9590 + }, + { + "epoch": 0.29394998161088637, + "grad_norm": 1.5935650176943685, + "learning_rate": 1.6563257692245337e-05, + "loss": 0.8299, + "step": 9591 + }, + { + "epoch": 0.2939806301336276, + "grad_norm": 1.5689897343794403, + "learning_rate": 1.6562508738505195e-05, + "loss": 0.7649, + "step": 9592 + }, + { + "epoch": 0.2940112786563688, + "grad_norm": 0.9259822733897053, + "learning_rate": 1.6561759720102975e-05, + "loss": 0.6346, + "step": 9593 + }, + { + "epoch": 0.29404192717911, + "grad_norm": 1.4838975862917692, + "learning_rate": 1.6561010637046056e-05, + "loss": 0.8285, + "step": 9594 + }, + { + "epoch": 0.2940725757018512, + "grad_norm": 1.4399592463752175, + "learning_rate": 1.656026148934182e-05, + "loss": 0.6697, + "step": 9595 + }, + { + "epoch": 0.2941032242245924, + "grad_norm": 1.5664556729694272, + "learning_rate": 1.6559512276997652e-05, + "loss": 0.7964, + "step": 9596 + }, + { + "epoch": 0.2941338727473336, + "grad_norm": 0.6712344514805634, + "learning_rate": 1.6558763000020932e-05, + "loss": 0.614, + "step": 9597 + }, + { + "epoch": 0.2941645212700748, + "grad_norm": 1.4728683263910411, + "learning_rate": 1.6558013658419037e-05, + "loss": 0.8642, + "step": 9598 + }, + { + "epoch": 0.294195169792816, + "grad_norm": 0.7006671874966871, + "learning_rate": 1.655726425219936e-05, + "loss": 0.6529, + "step": 9599 + }, + { + "epoch": 0.29422581831555716, + "grad_norm": 1.3493332285370727, + "learning_rate": 1.6556514781369278e-05, + "loss": 0.7776, + "step": 9600 + }, + { + "epoch": 0.29425646683829837, + "grad_norm": 1.3190338822366245, + "learning_rate": 1.6555765245936178e-05, + "loss": 0.7024, + "step": 9601 + }, + { + "epoch": 0.2942871153610396, + "grad_norm": 1.6450338117550787, + "learning_rate": 1.6555015645907445e-05, + "loss": 0.7361, + "step": 9602 + }, + { + "epoch": 0.2943177638837808, + "grad_norm": 1.3754504025990844, + "learning_rate": 1.655426598129047e-05, + "loss": 0.7466, + "step": 9603 + }, + { + "epoch": 0.294348412406522, + "grad_norm": 1.4855665516445127, + "learning_rate": 1.655351625209263e-05, + "loss": 0.9017, + "step": 9604 + }, + { + "epoch": 0.2943790609292632, + "grad_norm": 1.3009864695698263, + "learning_rate": 1.655276645832132e-05, + "loss": 0.7246, + "step": 9605 + }, + { + "epoch": 0.2944097094520044, + "grad_norm": 1.677386519461741, + "learning_rate": 1.655201659998393e-05, + "loss": 0.8385, + "step": 9606 + }, + { + "epoch": 0.2944403579747456, + "grad_norm": 1.4845762716290225, + "learning_rate": 1.6551266677087837e-05, + "loss": 0.7456, + "step": 9607 + }, + { + "epoch": 0.2944710064974868, + "grad_norm": 1.481114750946955, + "learning_rate": 1.655051668964044e-05, + "loss": 0.7379, + "step": 9608 + }, + { + "epoch": 0.294501655020228, + "grad_norm": 1.466451056840708, + "learning_rate": 1.6549766637649126e-05, + "loss": 0.8161, + "step": 9609 + }, + { + "epoch": 0.2945323035429692, + "grad_norm": 1.3418445527793723, + "learning_rate": 1.6549016521121287e-05, + "loss": 0.7666, + "step": 9610 + }, + { + "epoch": 0.2945629520657104, + "grad_norm": 1.5583646013078274, + "learning_rate": 1.654826634006431e-05, + "loss": 0.7142, + "step": 9611 + }, + { + "epoch": 0.29459360058845163, + "grad_norm": 1.3698155934759364, + "learning_rate": 1.654751609448559e-05, + "loss": 0.7439, + "step": 9612 + }, + { + "epoch": 0.29462424911119284, + "grad_norm": 0.794226084305421, + "learning_rate": 1.654676578439252e-05, + "loss": 0.6276, + "step": 9613 + }, + { + "epoch": 0.29465489763393404, + "grad_norm": 1.4023767097075528, + "learning_rate": 1.654601540979249e-05, + "loss": 0.7481, + "step": 9614 + }, + { + "epoch": 0.29468554615667525, + "grad_norm": 0.7549528683067506, + "learning_rate": 1.6545264970692897e-05, + "loss": 0.6073, + "step": 9615 + }, + { + "epoch": 0.29471619467941645, + "grad_norm": 1.4837179698538987, + "learning_rate": 1.6544514467101132e-05, + "loss": 0.8334, + "step": 9616 + }, + { + "epoch": 0.29474684320215766, + "grad_norm": 1.557196494110796, + "learning_rate": 1.6543763899024593e-05, + "loss": 0.7613, + "step": 9617 + }, + { + "epoch": 0.29477749172489887, + "grad_norm": 1.280834798869857, + "learning_rate": 1.654301326647067e-05, + "loss": 0.7283, + "step": 9618 + }, + { + "epoch": 0.29480814024764007, + "grad_norm": 0.7505066941676983, + "learning_rate": 1.6542262569446768e-05, + "loss": 0.6153, + "step": 9619 + }, + { + "epoch": 0.2948387887703813, + "grad_norm": 1.5199572884354586, + "learning_rate": 1.6541511807960277e-05, + "loss": 0.7165, + "step": 9620 + }, + { + "epoch": 0.2948694372931225, + "grad_norm": 1.6309477558460883, + "learning_rate": 1.6540760982018594e-05, + "loss": 0.8177, + "step": 9621 + }, + { + "epoch": 0.2949000858158637, + "grad_norm": 1.4364469792543217, + "learning_rate": 1.6540010091629126e-05, + "loss": 0.7342, + "step": 9622 + }, + { + "epoch": 0.2949307343386049, + "grad_norm": 1.629214938055133, + "learning_rate": 1.653925913679926e-05, + "loss": 0.8247, + "step": 9623 + }, + { + "epoch": 0.2949613828613461, + "grad_norm": 0.7495011474278642, + "learning_rate": 1.6538508117536402e-05, + "loss": 0.6407, + "step": 9624 + }, + { + "epoch": 0.2949920313840873, + "grad_norm": 1.5578307404230016, + "learning_rate": 1.653775703384795e-05, + "loss": 0.8498, + "step": 9625 + }, + { + "epoch": 0.2950226799068285, + "grad_norm": 0.6871956696709886, + "learning_rate": 1.6537005885741307e-05, + "loss": 0.6136, + "step": 9626 + }, + { + "epoch": 0.2950533284295697, + "grad_norm": 1.523713132106445, + "learning_rate": 1.653625467322387e-05, + "loss": 0.7374, + "step": 9627 + }, + { + "epoch": 0.2950839769523109, + "grad_norm": 0.6639239467053907, + "learning_rate": 1.6535503396303046e-05, + "loss": 0.5844, + "step": 9628 + }, + { + "epoch": 0.29511462547505213, + "grad_norm": 1.650424803650888, + "learning_rate": 1.6534752054986233e-05, + "loss": 0.7976, + "step": 9629 + }, + { + "epoch": 0.29514527399779333, + "grad_norm": 1.308652895604564, + "learning_rate": 1.6534000649280835e-05, + "loss": 0.9068, + "step": 9630 + }, + { + "epoch": 0.2951759225205345, + "grad_norm": 1.341204301179143, + "learning_rate": 1.653324917919426e-05, + "loss": 0.7246, + "step": 9631 + }, + { + "epoch": 0.2952065710432757, + "grad_norm": 0.6916513545251031, + "learning_rate": 1.6532497644733907e-05, + "loss": 0.595, + "step": 9632 + }, + { + "epoch": 0.2952372195660169, + "grad_norm": 0.7133144642318506, + "learning_rate": 1.6531746045907182e-05, + "loss": 0.6125, + "step": 9633 + }, + { + "epoch": 0.2952678680887581, + "grad_norm": 1.5760592691499085, + "learning_rate": 1.6530994382721495e-05, + "loss": 0.7377, + "step": 9634 + }, + { + "epoch": 0.2952985166114993, + "grad_norm": 1.5518826566178252, + "learning_rate": 1.6530242655184248e-05, + "loss": 0.7618, + "step": 9635 + }, + { + "epoch": 0.2953291651342405, + "grad_norm": 1.384017573467967, + "learning_rate": 1.652949086330285e-05, + "loss": 0.7443, + "step": 9636 + }, + { + "epoch": 0.2953598136569817, + "grad_norm": 1.4576510311628765, + "learning_rate": 1.6528739007084705e-05, + "loss": 0.7357, + "step": 9637 + }, + { + "epoch": 0.2953904621797229, + "grad_norm": 1.457299800638317, + "learning_rate": 1.6527987086537225e-05, + "loss": 0.7903, + "step": 9638 + }, + { + "epoch": 0.29542111070246413, + "grad_norm": 1.3715433908595316, + "learning_rate": 1.6527235101667822e-05, + "loss": 0.7695, + "step": 9639 + }, + { + "epoch": 0.29545175922520533, + "grad_norm": 1.5104476718236168, + "learning_rate": 1.6526483052483898e-05, + "loss": 0.7747, + "step": 9640 + }, + { + "epoch": 0.29548240774794654, + "grad_norm": 1.4074773775254978, + "learning_rate": 1.6525730938992867e-05, + "loss": 0.7659, + "step": 9641 + }, + { + "epoch": 0.29551305627068775, + "grad_norm": 0.780349356937594, + "learning_rate": 1.652497876120214e-05, + "loss": 0.6111, + "step": 9642 + }, + { + "epoch": 0.29554370479342895, + "grad_norm": 1.6083926341435884, + "learning_rate": 1.652422651911913e-05, + "loss": 0.8531, + "step": 9643 + }, + { + "epoch": 0.29557435331617016, + "grad_norm": 1.588545324375251, + "learning_rate": 1.652347421275124e-05, + "loss": 0.7998, + "step": 9644 + }, + { + "epoch": 0.29560500183891136, + "grad_norm": 1.6986408304125236, + "learning_rate": 1.6522721842105897e-05, + "loss": 0.8565, + "step": 9645 + }, + { + "epoch": 0.29563565036165257, + "grad_norm": 1.5772896611502212, + "learning_rate": 1.6521969407190504e-05, + "loss": 0.822, + "step": 9646 + }, + { + "epoch": 0.2956662988843938, + "grad_norm": 1.321570421518855, + "learning_rate": 1.6521216908012476e-05, + "loss": 0.8104, + "step": 9647 + }, + { + "epoch": 0.295696947407135, + "grad_norm": 1.3814629783688246, + "learning_rate": 1.652046434457923e-05, + "loss": 0.7853, + "step": 9648 + }, + { + "epoch": 0.2957275959298762, + "grad_norm": 1.3658484280393837, + "learning_rate": 1.651971171689818e-05, + "loss": 0.7623, + "step": 9649 + }, + { + "epoch": 0.2957582444526174, + "grad_norm": 0.7387610555084855, + "learning_rate": 1.6518959024976745e-05, + "loss": 0.6267, + "step": 9650 + }, + { + "epoch": 0.2957888929753586, + "grad_norm": 1.4279784517136236, + "learning_rate": 1.6518206268822335e-05, + "loss": 0.7613, + "step": 9651 + }, + { + "epoch": 0.2958195414980998, + "grad_norm": 1.566600522674971, + "learning_rate": 1.6517453448442373e-05, + "loss": 0.8633, + "step": 9652 + }, + { + "epoch": 0.295850190020841, + "grad_norm": 1.385782701495987, + "learning_rate": 1.6516700563844277e-05, + "loss": 0.742, + "step": 9653 + }, + { + "epoch": 0.2958808385435822, + "grad_norm": 1.4321013546813617, + "learning_rate": 1.651594761503546e-05, + "loss": 0.8957, + "step": 9654 + }, + { + "epoch": 0.2959114870663234, + "grad_norm": 1.4712352657936187, + "learning_rate": 1.6515194602023345e-05, + "loss": 0.7184, + "step": 9655 + }, + { + "epoch": 0.2959421355890646, + "grad_norm": 1.6476251009193763, + "learning_rate": 1.651444152481535e-05, + "loss": 0.8058, + "step": 9656 + }, + { + "epoch": 0.29597278411180583, + "grad_norm": 1.5199986741926712, + "learning_rate": 1.6513688383418894e-05, + "loss": 0.7733, + "step": 9657 + }, + { + "epoch": 0.29600343263454704, + "grad_norm": 1.4292857803946284, + "learning_rate": 1.6512935177841406e-05, + "loss": 0.7701, + "step": 9658 + }, + { + "epoch": 0.29603408115728824, + "grad_norm": 0.6973324375888305, + "learning_rate": 1.6512181908090293e-05, + "loss": 0.6199, + "step": 9659 + }, + { + "epoch": 0.29606472968002945, + "grad_norm": 1.418755140824126, + "learning_rate": 1.6511428574172992e-05, + "loss": 0.7172, + "step": 9660 + }, + { + "epoch": 0.29609537820277065, + "grad_norm": 1.5510992061571245, + "learning_rate": 1.6510675176096916e-05, + "loss": 0.8574, + "step": 9661 + }, + { + "epoch": 0.2961260267255118, + "grad_norm": 1.6404524433089742, + "learning_rate": 1.650992171386949e-05, + "loss": 0.7677, + "step": 9662 + }, + { + "epoch": 0.296156675248253, + "grad_norm": 1.4250809206092478, + "learning_rate": 1.6509168187498143e-05, + "loss": 0.8802, + "step": 9663 + }, + { + "epoch": 0.2961873237709942, + "grad_norm": 1.3656960654773493, + "learning_rate": 1.6508414596990296e-05, + "loss": 0.6453, + "step": 9664 + }, + { + "epoch": 0.2962179722937354, + "grad_norm": 0.6741249214124575, + "learning_rate": 1.6507660942353375e-05, + "loss": 0.6011, + "step": 9665 + }, + { + "epoch": 0.2962486208164766, + "grad_norm": 1.582979796535701, + "learning_rate": 1.6506907223594806e-05, + "loss": 0.7559, + "step": 9666 + }, + { + "epoch": 0.29627926933921783, + "grad_norm": 0.6884874569644676, + "learning_rate": 1.6506153440722013e-05, + "loss": 0.6298, + "step": 9667 + }, + { + "epoch": 0.29630991786195904, + "grad_norm": 1.6058041905673177, + "learning_rate": 1.6505399593742425e-05, + "loss": 0.8563, + "step": 9668 + }, + { + "epoch": 0.29634056638470024, + "grad_norm": 0.678876885572, + "learning_rate": 1.6504645682663474e-05, + "loss": 0.6106, + "step": 9669 + }, + { + "epoch": 0.29637121490744145, + "grad_norm": 1.4952784087095499, + "learning_rate": 1.6503891707492585e-05, + "loss": 0.8202, + "step": 9670 + }, + { + "epoch": 0.29640186343018265, + "grad_norm": 1.535548725800976, + "learning_rate": 1.6503137668237183e-05, + "loss": 0.8275, + "step": 9671 + }, + { + "epoch": 0.29643251195292386, + "grad_norm": 1.3962867579584506, + "learning_rate": 1.6502383564904704e-05, + "loss": 0.8039, + "step": 9672 + }, + { + "epoch": 0.29646316047566507, + "grad_norm": 1.5929633699832724, + "learning_rate": 1.6501629397502578e-05, + "loss": 0.7964, + "step": 9673 + }, + { + "epoch": 0.29649380899840627, + "grad_norm": 1.3939589735678737, + "learning_rate": 1.650087516603823e-05, + "loss": 0.8003, + "step": 9674 + }, + { + "epoch": 0.2965244575211475, + "grad_norm": 1.291990530883612, + "learning_rate": 1.6500120870519097e-05, + "loss": 0.5145, + "step": 9675 + }, + { + "epoch": 0.2965551060438887, + "grad_norm": 1.5468878706147013, + "learning_rate": 1.649936651095261e-05, + "loss": 0.8074, + "step": 9676 + }, + { + "epoch": 0.2965857545666299, + "grad_norm": 0.7830682379109016, + "learning_rate": 1.64986120873462e-05, + "loss": 0.6497, + "step": 9677 + }, + { + "epoch": 0.2966164030893711, + "grad_norm": 1.7642345616629755, + "learning_rate": 1.6497857599707305e-05, + "loss": 0.7592, + "step": 9678 + }, + { + "epoch": 0.2966470516121123, + "grad_norm": 1.4013552085960643, + "learning_rate": 1.6497103048043356e-05, + "loss": 0.7447, + "step": 9679 + }, + { + "epoch": 0.2966777001348535, + "grad_norm": 1.44071769572632, + "learning_rate": 1.649634843236179e-05, + "loss": 0.7355, + "step": 9680 + }, + { + "epoch": 0.2967083486575947, + "grad_norm": 1.296249077650037, + "learning_rate": 1.6495593752670037e-05, + "loss": 0.6544, + "step": 9681 + }, + { + "epoch": 0.2967389971803359, + "grad_norm": 1.2031169556212413, + "learning_rate": 1.6494839008975537e-05, + "loss": 0.6195, + "step": 9682 + }, + { + "epoch": 0.2967696457030771, + "grad_norm": 0.6769414797423208, + "learning_rate": 1.6494084201285726e-05, + "loss": 0.6036, + "step": 9683 + }, + { + "epoch": 0.29680029422581833, + "grad_norm": 1.5632072584086045, + "learning_rate": 1.6493329329608048e-05, + "loss": 0.8342, + "step": 9684 + }, + { + "epoch": 0.29683094274855953, + "grad_norm": 1.43866108567958, + "learning_rate": 1.649257439394993e-05, + "loss": 0.7372, + "step": 9685 + }, + { + "epoch": 0.29686159127130074, + "grad_norm": 1.4519264613256588, + "learning_rate": 1.6491819394318816e-05, + "loss": 0.7231, + "step": 9686 + }, + { + "epoch": 0.29689223979404195, + "grad_norm": 1.427565547832004, + "learning_rate": 1.6491064330722144e-05, + "loss": 0.7554, + "step": 9687 + }, + { + "epoch": 0.29692288831678315, + "grad_norm": 1.3268204459117934, + "learning_rate": 1.6490309203167356e-05, + "loss": 0.7808, + "step": 9688 + }, + { + "epoch": 0.29695353683952436, + "grad_norm": 1.4091971416040678, + "learning_rate": 1.6489554011661888e-05, + "loss": 0.7274, + "step": 9689 + }, + { + "epoch": 0.29698418536226556, + "grad_norm": 1.4546706178317614, + "learning_rate": 1.6488798756213185e-05, + "loss": 0.8341, + "step": 9690 + }, + { + "epoch": 0.29701483388500677, + "grad_norm": 1.6981414299514757, + "learning_rate": 1.6488043436828687e-05, + "loss": 0.8418, + "step": 9691 + }, + { + "epoch": 0.297045482407748, + "grad_norm": 1.4647617857172943, + "learning_rate": 1.648728805351584e-05, + "loss": 0.6216, + "step": 9692 + }, + { + "epoch": 0.2970761309304891, + "grad_norm": 1.3105732044927254, + "learning_rate": 1.6486532606282084e-05, + "loss": 0.7046, + "step": 9693 + }, + { + "epoch": 0.29710677945323033, + "grad_norm": 1.3568074251385673, + "learning_rate": 1.648577709513486e-05, + "loss": 0.7197, + "step": 9694 + }, + { + "epoch": 0.29713742797597154, + "grad_norm": 1.5065863505195527, + "learning_rate": 1.6485021520081614e-05, + "loss": 0.7665, + "step": 9695 + }, + { + "epoch": 0.29716807649871274, + "grad_norm": 1.4044924460904151, + "learning_rate": 1.6484265881129796e-05, + "loss": 0.739, + "step": 9696 + }, + { + "epoch": 0.29719872502145395, + "grad_norm": 0.7291358005614241, + "learning_rate": 1.6483510178286842e-05, + "loss": 0.5995, + "step": 9697 + }, + { + "epoch": 0.29722937354419515, + "grad_norm": 1.586947235620788, + "learning_rate": 1.6482754411560205e-05, + "loss": 0.8527, + "step": 9698 + }, + { + "epoch": 0.29726002206693636, + "grad_norm": 1.3698194634456446, + "learning_rate": 1.6481998580957334e-05, + "loss": 0.6895, + "step": 9699 + }, + { + "epoch": 0.29729067058967756, + "grad_norm": 1.362525087627192, + "learning_rate": 1.6481242686485664e-05, + "loss": 0.7714, + "step": 9700 + }, + { + "epoch": 0.29732131911241877, + "grad_norm": 1.3561586353182928, + "learning_rate": 1.6480486728152657e-05, + "loss": 0.7398, + "step": 9701 + }, + { + "epoch": 0.29735196763516, + "grad_norm": 1.4819762713421236, + "learning_rate": 1.647973070596576e-05, + "loss": 0.8123, + "step": 9702 + }, + { + "epoch": 0.2973826161579012, + "grad_norm": 1.4484287177766069, + "learning_rate": 1.647897461993241e-05, + "loss": 0.8008, + "step": 9703 + }, + { + "epoch": 0.2974132646806424, + "grad_norm": 1.4357878029400353, + "learning_rate": 1.6478218470060074e-05, + "loss": 0.7563, + "step": 9704 + }, + { + "epoch": 0.2974439132033836, + "grad_norm": 1.4288772270545573, + "learning_rate": 1.6477462256356187e-05, + "loss": 0.5539, + "step": 9705 + }, + { + "epoch": 0.2974745617261248, + "grad_norm": 0.7747102775490874, + "learning_rate": 1.647670597882821e-05, + "loss": 0.6345, + "step": 9706 + }, + { + "epoch": 0.297505210248866, + "grad_norm": 1.6958373000645774, + "learning_rate": 1.6475949637483593e-05, + "loss": 0.7379, + "step": 9707 + }, + { + "epoch": 0.2975358587716072, + "grad_norm": 1.517406326500523, + "learning_rate": 1.6475193232329786e-05, + "loss": 0.6245, + "step": 9708 + }, + { + "epoch": 0.2975665072943484, + "grad_norm": 1.5332881349225072, + "learning_rate": 1.647443676337424e-05, + "loss": 0.8811, + "step": 9709 + }, + { + "epoch": 0.2975971558170896, + "grad_norm": 1.6318734436064977, + "learning_rate": 1.6473680230624415e-05, + "loss": 0.7406, + "step": 9710 + }, + { + "epoch": 0.2976278043398308, + "grad_norm": 1.766732594268508, + "learning_rate": 1.6472923634087762e-05, + "loss": 0.7531, + "step": 9711 + }, + { + "epoch": 0.29765845286257203, + "grad_norm": 1.4663267096892734, + "learning_rate": 1.6472166973771738e-05, + "loss": 0.7309, + "step": 9712 + }, + { + "epoch": 0.29768910138531324, + "grad_norm": 1.4655385866222628, + "learning_rate": 1.6471410249683795e-05, + "loss": 0.7989, + "step": 9713 + }, + { + "epoch": 0.29771974990805444, + "grad_norm": 0.7310333660957374, + "learning_rate": 1.6470653461831392e-05, + "loss": 0.6178, + "step": 9714 + }, + { + "epoch": 0.29775039843079565, + "grad_norm": 1.6143645582171275, + "learning_rate": 1.6469896610221985e-05, + "loss": 0.8967, + "step": 9715 + }, + { + "epoch": 0.29778104695353685, + "grad_norm": 0.67452933283544, + "learning_rate": 1.646913969486303e-05, + "loss": 0.6504, + "step": 9716 + }, + { + "epoch": 0.29781169547627806, + "grad_norm": 1.5184598679981292, + "learning_rate": 1.6468382715761987e-05, + "loss": 0.8386, + "step": 9717 + }, + { + "epoch": 0.29784234399901927, + "grad_norm": 1.4498007401874198, + "learning_rate": 1.6467625672926314e-05, + "loss": 0.7247, + "step": 9718 + }, + { + "epoch": 0.29787299252176047, + "grad_norm": 1.6309234424603813, + "learning_rate": 1.6466868566363473e-05, + "loss": 0.769, + "step": 9719 + }, + { + "epoch": 0.2979036410445017, + "grad_norm": 1.5475536238790994, + "learning_rate": 1.646611139608092e-05, + "loss": 0.8141, + "step": 9720 + }, + { + "epoch": 0.2979342895672429, + "grad_norm": 1.4928536675856088, + "learning_rate": 1.6465354162086115e-05, + "loss": 0.8313, + "step": 9721 + }, + { + "epoch": 0.2979649380899841, + "grad_norm": 1.372905075546851, + "learning_rate": 1.646459686438652e-05, + "loss": 0.733, + "step": 9722 + }, + { + "epoch": 0.2979955866127253, + "grad_norm": 1.5458763257961052, + "learning_rate": 1.6463839502989604e-05, + "loss": 0.7812, + "step": 9723 + }, + { + "epoch": 0.29802623513546644, + "grad_norm": 1.6594948811163177, + "learning_rate": 1.646308207790282e-05, + "loss": 0.7723, + "step": 9724 + }, + { + "epoch": 0.29805688365820765, + "grad_norm": 1.4923841647256053, + "learning_rate": 1.6462324589133633e-05, + "loss": 0.7846, + "step": 9725 + }, + { + "epoch": 0.29808753218094886, + "grad_norm": 1.5674060288796823, + "learning_rate": 1.6461567036689508e-05, + "loss": 0.8251, + "step": 9726 + }, + { + "epoch": 0.29811818070369006, + "grad_norm": 0.7969875320701031, + "learning_rate": 1.646080942057791e-05, + "loss": 0.6112, + "step": 9727 + }, + { + "epoch": 0.29814882922643127, + "grad_norm": 1.463942437686273, + "learning_rate": 1.6460051740806306e-05, + "loss": 0.762, + "step": 9728 + }, + { + "epoch": 0.2981794777491725, + "grad_norm": 1.522276263056309, + "learning_rate": 1.645929399738216e-05, + "loss": 0.7362, + "step": 9729 + }, + { + "epoch": 0.2982101262719137, + "grad_norm": 1.7406161114011962, + "learning_rate": 1.6458536190312938e-05, + "loss": 0.8644, + "step": 9730 + }, + { + "epoch": 0.2982407747946549, + "grad_norm": 1.605991203452586, + "learning_rate": 1.64577783196061e-05, + "loss": 0.8267, + "step": 9731 + }, + { + "epoch": 0.2982714233173961, + "grad_norm": 1.5561866862810878, + "learning_rate": 1.6457020385269128e-05, + "loss": 0.8091, + "step": 9732 + }, + { + "epoch": 0.2983020718401373, + "grad_norm": 1.451438719497946, + "learning_rate": 1.6456262387309477e-05, + "loss": 0.7674, + "step": 9733 + }, + { + "epoch": 0.2983327203628785, + "grad_norm": 1.5511498995293083, + "learning_rate": 1.6455504325734624e-05, + "loss": 0.8159, + "step": 9734 + }, + { + "epoch": 0.2983633688856197, + "grad_norm": 1.3838924444764453, + "learning_rate": 1.6454746200552034e-05, + "loss": 0.7815, + "step": 9735 + }, + { + "epoch": 0.2983940174083609, + "grad_norm": 1.5112199840249767, + "learning_rate": 1.6453988011769176e-05, + "loss": 0.7957, + "step": 9736 + }, + { + "epoch": 0.2984246659311021, + "grad_norm": 0.6729596178283388, + "learning_rate": 1.6453229759393524e-05, + "loss": 0.6117, + "step": 9737 + }, + { + "epoch": 0.2984553144538433, + "grad_norm": 1.6698473805723768, + "learning_rate": 1.645247144343255e-05, + "loss": 0.7056, + "step": 9738 + }, + { + "epoch": 0.29848596297658453, + "grad_norm": 0.6799113590773224, + "learning_rate": 1.645171306389372e-05, + "loss": 0.6269, + "step": 9739 + }, + { + "epoch": 0.29851661149932573, + "grad_norm": 1.4410407656882307, + "learning_rate": 1.6450954620784518e-05, + "loss": 0.6935, + "step": 9740 + }, + { + "epoch": 0.29854726002206694, + "grad_norm": 1.3283901761900867, + "learning_rate": 1.64501961141124e-05, + "loss": 0.8431, + "step": 9741 + }, + { + "epoch": 0.29857790854480815, + "grad_norm": 1.4992396472444622, + "learning_rate": 1.6449437543884856e-05, + "loss": 0.7635, + "step": 9742 + }, + { + "epoch": 0.29860855706754935, + "grad_norm": 1.7538956417593399, + "learning_rate": 1.644867891010935e-05, + "loss": 0.7778, + "step": 9743 + }, + { + "epoch": 0.29863920559029056, + "grad_norm": 1.6060786242726344, + "learning_rate": 1.6447920212793362e-05, + "loss": 0.8131, + "step": 9744 + }, + { + "epoch": 0.29866985411303176, + "grad_norm": 1.7244632004096168, + "learning_rate": 1.6447161451944367e-05, + "loss": 0.7829, + "step": 9745 + }, + { + "epoch": 0.29870050263577297, + "grad_norm": 1.3550797070709328, + "learning_rate": 1.6446402627569842e-05, + "loss": 0.7041, + "step": 9746 + }, + { + "epoch": 0.2987311511585142, + "grad_norm": 1.368149710987993, + "learning_rate": 1.644564373967726e-05, + "loss": 0.7188, + "step": 9747 + }, + { + "epoch": 0.2987617996812554, + "grad_norm": 1.708168814927806, + "learning_rate": 1.64448847882741e-05, + "loss": 0.8081, + "step": 9748 + }, + { + "epoch": 0.2987924482039966, + "grad_norm": 1.4473456161873746, + "learning_rate": 1.6444125773367846e-05, + "loss": 0.7339, + "step": 9749 + }, + { + "epoch": 0.2988230967267378, + "grad_norm": 1.4460548937232323, + "learning_rate": 1.644336669496597e-05, + "loss": 0.7877, + "step": 9750 + }, + { + "epoch": 0.298853745249479, + "grad_norm": 1.2898343292345829, + "learning_rate": 1.644260755307595e-05, + "loss": 0.6533, + "step": 9751 + }, + { + "epoch": 0.2988843937722202, + "grad_norm": 1.3618845836393692, + "learning_rate": 1.644184834770527e-05, + "loss": 0.6203, + "step": 9752 + }, + { + "epoch": 0.2989150422949614, + "grad_norm": 1.6792202464472745, + "learning_rate": 1.6441089078861414e-05, + "loss": 0.7298, + "step": 9753 + }, + { + "epoch": 0.2989456908177026, + "grad_norm": 1.6398471514097672, + "learning_rate": 1.6440329746551856e-05, + "loss": 0.8144, + "step": 9754 + }, + { + "epoch": 0.29897633934044376, + "grad_norm": 1.4621476473518142, + "learning_rate": 1.643957035078408e-05, + "loss": 0.7413, + "step": 9755 + }, + { + "epoch": 0.29900698786318497, + "grad_norm": 1.5443052719226784, + "learning_rate": 1.6438810891565572e-05, + "loss": 0.832, + "step": 9756 + }, + { + "epoch": 0.2990376363859262, + "grad_norm": 1.5694145572214453, + "learning_rate": 1.6438051368903815e-05, + "loss": 0.8361, + "step": 9757 + }, + { + "epoch": 0.2990682849086674, + "grad_norm": 1.467135616619929, + "learning_rate": 1.643729178280629e-05, + "loss": 0.8536, + "step": 9758 + }, + { + "epoch": 0.2990989334314086, + "grad_norm": 1.4805174114670687, + "learning_rate": 1.6436532133280477e-05, + "loss": 0.8367, + "step": 9759 + }, + { + "epoch": 0.2991295819541498, + "grad_norm": 1.2828315657862928, + "learning_rate": 1.6435772420333872e-05, + "loss": 0.7178, + "step": 9760 + }, + { + "epoch": 0.299160230476891, + "grad_norm": 1.55234771583768, + "learning_rate": 1.6435012643973953e-05, + "loss": 0.7685, + "step": 9761 + }, + { + "epoch": 0.2991908789996322, + "grad_norm": 1.497090459871749, + "learning_rate": 1.6434252804208206e-05, + "loss": 0.7183, + "step": 9762 + }, + { + "epoch": 0.2992215275223734, + "grad_norm": 0.8267880331463765, + "learning_rate": 1.6433492901044118e-05, + "loss": 0.611, + "step": 9763 + }, + { + "epoch": 0.2992521760451146, + "grad_norm": 1.7749463799143663, + "learning_rate": 1.6432732934489184e-05, + "loss": 0.7575, + "step": 9764 + }, + { + "epoch": 0.2992828245678558, + "grad_norm": 1.3423632279233062, + "learning_rate": 1.6431972904550883e-05, + "loss": 0.7091, + "step": 9765 + }, + { + "epoch": 0.299313473090597, + "grad_norm": 1.4582596881001824, + "learning_rate": 1.643121281123671e-05, + "loss": 0.8655, + "step": 9766 + }, + { + "epoch": 0.29934412161333823, + "grad_norm": 1.4403352988852156, + "learning_rate": 1.6430452654554146e-05, + "loss": 0.776, + "step": 9767 + }, + { + "epoch": 0.29937477013607944, + "grad_norm": 1.4174783630167591, + "learning_rate": 1.642969243451069e-05, + "loss": 0.791, + "step": 9768 + }, + { + "epoch": 0.29940541865882064, + "grad_norm": 1.6751294293720516, + "learning_rate": 1.642893215111383e-05, + "loss": 0.741, + "step": 9769 + }, + { + "epoch": 0.29943606718156185, + "grad_norm": 1.501706483334988, + "learning_rate": 1.642817180437106e-05, + "loss": 0.7521, + "step": 9770 + }, + { + "epoch": 0.29946671570430305, + "grad_norm": 1.3415631399661547, + "learning_rate": 1.6427411394289864e-05, + "loss": 0.7, + "step": 9771 + }, + { + "epoch": 0.29949736422704426, + "grad_norm": 0.8535871873211512, + "learning_rate": 1.6426650920877737e-05, + "loss": 0.6366, + "step": 9772 + }, + { + "epoch": 0.29952801274978547, + "grad_norm": 1.526429767032174, + "learning_rate": 1.6425890384142178e-05, + "loss": 0.7272, + "step": 9773 + }, + { + "epoch": 0.29955866127252667, + "grad_norm": 1.4369595618092619, + "learning_rate": 1.6425129784090677e-05, + "loss": 0.8069, + "step": 9774 + }, + { + "epoch": 0.2995893097952679, + "grad_norm": 1.4737163324876443, + "learning_rate": 1.6424369120730726e-05, + "loss": 0.8211, + "step": 9775 + }, + { + "epoch": 0.2996199583180091, + "grad_norm": 1.4565204705931256, + "learning_rate": 1.6423608394069826e-05, + "loss": 0.7278, + "step": 9776 + }, + { + "epoch": 0.2996506068407503, + "grad_norm": 1.5052338945981256, + "learning_rate": 1.6422847604115465e-05, + "loss": 0.8184, + "step": 9777 + }, + { + "epoch": 0.2996812553634915, + "grad_norm": 1.4151569853780381, + "learning_rate": 1.6422086750875146e-05, + "loss": 0.783, + "step": 9778 + }, + { + "epoch": 0.2997119038862327, + "grad_norm": 1.3868617736045032, + "learning_rate": 1.642132583435636e-05, + "loss": 0.8385, + "step": 9779 + }, + { + "epoch": 0.2997425524089739, + "grad_norm": 1.4064764941138113, + "learning_rate": 1.642056485456661e-05, + "loss": 0.7693, + "step": 9780 + }, + { + "epoch": 0.2997732009317151, + "grad_norm": 0.7692654893193158, + "learning_rate": 1.641980381151339e-05, + "loss": 0.6505, + "step": 9781 + }, + { + "epoch": 0.2998038494544563, + "grad_norm": 1.3433609018074788, + "learning_rate": 1.6419042705204204e-05, + "loss": 0.7035, + "step": 9782 + }, + { + "epoch": 0.2998344979771975, + "grad_norm": 1.335914696171491, + "learning_rate": 1.6418281535646542e-05, + "loss": 0.8169, + "step": 9783 + }, + { + "epoch": 0.29986514649993873, + "grad_norm": 1.4277261475112535, + "learning_rate": 1.6417520302847917e-05, + "loss": 0.7574, + "step": 9784 + }, + { + "epoch": 0.29989579502267993, + "grad_norm": 0.6809267337703679, + "learning_rate": 1.6416759006815816e-05, + "loss": 0.6468, + "step": 9785 + }, + { + "epoch": 0.2999264435454211, + "grad_norm": 1.6206550526794197, + "learning_rate": 1.6415997647557747e-05, + "loss": 0.7941, + "step": 9786 + }, + { + "epoch": 0.2999570920681623, + "grad_norm": 1.6181911230914414, + "learning_rate": 1.6415236225081215e-05, + "loss": 0.8077, + "step": 9787 + }, + { + "epoch": 0.2999877405909035, + "grad_norm": 1.4289856888285035, + "learning_rate": 1.641447473939372e-05, + "loss": 0.7909, + "step": 9788 + }, + { + "epoch": 0.3000183891136447, + "grad_norm": 1.5607064599839495, + "learning_rate": 1.641371319050276e-05, + "loss": 0.8615, + "step": 9789 + }, + { + "epoch": 0.3000490376363859, + "grad_norm": 1.3452966983229522, + "learning_rate": 1.6412951578415848e-05, + "loss": 0.694, + "step": 9790 + }, + { + "epoch": 0.3000796861591271, + "grad_norm": 1.4214133488409033, + "learning_rate": 1.641218990314048e-05, + "loss": 0.768, + "step": 9791 + }, + { + "epoch": 0.3001103346818683, + "grad_norm": 1.4838877144313525, + "learning_rate": 1.6411428164684164e-05, + "loss": 0.7797, + "step": 9792 + }, + { + "epoch": 0.3001409832046095, + "grad_norm": 0.7078310677443602, + "learning_rate": 1.6410666363054407e-05, + "loss": 0.62, + "step": 9793 + }, + { + "epoch": 0.30017163172735073, + "grad_norm": 1.6839250376271933, + "learning_rate": 1.6409904498258713e-05, + "loss": 0.7662, + "step": 9794 + }, + { + "epoch": 0.30020228025009194, + "grad_norm": 1.6710850905841084, + "learning_rate": 1.6409142570304586e-05, + "loss": 0.7806, + "step": 9795 + }, + { + "epoch": 0.30023292877283314, + "grad_norm": 1.3869410756897826, + "learning_rate": 1.6408380579199546e-05, + "loss": 0.7444, + "step": 9796 + }, + { + "epoch": 0.30026357729557435, + "grad_norm": 1.5692181552303386, + "learning_rate": 1.640761852495109e-05, + "loss": 0.6541, + "step": 9797 + }, + { + "epoch": 0.30029422581831555, + "grad_norm": 1.4951895290184978, + "learning_rate": 1.6406856407566725e-05, + "loss": 0.6851, + "step": 9798 + }, + { + "epoch": 0.30032487434105676, + "grad_norm": 1.8101031324752723, + "learning_rate": 1.6406094227053967e-05, + "loss": 0.808, + "step": 9799 + }, + { + "epoch": 0.30035552286379796, + "grad_norm": 1.6520218834686111, + "learning_rate": 1.6405331983420324e-05, + "loss": 0.7096, + "step": 9800 + }, + { + "epoch": 0.30038617138653917, + "grad_norm": 1.3310339735765893, + "learning_rate": 1.6404569676673307e-05, + "loss": 0.7126, + "step": 9801 + }, + { + "epoch": 0.3004168199092804, + "grad_norm": 1.5783111634568567, + "learning_rate": 1.6403807306820426e-05, + "loss": 0.7678, + "step": 9802 + }, + { + "epoch": 0.3004474684320216, + "grad_norm": 1.4145572543497098, + "learning_rate": 1.6403044873869193e-05, + "loss": 0.7452, + "step": 9803 + }, + { + "epoch": 0.3004781169547628, + "grad_norm": 0.7307043247405215, + "learning_rate": 1.6402282377827118e-05, + "loss": 0.6294, + "step": 9804 + }, + { + "epoch": 0.300508765477504, + "grad_norm": 1.4543123774695723, + "learning_rate": 1.640151981870172e-05, + "loss": 0.7608, + "step": 9805 + }, + { + "epoch": 0.3005394140002452, + "grad_norm": 1.434557982933027, + "learning_rate": 1.6400757196500507e-05, + "loss": 0.7587, + "step": 9806 + }, + { + "epoch": 0.3005700625229864, + "grad_norm": 1.6049967663389504, + "learning_rate": 1.6399994511230993e-05, + "loss": 0.7703, + "step": 9807 + }, + { + "epoch": 0.3006007110457276, + "grad_norm": 1.6070374099941083, + "learning_rate": 1.63992317629007e-05, + "loss": 0.8013, + "step": 9808 + }, + { + "epoch": 0.3006313595684688, + "grad_norm": 1.566174551963428, + "learning_rate": 1.639846895151714e-05, + "loss": 0.7139, + "step": 9809 + }, + { + "epoch": 0.30066200809121, + "grad_norm": 1.3806651623352888, + "learning_rate": 1.6397706077087825e-05, + "loss": 0.7778, + "step": 9810 + }, + { + "epoch": 0.3006926566139512, + "grad_norm": 1.4505398539507561, + "learning_rate": 1.6396943139620276e-05, + "loss": 0.8714, + "step": 9811 + }, + { + "epoch": 0.30072330513669243, + "grad_norm": 1.5046057865050824, + "learning_rate": 1.639618013912201e-05, + "loss": 0.7768, + "step": 9812 + }, + { + "epoch": 0.30075395365943364, + "grad_norm": 1.439849360782868, + "learning_rate": 1.6395417075600542e-05, + "loss": 0.7797, + "step": 9813 + }, + { + "epoch": 0.30078460218217484, + "grad_norm": 1.5404226376895642, + "learning_rate": 1.6394653949063398e-05, + "loss": 0.6735, + "step": 9814 + }, + { + "epoch": 0.30081525070491605, + "grad_norm": 1.6198437335009852, + "learning_rate": 1.639389075951809e-05, + "loss": 0.8143, + "step": 9815 + }, + { + "epoch": 0.30084589922765725, + "grad_norm": 1.5781684555089377, + "learning_rate": 1.639312750697214e-05, + "loss": 0.762, + "step": 9816 + }, + { + "epoch": 0.3008765477503984, + "grad_norm": 0.7000603899168946, + "learning_rate": 1.639236419143307e-05, + "loss": 0.6192, + "step": 9817 + }, + { + "epoch": 0.3009071962731396, + "grad_norm": 0.7197999595857197, + "learning_rate": 1.63916008129084e-05, + "loss": 0.5963, + "step": 9818 + }, + { + "epoch": 0.3009378447958808, + "grad_norm": 1.6427151722298454, + "learning_rate": 1.639083737140565e-05, + "loss": 0.8105, + "step": 9819 + }, + { + "epoch": 0.300968493318622, + "grad_norm": 1.624948104769158, + "learning_rate": 1.6390073866932347e-05, + "loss": 0.7163, + "step": 9820 + }, + { + "epoch": 0.3009991418413632, + "grad_norm": 1.5333981967400583, + "learning_rate": 1.638931029949601e-05, + "loss": 0.719, + "step": 9821 + }, + { + "epoch": 0.30102979036410443, + "grad_norm": 1.5785332485383199, + "learning_rate": 1.6388546669104163e-05, + "loss": 0.7424, + "step": 9822 + }, + { + "epoch": 0.30106043888684564, + "grad_norm": 1.3618134150648453, + "learning_rate": 1.6387782975764334e-05, + "loss": 0.7473, + "step": 9823 + }, + { + "epoch": 0.30109108740958684, + "grad_norm": 1.4092856195825942, + "learning_rate": 1.638701921948404e-05, + "loss": 0.7625, + "step": 9824 + }, + { + "epoch": 0.30112173593232805, + "grad_norm": 1.3885820348599858, + "learning_rate": 1.6386255400270816e-05, + "loss": 0.8464, + "step": 9825 + }, + { + "epoch": 0.30115238445506926, + "grad_norm": 1.7277767383810752, + "learning_rate": 1.6385491518132178e-05, + "loss": 0.654, + "step": 9826 + }, + { + "epoch": 0.30118303297781046, + "grad_norm": 1.5301605454423821, + "learning_rate": 1.6384727573075668e-05, + "loss": 0.8461, + "step": 9827 + }, + { + "epoch": 0.30121368150055167, + "grad_norm": 0.9362919430122446, + "learning_rate": 1.6383963565108795e-05, + "loss": 0.6442, + "step": 9828 + }, + { + "epoch": 0.30124433002329287, + "grad_norm": 1.4564826087354918, + "learning_rate": 1.63831994942391e-05, + "loss": 0.8525, + "step": 9829 + }, + { + "epoch": 0.3012749785460341, + "grad_norm": 1.6370702246918316, + "learning_rate": 1.6382435360474105e-05, + "loss": 0.7712, + "step": 9830 + }, + { + "epoch": 0.3013056270687753, + "grad_norm": 0.7274561551881185, + "learning_rate": 1.638167116382134e-05, + "loss": 0.6296, + "step": 9831 + }, + { + "epoch": 0.3013362755915165, + "grad_norm": 1.4234336822836653, + "learning_rate": 1.638090690428834e-05, + "loss": 0.7391, + "step": 9832 + }, + { + "epoch": 0.3013669241142577, + "grad_norm": 1.53978736678824, + "learning_rate": 1.6380142581882626e-05, + "loss": 0.7516, + "step": 9833 + }, + { + "epoch": 0.3013975726369989, + "grad_norm": 1.5891940534329765, + "learning_rate": 1.637937819661174e-05, + "loss": 0.8191, + "step": 9834 + }, + { + "epoch": 0.3014282211597401, + "grad_norm": 1.5714753664989283, + "learning_rate": 1.6378613748483207e-05, + "loss": 0.7957, + "step": 9835 + }, + { + "epoch": 0.3014588696824813, + "grad_norm": 1.275240784149936, + "learning_rate": 1.637784923750456e-05, + "loss": 0.7284, + "step": 9836 + }, + { + "epoch": 0.3014895182052225, + "grad_norm": 1.3587988716169892, + "learning_rate": 1.6377084663683334e-05, + "loss": 0.8276, + "step": 9837 + }, + { + "epoch": 0.3015201667279637, + "grad_norm": 1.3405928623333003, + "learning_rate": 1.6376320027027062e-05, + "loss": 0.844, + "step": 9838 + }, + { + "epoch": 0.30155081525070493, + "grad_norm": 0.7385268939465806, + "learning_rate": 1.6375555327543273e-05, + "loss": 0.6178, + "step": 9839 + }, + { + "epoch": 0.30158146377344613, + "grad_norm": 1.5174469700248951, + "learning_rate": 1.637479056523951e-05, + "loss": 0.766, + "step": 9840 + }, + { + "epoch": 0.30161211229618734, + "grad_norm": 1.7572892987442863, + "learning_rate": 1.63740257401233e-05, + "loss": 0.8686, + "step": 9841 + }, + { + "epoch": 0.30164276081892855, + "grad_norm": 0.6810585740564841, + "learning_rate": 1.6373260852202188e-05, + "loss": 0.6169, + "step": 9842 + }, + { + "epoch": 0.30167340934166975, + "grad_norm": 1.3812441107814628, + "learning_rate": 1.6372495901483704e-05, + "loss": 0.8419, + "step": 9843 + }, + { + "epoch": 0.30170405786441096, + "grad_norm": 1.2939197902501631, + "learning_rate": 1.637173088797539e-05, + "loss": 0.5924, + "step": 9844 + }, + { + "epoch": 0.30173470638715216, + "grad_norm": 1.6191856170777383, + "learning_rate": 1.637096581168478e-05, + "loss": 0.7028, + "step": 9845 + }, + { + "epoch": 0.30176535490989337, + "grad_norm": 1.4753019323447647, + "learning_rate": 1.6370200672619412e-05, + "loss": 0.7355, + "step": 9846 + }, + { + "epoch": 0.3017960034326346, + "grad_norm": 1.575654908733436, + "learning_rate": 1.636943547078683e-05, + "loss": 0.8015, + "step": 9847 + }, + { + "epoch": 0.3018266519553757, + "grad_norm": 1.6574615157409738, + "learning_rate": 1.6368670206194568e-05, + "loss": 0.7951, + "step": 9848 + }, + { + "epoch": 0.30185730047811693, + "grad_norm": 1.5537171838692518, + "learning_rate": 1.636790487885017e-05, + "loss": 0.794, + "step": 9849 + }, + { + "epoch": 0.30188794900085814, + "grad_norm": 1.916483658813392, + "learning_rate": 1.6367139488761173e-05, + "loss": 0.7964, + "step": 9850 + }, + { + "epoch": 0.30191859752359934, + "grad_norm": 1.7461386341725131, + "learning_rate": 1.6366374035935124e-05, + "loss": 0.7626, + "step": 9851 + }, + { + "epoch": 0.30194924604634055, + "grad_norm": 1.6162968708091792, + "learning_rate": 1.6365608520379567e-05, + "loss": 0.7064, + "step": 9852 + }, + { + "epoch": 0.30197989456908175, + "grad_norm": 1.4228788991638819, + "learning_rate": 1.6364842942102036e-05, + "loss": 0.7336, + "step": 9853 + }, + { + "epoch": 0.30201054309182296, + "grad_norm": 1.5362408660461409, + "learning_rate": 1.636407730111008e-05, + "loss": 0.7377, + "step": 9854 + }, + { + "epoch": 0.30204119161456416, + "grad_norm": 1.3943823994296922, + "learning_rate": 1.6363311597411236e-05, + "loss": 0.6876, + "step": 9855 + }, + { + "epoch": 0.30207184013730537, + "grad_norm": 1.402455116163468, + "learning_rate": 1.636254583101306e-05, + "loss": 0.7822, + "step": 9856 + }, + { + "epoch": 0.3021024886600466, + "grad_norm": 1.2337729895140939, + "learning_rate": 1.6361780001923095e-05, + "loss": 0.662, + "step": 9857 + }, + { + "epoch": 0.3021331371827878, + "grad_norm": 1.460748012903066, + "learning_rate": 1.636101411014888e-05, + "loss": 0.7514, + "step": 9858 + }, + { + "epoch": 0.302163785705529, + "grad_norm": 1.4640547246595979, + "learning_rate": 1.6360248155697965e-05, + "loss": 0.8107, + "step": 9859 + }, + { + "epoch": 0.3021944342282702, + "grad_norm": 1.8623753807535752, + "learning_rate": 1.6359482138577903e-05, + "loss": 0.6453, + "step": 9860 + }, + { + "epoch": 0.3022250827510114, + "grad_norm": 1.2436612843921533, + "learning_rate": 1.6358716058796233e-05, + "loss": 0.7803, + "step": 9861 + }, + { + "epoch": 0.3022557312737526, + "grad_norm": 1.583412842770068, + "learning_rate": 1.6357949916360506e-05, + "loss": 0.7965, + "step": 9862 + }, + { + "epoch": 0.3022863797964938, + "grad_norm": 1.6729611419640922, + "learning_rate": 1.6357183711278272e-05, + "loss": 0.8347, + "step": 9863 + }, + { + "epoch": 0.302317028319235, + "grad_norm": 1.557459589558204, + "learning_rate": 1.635641744355708e-05, + "loss": 0.8088, + "step": 9864 + }, + { + "epoch": 0.3023476768419762, + "grad_norm": 1.4647173802767153, + "learning_rate": 1.635565111320448e-05, + "loss": 0.8043, + "step": 9865 + }, + { + "epoch": 0.3023783253647174, + "grad_norm": 1.3742658473726388, + "learning_rate": 1.6354884720228023e-05, + "loss": 0.7902, + "step": 9866 + }, + { + "epoch": 0.30240897388745863, + "grad_norm": 0.8233912840505793, + "learning_rate": 1.635411826463526e-05, + "loss": 0.6007, + "step": 9867 + }, + { + "epoch": 0.30243962241019984, + "grad_norm": 1.489752387239377, + "learning_rate": 1.635335174643375e-05, + "loss": 0.7008, + "step": 9868 + }, + { + "epoch": 0.30247027093294104, + "grad_norm": 1.4827586435060773, + "learning_rate": 1.6352585165631034e-05, + "loss": 0.672, + "step": 9869 + }, + { + "epoch": 0.30250091945568225, + "grad_norm": 1.4655222420444005, + "learning_rate": 1.635181852223467e-05, + "loss": 0.7019, + "step": 9870 + }, + { + "epoch": 0.30253156797842345, + "grad_norm": 0.7099983575687405, + "learning_rate": 1.635105181625222e-05, + "loss": 0.617, + "step": 9871 + }, + { + "epoch": 0.30256221650116466, + "grad_norm": 1.5639559397123435, + "learning_rate": 1.6350285047691225e-05, + "loss": 0.7496, + "step": 9872 + }, + { + "epoch": 0.30259286502390587, + "grad_norm": 1.4256141793692902, + "learning_rate": 1.634951821655925e-05, + "loss": 0.7581, + "step": 9873 + }, + { + "epoch": 0.30262351354664707, + "grad_norm": 1.6783759460415575, + "learning_rate": 1.6348751322863848e-05, + "loss": 0.9548, + "step": 9874 + }, + { + "epoch": 0.3026541620693883, + "grad_norm": 1.339273908440523, + "learning_rate": 1.634798436661257e-05, + "loss": 0.674, + "step": 9875 + }, + { + "epoch": 0.3026848105921295, + "grad_norm": 1.3518656741207746, + "learning_rate": 1.634721734781298e-05, + "loss": 0.7991, + "step": 9876 + }, + { + "epoch": 0.3027154591148707, + "grad_norm": 1.4112606107109178, + "learning_rate": 1.6346450266472635e-05, + "loss": 0.7658, + "step": 9877 + }, + { + "epoch": 0.3027461076376119, + "grad_norm": 1.4748763217220875, + "learning_rate": 1.6345683122599093e-05, + "loss": 0.8415, + "step": 9878 + }, + { + "epoch": 0.30277675616035304, + "grad_norm": 1.3113764215760022, + "learning_rate": 1.6344915916199907e-05, + "loss": 0.7945, + "step": 9879 + }, + { + "epoch": 0.30280740468309425, + "grad_norm": 1.2998960932015229, + "learning_rate": 1.6344148647282645e-05, + "loss": 0.7133, + "step": 9880 + }, + { + "epoch": 0.30283805320583546, + "grad_norm": 1.4883963866578338, + "learning_rate": 1.6343381315854864e-05, + "loss": 0.8007, + "step": 9881 + }, + { + "epoch": 0.30286870172857666, + "grad_norm": 1.428351647004618, + "learning_rate": 1.634261392192412e-05, + "loss": 0.8714, + "step": 9882 + }, + { + "epoch": 0.30289935025131787, + "grad_norm": 1.3604739693658445, + "learning_rate": 1.634184646549798e-05, + "loss": 0.7163, + "step": 9883 + }, + { + "epoch": 0.3029299987740591, + "grad_norm": 1.5953136642153307, + "learning_rate": 1.6341078946584003e-05, + "loss": 0.8895, + "step": 9884 + }, + { + "epoch": 0.3029606472968003, + "grad_norm": 1.6834421794309429, + "learning_rate": 1.6340311365189755e-05, + "loss": 0.8232, + "step": 9885 + }, + { + "epoch": 0.3029912958195415, + "grad_norm": 0.8127162046583062, + "learning_rate": 1.6339543721322795e-05, + "loss": 0.637, + "step": 9886 + }, + { + "epoch": 0.3030219443422827, + "grad_norm": 0.7389994526633633, + "learning_rate": 1.633877601499069e-05, + "loss": 0.6047, + "step": 9887 + }, + { + "epoch": 0.3030525928650239, + "grad_norm": 1.6382232032806516, + "learning_rate": 1.6338008246201002e-05, + "loss": 0.7014, + "step": 9888 + }, + { + "epoch": 0.3030832413877651, + "grad_norm": 1.4247126204008855, + "learning_rate": 1.6337240414961298e-05, + "loss": 0.7926, + "step": 9889 + }, + { + "epoch": 0.3031138899105063, + "grad_norm": 1.6287944340621097, + "learning_rate": 1.633647252127914e-05, + "loss": 0.7819, + "step": 9890 + }, + { + "epoch": 0.3031445384332475, + "grad_norm": 1.3651122133522835, + "learning_rate": 1.63357045651621e-05, + "loss": 0.6484, + "step": 9891 + }, + { + "epoch": 0.3031751869559887, + "grad_norm": 1.5433885747398517, + "learning_rate": 1.633493654661774e-05, + "loss": 0.749, + "step": 9892 + }, + { + "epoch": 0.3032058354787299, + "grad_norm": 1.6719200720853433, + "learning_rate": 1.633416846565363e-05, + "loss": 0.6878, + "step": 9893 + }, + { + "epoch": 0.30323648400147113, + "grad_norm": 1.4740488146340367, + "learning_rate": 1.633340032227734e-05, + "loss": 0.8229, + "step": 9894 + }, + { + "epoch": 0.30326713252421234, + "grad_norm": 0.8991895397648922, + "learning_rate": 1.6332632116496433e-05, + "loss": 0.6318, + "step": 9895 + }, + { + "epoch": 0.30329778104695354, + "grad_norm": 1.4542948968312939, + "learning_rate": 1.6331863848318483e-05, + "loss": 0.7521, + "step": 9896 + }, + { + "epoch": 0.30332842956969475, + "grad_norm": 1.507099489128372, + "learning_rate": 1.6331095517751057e-05, + "loss": 0.7418, + "step": 9897 + }, + { + "epoch": 0.30335907809243595, + "grad_norm": 1.7351099529344565, + "learning_rate": 1.633032712480173e-05, + "loss": 0.7148, + "step": 9898 + }, + { + "epoch": 0.30338972661517716, + "grad_norm": 1.331701231300918, + "learning_rate": 1.6329558669478066e-05, + "loss": 0.6879, + "step": 9899 + }, + { + "epoch": 0.30342037513791836, + "grad_norm": 0.6866642204545312, + "learning_rate": 1.6328790151787645e-05, + "loss": 0.6264, + "step": 9900 + }, + { + "epoch": 0.30345102366065957, + "grad_norm": 1.5625862966857598, + "learning_rate": 1.632802157173803e-05, + "loss": 0.7734, + "step": 9901 + }, + { + "epoch": 0.3034816721834008, + "grad_norm": 1.4718416700896244, + "learning_rate": 1.63272529293368e-05, + "loss": 0.7701, + "step": 9902 + }, + { + "epoch": 0.303512320706142, + "grad_norm": 1.4651241762839615, + "learning_rate": 1.6326484224591535e-05, + "loss": 0.7684, + "step": 9903 + }, + { + "epoch": 0.3035429692288832, + "grad_norm": 0.7747269488908641, + "learning_rate": 1.6325715457509796e-05, + "loss": 0.6411, + "step": 9904 + }, + { + "epoch": 0.3035736177516244, + "grad_norm": 1.354159001000012, + "learning_rate": 1.632494662809917e-05, + "loss": 0.7407, + "step": 9905 + }, + { + "epoch": 0.3036042662743656, + "grad_norm": 1.4545336416355423, + "learning_rate": 1.632417773636722e-05, + "loss": 0.7522, + "step": 9906 + }, + { + "epoch": 0.3036349147971068, + "grad_norm": 0.7009009673233564, + "learning_rate": 1.632340878232153e-05, + "loss": 0.6316, + "step": 9907 + }, + { + "epoch": 0.303665563319848, + "grad_norm": 1.41660125231715, + "learning_rate": 1.632263976596968e-05, + "loss": 0.7594, + "step": 9908 + }, + { + "epoch": 0.3036962118425892, + "grad_norm": 1.4620294825814344, + "learning_rate": 1.6321870687319235e-05, + "loss": 0.7879, + "step": 9909 + }, + { + "epoch": 0.30372686036533036, + "grad_norm": 1.3699941093444998, + "learning_rate": 1.6321101546377787e-05, + "loss": 0.7325, + "step": 9910 + }, + { + "epoch": 0.30375750888807157, + "grad_norm": 1.4851801834517344, + "learning_rate": 1.6320332343152906e-05, + "loss": 0.736, + "step": 9911 + }, + { + "epoch": 0.3037881574108128, + "grad_norm": 1.4261971933057176, + "learning_rate": 1.6319563077652173e-05, + "loss": 0.7632, + "step": 9912 + }, + { + "epoch": 0.303818805933554, + "grad_norm": 1.5424075143526312, + "learning_rate": 1.631879374988317e-05, + "loss": 0.7344, + "step": 9913 + }, + { + "epoch": 0.3038494544562952, + "grad_norm": 1.5753719102482813, + "learning_rate": 1.631802435985347e-05, + "loss": 0.7107, + "step": 9914 + }, + { + "epoch": 0.3038801029790364, + "grad_norm": 1.387627532362929, + "learning_rate": 1.6317254907570664e-05, + "loss": 0.7374, + "step": 9915 + }, + { + "epoch": 0.3039107515017776, + "grad_norm": 1.474010014364507, + "learning_rate": 1.631648539304233e-05, + "loss": 0.8354, + "step": 9916 + }, + { + "epoch": 0.3039414000245188, + "grad_norm": 1.441963579100269, + "learning_rate": 1.6315715816276044e-05, + "loss": 0.7861, + "step": 9917 + }, + { + "epoch": 0.30397204854726, + "grad_norm": 1.599707799319372, + "learning_rate": 1.63149461772794e-05, + "loss": 0.8245, + "step": 9918 + }, + { + "epoch": 0.3040026970700012, + "grad_norm": 1.4319298657624684, + "learning_rate": 1.6314176476059972e-05, + "loss": 0.8457, + "step": 9919 + }, + { + "epoch": 0.3040333455927424, + "grad_norm": 1.1507337226967451, + "learning_rate": 1.631340671262535e-05, + "loss": 0.6742, + "step": 9920 + }, + { + "epoch": 0.3040639941154836, + "grad_norm": 1.4682170542054966, + "learning_rate": 1.6312636886983116e-05, + "loss": 0.8386, + "step": 9921 + }, + { + "epoch": 0.30409464263822483, + "grad_norm": 1.4359441640612522, + "learning_rate": 1.6311866999140856e-05, + "loss": 0.7436, + "step": 9922 + }, + { + "epoch": 0.30412529116096604, + "grad_norm": 1.4841252302429264, + "learning_rate": 1.631109704910615e-05, + "loss": 0.8103, + "step": 9923 + }, + { + "epoch": 0.30415593968370724, + "grad_norm": 1.649810365583851, + "learning_rate": 1.6310327036886597e-05, + "loss": 0.6879, + "step": 9924 + }, + { + "epoch": 0.30418658820644845, + "grad_norm": 1.5561886896223804, + "learning_rate": 1.6309556962489776e-05, + "loss": 0.7415, + "step": 9925 + }, + { + "epoch": 0.30421723672918966, + "grad_norm": 1.5678151081595157, + "learning_rate": 1.6308786825923274e-05, + "loss": 0.7385, + "step": 9926 + }, + { + "epoch": 0.30424788525193086, + "grad_norm": 1.7145174895733286, + "learning_rate": 1.630801662719468e-05, + "loss": 0.8489, + "step": 9927 + }, + { + "epoch": 0.30427853377467207, + "grad_norm": 1.6026713361694027, + "learning_rate": 1.6307246366311586e-05, + "loss": 0.8466, + "step": 9928 + }, + { + "epoch": 0.30430918229741327, + "grad_norm": 1.5933858490837502, + "learning_rate": 1.630647604328158e-05, + "loss": 0.68, + "step": 9929 + }, + { + "epoch": 0.3043398308201545, + "grad_norm": 1.4564060325066277, + "learning_rate": 1.6305705658112253e-05, + "loss": 0.7875, + "step": 9930 + }, + { + "epoch": 0.3043704793428957, + "grad_norm": 1.4971685066663873, + "learning_rate": 1.6304935210811192e-05, + "loss": 0.774, + "step": 9931 + }, + { + "epoch": 0.3044011278656369, + "grad_norm": 1.6858250081385668, + "learning_rate": 1.630416470138599e-05, + "loss": 0.833, + "step": 9932 + }, + { + "epoch": 0.3044317763883781, + "grad_norm": 1.3866075737625854, + "learning_rate": 1.6303394129844243e-05, + "loss": 0.7095, + "step": 9933 + }, + { + "epoch": 0.3044624249111193, + "grad_norm": 1.7213911262904185, + "learning_rate": 1.6302623496193542e-05, + "loss": 0.8795, + "step": 9934 + }, + { + "epoch": 0.3044930734338605, + "grad_norm": 1.6398543191570305, + "learning_rate": 1.6301852800441476e-05, + "loss": 0.8769, + "step": 9935 + }, + { + "epoch": 0.3045237219566017, + "grad_norm": 1.481962120179617, + "learning_rate": 1.6301082042595643e-05, + "loss": 0.8245, + "step": 9936 + }, + { + "epoch": 0.3045543704793429, + "grad_norm": 0.7333573205145891, + "learning_rate": 1.6300311222663637e-05, + "loss": 0.6466, + "step": 9937 + }, + { + "epoch": 0.3045850190020841, + "grad_norm": 1.3377556568217168, + "learning_rate": 1.6299540340653055e-05, + "loss": 0.6867, + "step": 9938 + }, + { + "epoch": 0.30461566752482533, + "grad_norm": 1.574537992543152, + "learning_rate": 1.6298769396571484e-05, + "loss": 0.7594, + "step": 9939 + }, + { + "epoch": 0.30464631604756653, + "grad_norm": 1.572339466304658, + "learning_rate": 1.6297998390426532e-05, + "loss": 0.7726, + "step": 9940 + }, + { + "epoch": 0.3046769645703077, + "grad_norm": 1.3635431199827588, + "learning_rate": 1.6297227322225788e-05, + "loss": 0.684, + "step": 9941 + }, + { + "epoch": 0.3047076130930489, + "grad_norm": 1.4291345488725697, + "learning_rate": 1.6296456191976855e-05, + "loss": 0.7705, + "step": 9942 + }, + { + "epoch": 0.3047382616157901, + "grad_norm": 1.564060192963627, + "learning_rate": 1.6295684999687326e-05, + "loss": 0.792, + "step": 9943 + }, + { + "epoch": 0.3047689101385313, + "grad_norm": 1.6554933041238133, + "learning_rate": 1.62949137453648e-05, + "loss": 0.7413, + "step": 9944 + }, + { + "epoch": 0.3047995586612725, + "grad_norm": 1.507968145228266, + "learning_rate": 1.629414242901688e-05, + "loss": 0.9076, + "step": 9945 + }, + { + "epoch": 0.3048302071840137, + "grad_norm": 0.7010160233991505, + "learning_rate": 1.6293371050651164e-05, + "loss": 0.6195, + "step": 9946 + }, + { + "epoch": 0.3048608557067549, + "grad_norm": 1.5048997765952612, + "learning_rate": 1.6292599610275252e-05, + "loss": 0.7882, + "step": 9947 + }, + { + "epoch": 0.3048915042294961, + "grad_norm": 1.530122161578691, + "learning_rate": 1.6291828107896746e-05, + "loss": 0.7478, + "step": 9948 + }, + { + "epoch": 0.30492215275223733, + "grad_norm": 1.5395563174868636, + "learning_rate": 1.6291056543523248e-05, + "loss": 0.8018, + "step": 9949 + }, + { + "epoch": 0.30495280127497854, + "grad_norm": 1.5179695027164681, + "learning_rate": 1.6290284917162364e-05, + "loss": 0.7939, + "step": 9950 + }, + { + "epoch": 0.30498344979771974, + "grad_norm": 1.7563438433783476, + "learning_rate": 1.628951322882169e-05, + "loss": 0.7415, + "step": 9951 + }, + { + "epoch": 0.30501409832046095, + "grad_norm": 1.320797584029334, + "learning_rate": 1.6288741478508835e-05, + "loss": 0.718, + "step": 9952 + }, + { + "epoch": 0.30504474684320215, + "grad_norm": 1.3404300974015138, + "learning_rate": 1.62879696662314e-05, + "loss": 0.7888, + "step": 9953 + }, + { + "epoch": 0.30507539536594336, + "grad_norm": 1.4370484056904749, + "learning_rate": 1.628719779199699e-05, + "loss": 0.8144, + "step": 9954 + }, + { + "epoch": 0.30510604388868456, + "grad_norm": 1.6605547670325622, + "learning_rate": 1.628642585581321e-05, + "loss": 0.7989, + "step": 9955 + }, + { + "epoch": 0.30513669241142577, + "grad_norm": 1.4330325506507753, + "learning_rate": 1.628565385768767e-05, + "loss": 0.6459, + "step": 9956 + }, + { + "epoch": 0.305167340934167, + "grad_norm": 0.7022722718817257, + "learning_rate": 1.628488179762797e-05, + "loss": 0.6214, + "step": 9957 + }, + { + "epoch": 0.3051979894569082, + "grad_norm": 1.6154651740062589, + "learning_rate": 1.628410967564173e-05, + "loss": 0.7713, + "step": 9958 + }, + { + "epoch": 0.3052286379796494, + "grad_norm": 1.3032507275048528, + "learning_rate": 1.6283337491736543e-05, + "loss": 0.6197, + "step": 9959 + }, + { + "epoch": 0.3052592865023906, + "grad_norm": 1.8631832173165839, + "learning_rate": 1.6282565245920024e-05, + "loss": 0.8393, + "step": 9960 + }, + { + "epoch": 0.3052899350251318, + "grad_norm": 1.4984573439996314, + "learning_rate": 1.6281792938199786e-05, + "loss": 0.8352, + "step": 9961 + }, + { + "epoch": 0.305320583547873, + "grad_norm": 0.6585256140820362, + "learning_rate": 1.6281020568583433e-05, + "loss": 0.6106, + "step": 9962 + }, + { + "epoch": 0.3053512320706142, + "grad_norm": 1.5350212995816215, + "learning_rate": 1.6280248137078576e-05, + "loss": 0.7916, + "step": 9963 + }, + { + "epoch": 0.3053818805933554, + "grad_norm": 1.605452461331668, + "learning_rate": 1.627947564369283e-05, + "loss": 0.6606, + "step": 9964 + }, + { + "epoch": 0.3054125291160966, + "grad_norm": 1.5289828771491782, + "learning_rate": 1.6278703088433803e-05, + "loss": 0.8158, + "step": 9965 + }, + { + "epoch": 0.3054431776388378, + "grad_norm": 1.379220420831789, + "learning_rate": 1.6277930471309106e-05, + "loss": 0.8075, + "step": 9966 + }, + { + "epoch": 0.30547382616157903, + "grad_norm": 1.50281295132107, + "learning_rate": 1.6277157792326355e-05, + "loss": 0.8247, + "step": 9967 + }, + { + "epoch": 0.30550447468432024, + "grad_norm": 1.413440968010086, + "learning_rate": 1.6276385051493164e-05, + "loss": 0.6767, + "step": 9968 + }, + { + "epoch": 0.30553512320706144, + "grad_norm": 1.6369754522355833, + "learning_rate": 1.6275612248817145e-05, + "loss": 0.7129, + "step": 9969 + }, + { + "epoch": 0.30556577172980265, + "grad_norm": 1.3304544572654617, + "learning_rate": 1.6274839384305908e-05, + "loss": 0.7559, + "step": 9970 + }, + { + "epoch": 0.30559642025254385, + "grad_norm": 0.7029225846074609, + "learning_rate": 1.6274066457967077e-05, + "loss": 0.6491, + "step": 9971 + }, + { + "epoch": 0.305627068775285, + "grad_norm": 1.5589080688757344, + "learning_rate": 1.6273293469808264e-05, + "loss": 0.7912, + "step": 9972 + }, + { + "epoch": 0.3056577172980262, + "grad_norm": 1.4415334305021008, + "learning_rate": 1.6272520419837083e-05, + "loss": 0.7907, + "step": 9973 + }, + { + "epoch": 0.3056883658207674, + "grad_norm": 1.4582078931242437, + "learning_rate": 1.6271747308061154e-05, + "loss": 0.7645, + "step": 9974 + }, + { + "epoch": 0.3057190143435086, + "grad_norm": 1.4318782393784186, + "learning_rate": 1.6270974134488096e-05, + "loss": 0.7571, + "step": 9975 + }, + { + "epoch": 0.3057496628662498, + "grad_norm": 1.4444085786409402, + "learning_rate": 1.6270200899125527e-05, + "loss": 0.6842, + "step": 9976 + }, + { + "epoch": 0.30578031138899103, + "grad_norm": 1.433113651771195, + "learning_rate": 1.626942760198106e-05, + "loss": 0.8847, + "step": 9977 + }, + { + "epoch": 0.30581095991173224, + "grad_norm": 1.7181323147219687, + "learning_rate": 1.626865424306232e-05, + "loss": 0.8162, + "step": 9978 + }, + { + "epoch": 0.30584160843447344, + "grad_norm": 1.3257524734834174, + "learning_rate": 1.6267880822376925e-05, + "loss": 0.7191, + "step": 9979 + }, + { + "epoch": 0.30587225695721465, + "grad_norm": 1.4088634883483584, + "learning_rate": 1.62671073399325e-05, + "loss": 0.7787, + "step": 9980 + }, + { + "epoch": 0.30590290547995586, + "grad_norm": 1.466480026093229, + "learning_rate": 1.626633379573666e-05, + "loss": 0.8026, + "step": 9981 + }, + { + "epoch": 0.30593355400269706, + "grad_norm": 1.631497290560967, + "learning_rate": 1.626556018979703e-05, + "loss": 0.7281, + "step": 9982 + }, + { + "epoch": 0.30596420252543827, + "grad_norm": 1.2364252404181744, + "learning_rate": 1.626478652212123e-05, + "loss": 0.6657, + "step": 9983 + }, + { + "epoch": 0.3059948510481795, + "grad_norm": 0.7117610982557565, + "learning_rate": 1.6264012792716893e-05, + "loss": 0.6105, + "step": 9984 + }, + { + "epoch": 0.3060254995709207, + "grad_norm": 1.4040484818413117, + "learning_rate": 1.626323900159163e-05, + "loss": 0.7024, + "step": 9985 + }, + { + "epoch": 0.3060561480936619, + "grad_norm": 1.412049420010532, + "learning_rate": 1.626246514875307e-05, + "loss": 0.7617, + "step": 9986 + }, + { + "epoch": 0.3060867966164031, + "grad_norm": 1.6090656287956315, + "learning_rate": 1.6261691234208838e-05, + "loss": 0.7568, + "step": 9987 + }, + { + "epoch": 0.3061174451391443, + "grad_norm": 1.600829760903102, + "learning_rate": 1.6260917257966563e-05, + "loss": 0.8298, + "step": 9988 + }, + { + "epoch": 0.3061480936618855, + "grad_norm": 1.5770728181147453, + "learning_rate": 1.626014322003387e-05, + "loss": 0.693, + "step": 9989 + }, + { + "epoch": 0.3061787421846267, + "grad_norm": 1.4862556127876732, + "learning_rate": 1.625936912041838e-05, + "loss": 0.7113, + "step": 9990 + }, + { + "epoch": 0.3062093907073679, + "grad_norm": 1.4504993435747506, + "learning_rate": 1.6258594959127726e-05, + "loss": 0.7451, + "step": 9991 + }, + { + "epoch": 0.3062400392301091, + "grad_norm": 1.8752142332414492, + "learning_rate": 1.6257820736169535e-05, + "loss": 0.8915, + "step": 9992 + }, + { + "epoch": 0.3062706877528503, + "grad_norm": 1.4592743790574856, + "learning_rate": 1.6257046451551434e-05, + "loss": 0.7611, + "step": 9993 + }, + { + "epoch": 0.30630133627559153, + "grad_norm": 1.656240847866232, + "learning_rate": 1.625627210528105e-05, + "loss": 0.8814, + "step": 9994 + }, + { + "epoch": 0.30633198479833273, + "grad_norm": 0.7103553283891783, + "learning_rate": 1.625549769736602e-05, + "loss": 0.6049, + "step": 9995 + }, + { + "epoch": 0.30636263332107394, + "grad_norm": 1.5366456887123023, + "learning_rate": 1.6254723227813975e-05, + "loss": 0.8381, + "step": 9996 + }, + { + "epoch": 0.30639328184381515, + "grad_norm": 1.6278134740694203, + "learning_rate": 1.6253948696632535e-05, + "loss": 0.8713, + "step": 9997 + }, + { + "epoch": 0.30642393036655635, + "grad_norm": 1.4773599779924338, + "learning_rate": 1.625317410382934e-05, + "loss": 0.7379, + "step": 9998 + }, + { + "epoch": 0.30645457888929756, + "grad_norm": 1.574339602878624, + "learning_rate": 1.6252399449412024e-05, + "loss": 0.7981, + "step": 9999 + }, + { + "epoch": 0.30648522741203876, + "grad_norm": 1.4630693555580974, + "learning_rate": 1.625162473338821e-05, + "loss": 0.7273, + "step": 10000 + }, + { + "epoch": 0.30651587593477997, + "grad_norm": 0.6668058988436468, + "learning_rate": 1.6250849955765545e-05, + "loss": 0.6249, + "step": 10001 + }, + { + "epoch": 0.3065465244575212, + "grad_norm": 0.709588476992072, + "learning_rate": 1.6250075116551653e-05, + "loss": 0.607, + "step": 10002 + }, + { + "epoch": 0.3065771729802623, + "grad_norm": 1.5265047773947582, + "learning_rate": 1.6249300215754173e-05, + "loss": 0.7187, + "step": 10003 + }, + { + "epoch": 0.30660782150300353, + "grad_norm": 1.4237597557572095, + "learning_rate": 1.6248525253380735e-05, + "loss": 0.7463, + "step": 10004 + }, + { + "epoch": 0.30663847002574474, + "grad_norm": 1.5610142691767859, + "learning_rate": 1.6247750229438983e-05, + "loss": 0.7811, + "step": 10005 + }, + { + "epoch": 0.30666911854848594, + "grad_norm": 1.3993002831926316, + "learning_rate": 1.6246975143936546e-05, + "loss": 0.7075, + "step": 10006 + }, + { + "epoch": 0.30669976707122715, + "grad_norm": 1.460818073536309, + "learning_rate": 1.624619999688107e-05, + "loss": 0.8347, + "step": 10007 + }, + { + "epoch": 0.30673041559396835, + "grad_norm": 1.2858889056679899, + "learning_rate": 1.624542478828018e-05, + "loss": 0.7094, + "step": 10008 + }, + { + "epoch": 0.30676106411670956, + "grad_norm": 1.4514521827091011, + "learning_rate": 1.6244649518141527e-05, + "loss": 0.7584, + "step": 10009 + }, + { + "epoch": 0.30679171263945076, + "grad_norm": 1.6357426904411985, + "learning_rate": 1.6243874186472742e-05, + "loss": 0.7291, + "step": 10010 + }, + { + "epoch": 0.30682236116219197, + "grad_norm": 1.5281230203326566, + "learning_rate": 1.624309879328147e-05, + "loss": 0.7817, + "step": 10011 + }, + { + "epoch": 0.3068530096849332, + "grad_norm": 1.554046287638693, + "learning_rate": 1.6242323338575347e-05, + "loss": 0.7689, + "step": 10012 + }, + { + "epoch": 0.3068836582076744, + "grad_norm": 1.3420948450584904, + "learning_rate": 1.624154782236201e-05, + "loss": 0.7586, + "step": 10013 + }, + { + "epoch": 0.3069143067304156, + "grad_norm": 1.3381699892878895, + "learning_rate": 1.624077224464911e-05, + "loss": 0.8091, + "step": 10014 + }, + { + "epoch": 0.3069449552531568, + "grad_norm": 1.374708305616394, + "learning_rate": 1.6239996605444286e-05, + "loss": 0.6534, + "step": 10015 + }, + { + "epoch": 0.306975603775898, + "grad_norm": 1.3237208437815806, + "learning_rate": 1.6239220904755176e-05, + "loss": 0.7339, + "step": 10016 + }, + { + "epoch": 0.3070062522986392, + "grad_norm": 1.4762059011511748, + "learning_rate": 1.6238445142589428e-05, + "loss": 0.689, + "step": 10017 + }, + { + "epoch": 0.3070369008213804, + "grad_norm": 1.4764120825388245, + "learning_rate": 1.6237669318954682e-05, + "loss": 0.6305, + "step": 10018 + }, + { + "epoch": 0.3070675493441216, + "grad_norm": 1.6514218637652394, + "learning_rate": 1.6236893433858588e-05, + "loss": 0.7843, + "step": 10019 + }, + { + "epoch": 0.3070981978668628, + "grad_norm": 1.4827540464058258, + "learning_rate": 1.6236117487308783e-05, + "loss": 0.7785, + "step": 10020 + }, + { + "epoch": 0.307128846389604, + "grad_norm": 0.7511731911197098, + "learning_rate": 1.6235341479312915e-05, + "loss": 0.6104, + "step": 10021 + }, + { + "epoch": 0.30715949491234523, + "grad_norm": 1.4766681630865246, + "learning_rate": 1.6234565409878636e-05, + "loss": 0.7908, + "step": 10022 + }, + { + "epoch": 0.30719014343508644, + "grad_norm": 1.2148273444047597, + "learning_rate": 1.6233789279013588e-05, + "loss": 0.7642, + "step": 10023 + }, + { + "epoch": 0.30722079195782764, + "grad_norm": 1.463989925386545, + "learning_rate": 1.623301308672542e-05, + "loss": 0.8117, + "step": 10024 + }, + { + "epoch": 0.30725144048056885, + "grad_norm": 1.5531116390734077, + "learning_rate": 1.6232236833021778e-05, + "loss": 0.7781, + "step": 10025 + }, + { + "epoch": 0.30728208900331005, + "grad_norm": 1.5413272821414454, + "learning_rate": 1.6231460517910312e-05, + "loss": 0.7893, + "step": 10026 + }, + { + "epoch": 0.30731273752605126, + "grad_norm": 1.5167109405077148, + "learning_rate": 1.623068414139867e-05, + "loss": 0.8388, + "step": 10027 + }, + { + "epoch": 0.30734338604879247, + "grad_norm": 1.4501584696452194, + "learning_rate": 1.6229907703494505e-05, + "loss": 0.7973, + "step": 10028 + }, + { + "epoch": 0.30737403457153367, + "grad_norm": 1.5116367871548169, + "learning_rate": 1.6229131204205466e-05, + "loss": 0.7914, + "step": 10029 + }, + { + "epoch": 0.3074046830942749, + "grad_norm": 1.413043516876924, + "learning_rate": 1.62283546435392e-05, + "loss": 0.7691, + "step": 10030 + }, + { + "epoch": 0.3074353316170161, + "grad_norm": 0.6965027799783026, + "learning_rate": 1.6227578021503365e-05, + "loss": 0.6166, + "step": 10031 + }, + { + "epoch": 0.3074659801397573, + "grad_norm": 1.4138300835803237, + "learning_rate": 1.622680133810561e-05, + "loss": 0.7617, + "step": 10032 + }, + { + "epoch": 0.3074966286624985, + "grad_norm": 0.6648447158912205, + "learning_rate": 1.6226024593353585e-05, + "loss": 0.6417, + "step": 10033 + }, + { + "epoch": 0.30752727718523964, + "grad_norm": 1.5931499059373928, + "learning_rate": 1.6225247787254953e-05, + "loss": 0.9222, + "step": 10034 + }, + { + "epoch": 0.30755792570798085, + "grad_norm": 1.401585926929359, + "learning_rate": 1.622447091981736e-05, + "loss": 0.7705, + "step": 10035 + }, + { + "epoch": 0.30758857423072206, + "grad_norm": 1.429308283403548, + "learning_rate": 1.6223693991048456e-05, + "loss": 0.7953, + "step": 10036 + }, + { + "epoch": 0.30761922275346326, + "grad_norm": 1.442475740553553, + "learning_rate": 1.622291700095591e-05, + "loss": 0.7301, + "step": 10037 + }, + { + "epoch": 0.30764987127620447, + "grad_norm": 1.5576199558424701, + "learning_rate": 1.6222139949547368e-05, + "loss": 0.759, + "step": 10038 + }, + { + "epoch": 0.3076805197989457, + "grad_norm": 1.6268287009409268, + "learning_rate": 1.622136283683049e-05, + "loss": 0.8145, + "step": 10039 + }, + { + "epoch": 0.3077111683216869, + "grad_norm": 1.5663216763886252, + "learning_rate": 1.622058566281293e-05, + "loss": 0.79, + "step": 10040 + }, + { + "epoch": 0.3077418168444281, + "grad_norm": 0.6881977421884509, + "learning_rate": 1.621980842750235e-05, + "loss": 0.6464, + "step": 10041 + }, + { + "epoch": 0.3077724653671693, + "grad_norm": 1.3244651129051674, + "learning_rate": 1.6219031130906404e-05, + "loss": 0.7204, + "step": 10042 + }, + { + "epoch": 0.3078031138899105, + "grad_norm": 1.3497553919626757, + "learning_rate": 1.6218253773032752e-05, + "loss": 0.7615, + "step": 10043 + }, + { + "epoch": 0.3078337624126517, + "grad_norm": 1.446905930910026, + "learning_rate": 1.6217476353889057e-05, + "loss": 0.8944, + "step": 10044 + }, + { + "epoch": 0.3078644109353929, + "grad_norm": 1.3966345741823614, + "learning_rate": 1.621669887348298e-05, + "loss": 0.6748, + "step": 10045 + }, + { + "epoch": 0.3078950594581341, + "grad_norm": 0.6892101848210255, + "learning_rate": 1.6215921331822175e-05, + "loss": 0.6365, + "step": 10046 + }, + { + "epoch": 0.3079257079808753, + "grad_norm": 1.3871446129582232, + "learning_rate": 1.6215143728914305e-05, + "loss": 0.7747, + "step": 10047 + }, + { + "epoch": 0.3079563565036165, + "grad_norm": 1.293091552186994, + "learning_rate": 1.6214366064767035e-05, + "loss": 0.7732, + "step": 10048 + }, + { + "epoch": 0.30798700502635773, + "grad_norm": 1.4164777232859356, + "learning_rate": 1.6213588339388023e-05, + "loss": 0.8174, + "step": 10049 + }, + { + "epoch": 0.30801765354909894, + "grad_norm": 1.5462284624743496, + "learning_rate": 1.6212810552784942e-05, + "loss": 0.8027, + "step": 10050 + }, + { + "epoch": 0.30804830207184014, + "grad_norm": 1.516244753118959, + "learning_rate": 1.6212032704965445e-05, + "loss": 0.8104, + "step": 10051 + }, + { + "epoch": 0.30807895059458135, + "grad_norm": 0.7007815490702832, + "learning_rate": 1.6211254795937202e-05, + "loss": 0.5958, + "step": 10052 + }, + { + "epoch": 0.30810959911732255, + "grad_norm": 0.6978422220349144, + "learning_rate": 1.6210476825707874e-05, + "loss": 0.6105, + "step": 10053 + }, + { + "epoch": 0.30814024764006376, + "grad_norm": 0.6632613942934584, + "learning_rate": 1.6209698794285132e-05, + "loss": 0.6075, + "step": 10054 + }, + { + "epoch": 0.30817089616280496, + "grad_norm": 0.6807879668830435, + "learning_rate": 1.6208920701676637e-05, + "loss": 0.6295, + "step": 10055 + }, + { + "epoch": 0.30820154468554617, + "grad_norm": 0.6655906813660339, + "learning_rate": 1.6208142547890058e-05, + "loss": 0.6041, + "step": 10056 + }, + { + "epoch": 0.3082321932082874, + "grad_norm": 1.496863238739425, + "learning_rate": 1.620736433293306e-05, + "loss": 0.8311, + "step": 10057 + }, + { + "epoch": 0.3082628417310286, + "grad_norm": 1.5175428305282848, + "learning_rate": 1.6206586056813315e-05, + "loss": 0.579, + "step": 10058 + }, + { + "epoch": 0.3082934902537698, + "grad_norm": 1.616719613337797, + "learning_rate": 1.620580771953849e-05, + "loss": 0.7702, + "step": 10059 + }, + { + "epoch": 0.308324138776511, + "grad_norm": 1.4803375484397039, + "learning_rate": 1.6205029321116253e-05, + "loss": 0.7968, + "step": 10060 + }, + { + "epoch": 0.3083547872992522, + "grad_norm": 1.4037661339566758, + "learning_rate": 1.6204250861554277e-05, + "loss": 0.7926, + "step": 10061 + }, + { + "epoch": 0.3083854358219934, + "grad_norm": 1.3967897210936688, + "learning_rate": 1.6203472340860225e-05, + "loss": 0.7998, + "step": 10062 + }, + { + "epoch": 0.3084160843447346, + "grad_norm": 1.4588586373107126, + "learning_rate": 1.6202693759041776e-05, + "loss": 0.7347, + "step": 10063 + }, + { + "epoch": 0.3084467328674758, + "grad_norm": 1.4657352944845106, + "learning_rate": 1.6201915116106597e-05, + "loss": 0.7983, + "step": 10064 + }, + { + "epoch": 0.30847738139021696, + "grad_norm": 1.5241782283652265, + "learning_rate": 1.620113641206236e-05, + "loss": 0.7189, + "step": 10065 + }, + { + "epoch": 0.30850802991295817, + "grad_norm": 1.3870713928017275, + "learning_rate": 1.6200357646916745e-05, + "loss": 0.8287, + "step": 10066 + }, + { + "epoch": 0.3085386784356994, + "grad_norm": 1.4408313592180562, + "learning_rate": 1.6199578820677415e-05, + "loss": 0.8928, + "step": 10067 + }, + { + "epoch": 0.3085693269584406, + "grad_norm": 1.5899480312912655, + "learning_rate": 1.619879993335205e-05, + "loss": 0.822, + "step": 10068 + }, + { + "epoch": 0.3085999754811818, + "grad_norm": 1.7146701706039504, + "learning_rate": 1.6198020984948323e-05, + "loss": 0.7691, + "step": 10069 + }, + { + "epoch": 0.308630624003923, + "grad_norm": 1.413024554884183, + "learning_rate": 1.6197241975473906e-05, + "loss": 0.7626, + "step": 10070 + }, + { + "epoch": 0.3086612725266642, + "grad_norm": 0.807674967631599, + "learning_rate": 1.6196462904936485e-05, + "loss": 0.6278, + "step": 10071 + }, + { + "epoch": 0.3086919210494054, + "grad_norm": 1.5344822511588978, + "learning_rate": 1.6195683773343725e-05, + "loss": 0.7858, + "step": 10072 + }, + { + "epoch": 0.3087225695721466, + "grad_norm": 1.3708801995021702, + "learning_rate": 1.619490458070331e-05, + "loss": 0.7828, + "step": 10073 + }, + { + "epoch": 0.3087532180948878, + "grad_norm": 1.5777373308115283, + "learning_rate": 1.6194125327022914e-05, + "loss": 0.7876, + "step": 10074 + }, + { + "epoch": 0.308783866617629, + "grad_norm": 1.5092986710342546, + "learning_rate": 1.6193346012310213e-05, + "loss": 0.6907, + "step": 10075 + }, + { + "epoch": 0.3088145151403702, + "grad_norm": 1.741580191400595, + "learning_rate": 1.6192566636572892e-05, + "loss": 0.7704, + "step": 10076 + }, + { + "epoch": 0.30884516366311143, + "grad_norm": 1.788573389472502, + "learning_rate": 1.619178719981863e-05, + "loss": 0.7341, + "step": 10077 + }, + { + "epoch": 0.30887581218585264, + "grad_norm": 1.3819527143967176, + "learning_rate": 1.61910077020551e-05, + "loss": 0.8423, + "step": 10078 + }, + { + "epoch": 0.30890646070859384, + "grad_norm": 0.69276587967827, + "learning_rate": 1.619022814328999e-05, + "loss": 0.5902, + "step": 10079 + }, + { + "epoch": 0.30893710923133505, + "grad_norm": 1.5129807913054363, + "learning_rate": 1.618944852353098e-05, + "loss": 0.7842, + "step": 10080 + }, + { + "epoch": 0.30896775775407626, + "grad_norm": 1.594126840872066, + "learning_rate": 1.6188668842785747e-05, + "loss": 0.7379, + "step": 10081 + }, + { + "epoch": 0.30899840627681746, + "grad_norm": 1.6414491742245334, + "learning_rate": 1.618788910106198e-05, + "loss": 0.6655, + "step": 10082 + }, + { + "epoch": 0.30902905479955867, + "grad_norm": 1.414570408836752, + "learning_rate": 1.6187109298367353e-05, + "loss": 0.8, + "step": 10083 + }, + { + "epoch": 0.3090597033222999, + "grad_norm": 1.5265201687874819, + "learning_rate": 1.6186329434709557e-05, + "loss": 0.6947, + "step": 10084 + }, + { + "epoch": 0.3090903518450411, + "grad_norm": 0.6828954641225554, + "learning_rate": 1.6185549510096275e-05, + "loss": 0.5975, + "step": 10085 + }, + { + "epoch": 0.3091210003677823, + "grad_norm": 1.4451913435995196, + "learning_rate": 1.618476952453519e-05, + "loss": 0.719, + "step": 10086 + }, + { + "epoch": 0.3091516488905235, + "grad_norm": 1.6509640766416709, + "learning_rate": 1.618398947803399e-05, + "loss": 0.7472, + "step": 10087 + }, + { + "epoch": 0.3091822974132647, + "grad_norm": 1.5074823121375664, + "learning_rate": 1.618320937060036e-05, + "loss": 0.7488, + "step": 10088 + }, + { + "epoch": 0.3092129459360059, + "grad_norm": 1.613840336225057, + "learning_rate": 1.6182429202241983e-05, + "loss": 0.8586, + "step": 10089 + }, + { + "epoch": 0.3092435944587471, + "grad_norm": 1.593969107748202, + "learning_rate": 1.618164897296655e-05, + "loss": 0.8259, + "step": 10090 + }, + { + "epoch": 0.3092742429814883, + "grad_norm": 0.7280848492155493, + "learning_rate": 1.6180868682781748e-05, + "loss": 0.6314, + "step": 10091 + }, + { + "epoch": 0.3093048915042295, + "grad_norm": 1.6378390880990774, + "learning_rate": 1.6180088331695268e-05, + "loss": 0.781, + "step": 10092 + }, + { + "epoch": 0.3093355400269707, + "grad_norm": 1.3411655817693238, + "learning_rate": 1.6179307919714797e-05, + "loss": 0.6983, + "step": 10093 + }, + { + "epoch": 0.30936618854971193, + "grad_norm": 1.430758991146581, + "learning_rate": 1.617852744684802e-05, + "loss": 0.6758, + "step": 10094 + }, + { + "epoch": 0.30939683707245313, + "grad_norm": 1.351985478013833, + "learning_rate": 1.6177746913102634e-05, + "loss": 0.7744, + "step": 10095 + }, + { + "epoch": 0.3094274855951943, + "grad_norm": 1.4438408012802388, + "learning_rate": 1.6176966318486328e-05, + "loss": 0.7581, + "step": 10096 + }, + { + "epoch": 0.3094581341179355, + "grad_norm": 1.380961007780601, + "learning_rate": 1.6176185663006788e-05, + "loss": 0.7046, + "step": 10097 + }, + { + "epoch": 0.3094887826406767, + "grad_norm": 1.386095975232646, + "learning_rate": 1.6175404946671715e-05, + "loss": 0.7442, + "step": 10098 + }, + { + "epoch": 0.3095194311634179, + "grad_norm": 1.484385714169784, + "learning_rate": 1.6174624169488794e-05, + "loss": 0.6236, + "step": 10099 + }, + { + "epoch": 0.3095500796861591, + "grad_norm": 1.3898839526625286, + "learning_rate": 1.6173843331465722e-05, + "loss": 0.6491, + "step": 10100 + }, + { + "epoch": 0.3095807282089003, + "grad_norm": 1.388667898007979, + "learning_rate": 1.617306243261019e-05, + "loss": 0.712, + "step": 10101 + }, + { + "epoch": 0.3096113767316415, + "grad_norm": 1.4940500197303361, + "learning_rate": 1.6172281472929898e-05, + "loss": 0.7554, + "step": 10102 + }, + { + "epoch": 0.3096420252543827, + "grad_norm": 1.6137552509137238, + "learning_rate": 1.6171500452432534e-05, + "loss": 0.6964, + "step": 10103 + }, + { + "epoch": 0.30967267377712393, + "grad_norm": 1.3507513631608379, + "learning_rate": 1.61707193711258e-05, + "loss": 0.682, + "step": 10104 + }, + { + "epoch": 0.30970332229986514, + "grad_norm": 1.6050309659834834, + "learning_rate": 1.6169938229017387e-05, + "loss": 0.7499, + "step": 10105 + }, + { + "epoch": 0.30973397082260634, + "grad_norm": 1.5588102711315566, + "learning_rate": 1.6169157026114998e-05, + "loss": 0.7719, + "step": 10106 + }, + { + "epoch": 0.30976461934534755, + "grad_norm": 0.7644692612140188, + "learning_rate": 1.6168375762426324e-05, + "loss": 0.5872, + "step": 10107 + }, + { + "epoch": 0.30979526786808875, + "grad_norm": 0.7333262634663964, + "learning_rate": 1.6167594437959064e-05, + "loss": 0.6157, + "step": 10108 + }, + { + "epoch": 0.30982591639082996, + "grad_norm": 1.6112748318894983, + "learning_rate": 1.6166813052720918e-05, + "loss": 0.6854, + "step": 10109 + }, + { + "epoch": 0.30985656491357116, + "grad_norm": 1.5220302883292949, + "learning_rate": 1.6166031606719585e-05, + "loss": 0.7638, + "step": 10110 + }, + { + "epoch": 0.30988721343631237, + "grad_norm": 0.721367747739245, + "learning_rate": 1.6165250099962765e-05, + "loss": 0.6101, + "step": 10111 + }, + { + "epoch": 0.3099178619590536, + "grad_norm": 1.5060281712941381, + "learning_rate": 1.616446853245816e-05, + "loss": 0.731, + "step": 10112 + }, + { + "epoch": 0.3099485104817948, + "grad_norm": 1.4184692002364752, + "learning_rate": 1.616368690421347e-05, + "loss": 0.683, + "step": 10113 + }, + { + "epoch": 0.309979159004536, + "grad_norm": 1.4150128052659785, + "learning_rate": 1.6162905215236392e-05, + "loss": 0.6537, + "step": 10114 + }, + { + "epoch": 0.3100098075272772, + "grad_norm": 0.7823518738942409, + "learning_rate": 1.616212346553464e-05, + "loss": 0.625, + "step": 10115 + }, + { + "epoch": 0.3100404560500184, + "grad_norm": 1.625991332650211, + "learning_rate": 1.61613416551159e-05, + "loss": 0.7102, + "step": 10116 + }, + { + "epoch": 0.3100711045727596, + "grad_norm": 1.5254929894504805, + "learning_rate": 1.6160559783987885e-05, + "loss": 0.8426, + "step": 10117 + }, + { + "epoch": 0.3101017530955008, + "grad_norm": 1.5021154672218258, + "learning_rate": 1.6159777852158304e-05, + "loss": 0.8343, + "step": 10118 + }, + { + "epoch": 0.310132401618242, + "grad_norm": 1.4298435659440836, + "learning_rate": 1.615899585963485e-05, + "loss": 0.8131, + "step": 10119 + }, + { + "epoch": 0.3101630501409832, + "grad_norm": 1.4032680774632689, + "learning_rate": 1.615821380642524e-05, + "loss": 0.6162, + "step": 10120 + }, + { + "epoch": 0.3101936986637244, + "grad_norm": 0.6710043564033593, + "learning_rate": 1.6157431692537167e-05, + "loss": 0.6153, + "step": 10121 + }, + { + "epoch": 0.31022434718646563, + "grad_norm": 1.4719372643543376, + "learning_rate": 1.6156649517978348e-05, + "loss": 0.7633, + "step": 10122 + }, + { + "epoch": 0.31025499570920684, + "grad_norm": 0.7115101022951505, + "learning_rate": 1.6155867282756486e-05, + "loss": 0.6122, + "step": 10123 + }, + { + "epoch": 0.31028564423194804, + "grad_norm": 1.6777117774898405, + "learning_rate": 1.6155084986879286e-05, + "loss": 0.7318, + "step": 10124 + }, + { + "epoch": 0.31031629275468925, + "grad_norm": 1.4496394207070724, + "learning_rate": 1.6154302630354463e-05, + "loss": 0.8102, + "step": 10125 + }, + { + "epoch": 0.31034694127743045, + "grad_norm": 1.5017701279308897, + "learning_rate": 1.6153520213189718e-05, + "loss": 0.7243, + "step": 10126 + }, + { + "epoch": 0.3103775898001716, + "grad_norm": 1.5863636584935654, + "learning_rate": 1.6152737735392765e-05, + "loss": 0.7755, + "step": 10127 + }, + { + "epoch": 0.3104082383229128, + "grad_norm": 1.5596561227694155, + "learning_rate": 1.6151955196971312e-05, + "loss": 0.7772, + "step": 10128 + }, + { + "epoch": 0.310438886845654, + "grad_norm": 1.3380722123203275, + "learning_rate": 1.6151172597933072e-05, + "loss": 0.7113, + "step": 10129 + }, + { + "epoch": 0.3104695353683952, + "grad_norm": 0.7202584031100744, + "learning_rate": 1.6150389938285752e-05, + "loss": 0.6123, + "step": 10130 + }, + { + "epoch": 0.3105001838911364, + "grad_norm": 1.5313681664662215, + "learning_rate": 1.614960721803707e-05, + "loss": 0.849, + "step": 10131 + }, + { + "epoch": 0.31053083241387763, + "grad_norm": 1.3540975753395195, + "learning_rate": 1.6148824437194734e-05, + "loss": 0.7824, + "step": 10132 + }, + { + "epoch": 0.31056148093661884, + "grad_norm": 1.4351025673683753, + "learning_rate": 1.614804159576646e-05, + "loss": 0.8475, + "step": 10133 + }, + { + "epoch": 0.31059212945936004, + "grad_norm": 1.4220315779062196, + "learning_rate": 1.6147258693759952e-05, + "loss": 0.8174, + "step": 10134 + }, + { + "epoch": 0.31062277798210125, + "grad_norm": 1.48086062642922, + "learning_rate": 1.6146475731182937e-05, + "loss": 0.8438, + "step": 10135 + }, + { + "epoch": 0.31065342650484246, + "grad_norm": 1.4263551187419738, + "learning_rate": 1.614569270804312e-05, + "loss": 0.7406, + "step": 10136 + }, + { + "epoch": 0.31068407502758366, + "grad_norm": 1.2972260915929121, + "learning_rate": 1.6144909624348222e-05, + "loss": 0.7751, + "step": 10137 + }, + { + "epoch": 0.31071472355032487, + "grad_norm": 1.3929523252448768, + "learning_rate": 1.6144126480105957e-05, + "loss": 0.6721, + "step": 10138 + }, + { + "epoch": 0.3107453720730661, + "grad_norm": 1.3332929393163444, + "learning_rate": 1.6143343275324044e-05, + "loss": 0.6996, + "step": 10139 + }, + { + "epoch": 0.3107760205958073, + "grad_norm": 1.336827115950492, + "learning_rate": 1.6142560010010196e-05, + "loss": 0.7783, + "step": 10140 + }, + { + "epoch": 0.3108066691185485, + "grad_norm": 1.441008321222763, + "learning_rate": 1.614177668417213e-05, + "loss": 0.7402, + "step": 10141 + }, + { + "epoch": 0.3108373176412897, + "grad_norm": 0.7999992205228618, + "learning_rate": 1.614099329781757e-05, + "loss": 0.6041, + "step": 10142 + }, + { + "epoch": 0.3108679661640309, + "grad_norm": 1.5338370308682614, + "learning_rate": 1.6140209850954232e-05, + "loss": 0.6578, + "step": 10143 + }, + { + "epoch": 0.3108986146867721, + "grad_norm": 0.7088311016088263, + "learning_rate": 1.6139426343589836e-05, + "loss": 0.6241, + "step": 10144 + }, + { + "epoch": 0.3109292632095133, + "grad_norm": 1.4418298559330152, + "learning_rate": 1.61386427757321e-05, + "loss": 0.7903, + "step": 10145 + }, + { + "epoch": 0.3109599117322545, + "grad_norm": 1.2920947950890067, + "learning_rate": 1.6137859147388745e-05, + "loss": 0.6085, + "step": 10146 + }, + { + "epoch": 0.3109905602549957, + "grad_norm": 1.5537801332032881, + "learning_rate": 1.6137075458567497e-05, + "loss": 0.7632, + "step": 10147 + }, + { + "epoch": 0.3110212087777369, + "grad_norm": 1.3494017398683473, + "learning_rate": 1.6136291709276068e-05, + "loss": 0.8314, + "step": 10148 + }, + { + "epoch": 0.31105185730047813, + "grad_norm": 1.364594057571942, + "learning_rate": 1.613550789952219e-05, + "loss": 0.7397, + "step": 10149 + }, + { + "epoch": 0.31108250582321934, + "grad_norm": 1.693078589110873, + "learning_rate": 1.6134724029313583e-05, + "loss": 0.7688, + "step": 10150 + }, + { + "epoch": 0.31111315434596054, + "grad_norm": 1.5337231117250947, + "learning_rate": 1.613394009865797e-05, + "loss": 0.6181, + "step": 10151 + }, + { + "epoch": 0.31114380286870175, + "grad_norm": 1.7071515256802585, + "learning_rate": 1.613315610756308e-05, + "loss": 0.819, + "step": 10152 + }, + { + "epoch": 0.31117445139144295, + "grad_norm": 1.6334527692764653, + "learning_rate": 1.613237205603663e-05, + "loss": 0.6914, + "step": 10153 + }, + { + "epoch": 0.31120509991418416, + "grad_norm": 1.438437241160091, + "learning_rate": 1.613158794408635e-05, + "loss": 0.7787, + "step": 10154 + }, + { + "epoch": 0.31123574843692536, + "grad_norm": 1.522570053022492, + "learning_rate": 1.613080377171996e-05, + "loss": 0.7505, + "step": 10155 + }, + { + "epoch": 0.31126639695966657, + "grad_norm": 1.4643951600842717, + "learning_rate": 1.6130019538945196e-05, + "loss": 0.7426, + "step": 10156 + }, + { + "epoch": 0.3112970454824078, + "grad_norm": 1.4466508999846603, + "learning_rate": 1.6129235245769785e-05, + "loss": 0.7897, + "step": 10157 + }, + { + "epoch": 0.3113276940051489, + "grad_norm": 1.4293648328587294, + "learning_rate": 1.6128450892201446e-05, + "loss": 0.6882, + "step": 10158 + }, + { + "epoch": 0.31135834252789013, + "grad_norm": 1.549145300823973, + "learning_rate": 1.6127666478247916e-05, + "loss": 0.7977, + "step": 10159 + }, + { + "epoch": 0.31138899105063134, + "grad_norm": 1.56670590448055, + "learning_rate": 1.612688200391691e-05, + "loss": 0.7659, + "step": 10160 + }, + { + "epoch": 0.31141963957337254, + "grad_norm": 1.7155675624280065, + "learning_rate": 1.612609746921618e-05, + "loss": 0.8509, + "step": 10161 + }, + { + "epoch": 0.31145028809611375, + "grad_norm": 1.4701597395275692, + "learning_rate": 1.612531287415344e-05, + "loss": 0.6542, + "step": 10162 + }, + { + "epoch": 0.31148093661885495, + "grad_norm": 1.4894135452618829, + "learning_rate": 1.6124528218736427e-05, + "loss": 0.7528, + "step": 10163 + }, + { + "epoch": 0.31151158514159616, + "grad_norm": 1.4120819986222242, + "learning_rate": 1.6123743502972867e-05, + "loss": 0.7688, + "step": 10164 + }, + { + "epoch": 0.31154223366433736, + "grad_norm": 1.5415601713824483, + "learning_rate": 1.61229587268705e-05, + "loss": 0.7214, + "step": 10165 + }, + { + "epoch": 0.31157288218707857, + "grad_norm": 1.4122021593137581, + "learning_rate": 1.6122173890437046e-05, + "loss": 0.8034, + "step": 10166 + }, + { + "epoch": 0.3116035307098198, + "grad_norm": 1.6376037179198584, + "learning_rate": 1.6121388993680254e-05, + "loss": 0.8108, + "step": 10167 + }, + { + "epoch": 0.311634179232561, + "grad_norm": 1.5160902852480225, + "learning_rate": 1.6120604036607846e-05, + "loss": 0.7303, + "step": 10168 + }, + { + "epoch": 0.3116648277553022, + "grad_norm": 1.540171267570598, + "learning_rate": 1.6119819019227563e-05, + "loss": 0.8085, + "step": 10169 + }, + { + "epoch": 0.3116954762780434, + "grad_norm": 1.2789203365757378, + "learning_rate": 1.6119033941547133e-05, + "loss": 0.8044, + "step": 10170 + }, + { + "epoch": 0.3117261248007846, + "grad_norm": 1.4937441594074379, + "learning_rate": 1.61182488035743e-05, + "loss": 0.7223, + "step": 10171 + }, + { + "epoch": 0.3117567733235258, + "grad_norm": 1.3583805746112145, + "learning_rate": 1.6117463605316793e-05, + "loss": 0.6565, + "step": 10172 + }, + { + "epoch": 0.311787421846267, + "grad_norm": 1.477659815800058, + "learning_rate": 1.6116678346782357e-05, + "loss": 0.6593, + "step": 10173 + }, + { + "epoch": 0.3118180703690082, + "grad_norm": 1.7332742706288449, + "learning_rate": 1.611589302797872e-05, + "loss": 0.7962, + "step": 10174 + }, + { + "epoch": 0.3118487188917494, + "grad_norm": 1.3153662139697415, + "learning_rate": 1.6115107648913625e-05, + "loss": 0.7436, + "step": 10175 + }, + { + "epoch": 0.3118793674144906, + "grad_norm": 1.5578998787176153, + "learning_rate": 1.611432220959481e-05, + "loss": 0.7621, + "step": 10176 + }, + { + "epoch": 0.31191001593723183, + "grad_norm": 1.4418811801027653, + "learning_rate": 1.6113536710030016e-05, + "loss": 0.7914, + "step": 10177 + }, + { + "epoch": 0.31194066445997304, + "grad_norm": 1.5802248069090765, + "learning_rate": 1.6112751150226977e-05, + "loss": 0.8228, + "step": 10178 + }, + { + "epoch": 0.31197131298271424, + "grad_norm": 1.6266636678104316, + "learning_rate": 1.6111965530193438e-05, + "loss": 0.7043, + "step": 10179 + }, + { + "epoch": 0.31200196150545545, + "grad_norm": 1.5195235090886985, + "learning_rate": 1.611117984993714e-05, + "loss": 0.7892, + "step": 10180 + }, + { + "epoch": 0.31203261002819666, + "grad_norm": 1.417485032790486, + "learning_rate": 1.611039410946582e-05, + "loss": 0.7363, + "step": 10181 + }, + { + "epoch": 0.31206325855093786, + "grad_norm": 1.4134488299403385, + "learning_rate": 1.6109608308787227e-05, + "loss": 0.7033, + "step": 10182 + }, + { + "epoch": 0.31209390707367907, + "grad_norm": 1.4787312049957135, + "learning_rate": 1.61088224479091e-05, + "loss": 0.838, + "step": 10183 + }, + { + "epoch": 0.3121245555964203, + "grad_norm": 1.7782358619109497, + "learning_rate": 1.6108036526839183e-05, + "loss": 0.7683, + "step": 10184 + }, + { + "epoch": 0.3121552041191615, + "grad_norm": 1.4022076140981958, + "learning_rate": 1.6107250545585218e-05, + "loss": 0.7187, + "step": 10185 + }, + { + "epoch": 0.3121858526419027, + "grad_norm": 1.4547749380990445, + "learning_rate": 1.610646450415495e-05, + "loss": 0.7099, + "step": 10186 + }, + { + "epoch": 0.3122165011646439, + "grad_norm": 1.2370176310503955, + "learning_rate": 1.6105678402556125e-05, + "loss": 0.8283, + "step": 10187 + }, + { + "epoch": 0.3122471496873851, + "grad_norm": 0.9286020058899722, + "learning_rate": 1.6104892240796492e-05, + "loss": 0.6323, + "step": 10188 + }, + { + "epoch": 0.31227779821012625, + "grad_norm": 1.58300443491985, + "learning_rate": 1.6104106018883787e-05, + "loss": 0.8862, + "step": 10189 + }, + { + "epoch": 0.31230844673286745, + "grad_norm": 2.1683050797418715, + "learning_rate": 1.610331973682577e-05, + "loss": 0.8556, + "step": 10190 + }, + { + "epoch": 0.31233909525560866, + "grad_norm": 1.6452953714489806, + "learning_rate": 1.6102533394630174e-05, + "loss": 0.6773, + "step": 10191 + }, + { + "epoch": 0.31236974377834986, + "grad_norm": 1.3929287343255723, + "learning_rate": 1.6101746992304764e-05, + "loss": 0.7938, + "step": 10192 + }, + { + "epoch": 0.31240039230109107, + "grad_norm": 1.4618832622867004, + "learning_rate": 1.6100960529857274e-05, + "loss": 0.7752, + "step": 10193 + }, + { + "epoch": 0.3124310408238323, + "grad_norm": 1.5546697006595884, + "learning_rate": 1.6100174007295462e-05, + "loss": 0.7624, + "step": 10194 + }, + { + "epoch": 0.3124616893465735, + "grad_norm": 1.5083082139938255, + "learning_rate": 1.6099387424627074e-05, + "loss": 0.7185, + "step": 10195 + }, + { + "epoch": 0.3124923378693147, + "grad_norm": 1.644300926920445, + "learning_rate": 1.6098600781859863e-05, + "loss": 0.841, + "step": 10196 + }, + { + "epoch": 0.3125229863920559, + "grad_norm": 1.5538109205443031, + "learning_rate": 1.6097814079001572e-05, + "loss": 0.805, + "step": 10197 + }, + { + "epoch": 0.3125536349147971, + "grad_norm": 1.4156712249433026, + "learning_rate": 1.6097027316059962e-05, + "loss": 0.8072, + "step": 10198 + }, + { + "epoch": 0.3125842834375383, + "grad_norm": 1.4124913228594158, + "learning_rate": 1.6096240493042784e-05, + "loss": 0.7175, + "step": 10199 + }, + { + "epoch": 0.3126149319602795, + "grad_norm": 1.4182264955936885, + "learning_rate": 1.6095453609957787e-05, + "loss": 0.7155, + "step": 10200 + }, + { + "epoch": 0.3126455804830207, + "grad_norm": 1.5049976361988504, + "learning_rate": 1.6094666666812726e-05, + "loss": 0.7547, + "step": 10201 + }, + { + "epoch": 0.3126762290057619, + "grad_norm": 1.311442008702434, + "learning_rate": 1.6093879663615357e-05, + "loss": 0.8603, + "step": 10202 + }, + { + "epoch": 0.3127068775285031, + "grad_norm": 1.3005419529942732, + "learning_rate": 1.6093092600373428e-05, + "loss": 0.7373, + "step": 10203 + }, + { + "epoch": 0.31273752605124433, + "grad_norm": 1.4090665478767694, + "learning_rate": 1.60923054770947e-05, + "loss": 0.7345, + "step": 10204 + }, + { + "epoch": 0.31276817457398554, + "grad_norm": 1.6722181252158725, + "learning_rate": 1.609151829378693e-05, + "loss": 0.7604, + "step": 10205 + }, + { + "epoch": 0.31279882309672674, + "grad_norm": 1.4955117933015551, + "learning_rate": 1.6090731050457868e-05, + "loss": 0.7827, + "step": 10206 + }, + { + "epoch": 0.31282947161946795, + "grad_norm": 1.5218024595151531, + "learning_rate": 1.6089943747115278e-05, + "loss": 0.79, + "step": 10207 + }, + { + "epoch": 0.31286012014220915, + "grad_norm": 1.3488385479704452, + "learning_rate": 1.6089156383766913e-05, + "loss": 0.7329, + "step": 10208 + }, + { + "epoch": 0.31289076866495036, + "grad_norm": 1.3501702268780373, + "learning_rate": 1.6088368960420534e-05, + "loss": 0.6685, + "step": 10209 + }, + { + "epoch": 0.31292141718769156, + "grad_norm": 1.4833281123409903, + "learning_rate": 1.6087581477083894e-05, + "loss": 0.7922, + "step": 10210 + }, + { + "epoch": 0.31295206571043277, + "grad_norm": 1.4432686669187005, + "learning_rate": 1.6086793933764754e-05, + "loss": 0.7325, + "step": 10211 + }, + { + "epoch": 0.312982714233174, + "grad_norm": 0.8809470413911668, + "learning_rate": 1.608600633047088e-05, + "loss": 0.6687, + "step": 10212 + }, + { + "epoch": 0.3130133627559152, + "grad_norm": 0.7960401490865405, + "learning_rate": 1.6085218667210033e-05, + "loss": 0.6383, + "step": 10213 + }, + { + "epoch": 0.3130440112786564, + "grad_norm": 1.5097772280327661, + "learning_rate": 1.6084430943989963e-05, + "loss": 0.7822, + "step": 10214 + }, + { + "epoch": 0.3130746598013976, + "grad_norm": 1.49172188243896, + "learning_rate": 1.608364316081844e-05, + "loss": 0.802, + "step": 10215 + }, + { + "epoch": 0.3131053083241388, + "grad_norm": 1.6396256311043396, + "learning_rate": 1.6082855317703226e-05, + "loss": 0.6766, + "step": 10216 + }, + { + "epoch": 0.31313595684688, + "grad_norm": 0.8081130725296107, + "learning_rate": 1.608206741465208e-05, + "loss": 0.6496, + "step": 10217 + }, + { + "epoch": 0.3131666053696212, + "grad_norm": 0.7974937611205865, + "learning_rate": 1.6081279451672768e-05, + "loss": 0.6342, + "step": 10218 + }, + { + "epoch": 0.3131972538923624, + "grad_norm": 1.374113530767023, + "learning_rate": 1.6080491428773057e-05, + "loss": 0.7828, + "step": 10219 + }, + { + "epoch": 0.31322790241510357, + "grad_norm": 1.3738294756022071, + "learning_rate": 1.60797033459607e-05, + "loss": 0.7984, + "step": 10220 + }, + { + "epoch": 0.31325855093784477, + "grad_norm": 1.4861350311256338, + "learning_rate": 1.6078915203243476e-05, + "loss": 0.8294, + "step": 10221 + }, + { + "epoch": 0.313289199460586, + "grad_norm": 1.5668150386789008, + "learning_rate": 1.6078127000629144e-05, + "loss": 0.7571, + "step": 10222 + }, + { + "epoch": 0.3133198479833272, + "grad_norm": 1.4424735701994478, + "learning_rate": 1.6077338738125473e-05, + "loss": 0.843, + "step": 10223 + }, + { + "epoch": 0.3133504965060684, + "grad_norm": 0.7235637598918389, + "learning_rate": 1.6076550415740228e-05, + "loss": 0.6129, + "step": 10224 + }, + { + "epoch": 0.3133811450288096, + "grad_norm": 1.6773192364621856, + "learning_rate": 1.6075762033481175e-05, + "loss": 0.8218, + "step": 10225 + }, + { + "epoch": 0.3134117935515508, + "grad_norm": 1.5986506163751526, + "learning_rate": 1.6074973591356083e-05, + "loss": 0.8515, + "step": 10226 + }, + { + "epoch": 0.313442442074292, + "grad_norm": 1.6084574289156484, + "learning_rate": 1.6074185089372728e-05, + "loss": 0.8189, + "step": 10227 + }, + { + "epoch": 0.3134730905970332, + "grad_norm": 1.4573437588117488, + "learning_rate": 1.607339652753887e-05, + "loss": 0.7509, + "step": 10228 + }, + { + "epoch": 0.3135037391197744, + "grad_norm": 1.457402037819103, + "learning_rate": 1.607260790586228e-05, + "loss": 0.7917, + "step": 10229 + }, + { + "epoch": 0.3135343876425156, + "grad_norm": 1.3425480605237694, + "learning_rate": 1.6071819224350733e-05, + "loss": 0.6821, + "step": 10230 + }, + { + "epoch": 0.3135650361652568, + "grad_norm": 1.5323838389618416, + "learning_rate": 1.6071030483011998e-05, + "loss": 0.812, + "step": 10231 + }, + { + "epoch": 0.31359568468799803, + "grad_norm": 1.5315569082932703, + "learning_rate": 1.6070241681853845e-05, + "loss": 0.8128, + "step": 10232 + }, + { + "epoch": 0.31362633321073924, + "grad_norm": 1.3455271752623272, + "learning_rate": 1.606945282088405e-05, + "loss": 0.6669, + "step": 10233 + }, + { + "epoch": 0.31365698173348044, + "grad_norm": 1.510384436377276, + "learning_rate": 1.6068663900110376e-05, + "loss": 0.719, + "step": 10234 + }, + { + "epoch": 0.31368763025622165, + "grad_norm": 1.6650582568203596, + "learning_rate": 1.6067874919540612e-05, + "loss": 0.777, + "step": 10235 + }, + { + "epoch": 0.31371827877896286, + "grad_norm": 1.4812977685143762, + "learning_rate": 1.606708587918252e-05, + "loss": 0.6922, + "step": 10236 + }, + { + "epoch": 0.31374892730170406, + "grad_norm": 1.428611686721302, + "learning_rate": 1.6066296779043877e-05, + "loss": 0.867, + "step": 10237 + }, + { + "epoch": 0.31377957582444527, + "grad_norm": 1.4670162675342493, + "learning_rate": 1.6065507619132464e-05, + "loss": 0.7382, + "step": 10238 + }, + { + "epoch": 0.3138102243471865, + "grad_norm": 1.6001393721270243, + "learning_rate": 1.606471839945605e-05, + "loss": 0.8997, + "step": 10239 + }, + { + "epoch": 0.3138408728699277, + "grad_norm": 1.668154222287971, + "learning_rate": 1.6063929120022414e-05, + "loss": 0.7224, + "step": 10240 + }, + { + "epoch": 0.3138715213926689, + "grad_norm": 1.4870742549859886, + "learning_rate": 1.6063139780839334e-05, + "loss": 0.7267, + "step": 10241 + }, + { + "epoch": 0.3139021699154101, + "grad_norm": 1.6775792238099485, + "learning_rate": 1.606235038191459e-05, + "loss": 0.8798, + "step": 10242 + }, + { + "epoch": 0.3139328184381513, + "grad_norm": 1.4577355863513566, + "learning_rate": 1.606156092325595e-05, + "loss": 0.7082, + "step": 10243 + }, + { + "epoch": 0.3139634669608925, + "grad_norm": 1.3294733405850294, + "learning_rate": 1.60607714048712e-05, + "loss": 0.7541, + "step": 10244 + }, + { + "epoch": 0.3139941154836337, + "grad_norm": 1.4024015706214052, + "learning_rate": 1.605998182676812e-05, + "loss": 0.8306, + "step": 10245 + }, + { + "epoch": 0.3140247640063749, + "grad_norm": 1.4128848487345569, + "learning_rate": 1.6059192188954492e-05, + "loss": 0.7908, + "step": 10246 + }, + { + "epoch": 0.3140554125291161, + "grad_norm": 1.6992574388913209, + "learning_rate": 1.605840249143809e-05, + "loss": 0.8143, + "step": 10247 + }, + { + "epoch": 0.3140860610518573, + "grad_norm": 1.6405901687116382, + "learning_rate": 1.6057612734226698e-05, + "loss": 0.7803, + "step": 10248 + }, + { + "epoch": 0.31411670957459853, + "grad_norm": 1.5451095573476559, + "learning_rate": 1.6056822917328095e-05, + "loss": 0.8028, + "step": 10249 + }, + { + "epoch": 0.31414735809733974, + "grad_norm": 1.5529814069050012, + "learning_rate": 1.605603304075007e-05, + "loss": 0.7751, + "step": 10250 + }, + { + "epoch": 0.3141780066200809, + "grad_norm": 0.7981712546901358, + "learning_rate": 1.60552431045004e-05, + "loss": 0.6287, + "step": 10251 + }, + { + "epoch": 0.3142086551428221, + "grad_norm": 0.725236686926159, + "learning_rate": 1.605445310858687e-05, + "loss": 0.6465, + "step": 10252 + }, + { + "epoch": 0.3142393036655633, + "grad_norm": 1.3663689604793166, + "learning_rate": 1.6053663053017267e-05, + "loss": 0.7572, + "step": 10253 + }, + { + "epoch": 0.3142699521883045, + "grad_norm": 1.3759543396923928, + "learning_rate": 1.6052872937799372e-05, + "loss": 0.7703, + "step": 10254 + }, + { + "epoch": 0.3143006007110457, + "grad_norm": 1.2811623375959467, + "learning_rate": 1.6052082762940972e-05, + "loss": 0.7487, + "step": 10255 + }, + { + "epoch": 0.3143312492337869, + "grad_norm": 1.3310700399426396, + "learning_rate": 1.6051292528449847e-05, + "loss": 0.6458, + "step": 10256 + }, + { + "epoch": 0.3143618977565281, + "grad_norm": 0.7285415511349328, + "learning_rate": 1.6050502234333793e-05, + "loss": 0.6192, + "step": 10257 + }, + { + "epoch": 0.3143925462792693, + "grad_norm": 1.5740738470983962, + "learning_rate": 1.6049711880600595e-05, + "loss": 0.8328, + "step": 10258 + }, + { + "epoch": 0.31442319480201053, + "grad_norm": 1.4313345736902108, + "learning_rate": 1.6048921467258033e-05, + "loss": 0.7647, + "step": 10259 + }, + { + "epoch": 0.31445384332475174, + "grad_norm": 0.7326603676823169, + "learning_rate": 1.6048130994313903e-05, + "loss": 0.6114, + "step": 10260 + }, + { + "epoch": 0.31448449184749294, + "grad_norm": 1.467174313734994, + "learning_rate": 1.604734046177599e-05, + "loss": 0.7298, + "step": 10261 + }, + { + "epoch": 0.31451514037023415, + "grad_norm": 1.5821978902514877, + "learning_rate": 1.6046549869652085e-05, + "loss": 0.8474, + "step": 10262 + }, + { + "epoch": 0.31454578889297535, + "grad_norm": 1.2894149147814071, + "learning_rate": 1.6045759217949976e-05, + "loss": 0.7331, + "step": 10263 + }, + { + "epoch": 0.31457643741571656, + "grad_norm": 0.6901188568138599, + "learning_rate": 1.6044968506677452e-05, + "loss": 0.6315, + "step": 10264 + }, + { + "epoch": 0.31460708593845776, + "grad_norm": 1.675899489564622, + "learning_rate": 1.6044177735842314e-05, + "loss": 0.7142, + "step": 10265 + }, + { + "epoch": 0.31463773446119897, + "grad_norm": 1.386296731157903, + "learning_rate": 1.604338690545234e-05, + "loss": 0.8471, + "step": 10266 + }, + { + "epoch": 0.3146683829839402, + "grad_norm": 1.2856355453581798, + "learning_rate": 1.6042596015515326e-05, + "loss": 0.6813, + "step": 10267 + }, + { + "epoch": 0.3146990315066814, + "grad_norm": 1.3144870753861184, + "learning_rate": 1.6041805066039073e-05, + "loss": 0.7535, + "step": 10268 + }, + { + "epoch": 0.3147296800294226, + "grad_norm": 1.6241859665501446, + "learning_rate": 1.6041014057031368e-05, + "loss": 0.9672, + "step": 10269 + }, + { + "epoch": 0.3147603285521638, + "grad_norm": 1.4566869810356333, + "learning_rate": 1.6040222988500005e-05, + "loss": 0.7656, + "step": 10270 + }, + { + "epoch": 0.314790977074905, + "grad_norm": 1.5775982752909563, + "learning_rate": 1.6039431860452777e-05, + "loss": 0.841, + "step": 10271 + }, + { + "epoch": 0.3148216255976462, + "grad_norm": 0.7233628697125504, + "learning_rate": 1.6038640672897487e-05, + "loss": 0.611, + "step": 10272 + }, + { + "epoch": 0.3148522741203874, + "grad_norm": 1.4690798591901177, + "learning_rate": 1.6037849425841923e-05, + "loss": 0.8417, + "step": 10273 + }, + { + "epoch": 0.3148829226431286, + "grad_norm": 1.4488449450627816, + "learning_rate": 1.6037058119293882e-05, + "loss": 0.6934, + "step": 10274 + }, + { + "epoch": 0.3149135711658698, + "grad_norm": 0.696979119284125, + "learning_rate": 1.6036266753261163e-05, + "loss": 0.6282, + "step": 10275 + }, + { + "epoch": 0.314944219688611, + "grad_norm": 1.4352045554724238, + "learning_rate": 1.6035475327751563e-05, + "loss": 0.8669, + "step": 10276 + }, + { + "epoch": 0.31497486821135223, + "grad_norm": 1.5187740273759818, + "learning_rate": 1.603468384277288e-05, + "loss": 0.838, + "step": 10277 + }, + { + "epoch": 0.31500551673409344, + "grad_norm": 1.5572095973621156, + "learning_rate": 1.6033892298332916e-05, + "loss": 0.5941, + "step": 10278 + }, + { + "epoch": 0.31503616525683464, + "grad_norm": 1.3372894340577544, + "learning_rate": 1.6033100694439468e-05, + "loss": 0.7532, + "step": 10279 + }, + { + "epoch": 0.31506681377957585, + "grad_norm": 1.4737618978545768, + "learning_rate": 1.603230903110033e-05, + "loss": 0.6948, + "step": 10280 + }, + { + "epoch": 0.31509746230231706, + "grad_norm": 1.3584779629079504, + "learning_rate": 1.6031517308323314e-05, + "loss": 0.6936, + "step": 10281 + }, + { + "epoch": 0.3151281108250582, + "grad_norm": 1.3746368214132516, + "learning_rate": 1.603072552611621e-05, + "loss": 0.7991, + "step": 10282 + }, + { + "epoch": 0.3151587593477994, + "grad_norm": 1.4589240190749528, + "learning_rate": 1.6029933684486827e-05, + "loss": 0.7416, + "step": 10283 + }, + { + "epoch": 0.3151894078705406, + "grad_norm": 1.3967431809228712, + "learning_rate": 1.6029141783442963e-05, + "loss": 0.7433, + "step": 10284 + }, + { + "epoch": 0.3152200563932818, + "grad_norm": 1.4952199580694483, + "learning_rate": 1.6028349822992425e-05, + "loss": 0.8143, + "step": 10285 + }, + { + "epoch": 0.31525070491602303, + "grad_norm": 1.2619464600606498, + "learning_rate": 1.602755780314301e-05, + "loss": 0.7534, + "step": 10286 + }, + { + "epoch": 0.31528135343876423, + "grad_norm": 1.4340283259799191, + "learning_rate": 1.6026765723902527e-05, + "loss": 0.7352, + "step": 10287 + }, + { + "epoch": 0.31531200196150544, + "grad_norm": 1.513832965939854, + "learning_rate": 1.602597358527878e-05, + "loss": 0.7952, + "step": 10288 + }, + { + "epoch": 0.31534265048424664, + "grad_norm": 1.3156228851336584, + "learning_rate": 1.6025181387279572e-05, + "loss": 0.7335, + "step": 10289 + }, + { + "epoch": 0.31537329900698785, + "grad_norm": 1.4676676463444838, + "learning_rate": 1.6024389129912715e-05, + "loss": 0.8553, + "step": 10290 + }, + { + "epoch": 0.31540394752972906, + "grad_norm": 1.505432453749987, + "learning_rate": 1.6023596813186008e-05, + "loss": 0.8778, + "step": 10291 + }, + { + "epoch": 0.31543459605247026, + "grad_norm": 1.3784279779256379, + "learning_rate": 1.6022804437107256e-05, + "loss": 0.7937, + "step": 10292 + }, + { + "epoch": 0.31546524457521147, + "grad_norm": 1.2761133406410026, + "learning_rate": 1.6022012001684274e-05, + "loss": 0.8586, + "step": 10293 + }, + { + "epoch": 0.3154958930979527, + "grad_norm": 1.394938721962818, + "learning_rate": 1.6021219506924865e-05, + "loss": 0.701, + "step": 10294 + }, + { + "epoch": 0.3155265416206939, + "grad_norm": 0.7721753809073678, + "learning_rate": 1.6020426952836843e-05, + "loss": 0.6194, + "step": 10295 + }, + { + "epoch": 0.3155571901434351, + "grad_norm": 1.4277875844798547, + "learning_rate": 1.6019634339428014e-05, + "loss": 0.6953, + "step": 10296 + }, + { + "epoch": 0.3155878386661763, + "grad_norm": 1.439641713189255, + "learning_rate": 1.6018841666706187e-05, + "loss": 0.7999, + "step": 10297 + }, + { + "epoch": 0.3156184871889175, + "grad_norm": 1.5470389192214438, + "learning_rate": 1.6018048934679173e-05, + "loss": 0.7581, + "step": 10298 + }, + { + "epoch": 0.3156491357116587, + "grad_norm": 1.552326840679912, + "learning_rate": 1.601725614335478e-05, + "loss": 0.7263, + "step": 10299 + }, + { + "epoch": 0.3156797842343999, + "grad_norm": 0.716311061191768, + "learning_rate": 1.6016463292740824e-05, + "loss": 0.638, + "step": 10300 + }, + { + "epoch": 0.3157104327571411, + "grad_norm": 1.3888922115203102, + "learning_rate": 1.6015670382845117e-05, + "loss": 0.7376, + "step": 10301 + }, + { + "epoch": 0.3157410812798823, + "grad_norm": 1.3846635011346409, + "learning_rate": 1.601487741367547e-05, + "loss": 0.802, + "step": 10302 + }, + { + "epoch": 0.3157717298026235, + "grad_norm": 1.4195570473999877, + "learning_rate": 1.6014084385239698e-05, + "loss": 0.7733, + "step": 10303 + }, + { + "epoch": 0.31580237832536473, + "grad_norm": 1.5253985295521209, + "learning_rate": 1.6013291297545612e-05, + "loss": 0.8191, + "step": 10304 + }, + { + "epoch": 0.31583302684810594, + "grad_norm": 1.3814957408343693, + "learning_rate": 1.601249815060103e-05, + "loss": 0.6991, + "step": 10305 + }, + { + "epoch": 0.31586367537084714, + "grad_norm": 1.4916682199605067, + "learning_rate": 1.6011704944413766e-05, + "loss": 0.7368, + "step": 10306 + }, + { + "epoch": 0.31589432389358835, + "grad_norm": 1.5535438281418386, + "learning_rate": 1.6010911678991632e-05, + "loss": 0.6754, + "step": 10307 + }, + { + "epoch": 0.31592497241632955, + "grad_norm": 1.4491275842416134, + "learning_rate": 1.601011835434245e-05, + "loss": 0.7051, + "step": 10308 + }, + { + "epoch": 0.31595562093907076, + "grad_norm": 1.503734778994748, + "learning_rate": 1.6009324970474032e-05, + "loss": 0.6856, + "step": 10309 + }, + { + "epoch": 0.31598626946181196, + "grad_norm": 1.4446160081061286, + "learning_rate": 1.6008531527394198e-05, + "loss": 0.7907, + "step": 10310 + }, + { + "epoch": 0.31601691798455317, + "grad_norm": 1.490073269731908, + "learning_rate": 1.6007738025110766e-05, + "loss": 0.776, + "step": 10311 + }, + { + "epoch": 0.3160475665072944, + "grad_norm": 1.4675094928991768, + "learning_rate": 1.600694446363156e-05, + "loss": 0.7773, + "step": 10312 + }, + { + "epoch": 0.3160782150300355, + "grad_norm": 0.7443457013447388, + "learning_rate": 1.6006150842964383e-05, + "loss": 0.602, + "step": 10313 + }, + { + "epoch": 0.31610886355277673, + "grad_norm": 1.5843774639527382, + "learning_rate": 1.6005357163117068e-05, + "loss": 0.7973, + "step": 10314 + }, + { + "epoch": 0.31613951207551794, + "grad_norm": 1.3445539803161377, + "learning_rate": 1.6004563424097435e-05, + "loss": 0.7747, + "step": 10315 + }, + { + "epoch": 0.31617016059825914, + "grad_norm": 1.355002937833354, + "learning_rate": 1.60037696259133e-05, + "loss": 0.7334, + "step": 10316 + }, + { + "epoch": 0.31620080912100035, + "grad_norm": 1.627442119051677, + "learning_rate": 1.600297576857249e-05, + "loss": 0.7509, + "step": 10317 + }, + { + "epoch": 0.31623145764374155, + "grad_norm": 1.3381411755388966, + "learning_rate": 1.600218185208282e-05, + "loss": 0.6799, + "step": 10318 + }, + { + "epoch": 0.31626210616648276, + "grad_norm": 1.4497471343928874, + "learning_rate": 1.600138787645212e-05, + "loss": 0.7499, + "step": 10319 + }, + { + "epoch": 0.31629275468922396, + "grad_norm": 0.6758911232176724, + "learning_rate": 1.6000593841688205e-05, + "loss": 0.6201, + "step": 10320 + }, + { + "epoch": 0.31632340321196517, + "grad_norm": 1.6527212407472507, + "learning_rate": 1.5999799747798907e-05, + "loss": 0.7992, + "step": 10321 + }, + { + "epoch": 0.3163540517347064, + "grad_norm": 1.479390960128429, + "learning_rate": 1.599900559479205e-05, + "loss": 0.8574, + "step": 10322 + }, + { + "epoch": 0.3163847002574476, + "grad_norm": 1.2771858069862692, + "learning_rate": 1.599821138267545e-05, + "loss": 0.6987, + "step": 10323 + }, + { + "epoch": 0.3164153487801888, + "grad_norm": 1.470395768545664, + "learning_rate": 1.599741711145694e-05, + "loss": 0.7866, + "step": 10324 + }, + { + "epoch": 0.31644599730293, + "grad_norm": 1.7145007142392834, + "learning_rate": 1.5996622781144347e-05, + "loss": 0.9833, + "step": 10325 + }, + { + "epoch": 0.3164766458256712, + "grad_norm": 1.6778733746693417, + "learning_rate": 1.599582839174549e-05, + "loss": 0.8604, + "step": 10326 + }, + { + "epoch": 0.3165072943484124, + "grad_norm": 1.2773658976446318, + "learning_rate": 1.599503394326821e-05, + "loss": 0.7928, + "step": 10327 + }, + { + "epoch": 0.3165379428711536, + "grad_norm": 1.6828232889267807, + "learning_rate": 1.599423943572032e-05, + "loss": 0.968, + "step": 10328 + }, + { + "epoch": 0.3165685913938948, + "grad_norm": 1.4952694327964762, + "learning_rate": 1.5993444869109657e-05, + "loss": 0.7275, + "step": 10329 + }, + { + "epoch": 0.316599239916636, + "grad_norm": 1.3750808595589394, + "learning_rate": 1.599265024344405e-05, + "loss": 0.6894, + "step": 10330 + }, + { + "epoch": 0.3166298884393772, + "grad_norm": 0.6897347208577307, + "learning_rate": 1.5991855558731323e-05, + "loss": 0.6149, + "step": 10331 + }, + { + "epoch": 0.31666053696211843, + "grad_norm": 1.3527484914150523, + "learning_rate": 1.5991060814979317e-05, + "loss": 0.6849, + "step": 10332 + }, + { + "epoch": 0.31669118548485964, + "grad_norm": 1.368138048198284, + "learning_rate": 1.5990266012195847e-05, + "loss": 0.8038, + "step": 10333 + }, + { + "epoch": 0.31672183400760084, + "grad_norm": 1.7617860006936428, + "learning_rate": 1.598947115038876e-05, + "loss": 0.7321, + "step": 10334 + }, + { + "epoch": 0.31675248253034205, + "grad_norm": 0.6954349254026128, + "learning_rate": 1.598867622956588e-05, + "loss": 0.6074, + "step": 10335 + }, + { + "epoch": 0.31678313105308326, + "grad_norm": 1.4960055440842075, + "learning_rate": 1.598788124973504e-05, + "loss": 0.8231, + "step": 10336 + }, + { + "epoch": 0.31681377957582446, + "grad_norm": 1.4900751008009558, + "learning_rate": 1.598708621090407e-05, + "loss": 0.7377, + "step": 10337 + }, + { + "epoch": 0.31684442809856567, + "grad_norm": 1.6163470823694184, + "learning_rate": 1.5986291113080815e-05, + "loss": 0.7091, + "step": 10338 + }, + { + "epoch": 0.3168750766213069, + "grad_norm": 0.6723806251929257, + "learning_rate": 1.59854959562731e-05, + "loss": 0.6108, + "step": 10339 + }, + { + "epoch": 0.3169057251440481, + "grad_norm": 1.49807383166075, + "learning_rate": 1.598470074048876e-05, + "loss": 0.7887, + "step": 10340 + }, + { + "epoch": 0.3169363736667893, + "grad_norm": 1.6209055356135036, + "learning_rate": 1.5983905465735633e-05, + "loss": 0.815, + "step": 10341 + }, + { + "epoch": 0.3169670221895305, + "grad_norm": 1.5240819896107145, + "learning_rate": 1.5983110132021554e-05, + "loss": 0.7653, + "step": 10342 + }, + { + "epoch": 0.3169976707122717, + "grad_norm": 1.3331120005776165, + "learning_rate": 1.598231473935436e-05, + "loss": 0.6499, + "step": 10343 + }, + { + "epoch": 0.31702831923501285, + "grad_norm": 1.379074620516198, + "learning_rate": 1.5981519287741888e-05, + "loss": 0.6927, + "step": 10344 + }, + { + "epoch": 0.31705896775775405, + "grad_norm": 0.6957554244763938, + "learning_rate": 1.5980723777191974e-05, + "loss": 0.6201, + "step": 10345 + }, + { + "epoch": 0.31708961628049526, + "grad_norm": 1.449242355011749, + "learning_rate": 1.5979928207712464e-05, + "loss": 0.7046, + "step": 10346 + }, + { + "epoch": 0.31712026480323646, + "grad_norm": 1.953295603699818, + "learning_rate": 1.597913257931119e-05, + "loss": 0.7605, + "step": 10347 + }, + { + "epoch": 0.31715091332597767, + "grad_norm": 1.4807052104745848, + "learning_rate": 1.5978336891995988e-05, + "loss": 0.8268, + "step": 10348 + }, + { + "epoch": 0.3171815618487189, + "grad_norm": 1.5298793819144763, + "learning_rate": 1.5977541145774705e-05, + "loss": 0.7003, + "step": 10349 + }, + { + "epoch": 0.3172122103714601, + "grad_norm": 1.4426065231780674, + "learning_rate": 1.5976745340655183e-05, + "loss": 0.7187, + "step": 10350 + }, + { + "epoch": 0.3172428588942013, + "grad_norm": 1.4682735423339581, + "learning_rate": 1.5975949476645258e-05, + "loss": 0.7114, + "step": 10351 + }, + { + "epoch": 0.3172735074169425, + "grad_norm": 1.6015187993235247, + "learning_rate": 1.5975153553752774e-05, + "loss": 0.7367, + "step": 10352 + }, + { + "epoch": 0.3173041559396837, + "grad_norm": 1.451149574712367, + "learning_rate": 1.5974357571985574e-05, + "loss": 0.8221, + "step": 10353 + }, + { + "epoch": 0.3173348044624249, + "grad_norm": 1.2580196746922303, + "learning_rate": 1.59735615313515e-05, + "loss": 0.7157, + "step": 10354 + }, + { + "epoch": 0.3173654529851661, + "grad_norm": 1.894435316072708, + "learning_rate": 1.597276543185839e-05, + "loss": 0.9545, + "step": 10355 + }, + { + "epoch": 0.3173961015079073, + "grad_norm": 1.8453726739924408, + "learning_rate": 1.5971969273514102e-05, + "loss": 0.826, + "step": 10356 + }, + { + "epoch": 0.3174267500306485, + "grad_norm": 1.4489454842711604, + "learning_rate": 1.5971173056326468e-05, + "loss": 0.7776, + "step": 10357 + }, + { + "epoch": 0.3174573985533897, + "grad_norm": 1.5651235661758374, + "learning_rate": 1.597037678030334e-05, + "loss": 0.9137, + "step": 10358 + }, + { + "epoch": 0.31748804707613093, + "grad_norm": 1.4349422053980614, + "learning_rate": 1.5969580445452563e-05, + "loss": 0.7477, + "step": 10359 + }, + { + "epoch": 0.31751869559887214, + "grad_norm": 1.6146934443240193, + "learning_rate": 1.5968784051781982e-05, + "loss": 0.8523, + "step": 10360 + }, + { + "epoch": 0.31754934412161334, + "grad_norm": 1.504588797606931, + "learning_rate": 1.596798759929944e-05, + "loss": 0.7157, + "step": 10361 + }, + { + "epoch": 0.31757999264435455, + "grad_norm": 1.7480917235597802, + "learning_rate": 1.5967191088012795e-05, + "loss": 0.8613, + "step": 10362 + }, + { + "epoch": 0.31761064116709575, + "grad_norm": 1.4260589120124019, + "learning_rate": 1.5966394517929887e-05, + "loss": 0.7133, + "step": 10363 + }, + { + "epoch": 0.31764128968983696, + "grad_norm": 1.7168328889414048, + "learning_rate": 1.596559788905857e-05, + "loss": 0.7945, + "step": 10364 + }, + { + "epoch": 0.31767193821257816, + "grad_norm": 1.4788986111874085, + "learning_rate": 1.5964801201406687e-05, + "loss": 0.7423, + "step": 10365 + }, + { + "epoch": 0.31770258673531937, + "grad_norm": 1.3172181821930136, + "learning_rate": 1.596400445498209e-05, + "loss": 0.7526, + "step": 10366 + }, + { + "epoch": 0.3177332352580606, + "grad_norm": 1.4311856116986044, + "learning_rate": 1.5963207649792637e-05, + "loss": 0.6764, + "step": 10367 + }, + { + "epoch": 0.3177638837808018, + "grad_norm": 1.246053306184683, + "learning_rate": 1.596241078584617e-05, + "loss": 0.7071, + "step": 10368 + }, + { + "epoch": 0.317794532303543, + "grad_norm": 1.3889261268152646, + "learning_rate": 1.5961613863150546e-05, + "loss": 0.6839, + "step": 10369 + }, + { + "epoch": 0.3178251808262842, + "grad_norm": 1.418414496608539, + "learning_rate": 1.5960816881713612e-05, + "loss": 0.7261, + "step": 10370 + }, + { + "epoch": 0.3178558293490254, + "grad_norm": 1.4953812274556757, + "learning_rate": 1.5960019841543227e-05, + "loss": 0.7839, + "step": 10371 + }, + { + "epoch": 0.3178864778717666, + "grad_norm": 1.4797600787082001, + "learning_rate": 1.5959222742647238e-05, + "loss": 0.7492, + "step": 10372 + }, + { + "epoch": 0.3179171263945078, + "grad_norm": 1.2684547968177469, + "learning_rate": 1.5958425585033505e-05, + "loss": 0.698, + "step": 10373 + }, + { + "epoch": 0.317947774917249, + "grad_norm": 1.6242593268324776, + "learning_rate": 1.5957628368709882e-05, + "loss": 0.801, + "step": 10374 + }, + { + "epoch": 0.31797842343999017, + "grad_norm": 1.4434864741987565, + "learning_rate": 1.5956831093684217e-05, + "loss": 0.6094, + "step": 10375 + }, + { + "epoch": 0.31800907196273137, + "grad_norm": 1.4228977703530334, + "learning_rate": 1.5956033759964375e-05, + "loss": 0.8006, + "step": 10376 + }, + { + "epoch": 0.3180397204854726, + "grad_norm": 0.7175308943841047, + "learning_rate": 1.5955236367558212e-05, + "loss": 0.6007, + "step": 10377 + }, + { + "epoch": 0.3180703690082138, + "grad_norm": 1.514130572921709, + "learning_rate": 1.5954438916473574e-05, + "loss": 0.8255, + "step": 10378 + }, + { + "epoch": 0.318101017530955, + "grad_norm": 1.6537409243341032, + "learning_rate": 1.595364140671833e-05, + "loss": 0.752, + "step": 10379 + }, + { + "epoch": 0.3181316660536962, + "grad_norm": 1.4855021816426577, + "learning_rate": 1.595284383830033e-05, + "loss": 0.7394, + "step": 10380 + }, + { + "epoch": 0.3181623145764374, + "grad_norm": 1.4264541488174385, + "learning_rate": 1.5952046211227444e-05, + "loss": 0.6988, + "step": 10381 + }, + { + "epoch": 0.3181929630991786, + "grad_norm": 1.7299235446962278, + "learning_rate": 1.5951248525507516e-05, + "loss": 0.8336, + "step": 10382 + }, + { + "epoch": 0.3182236116219198, + "grad_norm": 1.4125520627886519, + "learning_rate": 1.595045078114842e-05, + "loss": 0.6967, + "step": 10383 + }, + { + "epoch": 0.318254260144661, + "grad_norm": 1.5361073166855854, + "learning_rate": 1.5949652978158004e-05, + "loss": 0.7157, + "step": 10384 + }, + { + "epoch": 0.3182849086674022, + "grad_norm": 1.688942870247032, + "learning_rate": 1.594885511654414e-05, + "loss": 0.9099, + "step": 10385 + }, + { + "epoch": 0.31831555719014343, + "grad_norm": 1.4505762099288617, + "learning_rate": 1.594805719631468e-05, + "loss": 0.8133, + "step": 10386 + }, + { + "epoch": 0.31834620571288463, + "grad_norm": 1.4547261231608424, + "learning_rate": 1.594725921747749e-05, + "loss": 0.756, + "step": 10387 + }, + { + "epoch": 0.31837685423562584, + "grad_norm": 1.441861351227649, + "learning_rate": 1.594646118004044e-05, + "loss": 0.6977, + "step": 10388 + }, + { + "epoch": 0.31840750275836704, + "grad_norm": 1.5615988393920934, + "learning_rate": 1.5945663084011385e-05, + "loss": 0.8122, + "step": 10389 + }, + { + "epoch": 0.31843815128110825, + "grad_norm": 1.4419869257113722, + "learning_rate": 1.5944864929398186e-05, + "loss": 0.8054, + "step": 10390 + }, + { + "epoch": 0.31846879980384946, + "grad_norm": 1.5257205196280448, + "learning_rate": 1.594406671620871e-05, + "loss": 0.7863, + "step": 10391 + }, + { + "epoch": 0.31849944832659066, + "grad_norm": 1.6314134418037034, + "learning_rate": 1.594326844445083e-05, + "loss": 0.7235, + "step": 10392 + }, + { + "epoch": 0.31853009684933187, + "grad_norm": 1.458675134126776, + "learning_rate": 1.5942470114132404e-05, + "loss": 0.8269, + "step": 10393 + }, + { + "epoch": 0.3185607453720731, + "grad_norm": 1.513141456299473, + "learning_rate": 1.59416717252613e-05, + "loss": 0.7365, + "step": 10394 + }, + { + "epoch": 0.3185913938948143, + "grad_norm": 0.8204362764448379, + "learning_rate": 1.5940873277845382e-05, + "loss": 0.6372, + "step": 10395 + }, + { + "epoch": 0.3186220424175555, + "grad_norm": 0.7179600877496591, + "learning_rate": 1.594007477189252e-05, + "loss": 0.6194, + "step": 10396 + }, + { + "epoch": 0.3186526909402967, + "grad_norm": 1.723506243268396, + "learning_rate": 1.5939276207410582e-05, + "loss": 0.8082, + "step": 10397 + }, + { + "epoch": 0.3186833394630379, + "grad_norm": 1.525831199629003, + "learning_rate": 1.5938477584407438e-05, + "loss": 0.8141, + "step": 10398 + }, + { + "epoch": 0.3187139879857791, + "grad_norm": 1.6643742859765531, + "learning_rate": 1.5937678902890953e-05, + "loss": 0.7114, + "step": 10399 + }, + { + "epoch": 0.3187446365085203, + "grad_norm": 1.3435766105666809, + "learning_rate": 1.5936880162868998e-05, + "loss": 0.7756, + "step": 10400 + }, + { + "epoch": 0.3187752850312615, + "grad_norm": 1.5036597275745052, + "learning_rate": 1.593608136434944e-05, + "loss": 0.8226, + "step": 10401 + }, + { + "epoch": 0.3188059335540027, + "grad_norm": 1.66301324656209, + "learning_rate": 1.593528250734016e-05, + "loss": 0.7648, + "step": 10402 + }, + { + "epoch": 0.3188365820767439, + "grad_norm": 1.474694565904249, + "learning_rate": 1.593448359184902e-05, + "loss": 0.7219, + "step": 10403 + }, + { + "epoch": 0.31886723059948513, + "grad_norm": 1.5646008734936918, + "learning_rate": 1.5933684617883897e-05, + "loss": 0.9114, + "step": 10404 + }, + { + "epoch": 0.31889787912222634, + "grad_norm": 1.5502348417578173, + "learning_rate": 1.5932885585452656e-05, + "loss": 0.708, + "step": 10405 + }, + { + "epoch": 0.3189285276449675, + "grad_norm": 0.9885937336296085, + "learning_rate": 1.593208649456318e-05, + "loss": 0.6826, + "step": 10406 + }, + { + "epoch": 0.3189591761677087, + "grad_norm": 1.498033937416566, + "learning_rate": 1.5931287345223333e-05, + "loss": 0.7769, + "step": 10407 + }, + { + "epoch": 0.3189898246904499, + "grad_norm": 1.5972438906972073, + "learning_rate": 1.5930488137441002e-05, + "loss": 0.6586, + "step": 10408 + }, + { + "epoch": 0.3190204732131911, + "grad_norm": 1.4587771106272427, + "learning_rate": 1.592968887122405e-05, + "loss": 0.7854, + "step": 10409 + }, + { + "epoch": 0.3190511217359323, + "grad_norm": 0.710466901635882, + "learning_rate": 1.5928889546580355e-05, + "loss": 0.6294, + "step": 10410 + }, + { + "epoch": 0.3190817702586735, + "grad_norm": 1.3389174192163724, + "learning_rate": 1.5928090163517796e-05, + "loss": 0.7927, + "step": 10411 + }, + { + "epoch": 0.3191124187814147, + "grad_norm": 1.8026196152180674, + "learning_rate": 1.5927290722044246e-05, + "loss": 0.7763, + "step": 10412 + }, + { + "epoch": 0.3191430673041559, + "grad_norm": 0.7230580355458139, + "learning_rate": 1.5926491222167583e-05, + "loss": 0.6224, + "step": 10413 + }, + { + "epoch": 0.31917371582689713, + "grad_norm": 1.476984952625859, + "learning_rate": 1.592569166389569e-05, + "loss": 0.6744, + "step": 10414 + }, + { + "epoch": 0.31920436434963834, + "grad_norm": 1.3681140118265216, + "learning_rate": 1.592489204723644e-05, + "loss": 0.7614, + "step": 10415 + }, + { + "epoch": 0.31923501287237954, + "grad_norm": 1.2168761617650217, + "learning_rate": 1.592409237219771e-05, + "loss": 0.7347, + "step": 10416 + }, + { + "epoch": 0.31926566139512075, + "grad_norm": 0.7637306146641893, + "learning_rate": 1.5923292638787385e-05, + "loss": 0.6601, + "step": 10417 + }, + { + "epoch": 0.31929630991786195, + "grad_norm": 1.2164263695189181, + "learning_rate": 1.592249284701334e-05, + "loss": 0.5807, + "step": 10418 + }, + { + "epoch": 0.31932695844060316, + "grad_norm": 1.5566787063714211, + "learning_rate": 1.592169299688346e-05, + "loss": 0.773, + "step": 10419 + }, + { + "epoch": 0.31935760696334436, + "grad_norm": 1.6586667390855037, + "learning_rate": 1.592089308840562e-05, + "loss": 0.7338, + "step": 10420 + }, + { + "epoch": 0.31938825548608557, + "grad_norm": 1.3275019888923005, + "learning_rate": 1.5920093121587708e-05, + "loss": 0.7235, + "step": 10421 + }, + { + "epoch": 0.3194189040088268, + "grad_norm": 1.649122824248035, + "learning_rate": 1.5919293096437604e-05, + "loss": 0.8623, + "step": 10422 + }, + { + "epoch": 0.319449552531568, + "grad_norm": 0.7059134274302699, + "learning_rate": 1.591849301296319e-05, + "loss": 0.5962, + "step": 10423 + }, + { + "epoch": 0.3194802010543092, + "grad_norm": 1.3148772437296152, + "learning_rate": 1.591769287117235e-05, + "loss": 0.7047, + "step": 10424 + }, + { + "epoch": 0.3195108495770504, + "grad_norm": 1.464197140031119, + "learning_rate": 1.5916892671072967e-05, + "loss": 0.7174, + "step": 10425 + }, + { + "epoch": 0.3195414980997916, + "grad_norm": 1.4874193367991284, + "learning_rate": 1.5916092412672927e-05, + "loss": 0.7363, + "step": 10426 + }, + { + "epoch": 0.3195721466225328, + "grad_norm": 1.5075096318943584, + "learning_rate": 1.5915292095980117e-05, + "loss": 0.6544, + "step": 10427 + }, + { + "epoch": 0.319602795145274, + "grad_norm": 1.5446243677791998, + "learning_rate": 1.5914491721002417e-05, + "loss": 0.6722, + "step": 10428 + }, + { + "epoch": 0.3196334436680152, + "grad_norm": 1.2868895281719368, + "learning_rate": 1.5913691287747723e-05, + "loss": 0.6514, + "step": 10429 + }, + { + "epoch": 0.3196640921907564, + "grad_norm": 1.462748113812773, + "learning_rate": 1.5912890796223907e-05, + "loss": 0.7802, + "step": 10430 + }, + { + "epoch": 0.3196947407134976, + "grad_norm": 1.5108620224287297, + "learning_rate": 1.591209024643887e-05, + "loss": 0.8846, + "step": 10431 + }, + { + "epoch": 0.31972538923623883, + "grad_norm": 1.552532370600968, + "learning_rate": 1.5911289638400497e-05, + "loss": 0.7319, + "step": 10432 + }, + { + "epoch": 0.31975603775898004, + "grad_norm": 1.4190456061222905, + "learning_rate": 1.591048897211667e-05, + "loss": 0.7717, + "step": 10433 + }, + { + "epoch": 0.31978668628172124, + "grad_norm": 1.4786082826685703, + "learning_rate": 1.5909688247595284e-05, + "loss": 0.7568, + "step": 10434 + }, + { + "epoch": 0.31981733480446245, + "grad_norm": 0.6958726223809011, + "learning_rate": 1.590888746484423e-05, + "loss": 0.6211, + "step": 10435 + }, + { + "epoch": 0.31984798332720366, + "grad_norm": 0.7062702961461486, + "learning_rate": 1.5908086623871393e-05, + "loss": 0.6008, + "step": 10436 + }, + { + "epoch": 0.3198786318499448, + "grad_norm": 1.3066518721502116, + "learning_rate": 1.590728572468467e-05, + "loss": 0.696, + "step": 10437 + }, + { + "epoch": 0.319909280372686, + "grad_norm": 1.4446293152659395, + "learning_rate": 1.5906484767291948e-05, + "loss": 0.8299, + "step": 10438 + }, + { + "epoch": 0.3199399288954272, + "grad_norm": 1.4253857897628917, + "learning_rate": 1.5905683751701123e-05, + "loss": 0.6957, + "step": 10439 + }, + { + "epoch": 0.3199705774181684, + "grad_norm": 1.586010881570682, + "learning_rate": 1.590488267792008e-05, + "loss": 0.6411, + "step": 10440 + }, + { + "epoch": 0.32000122594090963, + "grad_norm": 1.3487428142759326, + "learning_rate": 1.590408154595672e-05, + "loss": 0.7283, + "step": 10441 + }, + { + "epoch": 0.32003187446365083, + "grad_norm": 1.3228034406990377, + "learning_rate": 1.5903280355818933e-05, + "loss": 0.7486, + "step": 10442 + }, + { + "epoch": 0.32006252298639204, + "grad_norm": 1.6958295564579475, + "learning_rate": 1.5902479107514615e-05, + "loss": 0.9206, + "step": 10443 + }, + { + "epoch": 0.32009317150913325, + "grad_norm": 1.5565250775794728, + "learning_rate": 1.590167780105166e-05, + "loss": 0.758, + "step": 10444 + }, + { + "epoch": 0.32012382003187445, + "grad_norm": 1.4872098513907133, + "learning_rate": 1.590087643643796e-05, + "loss": 0.7026, + "step": 10445 + }, + { + "epoch": 0.32015446855461566, + "grad_norm": 1.257881132807298, + "learning_rate": 1.590007501368142e-05, + "loss": 0.7646, + "step": 10446 + }, + { + "epoch": 0.32018511707735686, + "grad_norm": 1.5075486886294753, + "learning_rate": 1.5899273532789932e-05, + "loss": 0.7207, + "step": 10447 + }, + { + "epoch": 0.32021576560009807, + "grad_norm": 1.3757840206456107, + "learning_rate": 1.5898471993771388e-05, + "loss": 0.6726, + "step": 10448 + }, + { + "epoch": 0.3202464141228393, + "grad_norm": 1.505817264885719, + "learning_rate": 1.589767039663369e-05, + "loss": 0.8736, + "step": 10449 + }, + { + "epoch": 0.3202770626455805, + "grad_norm": 1.4262963811204343, + "learning_rate": 1.589686874138474e-05, + "loss": 0.6678, + "step": 10450 + }, + { + "epoch": 0.3203077111683217, + "grad_norm": 1.4385806936261634, + "learning_rate": 1.589606702803243e-05, + "loss": 0.6658, + "step": 10451 + }, + { + "epoch": 0.3203383596910629, + "grad_norm": 0.7621425719892851, + "learning_rate": 1.5895265256584668e-05, + "loss": 0.6102, + "step": 10452 + }, + { + "epoch": 0.3203690082138041, + "grad_norm": 1.4769220378664183, + "learning_rate": 1.5894463427049344e-05, + "loss": 0.7559, + "step": 10453 + }, + { + "epoch": 0.3203996567365453, + "grad_norm": 0.6966166000014904, + "learning_rate": 1.589366153943437e-05, + "loss": 0.6138, + "step": 10454 + }, + { + "epoch": 0.3204303052592865, + "grad_norm": 1.6268054260469056, + "learning_rate": 1.5892859593747632e-05, + "loss": 0.7569, + "step": 10455 + }, + { + "epoch": 0.3204609537820277, + "grad_norm": 1.5698005881848323, + "learning_rate": 1.5892057589997048e-05, + "loss": 0.8414, + "step": 10456 + }, + { + "epoch": 0.3204916023047689, + "grad_norm": 0.6726774702634841, + "learning_rate": 1.5891255528190506e-05, + "loss": 0.6118, + "step": 10457 + }, + { + "epoch": 0.3205222508275101, + "grad_norm": 1.4665858809415764, + "learning_rate": 1.5890453408335927e-05, + "loss": 0.6779, + "step": 10458 + }, + { + "epoch": 0.32055289935025133, + "grad_norm": 1.3736346952516738, + "learning_rate": 1.5889651230441196e-05, + "loss": 0.7876, + "step": 10459 + }, + { + "epoch": 0.32058354787299254, + "grad_norm": 1.39456844765666, + "learning_rate": 1.5888848994514222e-05, + "loss": 0.8737, + "step": 10460 + }, + { + "epoch": 0.32061419639573374, + "grad_norm": 1.393727521685718, + "learning_rate": 1.5888046700562916e-05, + "loss": 0.7707, + "step": 10461 + }, + { + "epoch": 0.32064484491847495, + "grad_norm": 1.3586988427870568, + "learning_rate": 1.588724434859518e-05, + "loss": 0.749, + "step": 10462 + }, + { + "epoch": 0.32067549344121615, + "grad_norm": 0.7680689182999174, + "learning_rate": 1.5886441938618916e-05, + "loss": 0.5913, + "step": 10463 + }, + { + "epoch": 0.32070614196395736, + "grad_norm": 0.7120667266464601, + "learning_rate": 1.588563947064204e-05, + "loss": 0.6515, + "step": 10464 + }, + { + "epoch": 0.32073679048669856, + "grad_norm": 1.5388650778757873, + "learning_rate": 1.5884836944672443e-05, + "loss": 0.8208, + "step": 10465 + }, + { + "epoch": 0.32076743900943977, + "grad_norm": 0.6566632314550029, + "learning_rate": 1.588403436071805e-05, + "loss": 0.6291, + "step": 10466 + }, + { + "epoch": 0.320798087532181, + "grad_norm": 1.5606797314702032, + "learning_rate": 1.5883231718786757e-05, + "loss": 0.7534, + "step": 10467 + }, + { + "epoch": 0.3208287360549221, + "grad_norm": 1.4972914365647307, + "learning_rate": 1.5882429018886475e-05, + "loss": 0.7703, + "step": 10468 + }, + { + "epoch": 0.32085938457766333, + "grad_norm": 1.39728616867698, + "learning_rate": 1.5881626261025117e-05, + "loss": 0.6967, + "step": 10469 + }, + { + "epoch": 0.32089003310040454, + "grad_norm": 1.3119265342163429, + "learning_rate": 1.5880823445210592e-05, + "loss": 0.7518, + "step": 10470 + }, + { + "epoch": 0.32092068162314574, + "grad_norm": 0.7192320504130533, + "learning_rate": 1.5880020571450807e-05, + "loss": 0.6205, + "step": 10471 + }, + { + "epoch": 0.32095133014588695, + "grad_norm": 1.528064407402406, + "learning_rate": 1.5879217639753673e-05, + "loss": 0.8425, + "step": 10472 + }, + { + "epoch": 0.32098197866862815, + "grad_norm": 1.3363881820948786, + "learning_rate": 1.5878414650127106e-05, + "loss": 0.7246, + "step": 10473 + }, + { + "epoch": 0.32101262719136936, + "grad_norm": 1.5125620964528042, + "learning_rate": 1.5877611602579017e-05, + "loss": 0.8585, + "step": 10474 + }, + { + "epoch": 0.32104327571411057, + "grad_norm": 0.6793995603117495, + "learning_rate": 1.5876808497117317e-05, + "loss": 0.6187, + "step": 10475 + }, + { + "epoch": 0.32107392423685177, + "grad_norm": 0.6924136435371715, + "learning_rate": 1.5876005333749916e-05, + "loss": 0.6226, + "step": 10476 + }, + { + "epoch": 0.321104572759593, + "grad_norm": 1.345992887077246, + "learning_rate": 1.587520211248473e-05, + "loss": 0.769, + "step": 10477 + }, + { + "epoch": 0.3211352212823342, + "grad_norm": 1.4023645235028557, + "learning_rate": 1.5874398833329678e-05, + "loss": 0.7335, + "step": 10478 + }, + { + "epoch": 0.3211658698050754, + "grad_norm": 1.3644161513275868, + "learning_rate": 1.587359549629267e-05, + "loss": 0.7463, + "step": 10479 + }, + { + "epoch": 0.3211965183278166, + "grad_norm": 1.4568347743371686, + "learning_rate": 1.5872792101381624e-05, + "loss": 0.7497, + "step": 10480 + }, + { + "epoch": 0.3212271668505578, + "grad_norm": 0.6992492468156097, + "learning_rate": 1.587198864860445e-05, + "loss": 0.6195, + "step": 10481 + }, + { + "epoch": 0.321257815373299, + "grad_norm": 1.4032299070575676, + "learning_rate": 1.5871185137969074e-05, + "loss": 0.7962, + "step": 10482 + }, + { + "epoch": 0.3212884638960402, + "grad_norm": 1.4551779447960478, + "learning_rate": 1.587038156948341e-05, + "loss": 0.7912, + "step": 10483 + }, + { + "epoch": 0.3213191124187814, + "grad_norm": 1.5185548598185086, + "learning_rate": 1.586957794315537e-05, + "loss": 0.773, + "step": 10484 + }, + { + "epoch": 0.3213497609415226, + "grad_norm": 0.7074498985038846, + "learning_rate": 1.586877425899288e-05, + "loss": 0.6208, + "step": 10485 + }, + { + "epoch": 0.3213804094642638, + "grad_norm": 0.6881850483613344, + "learning_rate": 1.586797051700385e-05, + "loss": 0.6032, + "step": 10486 + }, + { + "epoch": 0.32141105798700503, + "grad_norm": 1.581918921224656, + "learning_rate": 1.5867166717196213e-05, + "loss": 0.7791, + "step": 10487 + }, + { + "epoch": 0.32144170650974624, + "grad_norm": 1.349634889862498, + "learning_rate": 1.5866362859577875e-05, + "loss": 0.7631, + "step": 10488 + }, + { + "epoch": 0.32147235503248744, + "grad_norm": 1.4702266051645532, + "learning_rate": 1.586555894415677e-05, + "loss": 0.7356, + "step": 10489 + }, + { + "epoch": 0.32150300355522865, + "grad_norm": 1.422705746367164, + "learning_rate": 1.5864754970940805e-05, + "loss": 0.7662, + "step": 10490 + }, + { + "epoch": 0.32153365207796986, + "grad_norm": 0.7119484681863604, + "learning_rate": 1.5863950939937912e-05, + "loss": 0.5967, + "step": 10491 + }, + { + "epoch": 0.32156430060071106, + "grad_norm": 0.7314503308615985, + "learning_rate": 1.5863146851156005e-05, + "loss": 0.5967, + "step": 10492 + }, + { + "epoch": 0.32159494912345227, + "grad_norm": 1.560251025954307, + "learning_rate": 1.586234270460302e-05, + "loss": 0.8499, + "step": 10493 + }, + { + "epoch": 0.3216255976461935, + "grad_norm": 1.5004202024189393, + "learning_rate": 1.5861538500286865e-05, + "loss": 0.7514, + "step": 10494 + }, + { + "epoch": 0.3216562461689347, + "grad_norm": 1.270414528599963, + "learning_rate": 1.5860734238215475e-05, + "loss": 0.7147, + "step": 10495 + }, + { + "epoch": 0.3216868946916759, + "grad_norm": 1.4135237752311023, + "learning_rate": 1.5859929918396774e-05, + "loss": 0.8649, + "step": 10496 + }, + { + "epoch": 0.3217175432144171, + "grad_norm": 1.470576332302939, + "learning_rate": 1.585912554083868e-05, + "loss": 0.7403, + "step": 10497 + }, + { + "epoch": 0.3217481917371583, + "grad_norm": 1.3176665016317073, + "learning_rate": 1.5858321105549122e-05, + "loss": 0.7097, + "step": 10498 + }, + { + "epoch": 0.32177884025989945, + "grad_norm": 1.5936214003086986, + "learning_rate": 1.585751661253603e-05, + "loss": 0.8164, + "step": 10499 + }, + { + "epoch": 0.32180948878264065, + "grad_norm": 1.4571783975763999, + "learning_rate": 1.5856712061807326e-05, + "loss": 0.7784, + "step": 10500 + }, + { + "epoch": 0.32184013730538186, + "grad_norm": 1.1999573151966223, + "learning_rate": 1.5855907453370944e-05, + "loss": 0.7671, + "step": 10501 + }, + { + "epoch": 0.32187078582812306, + "grad_norm": 0.7339194121694796, + "learning_rate": 1.5855102787234802e-05, + "loss": 0.6299, + "step": 10502 + }, + { + "epoch": 0.32190143435086427, + "grad_norm": 1.460459362655845, + "learning_rate": 1.5854298063406836e-05, + "loss": 0.7897, + "step": 10503 + }, + { + "epoch": 0.3219320828736055, + "grad_norm": 1.4411248098677352, + "learning_rate": 1.5853493281894975e-05, + "loss": 0.7376, + "step": 10504 + }, + { + "epoch": 0.3219627313963467, + "grad_norm": 1.6165441026255543, + "learning_rate": 1.5852688442707146e-05, + "loss": 0.7754, + "step": 10505 + }, + { + "epoch": 0.3219933799190879, + "grad_norm": 1.3809154981669425, + "learning_rate": 1.5851883545851277e-05, + "loss": 0.7954, + "step": 10506 + }, + { + "epoch": 0.3220240284418291, + "grad_norm": 1.5594666569281002, + "learning_rate": 1.5851078591335308e-05, + "loss": 0.8479, + "step": 10507 + }, + { + "epoch": 0.3220546769645703, + "grad_norm": 1.432511528131206, + "learning_rate": 1.5850273579167162e-05, + "loss": 0.7534, + "step": 10508 + }, + { + "epoch": 0.3220853254873115, + "grad_norm": 1.439888651234559, + "learning_rate": 1.5849468509354773e-05, + "loss": 0.8121, + "step": 10509 + }, + { + "epoch": 0.3221159740100527, + "grad_norm": 1.3014742843115692, + "learning_rate": 1.5848663381906077e-05, + "loss": 0.7553, + "step": 10510 + }, + { + "epoch": 0.3221466225327939, + "grad_norm": 1.394020353907746, + "learning_rate": 1.5847858196829e-05, + "loss": 0.6962, + "step": 10511 + }, + { + "epoch": 0.3221772710555351, + "grad_norm": 1.620463258518597, + "learning_rate": 1.584705295413148e-05, + "loss": 0.7616, + "step": 10512 + }, + { + "epoch": 0.3222079195782763, + "grad_norm": 0.6493030255522662, + "learning_rate": 1.584624765382145e-05, + "loss": 0.5473, + "step": 10513 + }, + { + "epoch": 0.32223856810101753, + "grad_norm": 1.5191144561047674, + "learning_rate": 1.584544229590685e-05, + "loss": 0.8026, + "step": 10514 + }, + { + "epoch": 0.32226921662375874, + "grad_norm": 1.4029633246202629, + "learning_rate": 1.584463688039561e-05, + "loss": 0.7473, + "step": 10515 + }, + { + "epoch": 0.32229986514649994, + "grad_norm": 0.6796509629358433, + "learning_rate": 1.584383140729567e-05, + "loss": 0.6108, + "step": 10516 + }, + { + "epoch": 0.32233051366924115, + "grad_norm": 1.419875033068557, + "learning_rate": 1.5843025876614962e-05, + "loss": 0.7865, + "step": 10517 + }, + { + "epoch": 0.32236116219198235, + "grad_norm": 1.2184107953030805, + "learning_rate": 1.5842220288361423e-05, + "loss": 0.7556, + "step": 10518 + }, + { + "epoch": 0.32239181071472356, + "grad_norm": 1.3990945600404612, + "learning_rate": 1.5841414642542994e-05, + "loss": 0.7692, + "step": 10519 + }, + { + "epoch": 0.32242245923746476, + "grad_norm": 0.6595110611042284, + "learning_rate": 1.5840608939167615e-05, + "loss": 0.6037, + "step": 10520 + }, + { + "epoch": 0.32245310776020597, + "grad_norm": 1.386203675466018, + "learning_rate": 1.583980317824322e-05, + "loss": 0.7147, + "step": 10521 + }, + { + "epoch": 0.3224837562829472, + "grad_norm": 0.672378082446588, + "learning_rate": 1.5838997359777746e-05, + "loss": 0.6275, + "step": 10522 + }, + { + "epoch": 0.3225144048056884, + "grad_norm": 1.5278494695888842, + "learning_rate": 1.5838191483779143e-05, + "loss": 0.7982, + "step": 10523 + }, + { + "epoch": 0.3225450533284296, + "grad_norm": 1.235056627147661, + "learning_rate": 1.583738555025534e-05, + "loss": 0.7554, + "step": 10524 + }, + { + "epoch": 0.3225757018511708, + "grad_norm": 1.5516293770245775, + "learning_rate": 1.583657955921429e-05, + "loss": 0.7882, + "step": 10525 + }, + { + "epoch": 0.322606350373912, + "grad_norm": 1.7064222061977685, + "learning_rate": 1.583577351066392e-05, + "loss": 0.7866, + "step": 10526 + }, + { + "epoch": 0.3226369988966532, + "grad_norm": 1.4364893805733912, + "learning_rate": 1.583496740461219e-05, + "loss": 0.7507, + "step": 10527 + }, + { + "epoch": 0.3226676474193944, + "grad_norm": 1.534889308223452, + "learning_rate": 1.5834161241067025e-05, + "loss": 0.791, + "step": 10528 + }, + { + "epoch": 0.3226982959421356, + "grad_norm": 1.3886545505660577, + "learning_rate": 1.583335502003638e-05, + "loss": 0.6203, + "step": 10529 + }, + { + "epoch": 0.32272894446487677, + "grad_norm": 1.51772617865848, + "learning_rate": 1.5832548741528196e-05, + "loss": 0.8314, + "step": 10530 + }, + { + "epoch": 0.32275959298761797, + "grad_norm": 1.4028786768087125, + "learning_rate": 1.5831742405550418e-05, + "loss": 0.791, + "step": 10531 + }, + { + "epoch": 0.3227902415103592, + "grad_norm": 0.6862245957705984, + "learning_rate": 1.5830936012110985e-05, + "loss": 0.5894, + "step": 10532 + }, + { + "epoch": 0.3228208900331004, + "grad_norm": 1.3879308470206697, + "learning_rate": 1.5830129561217853e-05, + "loss": 0.8109, + "step": 10533 + }, + { + "epoch": 0.3228515385558416, + "grad_norm": 1.7011155634108048, + "learning_rate": 1.582932305287896e-05, + "loss": 0.7642, + "step": 10534 + }, + { + "epoch": 0.3228821870785828, + "grad_norm": 1.5880250375971172, + "learning_rate": 1.5828516487102258e-05, + "loss": 0.8195, + "step": 10535 + }, + { + "epoch": 0.322912835601324, + "grad_norm": 1.3586061907340696, + "learning_rate": 1.5827709863895688e-05, + "loss": 0.765, + "step": 10536 + }, + { + "epoch": 0.3229434841240652, + "grad_norm": 1.473573310180178, + "learning_rate": 1.5826903183267204e-05, + "loss": 0.7642, + "step": 10537 + }, + { + "epoch": 0.3229741326468064, + "grad_norm": 1.611954185487726, + "learning_rate": 1.5826096445224752e-05, + "loss": 0.7679, + "step": 10538 + }, + { + "epoch": 0.3230047811695476, + "grad_norm": 1.378490770205421, + "learning_rate": 1.582528964977628e-05, + "loss": 0.7477, + "step": 10539 + }, + { + "epoch": 0.3230354296922888, + "grad_norm": 1.3969329500662444, + "learning_rate": 1.582448279692974e-05, + "loss": 0.814, + "step": 10540 + }, + { + "epoch": 0.32306607821503003, + "grad_norm": 1.42058920675107, + "learning_rate": 1.5823675886693077e-05, + "loss": 0.7315, + "step": 10541 + }, + { + "epoch": 0.32309672673777123, + "grad_norm": 1.5131174231632067, + "learning_rate": 1.5822868919074248e-05, + "loss": 0.6828, + "step": 10542 + }, + { + "epoch": 0.32312737526051244, + "grad_norm": 1.36710305019308, + "learning_rate": 1.5822061894081205e-05, + "loss": 0.7456, + "step": 10543 + }, + { + "epoch": 0.32315802378325365, + "grad_norm": 1.648198338048825, + "learning_rate": 1.5821254811721893e-05, + "loss": 0.6705, + "step": 10544 + }, + { + "epoch": 0.32318867230599485, + "grad_norm": 1.408625506161473, + "learning_rate": 1.5820447672004265e-05, + "loss": 0.711, + "step": 10545 + }, + { + "epoch": 0.32321932082873606, + "grad_norm": 1.545052467446304, + "learning_rate": 1.5819640474936282e-05, + "loss": 0.7926, + "step": 10546 + }, + { + "epoch": 0.32324996935147726, + "grad_norm": 1.3292888522698536, + "learning_rate": 1.581883322052589e-05, + "loss": 0.7146, + "step": 10547 + }, + { + "epoch": 0.32328061787421847, + "grad_norm": 1.4981776633532542, + "learning_rate": 1.581802590878105e-05, + "loss": 0.7583, + "step": 10548 + }, + { + "epoch": 0.3233112663969597, + "grad_norm": 1.7534276511226337, + "learning_rate": 1.5817218539709703e-05, + "loss": 0.6768, + "step": 10549 + }, + { + "epoch": 0.3233419149197009, + "grad_norm": 1.4349434850096217, + "learning_rate": 1.5816411113319822e-05, + "loss": 0.7532, + "step": 10550 + }, + { + "epoch": 0.3233725634424421, + "grad_norm": 1.549116387043617, + "learning_rate": 1.581560362961935e-05, + "loss": 0.7779, + "step": 10551 + }, + { + "epoch": 0.3234032119651833, + "grad_norm": 1.3727924040830914, + "learning_rate": 1.5814796088616247e-05, + "loss": 0.8428, + "step": 10552 + }, + { + "epoch": 0.3234338604879245, + "grad_norm": 1.4810224536001784, + "learning_rate": 1.581398849031847e-05, + "loss": 0.7226, + "step": 10553 + }, + { + "epoch": 0.3234645090106657, + "grad_norm": 1.6954394635170424, + "learning_rate": 1.581318083473398e-05, + "loss": 0.8699, + "step": 10554 + }, + { + "epoch": 0.3234951575334069, + "grad_norm": 1.465103655150622, + "learning_rate": 1.5812373121870732e-05, + "loss": 0.7429, + "step": 10555 + }, + { + "epoch": 0.3235258060561481, + "grad_norm": 1.448982956216932, + "learning_rate": 1.5811565351736683e-05, + "loss": 0.7137, + "step": 10556 + }, + { + "epoch": 0.3235564545788893, + "grad_norm": 1.4328262413832797, + "learning_rate": 1.581075752433979e-05, + "loss": 0.8268, + "step": 10557 + }, + { + "epoch": 0.3235871031016305, + "grad_norm": 1.4776575515665584, + "learning_rate": 1.5809949639688023e-05, + "loss": 0.7352, + "step": 10558 + }, + { + "epoch": 0.32361775162437173, + "grad_norm": 1.3482635391685909, + "learning_rate": 1.5809141697789333e-05, + "loss": 0.7022, + "step": 10559 + }, + { + "epoch": 0.32364840014711294, + "grad_norm": 1.3300823841909364, + "learning_rate": 1.580833369865168e-05, + "loss": 0.8042, + "step": 10560 + }, + { + "epoch": 0.3236790486698541, + "grad_norm": 0.715987765282989, + "learning_rate": 1.5807525642283033e-05, + "loss": 0.6102, + "step": 10561 + }, + { + "epoch": 0.3237096971925953, + "grad_norm": 1.406544547311913, + "learning_rate": 1.5806717528691347e-05, + "loss": 0.714, + "step": 10562 + }, + { + "epoch": 0.3237403457153365, + "grad_norm": 1.376630444342493, + "learning_rate": 1.5805909357884592e-05, + "loss": 0.6752, + "step": 10563 + }, + { + "epoch": 0.3237709942380777, + "grad_norm": 1.464315131673901, + "learning_rate": 1.5805101129870725e-05, + "loss": 0.7167, + "step": 10564 + }, + { + "epoch": 0.3238016427608189, + "grad_norm": 1.4675550212612543, + "learning_rate": 1.5804292844657706e-05, + "loss": 0.6931, + "step": 10565 + }, + { + "epoch": 0.3238322912835601, + "grad_norm": 1.6112241285555406, + "learning_rate": 1.580348450225351e-05, + "loss": 0.7952, + "step": 10566 + }, + { + "epoch": 0.3238629398063013, + "grad_norm": 1.5078176337179283, + "learning_rate": 1.5802676102666093e-05, + "loss": 0.7388, + "step": 10567 + }, + { + "epoch": 0.3238935883290425, + "grad_norm": 1.4810169139400886, + "learning_rate": 1.5801867645903427e-05, + "loss": 0.8029, + "step": 10568 + }, + { + "epoch": 0.32392423685178373, + "grad_norm": 1.5463540307919603, + "learning_rate": 1.5801059131973474e-05, + "loss": 0.8132, + "step": 10569 + }, + { + "epoch": 0.32395488537452494, + "grad_norm": 1.5275833666129455, + "learning_rate": 1.58002505608842e-05, + "loss": 0.8352, + "step": 10570 + }, + { + "epoch": 0.32398553389726614, + "grad_norm": 1.4462098095813254, + "learning_rate": 1.5799441932643572e-05, + "loss": 0.8866, + "step": 10571 + }, + { + "epoch": 0.32401618242000735, + "grad_norm": 1.4417240899477302, + "learning_rate": 1.579863324725956e-05, + "loss": 0.7363, + "step": 10572 + }, + { + "epoch": 0.32404683094274855, + "grad_norm": 1.4604574808681352, + "learning_rate": 1.5797824504740132e-05, + "loss": 0.8443, + "step": 10573 + }, + { + "epoch": 0.32407747946548976, + "grad_norm": 1.411260868331502, + "learning_rate": 1.5797015705093257e-05, + "loss": 0.8132, + "step": 10574 + }, + { + "epoch": 0.32410812798823097, + "grad_norm": 1.5397871296901606, + "learning_rate": 1.57962068483269e-05, + "loss": 0.7287, + "step": 10575 + }, + { + "epoch": 0.32413877651097217, + "grad_norm": 1.6659888736701485, + "learning_rate": 1.5795397934449034e-05, + "loss": 0.7516, + "step": 10576 + }, + { + "epoch": 0.3241694250337134, + "grad_norm": 1.4127122008750366, + "learning_rate": 1.579458896346763e-05, + "loss": 0.7782, + "step": 10577 + }, + { + "epoch": 0.3242000735564546, + "grad_norm": 1.436698538677153, + "learning_rate": 1.5793779935390658e-05, + "loss": 0.807, + "step": 10578 + }, + { + "epoch": 0.3242307220791958, + "grad_norm": 1.316928729876506, + "learning_rate": 1.579297085022609e-05, + "loss": 0.7006, + "step": 10579 + }, + { + "epoch": 0.324261370601937, + "grad_norm": 1.6070183133442857, + "learning_rate": 1.5792161707981902e-05, + "loss": 0.7847, + "step": 10580 + }, + { + "epoch": 0.3242920191246782, + "grad_norm": 1.7621847198813894, + "learning_rate": 1.5791352508666058e-05, + "loss": 0.7941, + "step": 10581 + }, + { + "epoch": 0.3243226676474194, + "grad_norm": 1.3311475599618234, + "learning_rate": 1.5790543252286536e-05, + "loss": 0.7355, + "step": 10582 + }, + { + "epoch": 0.3243533161701606, + "grad_norm": 1.3742748458851386, + "learning_rate": 1.578973393885131e-05, + "loss": 0.8215, + "step": 10583 + }, + { + "epoch": 0.3243839646929018, + "grad_norm": 1.3225695961179884, + "learning_rate": 1.5788924568368357e-05, + "loss": 0.7647, + "step": 10584 + }, + { + "epoch": 0.324414613215643, + "grad_norm": 1.4108292285701185, + "learning_rate": 1.5788115140845648e-05, + "loss": 0.6831, + "step": 10585 + }, + { + "epoch": 0.3244452617383842, + "grad_norm": 1.5530757516422335, + "learning_rate": 1.5787305656291157e-05, + "loss": 0.827, + "step": 10586 + }, + { + "epoch": 0.32447591026112543, + "grad_norm": 1.55909660736755, + "learning_rate": 1.5786496114712867e-05, + "loss": 0.8238, + "step": 10587 + }, + { + "epoch": 0.32450655878386664, + "grad_norm": 1.4669348954158616, + "learning_rate": 1.5785686516118746e-05, + "loss": 0.8001, + "step": 10588 + }, + { + "epoch": 0.32453720730660784, + "grad_norm": 1.41521283791047, + "learning_rate": 1.5784876860516776e-05, + "loss": 0.8665, + "step": 10589 + }, + { + "epoch": 0.32456785582934905, + "grad_norm": 1.4819038642696707, + "learning_rate": 1.5784067147914934e-05, + "loss": 0.7329, + "step": 10590 + }, + { + "epoch": 0.32459850435209026, + "grad_norm": 1.5005527254593447, + "learning_rate": 1.57832573783212e-05, + "loss": 0.8454, + "step": 10591 + }, + { + "epoch": 0.3246291528748314, + "grad_norm": 1.3898385322267202, + "learning_rate": 1.5782447551743552e-05, + "loss": 0.7791, + "step": 10592 + }, + { + "epoch": 0.3246598013975726, + "grad_norm": 1.2563280341538994, + "learning_rate": 1.578163766818997e-05, + "loss": 0.7064, + "step": 10593 + }, + { + "epoch": 0.3246904499203138, + "grad_norm": 1.4064611423574147, + "learning_rate": 1.5780827727668428e-05, + "loss": 0.8038, + "step": 10594 + }, + { + "epoch": 0.324721098443055, + "grad_norm": 0.7390510846280735, + "learning_rate": 1.5780017730186915e-05, + "loss": 0.6366, + "step": 10595 + }, + { + "epoch": 0.32475174696579623, + "grad_norm": 1.4620542173065236, + "learning_rate": 1.5779207675753404e-05, + "loss": 0.7379, + "step": 10596 + }, + { + "epoch": 0.32478239548853743, + "grad_norm": 1.467387070694083, + "learning_rate": 1.5778397564375887e-05, + "loss": 0.7048, + "step": 10597 + }, + { + "epoch": 0.32481304401127864, + "grad_norm": 1.5930395294207231, + "learning_rate": 1.5777587396062334e-05, + "loss": 0.9307, + "step": 10598 + }, + { + "epoch": 0.32484369253401985, + "grad_norm": 1.6541736496628627, + "learning_rate": 1.577677717082074e-05, + "loss": 0.8174, + "step": 10599 + }, + { + "epoch": 0.32487434105676105, + "grad_norm": 1.6002922533050936, + "learning_rate": 1.577596688865908e-05, + "loss": 0.703, + "step": 10600 + }, + { + "epoch": 0.32490498957950226, + "grad_norm": 0.7287471741841948, + "learning_rate": 1.577515654958534e-05, + "loss": 0.6331, + "step": 10601 + }, + { + "epoch": 0.32493563810224346, + "grad_norm": 1.3705114092329818, + "learning_rate": 1.5774346153607506e-05, + "loss": 0.7128, + "step": 10602 + }, + { + "epoch": 0.32496628662498467, + "grad_norm": 1.5675785181219437, + "learning_rate": 1.5773535700733562e-05, + "loss": 0.7472, + "step": 10603 + }, + { + "epoch": 0.3249969351477259, + "grad_norm": 0.7182625566105182, + "learning_rate": 1.5772725190971493e-05, + "loss": 0.6302, + "step": 10604 + }, + { + "epoch": 0.3250275836704671, + "grad_norm": 1.5835920967373913, + "learning_rate": 1.5771914624329285e-05, + "loss": 0.8632, + "step": 10605 + }, + { + "epoch": 0.3250582321932083, + "grad_norm": 1.3386962191519867, + "learning_rate": 1.5771104000814927e-05, + "loss": 0.6697, + "step": 10606 + }, + { + "epoch": 0.3250888807159495, + "grad_norm": 1.4181134290621442, + "learning_rate": 1.5770293320436404e-05, + "loss": 0.7637, + "step": 10607 + }, + { + "epoch": 0.3251195292386907, + "grad_norm": 1.4558070870351174, + "learning_rate": 1.5769482583201706e-05, + "loss": 0.7919, + "step": 10608 + }, + { + "epoch": 0.3251501777614319, + "grad_norm": 1.6005540327545165, + "learning_rate": 1.5768671789118815e-05, + "loss": 0.8476, + "step": 10609 + }, + { + "epoch": 0.3251808262841731, + "grad_norm": 1.5119464620455931, + "learning_rate": 1.5767860938195728e-05, + "loss": 0.8302, + "step": 10610 + }, + { + "epoch": 0.3252114748069143, + "grad_norm": 1.3728287354038236, + "learning_rate": 1.576705003044043e-05, + "loss": 0.7314, + "step": 10611 + }, + { + "epoch": 0.3252421233296555, + "grad_norm": 1.199400763683114, + "learning_rate": 1.5766239065860916e-05, + "loss": 0.6781, + "step": 10612 + }, + { + "epoch": 0.3252727718523967, + "grad_norm": 1.5417772209421001, + "learning_rate": 1.576542804446517e-05, + "loss": 0.7819, + "step": 10613 + }, + { + "epoch": 0.32530342037513793, + "grad_norm": 1.4704460845497216, + "learning_rate": 1.5764616966261188e-05, + "loss": 0.8088, + "step": 10614 + }, + { + "epoch": 0.32533406889787914, + "grad_norm": 1.464315386601878, + "learning_rate": 1.576380583125696e-05, + "loss": 0.7046, + "step": 10615 + }, + { + "epoch": 0.32536471742062034, + "grad_norm": 0.8072945816523853, + "learning_rate": 1.5762994639460478e-05, + "loss": 0.6338, + "step": 10616 + }, + { + "epoch": 0.32539536594336155, + "grad_norm": 1.4633999115738041, + "learning_rate": 1.5762183390879735e-05, + "loss": 0.7972, + "step": 10617 + }, + { + "epoch": 0.32542601446610275, + "grad_norm": 1.3378464698053705, + "learning_rate": 1.5761372085522726e-05, + "loss": 0.664, + "step": 10618 + }, + { + "epoch": 0.32545666298884396, + "grad_norm": 1.2841343898204922, + "learning_rate": 1.576056072339744e-05, + "loss": 0.6716, + "step": 10619 + }, + { + "epoch": 0.32548731151158516, + "grad_norm": 1.5104018895630136, + "learning_rate": 1.5759749304511877e-05, + "loss": 0.7884, + "step": 10620 + }, + { + "epoch": 0.32551796003432637, + "grad_norm": 1.2981684223547938, + "learning_rate": 1.5758937828874032e-05, + "loss": 0.7364, + "step": 10621 + }, + { + "epoch": 0.3255486085570676, + "grad_norm": 1.4508968266700628, + "learning_rate": 1.5758126296491898e-05, + "loss": 0.7877, + "step": 10622 + }, + { + "epoch": 0.3255792570798087, + "grad_norm": 1.501635870290481, + "learning_rate": 1.575731470737347e-05, + "loss": 0.7664, + "step": 10623 + }, + { + "epoch": 0.32560990560254993, + "grad_norm": 1.3366035171923139, + "learning_rate": 1.5756503061526754e-05, + "loss": 0.6886, + "step": 10624 + }, + { + "epoch": 0.32564055412529114, + "grad_norm": 1.3013004052243897, + "learning_rate": 1.5755691358959737e-05, + "loss": 0.7544, + "step": 10625 + }, + { + "epoch": 0.32567120264803234, + "grad_norm": 1.5233980313367466, + "learning_rate": 1.575487959968042e-05, + "loss": 0.7572, + "step": 10626 + }, + { + "epoch": 0.32570185117077355, + "grad_norm": 1.5112609772349888, + "learning_rate": 1.57540677836968e-05, + "loss": 0.7573, + "step": 10627 + }, + { + "epoch": 0.32573249969351475, + "grad_norm": 1.677247985164186, + "learning_rate": 1.575325591101688e-05, + "loss": 0.7266, + "step": 10628 + }, + { + "epoch": 0.32576314821625596, + "grad_norm": 1.3768378053126575, + "learning_rate": 1.5752443981648657e-05, + "loss": 0.7411, + "step": 10629 + }, + { + "epoch": 0.32579379673899717, + "grad_norm": 0.7143151200033545, + "learning_rate": 1.575163199560013e-05, + "loss": 0.6285, + "step": 10630 + }, + { + "epoch": 0.32582444526173837, + "grad_norm": 1.6425082861841678, + "learning_rate": 1.5750819952879303e-05, + "loss": 0.7838, + "step": 10631 + }, + { + "epoch": 0.3258550937844796, + "grad_norm": 1.3933382713484053, + "learning_rate": 1.5750007853494175e-05, + "loss": 0.777, + "step": 10632 + }, + { + "epoch": 0.3258857423072208, + "grad_norm": 1.3231506267106798, + "learning_rate": 1.574919569745275e-05, + "loss": 0.7338, + "step": 10633 + }, + { + "epoch": 0.325916390829962, + "grad_norm": 1.3726596771623607, + "learning_rate": 1.5748383484763027e-05, + "loss": 0.8014, + "step": 10634 + }, + { + "epoch": 0.3259470393527032, + "grad_norm": 0.6974476530953104, + "learning_rate": 1.5747571215433013e-05, + "loss": 0.6122, + "step": 10635 + }, + { + "epoch": 0.3259776878754444, + "grad_norm": 1.5863774626460032, + "learning_rate": 1.574675888947071e-05, + "loss": 0.8354, + "step": 10636 + }, + { + "epoch": 0.3260083363981856, + "grad_norm": 1.5105885109189348, + "learning_rate": 1.5745946506884116e-05, + "loss": 0.8641, + "step": 10637 + }, + { + "epoch": 0.3260389849209268, + "grad_norm": 0.7073749191089478, + "learning_rate": 1.5745134067681242e-05, + "loss": 0.6309, + "step": 10638 + }, + { + "epoch": 0.326069633443668, + "grad_norm": 1.5026678125507882, + "learning_rate": 1.5744321571870095e-05, + "loss": 0.769, + "step": 10639 + }, + { + "epoch": 0.3261002819664092, + "grad_norm": 1.5359541051089969, + "learning_rate": 1.574350901945868e-05, + "loss": 0.7641, + "step": 10640 + }, + { + "epoch": 0.32613093048915043, + "grad_norm": 1.533508466906169, + "learning_rate": 1.5742696410454995e-05, + "loss": 0.7921, + "step": 10641 + }, + { + "epoch": 0.32616157901189163, + "grad_norm": 1.7106121269964945, + "learning_rate": 1.5741883744867055e-05, + "loss": 0.7045, + "step": 10642 + }, + { + "epoch": 0.32619222753463284, + "grad_norm": 1.4140178989765626, + "learning_rate": 1.5741071022702866e-05, + "loss": 0.7543, + "step": 10643 + }, + { + "epoch": 0.32622287605737404, + "grad_norm": 1.3604017746496602, + "learning_rate": 1.5740258243970436e-05, + "loss": 0.6745, + "step": 10644 + }, + { + "epoch": 0.32625352458011525, + "grad_norm": 1.617672949326704, + "learning_rate": 1.5739445408677775e-05, + "loss": 0.7762, + "step": 10645 + }, + { + "epoch": 0.32628417310285646, + "grad_norm": 1.4122286176804375, + "learning_rate": 1.5738632516832883e-05, + "loss": 0.7074, + "step": 10646 + }, + { + "epoch": 0.32631482162559766, + "grad_norm": 1.5562035371399157, + "learning_rate": 1.5737819568443783e-05, + "loss": 0.8525, + "step": 10647 + }, + { + "epoch": 0.32634547014833887, + "grad_norm": 1.473486024700357, + "learning_rate": 1.5737006563518475e-05, + "loss": 0.7116, + "step": 10648 + }, + { + "epoch": 0.3263761186710801, + "grad_norm": 1.3851954014527694, + "learning_rate": 1.5736193502064977e-05, + "loss": 0.8327, + "step": 10649 + }, + { + "epoch": 0.3264067671938213, + "grad_norm": 1.4623917508046718, + "learning_rate": 1.573538038409129e-05, + "loss": 0.8438, + "step": 10650 + }, + { + "epoch": 0.3264374157165625, + "grad_norm": 1.7849173536034884, + "learning_rate": 1.573456720960544e-05, + "loss": 0.8381, + "step": 10651 + }, + { + "epoch": 0.3264680642393037, + "grad_norm": 1.5549309785407108, + "learning_rate": 1.573375397861543e-05, + "loss": 0.743, + "step": 10652 + }, + { + "epoch": 0.3264987127620449, + "grad_norm": 1.3701869328466436, + "learning_rate": 1.5732940691129272e-05, + "loss": 0.7429, + "step": 10653 + }, + { + "epoch": 0.32652936128478605, + "grad_norm": 0.7697680653849699, + "learning_rate": 1.5732127347154985e-05, + "loss": 0.6239, + "step": 10654 + }, + { + "epoch": 0.32656000980752725, + "grad_norm": 1.5614946135325447, + "learning_rate": 1.5731313946700582e-05, + "loss": 0.7735, + "step": 10655 + }, + { + "epoch": 0.32659065833026846, + "grad_norm": 1.4150350942982657, + "learning_rate": 1.5730500489774075e-05, + "loss": 0.7953, + "step": 10656 + }, + { + "epoch": 0.32662130685300966, + "grad_norm": 1.5451219480869844, + "learning_rate": 1.572968697638348e-05, + "loss": 0.8391, + "step": 10657 + }, + { + "epoch": 0.32665195537575087, + "grad_norm": 1.4574196725347703, + "learning_rate": 1.5728873406536815e-05, + "loss": 0.6318, + "step": 10658 + }, + { + "epoch": 0.3266826038984921, + "grad_norm": 1.8669431100581813, + "learning_rate": 1.572805978024209e-05, + "loss": 0.7697, + "step": 10659 + }, + { + "epoch": 0.3267132524212333, + "grad_norm": 1.536161799546409, + "learning_rate": 1.572724609750733e-05, + "loss": 0.7254, + "step": 10660 + }, + { + "epoch": 0.3267439009439745, + "grad_norm": 1.3858479922741933, + "learning_rate": 1.5726432358340548e-05, + "loss": 0.8482, + "step": 10661 + }, + { + "epoch": 0.3267745494667157, + "grad_norm": 1.5042969052790518, + "learning_rate": 1.5725618562749764e-05, + "loss": 0.7794, + "step": 10662 + }, + { + "epoch": 0.3268051979894569, + "grad_norm": 1.4840168308109194, + "learning_rate": 1.572480471074299e-05, + "loss": 0.8168, + "step": 10663 + }, + { + "epoch": 0.3268358465121981, + "grad_norm": 1.5262653246642024, + "learning_rate": 1.5723990802328256e-05, + "loss": 0.8213, + "step": 10664 + }, + { + "epoch": 0.3268664950349393, + "grad_norm": 1.3410057209125332, + "learning_rate": 1.5723176837513574e-05, + "loss": 0.6931, + "step": 10665 + }, + { + "epoch": 0.3268971435576805, + "grad_norm": 1.3415559901388248, + "learning_rate": 1.572236281630697e-05, + "loss": 0.6861, + "step": 10666 + }, + { + "epoch": 0.3269277920804217, + "grad_norm": 0.7127894665026545, + "learning_rate": 1.5721548738716457e-05, + "loss": 0.5993, + "step": 10667 + }, + { + "epoch": 0.3269584406031629, + "grad_norm": 1.3757222353197718, + "learning_rate": 1.572073460475006e-05, + "loss": 0.6345, + "step": 10668 + }, + { + "epoch": 0.32698908912590413, + "grad_norm": 1.5198840453170257, + "learning_rate": 1.5719920414415802e-05, + "loss": 0.8251, + "step": 10669 + }, + { + "epoch": 0.32701973764864534, + "grad_norm": 1.4766791229858751, + "learning_rate": 1.571910616772171e-05, + "loss": 0.7436, + "step": 10670 + }, + { + "epoch": 0.32705038617138654, + "grad_norm": 0.6840663506599267, + "learning_rate": 1.5718291864675793e-05, + "loss": 0.6263, + "step": 10671 + }, + { + "epoch": 0.32708103469412775, + "grad_norm": 0.6978444112318192, + "learning_rate": 1.5717477505286087e-05, + "loss": 0.6118, + "step": 10672 + }, + { + "epoch": 0.32711168321686895, + "grad_norm": 1.418839036534784, + "learning_rate": 1.5716663089560612e-05, + "loss": 0.7614, + "step": 10673 + }, + { + "epoch": 0.32714233173961016, + "grad_norm": 1.617939295157914, + "learning_rate": 1.5715848617507396e-05, + "loss": 0.7076, + "step": 10674 + }, + { + "epoch": 0.32717298026235136, + "grad_norm": 1.582960228273797, + "learning_rate": 1.5715034089134457e-05, + "loss": 0.8834, + "step": 10675 + }, + { + "epoch": 0.32720362878509257, + "grad_norm": 1.3536507602418706, + "learning_rate": 1.5714219504449823e-05, + "loss": 0.6419, + "step": 10676 + }, + { + "epoch": 0.3272342773078338, + "grad_norm": 0.6956745066413105, + "learning_rate": 1.5713404863461526e-05, + "loss": 0.621, + "step": 10677 + }, + { + "epoch": 0.327264925830575, + "grad_norm": 0.7149209505785764, + "learning_rate": 1.5712590166177587e-05, + "loss": 0.6486, + "step": 10678 + }, + { + "epoch": 0.3272955743533162, + "grad_norm": 1.4712560196464155, + "learning_rate": 1.571177541260604e-05, + "loss": 0.7242, + "step": 10679 + }, + { + "epoch": 0.3273262228760574, + "grad_norm": 0.664410903843245, + "learning_rate": 1.5710960602754903e-05, + "loss": 0.6185, + "step": 10680 + }, + { + "epoch": 0.3273568713987986, + "grad_norm": 1.5099930319737038, + "learning_rate": 1.5710145736632215e-05, + "loss": 0.7676, + "step": 10681 + }, + { + "epoch": 0.3273875199215398, + "grad_norm": 1.4806116518853365, + "learning_rate": 1.5709330814245997e-05, + "loss": 0.8168, + "step": 10682 + }, + { + "epoch": 0.327418168444281, + "grad_norm": 1.5897096433896913, + "learning_rate": 1.5708515835604282e-05, + "loss": 0.8889, + "step": 10683 + }, + { + "epoch": 0.3274488169670222, + "grad_norm": 0.7018706145586305, + "learning_rate": 1.57077008007151e-05, + "loss": 0.6428, + "step": 10684 + }, + { + "epoch": 0.32747946548976337, + "grad_norm": 1.4380577563849495, + "learning_rate": 1.5706885709586482e-05, + "loss": 0.8434, + "step": 10685 + }, + { + "epoch": 0.32751011401250457, + "grad_norm": 1.476263084197407, + "learning_rate": 1.5706070562226457e-05, + "loss": 0.724, + "step": 10686 + }, + { + "epoch": 0.3275407625352458, + "grad_norm": 1.2846470812915871, + "learning_rate": 1.5705255358643058e-05, + "loss": 0.684, + "step": 10687 + }, + { + "epoch": 0.327571411057987, + "grad_norm": 1.4503238424222584, + "learning_rate": 1.570444009884432e-05, + "loss": 0.7846, + "step": 10688 + }, + { + "epoch": 0.3276020595807282, + "grad_norm": 1.385540084178764, + "learning_rate": 1.5703624782838277e-05, + "loss": 0.6973, + "step": 10689 + }, + { + "epoch": 0.3276327081034694, + "grad_norm": 1.4193366441476043, + "learning_rate": 1.5702809410632956e-05, + "loss": 0.7599, + "step": 10690 + }, + { + "epoch": 0.3276633566262106, + "grad_norm": 0.728195570437989, + "learning_rate": 1.5701993982236398e-05, + "loss": 0.6422, + "step": 10691 + }, + { + "epoch": 0.3276940051489518, + "grad_norm": 1.2823510535803404, + "learning_rate": 1.5701178497656632e-05, + "loss": 0.6505, + "step": 10692 + }, + { + "epoch": 0.327724653671693, + "grad_norm": 1.4195647377020295, + "learning_rate": 1.5700362956901695e-05, + "loss": 0.8099, + "step": 10693 + }, + { + "epoch": 0.3277553021944342, + "grad_norm": 1.4576131635158536, + "learning_rate": 1.5699547359979627e-05, + "loss": 0.7759, + "step": 10694 + }, + { + "epoch": 0.3277859507171754, + "grad_norm": 1.4514099813161838, + "learning_rate": 1.5698731706898455e-05, + "loss": 0.7356, + "step": 10695 + }, + { + "epoch": 0.32781659923991663, + "grad_norm": 1.4723072443655394, + "learning_rate": 1.5697915997666226e-05, + "loss": 0.8059, + "step": 10696 + }, + { + "epoch": 0.32784724776265783, + "grad_norm": 1.3985769950877682, + "learning_rate": 1.5697100232290972e-05, + "loss": 0.7643, + "step": 10697 + }, + { + "epoch": 0.32787789628539904, + "grad_norm": 0.6512360573486536, + "learning_rate": 1.5696284410780727e-05, + "loss": 0.6047, + "step": 10698 + }, + { + "epoch": 0.32790854480814025, + "grad_norm": 1.4206064771873224, + "learning_rate": 1.5695468533143538e-05, + "loss": 0.7294, + "step": 10699 + }, + { + "epoch": 0.32793919333088145, + "grad_norm": 1.404067334540772, + "learning_rate": 1.5694652599387442e-05, + "loss": 0.8042, + "step": 10700 + }, + { + "epoch": 0.32796984185362266, + "grad_norm": 1.4079480229283114, + "learning_rate": 1.5693836609520478e-05, + "loss": 0.7302, + "step": 10701 + }, + { + "epoch": 0.32800049037636386, + "grad_norm": 0.6687151470259065, + "learning_rate": 1.569302056355068e-05, + "loss": 0.5901, + "step": 10702 + }, + { + "epoch": 0.32803113889910507, + "grad_norm": 1.3077456323271421, + "learning_rate": 1.5692204461486097e-05, + "loss": 0.8005, + "step": 10703 + }, + { + "epoch": 0.3280617874218463, + "grad_norm": 1.4683669573119031, + "learning_rate": 1.5691388303334764e-05, + "loss": 0.7294, + "step": 10704 + }, + { + "epoch": 0.3280924359445875, + "grad_norm": 1.4600208671425485, + "learning_rate": 1.569057208910473e-05, + "loss": 0.765, + "step": 10705 + }, + { + "epoch": 0.3281230844673287, + "grad_norm": 1.5031591003551819, + "learning_rate": 1.568975581880403e-05, + "loss": 0.7083, + "step": 10706 + }, + { + "epoch": 0.3281537329900699, + "grad_norm": 0.7043076578145029, + "learning_rate": 1.568893949244071e-05, + "loss": 0.6342, + "step": 10707 + }, + { + "epoch": 0.3281843815128111, + "grad_norm": 1.61097307507689, + "learning_rate": 1.5688123110022816e-05, + "loss": 0.8073, + "step": 10708 + }, + { + "epoch": 0.3282150300355523, + "grad_norm": 1.4366748871283197, + "learning_rate": 1.5687306671558388e-05, + "loss": 0.6846, + "step": 10709 + }, + { + "epoch": 0.3282456785582935, + "grad_norm": 0.663394745083849, + "learning_rate": 1.5686490177055472e-05, + "loss": 0.5932, + "step": 10710 + }, + { + "epoch": 0.3282763270810347, + "grad_norm": 0.6713590847766182, + "learning_rate": 1.5685673626522113e-05, + "loss": 0.6234, + "step": 10711 + }, + { + "epoch": 0.3283069756037759, + "grad_norm": 1.4195009373866085, + "learning_rate": 1.568485701996636e-05, + "loss": 0.7489, + "step": 10712 + }, + { + "epoch": 0.3283376241265171, + "grad_norm": 1.8307402639709345, + "learning_rate": 1.5684040357396252e-05, + "loss": 0.7792, + "step": 10713 + }, + { + "epoch": 0.32836827264925833, + "grad_norm": 1.4246476396507712, + "learning_rate": 1.5683223638819844e-05, + "loss": 0.7635, + "step": 10714 + }, + { + "epoch": 0.32839892117199954, + "grad_norm": 1.6215679408075574, + "learning_rate": 1.5682406864245176e-05, + "loss": 0.7403, + "step": 10715 + }, + { + "epoch": 0.3284295696947407, + "grad_norm": 1.22338229940888, + "learning_rate": 1.5681590033680302e-05, + "loss": 0.7453, + "step": 10716 + }, + { + "epoch": 0.3284602182174819, + "grad_norm": 1.4509365899192772, + "learning_rate": 1.568077314713327e-05, + "loss": 0.764, + "step": 10717 + }, + { + "epoch": 0.3284908667402231, + "grad_norm": 1.321302115388718, + "learning_rate": 1.567995620461212e-05, + "loss": 0.6284, + "step": 10718 + }, + { + "epoch": 0.3285215152629643, + "grad_norm": 1.6765673743158986, + "learning_rate": 1.5679139206124912e-05, + "loss": 0.729, + "step": 10719 + }, + { + "epoch": 0.3285521637857055, + "grad_norm": 0.7114354549061256, + "learning_rate": 1.5678322151679693e-05, + "loss": 0.6504, + "step": 10720 + }, + { + "epoch": 0.3285828123084467, + "grad_norm": 1.6667730450561304, + "learning_rate": 1.5677505041284512e-05, + "loss": 0.8312, + "step": 10721 + }, + { + "epoch": 0.3286134608311879, + "grad_norm": 1.3671587758638046, + "learning_rate": 1.567668787494742e-05, + "loss": 0.6755, + "step": 10722 + }, + { + "epoch": 0.3286441093539291, + "grad_norm": 1.768195920488374, + "learning_rate": 1.5675870652676472e-05, + "loss": 0.8099, + "step": 10723 + }, + { + "epoch": 0.32867475787667033, + "grad_norm": 1.5723665345373987, + "learning_rate": 1.5675053374479717e-05, + "loss": 0.7501, + "step": 10724 + }, + { + "epoch": 0.32870540639941154, + "grad_norm": 1.6219404184555828, + "learning_rate": 1.567423604036521e-05, + "loss": 0.8724, + "step": 10725 + }, + { + "epoch": 0.32873605492215274, + "grad_norm": 1.2724744837349666, + "learning_rate": 1.5673418650341e-05, + "loss": 0.6907, + "step": 10726 + }, + { + "epoch": 0.32876670344489395, + "grad_norm": 0.671446007589963, + "learning_rate": 1.5672601204415148e-05, + "loss": 0.6125, + "step": 10727 + }, + { + "epoch": 0.32879735196763515, + "grad_norm": 1.410128680337839, + "learning_rate": 1.5671783702595705e-05, + "loss": 0.7653, + "step": 10728 + }, + { + "epoch": 0.32882800049037636, + "grad_norm": 1.4876468189296996, + "learning_rate": 1.5670966144890725e-05, + "loss": 0.678, + "step": 10729 + }, + { + "epoch": 0.32885864901311757, + "grad_norm": 0.6853848580732721, + "learning_rate": 1.5670148531308266e-05, + "loss": 0.6312, + "step": 10730 + }, + { + "epoch": 0.32888929753585877, + "grad_norm": 1.6405152264623832, + "learning_rate": 1.566933086185638e-05, + "loss": 0.7275, + "step": 10731 + }, + { + "epoch": 0.3289199460586, + "grad_norm": 1.5165669021870518, + "learning_rate": 1.5668513136543127e-05, + "loss": 0.6761, + "step": 10732 + }, + { + "epoch": 0.3289505945813412, + "grad_norm": 1.6698008651991287, + "learning_rate": 1.5667695355376565e-05, + "loss": 0.885, + "step": 10733 + }, + { + "epoch": 0.3289812431040824, + "grad_norm": 0.6752695474916356, + "learning_rate": 1.566687751836475e-05, + "loss": 0.6123, + "step": 10734 + }, + { + "epoch": 0.3290118916268236, + "grad_norm": 1.3201429530085065, + "learning_rate": 1.5666059625515742e-05, + "loss": 0.604, + "step": 10735 + }, + { + "epoch": 0.3290425401495648, + "grad_norm": 1.4236168213833325, + "learning_rate": 1.5665241676837597e-05, + "loss": 0.8143, + "step": 10736 + }, + { + "epoch": 0.329073188672306, + "grad_norm": 0.6851063845656138, + "learning_rate": 1.5664423672338377e-05, + "loss": 0.5884, + "step": 10737 + }, + { + "epoch": 0.3291038371950472, + "grad_norm": 1.4201366352253992, + "learning_rate": 1.5663605612026144e-05, + "loss": 0.7171, + "step": 10738 + }, + { + "epoch": 0.3291344857177884, + "grad_norm": 1.5205279375431622, + "learning_rate": 1.5662787495908954e-05, + "loss": 0.8306, + "step": 10739 + }, + { + "epoch": 0.3291651342405296, + "grad_norm": 1.4581607113656887, + "learning_rate": 1.5661969323994868e-05, + "loss": 0.7669, + "step": 10740 + }, + { + "epoch": 0.32919578276327083, + "grad_norm": 1.3620370684458927, + "learning_rate": 1.566115109629195e-05, + "loss": 0.7954, + "step": 10741 + }, + { + "epoch": 0.32922643128601203, + "grad_norm": 1.356595510074767, + "learning_rate": 1.566033281280826e-05, + "loss": 0.7535, + "step": 10742 + }, + { + "epoch": 0.32925707980875324, + "grad_norm": 1.4707801778529976, + "learning_rate": 1.5659514473551868e-05, + "loss": 0.8159, + "step": 10743 + }, + { + "epoch": 0.32928772833149444, + "grad_norm": 0.7219035004335534, + "learning_rate": 1.5658696078530825e-05, + "loss": 0.6094, + "step": 10744 + }, + { + "epoch": 0.32931837685423565, + "grad_norm": 1.4403245924756956, + "learning_rate": 1.5657877627753205e-05, + "loss": 0.7212, + "step": 10745 + }, + { + "epoch": 0.32934902537697686, + "grad_norm": 1.2120357089568397, + "learning_rate": 1.565705912122707e-05, + "loss": 0.6978, + "step": 10746 + }, + { + "epoch": 0.329379673899718, + "grad_norm": 1.5997627738576925, + "learning_rate": 1.5656240558960485e-05, + "loss": 0.7634, + "step": 10747 + }, + { + "epoch": 0.3294103224224592, + "grad_norm": 1.5317656313782257, + "learning_rate": 1.5655421940961515e-05, + "loss": 0.7817, + "step": 10748 + }, + { + "epoch": 0.3294409709452004, + "grad_norm": 1.4950073296405004, + "learning_rate": 1.5654603267238223e-05, + "loss": 0.7297, + "step": 10749 + }, + { + "epoch": 0.3294716194679416, + "grad_norm": 1.5950293646204585, + "learning_rate": 1.5653784537798676e-05, + "loss": 0.6865, + "step": 10750 + }, + { + "epoch": 0.32950226799068283, + "grad_norm": 1.5319590406856438, + "learning_rate": 1.5652965752650948e-05, + "loss": 0.7926, + "step": 10751 + }, + { + "epoch": 0.32953291651342403, + "grad_norm": 1.5284518182547078, + "learning_rate": 1.56521469118031e-05, + "loss": 0.7594, + "step": 10752 + }, + { + "epoch": 0.32956356503616524, + "grad_norm": 1.2237436505112043, + "learning_rate": 1.5651328015263202e-05, + "loss": 0.7437, + "step": 10753 + }, + { + "epoch": 0.32959421355890645, + "grad_norm": 1.4496821789006995, + "learning_rate": 1.5650509063039326e-05, + "loss": 0.8243, + "step": 10754 + }, + { + "epoch": 0.32962486208164765, + "grad_norm": 1.4723144545689073, + "learning_rate": 1.5649690055139537e-05, + "loss": 0.7953, + "step": 10755 + }, + { + "epoch": 0.32965551060438886, + "grad_norm": 1.4320616711374532, + "learning_rate": 1.5648870991571906e-05, + "loss": 0.7361, + "step": 10756 + }, + { + "epoch": 0.32968615912713006, + "grad_norm": 1.4737444157434416, + "learning_rate": 1.56480518723445e-05, + "loss": 0.7354, + "step": 10757 + }, + { + "epoch": 0.32971680764987127, + "grad_norm": 1.4818275492542163, + "learning_rate": 1.56472326974654e-05, + "loss": 0.7599, + "step": 10758 + }, + { + "epoch": 0.3297474561726125, + "grad_norm": 1.316255058196465, + "learning_rate": 1.5646413466942666e-05, + "loss": 0.5885, + "step": 10759 + }, + { + "epoch": 0.3297781046953537, + "grad_norm": 1.3586818283738151, + "learning_rate": 1.564559418078438e-05, + "loss": 0.6678, + "step": 10760 + }, + { + "epoch": 0.3298087532180949, + "grad_norm": 0.7750070136551885, + "learning_rate": 1.5644774838998608e-05, + "loss": 0.6122, + "step": 10761 + }, + { + "epoch": 0.3298394017408361, + "grad_norm": 1.3181401575192342, + "learning_rate": 1.5643955441593425e-05, + "loss": 0.6717, + "step": 10762 + }, + { + "epoch": 0.3298700502635773, + "grad_norm": 1.3870454481946266, + "learning_rate": 1.5643135988576905e-05, + "loss": 0.7439, + "step": 10763 + }, + { + "epoch": 0.3299006987863185, + "grad_norm": 1.498946001845344, + "learning_rate": 1.5642316479957123e-05, + "loss": 0.9404, + "step": 10764 + }, + { + "epoch": 0.3299313473090597, + "grad_norm": 1.4942632092215766, + "learning_rate": 1.5641496915742154e-05, + "loss": 0.6843, + "step": 10765 + }, + { + "epoch": 0.3299619958318009, + "grad_norm": 1.460708868443531, + "learning_rate": 1.5640677295940072e-05, + "loss": 0.8152, + "step": 10766 + }, + { + "epoch": 0.3299926443545421, + "grad_norm": 1.4588899507927389, + "learning_rate": 1.563985762055895e-05, + "loss": 0.7574, + "step": 10767 + }, + { + "epoch": 0.3300232928772833, + "grad_norm": 0.7093445589292587, + "learning_rate": 1.5639037889606868e-05, + "loss": 0.6355, + "step": 10768 + }, + { + "epoch": 0.33005394140002453, + "grad_norm": 1.3877563406304123, + "learning_rate": 1.563821810309191e-05, + "loss": 0.7496, + "step": 10769 + }, + { + "epoch": 0.33008458992276574, + "grad_norm": 1.411680799308964, + "learning_rate": 1.563739826102214e-05, + "loss": 0.7731, + "step": 10770 + }, + { + "epoch": 0.33011523844550694, + "grad_norm": 1.4137253775375898, + "learning_rate": 1.5636578363405644e-05, + "loss": 0.783, + "step": 10771 + }, + { + "epoch": 0.33014588696824815, + "grad_norm": 1.4133369064298071, + "learning_rate": 1.56357584102505e-05, + "loss": 0.7221, + "step": 10772 + }, + { + "epoch": 0.33017653549098935, + "grad_norm": 0.6848474337101745, + "learning_rate": 1.5634938401564787e-05, + "loss": 0.6131, + "step": 10773 + }, + { + "epoch": 0.33020718401373056, + "grad_norm": 1.457033305282319, + "learning_rate": 1.563411833735658e-05, + "loss": 0.7084, + "step": 10774 + }, + { + "epoch": 0.33023783253647176, + "grad_norm": 0.6776465200090365, + "learning_rate": 1.5633298217633965e-05, + "loss": 0.6135, + "step": 10775 + }, + { + "epoch": 0.33026848105921297, + "grad_norm": 1.2895709142998977, + "learning_rate": 1.5632478042405024e-05, + "loss": 0.7034, + "step": 10776 + }, + { + "epoch": 0.3302991295819542, + "grad_norm": 1.5087733082015453, + "learning_rate": 1.5631657811677833e-05, + "loss": 0.6949, + "step": 10777 + }, + { + "epoch": 0.3303297781046953, + "grad_norm": 1.4261805452015819, + "learning_rate": 1.563083752546048e-05, + "loss": 0.7755, + "step": 10778 + }, + { + "epoch": 0.33036042662743653, + "grad_norm": 1.4051681193740297, + "learning_rate": 1.563001718376104e-05, + "loss": 0.7194, + "step": 10779 + }, + { + "epoch": 0.33039107515017774, + "grad_norm": 1.4072634326352544, + "learning_rate": 1.5629196786587604e-05, + "loss": 0.6814, + "step": 10780 + }, + { + "epoch": 0.33042172367291894, + "grad_norm": 1.380046305544497, + "learning_rate": 1.562837633394825e-05, + "loss": 0.7592, + "step": 10781 + }, + { + "epoch": 0.33045237219566015, + "grad_norm": 1.5769261300149549, + "learning_rate": 1.5627555825851065e-05, + "loss": 0.8235, + "step": 10782 + }, + { + "epoch": 0.33048302071840135, + "grad_norm": 1.4990631309613567, + "learning_rate": 1.562673526230413e-05, + "loss": 0.8348, + "step": 10783 + }, + { + "epoch": 0.33051366924114256, + "grad_norm": 1.419792186514628, + "learning_rate": 1.5625914643315537e-05, + "loss": 0.7484, + "step": 10784 + }, + { + "epoch": 0.33054431776388377, + "grad_norm": 1.5904495058033328, + "learning_rate": 1.5625093968893363e-05, + "loss": 0.7539, + "step": 10785 + }, + { + "epoch": 0.33057496628662497, + "grad_norm": 1.561149506435952, + "learning_rate": 1.56242732390457e-05, + "loss": 0.7676, + "step": 10786 + }, + { + "epoch": 0.3306056148093662, + "grad_norm": 0.7618132227487887, + "learning_rate": 1.5623452453780635e-05, + "loss": 0.6427, + "step": 10787 + }, + { + "epoch": 0.3306362633321074, + "grad_norm": 1.4806348285194988, + "learning_rate": 1.5622631613106252e-05, + "loss": 0.7239, + "step": 10788 + }, + { + "epoch": 0.3306669118548486, + "grad_norm": 1.3900724068566404, + "learning_rate": 1.5621810717030646e-05, + "loss": 0.7243, + "step": 10789 + }, + { + "epoch": 0.3306975603775898, + "grad_norm": 1.5606383387413727, + "learning_rate": 1.5620989765561895e-05, + "loss": 0.8014, + "step": 10790 + }, + { + "epoch": 0.330728208900331, + "grad_norm": 0.6960374863393738, + "learning_rate": 1.5620168758708098e-05, + "loss": 0.6101, + "step": 10791 + }, + { + "epoch": 0.3307588574230722, + "grad_norm": 0.6792530205670112, + "learning_rate": 1.5619347696477337e-05, + "loss": 0.6051, + "step": 10792 + }, + { + "epoch": 0.3307895059458134, + "grad_norm": 1.5519156871313577, + "learning_rate": 1.561852657887771e-05, + "loss": 0.751, + "step": 10793 + }, + { + "epoch": 0.3308201544685546, + "grad_norm": 1.5247222573419357, + "learning_rate": 1.56177054059173e-05, + "loss": 0.6984, + "step": 10794 + }, + { + "epoch": 0.3308508029912958, + "grad_norm": 1.4097858055232333, + "learning_rate": 1.56168841776042e-05, + "loss": 0.8079, + "step": 10795 + }, + { + "epoch": 0.33088145151403703, + "grad_norm": 1.4595033120457164, + "learning_rate": 1.56160628939465e-05, + "loss": 0.8111, + "step": 10796 + }, + { + "epoch": 0.33091210003677823, + "grad_norm": 1.3794814188573739, + "learning_rate": 1.5615241554952302e-05, + "loss": 0.8293, + "step": 10797 + }, + { + "epoch": 0.33094274855951944, + "grad_norm": 1.3040634739571082, + "learning_rate": 1.5614420160629687e-05, + "loss": 0.6225, + "step": 10798 + }, + { + "epoch": 0.33097339708226065, + "grad_norm": 1.560776967434514, + "learning_rate": 1.561359871098676e-05, + "loss": 0.7794, + "step": 10799 + }, + { + "epoch": 0.33100404560500185, + "grad_norm": 1.4419459151838152, + "learning_rate": 1.5612777206031604e-05, + "loss": 0.8065, + "step": 10800 + }, + { + "epoch": 0.33103469412774306, + "grad_norm": 1.4946040081002228, + "learning_rate": 1.5611955645772318e-05, + "loss": 0.8035, + "step": 10801 + }, + { + "epoch": 0.33106534265048426, + "grad_norm": 1.5515757916860198, + "learning_rate": 1.5611134030217e-05, + "loss": 0.7336, + "step": 10802 + }, + { + "epoch": 0.33109599117322547, + "grad_norm": 1.382002406797887, + "learning_rate": 1.561031235937374e-05, + "loss": 0.7184, + "step": 10803 + }, + { + "epoch": 0.3311266396959667, + "grad_norm": 1.6721719327288591, + "learning_rate": 1.560949063325064e-05, + "loss": 0.88, + "step": 10804 + }, + { + "epoch": 0.3311572882187079, + "grad_norm": 1.6044786861125657, + "learning_rate": 1.560866885185579e-05, + "loss": 0.7234, + "step": 10805 + }, + { + "epoch": 0.3311879367414491, + "grad_norm": 0.7939722713348598, + "learning_rate": 1.560784701519729e-05, + "loss": 0.5787, + "step": 10806 + }, + { + "epoch": 0.3312185852641903, + "grad_norm": 1.5154136715759583, + "learning_rate": 1.5607025123283243e-05, + "loss": 0.7503, + "step": 10807 + }, + { + "epoch": 0.3312492337869315, + "grad_norm": 1.3874873263625986, + "learning_rate": 1.5606203176121743e-05, + "loss": 0.7047, + "step": 10808 + }, + { + "epoch": 0.33127988230967265, + "grad_norm": 1.4906923563486483, + "learning_rate": 1.5605381173720883e-05, + "loss": 0.8868, + "step": 10809 + }, + { + "epoch": 0.33131053083241385, + "grad_norm": 1.4631589460211323, + "learning_rate": 1.560455911608877e-05, + "loss": 0.7563, + "step": 10810 + }, + { + "epoch": 0.33134117935515506, + "grad_norm": 1.4103881962918141, + "learning_rate": 1.5603737003233503e-05, + "loss": 0.7456, + "step": 10811 + }, + { + "epoch": 0.33137182787789626, + "grad_norm": 1.3833402989686496, + "learning_rate": 1.5602914835163184e-05, + "loss": 0.7703, + "step": 10812 + }, + { + "epoch": 0.33140247640063747, + "grad_norm": 0.7227689126537301, + "learning_rate": 1.5602092611885907e-05, + "loss": 0.6158, + "step": 10813 + }, + { + "epoch": 0.3314331249233787, + "grad_norm": 1.4511403261901252, + "learning_rate": 1.560127033340978e-05, + "loss": 0.759, + "step": 10814 + }, + { + "epoch": 0.3314637734461199, + "grad_norm": 0.6996096607244056, + "learning_rate": 1.5600447999742904e-05, + "loss": 0.6201, + "step": 10815 + }, + { + "epoch": 0.3314944219688611, + "grad_norm": 1.6616410850576644, + "learning_rate": 1.5599625610893383e-05, + "loss": 0.7973, + "step": 10816 + }, + { + "epoch": 0.3315250704916023, + "grad_norm": 1.4196326631944751, + "learning_rate": 1.5598803166869318e-05, + "loss": 0.6784, + "step": 10817 + }, + { + "epoch": 0.3315557190143435, + "grad_norm": 1.639962911725868, + "learning_rate": 1.559798066767881e-05, + "loss": 0.887, + "step": 10818 + }, + { + "epoch": 0.3315863675370847, + "grad_norm": 1.4218140303879279, + "learning_rate": 1.5597158113329968e-05, + "loss": 0.7943, + "step": 10819 + }, + { + "epoch": 0.3316170160598259, + "grad_norm": 1.5515778608197583, + "learning_rate": 1.559633550383089e-05, + "loss": 0.7872, + "step": 10820 + }, + { + "epoch": 0.3316476645825671, + "grad_norm": 1.4150774683560547, + "learning_rate": 1.5595512839189693e-05, + "loss": 0.8375, + "step": 10821 + }, + { + "epoch": 0.3316783131053083, + "grad_norm": 1.5196795280579798, + "learning_rate": 1.5594690119414472e-05, + "loss": 0.6962, + "step": 10822 + }, + { + "epoch": 0.3317089616280495, + "grad_norm": 1.4282034635429584, + "learning_rate": 1.559386734451334e-05, + "loss": 0.7689, + "step": 10823 + }, + { + "epoch": 0.33173961015079073, + "grad_norm": 1.4780215609417218, + "learning_rate": 1.55930445144944e-05, + "loss": 0.7543, + "step": 10824 + }, + { + "epoch": 0.33177025867353194, + "grad_norm": 0.7792243857861881, + "learning_rate": 1.5592221629365765e-05, + "loss": 0.5893, + "step": 10825 + }, + { + "epoch": 0.33180090719627314, + "grad_norm": 1.542583048571395, + "learning_rate": 1.559139868913554e-05, + "loss": 0.7615, + "step": 10826 + }, + { + "epoch": 0.33183155571901435, + "grad_norm": 1.4680950897174256, + "learning_rate": 1.5590575693811824e-05, + "loss": 0.7763, + "step": 10827 + }, + { + "epoch": 0.33186220424175555, + "grad_norm": 1.4622183085428804, + "learning_rate": 1.5589752643402743e-05, + "loss": 0.7667, + "step": 10828 + }, + { + "epoch": 0.33189285276449676, + "grad_norm": 1.5687466930208513, + "learning_rate": 1.5588929537916396e-05, + "loss": 0.736, + "step": 10829 + }, + { + "epoch": 0.33192350128723797, + "grad_norm": 1.4834890411551638, + "learning_rate": 1.55881063773609e-05, + "loss": 0.7139, + "step": 10830 + }, + { + "epoch": 0.33195414980997917, + "grad_norm": 1.4847526067219738, + "learning_rate": 1.558728316174436e-05, + "loss": 0.786, + "step": 10831 + }, + { + "epoch": 0.3319847983327204, + "grad_norm": 0.6658323714036717, + "learning_rate": 1.5586459891074888e-05, + "loss": 0.6128, + "step": 10832 + }, + { + "epoch": 0.3320154468554616, + "grad_norm": 1.2982480513983674, + "learning_rate": 1.5585636565360598e-05, + "loss": 0.6364, + "step": 10833 + }, + { + "epoch": 0.3320460953782028, + "grad_norm": 1.4022188262219346, + "learning_rate": 1.5584813184609603e-05, + "loss": 0.7422, + "step": 10834 + }, + { + "epoch": 0.332076743900944, + "grad_norm": 1.5379953370587849, + "learning_rate": 1.5583989748830016e-05, + "loss": 0.8049, + "step": 10835 + }, + { + "epoch": 0.3321073924236852, + "grad_norm": 1.5583493194438656, + "learning_rate": 1.5583166258029946e-05, + "loss": 0.7224, + "step": 10836 + }, + { + "epoch": 0.3321380409464264, + "grad_norm": 1.421199951433216, + "learning_rate": 1.558234271221751e-05, + "loss": 0.7482, + "step": 10837 + }, + { + "epoch": 0.3321686894691676, + "grad_norm": 1.2818498936391414, + "learning_rate": 1.5581519111400826e-05, + "loss": 0.7273, + "step": 10838 + }, + { + "epoch": 0.3321993379919088, + "grad_norm": 1.7216540306921593, + "learning_rate": 1.5580695455588005e-05, + "loss": 0.8355, + "step": 10839 + }, + { + "epoch": 0.33222998651464997, + "grad_norm": 1.3068151213503256, + "learning_rate": 1.5579871744787163e-05, + "loss": 0.6374, + "step": 10840 + }, + { + "epoch": 0.33226063503739117, + "grad_norm": 1.4765502856649917, + "learning_rate": 1.557904797900642e-05, + "loss": 0.7561, + "step": 10841 + }, + { + "epoch": 0.3322912835601324, + "grad_norm": 0.7296004140252627, + "learning_rate": 1.5578224158253885e-05, + "loss": 0.6018, + "step": 10842 + }, + { + "epoch": 0.3323219320828736, + "grad_norm": 0.691005588076008, + "learning_rate": 1.5577400282537683e-05, + "loss": 0.6329, + "step": 10843 + }, + { + "epoch": 0.3323525806056148, + "grad_norm": 1.640081683497518, + "learning_rate": 1.557657635186593e-05, + "loss": 0.7963, + "step": 10844 + }, + { + "epoch": 0.332383229128356, + "grad_norm": 0.6487770654013287, + "learning_rate": 1.5575752366246743e-05, + "loss": 0.6105, + "step": 10845 + }, + { + "epoch": 0.3324138776510972, + "grad_norm": 1.3184464653697279, + "learning_rate": 1.5574928325688236e-05, + "loss": 0.6356, + "step": 10846 + }, + { + "epoch": 0.3324445261738384, + "grad_norm": 1.6492698198461908, + "learning_rate": 1.557410423019854e-05, + "loss": 0.8281, + "step": 10847 + }, + { + "epoch": 0.3324751746965796, + "grad_norm": 1.6094664065132276, + "learning_rate": 1.5573280079785768e-05, + "loss": 0.7359, + "step": 10848 + }, + { + "epoch": 0.3325058232193208, + "grad_norm": 0.7520254265395766, + "learning_rate": 1.557245587445804e-05, + "loss": 0.6439, + "step": 10849 + }, + { + "epoch": 0.332536471742062, + "grad_norm": 1.446429230038046, + "learning_rate": 1.557163161422348e-05, + "loss": 0.8074, + "step": 10850 + }, + { + "epoch": 0.33256712026480323, + "grad_norm": 1.4276010748145447, + "learning_rate": 1.5570807299090206e-05, + "loss": 0.7641, + "step": 10851 + }, + { + "epoch": 0.33259776878754443, + "grad_norm": 0.6868402027486392, + "learning_rate": 1.5569982929066342e-05, + "loss": 0.6118, + "step": 10852 + }, + { + "epoch": 0.33262841731028564, + "grad_norm": 1.268010557736334, + "learning_rate": 1.5569158504160012e-05, + "loss": 0.7443, + "step": 10853 + }, + { + "epoch": 0.33265906583302685, + "grad_norm": 1.555825066282154, + "learning_rate": 1.556833402437934e-05, + "loss": 0.6903, + "step": 10854 + }, + { + "epoch": 0.33268971435576805, + "grad_norm": 1.5703028404887687, + "learning_rate": 1.5567509489732445e-05, + "loss": 0.7347, + "step": 10855 + }, + { + "epoch": 0.33272036287850926, + "grad_norm": 0.7204972499684843, + "learning_rate": 1.5566684900227454e-05, + "loss": 0.6056, + "step": 10856 + }, + { + "epoch": 0.33275101140125046, + "grad_norm": 1.4858007071429529, + "learning_rate": 1.5565860255872495e-05, + "loss": 0.8044, + "step": 10857 + }, + { + "epoch": 0.33278165992399167, + "grad_norm": 1.431332769812386, + "learning_rate": 1.556503555667569e-05, + "loss": 0.6867, + "step": 10858 + }, + { + "epoch": 0.3328123084467329, + "grad_norm": 1.4558013187739571, + "learning_rate": 1.5564210802645168e-05, + "loss": 0.7471, + "step": 10859 + }, + { + "epoch": 0.3328429569694741, + "grad_norm": 1.5256318343670914, + "learning_rate": 1.5563385993789052e-05, + "loss": 0.7422, + "step": 10860 + }, + { + "epoch": 0.3328736054922153, + "grad_norm": 1.4821575694214946, + "learning_rate": 1.5562561130115468e-05, + "loss": 0.8438, + "step": 10861 + }, + { + "epoch": 0.3329042540149565, + "grad_norm": 0.7198735805663723, + "learning_rate": 1.556173621163255e-05, + "loss": 0.6413, + "step": 10862 + }, + { + "epoch": 0.3329349025376977, + "grad_norm": 1.406963538421234, + "learning_rate": 1.556091123834842e-05, + "loss": 0.7388, + "step": 10863 + }, + { + "epoch": 0.3329655510604389, + "grad_norm": 0.6901880711695375, + "learning_rate": 1.5560086210271208e-05, + "loss": 0.6468, + "step": 10864 + }, + { + "epoch": 0.3329961995831801, + "grad_norm": 0.685876561608442, + "learning_rate": 1.5559261127409044e-05, + "loss": 0.6009, + "step": 10865 + }, + { + "epoch": 0.3330268481059213, + "grad_norm": 1.4929790190659773, + "learning_rate": 1.5558435989770056e-05, + "loss": 0.8605, + "step": 10866 + }, + { + "epoch": 0.3330574966286625, + "grad_norm": 1.4485992601097661, + "learning_rate": 1.5557610797362382e-05, + "loss": 0.6264, + "step": 10867 + }, + { + "epoch": 0.3330881451514037, + "grad_norm": 1.5496050020307508, + "learning_rate": 1.555678555019414e-05, + "loss": 0.7987, + "step": 10868 + }, + { + "epoch": 0.33311879367414493, + "grad_norm": 1.5572091831408506, + "learning_rate": 1.555596024827347e-05, + "loss": 0.7746, + "step": 10869 + }, + { + "epoch": 0.33314944219688614, + "grad_norm": 0.7077789858695104, + "learning_rate": 1.5555134891608506e-05, + "loss": 0.6134, + "step": 10870 + }, + { + "epoch": 0.3331800907196273, + "grad_norm": 0.7038486033692942, + "learning_rate": 1.5554309480207375e-05, + "loss": 0.6108, + "step": 10871 + }, + { + "epoch": 0.3332107392423685, + "grad_norm": 1.5097620852092668, + "learning_rate": 1.555348401407821e-05, + "loss": 0.738, + "step": 10872 + }, + { + "epoch": 0.3332413877651097, + "grad_norm": 1.6933116417514935, + "learning_rate": 1.5552658493229148e-05, + "loss": 0.7678, + "step": 10873 + }, + { + "epoch": 0.3332720362878509, + "grad_norm": 2.937021602280322, + "learning_rate": 1.555183291766832e-05, + "loss": 0.682, + "step": 10874 + }, + { + "epoch": 0.3333026848105921, + "grad_norm": 1.5662426434097585, + "learning_rate": 1.555100728740386e-05, + "loss": 0.782, + "step": 10875 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.5853508644725969, + "learning_rate": 1.5550181602443907e-05, + "loss": 0.7361, + "step": 10876 + }, + { + "epoch": 0.3333639818560745, + "grad_norm": 1.4819690076513965, + "learning_rate": 1.554935586279659e-05, + "loss": 0.7202, + "step": 10877 + }, + { + "epoch": 0.3333946303788157, + "grad_norm": 1.484754308415419, + "learning_rate": 1.5548530068470058e-05, + "loss": 0.7853, + "step": 10878 + }, + { + "epoch": 0.33342527890155693, + "grad_norm": 1.485772633648899, + "learning_rate": 1.5547704219472435e-05, + "loss": 0.7403, + "step": 10879 + }, + { + "epoch": 0.33345592742429814, + "grad_norm": 0.8399704816254501, + "learning_rate": 1.5546878315811862e-05, + "loss": 0.6347, + "step": 10880 + }, + { + "epoch": 0.33348657594703934, + "grad_norm": 1.5592652858372953, + "learning_rate": 1.5546052357496478e-05, + "loss": 0.715, + "step": 10881 + }, + { + "epoch": 0.33351722446978055, + "grad_norm": 1.6182209608689375, + "learning_rate": 1.554522634453442e-05, + "loss": 0.7739, + "step": 10882 + }, + { + "epoch": 0.33354787299252175, + "grad_norm": 0.6880642047955072, + "learning_rate": 1.5544400276933834e-05, + "loss": 0.5938, + "step": 10883 + }, + { + "epoch": 0.33357852151526296, + "grad_norm": 1.5918157566569193, + "learning_rate": 1.554357415470285e-05, + "loss": 0.757, + "step": 10884 + }, + { + "epoch": 0.33360917003800417, + "grad_norm": 0.6541178783740532, + "learning_rate": 1.554274797784961e-05, + "loss": 0.5948, + "step": 10885 + }, + { + "epoch": 0.33363981856074537, + "grad_norm": 1.4567315065686623, + "learning_rate": 1.5541921746382257e-05, + "loss": 0.8055, + "step": 10886 + }, + { + "epoch": 0.3336704670834866, + "grad_norm": 1.388598870817013, + "learning_rate": 1.554109546030893e-05, + "loss": 0.6876, + "step": 10887 + }, + { + "epoch": 0.3337011156062278, + "grad_norm": 1.473495233929902, + "learning_rate": 1.5540269119637776e-05, + "loss": 0.843, + "step": 10888 + }, + { + "epoch": 0.333731764128969, + "grad_norm": 1.3304404930055664, + "learning_rate": 1.553944272437693e-05, + "loss": 0.6906, + "step": 10889 + }, + { + "epoch": 0.3337624126517102, + "grad_norm": 1.4517336723457888, + "learning_rate": 1.5538616274534536e-05, + "loss": 0.7312, + "step": 10890 + }, + { + "epoch": 0.3337930611744514, + "grad_norm": 1.6070410395716066, + "learning_rate": 1.553778977011874e-05, + "loss": 0.751, + "step": 10891 + }, + { + "epoch": 0.3338237096971926, + "grad_norm": 1.4141282665207133, + "learning_rate": 1.5536963211137686e-05, + "loss": 0.7037, + "step": 10892 + }, + { + "epoch": 0.3338543582199338, + "grad_norm": 1.3697591442684378, + "learning_rate": 1.5536136597599515e-05, + "loss": 0.7255, + "step": 10893 + }, + { + "epoch": 0.333885006742675, + "grad_norm": 0.7368109837678742, + "learning_rate": 1.5535309929512372e-05, + "loss": 0.5909, + "step": 10894 + }, + { + "epoch": 0.3339156552654162, + "grad_norm": 1.3753661073501975, + "learning_rate": 1.5534483206884408e-05, + "loss": 0.7075, + "step": 10895 + }, + { + "epoch": 0.33394630378815743, + "grad_norm": 1.59265000133101, + "learning_rate": 1.553365642972376e-05, + "loss": 0.7087, + "step": 10896 + }, + { + "epoch": 0.33397695231089863, + "grad_norm": 1.825776599411098, + "learning_rate": 1.5532829598038587e-05, + "loss": 0.7865, + "step": 10897 + }, + { + "epoch": 0.33400760083363984, + "grad_norm": 1.2972826676397395, + "learning_rate": 1.553200271183702e-05, + "loss": 0.7713, + "step": 10898 + }, + { + "epoch": 0.33403824935638105, + "grad_norm": 0.67094230166476, + "learning_rate": 1.5531175771127218e-05, + "loss": 0.633, + "step": 10899 + }, + { + "epoch": 0.33406889787912225, + "grad_norm": 1.533874312029978, + "learning_rate": 1.5530348775917325e-05, + "loss": 0.7404, + "step": 10900 + }, + { + "epoch": 0.33409954640186346, + "grad_norm": 1.4956498613774343, + "learning_rate": 1.5529521726215497e-05, + "loss": 0.7189, + "step": 10901 + }, + { + "epoch": 0.3341301949246046, + "grad_norm": 1.3917632074964712, + "learning_rate": 1.552869462202987e-05, + "loss": 0.7466, + "step": 10902 + }, + { + "epoch": 0.3341608434473458, + "grad_norm": 1.3621119362245202, + "learning_rate": 1.55278674633686e-05, + "loss": 0.6351, + "step": 10903 + }, + { + "epoch": 0.334191491970087, + "grad_norm": 1.6264121691712679, + "learning_rate": 1.5527040250239836e-05, + "loss": 0.7497, + "step": 10904 + }, + { + "epoch": 0.3342221404928282, + "grad_norm": 1.6205974991125847, + "learning_rate": 1.5526212982651738e-05, + "loss": 0.8389, + "step": 10905 + }, + { + "epoch": 0.33425278901556943, + "grad_norm": 1.3349349994952313, + "learning_rate": 1.5525385660612445e-05, + "loss": 0.8326, + "step": 10906 + }, + { + "epoch": 0.33428343753831063, + "grad_norm": 1.5227770680292982, + "learning_rate": 1.552455828413011e-05, + "loss": 0.8251, + "step": 10907 + }, + { + "epoch": 0.33431408606105184, + "grad_norm": 1.4211877013595404, + "learning_rate": 1.5523730853212893e-05, + "loss": 0.7223, + "step": 10908 + }, + { + "epoch": 0.33434473458379305, + "grad_norm": 1.3670128210733572, + "learning_rate": 1.552290336786894e-05, + "loss": 0.7183, + "step": 10909 + }, + { + "epoch": 0.33437538310653425, + "grad_norm": 0.7753284482555476, + "learning_rate": 1.552207582810641e-05, + "loss": 0.6076, + "step": 10910 + }, + { + "epoch": 0.33440603162927546, + "grad_norm": 1.568766430581649, + "learning_rate": 1.5521248233933452e-05, + "loss": 0.7677, + "step": 10911 + }, + { + "epoch": 0.33443668015201666, + "grad_norm": 1.4711622582481907, + "learning_rate": 1.5520420585358228e-05, + "loss": 0.736, + "step": 10912 + }, + { + "epoch": 0.33446732867475787, + "grad_norm": 1.7349460008869986, + "learning_rate": 1.551959288238888e-05, + "loss": 0.7728, + "step": 10913 + }, + { + "epoch": 0.3344979771974991, + "grad_norm": 1.496207234786872, + "learning_rate": 1.551876512503357e-05, + "loss": 0.846, + "step": 10914 + }, + { + "epoch": 0.3345286257202403, + "grad_norm": 1.5126426591992665, + "learning_rate": 1.5517937313300462e-05, + "loss": 0.7479, + "step": 10915 + }, + { + "epoch": 0.3345592742429815, + "grad_norm": 1.372854037658923, + "learning_rate": 1.5517109447197704e-05, + "loss": 0.6535, + "step": 10916 + }, + { + "epoch": 0.3345899227657227, + "grad_norm": 1.6323902355850257, + "learning_rate": 1.5516281526733453e-05, + "loss": 0.774, + "step": 10917 + }, + { + "epoch": 0.3346205712884639, + "grad_norm": 1.6821266468741531, + "learning_rate": 1.551545355191587e-05, + "loss": 0.8518, + "step": 10918 + }, + { + "epoch": 0.3346512198112051, + "grad_norm": 1.4352267573637345, + "learning_rate": 1.551462552275311e-05, + "loss": 0.7001, + "step": 10919 + }, + { + "epoch": 0.3346818683339463, + "grad_norm": 1.2450668825972324, + "learning_rate": 1.551379743925334e-05, + "loss": 0.6388, + "step": 10920 + }, + { + "epoch": 0.3347125168566875, + "grad_norm": 1.5458303116042684, + "learning_rate": 1.5512969301424705e-05, + "loss": 0.7705, + "step": 10921 + }, + { + "epoch": 0.3347431653794287, + "grad_norm": 1.4169936143395023, + "learning_rate": 1.5512141109275377e-05, + "loss": 0.8011, + "step": 10922 + }, + { + "epoch": 0.3347738139021699, + "grad_norm": 1.412282971683196, + "learning_rate": 1.551131286281351e-05, + "loss": 0.7653, + "step": 10923 + }, + { + "epoch": 0.33480446242491113, + "grad_norm": 1.478137744220489, + "learning_rate": 1.551048456204727e-05, + "loss": 0.7212, + "step": 10924 + }, + { + "epoch": 0.33483511094765234, + "grad_norm": 0.7968394543223647, + "learning_rate": 1.5509656206984814e-05, + "loss": 0.6227, + "step": 10925 + }, + { + "epoch": 0.33486575947039354, + "grad_norm": 1.5392722524009945, + "learning_rate": 1.5508827797634304e-05, + "loss": 0.5667, + "step": 10926 + }, + { + "epoch": 0.33489640799313475, + "grad_norm": 1.4505038266098205, + "learning_rate": 1.5507999334003904e-05, + "loss": 0.7786, + "step": 10927 + }, + { + "epoch": 0.33492705651587595, + "grad_norm": 1.4281942581280866, + "learning_rate": 1.5507170816101785e-05, + "loss": 0.8387, + "step": 10928 + }, + { + "epoch": 0.33495770503861716, + "grad_norm": 1.4393437741309099, + "learning_rate": 1.5506342243936096e-05, + "loss": 0.8034, + "step": 10929 + }, + { + "epoch": 0.33498835356135837, + "grad_norm": 1.827134308169871, + "learning_rate": 1.550551361751501e-05, + "loss": 0.8775, + "step": 10930 + }, + { + "epoch": 0.33501900208409957, + "grad_norm": 1.299363046534452, + "learning_rate": 1.5504684936846687e-05, + "loss": 0.7108, + "step": 10931 + }, + { + "epoch": 0.3350496506068408, + "grad_norm": 1.5387672328892437, + "learning_rate": 1.55038562019393e-05, + "loss": 0.7313, + "step": 10932 + }, + { + "epoch": 0.3350802991295819, + "grad_norm": 1.3547500929995782, + "learning_rate": 1.5503027412801003e-05, + "loss": 0.7018, + "step": 10933 + }, + { + "epoch": 0.33511094765232313, + "grad_norm": 1.6529967189982178, + "learning_rate": 1.5502198569439976e-05, + "loss": 0.7372, + "step": 10934 + }, + { + "epoch": 0.33514159617506434, + "grad_norm": 1.4385967631768866, + "learning_rate": 1.550136967186437e-05, + "loss": 0.6996, + "step": 10935 + }, + { + "epoch": 0.33517224469780554, + "grad_norm": 1.473494703928765, + "learning_rate": 1.550054072008237e-05, + "loss": 0.7063, + "step": 10936 + }, + { + "epoch": 0.33520289322054675, + "grad_norm": 1.3526477601342572, + "learning_rate": 1.549971171410213e-05, + "loss": 0.7519, + "step": 10937 + }, + { + "epoch": 0.33523354174328795, + "grad_norm": 1.5183242291381822, + "learning_rate": 1.5498882653931823e-05, + "loss": 0.7595, + "step": 10938 + }, + { + "epoch": 0.33526419026602916, + "grad_norm": 0.7017991057305644, + "learning_rate": 1.5498053539579623e-05, + "loss": 0.6134, + "step": 10939 + }, + { + "epoch": 0.33529483878877037, + "grad_norm": 1.3541944286137637, + "learning_rate": 1.549722437105369e-05, + "loss": 0.7037, + "step": 10940 + }, + { + "epoch": 0.33532548731151157, + "grad_norm": 1.459586413361033, + "learning_rate": 1.54963951483622e-05, + "loss": 0.7386, + "step": 10941 + }, + { + "epoch": 0.3353561358342528, + "grad_norm": 1.5555810373481822, + "learning_rate": 1.5495565871513323e-05, + "loss": 0.7048, + "step": 10942 + }, + { + "epoch": 0.335386784356994, + "grad_norm": 1.4813477793950556, + "learning_rate": 1.549473654051523e-05, + "loss": 0.742, + "step": 10943 + }, + { + "epoch": 0.3354174328797352, + "grad_norm": 1.4059927508481882, + "learning_rate": 1.5493907155376092e-05, + "loss": 0.7612, + "step": 10944 + }, + { + "epoch": 0.3354480814024764, + "grad_norm": 1.5987654466255912, + "learning_rate": 1.549307771610408e-05, + "loss": 0.8298, + "step": 10945 + }, + { + "epoch": 0.3354787299252176, + "grad_norm": 1.5566326305811322, + "learning_rate": 1.5492248222707366e-05, + "loss": 0.7651, + "step": 10946 + }, + { + "epoch": 0.3355093784479588, + "grad_norm": 0.7033885086140513, + "learning_rate": 1.5491418675194124e-05, + "loss": 0.5889, + "step": 10947 + }, + { + "epoch": 0.3355400269707, + "grad_norm": 1.2634266665559881, + "learning_rate": 1.5490589073572537e-05, + "loss": 0.7502, + "step": 10948 + }, + { + "epoch": 0.3355706754934412, + "grad_norm": 1.4322472488316713, + "learning_rate": 1.548975941785076e-05, + "loss": 0.7848, + "step": 10949 + }, + { + "epoch": 0.3356013240161824, + "grad_norm": 1.3722143391234858, + "learning_rate": 1.5488929708036988e-05, + "loss": 0.7145, + "step": 10950 + }, + { + "epoch": 0.33563197253892363, + "grad_norm": 1.4339621796984405, + "learning_rate": 1.548809994413938e-05, + "loss": 0.7589, + "step": 10951 + }, + { + "epoch": 0.33566262106166483, + "grad_norm": 1.2712454102606976, + "learning_rate": 1.5487270126166125e-05, + "loss": 0.6499, + "step": 10952 + }, + { + "epoch": 0.33569326958440604, + "grad_norm": 1.5925343937100627, + "learning_rate": 1.5486440254125392e-05, + "loss": 0.7585, + "step": 10953 + }, + { + "epoch": 0.33572391810714725, + "grad_norm": 1.5149277312530747, + "learning_rate": 1.5485610328025357e-05, + "loss": 0.8419, + "step": 10954 + }, + { + "epoch": 0.33575456662988845, + "grad_norm": 1.3518207268994067, + "learning_rate": 1.5484780347874205e-05, + "loss": 0.7324, + "step": 10955 + }, + { + "epoch": 0.33578521515262966, + "grad_norm": 1.4909819441451877, + "learning_rate": 1.54839503136801e-05, + "loss": 0.6932, + "step": 10956 + }, + { + "epoch": 0.33581586367537086, + "grad_norm": 1.3746913520599793, + "learning_rate": 1.5483120225451238e-05, + "loss": 0.738, + "step": 10957 + }, + { + "epoch": 0.33584651219811207, + "grad_norm": 1.3411481772491527, + "learning_rate": 1.5482290083195785e-05, + "loss": 0.7624, + "step": 10958 + }, + { + "epoch": 0.3358771607208533, + "grad_norm": 1.4740321609472844, + "learning_rate": 1.548145988692193e-05, + "loss": 0.7888, + "step": 10959 + }, + { + "epoch": 0.3359078092435945, + "grad_norm": 1.6964555034592639, + "learning_rate": 1.5480629636637842e-05, + "loss": 0.8472, + "step": 10960 + }, + { + "epoch": 0.3359384577663357, + "grad_norm": 1.5528760362020437, + "learning_rate": 1.547979933235171e-05, + "loss": 0.7468, + "step": 10961 + }, + { + "epoch": 0.3359691062890769, + "grad_norm": 1.3652022134187276, + "learning_rate": 1.5478968974071716e-05, + "loss": 0.7096, + "step": 10962 + }, + { + "epoch": 0.3359997548118181, + "grad_norm": 0.7319155884573039, + "learning_rate": 1.5478138561806035e-05, + "loss": 0.6277, + "step": 10963 + }, + { + "epoch": 0.33603040333455925, + "grad_norm": 1.4330697246973938, + "learning_rate": 1.5477308095562854e-05, + "loss": 0.7974, + "step": 10964 + }, + { + "epoch": 0.33606105185730045, + "grad_norm": 1.3528242773631096, + "learning_rate": 1.5476477575350355e-05, + "loss": 0.6413, + "step": 10965 + }, + { + "epoch": 0.33609170038004166, + "grad_norm": 1.5922205674589807, + "learning_rate": 1.5475647001176722e-05, + "loss": 0.799, + "step": 10966 + }, + { + "epoch": 0.33612234890278286, + "grad_norm": 1.7291690860500266, + "learning_rate": 1.547481637305014e-05, + "loss": 0.8181, + "step": 10967 + }, + { + "epoch": 0.33615299742552407, + "grad_norm": 1.6066540746021494, + "learning_rate": 1.547398569097879e-05, + "loss": 0.8138, + "step": 10968 + }, + { + "epoch": 0.3361836459482653, + "grad_norm": 0.6492661200967098, + "learning_rate": 1.5473154954970854e-05, + "loss": 0.6013, + "step": 10969 + }, + { + "epoch": 0.3362142944710065, + "grad_norm": 1.5997589869936837, + "learning_rate": 1.547232416503453e-05, + "loss": 0.8884, + "step": 10970 + }, + { + "epoch": 0.3362449429937477, + "grad_norm": 1.5419976278282794, + "learning_rate": 1.5471493321177987e-05, + "loss": 0.7662, + "step": 10971 + }, + { + "epoch": 0.3362755915164889, + "grad_norm": 1.5187710038897138, + "learning_rate": 1.5470662423409426e-05, + "loss": 0.6964, + "step": 10972 + }, + { + "epoch": 0.3363062400392301, + "grad_norm": 1.5715979346300792, + "learning_rate": 1.5469831471737026e-05, + "loss": 0.8716, + "step": 10973 + }, + { + "epoch": 0.3363368885619713, + "grad_norm": 1.3005767053669441, + "learning_rate": 1.546900046616898e-05, + "loss": 0.6998, + "step": 10974 + }, + { + "epoch": 0.3363675370847125, + "grad_norm": 1.5370801716803417, + "learning_rate": 1.5468169406713472e-05, + "loss": 0.7451, + "step": 10975 + }, + { + "epoch": 0.3363981856074537, + "grad_norm": 1.4709535238772626, + "learning_rate": 1.5467338293378688e-05, + "loss": 0.8321, + "step": 10976 + }, + { + "epoch": 0.3364288341301949, + "grad_norm": 1.434251678217054, + "learning_rate": 1.5466507126172826e-05, + "loss": 0.748, + "step": 10977 + }, + { + "epoch": 0.3364594826529361, + "grad_norm": 1.5114228530490295, + "learning_rate": 1.546567590510407e-05, + "loss": 0.759, + "step": 10978 + }, + { + "epoch": 0.33649013117567733, + "grad_norm": 1.5384277705617788, + "learning_rate": 1.546484463018061e-05, + "loss": 0.7892, + "step": 10979 + }, + { + "epoch": 0.33652077969841854, + "grad_norm": 1.544235342445926, + "learning_rate": 1.5464013301410635e-05, + "loss": 0.8005, + "step": 10980 + }, + { + "epoch": 0.33655142822115974, + "grad_norm": 0.7458771831306942, + "learning_rate": 1.546318191880234e-05, + "loss": 0.6497, + "step": 10981 + }, + { + "epoch": 0.33658207674390095, + "grad_norm": 1.3232978319337554, + "learning_rate": 1.5462350482363918e-05, + "loss": 0.7646, + "step": 10982 + }, + { + "epoch": 0.33661272526664215, + "grad_norm": 1.3835154126565763, + "learning_rate": 1.5461518992103555e-05, + "loss": 0.8198, + "step": 10983 + }, + { + "epoch": 0.33664337378938336, + "grad_norm": 1.6355208799159218, + "learning_rate": 1.546068744802945e-05, + "loss": 0.7244, + "step": 10984 + }, + { + "epoch": 0.33667402231212457, + "grad_norm": 1.6295398750356465, + "learning_rate": 1.5459855850149796e-05, + "loss": 0.8037, + "step": 10985 + }, + { + "epoch": 0.33670467083486577, + "grad_norm": 1.5028101980157045, + "learning_rate": 1.5459024198472787e-05, + "loss": 0.7587, + "step": 10986 + }, + { + "epoch": 0.336735319357607, + "grad_norm": 1.5621735450383778, + "learning_rate": 1.5458192493006615e-05, + "loss": 0.8323, + "step": 10987 + }, + { + "epoch": 0.3367659678803482, + "grad_norm": 1.5301816514966966, + "learning_rate": 1.545736073375947e-05, + "loss": 0.8067, + "step": 10988 + }, + { + "epoch": 0.3367966164030894, + "grad_norm": 1.4983796714727249, + "learning_rate": 1.5456528920739562e-05, + "loss": 0.7277, + "step": 10989 + }, + { + "epoch": 0.3368272649258306, + "grad_norm": 1.469240883397004, + "learning_rate": 1.545569705395507e-05, + "loss": 0.6971, + "step": 10990 + }, + { + "epoch": 0.3368579134485718, + "grad_norm": 1.5462783083717082, + "learning_rate": 1.5454865133414206e-05, + "loss": 0.7946, + "step": 10991 + }, + { + "epoch": 0.336888561971313, + "grad_norm": 1.1858698878473322, + "learning_rate": 1.5454033159125156e-05, + "loss": 0.6068, + "step": 10992 + }, + { + "epoch": 0.3369192104940542, + "grad_norm": 1.4183688269519246, + "learning_rate": 1.5453201131096122e-05, + "loss": 0.7678, + "step": 10993 + }, + { + "epoch": 0.3369498590167954, + "grad_norm": 1.6154382482061962, + "learning_rate": 1.5452369049335305e-05, + "loss": 0.8252, + "step": 10994 + }, + { + "epoch": 0.33698050753953657, + "grad_norm": 1.5455617302210605, + "learning_rate": 1.54515369138509e-05, + "loss": 0.7508, + "step": 10995 + }, + { + "epoch": 0.3370111560622778, + "grad_norm": 1.6154691056477637, + "learning_rate": 1.54507047246511e-05, + "loss": 0.7557, + "step": 10996 + }, + { + "epoch": 0.337041804585019, + "grad_norm": 1.4958766895274878, + "learning_rate": 1.544987248174412e-05, + "loss": 0.8176, + "step": 10997 + }, + { + "epoch": 0.3370724531077602, + "grad_norm": 1.5799495310085894, + "learning_rate": 1.544904018513815e-05, + "loss": 0.7705, + "step": 10998 + }, + { + "epoch": 0.3371031016305014, + "grad_norm": 1.4780616104889979, + "learning_rate": 1.544820783484139e-05, + "loss": 0.8463, + "step": 10999 + }, + { + "epoch": 0.3371337501532426, + "grad_norm": 0.8093177145331774, + "learning_rate": 1.5447375430862047e-05, + "loss": 0.5905, + "step": 11000 + }, + { + "epoch": 0.3371643986759838, + "grad_norm": 1.9003594753998478, + "learning_rate": 1.5446542973208324e-05, + "loss": 0.8284, + "step": 11001 + }, + { + "epoch": 0.337195047198725, + "grad_norm": 1.7714714273911432, + "learning_rate": 1.5445710461888412e-05, + "loss": 0.8244, + "step": 11002 + }, + { + "epoch": 0.3372256957214662, + "grad_norm": 1.6298381066454528, + "learning_rate": 1.5444877896910525e-05, + "loss": 0.808, + "step": 11003 + }, + { + "epoch": 0.3372563442442074, + "grad_norm": 1.3053285003831592, + "learning_rate": 1.5444045278282862e-05, + "loss": 0.7335, + "step": 11004 + }, + { + "epoch": 0.3372869927669486, + "grad_norm": 1.5342784086973922, + "learning_rate": 1.544321260601363e-05, + "loss": 0.7275, + "step": 11005 + }, + { + "epoch": 0.33731764128968983, + "grad_norm": 1.3979114111561093, + "learning_rate": 1.5442379880111026e-05, + "loss": 0.7544, + "step": 11006 + }, + { + "epoch": 0.33734828981243103, + "grad_norm": 1.500234165164656, + "learning_rate": 1.5441547100583268e-05, + "loss": 0.7247, + "step": 11007 + }, + { + "epoch": 0.33737893833517224, + "grad_norm": 1.5708492199044535, + "learning_rate": 1.544071426743855e-05, + "loss": 0.7575, + "step": 11008 + }, + { + "epoch": 0.33740958685791345, + "grad_norm": 1.5158670712530145, + "learning_rate": 1.5439881380685086e-05, + "loss": 0.7515, + "step": 11009 + }, + { + "epoch": 0.33744023538065465, + "grad_norm": 1.5310476986945383, + "learning_rate": 1.5439048440331074e-05, + "loss": 0.8112, + "step": 11010 + }, + { + "epoch": 0.33747088390339586, + "grad_norm": 1.4958075908098372, + "learning_rate": 1.5438215446384725e-05, + "loss": 0.8693, + "step": 11011 + }, + { + "epoch": 0.33750153242613706, + "grad_norm": 1.4978534737773936, + "learning_rate": 1.5437382398854252e-05, + "loss": 0.7486, + "step": 11012 + }, + { + "epoch": 0.33753218094887827, + "grad_norm": 1.5680213930853104, + "learning_rate": 1.5436549297747857e-05, + "loss": 0.7017, + "step": 11013 + }, + { + "epoch": 0.3375628294716195, + "grad_norm": 1.5488321986444455, + "learning_rate": 1.5435716143073754e-05, + "loss": 0.8371, + "step": 11014 + }, + { + "epoch": 0.3375934779943607, + "grad_norm": 1.3991628324098457, + "learning_rate": 1.5434882934840144e-05, + "loss": 0.7654, + "step": 11015 + }, + { + "epoch": 0.3376241265171019, + "grad_norm": 1.5053156090645075, + "learning_rate": 1.5434049673055245e-05, + "loss": 0.8015, + "step": 11016 + }, + { + "epoch": 0.3376547750398431, + "grad_norm": 1.3592211214495482, + "learning_rate": 1.5433216357727262e-05, + "loss": 0.6987, + "step": 11017 + }, + { + "epoch": 0.3376854235625843, + "grad_norm": 1.3520615898054011, + "learning_rate": 1.5432382988864412e-05, + "loss": 0.6478, + "step": 11018 + }, + { + "epoch": 0.3377160720853255, + "grad_norm": 1.5732766109826775, + "learning_rate": 1.54315495664749e-05, + "loss": 0.807, + "step": 11019 + }, + { + "epoch": 0.3377467206080667, + "grad_norm": 1.314693827336805, + "learning_rate": 1.543071609056694e-05, + "loss": 0.7493, + "step": 11020 + }, + { + "epoch": 0.3377773691308079, + "grad_norm": 1.5079198499420206, + "learning_rate": 1.5429882561148747e-05, + "loss": 0.8257, + "step": 11021 + }, + { + "epoch": 0.3378080176535491, + "grad_norm": 0.8918723169504866, + "learning_rate": 1.5429048978228527e-05, + "loss": 0.6421, + "step": 11022 + }, + { + "epoch": 0.3378386661762903, + "grad_norm": 1.2321155275960238, + "learning_rate": 1.5428215341814505e-05, + "loss": 0.758, + "step": 11023 + }, + { + "epoch": 0.33786931469903153, + "grad_norm": 1.4016970454677236, + "learning_rate": 1.5427381651914885e-05, + "loss": 0.7281, + "step": 11024 + }, + { + "epoch": 0.33789996322177274, + "grad_norm": 1.5231584263848323, + "learning_rate": 1.5426547908537884e-05, + "loss": 0.8563, + "step": 11025 + }, + { + "epoch": 0.3379306117445139, + "grad_norm": 1.3626853511488788, + "learning_rate": 1.5425714111691718e-05, + "loss": 0.6658, + "step": 11026 + }, + { + "epoch": 0.3379612602672551, + "grad_norm": 1.4281569736787343, + "learning_rate": 1.5424880261384604e-05, + "loss": 0.7147, + "step": 11027 + }, + { + "epoch": 0.3379919087899963, + "grad_norm": 1.5345323066780674, + "learning_rate": 1.5424046357624757e-05, + "loss": 0.8547, + "step": 11028 + }, + { + "epoch": 0.3380225573127375, + "grad_norm": 1.5145764366475103, + "learning_rate": 1.542321240042039e-05, + "loss": 0.8104, + "step": 11029 + }, + { + "epoch": 0.3380532058354787, + "grad_norm": 1.34568175658146, + "learning_rate": 1.5422378389779727e-05, + "loss": 0.8509, + "step": 11030 + }, + { + "epoch": 0.3380838543582199, + "grad_norm": 1.4460815248758991, + "learning_rate": 1.5421544325710984e-05, + "loss": 0.7984, + "step": 11031 + }, + { + "epoch": 0.3381145028809611, + "grad_norm": 0.7754506035469094, + "learning_rate": 1.5420710208222373e-05, + "loss": 0.6291, + "step": 11032 + }, + { + "epoch": 0.3381451514037023, + "grad_norm": 1.3891301635488402, + "learning_rate": 1.541987603732212e-05, + "loss": 0.6878, + "step": 11033 + }, + { + "epoch": 0.33817579992644353, + "grad_norm": 1.5236798152916737, + "learning_rate": 1.541904181301844e-05, + "loss": 0.7354, + "step": 11034 + }, + { + "epoch": 0.33820644844918474, + "grad_norm": 0.6919266012163203, + "learning_rate": 1.5418207535319558e-05, + "loss": 0.6274, + "step": 11035 + }, + { + "epoch": 0.33823709697192594, + "grad_norm": 1.4186150409734941, + "learning_rate": 1.5417373204233686e-05, + "loss": 0.6469, + "step": 11036 + }, + { + "epoch": 0.33826774549466715, + "grad_norm": 1.3570825333798464, + "learning_rate": 1.5416538819769055e-05, + "loss": 0.7704, + "step": 11037 + }, + { + "epoch": 0.33829839401740835, + "grad_norm": 1.3075730887724677, + "learning_rate": 1.5415704381933874e-05, + "loss": 0.6738, + "step": 11038 + }, + { + "epoch": 0.33832904254014956, + "grad_norm": 1.4177090316040206, + "learning_rate": 1.541486989073638e-05, + "loss": 0.8448, + "step": 11039 + }, + { + "epoch": 0.33835969106289077, + "grad_norm": 1.3823983899907275, + "learning_rate": 1.5414035346184782e-05, + "loss": 0.7279, + "step": 11040 + }, + { + "epoch": 0.33839033958563197, + "grad_norm": 1.3627151879136623, + "learning_rate": 1.541320074828731e-05, + "loss": 0.7874, + "step": 11041 + }, + { + "epoch": 0.3384209881083732, + "grad_norm": 1.2875874530454507, + "learning_rate": 1.541236609705219e-05, + "loss": 0.7407, + "step": 11042 + }, + { + "epoch": 0.3384516366311144, + "grad_norm": 1.2987052476848917, + "learning_rate": 1.541153139248764e-05, + "loss": 0.6987, + "step": 11043 + }, + { + "epoch": 0.3384822851538556, + "grad_norm": 1.3573635125215873, + "learning_rate": 1.5410696634601885e-05, + "loss": 0.7879, + "step": 11044 + }, + { + "epoch": 0.3385129336765968, + "grad_norm": 1.4740645169946, + "learning_rate": 1.540986182340315e-05, + "loss": 0.7929, + "step": 11045 + }, + { + "epoch": 0.338543582199338, + "grad_norm": 1.30817439559024, + "learning_rate": 1.5409026958899662e-05, + "loss": 0.7253, + "step": 11046 + }, + { + "epoch": 0.3385742307220792, + "grad_norm": 1.4813593015705728, + "learning_rate": 1.540819204109965e-05, + "loss": 0.673, + "step": 11047 + }, + { + "epoch": 0.3386048792448204, + "grad_norm": 1.34579136990472, + "learning_rate": 1.540735707001134e-05, + "loss": 0.7966, + "step": 11048 + }, + { + "epoch": 0.3386355277675616, + "grad_norm": 1.3818388900826999, + "learning_rate": 1.5406522045642952e-05, + "loss": 0.7469, + "step": 11049 + }, + { + "epoch": 0.3386661762903028, + "grad_norm": 1.673022975488332, + "learning_rate": 1.5405686968002722e-05, + "loss": 0.8072, + "step": 11050 + }, + { + "epoch": 0.33869682481304403, + "grad_norm": 1.6446752675849527, + "learning_rate": 1.540485183709888e-05, + "loss": 0.7579, + "step": 11051 + }, + { + "epoch": 0.33872747333578523, + "grad_norm": 1.5019231288679278, + "learning_rate": 1.540401665293964e-05, + "loss": 0.7936, + "step": 11052 + }, + { + "epoch": 0.33875812185852644, + "grad_norm": 1.3650383870477478, + "learning_rate": 1.540318141553325e-05, + "loss": 0.7583, + "step": 11053 + }, + { + "epoch": 0.33878877038126765, + "grad_norm": 1.507564521795482, + "learning_rate": 1.5402346124887926e-05, + "loss": 0.8298, + "step": 11054 + }, + { + "epoch": 0.33881941890400885, + "grad_norm": 1.605917130649227, + "learning_rate": 1.5401510781011905e-05, + "loss": 0.8356, + "step": 11055 + }, + { + "epoch": 0.33885006742675006, + "grad_norm": 0.7984774840952752, + "learning_rate": 1.5400675383913416e-05, + "loss": 0.643, + "step": 11056 + }, + { + "epoch": 0.3388807159494912, + "grad_norm": 1.462172493745359, + "learning_rate": 1.5399839933600688e-05, + "loss": 0.7158, + "step": 11057 + }, + { + "epoch": 0.3389113644722324, + "grad_norm": 1.7101068663778678, + "learning_rate": 1.539900443008196e-05, + "loss": 0.8042, + "step": 11058 + }, + { + "epoch": 0.3389420129949736, + "grad_norm": 1.3360708080731005, + "learning_rate": 1.5398168873365457e-05, + "loss": 0.5952, + "step": 11059 + }, + { + "epoch": 0.3389726615177148, + "grad_norm": 1.6177407878892407, + "learning_rate": 1.5397333263459416e-05, + "loss": 0.7162, + "step": 11060 + }, + { + "epoch": 0.33900331004045603, + "grad_norm": 1.364167069918813, + "learning_rate": 1.539649760037207e-05, + "loss": 0.7332, + "step": 11061 + }, + { + "epoch": 0.33903395856319724, + "grad_norm": 1.3927390130408508, + "learning_rate": 1.539566188411165e-05, + "loss": 0.7827, + "step": 11062 + }, + { + "epoch": 0.33906460708593844, + "grad_norm": 0.6946919770585235, + "learning_rate": 1.5394826114686396e-05, + "loss": 0.6039, + "step": 11063 + }, + { + "epoch": 0.33909525560867965, + "grad_norm": 1.48087228544885, + "learning_rate": 1.5393990292104538e-05, + "loss": 0.8238, + "step": 11064 + }, + { + "epoch": 0.33912590413142085, + "grad_norm": 0.688437214280556, + "learning_rate": 1.5393154416374313e-05, + "loss": 0.6287, + "step": 11065 + }, + { + "epoch": 0.33915655265416206, + "grad_norm": 1.6930782423499502, + "learning_rate": 1.539231848750396e-05, + "loss": 0.7722, + "step": 11066 + }, + { + "epoch": 0.33918720117690326, + "grad_norm": 1.3264069597729184, + "learning_rate": 1.5391482505501715e-05, + "loss": 0.6718, + "step": 11067 + }, + { + "epoch": 0.33921784969964447, + "grad_norm": 1.5681940230089086, + "learning_rate": 1.5390646470375807e-05, + "loss": 0.7114, + "step": 11068 + }, + { + "epoch": 0.3392484982223857, + "grad_norm": 1.378884125348371, + "learning_rate": 1.5389810382134483e-05, + "loss": 0.785, + "step": 11069 + }, + { + "epoch": 0.3392791467451269, + "grad_norm": 1.6051573374436052, + "learning_rate": 1.538897424078598e-05, + "loss": 0.7564, + "step": 11070 + }, + { + "epoch": 0.3393097952678681, + "grad_norm": 0.7403960080082378, + "learning_rate": 1.5388138046338533e-05, + "loss": 0.6358, + "step": 11071 + }, + { + "epoch": 0.3393404437906093, + "grad_norm": 1.451607835449712, + "learning_rate": 1.538730179880038e-05, + "loss": 0.8666, + "step": 11072 + }, + { + "epoch": 0.3393710923133505, + "grad_norm": 1.4478386113902049, + "learning_rate": 1.5386465498179772e-05, + "loss": 0.751, + "step": 11073 + }, + { + "epoch": 0.3394017408360917, + "grad_norm": 1.548844386047643, + "learning_rate": 1.538562914448494e-05, + "loss": 0.8435, + "step": 11074 + }, + { + "epoch": 0.3394323893588329, + "grad_norm": 1.3141062797980543, + "learning_rate": 1.538479273772412e-05, + "loss": 0.7697, + "step": 11075 + }, + { + "epoch": 0.3394630378815741, + "grad_norm": 1.4651496643470991, + "learning_rate": 1.5383956277905564e-05, + "loss": 0.8135, + "step": 11076 + }, + { + "epoch": 0.3394936864043153, + "grad_norm": 1.3701764869743802, + "learning_rate": 1.5383119765037506e-05, + "loss": 0.7814, + "step": 11077 + }, + { + "epoch": 0.3395243349270565, + "grad_norm": 1.3243074418538041, + "learning_rate": 1.5382283199128197e-05, + "loss": 0.7478, + "step": 11078 + }, + { + "epoch": 0.33955498344979773, + "grad_norm": 1.2563038773520747, + "learning_rate": 1.5381446580185867e-05, + "loss": 0.7605, + "step": 11079 + }, + { + "epoch": 0.33958563197253894, + "grad_norm": 1.3251585986728207, + "learning_rate": 1.5380609908218773e-05, + "loss": 0.7437, + "step": 11080 + }, + { + "epoch": 0.33961628049528014, + "grad_norm": 1.5508594774053335, + "learning_rate": 1.537977318323515e-05, + "loss": 0.8021, + "step": 11081 + }, + { + "epoch": 0.33964692901802135, + "grad_norm": 1.3258366546873566, + "learning_rate": 1.5378936405243247e-05, + "loss": 0.705, + "step": 11082 + }, + { + "epoch": 0.33967757754076255, + "grad_norm": 1.2916151008169756, + "learning_rate": 1.5378099574251308e-05, + "loss": 0.7156, + "step": 11083 + }, + { + "epoch": 0.33970822606350376, + "grad_norm": 1.364567136813371, + "learning_rate": 1.5377262690267574e-05, + "loss": 0.7414, + "step": 11084 + }, + { + "epoch": 0.33973887458624497, + "grad_norm": 1.4545209636110654, + "learning_rate": 1.5376425753300297e-05, + "loss": 0.7516, + "step": 11085 + }, + { + "epoch": 0.33976952310898617, + "grad_norm": 1.4571489234916293, + "learning_rate": 1.5375588763357723e-05, + "loss": 0.6558, + "step": 11086 + }, + { + "epoch": 0.3398001716317274, + "grad_norm": 1.4875513107544291, + "learning_rate": 1.5374751720448095e-05, + "loss": 0.7669, + "step": 11087 + }, + { + "epoch": 0.3398308201544685, + "grad_norm": 1.4270684022919997, + "learning_rate": 1.5373914624579666e-05, + "loss": 0.7958, + "step": 11088 + }, + { + "epoch": 0.33986146867720973, + "grad_norm": 1.6635677103022097, + "learning_rate": 1.5373077475760677e-05, + "loss": 0.7512, + "step": 11089 + }, + { + "epoch": 0.33989211719995094, + "grad_norm": 1.3633363846537274, + "learning_rate": 1.5372240273999384e-05, + "loss": 0.7592, + "step": 11090 + }, + { + "epoch": 0.33992276572269214, + "grad_norm": 1.3876830248725462, + "learning_rate": 1.5371403019304035e-05, + "loss": 0.7395, + "step": 11091 + }, + { + "epoch": 0.33995341424543335, + "grad_norm": 1.5212523853912765, + "learning_rate": 1.5370565711682875e-05, + "loss": 0.8424, + "step": 11092 + }, + { + "epoch": 0.33998406276817456, + "grad_norm": 1.3629588122925878, + "learning_rate": 1.5369728351144155e-05, + "loss": 0.7287, + "step": 11093 + }, + { + "epoch": 0.34001471129091576, + "grad_norm": 1.4745696646398092, + "learning_rate": 1.536889093769613e-05, + "loss": 0.7634, + "step": 11094 + }, + { + "epoch": 0.34004535981365697, + "grad_norm": 1.4435518054062568, + "learning_rate": 1.536805347134705e-05, + "loss": 0.658, + "step": 11095 + }, + { + "epoch": 0.3400760083363982, + "grad_norm": 1.3891109917953435, + "learning_rate": 1.536721595210516e-05, + "loss": 0.7774, + "step": 11096 + }, + { + "epoch": 0.3401066568591394, + "grad_norm": 1.3425570774273896, + "learning_rate": 1.536637837997873e-05, + "loss": 0.7362, + "step": 11097 + }, + { + "epoch": 0.3401373053818806, + "grad_norm": 1.5806323161611213, + "learning_rate": 1.536554075497599e-05, + "loss": 0.8928, + "step": 11098 + }, + { + "epoch": 0.3401679539046218, + "grad_norm": 1.2417872473009397, + "learning_rate": 1.5364703077105206e-05, + "loss": 0.7135, + "step": 11099 + }, + { + "epoch": 0.340198602427363, + "grad_norm": 1.4769506276805284, + "learning_rate": 1.536386534637463e-05, + "loss": 0.8005, + "step": 11100 + }, + { + "epoch": 0.3402292509501042, + "grad_norm": 1.4069615658285497, + "learning_rate": 1.536302756279252e-05, + "loss": 0.7671, + "step": 11101 + }, + { + "epoch": 0.3402598994728454, + "grad_norm": 1.4184326048985791, + "learning_rate": 1.5362189726367124e-05, + "loss": 0.7534, + "step": 11102 + }, + { + "epoch": 0.3402905479955866, + "grad_norm": 0.7176295753037244, + "learning_rate": 1.53613518371067e-05, + "loss": 0.6596, + "step": 11103 + }, + { + "epoch": 0.3403211965183278, + "grad_norm": 1.5077800265425827, + "learning_rate": 1.5360513895019507e-05, + "loss": 0.8073, + "step": 11104 + }, + { + "epoch": 0.340351845041069, + "grad_norm": 1.5842660135710058, + "learning_rate": 1.5359675900113798e-05, + "loss": 0.9187, + "step": 11105 + }, + { + "epoch": 0.34038249356381023, + "grad_norm": 1.3566905942863134, + "learning_rate": 1.535883785239783e-05, + "loss": 0.7363, + "step": 11106 + }, + { + "epoch": 0.34041314208655143, + "grad_norm": 1.5577247202556663, + "learning_rate": 1.5357999751879863e-05, + "loss": 0.817, + "step": 11107 + }, + { + "epoch": 0.34044379060929264, + "grad_norm": 1.51944464125601, + "learning_rate": 1.5357161598568154e-05, + "loss": 0.7185, + "step": 11108 + }, + { + "epoch": 0.34047443913203385, + "grad_norm": 1.30500924922648, + "learning_rate": 1.535632339247096e-05, + "loss": 0.6842, + "step": 11109 + }, + { + "epoch": 0.34050508765477505, + "grad_norm": 0.7151446727616105, + "learning_rate": 1.535548513359654e-05, + "loss": 0.6141, + "step": 11110 + }, + { + "epoch": 0.34053573617751626, + "grad_norm": 1.282839694972756, + "learning_rate": 1.5354646821953155e-05, + "loss": 0.7215, + "step": 11111 + }, + { + "epoch": 0.34056638470025746, + "grad_norm": 1.457858626089621, + "learning_rate": 1.5353808457549065e-05, + "loss": 0.7205, + "step": 11112 + }, + { + "epoch": 0.34059703322299867, + "grad_norm": 1.4363995074414566, + "learning_rate": 1.5352970040392533e-05, + "loss": 0.8791, + "step": 11113 + }, + { + "epoch": 0.3406276817457399, + "grad_norm": 1.3568468585693345, + "learning_rate": 1.5352131570491818e-05, + "loss": 0.8391, + "step": 11114 + }, + { + "epoch": 0.3406583302684811, + "grad_norm": 1.5850578084083211, + "learning_rate": 1.5351293047855177e-05, + "loss": 0.7907, + "step": 11115 + }, + { + "epoch": 0.3406889787912223, + "grad_norm": 1.3271399151261576, + "learning_rate": 1.535045447249088e-05, + "loss": 0.726, + "step": 11116 + }, + { + "epoch": 0.3407196273139635, + "grad_norm": 1.4474631353859657, + "learning_rate": 1.5349615844407186e-05, + "loss": 0.805, + "step": 11117 + }, + { + "epoch": 0.3407502758367047, + "grad_norm": 1.5633391730451522, + "learning_rate": 1.5348777163612357e-05, + "loss": 0.6879, + "step": 11118 + }, + { + "epoch": 0.34078092435944585, + "grad_norm": 1.4187675676695326, + "learning_rate": 1.5347938430114657e-05, + "loss": 0.6772, + "step": 11119 + }, + { + "epoch": 0.34081157288218705, + "grad_norm": 1.2992007675712032, + "learning_rate": 1.5347099643922352e-05, + "loss": 0.7317, + "step": 11120 + }, + { + "epoch": 0.34084222140492826, + "grad_norm": 1.5925035036935227, + "learning_rate": 1.5346260805043708e-05, + "loss": 0.7966, + "step": 11121 + }, + { + "epoch": 0.34087286992766946, + "grad_norm": 1.41093730520972, + "learning_rate": 1.5345421913486983e-05, + "loss": 0.7103, + "step": 11122 + }, + { + "epoch": 0.34090351845041067, + "grad_norm": 0.7166184468259785, + "learning_rate": 1.534458296926045e-05, + "loss": 0.6445, + "step": 11123 + }, + { + "epoch": 0.3409341669731519, + "grad_norm": 1.4505627625348558, + "learning_rate": 1.534374397237238e-05, + "loss": 0.7667, + "step": 11124 + }, + { + "epoch": 0.3409648154958931, + "grad_norm": 1.7254735150264593, + "learning_rate": 1.5342904922831028e-05, + "loss": 0.7742, + "step": 11125 + }, + { + "epoch": 0.3409954640186343, + "grad_norm": 1.3761469778848014, + "learning_rate": 1.5342065820644667e-05, + "loss": 0.754, + "step": 11126 + }, + { + "epoch": 0.3410261125413755, + "grad_norm": 1.7151332340782595, + "learning_rate": 1.5341226665821567e-05, + "loss": 1.0107, + "step": 11127 + }, + { + "epoch": 0.3410567610641167, + "grad_norm": 1.3913921973798342, + "learning_rate": 1.5340387458369993e-05, + "loss": 0.8295, + "step": 11128 + }, + { + "epoch": 0.3410874095868579, + "grad_norm": 1.5232172923341214, + "learning_rate": 1.5339548198298215e-05, + "loss": 0.7769, + "step": 11129 + }, + { + "epoch": 0.3411180581095991, + "grad_norm": 0.7136022225542852, + "learning_rate": 1.53387088856145e-05, + "loss": 0.6079, + "step": 11130 + }, + { + "epoch": 0.3411487066323403, + "grad_norm": 1.3055774857294387, + "learning_rate": 1.533786952032712e-05, + "loss": 0.6825, + "step": 11131 + }, + { + "epoch": 0.3411793551550815, + "grad_norm": 1.3696325802096816, + "learning_rate": 1.533703010244435e-05, + "loss": 0.6671, + "step": 11132 + }, + { + "epoch": 0.3412100036778227, + "grad_norm": 1.577763649691198, + "learning_rate": 1.5336190631974453e-05, + "loss": 0.8688, + "step": 11133 + }, + { + "epoch": 0.34124065220056393, + "grad_norm": 1.6645466874537613, + "learning_rate": 1.5335351108925708e-05, + "loss": 0.765, + "step": 11134 + }, + { + "epoch": 0.34127130072330514, + "grad_norm": 1.3101758985004457, + "learning_rate": 1.533451153330638e-05, + "loss": 0.766, + "step": 11135 + }, + { + "epoch": 0.34130194924604634, + "grad_norm": 1.2633040184193562, + "learning_rate": 1.533367190512475e-05, + "loss": 0.5775, + "step": 11136 + }, + { + "epoch": 0.34133259776878755, + "grad_norm": 1.521732226155102, + "learning_rate": 1.533283222438908e-05, + "loss": 0.7641, + "step": 11137 + }, + { + "epoch": 0.34136324629152875, + "grad_norm": 1.493364199025502, + "learning_rate": 1.5331992491107653e-05, + "loss": 0.7886, + "step": 11138 + }, + { + "epoch": 0.34139389481426996, + "grad_norm": 0.7377442024745722, + "learning_rate": 1.5331152705288738e-05, + "loss": 0.6206, + "step": 11139 + }, + { + "epoch": 0.34142454333701117, + "grad_norm": 1.5932195496516186, + "learning_rate": 1.5330312866940614e-05, + "loss": 0.753, + "step": 11140 + }, + { + "epoch": 0.34145519185975237, + "grad_norm": 1.361947235593736, + "learning_rate": 1.5329472976071552e-05, + "loss": 0.6792, + "step": 11141 + }, + { + "epoch": 0.3414858403824936, + "grad_norm": 0.6776348010322537, + "learning_rate": 1.532863303268983e-05, + "loss": 0.588, + "step": 11142 + }, + { + "epoch": 0.3415164889052348, + "grad_norm": 1.536629638149738, + "learning_rate": 1.532779303680372e-05, + "loss": 0.6802, + "step": 11143 + }, + { + "epoch": 0.341547137427976, + "grad_norm": 1.4877999652433063, + "learning_rate": 1.5326952988421506e-05, + "loss": 0.7417, + "step": 11144 + }, + { + "epoch": 0.3415777859507172, + "grad_norm": 1.3907290886402734, + "learning_rate": 1.5326112887551458e-05, + "loss": 0.6998, + "step": 11145 + }, + { + "epoch": 0.3416084344734584, + "grad_norm": 1.4517665318224515, + "learning_rate": 1.532527273420186e-05, + "loss": 0.6671, + "step": 11146 + }, + { + "epoch": 0.3416390829961996, + "grad_norm": 1.405666936689685, + "learning_rate": 1.5324432528380988e-05, + "loss": 0.6745, + "step": 11147 + }, + { + "epoch": 0.3416697315189408, + "grad_norm": 1.4597121505529358, + "learning_rate": 1.5323592270097118e-05, + "loss": 0.766, + "step": 11148 + }, + { + "epoch": 0.341700380041682, + "grad_norm": 0.7547667333254805, + "learning_rate": 1.532275195935853e-05, + "loss": 0.6023, + "step": 11149 + }, + { + "epoch": 0.34173102856442317, + "grad_norm": 1.7028306337017831, + "learning_rate": 1.5321911596173508e-05, + "loss": 0.8272, + "step": 11150 + }, + { + "epoch": 0.3417616770871644, + "grad_norm": 1.4664937746033038, + "learning_rate": 1.5321071180550326e-05, + "loss": 0.7194, + "step": 11151 + }, + { + "epoch": 0.3417923256099056, + "grad_norm": 1.5703928215791851, + "learning_rate": 1.532023071249727e-05, + "loss": 0.739, + "step": 11152 + }, + { + "epoch": 0.3418229741326468, + "grad_norm": 1.5108325354217484, + "learning_rate": 1.5319390192022617e-05, + "loss": 0.6575, + "step": 11153 + }, + { + "epoch": 0.341853622655388, + "grad_norm": 1.5398537997583743, + "learning_rate": 1.5318549619134653e-05, + "loss": 0.7981, + "step": 11154 + }, + { + "epoch": 0.3418842711781292, + "grad_norm": 1.4343726193458814, + "learning_rate": 1.5317708993841663e-05, + "loss": 0.83, + "step": 11155 + }, + { + "epoch": 0.3419149197008704, + "grad_norm": 1.6534530538580994, + "learning_rate": 1.5316868316151922e-05, + "loss": 0.7465, + "step": 11156 + }, + { + "epoch": 0.3419455682236116, + "grad_norm": 1.536585361357265, + "learning_rate": 1.5316027586073715e-05, + "loss": 0.7523, + "step": 11157 + }, + { + "epoch": 0.3419762167463528, + "grad_norm": 1.4949249136213636, + "learning_rate": 1.5315186803615333e-05, + "loss": 0.7666, + "step": 11158 + }, + { + "epoch": 0.342006865269094, + "grad_norm": 1.6118637489948777, + "learning_rate": 1.5314345968785053e-05, + "loss": 0.8326, + "step": 11159 + }, + { + "epoch": 0.3420375137918352, + "grad_norm": 1.5212639372815995, + "learning_rate": 1.531350508159116e-05, + "loss": 0.6737, + "step": 11160 + }, + { + "epoch": 0.34206816231457643, + "grad_norm": 1.416644310509395, + "learning_rate": 1.5312664142041945e-05, + "loss": 0.6962, + "step": 11161 + }, + { + "epoch": 0.34209881083731764, + "grad_norm": 1.4415858328133984, + "learning_rate": 1.531182315014569e-05, + "loss": 0.6733, + "step": 11162 + }, + { + "epoch": 0.34212945936005884, + "grad_norm": 1.3307393346701342, + "learning_rate": 1.5310982105910683e-05, + "loss": 0.7406, + "step": 11163 + }, + { + "epoch": 0.34216010788280005, + "grad_norm": 1.5226805714572407, + "learning_rate": 1.531014100934521e-05, + "loss": 0.7469, + "step": 11164 + }, + { + "epoch": 0.34219075640554125, + "grad_norm": 1.4020835645966954, + "learning_rate": 1.530929986045756e-05, + "loss": 0.8441, + "step": 11165 + }, + { + "epoch": 0.34222140492828246, + "grad_norm": 1.6053571736897274, + "learning_rate": 1.5308458659256015e-05, + "loss": 0.7397, + "step": 11166 + }, + { + "epoch": 0.34225205345102366, + "grad_norm": 1.658744283246832, + "learning_rate": 1.5307617405748872e-05, + "loss": 0.8038, + "step": 11167 + }, + { + "epoch": 0.34228270197376487, + "grad_norm": 1.5192090481225795, + "learning_rate": 1.530677609994442e-05, + "loss": 0.8634, + "step": 11168 + }, + { + "epoch": 0.3423133504965061, + "grad_norm": 1.4608537890122595, + "learning_rate": 1.5305934741850942e-05, + "loss": 0.7734, + "step": 11169 + }, + { + "epoch": 0.3423439990192473, + "grad_norm": 1.3307222062693909, + "learning_rate": 1.5305093331476736e-05, + "loss": 0.4877, + "step": 11170 + }, + { + "epoch": 0.3423746475419885, + "grad_norm": 1.6524640213861554, + "learning_rate": 1.530425186883008e-05, + "loss": 0.7926, + "step": 11171 + }, + { + "epoch": 0.3424052960647297, + "grad_norm": 0.7253526479645275, + "learning_rate": 1.5303410353919277e-05, + "loss": 0.6122, + "step": 11172 + }, + { + "epoch": 0.3424359445874709, + "grad_norm": 1.4090692582260131, + "learning_rate": 1.5302568786752615e-05, + "loss": 0.7433, + "step": 11173 + }, + { + "epoch": 0.3424665931102121, + "grad_norm": 1.3207231559091923, + "learning_rate": 1.5301727167338386e-05, + "loss": 0.6913, + "step": 11174 + }, + { + "epoch": 0.3424972416329533, + "grad_norm": 1.6061110100842468, + "learning_rate": 1.5300885495684884e-05, + "loss": 0.7342, + "step": 11175 + }, + { + "epoch": 0.3425278901556945, + "grad_norm": 1.4821753978695713, + "learning_rate": 1.53000437718004e-05, + "loss": 0.7408, + "step": 11176 + }, + { + "epoch": 0.3425585386784357, + "grad_norm": 1.529978056048442, + "learning_rate": 1.5299201995693227e-05, + "loss": 0.8518, + "step": 11177 + }, + { + "epoch": 0.3425891872011769, + "grad_norm": 1.2941007485117528, + "learning_rate": 1.5298360167371664e-05, + "loss": 0.7282, + "step": 11178 + }, + { + "epoch": 0.34261983572391813, + "grad_norm": 0.6795042435321985, + "learning_rate": 1.5297518286844e-05, + "loss": 0.6258, + "step": 11179 + }, + { + "epoch": 0.34265048424665934, + "grad_norm": 1.4754870539902718, + "learning_rate": 1.5296676354118532e-05, + "loss": 0.7933, + "step": 11180 + }, + { + "epoch": 0.3426811327694005, + "grad_norm": 1.346267340806814, + "learning_rate": 1.529583436920356e-05, + "loss": 0.8468, + "step": 11181 + }, + { + "epoch": 0.3427117812921417, + "grad_norm": 1.2612556318844232, + "learning_rate": 1.5294992332107375e-05, + "loss": 0.6751, + "step": 11182 + }, + { + "epoch": 0.3427424298148829, + "grad_norm": 1.393379899320109, + "learning_rate": 1.5294150242838278e-05, + "loss": 0.8163, + "step": 11183 + }, + { + "epoch": 0.3427730783376241, + "grad_norm": 1.629782791123796, + "learning_rate": 1.5293308101404562e-05, + "loss": 0.7819, + "step": 11184 + }, + { + "epoch": 0.3428037268603653, + "grad_norm": 1.47638381452687, + "learning_rate": 1.5292465907814524e-05, + "loss": 0.7668, + "step": 11185 + }, + { + "epoch": 0.3428343753831065, + "grad_norm": 1.5672630234564058, + "learning_rate": 1.529162366207647e-05, + "loss": 0.7573, + "step": 11186 + }, + { + "epoch": 0.3428650239058477, + "grad_norm": 1.4440184207685502, + "learning_rate": 1.5290781364198693e-05, + "loss": 0.7686, + "step": 11187 + }, + { + "epoch": 0.3428956724285889, + "grad_norm": 1.441252562329797, + "learning_rate": 1.5289939014189493e-05, + "loss": 0.774, + "step": 11188 + }, + { + "epoch": 0.34292632095133013, + "grad_norm": 1.5698573857900875, + "learning_rate": 1.528909661205717e-05, + "loss": 0.7546, + "step": 11189 + }, + { + "epoch": 0.34295696947407134, + "grad_norm": 0.6934163377248624, + "learning_rate": 1.5288254157810026e-05, + "loss": 0.5883, + "step": 11190 + }, + { + "epoch": 0.34298761799681254, + "grad_norm": 1.5374775457960845, + "learning_rate": 1.5287411651456355e-05, + "loss": 0.8075, + "step": 11191 + }, + { + "epoch": 0.34301826651955375, + "grad_norm": 1.461064443312345, + "learning_rate": 1.5286569093004474e-05, + "loss": 0.6824, + "step": 11192 + }, + { + "epoch": 0.34304891504229496, + "grad_norm": 1.4262262991700223, + "learning_rate": 1.5285726482462665e-05, + "loss": 0.6933, + "step": 11193 + }, + { + "epoch": 0.34307956356503616, + "grad_norm": 1.6695132925558984, + "learning_rate": 1.528488381983925e-05, + "loss": 0.6852, + "step": 11194 + }, + { + "epoch": 0.34311021208777737, + "grad_norm": 1.4859880690978053, + "learning_rate": 1.528404110514252e-05, + "loss": 0.7474, + "step": 11195 + }, + { + "epoch": 0.34314086061051857, + "grad_norm": 1.4602495552194836, + "learning_rate": 1.5283198338380776e-05, + "loss": 0.782, + "step": 11196 + }, + { + "epoch": 0.3431715091332598, + "grad_norm": 0.6838438552234833, + "learning_rate": 1.5282355519562334e-05, + "loss": 0.564, + "step": 11197 + }, + { + "epoch": 0.343202157656001, + "grad_norm": 1.3804363428247122, + "learning_rate": 1.5281512648695485e-05, + "loss": 0.7841, + "step": 11198 + }, + { + "epoch": 0.3432328061787422, + "grad_norm": 1.6554796315313764, + "learning_rate": 1.5280669725788546e-05, + "loss": 0.8475, + "step": 11199 + }, + { + "epoch": 0.3432634547014834, + "grad_norm": 1.724220007054753, + "learning_rate": 1.5279826750849812e-05, + "loss": 0.7823, + "step": 11200 + }, + { + "epoch": 0.3432941032242246, + "grad_norm": 1.4569854348660973, + "learning_rate": 1.5278983723887598e-05, + "loss": 0.7309, + "step": 11201 + }, + { + "epoch": 0.3433247517469658, + "grad_norm": 1.4834451144613536, + "learning_rate": 1.5278140644910203e-05, + "loss": 0.763, + "step": 11202 + }, + { + "epoch": 0.343355400269707, + "grad_norm": 1.5216705757235027, + "learning_rate": 1.527729751392594e-05, + "loss": 0.7288, + "step": 11203 + }, + { + "epoch": 0.3433860487924482, + "grad_norm": 1.4639020561239235, + "learning_rate": 1.5276454330943117e-05, + "loss": 0.8078, + "step": 11204 + }, + { + "epoch": 0.3434166973151894, + "grad_norm": 0.7498551317617985, + "learning_rate": 1.5275611095970036e-05, + "loss": 0.6379, + "step": 11205 + }, + { + "epoch": 0.34344734583793063, + "grad_norm": 1.3224862977477185, + "learning_rate": 1.527476780901501e-05, + "loss": 0.6777, + "step": 11206 + }, + { + "epoch": 0.34347799436067183, + "grad_norm": 0.7068794309418022, + "learning_rate": 1.5273924470086347e-05, + "loss": 0.6306, + "step": 11207 + }, + { + "epoch": 0.34350864288341304, + "grad_norm": 1.5947436149626202, + "learning_rate": 1.5273081079192355e-05, + "loss": 0.753, + "step": 11208 + }, + { + "epoch": 0.34353929140615425, + "grad_norm": 1.360936851843596, + "learning_rate": 1.527223763634135e-05, + "loss": 0.7604, + "step": 11209 + }, + { + "epoch": 0.34356993992889545, + "grad_norm": 1.487562587881954, + "learning_rate": 1.5271394141541636e-05, + "loss": 0.7484, + "step": 11210 + }, + { + "epoch": 0.34360058845163666, + "grad_norm": 1.5783003075831599, + "learning_rate": 1.5270550594801527e-05, + "loss": 0.7428, + "step": 11211 + }, + { + "epoch": 0.3436312369743778, + "grad_norm": 1.424547819192662, + "learning_rate": 1.5269706996129334e-05, + "loss": 0.8592, + "step": 11212 + }, + { + "epoch": 0.343661885497119, + "grad_norm": 1.3916774535282712, + "learning_rate": 1.526886334553337e-05, + "loss": 0.7695, + "step": 11213 + }, + { + "epoch": 0.3436925340198602, + "grad_norm": 1.4360328599069583, + "learning_rate": 1.5268019643021947e-05, + "loss": 0.7679, + "step": 11214 + }, + { + "epoch": 0.3437231825426014, + "grad_norm": 1.4564831067949748, + "learning_rate": 1.526717588860338e-05, + "loss": 0.7608, + "step": 11215 + }, + { + "epoch": 0.34375383106534263, + "grad_norm": 1.4447237793449712, + "learning_rate": 1.526633208228598e-05, + "loss": 0.7905, + "step": 11216 + }, + { + "epoch": 0.34378447958808384, + "grad_norm": 1.3438078917104999, + "learning_rate": 1.5265488224078065e-05, + "loss": 0.6786, + "step": 11217 + }, + { + "epoch": 0.34381512811082504, + "grad_norm": 1.7726451346642154, + "learning_rate": 1.5264644313987944e-05, + "loss": 0.7937, + "step": 11218 + }, + { + "epoch": 0.34384577663356625, + "grad_norm": 1.482851945206269, + "learning_rate": 1.5263800352023936e-05, + "loss": 0.7937, + "step": 11219 + }, + { + "epoch": 0.34387642515630745, + "grad_norm": 1.543591701137843, + "learning_rate": 1.526295633819436e-05, + "loss": 0.8332, + "step": 11220 + }, + { + "epoch": 0.34390707367904866, + "grad_norm": 1.4886231306723785, + "learning_rate": 1.5262112272507525e-05, + "loss": 0.816, + "step": 11221 + }, + { + "epoch": 0.34393772220178986, + "grad_norm": 1.4488458365552555, + "learning_rate": 1.526126815497175e-05, + "loss": 0.7887, + "step": 11222 + }, + { + "epoch": 0.34396837072453107, + "grad_norm": 1.6261423173508882, + "learning_rate": 1.5260423985595357e-05, + "loss": 0.827, + "step": 11223 + }, + { + "epoch": 0.3439990192472723, + "grad_norm": 1.4596956946739703, + "learning_rate": 1.525957976438666e-05, + "loss": 0.8538, + "step": 11224 + }, + { + "epoch": 0.3440296677700135, + "grad_norm": 1.6316784663629353, + "learning_rate": 1.5258735491353978e-05, + "loss": 0.7557, + "step": 11225 + }, + { + "epoch": 0.3440603162927547, + "grad_norm": 1.3879174240302794, + "learning_rate": 1.5257891166505627e-05, + "loss": 0.6835, + "step": 11226 + }, + { + "epoch": 0.3440909648154959, + "grad_norm": 1.580679596891659, + "learning_rate": 1.5257046789849931e-05, + "loss": 0.8348, + "step": 11227 + }, + { + "epoch": 0.3441216133382371, + "grad_norm": 1.4471195101168863, + "learning_rate": 1.5256202361395211e-05, + "loss": 0.8564, + "step": 11228 + }, + { + "epoch": 0.3441522618609783, + "grad_norm": 1.4354248991890606, + "learning_rate": 1.525535788114978e-05, + "loss": 0.7065, + "step": 11229 + }, + { + "epoch": 0.3441829103837195, + "grad_norm": 1.348648887589584, + "learning_rate": 1.5254513349121966e-05, + "loss": 0.7431, + "step": 11230 + }, + { + "epoch": 0.3442135589064607, + "grad_norm": 1.4851487860994854, + "learning_rate": 1.5253668765320084e-05, + "loss": 0.7178, + "step": 11231 + }, + { + "epoch": 0.3442442074292019, + "grad_norm": 1.3955791136200275, + "learning_rate": 1.5252824129752462e-05, + "loss": 0.6945, + "step": 11232 + }, + { + "epoch": 0.3442748559519431, + "grad_norm": 1.5147380267243227, + "learning_rate": 1.5251979442427417e-05, + "loss": 0.6867, + "step": 11233 + }, + { + "epoch": 0.34430550447468433, + "grad_norm": 1.455286345471311, + "learning_rate": 1.525113470335328e-05, + "loss": 0.8051, + "step": 11234 + }, + { + "epoch": 0.34433615299742554, + "grad_norm": 1.5344353575775764, + "learning_rate": 1.5250289912538366e-05, + "loss": 0.7503, + "step": 11235 + }, + { + "epoch": 0.34436680152016674, + "grad_norm": 0.8543670197779136, + "learning_rate": 1.5249445069991003e-05, + "loss": 0.6483, + "step": 11236 + }, + { + "epoch": 0.34439745004290795, + "grad_norm": 1.476761813437776, + "learning_rate": 1.5248600175719514e-05, + "loss": 0.8122, + "step": 11237 + }, + { + "epoch": 0.34442809856564915, + "grad_norm": 1.542880008545565, + "learning_rate": 1.5247755229732222e-05, + "loss": 0.781, + "step": 11238 + }, + { + "epoch": 0.34445874708839036, + "grad_norm": 1.5621663439360145, + "learning_rate": 1.524691023203746e-05, + "loss": 0.7851, + "step": 11239 + }, + { + "epoch": 0.34448939561113157, + "grad_norm": 1.5707307238152417, + "learning_rate": 1.5246065182643547e-05, + "loss": 0.79, + "step": 11240 + }, + { + "epoch": 0.34452004413387277, + "grad_norm": 1.5811606690220923, + "learning_rate": 1.5245220081558811e-05, + "loss": 0.8155, + "step": 11241 + }, + { + "epoch": 0.344550692656614, + "grad_norm": 1.4855235609662745, + "learning_rate": 1.524437492879158e-05, + "loss": 0.7895, + "step": 11242 + }, + { + "epoch": 0.3445813411793552, + "grad_norm": 1.5087310496989603, + "learning_rate": 1.524352972435018e-05, + "loss": 0.7468, + "step": 11243 + }, + { + "epoch": 0.34461198970209633, + "grad_norm": 1.2334077992023955, + "learning_rate": 1.5242684468242939e-05, + "loss": 0.7087, + "step": 11244 + }, + { + "epoch": 0.34464263822483754, + "grad_norm": 1.6478569587999268, + "learning_rate": 1.5241839160478188e-05, + "loss": 0.7111, + "step": 11245 + }, + { + "epoch": 0.34467328674757874, + "grad_norm": 1.5347966254115875, + "learning_rate": 1.5240993801064257e-05, + "loss": 0.7862, + "step": 11246 + }, + { + "epoch": 0.34470393527031995, + "grad_norm": 1.611574945966937, + "learning_rate": 1.5240148390009468e-05, + "loss": 0.7832, + "step": 11247 + }, + { + "epoch": 0.34473458379306116, + "grad_norm": 1.7736161263080563, + "learning_rate": 1.5239302927322162e-05, + "loss": 0.8048, + "step": 11248 + }, + { + "epoch": 0.34476523231580236, + "grad_norm": 1.4462737022127166, + "learning_rate": 1.5238457413010659e-05, + "loss": 0.7011, + "step": 11249 + }, + { + "epoch": 0.34479588083854357, + "grad_norm": 0.7967295125296067, + "learning_rate": 1.5237611847083296e-05, + "loss": 0.6314, + "step": 11250 + }, + { + "epoch": 0.3448265293612848, + "grad_norm": 1.5186330137983242, + "learning_rate": 1.5236766229548405e-05, + "loss": 0.7707, + "step": 11251 + }, + { + "epoch": 0.344857177884026, + "grad_norm": 1.6737952720085263, + "learning_rate": 1.5235920560414315e-05, + "loss": 0.8104, + "step": 11252 + }, + { + "epoch": 0.3448878264067672, + "grad_norm": 1.5700511259174244, + "learning_rate": 1.5235074839689361e-05, + "loss": 0.7947, + "step": 11253 + }, + { + "epoch": 0.3449184749295084, + "grad_norm": 1.5976889159948398, + "learning_rate": 1.5234229067381874e-05, + "loss": 0.7336, + "step": 11254 + }, + { + "epoch": 0.3449491234522496, + "grad_norm": 0.7194147708624296, + "learning_rate": 1.5233383243500189e-05, + "loss": 0.6286, + "step": 11255 + }, + { + "epoch": 0.3449797719749908, + "grad_norm": 1.344282684927599, + "learning_rate": 1.5232537368052641e-05, + "loss": 0.7288, + "step": 11256 + }, + { + "epoch": 0.345010420497732, + "grad_norm": 1.6187433174855097, + "learning_rate": 1.5231691441047561e-05, + "loss": 0.8306, + "step": 11257 + }, + { + "epoch": 0.3450410690204732, + "grad_norm": 1.7955868908415604, + "learning_rate": 1.5230845462493289e-05, + "loss": 0.8831, + "step": 11258 + }, + { + "epoch": 0.3450717175432144, + "grad_norm": 1.3857940573535494, + "learning_rate": 1.522999943239816e-05, + "loss": 0.6494, + "step": 11259 + }, + { + "epoch": 0.3451023660659556, + "grad_norm": 1.5013995030978324, + "learning_rate": 1.5229153350770505e-05, + "loss": 0.7597, + "step": 11260 + }, + { + "epoch": 0.34513301458869683, + "grad_norm": 1.548848091186856, + "learning_rate": 1.5228307217618663e-05, + "loss": 0.7976, + "step": 11261 + }, + { + "epoch": 0.34516366311143803, + "grad_norm": 1.4933962996399974, + "learning_rate": 1.5227461032950974e-05, + "loss": 0.6746, + "step": 11262 + }, + { + "epoch": 0.34519431163417924, + "grad_norm": 1.3438100895380953, + "learning_rate": 1.5226614796775776e-05, + "loss": 0.7012, + "step": 11263 + }, + { + "epoch": 0.34522496015692045, + "grad_norm": 1.2814534023695243, + "learning_rate": 1.5225768509101403e-05, + "loss": 0.7102, + "step": 11264 + }, + { + "epoch": 0.34525560867966165, + "grad_norm": 1.6377516674726573, + "learning_rate": 1.5224922169936198e-05, + "loss": 0.8429, + "step": 11265 + }, + { + "epoch": 0.34528625720240286, + "grad_norm": 1.4174185130690318, + "learning_rate": 1.5224075779288494e-05, + "loss": 0.6761, + "step": 11266 + }, + { + "epoch": 0.34531690572514406, + "grad_norm": 1.704038542559467, + "learning_rate": 1.5223229337166641e-05, + "loss": 0.8433, + "step": 11267 + }, + { + "epoch": 0.34534755424788527, + "grad_norm": 0.875513067820914, + "learning_rate": 1.5222382843578966e-05, + "loss": 0.5962, + "step": 11268 + }, + { + "epoch": 0.3453782027706265, + "grad_norm": 1.3450886696240583, + "learning_rate": 1.522153629853382e-05, + "loss": 0.6644, + "step": 11269 + }, + { + "epoch": 0.3454088512933677, + "grad_norm": 1.4376378652496373, + "learning_rate": 1.522068970203954e-05, + "loss": 0.7082, + "step": 11270 + }, + { + "epoch": 0.3454394998161089, + "grad_norm": 1.3888048069372259, + "learning_rate": 1.5219843054104469e-05, + "loss": 0.6981, + "step": 11271 + }, + { + "epoch": 0.3454701483388501, + "grad_norm": 1.7095441082910057, + "learning_rate": 1.521899635473695e-05, + "loss": 0.8215, + "step": 11272 + }, + { + "epoch": 0.3455007968615913, + "grad_norm": 1.6101991197534395, + "learning_rate": 1.5218149603945325e-05, + "loss": 0.7973, + "step": 11273 + }, + { + "epoch": 0.3455314453843325, + "grad_norm": 1.4951577228895314, + "learning_rate": 1.5217302801737935e-05, + "loss": 0.8541, + "step": 11274 + }, + { + "epoch": 0.34556209390707365, + "grad_norm": 1.3909986803012935, + "learning_rate": 1.5216455948123124e-05, + "loss": 0.7595, + "step": 11275 + }, + { + "epoch": 0.34559274242981486, + "grad_norm": 0.7145992867064035, + "learning_rate": 1.521560904310924e-05, + "loss": 0.6347, + "step": 11276 + }, + { + "epoch": 0.34562339095255606, + "grad_norm": 1.4535690131444172, + "learning_rate": 1.5214762086704625e-05, + "loss": 0.7603, + "step": 11277 + }, + { + "epoch": 0.34565403947529727, + "grad_norm": 1.436256128117181, + "learning_rate": 1.5213915078917626e-05, + "loss": 0.6003, + "step": 11278 + }, + { + "epoch": 0.3456846879980385, + "grad_norm": 1.403854342847788, + "learning_rate": 1.5213068019756585e-05, + "loss": 0.6649, + "step": 11279 + }, + { + "epoch": 0.3457153365207797, + "grad_norm": 0.680627227661349, + "learning_rate": 1.5212220909229856e-05, + "loss": 0.5871, + "step": 11280 + }, + { + "epoch": 0.3457459850435209, + "grad_norm": 1.437601279220997, + "learning_rate": 1.5211373747345774e-05, + "loss": 0.7762, + "step": 11281 + }, + { + "epoch": 0.3457766335662621, + "grad_norm": 0.6859026514883911, + "learning_rate": 1.5210526534112699e-05, + "loss": 0.6093, + "step": 11282 + }, + { + "epoch": 0.3458072820890033, + "grad_norm": 0.663029026264432, + "learning_rate": 1.520967926953897e-05, + "loss": 0.6064, + "step": 11283 + }, + { + "epoch": 0.3458379306117445, + "grad_norm": 1.331284340364193, + "learning_rate": 1.520883195363294e-05, + "loss": 0.7435, + "step": 11284 + }, + { + "epoch": 0.3458685791344857, + "grad_norm": 1.479443515406797, + "learning_rate": 1.5207984586402953e-05, + "loss": 0.7545, + "step": 11285 + }, + { + "epoch": 0.3458992276572269, + "grad_norm": 1.3569714725448643, + "learning_rate": 1.5207137167857365e-05, + "loss": 0.6436, + "step": 11286 + }, + { + "epoch": 0.3459298761799681, + "grad_norm": 1.6252878002622706, + "learning_rate": 1.5206289698004519e-05, + "loss": 0.8381, + "step": 11287 + }, + { + "epoch": 0.3459605247027093, + "grad_norm": 1.4543187288335238, + "learning_rate": 1.520544217685277e-05, + "loss": 0.7759, + "step": 11288 + }, + { + "epoch": 0.34599117322545053, + "grad_norm": 1.3276320728946238, + "learning_rate": 1.5204594604410468e-05, + "loss": 0.6076, + "step": 11289 + }, + { + "epoch": 0.34602182174819174, + "grad_norm": 1.5440191933252065, + "learning_rate": 1.5203746980685963e-05, + "loss": 0.7406, + "step": 11290 + }, + { + "epoch": 0.34605247027093294, + "grad_norm": 0.6564706548178322, + "learning_rate": 1.5202899305687608e-05, + "loss": 0.5948, + "step": 11291 + }, + { + "epoch": 0.34608311879367415, + "grad_norm": 1.4535339954602222, + "learning_rate": 1.5202051579423754e-05, + "loss": 0.7671, + "step": 11292 + }, + { + "epoch": 0.34611376731641535, + "grad_norm": 1.6296112807140468, + "learning_rate": 1.5201203801902755e-05, + "loss": 0.8582, + "step": 11293 + }, + { + "epoch": 0.34614441583915656, + "grad_norm": 1.3949497422330748, + "learning_rate": 1.5200355973132966e-05, + "loss": 0.6915, + "step": 11294 + }, + { + "epoch": 0.34617506436189777, + "grad_norm": 1.6049556354652288, + "learning_rate": 1.5199508093122737e-05, + "loss": 0.7501, + "step": 11295 + }, + { + "epoch": 0.34620571288463897, + "grad_norm": 1.4750440157998566, + "learning_rate": 1.5198660161880423e-05, + "loss": 0.7294, + "step": 11296 + }, + { + "epoch": 0.3462363614073802, + "grad_norm": 1.5260252380988824, + "learning_rate": 1.5197812179414384e-05, + "loss": 0.8298, + "step": 11297 + }, + { + "epoch": 0.3462670099301214, + "grad_norm": 1.3839559942182473, + "learning_rate": 1.519696414573297e-05, + "loss": 0.7366, + "step": 11298 + }, + { + "epoch": 0.3462976584528626, + "grad_norm": 1.5732728329433874, + "learning_rate": 1.5196116060844539e-05, + "loss": 0.7524, + "step": 11299 + }, + { + "epoch": 0.3463283069756038, + "grad_norm": 1.5281651599857358, + "learning_rate": 1.5195267924757444e-05, + "loss": 0.7027, + "step": 11300 + }, + { + "epoch": 0.346358955498345, + "grad_norm": 1.319110255295031, + "learning_rate": 1.5194419737480049e-05, + "loss": 0.7109, + "step": 11301 + }, + { + "epoch": 0.3463896040210862, + "grad_norm": 1.300266294356435, + "learning_rate": 1.5193571499020705e-05, + "loss": 0.7586, + "step": 11302 + }, + { + "epoch": 0.3464202525438274, + "grad_norm": 1.3051483410684215, + "learning_rate": 1.5192723209387772e-05, + "loss": 0.77, + "step": 11303 + }, + { + "epoch": 0.3464509010665686, + "grad_norm": 1.4618035708769082, + "learning_rate": 1.5191874868589609e-05, + "loss": 0.809, + "step": 11304 + }, + { + "epoch": 0.3464815495893098, + "grad_norm": 1.5752444102091128, + "learning_rate": 1.5191026476634576e-05, + "loss": 0.7692, + "step": 11305 + }, + { + "epoch": 0.346512198112051, + "grad_norm": 1.327846911897193, + "learning_rate": 1.5190178033531031e-05, + "loss": 0.6462, + "step": 11306 + }, + { + "epoch": 0.3465428466347922, + "grad_norm": 1.5046889578325957, + "learning_rate": 1.5189329539287329e-05, + "loss": 0.7961, + "step": 11307 + }, + { + "epoch": 0.3465734951575334, + "grad_norm": 1.4921446198674808, + "learning_rate": 1.518848099391184e-05, + "loss": 0.7631, + "step": 11308 + }, + { + "epoch": 0.3466041436802746, + "grad_norm": 1.3758407403519013, + "learning_rate": 1.5187632397412922e-05, + "loss": 0.7327, + "step": 11309 + }, + { + "epoch": 0.3466347922030158, + "grad_norm": 1.4839670716536835, + "learning_rate": 1.518678374979893e-05, + "loss": 0.7636, + "step": 11310 + }, + { + "epoch": 0.346665440725757, + "grad_norm": 0.7132643543158194, + "learning_rate": 1.5185935051078234e-05, + "loss": 0.6273, + "step": 11311 + }, + { + "epoch": 0.3466960892484982, + "grad_norm": 1.4018933414426016, + "learning_rate": 1.518508630125919e-05, + "loss": 0.7786, + "step": 11312 + }, + { + "epoch": 0.3467267377712394, + "grad_norm": 1.4002272812483474, + "learning_rate": 1.5184237500350167e-05, + "loss": 0.8021, + "step": 11313 + }, + { + "epoch": 0.3467573862939806, + "grad_norm": 0.6797394559355311, + "learning_rate": 1.5183388648359523e-05, + "loss": 0.6173, + "step": 11314 + }, + { + "epoch": 0.3467880348167218, + "grad_norm": 1.3555919699860068, + "learning_rate": 1.5182539745295626e-05, + "loss": 0.7582, + "step": 11315 + }, + { + "epoch": 0.34681868333946303, + "grad_norm": 1.5411654147651486, + "learning_rate": 1.5181690791166837e-05, + "loss": 0.8187, + "step": 11316 + }, + { + "epoch": 0.34684933186220424, + "grad_norm": 1.4938173214387342, + "learning_rate": 1.5180841785981526e-05, + "loss": 0.8097, + "step": 11317 + }, + { + "epoch": 0.34687998038494544, + "grad_norm": 1.4329815853204249, + "learning_rate": 1.5179992729748053e-05, + "loss": 0.7196, + "step": 11318 + }, + { + "epoch": 0.34691062890768665, + "grad_norm": 1.4417688115461824, + "learning_rate": 1.5179143622474785e-05, + "loss": 0.8745, + "step": 11319 + }, + { + "epoch": 0.34694127743042785, + "grad_norm": 1.524393236946424, + "learning_rate": 1.5178294464170091e-05, + "loss": 0.8139, + "step": 11320 + }, + { + "epoch": 0.34697192595316906, + "grad_norm": 1.3354065222080473, + "learning_rate": 1.517744525484234e-05, + "loss": 0.7516, + "step": 11321 + }, + { + "epoch": 0.34700257447591026, + "grad_norm": 0.7306719689666329, + "learning_rate": 1.5176595994499892e-05, + "loss": 0.6252, + "step": 11322 + }, + { + "epoch": 0.34703322299865147, + "grad_norm": 0.7072402914357525, + "learning_rate": 1.517574668315112e-05, + "loss": 0.6004, + "step": 11323 + }, + { + "epoch": 0.3470638715213927, + "grad_norm": 1.5027806955134984, + "learning_rate": 1.5174897320804394e-05, + "loss": 0.7729, + "step": 11324 + }, + { + "epoch": 0.3470945200441339, + "grad_norm": 0.6522837495241428, + "learning_rate": 1.5174047907468082e-05, + "loss": 0.6048, + "step": 11325 + }, + { + "epoch": 0.3471251685668751, + "grad_norm": 1.4712108154249703, + "learning_rate": 1.5173198443150545e-05, + "loss": 0.6345, + "step": 11326 + }, + { + "epoch": 0.3471558170896163, + "grad_norm": 1.4000104826562638, + "learning_rate": 1.5172348927860165e-05, + "loss": 0.6777, + "step": 11327 + }, + { + "epoch": 0.3471864656123575, + "grad_norm": 1.3915663294535634, + "learning_rate": 1.5171499361605308e-05, + "loss": 0.7288, + "step": 11328 + }, + { + "epoch": 0.3472171141350987, + "grad_norm": 1.5415823087028928, + "learning_rate": 1.517064974439434e-05, + "loss": 0.757, + "step": 11329 + }, + { + "epoch": 0.3472477626578399, + "grad_norm": 1.4565286061571516, + "learning_rate": 1.5169800076235644e-05, + "loss": 0.7616, + "step": 11330 + }, + { + "epoch": 0.3472784111805811, + "grad_norm": 0.7189965002752036, + "learning_rate": 1.5168950357137578e-05, + "loss": 0.645, + "step": 11331 + }, + { + "epoch": 0.3473090597033223, + "grad_norm": 1.3874090773670122, + "learning_rate": 1.5168100587108527e-05, + "loss": 0.7908, + "step": 11332 + }, + { + "epoch": 0.3473397082260635, + "grad_norm": 1.4390970886137395, + "learning_rate": 1.5167250766156855e-05, + "loss": 0.7976, + "step": 11333 + }, + { + "epoch": 0.34737035674880473, + "grad_norm": 1.3753211349015337, + "learning_rate": 1.5166400894290943e-05, + "loss": 0.7217, + "step": 11334 + }, + { + "epoch": 0.34740100527154594, + "grad_norm": 1.490770382636742, + "learning_rate": 1.5165550971519158e-05, + "loss": 0.7196, + "step": 11335 + }, + { + "epoch": 0.34743165379428714, + "grad_norm": 1.3202554358231509, + "learning_rate": 1.516470099784988e-05, + "loss": 0.6645, + "step": 11336 + }, + { + "epoch": 0.3474623023170283, + "grad_norm": 1.4737855147553118, + "learning_rate": 1.516385097329148e-05, + "loss": 0.8178, + "step": 11337 + }, + { + "epoch": 0.3474929508397695, + "grad_norm": 1.3707960639962382, + "learning_rate": 1.5163000897852336e-05, + "loss": 0.8876, + "step": 11338 + }, + { + "epoch": 0.3475235993625107, + "grad_norm": 1.5770729376817694, + "learning_rate": 1.516215077154082e-05, + "loss": 0.6922, + "step": 11339 + }, + { + "epoch": 0.3475542478852519, + "grad_norm": 1.4877873114263425, + "learning_rate": 1.5161300594365316e-05, + "loss": 0.7902, + "step": 11340 + }, + { + "epoch": 0.3475848964079931, + "grad_norm": 1.3350657564215433, + "learning_rate": 1.5160450366334196e-05, + "loss": 0.6985, + "step": 11341 + }, + { + "epoch": 0.3476155449307343, + "grad_norm": 1.3428882739959906, + "learning_rate": 1.5159600087455835e-05, + "loss": 0.7167, + "step": 11342 + }, + { + "epoch": 0.3476461934534755, + "grad_norm": 1.5214441916353716, + "learning_rate": 1.5158749757738615e-05, + "loss": 0.7776, + "step": 11343 + }, + { + "epoch": 0.34767684197621673, + "grad_norm": 1.505741423853205, + "learning_rate": 1.5157899377190917e-05, + "loss": 0.8836, + "step": 11344 + }, + { + "epoch": 0.34770749049895794, + "grad_norm": 0.6844704173864931, + "learning_rate": 1.5157048945821116e-05, + "loss": 0.6018, + "step": 11345 + }, + { + "epoch": 0.34773813902169914, + "grad_norm": 1.522472440989818, + "learning_rate": 1.5156198463637589e-05, + "loss": 0.8156, + "step": 11346 + }, + { + "epoch": 0.34776878754444035, + "grad_norm": 0.7007107560651727, + "learning_rate": 1.515534793064872e-05, + "loss": 0.6312, + "step": 11347 + }, + { + "epoch": 0.34779943606718156, + "grad_norm": 1.240845444007608, + "learning_rate": 1.5154497346862891e-05, + "loss": 0.7765, + "step": 11348 + }, + { + "epoch": 0.34783008458992276, + "grad_norm": 1.4010864890950303, + "learning_rate": 1.5153646712288482e-05, + "loss": 0.8441, + "step": 11349 + }, + { + "epoch": 0.34786073311266397, + "grad_norm": 0.6628131054935811, + "learning_rate": 1.515279602693387e-05, + "loss": 0.6011, + "step": 11350 + }, + { + "epoch": 0.3478913816354052, + "grad_norm": 1.5944654839077186, + "learning_rate": 1.5151945290807444e-05, + "loss": 0.8938, + "step": 11351 + }, + { + "epoch": 0.3479220301581464, + "grad_norm": 1.3795357295495003, + "learning_rate": 1.5151094503917576e-05, + "loss": 0.7426, + "step": 11352 + }, + { + "epoch": 0.3479526786808876, + "grad_norm": 0.6869124100443612, + "learning_rate": 1.515024366627266e-05, + "loss": 0.605, + "step": 11353 + }, + { + "epoch": 0.3479833272036288, + "grad_norm": 1.2833972181053102, + "learning_rate": 1.5149392777881075e-05, + "loss": 0.7614, + "step": 11354 + }, + { + "epoch": 0.34801397572637, + "grad_norm": 1.2837735068932477, + "learning_rate": 1.5148541838751208e-05, + "loss": 0.7003, + "step": 11355 + }, + { + "epoch": 0.3480446242491112, + "grad_norm": 1.3371561209434655, + "learning_rate": 1.514769084889144e-05, + "loss": 0.6617, + "step": 11356 + }, + { + "epoch": 0.3480752727718524, + "grad_norm": 1.3633832058033002, + "learning_rate": 1.5146839808310154e-05, + "loss": 0.8446, + "step": 11357 + }, + { + "epoch": 0.3481059212945936, + "grad_norm": 1.3329063265273435, + "learning_rate": 1.514598871701574e-05, + "loss": 0.8176, + "step": 11358 + }, + { + "epoch": 0.3481365698173348, + "grad_norm": 1.4075481706137891, + "learning_rate": 1.5145137575016585e-05, + "loss": 0.7625, + "step": 11359 + }, + { + "epoch": 0.348167218340076, + "grad_norm": 0.6894763749716623, + "learning_rate": 1.514428638232107e-05, + "loss": 0.6032, + "step": 11360 + }, + { + "epoch": 0.34819786686281723, + "grad_norm": 1.5047405028746845, + "learning_rate": 1.5143435138937585e-05, + "loss": 0.7735, + "step": 11361 + }, + { + "epoch": 0.34822851538555843, + "grad_norm": 1.4813818656012467, + "learning_rate": 1.514258384487452e-05, + "loss": 0.8163, + "step": 11362 + }, + { + "epoch": 0.34825916390829964, + "grad_norm": 1.4682554771871785, + "learning_rate": 1.514173250014026e-05, + "loss": 0.7547, + "step": 11363 + }, + { + "epoch": 0.34828981243104085, + "grad_norm": 1.3903907737817498, + "learning_rate": 1.5140881104743192e-05, + "loss": 0.7557, + "step": 11364 + }, + { + "epoch": 0.34832046095378205, + "grad_norm": 1.608925904290402, + "learning_rate": 1.5140029658691709e-05, + "loss": 0.7531, + "step": 11365 + }, + { + "epoch": 0.34835110947652326, + "grad_norm": 1.7792959172261682, + "learning_rate": 1.51391781619942e-05, + "loss": 0.8138, + "step": 11366 + }, + { + "epoch": 0.34838175799926446, + "grad_norm": 1.6779606922637242, + "learning_rate": 1.5138326614659052e-05, + "loss": 0.9176, + "step": 11367 + }, + { + "epoch": 0.3484124065220056, + "grad_norm": 1.4096898767615897, + "learning_rate": 1.5137475016694654e-05, + "loss": 0.7178, + "step": 11368 + }, + { + "epoch": 0.3484430550447468, + "grad_norm": 0.6777883022875061, + "learning_rate": 1.5136623368109402e-05, + "loss": 0.6099, + "step": 11369 + }, + { + "epoch": 0.348473703567488, + "grad_norm": 1.4823037783766406, + "learning_rate": 1.5135771668911687e-05, + "loss": 0.7877, + "step": 11370 + }, + { + "epoch": 0.34850435209022923, + "grad_norm": 1.3919576578479083, + "learning_rate": 1.5134919919109901e-05, + "loss": 0.7181, + "step": 11371 + }, + { + "epoch": 0.34853500061297044, + "grad_norm": 0.7144011639738185, + "learning_rate": 1.5134068118712432e-05, + "loss": 0.6118, + "step": 11372 + }, + { + "epoch": 0.34856564913571164, + "grad_norm": 1.3805821452703437, + "learning_rate": 1.513321626772768e-05, + "loss": 0.7396, + "step": 11373 + }, + { + "epoch": 0.34859629765845285, + "grad_norm": 0.6735766841636163, + "learning_rate": 1.5132364366164031e-05, + "loss": 0.6086, + "step": 11374 + }, + { + "epoch": 0.34862694618119405, + "grad_norm": 1.5996725653784605, + "learning_rate": 1.5131512414029884e-05, + "loss": 0.8092, + "step": 11375 + }, + { + "epoch": 0.34865759470393526, + "grad_norm": 1.459078131151847, + "learning_rate": 1.5130660411333634e-05, + "loss": 0.8064, + "step": 11376 + }, + { + "epoch": 0.34868824322667646, + "grad_norm": 1.4282393968573495, + "learning_rate": 1.5129808358083674e-05, + "loss": 0.6962, + "step": 11377 + }, + { + "epoch": 0.34871889174941767, + "grad_norm": 1.5154932714968512, + "learning_rate": 1.5128956254288398e-05, + "loss": 0.8441, + "step": 11378 + }, + { + "epoch": 0.3487495402721589, + "grad_norm": 1.436531592046607, + "learning_rate": 1.5128104099956204e-05, + "loss": 0.7496, + "step": 11379 + }, + { + "epoch": 0.3487801887949001, + "grad_norm": 1.657385629177107, + "learning_rate": 1.5127251895095487e-05, + "loss": 0.7871, + "step": 11380 + }, + { + "epoch": 0.3488108373176413, + "grad_norm": 1.43419927189349, + "learning_rate": 1.5126399639714649e-05, + "loss": 0.7984, + "step": 11381 + }, + { + "epoch": 0.3488414858403825, + "grad_norm": 1.5440426017406996, + "learning_rate": 1.5125547333822081e-05, + "loss": 0.7122, + "step": 11382 + }, + { + "epoch": 0.3488721343631237, + "grad_norm": 0.765528592109988, + "learning_rate": 1.5124694977426181e-05, + "loss": 0.6432, + "step": 11383 + }, + { + "epoch": 0.3489027828858649, + "grad_norm": 1.4719193466674714, + "learning_rate": 1.5123842570535354e-05, + "loss": 0.7756, + "step": 11384 + }, + { + "epoch": 0.3489334314086061, + "grad_norm": 1.3832940849475717, + "learning_rate": 1.5122990113157996e-05, + "loss": 0.7349, + "step": 11385 + }, + { + "epoch": 0.3489640799313473, + "grad_norm": 0.6887826552692086, + "learning_rate": 1.5122137605302505e-05, + "loss": 0.5931, + "step": 11386 + }, + { + "epoch": 0.3489947284540885, + "grad_norm": 1.5030769179997663, + "learning_rate": 1.5121285046977278e-05, + "loss": 0.8012, + "step": 11387 + }, + { + "epoch": 0.3490253769768297, + "grad_norm": 1.389071710069344, + "learning_rate": 1.5120432438190724e-05, + "loss": 0.8001, + "step": 11388 + }, + { + "epoch": 0.34905602549957093, + "grad_norm": 1.3787020026013432, + "learning_rate": 1.5119579778951235e-05, + "loss": 0.6763, + "step": 11389 + }, + { + "epoch": 0.34908667402231214, + "grad_norm": 1.359566316458047, + "learning_rate": 1.511872706926722e-05, + "loss": 0.7062, + "step": 11390 + }, + { + "epoch": 0.34911732254505334, + "grad_norm": 1.6562606767941248, + "learning_rate": 1.5117874309147077e-05, + "loss": 0.756, + "step": 11391 + }, + { + "epoch": 0.34914797106779455, + "grad_norm": 1.366815186914775, + "learning_rate": 1.5117021498599207e-05, + "loss": 0.7311, + "step": 11392 + }, + { + "epoch": 0.34917861959053575, + "grad_norm": 1.4359921091031362, + "learning_rate": 1.5116168637632017e-05, + "loss": 0.7478, + "step": 11393 + }, + { + "epoch": 0.34920926811327696, + "grad_norm": 1.6355693913245755, + "learning_rate": 1.5115315726253908e-05, + "loss": 0.7883, + "step": 11394 + }, + { + "epoch": 0.34923991663601817, + "grad_norm": 1.308820163115602, + "learning_rate": 1.5114462764473281e-05, + "loss": 0.7167, + "step": 11395 + }, + { + "epoch": 0.34927056515875937, + "grad_norm": 1.409002707742787, + "learning_rate": 1.5113609752298546e-05, + "loss": 0.7694, + "step": 11396 + }, + { + "epoch": 0.3493012136815006, + "grad_norm": 1.4352332765213758, + "learning_rate": 1.5112756689738106e-05, + "loss": 0.8545, + "step": 11397 + }, + { + "epoch": 0.3493318622042418, + "grad_norm": 1.5716780625512665, + "learning_rate": 1.5111903576800367e-05, + "loss": 0.8351, + "step": 11398 + }, + { + "epoch": 0.34936251072698293, + "grad_norm": 1.49731179453152, + "learning_rate": 1.5111050413493736e-05, + "loss": 0.7633, + "step": 11399 + }, + { + "epoch": 0.34939315924972414, + "grad_norm": 1.482803389136655, + "learning_rate": 1.5110197199826612e-05, + "loss": 0.7273, + "step": 11400 + }, + { + "epoch": 0.34942380777246534, + "grad_norm": 1.5859868428182702, + "learning_rate": 1.5109343935807413e-05, + "loss": 0.782, + "step": 11401 + }, + { + "epoch": 0.34945445629520655, + "grad_norm": 1.5583011413216508, + "learning_rate": 1.5108490621444536e-05, + "loss": 0.7805, + "step": 11402 + }, + { + "epoch": 0.34948510481794776, + "grad_norm": 1.5224158850126466, + "learning_rate": 1.5107637256746397e-05, + "loss": 0.7946, + "step": 11403 + }, + { + "epoch": 0.34951575334068896, + "grad_norm": 1.5293061229973752, + "learning_rate": 1.51067838417214e-05, + "loss": 0.5264, + "step": 11404 + }, + { + "epoch": 0.34954640186343017, + "grad_norm": 1.4037851864339144, + "learning_rate": 1.5105930376377958e-05, + "loss": 0.7946, + "step": 11405 + }, + { + "epoch": 0.3495770503861714, + "grad_norm": 1.5025195309903352, + "learning_rate": 1.5105076860724472e-05, + "loss": 0.7994, + "step": 11406 + }, + { + "epoch": 0.3496076989089126, + "grad_norm": 0.7935983592527851, + "learning_rate": 1.5104223294769363e-05, + "loss": 0.6111, + "step": 11407 + }, + { + "epoch": 0.3496383474316538, + "grad_norm": 1.5576127972747043, + "learning_rate": 1.5103369678521032e-05, + "loss": 0.7552, + "step": 11408 + }, + { + "epoch": 0.349668995954395, + "grad_norm": 1.3310341799272953, + "learning_rate": 1.5102516011987895e-05, + "loss": 0.6928, + "step": 11409 + }, + { + "epoch": 0.3496996444771362, + "grad_norm": 1.353523848480912, + "learning_rate": 1.5101662295178364e-05, + "loss": 0.6869, + "step": 11410 + }, + { + "epoch": 0.3497302929998774, + "grad_norm": 1.5022554317007784, + "learning_rate": 1.5100808528100846e-05, + "loss": 0.7481, + "step": 11411 + }, + { + "epoch": 0.3497609415226186, + "grad_norm": 0.6738201201592896, + "learning_rate": 1.5099954710763757e-05, + "loss": 0.6066, + "step": 11412 + }, + { + "epoch": 0.3497915900453598, + "grad_norm": 1.5379112325820972, + "learning_rate": 1.5099100843175514e-05, + "loss": 0.7391, + "step": 11413 + }, + { + "epoch": 0.349822238568101, + "grad_norm": 1.544281620651039, + "learning_rate": 1.5098246925344523e-05, + "loss": 0.7584, + "step": 11414 + }, + { + "epoch": 0.3498528870908422, + "grad_norm": 1.3035998659331454, + "learning_rate": 1.5097392957279198e-05, + "loss": 0.6843, + "step": 11415 + }, + { + "epoch": 0.34988353561358343, + "grad_norm": 1.3707987425223755, + "learning_rate": 1.5096538938987956e-05, + "loss": 0.7371, + "step": 11416 + }, + { + "epoch": 0.34991418413632464, + "grad_norm": 1.3736907249190102, + "learning_rate": 1.5095684870479215e-05, + "loss": 0.7237, + "step": 11417 + }, + { + "epoch": 0.34994483265906584, + "grad_norm": 1.509311511271465, + "learning_rate": 1.5094830751761387e-05, + "loss": 0.7939, + "step": 11418 + }, + { + "epoch": 0.34997548118180705, + "grad_norm": 1.462933327277484, + "learning_rate": 1.5093976582842884e-05, + "loss": 0.7057, + "step": 11419 + }, + { + "epoch": 0.35000612970454825, + "grad_norm": 1.4487542857560063, + "learning_rate": 1.509312236373213e-05, + "loss": 0.8498, + "step": 11420 + }, + { + "epoch": 0.35003677822728946, + "grad_norm": 1.536220788180797, + "learning_rate": 1.5092268094437538e-05, + "loss": 0.6555, + "step": 11421 + }, + { + "epoch": 0.35006742675003066, + "grad_norm": 0.753163166708253, + "learning_rate": 1.5091413774967528e-05, + "loss": 0.6318, + "step": 11422 + }, + { + "epoch": 0.35009807527277187, + "grad_norm": 1.353232421494108, + "learning_rate": 1.5090559405330509e-05, + "loss": 0.7999, + "step": 11423 + }, + { + "epoch": 0.3501287237955131, + "grad_norm": 1.411582275185064, + "learning_rate": 1.508970498553491e-05, + "loss": 0.8212, + "step": 11424 + }, + { + "epoch": 0.3501593723182543, + "grad_norm": 0.6914928884080799, + "learning_rate": 1.5088850515589143e-05, + "loss": 0.6182, + "step": 11425 + }, + { + "epoch": 0.3501900208409955, + "grad_norm": 1.4775535460950422, + "learning_rate": 1.5087995995501633e-05, + "loss": 0.7625, + "step": 11426 + }, + { + "epoch": 0.3502206693637367, + "grad_norm": 1.5654542832449798, + "learning_rate": 1.5087141425280796e-05, + "loss": 0.8479, + "step": 11427 + }, + { + "epoch": 0.3502513178864779, + "grad_norm": 1.6093626683756583, + "learning_rate": 1.508628680493505e-05, + "loss": 0.9212, + "step": 11428 + }, + { + "epoch": 0.3502819664092191, + "grad_norm": 1.3435695170719537, + "learning_rate": 1.5085432134472822e-05, + "loss": 0.7639, + "step": 11429 + }, + { + "epoch": 0.35031261493196025, + "grad_norm": 1.418751843071116, + "learning_rate": 1.5084577413902528e-05, + "loss": 0.8254, + "step": 11430 + }, + { + "epoch": 0.35034326345470146, + "grad_norm": 1.4780329135986896, + "learning_rate": 1.5083722643232595e-05, + "loss": 0.7215, + "step": 11431 + }, + { + "epoch": 0.35037391197744266, + "grad_norm": 1.2819481340863317, + "learning_rate": 1.5082867822471439e-05, + "loss": 0.7125, + "step": 11432 + }, + { + "epoch": 0.35040456050018387, + "grad_norm": 1.4464971058429463, + "learning_rate": 1.5082012951627488e-05, + "loss": 0.8494, + "step": 11433 + }, + { + "epoch": 0.3504352090229251, + "grad_norm": 1.5093701795658496, + "learning_rate": 1.5081158030709158e-05, + "loss": 0.7784, + "step": 11434 + }, + { + "epoch": 0.3504658575456663, + "grad_norm": 1.5611675043836573, + "learning_rate": 1.5080303059724883e-05, + "loss": 0.797, + "step": 11435 + }, + { + "epoch": 0.3504965060684075, + "grad_norm": 1.4006601809119443, + "learning_rate": 1.5079448038683083e-05, + "loss": 0.7822, + "step": 11436 + }, + { + "epoch": 0.3505271545911487, + "grad_norm": 1.3985100240724866, + "learning_rate": 1.5078592967592176e-05, + "loss": 0.7482, + "step": 11437 + }, + { + "epoch": 0.3505578031138899, + "grad_norm": 1.491573382051605, + "learning_rate": 1.5077737846460596e-05, + "loss": 0.6711, + "step": 11438 + }, + { + "epoch": 0.3505884516366311, + "grad_norm": 1.7577062482633863, + "learning_rate": 1.5076882675296767e-05, + "loss": 0.8391, + "step": 11439 + }, + { + "epoch": 0.3506191001593723, + "grad_norm": 0.7303586219857588, + "learning_rate": 1.5076027454109115e-05, + "loss": 0.5876, + "step": 11440 + }, + { + "epoch": 0.3506497486821135, + "grad_norm": 1.3971600678517986, + "learning_rate": 1.5075172182906061e-05, + "loss": 0.7195, + "step": 11441 + }, + { + "epoch": 0.3506803972048547, + "grad_norm": 1.524697810575696, + "learning_rate": 1.5074316861696044e-05, + "loss": 0.6772, + "step": 11442 + }, + { + "epoch": 0.3507110457275959, + "grad_norm": 1.4502575898320702, + "learning_rate": 1.5073461490487478e-05, + "loss": 0.7229, + "step": 11443 + }, + { + "epoch": 0.35074169425033713, + "grad_norm": 1.6570773167049777, + "learning_rate": 1.5072606069288803e-05, + "loss": 0.724, + "step": 11444 + }, + { + "epoch": 0.35077234277307834, + "grad_norm": 1.492181671101329, + "learning_rate": 1.5071750598108436e-05, + "loss": 0.7895, + "step": 11445 + }, + { + "epoch": 0.35080299129581954, + "grad_norm": 1.3671239883977357, + "learning_rate": 1.5070895076954818e-05, + "loss": 0.6738, + "step": 11446 + }, + { + "epoch": 0.35083363981856075, + "grad_norm": 1.410814592653424, + "learning_rate": 1.5070039505836372e-05, + "loss": 0.6907, + "step": 11447 + }, + { + "epoch": 0.35086428834130196, + "grad_norm": 1.5067810495103564, + "learning_rate": 1.5069183884761531e-05, + "loss": 0.8524, + "step": 11448 + }, + { + "epoch": 0.35089493686404316, + "grad_norm": 1.5324255724914229, + "learning_rate": 1.5068328213738723e-05, + "loss": 0.7775, + "step": 11449 + }, + { + "epoch": 0.35092558538678437, + "grad_norm": 1.5788363787262174, + "learning_rate": 1.506747249277638e-05, + "loss": 0.8139, + "step": 11450 + }, + { + "epoch": 0.3509562339095256, + "grad_norm": 1.3739917716649253, + "learning_rate": 1.5066616721882933e-05, + "loss": 0.6745, + "step": 11451 + }, + { + "epoch": 0.3509868824322668, + "grad_norm": 0.7020751224462304, + "learning_rate": 1.5065760901066817e-05, + "loss": 0.602, + "step": 11452 + }, + { + "epoch": 0.351017530955008, + "grad_norm": 1.5542753037838737, + "learning_rate": 1.506490503033646e-05, + "loss": 0.7684, + "step": 11453 + }, + { + "epoch": 0.3510481794777492, + "grad_norm": 1.399270726650331, + "learning_rate": 1.50640491097003e-05, + "loss": 0.7484, + "step": 11454 + }, + { + "epoch": 0.3510788280004904, + "grad_norm": 1.4295154291013, + "learning_rate": 1.506319313916677e-05, + "loss": 0.7223, + "step": 11455 + }, + { + "epoch": 0.3511094765232316, + "grad_norm": 1.4262803902564916, + "learning_rate": 1.50623371187443e-05, + "loss": 0.8008, + "step": 11456 + }, + { + "epoch": 0.3511401250459728, + "grad_norm": 1.6535506284252635, + "learning_rate": 1.5061481048441326e-05, + "loss": 0.8091, + "step": 11457 + }, + { + "epoch": 0.351170773568714, + "grad_norm": 1.5266988433520208, + "learning_rate": 1.5060624928266285e-05, + "loss": 0.7786, + "step": 11458 + }, + { + "epoch": 0.3512014220914552, + "grad_norm": 1.734964610489063, + "learning_rate": 1.5059768758227616e-05, + "loss": 0.8231, + "step": 11459 + }, + { + "epoch": 0.3512320706141964, + "grad_norm": 0.6972421813536637, + "learning_rate": 1.5058912538333745e-05, + "loss": 0.6153, + "step": 11460 + }, + { + "epoch": 0.3512627191369376, + "grad_norm": 1.42285846391668, + "learning_rate": 1.5058056268593118e-05, + "loss": 0.78, + "step": 11461 + }, + { + "epoch": 0.3512933676596788, + "grad_norm": 1.5962814788363349, + "learning_rate": 1.5057199949014165e-05, + "loss": 0.6648, + "step": 11462 + }, + { + "epoch": 0.35132401618242, + "grad_norm": 1.6677187413338583, + "learning_rate": 1.505634357960533e-05, + "loss": 0.7544, + "step": 11463 + }, + { + "epoch": 0.3513546647051612, + "grad_norm": 1.5478683558390751, + "learning_rate": 1.505548716037505e-05, + "loss": 0.7023, + "step": 11464 + }, + { + "epoch": 0.3513853132279024, + "grad_norm": 1.78268866438388, + "learning_rate": 1.5054630691331758e-05, + "loss": 0.7765, + "step": 11465 + }, + { + "epoch": 0.3514159617506436, + "grad_norm": 1.5295477757412828, + "learning_rate": 1.5053774172483894e-05, + "loss": 0.7006, + "step": 11466 + }, + { + "epoch": 0.3514466102733848, + "grad_norm": 0.6732275182551195, + "learning_rate": 1.5052917603839908e-05, + "loss": 0.6151, + "step": 11467 + }, + { + "epoch": 0.351477258796126, + "grad_norm": 1.5210053816198488, + "learning_rate": 1.5052060985408226e-05, + "loss": 0.7839, + "step": 11468 + }, + { + "epoch": 0.3515079073188672, + "grad_norm": 1.3930586369370683, + "learning_rate": 1.5051204317197295e-05, + "loss": 0.6857, + "step": 11469 + }, + { + "epoch": 0.3515385558416084, + "grad_norm": 1.600993573933635, + "learning_rate": 1.5050347599215556e-05, + "loss": 0.7934, + "step": 11470 + }, + { + "epoch": 0.35156920436434963, + "grad_norm": 0.6692645876276673, + "learning_rate": 1.5049490831471451e-05, + "loss": 0.6325, + "step": 11471 + }, + { + "epoch": 0.35159985288709084, + "grad_norm": 1.5069321115176635, + "learning_rate": 1.504863401397342e-05, + "loss": 0.7874, + "step": 11472 + }, + { + "epoch": 0.35163050140983204, + "grad_norm": 1.6798478429794488, + "learning_rate": 1.5047777146729905e-05, + "loss": 0.925, + "step": 11473 + }, + { + "epoch": 0.35166114993257325, + "grad_norm": 1.432981647120768, + "learning_rate": 1.5046920229749353e-05, + "loss": 0.7513, + "step": 11474 + }, + { + "epoch": 0.35169179845531445, + "grad_norm": 2.024265771967907, + "learning_rate": 1.5046063263040202e-05, + "loss": 0.7375, + "step": 11475 + }, + { + "epoch": 0.35172244697805566, + "grad_norm": 1.4484410186949341, + "learning_rate": 1.50452062466109e-05, + "loss": 0.7019, + "step": 11476 + }, + { + "epoch": 0.35175309550079686, + "grad_norm": 1.5006026671890906, + "learning_rate": 1.504434918046989e-05, + "loss": 0.7165, + "step": 11477 + }, + { + "epoch": 0.35178374402353807, + "grad_norm": 1.509410563306226, + "learning_rate": 1.5043492064625618e-05, + "loss": 0.8563, + "step": 11478 + }, + { + "epoch": 0.3518143925462793, + "grad_norm": 1.4619673844853058, + "learning_rate": 1.5042634899086526e-05, + "loss": 0.7751, + "step": 11479 + }, + { + "epoch": 0.3518450410690205, + "grad_norm": 1.5102176782939447, + "learning_rate": 1.5041777683861063e-05, + "loss": 0.7736, + "step": 11480 + }, + { + "epoch": 0.3518756895917617, + "grad_norm": 1.3440166631813617, + "learning_rate": 1.5040920418957675e-05, + "loss": 0.7179, + "step": 11481 + }, + { + "epoch": 0.3519063381145029, + "grad_norm": 1.3708788056826289, + "learning_rate": 1.5040063104384807e-05, + "loss": 0.7592, + "step": 11482 + }, + { + "epoch": 0.3519369866372441, + "grad_norm": 1.3757018860908488, + "learning_rate": 1.503920574015091e-05, + "loss": 0.7267, + "step": 11483 + }, + { + "epoch": 0.3519676351599853, + "grad_norm": 1.4047715978060942, + "learning_rate": 1.5038348326264424e-05, + "loss": 0.7897, + "step": 11484 + }, + { + "epoch": 0.3519982836827265, + "grad_norm": 1.4020749115399018, + "learning_rate": 1.503749086273381e-05, + "loss": 0.6583, + "step": 11485 + }, + { + "epoch": 0.3520289322054677, + "grad_norm": 1.526405613537891, + "learning_rate": 1.5036633349567507e-05, + "loss": 0.6654, + "step": 11486 + }, + { + "epoch": 0.3520595807282089, + "grad_norm": 1.476385044012318, + "learning_rate": 1.5035775786773967e-05, + "loss": 0.6649, + "step": 11487 + }, + { + "epoch": 0.3520902292509501, + "grad_norm": 1.3039558131120175, + "learning_rate": 1.5034918174361637e-05, + "loss": 0.6605, + "step": 11488 + }, + { + "epoch": 0.35212087777369133, + "grad_norm": 1.303325422802467, + "learning_rate": 1.5034060512338972e-05, + "loss": 0.6621, + "step": 11489 + }, + { + "epoch": 0.35215152629643254, + "grad_norm": 1.4551542574434058, + "learning_rate": 1.5033202800714422e-05, + "loss": 0.7689, + "step": 11490 + }, + { + "epoch": 0.35218217481917374, + "grad_norm": 1.3243614683179337, + "learning_rate": 1.5032345039496436e-05, + "loss": 0.6621, + "step": 11491 + }, + { + "epoch": 0.3522128233419149, + "grad_norm": 1.384793126238277, + "learning_rate": 1.5031487228693467e-05, + "loss": 0.7762, + "step": 11492 + }, + { + "epoch": 0.3522434718646561, + "grad_norm": 1.4822517573271152, + "learning_rate": 1.5030629368313965e-05, + "loss": 0.7534, + "step": 11493 + }, + { + "epoch": 0.3522741203873973, + "grad_norm": 0.693187780052638, + "learning_rate": 1.502977145836639e-05, + "loss": 0.6261, + "step": 11494 + }, + { + "epoch": 0.3523047689101385, + "grad_norm": 1.4995245543757585, + "learning_rate": 1.5028913498859183e-05, + "loss": 0.8483, + "step": 11495 + }, + { + "epoch": 0.3523354174328797, + "grad_norm": 1.4515669355985161, + "learning_rate": 1.5028055489800808e-05, + "loss": 0.688, + "step": 11496 + }, + { + "epoch": 0.3523660659556209, + "grad_norm": 1.2624358631483268, + "learning_rate": 1.5027197431199714e-05, + "loss": 0.6906, + "step": 11497 + }, + { + "epoch": 0.3523967144783621, + "grad_norm": 1.3712999497077156, + "learning_rate": 1.502633932306436e-05, + "loss": 0.6667, + "step": 11498 + }, + { + "epoch": 0.35242736300110333, + "grad_norm": 1.4168937268078297, + "learning_rate": 1.5025481165403197e-05, + "loss": 0.7948, + "step": 11499 + }, + { + "epoch": 0.35245801152384454, + "grad_norm": 1.5149337032892771, + "learning_rate": 1.5024622958224684e-05, + "loss": 0.7929, + "step": 11500 + }, + { + "epoch": 0.35248866004658574, + "grad_norm": 0.6994347325495713, + "learning_rate": 1.5023764701537273e-05, + "loss": 0.6378, + "step": 11501 + }, + { + "epoch": 0.35251930856932695, + "grad_norm": 1.3711352123831573, + "learning_rate": 1.5022906395349428e-05, + "loss": 0.7367, + "step": 11502 + }, + { + "epoch": 0.35254995709206816, + "grad_norm": 0.7074498306732622, + "learning_rate": 1.5022048039669596e-05, + "loss": 0.606, + "step": 11503 + }, + { + "epoch": 0.35258060561480936, + "grad_norm": 0.6368762509755489, + "learning_rate": 1.502118963450624e-05, + "loss": 0.5795, + "step": 11504 + }, + { + "epoch": 0.35261125413755057, + "grad_norm": 1.3760709774916473, + "learning_rate": 1.5020331179867821e-05, + "loss": 0.7651, + "step": 11505 + }, + { + "epoch": 0.3526419026602918, + "grad_norm": 1.4165595476079171, + "learning_rate": 1.501947267576279e-05, + "loss": 0.7331, + "step": 11506 + }, + { + "epoch": 0.352672551183033, + "grad_norm": 0.6521211473518761, + "learning_rate": 1.5018614122199612e-05, + "loss": 0.6103, + "step": 11507 + }, + { + "epoch": 0.3527031997057742, + "grad_norm": 0.6628866730878339, + "learning_rate": 1.5017755519186747e-05, + "loss": 0.5788, + "step": 11508 + }, + { + "epoch": 0.3527338482285154, + "grad_norm": 1.51966760927574, + "learning_rate": 1.5016896866732653e-05, + "loss": 0.6246, + "step": 11509 + }, + { + "epoch": 0.3527644967512566, + "grad_norm": 1.4611475731600738, + "learning_rate": 1.5016038164845787e-05, + "loss": 0.7451, + "step": 11510 + }, + { + "epoch": 0.3527951452739978, + "grad_norm": 1.3622779927078168, + "learning_rate": 1.5015179413534618e-05, + "loss": 0.781, + "step": 11511 + }, + { + "epoch": 0.352825793796739, + "grad_norm": 1.3286687584926806, + "learning_rate": 1.50143206128076e-05, + "loss": 0.8268, + "step": 11512 + }, + { + "epoch": 0.3528564423194802, + "grad_norm": 1.365936112613986, + "learning_rate": 1.50134617626732e-05, + "loss": 0.8188, + "step": 11513 + }, + { + "epoch": 0.3528870908422214, + "grad_norm": 1.3935348695061747, + "learning_rate": 1.5012602863139876e-05, + "loss": 0.8152, + "step": 11514 + }, + { + "epoch": 0.3529177393649626, + "grad_norm": 1.4588023655749043, + "learning_rate": 1.5011743914216097e-05, + "loss": 0.7882, + "step": 11515 + }, + { + "epoch": 0.35294838788770383, + "grad_norm": 1.375250578372334, + "learning_rate": 1.5010884915910317e-05, + "loss": 0.7236, + "step": 11516 + }, + { + "epoch": 0.35297903641044504, + "grad_norm": 0.7098909598396855, + "learning_rate": 1.5010025868231013e-05, + "loss": 0.58, + "step": 11517 + }, + { + "epoch": 0.35300968493318624, + "grad_norm": 1.5941745936426734, + "learning_rate": 1.5009166771186636e-05, + "loss": 0.7365, + "step": 11518 + }, + { + "epoch": 0.35304033345592745, + "grad_norm": 1.290184233524429, + "learning_rate": 1.5008307624785663e-05, + "loss": 0.7078, + "step": 11519 + }, + { + "epoch": 0.35307098197866865, + "grad_norm": 1.5705822258603368, + "learning_rate": 1.500744842903655e-05, + "loss": 0.7473, + "step": 11520 + }, + { + "epoch": 0.35310163050140986, + "grad_norm": 1.474385406972665, + "learning_rate": 1.5006589183947766e-05, + "loss": 0.6582, + "step": 11521 + }, + { + "epoch": 0.35313227902415106, + "grad_norm": 0.7020844651514936, + "learning_rate": 1.500572988952778e-05, + "loss": 0.6423, + "step": 11522 + }, + { + "epoch": 0.3531629275468922, + "grad_norm": 1.3465293645112648, + "learning_rate": 1.5004870545785053e-05, + "loss": 0.7436, + "step": 11523 + }, + { + "epoch": 0.3531935760696334, + "grad_norm": 1.544873787301433, + "learning_rate": 1.500401115272806e-05, + "loss": 0.8265, + "step": 11524 + }, + { + "epoch": 0.3532242245923746, + "grad_norm": 1.6540257747234302, + "learning_rate": 1.5003151710365262e-05, + "loss": 0.7604, + "step": 11525 + }, + { + "epoch": 0.35325487311511583, + "grad_norm": 1.3773998261817042, + "learning_rate": 1.5002292218705132e-05, + "loss": 0.6626, + "step": 11526 + }, + { + "epoch": 0.35328552163785704, + "grad_norm": 0.6990150286320083, + "learning_rate": 1.5001432677756136e-05, + "loss": 0.6073, + "step": 11527 + }, + { + "epoch": 0.35331617016059824, + "grad_norm": 1.4641862172289553, + "learning_rate": 1.5000573087526745e-05, + "loss": 0.7534, + "step": 11528 + }, + { + "epoch": 0.35334681868333945, + "grad_norm": 1.4034011228257506, + "learning_rate": 1.4999713448025426e-05, + "loss": 0.7402, + "step": 11529 + }, + { + "epoch": 0.35337746720608065, + "grad_norm": 1.5898163364717168, + "learning_rate": 1.4998853759260655e-05, + "loss": 0.7992, + "step": 11530 + }, + { + "epoch": 0.35340811572882186, + "grad_norm": 1.4056313174447062, + "learning_rate": 1.4997994021240894e-05, + "loss": 0.7287, + "step": 11531 + }, + { + "epoch": 0.35343876425156306, + "grad_norm": 1.4930771957990145, + "learning_rate": 1.4997134233974622e-05, + "loss": 0.7666, + "step": 11532 + }, + { + "epoch": 0.35346941277430427, + "grad_norm": 1.5620028746092853, + "learning_rate": 1.4996274397470307e-05, + "loss": 0.8681, + "step": 11533 + }, + { + "epoch": 0.3535000612970455, + "grad_norm": 1.3871419141295442, + "learning_rate": 1.4995414511736421e-05, + "loss": 0.6857, + "step": 11534 + }, + { + "epoch": 0.3535307098197867, + "grad_norm": 1.4669196145108314, + "learning_rate": 1.4994554576781439e-05, + "loss": 0.8144, + "step": 11535 + }, + { + "epoch": 0.3535613583425279, + "grad_norm": 1.378557385971297, + "learning_rate": 1.4993694592613834e-05, + "loss": 0.7343, + "step": 11536 + }, + { + "epoch": 0.3535920068652691, + "grad_norm": 1.4204108208655948, + "learning_rate": 1.4992834559242078e-05, + "loss": 0.832, + "step": 11537 + }, + { + "epoch": 0.3536226553880103, + "grad_norm": 1.3822071302357157, + "learning_rate": 1.4991974476674642e-05, + "loss": 0.8185, + "step": 11538 + }, + { + "epoch": 0.3536533039107515, + "grad_norm": 1.4181806881237513, + "learning_rate": 1.4991114344920008e-05, + "loss": 0.73, + "step": 11539 + }, + { + "epoch": 0.3536839524334927, + "grad_norm": 1.5245576601290716, + "learning_rate": 1.4990254163986646e-05, + "loss": 0.6898, + "step": 11540 + }, + { + "epoch": 0.3537146009562339, + "grad_norm": 1.4131550317610717, + "learning_rate": 1.4989393933883033e-05, + "loss": 0.749, + "step": 11541 + }, + { + "epoch": 0.3537452494789751, + "grad_norm": 1.3763982438157527, + "learning_rate": 1.4988533654617645e-05, + "loss": 0.7458, + "step": 11542 + }, + { + "epoch": 0.3537758980017163, + "grad_norm": 1.411057539077253, + "learning_rate": 1.4987673326198961e-05, + "loss": 0.7718, + "step": 11543 + }, + { + "epoch": 0.35380654652445753, + "grad_norm": 1.6372035927508606, + "learning_rate": 1.4986812948635452e-05, + "loss": 0.7579, + "step": 11544 + }, + { + "epoch": 0.35383719504719874, + "grad_norm": 1.4819392664888962, + "learning_rate": 1.4985952521935602e-05, + "loss": 0.7527, + "step": 11545 + }, + { + "epoch": 0.35386784356993994, + "grad_norm": 1.4643978410104734, + "learning_rate": 1.4985092046107882e-05, + "loss": 0.7114, + "step": 11546 + }, + { + "epoch": 0.35389849209268115, + "grad_norm": 1.3545467411557204, + "learning_rate": 1.498423152116078e-05, + "loss": 0.6758, + "step": 11547 + }, + { + "epoch": 0.35392914061542236, + "grad_norm": 1.4663032754268486, + "learning_rate": 1.4983370947102767e-05, + "loss": 0.7849, + "step": 11548 + }, + { + "epoch": 0.35395978913816356, + "grad_norm": 1.4508422329788977, + "learning_rate": 1.4982510323942323e-05, + "loss": 0.6811, + "step": 11549 + }, + { + "epoch": 0.35399043766090477, + "grad_norm": 1.5297693454252446, + "learning_rate": 1.498164965168793e-05, + "loss": 0.7429, + "step": 11550 + }, + { + "epoch": 0.35402108618364597, + "grad_norm": 1.5344596577441687, + "learning_rate": 1.4980788930348071e-05, + "loss": 0.8058, + "step": 11551 + }, + { + "epoch": 0.3540517347063872, + "grad_norm": 1.332066791264444, + "learning_rate": 1.4979928159931225e-05, + "loss": 0.7313, + "step": 11552 + }, + { + "epoch": 0.3540823832291284, + "grad_norm": 0.7352492485820822, + "learning_rate": 1.497906734044587e-05, + "loss": 0.6265, + "step": 11553 + }, + { + "epoch": 0.35411303175186953, + "grad_norm": 1.6213630347653774, + "learning_rate": 1.4978206471900491e-05, + "loss": 0.7769, + "step": 11554 + }, + { + "epoch": 0.35414368027461074, + "grad_norm": 1.4242077083976201, + "learning_rate": 1.4977345554303573e-05, + "loss": 0.6334, + "step": 11555 + }, + { + "epoch": 0.35417432879735194, + "grad_norm": 0.6763940965475962, + "learning_rate": 1.497648458766359e-05, + "loss": 0.6097, + "step": 11556 + }, + { + "epoch": 0.35420497732009315, + "grad_norm": 1.432532416539294, + "learning_rate": 1.4975623571989036e-05, + "loss": 0.6766, + "step": 11557 + }, + { + "epoch": 0.35423562584283436, + "grad_norm": 0.66056362891553, + "learning_rate": 1.4974762507288387e-05, + "loss": 0.5999, + "step": 11558 + }, + { + "epoch": 0.35426627436557556, + "grad_norm": 1.5537558481937037, + "learning_rate": 1.4973901393570132e-05, + "loss": 0.7621, + "step": 11559 + }, + { + "epoch": 0.35429692288831677, + "grad_norm": 1.4754510502571547, + "learning_rate": 1.4973040230842753e-05, + "loss": 0.6623, + "step": 11560 + }, + { + "epoch": 0.354327571411058, + "grad_norm": 1.5282025843980052, + "learning_rate": 1.4972179019114736e-05, + "loss": 0.8656, + "step": 11561 + }, + { + "epoch": 0.3543582199337992, + "grad_norm": 0.6716424093984338, + "learning_rate": 1.4971317758394568e-05, + "loss": 0.6044, + "step": 11562 + }, + { + "epoch": 0.3543888684565404, + "grad_norm": 0.7066756040812358, + "learning_rate": 1.4970456448690733e-05, + "loss": 0.6346, + "step": 11563 + }, + { + "epoch": 0.3544195169792816, + "grad_norm": 1.2950901430045512, + "learning_rate": 1.4969595090011719e-05, + "loss": 0.7623, + "step": 11564 + }, + { + "epoch": 0.3544501655020228, + "grad_norm": 1.4921101228060916, + "learning_rate": 1.4968733682366015e-05, + "loss": 0.8094, + "step": 11565 + }, + { + "epoch": 0.354480814024764, + "grad_norm": 1.5954437630153844, + "learning_rate": 1.4967872225762103e-05, + "loss": 0.805, + "step": 11566 + }, + { + "epoch": 0.3545114625475052, + "grad_norm": 1.381393423176489, + "learning_rate": 1.496701072020848e-05, + "loss": 0.6438, + "step": 11567 + }, + { + "epoch": 0.3545421110702464, + "grad_norm": 1.5943792347066996, + "learning_rate": 1.4966149165713624e-05, + "loss": 0.8376, + "step": 11568 + }, + { + "epoch": 0.3545727595929876, + "grad_norm": 1.5457829827809453, + "learning_rate": 1.4965287562286032e-05, + "loss": 0.7125, + "step": 11569 + }, + { + "epoch": 0.3546034081157288, + "grad_norm": 1.4808163035520023, + "learning_rate": 1.496442590993419e-05, + "loss": 0.7054, + "step": 11570 + }, + { + "epoch": 0.35463405663847003, + "grad_norm": 1.487790642223869, + "learning_rate": 1.4963564208666594e-05, + "loss": 0.6548, + "step": 11571 + }, + { + "epoch": 0.35466470516121124, + "grad_norm": 1.4297852412322094, + "learning_rate": 1.4962702458491725e-05, + "loss": 0.7959, + "step": 11572 + }, + { + "epoch": 0.35469535368395244, + "grad_norm": 1.257045546554827, + "learning_rate": 1.4961840659418081e-05, + "loss": 0.7293, + "step": 11573 + }, + { + "epoch": 0.35472600220669365, + "grad_norm": 1.3283604118457042, + "learning_rate": 1.496097881145415e-05, + "loss": 0.7078, + "step": 11574 + }, + { + "epoch": 0.35475665072943485, + "grad_norm": 1.3705152772816445, + "learning_rate": 1.4960116914608427e-05, + "loss": 0.6774, + "step": 11575 + }, + { + "epoch": 0.35478729925217606, + "grad_norm": 1.59106309714475, + "learning_rate": 1.4959254968889403e-05, + "loss": 0.8243, + "step": 11576 + }, + { + "epoch": 0.35481794777491726, + "grad_norm": 1.3997535630618902, + "learning_rate": 1.4958392974305569e-05, + "loss": 0.7418, + "step": 11577 + }, + { + "epoch": 0.35484859629765847, + "grad_norm": 0.8097572400221417, + "learning_rate": 1.4957530930865423e-05, + "loss": 0.6488, + "step": 11578 + }, + { + "epoch": 0.3548792448203997, + "grad_norm": 1.3704463780715292, + "learning_rate": 1.4956668838577452e-05, + "loss": 0.6885, + "step": 11579 + }, + { + "epoch": 0.3549098933431409, + "grad_norm": 1.3694752455432828, + "learning_rate": 1.4955806697450159e-05, + "loss": 0.6027, + "step": 11580 + }, + { + "epoch": 0.3549405418658821, + "grad_norm": 1.3714966630837029, + "learning_rate": 1.4954944507492033e-05, + "loss": 0.6809, + "step": 11581 + }, + { + "epoch": 0.3549711903886233, + "grad_norm": 1.3333924791251062, + "learning_rate": 1.4954082268711574e-05, + "loss": 0.7187, + "step": 11582 + }, + { + "epoch": 0.3550018389113645, + "grad_norm": 1.484912132490028, + "learning_rate": 1.4953219981117271e-05, + "loss": 0.8581, + "step": 11583 + }, + { + "epoch": 0.3550324874341057, + "grad_norm": 0.6567858597805937, + "learning_rate": 1.4952357644717625e-05, + "loss": 0.6036, + "step": 11584 + }, + { + "epoch": 0.35506313595684685, + "grad_norm": 1.4728973523519866, + "learning_rate": 1.4951495259521131e-05, + "loss": 0.8325, + "step": 11585 + }, + { + "epoch": 0.35509378447958806, + "grad_norm": 1.4035320505559787, + "learning_rate": 1.495063282553629e-05, + "loss": 0.775, + "step": 11586 + }, + { + "epoch": 0.35512443300232927, + "grad_norm": 1.6222251272855295, + "learning_rate": 1.4949770342771594e-05, + "loss": 0.7959, + "step": 11587 + }, + { + "epoch": 0.35515508152507047, + "grad_norm": 0.7454231986019403, + "learning_rate": 1.4948907811235547e-05, + "loss": 0.6362, + "step": 11588 + }, + { + "epoch": 0.3551857300478117, + "grad_norm": 1.4889488366007957, + "learning_rate": 1.4948045230936643e-05, + "loss": 0.7712, + "step": 11589 + }, + { + "epoch": 0.3552163785705529, + "grad_norm": 1.4258968773171539, + "learning_rate": 1.4947182601883385e-05, + "loss": 0.8475, + "step": 11590 + }, + { + "epoch": 0.3552470270932941, + "grad_norm": 1.215323754365051, + "learning_rate": 1.4946319924084272e-05, + "loss": 0.5745, + "step": 11591 + }, + { + "epoch": 0.3552776756160353, + "grad_norm": 1.4225445352532404, + "learning_rate": 1.4945457197547799e-05, + "loss": 0.7398, + "step": 11592 + }, + { + "epoch": 0.3553083241387765, + "grad_norm": 1.4348335640437966, + "learning_rate": 1.4944594422282475e-05, + "loss": 0.7207, + "step": 11593 + }, + { + "epoch": 0.3553389726615177, + "grad_norm": 1.581104616183922, + "learning_rate": 1.4943731598296796e-05, + "loss": 0.6969, + "step": 11594 + }, + { + "epoch": 0.3553696211842589, + "grad_norm": 1.312162326964434, + "learning_rate": 1.4942868725599264e-05, + "loss": 0.6879, + "step": 11595 + }, + { + "epoch": 0.3554002697070001, + "grad_norm": 1.264686904730342, + "learning_rate": 1.494200580419838e-05, + "loss": 0.7568, + "step": 11596 + }, + { + "epoch": 0.3554309182297413, + "grad_norm": 0.7179502873422732, + "learning_rate": 1.4941142834102654e-05, + "loss": 0.6341, + "step": 11597 + }, + { + "epoch": 0.3554615667524825, + "grad_norm": 1.424200549273102, + "learning_rate": 1.4940279815320577e-05, + "loss": 0.7053, + "step": 11598 + }, + { + "epoch": 0.35549221527522373, + "grad_norm": 1.5265264602642536, + "learning_rate": 1.4939416747860663e-05, + "loss": 0.7721, + "step": 11599 + }, + { + "epoch": 0.35552286379796494, + "grad_norm": 1.4831945139752707, + "learning_rate": 1.493855363173141e-05, + "loss": 0.7977, + "step": 11600 + }, + { + "epoch": 0.35555351232070614, + "grad_norm": 1.4264296681437738, + "learning_rate": 1.4937690466941326e-05, + "loss": 0.741, + "step": 11601 + }, + { + "epoch": 0.35558416084344735, + "grad_norm": 1.3997633389882342, + "learning_rate": 1.4936827253498914e-05, + "loss": 0.6617, + "step": 11602 + }, + { + "epoch": 0.35561480936618856, + "grad_norm": 1.743747062660031, + "learning_rate": 1.4935963991412679e-05, + "loss": 0.7206, + "step": 11603 + }, + { + "epoch": 0.35564545788892976, + "grad_norm": 1.466475573676505, + "learning_rate": 1.4935100680691128e-05, + "loss": 0.7239, + "step": 11604 + }, + { + "epoch": 0.35567610641167097, + "grad_norm": 1.4094408832449263, + "learning_rate": 1.4934237321342767e-05, + "loss": 0.7365, + "step": 11605 + }, + { + "epoch": 0.3557067549344122, + "grad_norm": 0.6960132553254192, + "learning_rate": 1.4933373913376106e-05, + "loss": 0.6044, + "step": 11606 + }, + { + "epoch": 0.3557374034571534, + "grad_norm": 0.7095002699148829, + "learning_rate": 1.4932510456799648e-05, + "loss": 0.6103, + "step": 11607 + }, + { + "epoch": 0.3557680519798946, + "grad_norm": 1.4337981487256373, + "learning_rate": 1.4931646951621901e-05, + "loss": 0.7217, + "step": 11608 + }, + { + "epoch": 0.3557987005026358, + "grad_norm": 1.340439317138591, + "learning_rate": 1.4930783397851377e-05, + "loss": 0.7369, + "step": 11609 + }, + { + "epoch": 0.355829349025377, + "grad_norm": 1.2496899828475283, + "learning_rate": 1.4929919795496579e-05, + "loss": 0.7791, + "step": 11610 + }, + { + "epoch": 0.3558599975481182, + "grad_norm": 1.477225296528351, + "learning_rate": 1.492905614456602e-05, + "loss": 0.6749, + "step": 11611 + }, + { + "epoch": 0.3558906460708594, + "grad_norm": 0.717862211084647, + "learning_rate": 1.4928192445068214e-05, + "loss": 0.6122, + "step": 11612 + }, + { + "epoch": 0.3559212945936006, + "grad_norm": 1.4009141336256434, + "learning_rate": 1.4927328697011664e-05, + "loss": 0.7376, + "step": 11613 + }, + { + "epoch": 0.3559519431163418, + "grad_norm": 1.4127319985161768, + "learning_rate": 1.4926464900404886e-05, + "loss": 0.7418, + "step": 11614 + }, + { + "epoch": 0.355982591639083, + "grad_norm": 0.7066545531204965, + "learning_rate": 1.4925601055256387e-05, + "loss": 0.6261, + "step": 11615 + }, + { + "epoch": 0.3560132401618242, + "grad_norm": 1.4241094601339237, + "learning_rate": 1.4924737161574681e-05, + "loss": 0.7846, + "step": 11616 + }, + { + "epoch": 0.3560438886845654, + "grad_norm": 0.6587910997104023, + "learning_rate": 1.492387321936828e-05, + "loss": 0.6017, + "step": 11617 + }, + { + "epoch": 0.3560745372073066, + "grad_norm": 1.460255315873664, + "learning_rate": 1.4923009228645696e-05, + "loss": 0.7487, + "step": 11618 + }, + { + "epoch": 0.3561051857300478, + "grad_norm": 1.47794639638534, + "learning_rate": 1.492214518941544e-05, + "loss": 0.8166, + "step": 11619 + }, + { + "epoch": 0.356135834252789, + "grad_norm": 1.3381562005171566, + "learning_rate": 1.492128110168603e-05, + "loss": 0.7943, + "step": 11620 + }, + { + "epoch": 0.3561664827755302, + "grad_norm": 1.5771743534870784, + "learning_rate": 1.492041696546598e-05, + "loss": 0.7429, + "step": 11621 + }, + { + "epoch": 0.3561971312982714, + "grad_norm": 1.3758264259231892, + "learning_rate": 1.4919552780763802e-05, + "loss": 0.7247, + "step": 11622 + }, + { + "epoch": 0.3562277798210126, + "grad_norm": 1.4254477022156304, + "learning_rate": 1.4918688547588009e-05, + "loss": 0.8076, + "step": 11623 + }, + { + "epoch": 0.3562584283437538, + "grad_norm": 1.5189149638418824, + "learning_rate": 1.4917824265947121e-05, + "loss": 0.7425, + "step": 11624 + }, + { + "epoch": 0.356289076866495, + "grad_norm": 1.510046058762314, + "learning_rate": 1.4916959935849655e-05, + "loss": 0.7678, + "step": 11625 + }, + { + "epoch": 0.35631972538923623, + "grad_norm": 1.3592682868688502, + "learning_rate": 1.491609555730412e-05, + "loss": 0.8615, + "step": 11626 + }, + { + "epoch": 0.35635037391197744, + "grad_norm": 1.6417706699853265, + "learning_rate": 1.4915231130319042e-05, + "loss": 0.8208, + "step": 11627 + }, + { + "epoch": 0.35638102243471864, + "grad_norm": 1.322194726097253, + "learning_rate": 1.491436665490293e-05, + "loss": 0.6722, + "step": 11628 + }, + { + "epoch": 0.35641167095745985, + "grad_norm": 1.590095734615041, + "learning_rate": 1.4913502131064306e-05, + "loss": 0.8056, + "step": 11629 + }, + { + "epoch": 0.35644231948020105, + "grad_norm": 0.8513947529997211, + "learning_rate": 1.491263755881169e-05, + "loss": 0.6102, + "step": 11630 + }, + { + "epoch": 0.35647296800294226, + "grad_norm": 1.5802128762793475, + "learning_rate": 1.49117729381536e-05, + "loss": 0.7163, + "step": 11631 + }, + { + "epoch": 0.35650361652568346, + "grad_norm": 1.514347170950049, + "learning_rate": 1.4910908269098556e-05, + "loss": 0.6787, + "step": 11632 + }, + { + "epoch": 0.35653426504842467, + "grad_norm": 0.7016042946602538, + "learning_rate": 1.4910043551655071e-05, + "loss": 0.6119, + "step": 11633 + }, + { + "epoch": 0.3565649135711659, + "grad_norm": 1.4583568038456207, + "learning_rate": 1.4909178785831675e-05, + "loss": 0.7549, + "step": 11634 + }, + { + "epoch": 0.3565955620939071, + "grad_norm": 1.4079107494019723, + "learning_rate": 1.4908313971636882e-05, + "loss": 0.7586, + "step": 11635 + }, + { + "epoch": 0.3566262106166483, + "grad_norm": 2.0533834496321126, + "learning_rate": 1.4907449109079219e-05, + "loss": 0.8096, + "step": 11636 + }, + { + "epoch": 0.3566568591393895, + "grad_norm": 1.613232873807462, + "learning_rate": 1.49065841981672e-05, + "loss": 0.6125, + "step": 11637 + }, + { + "epoch": 0.3566875076621307, + "grad_norm": 1.4411188792888134, + "learning_rate": 1.4905719238909355e-05, + "loss": 0.8166, + "step": 11638 + }, + { + "epoch": 0.3567181561848719, + "grad_norm": 1.4636105532319383, + "learning_rate": 1.4904854231314199e-05, + "loss": 0.7407, + "step": 11639 + }, + { + "epoch": 0.3567488047076131, + "grad_norm": 1.5439369360419097, + "learning_rate": 1.4903989175390266e-05, + "loss": 0.7683, + "step": 11640 + }, + { + "epoch": 0.3567794532303543, + "grad_norm": 1.385792426367147, + "learning_rate": 1.4903124071146067e-05, + "loss": 0.7258, + "step": 11641 + }, + { + "epoch": 0.3568101017530955, + "grad_norm": 1.475213492008877, + "learning_rate": 1.4902258918590133e-05, + "loss": 0.691, + "step": 11642 + }, + { + "epoch": 0.3568407502758367, + "grad_norm": 1.6269459619904854, + "learning_rate": 1.4901393717730988e-05, + "loss": 0.7672, + "step": 11643 + }, + { + "epoch": 0.35687139879857793, + "grad_norm": 1.3492950165434936, + "learning_rate": 1.4900528468577155e-05, + "loss": 0.6758, + "step": 11644 + }, + { + "epoch": 0.35690204732131914, + "grad_norm": 1.478444989696452, + "learning_rate": 1.4899663171137167e-05, + "loss": 0.6902, + "step": 11645 + }, + { + "epoch": 0.35693269584406034, + "grad_norm": 0.8944102484375004, + "learning_rate": 1.4898797825419537e-05, + "loss": 0.5958, + "step": 11646 + }, + { + "epoch": 0.3569633443668015, + "grad_norm": 1.3221008898766495, + "learning_rate": 1.4897932431432802e-05, + "loss": 0.637, + "step": 11647 + }, + { + "epoch": 0.3569939928895427, + "grad_norm": 1.5972630539438164, + "learning_rate": 1.4897066989185486e-05, + "loss": 0.7633, + "step": 11648 + }, + { + "epoch": 0.3570246414122839, + "grad_norm": 0.7279219698999083, + "learning_rate": 1.4896201498686119e-05, + "loss": 0.6163, + "step": 11649 + }, + { + "epoch": 0.3570552899350251, + "grad_norm": 1.4157820992149817, + "learning_rate": 1.4895335959943219e-05, + "loss": 0.7577, + "step": 11650 + }, + { + "epoch": 0.3570859384577663, + "grad_norm": 1.401829422865973, + "learning_rate": 1.4894470372965324e-05, + "loss": 0.8024, + "step": 11651 + }, + { + "epoch": 0.3571165869805075, + "grad_norm": 1.5170221660555034, + "learning_rate": 1.4893604737760962e-05, + "loss": 0.7618, + "step": 11652 + }, + { + "epoch": 0.35714723550324873, + "grad_norm": 1.506581612399986, + "learning_rate": 1.489273905433866e-05, + "loss": 0.7482, + "step": 11653 + }, + { + "epoch": 0.35717788402598993, + "grad_norm": 1.3995120089285609, + "learning_rate": 1.4891873322706944e-05, + "loss": 0.6803, + "step": 11654 + }, + { + "epoch": 0.35720853254873114, + "grad_norm": 1.5106148424213819, + "learning_rate": 1.4891007542874354e-05, + "loss": 0.846, + "step": 11655 + }, + { + "epoch": 0.35723918107147234, + "grad_norm": 1.279244113188822, + "learning_rate": 1.4890141714849413e-05, + "loss": 0.6587, + "step": 11656 + }, + { + "epoch": 0.35726982959421355, + "grad_norm": 1.6136206276826708, + "learning_rate": 1.4889275838640653e-05, + "loss": 0.7121, + "step": 11657 + }, + { + "epoch": 0.35730047811695476, + "grad_norm": 1.2892819927318129, + "learning_rate": 1.488840991425661e-05, + "loss": 0.7268, + "step": 11658 + }, + { + "epoch": 0.35733112663969596, + "grad_norm": 1.345834779030035, + "learning_rate": 1.4887543941705813e-05, + "loss": 0.7032, + "step": 11659 + }, + { + "epoch": 0.35736177516243717, + "grad_norm": 1.4004039384975573, + "learning_rate": 1.4886677920996796e-05, + "loss": 0.7419, + "step": 11660 + }, + { + "epoch": 0.3573924236851784, + "grad_norm": 1.4931906351625097, + "learning_rate": 1.4885811852138085e-05, + "loss": 0.7712, + "step": 11661 + }, + { + "epoch": 0.3574230722079196, + "grad_norm": 1.5761636528422838, + "learning_rate": 1.4884945735138225e-05, + "loss": 0.7852, + "step": 11662 + }, + { + "epoch": 0.3574537207306608, + "grad_norm": 1.2627515047393938, + "learning_rate": 1.4884079570005744e-05, + "loss": 0.6215, + "step": 11663 + }, + { + "epoch": 0.357484369253402, + "grad_norm": 1.5761446620531634, + "learning_rate": 1.4883213356749178e-05, + "loss": 0.7913, + "step": 11664 + }, + { + "epoch": 0.3575150177761432, + "grad_norm": 0.7189491805429371, + "learning_rate": 1.4882347095377058e-05, + "loss": 0.6218, + "step": 11665 + }, + { + "epoch": 0.3575456662988844, + "grad_norm": 1.5683403174103123, + "learning_rate": 1.4881480785897928e-05, + "loss": 0.7381, + "step": 11666 + }, + { + "epoch": 0.3575763148216256, + "grad_norm": 1.572994644408266, + "learning_rate": 1.4880614428320317e-05, + "loss": 0.7721, + "step": 11667 + }, + { + "epoch": 0.3576069633443668, + "grad_norm": 1.4971657761869033, + "learning_rate": 1.4879748022652762e-05, + "loss": 0.784, + "step": 11668 + }, + { + "epoch": 0.357637611867108, + "grad_norm": 1.8105280002191564, + "learning_rate": 1.4878881568903803e-05, + "loss": 0.9132, + "step": 11669 + }, + { + "epoch": 0.3576682603898492, + "grad_norm": 1.4280604033157818, + "learning_rate": 1.4878015067081972e-05, + "loss": 0.7605, + "step": 11670 + }, + { + "epoch": 0.35769890891259043, + "grad_norm": 1.5984927527822625, + "learning_rate": 1.4877148517195814e-05, + "loss": 0.733, + "step": 11671 + }, + { + "epoch": 0.35772955743533164, + "grad_norm": 1.467201609793084, + "learning_rate": 1.4876281919253861e-05, + "loss": 0.7135, + "step": 11672 + }, + { + "epoch": 0.35776020595807284, + "grad_norm": 1.6196716180632016, + "learning_rate": 1.4875415273264658e-05, + "loss": 0.7858, + "step": 11673 + }, + { + "epoch": 0.35779085448081405, + "grad_norm": 1.3093253687938229, + "learning_rate": 1.4874548579236736e-05, + "loss": 0.6877, + "step": 11674 + }, + { + "epoch": 0.35782150300355525, + "grad_norm": 1.4297132631387297, + "learning_rate": 1.4873681837178647e-05, + "loss": 0.7073, + "step": 11675 + }, + { + "epoch": 0.35785215152629646, + "grad_norm": 1.4899593762006524, + "learning_rate": 1.4872815047098917e-05, + "loss": 0.7693, + "step": 11676 + }, + { + "epoch": 0.35788280004903766, + "grad_norm": 1.5861027111926786, + "learning_rate": 1.4871948209006097e-05, + "loss": 0.6952, + "step": 11677 + }, + { + "epoch": 0.3579134485717788, + "grad_norm": 1.3103276336018879, + "learning_rate": 1.4871081322908723e-05, + "loss": 0.6755, + "step": 11678 + }, + { + "epoch": 0.35794409709452, + "grad_norm": 1.4581036896041775, + "learning_rate": 1.487021438881534e-05, + "loss": 0.677, + "step": 11679 + }, + { + "epoch": 0.3579747456172612, + "grad_norm": 1.51594768953272, + "learning_rate": 1.4869347406734486e-05, + "loss": 0.7478, + "step": 11680 + }, + { + "epoch": 0.35800539414000243, + "grad_norm": 1.5569740365906002, + "learning_rate": 1.486848037667471e-05, + "loss": 0.7315, + "step": 11681 + }, + { + "epoch": 0.35803604266274364, + "grad_norm": 0.7754811009996251, + "learning_rate": 1.4867613298644548e-05, + "loss": 0.632, + "step": 11682 + }, + { + "epoch": 0.35806669118548484, + "grad_norm": 1.6429624974994737, + "learning_rate": 1.4866746172652549e-05, + "loss": 0.7757, + "step": 11683 + }, + { + "epoch": 0.35809733970822605, + "grad_norm": 1.6530144095298798, + "learning_rate": 1.4865878998707254e-05, + "loss": 0.7916, + "step": 11684 + }, + { + "epoch": 0.35812798823096725, + "grad_norm": 1.5000766168679245, + "learning_rate": 1.4865011776817207e-05, + "loss": 0.7859, + "step": 11685 + }, + { + "epoch": 0.35815863675370846, + "grad_norm": 1.4383064910006647, + "learning_rate": 1.4864144506990957e-05, + "loss": 0.6734, + "step": 11686 + }, + { + "epoch": 0.35818928527644966, + "grad_norm": 1.4316130372375668, + "learning_rate": 1.4863277189237043e-05, + "loss": 0.7469, + "step": 11687 + }, + { + "epoch": 0.35821993379919087, + "grad_norm": 0.8445362705088495, + "learning_rate": 1.4862409823564017e-05, + "loss": 0.6292, + "step": 11688 + }, + { + "epoch": 0.3582505823219321, + "grad_norm": 1.6543092429924555, + "learning_rate": 1.4861542409980421e-05, + "loss": 0.8223, + "step": 11689 + }, + { + "epoch": 0.3582812308446733, + "grad_norm": 1.4208893352118381, + "learning_rate": 1.4860674948494806e-05, + "loss": 0.8469, + "step": 11690 + }, + { + "epoch": 0.3583118793674145, + "grad_norm": 1.4954052381064504, + "learning_rate": 1.4859807439115714e-05, + "loss": 0.8002, + "step": 11691 + }, + { + "epoch": 0.3583425278901557, + "grad_norm": 1.7920820116109453, + "learning_rate": 1.48589398818517e-05, + "loss": 0.7551, + "step": 11692 + }, + { + "epoch": 0.3583731764128969, + "grad_norm": 1.4099011701522177, + "learning_rate": 1.4858072276711304e-05, + "loss": 0.7855, + "step": 11693 + }, + { + "epoch": 0.3584038249356381, + "grad_norm": 1.3246702198848894, + "learning_rate": 1.4857204623703083e-05, + "loss": 0.745, + "step": 11694 + }, + { + "epoch": 0.3584344734583793, + "grad_norm": 0.6958294113299633, + "learning_rate": 1.485633692283558e-05, + "loss": 0.5707, + "step": 11695 + }, + { + "epoch": 0.3584651219811205, + "grad_norm": 1.8052504883678422, + "learning_rate": 1.4855469174117345e-05, + "loss": 0.8448, + "step": 11696 + }, + { + "epoch": 0.3584957705038617, + "grad_norm": 1.460218203604646, + "learning_rate": 1.485460137755693e-05, + "loss": 0.7571, + "step": 11697 + }, + { + "epoch": 0.3585264190266029, + "grad_norm": 1.2959707828250315, + "learning_rate": 1.4853733533162888e-05, + "loss": 0.7632, + "step": 11698 + }, + { + "epoch": 0.35855706754934413, + "grad_norm": 1.5992931919716098, + "learning_rate": 1.4852865640943767e-05, + "loss": 0.7526, + "step": 11699 + }, + { + "epoch": 0.35858771607208534, + "grad_norm": 1.4815687811225828, + "learning_rate": 1.4851997700908118e-05, + "loss": 0.7935, + "step": 11700 + }, + { + "epoch": 0.35861836459482654, + "grad_norm": 1.6054678510295946, + "learning_rate": 1.4851129713064495e-05, + "loss": 0.7716, + "step": 11701 + }, + { + "epoch": 0.35864901311756775, + "grad_norm": 1.297176213561238, + "learning_rate": 1.4850261677421451e-05, + "loss": 0.7004, + "step": 11702 + }, + { + "epoch": 0.35867966164030896, + "grad_norm": 1.2851613559973525, + "learning_rate": 1.4849393593987538e-05, + "loss": 0.7331, + "step": 11703 + }, + { + "epoch": 0.35871031016305016, + "grad_norm": 1.4814885099443604, + "learning_rate": 1.4848525462771306e-05, + "loss": 0.8578, + "step": 11704 + }, + { + "epoch": 0.35874095868579137, + "grad_norm": 0.7131985483408679, + "learning_rate": 1.4847657283781314e-05, + "loss": 0.6179, + "step": 11705 + }, + { + "epoch": 0.3587716072085326, + "grad_norm": 1.3596467032905521, + "learning_rate": 1.4846789057026113e-05, + "loss": 0.8146, + "step": 11706 + }, + { + "epoch": 0.3588022557312738, + "grad_norm": 1.4784181769013471, + "learning_rate": 1.4845920782514262e-05, + "loss": 0.6677, + "step": 11707 + }, + { + "epoch": 0.358832904254015, + "grad_norm": 1.3066749094259773, + "learning_rate": 1.4845052460254312e-05, + "loss": 0.8208, + "step": 11708 + }, + { + "epoch": 0.35886355277675613, + "grad_norm": 1.4240698870846404, + "learning_rate": 1.484418409025482e-05, + "loss": 0.8251, + "step": 11709 + }, + { + "epoch": 0.35889420129949734, + "grad_norm": 1.5151102463706165, + "learning_rate": 1.4843315672524345e-05, + "loss": 0.821, + "step": 11710 + }, + { + "epoch": 0.35892484982223855, + "grad_norm": 1.563966915710336, + "learning_rate": 1.484244720707144e-05, + "loss": 0.79, + "step": 11711 + }, + { + "epoch": 0.35895549834497975, + "grad_norm": 1.5160347509759382, + "learning_rate": 1.4841578693904661e-05, + "loss": 0.8103, + "step": 11712 + }, + { + "epoch": 0.35898614686772096, + "grad_norm": 1.219798093656817, + "learning_rate": 1.4840710133032571e-05, + "loss": 0.6653, + "step": 11713 + }, + { + "epoch": 0.35901679539046216, + "grad_norm": 1.3785140011605548, + "learning_rate": 1.4839841524463728e-05, + "loss": 0.6858, + "step": 11714 + }, + { + "epoch": 0.35904744391320337, + "grad_norm": 1.4828195818581316, + "learning_rate": 1.4838972868206682e-05, + "loss": 0.7101, + "step": 11715 + }, + { + "epoch": 0.3590780924359446, + "grad_norm": 1.3550791374024094, + "learning_rate": 1.4838104164270002e-05, + "loss": 0.7702, + "step": 11716 + }, + { + "epoch": 0.3591087409586858, + "grad_norm": 1.4507302492795715, + "learning_rate": 1.4837235412662246e-05, + "loss": 0.8587, + "step": 11717 + }, + { + "epoch": 0.359139389481427, + "grad_norm": 1.4752001923697449, + "learning_rate": 1.4836366613391968e-05, + "loss": 0.8579, + "step": 11718 + }, + { + "epoch": 0.3591700380041682, + "grad_norm": 1.289264213124185, + "learning_rate": 1.4835497766467733e-05, + "loss": 0.7165, + "step": 11719 + }, + { + "epoch": 0.3592006865269094, + "grad_norm": 1.5960597406911767, + "learning_rate": 1.4834628871898103e-05, + "loss": 0.8469, + "step": 11720 + }, + { + "epoch": 0.3592313350496506, + "grad_norm": 0.7224054129150874, + "learning_rate": 1.4833759929691636e-05, + "loss": 0.617, + "step": 11721 + }, + { + "epoch": 0.3592619835723918, + "grad_norm": 0.7214062725959991, + "learning_rate": 1.4832890939856897e-05, + "loss": 0.6109, + "step": 11722 + }, + { + "epoch": 0.359292632095133, + "grad_norm": 1.4884314850781997, + "learning_rate": 1.4832021902402444e-05, + "loss": 0.85, + "step": 11723 + }, + { + "epoch": 0.3593232806178742, + "grad_norm": 1.4275645315180372, + "learning_rate": 1.4831152817336846e-05, + "loss": 0.6472, + "step": 11724 + }, + { + "epoch": 0.3593539291406154, + "grad_norm": 1.291364129767187, + "learning_rate": 1.4830283684668665e-05, + "loss": 0.6708, + "step": 11725 + }, + { + "epoch": 0.35938457766335663, + "grad_norm": 1.4916388830455112, + "learning_rate": 1.4829414504406459e-05, + "loss": 0.8088, + "step": 11726 + }, + { + "epoch": 0.35941522618609784, + "grad_norm": 1.5034661087482655, + "learning_rate": 1.4828545276558797e-05, + "loss": 0.7822, + "step": 11727 + }, + { + "epoch": 0.35944587470883904, + "grad_norm": 1.3597806172659688, + "learning_rate": 1.4827676001134243e-05, + "loss": 0.7372, + "step": 11728 + }, + { + "epoch": 0.35947652323158025, + "grad_norm": 1.477900979142541, + "learning_rate": 1.4826806678141364e-05, + "loss": 0.7537, + "step": 11729 + }, + { + "epoch": 0.35950717175432145, + "grad_norm": 1.5803819905161267, + "learning_rate": 1.4825937307588723e-05, + "loss": 0.7013, + "step": 11730 + }, + { + "epoch": 0.35953782027706266, + "grad_norm": 1.4959606680116126, + "learning_rate": 1.4825067889484886e-05, + "loss": 0.7935, + "step": 11731 + }, + { + "epoch": 0.35956846879980386, + "grad_norm": 1.479089258210832, + "learning_rate": 1.4824198423838418e-05, + "loss": 0.7263, + "step": 11732 + }, + { + "epoch": 0.35959911732254507, + "grad_norm": 1.364445359473845, + "learning_rate": 1.4823328910657896e-05, + "loss": 0.648, + "step": 11733 + }, + { + "epoch": 0.3596297658452863, + "grad_norm": 1.5359255003237233, + "learning_rate": 1.4822459349951874e-05, + "loss": 0.8706, + "step": 11734 + }, + { + "epoch": 0.3596604143680275, + "grad_norm": 1.393947335344459, + "learning_rate": 1.4821589741728927e-05, + "loss": 0.7519, + "step": 11735 + }, + { + "epoch": 0.3596910628907687, + "grad_norm": 1.380143438965864, + "learning_rate": 1.4820720085997624e-05, + "loss": 0.807, + "step": 11736 + }, + { + "epoch": 0.3597217114135099, + "grad_norm": 1.4311984586833981, + "learning_rate": 1.4819850382766533e-05, + "loss": 0.7529, + "step": 11737 + }, + { + "epoch": 0.3597523599362511, + "grad_norm": 1.387143622243163, + "learning_rate": 1.481898063204422e-05, + "loss": 0.7863, + "step": 11738 + }, + { + "epoch": 0.3597830084589923, + "grad_norm": 1.4934596591478777, + "learning_rate": 1.4818110833839261e-05, + "loss": 0.7673, + "step": 11739 + }, + { + "epoch": 0.35981365698173345, + "grad_norm": 1.407665652494414, + "learning_rate": 1.4817240988160222e-05, + "loss": 0.6717, + "step": 11740 + }, + { + "epoch": 0.35984430550447466, + "grad_norm": 1.4544475659962905, + "learning_rate": 1.4816371095015673e-05, + "loss": 0.7434, + "step": 11741 + }, + { + "epoch": 0.35987495402721587, + "grad_norm": 1.4225093943986813, + "learning_rate": 1.4815501154414191e-05, + "loss": 0.7502, + "step": 11742 + }, + { + "epoch": 0.35990560254995707, + "grad_norm": 1.3792676750420314, + "learning_rate": 1.4814631166364342e-05, + "loss": 0.6699, + "step": 11743 + }, + { + "epoch": 0.3599362510726983, + "grad_norm": 0.7919613764576169, + "learning_rate": 1.4813761130874702e-05, + "loss": 0.6075, + "step": 11744 + }, + { + "epoch": 0.3599668995954395, + "grad_norm": 1.458763797964372, + "learning_rate": 1.4812891047953839e-05, + "loss": 0.6538, + "step": 11745 + }, + { + "epoch": 0.3599975481181807, + "grad_norm": 1.5284473209222864, + "learning_rate": 1.481202091761033e-05, + "loss": 0.6441, + "step": 11746 + }, + { + "epoch": 0.3600281966409219, + "grad_norm": 1.5339002029570041, + "learning_rate": 1.4811150739852749e-05, + "loss": 0.7178, + "step": 11747 + }, + { + "epoch": 0.3600588451636631, + "grad_norm": 1.3869489071441046, + "learning_rate": 1.481028051468967e-05, + "loss": 0.7404, + "step": 11748 + }, + { + "epoch": 0.3600894936864043, + "grad_norm": 1.418016863815174, + "learning_rate": 1.4809410242129662e-05, + "loss": 0.7465, + "step": 11749 + }, + { + "epoch": 0.3601201422091455, + "grad_norm": 1.3437803725824378, + "learning_rate": 1.4808539922181306e-05, + "loss": 0.7355, + "step": 11750 + }, + { + "epoch": 0.3601507907318867, + "grad_norm": 1.6278673444631961, + "learning_rate": 1.4807669554853176e-05, + "loss": 0.7323, + "step": 11751 + }, + { + "epoch": 0.3601814392546279, + "grad_norm": 1.5991127222047286, + "learning_rate": 1.4806799140153848e-05, + "loss": 0.8297, + "step": 11752 + }, + { + "epoch": 0.3602120877773691, + "grad_norm": 1.335940152091523, + "learning_rate": 1.48059286780919e-05, + "loss": 0.784, + "step": 11753 + }, + { + "epoch": 0.36024273630011033, + "grad_norm": 1.2985136524503809, + "learning_rate": 1.4805058168675905e-05, + "loss": 0.6979, + "step": 11754 + }, + { + "epoch": 0.36027338482285154, + "grad_norm": 1.277857739860093, + "learning_rate": 1.4804187611914442e-05, + "loss": 0.6917, + "step": 11755 + }, + { + "epoch": 0.36030403334559274, + "grad_norm": 1.4077263898624048, + "learning_rate": 1.4803317007816092e-05, + "loss": 0.7599, + "step": 11756 + }, + { + "epoch": 0.36033468186833395, + "grad_norm": 1.5437362995521526, + "learning_rate": 1.4802446356389428e-05, + "loss": 0.7781, + "step": 11757 + }, + { + "epoch": 0.36036533039107516, + "grad_norm": 1.3772806984897485, + "learning_rate": 1.4801575657643032e-05, + "loss": 0.7015, + "step": 11758 + }, + { + "epoch": 0.36039597891381636, + "grad_norm": 1.4577165875514189, + "learning_rate": 1.4800704911585482e-05, + "loss": 0.7015, + "step": 11759 + }, + { + "epoch": 0.36042662743655757, + "grad_norm": 0.7219594299065759, + "learning_rate": 1.479983411822536e-05, + "loss": 0.6052, + "step": 11760 + }, + { + "epoch": 0.3604572759592988, + "grad_norm": 1.8066385399788671, + "learning_rate": 1.4798963277571244e-05, + "loss": 0.7991, + "step": 11761 + }, + { + "epoch": 0.36048792448204, + "grad_norm": 0.6685735772632708, + "learning_rate": 1.4798092389631713e-05, + "loss": 0.5782, + "step": 11762 + }, + { + "epoch": 0.3605185730047812, + "grad_norm": 1.6871372683205779, + "learning_rate": 1.4797221454415353e-05, + "loss": 0.7091, + "step": 11763 + }, + { + "epoch": 0.3605492215275224, + "grad_norm": 1.5552581609287903, + "learning_rate": 1.479635047193074e-05, + "loss": 0.8001, + "step": 11764 + }, + { + "epoch": 0.3605798700502636, + "grad_norm": 1.2907009104493403, + "learning_rate": 1.479547944218646e-05, + "loss": 0.7238, + "step": 11765 + }, + { + "epoch": 0.3606105185730048, + "grad_norm": 0.7138994618088921, + "learning_rate": 1.4794608365191092e-05, + "loss": 0.6186, + "step": 11766 + }, + { + "epoch": 0.360641167095746, + "grad_norm": 1.5118284936149602, + "learning_rate": 1.4793737240953223e-05, + "loss": 0.7344, + "step": 11767 + }, + { + "epoch": 0.3606718156184872, + "grad_norm": 1.5288954793166227, + "learning_rate": 1.4792866069481436e-05, + "loss": 0.7226, + "step": 11768 + }, + { + "epoch": 0.3607024641412284, + "grad_norm": 1.3660095388498263, + "learning_rate": 1.4791994850784307e-05, + "loss": 0.7566, + "step": 11769 + }, + { + "epoch": 0.3607331126639696, + "grad_norm": 1.419745266696491, + "learning_rate": 1.4791123584870432e-05, + "loss": 0.7033, + "step": 11770 + }, + { + "epoch": 0.3607637611867108, + "grad_norm": 1.547367970849973, + "learning_rate": 1.4790252271748392e-05, + "loss": 0.6347, + "step": 11771 + }, + { + "epoch": 0.360794409709452, + "grad_norm": 1.596224264034643, + "learning_rate": 1.4789380911426767e-05, + "loss": 0.7636, + "step": 11772 + }, + { + "epoch": 0.3608250582321932, + "grad_norm": 1.6215384966772477, + "learning_rate": 1.4788509503914146e-05, + "loss": 0.8385, + "step": 11773 + }, + { + "epoch": 0.3608557067549344, + "grad_norm": 0.7246014953548305, + "learning_rate": 1.4787638049219117e-05, + "loss": 0.6227, + "step": 11774 + }, + { + "epoch": 0.3608863552776756, + "grad_norm": 0.6770404508654871, + "learning_rate": 1.4786766547350267e-05, + "loss": 0.6068, + "step": 11775 + }, + { + "epoch": 0.3609170038004168, + "grad_norm": 1.4771666814903444, + "learning_rate": 1.478589499831618e-05, + "loss": 0.7929, + "step": 11776 + }, + { + "epoch": 0.360947652323158, + "grad_norm": 1.5748978361133117, + "learning_rate": 1.4785023402125442e-05, + "loss": 0.8415, + "step": 11777 + }, + { + "epoch": 0.3609783008458992, + "grad_norm": 1.4333553429293644, + "learning_rate": 1.4784151758786648e-05, + "loss": 0.7078, + "step": 11778 + }, + { + "epoch": 0.3610089493686404, + "grad_norm": 1.4218836755224413, + "learning_rate": 1.4783280068308384e-05, + "loss": 0.7812, + "step": 11779 + }, + { + "epoch": 0.3610395978913816, + "grad_norm": 1.472665807494261, + "learning_rate": 1.4782408330699236e-05, + "loss": 0.7572, + "step": 11780 + }, + { + "epoch": 0.36107024641412283, + "grad_norm": 1.7306423032976934, + "learning_rate": 1.4781536545967792e-05, + "loss": 0.6989, + "step": 11781 + }, + { + "epoch": 0.36110089493686404, + "grad_norm": 1.371584456250208, + "learning_rate": 1.4780664714122648e-05, + "loss": 0.7599, + "step": 11782 + }, + { + "epoch": 0.36113154345960524, + "grad_norm": 1.5950145808269824, + "learning_rate": 1.477979283517239e-05, + "loss": 0.7707, + "step": 11783 + }, + { + "epoch": 0.36116219198234645, + "grad_norm": 1.4831386340039079, + "learning_rate": 1.4778920909125612e-05, + "loss": 0.7548, + "step": 11784 + }, + { + "epoch": 0.36119284050508765, + "grad_norm": 1.6349176748188488, + "learning_rate": 1.4778048935990903e-05, + "loss": 0.7848, + "step": 11785 + }, + { + "epoch": 0.36122348902782886, + "grad_norm": 1.3988851102630409, + "learning_rate": 1.4777176915776851e-05, + "loss": 0.8285, + "step": 11786 + }, + { + "epoch": 0.36125413755057006, + "grad_norm": 1.352074796989847, + "learning_rate": 1.4776304848492062e-05, + "loss": 0.7542, + "step": 11787 + }, + { + "epoch": 0.36128478607331127, + "grad_norm": 1.3189677334275864, + "learning_rate": 1.4775432734145112e-05, + "loss": 0.6617, + "step": 11788 + }, + { + "epoch": 0.3613154345960525, + "grad_norm": 1.3461793944076847, + "learning_rate": 1.4774560572744603e-05, + "loss": 0.8294, + "step": 11789 + }, + { + "epoch": 0.3613460831187937, + "grad_norm": 1.3948346242540657, + "learning_rate": 1.4773688364299127e-05, + "loss": 0.7295, + "step": 11790 + }, + { + "epoch": 0.3613767316415349, + "grad_norm": 1.5878352979876507, + "learning_rate": 1.477281610881728e-05, + "loss": 0.7465, + "step": 11791 + }, + { + "epoch": 0.3614073801642761, + "grad_norm": 1.45469085966679, + "learning_rate": 1.4771943806307652e-05, + "loss": 0.7621, + "step": 11792 + }, + { + "epoch": 0.3614380286870173, + "grad_norm": 0.8057555618717654, + "learning_rate": 1.4771071456778843e-05, + "loss": 0.6286, + "step": 11793 + }, + { + "epoch": 0.3614686772097585, + "grad_norm": 0.7557557597378787, + "learning_rate": 1.4770199060239445e-05, + "loss": 0.614, + "step": 11794 + }, + { + "epoch": 0.3614993257324997, + "grad_norm": 1.591704233933026, + "learning_rate": 1.4769326616698054e-05, + "loss": 0.6871, + "step": 11795 + }, + { + "epoch": 0.3615299742552409, + "grad_norm": 1.3649315504150215, + "learning_rate": 1.4768454126163269e-05, + "loss": 0.7492, + "step": 11796 + }, + { + "epoch": 0.3615606227779821, + "grad_norm": 1.4278780964678166, + "learning_rate": 1.4767581588643682e-05, + "loss": 0.762, + "step": 11797 + }, + { + "epoch": 0.3615912713007233, + "grad_norm": 1.5761731774077208, + "learning_rate": 1.4766709004147902e-05, + "loss": 0.7579, + "step": 11798 + }, + { + "epoch": 0.36162191982346453, + "grad_norm": 1.6173233787774335, + "learning_rate": 1.4765836372684512e-05, + "loss": 0.8162, + "step": 11799 + }, + { + "epoch": 0.36165256834620574, + "grad_norm": 1.3583123048339527, + "learning_rate": 1.4764963694262118e-05, + "loss": 0.809, + "step": 11800 + }, + { + "epoch": 0.36168321686894694, + "grad_norm": 1.3720620455145442, + "learning_rate": 1.4764090968889315e-05, + "loss": 0.7597, + "step": 11801 + }, + { + "epoch": 0.3617138653916881, + "grad_norm": 1.498943112850366, + "learning_rate": 1.4763218196574711e-05, + "loss": 0.7802, + "step": 11802 + }, + { + "epoch": 0.3617445139144293, + "grad_norm": 1.390506128054116, + "learning_rate": 1.4762345377326894e-05, + "loss": 0.8003, + "step": 11803 + }, + { + "epoch": 0.3617751624371705, + "grad_norm": 1.4500055384270145, + "learning_rate": 1.4761472511154473e-05, + "loss": 0.6999, + "step": 11804 + }, + { + "epoch": 0.3618058109599117, + "grad_norm": 1.4804329450763947, + "learning_rate": 1.4760599598066043e-05, + "loss": 0.8039, + "step": 11805 + }, + { + "epoch": 0.3618364594826529, + "grad_norm": 1.3731376199255358, + "learning_rate": 1.4759726638070209e-05, + "loss": 0.7084, + "step": 11806 + }, + { + "epoch": 0.3618671080053941, + "grad_norm": 1.7661758558979952, + "learning_rate": 1.4758853631175569e-05, + "loss": 0.7664, + "step": 11807 + }, + { + "epoch": 0.36189775652813533, + "grad_norm": 1.4904598908328641, + "learning_rate": 1.4757980577390727e-05, + "loss": 0.851, + "step": 11808 + }, + { + "epoch": 0.36192840505087653, + "grad_norm": 1.4509164556642655, + "learning_rate": 1.4757107476724284e-05, + "loss": 0.6879, + "step": 11809 + }, + { + "epoch": 0.36195905357361774, + "grad_norm": 1.485552907884819, + "learning_rate": 1.4756234329184844e-05, + "loss": 0.6776, + "step": 11810 + }, + { + "epoch": 0.36198970209635895, + "grad_norm": 1.4155886021804738, + "learning_rate": 1.4755361134781012e-05, + "loss": 0.7036, + "step": 11811 + }, + { + "epoch": 0.36202035061910015, + "grad_norm": 1.3603141552942977, + "learning_rate": 1.4754487893521387e-05, + "loss": 0.71, + "step": 11812 + }, + { + "epoch": 0.36205099914184136, + "grad_norm": 1.6927204331724575, + "learning_rate": 1.4753614605414582e-05, + "loss": 0.7992, + "step": 11813 + }, + { + "epoch": 0.36208164766458256, + "grad_norm": 1.0031361670254215, + "learning_rate": 1.4752741270469191e-05, + "loss": 0.6712, + "step": 11814 + }, + { + "epoch": 0.36211229618732377, + "grad_norm": 1.2422737493397236, + "learning_rate": 1.4751867888693826e-05, + "loss": 0.7006, + "step": 11815 + }, + { + "epoch": 0.362142944710065, + "grad_norm": 1.4788269435029693, + "learning_rate": 1.4750994460097087e-05, + "loss": 0.6852, + "step": 11816 + }, + { + "epoch": 0.3621735932328062, + "grad_norm": 1.5013617998277184, + "learning_rate": 1.4750120984687591e-05, + "loss": 0.8706, + "step": 11817 + }, + { + "epoch": 0.3622042417555474, + "grad_norm": 1.469053663615053, + "learning_rate": 1.4749247462473932e-05, + "loss": 0.7463, + "step": 11818 + }, + { + "epoch": 0.3622348902782886, + "grad_norm": 1.3506558787395426, + "learning_rate": 1.4748373893464724e-05, + "loss": 0.7389, + "step": 11819 + }, + { + "epoch": 0.3622655388010298, + "grad_norm": 1.4373305896432016, + "learning_rate": 1.4747500277668573e-05, + "loss": 0.8146, + "step": 11820 + }, + { + "epoch": 0.362296187323771, + "grad_norm": 1.3593933559542886, + "learning_rate": 1.4746626615094088e-05, + "loss": 0.6613, + "step": 11821 + }, + { + "epoch": 0.3623268358465122, + "grad_norm": 1.3644877609094506, + "learning_rate": 1.4745752905749877e-05, + "loss": 0.7489, + "step": 11822 + }, + { + "epoch": 0.3623574843692534, + "grad_norm": 1.394030577014346, + "learning_rate": 1.4744879149644546e-05, + "loss": 0.7236, + "step": 11823 + }, + { + "epoch": 0.3623881328919946, + "grad_norm": 1.356351888746472, + "learning_rate": 1.474400534678671e-05, + "loss": 0.7212, + "step": 11824 + }, + { + "epoch": 0.3624187814147358, + "grad_norm": 1.4935446746282122, + "learning_rate": 1.4743131497184975e-05, + "loss": 0.7916, + "step": 11825 + }, + { + "epoch": 0.36244942993747703, + "grad_norm": 0.7465601496018035, + "learning_rate": 1.474225760084795e-05, + "loss": 0.6066, + "step": 11826 + }, + { + "epoch": 0.36248007846021824, + "grad_norm": 1.4842516049520313, + "learning_rate": 1.4741383657784248e-05, + "loss": 0.7629, + "step": 11827 + }, + { + "epoch": 0.36251072698295944, + "grad_norm": 0.6779336726990618, + "learning_rate": 1.4740509668002481e-05, + "loss": 0.6279, + "step": 11828 + }, + { + "epoch": 0.36254137550570065, + "grad_norm": 1.3640184840307528, + "learning_rate": 1.4739635631511258e-05, + "loss": 0.7929, + "step": 11829 + }, + { + "epoch": 0.36257202402844185, + "grad_norm": 1.4776963279741702, + "learning_rate": 1.4738761548319191e-05, + "loss": 0.7363, + "step": 11830 + }, + { + "epoch": 0.36260267255118306, + "grad_norm": 1.5330750351532194, + "learning_rate": 1.4737887418434895e-05, + "loss": 0.7991, + "step": 11831 + }, + { + "epoch": 0.36263332107392426, + "grad_norm": 1.4577633809253674, + "learning_rate": 1.4737013241866982e-05, + "loss": 0.9049, + "step": 11832 + }, + { + "epoch": 0.3626639695966654, + "grad_norm": 1.6565477191543783, + "learning_rate": 1.4736139018624067e-05, + "loss": 0.7602, + "step": 11833 + }, + { + "epoch": 0.3626946181194066, + "grad_norm": 0.7543467039372745, + "learning_rate": 1.4735264748714761e-05, + "loss": 0.6218, + "step": 11834 + }, + { + "epoch": 0.3627252666421478, + "grad_norm": 1.58757967106911, + "learning_rate": 1.473439043214768e-05, + "loss": 0.755, + "step": 11835 + }, + { + "epoch": 0.36275591516488903, + "grad_norm": 1.519599185848134, + "learning_rate": 1.4733516068931439e-05, + "loss": 0.7229, + "step": 11836 + }, + { + "epoch": 0.36278656368763024, + "grad_norm": 0.6862849924340088, + "learning_rate": 1.4732641659074656e-05, + "loss": 0.6554, + "step": 11837 + }, + { + "epoch": 0.36281721221037144, + "grad_norm": 1.4557181860597679, + "learning_rate": 1.4731767202585939e-05, + "loss": 0.7949, + "step": 11838 + }, + { + "epoch": 0.36284786073311265, + "grad_norm": 0.6604060079585945, + "learning_rate": 1.473089269947391e-05, + "loss": 0.6161, + "step": 11839 + }, + { + "epoch": 0.36287850925585385, + "grad_norm": 1.4282186173398892, + "learning_rate": 1.4730018149747187e-05, + "loss": 0.6429, + "step": 11840 + }, + { + "epoch": 0.36290915777859506, + "grad_norm": 1.3304837463520731, + "learning_rate": 1.4729143553414384e-05, + "loss": 0.7735, + "step": 11841 + }, + { + "epoch": 0.36293980630133627, + "grad_norm": 0.7006545699349269, + "learning_rate": 1.4728268910484121e-05, + "loss": 0.623, + "step": 11842 + }, + { + "epoch": 0.36297045482407747, + "grad_norm": 1.4368962862430736, + "learning_rate": 1.4727394220965012e-05, + "loss": 0.8256, + "step": 11843 + }, + { + "epoch": 0.3630011033468187, + "grad_norm": 1.383376629439486, + "learning_rate": 1.472651948486568e-05, + "loss": 0.7214, + "step": 11844 + }, + { + "epoch": 0.3630317518695599, + "grad_norm": 1.2238007535196185, + "learning_rate": 1.4725644702194742e-05, + "loss": 0.7036, + "step": 11845 + }, + { + "epoch": 0.3630624003923011, + "grad_norm": 1.6296724528721938, + "learning_rate": 1.4724769872960814e-05, + "loss": 0.7322, + "step": 11846 + }, + { + "epoch": 0.3630930489150423, + "grad_norm": 1.479306504284517, + "learning_rate": 1.4723894997172524e-05, + "loss": 0.7942, + "step": 11847 + }, + { + "epoch": 0.3631236974377835, + "grad_norm": 1.329686117552266, + "learning_rate": 1.4723020074838487e-05, + "loss": 0.6319, + "step": 11848 + }, + { + "epoch": 0.3631543459605247, + "grad_norm": 1.4807952599521315, + "learning_rate": 1.4722145105967322e-05, + "loss": 0.7419, + "step": 11849 + }, + { + "epoch": 0.3631849944832659, + "grad_norm": 1.4614226667699803, + "learning_rate": 1.4721270090567657e-05, + "loss": 0.8114, + "step": 11850 + }, + { + "epoch": 0.3632156430060071, + "grad_norm": 1.4694130288380804, + "learning_rate": 1.472039502864811e-05, + "loss": 0.6158, + "step": 11851 + }, + { + "epoch": 0.3632462915287483, + "grad_norm": 1.549103666237963, + "learning_rate": 1.47195199202173e-05, + "loss": 0.6542, + "step": 11852 + }, + { + "epoch": 0.3632769400514895, + "grad_norm": 1.6867262585671006, + "learning_rate": 1.4718644765283851e-05, + "loss": 0.8286, + "step": 11853 + }, + { + "epoch": 0.36330758857423073, + "grad_norm": 1.727377105467183, + "learning_rate": 1.4717769563856392e-05, + "loss": 0.8395, + "step": 11854 + }, + { + "epoch": 0.36333823709697194, + "grad_norm": 1.398262716656376, + "learning_rate": 1.471689431594354e-05, + "loss": 0.759, + "step": 11855 + }, + { + "epoch": 0.36336888561971314, + "grad_norm": 1.4393497342892771, + "learning_rate": 1.4716019021553925e-05, + "loss": 0.768, + "step": 11856 + }, + { + "epoch": 0.36339953414245435, + "grad_norm": 1.4799906621942143, + "learning_rate": 1.4715143680696165e-05, + "loss": 0.7635, + "step": 11857 + }, + { + "epoch": 0.36343018266519556, + "grad_norm": 1.4738846238781569, + "learning_rate": 1.4714268293378889e-05, + "loss": 0.7567, + "step": 11858 + }, + { + "epoch": 0.36346083118793676, + "grad_norm": 1.4293943865775354, + "learning_rate": 1.4713392859610718e-05, + "loss": 0.7615, + "step": 11859 + }, + { + "epoch": 0.36349147971067797, + "grad_norm": 1.4059145044490606, + "learning_rate": 1.4712517379400286e-05, + "loss": 0.7844, + "step": 11860 + }, + { + "epoch": 0.3635221282334192, + "grad_norm": 1.353462927442212, + "learning_rate": 1.471164185275621e-05, + "loss": 0.7636, + "step": 11861 + }, + { + "epoch": 0.3635527767561604, + "grad_norm": 0.759314724608375, + "learning_rate": 1.4710766279687125e-05, + "loss": 0.6007, + "step": 11862 + }, + { + "epoch": 0.3635834252789016, + "grad_norm": 0.7054101166410529, + "learning_rate": 1.4709890660201654e-05, + "loss": 0.5739, + "step": 11863 + }, + { + "epoch": 0.36361407380164273, + "grad_norm": 1.4665249646209804, + "learning_rate": 1.4709014994308423e-05, + "loss": 0.8183, + "step": 11864 + }, + { + "epoch": 0.36364472232438394, + "grad_norm": 1.3171257413833695, + "learning_rate": 1.4708139282016065e-05, + "loss": 0.7577, + "step": 11865 + }, + { + "epoch": 0.36367537084712515, + "grad_norm": 1.445050497090487, + "learning_rate": 1.4707263523333204e-05, + "loss": 0.7705, + "step": 11866 + }, + { + "epoch": 0.36370601936986635, + "grad_norm": 1.4288834127799501, + "learning_rate": 1.4706387718268474e-05, + "loss": 0.6829, + "step": 11867 + }, + { + "epoch": 0.36373666789260756, + "grad_norm": 1.473865951038141, + "learning_rate": 1.4705511866830498e-05, + "loss": 0.8662, + "step": 11868 + }, + { + "epoch": 0.36376731641534876, + "grad_norm": 1.582185991503902, + "learning_rate": 1.4704635969027912e-05, + "loss": 0.8728, + "step": 11869 + }, + { + "epoch": 0.36379796493808997, + "grad_norm": 1.5295374710391516, + "learning_rate": 1.4703760024869342e-05, + "loss": 0.7047, + "step": 11870 + }, + { + "epoch": 0.3638286134608312, + "grad_norm": 1.5452745213714196, + "learning_rate": 1.4702884034363423e-05, + "loss": 0.8288, + "step": 11871 + }, + { + "epoch": 0.3638592619835724, + "grad_norm": 1.2915372622615577, + "learning_rate": 1.4702007997518784e-05, + "loss": 0.7038, + "step": 11872 + }, + { + "epoch": 0.3638899105063136, + "grad_norm": 1.4763987517888433, + "learning_rate": 1.4701131914344056e-05, + "loss": 0.703, + "step": 11873 + }, + { + "epoch": 0.3639205590290548, + "grad_norm": 1.4022598928763266, + "learning_rate": 1.4700255784847872e-05, + "loss": 0.7636, + "step": 11874 + }, + { + "epoch": 0.363951207551796, + "grad_norm": 1.5753474923827568, + "learning_rate": 1.4699379609038866e-05, + "loss": 0.7014, + "step": 11875 + }, + { + "epoch": 0.3639818560745372, + "grad_norm": 1.2682952394380165, + "learning_rate": 1.4698503386925672e-05, + "loss": 0.6642, + "step": 11876 + }, + { + "epoch": 0.3640125045972784, + "grad_norm": 1.5066518241947284, + "learning_rate": 1.4697627118516921e-05, + "loss": 0.7139, + "step": 11877 + }, + { + "epoch": 0.3640431531200196, + "grad_norm": 1.6052505320340642, + "learning_rate": 1.4696750803821248e-05, + "loss": 0.7655, + "step": 11878 + }, + { + "epoch": 0.3640738016427608, + "grad_norm": 1.3877974315985568, + "learning_rate": 1.4695874442847285e-05, + "loss": 0.7815, + "step": 11879 + }, + { + "epoch": 0.364104450165502, + "grad_norm": 1.4046655388674203, + "learning_rate": 1.4694998035603673e-05, + "loss": 0.7644, + "step": 11880 + }, + { + "epoch": 0.36413509868824323, + "grad_norm": 1.4447479991451606, + "learning_rate": 1.4694121582099042e-05, + "loss": 0.8016, + "step": 11881 + }, + { + "epoch": 0.36416574721098444, + "grad_norm": 1.655691980268813, + "learning_rate": 1.4693245082342031e-05, + "loss": 0.7412, + "step": 11882 + }, + { + "epoch": 0.36419639573372564, + "grad_norm": 1.4156740577238265, + "learning_rate": 1.4692368536341275e-05, + "loss": 0.7611, + "step": 11883 + }, + { + "epoch": 0.36422704425646685, + "grad_norm": 1.342371696506702, + "learning_rate": 1.4691491944105414e-05, + "loss": 0.6839, + "step": 11884 + }, + { + "epoch": 0.36425769277920805, + "grad_norm": 0.8726621108364065, + "learning_rate": 1.4690615305643076e-05, + "loss": 0.6354, + "step": 11885 + }, + { + "epoch": 0.36428834130194926, + "grad_norm": 1.3384281190613374, + "learning_rate": 1.468973862096291e-05, + "loss": 0.771, + "step": 11886 + }, + { + "epoch": 0.36431898982469046, + "grad_norm": 1.5467610321833933, + "learning_rate": 1.4688861890073552e-05, + "loss": 0.7323, + "step": 11887 + }, + { + "epoch": 0.36434963834743167, + "grad_norm": 1.4376803663835112, + "learning_rate": 1.4687985112983634e-05, + "loss": 0.8529, + "step": 11888 + }, + { + "epoch": 0.3643802868701729, + "grad_norm": 1.4402003703267166, + "learning_rate": 1.46871082897018e-05, + "loss": 0.8113, + "step": 11889 + }, + { + "epoch": 0.3644109353929141, + "grad_norm": 1.316138144422293, + "learning_rate": 1.4686231420236687e-05, + "loss": 0.771, + "step": 11890 + }, + { + "epoch": 0.3644415839156553, + "grad_norm": 1.4693963896270563, + "learning_rate": 1.468535450459694e-05, + "loss": 0.768, + "step": 11891 + }, + { + "epoch": 0.3644722324383965, + "grad_norm": 1.3831999288669872, + "learning_rate": 1.4684477542791193e-05, + "loss": 0.6487, + "step": 11892 + }, + { + "epoch": 0.3645028809611377, + "grad_norm": 1.4652937360596638, + "learning_rate": 1.4683600534828093e-05, + "loss": 0.758, + "step": 11893 + }, + { + "epoch": 0.3645335294838789, + "grad_norm": 1.5967248486395207, + "learning_rate": 1.4682723480716279e-05, + "loss": 0.7126, + "step": 11894 + }, + { + "epoch": 0.36456417800662005, + "grad_norm": 1.5780241957053878, + "learning_rate": 1.468184638046439e-05, + "loss": 0.7567, + "step": 11895 + }, + { + "epoch": 0.36459482652936126, + "grad_norm": 1.4822852430350393, + "learning_rate": 1.4680969234081071e-05, + "loss": 0.8176, + "step": 11896 + }, + { + "epoch": 0.36462547505210247, + "grad_norm": 1.236993205415649, + "learning_rate": 1.4680092041574967e-05, + "loss": 0.741, + "step": 11897 + }, + { + "epoch": 0.36465612357484367, + "grad_norm": 1.4540781893846262, + "learning_rate": 1.4679214802954715e-05, + "loss": 0.7053, + "step": 11898 + }, + { + "epoch": 0.3646867720975849, + "grad_norm": 0.6579232047750806, + "learning_rate": 1.4678337518228966e-05, + "loss": 0.5973, + "step": 11899 + }, + { + "epoch": 0.3647174206203261, + "grad_norm": 1.568370128668665, + "learning_rate": 1.4677460187406358e-05, + "loss": 0.8699, + "step": 11900 + }, + { + "epoch": 0.3647480691430673, + "grad_norm": 1.2647336486566754, + "learning_rate": 1.467658281049554e-05, + "loss": 0.7607, + "step": 11901 + }, + { + "epoch": 0.3647787176658085, + "grad_norm": 1.3015861008110294, + "learning_rate": 1.4675705387505152e-05, + "loss": 0.7687, + "step": 11902 + }, + { + "epoch": 0.3648093661885497, + "grad_norm": 1.6269904912498214, + "learning_rate": 1.4674827918443846e-05, + "loss": 0.7773, + "step": 11903 + }, + { + "epoch": 0.3648400147112909, + "grad_norm": 1.423022396860938, + "learning_rate": 1.467395040332026e-05, + "loss": 0.8181, + "step": 11904 + }, + { + "epoch": 0.3648706632340321, + "grad_norm": 1.4915059616946975, + "learning_rate": 1.4673072842143048e-05, + "loss": 0.8087, + "step": 11905 + }, + { + "epoch": 0.3649013117567733, + "grad_norm": 1.7545623171943354, + "learning_rate": 1.4672195234920854e-05, + "loss": 0.7982, + "step": 11906 + }, + { + "epoch": 0.3649319602795145, + "grad_norm": 1.4530958286480122, + "learning_rate": 1.4671317581662324e-05, + "loss": 0.791, + "step": 11907 + }, + { + "epoch": 0.36496260880225573, + "grad_norm": 1.5042752760823555, + "learning_rate": 1.4670439882376104e-05, + "loss": 0.736, + "step": 11908 + }, + { + "epoch": 0.36499325732499693, + "grad_norm": 1.4125136568139434, + "learning_rate": 1.4669562137070848e-05, + "loss": 0.7888, + "step": 11909 + }, + { + "epoch": 0.36502390584773814, + "grad_norm": 1.502106186907111, + "learning_rate": 1.4668684345755202e-05, + "loss": 0.6994, + "step": 11910 + }, + { + "epoch": 0.36505455437047934, + "grad_norm": 1.3055630762860189, + "learning_rate": 1.4667806508437812e-05, + "loss": 0.7172, + "step": 11911 + }, + { + "epoch": 0.36508520289322055, + "grad_norm": 1.4407726785833788, + "learning_rate": 1.4666928625127332e-05, + "loss": 0.7244, + "step": 11912 + }, + { + "epoch": 0.36511585141596176, + "grad_norm": 1.3963823059775953, + "learning_rate": 1.466605069583241e-05, + "loss": 0.7786, + "step": 11913 + }, + { + "epoch": 0.36514649993870296, + "grad_norm": 1.5287012987795183, + "learning_rate": 1.4665172720561697e-05, + "loss": 0.7653, + "step": 11914 + }, + { + "epoch": 0.36517714846144417, + "grad_norm": 1.4433321232243101, + "learning_rate": 1.4664294699323842e-05, + "loss": 0.7991, + "step": 11915 + }, + { + "epoch": 0.3652077969841854, + "grad_norm": 1.4577166899361877, + "learning_rate": 1.46634166321275e-05, + "loss": 0.8099, + "step": 11916 + }, + { + "epoch": 0.3652384455069266, + "grad_norm": 1.4541742307584948, + "learning_rate": 1.466253851898132e-05, + "loss": 0.6717, + "step": 11917 + }, + { + "epoch": 0.3652690940296678, + "grad_norm": 1.4145582890139852, + "learning_rate": 1.4661660359893955e-05, + "loss": 0.7039, + "step": 11918 + }, + { + "epoch": 0.365299742552409, + "grad_norm": 0.6890798024836009, + "learning_rate": 1.4660782154874056e-05, + "loss": 0.6272, + "step": 11919 + }, + { + "epoch": 0.3653303910751502, + "grad_norm": 0.666629710202176, + "learning_rate": 1.465990390393028e-05, + "loss": 0.6204, + "step": 11920 + }, + { + "epoch": 0.3653610395978914, + "grad_norm": 1.3233503793935224, + "learning_rate": 1.4659025607071278e-05, + "loss": 0.8001, + "step": 11921 + }, + { + "epoch": 0.3653916881206326, + "grad_norm": 1.5472919247199477, + "learning_rate": 1.4658147264305704e-05, + "loss": 0.8915, + "step": 11922 + }, + { + "epoch": 0.3654223366433738, + "grad_norm": 1.7916539828078466, + "learning_rate": 1.4657268875642214e-05, + "loss": 0.8277, + "step": 11923 + }, + { + "epoch": 0.365452985166115, + "grad_norm": 1.5443853811256962, + "learning_rate": 1.4656390441089461e-05, + "loss": 0.7432, + "step": 11924 + }, + { + "epoch": 0.3654836336888562, + "grad_norm": 1.4119727549391372, + "learning_rate": 1.4655511960656106e-05, + "loss": 0.7691, + "step": 11925 + }, + { + "epoch": 0.3655142822115974, + "grad_norm": 1.571493029830556, + "learning_rate": 1.4654633434350793e-05, + "loss": 0.7511, + "step": 11926 + }, + { + "epoch": 0.3655449307343386, + "grad_norm": 1.484090815043051, + "learning_rate": 1.465375486218219e-05, + "loss": 0.7311, + "step": 11927 + }, + { + "epoch": 0.3655755792570798, + "grad_norm": 1.450647864581078, + "learning_rate": 1.4652876244158949e-05, + "loss": 0.7831, + "step": 11928 + }, + { + "epoch": 0.365606227779821, + "grad_norm": 0.6793675533743673, + "learning_rate": 1.4651997580289732e-05, + "loss": 0.5982, + "step": 11929 + }, + { + "epoch": 0.3656368763025622, + "grad_norm": 1.5113815832176, + "learning_rate": 1.4651118870583188e-05, + "loss": 0.7469, + "step": 11930 + }, + { + "epoch": 0.3656675248253034, + "grad_norm": 1.392063730599973, + "learning_rate": 1.4650240115047981e-05, + "loss": 0.6988, + "step": 11931 + }, + { + "epoch": 0.3656981733480446, + "grad_norm": 1.5164882647518154, + "learning_rate": 1.4649361313692764e-05, + "loss": 0.6761, + "step": 11932 + }, + { + "epoch": 0.3657288218707858, + "grad_norm": 1.4178964696863339, + "learning_rate": 1.4648482466526206e-05, + "loss": 0.703, + "step": 11933 + }, + { + "epoch": 0.365759470393527, + "grad_norm": 1.5871280972224309, + "learning_rate": 1.464760357355696e-05, + "loss": 0.7153, + "step": 11934 + }, + { + "epoch": 0.3657901189162682, + "grad_norm": 1.471956252842124, + "learning_rate": 1.4646724634793686e-05, + "loss": 0.8319, + "step": 11935 + }, + { + "epoch": 0.36582076743900943, + "grad_norm": 1.4551249698277398, + "learning_rate": 1.4645845650245045e-05, + "loss": 0.7526, + "step": 11936 + }, + { + "epoch": 0.36585141596175064, + "grad_norm": 1.4232883324265724, + "learning_rate": 1.4644966619919699e-05, + "loss": 0.7105, + "step": 11937 + }, + { + "epoch": 0.36588206448449184, + "grad_norm": 0.7176512412364351, + "learning_rate": 1.4644087543826308e-05, + "loss": 0.6307, + "step": 11938 + }, + { + "epoch": 0.36591271300723305, + "grad_norm": 1.4458115118622785, + "learning_rate": 1.4643208421973531e-05, + "loss": 0.7732, + "step": 11939 + }, + { + "epoch": 0.36594336152997425, + "grad_norm": 1.4111083020404942, + "learning_rate": 1.4642329254370038e-05, + "loss": 0.7713, + "step": 11940 + }, + { + "epoch": 0.36597401005271546, + "grad_norm": 1.2457826285821827, + "learning_rate": 1.4641450041024486e-05, + "loss": 0.68, + "step": 11941 + }, + { + "epoch": 0.36600465857545667, + "grad_norm": 1.5462084656209016, + "learning_rate": 1.464057078194554e-05, + "loss": 0.7489, + "step": 11942 + }, + { + "epoch": 0.36603530709819787, + "grad_norm": 1.522646171500309, + "learning_rate": 1.463969147714186e-05, + "loss": 0.8066, + "step": 11943 + }, + { + "epoch": 0.3660659556209391, + "grad_norm": 1.344056335634828, + "learning_rate": 1.4638812126622112e-05, + "loss": 0.6507, + "step": 11944 + }, + { + "epoch": 0.3660966041436803, + "grad_norm": 0.6883835521359586, + "learning_rate": 1.4637932730394966e-05, + "loss": 0.6204, + "step": 11945 + }, + { + "epoch": 0.3661272526664215, + "grad_norm": 1.448182859231081, + "learning_rate": 1.4637053288469077e-05, + "loss": 0.7406, + "step": 11946 + }, + { + "epoch": 0.3661579011891627, + "grad_norm": 0.67929593864288, + "learning_rate": 1.463617380085312e-05, + "loss": 0.6225, + "step": 11947 + }, + { + "epoch": 0.3661885497119039, + "grad_norm": 0.6784429647316128, + "learning_rate": 1.4635294267555753e-05, + "loss": 0.6295, + "step": 11948 + }, + { + "epoch": 0.3662191982346451, + "grad_norm": 1.3698012236606938, + "learning_rate": 1.463441468858565e-05, + "loss": 0.7162, + "step": 11949 + }, + { + "epoch": 0.3662498467573863, + "grad_norm": 1.577624053127439, + "learning_rate": 1.4633535063951467e-05, + "loss": 0.718, + "step": 11950 + }, + { + "epoch": 0.3662804952801275, + "grad_norm": 0.6859724720487904, + "learning_rate": 1.463265539366188e-05, + "loss": 0.6282, + "step": 11951 + }, + { + "epoch": 0.3663111438028687, + "grad_norm": 0.6757169732905832, + "learning_rate": 1.4631775677725557e-05, + "loss": 0.6117, + "step": 11952 + }, + { + "epoch": 0.3663417923256099, + "grad_norm": 1.4426726895070492, + "learning_rate": 1.4630895916151161e-05, + "loss": 0.7616, + "step": 11953 + }, + { + "epoch": 0.36637244084835113, + "grad_norm": 1.258784554502014, + "learning_rate": 1.4630016108947362e-05, + "loss": 0.6554, + "step": 11954 + }, + { + "epoch": 0.36640308937109234, + "grad_norm": 1.747989850127271, + "learning_rate": 1.4629136256122831e-05, + "loss": 0.7039, + "step": 11955 + }, + { + "epoch": 0.36643373789383354, + "grad_norm": 1.7536704245216075, + "learning_rate": 1.4628256357686237e-05, + "loss": 0.8118, + "step": 11956 + }, + { + "epoch": 0.3664643864165747, + "grad_norm": 1.4305837567967707, + "learning_rate": 1.4627376413646245e-05, + "loss": 0.7928, + "step": 11957 + }, + { + "epoch": 0.3664950349393159, + "grad_norm": 1.5820432776943751, + "learning_rate": 1.4626496424011531e-05, + "loss": 0.785, + "step": 11958 + }, + { + "epoch": 0.3665256834620571, + "grad_norm": 1.4465825490273763, + "learning_rate": 1.4625616388790764e-05, + "loss": 0.7705, + "step": 11959 + }, + { + "epoch": 0.3665563319847983, + "grad_norm": 1.4870571434005513, + "learning_rate": 1.4624736307992617e-05, + "loss": 0.7582, + "step": 11960 + }, + { + "epoch": 0.3665869805075395, + "grad_norm": 1.3769903328624944, + "learning_rate": 1.4623856181625757e-05, + "loss": 0.7378, + "step": 11961 + }, + { + "epoch": 0.3666176290302807, + "grad_norm": 1.238502048391158, + "learning_rate": 1.462297600969886e-05, + "loss": 0.6951, + "step": 11962 + }, + { + "epoch": 0.36664827755302193, + "grad_norm": 1.3516553688967072, + "learning_rate": 1.4622095792220598e-05, + "loss": 0.6694, + "step": 11963 + }, + { + "epoch": 0.36667892607576313, + "grad_norm": 1.4275673074632964, + "learning_rate": 1.4621215529199645e-05, + "loss": 0.6936, + "step": 11964 + }, + { + "epoch": 0.36670957459850434, + "grad_norm": 0.7125939025674395, + "learning_rate": 1.4620335220644673e-05, + "loss": 0.5931, + "step": 11965 + }, + { + "epoch": 0.36674022312124555, + "grad_norm": 1.518184073640632, + "learning_rate": 1.4619454866564353e-05, + "loss": 0.8572, + "step": 11966 + }, + { + "epoch": 0.36677087164398675, + "grad_norm": 0.673058296614684, + "learning_rate": 1.4618574466967363e-05, + "loss": 0.6124, + "step": 11967 + }, + { + "epoch": 0.36680152016672796, + "grad_norm": 1.8385376821281836, + "learning_rate": 1.461769402186238e-05, + "loss": 0.7428, + "step": 11968 + }, + { + "epoch": 0.36683216868946916, + "grad_norm": 1.2857786457638665, + "learning_rate": 1.4616813531258074e-05, + "loss": 0.7409, + "step": 11969 + }, + { + "epoch": 0.36686281721221037, + "grad_norm": 1.5212044702996572, + "learning_rate": 1.4615932995163124e-05, + "loss": 0.8154, + "step": 11970 + }, + { + "epoch": 0.3668934657349516, + "grad_norm": 1.3626879091274087, + "learning_rate": 1.4615052413586204e-05, + "loss": 0.6907, + "step": 11971 + }, + { + "epoch": 0.3669241142576928, + "grad_norm": 1.4229094288567288, + "learning_rate": 1.4614171786535991e-05, + "loss": 0.738, + "step": 11972 + }, + { + "epoch": 0.366954762780434, + "grad_norm": 1.5868454469391065, + "learning_rate": 1.4613291114021165e-05, + "loss": 0.9288, + "step": 11973 + }, + { + "epoch": 0.3669854113031752, + "grad_norm": 0.7122750962104601, + "learning_rate": 1.46124103960504e-05, + "loss": 0.6102, + "step": 11974 + }, + { + "epoch": 0.3670160598259164, + "grad_norm": 1.3013096535907964, + "learning_rate": 1.4611529632632376e-05, + "loss": 0.7445, + "step": 11975 + }, + { + "epoch": 0.3670467083486576, + "grad_norm": 1.5187884122227537, + "learning_rate": 1.4610648823775769e-05, + "loss": 0.8244, + "step": 11976 + }, + { + "epoch": 0.3670773568713988, + "grad_norm": 1.4908336863103269, + "learning_rate": 1.4609767969489261e-05, + "loss": 0.7485, + "step": 11977 + }, + { + "epoch": 0.36710800539414, + "grad_norm": 0.6679185993688177, + "learning_rate": 1.4608887069781528e-05, + "loss": 0.6244, + "step": 11978 + }, + { + "epoch": 0.3671386539168812, + "grad_norm": 1.5304910073158098, + "learning_rate": 1.4608006124661254e-05, + "loss": 0.7718, + "step": 11979 + }, + { + "epoch": 0.3671693024396224, + "grad_norm": 0.683149475653313, + "learning_rate": 1.4607125134137115e-05, + "loss": 0.6432, + "step": 11980 + }, + { + "epoch": 0.36719995096236363, + "grad_norm": 0.6569912749490656, + "learning_rate": 1.4606244098217795e-05, + "loss": 0.5952, + "step": 11981 + }, + { + "epoch": 0.36723059948510484, + "grad_norm": 1.3972583678287605, + "learning_rate": 1.460536301691197e-05, + "loss": 0.764, + "step": 11982 + }, + { + "epoch": 0.36726124800784604, + "grad_norm": 1.5664440872302896, + "learning_rate": 1.4604481890228328e-05, + "loss": 0.7038, + "step": 11983 + }, + { + "epoch": 0.36729189653058725, + "grad_norm": 1.5061329428630656, + "learning_rate": 1.4603600718175546e-05, + "loss": 0.9319, + "step": 11984 + }, + { + "epoch": 0.36732254505332845, + "grad_norm": 1.2642615879603536, + "learning_rate": 1.4602719500762308e-05, + "loss": 0.6655, + "step": 11985 + }, + { + "epoch": 0.36735319357606966, + "grad_norm": 1.404221717110269, + "learning_rate": 1.4601838237997297e-05, + "loss": 0.7811, + "step": 11986 + }, + { + "epoch": 0.36738384209881086, + "grad_norm": 1.4448328856141466, + "learning_rate": 1.4600956929889198e-05, + "loss": 0.7476, + "step": 11987 + }, + { + "epoch": 0.367414490621552, + "grad_norm": 1.7111370689220757, + "learning_rate": 1.4600075576446693e-05, + "loss": 0.8313, + "step": 11988 + }, + { + "epoch": 0.3674451391442932, + "grad_norm": 1.3700080801337828, + "learning_rate": 1.4599194177678464e-05, + "loss": 0.6964, + "step": 11989 + }, + { + "epoch": 0.3674757876670344, + "grad_norm": 1.562702131642646, + "learning_rate": 1.4598312733593201e-05, + "loss": 0.8185, + "step": 11990 + }, + { + "epoch": 0.36750643618977563, + "grad_norm": 0.7560490944691766, + "learning_rate": 1.4597431244199587e-05, + "loss": 0.622, + "step": 11991 + }, + { + "epoch": 0.36753708471251684, + "grad_norm": 1.3783114635409244, + "learning_rate": 1.4596549709506305e-05, + "loss": 0.6849, + "step": 11992 + }, + { + "epoch": 0.36756773323525804, + "grad_norm": 1.4191716731489539, + "learning_rate": 1.459566812952204e-05, + "loss": 0.7752, + "step": 11993 + }, + { + "epoch": 0.36759838175799925, + "grad_norm": 1.4530216215045466, + "learning_rate": 1.4594786504255488e-05, + "loss": 0.801, + "step": 11994 + }, + { + "epoch": 0.36762903028074045, + "grad_norm": 1.3957656637806573, + "learning_rate": 1.4593904833715323e-05, + "loss": 0.7508, + "step": 11995 + }, + { + "epoch": 0.36765967880348166, + "grad_norm": 1.3606435572007962, + "learning_rate": 1.459302311791024e-05, + "loss": 0.7127, + "step": 11996 + }, + { + "epoch": 0.36769032732622287, + "grad_norm": 0.6697134491309872, + "learning_rate": 1.4592141356848922e-05, + "loss": 0.6087, + "step": 11997 + }, + { + "epoch": 0.36772097584896407, + "grad_norm": 1.647001388261047, + "learning_rate": 1.4591259550540065e-05, + "loss": 0.789, + "step": 11998 + }, + { + "epoch": 0.3677516243717053, + "grad_norm": 1.6010959332126085, + "learning_rate": 1.4590377698992351e-05, + "loss": 0.8072, + "step": 11999 + }, + { + "epoch": 0.3677822728944465, + "grad_norm": 1.5589133097928964, + "learning_rate": 1.458949580221447e-05, + "loss": 0.8033, + "step": 12000 + }, + { + "epoch": 0.3678129214171877, + "grad_norm": 1.388237693668562, + "learning_rate": 1.4588613860215113e-05, + "loss": 0.6643, + "step": 12001 + }, + { + "epoch": 0.3678435699399289, + "grad_norm": 1.321132248251106, + "learning_rate": 1.458773187300297e-05, + "loss": 0.7237, + "step": 12002 + }, + { + "epoch": 0.3678742184626701, + "grad_norm": 1.489954470774076, + "learning_rate": 1.4586849840586731e-05, + "loss": 0.854, + "step": 12003 + }, + { + "epoch": 0.3679048669854113, + "grad_norm": 1.4845516737817008, + "learning_rate": 1.4585967762975087e-05, + "loss": 0.7331, + "step": 12004 + }, + { + "epoch": 0.3679355155081525, + "grad_norm": 0.679451737313762, + "learning_rate": 1.4585085640176728e-05, + "loss": 0.5975, + "step": 12005 + }, + { + "epoch": 0.3679661640308937, + "grad_norm": 1.4394740955081575, + "learning_rate": 1.458420347220035e-05, + "loss": 0.8199, + "step": 12006 + }, + { + "epoch": 0.3679968125536349, + "grad_norm": 1.5764479312421027, + "learning_rate": 1.4583321259054641e-05, + "loss": 0.7082, + "step": 12007 + }, + { + "epoch": 0.36802746107637613, + "grad_norm": 1.523709676833233, + "learning_rate": 1.4582439000748294e-05, + "loss": 0.7206, + "step": 12008 + }, + { + "epoch": 0.36805810959911733, + "grad_norm": 1.8228976856519237, + "learning_rate": 1.4581556697290003e-05, + "loss": 0.6565, + "step": 12009 + }, + { + "epoch": 0.36808875812185854, + "grad_norm": 1.5978603736828276, + "learning_rate": 1.4580674348688461e-05, + "loss": 0.8318, + "step": 12010 + }, + { + "epoch": 0.36811940664459974, + "grad_norm": 0.6854328656851619, + "learning_rate": 1.4579791954952367e-05, + "loss": 0.6168, + "step": 12011 + }, + { + "epoch": 0.36815005516734095, + "grad_norm": 1.3854585248901057, + "learning_rate": 1.4578909516090405e-05, + "loss": 0.7671, + "step": 12012 + }, + { + "epoch": 0.36818070369008216, + "grad_norm": 1.525652112082996, + "learning_rate": 1.4578027032111279e-05, + "loss": 0.8093, + "step": 12013 + }, + { + "epoch": 0.36821135221282336, + "grad_norm": 1.2927195786187076, + "learning_rate": 1.4577144503023684e-05, + "loss": 0.7898, + "step": 12014 + }, + { + "epoch": 0.36824200073556457, + "grad_norm": 1.4456667164277983, + "learning_rate": 1.4576261928836309e-05, + "loss": 0.7025, + "step": 12015 + }, + { + "epoch": 0.3682726492583058, + "grad_norm": 0.6685577881881203, + "learning_rate": 1.4575379309557856e-05, + "loss": 0.6122, + "step": 12016 + }, + { + "epoch": 0.368303297781047, + "grad_norm": 1.9276830091175055, + "learning_rate": 1.4574496645197019e-05, + "loss": 0.7345, + "step": 12017 + }, + { + "epoch": 0.3683339463037882, + "grad_norm": 1.309780813722885, + "learning_rate": 1.4573613935762496e-05, + "loss": 0.6684, + "step": 12018 + }, + { + "epoch": 0.36836459482652933, + "grad_norm": 1.4876820819155732, + "learning_rate": 1.4572731181262984e-05, + "loss": 0.8013, + "step": 12019 + }, + { + "epoch": 0.36839524334927054, + "grad_norm": 1.4084133601373363, + "learning_rate": 1.4571848381707186e-05, + "loss": 0.7128, + "step": 12020 + }, + { + "epoch": 0.36842589187201175, + "grad_norm": 0.7124092065937692, + "learning_rate": 1.4570965537103794e-05, + "loss": 0.6276, + "step": 12021 + }, + { + "epoch": 0.36845654039475295, + "grad_norm": 1.3847699546908692, + "learning_rate": 1.4570082647461507e-05, + "loss": 0.744, + "step": 12022 + }, + { + "epoch": 0.36848718891749416, + "grad_norm": 1.3782550255142765, + "learning_rate": 1.4569199712789026e-05, + "loss": 0.7672, + "step": 12023 + }, + { + "epoch": 0.36851783744023536, + "grad_norm": 1.325269585147024, + "learning_rate": 1.4568316733095054e-05, + "loss": 0.6838, + "step": 12024 + }, + { + "epoch": 0.36854848596297657, + "grad_norm": 1.405010021141433, + "learning_rate": 1.4567433708388288e-05, + "loss": 0.6132, + "step": 12025 + }, + { + "epoch": 0.3685791344857178, + "grad_norm": 1.4271333641994755, + "learning_rate": 1.4566550638677428e-05, + "loss": 0.6863, + "step": 12026 + }, + { + "epoch": 0.368609783008459, + "grad_norm": 1.5950952181528049, + "learning_rate": 1.4565667523971176e-05, + "loss": 0.6572, + "step": 12027 + }, + { + "epoch": 0.3686404315312002, + "grad_norm": 1.545664221826475, + "learning_rate": 1.4564784364278235e-05, + "loss": 0.6595, + "step": 12028 + }, + { + "epoch": 0.3686710800539414, + "grad_norm": 1.4220111885223559, + "learning_rate": 1.4563901159607305e-05, + "loss": 0.7438, + "step": 12029 + }, + { + "epoch": 0.3687017285766826, + "grad_norm": 1.504028956893335, + "learning_rate": 1.4563017909967088e-05, + "loss": 0.7497, + "step": 12030 + }, + { + "epoch": 0.3687323770994238, + "grad_norm": 1.6296437122644836, + "learning_rate": 1.4562134615366287e-05, + "loss": 0.775, + "step": 12031 + }, + { + "epoch": 0.368763025622165, + "grad_norm": 1.3218737370229308, + "learning_rate": 1.4561251275813608e-05, + "loss": 0.7833, + "step": 12032 + }, + { + "epoch": 0.3687936741449062, + "grad_norm": 1.6186295203953769, + "learning_rate": 1.4560367891317758e-05, + "loss": 0.7198, + "step": 12033 + }, + { + "epoch": 0.3688243226676474, + "grad_norm": 0.6892106060179943, + "learning_rate": 1.4559484461887428e-05, + "loss": 0.6006, + "step": 12034 + }, + { + "epoch": 0.3688549711903886, + "grad_norm": 0.6836582543007423, + "learning_rate": 1.4558600987531337e-05, + "loss": 0.6264, + "step": 12035 + }, + { + "epoch": 0.36888561971312983, + "grad_norm": 0.6830001918940247, + "learning_rate": 1.455771746825818e-05, + "loss": 0.6322, + "step": 12036 + }, + { + "epoch": 0.36891626823587104, + "grad_norm": 1.4761869501750933, + "learning_rate": 1.455683390407667e-05, + "loss": 0.7666, + "step": 12037 + }, + { + "epoch": 0.36894691675861224, + "grad_norm": 1.4275355550302453, + "learning_rate": 1.4555950294995506e-05, + "loss": 0.6893, + "step": 12038 + }, + { + "epoch": 0.36897756528135345, + "grad_norm": 0.6875616522375876, + "learning_rate": 1.4555066641023404e-05, + "loss": 0.6147, + "step": 12039 + }, + { + "epoch": 0.36900821380409465, + "grad_norm": 1.618981148305059, + "learning_rate": 1.455418294216906e-05, + "loss": 0.8922, + "step": 12040 + }, + { + "epoch": 0.36903886232683586, + "grad_norm": 1.4647209720613164, + "learning_rate": 1.4553299198441187e-05, + "loss": 0.7549, + "step": 12041 + }, + { + "epoch": 0.36906951084957706, + "grad_norm": 1.4500660865438284, + "learning_rate": 1.4552415409848493e-05, + "loss": 0.7515, + "step": 12042 + }, + { + "epoch": 0.36910015937231827, + "grad_norm": 1.57021155621529, + "learning_rate": 1.4551531576399684e-05, + "loss": 0.677, + "step": 12043 + }, + { + "epoch": 0.3691308078950595, + "grad_norm": 1.4646701916342308, + "learning_rate": 1.4550647698103469e-05, + "loss": 0.8157, + "step": 12044 + }, + { + "epoch": 0.3691614564178007, + "grad_norm": 1.4126820638409348, + "learning_rate": 1.454976377496856e-05, + "loss": 0.7301, + "step": 12045 + }, + { + "epoch": 0.3691921049405419, + "grad_norm": 1.4095751901095268, + "learning_rate": 1.4548879807003664e-05, + "loss": 0.8008, + "step": 12046 + }, + { + "epoch": 0.3692227534632831, + "grad_norm": 1.4563977509109554, + "learning_rate": 1.4547995794217488e-05, + "loss": 0.7433, + "step": 12047 + }, + { + "epoch": 0.3692534019860243, + "grad_norm": 1.397301598512573, + "learning_rate": 1.4547111736618754e-05, + "loss": 0.774, + "step": 12048 + }, + { + "epoch": 0.3692840505087655, + "grad_norm": 1.5358174976063779, + "learning_rate": 1.4546227634216157e-05, + "loss": 0.7374, + "step": 12049 + }, + { + "epoch": 0.36931469903150665, + "grad_norm": 1.4454380842386998, + "learning_rate": 1.4545343487018419e-05, + "loss": 0.6588, + "step": 12050 + }, + { + "epoch": 0.36934534755424786, + "grad_norm": 1.2542143820369605, + "learning_rate": 1.4544459295034248e-05, + "loss": 0.8383, + "step": 12051 + }, + { + "epoch": 0.36937599607698907, + "grad_norm": 1.3999533873809886, + "learning_rate": 1.4543575058272359e-05, + "loss": 0.6722, + "step": 12052 + }, + { + "epoch": 0.36940664459973027, + "grad_norm": 1.336318387719283, + "learning_rate": 1.4542690776741459e-05, + "loss": 0.8004, + "step": 12053 + }, + { + "epoch": 0.3694372931224715, + "grad_norm": 1.5022661634790508, + "learning_rate": 1.4541806450450265e-05, + "loss": 0.7532, + "step": 12054 + }, + { + "epoch": 0.3694679416452127, + "grad_norm": 1.4996424684511618, + "learning_rate": 1.4540922079407489e-05, + "loss": 0.6442, + "step": 12055 + }, + { + "epoch": 0.3694985901679539, + "grad_norm": 1.5222495726652092, + "learning_rate": 1.4540037663621848e-05, + "loss": 0.931, + "step": 12056 + }, + { + "epoch": 0.3695292386906951, + "grad_norm": 1.381526787766269, + "learning_rate": 1.4539153203102054e-05, + "loss": 0.7448, + "step": 12057 + }, + { + "epoch": 0.3695598872134363, + "grad_norm": 1.3785533183848024, + "learning_rate": 1.4538268697856822e-05, + "loss": 0.6809, + "step": 12058 + }, + { + "epoch": 0.3695905357361775, + "grad_norm": 1.3409870292244808, + "learning_rate": 1.4537384147894868e-05, + "loss": 0.7271, + "step": 12059 + }, + { + "epoch": 0.3696211842589187, + "grad_norm": 1.4052914917122001, + "learning_rate": 1.4536499553224907e-05, + "loss": 0.7391, + "step": 12060 + }, + { + "epoch": 0.3696518327816599, + "grad_norm": 1.5542280234343526, + "learning_rate": 1.4535614913855656e-05, + "loss": 0.7194, + "step": 12061 + }, + { + "epoch": 0.3696824813044011, + "grad_norm": 0.7144498294177826, + "learning_rate": 1.4534730229795827e-05, + "loss": 0.6417, + "step": 12062 + }, + { + "epoch": 0.36971312982714233, + "grad_norm": 0.7399201678242173, + "learning_rate": 1.4533845501054145e-05, + "loss": 0.638, + "step": 12063 + }, + { + "epoch": 0.36974377834988353, + "grad_norm": 1.4693787435446168, + "learning_rate": 1.453296072763932e-05, + "loss": 0.7053, + "step": 12064 + }, + { + "epoch": 0.36977442687262474, + "grad_norm": 0.629927785820299, + "learning_rate": 1.4532075909560077e-05, + "loss": 0.5954, + "step": 12065 + }, + { + "epoch": 0.36980507539536595, + "grad_norm": 1.6708427268311055, + "learning_rate": 1.4531191046825126e-05, + "loss": 0.7428, + "step": 12066 + }, + { + "epoch": 0.36983572391810715, + "grad_norm": 1.489343334839696, + "learning_rate": 1.4530306139443194e-05, + "loss": 0.8215, + "step": 12067 + }, + { + "epoch": 0.36986637244084836, + "grad_norm": 1.5504624950464938, + "learning_rate": 1.4529421187422995e-05, + "loss": 0.7676, + "step": 12068 + }, + { + "epoch": 0.36989702096358956, + "grad_norm": 1.5287457742946513, + "learning_rate": 1.452853619077325e-05, + "loss": 0.6758, + "step": 12069 + }, + { + "epoch": 0.36992766948633077, + "grad_norm": 1.523427671524661, + "learning_rate": 1.4527651149502678e-05, + "loss": 0.726, + "step": 12070 + }, + { + "epoch": 0.369958318009072, + "grad_norm": 0.7373512729433193, + "learning_rate": 1.452676606362e-05, + "loss": 0.581, + "step": 12071 + }, + { + "epoch": 0.3699889665318132, + "grad_norm": 1.5751249935380314, + "learning_rate": 1.4525880933133942e-05, + "loss": 0.7363, + "step": 12072 + }, + { + "epoch": 0.3700196150545544, + "grad_norm": 1.6014192648059138, + "learning_rate": 1.4524995758053217e-05, + "loss": 0.8162, + "step": 12073 + }, + { + "epoch": 0.3700502635772956, + "grad_norm": 1.5368128959678908, + "learning_rate": 1.4524110538386553e-05, + "loss": 0.8072, + "step": 12074 + }, + { + "epoch": 0.3700809121000368, + "grad_norm": 0.7205382136567183, + "learning_rate": 1.4523225274142671e-05, + "loss": 0.6068, + "step": 12075 + }, + { + "epoch": 0.370111560622778, + "grad_norm": 0.6818058733866894, + "learning_rate": 1.4522339965330292e-05, + "loss": 0.5782, + "step": 12076 + }, + { + "epoch": 0.3701422091455192, + "grad_norm": 0.6756634082401034, + "learning_rate": 1.452145461195814e-05, + "loss": 0.593, + "step": 12077 + }, + { + "epoch": 0.3701728576682604, + "grad_norm": 1.5598854351731477, + "learning_rate": 1.452056921403494e-05, + "loss": 0.7589, + "step": 12078 + }, + { + "epoch": 0.3702035061910016, + "grad_norm": 0.6893799811353409, + "learning_rate": 1.4519683771569414e-05, + "loss": 0.6105, + "step": 12079 + }, + { + "epoch": 0.3702341547137428, + "grad_norm": 1.4596693763924677, + "learning_rate": 1.451879828457029e-05, + "loss": 0.7414, + "step": 12080 + }, + { + "epoch": 0.370264803236484, + "grad_norm": 1.5442529317199347, + "learning_rate": 1.4517912753046286e-05, + "loss": 0.7712, + "step": 12081 + }, + { + "epoch": 0.3702954517592252, + "grad_norm": 1.364227194268685, + "learning_rate": 1.4517027177006134e-05, + "loss": 0.7468, + "step": 12082 + }, + { + "epoch": 0.3703261002819664, + "grad_norm": 1.4674066534053725, + "learning_rate": 1.4516141556458558e-05, + "loss": 0.7488, + "step": 12083 + }, + { + "epoch": 0.3703567488047076, + "grad_norm": 1.5413034976276119, + "learning_rate": 1.4515255891412281e-05, + "loss": 0.79, + "step": 12084 + }, + { + "epoch": 0.3703873973274488, + "grad_norm": 1.3595834708811574, + "learning_rate": 1.4514370181876033e-05, + "loss": 0.8106, + "step": 12085 + }, + { + "epoch": 0.37041804585019, + "grad_norm": 1.5873177906920901, + "learning_rate": 1.4513484427858541e-05, + "loss": 0.7969, + "step": 12086 + }, + { + "epoch": 0.3704486943729312, + "grad_norm": 0.7939245132737577, + "learning_rate": 1.4512598629368538e-05, + "loss": 0.6278, + "step": 12087 + }, + { + "epoch": 0.3704793428956724, + "grad_norm": 1.5886961116848306, + "learning_rate": 1.451171278641474e-05, + "loss": 0.7571, + "step": 12088 + }, + { + "epoch": 0.3705099914184136, + "grad_norm": 1.4642905847719903, + "learning_rate": 1.4510826899005884e-05, + "loss": 0.71, + "step": 12089 + }, + { + "epoch": 0.3705406399411548, + "grad_norm": 1.4233711137362535, + "learning_rate": 1.450994096715069e-05, + "loss": 0.6718, + "step": 12090 + }, + { + "epoch": 0.37057128846389603, + "grad_norm": 0.6853618590313761, + "learning_rate": 1.4509054990857902e-05, + "loss": 0.6052, + "step": 12091 + }, + { + "epoch": 0.37060193698663724, + "grad_norm": 1.8523250591130098, + "learning_rate": 1.4508168970136239e-05, + "loss": 0.8789, + "step": 12092 + }, + { + "epoch": 0.37063258550937844, + "grad_norm": 1.6872101917380438, + "learning_rate": 1.4507282904994431e-05, + "loss": 0.8239, + "step": 12093 + }, + { + "epoch": 0.37066323403211965, + "grad_norm": 1.3914689464256345, + "learning_rate": 1.4506396795441214e-05, + "loss": 0.8113, + "step": 12094 + }, + { + "epoch": 0.37069388255486085, + "grad_norm": 0.6586614343886006, + "learning_rate": 1.4505510641485316e-05, + "loss": 0.5894, + "step": 12095 + }, + { + "epoch": 0.37072453107760206, + "grad_norm": 1.4707483909290402, + "learning_rate": 1.4504624443135468e-05, + "loss": 0.7815, + "step": 12096 + }, + { + "epoch": 0.37075517960034327, + "grad_norm": 1.4736863401598983, + "learning_rate": 1.4503738200400403e-05, + "loss": 0.7585, + "step": 12097 + }, + { + "epoch": 0.37078582812308447, + "grad_norm": 1.465927668157157, + "learning_rate": 1.4502851913288853e-05, + "loss": 0.6417, + "step": 12098 + }, + { + "epoch": 0.3708164766458257, + "grad_norm": 0.6865602348766568, + "learning_rate": 1.4501965581809552e-05, + "loss": 0.5797, + "step": 12099 + }, + { + "epoch": 0.3708471251685669, + "grad_norm": 1.744158771834067, + "learning_rate": 1.4501079205971231e-05, + "loss": 0.8616, + "step": 12100 + }, + { + "epoch": 0.3708777736913081, + "grad_norm": 0.6879965842581008, + "learning_rate": 1.4500192785782625e-05, + "loss": 0.5995, + "step": 12101 + }, + { + "epoch": 0.3709084222140493, + "grad_norm": 1.559158114371076, + "learning_rate": 1.4499306321252471e-05, + "loss": 0.7183, + "step": 12102 + }, + { + "epoch": 0.3709390707367905, + "grad_norm": 1.4008765901611033, + "learning_rate": 1.4498419812389497e-05, + "loss": 0.6483, + "step": 12103 + }, + { + "epoch": 0.3709697192595317, + "grad_norm": 1.5767328596180692, + "learning_rate": 1.4497533259202443e-05, + "loss": 0.6148, + "step": 12104 + }, + { + "epoch": 0.3710003677822729, + "grad_norm": 1.5044350961878485, + "learning_rate": 1.4496646661700043e-05, + "loss": 0.7519, + "step": 12105 + }, + { + "epoch": 0.3710310163050141, + "grad_norm": 1.4810925033808149, + "learning_rate": 1.4495760019891034e-05, + "loss": 0.6304, + "step": 12106 + }, + { + "epoch": 0.3710616648277553, + "grad_norm": 1.5688175663616766, + "learning_rate": 1.449487333378415e-05, + "loss": 0.7253, + "step": 12107 + }, + { + "epoch": 0.3710923133504965, + "grad_norm": 1.4121020692079946, + "learning_rate": 1.4493986603388129e-05, + "loss": 0.7906, + "step": 12108 + }, + { + "epoch": 0.37112296187323773, + "grad_norm": 1.426619892794596, + "learning_rate": 1.4493099828711707e-05, + "loss": 0.7665, + "step": 12109 + }, + { + "epoch": 0.37115361039597894, + "grad_norm": 1.5280173260337255, + "learning_rate": 1.4492213009763622e-05, + "loss": 0.6674, + "step": 12110 + }, + { + "epoch": 0.37118425891872014, + "grad_norm": 1.4192796902581448, + "learning_rate": 1.4491326146552618e-05, + "loss": 0.7522, + "step": 12111 + }, + { + "epoch": 0.3712149074414613, + "grad_norm": 1.9927394432425187, + "learning_rate": 1.4490439239087424e-05, + "loss": 0.6586, + "step": 12112 + }, + { + "epoch": 0.3712455559642025, + "grad_norm": 1.514325681501417, + "learning_rate": 1.4489552287376784e-05, + "loss": 0.753, + "step": 12113 + }, + { + "epoch": 0.3712762044869437, + "grad_norm": 1.3164672343183468, + "learning_rate": 1.4488665291429438e-05, + "loss": 0.6331, + "step": 12114 + }, + { + "epoch": 0.3713068530096849, + "grad_norm": 1.352668019683531, + "learning_rate": 1.4487778251254123e-05, + "loss": 0.6973, + "step": 12115 + }, + { + "epoch": 0.3713375015324261, + "grad_norm": 1.3668985702050909, + "learning_rate": 1.448689116685958e-05, + "loss": 0.8235, + "step": 12116 + }, + { + "epoch": 0.3713681500551673, + "grad_norm": 1.48150488219453, + "learning_rate": 1.4486004038254553e-05, + "loss": 0.7889, + "step": 12117 + }, + { + "epoch": 0.37139879857790853, + "grad_norm": 1.3904302214952988, + "learning_rate": 1.4485116865447779e-05, + "loss": 0.6105, + "step": 12118 + }, + { + "epoch": 0.37142944710064973, + "grad_norm": 1.5059874310779864, + "learning_rate": 1.4484229648448001e-05, + "loss": 0.6909, + "step": 12119 + }, + { + "epoch": 0.37146009562339094, + "grad_norm": 1.436337795635777, + "learning_rate": 1.4483342387263959e-05, + "loss": 0.7542, + "step": 12120 + }, + { + "epoch": 0.37149074414613215, + "grad_norm": 1.5204483131552395, + "learning_rate": 1.44824550819044e-05, + "loss": 0.8122, + "step": 12121 + }, + { + "epoch": 0.37152139266887335, + "grad_norm": 1.5452605508839552, + "learning_rate": 1.4481567732378063e-05, + "loss": 0.7511, + "step": 12122 + }, + { + "epoch": 0.37155204119161456, + "grad_norm": 1.4630712045277765, + "learning_rate": 1.4480680338693693e-05, + "loss": 0.5772, + "step": 12123 + }, + { + "epoch": 0.37158268971435576, + "grad_norm": 1.3557556633512977, + "learning_rate": 1.4479792900860032e-05, + "loss": 0.7083, + "step": 12124 + }, + { + "epoch": 0.37161333823709697, + "grad_norm": 1.4636080855362208, + "learning_rate": 1.4478905418885827e-05, + "loss": 0.7982, + "step": 12125 + }, + { + "epoch": 0.3716439867598382, + "grad_norm": 1.557889713775267, + "learning_rate": 1.447801789277982e-05, + "loss": 0.7689, + "step": 12126 + }, + { + "epoch": 0.3716746352825794, + "grad_norm": 1.5537897258144144, + "learning_rate": 1.4477130322550757e-05, + "loss": 0.8086, + "step": 12127 + }, + { + "epoch": 0.3717052838053206, + "grad_norm": 0.7823385014391114, + "learning_rate": 1.4476242708207385e-05, + "loss": 0.6039, + "step": 12128 + }, + { + "epoch": 0.3717359323280618, + "grad_norm": 1.501561708000596, + "learning_rate": 1.4475355049758446e-05, + "loss": 0.7494, + "step": 12129 + }, + { + "epoch": 0.371766580850803, + "grad_norm": 1.5250430360223157, + "learning_rate": 1.4474467347212691e-05, + "loss": 0.7315, + "step": 12130 + }, + { + "epoch": 0.3717972293735442, + "grad_norm": 1.519920870135049, + "learning_rate": 1.447357960057886e-05, + "loss": 0.6838, + "step": 12131 + }, + { + "epoch": 0.3718278778962854, + "grad_norm": 1.4481794178236742, + "learning_rate": 1.4472691809865709e-05, + "loss": 0.7466, + "step": 12132 + }, + { + "epoch": 0.3718585264190266, + "grad_norm": 1.4304907138065184, + "learning_rate": 1.447180397508198e-05, + "loss": 0.7869, + "step": 12133 + }, + { + "epoch": 0.3718891749417678, + "grad_norm": 1.4526747302337653, + "learning_rate": 1.4470916096236422e-05, + "loss": 0.8103, + "step": 12134 + }, + { + "epoch": 0.371919823464509, + "grad_norm": 1.4378746907564186, + "learning_rate": 1.4470028173337783e-05, + "loss": 0.7313, + "step": 12135 + }, + { + "epoch": 0.37195047198725023, + "grad_norm": 1.5985268632794967, + "learning_rate": 1.4469140206394814e-05, + "loss": 0.8224, + "step": 12136 + }, + { + "epoch": 0.37198112050999144, + "grad_norm": 1.5918947352638626, + "learning_rate": 1.4468252195416263e-05, + "loss": 0.6692, + "step": 12137 + }, + { + "epoch": 0.37201176903273264, + "grad_norm": 1.3303935439190306, + "learning_rate": 1.4467364140410878e-05, + "loss": 0.7222, + "step": 12138 + }, + { + "epoch": 0.37204241755547385, + "grad_norm": 1.3933931425572084, + "learning_rate": 1.4466476041387411e-05, + "loss": 0.7189, + "step": 12139 + }, + { + "epoch": 0.37207306607821505, + "grad_norm": 0.7154551013011246, + "learning_rate": 1.4465587898354616e-05, + "loss": 0.6261, + "step": 12140 + }, + { + "epoch": 0.37210371460095626, + "grad_norm": 0.7446596603740221, + "learning_rate": 1.446469971132124e-05, + "loss": 0.6219, + "step": 12141 + }, + { + "epoch": 0.37213436312369746, + "grad_norm": 1.715509801249417, + "learning_rate": 1.4463811480296033e-05, + "loss": 0.6901, + "step": 12142 + }, + { + "epoch": 0.3721650116464386, + "grad_norm": 1.484606512729472, + "learning_rate": 1.4462923205287752e-05, + "loss": 0.7303, + "step": 12143 + }, + { + "epoch": 0.3721956601691798, + "grad_norm": 1.428539841476084, + "learning_rate": 1.4462034886305143e-05, + "loss": 0.8283, + "step": 12144 + }, + { + "epoch": 0.372226308691921, + "grad_norm": 1.6152218765052058, + "learning_rate": 1.4461146523356967e-05, + "loss": 0.8283, + "step": 12145 + }, + { + "epoch": 0.37225695721466223, + "grad_norm": 1.5640702624952219, + "learning_rate": 1.4460258116451971e-05, + "loss": 0.7138, + "step": 12146 + }, + { + "epoch": 0.37228760573740344, + "grad_norm": 1.4217501561268062, + "learning_rate": 1.445936966559891e-05, + "loss": 0.9062, + "step": 12147 + }, + { + "epoch": 0.37231825426014464, + "grad_norm": 1.588080401164327, + "learning_rate": 1.445848117080654e-05, + "loss": 0.735, + "step": 12148 + }, + { + "epoch": 0.37234890278288585, + "grad_norm": 0.7309345623425121, + "learning_rate": 1.4457592632083614e-05, + "loss": 0.6286, + "step": 12149 + }, + { + "epoch": 0.37237955130562705, + "grad_norm": 1.4346107532277552, + "learning_rate": 1.4456704049438885e-05, + "loss": 0.6833, + "step": 12150 + }, + { + "epoch": 0.37241019982836826, + "grad_norm": 1.37806931765149, + "learning_rate": 1.4455815422881115e-05, + "loss": 0.7259, + "step": 12151 + }, + { + "epoch": 0.37244084835110947, + "grad_norm": 1.2959862752481326, + "learning_rate": 1.4454926752419054e-05, + "loss": 0.7514, + "step": 12152 + }, + { + "epoch": 0.37247149687385067, + "grad_norm": 1.265783779344308, + "learning_rate": 1.4454038038061457e-05, + "loss": 0.7055, + "step": 12153 + }, + { + "epoch": 0.3725021453965919, + "grad_norm": 1.4911512680490673, + "learning_rate": 1.4453149279817086e-05, + "loss": 0.8854, + "step": 12154 + }, + { + "epoch": 0.3725327939193331, + "grad_norm": 1.4329423450781316, + "learning_rate": 1.4452260477694694e-05, + "loss": 0.7768, + "step": 12155 + }, + { + "epoch": 0.3725634424420743, + "grad_norm": 1.3623863540345797, + "learning_rate": 1.4451371631703044e-05, + "loss": 0.747, + "step": 12156 + }, + { + "epoch": 0.3725940909648155, + "grad_norm": 1.6385891262324856, + "learning_rate": 1.4450482741850889e-05, + "loss": 0.7574, + "step": 12157 + }, + { + "epoch": 0.3726247394875567, + "grad_norm": 1.5682669613158178, + "learning_rate": 1.4449593808146987e-05, + "loss": 0.6368, + "step": 12158 + }, + { + "epoch": 0.3726553880102979, + "grad_norm": 0.6911713208408675, + "learning_rate": 1.4448704830600098e-05, + "loss": 0.6397, + "step": 12159 + }, + { + "epoch": 0.3726860365330391, + "grad_norm": 1.3110841372146251, + "learning_rate": 1.4447815809218986e-05, + "loss": 0.64, + "step": 12160 + }, + { + "epoch": 0.3727166850557803, + "grad_norm": 1.5025839692760707, + "learning_rate": 1.4446926744012404e-05, + "loss": 0.8289, + "step": 12161 + }, + { + "epoch": 0.3727473335785215, + "grad_norm": 1.5811483823135706, + "learning_rate": 1.4446037634989116e-05, + "loss": 0.7746, + "step": 12162 + }, + { + "epoch": 0.37277798210126273, + "grad_norm": 1.4735651060097925, + "learning_rate": 1.4445148482157879e-05, + "loss": 0.8072, + "step": 12163 + }, + { + "epoch": 0.37280863062400393, + "grad_norm": 1.6221485801595867, + "learning_rate": 1.444425928552746e-05, + "loss": 0.7839, + "step": 12164 + }, + { + "epoch": 0.37283927914674514, + "grad_norm": 1.4230520092446004, + "learning_rate": 1.4443370045106618e-05, + "loss": 0.7349, + "step": 12165 + }, + { + "epoch": 0.37286992766948635, + "grad_norm": 1.4584258886074093, + "learning_rate": 1.444248076090411e-05, + "loss": 0.669, + "step": 12166 + }, + { + "epoch": 0.37290057619222755, + "grad_norm": 0.6930524495297496, + "learning_rate": 1.4441591432928703e-05, + "loss": 0.6134, + "step": 12167 + }, + { + "epoch": 0.37293122471496876, + "grad_norm": 0.6277778312035939, + "learning_rate": 1.4440702061189163e-05, + "loss": 0.5722, + "step": 12168 + }, + { + "epoch": 0.37296187323770996, + "grad_norm": 1.4236285534927993, + "learning_rate": 1.4439812645694247e-05, + "loss": 0.7218, + "step": 12169 + }, + { + "epoch": 0.37299252176045117, + "grad_norm": 1.6536505725989015, + "learning_rate": 1.443892318645272e-05, + "loss": 0.8005, + "step": 12170 + }, + { + "epoch": 0.3730231702831924, + "grad_norm": 1.4210524137771787, + "learning_rate": 1.443803368347335e-05, + "loss": 0.7277, + "step": 12171 + }, + { + "epoch": 0.3730538188059336, + "grad_norm": 1.379614961748721, + "learning_rate": 1.4437144136764896e-05, + "loss": 0.7919, + "step": 12172 + }, + { + "epoch": 0.3730844673286748, + "grad_norm": 1.4744762250393677, + "learning_rate": 1.4436254546336126e-05, + "loss": 0.7021, + "step": 12173 + }, + { + "epoch": 0.37311511585141593, + "grad_norm": 1.2926404749584735, + "learning_rate": 1.4435364912195804e-05, + "loss": 0.6396, + "step": 12174 + }, + { + "epoch": 0.37314576437415714, + "grad_norm": 1.556377358668307, + "learning_rate": 1.44344752343527e-05, + "loss": 0.7423, + "step": 12175 + }, + { + "epoch": 0.37317641289689835, + "grad_norm": 1.363775877117991, + "learning_rate": 1.4433585512815573e-05, + "loss": 0.634, + "step": 12176 + }, + { + "epoch": 0.37320706141963955, + "grad_norm": 1.4201269229098232, + "learning_rate": 1.4432695747593196e-05, + "loss": 0.7605, + "step": 12177 + }, + { + "epoch": 0.37323770994238076, + "grad_norm": 1.5636342037619566, + "learning_rate": 1.4431805938694331e-05, + "loss": 0.7411, + "step": 12178 + }, + { + "epoch": 0.37326835846512196, + "grad_norm": 1.3662382210263295, + "learning_rate": 1.4430916086127753e-05, + "loss": 0.6992, + "step": 12179 + }, + { + "epoch": 0.37329900698786317, + "grad_norm": 1.4128588104165474, + "learning_rate": 1.4430026189902222e-05, + "loss": 0.7617, + "step": 12180 + }, + { + "epoch": 0.3733296555106044, + "grad_norm": 1.5659267276620814, + "learning_rate": 1.4429136250026508e-05, + "loss": 0.8599, + "step": 12181 + }, + { + "epoch": 0.3733603040333456, + "grad_norm": 1.4116923146181464, + "learning_rate": 1.4428246266509382e-05, + "loss": 0.787, + "step": 12182 + }, + { + "epoch": 0.3733909525560868, + "grad_norm": 1.5476895555710313, + "learning_rate": 1.4427356239359615e-05, + "loss": 0.8159, + "step": 12183 + }, + { + "epoch": 0.373421601078828, + "grad_norm": 1.3328874062012992, + "learning_rate": 1.4426466168585972e-05, + "loss": 0.7372, + "step": 12184 + }, + { + "epoch": 0.3734522496015692, + "grad_norm": 1.5508740877959866, + "learning_rate": 1.4425576054197226e-05, + "loss": 0.7525, + "step": 12185 + }, + { + "epoch": 0.3734828981243104, + "grad_norm": 1.5068549843899968, + "learning_rate": 1.442468589620215e-05, + "loss": 0.7144, + "step": 12186 + }, + { + "epoch": 0.3735135466470516, + "grad_norm": 1.5041832699487023, + "learning_rate": 1.4423795694609506e-05, + "loss": 0.7545, + "step": 12187 + }, + { + "epoch": 0.3735441951697928, + "grad_norm": 1.6453679184292176, + "learning_rate": 1.4422905449428075e-05, + "loss": 0.8218, + "step": 12188 + }, + { + "epoch": 0.373574843692534, + "grad_norm": 0.78176394254773, + "learning_rate": 1.4422015160666622e-05, + "loss": 0.6011, + "step": 12189 + }, + { + "epoch": 0.3736054922152752, + "grad_norm": 1.4494755400993464, + "learning_rate": 1.4421124828333923e-05, + "loss": 0.7834, + "step": 12190 + }, + { + "epoch": 0.37363614073801643, + "grad_norm": 0.7227150557917125, + "learning_rate": 1.4420234452438753e-05, + "loss": 0.6334, + "step": 12191 + }, + { + "epoch": 0.37366678926075764, + "grad_norm": 1.4656577018332015, + "learning_rate": 1.441934403298988e-05, + "loss": 0.801, + "step": 12192 + }, + { + "epoch": 0.37369743778349884, + "grad_norm": 0.6597265484530127, + "learning_rate": 1.4418453569996077e-05, + "loss": 0.5814, + "step": 12193 + }, + { + "epoch": 0.37372808630624005, + "grad_norm": 1.4451992260354867, + "learning_rate": 1.4417563063466125e-05, + "loss": 0.7211, + "step": 12194 + }, + { + "epoch": 0.37375873482898125, + "grad_norm": 0.7261686433665461, + "learning_rate": 1.4416672513408791e-05, + "loss": 0.6558, + "step": 12195 + }, + { + "epoch": 0.37378938335172246, + "grad_norm": 0.7098811454039092, + "learning_rate": 1.4415781919832852e-05, + "loss": 0.6026, + "step": 12196 + }, + { + "epoch": 0.37382003187446367, + "grad_norm": 1.2604406426986865, + "learning_rate": 1.4414891282747086e-05, + "loss": 0.7539, + "step": 12197 + }, + { + "epoch": 0.37385068039720487, + "grad_norm": 1.42382413487594, + "learning_rate": 1.4414000602160264e-05, + "loss": 0.7639, + "step": 12198 + }, + { + "epoch": 0.3738813289199461, + "grad_norm": 1.561827643533732, + "learning_rate": 1.441310987808117e-05, + "loss": 0.6693, + "step": 12199 + }, + { + "epoch": 0.3739119774426873, + "grad_norm": 0.6832476325992448, + "learning_rate": 1.4412219110518568e-05, + "loss": 0.5952, + "step": 12200 + }, + { + "epoch": 0.3739426259654285, + "grad_norm": 1.3356678753277227, + "learning_rate": 1.4411328299481247e-05, + "loss": 0.7561, + "step": 12201 + }, + { + "epoch": 0.3739732744881697, + "grad_norm": 1.4473233148063338, + "learning_rate": 1.4410437444977977e-05, + "loss": 0.8109, + "step": 12202 + }, + { + "epoch": 0.3740039230109109, + "grad_norm": 1.3919771218744563, + "learning_rate": 1.4409546547017544e-05, + "loss": 0.7131, + "step": 12203 + }, + { + "epoch": 0.3740345715336521, + "grad_norm": 1.2769902703476432, + "learning_rate": 1.4408655605608713e-05, + "loss": 0.6862, + "step": 12204 + }, + { + "epoch": 0.37406522005639326, + "grad_norm": 1.433542402757718, + "learning_rate": 1.4407764620760273e-05, + "loss": 0.7749, + "step": 12205 + }, + { + "epoch": 0.37409586857913446, + "grad_norm": 1.3125002136505806, + "learning_rate": 1.4406873592481004e-05, + "loss": 0.7376, + "step": 12206 + }, + { + "epoch": 0.37412651710187567, + "grad_norm": 1.219262507387336, + "learning_rate": 1.4405982520779678e-05, + "loss": 0.6652, + "step": 12207 + }, + { + "epoch": 0.37415716562461687, + "grad_norm": 1.6257104229869335, + "learning_rate": 1.4405091405665079e-05, + "loss": 0.7468, + "step": 12208 + }, + { + "epoch": 0.3741878141473581, + "grad_norm": 1.5384068813946514, + "learning_rate": 1.4404200247145988e-05, + "loss": 0.5998, + "step": 12209 + }, + { + "epoch": 0.3742184626700993, + "grad_norm": 1.5410662003531326, + "learning_rate": 1.4403309045231186e-05, + "loss": 0.6674, + "step": 12210 + }, + { + "epoch": 0.3742491111928405, + "grad_norm": 1.3930754509202177, + "learning_rate": 1.4402417799929453e-05, + "loss": 0.6746, + "step": 12211 + }, + { + "epoch": 0.3742797597155817, + "grad_norm": 1.6923298659222905, + "learning_rate": 1.440152651124957e-05, + "loss": 0.7471, + "step": 12212 + }, + { + "epoch": 0.3743104082383229, + "grad_norm": 1.4565988072884608, + "learning_rate": 1.4400635179200321e-05, + "loss": 0.7481, + "step": 12213 + }, + { + "epoch": 0.3743410567610641, + "grad_norm": 1.5778898108836057, + "learning_rate": 1.4399743803790489e-05, + "loss": 0.6634, + "step": 12214 + }, + { + "epoch": 0.3743717052838053, + "grad_norm": 1.5976931334950364, + "learning_rate": 1.4398852385028854e-05, + "loss": 0.7772, + "step": 12215 + }, + { + "epoch": 0.3744023538065465, + "grad_norm": 1.4368815322187394, + "learning_rate": 1.4397960922924201e-05, + "loss": 0.7937, + "step": 12216 + }, + { + "epoch": 0.3744330023292877, + "grad_norm": 1.4837547385351633, + "learning_rate": 1.4397069417485313e-05, + "loss": 0.6799, + "step": 12217 + }, + { + "epoch": 0.37446365085202893, + "grad_norm": 1.3613351082267362, + "learning_rate": 1.4396177868720977e-05, + "loss": 0.8503, + "step": 12218 + }, + { + "epoch": 0.37449429937477013, + "grad_norm": 1.445135254436657, + "learning_rate": 1.4395286276639976e-05, + "loss": 0.8178, + "step": 12219 + }, + { + "epoch": 0.37452494789751134, + "grad_norm": 1.6122529835425576, + "learning_rate": 1.4394394641251091e-05, + "loss": 0.8104, + "step": 12220 + }, + { + "epoch": 0.37455559642025255, + "grad_norm": 1.3941243239453776, + "learning_rate": 1.4393502962563112e-05, + "loss": 0.6972, + "step": 12221 + }, + { + "epoch": 0.37458624494299375, + "grad_norm": 1.3903802613340976, + "learning_rate": 1.4392611240584826e-05, + "loss": 0.7358, + "step": 12222 + }, + { + "epoch": 0.37461689346573496, + "grad_norm": 1.5278257371351303, + "learning_rate": 1.4391719475325019e-05, + "loss": 0.819, + "step": 12223 + }, + { + "epoch": 0.37464754198847616, + "grad_norm": 1.3331453717328827, + "learning_rate": 1.4390827666792473e-05, + "loss": 0.7776, + "step": 12224 + }, + { + "epoch": 0.37467819051121737, + "grad_norm": 1.4831256402412265, + "learning_rate": 1.438993581499598e-05, + "loss": 0.7651, + "step": 12225 + }, + { + "epoch": 0.3747088390339586, + "grad_norm": 1.3382387992066633, + "learning_rate": 1.4389043919944325e-05, + "loss": 0.6648, + "step": 12226 + }, + { + "epoch": 0.3747394875566998, + "grad_norm": 1.601637815858721, + "learning_rate": 1.4388151981646301e-05, + "loss": 0.7601, + "step": 12227 + }, + { + "epoch": 0.374770136079441, + "grad_norm": 1.4566247193941746, + "learning_rate": 1.4387260000110688e-05, + "loss": 0.6673, + "step": 12228 + }, + { + "epoch": 0.3748007846021822, + "grad_norm": 1.5262353804322737, + "learning_rate": 1.4386367975346285e-05, + "loss": 0.7794, + "step": 12229 + }, + { + "epoch": 0.3748314331249234, + "grad_norm": 1.6118999285317337, + "learning_rate": 1.4385475907361872e-05, + "loss": 0.8312, + "step": 12230 + }, + { + "epoch": 0.3748620816476646, + "grad_norm": 1.5057421474302963, + "learning_rate": 1.4384583796166243e-05, + "loss": 0.7691, + "step": 12231 + }, + { + "epoch": 0.3748927301704058, + "grad_norm": 1.5002056871648002, + "learning_rate": 1.4383691641768187e-05, + "loss": 0.6827, + "step": 12232 + }, + { + "epoch": 0.374923378693147, + "grad_norm": 1.543542023668025, + "learning_rate": 1.4382799444176498e-05, + "loss": 0.7208, + "step": 12233 + }, + { + "epoch": 0.3749540272158882, + "grad_norm": 1.37055550429824, + "learning_rate": 1.4381907203399966e-05, + "loss": 0.595, + "step": 12234 + }, + { + "epoch": 0.3749846757386294, + "grad_norm": 1.4701741179959127, + "learning_rate": 1.4381014919447378e-05, + "loss": 0.7561, + "step": 12235 + }, + { + "epoch": 0.3750153242613706, + "grad_norm": 1.4754607530218893, + "learning_rate": 1.438012259232753e-05, + "loss": 0.7911, + "step": 12236 + }, + { + "epoch": 0.3750459727841118, + "grad_norm": 1.417984252162094, + "learning_rate": 1.4379230222049216e-05, + "loss": 0.7286, + "step": 12237 + }, + { + "epoch": 0.375076621306853, + "grad_norm": 1.5640179414209827, + "learning_rate": 1.4378337808621223e-05, + "loss": 0.7356, + "step": 12238 + }, + { + "epoch": 0.3751072698295942, + "grad_norm": 1.4211638760046685, + "learning_rate": 1.4377445352052348e-05, + "loss": 0.7954, + "step": 12239 + }, + { + "epoch": 0.3751379183523354, + "grad_norm": 1.6626933593196958, + "learning_rate": 1.4376552852351385e-05, + "loss": 0.7651, + "step": 12240 + }, + { + "epoch": 0.3751685668750766, + "grad_norm": 0.7636084944509915, + "learning_rate": 1.4375660309527126e-05, + "loss": 0.6214, + "step": 12241 + }, + { + "epoch": 0.3751992153978178, + "grad_norm": 1.4864933611437507, + "learning_rate": 1.4374767723588368e-05, + "loss": 0.8157, + "step": 12242 + }, + { + "epoch": 0.375229863920559, + "grad_norm": 0.7361367284433443, + "learning_rate": 1.4373875094543901e-05, + "loss": 0.6493, + "step": 12243 + }, + { + "epoch": 0.3752605124433002, + "grad_norm": 1.637484852653629, + "learning_rate": 1.4372982422402526e-05, + "loss": 0.8948, + "step": 12244 + }, + { + "epoch": 0.3752911609660414, + "grad_norm": 1.390127371177207, + "learning_rate": 1.4372089707173036e-05, + "loss": 0.7856, + "step": 12245 + }, + { + "epoch": 0.37532180948878263, + "grad_norm": 1.654294368773537, + "learning_rate": 1.4371196948864227e-05, + "loss": 0.6603, + "step": 12246 + }, + { + "epoch": 0.37535245801152384, + "grad_norm": 1.5674113580195952, + "learning_rate": 1.4370304147484895e-05, + "loss": 0.8229, + "step": 12247 + }, + { + "epoch": 0.37538310653426504, + "grad_norm": 1.4407680656489459, + "learning_rate": 1.4369411303043838e-05, + "loss": 0.7829, + "step": 12248 + }, + { + "epoch": 0.37541375505700625, + "grad_norm": 0.677851868733806, + "learning_rate": 1.4368518415549857e-05, + "loss": 0.6203, + "step": 12249 + }, + { + "epoch": 0.37544440357974745, + "grad_norm": 1.4233179365752047, + "learning_rate": 1.4367625485011743e-05, + "loss": 0.7212, + "step": 12250 + }, + { + "epoch": 0.37547505210248866, + "grad_norm": 1.5666259075397262, + "learning_rate": 1.4366732511438299e-05, + "loss": 0.7509, + "step": 12251 + }, + { + "epoch": 0.37550570062522987, + "grad_norm": 1.3912642215597815, + "learning_rate": 1.4365839494838322e-05, + "loss": 0.7009, + "step": 12252 + }, + { + "epoch": 0.37553634914797107, + "grad_norm": 1.5165396795908854, + "learning_rate": 1.4364946435220612e-05, + "loss": 0.7204, + "step": 12253 + }, + { + "epoch": 0.3755669976707123, + "grad_norm": 1.4179964232571232, + "learning_rate": 1.4364053332593967e-05, + "loss": 0.7568, + "step": 12254 + }, + { + "epoch": 0.3755976461934535, + "grad_norm": 1.5492248631935888, + "learning_rate": 1.4363160186967189e-05, + "loss": 0.7451, + "step": 12255 + }, + { + "epoch": 0.3756282947161947, + "grad_norm": 1.362673158921174, + "learning_rate": 1.4362266998349076e-05, + "loss": 0.7559, + "step": 12256 + }, + { + "epoch": 0.3756589432389359, + "grad_norm": 1.5413778299918761, + "learning_rate": 1.4361373766748433e-05, + "loss": 0.684, + "step": 12257 + }, + { + "epoch": 0.3756895917616771, + "grad_norm": 1.2673241529822958, + "learning_rate": 1.4360480492174053e-05, + "loss": 0.7021, + "step": 12258 + }, + { + "epoch": 0.3757202402844183, + "grad_norm": 1.2480651796205857, + "learning_rate": 1.4359587174634748e-05, + "loss": 0.7281, + "step": 12259 + }, + { + "epoch": 0.3757508888071595, + "grad_norm": 1.4551956667402792, + "learning_rate": 1.4358693814139313e-05, + "loss": 0.8284, + "step": 12260 + }, + { + "epoch": 0.3757815373299007, + "grad_norm": 1.3513737626210158, + "learning_rate": 1.4357800410696552e-05, + "loss": 0.7507, + "step": 12261 + }, + { + "epoch": 0.3758121858526419, + "grad_norm": 1.3108174831473334, + "learning_rate": 1.435690696431527e-05, + "loss": 0.6916, + "step": 12262 + }, + { + "epoch": 0.37584283437538313, + "grad_norm": 0.7142640458877146, + "learning_rate": 1.435601347500427e-05, + "loss": 0.6178, + "step": 12263 + }, + { + "epoch": 0.37587348289812433, + "grad_norm": 1.4194295555803933, + "learning_rate": 1.435511994277235e-05, + "loss": 0.7521, + "step": 12264 + }, + { + "epoch": 0.37590413142086554, + "grad_norm": 1.6932444063976866, + "learning_rate": 1.4354226367628323e-05, + "loss": 0.7392, + "step": 12265 + }, + { + "epoch": 0.37593477994360674, + "grad_norm": 0.671707812533303, + "learning_rate": 1.4353332749580988e-05, + "loss": 0.6103, + "step": 12266 + }, + { + "epoch": 0.3759654284663479, + "grad_norm": 1.6090571528713822, + "learning_rate": 1.4352439088639152e-05, + "loss": 0.6853, + "step": 12267 + }, + { + "epoch": 0.3759960769890891, + "grad_norm": 1.5674507682618783, + "learning_rate": 1.4351545384811623e-05, + "loss": 0.7438, + "step": 12268 + }, + { + "epoch": 0.3760267255118303, + "grad_norm": 1.5688246349041108, + "learning_rate": 1.4350651638107198e-05, + "loss": 0.8482, + "step": 12269 + }, + { + "epoch": 0.3760573740345715, + "grad_norm": 1.3952879419699926, + "learning_rate": 1.4349757848534693e-05, + "loss": 0.6615, + "step": 12270 + }, + { + "epoch": 0.3760880225573127, + "grad_norm": 1.554457810367065, + "learning_rate": 1.4348864016102908e-05, + "loss": 0.8135, + "step": 12271 + }, + { + "epoch": 0.3761186710800539, + "grad_norm": 1.517256432167049, + "learning_rate": 1.4347970140820659e-05, + "loss": 0.7157, + "step": 12272 + }, + { + "epoch": 0.37614931960279513, + "grad_norm": 1.4666211730526841, + "learning_rate": 1.434707622269674e-05, + "loss": 0.846, + "step": 12273 + }, + { + "epoch": 0.37617996812553633, + "grad_norm": 1.2723180608457731, + "learning_rate": 1.4346182261739973e-05, + "loss": 0.7617, + "step": 12274 + }, + { + "epoch": 0.37621061664827754, + "grad_norm": 1.561020478381064, + "learning_rate": 1.4345288257959156e-05, + "loss": 0.7441, + "step": 12275 + }, + { + "epoch": 0.37624126517101875, + "grad_norm": 1.5950718919815392, + "learning_rate": 1.4344394211363106e-05, + "loss": 0.7372, + "step": 12276 + }, + { + "epoch": 0.37627191369375995, + "grad_norm": 1.6201308746553282, + "learning_rate": 1.4343500121960628e-05, + "loss": 0.7971, + "step": 12277 + }, + { + "epoch": 0.37630256221650116, + "grad_norm": 1.3708918302076565, + "learning_rate": 1.4342605989760527e-05, + "loss": 0.6701, + "step": 12278 + }, + { + "epoch": 0.37633321073924236, + "grad_norm": 1.512549099884645, + "learning_rate": 1.4341711814771624e-05, + "loss": 0.7581, + "step": 12279 + }, + { + "epoch": 0.37636385926198357, + "grad_norm": 1.6071649536347798, + "learning_rate": 1.434081759700272e-05, + "loss": 0.723, + "step": 12280 + }, + { + "epoch": 0.3763945077847248, + "grad_norm": 1.3671220547429546, + "learning_rate": 1.4339923336462631e-05, + "loss": 0.7249, + "step": 12281 + }, + { + "epoch": 0.376425156307466, + "grad_norm": 1.3290991984962652, + "learning_rate": 1.4339029033160166e-05, + "loss": 0.6529, + "step": 12282 + }, + { + "epoch": 0.3764558048302072, + "grad_norm": 0.7700528218270302, + "learning_rate": 1.4338134687104139e-05, + "loss": 0.599, + "step": 12283 + }, + { + "epoch": 0.3764864533529484, + "grad_norm": 1.4809522292387995, + "learning_rate": 1.4337240298303359e-05, + "loss": 0.7467, + "step": 12284 + }, + { + "epoch": 0.3765171018756896, + "grad_norm": 0.6773807115736917, + "learning_rate": 1.4336345866766643e-05, + "loss": 0.6114, + "step": 12285 + }, + { + "epoch": 0.3765477503984308, + "grad_norm": 1.5015120823710972, + "learning_rate": 1.4335451392502799e-05, + "loss": 0.7622, + "step": 12286 + }, + { + "epoch": 0.376578398921172, + "grad_norm": 1.5015957935519328, + "learning_rate": 1.4334556875520644e-05, + "loss": 0.8939, + "step": 12287 + }, + { + "epoch": 0.3766090474439132, + "grad_norm": 0.6822921424757578, + "learning_rate": 1.4333662315828993e-05, + "loss": 0.5983, + "step": 12288 + }, + { + "epoch": 0.3766396959666544, + "grad_norm": 1.4934378829262853, + "learning_rate": 1.4332767713436657e-05, + "loss": 0.7552, + "step": 12289 + }, + { + "epoch": 0.3766703444893956, + "grad_norm": 1.5415407846818439, + "learning_rate": 1.433187306835245e-05, + "loss": 0.8481, + "step": 12290 + }, + { + "epoch": 0.37670099301213683, + "grad_norm": 1.5143070030522123, + "learning_rate": 1.4330978380585192e-05, + "loss": 0.766, + "step": 12291 + }, + { + "epoch": 0.37673164153487804, + "grad_norm": 1.3521776132502936, + "learning_rate": 1.4330083650143695e-05, + "loss": 0.7714, + "step": 12292 + }, + { + "epoch": 0.37676229005761924, + "grad_norm": 1.5388819986314934, + "learning_rate": 1.4329188877036777e-05, + "loss": 0.7742, + "step": 12293 + }, + { + "epoch": 0.37679293858036045, + "grad_norm": 1.571325775031071, + "learning_rate": 1.4328294061273254e-05, + "loss": 0.8673, + "step": 12294 + }, + { + "epoch": 0.37682358710310165, + "grad_norm": 1.3618788296186484, + "learning_rate": 1.432739920286194e-05, + "loss": 0.7979, + "step": 12295 + }, + { + "epoch": 0.37685423562584286, + "grad_norm": 1.4157706233379443, + "learning_rate": 1.4326504301811656e-05, + "loss": 0.8258, + "step": 12296 + }, + { + "epoch": 0.37688488414858407, + "grad_norm": 1.444151289650611, + "learning_rate": 1.4325609358131216e-05, + "loss": 0.8113, + "step": 12297 + }, + { + "epoch": 0.3769155326713252, + "grad_norm": 1.5207354411120173, + "learning_rate": 1.4324714371829443e-05, + "loss": 0.7294, + "step": 12298 + }, + { + "epoch": 0.3769461811940664, + "grad_norm": 1.4517128929947944, + "learning_rate": 1.4323819342915151e-05, + "loss": 0.8067, + "step": 12299 + }, + { + "epoch": 0.3769768297168076, + "grad_norm": 0.764148397971054, + "learning_rate": 1.4322924271397161e-05, + "loss": 0.589, + "step": 12300 + }, + { + "epoch": 0.37700747823954883, + "grad_norm": 1.5597443579825592, + "learning_rate": 1.4322029157284291e-05, + "loss": 0.8913, + "step": 12301 + }, + { + "epoch": 0.37703812676229004, + "grad_norm": 1.3905343003031407, + "learning_rate": 1.4321134000585365e-05, + "loss": 0.8128, + "step": 12302 + }, + { + "epoch": 0.37706877528503124, + "grad_norm": 1.3430277575680318, + "learning_rate": 1.4320238801309199e-05, + "loss": 0.7099, + "step": 12303 + }, + { + "epoch": 0.37709942380777245, + "grad_norm": 1.3639990001260207, + "learning_rate": 1.431934355946461e-05, + "loss": 0.7178, + "step": 12304 + }, + { + "epoch": 0.37713007233051365, + "grad_norm": 1.384063638378413, + "learning_rate": 1.4318448275060429e-05, + "loss": 0.715, + "step": 12305 + }, + { + "epoch": 0.37716072085325486, + "grad_norm": 1.8234402071348814, + "learning_rate": 1.431755294810547e-05, + "loss": 0.8035, + "step": 12306 + }, + { + "epoch": 0.37719136937599607, + "grad_norm": 1.4055449158962423, + "learning_rate": 1.4316657578608559e-05, + "loss": 0.7338, + "step": 12307 + }, + { + "epoch": 0.37722201789873727, + "grad_norm": 1.4171363803558115, + "learning_rate": 1.4315762166578515e-05, + "loss": 0.8273, + "step": 12308 + }, + { + "epoch": 0.3772526664214785, + "grad_norm": 1.5017558304643297, + "learning_rate": 1.4314866712024162e-05, + "loss": 0.7157, + "step": 12309 + }, + { + "epoch": 0.3772833149442197, + "grad_norm": 1.5081391424664206, + "learning_rate": 1.4313971214954325e-05, + "loss": 0.6929, + "step": 12310 + }, + { + "epoch": 0.3773139634669609, + "grad_norm": 0.6982214623001581, + "learning_rate": 1.4313075675377826e-05, + "loss": 0.6152, + "step": 12311 + }, + { + "epoch": 0.3773446119897021, + "grad_norm": 1.478964053536359, + "learning_rate": 1.4312180093303485e-05, + "loss": 0.7011, + "step": 12312 + }, + { + "epoch": 0.3773752605124433, + "grad_norm": 1.7332942905475404, + "learning_rate": 1.4311284468740133e-05, + "loss": 0.7094, + "step": 12313 + }, + { + "epoch": 0.3774059090351845, + "grad_norm": 1.6529175112803203, + "learning_rate": 1.4310388801696593e-05, + "loss": 0.7504, + "step": 12314 + }, + { + "epoch": 0.3774365575579257, + "grad_norm": 1.4917160291307106, + "learning_rate": 1.4309493092181688e-05, + "loss": 0.7617, + "step": 12315 + }, + { + "epoch": 0.3774672060806669, + "grad_norm": 1.4039776087497684, + "learning_rate": 1.4308597340204245e-05, + "loss": 0.704, + "step": 12316 + }, + { + "epoch": 0.3774978546034081, + "grad_norm": 0.6748623534322653, + "learning_rate": 1.4307701545773089e-05, + "loss": 0.588, + "step": 12317 + }, + { + "epoch": 0.37752850312614933, + "grad_norm": 1.4883186004123483, + "learning_rate": 1.430680570889705e-05, + "loss": 0.8064, + "step": 12318 + }, + { + "epoch": 0.37755915164889053, + "grad_norm": 1.369173193951341, + "learning_rate": 1.4305909829584947e-05, + "loss": 0.7673, + "step": 12319 + }, + { + "epoch": 0.37758980017163174, + "grad_norm": 1.496709673313407, + "learning_rate": 1.4305013907845617e-05, + "loss": 0.747, + "step": 12320 + }, + { + "epoch": 0.37762044869437295, + "grad_norm": 1.248463741864651, + "learning_rate": 1.4304117943687883e-05, + "loss": 0.6085, + "step": 12321 + }, + { + "epoch": 0.37765109721711415, + "grad_norm": 0.7325247594981763, + "learning_rate": 1.4303221937120574e-05, + "loss": 0.6455, + "step": 12322 + }, + { + "epoch": 0.37768174573985536, + "grad_norm": 1.3474563835182232, + "learning_rate": 1.4302325888152518e-05, + "loss": 0.7243, + "step": 12323 + }, + { + "epoch": 0.37771239426259656, + "grad_norm": 1.3931530872761397, + "learning_rate": 1.4301429796792546e-05, + "loss": 0.8224, + "step": 12324 + }, + { + "epoch": 0.37774304278533777, + "grad_norm": 1.4834224168527403, + "learning_rate": 1.430053366304948e-05, + "loss": 0.7972, + "step": 12325 + }, + { + "epoch": 0.377773691308079, + "grad_norm": 1.3264610638057428, + "learning_rate": 1.4299637486932162e-05, + "loss": 0.7972, + "step": 12326 + }, + { + "epoch": 0.3778043398308202, + "grad_norm": 1.4582562522648803, + "learning_rate": 1.4298741268449411e-05, + "loss": 0.8559, + "step": 12327 + }, + { + "epoch": 0.3778349883535614, + "grad_norm": 1.2540447244353328, + "learning_rate": 1.4297845007610068e-05, + "loss": 0.7296, + "step": 12328 + }, + { + "epoch": 0.37786563687630254, + "grad_norm": 0.6622433713297532, + "learning_rate": 1.4296948704422953e-05, + "loss": 0.5966, + "step": 12329 + }, + { + "epoch": 0.37789628539904374, + "grad_norm": 0.6754137549139329, + "learning_rate": 1.4296052358896903e-05, + "loss": 0.5995, + "step": 12330 + }, + { + "epoch": 0.37792693392178495, + "grad_norm": 1.5041819384846706, + "learning_rate": 1.4295155971040753e-05, + "loss": 0.7991, + "step": 12331 + }, + { + "epoch": 0.37795758244452615, + "grad_norm": 1.4225183898793043, + "learning_rate": 1.4294259540863331e-05, + "loss": 0.7352, + "step": 12332 + }, + { + "epoch": 0.37798823096726736, + "grad_norm": 1.2656011715430608, + "learning_rate": 1.4293363068373473e-05, + "loss": 0.7065, + "step": 12333 + }, + { + "epoch": 0.37801887949000856, + "grad_norm": 1.2989712230789097, + "learning_rate": 1.4292466553580007e-05, + "loss": 0.8139, + "step": 12334 + }, + { + "epoch": 0.37804952801274977, + "grad_norm": 0.6929049430003434, + "learning_rate": 1.4291569996491773e-05, + "loss": 0.62, + "step": 12335 + }, + { + "epoch": 0.378080176535491, + "grad_norm": 1.2024884601513748, + "learning_rate": 1.4290673397117595e-05, + "loss": 0.644, + "step": 12336 + }, + { + "epoch": 0.3781108250582322, + "grad_norm": 1.594130568632095, + "learning_rate": 1.4289776755466322e-05, + "loss": 0.7619, + "step": 12337 + }, + { + "epoch": 0.3781414735809734, + "grad_norm": 1.668794194976069, + "learning_rate": 1.4288880071546776e-05, + "loss": 0.6918, + "step": 12338 + }, + { + "epoch": 0.3781721221037146, + "grad_norm": 1.3372922657068946, + "learning_rate": 1.4287983345367802e-05, + "loss": 0.7401, + "step": 12339 + }, + { + "epoch": 0.3782027706264558, + "grad_norm": 1.5956224107379187, + "learning_rate": 1.4287086576938226e-05, + "loss": 0.6497, + "step": 12340 + }, + { + "epoch": 0.378233419149197, + "grad_norm": 0.6739577810708419, + "learning_rate": 1.4286189766266894e-05, + "loss": 0.569, + "step": 12341 + }, + { + "epoch": 0.3782640676719382, + "grad_norm": 1.504555953856577, + "learning_rate": 1.4285292913362634e-05, + "loss": 0.6784, + "step": 12342 + }, + { + "epoch": 0.3782947161946794, + "grad_norm": 0.6893790819853028, + "learning_rate": 1.4284396018234286e-05, + "loss": 0.6212, + "step": 12343 + }, + { + "epoch": 0.3783253647174206, + "grad_norm": 1.8101543722242852, + "learning_rate": 1.4283499080890688e-05, + "loss": 0.7564, + "step": 12344 + }, + { + "epoch": 0.3783560132401618, + "grad_norm": 1.543033405288024, + "learning_rate": 1.4282602101340679e-05, + "loss": 0.6467, + "step": 12345 + }, + { + "epoch": 0.37838666176290303, + "grad_norm": 1.4137410535035027, + "learning_rate": 1.4281705079593095e-05, + "loss": 0.7172, + "step": 12346 + }, + { + "epoch": 0.37841731028564424, + "grad_norm": 1.3749154300644315, + "learning_rate": 1.4280808015656775e-05, + "loss": 0.6842, + "step": 12347 + }, + { + "epoch": 0.37844795880838544, + "grad_norm": 1.4815665073365503, + "learning_rate": 1.4279910909540561e-05, + "loss": 0.8102, + "step": 12348 + }, + { + "epoch": 0.37847860733112665, + "grad_norm": 1.4508762532200485, + "learning_rate": 1.427901376125329e-05, + "loss": 0.802, + "step": 12349 + }, + { + "epoch": 0.37850925585386785, + "grad_norm": 0.6978055977892892, + "learning_rate": 1.4278116570803799e-05, + "loss": 0.6128, + "step": 12350 + }, + { + "epoch": 0.37853990437660906, + "grad_norm": 1.4795454046031231, + "learning_rate": 1.427721933820093e-05, + "loss": 0.6703, + "step": 12351 + }, + { + "epoch": 0.37857055289935027, + "grad_norm": 0.6727903800278624, + "learning_rate": 1.4276322063453524e-05, + "loss": 0.5891, + "step": 12352 + }, + { + "epoch": 0.37860120142209147, + "grad_norm": 1.7617748819336794, + "learning_rate": 1.4275424746570426e-05, + "loss": 0.8649, + "step": 12353 + }, + { + "epoch": 0.3786318499448327, + "grad_norm": 0.6536044737700298, + "learning_rate": 1.4274527387560473e-05, + "loss": 0.586, + "step": 12354 + }, + { + "epoch": 0.3786624984675739, + "grad_norm": 1.508425042860648, + "learning_rate": 1.4273629986432506e-05, + "loss": 0.7241, + "step": 12355 + }, + { + "epoch": 0.3786931469903151, + "grad_norm": 1.5936740674309104, + "learning_rate": 1.427273254319537e-05, + "loss": 0.6974, + "step": 12356 + }, + { + "epoch": 0.3787237955130563, + "grad_norm": 1.3978108311206983, + "learning_rate": 1.427183505785791e-05, + "loss": 0.8064, + "step": 12357 + }, + { + "epoch": 0.3787544440357975, + "grad_norm": 1.5959800730827445, + "learning_rate": 1.4270937530428962e-05, + "loss": 0.7949, + "step": 12358 + }, + { + "epoch": 0.3787850925585387, + "grad_norm": 1.594762916560283, + "learning_rate": 1.4270039960917376e-05, + "loss": 0.6924, + "step": 12359 + }, + { + "epoch": 0.37881574108127986, + "grad_norm": 0.700538643959073, + "learning_rate": 1.4269142349331995e-05, + "loss": 0.5998, + "step": 12360 + }, + { + "epoch": 0.37884638960402106, + "grad_norm": 1.490093169797652, + "learning_rate": 1.4268244695681662e-05, + "loss": 0.8485, + "step": 12361 + }, + { + "epoch": 0.37887703812676227, + "grad_norm": 1.4395424643905357, + "learning_rate": 1.4267346999975218e-05, + "loss": 0.739, + "step": 12362 + }, + { + "epoch": 0.3789076866495035, + "grad_norm": 1.5847696856093367, + "learning_rate": 1.4266449262221516e-05, + "loss": 0.7266, + "step": 12363 + }, + { + "epoch": 0.3789383351722447, + "grad_norm": 1.5335670858429464, + "learning_rate": 1.4265551482429396e-05, + "loss": 0.7136, + "step": 12364 + }, + { + "epoch": 0.3789689836949859, + "grad_norm": 1.4845347689958592, + "learning_rate": 1.4264653660607706e-05, + "loss": 0.7482, + "step": 12365 + }, + { + "epoch": 0.3789996322177271, + "grad_norm": 1.500393212406629, + "learning_rate": 1.4263755796765293e-05, + "loss": 0.7901, + "step": 12366 + }, + { + "epoch": 0.3790302807404683, + "grad_norm": 1.4391495263785823, + "learning_rate": 1.4262857890911001e-05, + "loss": 0.6999, + "step": 12367 + }, + { + "epoch": 0.3790609292632095, + "grad_norm": 1.4638949051837058, + "learning_rate": 1.4261959943053682e-05, + "loss": 0.7918, + "step": 12368 + }, + { + "epoch": 0.3790915777859507, + "grad_norm": 1.6507080848876687, + "learning_rate": 1.4261061953202183e-05, + "loss": 0.862, + "step": 12369 + }, + { + "epoch": 0.3791222263086919, + "grad_norm": 1.6430621895989201, + "learning_rate": 1.4260163921365347e-05, + "loss": 0.7807, + "step": 12370 + }, + { + "epoch": 0.3791528748314331, + "grad_norm": 1.4952603314810053, + "learning_rate": 1.4259265847552026e-05, + "loss": 0.7225, + "step": 12371 + }, + { + "epoch": 0.3791835233541743, + "grad_norm": 1.3854991685633076, + "learning_rate": 1.425836773177107e-05, + "loss": 0.7184, + "step": 12372 + }, + { + "epoch": 0.37921417187691553, + "grad_norm": 1.480612312610393, + "learning_rate": 1.4257469574031324e-05, + "loss": 0.7302, + "step": 12373 + }, + { + "epoch": 0.37924482039965673, + "grad_norm": 1.3188675499189855, + "learning_rate": 1.4256571374341646e-05, + "loss": 0.7474, + "step": 12374 + }, + { + "epoch": 0.37927546892239794, + "grad_norm": 1.7460711458389762, + "learning_rate": 1.4255673132710877e-05, + "loss": 0.7523, + "step": 12375 + }, + { + "epoch": 0.37930611744513915, + "grad_norm": 1.434688440436109, + "learning_rate": 1.4254774849147875e-05, + "loss": 0.8176, + "step": 12376 + }, + { + "epoch": 0.37933676596788035, + "grad_norm": 1.5529869907716203, + "learning_rate": 1.4253876523661486e-05, + "loss": 0.7634, + "step": 12377 + }, + { + "epoch": 0.37936741449062156, + "grad_norm": 1.435966273265402, + "learning_rate": 1.4252978156260564e-05, + "loss": 0.7559, + "step": 12378 + }, + { + "epoch": 0.37939806301336276, + "grad_norm": 1.4314388101490045, + "learning_rate": 1.4252079746953958e-05, + "loss": 0.7856, + "step": 12379 + }, + { + "epoch": 0.37942871153610397, + "grad_norm": 0.7375412601040897, + "learning_rate": 1.4251181295750527e-05, + "loss": 0.6306, + "step": 12380 + }, + { + "epoch": 0.3794593600588452, + "grad_norm": 1.6591460502759041, + "learning_rate": 1.4250282802659114e-05, + "loss": 0.8171, + "step": 12381 + }, + { + "epoch": 0.3794900085815864, + "grad_norm": 1.4606955066829497, + "learning_rate": 1.424938426768858e-05, + "loss": 0.8509, + "step": 12382 + }, + { + "epoch": 0.3795206571043276, + "grad_norm": 1.7284133280610685, + "learning_rate": 1.4248485690847775e-05, + "loss": 0.8487, + "step": 12383 + }, + { + "epoch": 0.3795513056270688, + "grad_norm": 0.6775270075510543, + "learning_rate": 1.4247587072145552e-05, + "loss": 0.5847, + "step": 12384 + }, + { + "epoch": 0.37958195414981, + "grad_norm": 1.2707764092076974, + "learning_rate": 1.4246688411590767e-05, + "loss": 0.7362, + "step": 12385 + }, + { + "epoch": 0.3796126026725512, + "grad_norm": 0.6652208337039669, + "learning_rate": 1.4245789709192277e-05, + "loss": 0.6119, + "step": 12386 + }, + { + "epoch": 0.3796432511952924, + "grad_norm": 1.6059820287778979, + "learning_rate": 1.4244890964958933e-05, + "loss": 0.7559, + "step": 12387 + }, + { + "epoch": 0.3796738997180336, + "grad_norm": 1.4285707071308527, + "learning_rate": 1.424399217889959e-05, + "loss": 0.8377, + "step": 12388 + }, + { + "epoch": 0.3797045482407748, + "grad_norm": 1.5186799338056165, + "learning_rate": 1.424309335102311e-05, + "loss": 0.8618, + "step": 12389 + }, + { + "epoch": 0.379735196763516, + "grad_norm": 1.503346249005217, + "learning_rate": 1.424219448133834e-05, + "loss": 0.7935, + "step": 12390 + }, + { + "epoch": 0.3797658452862572, + "grad_norm": 1.4610820343933806, + "learning_rate": 1.424129556985415e-05, + "loss": 0.816, + "step": 12391 + }, + { + "epoch": 0.3797964938089984, + "grad_norm": 1.5179365033306547, + "learning_rate": 1.4240396616579386e-05, + "loss": 0.7659, + "step": 12392 + }, + { + "epoch": 0.3798271423317396, + "grad_norm": 1.6321999714448807, + "learning_rate": 1.4239497621522909e-05, + "loss": 0.7577, + "step": 12393 + }, + { + "epoch": 0.3798577908544808, + "grad_norm": 1.43058572965847, + "learning_rate": 1.4238598584693576e-05, + "loss": 0.6734, + "step": 12394 + }, + { + "epoch": 0.379888439377222, + "grad_norm": 1.3669488734859143, + "learning_rate": 1.4237699506100251e-05, + "loss": 0.6997, + "step": 12395 + }, + { + "epoch": 0.3799190878999632, + "grad_norm": 1.4546206909303439, + "learning_rate": 1.4236800385751783e-05, + "loss": 0.7498, + "step": 12396 + }, + { + "epoch": 0.3799497364227044, + "grad_norm": 1.3212686148000325, + "learning_rate": 1.423590122365704e-05, + "loss": 0.789, + "step": 12397 + }, + { + "epoch": 0.3799803849454456, + "grad_norm": 1.4869269450337228, + "learning_rate": 1.4235002019824874e-05, + "loss": 0.8617, + "step": 12398 + }, + { + "epoch": 0.3800110334681868, + "grad_norm": 0.7006648390516913, + "learning_rate": 1.4234102774264156e-05, + "loss": 0.5867, + "step": 12399 + }, + { + "epoch": 0.380041681990928, + "grad_norm": 1.4306960074436126, + "learning_rate": 1.4233203486983737e-05, + "loss": 0.8034, + "step": 12400 + }, + { + "epoch": 0.38007233051366923, + "grad_norm": 1.362675955056761, + "learning_rate": 1.423230415799248e-05, + "loss": 0.7022, + "step": 12401 + }, + { + "epoch": 0.38010297903641044, + "grad_norm": 1.3686260885710428, + "learning_rate": 1.423140478729925e-05, + "loss": 0.7624, + "step": 12402 + }, + { + "epoch": 0.38013362755915164, + "grad_norm": 1.7773008954393192, + "learning_rate": 1.4230505374912904e-05, + "loss": 0.7998, + "step": 12403 + }, + { + "epoch": 0.38016427608189285, + "grad_norm": 1.4562414983141396, + "learning_rate": 1.422960592084231e-05, + "loss": 0.7691, + "step": 12404 + }, + { + "epoch": 0.38019492460463405, + "grad_norm": 1.4761815670124865, + "learning_rate": 1.4228706425096318e-05, + "loss": 0.8472, + "step": 12405 + }, + { + "epoch": 0.38022557312737526, + "grad_norm": 1.4062510454474948, + "learning_rate": 1.4227806887683808e-05, + "loss": 0.7223, + "step": 12406 + }, + { + "epoch": 0.38025622165011647, + "grad_norm": 1.3866682743809906, + "learning_rate": 1.422690730861363e-05, + "loss": 0.7672, + "step": 12407 + }, + { + "epoch": 0.38028687017285767, + "grad_norm": 1.3609171017322728, + "learning_rate": 1.4226007687894657e-05, + "loss": 0.7809, + "step": 12408 + }, + { + "epoch": 0.3803175186955989, + "grad_norm": 1.5200555641144065, + "learning_rate": 1.4225108025535743e-05, + "loss": 0.7522, + "step": 12409 + }, + { + "epoch": 0.3803481672183401, + "grad_norm": 0.7115445470004484, + "learning_rate": 1.4224208321545765e-05, + "loss": 0.6101, + "step": 12410 + }, + { + "epoch": 0.3803788157410813, + "grad_norm": 1.459091926991479, + "learning_rate": 1.422330857593358e-05, + "loss": 0.7952, + "step": 12411 + }, + { + "epoch": 0.3804094642638225, + "grad_norm": 1.4701996625750937, + "learning_rate": 1.4222408788708052e-05, + "loss": 0.7659, + "step": 12412 + }, + { + "epoch": 0.3804401127865637, + "grad_norm": 1.6214008935601527, + "learning_rate": 1.422150895987805e-05, + "loss": 0.7546, + "step": 12413 + }, + { + "epoch": 0.3804707613093049, + "grad_norm": 1.6755952016671003, + "learning_rate": 1.4220609089452441e-05, + "loss": 0.8257, + "step": 12414 + }, + { + "epoch": 0.3805014098320461, + "grad_norm": 1.5301160378613547, + "learning_rate": 1.4219709177440094e-05, + "loss": 0.7583, + "step": 12415 + }, + { + "epoch": 0.3805320583547873, + "grad_norm": 1.5860035786024251, + "learning_rate": 1.4218809223849869e-05, + "loss": 0.8115, + "step": 12416 + }, + { + "epoch": 0.3805627068775285, + "grad_norm": 1.4647834485875189, + "learning_rate": 1.4217909228690638e-05, + "loss": 0.7708, + "step": 12417 + }, + { + "epoch": 0.38059335540026973, + "grad_norm": 1.5025494927639875, + "learning_rate": 1.421700919197127e-05, + "loss": 0.7818, + "step": 12418 + }, + { + "epoch": 0.38062400392301093, + "grad_norm": 0.6713945815659395, + "learning_rate": 1.4216109113700631e-05, + "loss": 0.6077, + "step": 12419 + }, + { + "epoch": 0.38065465244575214, + "grad_norm": 1.3649728597189372, + "learning_rate": 1.4215208993887589e-05, + "loss": 0.8241, + "step": 12420 + }, + { + "epoch": 0.38068530096849335, + "grad_norm": 1.6723566525118625, + "learning_rate": 1.4214308832541015e-05, + "loss": 0.87, + "step": 12421 + }, + { + "epoch": 0.3807159494912345, + "grad_norm": 1.5120515874906613, + "learning_rate": 1.4213408629669779e-05, + "loss": 0.7326, + "step": 12422 + }, + { + "epoch": 0.3807465980139757, + "grad_norm": 1.367549450893294, + "learning_rate": 1.4212508385282746e-05, + "loss": 0.745, + "step": 12423 + }, + { + "epoch": 0.3807772465367169, + "grad_norm": 1.2295109651748333, + "learning_rate": 1.4211608099388791e-05, + "loss": 0.5868, + "step": 12424 + }, + { + "epoch": 0.3808078950594581, + "grad_norm": 1.360973214649197, + "learning_rate": 1.4210707771996785e-05, + "loss": 0.8553, + "step": 12425 + }, + { + "epoch": 0.3808385435821993, + "grad_norm": 1.3083780599063608, + "learning_rate": 1.4209807403115599e-05, + "loss": 0.712, + "step": 12426 + }, + { + "epoch": 0.3808691921049405, + "grad_norm": 1.358332199017284, + "learning_rate": 1.4208906992754102e-05, + "loss": 0.754, + "step": 12427 + }, + { + "epoch": 0.38089984062768173, + "grad_norm": 1.3788603777566402, + "learning_rate": 1.420800654092117e-05, + "loss": 0.7772, + "step": 12428 + }, + { + "epoch": 0.38093048915042294, + "grad_norm": 1.5697762923452767, + "learning_rate": 1.4207106047625669e-05, + "loss": 0.8151, + "step": 12429 + }, + { + "epoch": 0.38096113767316414, + "grad_norm": 1.5145998562822343, + "learning_rate": 1.420620551287648e-05, + "loss": 0.7297, + "step": 12430 + }, + { + "epoch": 0.38099178619590535, + "grad_norm": 0.7177470276912642, + "learning_rate": 1.4205304936682467e-05, + "loss": 0.6153, + "step": 12431 + }, + { + "epoch": 0.38102243471864655, + "grad_norm": 1.3872193439887996, + "learning_rate": 1.4204404319052512e-05, + "loss": 0.697, + "step": 12432 + }, + { + "epoch": 0.38105308324138776, + "grad_norm": 1.5655936022449926, + "learning_rate": 1.4203503659995486e-05, + "loss": 0.7591, + "step": 12433 + }, + { + "epoch": 0.38108373176412896, + "grad_norm": 1.2862897672424771, + "learning_rate": 1.420260295952026e-05, + "loss": 0.7507, + "step": 12434 + }, + { + "epoch": 0.38111438028687017, + "grad_norm": 1.4895845791285314, + "learning_rate": 1.4201702217635714e-05, + "loss": 0.7344, + "step": 12435 + }, + { + "epoch": 0.3811450288096114, + "grad_norm": 1.4926455871410025, + "learning_rate": 1.4200801434350719e-05, + "loss": 0.7561, + "step": 12436 + }, + { + "epoch": 0.3811756773323526, + "grad_norm": 1.4973748461686003, + "learning_rate": 1.4199900609674155e-05, + "loss": 0.7303, + "step": 12437 + }, + { + "epoch": 0.3812063258550938, + "grad_norm": 1.4715505101704953, + "learning_rate": 1.4198999743614895e-05, + "loss": 0.6903, + "step": 12438 + }, + { + "epoch": 0.381236974377835, + "grad_norm": 1.4859807217812286, + "learning_rate": 1.4198098836181813e-05, + "loss": 0.7415, + "step": 12439 + }, + { + "epoch": 0.3812676229005762, + "grad_norm": 1.5121948951435644, + "learning_rate": 1.4197197887383793e-05, + "loss": 0.634, + "step": 12440 + }, + { + "epoch": 0.3812982714233174, + "grad_norm": 1.4874231882875855, + "learning_rate": 1.419629689722971e-05, + "loss": 0.748, + "step": 12441 + }, + { + "epoch": 0.3813289199460586, + "grad_norm": 1.4427684347108156, + "learning_rate": 1.4195395865728432e-05, + "loss": 0.6924, + "step": 12442 + }, + { + "epoch": 0.3813595684687998, + "grad_norm": 0.778224531569153, + "learning_rate": 1.4194494792888853e-05, + "loss": 0.6285, + "step": 12443 + }, + { + "epoch": 0.381390216991541, + "grad_norm": 1.4872424148647665, + "learning_rate": 1.4193593678719837e-05, + "loss": 0.7851, + "step": 12444 + }, + { + "epoch": 0.3814208655142822, + "grad_norm": 1.4706846226304628, + "learning_rate": 1.4192692523230278e-05, + "loss": 0.6495, + "step": 12445 + }, + { + "epoch": 0.38145151403702343, + "grad_norm": 0.6598472238724105, + "learning_rate": 1.4191791326429041e-05, + "loss": 0.6023, + "step": 12446 + }, + { + "epoch": 0.38148216255976464, + "grad_norm": 1.6402809233687479, + "learning_rate": 1.419089008832501e-05, + "loss": 0.8426, + "step": 12447 + }, + { + "epoch": 0.38151281108250584, + "grad_norm": 0.6451353916247634, + "learning_rate": 1.4189988808927068e-05, + "loss": 0.5757, + "step": 12448 + }, + { + "epoch": 0.38154345960524705, + "grad_norm": 1.4406830407577282, + "learning_rate": 1.41890874882441e-05, + "loss": 0.8121, + "step": 12449 + }, + { + "epoch": 0.38157410812798825, + "grad_norm": 1.5671639457012283, + "learning_rate": 1.4188186126284975e-05, + "loss": 0.7699, + "step": 12450 + }, + { + "epoch": 0.38160475665072946, + "grad_norm": 1.4067958517051657, + "learning_rate": 1.4187284723058583e-05, + "loss": 0.7808, + "step": 12451 + }, + { + "epoch": 0.38163540517347067, + "grad_norm": 1.4707704301212705, + "learning_rate": 1.41863832785738e-05, + "loss": 0.7887, + "step": 12452 + }, + { + "epoch": 0.3816660536962118, + "grad_norm": 1.5636869563891307, + "learning_rate": 1.4185481792839515e-05, + "loss": 0.7105, + "step": 12453 + }, + { + "epoch": 0.381696702218953, + "grad_norm": 1.3651294079550487, + "learning_rate": 1.4184580265864604e-05, + "loss": 0.7962, + "step": 12454 + }, + { + "epoch": 0.3817273507416942, + "grad_norm": 1.5181977787225045, + "learning_rate": 1.418367869765796e-05, + "loss": 0.8539, + "step": 12455 + }, + { + "epoch": 0.38175799926443543, + "grad_norm": 1.3218711053005783, + "learning_rate": 1.418277708822845e-05, + "loss": 0.6648, + "step": 12456 + }, + { + "epoch": 0.38178864778717664, + "grad_norm": 1.629519788984022, + "learning_rate": 1.4181875437584971e-05, + "loss": 0.7613, + "step": 12457 + }, + { + "epoch": 0.38181929630991784, + "grad_norm": 1.5591750458218863, + "learning_rate": 1.4180973745736406e-05, + "loss": 0.857, + "step": 12458 + }, + { + "epoch": 0.38184994483265905, + "grad_norm": 1.6351532379027325, + "learning_rate": 1.4180072012691632e-05, + "loss": 0.7626, + "step": 12459 + }, + { + "epoch": 0.38188059335540026, + "grad_norm": 1.3893080717992907, + "learning_rate": 1.4179170238459544e-05, + "loss": 0.6389, + "step": 12460 + }, + { + "epoch": 0.38191124187814146, + "grad_norm": 1.3609922074791059, + "learning_rate": 1.4178268423049017e-05, + "loss": 0.6766, + "step": 12461 + }, + { + "epoch": 0.38194189040088267, + "grad_norm": 1.4678803971926777, + "learning_rate": 1.4177366566468948e-05, + "loss": 0.8259, + "step": 12462 + }, + { + "epoch": 0.38197253892362387, + "grad_norm": 1.5412114124158982, + "learning_rate": 1.4176464668728214e-05, + "loss": 0.8275, + "step": 12463 + }, + { + "epoch": 0.3820031874463651, + "grad_norm": 1.3962118612852041, + "learning_rate": 1.4175562729835706e-05, + "loss": 0.757, + "step": 12464 + }, + { + "epoch": 0.3820338359691063, + "grad_norm": 1.727564928849339, + "learning_rate": 1.4174660749800308e-05, + "loss": 0.7403, + "step": 12465 + }, + { + "epoch": 0.3820644844918475, + "grad_norm": 1.486781338663769, + "learning_rate": 1.417375872863091e-05, + "loss": 0.7889, + "step": 12466 + }, + { + "epoch": 0.3820951330145887, + "grad_norm": 1.6223365684094235, + "learning_rate": 1.41728566663364e-05, + "loss": 0.8501, + "step": 12467 + }, + { + "epoch": 0.3821257815373299, + "grad_norm": 1.4863399188013764, + "learning_rate": 1.4171954562925667e-05, + "loss": 0.6827, + "step": 12468 + }, + { + "epoch": 0.3821564300600711, + "grad_norm": 1.497237875484071, + "learning_rate": 1.4171052418407599e-05, + "loss": 0.7872, + "step": 12469 + }, + { + "epoch": 0.3821870785828123, + "grad_norm": 1.4108762389533331, + "learning_rate": 1.417015023279108e-05, + "loss": 0.7482, + "step": 12470 + }, + { + "epoch": 0.3822177271055535, + "grad_norm": 1.462225876526944, + "learning_rate": 1.4169248006085008e-05, + "loss": 0.7873, + "step": 12471 + }, + { + "epoch": 0.3822483756282947, + "grad_norm": 1.3807334464195729, + "learning_rate": 1.4168345738298267e-05, + "loss": 0.7359, + "step": 12472 + }, + { + "epoch": 0.38227902415103593, + "grad_norm": 1.6451566008834544, + "learning_rate": 1.4167443429439748e-05, + "loss": 0.8024, + "step": 12473 + }, + { + "epoch": 0.38230967267377713, + "grad_norm": 1.3474154822505284, + "learning_rate": 1.4166541079518343e-05, + "loss": 0.7252, + "step": 12474 + }, + { + "epoch": 0.38234032119651834, + "grad_norm": 1.5071720812574199, + "learning_rate": 1.4165638688542945e-05, + "loss": 0.7652, + "step": 12475 + }, + { + "epoch": 0.38237096971925955, + "grad_norm": 1.4797252284450886, + "learning_rate": 1.416473625652244e-05, + "loss": 0.6886, + "step": 12476 + }, + { + "epoch": 0.38240161824200075, + "grad_norm": 1.4759656449082321, + "learning_rate": 1.4163833783465725e-05, + "loss": 0.7013, + "step": 12477 + }, + { + "epoch": 0.38243226676474196, + "grad_norm": 1.3912044768701637, + "learning_rate": 1.4162931269381688e-05, + "loss": 0.7183, + "step": 12478 + }, + { + "epoch": 0.38246291528748316, + "grad_norm": 1.423777865617577, + "learning_rate": 1.4162028714279226e-05, + "loss": 0.7152, + "step": 12479 + }, + { + "epoch": 0.38249356381022437, + "grad_norm": 0.7360331353959223, + "learning_rate": 1.4161126118167232e-05, + "loss": 0.6142, + "step": 12480 + }, + { + "epoch": 0.3825242123329656, + "grad_norm": 1.4737440622042157, + "learning_rate": 1.4160223481054595e-05, + "loss": 0.7258, + "step": 12481 + }, + { + "epoch": 0.3825548608557068, + "grad_norm": 0.6840058005099797, + "learning_rate": 1.4159320802950212e-05, + "loss": 0.633, + "step": 12482 + }, + { + "epoch": 0.382585509378448, + "grad_norm": 1.4111137807417489, + "learning_rate": 1.4158418083862978e-05, + "loss": 0.781, + "step": 12483 + }, + { + "epoch": 0.38261615790118914, + "grad_norm": 1.4585257801071452, + "learning_rate": 1.4157515323801785e-05, + "loss": 0.7942, + "step": 12484 + }, + { + "epoch": 0.38264680642393034, + "grad_norm": 1.417818116069342, + "learning_rate": 1.415661252277553e-05, + "loss": 0.7209, + "step": 12485 + }, + { + "epoch": 0.38267745494667155, + "grad_norm": 0.6961115672292207, + "learning_rate": 1.4155709680793108e-05, + "loss": 0.6065, + "step": 12486 + }, + { + "epoch": 0.38270810346941275, + "grad_norm": 1.4431238778789373, + "learning_rate": 1.4154806797863418e-05, + "loss": 0.8248, + "step": 12487 + }, + { + "epoch": 0.38273875199215396, + "grad_norm": 1.6148862569944713, + "learning_rate": 1.4153903873995351e-05, + "loss": 0.6712, + "step": 12488 + }, + { + "epoch": 0.38276940051489516, + "grad_norm": 1.6022052775174476, + "learning_rate": 1.4153000909197806e-05, + "loss": 0.8289, + "step": 12489 + }, + { + "epoch": 0.38280004903763637, + "grad_norm": 1.5680110118975643, + "learning_rate": 1.4152097903479682e-05, + "loss": 0.8051, + "step": 12490 + }, + { + "epoch": 0.3828306975603776, + "grad_norm": 1.5329947592065287, + "learning_rate": 1.4151194856849877e-05, + "loss": 0.8675, + "step": 12491 + }, + { + "epoch": 0.3828613460831188, + "grad_norm": 1.4437897248221525, + "learning_rate": 1.4150291769317284e-05, + "loss": 0.7554, + "step": 12492 + }, + { + "epoch": 0.38289199460586, + "grad_norm": 1.4397988084168676, + "learning_rate": 1.4149388640890802e-05, + "loss": 0.8497, + "step": 12493 + }, + { + "epoch": 0.3829226431286012, + "grad_norm": 1.3806393006460882, + "learning_rate": 1.4148485471579336e-05, + "loss": 0.7192, + "step": 12494 + }, + { + "epoch": 0.3829532916513424, + "grad_norm": 1.2542797260281782, + "learning_rate": 1.4147582261391781e-05, + "loss": 0.7929, + "step": 12495 + }, + { + "epoch": 0.3829839401740836, + "grad_norm": 0.6687711240547155, + "learning_rate": 1.4146679010337035e-05, + "loss": 0.576, + "step": 12496 + }, + { + "epoch": 0.3830145886968248, + "grad_norm": 1.3948983348272777, + "learning_rate": 1.4145775718424002e-05, + "loss": 0.7887, + "step": 12497 + }, + { + "epoch": 0.383045237219566, + "grad_norm": 0.6980010607422215, + "learning_rate": 1.4144872385661576e-05, + "loss": 0.5771, + "step": 12498 + }, + { + "epoch": 0.3830758857423072, + "grad_norm": 0.6596555016707184, + "learning_rate": 1.4143969012058667e-05, + "loss": 0.6239, + "step": 12499 + }, + { + "epoch": 0.3831065342650484, + "grad_norm": 1.4209724424372623, + "learning_rate": 1.4143065597624168e-05, + "loss": 0.7037, + "step": 12500 + }, + { + "epoch": 0.38313718278778963, + "grad_norm": 1.5000590369186821, + "learning_rate": 1.4142162142366985e-05, + "loss": 0.7976, + "step": 12501 + }, + { + "epoch": 0.38316783131053084, + "grad_norm": 1.2878471147386858, + "learning_rate": 1.4141258646296015e-05, + "loss": 0.6678, + "step": 12502 + }, + { + "epoch": 0.38319847983327204, + "grad_norm": 1.5300211431518964, + "learning_rate": 1.414035510942017e-05, + "loss": 0.6983, + "step": 12503 + }, + { + "epoch": 0.38322912835601325, + "grad_norm": 1.4407149407622428, + "learning_rate": 1.4139451531748341e-05, + "loss": 0.803, + "step": 12504 + }, + { + "epoch": 0.38325977687875445, + "grad_norm": 1.5183185414234222, + "learning_rate": 1.413854791328944e-05, + "loss": 0.7792, + "step": 12505 + }, + { + "epoch": 0.38329042540149566, + "grad_norm": 1.5276698256101666, + "learning_rate": 1.4137644254052366e-05, + "loss": 0.7756, + "step": 12506 + }, + { + "epoch": 0.38332107392423687, + "grad_norm": 1.4835540391174438, + "learning_rate": 1.4136740554046027e-05, + "loss": 0.7837, + "step": 12507 + }, + { + "epoch": 0.38335172244697807, + "grad_norm": 0.7882265017855383, + "learning_rate": 1.4135836813279323e-05, + "loss": 0.5923, + "step": 12508 + }, + { + "epoch": 0.3833823709697193, + "grad_norm": 1.5292893127778064, + "learning_rate": 1.4134933031761162e-05, + "loss": 0.7059, + "step": 12509 + }, + { + "epoch": 0.3834130194924605, + "grad_norm": 1.5196708871768339, + "learning_rate": 1.4134029209500447e-05, + "loss": 0.7591, + "step": 12510 + }, + { + "epoch": 0.3834436680152017, + "grad_norm": 1.4175129845523011, + "learning_rate": 1.4133125346506083e-05, + "loss": 0.7899, + "step": 12511 + }, + { + "epoch": 0.3834743165379429, + "grad_norm": 1.6289411675380463, + "learning_rate": 1.4132221442786977e-05, + "loss": 0.7893, + "step": 12512 + }, + { + "epoch": 0.3835049650606841, + "grad_norm": 1.3929393981962837, + "learning_rate": 1.4131317498352037e-05, + "loss": 0.7182, + "step": 12513 + }, + { + "epoch": 0.3835356135834253, + "grad_norm": 0.6819705781264598, + "learning_rate": 1.4130413513210173e-05, + "loss": 0.592, + "step": 12514 + }, + { + "epoch": 0.38356626210616646, + "grad_norm": 0.683277587508414, + "learning_rate": 1.4129509487370282e-05, + "loss": 0.5882, + "step": 12515 + }, + { + "epoch": 0.38359691062890766, + "grad_norm": 1.5486659289116467, + "learning_rate": 1.4128605420841282e-05, + "loss": 0.8845, + "step": 12516 + }, + { + "epoch": 0.38362755915164887, + "grad_norm": 1.44243511353775, + "learning_rate": 1.4127701313632072e-05, + "loss": 0.7727, + "step": 12517 + }, + { + "epoch": 0.3836582076743901, + "grad_norm": 1.4028721764572183, + "learning_rate": 1.412679716575157e-05, + "loss": 0.7569, + "step": 12518 + }, + { + "epoch": 0.3836888561971313, + "grad_norm": 1.6265040996388047, + "learning_rate": 1.4125892977208673e-05, + "loss": 0.6532, + "step": 12519 + }, + { + "epoch": 0.3837195047198725, + "grad_norm": 1.4028875551592785, + "learning_rate": 1.41249887480123e-05, + "loss": 0.8288, + "step": 12520 + }, + { + "epoch": 0.3837501532426137, + "grad_norm": 1.3440112806278999, + "learning_rate": 1.4124084478171358e-05, + "loss": 0.637, + "step": 12521 + }, + { + "epoch": 0.3837808017653549, + "grad_norm": 1.3758302156267441, + "learning_rate": 1.4123180167694757e-05, + "loss": 0.747, + "step": 12522 + }, + { + "epoch": 0.3838114502880961, + "grad_norm": 1.5608550115602864, + "learning_rate": 1.4122275816591407e-05, + "loss": 0.7906, + "step": 12523 + }, + { + "epoch": 0.3838420988108373, + "grad_norm": 1.5643432039859537, + "learning_rate": 1.4121371424870214e-05, + "loss": 0.7842, + "step": 12524 + }, + { + "epoch": 0.3838727473335785, + "grad_norm": 1.4886356251624326, + "learning_rate": 1.41204669925401e-05, + "loss": 0.8187, + "step": 12525 + }, + { + "epoch": 0.3839033958563197, + "grad_norm": 1.518493941821923, + "learning_rate": 1.4119562519609968e-05, + "loss": 0.775, + "step": 12526 + }, + { + "epoch": 0.3839340443790609, + "grad_norm": 1.5500416767041687, + "learning_rate": 1.4118658006088733e-05, + "loss": 0.786, + "step": 12527 + }, + { + "epoch": 0.38396469290180213, + "grad_norm": 1.435632032242802, + "learning_rate": 1.4117753451985306e-05, + "loss": 0.6582, + "step": 12528 + }, + { + "epoch": 0.38399534142454333, + "grad_norm": 1.3078752764848094, + "learning_rate": 1.41168488573086e-05, + "loss": 0.7628, + "step": 12529 + }, + { + "epoch": 0.38402598994728454, + "grad_norm": 1.4293879087682855, + "learning_rate": 1.4115944222067531e-05, + "loss": 0.7598, + "step": 12530 + }, + { + "epoch": 0.38405663847002575, + "grad_norm": 1.4147726504553282, + "learning_rate": 1.411503954627101e-05, + "loss": 0.5978, + "step": 12531 + }, + { + "epoch": 0.38408728699276695, + "grad_norm": 1.577346948941938, + "learning_rate": 1.4114134829927948e-05, + "loss": 0.7401, + "step": 12532 + }, + { + "epoch": 0.38411793551550816, + "grad_norm": 1.5658608461561356, + "learning_rate": 1.4113230073047265e-05, + "loss": 0.8799, + "step": 12533 + }, + { + "epoch": 0.38414858403824936, + "grad_norm": 1.621395746206812, + "learning_rate": 1.4112325275637877e-05, + "loss": 0.8506, + "step": 12534 + }, + { + "epoch": 0.38417923256099057, + "grad_norm": 1.4371720487175295, + "learning_rate": 1.4111420437708693e-05, + "loss": 0.7307, + "step": 12535 + }, + { + "epoch": 0.3842098810837318, + "grad_norm": 1.535587081617445, + "learning_rate": 1.4110515559268632e-05, + "loss": 0.6824, + "step": 12536 + }, + { + "epoch": 0.384240529606473, + "grad_norm": 1.4430847883343534, + "learning_rate": 1.410961064032661e-05, + "loss": 0.7649, + "step": 12537 + }, + { + "epoch": 0.3842711781292142, + "grad_norm": 0.7399814744062077, + "learning_rate": 1.410870568089154e-05, + "loss": 0.5926, + "step": 12538 + }, + { + "epoch": 0.3843018266519554, + "grad_norm": 1.4734790183273734, + "learning_rate": 1.4107800680972344e-05, + "loss": 0.8172, + "step": 12539 + }, + { + "epoch": 0.3843324751746966, + "grad_norm": 1.444728479117426, + "learning_rate": 1.4106895640577936e-05, + "loss": 0.7776, + "step": 12540 + }, + { + "epoch": 0.3843631236974378, + "grad_norm": 0.7102853384011123, + "learning_rate": 1.4105990559717238e-05, + "loss": 0.6356, + "step": 12541 + }, + { + "epoch": 0.384393772220179, + "grad_norm": 1.3094545085480793, + "learning_rate": 1.410508543839916e-05, + "loss": 0.7183, + "step": 12542 + }, + { + "epoch": 0.3844244207429202, + "grad_norm": 1.3358140322730627, + "learning_rate": 1.4104180276632624e-05, + "loss": 0.6882, + "step": 12543 + }, + { + "epoch": 0.3844550692656614, + "grad_norm": 1.6160332883065338, + "learning_rate": 1.4103275074426552e-05, + "loss": 0.8027, + "step": 12544 + }, + { + "epoch": 0.3844857177884026, + "grad_norm": 1.3452357607293393, + "learning_rate": 1.4102369831789864e-05, + "loss": 0.7975, + "step": 12545 + }, + { + "epoch": 0.3845163663111438, + "grad_norm": 0.7131017333799503, + "learning_rate": 1.4101464548731474e-05, + "loss": 0.6153, + "step": 12546 + }, + { + "epoch": 0.384547014833885, + "grad_norm": 1.6384759021171302, + "learning_rate": 1.4100559225260302e-05, + "loss": 0.7972, + "step": 12547 + }, + { + "epoch": 0.3845776633566262, + "grad_norm": 1.5220809963339401, + "learning_rate": 1.4099653861385271e-05, + "loss": 0.7322, + "step": 12548 + }, + { + "epoch": 0.3846083118793674, + "grad_norm": 1.5880934092385992, + "learning_rate": 1.4098748457115305e-05, + "loss": 0.7337, + "step": 12549 + }, + { + "epoch": 0.3846389604021086, + "grad_norm": 1.4234792758070207, + "learning_rate": 1.4097843012459318e-05, + "loss": 0.8491, + "step": 12550 + }, + { + "epoch": 0.3846696089248498, + "grad_norm": 1.4268300665258844, + "learning_rate": 1.4096937527426237e-05, + "loss": 0.8179, + "step": 12551 + }, + { + "epoch": 0.384700257447591, + "grad_norm": 1.5394375258451405, + "learning_rate": 1.4096032002024984e-05, + "loss": 0.7212, + "step": 12552 + }, + { + "epoch": 0.3847309059703322, + "grad_norm": 1.5087693132009354, + "learning_rate": 1.4095126436264476e-05, + "loss": 0.6937, + "step": 12553 + }, + { + "epoch": 0.3847615544930734, + "grad_norm": 0.6982684438553382, + "learning_rate": 1.4094220830153642e-05, + "loss": 0.6027, + "step": 12554 + }, + { + "epoch": 0.3847922030158146, + "grad_norm": 1.3542872862466615, + "learning_rate": 1.40933151837014e-05, + "loss": 0.6669, + "step": 12555 + }, + { + "epoch": 0.38482285153855583, + "grad_norm": 1.634527383748169, + "learning_rate": 1.409240949691668e-05, + "loss": 0.864, + "step": 12556 + }, + { + "epoch": 0.38485350006129704, + "grad_norm": 1.5677530670519768, + "learning_rate": 1.4091503769808402e-05, + "loss": 0.7027, + "step": 12557 + }, + { + "epoch": 0.38488414858403824, + "grad_norm": 1.496544204932032, + "learning_rate": 1.4090598002385487e-05, + "loss": 0.6059, + "step": 12558 + }, + { + "epoch": 0.38491479710677945, + "grad_norm": 1.4467242380077994, + "learning_rate": 1.4089692194656865e-05, + "loss": 0.7303, + "step": 12559 + }, + { + "epoch": 0.38494544562952066, + "grad_norm": 1.556320804832522, + "learning_rate": 1.4088786346631457e-05, + "loss": 0.8109, + "step": 12560 + }, + { + "epoch": 0.38497609415226186, + "grad_norm": 1.2930630501595408, + "learning_rate": 1.4087880458318198e-05, + "loss": 0.691, + "step": 12561 + }, + { + "epoch": 0.38500674267500307, + "grad_norm": 1.5513178492554258, + "learning_rate": 1.4086974529726e-05, + "loss": 0.6923, + "step": 12562 + }, + { + "epoch": 0.38503739119774427, + "grad_norm": 1.5707673097883434, + "learning_rate": 1.4086068560863799e-05, + "loss": 0.7303, + "step": 12563 + }, + { + "epoch": 0.3850680397204855, + "grad_norm": 1.692880046801866, + "learning_rate": 1.4085162551740519e-05, + "loss": 0.7078, + "step": 12564 + }, + { + "epoch": 0.3850986882432267, + "grad_norm": 1.5113540384157813, + "learning_rate": 1.4084256502365086e-05, + "loss": 0.7921, + "step": 12565 + }, + { + "epoch": 0.3851293367659679, + "grad_norm": 1.380601038439958, + "learning_rate": 1.408335041274643e-05, + "loss": 0.8041, + "step": 12566 + }, + { + "epoch": 0.3851599852887091, + "grad_norm": 1.3435099908844872, + "learning_rate": 1.4082444282893474e-05, + "loss": 0.7411, + "step": 12567 + }, + { + "epoch": 0.3851906338114503, + "grad_norm": 1.4439275662839643, + "learning_rate": 1.4081538112815159e-05, + "loss": 0.8462, + "step": 12568 + }, + { + "epoch": 0.3852212823341915, + "grad_norm": 1.3390326069805731, + "learning_rate": 1.4080631902520397e-05, + "loss": 0.6659, + "step": 12569 + }, + { + "epoch": 0.3852519308569327, + "grad_norm": 1.589474122178421, + "learning_rate": 1.4079725652018126e-05, + "loss": 0.6589, + "step": 12570 + }, + { + "epoch": 0.3852825793796739, + "grad_norm": 1.6253960531621678, + "learning_rate": 1.4078819361317272e-05, + "loss": 0.7888, + "step": 12571 + }, + { + "epoch": 0.3853132279024151, + "grad_norm": 1.6666957604929848, + "learning_rate": 1.4077913030426774e-05, + "loss": 0.8086, + "step": 12572 + }, + { + "epoch": 0.38534387642515633, + "grad_norm": 1.5508376219251734, + "learning_rate": 1.407700665935555e-05, + "loss": 0.8063, + "step": 12573 + }, + { + "epoch": 0.38537452494789753, + "grad_norm": 1.3888492935292254, + "learning_rate": 1.407610024811254e-05, + "loss": 0.7671, + "step": 12574 + }, + { + "epoch": 0.38540517347063874, + "grad_norm": 1.3664625574001956, + "learning_rate": 1.4075193796706665e-05, + "loss": 0.8173, + "step": 12575 + }, + { + "epoch": 0.38543582199337995, + "grad_norm": 0.6803661889239844, + "learning_rate": 1.407428730514687e-05, + "loss": 0.6323, + "step": 12576 + }, + { + "epoch": 0.3854664705161211, + "grad_norm": 1.6362028707733667, + "learning_rate": 1.4073380773442076e-05, + "loss": 0.6777, + "step": 12577 + }, + { + "epoch": 0.3854971190388623, + "grad_norm": 1.406151403814568, + "learning_rate": 1.4072474201601221e-05, + "loss": 0.5684, + "step": 12578 + }, + { + "epoch": 0.3855277675616035, + "grad_norm": 1.5037824499297834, + "learning_rate": 1.4071567589633232e-05, + "loss": 0.754, + "step": 12579 + }, + { + "epoch": 0.3855584160843447, + "grad_norm": 1.4365289245731385, + "learning_rate": 1.4070660937547048e-05, + "loss": 0.6972, + "step": 12580 + }, + { + "epoch": 0.3855890646070859, + "grad_norm": 1.5850147565327666, + "learning_rate": 1.4069754245351602e-05, + "loss": 0.7795, + "step": 12581 + }, + { + "epoch": 0.3856197131298271, + "grad_norm": 1.4722956694115497, + "learning_rate": 1.4068847513055823e-05, + "loss": 0.7219, + "step": 12582 + }, + { + "epoch": 0.38565036165256833, + "grad_norm": 1.3685757914819734, + "learning_rate": 1.406794074066865e-05, + "loss": 0.6895, + "step": 12583 + }, + { + "epoch": 0.38568101017530954, + "grad_norm": 1.3622425435261742, + "learning_rate": 1.4067033928199017e-05, + "loss": 0.7392, + "step": 12584 + }, + { + "epoch": 0.38571165869805074, + "grad_norm": 1.7352903941747813, + "learning_rate": 1.4066127075655858e-05, + "loss": 0.7536, + "step": 12585 + }, + { + "epoch": 0.38574230722079195, + "grad_norm": 1.561368201889531, + "learning_rate": 1.4065220183048104e-05, + "loss": 0.7378, + "step": 12586 + }, + { + "epoch": 0.38577295574353315, + "grad_norm": 1.451261950358667, + "learning_rate": 1.4064313250384705e-05, + "loss": 0.6866, + "step": 12587 + }, + { + "epoch": 0.38580360426627436, + "grad_norm": 1.5436242069680624, + "learning_rate": 1.4063406277674578e-05, + "loss": 0.8295, + "step": 12588 + }, + { + "epoch": 0.38583425278901556, + "grad_norm": 1.3945358752534167, + "learning_rate": 1.4062499264926675e-05, + "loss": 0.7849, + "step": 12589 + }, + { + "epoch": 0.38586490131175677, + "grad_norm": 1.4393871374018306, + "learning_rate": 1.4061592212149924e-05, + "loss": 0.7641, + "step": 12590 + }, + { + "epoch": 0.385895549834498, + "grad_norm": 1.4171314006596427, + "learning_rate": 1.4060685119353266e-05, + "loss": 0.6783, + "step": 12591 + }, + { + "epoch": 0.3859261983572392, + "grad_norm": 1.391749805363017, + "learning_rate": 1.4059777986545643e-05, + "loss": 0.7612, + "step": 12592 + }, + { + "epoch": 0.3859568468799804, + "grad_norm": 1.5485026080738578, + "learning_rate": 1.405887081373598e-05, + "loss": 0.7334, + "step": 12593 + }, + { + "epoch": 0.3859874954027216, + "grad_norm": 0.7192637232483822, + "learning_rate": 1.4057963600933234e-05, + "loss": 0.6442, + "step": 12594 + }, + { + "epoch": 0.3860181439254628, + "grad_norm": 1.3427269857783182, + "learning_rate": 1.405705634814633e-05, + "loss": 0.7915, + "step": 12595 + }, + { + "epoch": 0.386048792448204, + "grad_norm": 1.5718441997789907, + "learning_rate": 1.4056149055384211e-05, + "loss": 0.8636, + "step": 12596 + }, + { + "epoch": 0.3860794409709452, + "grad_norm": 1.3784562462165864, + "learning_rate": 1.4055241722655816e-05, + "loss": 0.8583, + "step": 12597 + }, + { + "epoch": 0.3861100894936864, + "grad_norm": 1.5181035605017277, + "learning_rate": 1.4054334349970092e-05, + "loss": 0.7684, + "step": 12598 + }, + { + "epoch": 0.3861407380164276, + "grad_norm": 1.6004646612740874, + "learning_rate": 1.405342693733597e-05, + "loss": 0.8376, + "step": 12599 + }, + { + "epoch": 0.3861713865391688, + "grad_norm": 1.575472023302893, + "learning_rate": 1.4052519484762399e-05, + "loss": 0.7917, + "step": 12600 + }, + { + "epoch": 0.38620203506191003, + "grad_norm": 1.459714312449, + "learning_rate": 1.4051611992258311e-05, + "loss": 0.7191, + "step": 12601 + }, + { + "epoch": 0.38623268358465124, + "grad_norm": 1.216355348822469, + "learning_rate": 1.4050704459832657e-05, + "loss": 0.6097, + "step": 12602 + }, + { + "epoch": 0.38626333210739244, + "grad_norm": 0.689205933148041, + "learning_rate": 1.4049796887494378e-05, + "loss": 0.5878, + "step": 12603 + }, + { + "epoch": 0.38629398063013365, + "grad_norm": 1.4505273382057544, + "learning_rate": 1.4048889275252411e-05, + "loss": 0.7988, + "step": 12604 + }, + { + "epoch": 0.38632462915287485, + "grad_norm": 1.526661793768599, + "learning_rate": 1.4047981623115701e-05, + "loss": 0.6459, + "step": 12605 + }, + { + "epoch": 0.38635527767561606, + "grad_norm": 1.5258421463859337, + "learning_rate": 1.4047073931093196e-05, + "loss": 0.7624, + "step": 12606 + }, + { + "epoch": 0.38638592619835727, + "grad_norm": 0.6555935008584154, + "learning_rate": 1.4046166199193834e-05, + "loss": 0.5968, + "step": 12607 + }, + { + "epoch": 0.3864165747210984, + "grad_norm": 1.565547618753599, + "learning_rate": 1.4045258427426558e-05, + "loss": 0.7749, + "step": 12608 + }, + { + "epoch": 0.3864472232438396, + "grad_norm": 1.3862102242242158, + "learning_rate": 1.4044350615800319e-05, + "loss": 0.7253, + "step": 12609 + }, + { + "epoch": 0.3864778717665808, + "grad_norm": 1.3340556670654002, + "learning_rate": 1.4043442764324058e-05, + "loss": 0.665, + "step": 12610 + }, + { + "epoch": 0.38650852028932203, + "grad_norm": 1.4396261641818666, + "learning_rate": 1.4042534873006724e-05, + "loss": 0.7426, + "step": 12611 + }, + { + "epoch": 0.38653916881206324, + "grad_norm": 1.588359804633327, + "learning_rate": 1.4041626941857253e-05, + "loss": 0.7328, + "step": 12612 + }, + { + "epoch": 0.38656981733480444, + "grad_norm": 1.619876134300774, + "learning_rate": 1.4040718970884604e-05, + "loss": 0.7798, + "step": 12613 + }, + { + "epoch": 0.38660046585754565, + "grad_norm": 1.4961737782434565, + "learning_rate": 1.4039810960097715e-05, + "loss": 0.7436, + "step": 12614 + }, + { + "epoch": 0.38663111438028686, + "grad_norm": 1.5468530701624872, + "learning_rate": 1.4038902909505534e-05, + "loss": 0.7436, + "step": 12615 + }, + { + "epoch": 0.38666176290302806, + "grad_norm": 1.3271087947042288, + "learning_rate": 1.4037994819117008e-05, + "loss": 0.8301, + "step": 12616 + }, + { + "epoch": 0.38669241142576927, + "grad_norm": 1.4232548296932896, + "learning_rate": 1.4037086688941088e-05, + "loss": 0.8099, + "step": 12617 + }, + { + "epoch": 0.3867230599485105, + "grad_norm": 1.4171074708252722, + "learning_rate": 1.403617851898672e-05, + "loss": 0.7908, + "step": 12618 + }, + { + "epoch": 0.3867537084712517, + "grad_norm": 1.4227580207690143, + "learning_rate": 1.4035270309262851e-05, + "loss": 0.8422, + "step": 12619 + }, + { + "epoch": 0.3867843569939929, + "grad_norm": 1.4076335279467653, + "learning_rate": 1.4034362059778432e-05, + "loss": 0.6729, + "step": 12620 + }, + { + "epoch": 0.3868150055167341, + "grad_norm": 1.4622869202092168, + "learning_rate": 1.4033453770542411e-05, + "loss": 0.7579, + "step": 12621 + }, + { + "epoch": 0.3868456540394753, + "grad_norm": 0.6792914521165881, + "learning_rate": 1.403254544156374e-05, + "loss": 0.618, + "step": 12622 + }, + { + "epoch": 0.3868763025622165, + "grad_norm": 1.3606536300204768, + "learning_rate": 1.4031637072851365e-05, + "loss": 0.8109, + "step": 12623 + }, + { + "epoch": 0.3869069510849577, + "grad_norm": 1.3060109233079977, + "learning_rate": 1.4030728664414239e-05, + "loss": 0.7147, + "step": 12624 + }, + { + "epoch": 0.3869375996076989, + "grad_norm": 1.5229731458209914, + "learning_rate": 1.402982021626131e-05, + "loss": 0.7602, + "step": 12625 + }, + { + "epoch": 0.3869682481304401, + "grad_norm": 1.5943841486849306, + "learning_rate": 1.4028911728401537e-05, + "loss": 0.7453, + "step": 12626 + }, + { + "epoch": 0.3869988966531813, + "grad_norm": 0.700564378561628, + "learning_rate": 1.402800320084386e-05, + "loss": 0.6064, + "step": 12627 + }, + { + "epoch": 0.38702954517592253, + "grad_norm": 1.6055890869093286, + "learning_rate": 1.402709463359724e-05, + "loss": 0.8543, + "step": 12628 + }, + { + "epoch": 0.38706019369866373, + "grad_norm": 1.3826688683356818, + "learning_rate": 1.4026186026670624e-05, + "loss": 0.7585, + "step": 12629 + }, + { + "epoch": 0.38709084222140494, + "grad_norm": 1.5132688604008488, + "learning_rate": 1.402527738007297e-05, + "loss": 0.628, + "step": 12630 + }, + { + "epoch": 0.38712149074414615, + "grad_norm": 1.3943905467584972, + "learning_rate": 1.4024368693813223e-05, + "loss": 0.7213, + "step": 12631 + }, + { + "epoch": 0.38715213926688735, + "grad_norm": 1.5383580220190851, + "learning_rate": 1.4023459967900348e-05, + "loss": 0.7837, + "step": 12632 + }, + { + "epoch": 0.38718278778962856, + "grad_norm": 0.6958412973649221, + "learning_rate": 1.4022551202343286e-05, + "loss": 0.6263, + "step": 12633 + }, + { + "epoch": 0.38721343631236976, + "grad_norm": 1.465018473421422, + "learning_rate": 1.4021642397151002e-05, + "loss": 0.7158, + "step": 12634 + }, + { + "epoch": 0.38724408483511097, + "grad_norm": 1.4358207647029178, + "learning_rate": 1.4020733552332448e-05, + "loss": 0.8006, + "step": 12635 + }, + { + "epoch": 0.3872747333578522, + "grad_norm": 1.6515434303806076, + "learning_rate": 1.4019824667896573e-05, + "loss": 0.7809, + "step": 12636 + }, + { + "epoch": 0.3873053818805934, + "grad_norm": 1.3289720509771874, + "learning_rate": 1.4018915743852339e-05, + "loss": 0.7278, + "step": 12637 + }, + { + "epoch": 0.3873360304033346, + "grad_norm": 1.3475090815836672, + "learning_rate": 1.4018006780208702e-05, + "loss": 0.7594, + "step": 12638 + }, + { + "epoch": 0.38736667892607574, + "grad_norm": 1.396631130111955, + "learning_rate": 1.4017097776974615e-05, + "loss": 0.7731, + "step": 12639 + }, + { + "epoch": 0.38739732744881694, + "grad_norm": 1.4336998314351215, + "learning_rate": 1.4016188734159033e-05, + "loss": 0.6472, + "step": 12640 + }, + { + "epoch": 0.38742797597155815, + "grad_norm": 1.3537989502356673, + "learning_rate": 1.401527965177092e-05, + "loss": 0.7465, + "step": 12641 + }, + { + "epoch": 0.38745862449429935, + "grad_norm": 1.547323765196229, + "learning_rate": 1.4014370529819226e-05, + "loss": 0.7049, + "step": 12642 + }, + { + "epoch": 0.38748927301704056, + "grad_norm": 1.417712219553992, + "learning_rate": 1.4013461368312913e-05, + "loss": 0.7597, + "step": 12643 + }, + { + "epoch": 0.38751992153978176, + "grad_norm": 0.7086739833906491, + "learning_rate": 1.4012552167260937e-05, + "loss": 0.6444, + "step": 12644 + }, + { + "epoch": 0.38755057006252297, + "grad_norm": 1.362048909223163, + "learning_rate": 1.401164292667226e-05, + "loss": 0.7038, + "step": 12645 + }, + { + "epoch": 0.3875812185852642, + "grad_norm": 1.4677629794681155, + "learning_rate": 1.4010733646555839e-05, + "loss": 0.7343, + "step": 12646 + }, + { + "epoch": 0.3876118671080054, + "grad_norm": 1.4102449985393852, + "learning_rate": 1.4009824326920631e-05, + "loss": 0.6749, + "step": 12647 + }, + { + "epoch": 0.3876425156307466, + "grad_norm": 1.4393605483713232, + "learning_rate": 1.4008914967775597e-05, + "loss": 0.6917, + "step": 12648 + }, + { + "epoch": 0.3876731641534878, + "grad_norm": 0.7016137699854946, + "learning_rate": 1.4008005569129703e-05, + "loss": 0.6095, + "step": 12649 + }, + { + "epoch": 0.387703812676229, + "grad_norm": 1.477595898270199, + "learning_rate": 1.4007096130991901e-05, + "loss": 0.7282, + "step": 12650 + }, + { + "epoch": 0.3877344611989702, + "grad_norm": 1.5822631552794522, + "learning_rate": 1.4006186653371156e-05, + "loss": 0.7186, + "step": 12651 + }, + { + "epoch": 0.3877651097217114, + "grad_norm": 1.3953597312362143, + "learning_rate": 1.4005277136276429e-05, + "loss": 0.8114, + "step": 12652 + }, + { + "epoch": 0.3877957582444526, + "grad_norm": 1.301002132092235, + "learning_rate": 1.4004367579716682e-05, + "loss": 0.7659, + "step": 12653 + }, + { + "epoch": 0.3878264067671938, + "grad_norm": 1.4503239407505342, + "learning_rate": 1.400345798370088e-05, + "loss": 0.728, + "step": 12654 + }, + { + "epoch": 0.387857055289935, + "grad_norm": 1.4625522510291555, + "learning_rate": 1.4002548348237977e-05, + "loss": 0.7205, + "step": 12655 + }, + { + "epoch": 0.38788770381267623, + "grad_norm": 1.3459393188501954, + "learning_rate": 1.4001638673336941e-05, + "loss": 0.7206, + "step": 12656 + }, + { + "epoch": 0.38791835233541744, + "grad_norm": 1.5580573313421084, + "learning_rate": 1.400072895900674e-05, + "loss": 0.7847, + "step": 12657 + }, + { + "epoch": 0.38794900085815864, + "grad_norm": 1.3968539496948114, + "learning_rate": 1.3999819205256329e-05, + "loss": 0.732, + "step": 12658 + }, + { + "epoch": 0.38797964938089985, + "grad_norm": 1.556266578321872, + "learning_rate": 1.3998909412094675e-05, + "loss": 0.6819, + "step": 12659 + }, + { + "epoch": 0.38801029790364105, + "grad_norm": 1.5185450191984549, + "learning_rate": 1.3997999579530745e-05, + "loss": 0.7981, + "step": 12660 + }, + { + "epoch": 0.38804094642638226, + "grad_norm": 1.4263097287673714, + "learning_rate": 1.3997089707573506e-05, + "loss": 0.8203, + "step": 12661 + }, + { + "epoch": 0.38807159494912347, + "grad_norm": 0.6911959304194236, + "learning_rate": 1.3996179796231912e-05, + "loss": 0.5773, + "step": 12662 + }, + { + "epoch": 0.38810224347186467, + "grad_norm": 1.3601303845088006, + "learning_rate": 1.399526984551494e-05, + "loss": 0.7599, + "step": 12663 + }, + { + "epoch": 0.3881328919946059, + "grad_norm": 1.3622594552025804, + "learning_rate": 1.3994359855431554e-05, + "loss": 0.7974, + "step": 12664 + }, + { + "epoch": 0.3881635405173471, + "grad_norm": 1.437448941181341, + "learning_rate": 1.3993449825990717e-05, + "loss": 0.6998, + "step": 12665 + }, + { + "epoch": 0.3881941890400883, + "grad_norm": 1.605927997007323, + "learning_rate": 1.3992539757201395e-05, + "loss": 0.7427, + "step": 12666 + }, + { + "epoch": 0.3882248375628295, + "grad_norm": 1.6947893349901555, + "learning_rate": 1.399162964907256e-05, + "loss": 0.8797, + "step": 12667 + }, + { + "epoch": 0.3882554860855707, + "grad_norm": 1.4138153363819574, + "learning_rate": 1.3990719501613174e-05, + "loss": 0.7859, + "step": 12668 + }, + { + "epoch": 0.3882861346083119, + "grad_norm": 1.4160082037949506, + "learning_rate": 1.398980931483221e-05, + "loss": 0.7214, + "step": 12669 + }, + { + "epoch": 0.38831678313105306, + "grad_norm": 1.3312207704247112, + "learning_rate": 1.3988899088738632e-05, + "loss": 0.8253, + "step": 12670 + }, + { + "epoch": 0.38834743165379426, + "grad_norm": 1.4774663231601886, + "learning_rate": 1.3987988823341411e-05, + "loss": 0.7701, + "step": 12671 + }, + { + "epoch": 0.38837808017653547, + "grad_norm": 1.4743181474060885, + "learning_rate": 1.3987078518649519e-05, + "loss": 0.7351, + "step": 12672 + }, + { + "epoch": 0.3884087286992767, + "grad_norm": 0.7031090872241664, + "learning_rate": 1.398616817467192e-05, + "loss": 0.6055, + "step": 12673 + }, + { + "epoch": 0.3884393772220179, + "grad_norm": 1.6355299001090116, + "learning_rate": 1.3985257791417584e-05, + "loss": 0.8741, + "step": 12674 + }, + { + "epoch": 0.3884700257447591, + "grad_norm": 1.5202544287128792, + "learning_rate": 1.3984347368895486e-05, + "loss": 0.8213, + "step": 12675 + }, + { + "epoch": 0.3885006742675003, + "grad_norm": 1.380319894857856, + "learning_rate": 1.3983436907114594e-05, + "loss": 0.7996, + "step": 12676 + }, + { + "epoch": 0.3885313227902415, + "grad_norm": 1.625980413205227, + "learning_rate": 1.3982526406083876e-05, + "loss": 0.7005, + "step": 12677 + }, + { + "epoch": 0.3885619713129827, + "grad_norm": 1.4084286869272578, + "learning_rate": 1.3981615865812308e-05, + "loss": 0.7216, + "step": 12678 + }, + { + "epoch": 0.3885926198357239, + "grad_norm": 1.5553341918639803, + "learning_rate": 1.398070528630886e-05, + "loss": 0.8085, + "step": 12679 + }, + { + "epoch": 0.3886232683584651, + "grad_norm": 0.6769090402072772, + "learning_rate": 1.3979794667582507e-05, + "loss": 0.5952, + "step": 12680 + }, + { + "epoch": 0.3886539168812063, + "grad_norm": 1.4739937158185739, + "learning_rate": 1.3978884009642215e-05, + "loss": 0.7593, + "step": 12681 + }, + { + "epoch": 0.3886845654039475, + "grad_norm": 1.4762518862122413, + "learning_rate": 1.3977973312496965e-05, + "loss": 0.6881, + "step": 12682 + }, + { + "epoch": 0.38871521392668873, + "grad_norm": 1.3355964465573484, + "learning_rate": 1.397706257615572e-05, + "loss": 0.7416, + "step": 12683 + }, + { + "epoch": 0.38874586244942994, + "grad_norm": 1.337565164254058, + "learning_rate": 1.3976151800627467e-05, + "loss": 0.605, + "step": 12684 + }, + { + "epoch": 0.38877651097217114, + "grad_norm": 1.3383273162444251, + "learning_rate": 1.3975240985921167e-05, + "loss": 0.7099, + "step": 12685 + }, + { + "epoch": 0.38880715949491235, + "grad_norm": 1.2420784947691368, + "learning_rate": 1.3974330132045804e-05, + "loss": 0.7486, + "step": 12686 + }, + { + "epoch": 0.38883780801765355, + "grad_norm": 1.339717177726748, + "learning_rate": 1.3973419239010346e-05, + "loss": 0.6764, + "step": 12687 + }, + { + "epoch": 0.38886845654039476, + "grad_norm": 1.4929508172955968, + "learning_rate": 1.3972508306823776e-05, + "loss": 0.694, + "step": 12688 + }, + { + "epoch": 0.38889910506313596, + "grad_norm": 1.575642563043459, + "learning_rate": 1.3971597335495061e-05, + "loss": 0.6975, + "step": 12689 + }, + { + "epoch": 0.38892975358587717, + "grad_norm": 1.3516522418589316, + "learning_rate": 1.3970686325033183e-05, + "loss": 0.7657, + "step": 12690 + }, + { + "epoch": 0.3889604021086184, + "grad_norm": 1.670661145103419, + "learning_rate": 1.396977527544712e-05, + "loss": 0.6883, + "step": 12691 + }, + { + "epoch": 0.3889910506313596, + "grad_norm": 1.3709231554431542, + "learning_rate": 1.3968864186745841e-05, + "loss": 0.7559, + "step": 12692 + }, + { + "epoch": 0.3890216991541008, + "grad_norm": 1.5760954276997783, + "learning_rate": 1.396795305893833e-05, + "loss": 0.705, + "step": 12693 + }, + { + "epoch": 0.389052347676842, + "grad_norm": 1.3195059496608472, + "learning_rate": 1.3967041892033559e-05, + "loss": 0.6503, + "step": 12694 + }, + { + "epoch": 0.3890829961995832, + "grad_norm": 1.5327357383929763, + "learning_rate": 1.3966130686040516e-05, + "loss": 0.6658, + "step": 12695 + }, + { + "epoch": 0.3891136447223244, + "grad_norm": 1.3378087370048284, + "learning_rate": 1.3965219440968165e-05, + "loss": 0.7954, + "step": 12696 + }, + { + "epoch": 0.3891442932450656, + "grad_norm": 1.4915470074992623, + "learning_rate": 1.3964308156825497e-05, + "loss": 0.7662, + "step": 12697 + }, + { + "epoch": 0.3891749417678068, + "grad_norm": 0.6662813202783996, + "learning_rate": 1.3963396833621483e-05, + "loss": 0.6001, + "step": 12698 + }, + { + "epoch": 0.389205590290548, + "grad_norm": 0.6876257228113773, + "learning_rate": 1.3962485471365109e-05, + "loss": 0.5934, + "step": 12699 + }, + { + "epoch": 0.3892362388132892, + "grad_norm": 1.3765259741967626, + "learning_rate": 1.3961574070065352e-05, + "loss": 0.795, + "step": 12700 + }, + { + "epoch": 0.3892668873360304, + "grad_norm": 1.4114912023248165, + "learning_rate": 1.3960662629731193e-05, + "loss": 0.6809, + "step": 12701 + }, + { + "epoch": 0.3892975358587716, + "grad_norm": 1.5901017758368112, + "learning_rate": 1.3959751150371605e-05, + "loss": 0.783, + "step": 12702 + }, + { + "epoch": 0.3893281843815128, + "grad_norm": 0.6997798821702713, + "learning_rate": 1.3958839631995583e-05, + "loss": 0.5989, + "step": 12703 + }, + { + "epoch": 0.389358832904254, + "grad_norm": 1.4464076049825805, + "learning_rate": 1.3957928074612097e-05, + "loss": 0.7529, + "step": 12704 + }, + { + "epoch": 0.3893894814269952, + "grad_norm": 1.4247871783118993, + "learning_rate": 1.3957016478230134e-05, + "loss": 0.7797, + "step": 12705 + }, + { + "epoch": 0.3894201299497364, + "grad_norm": 0.7052448876806708, + "learning_rate": 1.3956104842858675e-05, + "loss": 0.6135, + "step": 12706 + }, + { + "epoch": 0.3894507784724776, + "grad_norm": 1.3222685898531712, + "learning_rate": 1.3955193168506704e-05, + "loss": 0.743, + "step": 12707 + }, + { + "epoch": 0.3894814269952188, + "grad_norm": 0.66822196691994, + "learning_rate": 1.3954281455183203e-05, + "loss": 0.6058, + "step": 12708 + }, + { + "epoch": 0.38951207551796, + "grad_norm": 0.6784050030072014, + "learning_rate": 1.3953369702897153e-05, + "loss": 0.5887, + "step": 12709 + }, + { + "epoch": 0.3895427240407012, + "grad_norm": 1.5207247285837864, + "learning_rate": 1.3952457911657542e-05, + "loss": 0.8258, + "step": 12710 + }, + { + "epoch": 0.38957337256344243, + "grad_norm": 1.4839950241173308, + "learning_rate": 1.395154608147335e-05, + "loss": 0.7962, + "step": 12711 + }, + { + "epoch": 0.38960402108618364, + "grad_norm": 1.4273363855650052, + "learning_rate": 1.3950634212353567e-05, + "loss": 0.7618, + "step": 12712 + }, + { + "epoch": 0.38963466960892484, + "grad_norm": 1.511871988289008, + "learning_rate": 1.3949722304307169e-05, + "loss": 0.7188, + "step": 12713 + }, + { + "epoch": 0.38966531813166605, + "grad_norm": 1.2644355255875703, + "learning_rate": 1.394881035734315e-05, + "loss": 0.6906, + "step": 12714 + }, + { + "epoch": 0.38969596665440726, + "grad_norm": 1.4137537478096487, + "learning_rate": 1.3947898371470492e-05, + "loss": 0.6926, + "step": 12715 + }, + { + "epoch": 0.38972661517714846, + "grad_norm": 1.5301622081278026, + "learning_rate": 1.3946986346698179e-05, + "loss": 0.7522, + "step": 12716 + }, + { + "epoch": 0.38975726369988967, + "grad_norm": 1.3736454922502217, + "learning_rate": 1.3946074283035203e-05, + "loss": 0.7008, + "step": 12717 + }, + { + "epoch": 0.3897879122226309, + "grad_norm": 1.394506350604062, + "learning_rate": 1.3945162180490545e-05, + "loss": 0.704, + "step": 12718 + }, + { + "epoch": 0.3898185607453721, + "grad_norm": 1.5533846554049233, + "learning_rate": 1.3944250039073197e-05, + "loss": 0.7828, + "step": 12719 + }, + { + "epoch": 0.3898492092681133, + "grad_norm": 1.648993565234602, + "learning_rate": 1.3943337858792142e-05, + "loss": 0.7454, + "step": 12720 + }, + { + "epoch": 0.3898798577908545, + "grad_norm": 1.9338634550713598, + "learning_rate": 1.394242563965637e-05, + "loss": 0.7415, + "step": 12721 + }, + { + "epoch": 0.3899105063135957, + "grad_norm": 0.8192203329325325, + "learning_rate": 1.3941513381674871e-05, + "loss": 0.6161, + "step": 12722 + }, + { + "epoch": 0.3899411548363369, + "grad_norm": 0.7494262631493807, + "learning_rate": 1.394060108485663e-05, + "loss": 0.5702, + "step": 12723 + }, + { + "epoch": 0.3899718033590781, + "grad_norm": 1.367835874467151, + "learning_rate": 1.393968874921064e-05, + "loss": 0.6832, + "step": 12724 + }, + { + "epoch": 0.3900024518818193, + "grad_norm": 0.6696785255571233, + "learning_rate": 1.3938776374745887e-05, + "loss": 0.5875, + "step": 12725 + }, + { + "epoch": 0.3900331004045605, + "grad_norm": 1.5864605767058648, + "learning_rate": 1.3937863961471365e-05, + "loss": 0.6336, + "step": 12726 + }, + { + "epoch": 0.3900637489273017, + "grad_norm": 1.4044185712107005, + "learning_rate": 1.3936951509396063e-05, + "loss": 0.7225, + "step": 12727 + }, + { + "epoch": 0.39009439745004293, + "grad_norm": 1.3951072475967603, + "learning_rate": 1.3936039018528966e-05, + "loss": 0.7746, + "step": 12728 + }, + { + "epoch": 0.39012504597278413, + "grad_norm": 1.606723669834499, + "learning_rate": 1.3935126488879075e-05, + "loss": 0.7598, + "step": 12729 + }, + { + "epoch": 0.39015569449552534, + "grad_norm": 1.5885407675962717, + "learning_rate": 1.3934213920455371e-05, + "loss": 0.7855, + "step": 12730 + }, + { + "epoch": 0.39018634301826655, + "grad_norm": 0.8122180236135331, + "learning_rate": 1.3933301313266848e-05, + "loss": 0.6097, + "step": 12731 + }, + { + "epoch": 0.3902169915410077, + "grad_norm": 1.6308677926445871, + "learning_rate": 1.3932388667322508e-05, + "loss": 0.7603, + "step": 12732 + }, + { + "epoch": 0.3902476400637489, + "grad_norm": 1.654408858318984, + "learning_rate": 1.3931475982631333e-05, + "loss": 0.7272, + "step": 12733 + }, + { + "epoch": 0.3902782885864901, + "grad_norm": 1.3718054881597106, + "learning_rate": 1.3930563259202321e-05, + "loss": 0.778, + "step": 12734 + }, + { + "epoch": 0.3903089371092313, + "grad_norm": 1.5020936508882299, + "learning_rate": 1.3929650497044461e-05, + "loss": 0.7582, + "step": 12735 + }, + { + "epoch": 0.3903395856319725, + "grad_norm": 1.5149467063729427, + "learning_rate": 1.3928737696166749e-05, + "loss": 0.6883, + "step": 12736 + }, + { + "epoch": 0.3903702341547137, + "grad_norm": 1.6010724353250156, + "learning_rate": 1.392782485657818e-05, + "loss": 0.7309, + "step": 12737 + }, + { + "epoch": 0.39040088267745493, + "grad_norm": 0.7378297340173291, + "learning_rate": 1.3926911978287752e-05, + "loss": 0.5914, + "step": 12738 + }, + { + "epoch": 0.39043153120019614, + "grad_norm": 0.674191791567954, + "learning_rate": 1.3925999061304449e-05, + "loss": 0.5771, + "step": 12739 + }, + { + "epoch": 0.39046217972293734, + "grad_norm": 1.5058295977339675, + "learning_rate": 1.3925086105637275e-05, + "loss": 0.6886, + "step": 12740 + }, + { + "epoch": 0.39049282824567855, + "grad_norm": 0.6609641870764071, + "learning_rate": 1.3924173111295227e-05, + "loss": 0.6076, + "step": 12741 + }, + { + "epoch": 0.39052347676841975, + "grad_norm": 1.5109222692657847, + "learning_rate": 1.3923260078287291e-05, + "loss": 0.715, + "step": 12742 + }, + { + "epoch": 0.39055412529116096, + "grad_norm": 1.4772692593054872, + "learning_rate": 1.3922347006622474e-05, + "loss": 0.777, + "step": 12743 + }, + { + "epoch": 0.39058477381390216, + "grad_norm": 0.7005446458892506, + "learning_rate": 1.3921433896309769e-05, + "loss": 0.5846, + "step": 12744 + }, + { + "epoch": 0.39061542233664337, + "grad_norm": 1.4694367115194598, + "learning_rate": 1.392052074735817e-05, + "loss": 0.7923, + "step": 12745 + }, + { + "epoch": 0.3906460708593846, + "grad_norm": 1.5780884065189755, + "learning_rate": 1.3919607559776676e-05, + "loss": 0.7682, + "step": 12746 + }, + { + "epoch": 0.3906767193821258, + "grad_norm": 1.3976579451737137, + "learning_rate": 1.3918694333574288e-05, + "loss": 0.7182, + "step": 12747 + }, + { + "epoch": 0.390707367904867, + "grad_norm": 1.3831661738277101, + "learning_rate": 1.391778106876e-05, + "loss": 0.6251, + "step": 12748 + }, + { + "epoch": 0.3907380164276082, + "grad_norm": 1.3793893054478026, + "learning_rate": 1.3916867765342817e-05, + "loss": 0.6904, + "step": 12749 + }, + { + "epoch": 0.3907686649503494, + "grad_norm": 1.5831279991896505, + "learning_rate": 1.391595442333173e-05, + "loss": 0.864, + "step": 12750 + }, + { + "epoch": 0.3907993134730906, + "grad_norm": 0.6821429837431122, + "learning_rate": 1.3915041042735741e-05, + "loss": 0.6041, + "step": 12751 + }, + { + "epoch": 0.3908299619958318, + "grad_norm": 0.6577637118520429, + "learning_rate": 1.3914127623563853e-05, + "loss": 0.5931, + "step": 12752 + }, + { + "epoch": 0.390860610518573, + "grad_norm": 1.4404409879159494, + "learning_rate": 1.3913214165825069e-05, + "loss": 0.792, + "step": 12753 + }, + { + "epoch": 0.3908912590413142, + "grad_norm": 1.6009634089457765, + "learning_rate": 1.3912300669528376e-05, + "loss": 0.6306, + "step": 12754 + }, + { + "epoch": 0.3909219075640554, + "grad_norm": 1.6991950481656297, + "learning_rate": 1.3911387134682787e-05, + "loss": 0.8339, + "step": 12755 + }, + { + "epoch": 0.39095255608679663, + "grad_norm": 1.3244097842962823, + "learning_rate": 1.39104735612973e-05, + "loss": 0.7562, + "step": 12756 + }, + { + "epoch": 0.39098320460953784, + "grad_norm": 1.4645113441693314, + "learning_rate": 1.3909559949380915e-05, + "loss": 0.6933, + "step": 12757 + }, + { + "epoch": 0.39101385313227904, + "grad_norm": 1.5636437244807868, + "learning_rate": 1.3908646298942639e-05, + "loss": 0.6886, + "step": 12758 + }, + { + "epoch": 0.39104450165502025, + "grad_norm": 0.7137813081645704, + "learning_rate": 1.3907732609991466e-05, + "loss": 0.6235, + "step": 12759 + }, + { + "epoch": 0.39107515017776145, + "grad_norm": 0.6877247324463623, + "learning_rate": 1.3906818882536407e-05, + "loss": 0.625, + "step": 12760 + }, + { + "epoch": 0.39110579870050266, + "grad_norm": 1.7339216292796231, + "learning_rate": 1.3905905116586462e-05, + "loss": 0.7511, + "step": 12761 + }, + { + "epoch": 0.39113644722324387, + "grad_norm": 1.3939489548856852, + "learning_rate": 1.3904991312150635e-05, + "loss": 0.6997, + "step": 12762 + }, + { + "epoch": 0.391167095745985, + "grad_norm": 1.537463047245472, + "learning_rate": 1.3904077469237928e-05, + "loss": 0.7145, + "step": 12763 + }, + { + "epoch": 0.3911977442687262, + "grad_norm": 1.5564460861882286, + "learning_rate": 1.3903163587857348e-05, + "loss": 0.7645, + "step": 12764 + }, + { + "epoch": 0.3912283927914674, + "grad_norm": 0.6811212297025037, + "learning_rate": 1.3902249668017897e-05, + "loss": 0.5859, + "step": 12765 + }, + { + "epoch": 0.39125904131420863, + "grad_norm": 0.6774159079270913, + "learning_rate": 1.3901335709728586e-05, + "loss": 0.6386, + "step": 12766 + }, + { + "epoch": 0.39128968983694984, + "grad_norm": 1.3913144052522806, + "learning_rate": 1.3900421712998409e-05, + "loss": 0.5731, + "step": 12767 + }, + { + "epoch": 0.39132033835969104, + "grad_norm": 1.54521219222632, + "learning_rate": 1.3899507677836383e-05, + "loss": 0.7806, + "step": 12768 + }, + { + "epoch": 0.39135098688243225, + "grad_norm": 1.5707819379745165, + "learning_rate": 1.389859360425151e-05, + "loss": 0.7884, + "step": 12769 + }, + { + "epoch": 0.39138163540517346, + "grad_norm": 1.7151579352696933, + "learning_rate": 1.3897679492252797e-05, + "loss": 0.6587, + "step": 12770 + }, + { + "epoch": 0.39141228392791466, + "grad_norm": 1.4948367688328674, + "learning_rate": 1.389676534184925e-05, + "loss": 0.685, + "step": 12771 + }, + { + "epoch": 0.39144293245065587, + "grad_norm": 0.7097335069533076, + "learning_rate": 1.3895851153049878e-05, + "loss": 0.6022, + "step": 12772 + }, + { + "epoch": 0.3914735809733971, + "grad_norm": 1.4572025142215568, + "learning_rate": 1.3894936925863686e-05, + "loss": 0.7453, + "step": 12773 + }, + { + "epoch": 0.3915042294961383, + "grad_norm": 1.4652443587850903, + "learning_rate": 1.3894022660299684e-05, + "loss": 0.7949, + "step": 12774 + }, + { + "epoch": 0.3915348780188795, + "grad_norm": 1.671810715463574, + "learning_rate": 1.389310835636688e-05, + "loss": 0.7639, + "step": 12775 + }, + { + "epoch": 0.3915655265416207, + "grad_norm": 1.4971962359758166, + "learning_rate": 1.3892194014074285e-05, + "loss": 0.7134, + "step": 12776 + }, + { + "epoch": 0.3915961750643619, + "grad_norm": 1.248008743198935, + "learning_rate": 1.3891279633430907e-05, + "loss": 0.7181, + "step": 12777 + }, + { + "epoch": 0.3916268235871031, + "grad_norm": 1.5609537337178288, + "learning_rate": 1.3890365214445751e-05, + "loss": 0.7659, + "step": 12778 + }, + { + "epoch": 0.3916574721098443, + "grad_norm": 1.6324659922897817, + "learning_rate": 1.3889450757127836e-05, + "loss": 0.8177, + "step": 12779 + }, + { + "epoch": 0.3916881206325855, + "grad_norm": 1.385989138977849, + "learning_rate": 1.3888536261486167e-05, + "loss": 0.7044, + "step": 12780 + }, + { + "epoch": 0.3917187691553267, + "grad_norm": 1.5092808139290326, + "learning_rate": 1.3887621727529754e-05, + "loss": 0.6644, + "step": 12781 + }, + { + "epoch": 0.3917494176780679, + "grad_norm": 1.828026692209118, + "learning_rate": 1.3886707155267607e-05, + "loss": 0.8009, + "step": 12782 + }, + { + "epoch": 0.39178006620080913, + "grad_norm": 1.6076889561612293, + "learning_rate": 1.3885792544708743e-05, + "loss": 0.8496, + "step": 12783 + }, + { + "epoch": 0.39181071472355034, + "grad_norm": 1.5436480341995045, + "learning_rate": 1.388487789586217e-05, + "loss": 0.6738, + "step": 12784 + }, + { + "epoch": 0.39184136324629154, + "grad_norm": 0.6838763083640145, + "learning_rate": 1.38839632087369e-05, + "loss": 0.5747, + "step": 12785 + }, + { + "epoch": 0.39187201176903275, + "grad_norm": 1.3276658948433862, + "learning_rate": 1.388304848334195e-05, + "loss": 0.7399, + "step": 12786 + }, + { + "epoch": 0.39190266029177395, + "grad_norm": 1.4016944054295633, + "learning_rate": 1.3882133719686327e-05, + "loss": 0.6499, + "step": 12787 + }, + { + "epoch": 0.39193330881451516, + "grad_norm": 1.6256563181057577, + "learning_rate": 1.388121891777905e-05, + "loss": 0.7232, + "step": 12788 + }, + { + "epoch": 0.39196395733725636, + "grad_norm": 0.6543903925120604, + "learning_rate": 1.3880304077629125e-05, + "loss": 0.571, + "step": 12789 + }, + { + "epoch": 0.39199460585999757, + "grad_norm": 0.7099539621319484, + "learning_rate": 1.3879389199245576e-05, + "loss": 0.6159, + "step": 12790 + }, + { + "epoch": 0.3920252543827388, + "grad_norm": 1.5417440984312747, + "learning_rate": 1.3878474282637408e-05, + "loss": 0.7285, + "step": 12791 + }, + { + "epoch": 0.39205590290548, + "grad_norm": 1.6261112378653573, + "learning_rate": 1.3877559327813645e-05, + "loss": 0.8182, + "step": 12792 + }, + { + "epoch": 0.3920865514282212, + "grad_norm": 1.422064485039667, + "learning_rate": 1.3876644334783295e-05, + "loss": 0.6818, + "step": 12793 + }, + { + "epoch": 0.39211719995096234, + "grad_norm": 1.3819276490936883, + "learning_rate": 1.387572930355538e-05, + "loss": 0.6922, + "step": 12794 + }, + { + "epoch": 0.39214784847370354, + "grad_norm": 1.5046613132444622, + "learning_rate": 1.3874814234138909e-05, + "loss": 0.67, + "step": 12795 + }, + { + "epoch": 0.39217849699644475, + "grad_norm": 1.5118534333745481, + "learning_rate": 1.3873899126542904e-05, + "loss": 0.6969, + "step": 12796 + }, + { + "epoch": 0.39220914551918595, + "grad_norm": 1.5405117471503964, + "learning_rate": 1.3872983980776378e-05, + "loss": 0.8062, + "step": 12797 + }, + { + "epoch": 0.39223979404192716, + "grad_norm": 1.4156825938298336, + "learning_rate": 1.387206879684835e-05, + "loss": 0.7022, + "step": 12798 + }, + { + "epoch": 0.39227044256466836, + "grad_norm": 1.491876791093136, + "learning_rate": 1.387115357476784e-05, + "loss": 0.7991, + "step": 12799 + }, + { + "epoch": 0.39230109108740957, + "grad_norm": 1.5772116248728147, + "learning_rate": 1.3870238314543861e-05, + "loss": 0.6935, + "step": 12800 + }, + { + "epoch": 0.3923317396101508, + "grad_norm": 1.5502709643411874, + "learning_rate": 1.3869323016185435e-05, + "loss": 0.8271, + "step": 12801 + }, + { + "epoch": 0.392362388132892, + "grad_norm": 0.789213182972834, + "learning_rate": 1.3868407679701575e-05, + "loss": 0.6056, + "step": 12802 + }, + { + "epoch": 0.3923930366556332, + "grad_norm": 0.7134928200946475, + "learning_rate": 1.386749230510131e-05, + "loss": 0.6115, + "step": 12803 + }, + { + "epoch": 0.3924236851783744, + "grad_norm": 1.4310429837619802, + "learning_rate": 1.386657689239365e-05, + "loss": 0.8116, + "step": 12804 + }, + { + "epoch": 0.3924543337011156, + "grad_norm": 0.7218503394303691, + "learning_rate": 1.3865661441587622e-05, + "loss": 0.6227, + "step": 12805 + }, + { + "epoch": 0.3924849822238568, + "grad_norm": 1.528963816275349, + "learning_rate": 1.3864745952692238e-05, + "loss": 0.7514, + "step": 12806 + }, + { + "epoch": 0.392515630746598, + "grad_norm": 0.6957799803946134, + "learning_rate": 1.386383042571653e-05, + "loss": 0.5934, + "step": 12807 + }, + { + "epoch": 0.3925462792693392, + "grad_norm": 1.5520017758125604, + "learning_rate": 1.3862914860669506e-05, + "loss": 0.8499, + "step": 12808 + }, + { + "epoch": 0.3925769277920804, + "grad_norm": 1.4343269064790511, + "learning_rate": 1.3861999257560194e-05, + "loss": 0.7079, + "step": 12809 + }, + { + "epoch": 0.3926075763148216, + "grad_norm": 1.647627954202362, + "learning_rate": 1.3861083616397618e-05, + "loss": 0.8207, + "step": 12810 + }, + { + "epoch": 0.39263822483756283, + "grad_norm": 1.4336448918618157, + "learning_rate": 1.3860167937190797e-05, + "loss": 0.7663, + "step": 12811 + }, + { + "epoch": 0.39266887336030404, + "grad_norm": 1.815923951078475, + "learning_rate": 1.3859252219948754e-05, + "loss": 0.833, + "step": 12812 + }, + { + "epoch": 0.39269952188304524, + "grad_norm": 1.502858876622997, + "learning_rate": 1.3858336464680506e-05, + "loss": 0.8022, + "step": 12813 + }, + { + "epoch": 0.39273017040578645, + "grad_norm": 1.6272418776395772, + "learning_rate": 1.3857420671395087e-05, + "loss": 0.7844, + "step": 12814 + }, + { + "epoch": 0.39276081892852766, + "grad_norm": 1.4660809642579002, + "learning_rate": 1.3856504840101517e-05, + "loss": 0.7503, + "step": 12815 + }, + { + "epoch": 0.39279146745126886, + "grad_norm": 1.6363595924423744, + "learning_rate": 1.3855588970808814e-05, + "loss": 0.8512, + "step": 12816 + }, + { + "epoch": 0.39282211597401007, + "grad_norm": 1.34207273902633, + "learning_rate": 1.3854673063526005e-05, + "loss": 0.7486, + "step": 12817 + }, + { + "epoch": 0.39285276449675127, + "grad_norm": 1.5201041081674806, + "learning_rate": 1.3853757118262122e-05, + "loss": 0.6873, + "step": 12818 + }, + { + "epoch": 0.3928834130194925, + "grad_norm": 1.29947953376899, + "learning_rate": 1.385284113502618e-05, + "loss": 0.7478, + "step": 12819 + }, + { + "epoch": 0.3929140615422337, + "grad_norm": 1.3029163053866697, + "learning_rate": 1.385192511382721e-05, + "loss": 0.6277, + "step": 12820 + }, + { + "epoch": 0.3929447100649749, + "grad_norm": 1.5239184398444798, + "learning_rate": 1.3851009054674233e-05, + "loss": 0.803, + "step": 12821 + }, + { + "epoch": 0.3929753585877161, + "grad_norm": 1.7128039356264881, + "learning_rate": 1.3850092957576284e-05, + "loss": 0.7423, + "step": 12822 + }, + { + "epoch": 0.3930060071104573, + "grad_norm": 0.7749041567945204, + "learning_rate": 1.384917682254238e-05, + "loss": 0.624, + "step": 12823 + }, + { + "epoch": 0.3930366556331985, + "grad_norm": 1.4859224310545753, + "learning_rate": 1.3848260649581552e-05, + "loss": 0.7161, + "step": 12824 + }, + { + "epoch": 0.39306730415593966, + "grad_norm": 1.503365037793259, + "learning_rate": 1.3847344438702828e-05, + "loss": 0.799, + "step": 12825 + }, + { + "epoch": 0.39309795267868086, + "grad_norm": 1.3881822170942255, + "learning_rate": 1.3846428189915236e-05, + "loss": 0.7098, + "step": 12826 + }, + { + "epoch": 0.39312860120142207, + "grad_norm": 1.34031590227559, + "learning_rate": 1.3845511903227801e-05, + "loss": 0.7257, + "step": 12827 + }, + { + "epoch": 0.3931592497241633, + "grad_norm": 1.4997000367947528, + "learning_rate": 1.3844595578649554e-05, + "loss": 0.6303, + "step": 12828 + }, + { + "epoch": 0.3931898982469045, + "grad_norm": 1.5656986953791407, + "learning_rate": 1.3843679216189522e-05, + "loss": 0.7605, + "step": 12829 + }, + { + "epoch": 0.3932205467696457, + "grad_norm": 1.3818587810334517, + "learning_rate": 1.384276281585674e-05, + "loss": 0.7843, + "step": 12830 + }, + { + "epoch": 0.3932511952923869, + "grad_norm": 1.421928303738432, + "learning_rate": 1.3841846377660227e-05, + "loss": 0.762, + "step": 12831 + }, + { + "epoch": 0.3932818438151281, + "grad_norm": 1.419911561325346, + "learning_rate": 1.3840929901609019e-05, + "loss": 0.7651, + "step": 12832 + }, + { + "epoch": 0.3933124923378693, + "grad_norm": 1.389382438029599, + "learning_rate": 1.3840013387712147e-05, + "loss": 0.6976, + "step": 12833 + }, + { + "epoch": 0.3933431408606105, + "grad_norm": 1.5058019184312061, + "learning_rate": 1.3839096835978642e-05, + "loss": 0.8348, + "step": 12834 + }, + { + "epoch": 0.3933737893833517, + "grad_norm": 0.69244281674268, + "learning_rate": 1.3838180246417534e-05, + "loss": 0.6128, + "step": 12835 + }, + { + "epoch": 0.3934044379060929, + "grad_norm": 1.3407268161942087, + "learning_rate": 1.3837263619037852e-05, + "loss": 0.6607, + "step": 12836 + }, + { + "epoch": 0.3934350864288341, + "grad_norm": 1.558101284752776, + "learning_rate": 1.3836346953848633e-05, + "loss": 0.8078, + "step": 12837 + }, + { + "epoch": 0.39346573495157533, + "grad_norm": 1.3926456793718178, + "learning_rate": 1.3835430250858904e-05, + "loss": 0.7649, + "step": 12838 + }, + { + "epoch": 0.39349638347431654, + "grad_norm": 1.3512841819517802, + "learning_rate": 1.3834513510077696e-05, + "loss": 0.7132, + "step": 12839 + }, + { + "epoch": 0.39352703199705774, + "grad_norm": 1.423965163096782, + "learning_rate": 1.383359673151405e-05, + "loss": 0.772, + "step": 12840 + }, + { + "epoch": 0.39355768051979895, + "grad_norm": 1.4904646564635702, + "learning_rate": 1.3832679915176992e-05, + "loss": 0.7803, + "step": 12841 + }, + { + "epoch": 0.39358832904254015, + "grad_norm": 1.5391478735518558, + "learning_rate": 1.383176306107556e-05, + "loss": 0.7575, + "step": 12842 + }, + { + "epoch": 0.39361897756528136, + "grad_norm": 1.4640597680156717, + "learning_rate": 1.3830846169218784e-05, + "loss": 0.7275, + "step": 12843 + }, + { + "epoch": 0.39364962608802256, + "grad_norm": 1.498060960482114, + "learning_rate": 1.3829929239615703e-05, + "loss": 0.7923, + "step": 12844 + }, + { + "epoch": 0.39368027461076377, + "grad_norm": 1.6527624564897692, + "learning_rate": 1.3829012272275348e-05, + "loss": 0.8533, + "step": 12845 + }, + { + "epoch": 0.393710923133505, + "grad_norm": 1.5357284649179843, + "learning_rate": 1.3828095267206755e-05, + "loss": 0.7922, + "step": 12846 + }, + { + "epoch": 0.3937415716562462, + "grad_norm": 0.7158399232592706, + "learning_rate": 1.382717822441896e-05, + "loss": 0.6298, + "step": 12847 + }, + { + "epoch": 0.3937722201789874, + "grad_norm": 1.4371110866432013, + "learning_rate": 1.3826261143920998e-05, + "loss": 0.7833, + "step": 12848 + }, + { + "epoch": 0.3938028687017286, + "grad_norm": 1.5817828907903961, + "learning_rate": 1.3825344025721909e-05, + "loss": 0.8454, + "step": 12849 + }, + { + "epoch": 0.3938335172244698, + "grad_norm": 0.6687944433067937, + "learning_rate": 1.3824426869830724e-05, + "loss": 0.6239, + "step": 12850 + }, + { + "epoch": 0.393864165747211, + "grad_norm": 1.2756775955541726, + "learning_rate": 1.382350967625648e-05, + "loss": 0.6622, + "step": 12851 + }, + { + "epoch": 0.3938948142699522, + "grad_norm": 0.6612694463890496, + "learning_rate": 1.3822592445008222e-05, + "loss": 0.5713, + "step": 12852 + }, + { + "epoch": 0.3939254627926934, + "grad_norm": 1.6290825700864298, + "learning_rate": 1.3821675176094982e-05, + "loss": 0.7802, + "step": 12853 + }, + { + "epoch": 0.3939561113154346, + "grad_norm": 1.423005038476687, + "learning_rate": 1.3820757869525796e-05, + "loss": 0.7636, + "step": 12854 + }, + { + "epoch": 0.3939867598381758, + "grad_norm": 1.5839643725069636, + "learning_rate": 1.3819840525309704e-05, + "loss": 0.7621, + "step": 12855 + }, + { + "epoch": 0.394017408360917, + "grad_norm": 1.2750054279019916, + "learning_rate": 1.3818923143455748e-05, + "loss": 0.5782, + "step": 12856 + }, + { + "epoch": 0.3940480568836582, + "grad_norm": 1.4013416333064974, + "learning_rate": 1.381800572397297e-05, + "loss": 0.763, + "step": 12857 + }, + { + "epoch": 0.3940787054063994, + "grad_norm": 1.4395703373030875, + "learning_rate": 1.3817088266870397e-05, + "loss": 0.7835, + "step": 12858 + }, + { + "epoch": 0.3941093539291406, + "grad_norm": 1.4699640968372365, + "learning_rate": 1.381617077215708e-05, + "loss": 0.783, + "step": 12859 + }, + { + "epoch": 0.3941400024518818, + "grad_norm": 1.1978828066594636, + "learning_rate": 1.3815253239842054e-05, + "loss": 0.6727, + "step": 12860 + }, + { + "epoch": 0.394170650974623, + "grad_norm": 1.4605503094313241, + "learning_rate": 1.3814335669934367e-05, + "loss": 0.6873, + "step": 12861 + }, + { + "epoch": 0.3942012994973642, + "grad_norm": 1.6468830790499598, + "learning_rate": 1.3813418062443048e-05, + "loss": 0.7914, + "step": 12862 + }, + { + "epoch": 0.3942319480201054, + "grad_norm": 1.3418438436377675, + "learning_rate": 1.381250041737715e-05, + "loss": 0.7797, + "step": 12863 + }, + { + "epoch": 0.3942625965428466, + "grad_norm": 1.5073872381891698, + "learning_rate": 1.3811582734745707e-05, + "loss": 0.7496, + "step": 12864 + }, + { + "epoch": 0.3942932450655878, + "grad_norm": 0.7404151591141143, + "learning_rate": 1.3810665014557765e-05, + "loss": 0.5961, + "step": 12865 + }, + { + "epoch": 0.39432389358832903, + "grad_norm": 1.5501702667899422, + "learning_rate": 1.3809747256822368e-05, + "loss": 0.7672, + "step": 12866 + }, + { + "epoch": 0.39435454211107024, + "grad_norm": 1.4604798361965836, + "learning_rate": 1.3808829461548554e-05, + "loss": 0.7851, + "step": 12867 + }, + { + "epoch": 0.39438519063381144, + "grad_norm": 1.6807167597330794, + "learning_rate": 1.3807911628745368e-05, + "loss": 0.8293, + "step": 12868 + }, + { + "epoch": 0.39441583915655265, + "grad_norm": 1.3684233828839318, + "learning_rate": 1.3806993758421853e-05, + "loss": 0.7883, + "step": 12869 + }, + { + "epoch": 0.39444648767929386, + "grad_norm": 1.2915544623656185, + "learning_rate": 1.380607585058706e-05, + "loss": 0.665, + "step": 12870 + }, + { + "epoch": 0.39447713620203506, + "grad_norm": 1.5333169366359582, + "learning_rate": 1.3805157905250023e-05, + "loss": 0.7592, + "step": 12871 + }, + { + "epoch": 0.39450778472477627, + "grad_norm": 1.4917060894698235, + "learning_rate": 1.3804239922419795e-05, + "loss": 0.6443, + "step": 12872 + }, + { + "epoch": 0.3945384332475175, + "grad_norm": 1.6733937712576237, + "learning_rate": 1.3803321902105415e-05, + "loss": 0.7383, + "step": 12873 + }, + { + "epoch": 0.3945690817702587, + "grad_norm": 1.671300924019045, + "learning_rate": 1.3802403844315933e-05, + "loss": 0.7129, + "step": 12874 + }, + { + "epoch": 0.3945997302929999, + "grad_norm": 1.4756959425529734, + "learning_rate": 1.380148574906039e-05, + "loss": 0.7643, + "step": 12875 + }, + { + "epoch": 0.3946303788157411, + "grad_norm": 1.331681457123088, + "learning_rate": 1.380056761634784e-05, + "loss": 0.721, + "step": 12876 + }, + { + "epoch": 0.3946610273384823, + "grad_norm": 1.525949636633177, + "learning_rate": 1.3799649446187322e-05, + "loss": 0.7783, + "step": 12877 + }, + { + "epoch": 0.3946916758612235, + "grad_norm": 1.3197785977224579, + "learning_rate": 1.3798731238587885e-05, + "loss": 0.72, + "step": 12878 + }, + { + "epoch": 0.3947223243839647, + "grad_norm": 1.5497969099879123, + "learning_rate": 1.379781299355858e-05, + "loss": 0.7868, + "step": 12879 + }, + { + "epoch": 0.3947529729067059, + "grad_norm": 0.7272240638124311, + "learning_rate": 1.379689471110845e-05, + "loss": 0.6174, + "step": 12880 + }, + { + "epoch": 0.3947836214294471, + "grad_norm": 0.6959953841171166, + "learning_rate": 1.3795976391246547e-05, + "loss": 0.6038, + "step": 12881 + }, + { + "epoch": 0.3948142699521883, + "grad_norm": 1.358769564944867, + "learning_rate": 1.3795058033981911e-05, + "loss": 0.7319, + "step": 12882 + }, + { + "epoch": 0.39484491847492953, + "grad_norm": 1.3509915122569325, + "learning_rate": 1.3794139639323603e-05, + "loss": 0.7266, + "step": 12883 + }, + { + "epoch": 0.39487556699767073, + "grad_norm": 1.5822758525610654, + "learning_rate": 1.3793221207280664e-05, + "loss": 0.7847, + "step": 12884 + }, + { + "epoch": 0.39490621552041194, + "grad_norm": 1.4790029413569266, + "learning_rate": 1.379230273786215e-05, + "loss": 0.7935, + "step": 12885 + }, + { + "epoch": 0.39493686404315315, + "grad_norm": 1.4779552195754426, + "learning_rate": 1.3791384231077101e-05, + "loss": 0.7507, + "step": 12886 + }, + { + "epoch": 0.3949675125658943, + "grad_norm": 1.296212904456305, + "learning_rate": 1.3790465686934578e-05, + "loss": 0.7567, + "step": 12887 + }, + { + "epoch": 0.3949981610886355, + "grad_norm": 1.3378042722224257, + "learning_rate": 1.3789547105443624e-05, + "loss": 0.6679, + "step": 12888 + }, + { + "epoch": 0.3950288096113767, + "grad_norm": 1.5672608560621872, + "learning_rate": 1.3788628486613293e-05, + "loss": 0.8254, + "step": 12889 + }, + { + "epoch": 0.3950594581341179, + "grad_norm": 1.3905486942262784, + "learning_rate": 1.3787709830452636e-05, + "loss": 0.7696, + "step": 12890 + }, + { + "epoch": 0.3950901066568591, + "grad_norm": 0.803980415958718, + "learning_rate": 1.3786791136970705e-05, + "loss": 0.595, + "step": 12891 + }, + { + "epoch": 0.3951207551796003, + "grad_norm": 1.296948268786355, + "learning_rate": 1.3785872406176555e-05, + "loss": 0.7205, + "step": 12892 + }, + { + "epoch": 0.39515140370234153, + "grad_norm": 1.260561210651311, + "learning_rate": 1.3784953638079232e-05, + "loss": 0.7433, + "step": 12893 + }, + { + "epoch": 0.39518205222508274, + "grad_norm": 0.6842747949827473, + "learning_rate": 1.3784034832687794e-05, + "loss": 0.5803, + "step": 12894 + }, + { + "epoch": 0.39521270074782394, + "grad_norm": 1.4878605691295852, + "learning_rate": 1.3783115990011292e-05, + "loss": 0.8447, + "step": 12895 + }, + { + "epoch": 0.39524334927056515, + "grad_norm": 0.6838795664594663, + "learning_rate": 1.3782197110058779e-05, + "loss": 0.5941, + "step": 12896 + }, + { + "epoch": 0.39527399779330635, + "grad_norm": 1.3364419560107157, + "learning_rate": 1.3781278192839312e-05, + "loss": 0.6689, + "step": 12897 + }, + { + "epoch": 0.39530464631604756, + "grad_norm": 1.3874524927308103, + "learning_rate": 1.3780359238361943e-05, + "loss": 0.7659, + "step": 12898 + }, + { + "epoch": 0.39533529483878876, + "grad_norm": 1.4066365077516454, + "learning_rate": 1.3779440246635726e-05, + "loss": 0.7962, + "step": 12899 + }, + { + "epoch": 0.39536594336152997, + "grad_norm": 1.5296826645580919, + "learning_rate": 1.377852121766972e-05, + "loss": 0.6415, + "step": 12900 + }, + { + "epoch": 0.3953965918842712, + "grad_norm": 0.7166079725404848, + "learning_rate": 1.3777602151472975e-05, + "loss": 0.5688, + "step": 12901 + }, + { + "epoch": 0.3954272404070124, + "grad_norm": 1.6612604628210468, + "learning_rate": 1.377668304805455e-05, + "loss": 0.8146, + "step": 12902 + }, + { + "epoch": 0.3954578889297536, + "grad_norm": 1.5333742974897244, + "learning_rate": 1.3775763907423503e-05, + "loss": 0.8365, + "step": 12903 + }, + { + "epoch": 0.3954885374524948, + "grad_norm": 1.2443950523535294, + "learning_rate": 1.3774844729588886e-05, + "loss": 0.677, + "step": 12904 + }, + { + "epoch": 0.395519185975236, + "grad_norm": 1.3693901990186965, + "learning_rate": 1.3773925514559756e-05, + "loss": 0.5964, + "step": 12905 + }, + { + "epoch": 0.3955498344979772, + "grad_norm": 1.42091928112623, + "learning_rate": 1.3773006262345177e-05, + "loss": 0.8379, + "step": 12906 + }, + { + "epoch": 0.3955804830207184, + "grad_norm": 1.4278914828697367, + "learning_rate": 1.37720869729542e-05, + "loss": 0.6592, + "step": 12907 + }, + { + "epoch": 0.3956111315434596, + "grad_norm": 1.4737957707276337, + "learning_rate": 1.3771167646395881e-05, + "loss": 0.8444, + "step": 12908 + }, + { + "epoch": 0.3956417800662008, + "grad_norm": 1.6456046313759822, + "learning_rate": 1.3770248282679286e-05, + "loss": 0.7669, + "step": 12909 + }, + { + "epoch": 0.395672428588942, + "grad_norm": 1.5270628677235458, + "learning_rate": 1.3769328881813469e-05, + "loss": 0.8074, + "step": 12910 + }, + { + "epoch": 0.39570307711168323, + "grad_norm": 1.5519539344553184, + "learning_rate": 1.3768409443807493e-05, + "loss": 0.8082, + "step": 12911 + }, + { + "epoch": 0.39573372563442444, + "grad_norm": 1.467577102350733, + "learning_rate": 1.376748996867041e-05, + "loss": 0.8021, + "step": 12912 + }, + { + "epoch": 0.39576437415716564, + "grad_norm": 1.5390649992394676, + "learning_rate": 1.376657045641129e-05, + "loss": 0.7427, + "step": 12913 + }, + { + "epoch": 0.39579502267990685, + "grad_norm": 1.461288190828261, + "learning_rate": 1.3765650907039181e-05, + "loss": 0.767, + "step": 12914 + }, + { + "epoch": 0.39582567120264806, + "grad_norm": 1.5890226704106105, + "learning_rate": 1.3764731320563156e-05, + "loss": 0.7082, + "step": 12915 + }, + { + "epoch": 0.39585631972538926, + "grad_norm": 1.519178112676567, + "learning_rate": 1.3763811696992266e-05, + "loss": 0.7769, + "step": 12916 + }, + { + "epoch": 0.39588696824813047, + "grad_norm": 1.526455419032349, + "learning_rate": 1.376289203633558e-05, + "loss": 0.7137, + "step": 12917 + }, + { + "epoch": 0.3959176167708716, + "grad_norm": 1.9115468047791844, + "learning_rate": 1.3761972338602152e-05, + "loss": 0.8204, + "step": 12918 + }, + { + "epoch": 0.3959482652936128, + "grad_norm": 1.3355126176538812, + "learning_rate": 1.3761052603801053e-05, + "loss": 0.7473, + "step": 12919 + }, + { + "epoch": 0.39597891381635403, + "grad_norm": 0.7629517243772295, + "learning_rate": 1.3760132831941336e-05, + "loss": 0.6251, + "step": 12920 + }, + { + "epoch": 0.39600956233909523, + "grad_norm": 1.3990716157557257, + "learning_rate": 1.375921302303207e-05, + "loss": 0.6901, + "step": 12921 + }, + { + "epoch": 0.39604021086183644, + "grad_norm": 1.4792313168153959, + "learning_rate": 1.3758293177082317e-05, + "loss": 0.6652, + "step": 12922 + }, + { + "epoch": 0.39607085938457764, + "grad_norm": 1.399715228964863, + "learning_rate": 1.3757373294101135e-05, + "loss": 0.7212, + "step": 12923 + }, + { + "epoch": 0.39610150790731885, + "grad_norm": 0.6862033728594813, + "learning_rate": 1.3756453374097596e-05, + "loss": 0.6302, + "step": 12924 + }, + { + "epoch": 0.39613215643006006, + "grad_norm": 1.6612396909025122, + "learning_rate": 1.3755533417080759e-05, + "loss": 0.7887, + "step": 12925 + }, + { + "epoch": 0.39616280495280126, + "grad_norm": 1.302488465777545, + "learning_rate": 1.3754613423059695e-05, + "loss": 0.706, + "step": 12926 + }, + { + "epoch": 0.39619345347554247, + "grad_norm": 1.4315449198594, + "learning_rate": 1.3753693392043461e-05, + "loss": 0.8517, + "step": 12927 + }, + { + "epoch": 0.3962241019982837, + "grad_norm": 1.4922427989928015, + "learning_rate": 1.3752773324041124e-05, + "loss": 0.5961, + "step": 12928 + }, + { + "epoch": 0.3962547505210249, + "grad_norm": 1.3948119806634034, + "learning_rate": 1.3751853219061752e-05, + "loss": 0.6163, + "step": 12929 + }, + { + "epoch": 0.3962853990437661, + "grad_norm": 1.2869730317084018, + "learning_rate": 1.3750933077114414e-05, + "loss": 0.7577, + "step": 12930 + }, + { + "epoch": 0.3963160475665073, + "grad_norm": 1.57734777237557, + "learning_rate": 1.3750012898208169e-05, + "loss": 0.7625, + "step": 12931 + }, + { + "epoch": 0.3963466960892485, + "grad_norm": 1.421357850032801, + "learning_rate": 1.374909268235209e-05, + "loss": 0.7801, + "step": 12932 + }, + { + "epoch": 0.3963773446119897, + "grad_norm": 0.7108076321348685, + "learning_rate": 1.3748172429555237e-05, + "loss": 0.6125, + "step": 12933 + }, + { + "epoch": 0.3964079931347309, + "grad_norm": 1.420820351415182, + "learning_rate": 1.3747252139826688e-05, + "loss": 0.6656, + "step": 12934 + }, + { + "epoch": 0.3964386416574721, + "grad_norm": 1.4265152122734337, + "learning_rate": 1.3746331813175501e-05, + "loss": 0.7795, + "step": 12935 + }, + { + "epoch": 0.3964692901802133, + "grad_norm": 1.5502492328304016, + "learning_rate": 1.3745411449610749e-05, + "loss": 0.8941, + "step": 12936 + }, + { + "epoch": 0.3964999387029545, + "grad_norm": 0.6691361411727162, + "learning_rate": 1.37444910491415e-05, + "loss": 0.6065, + "step": 12937 + }, + { + "epoch": 0.39653058722569573, + "grad_norm": 0.6640106888003836, + "learning_rate": 1.3743570611776822e-05, + "loss": 0.5774, + "step": 12938 + }, + { + "epoch": 0.39656123574843694, + "grad_norm": 1.3413339658330345, + "learning_rate": 1.3742650137525785e-05, + "loss": 0.6757, + "step": 12939 + }, + { + "epoch": 0.39659188427117814, + "grad_norm": 1.4406895719191257, + "learning_rate": 1.374172962639746e-05, + "loss": 0.7031, + "step": 12940 + }, + { + "epoch": 0.39662253279391935, + "grad_norm": 1.4343987182786755, + "learning_rate": 1.3740809078400914e-05, + "loss": 0.7383, + "step": 12941 + }, + { + "epoch": 0.39665318131666055, + "grad_norm": 0.684015428178063, + "learning_rate": 1.3739888493545222e-05, + "loss": 0.6004, + "step": 12942 + }, + { + "epoch": 0.39668382983940176, + "grad_norm": 1.3205465198351074, + "learning_rate": 1.3738967871839449e-05, + "loss": 0.6853, + "step": 12943 + }, + { + "epoch": 0.39671447836214296, + "grad_norm": 1.3945910515343134, + "learning_rate": 1.3738047213292667e-05, + "loss": 0.8245, + "step": 12944 + }, + { + "epoch": 0.39674512688488417, + "grad_norm": 0.6430789912996251, + "learning_rate": 1.3737126517913952e-05, + "loss": 0.5943, + "step": 12945 + }, + { + "epoch": 0.3967757754076254, + "grad_norm": 1.5019295640457957, + "learning_rate": 1.3736205785712372e-05, + "loss": 0.7206, + "step": 12946 + }, + { + "epoch": 0.3968064239303666, + "grad_norm": 1.443845524409405, + "learning_rate": 1.3735285016697004e-05, + "loss": 0.7908, + "step": 12947 + }, + { + "epoch": 0.3968370724531078, + "grad_norm": 1.3514747723783376, + "learning_rate": 1.3734364210876914e-05, + "loss": 0.7437, + "step": 12948 + }, + { + "epoch": 0.39686772097584894, + "grad_norm": 1.4926718910055266, + "learning_rate": 1.3733443368261176e-05, + "loss": 0.8042, + "step": 12949 + }, + { + "epoch": 0.39689836949859014, + "grad_norm": 1.3299885037030048, + "learning_rate": 1.373252248885887e-05, + "loss": 0.6872, + "step": 12950 + }, + { + "epoch": 0.39692901802133135, + "grad_norm": 0.6691021362517133, + "learning_rate": 1.373160157267906e-05, + "loss": 0.6012, + "step": 12951 + }, + { + "epoch": 0.39695966654407255, + "grad_norm": 1.6251346535342364, + "learning_rate": 1.3730680619730827e-05, + "loss": 0.764, + "step": 12952 + }, + { + "epoch": 0.39699031506681376, + "grad_norm": 1.3332251418661958, + "learning_rate": 1.3729759630023245e-05, + "loss": 0.7468, + "step": 12953 + }, + { + "epoch": 0.39702096358955496, + "grad_norm": 0.6630946900790544, + "learning_rate": 1.3728838603565387e-05, + "loss": 0.595, + "step": 12954 + }, + { + "epoch": 0.39705161211229617, + "grad_norm": 1.5289320661785812, + "learning_rate": 1.3727917540366326e-05, + "loss": 0.6845, + "step": 12955 + }, + { + "epoch": 0.3970822606350374, + "grad_norm": 1.5072478604514354, + "learning_rate": 1.372699644043514e-05, + "loss": 0.7393, + "step": 12956 + }, + { + "epoch": 0.3971129091577786, + "grad_norm": 1.3527305557937013, + "learning_rate": 1.3726075303780906e-05, + "loss": 0.5991, + "step": 12957 + }, + { + "epoch": 0.3971435576805198, + "grad_norm": 0.6652343340242455, + "learning_rate": 1.3725154130412699e-05, + "loss": 0.59, + "step": 12958 + }, + { + "epoch": 0.397174206203261, + "grad_norm": 1.2741239166742662, + "learning_rate": 1.3724232920339592e-05, + "loss": 0.6504, + "step": 12959 + }, + { + "epoch": 0.3972048547260022, + "grad_norm": 1.3442596201167623, + "learning_rate": 1.3723311673570667e-05, + "loss": 0.6884, + "step": 12960 + }, + { + "epoch": 0.3972355032487434, + "grad_norm": 1.4814495451206104, + "learning_rate": 1.3722390390115002e-05, + "loss": 0.8114, + "step": 12961 + }, + { + "epoch": 0.3972661517714846, + "grad_norm": 1.447294737550096, + "learning_rate": 1.3721469069981668e-05, + "loss": 0.709, + "step": 12962 + }, + { + "epoch": 0.3972968002942258, + "grad_norm": 1.5415103928296479, + "learning_rate": 1.3720547713179748e-05, + "loss": 0.8278, + "step": 12963 + }, + { + "epoch": 0.397327448816967, + "grad_norm": 1.6511307684602146, + "learning_rate": 1.3719626319718321e-05, + "loss": 0.6945, + "step": 12964 + }, + { + "epoch": 0.3973580973397082, + "grad_norm": 1.2787089796275641, + "learning_rate": 1.3718704889606465e-05, + "loss": 0.7546, + "step": 12965 + }, + { + "epoch": 0.39738874586244943, + "grad_norm": 0.6933335153596821, + "learning_rate": 1.3717783422853255e-05, + "loss": 0.5868, + "step": 12966 + }, + { + "epoch": 0.39741939438519064, + "grad_norm": 1.4061630940512577, + "learning_rate": 1.3716861919467775e-05, + "loss": 0.7134, + "step": 12967 + }, + { + "epoch": 0.39745004290793184, + "grad_norm": 1.542595992582869, + "learning_rate": 1.3715940379459103e-05, + "loss": 0.7578, + "step": 12968 + }, + { + "epoch": 0.39748069143067305, + "grad_norm": 1.6805255128731102, + "learning_rate": 1.3715018802836322e-05, + "loss": 0.8434, + "step": 12969 + }, + { + "epoch": 0.39751133995341426, + "grad_norm": 1.6651543013281946, + "learning_rate": 1.3714097189608508e-05, + "loss": 0.7619, + "step": 12970 + }, + { + "epoch": 0.39754198847615546, + "grad_norm": 1.4799662051334688, + "learning_rate": 1.3713175539784744e-05, + "loss": 0.8574, + "step": 12971 + }, + { + "epoch": 0.39757263699889667, + "grad_norm": 0.6796210599488872, + "learning_rate": 1.3712253853374111e-05, + "loss": 0.5725, + "step": 12972 + }, + { + "epoch": 0.3976032855216379, + "grad_norm": 1.5240428495445282, + "learning_rate": 1.3711332130385695e-05, + "loss": 0.791, + "step": 12973 + }, + { + "epoch": 0.3976339340443791, + "grad_norm": 1.5955433837379398, + "learning_rate": 1.3710410370828569e-05, + "loss": 0.7273, + "step": 12974 + }, + { + "epoch": 0.3976645825671203, + "grad_norm": 1.5007953092771598, + "learning_rate": 1.3709488574711825e-05, + "loss": 0.7451, + "step": 12975 + }, + { + "epoch": 0.3976952310898615, + "grad_norm": 1.5954523875960114, + "learning_rate": 1.3708566742044538e-05, + "loss": 0.6995, + "step": 12976 + }, + { + "epoch": 0.3977258796126027, + "grad_norm": 1.3288275800440135, + "learning_rate": 1.3707644872835793e-05, + "loss": 0.7259, + "step": 12977 + }, + { + "epoch": 0.3977565281353439, + "grad_norm": 1.5238732908710642, + "learning_rate": 1.3706722967094676e-05, + "loss": 0.7381, + "step": 12978 + }, + { + "epoch": 0.3977871766580851, + "grad_norm": 1.5159219621362487, + "learning_rate": 1.3705801024830267e-05, + "loss": 0.7573, + "step": 12979 + }, + { + "epoch": 0.39781782518082626, + "grad_norm": 1.4386268200767511, + "learning_rate": 1.3704879046051657e-05, + "loss": 0.7757, + "step": 12980 + }, + { + "epoch": 0.39784847370356746, + "grad_norm": 1.4912398226892976, + "learning_rate": 1.3703957030767923e-05, + "loss": 0.7566, + "step": 12981 + }, + { + "epoch": 0.39787912222630867, + "grad_norm": 1.4013248560620823, + "learning_rate": 1.3703034978988152e-05, + "loss": 0.6705, + "step": 12982 + }, + { + "epoch": 0.3979097707490499, + "grad_norm": 1.4883789417311906, + "learning_rate": 1.3702112890721428e-05, + "loss": 0.7186, + "step": 12983 + }, + { + "epoch": 0.3979404192717911, + "grad_norm": 1.5743153052267376, + "learning_rate": 1.3701190765976844e-05, + "loss": 0.786, + "step": 12984 + }, + { + "epoch": 0.3979710677945323, + "grad_norm": 1.5454619706619805, + "learning_rate": 1.3700268604763477e-05, + "loss": 0.802, + "step": 12985 + }, + { + "epoch": 0.3980017163172735, + "grad_norm": 1.408443751789179, + "learning_rate": 1.3699346407090416e-05, + "loss": 0.7193, + "step": 12986 + }, + { + "epoch": 0.3980323648400147, + "grad_norm": 1.4082331530049585, + "learning_rate": 1.3698424172966748e-05, + "loss": 0.7744, + "step": 12987 + }, + { + "epoch": 0.3980630133627559, + "grad_norm": 1.6105682086706523, + "learning_rate": 1.3697501902401565e-05, + "loss": 0.7785, + "step": 12988 + }, + { + "epoch": 0.3980936618854971, + "grad_norm": 1.465585116130115, + "learning_rate": 1.3696579595403944e-05, + "loss": 0.65, + "step": 12989 + }, + { + "epoch": 0.3981243104082383, + "grad_norm": 1.5741881260596977, + "learning_rate": 1.3695657251982983e-05, + "loss": 0.8369, + "step": 12990 + }, + { + "epoch": 0.3981549589309795, + "grad_norm": 1.3937006643156882, + "learning_rate": 1.3694734872147761e-05, + "loss": 0.7517, + "step": 12991 + }, + { + "epoch": 0.3981856074537207, + "grad_norm": 1.6490211712733445, + "learning_rate": 1.3693812455907373e-05, + "loss": 0.834, + "step": 12992 + }, + { + "epoch": 0.39821625597646193, + "grad_norm": 1.465586070344737, + "learning_rate": 1.3692890003270909e-05, + "loss": 0.8466, + "step": 12993 + }, + { + "epoch": 0.39824690449920314, + "grad_norm": 1.5939201690728404, + "learning_rate": 1.3691967514247451e-05, + "loss": 0.7907, + "step": 12994 + }, + { + "epoch": 0.39827755302194434, + "grad_norm": 1.3337369391265081, + "learning_rate": 1.3691044988846094e-05, + "loss": 0.6753, + "step": 12995 + }, + { + "epoch": 0.39830820154468555, + "grad_norm": 0.6907423928278631, + "learning_rate": 1.3690122427075924e-05, + "loss": 0.5775, + "step": 12996 + }, + { + "epoch": 0.39833885006742675, + "grad_norm": 1.5945223260117567, + "learning_rate": 1.3689199828946038e-05, + "loss": 0.7725, + "step": 12997 + }, + { + "epoch": 0.39836949859016796, + "grad_norm": 1.3738997778672002, + "learning_rate": 1.3688277194465518e-05, + "loss": 0.6764, + "step": 12998 + }, + { + "epoch": 0.39840014711290916, + "grad_norm": 1.4793554170911545, + "learning_rate": 1.3687354523643464e-05, + "loss": 0.8437, + "step": 12999 + }, + { + "epoch": 0.39843079563565037, + "grad_norm": 1.6073588749247996, + "learning_rate": 1.3686431816488956e-05, + "loss": 0.8471, + "step": 13000 + }, + { + "epoch": 0.3984614441583916, + "grad_norm": 1.654498315421796, + "learning_rate": 1.3685509073011099e-05, + "loss": 0.8867, + "step": 13001 + }, + { + "epoch": 0.3984920926811328, + "grad_norm": 1.4426400380935485, + "learning_rate": 1.368458629321897e-05, + "loss": 0.7331, + "step": 13002 + }, + { + "epoch": 0.398522741203874, + "grad_norm": 1.6551198075169276, + "learning_rate": 1.3683663477121677e-05, + "loss": 0.8476, + "step": 13003 + }, + { + "epoch": 0.3985533897266152, + "grad_norm": 1.4785445942576252, + "learning_rate": 1.3682740624728301e-05, + "loss": 0.6779, + "step": 13004 + }, + { + "epoch": 0.3985840382493564, + "grad_norm": 1.5295840223340667, + "learning_rate": 1.368181773604794e-05, + "loss": 0.6477, + "step": 13005 + }, + { + "epoch": 0.3986146867720976, + "grad_norm": 1.5108741422400147, + "learning_rate": 1.3680894811089687e-05, + "loss": 0.7832, + "step": 13006 + }, + { + "epoch": 0.3986453352948388, + "grad_norm": 0.7278881224528809, + "learning_rate": 1.3679971849862637e-05, + "loss": 0.592, + "step": 13007 + }, + { + "epoch": 0.39867598381758, + "grad_norm": 1.5552914292558826, + "learning_rate": 1.3679048852375882e-05, + "loss": 0.8282, + "step": 13008 + }, + { + "epoch": 0.3987066323403212, + "grad_norm": 0.7296064218726431, + "learning_rate": 1.3678125818638516e-05, + "loss": 0.6138, + "step": 13009 + }, + { + "epoch": 0.3987372808630624, + "grad_norm": 1.4457677790844299, + "learning_rate": 1.3677202748659637e-05, + "loss": 0.8339, + "step": 13010 + }, + { + "epoch": 0.3987679293858036, + "grad_norm": 1.456789045575229, + "learning_rate": 1.3676279642448338e-05, + "loss": 0.666, + "step": 13011 + }, + { + "epoch": 0.3987985779085448, + "grad_norm": 1.6579198988942887, + "learning_rate": 1.3675356500013717e-05, + "loss": 0.7389, + "step": 13012 + }, + { + "epoch": 0.398829226431286, + "grad_norm": 1.4215764142719556, + "learning_rate": 1.3674433321364864e-05, + "loss": 0.7858, + "step": 13013 + }, + { + "epoch": 0.3988598749540272, + "grad_norm": 1.4756738443299613, + "learning_rate": 1.367351010651088e-05, + "loss": 0.7365, + "step": 13014 + }, + { + "epoch": 0.3988905234767684, + "grad_norm": 1.5240308614799212, + "learning_rate": 1.3672586855460863e-05, + "loss": 0.7185, + "step": 13015 + }, + { + "epoch": 0.3989211719995096, + "grad_norm": 1.4931479264062093, + "learning_rate": 1.367166356822391e-05, + "loss": 0.8081, + "step": 13016 + }, + { + "epoch": 0.3989518205222508, + "grad_norm": 1.4880288718468841, + "learning_rate": 1.367074024480911e-05, + "loss": 0.7389, + "step": 13017 + }, + { + "epoch": 0.398982469044992, + "grad_norm": 0.8128683669902798, + "learning_rate": 1.3669816885225573e-05, + "loss": 0.6097, + "step": 13018 + }, + { + "epoch": 0.3990131175677332, + "grad_norm": 1.415437936540108, + "learning_rate": 1.366889348948239e-05, + "loss": 0.6446, + "step": 13019 + }, + { + "epoch": 0.3990437660904744, + "grad_norm": 1.270021627548192, + "learning_rate": 1.3667970057588657e-05, + "loss": 0.6214, + "step": 13020 + }, + { + "epoch": 0.39907441461321563, + "grad_norm": 1.466872540760567, + "learning_rate": 1.366704658955348e-05, + "loss": 0.7626, + "step": 13021 + }, + { + "epoch": 0.39910506313595684, + "grad_norm": 1.492501207514149, + "learning_rate": 1.3666123085385952e-05, + "loss": 0.7365, + "step": 13022 + }, + { + "epoch": 0.39913571165869804, + "grad_norm": 1.5116467590448692, + "learning_rate": 1.366519954509518e-05, + "loss": 0.6882, + "step": 13023 + }, + { + "epoch": 0.39916636018143925, + "grad_norm": 1.3082378097042175, + "learning_rate": 1.3664275968690257e-05, + "loss": 0.6866, + "step": 13024 + }, + { + "epoch": 0.39919700870418046, + "grad_norm": 1.5522904181287631, + "learning_rate": 1.3663352356180284e-05, + "loss": 0.8313, + "step": 13025 + }, + { + "epoch": 0.39922765722692166, + "grad_norm": 1.4059222524356911, + "learning_rate": 1.3662428707574364e-05, + "loss": 0.6755, + "step": 13026 + }, + { + "epoch": 0.39925830574966287, + "grad_norm": 1.3629936778125487, + "learning_rate": 1.3661505022881599e-05, + "loss": 0.8135, + "step": 13027 + }, + { + "epoch": 0.3992889542724041, + "grad_norm": 1.5061488996673147, + "learning_rate": 1.3660581302111083e-05, + "loss": 0.6698, + "step": 13028 + }, + { + "epoch": 0.3993196027951453, + "grad_norm": 0.6891357639304128, + "learning_rate": 1.3659657545271925e-05, + "loss": 0.5933, + "step": 13029 + }, + { + "epoch": 0.3993502513178865, + "grad_norm": 1.3743091710614295, + "learning_rate": 1.3658733752373228e-05, + "loss": 0.7619, + "step": 13030 + }, + { + "epoch": 0.3993808998406277, + "grad_norm": 1.6193155328381206, + "learning_rate": 1.3657809923424085e-05, + "loss": 0.6741, + "step": 13031 + }, + { + "epoch": 0.3994115483633689, + "grad_norm": 1.6992381096912117, + "learning_rate": 1.365688605843361e-05, + "loss": 0.8004, + "step": 13032 + }, + { + "epoch": 0.3994421968861101, + "grad_norm": 1.479121829977597, + "learning_rate": 1.36559621574109e-05, + "loss": 0.7302, + "step": 13033 + }, + { + "epoch": 0.3994728454088513, + "grad_norm": 1.4366744383545467, + "learning_rate": 1.365503822036506e-05, + "loss": 0.8131, + "step": 13034 + }, + { + "epoch": 0.3995034939315925, + "grad_norm": 1.383457058790853, + "learning_rate": 1.3654114247305191e-05, + "loss": 0.7832, + "step": 13035 + }, + { + "epoch": 0.3995341424543337, + "grad_norm": 1.5186190214989537, + "learning_rate": 1.3653190238240401e-05, + "loss": 0.896, + "step": 13036 + }, + { + "epoch": 0.3995647909770749, + "grad_norm": 1.50061700016369, + "learning_rate": 1.365226619317979e-05, + "loss": 0.8361, + "step": 13037 + }, + { + "epoch": 0.39959543949981613, + "grad_norm": 1.4558731601911856, + "learning_rate": 1.3651342112132474e-05, + "loss": 0.7611, + "step": 13038 + }, + { + "epoch": 0.39962608802255734, + "grad_norm": 1.3456516628639856, + "learning_rate": 1.3650417995107541e-05, + "loss": 0.7324, + "step": 13039 + }, + { + "epoch": 0.39965673654529854, + "grad_norm": 1.5471522898932069, + "learning_rate": 1.3649493842114108e-05, + "loss": 0.7658, + "step": 13040 + }, + { + "epoch": 0.39968738506803975, + "grad_norm": 1.5667999608333643, + "learning_rate": 1.3648569653161278e-05, + "loss": 0.7653, + "step": 13041 + }, + { + "epoch": 0.3997180335907809, + "grad_norm": 1.543095418301913, + "learning_rate": 1.364764542825816e-05, + "loss": 0.7989, + "step": 13042 + }, + { + "epoch": 0.3997486821135221, + "grad_norm": 1.4009723590480252, + "learning_rate": 1.3646721167413856e-05, + "loss": 0.5748, + "step": 13043 + }, + { + "epoch": 0.3997793306362633, + "grad_norm": 0.7087216136097901, + "learning_rate": 1.3645796870637478e-05, + "loss": 0.5788, + "step": 13044 + }, + { + "epoch": 0.3998099791590045, + "grad_norm": 1.4434969392445487, + "learning_rate": 1.3644872537938128e-05, + "loss": 0.7362, + "step": 13045 + }, + { + "epoch": 0.3998406276817457, + "grad_norm": 1.335370169544329, + "learning_rate": 1.3643948169324916e-05, + "loss": 0.6354, + "step": 13046 + }, + { + "epoch": 0.3998712762044869, + "grad_norm": 0.7071489020856248, + "learning_rate": 1.3643023764806954e-05, + "loss": 0.615, + "step": 13047 + }, + { + "epoch": 0.39990192472722813, + "grad_norm": 1.4176582629870262, + "learning_rate": 1.364209932439334e-05, + "loss": 0.7125, + "step": 13048 + }, + { + "epoch": 0.39993257324996934, + "grad_norm": 0.6515732378657043, + "learning_rate": 1.3641174848093197e-05, + "loss": 0.5887, + "step": 13049 + }, + { + "epoch": 0.39996322177271054, + "grad_norm": 1.4724664266369796, + "learning_rate": 1.3640250335915624e-05, + "loss": 0.619, + "step": 13050 + }, + { + "epoch": 0.39999387029545175, + "grad_norm": 1.4641141293792752, + "learning_rate": 1.3639325787869732e-05, + "loss": 0.7403, + "step": 13051 + }, + { + "epoch": 0.40002451881819295, + "grad_norm": 0.6669003797928482, + "learning_rate": 1.3638401203964632e-05, + "loss": 0.5936, + "step": 13052 + }, + { + "epoch": 0.40005516734093416, + "grad_norm": 0.6561627261238651, + "learning_rate": 1.3637476584209437e-05, + "loss": 0.6159, + "step": 13053 + }, + { + "epoch": 0.40008581586367536, + "grad_norm": 0.651059628600613, + "learning_rate": 1.3636551928613249e-05, + "loss": 0.5965, + "step": 13054 + }, + { + "epoch": 0.40011646438641657, + "grad_norm": 0.6454719936272897, + "learning_rate": 1.363562723718519e-05, + "loss": 0.6112, + "step": 13055 + }, + { + "epoch": 0.4001471129091578, + "grad_norm": 1.405958679523046, + "learning_rate": 1.3634702509934358e-05, + "loss": 0.8414, + "step": 13056 + }, + { + "epoch": 0.400177761431899, + "grad_norm": 1.6471192810954858, + "learning_rate": 1.3633777746869878e-05, + "loss": 0.7924, + "step": 13057 + }, + { + "epoch": 0.4002084099546402, + "grad_norm": 1.3377214342273138, + "learning_rate": 1.3632852948000857e-05, + "loss": 0.7574, + "step": 13058 + }, + { + "epoch": 0.4002390584773814, + "grad_norm": 1.5730217656684695, + "learning_rate": 1.3631928113336406e-05, + "loss": 0.7518, + "step": 13059 + }, + { + "epoch": 0.4002697070001226, + "grad_norm": 1.4615095484899154, + "learning_rate": 1.3631003242885634e-05, + "loss": 0.7806, + "step": 13060 + }, + { + "epoch": 0.4003003555228638, + "grad_norm": 1.4034130533876148, + "learning_rate": 1.3630078336657661e-05, + "loss": 0.7696, + "step": 13061 + }, + { + "epoch": 0.400331004045605, + "grad_norm": 1.513716843211708, + "learning_rate": 1.3629153394661598e-05, + "loss": 0.7884, + "step": 13062 + }, + { + "epoch": 0.4003616525683462, + "grad_norm": 1.452397692660045, + "learning_rate": 1.3628228416906555e-05, + "loss": 0.7084, + "step": 13063 + }, + { + "epoch": 0.4003923010910874, + "grad_norm": 1.3242280898110848, + "learning_rate": 1.3627303403401653e-05, + "loss": 0.7053, + "step": 13064 + }, + { + "epoch": 0.4004229496138286, + "grad_norm": 0.8057743967143688, + "learning_rate": 1.3626378354156e-05, + "loss": 0.6083, + "step": 13065 + }, + { + "epoch": 0.40045359813656983, + "grad_norm": 1.5538809868470043, + "learning_rate": 1.3625453269178714e-05, + "loss": 0.725, + "step": 13066 + }, + { + "epoch": 0.40048424665931104, + "grad_norm": 1.43615213993693, + "learning_rate": 1.3624528148478909e-05, + "loss": 0.6951, + "step": 13067 + }, + { + "epoch": 0.40051489518205224, + "grad_norm": 1.7258319654221157, + "learning_rate": 1.3623602992065701e-05, + "loss": 0.7877, + "step": 13068 + }, + { + "epoch": 0.40054554370479345, + "grad_norm": 1.7476038798711624, + "learning_rate": 1.3622677799948205e-05, + "loss": 0.8163, + "step": 13069 + }, + { + "epoch": 0.40057619222753466, + "grad_norm": 1.559289005822116, + "learning_rate": 1.3621752572135539e-05, + "loss": 0.7403, + "step": 13070 + }, + { + "epoch": 0.40060684075027586, + "grad_norm": 1.409272267942442, + "learning_rate": 1.3620827308636818e-05, + "loss": 0.744, + "step": 13071 + }, + { + "epoch": 0.40063748927301707, + "grad_norm": 1.6141259791710434, + "learning_rate": 1.3619902009461157e-05, + "loss": 0.6956, + "step": 13072 + }, + { + "epoch": 0.4006681377957582, + "grad_norm": 1.3020266445813837, + "learning_rate": 1.3618976674617677e-05, + "loss": 0.6386, + "step": 13073 + }, + { + "epoch": 0.4006987863184994, + "grad_norm": 1.5630150366942244, + "learning_rate": 1.3618051304115494e-05, + "loss": 0.7579, + "step": 13074 + }, + { + "epoch": 0.40072943484124063, + "grad_norm": 1.4372334975487975, + "learning_rate": 1.3617125897963726e-05, + "loss": 0.716, + "step": 13075 + }, + { + "epoch": 0.40076008336398183, + "grad_norm": 1.4147558601506207, + "learning_rate": 1.361620045617149e-05, + "loss": 0.8146, + "step": 13076 + }, + { + "epoch": 0.40079073188672304, + "grad_norm": 1.2301661456011757, + "learning_rate": 1.3615274978747908e-05, + "loss": 0.6964, + "step": 13077 + }, + { + "epoch": 0.40082138040946425, + "grad_norm": 1.7075182830453433, + "learning_rate": 1.3614349465702092e-05, + "loss": 0.7392, + "step": 13078 + }, + { + "epoch": 0.40085202893220545, + "grad_norm": 1.2242684403236892, + "learning_rate": 1.3613423917043168e-05, + "loss": 0.6174, + "step": 13079 + }, + { + "epoch": 0.40088267745494666, + "grad_norm": 1.3293766970162073, + "learning_rate": 1.3612498332780258e-05, + "loss": 0.6493, + "step": 13080 + }, + { + "epoch": 0.40091332597768786, + "grad_norm": 1.5656189693670022, + "learning_rate": 1.3611572712922473e-05, + "loss": 0.7674, + "step": 13081 + }, + { + "epoch": 0.40094397450042907, + "grad_norm": 1.2649666721003012, + "learning_rate": 1.361064705747894e-05, + "loss": 0.6841, + "step": 13082 + }, + { + "epoch": 0.4009746230231703, + "grad_norm": 1.3998309477934907, + "learning_rate": 1.3609721366458774e-05, + "loss": 0.5931, + "step": 13083 + }, + { + "epoch": 0.4010052715459115, + "grad_norm": 0.7240998021594486, + "learning_rate": 1.3608795639871105e-05, + "loss": 0.5857, + "step": 13084 + }, + { + "epoch": 0.4010359200686527, + "grad_norm": 1.3287954737533498, + "learning_rate": 1.3607869877725046e-05, + "loss": 0.6186, + "step": 13085 + }, + { + "epoch": 0.4010665685913939, + "grad_norm": 1.408378915828206, + "learning_rate": 1.3606944080029723e-05, + "loss": 0.7446, + "step": 13086 + }, + { + "epoch": 0.4010972171141351, + "grad_norm": 1.6347597487263235, + "learning_rate": 1.3606018246794256e-05, + "loss": 0.7755, + "step": 13087 + }, + { + "epoch": 0.4011278656368763, + "grad_norm": 1.4348374226332217, + "learning_rate": 1.3605092378027771e-05, + "loss": 0.7621, + "step": 13088 + }, + { + "epoch": 0.4011585141596175, + "grad_norm": 1.4681486350196806, + "learning_rate": 1.3604166473739384e-05, + "loss": 0.7125, + "step": 13089 + }, + { + "epoch": 0.4011891626823587, + "grad_norm": 1.505149404280401, + "learning_rate": 1.3603240533938226e-05, + "loss": 0.7006, + "step": 13090 + }, + { + "epoch": 0.4012198112050999, + "grad_norm": 1.6051174326538578, + "learning_rate": 1.3602314558633416e-05, + "loss": 0.7291, + "step": 13091 + }, + { + "epoch": 0.4012504597278411, + "grad_norm": 1.3923080155684573, + "learning_rate": 1.3601388547834082e-05, + "loss": 0.7086, + "step": 13092 + }, + { + "epoch": 0.40128110825058233, + "grad_norm": 1.4132512489834594, + "learning_rate": 1.360046250154934e-05, + "loss": 0.6922, + "step": 13093 + }, + { + "epoch": 0.40131175677332354, + "grad_norm": 1.5681606105299273, + "learning_rate": 1.3599536419788323e-05, + "loss": 0.8123, + "step": 13094 + }, + { + "epoch": 0.40134240529606474, + "grad_norm": 1.434051166619825, + "learning_rate": 1.3598610302560151e-05, + "loss": 0.7427, + "step": 13095 + }, + { + "epoch": 0.40137305381880595, + "grad_norm": 1.4113857105286736, + "learning_rate": 1.3597684149873955e-05, + "loss": 0.6782, + "step": 13096 + }, + { + "epoch": 0.40140370234154715, + "grad_norm": 1.5080242273387565, + "learning_rate": 1.3596757961738851e-05, + "loss": 0.6944, + "step": 13097 + }, + { + "epoch": 0.40143435086428836, + "grad_norm": 1.4327165942557487, + "learning_rate": 1.3595831738163973e-05, + "loss": 0.7246, + "step": 13098 + }, + { + "epoch": 0.40146499938702956, + "grad_norm": 1.5255541371704817, + "learning_rate": 1.3594905479158445e-05, + "loss": 0.7613, + "step": 13099 + }, + { + "epoch": 0.40149564790977077, + "grad_norm": 0.6879768993928933, + "learning_rate": 1.3593979184731394e-05, + "loss": 0.5712, + "step": 13100 + }, + { + "epoch": 0.401526296432512, + "grad_norm": 1.4349694624512084, + "learning_rate": 1.3593052854891947e-05, + "loss": 0.7606, + "step": 13101 + }, + { + "epoch": 0.4015569449552532, + "grad_norm": 1.593980741116199, + "learning_rate": 1.3592126489649226e-05, + "loss": 0.808, + "step": 13102 + }, + { + "epoch": 0.4015875934779944, + "grad_norm": 1.964613909462332, + "learning_rate": 1.3591200089012371e-05, + "loss": 0.8305, + "step": 13103 + }, + { + "epoch": 0.40161824200073554, + "grad_norm": 1.3382229292474657, + "learning_rate": 1.3590273652990498e-05, + "loss": 0.6642, + "step": 13104 + }, + { + "epoch": 0.40164889052347674, + "grad_norm": 1.7077361728620268, + "learning_rate": 1.3589347181592743e-05, + "loss": 0.7738, + "step": 13105 + }, + { + "epoch": 0.40167953904621795, + "grad_norm": 1.4430003871456574, + "learning_rate": 1.3588420674828227e-05, + "loss": 0.7459, + "step": 13106 + }, + { + "epoch": 0.40171018756895915, + "grad_norm": 1.4051348697119057, + "learning_rate": 1.3587494132706089e-05, + "loss": 0.75, + "step": 13107 + }, + { + "epoch": 0.40174083609170036, + "grad_norm": 1.3476043330724397, + "learning_rate": 1.358656755523545e-05, + "loss": 0.7752, + "step": 13108 + }, + { + "epoch": 0.40177148461444157, + "grad_norm": 1.5105694906226885, + "learning_rate": 1.3585640942425447e-05, + "loss": 0.7685, + "step": 13109 + }, + { + "epoch": 0.40180213313718277, + "grad_norm": 1.3889219472571646, + "learning_rate": 1.3584714294285201e-05, + "loss": 0.729, + "step": 13110 + }, + { + "epoch": 0.401832781659924, + "grad_norm": 1.640165392660307, + "learning_rate": 1.3583787610823856e-05, + "loss": 0.7961, + "step": 13111 + }, + { + "epoch": 0.4018634301826652, + "grad_norm": 1.3365708895196635, + "learning_rate": 1.3582860892050526e-05, + "loss": 0.6458, + "step": 13112 + }, + { + "epoch": 0.4018940787054064, + "grad_norm": 1.589758943350629, + "learning_rate": 1.3581934137974355e-05, + "loss": 0.8095, + "step": 13113 + }, + { + "epoch": 0.4019247272281476, + "grad_norm": 1.4438998617884833, + "learning_rate": 1.358100734860447e-05, + "loss": 0.7099, + "step": 13114 + }, + { + "epoch": 0.4019553757508888, + "grad_norm": 0.6841107516039308, + "learning_rate": 1.3580080523950003e-05, + "loss": 0.6079, + "step": 13115 + }, + { + "epoch": 0.40198602427363, + "grad_norm": 1.7386735061975875, + "learning_rate": 1.3579153664020088e-05, + "loss": 0.7058, + "step": 13116 + }, + { + "epoch": 0.4020166727963712, + "grad_norm": 1.707976678670643, + "learning_rate": 1.3578226768823855e-05, + "loss": 0.7406, + "step": 13117 + }, + { + "epoch": 0.4020473213191124, + "grad_norm": 0.6650669960226769, + "learning_rate": 1.3577299838370436e-05, + "loss": 0.5983, + "step": 13118 + }, + { + "epoch": 0.4020779698418536, + "grad_norm": 0.6764972675654096, + "learning_rate": 1.357637287266897e-05, + "loss": 0.6131, + "step": 13119 + }, + { + "epoch": 0.4021086183645948, + "grad_norm": 1.4240792070477695, + "learning_rate": 1.3575445871728588e-05, + "loss": 0.7025, + "step": 13120 + }, + { + "epoch": 0.40213926688733603, + "grad_norm": 1.4486339237333568, + "learning_rate": 1.3574518835558418e-05, + "loss": 0.7654, + "step": 13121 + }, + { + "epoch": 0.40216991541007724, + "grad_norm": 1.381506339032036, + "learning_rate": 1.3573591764167603e-05, + "loss": 0.7105, + "step": 13122 + }, + { + "epoch": 0.40220056393281844, + "grad_norm": 0.6397443124401722, + "learning_rate": 1.3572664657565273e-05, + "loss": 0.5906, + "step": 13123 + }, + { + "epoch": 0.40223121245555965, + "grad_norm": 0.6514202551428309, + "learning_rate": 1.3571737515760566e-05, + "loss": 0.6059, + "step": 13124 + }, + { + "epoch": 0.40226186097830086, + "grad_norm": 0.6601574612057569, + "learning_rate": 1.357081033876261e-05, + "loss": 0.5749, + "step": 13125 + }, + { + "epoch": 0.40229250950104206, + "grad_norm": 1.4055966914159348, + "learning_rate": 1.3569883126580552e-05, + "loss": 0.7188, + "step": 13126 + }, + { + "epoch": 0.40232315802378327, + "grad_norm": 1.3093500771896713, + "learning_rate": 1.356895587922352e-05, + "loss": 0.7654, + "step": 13127 + }, + { + "epoch": 0.4023538065465245, + "grad_norm": 1.3673161574557533, + "learning_rate": 1.3568028596700652e-05, + "loss": 0.7483, + "step": 13128 + }, + { + "epoch": 0.4023844550692657, + "grad_norm": 1.4227543000074465, + "learning_rate": 1.3567101279021085e-05, + "loss": 0.8117, + "step": 13129 + }, + { + "epoch": 0.4024151035920069, + "grad_norm": 0.6569106955899723, + "learning_rate": 1.3566173926193958e-05, + "loss": 0.5873, + "step": 13130 + }, + { + "epoch": 0.4024457521147481, + "grad_norm": 1.5206510536605145, + "learning_rate": 1.3565246538228405e-05, + "loss": 0.7314, + "step": 13131 + }, + { + "epoch": 0.4024764006374893, + "grad_norm": 1.383129597656207, + "learning_rate": 1.3564319115133567e-05, + "loss": 0.7076, + "step": 13132 + }, + { + "epoch": 0.4025070491602305, + "grad_norm": 1.5423526111273123, + "learning_rate": 1.3563391656918579e-05, + "loss": 0.7646, + "step": 13133 + }, + { + "epoch": 0.4025376976829717, + "grad_norm": 1.259338370322284, + "learning_rate": 1.3562464163592583e-05, + "loss": 0.7484, + "step": 13134 + }, + { + "epoch": 0.40256834620571286, + "grad_norm": 1.5804163264332127, + "learning_rate": 1.3561536635164715e-05, + "loss": 0.7768, + "step": 13135 + }, + { + "epoch": 0.40259899472845406, + "grad_norm": 1.4034658231165849, + "learning_rate": 1.3560609071644115e-05, + "loss": 0.7302, + "step": 13136 + }, + { + "epoch": 0.40262964325119527, + "grad_norm": 0.692579030639319, + "learning_rate": 1.3559681473039925e-05, + "loss": 0.5878, + "step": 13137 + }, + { + "epoch": 0.4026602917739365, + "grad_norm": 1.5536197756305297, + "learning_rate": 1.3558753839361283e-05, + "loss": 0.706, + "step": 13138 + }, + { + "epoch": 0.4026909402966777, + "grad_norm": 0.6643832427703199, + "learning_rate": 1.3557826170617327e-05, + "loss": 0.6033, + "step": 13139 + }, + { + "epoch": 0.4027215888194189, + "grad_norm": 1.3691829563131785, + "learning_rate": 1.35568984668172e-05, + "loss": 0.6706, + "step": 13140 + }, + { + "epoch": 0.4027522373421601, + "grad_norm": 1.4745325356875256, + "learning_rate": 1.355597072797004e-05, + "loss": 0.8001, + "step": 13141 + }, + { + "epoch": 0.4027828858649013, + "grad_norm": 0.6998991385335548, + "learning_rate": 1.3555042954084994e-05, + "loss": 0.6178, + "step": 13142 + }, + { + "epoch": 0.4028135343876425, + "grad_norm": 1.397016001210422, + "learning_rate": 1.3554115145171199e-05, + "loss": 0.627, + "step": 13143 + }, + { + "epoch": 0.4028441829103837, + "grad_norm": 1.4799808382221604, + "learning_rate": 1.3553187301237798e-05, + "loss": 0.8307, + "step": 13144 + }, + { + "epoch": 0.4028748314331249, + "grad_norm": 1.325274246613634, + "learning_rate": 1.3552259422293933e-05, + "loss": 0.7818, + "step": 13145 + }, + { + "epoch": 0.4029054799558661, + "grad_norm": 1.5942721433808211, + "learning_rate": 1.3551331508348748e-05, + "loss": 0.7794, + "step": 13146 + }, + { + "epoch": 0.4029361284786073, + "grad_norm": 1.46195811415092, + "learning_rate": 1.3550403559411383e-05, + "loss": 0.807, + "step": 13147 + }, + { + "epoch": 0.40296677700134853, + "grad_norm": 1.4629178573892643, + "learning_rate": 1.3549475575490984e-05, + "loss": 0.6936, + "step": 13148 + }, + { + "epoch": 0.40299742552408974, + "grad_norm": 1.531962122526083, + "learning_rate": 1.3548547556596694e-05, + "loss": 0.8416, + "step": 13149 + }, + { + "epoch": 0.40302807404683094, + "grad_norm": 1.4215186265055455, + "learning_rate": 1.3547619502737662e-05, + "loss": 0.6826, + "step": 13150 + }, + { + "epoch": 0.40305872256957215, + "grad_norm": 0.6890418742591143, + "learning_rate": 1.3546691413923018e-05, + "loss": 0.5998, + "step": 13151 + }, + { + "epoch": 0.40308937109231335, + "grad_norm": 1.3051599681413626, + "learning_rate": 1.3545763290161924e-05, + "loss": 0.6739, + "step": 13152 + }, + { + "epoch": 0.40312001961505456, + "grad_norm": 1.430973543242342, + "learning_rate": 1.3544835131463512e-05, + "loss": 0.8172, + "step": 13153 + }, + { + "epoch": 0.40315066813779576, + "grad_norm": 1.3684752249890988, + "learning_rate": 1.3543906937836935e-05, + "loss": 0.6549, + "step": 13154 + }, + { + "epoch": 0.40318131666053697, + "grad_norm": 1.3394369997234719, + "learning_rate": 1.3542978709291336e-05, + "loss": 0.7423, + "step": 13155 + }, + { + "epoch": 0.4032119651832782, + "grad_norm": 1.2782661391219035, + "learning_rate": 1.3542050445835861e-05, + "loss": 0.7258, + "step": 13156 + }, + { + "epoch": 0.4032426137060194, + "grad_norm": 1.370577000165588, + "learning_rate": 1.354112214747966e-05, + "loss": 0.7717, + "step": 13157 + }, + { + "epoch": 0.4032732622287606, + "grad_norm": 1.5419180093833385, + "learning_rate": 1.354019381423187e-05, + "loss": 0.7502, + "step": 13158 + }, + { + "epoch": 0.4033039107515018, + "grad_norm": 1.422404948636196, + "learning_rate": 1.3539265446101649e-05, + "loss": 0.7371, + "step": 13159 + }, + { + "epoch": 0.403334559274243, + "grad_norm": 1.5864658981246376, + "learning_rate": 1.3538337043098138e-05, + "loss": 0.7652, + "step": 13160 + }, + { + "epoch": 0.4033652077969842, + "grad_norm": 1.3794332259063826, + "learning_rate": 1.353740860523049e-05, + "loss": 0.7179, + "step": 13161 + }, + { + "epoch": 0.4033958563197254, + "grad_norm": 1.4841227043848635, + "learning_rate": 1.3536480132507846e-05, + "loss": 0.7311, + "step": 13162 + }, + { + "epoch": 0.4034265048424666, + "grad_norm": 1.4866263362508936, + "learning_rate": 1.3535551624939357e-05, + "loss": 0.8891, + "step": 13163 + }, + { + "epoch": 0.4034571533652078, + "grad_norm": 1.4661795081054756, + "learning_rate": 1.3534623082534178e-05, + "loss": 0.7612, + "step": 13164 + }, + { + "epoch": 0.403487801887949, + "grad_norm": 1.4142439970635683, + "learning_rate": 1.3533694505301453e-05, + "loss": 0.8139, + "step": 13165 + }, + { + "epoch": 0.4035184504106902, + "grad_norm": 0.6734175410969127, + "learning_rate": 1.3532765893250329e-05, + "loss": 0.5981, + "step": 13166 + }, + { + "epoch": 0.4035490989334314, + "grad_norm": 1.4097447091709805, + "learning_rate": 1.353183724638996e-05, + "loss": 0.7045, + "step": 13167 + }, + { + "epoch": 0.4035797474561726, + "grad_norm": 0.6818733579031662, + "learning_rate": 1.3530908564729495e-05, + "loss": 0.6381, + "step": 13168 + }, + { + "epoch": 0.4036103959789138, + "grad_norm": 1.3314970320011186, + "learning_rate": 1.3529979848278086e-05, + "loss": 0.5815, + "step": 13169 + }, + { + "epoch": 0.403641044501655, + "grad_norm": 1.372312617626129, + "learning_rate": 1.3529051097044881e-05, + "loss": 0.714, + "step": 13170 + }, + { + "epoch": 0.4036716930243962, + "grad_norm": 1.4644583788973442, + "learning_rate": 1.352812231103903e-05, + "loss": 0.6795, + "step": 13171 + }, + { + "epoch": 0.4037023415471374, + "grad_norm": 1.6180933665078938, + "learning_rate": 1.352719349026969e-05, + "loss": 0.7312, + "step": 13172 + }, + { + "epoch": 0.4037329900698786, + "grad_norm": 1.5123637598382709, + "learning_rate": 1.3526264634746009e-05, + "loss": 0.7634, + "step": 13173 + }, + { + "epoch": 0.4037636385926198, + "grad_norm": 1.3718734845041585, + "learning_rate": 1.3525335744477144e-05, + "loss": 0.6612, + "step": 13174 + }, + { + "epoch": 0.40379428711536103, + "grad_norm": 1.585356967579665, + "learning_rate": 1.3524406819472239e-05, + "loss": 0.7757, + "step": 13175 + }, + { + "epoch": 0.40382493563810223, + "grad_norm": 1.4034359464795976, + "learning_rate": 1.3523477859740455e-05, + "loss": 0.6666, + "step": 13176 + }, + { + "epoch": 0.40385558416084344, + "grad_norm": 1.4406585325108747, + "learning_rate": 1.3522548865290942e-05, + "loss": 0.7237, + "step": 13177 + }, + { + "epoch": 0.40388623268358465, + "grad_norm": 0.7426021132459018, + "learning_rate": 1.3521619836132851e-05, + "loss": 0.6114, + "step": 13178 + }, + { + "epoch": 0.40391688120632585, + "grad_norm": 1.608880359139165, + "learning_rate": 1.352069077227534e-05, + "loss": 0.7383, + "step": 13179 + }, + { + "epoch": 0.40394752972906706, + "grad_norm": 1.5084745379095408, + "learning_rate": 1.3519761673727562e-05, + "loss": 0.7752, + "step": 13180 + }, + { + "epoch": 0.40397817825180826, + "grad_norm": 1.665978333931471, + "learning_rate": 1.3518832540498674e-05, + "loss": 0.7755, + "step": 13181 + }, + { + "epoch": 0.40400882677454947, + "grad_norm": 1.3851045639746093, + "learning_rate": 1.3517903372597826e-05, + "loss": 0.7282, + "step": 13182 + }, + { + "epoch": 0.4040394752972907, + "grad_norm": 1.3883195078845276, + "learning_rate": 1.3516974170034177e-05, + "loss": 0.694, + "step": 13183 + }, + { + "epoch": 0.4040701238200319, + "grad_norm": 1.460462114998801, + "learning_rate": 1.3516044932816881e-05, + "loss": 0.743, + "step": 13184 + }, + { + "epoch": 0.4041007723427731, + "grad_norm": 1.3560801894771852, + "learning_rate": 1.3515115660955096e-05, + "loss": 0.7443, + "step": 13185 + }, + { + "epoch": 0.4041314208655143, + "grad_norm": 1.43886265956772, + "learning_rate": 1.3514186354457974e-05, + "loss": 0.771, + "step": 13186 + }, + { + "epoch": 0.4041620693882555, + "grad_norm": 1.6022575616741852, + "learning_rate": 1.3513257013334678e-05, + "loss": 0.7674, + "step": 13187 + }, + { + "epoch": 0.4041927179109967, + "grad_norm": 1.4901862503711776, + "learning_rate": 1.3512327637594361e-05, + "loss": 0.7799, + "step": 13188 + }, + { + "epoch": 0.4042233664337379, + "grad_norm": 1.4417280247093063, + "learning_rate": 1.3511398227246182e-05, + "loss": 0.6996, + "step": 13189 + }, + { + "epoch": 0.4042540149564791, + "grad_norm": 1.48431987676219, + "learning_rate": 1.3510468782299292e-05, + "loss": 0.7754, + "step": 13190 + }, + { + "epoch": 0.4042846634792203, + "grad_norm": 1.4417261199328906, + "learning_rate": 1.3509539302762862e-05, + "loss": 0.6872, + "step": 13191 + }, + { + "epoch": 0.4043153120019615, + "grad_norm": 1.5339015883402327, + "learning_rate": 1.350860978864604e-05, + "loss": 0.7507, + "step": 13192 + }, + { + "epoch": 0.40434596052470273, + "grad_norm": 1.4122792876628039, + "learning_rate": 1.350768023995799e-05, + "loss": 0.7927, + "step": 13193 + }, + { + "epoch": 0.40437660904744394, + "grad_norm": 1.5011126861861785, + "learning_rate": 1.3506750656707865e-05, + "loss": 0.7117, + "step": 13194 + }, + { + "epoch": 0.40440725757018514, + "grad_norm": 1.435086050238983, + "learning_rate": 1.3505821038904834e-05, + "loss": 0.8575, + "step": 13195 + }, + { + "epoch": 0.40443790609292635, + "grad_norm": 1.466485716455636, + "learning_rate": 1.3504891386558048e-05, + "loss": 0.8121, + "step": 13196 + }, + { + "epoch": 0.4044685546156675, + "grad_norm": 1.4597751106842667, + "learning_rate": 1.3503961699676672e-05, + "loss": 0.782, + "step": 13197 + }, + { + "epoch": 0.4044992031384087, + "grad_norm": 1.3746656434127118, + "learning_rate": 1.3503031978269863e-05, + "loss": 0.6628, + "step": 13198 + }, + { + "epoch": 0.4045298516611499, + "grad_norm": 1.5076825751626841, + "learning_rate": 1.3502102222346782e-05, + "loss": 0.7218, + "step": 13199 + }, + { + "epoch": 0.4045605001838911, + "grad_norm": 1.5868392901104582, + "learning_rate": 1.3501172431916598e-05, + "loss": 0.6987, + "step": 13200 + }, + { + "epoch": 0.4045911487066323, + "grad_norm": 1.6571626214465374, + "learning_rate": 1.350024260698846e-05, + "loss": 0.719, + "step": 13201 + }, + { + "epoch": 0.4046217972293735, + "grad_norm": 1.5376835233124713, + "learning_rate": 1.3499312747571537e-05, + "loss": 0.8167, + "step": 13202 + }, + { + "epoch": 0.40465244575211473, + "grad_norm": 1.5838873973236378, + "learning_rate": 1.3498382853674992e-05, + "loss": 0.6753, + "step": 13203 + }, + { + "epoch": 0.40468309427485594, + "grad_norm": 1.319523378424134, + "learning_rate": 1.3497452925307988e-05, + "loss": 0.6569, + "step": 13204 + }, + { + "epoch": 0.40471374279759714, + "grad_norm": 1.389435550106838, + "learning_rate": 1.3496522962479681e-05, + "loss": 0.7238, + "step": 13205 + }, + { + "epoch": 0.40474439132033835, + "grad_norm": 1.5227660519175246, + "learning_rate": 1.3495592965199242e-05, + "loss": 0.6564, + "step": 13206 + }, + { + "epoch": 0.40477503984307955, + "grad_norm": 1.6604196992953277, + "learning_rate": 1.3494662933475828e-05, + "loss": 0.698, + "step": 13207 + }, + { + "epoch": 0.40480568836582076, + "grad_norm": 1.5923647319614496, + "learning_rate": 1.3493732867318609e-05, + "loss": 0.6368, + "step": 13208 + }, + { + "epoch": 0.40483633688856197, + "grad_norm": 1.3439817658191864, + "learning_rate": 1.3492802766736744e-05, + "loss": 0.7287, + "step": 13209 + }, + { + "epoch": 0.40486698541130317, + "grad_norm": 1.4381195121697312, + "learning_rate": 1.34918726317394e-05, + "loss": 0.7469, + "step": 13210 + }, + { + "epoch": 0.4048976339340444, + "grad_norm": 1.6094473007706596, + "learning_rate": 1.3490942462335743e-05, + "loss": 0.6828, + "step": 13211 + }, + { + "epoch": 0.4049282824567856, + "grad_norm": 1.3788108157701673, + "learning_rate": 1.3490012258534934e-05, + "loss": 0.7764, + "step": 13212 + }, + { + "epoch": 0.4049589309795268, + "grad_norm": 0.7073187614601333, + "learning_rate": 1.3489082020346143e-05, + "loss": 0.6062, + "step": 13213 + }, + { + "epoch": 0.404989579502268, + "grad_norm": 1.5611673358740068, + "learning_rate": 1.3488151747778533e-05, + "loss": 0.7966, + "step": 13214 + }, + { + "epoch": 0.4050202280250092, + "grad_norm": 1.5480784283944167, + "learning_rate": 1.3487221440841273e-05, + "loss": 0.8153, + "step": 13215 + }, + { + "epoch": 0.4050508765477504, + "grad_norm": 0.7087046533932387, + "learning_rate": 1.3486291099543527e-05, + "loss": 0.6013, + "step": 13216 + }, + { + "epoch": 0.4050815250704916, + "grad_norm": 1.5033022963488276, + "learning_rate": 1.3485360723894462e-05, + "loss": 0.6617, + "step": 13217 + }, + { + "epoch": 0.4051121735932328, + "grad_norm": 1.3631782669944148, + "learning_rate": 1.3484430313903247e-05, + "loss": 0.6344, + "step": 13218 + }, + { + "epoch": 0.405142822115974, + "grad_norm": 1.389396280945693, + "learning_rate": 1.3483499869579051e-05, + "loss": 0.7564, + "step": 13219 + }, + { + "epoch": 0.4051734706387152, + "grad_norm": 1.6095491973653238, + "learning_rate": 1.3482569390931035e-05, + "loss": 0.7477, + "step": 13220 + }, + { + "epoch": 0.40520411916145643, + "grad_norm": 1.3595288863229587, + "learning_rate": 1.3481638877968375e-05, + "loss": 0.7122, + "step": 13221 + }, + { + "epoch": 0.40523476768419764, + "grad_norm": 1.275594374635712, + "learning_rate": 1.3480708330700234e-05, + "loss": 0.7727, + "step": 13222 + }, + { + "epoch": 0.40526541620693884, + "grad_norm": 1.5877126138614233, + "learning_rate": 1.3479777749135786e-05, + "loss": 0.7027, + "step": 13223 + }, + { + "epoch": 0.40529606472968005, + "grad_norm": 1.5059994728118078, + "learning_rate": 1.3478847133284196e-05, + "loss": 0.7719, + "step": 13224 + }, + { + "epoch": 0.40532671325242126, + "grad_norm": 1.4009982563262164, + "learning_rate": 1.3477916483154634e-05, + "loss": 0.7132, + "step": 13225 + }, + { + "epoch": 0.40535736177516246, + "grad_norm": 1.6394177197867128, + "learning_rate": 1.3476985798756273e-05, + "loss": 0.8276, + "step": 13226 + }, + { + "epoch": 0.40538801029790367, + "grad_norm": 1.5533852212089165, + "learning_rate": 1.347605508009828e-05, + "loss": 0.7026, + "step": 13227 + }, + { + "epoch": 0.4054186588206448, + "grad_norm": 1.428076449197132, + "learning_rate": 1.3475124327189828e-05, + "loss": 0.6933, + "step": 13228 + }, + { + "epoch": 0.405449307343386, + "grad_norm": 1.4547451310939399, + "learning_rate": 1.3474193540040084e-05, + "loss": 0.6309, + "step": 13229 + }, + { + "epoch": 0.40547995586612723, + "grad_norm": 1.7200330080101773, + "learning_rate": 1.3473262718658224e-05, + "loss": 0.732, + "step": 13230 + }, + { + "epoch": 0.40551060438886843, + "grad_norm": 1.4453381561654133, + "learning_rate": 1.3472331863053421e-05, + "loss": 0.8122, + "step": 13231 + }, + { + "epoch": 0.40554125291160964, + "grad_norm": 0.8076128907047812, + "learning_rate": 1.347140097323484e-05, + "loss": 0.6048, + "step": 13232 + }, + { + "epoch": 0.40557190143435085, + "grad_norm": 0.7589041923940998, + "learning_rate": 1.3470470049211658e-05, + "loss": 0.5955, + "step": 13233 + }, + { + "epoch": 0.40560254995709205, + "grad_norm": 1.398275635138006, + "learning_rate": 1.3469539090993047e-05, + "loss": 0.7025, + "step": 13234 + }, + { + "epoch": 0.40563319847983326, + "grad_norm": 1.5797084879258005, + "learning_rate": 1.3468608098588178e-05, + "loss": 0.7764, + "step": 13235 + }, + { + "epoch": 0.40566384700257446, + "grad_norm": 1.521811417093803, + "learning_rate": 1.3467677072006227e-05, + "loss": 0.731, + "step": 13236 + }, + { + "epoch": 0.40569449552531567, + "grad_norm": 1.36476782402823, + "learning_rate": 1.3466746011256365e-05, + "loss": 0.618, + "step": 13237 + }, + { + "epoch": 0.4057251440480569, + "grad_norm": 1.29308701332485, + "learning_rate": 1.346581491634777e-05, + "loss": 0.6662, + "step": 13238 + }, + { + "epoch": 0.4057557925707981, + "grad_norm": 1.5161247854862134, + "learning_rate": 1.3464883787289613e-05, + "loss": 0.7576, + "step": 13239 + }, + { + "epoch": 0.4057864410935393, + "grad_norm": 1.548085359170413, + "learning_rate": 1.3463952624091067e-05, + "loss": 0.8202, + "step": 13240 + }, + { + "epoch": 0.4058170896162805, + "grad_norm": 1.4560703378100706, + "learning_rate": 1.3463021426761313e-05, + "loss": 0.7468, + "step": 13241 + }, + { + "epoch": 0.4058477381390217, + "grad_norm": 1.3194508700842196, + "learning_rate": 1.3462090195309523e-05, + "loss": 0.7927, + "step": 13242 + }, + { + "epoch": 0.4058783866617629, + "grad_norm": 1.72449796853935, + "learning_rate": 1.346115892974487e-05, + "loss": 0.7597, + "step": 13243 + }, + { + "epoch": 0.4059090351845041, + "grad_norm": 1.5107658663158479, + "learning_rate": 1.3460227630076533e-05, + "loss": 0.7549, + "step": 13244 + }, + { + "epoch": 0.4059396837072453, + "grad_norm": 1.1953734444726698, + "learning_rate": 1.3459296296313688e-05, + "loss": 0.6006, + "step": 13245 + }, + { + "epoch": 0.4059703322299865, + "grad_norm": 1.3618470480340232, + "learning_rate": 1.3458364928465515e-05, + "loss": 0.6429, + "step": 13246 + }, + { + "epoch": 0.4060009807527277, + "grad_norm": 0.9538687516971752, + "learning_rate": 1.3457433526541184e-05, + "loss": 0.6299, + "step": 13247 + }, + { + "epoch": 0.40603162927546893, + "grad_norm": 1.4871556265762913, + "learning_rate": 1.3456502090549875e-05, + "loss": 0.7777, + "step": 13248 + }, + { + "epoch": 0.40606227779821014, + "grad_norm": 0.7567247500023389, + "learning_rate": 1.3455570620500769e-05, + "loss": 0.601, + "step": 13249 + }, + { + "epoch": 0.40609292632095134, + "grad_norm": 1.4399372576805622, + "learning_rate": 1.345463911640304e-05, + "loss": 0.723, + "step": 13250 + }, + { + "epoch": 0.40612357484369255, + "grad_norm": 1.3925499103163652, + "learning_rate": 1.3453707578265866e-05, + "loss": 0.7097, + "step": 13251 + }, + { + "epoch": 0.40615422336643375, + "grad_norm": 1.4393245727595037, + "learning_rate": 1.3452776006098431e-05, + "loss": 0.7597, + "step": 13252 + }, + { + "epoch": 0.40618487188917496, + "grad_norm": 1.3783282689517482, + "learning_rate": 1.345184439990991e-05, + "loss": 0.6168, + "step": 13253 + }, + { + "epoch": 0.40621552041191616, + "grad_norm": 1.590091533811243, + "learning_rate": 1.3450912759709485e-05, + "loss": 0.8071, + "step": 13254 + }, + { + "epoch": 0.40624616893465737, + "grad_norm": 1.4734969579528139, + "learning_rate": 1.3449981085506327e-05, + "loss": 0.7267, + "step": 13255 + }, + { + "epoch": 0.4062768174573986, + "grad_norm": 1.4654067190859943, + "learning_rate": 1.3449049377309629e-05, + "loss": 0.7177, + "step": 13256 + }, + { + "epoch": 0.4063074659801398, + "grad_norm": 0.9281144922578618, + "learning_rate": 1.344811763512856e-05, + "loss": 0.6122, + "step": 13257 + }, + { + "epoch": 0.406338114502881, + "grad_norm": 0.8568649762305457, + "learning_rate": 1.3447185858972312e-05, + "loss": 0.6183, + "step": 13258 + }, + { + "epoch": 0.4063687630256222, + "grad_norm": 1.6192416427090497, + "learning_rate": 1.3446254048850053e-05, + "loss": 0.727, + "step": 13259 + }, + { + "epoch": 0.40639941154836334, + "grad_norm": 1.4844400896026053, + "learning_rate": 1.3445322204770976e-05, + "loss": 0.7758, + "step": 13260 + }, + { + "epoch": 0.40643006007110455, + "grad_norm": 0.6929899777065353, + "learning_rate": 1.3444390326744256e-05, + "loss": 0.6031, + "step": 13261 + }, + { + "epoch": 0.40646070859384575, + "grad_norm": 1.3320200300701044, + "learning_rate": 1.3443458414779076e-05, + "loss": 0.7134, + "step": 13262 + }, + { + "epoch": 0.40649135711658696, + "grad_norm": 1.4585301735170115, + "learning_rate": 1.344252646888462e-05, + "loss": 0.6653, + "step": 13263 + }, + { + "epoch": 0.40652200563932817, + "grad_norm": 1.4596788583192382, + "learning_rate": 1.3441594489070072e-05, + "loss": 0.6985, + "step": 13264 + }, + { + "epoch": 0.40655265416206937, + "grad_norm": 1.394119304909859, + "learning_rate": 1.344066247534461e-05, + "loss": 0.7477, + "step": 13265 + }, + { + "epoch": 0.4065833026848106, + "grad_norm": 1.6269078972635256, + "learning_rate": 1.343973042771742e-05, + "loss": 0.7976, + "step": 13266 + }, + { + "epoch": 0.4066139512075518, + "grad_norm": 1.6682764883513648, + "learning_rate": 1.3438798346197687e-05, + "loss": 0.7957, + "step": 13267 + }, + { + "epoch": 0.406644599730293, + "grad_norm": 1.6005062011950792, + "learning_rate": 1.3437866230794592e-05, + "loss": 0.7713, + "step": 13268 + }, + { + "epoch": 0.4066752482530342, + "grad_norm": 1.4048692234860094, + "learning_rate": 1.3436934081517326e-05, + "loss": 0.6826, + "step": 13269 + }, + { + "epoch": 0.4067058967757754, + "grad_norm": 1.3960076038548532, + "learning_rate": 1.3436001898375066e-05, + "loss": 0.7411, + "step": 13270 + }, + { + "epoch": 0.4067365452985166, + "grad_norm": 1.5227075154478353, + "learning_rate": 1.3435069681376999e-05, + "loss": 0.8001, + "step": 13271 + }, + { + "epoch": 0.4067671938212578, + "grad_norm": 1.768224521566013, + "learning_rate": 1.3434137430532314e-05, + "loss": 0.7592, + "step": 13272 + }, + { + "epoch": 0.406797842343999, + "grad_norm": 1.3131178816825153, + "learning_rate": 1.3433205145850197e-05, + "loss": 0.7022, + "step": 13273 + }, + { + "epoch": 0.4068284908667402, + "grad_norm": 1.40097206079956, + "learning_rate": 1.3432272827339824e-05, + "loss": 0.7894, + "step": 13274 + }, + { + "epoch": 0.40685913938948143, + "grad_norm": 1.4373431954908689, + "learning_rate": 1.3431340475010396e-05, + "loss": 0.7094, + "step": 13275 + }, + { + "epoch": 0.40688978791222263, + "grad_norm": 1.385939324751185, + "learning_rate": 1.3430408088871086e-05, + "loss": 0.7512, + "step": 13276 + }, + { + "epoch": 0.40692043643496384, + "grad_norm": 1.5726818656568426, + "learning_rate": 1.3429475668931091e-05, + "loss": 0.7496, + "step": 13277 + }, + { + "epoch": 0.40695108495770504, + "grad_norm": 1.5464767303322533, + "learning_rate": 1.3428543215199594e-05, + "loss": 0.7909, + "step": 13278 + }, + { + "epoch": 0.40698173348044625, + "grad_norm": 1.5681517584129139, + "learning_rate": 1.3427610727685785e-05, + "loss": 0.7279, + "step": 13279 + }, + { + "epoch": 0.40701238200318746, + "grad_norm": 1.5595341232682205, + "learning_rate": 1.3426678206398847e-05, + "loss": 0.8044, + "step": 13280 + }, + { + "epoch": 0.40704303052592866, + "grad_norm": 1.5997893805036107, + "learning_rate": 1.3425745651347974e-05, + "loss": 0.8026, + "step": 13281 + }, + { + "epoch": 0.40707367904866987, + "grad_norm": 1.299585602348517, + "learning_rate": 1.3424813062542353e-05, + "loss": 0.6507, + "step": 13282 + }, + { + "epoch": 0.4071043275714111, + "grad_norm": 1.476226939736359, + "learning_rate": 1.342388043999117e-05, + "loss": 0.7454, + "step": 13283 + }, + { + "epoch": 0.4071349760941523, + "grad_norm": 1.0182482730279443, + "learning_rate": 1.3422947783703624e-05, + "loss": 0.5942, + "step": 13284 + }, + { + "epoch": 0.4071656246168935, + "grad_norm": 1.4249988876525297, + "learning_rate": 1.3422015093688889e-05, + "loss": 0.7733, + "step": 13285 + }, + { + "epoch": 0.4071962731396347, + "grad_norm": 1.2424185516599713, + "learning_rate": 1.3421082369956168e-05, + "loss": 0.6609, + "step": 13286 + }, + { + "epoch": 0.4072269216623759, + "grad_norm": 1.3158121834575938, + "learning_rate": 1.3420149612514645e-05, + "loss": 0.686, + "step": 13287 + }, + { + "epoch": 0.4072575701851171, + "grad_norm": 1.5492882459531259, + "learning_rate": 1.3419216821373518e-05, + "loss": 0.7816, + "step": 13288 + }, + { + "epoch": 0.4072882187078583, + "grad_norm": 1.3916516599758968, + "learning_rate": 1.3418283996541967e-05, + "loss": 0.668, + "step": 13289 + }, + { + "epoch": 0.4073188672305995, + "grad_norm": 1.5658713639156698, + "learning_rate": 1.3417351138029193e-05, + "loss": 0.6412, + "step": 13290 + }, + { + "epoch": 0.40734951575334066, + "grad_norm": 1.278398385456231, + "learning_rate": 1.3416418245844381e-05, + "loss": 0.6808, + "step": 13291 + }, + { + "epoch": 0.40738016427608187, + "grad_norm": 1.604893919710233, + "learning_rate": 1.3415485319996728e-05, + "loss": 0.8608, + "step": 13292 + }, + { + "epoch": 0.4074108127988231, + "grad_norm": 1.5379473634639051, + "learning_rate": 1.3414552360495424e-05, + "loss": 0.7182, + "step": 13293 + }, + { + "epoch": 0.4074414613215643, + "grad_norm": 1.3732945733237867, + "learning_rate": 1.3413619367349658e-05, + "loss": 0.7569, + "step": 13294 + }, + { + "epoch": 0.4074721098443055, + "grad_norm": 0.7469392752379044, + "learning_rate": 1.341268634056863e-05, + "loss": 0.5915, + "step": 13295 + }, + { + "epoch": 0.4075027583670467, + "grad_norm": 1.373129093964528, + "learning_rate": 1.3411753280161532e-05, + "loss": 0.8399, + "step": 13296 + }, + { + "epoch": 0.4075334068897879, + "grad_norm": 1.2654707308558142, + "learning_rate": 1.3410820186137557e-05, + "loss": 0.7119, + "step": 13297 + }, + { + "epoch": 0.4075640554125291, + "grad_norm": 1.602452252218933, + "learning_rate": 1.3409887058505893e-05, + "loss": 0.7621, + "step": 13298 + }, + { + "epoch": 0.4075947039352703, + "grad_norm": 1.428415868888199, + "learning_rate": 1.3408953897275743e-05, + "loss": 0.7769, + "step": 13299 + }, + { + "epoch": 0.4076253524580115, + "grad_norm": 1.3755040468986839, + "learning_rate": 1.3408020702456298e-05, + "loss": 0.7417, + "step": 13300 + }, + { + "epoch": 0.4076560009807527, + "grad_norm": 1.3592134657710229, + "learning_rate": 1.340708747405675e-05, + "loss": 0.7681, + "step": 13301 + }, + { + "epoch": 0.4076866495034939, + "grad_norm": 0.6904002566004468, + "learning_rate": 1.3406154212086299e-05, + "loss": 0.5831, + "step": 13302 + }, + { + "epoch": 0.40771729802623513, + "grad_norm": 1.4175752037629599, + "learning_rate": 1.340522091655414e-05, + "loss": 0.7277, + "step": 13303 + }, + { + "epoch": 0.40774794654897634, + "grad_norm": 1.5651711061368827, + "learning_rate": 1.340428758746947e-05, + "loss": 0.7824, + "step": 13304 + }, + { + "epoch": 0.40777859507171754, + "grad_norm": 1.3028294006574466, + "learning_rate": 1.3403354224841481e-05, + "loss": 0.7582, + "step": 13305 + }, + { + "epoch": 0.40780924359445875, + "grad_norm": 1.3929402421592254, + "learning_rate": 1.340242082867937e-05, + "loss": 0.6617, + "step": 13306 + }, + { + "epoch": 0.40783989211719995, + "grad_norm": 1.5147167431240536, + "learning_rate": 1.3401487398992337e-05, + "loss": 0.6459, + "step": 13307 + }, + { + "epoch": 0.40787054063994116, + "grad_norm": 1.4446902682942384, + "learning_rate": 1.3400553935789579e-05, + "loss": 0.7383, + "step": 13308 + }, + { + "epoch": 0.40790118916268236, + "grad_norm": 1.354440830778981, + "learning_rate": 1.3399620439080292e-05, + "loss": 0.6567, + "step": 13309 + }, + { + "epoch": 0.40793183768542357, + "grad_norm": 1.6128366885635324, + "learning_rate": 1.3398686908873679e-05, + "loss": 0.7254, + "step": 13310 + }, + { + "epoch": 0.4079624862081648, + "grad_norm": 1.408721640126275, + "learning_rate": 1.339775334517893e-05, + "loss": 0.7445, + "step": 13311 + }, + { + "epoch": 0.407993134730906, + "grad_norm": 1.5149807198639385, + "learning_rate": 1.3396819748005252e-05, + "loss": 0.6551, + "step": 13312 + }, + { + "epoch": 0.4080237832536472, + "grad_norm": 1.3639382812831713, + "learning_rate": 1.3395886117361836e-05, + "loss": 0.8214, + "step": 13313 + }, + { + "epoch": 0.4080544317763884, + "grad_norm": 1.363645530357144, + "learning_rate": 1.3394952453257886e-05, + "loss": 0.7176, + "step": 13314 + }, + { + "epoch": 0.4080850802991296, + "grad_norm": 1.6588422276188837, + "learning_rate": 1.3394018755702602e-05, + "loss": 0.7736, + "step": 13315 + }, + { + "epoch": 0.4081157288218708, + "grad_norm": 1.530513685227007, + "learning_rate": 1.3393085024705184e-05, + "loss": 0.7345, + "step": 13316 + }, + { + "epoch": 0.408146377344612, + "grad_norm": 1.4449917324008479, + "learning_rate": 1.3392151260274827e-05, + "loss": 0.7562, + "step": 13317 + }, + { + "epoch": 0.4081770258673532, + "grad_norm": 1.570936166642497, + "learning_rate": 1.3391217462420739e-05, + "loss": 0.7838, + "step": 13318 + }, + { + "epoch": 0.4082076743900944, + "grad_norm": 0.7097159229688123, + "learning_rate": 1.3390283631152116e-05, + "loss": 0.5638, + "step": 13319 + }, + { + "epoch": 0.4082383229128356, + "grad_norm": 1.5224243411764447, + "learning_rate": 1.3389349766478162e-05, + "loss": 0.6865, + "step": 13320 + }, + { + "epoch": 0.40826897143557683, + "grad_norm": 1.5091972268614544, + "learning_rate": 1.3388415868408076e-05, + "loss": 0.8073, + "step": 13321 + }, + { + "epoch": 0.408299619958318, + "grad_norm": 1.5088302892740226, + "learning_rate": 1.3387481936951063e-05, + "loss": 0.6982, + "step": 13322 + }, + { + "epoch": 0.4083302684810592, + "grad_norm": 0.6725335630627627, + "learning_rate": 1.3386547972116323e-05, + "loss": 0.6357, + "step": 13323 + }, + { + "epoch": 0.4083609170038004, + "grad_norm": 1.3466374306674327, + "learning_rate": 1.338561397391306e-05, + "loss": 0.7356, + "step": 13324 + }, + { + "epoch": 0.4083915655265416, + "grad_norm": 1.5666641502465737, + "learning_rate": 1.3384679942350478e-05, + "loss": 0.6905, + "step": 13325 + }, + { + "epoch": 0.4084222140492828, + "grad_norm": 1.5410668365266507, + "learning_rate": 1.3383745877437774e-05, + "loss": 0.8234, + "step": 13326 + }, + { + "epoch": 0.408452862572024, + "grad_norm": 1.350811129710963, + "learning_rate": 1.3382811779184162e-05, + "loss": 0.6875, + "step": 13327 + }, + { + "epoch": 0.4084835110947652, + "grad_norm": 1.4774995491290186, + "learning_rate": 1.3381877647598835e-05, + "loss": 0.6465, + "step": 13328 + }, + { + "epoch": 0.4085141596175064, + "grad_norm": 0.6976691543774693, + "learning_rate": 1.3380943482691005e-05, + "loss": 0.6281, + "step": 13329 + }, + { + "epoch": 0.40854480814024763, + "grad_norm": 1.489745703303143, + "learning_rate": 1.338000928446987e-05, + "loss": 0.777, + "step": 13330 + }, + { + "epoch": 0.40857545666298883, + "grad_norm": 1.6649979792370624, + "learning_rate": 1.3379075052944645e-05, + "loss": 0.7701, + "step": 13331 + }, + { + "epoch": 0.40860610518573004, + "grad_norm": 1.665143838487501, + "learning_rate": 1.3378140788124522e-05, + "loss": 0.7772, + "step": 13332 + }, + { + "epoch": 0.40863675370847125, + "grad_norm": 1.280095740054702, + "learning_rate": 1.3377206490018719e-05, + "loss": 0.7646, + "step": 13333 + }, + { + "epoch": 0.40866740223121245, + "grad_norm": 1.551323640841043, + "learning_rate": 1.3376272158636431e-05, + "loss": 0.7375, + "step": 13334 + }, + { + "epoch": 0.40869805075395366, + "grad_norm": 1.233340671423264, + "learning_rate": 1.3375337793986875e-05, + "loss": 0.7076, + "step": 13335 + }, + { + "epoch": 0.40872869927669486, + "grad_norm": 1.5859460466322761, + "learning_rate": 1.337440339607925e-05, + "loss": 0.751, + "step": 13336 + }, + { + "epoch": 0.40875934779943607, + "grad_norm": 1.58910292876451, + "learning_rate": 1.3373468964922762e-05, + "loss": 0.7835, + "step": 13337 + }, + { + "epoch": 0.4087899963221773, + "grad_norm": 1.4944825978212544, + "learning_rate": 1.3372534500526628e-05, + "loss": 0.7259, + "step": 13338 + }, + { + "epoch": 0.4088206448449185, + "grad_norm": 1.43312425812613, + "learning_rate": 1.3371600002900045e-05, + "loss": 0.8606, + "step": 13339 + }, + { + "epoch": 0.4088512933676597, + "grad_norm": 1.4640235486986442, + "learning_rate": 1.3370665472052222e-05, + "loss": 0.7263, + "step": 13340 + }, + { + "epoch": 0.4088819418904009, + "grad_norm": 1.5352744065204929, + "learning_rate": 1.336973090799237e-05, + "loss": 0.7499, + "step": 13341 + }, + { + "epoch": 0.4089125904131421, + "grad_norm": 1.4927059341234008, + "learning_rate": 1.3368796310729704e-05, + "loss": 0.8228, + "step": 13342 + }, + { + "epoch": 0.4089432389358833, + "grad_norm": 1.5166689713568484, + "learning_rate": 1.336786168027342e-05, + "loss": 0.842, + "step": 13343 + }, + { + "epoch": 0.4089738874586245, + "grad_norm": 1.3790691211585735, + "learning_rate": 1.3366927016632733e-05, + "loss": 0.694, + "step": 13344 + }, + { + "epoch": 0.4090045359813657, + "grad_norm": 0.700833363971249, + "learning_rate": 1.3365992319816853e-05, + "loss": 0.615, + "step": 13345 + }, + { + "epoch": 0.4090351845041069, + "grad_norm": 1.7462647130699713, + "learning_rate": 1.336505758983499e-05, + "loss": 0.7835, + "step": 13346 + }, + { + "epoch": 0.4090658330268481, + "grad_norm": 1.4731788318643408, + "learning_rate": 1.3364122826696355e-05, + "loss": 0.7091, + "step": 13347 + }, + { + "epoch": 0.40909648154958933, + "grad_norm": 0.6578172425404601, + "learning_rate": 1.3363188030410156e-05, + "loss": 0.6081, + "step": 13348 + }, + { + "epoch": 0.40912713007233054, + "grad_norm": 0.6787886264757136, + "learning_rate": 1.3362253200985605e-05, + "loss": 0.6078, + "step": 13349 + }, + { + "epoch": 0.40915777859507174, + "grad_norm": 1.2589693852832657, + "learning_rate": 1.3361318338431912e-05, + "loss": 0.789, + "step": 13350 + }, + { + "epoch": 0.40918842711781295, + "grad_norm": 1.5740670779744914, + "learning_rate": 1.3360383442758291e-05, + "loss": 0.7287, + "step": 13351 + }, + { + "epoch": 0.40921907564055415, + "grad_norm": 1.3261692626048083, + "learning_rate": 1.3359448513973949e-05, + "loss": 0.7439, + "step": 13352 + }, + { + "epoch": 0.4092497241632953, + "grad_norm": 1.4838536594572473, + "learning_rate": 1.3358513552088104e-05, + "loss": 0.7777, + "step": 13353 + }, + { + "epoch": 0.4092803726860365, + "grad_norm": 1.3971033844108047, + "learning_rate": 1.3357578557109965e-05, + "loss": 0.7445, + "step": 13354 + }, + { + "epoch": 0.4093110212087777, + "grad_norm": 1.5360271501110576, + "learning_rate": 1.3356643529048747e-05, + "loss": 0.7253, + "step": 13355 + }, + { + "epoch": 0.4093416697315189, + "grad_norm": 1.5476455031482421, + "learning_rate": 1.335570846791366e-05, + "loss": 0.7766, + "step": 13356 + }, + { + "epoch": 0.4093723182542601, + "grad_norm": 1.311492833948135, + "learning_rate": 1.3354773373713919e-05, + "loss": 0.779, + "step": 13357 + }, + { + "epoch": 0.40940296677700133, + "grad_norm": 1.3416799748166246, + "learning_rate": 1.3353838246458737e-05, + "loss": 0.7848, + "step": 13358 + }, + { + "epoch": 0.40943361529974254, + "grad_norm": 0.7310545485914572, + "learning_rate": 1.3352903086157329e-05, + "loss": 0.595, + "step": 13359 + }, + { + "epoch": 0.40946426382248374, + "grad_norm": 1.399663496124342, + "learning_rate": 1.3351967892818908e-05, + "loss": 0.6417, + "step": 13360 + }, + { + "epoch": 0.40949491234522495, + "grad_norm": 0.6969515323574441, + "learning_rate": 1.3351032666452693e-05, + "loss": 0.6341, + "step": 13361 + }, + { + "epoch": 0.40952556086796615, + "grad_norm": 1.594864274338797, + "learning_rate": 1.3350097407067893e-05, + "loss": 0.7565, + "step": 13362 + }, + { + "epoch": 0.40955620939070736, + "grad_norm": 1.4571140935537366, + "learning_rate": 1.3349162114673725e-05, + "loss": 0.671, + "step": 13363 + }, + { + "epoch": 0.40958685791344857, + "grad_norm": 1.4222368664949347, + "learning_rate": 1.3348226789279408e-05, + "loss": 0.698, + "step": 13364 + }, + { + "epoch": 0.40961750643618977, + "grad_norm": 1.4698772403407752, + "learning_rate": 1.3347291430894156e-05, + "loss": 0.763, + "step": 13365 + }, + { + "epoch": 0.409648154958931, + "grad_norm": 1.6106389884707866, + "learning_rate": 1.3346356039527183e-05, + "loss": 0.6975, + "step": 13366 + }, + { + "epoch": 0.4096788034816722, + "grad_norm": 1.478359567952837, + "learning_rate": 1.334542061518771e-05, + "loss": 0.8152, + "step": 13367 + }, + { + "epoch": 0.4097094520044134, + "grad_norm": 0.7325066432447377, + "learning_rate": 1.334448515788495e-05, + "loss": 0.5878, + "step": 13368 + }, + { + "epoch": 0.4097401005271546, + "grad_norm": 1.5791850263666405, + "learning_rate": 1.3343549667628124e-05, + "loss": 0.7767, + "step": 13369 + }, + { + "epoch": 0.4097707490498958, + "grad_norm": 1.3434607865035832, + "learning_rate": 1.3342614144426446e-05, + "loss": 0.7496, + "step": 13370 + }, + { + "epoch": 0.409801397572637, + "grad_norm": 1.6332800479649277, + "learning_rate": 1.3341678588289135e-05, + "loss": 0.832, + "step": 13371 + }, + { + "epoch": 0.4098320460953782, + "grad_norm": 1.4406148310003815, + "learning_rate": 1.334074299922541e-05, + "loss": 0.7526, + "step": 13372 + }, + { + "epoch": 0.4098626946181194, + "grad_norm": 1.65845683463617, + "learning_rate": 1.3339807377244492e-05, + "loss": 0.7334, + "step": 13373 + }, + { + "epoch": 0.4098933431408606, + "grad_norm": 1.409998431958273, + "learning_rate": 1.3338871722355595e-05, + "loss": 0.7738, + "step": 13374 + }, + { + "epoch": 0.4099239916636018, + "grad_norm": 1.3872532955281367, + "learning_rate": 1.3337936034567941e-05, + "loss": 0.6118, + "step": 13375 + }, + { + "epoch": 0.40995464018634303, + "grad_norm": 1.4375363191011903, + "learning_rate": 1.3337000313890752e-05, + "loss": 0.7796, + "step": 13376 + }, + { + "epoch": 0.40998528870908424, + "grad_norm": 1.379063676814525, + "learning_rate": 1.3336064560333243e-05, + "loss": 0.6837, + "step": 13377 + }, + { + "epoch": 0.41001593723182544, + "grad_norm": 1.3303464004992325, + "learning_rate": 1.3335128773904633e-05, + "loss": 0.753, + "step": 13378 + }, + { + "epoch": 0.41004658575456665, + "grad_norm": 1.384820020705774, + "learning_rate": 1.333419295461415e-05, + "loss": 0.7671, + "step": 13379 + }, + { + "epoch": 0.41007723427730786, + "grad_norm": 1.4460581030282098, + "learning_rate": 1.3333257102471007e-05, + "loss": 0.7595, + "step": 13380 + }, + { + "epoch": 0.41010788280004906, + "grad_norm": 1.5129759825330866, + "learning_rate": 1.3332321217484434e-05, + "loss": 0.7655, + "step": 13381 + }, + { + "epoch": 0.41013853132279027, + "grad_norm": 1.2553291898457146, + "learning_rate": 1.3331385299663644e-05, + "loss": 0.7052, + "step": 13382 + }, + { + "epoch": 0.4101691798455315, + "grad_norm": 1.4565809041768354, + "learning_rate": 1.3330449349017864e-05, + "loss": 0.6939, + "step": 13383 + }, + { + "epoch": 0.4101998283682726, + "grad_norm": 1.3788372392182988, + "learning_rate": 1.3329513365556312e-05, + "loss": 0.7292, + "step": 13384 + }, + { + "epoch": 0.41023047689101383, + "grad_norm": 1.3922715683188784, + "learning_rate": 1.3328577349288217e-05, + "loss": 0.7066, + "step": 13385 + }, + { + "epoch": 0.41026112541375503, + "grad_norm": 1.56206695742331, + "learning_rate": 1.3327641300222793e-05, + "loss": 0.6663, + "step": 13386 + }, + { + "epoch": 0.41029177393649624, + "grad_norm": 1.494779068344091, + "learning_rate": 1.332670521836927e-05, + "loss": 0.7596, + "step": 13387 + }, + { + "epoch": 0.41032242245923745, + "grad_norm": 1.5518729400901572, + "learning_rate": 1.3325769103736873e-05, + "loss": 0.7668, + "step": 13388 + }, + { + "epoch": 0.41035307098197865, + "grad_norm": 1.6258407518488756, + "learning_rate": 1.3324832956334815e-05, + "loss": 0.7788, + "step": 13389 + }, + { + "epoch": 0.41038371950471986, + "grad_norm": 1.475958284837192, + "learning_rate": 1.3323896776172331e-05, + "loss": 0.735, + "step": 13390 + }, + { + "epoch": 0.41041436802746106, + "grad_norm": 1.4519919351856583, + "learning_rate": 1.332296056325864e-05, + "loss": 0.6995, + "step": 13391 + }, + { + "epoch": 0.41044501655020227, + "grad_norm": 0.7193747312618837, + "learning_rate": 1.3322024317602974e-05, + "loss": 0.6105, + "step": 13392 + }, + { + "epoch": 0.4104756650729435, + "grad_norm": 1.5713208186469703, + "learning_rate": 1.3321088039214545e-05, + "loss": 0.6919, + "step": 13393 + }, + { + "epoch": 0.4105063135956847, + "grad_norm": 1.3670311444424896, + "learning_rate": 1.3320151728102589e-05, + "loss": 0.6823, + "step": 13394 + }, + { + "epoch": 0.4105369621184259, + "grad_norm": 1.46716206529454, + "learning_rate": 1.3319215384276328e-05, + "loss": 0.6342, + "step": 13395 + }, + { + "epoch": 0.4105676106411671, + "grad_norm": 1.379739933642185, + "learning_rate": 1.3318279007744992e-05, + "loss": 0.7449, + "step": 13396 + }, + { + "epoch": 0.4105982591639083, + "grad_norm": 1.3170543792994946, + "learning_rate": 1.3317342598517797e-05, + "loss": 0.7546, + "step": 13397 + }, + { + "epoch": 0.4106289076866495, + "grad_norm": 1.3417571549112997, + "learning_rate": 1.3316406156603983e-05, + "loss": 0.7725, + "step": 13398 + }, + { + "epoch": 0.4106595562093907, + "grad_norm": 0.6638050284329441, + "learning_rate": 1.3315469682012765e-05, + "loss": 0.5821, + "step": 13399 + }, + { + "epoch": 0.4106902047321319, + "grad_norm": 1.3644890751896117, + "learning_rate": 1.3314533174753383e-05, + "loss": 0.7193, + "step": 13400 + }, + { + "epoch": 0.4107208532548731, + "grad_norm": 1.3671314625349082, + "learning_rate": 1.3313596634835051e-05, + "loss": 0.6917, + "step": 13401 + }, + { + "epoch": 0.4107515017776143, + "grad_norm": 1.4379845668731663, + "learning_rate": 1.3312660062267006e-05, + "loss": 0.6995, + "step": 13402 + }, + { + "epoch": 0.41078215030035553, + "grad_norm": 1.6601377001725446, + "learning_rate": 1.3311723457058475e-05, + "loss": 0.7888, + "step": 13403 + }, + { + "epoch": 0.41081279882309674, + "grad_norm": 1.4599617137414465, + "learning_rate": 1.3310786819218684e-05, + "loss": 0.8041, + "step": 13404 + }, + { + "epoch": 0.41084344734583794, + "grad_norm": 1.4089504242105981, + "learning_rate": 1.3309850148756864e-05, + "loss": 0.7798, + "step": 13405 + }, + { + "epoch": 0.41087409586857915, + "grad_norm": 1.3740555252391615, + "learning_rate": 1.3308913445682241e-05, + "loss": 0.7816, + "step": 13406 + }, + { + "epoch": 0.41090474439132035, + "grad_norm": 1.3686118761554356, + "learning_rate": 1.3307976710004051e-05, + "loss": 0.7162, + "step": 13407 + }, + { + "epoch": 0.41093539291406156, + "grad_norm": 1.3926844774906617, + "learning_rate": 1.3307039941731519e-05, + "loss": 0.6791, + "step": 13408 + }, + { + "epoch": 0.41096604143680276, + "grad_norm": 1.55601282799898, + "learning_rate": 1.3306103140873876e-05, + "loss": 0.8605, + "step": 13409 + }, + { + "epoch": 0.41099668995954397, + "grad_norm": 1.6209909132339533, + "learning_rate": 1.3305166307440352e-05, + "loss": 0.6874, + "step": 13410 + }, + { + "epoch": 0.4110273384822852, + "grad_norm": 1.4904920864149342, + "learning_rate": 1.330422944144018e-05, + "loss": 0.7973, + "step": 13411 + }, + { + "epoch": 0.4110579870050264, + "grad_norm": 1.340339510847293, + "learning_rate": 1.330329254288259e-05, + "loss": 0.7265, + "step": 13412 + }, + { + "epoch": 0.4110886355277676, + "grad_norm": 1.2438566236382893, + "learning_rate": 1.3302355611776814e-05, + "loss": 0.6465, + "step": 13413 + }, + { + "epoch": 0.4111192840505088, + "grad_norm": 1.394199864197518, + "learning_rate": 1.3301418648132081e-05, + "loss": 0.6801, + "step": 13414 + }, + { + "epoch": 0.41114993257324994, + "grad_norm": 0.6720451185963997, + "learning_rate": 1.3300481651957626e-05, + "loss": 0.593, + "step": 13415 + }, + { + "epoch": 0.41118058109599115, + "grad_norm": 1.5908659686018876, + "learning_rate": 1.3299544623262681e-05, + "loss": 0.7799, + "step": 13416 + }, + { + "epoch": 0.41121122961873235, + "grad_norm": 1.4984848451439676, + "learning_rate": 1.3298607562056479e-05, + "loss": 0.7349, + "step": 13417 + }, + { + "epoch": 0.41124187814147356, + "grad_norm": 1.6276828423481873, + "learning_rate": 1.3297670468348251e-05, + "loss": 0.727, + "step": 13418 + }, + { + "epoch": 0.41127252666421477, + "grad_norm": 1.440887724606106, + "learning_rate": 1.3296733342147234e-05, + "loss": 0.8118, + "step": 13419 + }, + { + "epoch": 0.41130317518695597, + "grad_norm": 1.5453826510714161, + "learning_rate": 1.3295796183462662e-05, + "loss": 0.7824, + "step": 13420 + }, + { + "epoch": 0.4113338237096972, + "grad_norm": 1.4642316541842955, + "learning_rate": 1.329485899230376e-05, + "loss": 0.7676, + "step": 13421 + }, + { + "epoch": 0.4113644722324384, + "grad_norm": 1.3384501408486484, + "learning_rate": 1.3293921768679776e-05, + "loss": 0.7578, + "step": 13422 + }, + { + "epoch": 0.4113951207551796, + "grad_norm": 1.4175201120027703, + "learning_rate": 1.3292984512599936e-05, + "loss": 0.735, + "step": 13423 + }, + { + "epoch": 0.4114257692779208, + "grad_norm": 1.6317778056915448, + "learning_rate": 1.3292047224073477e-05, + "loss": 0.7874, + "step": 13424 + }, + { + "epoch": 0.411456417800662, + "grad_norm": 1.6353509383472233, + "learning_rate": 1.3291109903109634e-05, + "loss": 0.8213, + "step": 13425 + }, + { + "epoch": 0.4114870663234032, + "grad_norm": 1.5319505658231112, + "learning_rate": 1.3290172549717644e-05, + "loss": 0.8771, + "step": 13426 + }, + { + "epoch": 0.4115177148461444, + "grad_norm": 1.41399770393006, + "learning_rate": 1.3289235163906742e-05, + "loss": 0.7268, + "step": 13427 + }, + { + "epoch": 0.4115483633688856, + "grad_norm": 1.5641599452302826, + "learning_rate": 1.3288297745686163e-05, + "loss": 0.7371, + "step": 13428 + }, + { + "epoch": 0.4115790118916268, + "grad_norm": 1.9051138320904029, + "learning_rate": 1.3287360295065143e-05, + "loss": 0.7728, + "step": 13429 + }, + { + "epoch": 0.41160966041436803, + "grad_norm": 1.413171522989676, + "learning_rate": 1.3286422812052927e-05, + "loss": 0.6801, + "step": 13430 + }, + { + "epoch": 0.41164030893710923, + "grad_norm": 1.2485710224890658, + "learning_rate": 1.3285485296658742e-05, + "loss": 0.6876, + "step": 13431 + }, + { + "epoch": 0.41167095745985044, + "grad_norm": 1.3011766501207775, + "learning_rate": 1.3284547748891829e-05, + "loss": 0.7566, + "step": 13432 + }, + { + "epoch": 0.41170160598259165, + "grad_norm": 1.418097594184807, + "learning_rate": 1.328361016876143e-05, + "loss": 0.7892, + "step": 13433 + }, + { + "epoch": 0.41173225450533285, + "grad_norm": 1.5247190194493476, + "learning_rate": 1.3282672556276775e-05, + "loss": 0.7037, + "step": 13434 + }, + { + "epoch": 0.41176290302807406, + "grad_norm": 1.5423641938387447, + "learning_rate": 1.3281734911447113e-05, + "loss": 0.688, + "step": 13435 + }, + { + "epoch": 0.41179355155081526, + "grad_norm": 1.3891971463428208, + "learning_rate": 1.3280797234281673e-05, + "loss": 0.7633, + "step": 13436 + }, + { + "epoch": 0.41182420007355647, + "grad_norm": 1.5826957162535236, + "learning_rate": 1.32798595247897e-05, + "loss": 0.689, + "step": 13437 + }, + { + "epoch": 0.4118548485962977, + "grad_norm": 1.6328070136484012, + "learning_rate": 1.3278921782980434e-05, + "loss": 0.6994, + "step": 13438 + }, + { + "epoch": 0.4118854971190389, + "grad_norm": 1.4765915091994937, + "learning_rate": 1.3277984008863109e-05, + "loss": 0.732, + "step": 13439 + }, + { + "epoch": 0.4119161456417801, + "grad_norm": 1.4458328214383132, + "learning_rate": 1.327704620244697e-05, + "loss": 0.7264, + "step": 13440 + }, + { + "epoch": 0.4119467941645213, + "grad_norm": 1.5908945745447325, + "learning_rate": 1.3276108363741259e-05, + "loss": 0.7919, + "step": 13441 + }, + { + "epoch": 0.4119774426872625, + "grad_norm": 1.341073480300468, + "learning_rate": 1.327517049275521e-05, + "loss": 0.8418, + "step": 13442 + }, + { + "epoch": 0.4120080912100037, + "grad_norm": 1.4784327987012236, + "learning_rate": 1.327423258949807e-05, + "loss": 0.8047, + "step": 13443 + }, + { + "epoch": 0.4120387397327449, + "grad_norm": 1.3986635395074407, + "learning_rate": 1.3273294653979079e-05, + "loss": 0.6808, + "step": 13444 + }, + { + "epoch": 0.4120693882554861, + "grad_norm": 1.3712887045119073, + "learning_rate": 1.3272356686207477e-05, + "loss": 0.6681, + "step": 13445 + }, + { + "epoch": 0.41210003677822726, + "grad_norm": 1.525309432391444, + "learning_rate": 1.327141868619251e-05, + "loss": 0.7754, + "step": 13446 + }, + { + "epoch": 0.41213068530096847, + "grad_norm": 1.4047324625524813, + "learning_rate": 1.3270480653943415e-05, + "loss": 0.7564, + "step": 13447 + }, + { + "epoch": 0.4121613338237097, + "grad_norm": 0.6994838875257984, + "learning_rate": 1.3269542589469437e-05, + "loss": 0.6075, + "step": 13448 + }, + { + "epoch": 0.4121919823464509, + "grad_norm": 1.342359554350191, + "learning_rate": 1.326860449277982e-05, + "loss": 0.6759, + "step": 13449 + }, + { + "epoch": 0.4122226308691921, + "grad_norm": 1.5687414581134889, + "learning_rate": 1.326766636388381e-05, + "loss": 0.754, + "step": 13450 + }, + { + "epoch": 0.4122532793919333, + "grad_norm": 0.6838747280052992, + "learning_rate": 1.3266728202790643e-05, + "loss": 0.5752, + "step": 13451 + }, + { + "epoch": 0.4122839279146745, + "grad_norm": 1.5674421972158321, + "learning_rate": 1.326579000950957e-05, + "loss": 0.7887, + "step": 13452 + }, + { + "epoch": 0.4123145764374157, + "grad_norm": 1.4261208045598395, + "learning_rate": 1.3264851784049829e-05, + "loss": 0.7283, + "step": 13453 + }, + { + "epoch": 0.4123452249601569, + "grad_norm": 0.684701966940186, + "learning_rate": 1.326391352642067e-05, + "loss": 0.6016, + "step": 13454 + }, + { + "epoch": 0.4123758734828981, + "grad_norm": 1.4481929003734997, + "learning_rate": 1.3262975236631337e-05, + "loss": 0.7, + "step": 13455 + }, + { + "epoch": 0.4124065220056393, + "grad_norm": 1.4965369691912398, + "learning_rate": 1.3262036914691072e-05, + "loss": 0.7417, + "step": 13456 + }, + { + "epoch": 0.4124371705283805, + "grad_norm": 1.2664750643662512, + "learning_rate": 1.3261098560609122e-05, + "loss": 0.7002, + "step": 13457 + }, + { + "epoch": 0.41246781905112173, + "grad_norm": 1.4186889768855713, + "learning_rate": 1.3260160174394735e-05, + "loss": 0.6707, + "step": 13458 + }, + { + "epoch": 0.41249846757386294, + "grad_norm": 0.7103264132018015, + "learning_rate": 1.3259221756057158e-05, + "loss": 0.6059, + "step": 13459 + }, + { + "epoch": 0.41252911609660414, + "grad_norm": 0.6780084444750052, + "learning_rate": 1.325828330560563e-05, + "loss": 0.5936, + "step": 13460 + }, + { + "epoch": 0.41255976461934535, + "grad_norm": 1.3425241924612894, + "learning_rate": 1.3257344823049407e-05, + "loss": 0.6627, + "step": 13461 + }, + { + "epoch": 0.41259041314208655, + "grad_norm": 1.6879432533598142, + "learning_rate": 1.325640630839773e-05, + "loss": 0.7738, + "step": 13462 + }, + { + "epoch": 0.41262106166482776, + "grad_norm": 1.3994918498676268, + "learning_rate": 1.3255467761659851e-05, + "loss": 0.688, + "step": 13463 + }, + { + "epoch": 0.41265171018756897, + "grad_norm": 1.2845152311921266, + "learning_rate": 1.3254529182845014e-05, + "loss": 0.7294, + "step": 13464 + }, + { + "epoch": 0.41268235871031017, + "grad_norm": 1.5584210013572446, + "learning_rate": 1.325359057196247e-05, + "loss": 0.8168, + "step": 13465 + }, + { + "epoch": 0.4127130072330514, + "grad_norm": 1.4662275890866583, + "learning_rate": 1.3252651929021461e-05, + "loss": 0.7265, + "step": 13466 + }, + { + "epoch": 0.4127436557557926, + "grad_norm": 1.3549002385894529, + "learning_rate": 1.3251713254031242e-05, + "loss": 0.7463, + "step": 13467 + }, + { + "epoch": 0.4127743042785338, + "grad_norm": 0.6820460364188068, + "learning_rate": 1.3250774547001059e-05, + "loss": 0.6164, + "step": 13468 + }, + { + "epoch": 0.412804952801275, + "grad_norm": 1.3793769674662308, + "learning_rate": 1.3249835807940168e-05, + "loss": 0.7204, + "step": 13469 + }, + { + "epoch": 0.4128356013240162, + "grad_norm": 1.4244024946241287, + "learning_rate": 1.324889703685781e-05, + "loss": 0.7693, + "step": 13470 + }, + { + "epoch": 0.4128662498467574, + "grad_norm": 1.4508604909370326, + "learning_rate": 1.3247958233763239e-05, + "loss": 0.6951, + "step": 13471 + }, + { + "epoch": 0.4128968983694986, + "grad_norm": 1.3166314255782947, + "learning_rate": 1.3247019398665702e-05, + "loss": 0.7669, + "step": 13472 + }, + { + "epoch": 0.4129275468922398, + "grad_norm": 0.6777658956694228, + "learning_rate": 1.3246080531574454e-05, + "loss": 0.5891, + "step": 13473 + }, + { + "epoch": 0.412958195414981, + "grad_norm": 1.5113619807293723, + "learning_rate": 1.3245141632498743e-05, + "loss": 0.7848, + "step": 13474 + }, + { + "epoch": 0.4129888439377222, + "grad_norm": 1.3747213717771094, + "learning_rate": 1.3244202701447821e-05, + "loss": 0.7221, + "step": 13475 + }, + { + "epoch": 0.41301949246046343, + "grad_norm": 1.5524106138012637, + "learning_rate": 1.3243263738430943e-05, + "loss": 0.7209, + "step": 13476 + }, + { + "epoch": 0.4130501409832046, + "grad_norm": 1.39759651839721, + "learning_rate": 1.3242324743457356e-05, + "loss": 0.7682, + "step": 13477 + }, + { + "epoch": 0.4130807895059458, + "grad_norm": 1.5591465662526356, + "learning_rate": 1.3241385716536313e-05, + "loss": 0.6156, + "step": 13478 + }, + { + "epoch": 0.413111438028687, + "grad_norm": 1.2468464919983249, + "learning_rate": 1.3240446657677067e-05, + "loss": 0.6554, + "step": 13479 + }, + { + "epoch": 0.4131420865514282, + "grad_norm": 1.537328712432054, + "learning_rate": 1.323950756688887e-05, + "loss": 0.7399, + "step": 13480 + }, + { + "epoch": 0.4131727350741694, + "grad_norm": 1.47247359470985, + "learning_rate": 1.3238568444180977e-05, + "loss": 0.6991, + "step": 13481 + }, + { + "epoch": 0.4132033835969106, + "grad_norm": 1.5461604208596493, + "learning_rate": 1.323762928956264e-05, + "loss": 0.7519, + "step": 13482 + }, + { + "epoch": 0.4132340321196518, + "grad_norm": 1.3232221282030325, + "learning_rate": 1.3236690103043116e-05, + "loss": 0.6731, + "step": 13483 + }, + { + "epoch": 0.413264680642393, + "grad_norm": 1.5205422476470292, + "learning_rate": 1.3235750884631653e-05, + "loss": 0.7439, + "step": 13484 + }, + { + "epoch": 0.41329532916513423, + "grad_norm": 1.5875464056551722, + "learning_rate": 1.323481163433751e-05, + "loss": 0.722, + "step": 13485 + }, + { + "epoch": 0.41332597768787543, + "grad_norm": 1.397302732198422, + "learning_rate": 1.3233872352169939e-05, + "loss": 0.7743, + "step": 13486 + }, + { + "epoch": 0.41335662621061664, + "grad_norm": 1.3232275199590182, + "learning_rate": 1.3232933038138197e-05, + "loss": 0.6003, + "step": 13487 + }, + { + "epoch": 0.41338727473335785, + "grad_norm": 1.4475702659555367, + "learning_rate": 1.323199369225154e-05, + "loss": 0.7214, + "step": 13488 + }, + { + "epoch": 0.41341792325609905, + "grad_norm": 1.3722157780513555, + "learning_rate": 1.3231054314519222e-05, + "loss": 0.8058, + "step": 13489 + }, + { + "epoch": 0.41344857177884026, + "grad_norm": 1.5045061935936974, + "learning_rate": 1.3230114904950498e-05, + "loss": 0.7553, + "step": 13490 + }, + { + "epoch": 0.41347922030158146, + "grad_norm": 1.5550578814757614, + "learning_rate": 1.3229175463554627e-05, + "loss": 0.7903, + "step": 13491 + }, + { + "epoch": 0.41350986882432267, + "grad_norm": 0.6967130809664015, + "learning_rate": 1.3228235990340861e-05, + "loss": 0.5882, + "step": 13492 + }, + { + "epoch": 0.4135405173470639, + "grad_norm": 1.400860289096064, + "learning_rate": 1.3227296485318464e-05, + "loss": 0.7689, + "step": 13493 + }, + { + "epoch": 0.4135711658698051, + "grad_norm": 1.4659542251936797, + "learning_rate": 1.3226356948496683e-05, + "loss": 0.7972, + "step": 13494 + }, + { + "epoch": 0.4136018143925463, + "grad_norm": 1.3890205846388675, + "learning_rate": 1.3225417379884787e-05, + "loss": 0.7098, + "step": 13495 + }, + { + "epoch": 0.4136324629152875, + "grad_norm": 1.5405504982085059, + "learning_rate": 1.3224477779492026e-05, + "loss": 0.7597, + "step": 13496 + }, + { + "epoch": 0.4136631114380287, + "grad_norm": 1.4052788065069453, + "learning_rate": 1.3223538147327661e-05, + "loss": 0.6688, + "step": 13497 + }, + { + "epoch": 0.4136937599607699, + "grad_norm": 1.4768595122570467, + "learning_rate": 1.3222598483400948e-05, + "loss": 0.8181, + "step": 13498 + }, + { + "epoch": 0.4137244084835111, + "grad_norm": 1.3681493550686483, + "learning_rate": 1.322165878772115e-05, + "loss": 0.6358, + "step": 13499 + }, + { + "epoch": 0.4137550570062523, + "grad_norm": 1.3975915205297733, + "learning_rate": 1.3220719060297525e-05, + "loss": 0.7262, + "step": 13500 + }, + { + "epoch": 0.4137857055289935, + "grad_norm": 1.51963752885707, + "learning_rate": 1.3219779301139326e-05, + "loss": 0.741, + "step": 13501 + }, + { + "epoch": 0.4138163540517347, + "grad_norm": 1.4142449060859301, + "learning_rate": 1.3218839510255822e-05, + "loss": 0.6813, + "step": 13502 + }, + { + "epoch": 0.41384700257447593, + "grad_norm": 1.4449809017588275, + "learning_rate": 1.3217899687656265e-05, + "loss": 0.8257, + "step": 13503 + }, + { + "epoch": 0.41387765109721714, + "grad_norm": 1.4138830465178114, + "learning_rate": 1.3216959833349923e-05, + "loss": 0.7203, + "step": 13504 + }, + { + "epoch": 0.41390829961995834, + "grad_norm": 1.5009393178225545, + "learning_rate": 1.321601994734605e-05, + "loss": 0.7233, + "step": 13505 + }, + { + "epoch": 0.41393894814269955, + "grad_norm": 1.2893336743131827, + "learning_rate": 1.3215080029653912e-05, + "loss": 0.7257, + "step": 13506 + }, + { + "epoch": 0.41396959666544075, + "grad_norm": 0.679006456203732, + "learning_rate": 1.3214140080282764e-05, + "loss": 0.596, + "step": 13507 + }, + { + "epoch": 0.4140002451881819, + "grad_norm": 1.2831564181545643, + "learning_rate": 1.3213200099241876e-05, + "loss": 0.6591, + "step": 13508 + }, + { + "epoch": 0.4140308937109231, + "grad_norm": 1.6156551180619758, + "learning_rate": 1.3212260086540502e-05, + "loss": 0.8587, + "step": 13509 + }, + { + "epoch": 0.4140615422336643, + "grad_norm": 1.5169552054558533, + "learning_rate": 1.3211320042187909e-05, + "loss": 0.8361, + "step": 13510 + }, + { + "epoch": 0.4140921907564055, + "grad_norm": 1.379337377008207, + "learning_rate": 1.3210379966193357e-05, + "loss": 0.8171, + "step": 13511 + }, + { + "epoch": 0.4141228392791467, + "grad_norm": 1.340634581124556, + "learning_rate": 1.3209439858566112e-05, + "loss": 0.721, + "step": 13512 + }, + { + "epoch": 0.41415348780188793, + "grad_norm": 1.4791640189683657, + "learning_rate": 1.3208499719315434e-05, + "loss": 0.704, + "step": 13513 + }, + { + "epoch": 0.41418413632462914, + "grad_norm": 1.3069157033384222, + "learning_rate": 1.3207559548450584e-05, + "loss": 0.7082, + "step": 13514 + }, + { + "epoch": 0.41421478484737034, + "grad_norm": 1.4736463470989325, + "learning_rate": 1.3206619345980833e-05, + "loss": 0.7259, + "step": 13515 + }, + { + "epoch": 0.41424543337011155, + "grad_norm": 1.4306334253560793, + "learning_rate": 1.320567911191544e-05, + "loss": 0.8655, + "step": 13516 + }, + { + "epoch": 0.41427608189285275, + "grad_norm": 1.4274245049161094, + "learning_rate": 1.3204738846263672e-05, + "loss": 0.7695, + "step": 13517 + }, + { + "epoch": 0.41430673041559396, + "grad_norm": 1.3908307499814268, + "learning_rate": 1.3203798549034788e-05, + "loss": 0.6835, + "step": 13518 + }, + { + "epoch": 0.41433737893833517, + "grad_norm": 1.404235006822273, + "learning_rate": 1.3202858220238062e-05, + "loss": 0.7367, + "step": 13519 + }, + { + "epoch": 0.41436802746107637, + "grad_norm": 1.5927553946592046, + "learning_rate": 1.3201917859882752e-05, + "loss": 0.6984, + "step": 13520 + }, + { + "epoch": 0.4143986759838176, + "grad_norm": 1.4211620461828645, + "learning_rate": 1.3200977467978127e-05, + "loss": 0.6964, + "step": 13521 + }, + { + "epoch": 0.4144293245065588, + "grad_norm": 1.2278887793562858, + "learning_rate": 1.3200037044533449e-05, + "loss": 0.6873, + "step": 13522 + }, + { + "epoch": 0.4144599730293, + "grad_norm": 1.616421509750203, + "learning_rate": 1.3199096589557992e-05, + "loss": 0.6777, + "step": 13523 + }, + { + "epoch": 0.4144906215520412, + "grad_norm": 1.6004919859037987, + "learning_rate": 1.3198156103061012e-05, + "loss": 0.7466, + "step": 13524 + }, + { + "epoch": 0.4145212700747824, + "grad_norm": 1.503509503027031, + "learning_rate": 1.3197215585051786e-05, + "loss": 0.7137, + "step": 13525 + }, + { + "epoch": 0.4145519185975236, + "grad_norm": 0.7049202182951766, + "learning_rate": 1.3196275035539574e-05, + "loss": 0.5913, + "step": 13526 + }, + { + "epoch": 0.4145825671202648, + "grad_norm": 0.6863306458028305, + "learning_rate": 1.319533445453365e-05, + "loss": 0.6128, + "step": 13527 + }, + { + "epoch": 0.414613215643006, + "grad_norm": 1.4071711940278742, + "learning_rate": 1.3194393842043275e-05, + "loss": 0.7666, + "step": 13528 + }, + { + "epoch": 0.4146438641657472, + "grad_norm": 1.3837739885591698, + "learning_rate": 1.3193453198077721e-05, + "loss": 0.8074, + "step": 13529 + }, + { + "epoch": 0.41467451268848843, + "grad_norm": 1.3993218390902495, + "learning_rate": 1.3192512522646256e-05, + "loss": 0.7108, + "step": 13530 + }, + { + "epoch": 0.41470516121122963, + "grad_norm": 1.3923785930527137, + "learning_rate": 1.3191571815758148e-05, + "loss": 0.7359, + "step": 13531 + }, + { + "epoch": 0.41473580973397084, + "grad_norm": 1.48959752376347, + "learning_rate": 1.3190631077422667e-05, + "loss": 0.8616, + "step": 13532 + }, + { + "epoch": 0.41476645825671205, + "grad_norm": 0.6509801742698499, + "learning_rate": 1.3189690307649081e-05, + "loss": 0.6106, + "step": 13533 + }, + { + "epoch": 0.41479710677945325, + "grad_norm": 1.4100726526407465, + "learning_rate": 1.3188749506446661e-05, + "loss": 0.6352, + "step": 13534 + }, + { + "epoch": 0.41482775530219446, + "grad_norm": 1.5289933885951936, + "learning_rate": 1.3187808673824675e-05, + "loss": 0.7188, + "step": 13535 + }, + { + "epoch": 0.41485840382493566, + "grad_norm": 0.6678459330112072, + "learning_rate": 1.3186867809792397e-05, + "loss": 0.6053, + "step": 13536 + }, + { + "epoch": 0.41488905234767687, + "grad_norm": 1.4109283685313494, + "learning_rate": 1.3185926914359091e-05, + "loss": 0.7053, + "step": 13537 + }, + { + "epoch": 0.4149197008704181, + "grad_norm": 1.513926202140031, + "learning_rate": 1.3184985987534033e-05, + "loss": 0.6297, + "step": 13538 + }, + { + "epoch": 0.4149503493931592, + "grad_norm": 1.438638114822213, + "learning_rate": 1.3184045029326496e-05, + "loss": 0.7376, + "step": 13539 + }, + { + "epoch": 0.41498099791590043, + "grad_norm": 0.6913386986299582, + "learning_rate": 1.3183104039745744e-05, + "loss": 0.6356, + "step": 13540 + }, + { + "epoch": 0.41501164643864163, + "grad_norm": 1.2976038027534744, + "learning_rate": 1.3182163018801058e-05, + "loss": 0.7592, + "step": 13541 + }, + { + "epoch": 0.41504229496138284, + "grad_norm": 1.491626086098497, + "learning_rate": 1.3181221966501706e-05, + "loss": 0.793, + "step": 13542 + }, + { + "epoch": 0.41507294348412405, + "grad_norm": 1.4976764736890589, + "learning_rate": 1.3180280882856957e-05, + "loss": 0.6928, + "step": 13543 + }, + { + "epoch": 0.41510359200686525, + "grad_norm": 1.5063569133102723, + "learning_rate": 1.3179339767876086e-05, + "loss": 0.7536, + "step": 13544 + }, + { + "epoch": 0.41513424052960646, + "grad_norm": 1.4706624948338953, + "learning_rate": 1.3178398621568369e-05, + "loss": 0.7324, + "step": 13545 + }, + { + "epoch": 0.41516488905234766, + "grad_norm": 1.3712392604419803, + "learning_rate": 1.3177457443943076e-05, + "loss": 0.76, + "step": 13546 + }, + { + "epoch": 0.41519553757508887, + "grad_norm": 1.4266140767899138, + "learning_rate": 1.3176516235009485e-05, + "loss": 0.7304, + "step": 13547 + }, + { + "epoch": 0.4152261860978301, + "grad_norm": 1.4714611191981055, + "learning_rate": 1.3175574994776862e-05, + "loss": 0.7414, + "step": 13548 + }, + { + "epoch": 0.4152568346205713, + "grad_norm": 1.3331402223242106, + "learning_rate": 1.3174633723254489e-05, + "loss": 0.6918, + "step": 13549 + }, + { + "epoch": 0.4152874831433125, + "grad_norm": 1.5518383714429083, + "learning_rate": 1.3173692420451638e-05, + "loss": 0.8166, + "step": 13550 + }, + { + "epoch": 0.4153181316660537, + "grad_norm": 1.459194189317132, + "learning_rate": 1.3172751086377582e-05, + "loss": 0.6656, + "step": 13551 + }, + { + "epoch": 0.4153487801887949, + "grad_norm": 1.3853647958882676, + "learning_rate": 1.3171809721041597e-05, + "loss": 0.7586, + "step": 13552 + }, + { + "epoch": 0.4153794287115361, + "grad_norm": 0.7381690966234942, + "learning_rate": 1.317086832445296e-05, + "loss": 0.6027, + "step": 13553 + }, + { + "epoch": 0.4154100772342773, + "grad_norm": 1.335294377747204, + "learning_rate": 1.3169926896620948e-05, + "loss": 0.6745, + "step": 13554 + }, + { + "epoch": 0.4154407257570185, + "grad_norm": 1.5165671787692871, + "learning_rate": 1.3168985437554833e-05, + "loss": 0.7488, + "step": 13555 + }, + { + "epoch": 0.4154713742797597, + "grad_norm": 0.6863495094298816, + "learning_rate": 1.3168043947263895e-05, + "loss": 0.612, + "step": 13556 + }, + { + "epoch": 0.4155020228025009, + "grad_norm": 1.5056927572099572, + "learning_rate": 1.3167102425757408e-05, + "loss": 0.7748, + "step": 13557 + }, + { + "epoch": 0.41553267132524213, + "grad_norm": 1.3558215974819865, + "learning_rate": 1.3166160873044653e-05, + "loss": 0.6192, + "step": 13558 + }, + { + "epoch": 0.41556331984798334, + "grad_norm": 1.5478048744880442, + "learning_rate": 1.31652192891349e-05, + "loss": 0.7982, + "step": 13559 + }, + { + "epoch": 0.41559396837072454, + "grad_norm": 1.6391588593831383, + "learning_rate": 1.3164277674037438e-05, + "loss": 0.7793, + "step": 13560 + }, + { + "epoch": 0.41562461689346575, + "grad_norm": 1.573845240803215, + "learning_rate": 1.3163336027761534e-05, + "loss": 0.762, + "step": 13561 + }, + { + "epoch": 0.41565526541620695, + "grad_norm": 1.3574368899126064, + "learning_rate": 1.3162394350316472e-05, + "loss": 0.76, + "step": 13562 + }, + { + "epoch": 0.41568591393894816, + "grad_norm": 1.576774232751027, + "learning_rate": 1.316145264171153e-05, + "loss": 0.6657, + "step": 13563 + }, + { + "epoch": 0.41571656246168937, + "grad_norm": 0.7040089350995097, + "learning_rate": 1.3160510901955984e-05, + "loss": 0.5995, + "step": 13564 + }, + { + "epoch": 0.41574721098443057, + "grad_norm": 1.41809831779214, + "learning_rate": 1.3159569131059116e-05, + "loss": 0.8567, + "step": 13565 + }, + { + "epoch": 0.4157778595071718, + "grad_norm": 1.385132369233286, + "learning_rate": 1.3158627329030206e-05, + "loss": 0.6681, + "step": 13566 + }, + { + "epoch": 0.415808508029913, + "grad_norm": 1.446633830236573, + "learning_rate": 1.3157685495878534e-05, + "loss": 0.7255, + "step": 13567 + }, + { + "epoch": 0.4158391565526542, + "grad_norm": 1.3138759665166062, + "learning_rate": 1.3156743631613378e-05, + "loss": 0.6981, + "step": 13568 + }, + { + "epoch": 0.4158698050753954, + "grad_norm": 1.5108683781118568, + "learning_rate": 1.3155801736244019e-05, + "loss": 0.8325, + "step": 13569 + }, + { + "epoch": 0.41590045359813654, + "grad_norm": 0.6658057402876228, + "learning_rate": 1.3154859809779736e-05, + "loss": 0.5864, + "step": 13570 + }, + { + "epoch": 0.41593110212087775, + "grad_norm": 0.6822421644935238, + "learning_rate": 1.3153917852229814e-05, + "loss": 0.6142, + "step": 13571 + }, + { + "epoch": 0.41596175064361895, + "grad_norm": 0.6753883938935749, + "learning_rate": 1.3152975863603532e-05, + "loss": 0.6008, + "step": 13572 + }, + { + "epoch": 0.41599239916636016, + "grad_norm": 1.5010981344193581, + "learning_rate": 1.3152033843910175e-05, + "loss": 0.7112, + "step": 13573 + }, + { + "epoch": 0.41602304768910137, + "grad_norm": 1.3739098461949661, + "learning_rate": 1.3151091793159018e-05, + "loss": 0.6875, + "step": 13574 + }, + { + "epoch": 0.41605369621184257, + "grad_norm": 1.3472591117155905, + "learning_rate": 1.315014971135935e-05, + "loss": 0.6057, + "step": 13575 + }, + { + "epoch": 0.4160843447345838, + "grad_norm": 1.4590614792709535, + "learning_rate": 1.3149207598520448e-05, + "loss": 0.7111, + "step": 13576 + }, + { + "epoch": 0.416114993257325, + "grad_norm": 1.4090238762604619, + "learning_rate": 1.3148265454651602e-05, + "loss": 0.7607, + "step": 13577 + }, + { + "epoch": 0.4161456417800662, + "grad_norm": 1.388633440121098, + "learning_rate": 1.3147323279762087e-05, + "loss": 0.8217, + "step": 13578 + }, + { + "epoch": 0.4161762903028074, + "grad_norm": 1.5938400353590438, + "learning_rate": 1.3146381073861191e-05, + "loss": 0.7259, + "step": 13579 + }, + { + "epoch": 0.4162069388255486, + "grad_norm": 1.6157457217682187, + "learning_rate": 1.3145438836958197e-05, + "loss": 0.7469, + "step": 13580 + }, + { + "epoch": 0.4162375873482898, + "grad_norm": 1.4215947780111524, + "learning_rate": 1.3144496569062392e-05, + "loss": 0.7566, + "step": 13581 + }, + { + "epoch": 0.416268235871031, + "grad_norm": 1.4904131099695481, + "learning_rate": 1.3143554270183056e-05, + "loss": 0.6909, + "step": 13582 + }, + { + "epoch": 0.4162988843937722, + "grad_norm": 1.530120826960557, + "learning_rate": 1.3142611940329475e-05, + "loss": 0.6416, + "step": 13583 + }, + { + "epoch": 0.4163295329165134, + "grad_norm": 1.3272795746806982, + "learning_rate": 1.3141669579510937e-05, + "loss": 0.6604, + "step": 13584 + }, + { + "epoch": 0.41636018143925463, + "grad_norm": 1.3180547438810506, + "learning_rate": 1.3140727187736721e-05, + "loss": 0.7365, + "step": 13585 + }, + { + "epoch": 0.41639082996199583, + "grad_norm": 1.4385866919656713, + "learning_rate": 1.3139784765016121e-05, + "loss": 0.8076, + "step": 13586 + }, + { + "epoch": 0.41642147848473704, + "grad_norm": 1.5137294984493546, + "learning_rate": 1.3138842311358414e-05, + "loss": 0.7871, + "step": 13587 + }, + { + "epoch": 0.41645212700747825, + "grad_norm": 1.3372579477005382, + "learning_rate": 1.3137899826772892e-05, + "loss": 0.6368, + "step": 13588 + }, + { + "epoch": 0.41648277553021945, + "grad_norm": 1.4165905753448949, + "learning_rate": 1.313695731126884e-05, + "loss": 0.6997, + "step": 13589 + }, + { + "epoch": 0.41651342405296066, + "grad_norm": 1.461362822467788, + "learning_rate": 1.3136014764855546e-05, + "loss": 0.7937, + "step": 13590 + }, + { + "epoch": 0.41654407257570186, + "grad_norm": 1.823857078666072, + "learning_rate": 1.3135072187542294e-05, + "loss": 0.6799, + "step": 13591 + }, + { + "epoch": 0.41657472109844307, + "grad_norm": 1.3047198456392466, + "learning_rate": 1.3134129579338377e-05, + "loss": 0.7218, + "step": 13592 + }, + { + "epoch": 0.4166053696211843, + "grad_norm": 1.3897995540129262, + "learning_rate": 1.3133186940253077e-05, + "loss": 0.7348, + "step": 13593 + }, + { + "epoch": 0.4166360181439255, + "grad_norm": 1.5874881712912865, + "learning_rate": 1.3132244270295686e-05, + "loss": 0.8172, + "step": 13594 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 1.5728594049178528, + "learning_rate": 1.313130156947549e-05, + "loss": 0.6542, + "step": 13595 + }, + { + "epoch": 0.4166973151894079, + "grad_norm": 1.6123527035122238, + "learning_rate": 1.3130358837801778e-05, + "loss": 0.8055, + "step": 13596 + }, + { + "epoch": 0.4167279637121491, + "grad_norm": 1.396424950884994, + "learning_rate": 1.312941607528384e-05, + "loss": 0.6854, + "step": 13597 + }, + { + "epoch": 0.4167586122348903, + "grad_norm": 1.3350588170801359, + "learning_rate": 1.3128473281930965e-05, + "loss": 0.6887, + "step": 13598 + }, + { + "epoch": 0.4167892607576315, + "grad_norm": 1.5499137036211128, + "learning_rate": 1.312753045775244e-05, + "loss": 0.7318, + "step": 13599 + }, + { + "epoch": 0.4168199092803727, + "grad_norm": 1.614097890949968, + "learning_rate": 1.3126587602757561e-05, + "loss": 0.8065, + "step": 13600 + }, + { + "epoch": 0.41685055780311386, + "grad_norm": 0.8102679987367076, + "learning_rate": 1.3125644716955613e-05, + "loss": 0.5796, + "step": 13601 + }, + { + "epoch": 0.41688120632585507, + "grad_norm": 1.4716571194662391, + "learning_rate": 1.3124701800355886e-05, + "loss": 0.6977, + "step": 13602 + }, + { + "epoch": 0.4169118548485963, + "grad_norm": 1.4219412876119446, + "learning_rate": 1.3123758852967675e-05, + "loss": 0.6784, + "step": 13603 + }, + { + "epoch": 0.4169425033713375, + "grad_norm": 1.624277527343507, + "learning_rate": 1.3122815874800265e-05, + "loss": 0.8394, + "step": 13604 + }, + { + "epoch": 0.4169731518940787, + "grad_norm": 1.3672112999206225, + "learning_rate": 1.3121872865862954e-05, + "loss": 0.6641, + "step": 13605 + }, + { + "epoch": 0.4170038004168199, + "grad_norm": 1.4408688137636467, + "learning_rate": 1.312092982616503e-05, + "loss": 0.6802, + "step": 13606 + }, + { + "epoch": 0.4170344489395611, + "grad_norm": 1.3555483481576573, + "learning_rate": 1.3119986755715789e-05, + "loss": 0.8185, + "step": 13607 + }, + { + "epoch": 0.4170650974623023, + "grad_norm": 1.6071235144368001, + "learning_rate": 1.3119043654524517e-05, + "loss": 0.7926, + "step": 13608 + }, + { + "epoch": 0.4170957459850435, + "grad_norm": 1.4715366291239167, + "learning_rate": 1.311810052260051e-05, + "loss": 0.8032, + "step": 13609 + }, + { + "epoch": 0.4171263945077847, + "grad_norm": 0.6667877001700195, + "learning_rate": 1.311715735995306e-05, + "loss": 0.5865, + "step": 13610 + }, + { + "epoch": 0.4171570430305259, + "grad_norm": 1.6347072330140116, + "learning_rate": 1.3116214166591462e-05, + "loss": 0.824, + "step": 13611 + }, + { + "epoch": 0.4171876915532671, + "grad_norm": 1.4447147175578332, + "learning_rate": 1.3115270942525012e-05, + "loss": 0.7654, + "step": 13612 + }, + { + "epoch": 0.41721834007600833, + "grad_norm": 1.4130855197110412, + "learning_rate": 1.3114327687762996e-05, + "loss": 0.7742, + "step": 13613 + }, + { + "epoch": 0.41724898859874954, + "grad_norm": 1.4871295280640955, + "learning_rate": 1.3113384402314712e-05, + "loss": 0.7025, + "step": 13614 + }, + { + "epoch": 0.41727963712149074, + "grad_norm": 0.6560900061046762, + "learning_rate": 1.3112441086189455e-05, + "loss": 0.5951, + "step": 13615 + }, + { + "epoch": 0.41731028564423195, + "grad_norm": 1.340801012971335, + "learning_rate": 1.3111497739396523e-05, + "loss": 0.6666, + "step": 13616 + }, + { + "epoch": 0.41734093416697315, + "grad_norm": 1.4424632246877398, + "learning_rate": 1.3110554361945207e-05, + "loss": 0.7588, + "step": 13617 + }, + { + "epoch": 0.41737158268971436, + "grad_norm": 1.5523335906662588, + "learning_rate": 1.3109610953844803e-05, + "loss": 0.7467, + "step": 13618 + }, + { + "epoch": 0.41740223121245557, + "grad_norm": 1.4024836092834607, + "learning_rate": 1.3108667515104605e-05, + "loss": 0.7955, + "step": 13619 + }, + { + "epoch": 0.41743287973519677, + "grad_norm": 1.2652853553633456, + "learning_rate": 1.3107724045733911e-05, + "loss": 0.6527, + "step": 13620 + }, + { + "epoch": 0.417463528257938, + "grad_norm": 1.6512272999507869, + "learning_rate": 1.3106780545742015e-05, + "loss": 0.6735, + "step": 13621 + }, + { + "epoch": 0.4174941767806792, + "grad_norm": 1.5123814328139629, + "learning_rate": 1.3105837015138219e-05, + "loss": 0.6428, + "step": 13622 + }, + { + "epoch": 0.4175248253034204, + "grad_norm": 1.678419120200058, + "learning_rate": 1.3104893453931814e-05, + "loss": 0.7796, + "step": 13623 + }, + { + "epoch": 0.4175554738261616, + "grad_norm": 1.4018618558020337, + "learning_rate": 1.31039498621321e-05, + "loss": 0.6597, + "step": 13624 + }, + { + "epoch": 0.4175861223489028, + "grad_norm": 1.5410138497245174, + "learning_rate": 1.3103006239748372e-05, + "loss": 0.8149, + "step": 13625 + }, + { + "epoch": 0.417616770871644, + "grad_norm": 1.4089337758579197, + "learning_rate": 1.310206258678993e-05, + "loss": 0.7433, + "step": 13626 + }, + { + "epoch": 0.4176474193943852, + "grad_norm": 1.3868677625529546, + "learning_rate": 1.3101118903266077e-05, + "loss": 0.8453, + "step": 13627 + }, + { + "epoch": 0.4176780679171264, + "grad_norm": 1.5013220676554169, + "learning_rate": 1.31001751891861e-05, + "loss": 0.8138, + "step": 13628 + }, + { + "epoch": 0.4177087164398676, + "grad_norm": 1.4684796748638937, + "learning_rate": 1.3099231444559304e-05, + "loss": 0.7558, + "step": 13629 + }, + { + "epoch": 0.41773936496260883, + "grad_norm": 1.8133974226438063, + "learning_rate": 1.3098287669394989e-05, + "loss": 0.7945, + "step": 13630 + }, + { + "epoch": 0.41777001348535003, + "grad_norm": 0.6946819128875246, + "learning_rate": 1.3097343863702454e-05, + "loss": 0.579, + "step": 13631 + }, + { + "epoch": 0.4178006620080912, + "grad_norm": 1.4622387743885503, + "learning_rate": 1.3096400027490997e-05, + "loss": 0.684, + "step": 13632 + }, + { + "epoch": 0.4178313105308324, + "grad_norm": 1.5555276871521133, + "learning_rate": 1.3095456160769918e-05, + "loss": 0.7041, + "step": 13633 + }, + { + "epoch": 0.4178619590535736, + "grad_norm": 1.3990170440970948, + "learning_rate": 1.3094512263548516e-05, + "loss": 0.7649, + "step": 13634 + }, + { + "epoch": 0.4178926075763148, + "grad_norm": 1.4209067502712687, + "learning_rate": 1.3093568335836094e-05, + "loss": 0.6922, + "step": 13635 + }, + { + "epoch": 0.417923256099056, + "grad_norm": 1.364296935423353, + "learning_rate": 1.3092624377641951e-05, + "loss": 0.7466, + "step": 13636 + }, + { + "epoch": 0.4179539046217972, + "grad_norm": 1.398869759768568, + "learning_rate": 1.3091680388975387e-05, + "loss": 0.7267, + "step": 13637 + }, + { + "epoch": 0.4179845531445384, + "grad_norm": 1.4882918080100855, + "learning_rate": 1.309073636984571e-05, + "loss": 0.7785, + "step": 13638 + }, + { + "epoch": 0.4180152016672796, + "grad_norm": 0.6856158563880048, + "learning_rate": 1.3089792320262213e-05, + "loss": 0.5921, + "step": 13639 + }, + { + "epoch": 0.41804585019002083, + "grad_norm": 0.6793874462806397, + "learning_rate": 1.3088848240234206e-05, + "loss": 0.5836, + "step": 13640 + }, + { + "epoch": 0.41807649871276203, + "grad_norm": 1.6786375524211894, + "learning_rate": 1.308790412977098e-05, + "loss": 0.7819, + "step": 13641 + }, + { + "epoch": 0.41810714723550324, + "grad_norm": 1.4864143327876385, + "learning_rate": 1.3086959988881851e-05, + "loss": 0.8221, + "step": 13642 + }, + { + "epoch": 0.41813779575824445, + "grad_norm": 1.439298491696182, + "learning_rate": 1.3086015817576114e-05, + "loss": 0.7892, + "step": 13643 + }, + { + "epoch": 0.41816844428098565, + "grad_norm": 0.6895032320164967, + "learning_rate": 1.3085071615863072e-05, + "loss": 0.5876, + "step": 13644 + }, + { + "epoch": 0.41819909280372686, + "grad_norm": 0.693693447688407, + "learning_rate": 1.308412738375203e-05, + "loss": 0.6213, + "step": 13645 + }, + { + "epoch": 0.41822974132646806, + "grad_norm": 1.459962550862151, + "learning_rate": 1.3083183121252294e-05, + "loss": 0.6622, + "step": 13646 + }, + { + "epoch": 0.41826038984920927, + "grad_norm": 1.493079599571608, + "learning_rate": 1.3082238828373161e-05, + "loss": 0.8212, + "step": 13647 + }, + { + "epoch": 0.4182910383719505, + "grad_norm": 1.5460527295626112, + "learning_rate": 1.3081294505123944e-05, + "loss": 0.7074, + "step": 13648 + }, + { + "epoch": 0.4183216868946917, + "grad_norm": 1.525205980957685, + "learning_rate": 1.3080350151513943e-05, + "loss": 0.7624, + "step": 13649 + }, + { + "epoch": 0.4183523354174329, + "grad_norm": 1.2959933528811243, + "learning_rate": 1.3079405767552464e-05, + "loss": 0.7096, + "step": 13650 + }, + { + "epoch": 0.4183829839401741, + "grad_norm": 1.4519431008634431, + "learning_rate": 1.3078461353248813e-05, + "loss": 0.712, + "step": 13651 + }, + { + "epoch": 0.4184136324629153, + "grad_norm": 1.564124956287738, + "learning_rate": 1.3077516908612294e-05, + "loss": 0.7484, + "step": 13652 + }, + { + "epoch": 0.4184442809856565, + "grad_norm": 1.505784023592303, + "learning_rate": 1.3076572433652213e-05, + "loss": 0.7773, + "step": 13653 + }, + { + "epoch": 0.4184749295083977, + "grad_norm": 1.5523121940667035, + "learning_rate": 1.3075627928377876e-05, + "loss": 0.8188, + "step": 13654 + }, + { + "epoch": 0.4185055780311389, + "grad_norm": 1.4123555455864238, + "learning_rate": 1.307468339279859e-05, + "loss": 0.6521, + "step": 13655 + }, + { + "epoch": 0.4185362265538801, + "grad_norm": 1.4341197667572387, + "learning_rate": 1.3073738826923664e-05, + "loss": 0.7436, + "step": 13656 + }, + { + "epoch": 0.4185668750766213, + "grad_norm": 1.464981834357433, + "learning_rate": 1.30727942307624e-05, + "loss": 0.7943, + "step": 13657 + }, + { + "epoch": 0.41859752359936253, + "grad_norm": 1.3774372330250675, + "learning_rate": 1.3071849604324111e-05, + "loss": 0.7207, + "step": 13658 + }, + { + "epoch": 0.41862817212210374, + "grad_norm": 0.7327942423421704, + "learning_rate": 1.3070904947618101e-05, + "loss": 0.581, + "step": 13659 + }, + { + "epoch": 0.41865882064484494, + "grad_norm": 1.5952018035738995, + "learning_rate": 1.3069960260653679e-05, + "loss": 0.9018, + "step": 13660 + }, + { + "epoch": 0.41868946916758615, + "grad_norm": 1.4415220836177038, + "learning_rate": 1.3069015543440151e-05, + "loss": 0.764, + "step": 13661 + }, + { + "epoch": 0.41872011769032735, + "grad_norm": 1.4350523658708467, + "learning_rate": 1.306807079598683e-05, + "loss": 0.6737, + "step": 13662 + }, + { + "epoch": 0.4187507662130685, + "grad_norm": 1.5643342111348373, + "learning_rate": 1.306712601830302e-05, + "loss": 0.7826, + "step": 13663 + }, + { + "epoch": 0.4187814147358097, + "grad_norm": 1.3936810109152593, + "learning_rate": 1.3066181210398034e-05, + "loss": 0.741, + "step": 13664 + }, + { + "epoch": 0.4188120632585509, + "grad_norm": 1.4845118183450567, + "learning_rate": 1.3065236372281182e-05, + "loss": 0.7527, + "step": 13665 + }, + { + "epoch": 0.4188427117812921, + "grad_norm": 1.5316373322417127, + "learning_rate": 1.3064291503961771e-05, + "loss": 0.7002, + "step": 13666 + }, + { + "epoch": 0.4188733603040333, + "grad_norm": 1.6295101459802757, + "learning_rate": 1.306334660544911e-05, + "loss": 0.8329, + "step": 13667 + }, + { + "epoch": 0.41890400882677453, + "grad_norm": 1.3376449072618761, + "learning_rate": 1.306240167675251e-05, + "loss": 0.7187, + "step": 13668 + }, + { + "epoch": 0.41893465734951574, + "grad_norm": 1.4459021822832927, + "learning_rate": 1.3061456717881285e-05, + "loss": 0.753, + "step": 13669 + }, + { + "epoch": 0.41896530587225694, + "grad_norm": 1.424515409369692, + "learning_rate": 1.3060511728844744e-05, + "loss": 0.7098, + "step": 13670 + }, + { + "epoch": 0.41899595439499815, + "grad_norm": 1.4025467944500976, + "learning_rate": 1.3059566709652196e-05, + "loss": 0.7364, + "step": 13671 + }, + { + "epoch": 0.41902660291773935, + "grad_norm": 1.309613232666523, + "learning_rate": 1.3058621660312958e-05, + "loss": 0.6993, + "step": 13672 + }, + { + "epoch": 0.41905725144048056, + "grad_norm": 1.613576523413074, + "learning_rate": 1.3057676580836333e-05, + "loss": 0.7059, + "step": 13673 + }, + { + "epoch": 0.41908789996322177, + "grad_norm": 0.6687043774237591, + "learning_rate": 1.3056731471231643e-05, + "loss": 0.6006, + "step": 13674 + }, + { + "epoch": 0.41911854848596297, + "grad_norm": 1.4471059752670652, + "learning_rate": 1.3055786331508194e-05, + "loss": 0.7576, + "step": 13675 + }, + { + "epoch": 0.4191491970087042, + "grad_norm": 1.4375062097986981, + "learning_rate": 1.3054841161675301e-05, + "loss": 0.7746, + "step": 13676 + }, + { + "epoch": 0.4191798455314454, + "grad_norm": 1.4611490459981022, + "learning_rate": 1.3053895961742274e-05, + "loss": 0.7764, + "step": 13677 + }, + { + "epoch": 0.4192104940541866, + "grad_norm": 1.4855493478819026, + "learning_rate": 1.305295073171843e-05, + "loss": 0.7515, + "step": 13678 + }, + { + "epoch": 0.4192411425769278, + "grad_norm": 0.6819526091643878, + "learning_rate": 1.3052005471613081e-05, + "loss": 0.6237, + "step": 13679 + }, + { + "epoch": 0.419271791099669, + "grad_norm": 1.5632702421109745, + "learning_rate": 1.3051060181435542e-05, + "loss": 0.7051, + "step": 13680 + }, + { + "epoch": 0.4193024396224102, + "grad_norm": 1.4487650072666705, + "learning_rate": 1.3050114861195127e-05, + "loss": 0.8018, + "step": 13681 + }, + { + "epoch": 0.4193330881451514, + "grad_norm": 1.4312555717392836, + "learning_rate": 1.3049169510901147e-05, + "loss": 0.6631, + "step": 13682 + }, + { + "epoch": 0.4193637366678926, + "grad_norm": 0.7122055454259731, + "learning_rate": 1.3048224130562923e-05, + "loss": 0.6457, + "step": 13683 + }, + { + "epoch": 0.4193943851906338, + "grad_norm": 1.4670144071073987, + "learning_rate": 1.3047278720189764e-05, + "loss": 0.7277, + "step": 13684 + }, + { + "epoch": 0.41942503371337503, + "grad_norm": 1.5656490816573623, + "learning_rate": 1.3046333279790992e-05, + "loss": 0.6782, + "step": 13685 + }, + { + "epoch": 0.41945568223611623, + "grad_norm": 1.5057479264441165, + "learning_rate": 1.3045387809375916e-05, + "loss": 0.7795, + "step": 13686 + }, + { + "epoch": 0.41948633075885744, + "grad_norm": 0.6655781627422184, + "learning_rate": 1.3044442308953855e-05, + "loss": 0.6206, + "step": 13687 + }, + { + "epoch": 0.41951697928159865, + "grad_norm": 1.5267204537183967, + "learning_rate": 1.3043496778534123e-05, + "loss": 0.6946, + "step": 13688 + }, + { + "epoch": 0.41954762780433985, + "grad_norm": 1.5899531261748243, + "learning_rate": 1.3042551218126041e-05, + "loss": 0.792, + "step": 13689 + }, + { + "epoch": 0.41957827632708106, + "grad_norm": 1.5518967659915015, + "learning_rate": 1.3041605627738925e-05, + "loss": 0.7028, + "step": 13690 + }, + { + "epoch": 0.41960892484982226, + "grad_norm": 1.5475103882316763, + "learning_rate": 1.304066000738209e-05, + "loss": 0.7293, + "step": 13691 + }, + { + "epoch": 0.41963957337256347, + "grad_norm": 1.5307833758199845, + "learning_rate": 1.3039714357064848e-05, + "loss": 0.6827, + "step": 13692 + }, + { + "epoch": 0.4196702218953047, + "grad_norm": 1.6685617872636378, + "learning_rate": 1.3038768676796527e-05, + "loss": 0.7379, + "step": 13693 + }, + { + "epoch": 0.4197008704180458, + "grad_norm": 1.390323802379755, + "learning_rate": 1.3037822966586441e-05, + "loss": 0.7179, + "step": 13694 + }, + { + "epoch": 0.41973151894078703, + "grad_norm": 1.346714676167662, + "learning_rate": 1.3036877226443907e-05, + "loss": 0.7088, + "step": 13695 + }, + { + "epoch": 0.41976216746352824, + "grad_norm": 1.6159448623258832, + "learning_rate": 1.3035931456378248e-05, + "loss": 0.812, + "step": 13696 + }, + { + "epoch": 0.41979281598626944, + "grad_norm": 1.6857204494403295, + "learning_rate": 1.3034985656398776e-05, + "loss": 0.6997, + "step": 13697 + }, + { + "epoch": 0.41982346450901065, + "grad_norm": 1.4389617021174104, + "learning_rate": 1.3034039826514815e-05, + "loss": 0.761, + "step": 13698 + }, + { + "epoch": 0.41985411303175185, + "grad_norm": 1.3014232246378392, + "learning_rate": 1.3033093966735682e-05, + "loss": 0.7219, + "step": 13699 + }, + { + "epoch": 0.41988476155449306, + "grad_norm": 1.4404266984572898, + "learning_rate": 1.3032148077070703e-05, + "loss": 0.7615, + "step": 13700 + }, + { + "epoch": 0.41991541007723426, + "grad_norm": 1.3293242540223273, + "learning_rate": 1.3031202157529185e-05, + "loss": 0.6578, + "step": 13701 + }, + { + "epoch": 0.41994605859997547, + "grad_norm": 1.3319017753186608, + "learning_rate": 1.3030256208120465e-05, + "loss": 0.7167, + "step": 13702 + }, + { + "epoch": 0.4199767071227167, + "grad_norm": 1.4247190771320897, + "learning_rate": 1.3029310228853848e-05, + "loss": 0.7628, + "step": 13703 + }, + { + "epoch": 0.4200073556454579, + "grad_norm": 1.5139027626458312, + "learning_rate": 1.3028364219738666e-05, + "loss": 0.7544, + "step": 13704 + }, + { + "epoch": 0.4200380041681991, + "grad_norm": 0.7129066419255046, + "learning_rate": 1.3027418180784237e-05, + "loss": 0.6109, + "step": 13705 + }, + { + "epoch": 0.4200686526909403, + "grad_norm": 1.516552852899433, + "learning_rate": 1.302647211199988e-05, + "loss": 0.698, + "step": 13706 + }, + { + "epoch": 0.4200993012136815, + "grad_norm": 1.453921357864485, + "learning_rate": 1.302552601339492e-05, + "loss": 0.6957, + "step": 13707 + }, + { + "epoch": 0.4201299497364227, + "grad_norm": 1.6314552559896547, + "learning_rate": 1.3024579884978678e-05, + "loss": 0.8406, + "step": 13708 + }, + { + "epoch": 0.4201605982591639, + "grad_norm": 1.421391931585916, + "learning_rate": 1.3023633726760478e-05, + "loss": 0.7888, + "step": 13709 + }, + { + "epoch": 0.4201912467819051, + "grad_norm": 1.46683171745531, + "learning_rate": 1.3022687538749639e-05, + "loss": 0.7342, + "step": 13710 + }, + { + "epoch": 0.4202218953046463, + "grad_norm": 1.4324549675470066, + "learning_rate": 1.3021741320955488e-05, + "loss": 0.8338, + "step": 13711 + }, + { + "epoch": 0.4202525438273875, + "grad_norm": 1.3513469507859863, + "learning_rate": 1.3020795073387347e-05, + "loss": 0.704, + "step": 13712 + }, + { + "epoch": 0.42028319235012873, + "grad_norm": 0.686661574158379, + "learning_rate": 1.3019848796054537e-05, + "loss": 0.6114, + "step": 13713 + }, + { + "epoch": 0.42031384087286994, + "grad_norm": 1.6449333043141388, + "learning_rate": 1.3018902488966383e-05, + "loss": 0.7979, + "step": 13714 + }, + { + "epoch": 0.42034448939561114, + "grad_norm": 1.4062164770690255, + "learning_rate": 1.3017956152132214e-05, + "loss": 0.6905, + "step": 13715 + }, + { + "epoch": 0.42037513791835235, + "grad_norm": 0.6730047932666339, + "learning_rate": 1.301700978556135e-05, + "loss": 0.5857, + "step": 13716 + }, + { + "epoch": 0.42040578644109355, + "grad_norm": 1.4063333019918993, + "learning_rate": 1.3016063389263116e-05, + "loss": 0.7478, + "step": 13717 + }, + { + "epoch": 0.42043643496383476, + "grad_norm": 1.3405574586450852, + "learning_rate": 1.3015116963246837e-05, + "loss": 0.6303, + "step": 13718 + }, + { + "epoch": 0.42046708348657597, + "grad_norm": 1.4087559543201873, + "learning_rate": 1.301417050752184e-05, + "loss": 0.6566, + "step": 13719 + }, + { + "epoch": 0.42049773200931717, + "grad_norm": 1.4109965606203037, + "learning_rate": 1.301322402209745e-05, + "loss": 0.857, + "step": 13720 + }, + { + "epoch": 0.4205283805320584, + "grad_norm": 0.6884200049655335, + "learning_rate": 1.3012277506982991e-05, + "loss": 0.6241, + "step": 13721 + }, + { + "epoch": 0.4205590290547996, + "grad_norm": 1.400150749761807, + "learning_rate": 1.3011330962187794e-05, + "loss": 0.7561, + "step": 13722 + }, + { + "epoch": 0.4205896775775408, + "grad_norm": 1.3421204132287934, + "learning_rate": 1.301038438772118e-05, + "loss": 0.6446, + "step": 13723 + }, + { + "epoch": 0.420620326100282, + "grad_norm": 0.6890934509507181, + "learning_rate": 1.300943778359248e-05, + "loss": 0.6045, + "step": 13724 + }, + { + "epoch": 0.42065097462302314, + "grad_norm": 1.5976836311564868, + "learning_rate": 1.3008491149811017e-05, + "loss": 0.7987, + "step": 13725 + }, + { + "epoch": 0.42068162314576435, + "grad_norm": 0.6636143357317584, + "learning_rate": 1.3007544486386123e-05, + "loss": 0.6159, + "step": 13726 + }, + { + "epoch": 0.42071227166850556, + "grad_norm": 1.5346808862671244, + "learning_rate": 1.3006597793327125e-05, + "loss": 0.7096, + "step": 13727 + }, + { + "epoch": 0.42074292019124676, + "grad_norm": 1.5418431354734992, + "learning_rate": 1.3005651070643348e-05, + "loss": 0.7455, + "step": 13728 + }, + { + "epoch": 0.42077356871398797, + "grad_norm": 1.376082399452832, + "learning_rate": 1.3004704318344122e-05, + "loss": 0.7177, + "step": 13729 + }, + { + "epoch": 0.42080421723672917, + "grad_norm": 1.492561966364756, + "learning_rate": 1.3003757536438774e-05, + "loss": 0.7035, + "step": 13730 + }, + { + "epoch": 0.4208348657594704, + "grad_norm": 1.4023055607626775, + "learning_rate": 1.3002810724936639e-05, + "loss": 0.6336, + "step": 13731 + }, + { + "epoch": 0.4208655142822116, + "grad_norm": 1.463895044987575, + "learning_rate": 1.3001863883847038e-05, + "loss": 0.803, + "step": 13732 + }, + { + "epoch": 0.4208961628049528, + "grad_norm": 0.6727365985550968, + "learning_rate": 1.3000917013179303e-05, + "loss": 0.5725, + "step": 13733 + }, + { + "epoch": 0.420926811327694, + "grad_norm": 1.4892894598604518, + "learning_rate": 1.2999970112942767e-05, + "loss": 0.782, + "step": 13734 + }, + { + "epoch": 0.4209574598504352, + "grad_norm": 1.535508335421347, + "learning_rate": 1.299902318314676e-05, + "loss": 0.7239, + "step": 13735 + }, + { + "epoch": 0.4209881083731764, + "grad_norm": 1.5039989421514615, + "learning_rate": 1.2998076223800604e-05, + "loss": 0.704, + "step": 13736 + }, + { + "epoch": 0.4210187568959176, + "grad_norm": 0.6939987394602454, + "learning_rate": 1.2997129234913641e-05, + "loss": 0.5938, + "step": 13737 + }, + { + "epoch": 0.4210494054186588, + "grad_norm": 1.777270556062654, + "learning_rate": 1.2996182216495194e-05, + "loss": 0.7783, + "step": 13738 + }, + { + "epoch": 0.4210800539414, + "grad_norm": 1.3511618216252046, + "learning_rate": 1.2995235168554601e-05, + "loss": 0.6597, + "step": 13739 + }, + { + "epoch": 0.42111070246414123, + "grad_norm": 1.5145943049766517, + "learning_rate": 1.2994288091101186e-05, + "loss": 0.6991, + "step": 13740 + }, + { + "epoch": 0.42114135098688243, + "grad_norm": 1.4503911383103338, + "learning_rate": 1.2993340984144287e-05, + "loss": 0.7063, + "step": 13741 + }, + { + "epoch": 0.42117199950962364, + "grad_norm": 0.6765477287566737, + "learning_rate": 1.299239384769323e-05, + "loss": 0.6025, + "step": 13742 + }, + { + "epoch": 0.42120264803236485, + "grad_norm": 1.5151301252566913, + "learning_rate": 1.2991446681757354e-05, + "loss": 0.7316, + "step": 13743 + }, + { + "epoch": 0.42123329655510605, + "grad_norm": 1.748531361499106, + "learning_rate": 1.2990499486345987e-05, + "loss": 0.7308, + "step": 13744 + }, + { + "epoch": 0.42126394507784726, + "grad_norm": 1.5730823631652844, + "learning_rate": 1.2989552261468463e-05, + "loss": 0.7781, + "step": 13745 + }, + { + "epoch": 0.42129459360058846, + "grad_norm": 1.4570758181907992, + "learning_rate": 1.2988605007134115e-05, + "loss": 0.7415, + "step": 13746 + }, + { + "epoch": 0.42132524212332967, + "grad_norm": 1.4802673166645413, + "learning_rate": 1.2987657723352278e-05, + "loss": 0.7628, + "step": 13747 + }, + { + "epoch": 0.4213558906460709, + "grad_norm": 1.2926806676887568, + "learning_rate": 1.2986710410132285e-05, + "loss": 0.7481, + "step": 13748 + }, + { + "epoch": 0.4213865391688121, + "grad_norm": 1.210860413214572, + "learning_rate": 1.298576306748347e-05, + "loss": 0.6501, + "step": 13749 + }, + { + "epoch": 0.4214171876915533, + "grad_norm": 1.4957832923588747, + "learning_rate": 1.2984815695415169e-05, + "loss": 0.7275, + "step": 13750 + }, + { + "epoch": 0.4214478362142945, + "grad_norm": 1.40498146373143, + "learning_rate": 1.2983868293936715e-05, + "loss": 0.796, + "step": 13751 + }, + { + "epoch": 0.4214784847370357, + "grad_norm": 1.3049589908947177, + "learning_rate": 1.2982920863057442e-05, + "loss": 0.6315, + "step": 13752 + }, + { + "epoch": 0.4215091332597769, + "grad_norm": 1.278324496481481, + "learning_rate": 1.2981973402786685e-05, + "loss": 0.747, + "step": 13753 + }, + { + "epoch": 0.4215397817825181, + "grad_norm": 1.3523169962120336, + "learning_rate": 1.2981025913133787e-05, + "loss": 0.6877, + "step": 13754 + }, + { + "epoch": 0.4215704303052593, + "grad_norm": 1.4723921962840556, + "learning_rate": 1.2980078394108074e-05, + "loss": 0.7785, + "step": 13755 + }, + { + "epoch": 0.42160107882800046, + "grad_norm": 1.2985458779909755, + "learning_rate": 1.2979130845718885e-05, + "loss": 0.6867, + "step": 13756 + }, + { + "epoch": 0.42163172735074167, + "grad_norm": 1.2740324769351425, + "learning_rate": 1.2978183267975557e-05, + "loss": 0.7704, + "step": 13757 + }, + { + "epoch": 0.4216623758734829, + "grad_norm": 1.616811951095911, + "learning_rate": 1.297723566088743e-05, + "loss": 0.7103, + "step": 13758 + }, + { + "epoch": 0.4216930243962241, + "grad_norm": 1.412302081626516, + "learning_rate": 1.2976288024463836e-05, + "loss": 0.6849, + "step": 13759 + }, + { + "epoch": 0.4217236729189653, + "grad_norm": 0.6974960605084136, + "learning_rate": 1.2975340358714117e-05, + "loss": 0.6036, + "step": 13760 + }, + { + "epoch": 0.4217543214417065, + "grad_norm": 1.31150676442375, + "learning_rate": 1.2974392663647606e-05, + "loss": 0.5643, + "step": 13761 + }, + { + "epoch": 0.4217849699644477, + "grad_norm": 1.4595304522007824, + "learning_rate": 1.2973444939273645e-05, + "loss": 0.7572, + "step": 13762 + }, + { + "epoch": 0.4218156184871889, + "grad_norm": 0.6552814522452897, + "learning_rate": 1.297249718560157e-05, + "loss": 0.5802, + "step": 13763 + }, + { + "epoch": 0.4218462670099301, + "grad_norm": 1.5783615575580325, + "learning_rate": 1.2971549402640717e-05, + "loss": 0.7115, + "step": 13764 + }, + { + "epoch": 0.4218769155326713, + "grad_norm": 1.639810176528202, + "learning_rate": 1.297060159040043e-05, + "loss": 0.7419, + "step": 13765 + }, + { + "epoch": 0.4219075640554125, + "grad_norm": 1.4787756801472325, + "learning_rate": 1.2969653748890045e-05, + "loss": 0.7072, + "step": 13766 + }, + { + "epoch": 0.4219382125781537, + "grad_norm": 1.5158998814885967, + "learning_rate": 1.2968705878118901e-05, + "loss": 0.7878, + "step": 13767 + }, + { + "epoch": 0.42196886110089493, + "grad_norm": 1.503865884301283, + "learning_rate": 1.2967757978096338e-05, + "loss": 0.79, + "step": 13768 + }, + { + "epoch": 0.42199950962363614, + "grad_norm": 1.5149166340233375, + "learning_rate": 1.29668100488317e-05, + "loss": 0.7098, + "step": 13769 + }, + { + "epoch": 0.42203015814637734, + "grad_norm": 0.6749629493050245, + "learning_rate": 1.296586209033432e-05, + "loss": 0.6087, + "step": 13770 + }, + { + "epoch": 0.42206080666911855, + "grad_norm": 1.36119812733225, + "learning_rate": 1.2964914102613544e-05, + "loss": 0.6823, + "step": 13771 + }, + { + "epoch": 0.42209145519185975, + "grad_norm": 0.648798591358565, + "learning_rate": 1.2963966085678708e-05, + "loss": 0.5899, + "step": 13772 + }, + { + "epoch": 0.42212210371460096, + "grad_norm": 1.5472258929751528, + "learning_rate": 1.2963018039539158e-05, + "loss": 0.7545, + "step": 13773 + }, + { + "epoch": 0.42215275223734217, + "grad_norm": 1.3625823874561873, + "learning_rate": 1.2962069964204232e-05, + "loss": 0.789, + "step": 13774 + }, + { + "epoch": 0.42218340076008337, + "grad_norm": 1.5108745282030434, + "learning_rate": 1.2961121859683272e-05, + "loss": 0.756, + "step": 13775 + }, + { + "epoch": 0.4222140492828246, + "grad_norm": 0.6568880680843122, + "learning_rate": 1.2960173725985623e-05, + "loss": 0.6005, + "step": 13776 + }, + { + "epoch": 0.4222446978055658, + "grad_norm": 1.5678697188818427, + "learning_rate": 1.2959225563120623e-05, + "loss": 0.7473, + "step": 13777 + }, + { + "epoch": 0.422275346328307, + "grad_norm": 1.4467937331547625, + "learning_rate": 1.2958277371097619e-05, + "loss": 0.6548, + "step": 13778 + }, + { + "epoch": 0.4223059948510482, + "grad_norm": 1.4925542656026556, + "learning_rate": 1.2957329149925948e-05, + "loss": 0.7063, + "step": 13779 + }, + { + "epoch": 0.4223366433737894, + "grad_norm": 1.6361556358482665, + "learning_rate": 1.2956380899614957e-05, + "loss": 0.7719, + "step": 13780 + }, + { + "epoch": 0.4223672918965306, + "grad_norm": 1.5019692712346648, + "learning_rate": 1.2955432620173989e-05, + "loss": 0.7857, + "step": 13781 + }, + { + "epoch": 0.4223979404192718, + "grad_norm": 1.5172530639103774, + "learning_rate": 1.295448431161239e-05, + "loss": 0.8287, + "step": 13782 + }, + { + "epoch": 0.422428588942013, + "grad_norm": 1.5898057308022189, + "learning_rate": 1.2953535973939496e-05, + "loss": 0.7992, + "step": 13783 + }, + { + "epoch": 0.4224592374647542, + "grad_norm": 1.5488681504992388, + "learning_rate": 1.295258760716466e-05, + "loss": 0.817, + "step": 13784 + }, + { + "epoch": 0.42248988598749543, + "grad_norm": 1.355377143466934, + "learning_rate": 1.2951639211297222e-05, + "loss": 0.6589, + "step": 13785 + }, + { + "epoch": 0.42252053451023663, + "grad_norm": 1.6227448730775202, + "learning_rate": 1.2950690786346527e-05, + "loss": 0.8255, + "step": 13786 + }, + { + "epoch": 0.4225511830329778, + "grad_norm": 1.4898999701017344, + "learning_rate": 1.2949742332321919e-05, + "loss": 0.6827, + "step": 13787 + }, + { + "epoch": 0.422581831555719, + "grad_norm": 0.6646981466215368, + "learning_rate": 1.2948793849232747e-05, + "loss": 0.5591, + "step": 13788 + }, + { + "epoch": 0.4226124800784602, + "grad_norm": 1.4401250466540505, + "learning_rate": 1.2947845337088359e-05, + "loss": 0.6903, + "step": 13789 + }, + { + "epoch": 0.4226431286012014, + "grad_norm": 0.659096498337185, + "learning_rate": 1.294689679589809e-05, + "loss": 0.6141, + "step": 13790 + }, + { + "epoch": 0.4226737771239426, + "grad_norm": 1.4706981549753544, + "learning_rate": 1.2945948225671294e-05, + "loss": 0.7798, + "step": 13791 + }, + { + "epoch": 0.4227044256466838, + "grad_norm": 1.4035736868129813, + "learning_rate": 1.2944999626417319e-05, + "loss": 0.7637, + "step": 13792 + }, + { + "epoch": 0.422735074169425, + "grad_norm": 1.469979209496079, + "learning_rate": 1.2944050998145507e-05, + "loss": 0.8269, + "step": 13793 + }, + { + "epoch": 0.4227657226921662, + "grad_norm": 1.7024468883026005, + "learning_rate": 1.2943102340865208e-05, + "loss": 0.7574, + "step": 13794 + }, + { + "epoch": 0.42279637121490743, + "grad_norm": 0.6756851538562527, + "learning_rate": 1.294215365458577e-05, + "loss": 0.6182, + "step": 13795 + }, + { + "epoch": 0.42282701973764864, + "grad_norm": 1.3380148351058754, + "learning_rate": 1.2941204939316536e-05, + "loss": 0.6087, + "step": 13796 + }, + { + "epoch": 0.42285766826038984, + "grad_norm": 1.442739652545245, + "learning_rate": 1.2940256195066863e-05, + "loss": 0.7045, + "step": 13797 + }, + { + "epoch": 0.42288831678313105, + "grad_norm": 1.4899224681093413, + "learning_rate": 1.2939307421846088e-05, + "loss": 0.6483, + "step": 13798 + }, + { + "epoch": 0.42291896530587225, + "grad_norm": 1.6619250369654999, + "learning_rate": 1.2938358619663566e-05, + "loss": 0.7522, + "step": 13799 + }, + { + "epoch": 0.42294961382861346, + "grad_norm": 1.4743216686546596, + "learning_rate": 1.2937409788528648e-05, + "loss": 0.7049, + "step": 13800 + }, + { + "epoch": 0.42298026235135466, + "grad_norm": 1.3934878257487096, + "learning_rate": 1.2936460928450673e-05, + "loss": 0.7376, + "step": 13801 + }, + { + "epoch": 0.42301091087409587, + "grad_norm": 0.6793726936758371, + "learning_rate": 1.2935512039439002e-05, + "loss": 0.6091, + "step": 13802 + }, + { + "epoch": 0.4230415593968371, + "grad_norm": 1.3287830730333254, + "learning_rate": 1.2934563121502978e-05, + "loss": 0.6665, + "step": 13803 + }, + { + "epoch": 0.4230722079195783, + "grad_norm": 1.519720360336154, + "learning_rate": 1.2933614174651955e-05, + "loss": 0.8545, + "step": 13804 + }, + { + "epoch": 0.4231028564423195, + "grad_norm": 1.6849102717339624, + "learning_rate": 1.293266519889528e-05, + "loss": 0.7676, + "step": 13805 + }, + { + "epoch": 0.4231335049650607, + "grad_norm": 1.5148232436064717, + "learning_rate": 1.2931716194242303e-05, + "loss": 0.8072, + "step": 13806 + }, + { + "epoch": 0.4231641534878019, + "grad_norm": 1.5930251677970877, + "learning_rate": 1.2930767160702377e-05, + "loss": 0.7271, + "step": 13807 + }, + { + "epoch": 0.4231948020105431, + "grad_norm": 0.6768718756216944, + "learning_rate": 1.2929818098284853e-05, + "loss": 0.5957, + "step": 13808 + }, + { + "epoch": 0.4232254505332843, + "grad_norm": 1.5834991578807411, + "learning_rate": 1.2928869006999083e-05, + "loss": 0.8088, + "step": 13809 + }, + { + "epoch": 0.4232560990560255, + "grad_norm": 1.3828141047529763, + "learning_rate": 1.2927919886854415e-05, + "loss": 0.8125, + "step": 13810 + }, + { + "epoch": 0.4232867475787667, + "grad_norm": 0.7252333953732697, + "learning_rate": 1.2926970737860204e-05, + "loss": 0.6065, + "step": 13811 + }, + { + "epoch": 0.4233173961015079, + "grad_norm": 1.3017858296455926, + "learning_rate": 1.2926021560025803e-05, + "loss": 0.6356, + "step": 13812 + }, + { + "epoch": 0.42334804462424913, + "grad_norm": 1.3519262783452621, + "learning_rate": 1.292507235336056e-05, + "loss": 0.715, + "step": 13813 + }, + { + "epoch": 0.42337869314699034, + "grad_norm": 1.5329000147463252, + "learning_rate": 1.2924123117873832e-05, + "loss": 0.7147, + "step": 13814 + }, + { + "epoch": 0.42340934166973154, + "grad_norm": 0.6405249608253191, + "learning_rate": 1.2923173853574969e-05, + "loss": 0.594, + "step": 13815 + }, + { + "epoch": 0.42343999019247275, + "grad_norm": 1.6224140053698746, + "learning_rate": 1.2922224560473326e-05, + "loss": 0.7067, + "step": 13816 + }, + { + "epoch": 0.42347063871521395, + "grad_norm": 0.641355598308507, + "learning_rate": 1.2921275238578259e-05, + "loss": 0.5743, + "step": 13817 + }, + { + "epoch": 0.4235012872379551, + "grad_norm": 1.434333601024596, + "learning_rate": 1.292032588789912e-05, + "loss": 0.6293, + "step": 13818 + }, + { + "epoch": 0.4235319357606963, + "grad_norm": 1.3841863963533234, + "learning_rate": 1.291937650844526e-05, + "loss": 0.7126, + "step": 13819 + }, + { + "epoch": 0.4235625842834375, + "grad_norm": 1.5189512960110307, + "learning_rate": 1.2918427100226038e-05, + "loss": 0.7553, + "step": 13820 + }, + { + "epoch": 0.4235932328061787, + "grad_norm": 1.2892072352312427, + "learning_rate": 1.2917477663250811e-05, + "loss": 0.7506, + "step": 13821 + }, + { + "epoch": 0.4236238813289199, + "grad_norm": 1.3949097119573188, + "learning_rate": 1.2916528197528924e-05, + "loss": 0.7297, + "step": 13822 + }, + { + "epoch": 0.42365452985166113, + "grad_norm": 1.416804148168855, + "learning_rate": 1.2915578703069742e-05, + "loss": 0.6602, + "step": 13823 + }, + { + "epoch": 0.42368517837440234, + "grad_norm": 1.4636444024130921, + "learning_rate": 1.2914629179882616e-05, + "loss": 0.7676, + "step": 13824 + }, + { + "epoch": 0.42371582689714354, + "grad_norm": 1.348554776776169, + "learning_rate": 1.2913679627976902e-05, + "loss": 0.6005, + "step": 13825 + }, + { + "epoch": 0.42374647541988475, + "grad_norm": 1.531126086419256, + "learning_rate": 1.2912730047361957e-05, + "loss": 0.6275, + "step": 13826 + }, + { + "epoch": 0.42377712394262596, + "grad_norm": 1.4169108095179161, + "learning_rate": 1.2911780438047138e-05, + "loss": 0.6555, + "step": 13827 + }, + { + "epoch": 0.42380777246536716, + "grad_norm": 1.2905879882552915, + "learning_rate": 1.2910830800041803e-05, + "loss": 0.6653, + "step": 13828 + }, + { + "epoch": 0.42383842098810837, + "grad_norm": 1.3726791785016494, + "learning_rate": 1.2909881133355305e-05, + "loss": 0.7573, + "step": 13829 + }, + { + "epoch": 0.42386906951084957, + "grad_norm": 1.3495343079093531, + "learning_rate": 1.2908931437997006e-05, + "loss": 0.7087, + "step": 13830 + }, + { + "epoch": 0.4238997180335908, + "grad_norm": 1.4971275754873188, + "learning_rate": 1.290798171397626e-05, + "loss": 0.7825, + "step": 13831 + }, + { + "epoch": 0.423930366556332, + "grad_norm": 0.6961512066113745, + "learning_rate": 1.2907031961302427e-05, + "loss": 0.6088, + "step": 13832 + }, + { + "epoch": 0.4239610150790732, + "grad_norm": 1.4098049601795857, + "learning_rate": 1.2906082179984863e-05, + "loss": 0.7285, + "step": 13833 + }, + { + "epoch": 0.4239916636018144, + "grad_norm": 1.345807975663716, + "learning_rate": 1.2905132370032928e-05, + "loss": 0.7286, + "step": 13834 + }, + { + "epoch": 0.4240223121245556, + "grad_norm": 1.3831615805764437, + "learning_rate": 1.2904182531455983e-05, + "loss": 0.6882, + "step": 13835 + }, + { + "epoch": 0.4240529606472968, + "grad_norm": 1.5297595975527465, + "learning_rate": 1.2903232664263381e-05, + "loss": 0.6905, + "step": 13836 + }, + { + "epoch": 0.424083609170038, + "grad_norm": 0.6965153422132236, + "learning_rate": 1.2902282768464484e-05, + "loss": 0.6229, + "step": 13837 + }, + { + "epoch": 0.4241142576927792, + "grad_norm": 1.4270195258481748, + "learning_rate": 1.2901332844068654e-05, + "loss": 0.6785, + "step": 13838 + }, + { + "epoch": 0.4241449062155204, + "grad_norm": 1.2358187599275874, + "learning_rate": 1.290038289108525e-05, + "loss": 0.5899, + "step": 13839 + }, + { + "epoch": 0.42417555473826163, + "grad_norm": 1.4983694413168176, + "learning_rate": 1.2899432909523633e-05, + "loss": 0.7548, + "step": 13840 + }, + { + "epoch": 0.42420620326100283, + "grad_norm": 1.3032538652409957, + "learning_rate": 1.2898482899393157e-05, + "loss": 0.7659, + "step": 13841 + }, + { + "epoch": 0.42423685178374404, + "grad_norm": 1.4340992326519535, + "learning_rate": 1.289753286070319e-05, + "loss": 0.7176, + "step": 13842 + }, + { + "epoch": 0.42426750030648525, + "grad_norm": 0.68779104082178, + "learning_rate": 1.289658279346309e-05, + "loss": 0.6065, + "step": 13843 + }, + { + "epoch": 0.42429814882922645, + "grad_norm": 1.4591480252483513, + "learning_rate": 1.2895632697682219e-05, + "loss": 0.7416, + "step": 13844 + }, + { + "epoch": 0.42432879735196766, + "grad_norm": 1.4662594792141728, + "learning_rate": 1.2894682573369937e-05, + "loss": 0.6552, + "step": 13845 + }, + { + "epoch": 0.42435944587470886, + "grad_norm": 1.3786459406801979, + "learning_rate": 1.2893732420535608e-05, + "loss": 0.6969, + "step": 13846 + }, + { + "epoch": 0.42439009439745007, + "grad_norm": 1.20845356795322, + "learning_rate": 1.2892782239188595e-05, + "loss": 0.6175, + "step": 13847 + }, + { + "epoch": 0.4244207429201913, + "grad_norm": 1.5901892614120232, + "learning_rate": 1.2891832029338253e-05, + "loss": 0.7671, + "step": 13848 + }, + { + "epoch": 0.4244513914429324, + "grad_norm": 1.3968719247404595, + "learning_rate": 1.2890881790993954e-05, + "loss": 0.6245, + "step": 13849 + }, + { + "epoch": 0.42448203996567363, + "grad_norm": 1.3804170075725026, + "learning_rate": 1.2889931524165055e-05, + "loss": 0.745, + "step": 13850 + }, + { + "epoch": 0.42451268848841484, + "grad_norm": 1.4109327214367362, + "learning_rate": 1.2888981228860926e-05, + "loss": 0.6868, + "step": 13851 + }, + { + "epoch": 0.42454333701115604, + "grad_norm": 1.3942497939949787, + "learning_rate": 1.288803090509092e-05, + "loss": 0.7117, + "step": 13852 + }, + { + "epoch": 0.42457398553389725, + "grad_norm": 1.4601954010298188, + "learning_rate": 1.2887080552864411e-05, + "loss": 0.7121, + "step": 13853 + }, + { + "epoch": 0.42460463405663845, + "grad_norm": 1.3006628428574274, + "learning_rate": 1.2886130172190759e-05, + "loss": 0.7542, + "step": 13854 + }, + { + "epoch": 0.42463528257937966, + "grad_norm": 1.5465511048879943, + "learning_rate": 1.2885179763079323e-05, + "loss": 0.8981, + "step": 13855 + }, + { + "epoch": 0.42466593110212086, + "grad_norm": 1.4726294691826969, + "learning_rate": 1.2884229325539475e-05, + "loss": 0.7902, + "step": 13856 + }, + { + "epoch": 0.42469657962486207, + "grad_norm": 0.7122332611842378, + "learning_rate": 1.2883278859580579e-05, + "loss": 0.5931, + "step": 13857 + }, + { + "epoch": 0.4247272281476033, + "grad_norm": 1.544800331174181, + "learning_rate": 1.2882328365211998e-05, + "loss": 0.728, + "step": 13858 + }, + { + "epoch": 0.4247578766703445, + "grad_norm": 1.598057460390917, + "learning_rate": 1.2881377842443095e-05, + "loss": 0.6591, + "step": 13859 + }, + { + "epoch": 0.4247885251930857, + "grad_norm": 1.4665198424485788, + "learning_rate": 1.2880427291283241e-05, + "loss": 0.7409, + "step": 13860 + }, + { + "epoch": 0.4248191737158269, + "grad_norm": 1.2083417632884434, + "learning_rate": 1.2879476711741801e-05, + "loss": 0.663, + "step": 13861 + }, + { + "epoch": 0.4248498222385681, + "grad_norm": 1.353580238711917, + "learning_rate": 1.2878526103828142e-05, + "loss": 0.7523, + "step": 13862 + }, + { + "epoch": 0.4248804707613093, + "grad_norm": 0.6733456628695939, + "learning_rate": 1.2877575467551624e-05, + "loss": 0.5939, + "step": 13863 + }, + { + "epoch": 0.4249111192840505, + "grad_norm": 1.4989360958885436, + "learning_rate": 1.2876624802921623e-05, + "loss": 0.6266, + "step": 13864 + }, + { + "epoch": 0.4249417678067917, + "grad_norm": 1.7112725187929931, + "learning_rate": 1.2875674109947496e-05, + "loss": 0.7056, + "step": 13865 + }, + { + "epoch": 0.4249724163295329, + "grad_norm": 1.34205748527067, + "learning_rate": 1.2874723388638623e-05, + "loss": 0.6587, + "step": 13866 + }, + { + "epoch": 0.4250030648522741, + "grad_norm": 0.6476152503880488, + "learning_rate": 1.2873772639004361e-05, + "loss": 0.588, + "step": 13867 + }, + { + "epoch": 0.42503371337501533, + "grad_norm": 1.7559001915561598, + "learning_rate": 1.2872821861054084e-05, + "loss": 0.877, + "step": 13868 + }, + { + "epoch": 0.42506436189775654, + "grad_norm": 1.4915910708350915, + "learning_rate": 1.2871871054797155e-05, + "loss": 0.7436, + "step": 13869 + }, + { + "epoch": 0.42509501042049774, + "grad_norm": 1.483539207592496, + "learning_rate": 1.2870920220242948e-05, + "loss": 0.8505, + "step": 13870 + }, + { + "epoch": 0.42512565894323895, + "grad_norm": 1.407632153438472, + "learning_rate": 1.2869969357400831e-05, + "loss": 0.7529, + "step": 13871 + }, + { + "epoch": 0.42515630746598015, + "grad_norm": 0.6959695183481205, + "learning_rate": 1.2869018466280168e-05, + "loss": 0.6028, + "step": 13872 + }, + { + "epoch": 0.42518695598872136, + "grad_norm": 1.3233957692448888, + "learning_rate": 1.2868067546890335e-05, + "loss": 0.6928, + "step": 13873 + }, + { + "epoch": 0.42521760451146257, + "grad_norm": 1.5209659928241774, + "learning_rate": 1.2867116599240697e-05, + "loss": 0.8114, + "step": 13874 + }, + { + "epoch": 0.42524825303420377, + "grad_norm": 1.4754297379433041, + "learning_rate": 1.2866165623340628e-05, + "loss": 0.8145, + "step": 13875 + }, + { + "epoch": 0.425278901556945, + "grad_norm": 1.5341956273949708, + "learning_rate": 1.286521461919949e-05, + "loss": 0.766, + "step": 13876 + }, + { + "epoch": 0.4253095500796862, + "grad_norm": 1.4772965277914796, + "learning_rate": 1.2864263586826666e-05, + "loss": 0.7203, + "step": 13877 + }, + { + "epoch": 0.4253401986024274, + "grad_norm": 1.4168281435155832, + "learning_rate": 1.2863312526231514e-05, + "loss": 0.7042, + "step": 13878 + }, + { + "epoch": 0.4253708471251686, + "grad_norm": 1.481129693257811, + "learning_rate": 1.2862361437423417e-05, + "loss": 0.7275, + "step": 13879 + }, + { + "epoch": 0.42540149564790974, + "grad_norm": 1.6131827001710646, + "learning_rate": 1.2861410320411736e-05, + "loss": 0.774, + "step": 13880 + }, + { + "epoch": 0.42543214417065095, + "grad_norm": 1.4956908267834865, + "learning_rate": 1.2860459175205849e-05, + "loss": 0.7006, + "step": 13881 + }, + { + "epoch": 0.42546279269339216, + "grad_norm": 0.6687603198742538, + "learning_rate": 1.2859508001815127e-05, + "loss": 0.5931, + "step": 13882 + }, + { + "epoch": 0.42549344121613336, + "grad_norm": 1.5620566474759752, + "learning_rate": 1.2858556800248938e-05, + "loss": 0.7007, + "step": 13883 + }, + { + "epoch": 0.42552408973887457, + "grad_norm": 1.430309584838018, + "learning_rate": 1.2857605570516659e-05, + "loss": 0.737, + "step": 13884 + }, + { + "epoch": 0.4255547382616158, + "grad_norm": 1.3957980224395476, + "learning_rate": 1.2856654312627661e-05, + "loss": 0.7472, + "step": 13885 + }, + { + "epoch": 0.425585386784357, + "grad_norm": 1.5382241522518032, + "learning_rate": 1.2855703026591318e-05, + "loss": 0.7609, + "step": 13886 + }, + { + "epoch": 0.4256160353070982, + "grad_norm": 1.5125020900500512, + "learning_rate": 1.2854751712417e-05, + "loss": 0.773, + "step": 13887 + }, + { + "epoch": 0.4256466838298394, + "grad_norm": 0.6844831348520432, + "learning_rate": 1.2853800370114084e-05, + "loss": 0.605, + "step": 13888 + }, + { + "epoch": 0.4256773323525806, + "grad_norm": 1.4931981496153386, + "learning_rate": 1.2852848999691945e-05, + "loss": 0.7328, + "step": 13889 + }, + { + "epoch": 0.4257079808753218, + "grad_norm": 0.6524833893274031, + "learning_rate": 1.2851897601159954e-05, + "loss": 0.5843, + "step": 13890 + }, + { + "epoch": 0.425738629398063, + "grad_norm": 1.5593131238563867, + "learning_rate": 1.2850946174527483e-05, + "loss": 0.7773, + "step": 13891 + }, + { + "epoch": 0.4257692779208042, + "grad_norm": 1.4361617742706279, + "learning_rate": 1.2849994719803914e-05, + "loss": 0.6557, + "step": 13892 + }, + { + "epoch": 0.4257999264435454, + "grad_norm": 1.3702795267439898, + "learning_rate": 1.2849043236998617e-05, + "loss": 0.695, + "step": 13893 + }, + { + "epoch": 0.4258305749662866, + "grad_norm": 0.6949919027322193, + "learning_rate": 1.2848091726120968e-05, + "loss": 0.6309, + "step": 13894 + }, + { + "epoch": 0.42586122348902783, + "grad_norm": 1.5686304419956514, + "learning_rate": 1.284714018718034e-05, + "loss": 0.7545, + "step": 13895 + }, + { + "epoch": 0.42589187201176903, + "grad_norm": 1.5692651462034146, + "learning_rate": 1.2846188620186112e-05, + "loss": 0.6491, + "step": 13896 + }, + { + "epoch": 0.42592252053451024, + "grad_norm": 1.7870124642695715, + "learning_rate": 1.2845237025147661e-05, + "loss": 0.7608, + "step": 13897 + }, + { + "epoch": 0.42595316905725145, + "grad_norm": 1.311196188352887, + "learning_rate": 1.2844285402074359e-05, + "loss": 0.7218, + "step": 13898 + }, + { + "epoch": 0.42598381757999265, + "grad_norm": 1.2662896370503336, + "learning_rate": 1.2843333750975589e-05, + "loss": 0.6323, + "step": 13899 + }, + { + "epoch": 0.42601446610273386, + "grad_norm": 1.5981946596368102, + "learning_rate": 1.284238207186072e-05, + "loss": 0.7722, + "step": 13900 + }, + { + "epoch": 0.42604511462547506, + "grad_norm": 0.6589031981884594, + "learning_rate": 1.2841430364739139e-05, + "loss": 0.5775, + "step": 13901 + }, + { + "epoch": 0.42607576314821627, + "grad_norm": 1.457992138527272, + "learning_rate": 1.2840478629620212e-05, + "loss": 0.8135, + "step": 13902 + }, + { + "epoch": 0.4261064116709575, + "grad_norm": 1.466026887984897, + "learning_rate": 1.2839526866513325e-05, + "loss": 0.7001, + "step": 13903 + }, + { + "epoch": 0.4261370601936987, + "grad_norm": 1.2659410604522172, + "learning_rate": 1.2838575075427853e-05, + "loss": 0.6273, + "step": 13904 + }, + { + "epoch": 0.4261677087164399, + "grad_norm": 1.30383858572645, + "learning_rate": 1.2837623256373175e-05, + "loss": 0.6474, + "step": 13905 + }, + { + "epoch": 0.4261983572391811, + "grad_norm": 1.3046840323203959, + "learning_rate": 1.2836671409358664e-05, + "loss": 0.7133, + "step": 13906 + }, + { + "epoch": 0.4262290057619223, + "grad_norm": 1.5041847595118514, + "learning_rate": 1.283571953439371e-05, + "loss": 0.7608, + "step": 13907 + }, + { + "epoch": 0.4262596542846635, + "grad_norm": 0.6657390287574801, + "learning_rate": 1.2834767631487683e-05, + "loss": 0.5857, + "step": 13908 + }, + { + "epoch": 0.4262903028074047, + "grad_norm": 1.4582526330598187, + "learning_rate": 1.2833815700649967e-05, + "loss": 0.6132, + "step": 13909 + }, + { + "epoch": 0.4263209513301459, + "grad_norm": 1.4435178220847367, + "learning_rate": 1.2832863741889939e-05, + "loss": 0.6734, + "step": 13910 + }, + { + "epoch": 0.42635159985288706, + "grad_norm": 1.5094985864259702, + "learning_rate": 1.283191175521698e-05, + "loss": 0.7924, + "step": 13911 + }, + { + "epoch": 0.42638224837562827, + "grad_norm": 0.6581774985798969, + "learning_rate": 1.2830959740640467e-05, + "loss": 0.6036, + "step": 13912 + }, + { + "epoch": 0.4264128968983695, + "grad_norm": 1.6107072549217152, + "learning_rate": 1.2830007698169787e-05, + "loss": 0.7292, + "step": 13913 + }, + { + "epoch": 0.4264435454211107, + "grad_norm": 1.3564247261959947, + "learning_rate": 1.2829055627814316e-05, + "loss": 0.6142, + "step": 13914 + }, + { + "epoch": 0.4264741939438519, + "grad_norm": 1.430944234136702, + "learning_rate": 1.2828103529583433e-05, + "loss": 0.7781, + "step": 13915 + }, + { + "epoch": 0.4265048424665931, + "grad_norm": 1.4020421929969302, + "learning_rate": 1.2827151403486529e-05, + "loss": 0.7818, + "step": 13916 + }, + { + "epoch": 0.4265354909893343, + "grad_norm": 1.4395984849221257, + "learning_rate": 1.2826199249532974e-05, + "loss": 0.7556, + "step": 13917 + }, + { + "epoch": 0.4265661395120755, + "grad_norm": 1.4059505600578182, + "learning_rate": 1.2825247067732157e-05, + "loss": 0.7216, + "step": 13918 + }, + { + "epoch": 0.4265967880348167, + "grad_norm": 1.509065443030661, + "learning_rate": 1.2824294858093453e-05, + "loss": 0.7395, + "step": 13919 + }, + { + "epoch": 0.4266274365575579, + "grad_norm": 1.4953628500386331, + "learning_rate": 1.2823342620626256e-05, + "loss": 0.7723, + "step": 13920 + }, + { + "epoch": 0.4266580850802991, + "grad_norm": 1.3933638220278455, + "learning_rate": 1.2822390355339936e-05, + "loss": 0.6675, + "step": 13921 + }, + { + "epoch": 0.4266887336030403, + "grad_norm": 1.3656227139749066, + "learning_rate": 1.2821438062243885e-05, + "loss": 0.66, + "step": 13922 + }, + { + "epoch": 0.42671938212578153, + "grad_norm": 1.5291856122345238, + "learning_rate": 1.2820485741347478e-05, + "loss": 0.6708, + "step": 13923 + }, + { + "epoch": 0.42675003064852274, + "grad_norm": 1.6399868696060647, + "learning_rate": 1.281953339266011e-05, + "loss": 0.7964, + "step": 13924 + }, + { + "epoch": 0.42678067917126394, + "grad_norm": 1.4171714960991044, + "learning_rate": 1.2818581016191156e-05, + "loss": 0.692, + "step": 13925 + }, + { + "epoch": 0.42681132769400515, + "grad_norm": 1.2513459118505879, + "learning_rate": 1.2817628611949999e-05, + "loss": 0.6726, + "step": 13926 + }, + { + "epoch": 0.42684197621674635, + "grad_norm": 1.4532939236198057, + "learning_rate": 1.281667617994603e-05, + "loss": 0.6888, + "step": 13927 + }, + { + "epoch": 0.42687262473948756, + "grad_norm": 1.5534466781841507, + "learning_rate": 1.2815723720188628e-05, + "loss": 0.6877, + "step": 13928 + }, + { + "epoch": 0.42690327326222877, + "grad_norm": 1.4194426604770285, + "learning_rate": 1.2814771232687181e-05, + "loss": 0.7203, + "step": 13929 + }, + { + "epoch": 0.42693392178496997, + "grad_norm": 1.5946687634898882, + "learning_rate": 1.2813818717451072e-05, + "loss": 0.6919, + "step": 13930 + }, + { + "epoch": 0.4269645703077112, + "grad_norm": 1.4810454183078345, + "learning_rate": 1.2812866174489691e-05, + "loss": 0.7568, + "step": 13931 + }, + { + "epoch": 0.4269952188304524, + "grad_norm": 1.3155111324803286, + "learning_rate": 1.2811913603812414e-05, + "loss": 0.7813, + "step": 13932 + }, + { + "epoch": 0.4270258673531936, + "grad_norm": 1.4954441126228883, + "learning_rate": 1.2810961005428637e-05, + "loss": 0.7569, + "step": 13933 + }, + { + "epoch": 0.4270565158759348, + "grad_norm": 1.2886733507644288, + "learning_rate": 1.2810008379347739e-05, + "loss": 0.6903, + "step": 13934 + }, + { + "epoch": 0.427087164398676, + "grad_norm": 1.3904431855965955, + "learning_rate": 1.2809055725579111e-05, + "loss": 0.747, + "step": 13935 + }, + { + "epoch": 0.4271178129214172, + "grad_norm": 1.5001506705619108, + "learning_rate": 1.2808103044132136e-05, + "loss": 0.7163, + "step": 13936 + }, + { + "epoch": 0.4271484614441584, + "grad_norm": 1.6292593179263903, + "learning_rate": 1.2807150335016208e-05, + "loss": 0.6925, + "step": 13937 + }, + { + "epoch": 0.4271791099668996, + "grad_norm": 1.4746617721645616, + "learning_rate": 1.2806197598240703e-05, + "loss": 0.7918, + "step": 13938 + }, + { + "epoch": 0.4272097584896408, + "grad_norm": 1.6892065151353908, + "learning_rate": 1.2805244833815021e-05, + "loss": 0.7365, + "step": 13939 + }, + { + "epoch": 0.42724040701238203, + "grad_norm": 0.6866905166178552, + "learning_rate": 1.2804292041748543e-05, + "loss": 0.5855, + "step": 13940 + }, + { + "epoch": 0.42727105553512323, + "grad_norm": 1.678899244779088, + "learning_rate": 1.2803339222050654e-05, + "loss": 0.7005, + "step": 13941 + }, + { + "epoch": 0.4273017040578644, + "grad_norm": 1.7305601441052874, + "learning_rate": 1.280238637473075e-05, + "loss": 0.7899, + "step": 13942 + }, + { + "epoch": 0.4273323525806056, + "grad_norm": 1.5594483870891023, + "learning_rate": 1.2801433499798215e-05, + "loss": 0.9075, + "step": 13943 + }, + { + "epoch": 0.4273630011033468, + "grad_norm": 1.6029752707968972, + "learning_rate": 1.2800480597262439e-05, + "loss": 0.7334, + "step": 13944 + }, + { + "epoch": 0.427393649626088, + "grad_norm": 1.9801808483548808, + "learning_rate": 1.2799527667132811e-05, + "loss": 0.8939, + "step": 13945 + }, + { + "epoch": 0.4274242981488292, + "grad_norm": 1.4628364044004398, + "learning_rate": 1.279857470941872e-05, + "loss": 0.7471, + "step": 13946 + }, + { + "epoch": 0.4274549466715704, + "grad_norm": 1.5143829739790622, + "learning_rate": 1.279762172412956e-05, + "loss": 0.7005, + "step": 13947 + }, + { + "epoch": 0.4274855951943116, + "grad_norm": 1.5432485633187338, + "learning_rate": 1.2796668711274713e-05, + "loss": 0.6501, + "step": 13948 + }, + { + "epoch": 0.4275162437170528, + "grad_norm": 1.4907227226064794, + "learning_rate": 1.2795715670863573e-05, + "loss": 0.6975, + "step": 13949 + }, + { + "epoch": 0.42754689223979403, + "grad_norm": 1.4812405141729594, + "learning_rate": 1.2794762602905535e-05, + "loss": 0.6852, + "step": 13950 + }, + { + "epoch": 0.42757754076253524, + "grad_norm": 1.5267659641204196, + "learning_rate": 1.2793809507409985e-05, + "loss": 0.8111, + "step": 13951 + }, + { + "epoch": 0.42760818928527644, + "grad_norm": 1.4833912845477797, + "learning_rate": 1.2792856384386312e-05, + "loss": 0.7837, + "step": 13952 + }, + { + "epoch": 0.42763883780801765, + "grad_norm": 1.4711783710702497, + "learning_rate": 1.2791903233843915e-05, + "loss": 0.7273, + "step": 13953 + }, + { + "epoch": 0.42766948633075885, + "grad_norm": 1.383764421514668, + "learning_rate": 1.2790950055792178e-05, + "loss": 0.7377, + "step": 13954 + }, + { + "epoch": 0.42770013485350006, + "grad_norm": 1.49786303378327, + "learning_rate": 1.2789996850240499e-05, + "loss": 0.7733, + "step": 13955 + }, + { + "epoch": 0.42773078337624126, + "grad_norm": 1.5703651911964103, + "learning_rate": 1.2789043617198262e-05, + "loss": 0.679, + "step": 13956 + }, + { + "epoch": 0.42776143189898247, + "grad_norm": 1.5366311402104702, + "learning_rate": 1.2788090356674867e-05, + "loss": 0.8079, + "step": 13957 + }, + { + "epoch": 0.4277920804217237, + "grad_norm": 1.4997624307293287, + "learning_rate": 1.2787137068679708e-05, + "loss": 0.7462, + "step": 13958 + }, + { + "epoch": 0.4278227289444649, + "grad_norm": 1.3467907419947909, + "learning_rate": 1.2786183753222173e-05, + "loss": 0.6593, + "step": 13959 + }, + { + "epoch": 0.4278533774672061, + "grad_norm": 0.7401561134993919, + "learning_rate": 1.2785230410311651e-05, + "loss": 0.6066, + "step": 13960 + }, + { + "epoch": 0.4278840259899473, + "grad_norm": 1.4162061062022218, + "learning_rate": 1.2784277039957547e-05, + "loss": 0.724, + "step": 13961 + }, + { + "epoch": 0.4279146745126885, + "grad_norm": 1.4982003294892523, + "learning_rate": 1.2783323642169248e-05, + "loss": 0.6652, + "step": 13962 + }, + { + "epoch": 0.4279453230354297, + "grad_norm": 1.5960565855888385, + "learning_rate": 1.2782370216956149e-05, + "loss": 0.7134, + "step": 13963 + }, + { + "epoch": 0.4279759715581709, + "grad_norm": 1.4007265914364861, + "learning_rate": 1.278141676432764e-05, + "loss": 0.687, + "step": 13964 + }, + { + "epoch": 0.4280066200809121, + "grad_norm": 1.5580359352796125, + "learning_rate": 1.2780463284293125e-05, + "loss": 0.7478, + "step": 13965 + }, + { + "epoch": 0.4280372686036533, + "grad_norm": 1.5308490336062963, + "learning_rate": 1.2779509776861992e-05, + "loss": 0.7126, + "step": 13966 + }, + { + "epoch": 0.4280679171263945, + "grad_norm": 1.5180903704439357, + "learning_rate": 1.2778556242043637e-05, + "loss": 0.6753, + "step": 13967 + }, + { + "epoch": 0.42809856564913573, + "grad_norm": 1.7437500911856532, + "learning_rate": 1.2777602679847458e-05, + "loss": 0.7169, + "step": 13968 + }, + { + "epoch": 0.42812921417187694, + "grad_norm": 1.4421796991320928, + "learning_rate": 1.2776649090282846e-05, + "loss": 0.7914, + "step": 13969 + }, + { + "epoch": 0.42815986269461814, + "grad_norm": 1.600761430881068, + "learning_rate": 1.2775695473359206e-05, + "loss": 0.8098, + "step": 13970 + }, + { + "epoch": 0.42819051121735935, + "grad_norm": 1.443906488568808, + "learning_rate": 1.2774741829085924e-05, + "loss": 0.8143, + "step": 13971 + }, + { + "epoch": 0.42822115974010055, + "grad_norm": 1.597407865053345, + "learning_rate": 1.2773788157472402e-05, + "loss": 0.7732, + "step": 13972 + }, + { + "epoch": 0.4282518082628417, + "grad_norm": 1.6983492768728494, + "learning_rate": 1.2772834458528034e-05, + "loss": 0.8097, + "step": 13973 + }, + { + "epoch": 0.4282824567855829, + "grad_norm": 0.686533906504657, + "learning_rate": 1.2771880732262223e-05, + "loss": 0.5731, + "step": 13974 + }, + { + "epoch": 0.4283131053083241, + "grad_norm": 1.6942721377776766, + "learning_rate": 1.2770926978684359e-05, + "loss": 0.7613, + "step": 13975 + }, + { + "epoch": 0.4283437538310653, + "grad_norm": 1.544844647506238, + "learning_rate": 1.2769973197803843e-05, + "loss": 0.8386, + "step": 13976 + }, + { + "epoch": 0.4283744023538065, + "grad_norm": 1.5656122257917686, + "learning_rate": 1.2769019389630071e-05, + "loss": 0.7954, + "step": 13977 + }, + { + "epoch": 0.42840505087654773, + "grad_norm": 1.444951353684851, + "learning_rate": 1.2768065554172444e-05, + "loss": 0.6529, + "step": 13978 + }, + { + "epoch": 0.42843569939928894, + "grad_norm": 1.6900733778431984, + "learning_rate": 1.276711169144036e-05, + "loss": 0.7937, + "step": 13979 + }, + { + "epoch": 0.42846634792203014, + "grad_norm": 1.5221488301275985, + "learning_rate": 1.2766157801443214e-05, + "loss": 0.7525, + "step": 13980 + }, + { + "epoch": 0.42849699644477135, + "grad_norm": 1.3843245362108716, + "learning_rate": 1.2765203884190407e-05, + "loss": 0.7511, + "step": 13981 + }, + { + "epoch": 0.42852764496751256, + "grad_norm": 1.5118301947696289, + "learning_rate": 1.276424993969134e-05, + "loss": 0.7037, + "step": 13982 + }, + { + "epoch": 0.42855829349025376, + "grad_norm": 0.711761955141693, + "learning_rate": 1.2763295967955411e-05, + "loss": 0.6178, + "step": 13983 + }, + { + "epoch": 0.42858894201299497, + "grad_norm": 1.352492963359168, + "learning_rate": 1.2762341968992017e-05, + "loss": 0.6919, + "step": 13984 + }, + { + "epoch": 0.4286195905357362, + "grad_norm": 1.2973325448042106, + "learning_rate": 1.2761387942810568e-05, + "loss": 0.6567, + "step": 13985 + }, + { + "epoch": 0.4286502390584774, + "grad_norm": 1.499427329597636, + "learning_rate": 1.2760433889420449e-05, + "loss": 0.8, + "step": 13986 + }, + { + "epoch": 0.4286808875812186, + "grad_norm": 1.3775635258161711, + "learning_rate": 1.2759479808831075e-05, + "loss": 0.7574, + "step": 13987 + }, + { + "epoch": 0.4287115361039598, + "grad_norm": 0.664762652053252, + "learning_rate": 1.2758525701051837e-05, + "loss": 0.5993, + "step": 13988 + }, + { + "epoch": 0.428742184626701, + "grad_norm": 1.4484545761353722, + "learning_rate": 1.2757571566092141e-05, + "loss": 0.6667, + "step": 13989 + }, + { + "epoch": 0.4287728331494422, + "grad_norm": 1.280743146980244, + "learning_rate": 1.2756617403961384e-05, + "loss": 0.7398, + "step": 13990 + }, + { + "epoch": 0.4288034816721834, + "grad_norm": 0.6399659693942134, + "learning_rate": 1.2755663214668973e-05, + "loss": 0.5896, + "step": 13991 + }, + { + "epoch": 0.4288341301949246, + "grad_norm": 1.535667305847879, + "learning_rate": 1.2754708998224305e-05, + "loss": 0.7167, + "step": 13992 + }, + { + "epoch": 0.4288647787176658, + "grad_norm": 1.4909114362301585, + "learning_rate": 1.2753754754636786e-05, + "loss": 0.7956, + "step": 13993 + }, + { + "epoch": 0.428895427240407, + "grad_norm": 1.4147935495035096, + "learning_rate": 1.2752800483915819e-05, + "loss": 0.7928, + "step": 13994 + }, + { + "epoch": 0.42892607576314823, + "grad_norm": 1.3905359990769026, + "learning_rate": 1.27518461860708e-05, + "loss": 0.6666, + "step": 13995 + }, + { + "epoch": 0.42895672428588943, + "grad_norm": 1.2769392073525958, + "learning_rate": 1.2750891861111139e-05, + "loss": 0.6791, + "step": 13996 + }, + { + "epoch": 0.42898737280863064, + "grad_norm": 1.4346379099592603, + "learning_rate": 1.2749937509046238e-05, + "loss": 0.7076, + "step": 13997 + }, + { + "epoch": 0.42901802133137185, + "grad_norm": 1.5096781675967725, + "learning_rate": 1.2748983129885497e-05, + "loss": 0.7622, + "step": 13998 + }, + { + "epoch": 0.42904866985411305, + "grad_norm": 1.4013535760854352, + "learning_rate": 1.2748028723638321e-05, + "loss": 0.7222, + "step": 13999 + }, + { + "epoch": 0.42907931837685426, + "grad_norm": 1.2601902909748994, + "learning_rate": 1.2747074290314116e-05, + "loss": 0.8175, + "step": 14000 + }, + { + "epoch": 0.42910996689959546, + "grad_norm": 1.3221457618378079, + "learning_rate": 1.2746119829922287e-05, + "loss": 0.7183, + "step": 14001 + }, + { + "epoch": 0.42914061542233667, + "grad_norm": 1.372555504472871, + "learning_rate": 1.2745165342472236e-05, + "loss": 0.828, + "step": 14002 + }, + { + "epoch": 0.4291712639450779, + "grad_norm": 0.6792092560559253, + "learning_rate": 1.2744210827973367e-05, + "loss": 0.6276, + "step": 14003 + }, + { + "epoch": 0.429201912467819, + "grad_norm": 1.535439316889477, + "learning_rate": 1.2743256286435086e-05, + "loss": 0.8208, + "step": 14004 + }, + { + "epoch": 0.42923256099056023, + "grad_norm": 1.4424397678929164, + "learning_rate": 1.27423017178668e-05, + "loss": 0.7445, + "step": 14005 + }, + { + "epoch": 0.42926320951330144, + "grad_norm": 1.560576222976457, + "learning_rate": 1.2741347122277917e-05, + "loss": 0.7817, + "step": 14006 + }, + { + "epoch": 0.42929385803604264, + "grad_norm": 1.3859856977285456, + "learning_rate": 1.2740392499677833e-05, + "loss": 0.7395, + "step": 14007 + }, + { + "epoch": 0.42932450655878385, + "grad_norm": 1.5970761653438776, + "learning_rate": 1.2739437850075964e-05, + "loss": 0.8246, + "step": 14008 + }, + { + "epoch": 0.42935515508152505, + "grad_norm": 1.658536904543516, + "learning_rate": 1.2738483173481713e-05, + "loss": 0.7458, + "step": 14009 + }, + { + "epoch": 0.42938580360426626, + "grad_norm": 1.6584209322025656, + "learning_rate": 1.2737528469904485e-05, + "loss": 0.7127, + "step": 14010 + }, + { + "epoch": 0.42941645212700746, + "grad_norm": 1.3351649181987537, + "learning_rate": 1.2736573739353691e-05, + "loss": 0.7408, + "step": 14011 + }, + { + "epoch": 0.42944710064974867, + "grad_norm": 1.4407546558479474, + "learning_rate": 1.2735618981838735e-05, + "loss": 0.723, + "step": 14012 + }, + { + "epoch": 0.4294777491724899, + "grad_norm": 1.417516901216193, + "learning_rate": 1.2734664197369024e-05, + "loss": 0.6941, + "step": 14013 + }, + { + "epoch": 0.4295083976952311, + "grad_norm": 1.625815901494302, + "learning_rate": 1.2733709385953967e-05, + "loss": 0.8377, + "step": 14014 + }, + { + "epoch": 0.4295390462179723, + "grad_norm": 1.4591448717221345, + "learning_rate": 1.2732754547602972e-05, + "loss": 0.763, + "step": 14015 + }, + { + "epoch": 0.4295696947407135, + "grad_norm": 1.6685765350385477, + "learning_rate": 1.273179968232545e-05, + "loss": 0.796, + "step": 14016 + }, + { + "epoch": 0.4296003432634547, + "grad_norm": 1.3574231613071115, + "learning_rate": 1.2730844790130806e-05, + "loss": 0.6874, + "step": 14017 + }, + { + "epoch": 0.4296309917861959, + "grad_norm": 1.5159309063998385, + "learning_rate": 1.2729889871028445e-05, + "loss": 0.7072, + "step": 14018 + }, + { + "epoch": 0.4296616403089371, + "grad_norm": 1.4258068855564876, + "learning_rate": 1.2728934925027784e-05, + "loss": 0.6623, + "step": 14019 + }, + { + "epoch": 0.4296922888316783, + "grad_norm": 1.4306814510817114, + "learning_rate": 1.272797995213823e-05, + "loss": 0.659, + "step": 14020 + }, + { + "epoch": 0.4297229373544195, + "grad_norm": 1.4616994824234344, + "learning_rate": 1.272702495236919e-05, + "loss": 0.647, + "step": 14021 + }, + { + "epoch": 0.4297535858771607, + "grad_norm": 1.4755638665485122, + "learning_rate": 1.2726069925730076e-05, + "loss": 0.6904, + "step": 14022 + }, + { + "epoch": 0.42978423439990193, + "grad_norm": 1.3632432973703719, + "learning_rate": 1.2725114872230298e-05, + "loss": 0.7103, + "step": 14023 + }, + { + "epoch": 0.42981488292264314, + "grad_norm": 1.5252089460029938, + "learning_rate": 1.2724159791879265e-05, + "loss": 0.7425, + "step": 14024 + }, + { + "epoch": 0.42984553144538434, + "grad_norm": 1.573062179774838, + "learning_rate": 1.2723204684686387e-05, + "loss": 0.8979, + "step": 14025 + }, + { + "epoch": 0.42987617996812555, + "grad_norm": 1.6771530333211917, + "learning_rate": 1.2722249550661078e-05, + "loss": 0.7425, + "step": 14026 + }, + { + "epoch": 0.42990682849086675, + "grad_norm": 1.4188498697706222, + "learning_rate": 1.2721294389812746e-05, + "loss": 0.7828, + "step": 14027 + }, + { + "epoch": 0.42993747701360796, + "grad_norm": 0.6872170108322335, + "learning_rate": 1.2720339202150809e-05, + "loss": 0.6049, + "step": 14028 + }, + { + "epoch": 0.42996812553634917, + "grad_norm": 1.6386178515615528, + "learning_rate": 1.271938398768467e-05, + "loss": 0.7173, + "step": 14029 + }, + { + "epoch": 0.42999877405909037, + "grad_norm": 1.4821844934640873, + "learning_rate": 1.2718428746423746e-05, + "loss": 0.7432, + "step": 14030 + }, + { + "epoch": 0.4300294225818316, + "grad_norm": 1.2106047722345732, + "learning_rate": 1.2717473478377448e-05, + "loss": 0.6223, + "step": 14031 + }, + { + "epoch": 0.4300600711045728, + "grad_norm": 1.4023547748288558, + "learning_rate": 1.2716518183555189e-05, + "loss": 0.7612, + "step": 14032 + }, + { + "epoch": 0.430090719627314, + "grad_norm": 1.542145474561969, + "learning_rate": 1.2715562861966379e-05, + "loss": 0.6854, + "step": 14033 + }, + { + "epoch": 0.4301213681500552, + "grad_norm": 1.4276204296550132, + "learning_rate": 1.2714607513620436e-05, + "loss": 0.7724, + "step": 14034 + }, + { + "epoch": 0.43015201667279634, + "grad_norm": 1.2500137181702844, + "learning_rate": 1.2713652138526769e-05, + "loss": 0.6952, + "step": 14035 + }, + { + "epoch": 0.43018266519553755, + "grad_norm": 1.504283683320381, + "learning_rate": 1.2712696736694792e-05, + "loss": 0.7527, + "step": 14036 + }, + { + "epoch": 0.43021331371827876, + "grad_norm": 1.5234996931061966, + "learning_rate": 1.2711741308133923e-05, + "loss": 0.7253, + "step": 14037 + }, + { + "epoch": 0.43024396224101996, + "grad_norm": 1.3780888329236525, + "learning_rate": 1.2710785852853569e-05, + "loss": 0.7272, + "step": 14038 + }, + { + "epoch": 0.43027461076376117, + "grad_norm": 1.3663431619827269, + "learning_rate": 1.2709830370863153e-05, + "loss": 0.7247, + "step": 14039 + }, + { + "epoch": 0.4303052592865024, + "grad_norm": 1.4760021351790038, + "learning_rate": 1.2708874862172082e-05, + "loss": 0.7645, + "step": 14040 + }, + { + "epoch": 0.4303359078092436, + "grad_norm": 1.5311892731041672, + "learning_rate": 1.2707919326789774e-05, + "loss": 0.7336, + "step": 14041 + }, + { + "epoch": 0.4303665563319848, + "grad_norm": 1.5225911176983578, + "learning_rate": 1.2706963764725644e-05, + "loss": 0.6981, + "step": 14042 + }, + { + "epoch": 0.430397204854726, + "grad_norm": 0.6917365154223418, + "learning_rate": 1.2706008175989113e-05, + "loss": 0.6048, + "step": 14043 + }, + { + "epoch": 0.4304278533774672, + "grad_norm": 1.385847972147322, + "learning_rate": 1.2705052560589583e-05, + "loss": 0.6875, + "step": 14044 + }, + { + "epoch": 0.4304585019002084, + "grad_norm": 1.7346676500982814, + "learning_rate": 1.2704096918536482e-05, + "loss": 0.6857, + "step": 14045 + }, + { + "epoch": 0.4304891504229496, + "grad_norm": 1.706723572036664, + "learning_rate": 1.270314124983922e-05, + "loss": 0.7948, + "step": 14046 + }, + { + "epoch": 0.4305197989456908, + "grad_norm": 1.3768307788315284, + "learning_rate": 1.2702185554507218e-05, + "loss": 0.5765, + "step": 14047 + }, + { + "epoch": 0.430550447468432, + "grad_norm": 1.356599383969452, + "learning_rate": 1.270122983254989e-05, + "loss": 0.7097, + "step": 14048 + }, + { + "epoch": 0.4305810959911732, + "grad_norm": 1.6959501726375619, + "learning_rate": 1.2700274083976654e-05, + "loss": 0.7562, + "step": 14049 + }, + { + "epoch": 0.43061174451391443, + "grad_norm": 1.5212751178355135, + "learning_rate": 1.2699318308796925e-05, + "loss": 0.7993, + "step": 14050 + }, + { + "epoch": 0.43064239303665564, + "grad_norm": 1.3378536287489449, + "learning_rate": 1.269836250702012e-05, + "loss": 0.7604, + "step": 14051 + }, + { + "epoch": 0.43067304155939684, + "grad_norm": 1.3342582163884384, + "learning_rate": 1.2697406678655663e-05, + "loss": 0.6911, + "step": 14052 + }, + { + "epoch": 0.43070369008213805, + "grad_norm": 1.4712416431386541, + "learning_rate": 1.2696450823712964e-05, + "loss": 0.7651, + "step": 14053 + }, + { + "epoch": 0.43073433860487925, + "grad_norm": 1.4497867431150935, + "learning_rate": 1.269549494220145e-05, + "loss": 0.7573, + "step": 14054 + }, + { + "epoch": 0.43076498712762046, + "grad_norm": 1.3789246472657324, + "learning_rate": 1.269453903413053e-05, + "loss": 0.7799, + "step": 14055 + }, + { + "epoch": 0.43079563565036166, + "grad_norm": 1.438108437717515, + "learning_rate": 1.2693583099509632e-05, + "loss": 0.7467, + "step": 14056 + }, + { + "epoch": 0.43082628417310287, + "grad_norm": 1.3738446184975441, + "learning_rate": 1.2692627138348166e-05, + "loss": 0.7246, + "step": 14057 + }, + { + "epoch": 0.4308569326958441, + "grad_norm": 0.6894011713491943, + "learning_rate": 1.269167115065556e-05, + "loss": 0.6078, + "step": 14058 + }, + { + "epoch": 0.4308875812185853, + "grad_norm": 0.6434227512461369, + "learning_rate": 1.2690715136441226e-05, + "loss": 0.5699, + "step": 14059 + }, + { + "epoch": 0.4309182297413265, + "grad_norm": 1.3960373048638401, + "learning_rate": 1.2689759095714589e-05, + "loss": 0.6925, + "step": 14060 + }, + { + "epoch": 0.4309488782640677, + "grad_norm": 1.5440854445007912, + "learning_rate": 1.2688803028485067e-05, + "loss": 0.6293, + "step": 14061 + }, + { + "epoch": 0.4309795267868089, + "grad_norm": 1.8199216901999422, + "learning_rate": 1.268784693476208e-05, + "loss": 0.7524, + "step": 14062 + }, + { + "epoch": 0.4310101753095501, + "grad_norm": 1.4231523425605852, + "learning_rate": 1.2686890814555051e-05, + "loss": 0.7258, + "step": 14063 + }, + { + "epoch": 0.4310408238322913, + "grad_norm": 1.5683410540952547, + "learning_rate": 1.2685934667873396e-05, + "loss": 0.7385, + "step": 14064 + }, + { + "epoch": 0.4310714723550325, + "grad_norm": 0.7041816796289808, + "learning_rate": 1.2684978494726543e-05, + "loss": 0.6016, + "step": 14065 + }, + { + "epoch": 0.43110212087777366, + "grad_norm": 0.6716485458702249, + "learning_rate": 1.268402229512391e-05, + "loss": 0.5809, + "step": 14066 + }, + { + "epoch": 0.43113276940051487, + "grad_norm": 1.3840386390616337, + "learning_rate": 1.2683066069074918e-05, + "loss": 0.7235, + "step": 14067 + }, + { + "epoch": 0.4311634179232561, + "grad_norm": 1.4225628410000013, + "learning_rate": 1.2682109816588987e-05, + "loss": 0.7916, + "step": 14068 + }, + { + "epoch": 0.4311940664459973, + "grad_norm": 1.494559811623359, + "learning_rate": 1.2681153537675544e-05, + "loss": 0.7576, + "step": 14069 + }, + { + "epoch": 0.4312247149687385, + "grad_norm": 0.6927030573878801, + "learning_rate": 1.2680197232344012e-05, + "loss": 0.5837, + "step": 14070 + }, + { + "epoch": 0.4312553634914797, + "grad_norm": 1.41235164649806, + "learning_rate": 1.2679240900603807e-05, + "loss": 0.7934, + "step": 14071 + }, + { + "epoch": 0.4312860120142209, + "grad_norm": 1.6462147952930235, + "learning_rate": 1.2678284542464355e-05, + "loss": 0.7081, + "step": 14072 + }, + { + "epoch": 0.4313166605369621, + "grad_norm": 1.5284701945736836, + "learning_rate": 1.2677328157935083e-05, + "loss": 0.7519, + "step": 14073 + }, + { + "epoch": 0.4313473090597033, + "grad_norm": 1.4388347092523153, + "learning_rate": 1.267637174702541e-05, + "loss": 0.749, + "step": 14074 + }, + { + "epoch": 0.4313779575824445, + "grad_norm": 1.35032359590618, + "learning_rate": 1.2675415309744763e-05, + "loss": 0.6371, + "step": 14075 + }, + { + "epoch": 0.4314086061051857, + "grad_norm": 1.3895702267962524, + "learning_rate": 1.2674458846102562e-05, + "loss": 0.6632, + "step": 14076 + }, + { + "epoch": 0.4314392546279269, + "grad_norm": 1.6280303236874056, + "learning_rate": 1.2673502356108237e-05, + "loss": 0.8083, + "step": 14077 + }, + { + "epoch": 0.43146990315066813, + "grad_norm": 1.5462854162185902, + "learning_rate": 1.2672545839771206e-05, + "loss": 0.7093, + "step": 14078 + }, + { + "epoch": 0.43150055167340934, + "grad_norm": 0.660626897834215, + "learning_rate": 1.2671589297100896e-05, + "loss": 0.5684, + "step": 14079 + }, + { + "epoch": 0.43153120019615054, + "grad_norm": 1.48939718277351, + "learning_rate": 1.2670632728106738e-05, + "loss": 0.5641, + "step": 14080 + }, + { + "epoch": 0.43156184871889175, + "grad_norm": 1.5022056848645888, + "learning_rate": 1.2669676132798148e-05, + "loss": 0.786, + "step": 14081 + }, + { + "epoch": 0.43159249724163296, + "grad_norm": 0.6466518688985213, + "learning_rate": 1.266871951118456e-05, + "loss": 0.5576, + "step": 14082 + }, + { + "epoch": 0.43162314576437416, + "grad_norm": 1.4316845726618763, + "learning_rate": 1.2667762863275392e-05, + "loss": 0.7375, + "step": 14083 + }, + { + "epoch": 0.43165379428711537, + "grad_norm": 1.575907275209501, + "learning_rate": 1.2666806189080077e-05, + "loss": 0.7848, + "step": 14084 + }, + { + "epoch": 0.43168444280985657, + "grad_norm": 1.4353402383533587, + "learning_rate": 1.2665849488608037e-05, + "loss": 0.7684, + "step": 14085 + }, + { + "epoch": 0.4317150913325978, + "grad_norm": 1.498574973704738, + "learning_rate": 1.2664892761868698e-05, + "loss": 0.8202, + "step": 14086 + }, + { + "epoch": 0.431745739855339, + "grad_norm": 0.6685105033465031, + "learning_rate": 1.2663936008871492e-05, + "loss": 0.6042, + "step": 14087 + }, + { + "epoch": 0.4317763883780802, + "grad_norm": 1.2392959439871016, + "learning_rate": 1.2662979229625841e-05, + "loss": 0.6755, + "step": 14088 + }, + { + "epoch": 0.4318070369008214, + "grad_norm": 1.5517737261989561, + "learning_rate": 1.2662022424141176e-05, + "loss": 0.7545, + "step": 14089 + }, + { + "epoch": 0.4318376854235626, + "grad_norm": 1.3348719739390145, + "learning_rate": 1.266106559242692e-05, + "loss": 0.703, + "step": 14090 + }, + { + "epoch": 0.4318683339463038, + "grad_norm": 1.664554760547693, + "learning_rate": 1.2660108734492507e-05, + "loss": 0.6873, + "step": 14091 + }, + { + "epoch": 0.431898982469045, + "grad_norm": 1.2108517158838537, + "learning_rate": 1.2659151850347358e-05, + "loss": 0.4703, + "step": 14092 + }, + { + "epoch": 0.4319296309917862, + "grad_norm": 1.4462250296806505, + "learning_rate": 1.2658194940000912e-05, + "loss": 0.6791, + "step": 14093 + }, + { + "epoch": 0.4319602795145274, + "grad_norm": 1.454701909455982, + "learning_rate": 1.2657238003462585e-05, + "loss": 0.7426, + "step": 14094 + }, + { + "epoch": 0.43199092803726863, + "grad_norm": 1.571411111178822, + "learning_rate": 1.2656281040741813e-05, + "loss": 0.7268, + "step": 14095 + }, + { + "epoch": 0.43202157656000983, + "grad_norm": 1.5351109776110319, + "learning_rate": 1.2655324051848026e-05, + "loss": 0.7422, + "step": 14096 + }, + { + "epoch": 0.432052225082751, + "grad_norm": 1.5430456184696393, + "learning_rate": 1.2654367036790654e-05, + "loss": 0.7178, + "step": 14097 + }, + { + "epoch": 0.4320828736054922, + "grad_norm": 1.405744915846986, + "learning_rate": 1.265340999557912e-05, + "loss": 0.6356, + "step": 14098 + }, + { + "epoch": 0.4321135221282334, + "grad_norm": 1.540930186218428, + "learning_rate": 1.2652452928222861e-05, + "loss": 0.6926, + "step": 14099 + }, + { + "epoch": 0.4321441706509746, + "grad_norm": 1.4641612960405679, + "learning_rate": 1.2651495834731302e-05, + "loss": 0.7366, + "step": 14100 + }, + { + "epoch": 0.4321748191737158, + "grad_norm": 1.4468479697476497, + "learning_rate": 1.265053871511388e-05, + "loss": 0.773, + "step": 14101 + }, + { + "epoch": 0.432205467696457, + "grad_norm": 0.6969068946176246, + "learning_rate": 1.2649581569380019e-05, + "loss": 0.6105, + "step": 14102 + }, + { + "epoch": 0.4322361162191982, + "grad_norm": 1.631324725346795, + "learning_rate": 1.2648624397539152e-05, + "loss": 0.7164, + "step": 14103 + }, + { + "epoch": 0.4322667647419394, + "grad_norm": 0.673459495265823, + "learning_rate": 1.2647667199600713e-05, + "loss": 0.6078, + "step": 14104 + }, + { + "epoch": 0.43229741326468063, + "grad_norm": 1.301453450402559, + "learning_rate": 1.2646709975574132e-05, + "loss": 0.6243, + "step": 14105 + }, + { + "epoch": 0.43232806178742184, + "grad_norm": 1.4206073959516998, + "learning_rate": 1.264575272546884e-05, + "loss": 0.6924, + "step": 14106 + }, + { + "epoch": 0.43235871031016304, + "grad_norm": 1.384895856308426, + "learning_rate": 1.2644795449294267e-05, + "loss": 0.7408, + "step": 14107 + }, + { + "epoch": 0.43238935883290425, + "grad_norm": 1.463905075660661, + "learning_rate": 1.2643838147059851e-05, + "loss": 0.716, + "step": 14108 + }, + { + "epoch": 0.43242000735564545, + "grad_norm": 0.6815152399355227, + "learning_rate": 1.2642880818775021e-05, + "loss": 0.5845, + "step": 14109 + }, + { + "epoch": 0.43245065587838666, + "grad_norm": 1.5167193656989544, + "learning_rate": 1.264192346444921e-05, + "loss": 0.7303, + "step": 14110 + }, + { + "epoch": 0.43248130440112786, + "grad_norm": 1.5325349303092386, + "learning_rate": 1.2640966084091849e-05, + "loss": 0.675, + "step": 14111 + }, + { + "epoch": 0.43251195292386907, + "grad_norm": 1.4726126299757667, + "learning_rate": 1.2640008677712379e-05, + "loss": 0.7424, + "step": 14112 + }, + { + "epoch": 0.4325426014466103, + "grad_norm": 1.6323969504895266, + "learning_rate": 1.2639051245320222e-05, + "loss": 0.8676, + "step": 14113 + }, + { + "epoch": 0.4325732499693515, + "grad_norm": 1.6501096604658547, + "learning_rate": 1.263809378692482e-05, + "loss": 0.7513, + "step": 14114 + }, + { + "epoch": 0.4326038984920927, + "grad_norm": 1.2164081690572792, + "learning_rate": 1.2637136302535601e-05, + "loss": 0.6888, + "step": 14115 + }, + { + "epoch": 0.4326345470148339, + "grad_norm": 1.4059716949412262, + "learning_rate": 1.2636178792162008e-05, + "loss": 0.7282, + "step": 14116 + }, + { + "epoch": 0.4326651955375751, + "grad_norm": 1.341877704389428, + "learning_rate": 1.2635221255813472e-05, + "loss": 0.7027, + "step": 14117 + }, + { + "epoch": 0.4326958440603163, + "grad_norm": 1.4114816625697189, + "learning_rate": 1.2634263693499422e-05, + "loss": 0.6714, + "step": 14118 + }, + { + "epoch": 0.4327264925830575, + "grad_norm": 1.4633372770396347, + "learning_rate": 1.2633306105229301e-05, + "loss": 0.6919, + "step": 14119 + }, + { + "epoch": 0.4327571411057987, + "grad_norm": 1.4430737651054564, + "learning_rate": 1.2632348491012542e-05, + "loss": 0.7778, + "step": 14120 + }, + { + "epoch": 0.4327877896285399, + "grad_norm": 1.5846232782075083, + "learning_rate": 1.2631390850858578e-05, + "loss": 0.5937, + "step": 14121 + }, + { + "epoch": 0.4328184381512811, + "grad_norm": 1.5228264566155547, + "learning_rate": 1.2630433184776846e-05, + "loss": 0.6803, + "step": 14122 + }, + { + "epoch": 0.43284908667402233, + "grad_norm": 1.3208557574257704, + "learning_rate": 1.2629475492776786e-05, + "loss": 0.7025, + "step": 14123 + }, + { + "epoch": 0.43287973519676354, + "grad_norm": 0.6849550456293327, + "learning_rate": 1.262851777486783e-05, + "loss": 0.5944, + "step": 14124 + }, + { + "epoch": 0.43291038371950474, + "grad_norm": 0.642517250795337, + "learning_rate": 1.2627560031059414e-05, + "loss": 0.5769, + "step": 14125 + }, + { + "epoch": 0.43294103224224595, + "grad_norm": 1.6268978469451194, + "learning_rate": 1.2626602261360977e-05, + "loss": 0.7329, + "step": 14126 + }, + { + "epoch": 0.43297168076498715, + "grad_norm": 1.5201335722548863, + "learning_rate": 1.2625644465781956e-05, + "loss": 0.698, + "step": 14127 + }, + { + "epoch": 0.4330023292877283, + "grad_norm": 1.4208621164975341, + "learning_rate": 1.262468664433179e-05, + "loss": 0.773, + "step": 14128 + }, + { + "epoch": 0.4330329778104695, + "grad_norm": 1.438060493642387, + "learning_rate": 1.2623728797019915e-05, + "loss": 0.7637, + "step": 14129 + }, + { + "epoch": 0.4330636263332107, + "grad_norm": 1.396979337500532, + "learning_rate": 1.2622770923855764e-05, + "loss": 0.7677, + "step": 14130 + }, + { + "epoch": 0.4330942748559519, + "grad_norm": 1.3866068615911273, + "learning_rate": 1.2621813024848786e-05, + "loss": 0.7002, + "step": 14131 + }, + { + "epoch": 0.4331249233786931, + "grad_norm": 1.5123604984732877, + "learning_rate": 1.2620855100008411e-05, + "loss": 0.7521, + "step": 14132 + }, + { + "epoch": 0.43315557190143433, + "grad_norm": 1.5255929476290906, + "learning_rate": 1.261989714934408e-05, + "loss": 0.7597, + "step": 14133 + }, + { + "epoch": 0.43318622042417554, + "grad_norm": 1.4000067651376373, + "learning_rate": 1.2618939172865232e-05, + "loss": 0.763, + "step": 14134 + }, + { + "epoch": 0.43321686894691674, + "grad_norm": 1.3611323580721844, + "learning_rate": 1.2617981170581305e-05, + "loss": 0.6585, + "step": 14135 + }, + { + "epoch": 0.43324751746965795, + "grad_norm": 1.462405750108845, + "learning_rate": 1.2617023142501742e-05, + "loss": 0.7629, + "step": 14136 + }, + { + "epoch": 0.43327816599239916, + "grad_norm": 1.4764719004622817, + "learning_rate": 1.2616065088635981e-05, + "loss": 0.747, + "step": 14137 + }, + { + "epoch": 0.43330881451514036, + "grad_norm": 1.393608218539612, + "learning_rate": 1.2615107008993458e-05, + "loss": 0.7573, + "step": 14138 + }, + { + "epoch": 0.43333946303788157, + "grad_norm": 0.8169993350761557, + "learning_rate": 1.2614148903583621e-05, + "loss": 0.5743, + "step": 14139 + }, + { + "epoch": 0.4333701115606228, + "grad_norm": 1.3463726953066901, + "learning_rate": 1.2613190772415905e-05, + "loss": 0.7011, + "step": 14140 + }, + { + "epoch": 0.433400760083364, + "grad_norm": 0.7088087808273135, + "learning_rate": 1.2612232615499747e-05, + "loss": 0.5746, + "step": 14141 + }, + { + "epoch": 0.4334314086061052, + "grad_norm": 1.5133344883526645, + "learning_rate": 1.2611274432844596e-05, + "loss": 0.8071, + "step": 14142 + }, + { + "epoch": 0.4334620571288464, + "grad_norm": 1.5755814261282401, + "learning_rate": 1.2610316224459891e-05, + "loss": 0.7314, + "step": 14143 + }, + { + "epoch": 0.4334927056515876, + "grad_norm": 1.4651659693250023, + "learning_rate": 1.260935799035507e-05, + "loss": 0.7778, + "step": 14144 + }, + { + "epoch": 0.4335233541743288, + "grad_norm": 1.4045675799587651, + "learning_rate": 1.2608399730539578e-05, + "loss": 0.7976, + "step": 14145 + }, + { + "epoch": 0.43355400269707, + "grad_norm": 1.4478493052540085, + "learning_rate": 1.2607441445022856e-05, + "loss": 0.7218, + "step": 14146 + }, + { + "epoch": 0.4335846512198112, + "grad_norm": 1.3785903849169823, + "learning_rate": 1.2606483133814347e-05, + "loss": 0.6756, + "step": 14147 + }, + { + "epoch": 0.4336152997425524, + "grad_norm": 1.5288894324481883, + "learning_rate": 1.2605524796923492e-05, + "loss": 0.8021, + "step": 14148 + }, + { + "epoch": 0.4336459482652936, + "grad_norm": 1.29253454663942, + "learning_rate": 1.2604566434359735e-05, + "loss": 0.67, + "step": 14149 + }, + { + "epoch": 0.43367659678803483, + "grad_norm": 0.8487671270197145, + "learning_rate": 1.2603608046132515e-05, + "loss": 0.6056, + "step": 14150 + }, + { + "epoch": 0.43370724531077604, + "grad_norm": 1.3721612453528138, + "learning_rate": 1.2602649632251285e-05, + "loss": 0.6925, + "step": 14151 + }, + { + "epoch": 0.43373789383351724, + "grad_norm": 1.496970163143939, + "learning_rate": 1.2601691192725478e-05, + "loss": 0.7262, + "step": 14152 + }, + { + "epoch": 0.43376854235625845, + "grad_norm": 1.5320268361930272, + "learning_rate": 1.2600732727564544e-05, + "loss": 0.8236, + "step": 14153 + }, + { + "epoch": 0.43379919087899965, + "grad_norm": 1.366362164870943, + "learning_rate": 1.2599774236777925e-05, + "loss": 0.6882, + "step": 14154 + }, + { + "epoch": 0.43382983940174086, + "grad_norm": 1.46759347016438, + "learning_rate": 1.2598815720375067e-05, + "loss": 0.7424, + "step": 14155 + }, + { + "epoch": 0.43386048792448206, + "grad_norm": 1.401978721160116, + "learning_rate": 1.2597857178365409e-05, + "loss": 0.7566, + "step": 14156 + }, + { + "epoch": 0.43389113644722327, + "grad_norm": 1.5362832630472258, + "learning_rate": 1.25968986107584e-05, + "loss": 0.6604, + "step": 14157 + }, + { + "epoch": 0.4339217849699645, + "grad_norm": 0.6764992385419707, + "learning_rate": 1.2595940017563484e-05, + "loss": 0.5708, + "step": 14158 + }, + { + "epoch": 0.4339524334927056, + "grad_norm": 1.5053373642846926, + "learning_rate": 1.259498139879011e-05, + "loss": 0.7716, + "step": 14159 + }, + { + "epoch": 0.43398308201544683, + "grad_norm": 1.5444319807793032, + "learning_rate": 1.2594022754447718e-05, + "loss": 0.7896, + "step": 14160 + }, + { + "epoch": 0.43401373053818804, + "grad_norm": 1.643258693891913, + "learning_rate": 1.2593064084545756e-05, + "loss": 0.6653, + "step": 14161 + }, + { + "epoch": 0.43404437906092924, + "grad_norm": 0.6800300499829459, + "learning_rate": 1.2592105389093674e-05, + "loss": 0.5933, + "step": 14162 + }, + { + "epoch": 0.43407502758367045, + "grad_norm": 1.585005161736654, + "learning_rate": 1.259114666810091e-05, + "loss": 0.7201, + "step": 14163 + }, + { + "epoch": 0.43410567610641165, + "grad_norm": 1.4969509338242795, + "learning_rate": 1.2590187921576915e-05, + "loss": 0.8226, + "step": 14164 + }, + { + "epoch": 0.43413632462915286, + "grad_norm": 0.6700238246802347, + "learning_rate": 1.2589229149531135e-05, + "loss": 0.609, + "step": 14165 + }, + { + "epoch": 0.43416697315189406, + "grad_norm": 1.5350034780039759, + "learning_rate": 1.2588270351973022e-05, + "loss": 0.757, + "step": 14166 + }, + { + "epoch": 0.43419762167463527, + "grad_norm": 1.383716036203491, + "learning_rate": 1.2587311528912017e-05, + "loss": 0.7382, + "step": 14167 + }, + { + "epoch": 0.4342282701973765, + "grad_norm": 1.369576949093671, + "learning_rate": 1.2586352680357567e-05, + "loss": 0.717, + "step": 14168 + }, + { + "epoch": 0.4342589187201177, + "grad_norm": 1.3726716425436989, + "learning_rate": 1.2585393806319123e-05, + "loss": 0.706, + "step": 14169 + }, + { + "epoch": 0.4342895672428589, + "grad_norm": 1.7000375217790962, + "learning_rate": 1.2584434906806135e-05, + "loss": 0.7566, + "step": 14170 + }, + { + "epoch": 0.4343202157656001, + "grad_norm": 1.3697043686541563, + "learning_rate": 1.2583475981828048e-05, + "loss": 0.6717, + "step": 14171 + }, + { + "epoch": 0.4343508642883413, + "grad_norm": 1.5099023444364132, + "learning_rate": 1.258251703139431e-05, + "loss": 0.6716, + "step": 14172 + }, + { + "epoch": 0.4343815128110825, + "grad_norm": 1.4400086164846335, + "learning_rate": 1.2581558055514372e-05, + "loss": 0.8096, + "step": 14173 + }, + { + "epoch": 0.4344121613338237, + "grad_norm": 1.644103155664836, + "learning_rate": 1.258059905419768e-05, + "loss": 0.7611, + "step": 14174 + }, + { + "epoch": 0.4344428098565649, + "grad_norm": 1.4407258541508985, + "learning_rate": 1.2579640027453688e-05, + "loss": 0.716, + "step": 14175 + }, + { + "epoch": 0.4344734583793061, + "grad_norm": 1.4363444989936913, + "learning_rate": 1.2578680975291839e-05, + "loss": 0.6489, + "step": 14176 + }, + { + "epoch": 0.4345041069020473, + "grad_norm": 1.6262610008751406, + "learning_rate": 1.2577721897721588e-05, + "loss": 0.671, + "step": 14177 + }, + { + "epoch": 0.43453475542478853, + "grad_norm": 0.6957619936531471, + "learning_rate": 1.2576762794752385e-05, + "loss": 0.5713, + "step": 14178 + }, + { + "epoch": 0.43456540394752974, + "grad_norm": 1.3195021331297538, + "learning_rate": 1.257580366639368e-05, + "loss": 0.6342, + "step": 14179 + }, + { + "epoch": 0.43459605247027094, + "grad_norm": 1.396467524733136, + "learning_rate": 1.257484451265492e-05, + "loss": 0.6756, + "step": 14180 + }, + { + "epoch": 0.43462670099301215, + "grad_norm": 1.3982147417850435, + "learning_rate": 1.257388533354556e-05, + "loss": 0.8558, + "step": 14181 + }, + { + "epoch": 0.43465734951575336, + "grad_norm": 1.4929918191017364, + "learning_rate": 1.2572926129075049e-05, + "loss": 0.7838, + "step": 14182 + }, + { + "epoch": 0.43468799803849456, + "grad_norm": 1.4835110869730996, + "learning_rate": 1.2571966899252836e-05, + "loss": 0.7123, + "step": 14183 + }, + { + "epoch": 0.43471864656123577, + "grad_norm": 1.4247626081926212, + "learning_rate": 1.2571007644088376e-05, + "loss": 0.7797, + "step": 14184 + }, + { + "epoch": 0.43474929508397697, + "grad_norm": 1.4180919681447715, + "learning_rate": 1.2570048363591122e-05, + "loss": 0.6862, + "step": 14185 + }, + { + "epoch": 0.4347799436067182, + "grad_norm": 1.578747751330907, + "learning_rate": 1.2569089057770523e-05, + "loss": 0.766, + "step": 14186 + }, + { + "epoch": 0.4348105921294594, + "grad_norm": 1.6321570108403232, + "learning_rate": 1.2568129726636032e-05, + "loss": 0.7392, + "step": 14187 + }, + { + "epoch": 0.4348412406522006, + "grad_norm": 1.3409983672658974, + "learning_rate": 1.2567170370197102e-05, + "loss": 0.7344, + "step": 14188 + }, + { + "epoch": 0.4348718891749418, + "grad_norm": 1.4739827903219902, + "learning_rate": 1.2566210988463183e-05, + "loss": 0.8697, + "step": 14189 + }, + { + "epoch": 0.43490253769768294, + "grad_norm": 1.4251521750308598, + "learning_rate": 1.2565251581443735e-05, + "loss": 0.7625, + "step": 14190 + }, + { + "epoch": 0.43493318622042415, + "grad_norm": 1.4681510476503186, + "learning_rate": 1.25642921491482e-05, + "loss": 0.6973, + "step": 14191 + }, + { + "epoch": 0.43496383474316536, + "grad_norm": 1.5044189126326717, + "learning_rate": 1.2563332691586045e-05, + "loss": 0.6507, + "step": 14192 + }, + { + "epoch": 0.43499448326590656, + "grad_norm": 1.5087095224817948, + "learning_rate": 1.2562373208766716e-05, + "loss": 0.6864, + "step": 14193 + }, + { + "epoch": 0.43502513178864777, + "grad_norm": 1.35991311873062, + "learning_rate": 1.2561413700699668e-05, + "loss": 0.6765, + "step": 14194 + }, + { + "epoch": 0.435055780311389, + "grad_norm": 1.4077815345143874, + "learning_rate": 1.2560454167394351e-05, + "loss": 0.6558, + "step": 14195 + }, + { + "epoch": 0.4350864288341302, + "grad_norm": 1.6510594034479364, + "learning_rate": 1.255949460886023e-05, + "loss": 0.7452, + "step": 14196 + }, + { + "epoch": 0.4351170773568714, + "grad_norm": 0.7051520874415887, + "learning_rate": 1.255853502510675e-05, + "loss": 0.6145, + "step": 14197 + }, + { + "epoch": 0.4351477258796126, + "grad_norm": 1.3251978726374145, + "learning_rate": 1.2557575416143373e-05, + "loss": 0.7242, + "step": 14198 + }, + { + "epoch": 0.4351783744023538, + "grad_norm": 0.7024847558307592, + "learning_rate": 1.2556615781979547e-05, + "loss": 0.5822, + "step": 14199 + }, + { + "epoch": 0.435209022925095, + "grad_norm": 1.3861820174748505, + "learning_rate": 1.2555656122624733e-05, + "loss": 0.7358, + "step": 14200 + }, + { + "epoch": 0.4352396714478362, + "grad_norm": 1.4551061687898221, + "learning_rate": 1.2554696438088387e-05, + "loss": 0.7727, + "step": 14201 + }, + { + "epoch": 0.4352703199705774, + "grad_norm": 1.486596822818453, + "learning_rate": 1.2553736728379962e-05, + "loss": 0.7438, + "step": 14202 + }, + { + "epoch": 0.4353009684933186, + "grad_norm": 1.2787561633511049, + "learning_rate": 1.2552776993508915e-05, + "loss": 0.7009, + "step": 14203 + }, + { + "epoch": 0.4353316170160598, + "grad_norm": 1.351388030686821, + "learning_rate": 1.2551817233484702e-05, + "loss": 0.7956, + "step": 14204 + }, + { + "epoch": 0.43536226553880103, + "grad_norm": 1.3664280490230787, + "learning_rate": 1.2550857448316786e-05, + "loss": 0.6367, + "step": 14205 + }, + { + "epoch": 0.43539291406154224, + "grad_norm": 1.3911091857695275, + "learning_rate": 1.2549897638014615e-05, + "loss": 0.7098, + "step": 14206 + }, + { + "epoch": 0.43542356258428344, + "grad_norm": 1.6773763508653043, + "learning_rate": 1.254893780258765e-05, + "loss": 0.8104, + "step": 14207 + }, + { + "epoch": 0.43545421110702465, + "grad_norm": 0.7253524990147494, + "learning_rate": 1.2547977942045349e-05, + "loss": 0.6289, + "step": 14208 + }, + { + "epoch": 0.43548485962976585, + "grad_norm": 1.435122195595977, + "learning_rate": 1.2547018056397171e-05, + "loss": 0.7867, + "step": 14209 + }, + { + "epoch": 0.43551550815250706, + "grad_norm": 1.3639801570095278, + "learning_rate": 1.254605814565257e-05, + "loss": 0.7692, + "step": 14210 + }, + { + "epoch": 0.43554615667524826, + "grad_norm": 1.3580182686998374, + "learning_rate": 1.2545098209821009e-05, + "loss": 0.6664, + "step": 14211 + }, + { + "epoch": 0.43557680519798947, + "grad_norm": 0.6577914310136231, + "learning_rate": 1.2544138248911946e-05, + "loss": 0.6221, + "step": 14212 + }, + { + "epoch": 0.4356074537207307, + "grad_norm": 1.273522485074684, + "learning_rate": 1.2543178262934833e-05, + "loss": 0.6513, + "step": 14213 + }, + { + "epoch": 0.4356381022434719, + "grad_norm": 1.3948085240088695, + "learning_rate": 1.2542218251899136e-05, + "loss": 0.7608, + "step": 14214 + }, + { + "epoch": 0.4356687507662131, + "grad_norm": 1.3794391731236095, + "learning_rate": 1.254125821581431e-05, + "loss": 0.6714, + "step": 14215 + }, + { + "epoch": 0.4356993992889543, + "grad_norm": 0.6717453714924411, + "learning_rate": 1.2540298154689821e-05, + "loss": 0.5784, + "step": 14216 + }, + { + "epoch": 0.4357300478116955, + "grad_norm": 1.5428740597220483, + "learning_rate": 1.253933806853512e-05, + "loss": 0.7741, + "step": 14217 + }, + { + "epoch": 0.4357606963344367, + "grad_norm": 1.339043650726321, + "learning_rate": 1.2538377957359674e-05, + "loss": 0.7674, + "step": 14218 + }, + { + "epoch": 0.4357913448571779, + "grad_norm": 1.4475652310879905, + "learning_rate": 1.253741782117294e-05, + "loss": 0.6915, + "step": 14219 + }, + { + "epoch": 0.4358219933799191, + "grad_norm": 1.3359375331294312, + "learning_rate": 1.253645765998438e-05, + "loss": 0.5677, + "step": 14220 + }, + { + "epoch": 0.43585264190266026, + "grad_norm": 1.462023232767114, + "learning_rate": 1.2535497473803452e-05, + "loss": 0.7226, + "step": 14221 + }, + { + "epoch": 0.43588329042540147, + "grad_norm": 1.7949893730042206, + "learning_rate": 1.2534537262639619e-05, + "loss": 0.8127, + "step": 14222 + }, + { + "epoch": 0.4359139389481427, + "grad_norm": 0.6671342329302012, + "learning_rate": 1.253357702650234e-05, + "loss": 0.5999, + "step": 14223 + }, + { + "epoch": 0.4359445874708839, + "grad_norm": 1.4828749832414305, + "learning_rate": 1.2532616765401082e-05, + "loss": 0.7589, + "step": 14224 + }, + { + "epoch": 0.4359752359936251, + "grad_norm": 0.66147235951082, + "learning_rate": 1.25316564793453e-05, + "loss": 0.5833, + "step": 14225 + }, + { + "epoch": 0.4360058845163663, + "grad_norm": 1.59181407501768, + "learning_rate": 1.253069616834446e-05, + "loss": 0.7569, + "step": 14226 + }, + { + "epoch": 0.4360365330391075, + "grad_norm": 1.3246143303820292, + "learning_rate": 1.2529735832408023e-05, + "loss": 0.6518, + "step": 14227 + }, + { + "epoch": 0.4360671815618487, + "grad_norm": 1.4621762938081562, + "learning_rate": 1.2528775471545454e-05, + "loss": 0.6766, + "step": 14228 + }, + { + "epoch": 0.4360978300845899, + "grad_norm": 1.331181892250622, + "learning_rate": 1.2527815085766211e-05, + "loss": 0.6977, + "step": 14229 + }, + { + "epoch": 0.4361284786073311, + "grad_norm": 1.5702916619476188, + "learning_rate": 1.2526854675079756e-05, + "loss": 0.7794, + "step": 14230 + }, + { + "epoch": 0.4361591271300723, + "grad_norm": 1.3929378808565025, + "learning_rate": 1.2525894239495559e-05, + "loss": 0.6643, + "step": 14231 + }, + { + "epoch": 0.4361897756528135, + "grad_norm": 1.5508211314535782, + "learning_rate": 1.252493377902308e-05, + "loss": 0.7284, + "step": 14232 + }, + { + "epoch": 0.43622042417555473, + "grad_norm": 1.6215205308195206, + "learning_rate": 1.2523973293671785e-05, + "loss": 0.6756, + "step": 14233 + }, + { + "epoch": 0.43625107269829594, + "grad_norm": 1.471729352709553, + "learning_rate": 1.252301278345113e-05, + "loss": 0.7468, + "step": 14234 + }, + { + "epoch": 0.43628172122103714, + "grad_norm": 1.2809655820983556, + "learning_rate": 1.2522052248370589e-05, + "loss": 0.6962, + "step": 14235 + }, + { + "epoch": 0.43631236974377835, + "grad_norm": 1.3471676163662438, + "learning_rate": 1.252109168843962e-05, + "loss": 0.6746, + "step": 14236 + }, + { + "epoch": 0.43634301826651956, + "grad_norm": 1.6008694916687922, + "learning_rate": 1.252013110366769e-05, + "loss": 0.711, + "step": 14237 + }, + { + "epoch": 0.43637366678926076, + "grad_norm": 1.3886155965717502, + "learning_rate": 1.2519170494064259e-05, + "loss": 0.6551, + "step": 14238 + }, + { + "epoch": 0.43640431531200197, + "grad_norm": 1.379276877051728, + "learning_rate": 1.2518209859638801e-05, + "loss": 0.7621, + "step": 14239 + }, + { + "epoch": 0.4364349638347432, + "grad_norm": 0.6960593196691829, + "learning_rate": 1.2517249200400779e-05, + "loss": 0.6054, + "step": 14240 + }, + { + "epoch": 0.4364656123574844, + "grad_norm": 1.4796582643830534, + "learning_rate": 1.2516288516359651e-05, + "loss": 0.664, + "step": 14241 + }, + { + "epoch": 0.4364962608802256, + "grad_norm": 1.4145405728470528, + "learning_rate": 1.251532780752489e-05, + "loss": 0.7878, + "step": 14242 + }, + { + "epoch": 0.4365269094029668, + "grad_norm": 1.67672175609251, + "learning_rate": 1.2514367073905964e-05, + "loss": 0.7809, + "step": 14243 + }, + { + "epoch": 0.436557557925708, + "grad_norm": 1.528129369395663, + "learning_rate": 1.2513406315512335e-05, + "loss": 0.7147, + "step": 14244 + }, + { + "epoch": 0.4365882064484492, + "grad_norm": 1.3169003645414572, + "learning_rate": 1.2512445532353467e-05, + "loss": 0.6725, + "step": 14245 + }, + { + "epoch": 0.4366188549711904, + "grad_norm": 1.3869332067175288, + "learning_rate": 1.2511484724438833e-05, + "loss": 0.7752, + "step": 14246 + }, + { + "epoch": 0.4366495034939316, + "grad_norm": 1.3856518002620113, + "learning_rate": 1.2510523891777898e-05, + "loss": 0.719, + "step": 14247 + }, + { + "epoch": 0.4366801520166728, + "grad_norm": 1.268952221610435, + "learning_rate": 1.2509563034380127e-05, + "loss": 0.66, + "step": 14248 + }, + { + "epoch": 0.436710800539414, + "grad_norm": 1.436658567046414, + "learning_rate": 1.250860215225499e-05, + "loss": 0.6597, + "step": 14249 + }, + { + "epoch": 0.43674144906215523, + "grad_norm": 1.2178480620916392, + "learning_rate": 1.2507641245411954e-05, + "loss": 0.7205, + "step": 14250 + }, + { + "epoch": 0.43677209758489643, + "grad_norm": 1.2782658446557194, + "learning_rate": 1.2506680313860486e-05, + "loss": 0.6719, + "step": 14251 + }, + { + "epoch": 0.4368027461076376, + "grad_norm": 1.3686885891225964, + "learning_rate": 1.250571935761006e-05, + "loss": 0.794, + "step": 14252 + }, + { + "epoch": 0.4368333946303788, + "grad_norm": 1.2889914147402795, + "learning_rate": 1.2504758376670133e-05, + "loss": 0.7245, + "step": 14253 + }, + { + "epoch": 0.43686404315312, + "grad_norm": 1.5582740993502893, + "learning_rate": 1.2503797371050186e-05, + "loss": 0.7757, + "step": 14254 + }, + { + "epoch": 0.4368946916758612, + "grad_norm": 1.5267488513883305, + "learning_rate": 1.2502836340759683e-05, + "loss": 0.7085, + "step": 14255 + }, + { + "epoch": 0.4369253401986024, + "grad_norm": 1.4477065423489692, + "learning_rate": 1.250187528580809e-05, + "loss": 0.7763, + "step": 14256 + }, + { + "epoch": 0.4369559887213436, + "grad_norm": 1.3148064588580188, + "learning_rate": 1.2500914206204881e-05, + "loss": 0.6319, + "step": 14257 + }, + { + "epoch": 0.4369866372440848, + "grad_norm": 0.7153918502691258, + "learning_rate": 1.2499953101959523e-05, + "loss": 0.604, + "step": 14258 + }, + { + "epoch": 0.437017285766826, + "grad_norm": 1.646937374408096, + "learning_rate": 1.2498991973081493e-05, + "loss": 0.7306, + "step": 14259 + }, + { + "epoch": 0.43704793428956723, + "grad_norm": 1.4365086249154098, + "learning_rate": 1.2498030819580252e-05, + "loss": 0.741, + "step": 14260 + }, + { + "epoch": 0.43707858281230844, + "grad_norm": 0.6797872817083984, + "learning_rate": 1.2497069641465274e-05, + "loss": 0.6042, + "step": 14261 + }, + { + "epoch": 0.43710923133504964, + "grad_norm": 1.2313156345152003, + "learning_rate": 1.2496108438746029e-05, + "loss": 0.6444, + "step": 14262 + }, + { + "epoch": 0.43713987985779085, + "grad_norm": 1.419098583761155, + "learning_rate": 1.2495147211431992e-05, + "loss": 0.6769, + "step": 14263 + }, + { + "epoch": 0.43717052838053205, + "grad_norm": 1.4577512030533841, + "learning_rate": 1.2494185959532628e-05, + "loss": 0.7818, + "step": 14264 + }, + { + "epoch": 0.43720117690327326, + "grad_norm": 1.6220319766460816, + "learning_rate": 1.2493224683057413e-05, + "loss": 0.6634, + "step": 14265 + }, + { + "epoch": 0.43723182542601446, + "grad_norm": 1.5371619726320964, + "learning_rate": 1.2492263382015816e-05, + "loss": 0.739, + "step": 14266 + }, + { + "epoch": 0.43726247394875567, + "grad_norm": 1.494577888087095, + "learning_rate": 1.2491302056417311e-05, + "loss": 0.7305, + "step": 14267 + }, + { + "epoch": 0.4372931224714969, + "grad_norm": 1.5092045043361506, + "learning_rate": 1.2490340706271371e-05, + "loss": 0.7114, + "step": 14268 + }, + { + "epoch": 0.4373237709942381, + "grad_norm": 1.2636289597329762, + "learning_rate": 1.2489379331587466e-05, + "loss": 0.643, + "step": 14269 + }, + { + "epoch": 0.4373544195169793, + "grad_norm": 1.5103454789685944, + "learning_rate": 1.2488417932375068e-05, + "loss": 0.7902, + "step": 14270 + }, + { + "epoch": 0.4373850680397205, + "grad_norm": 1.3240537517274338, + "learning_rate": 1.2487456508643652e-05, + "loss": 0.7052, + "step": 14271 + }, + { + "epoch": 0.4374157165624617, + "grad_norm": 1.5617996126728761, + "learning_rate": 1.248649506040269e-05, + "loss": 0.7212, + "step": 14272 + }, + { + "epoch": 0.4374463650852029, + "grad_norm": 1.5219214308283413, + "learning_rate": 1.2485533587661657e-05, + "loss": 0.676, + "step": 14273 + }, + { + "epoch": 0.4374770136079441, + "grad_norm": 1.5216849728207966, + "learning_rate": 1.2484572090430028e-05, + "loss": 0.8337, + "step": 14274 + }, + { + "epoch": 0.4375076621306853, + "grad_norm": 1.7492853585515202, + "learning_rate": 1.248361056871727e-05, + "loss": 0.8142, + "step": 14275 + }, + { + "epoch": 0.4375383106534265, + "grad_norm": 1.5076047026595936, + "learning_rate": 1.2482649022532864e-05, + "loss": 0.7946, + "step": 14276 + }, + { + "epoch": 0.4375689591761677, + "grad_norm": 1.3544992368151092, + "learning_rate": 1.2481687451886279e-05, + "loss": 0.6964, + "step": 14277 + }, + { + "epoch": 0.43759960769890893, + "grad_norm": 1.2326165062501522, + "learning_rate": 1.2480725856787e-05, + "loss": 0.7001, + "step": 14278 + }, + { + "epoch": 0.43763025622165014, + "grad_norm": 1.4527527479339177, + "learning_rate": 1.2479764237244488e-05, + "loss": 0.6143, + "step": 14279 + }, + { + "epoch": 0.43766090474439134, + "grad_norm": 1.5353792319314643, + "learning_rate": 1.2478802593268226e-05, + "loss": 0.8441, + "step": 14280 + }, + { + "epoch": 0.43769155326713255, + "grad_norm": 1.6478989153144359, + "learning_rate": 1.2477840924867686e-05, + "loss": 0.7756, + "step": 14281 + }, + { + "epoch": 0.43772220178987375, + "grad_norm": 1.5051870175439954, + "learning_rate": 1.2476879232052348e-05, + "loss": 0.7164, + "step": 14282 + }, + { + "epoch": 0.4377528503126149, + "grad_norm": 1.5258143613888628, + "learning_rate": 1.2475917514831686e-05, + "loss": 0.6247, + "step": 14283 + }, + { + "epoch": 0.4377834988353561, + "grad_norm": 1.3452906700375395, + "learning_rate": 1.2474955773215171e-05, + "loss": 0.7135, + "step": 14284 + }, + { + "epoch": 0.4378141473580973, + "grad_norm": 1.5374976533989198, + "learning_rate": 1.2473994007212287e-05, + "loss": 0.7255, + "step": 14285 + }, + { + "epoch": 0.4378447958808385, + "grad_norm": 1.6891835031278875, + "learning_rate": 1.2473032216832508e-05, + "loss": 0.7884, + "step": 14286 + }, + { + "epoch": 0.4378754444035797, + "grad_norm": 1.463721612701911, + "learning_rate": 1.2472070402085308e-05, + "loss": 0.7631, + "step": 14287 + }, + { + "epoch": 0.43790609292632093, + "grad_norm": 1.5619234246755906, + "learning_rate": 1.2471108562980164e-05, + "loss": 0.7377, + "step": 14288 + }, + { + "epoch": 0.43793674144906214, + "grad_norm": 1.7497565776215724, + "learning_rate": 1.247014669952656e-05, + "loss": 0.7754, + "step": 14289 + }, + { + "epoch": 0.43796738997180334, + "grad_norm": 1.3523692327933237, + "learning_rate": 1.2469184811733963e-05, + "loss": 0.652, + "step": 14290 + }, + { + "epoch": 0.43799803849454455, + "grad_norm": 1.599659616421172, + "learning_rate": 1.2468222899611859e-05, + "loss": 0.7512, + "step": 14291 + }, + { + "epoch": 0.43802868701728576, + "grad_norm": 0.6969434696782336, + "learning_rate": 1.2467260963169723e-05, + "loss": 0.6079, + "step": 14292 + }, + { + "epoch": 0.43805933554002696, + "grad_norm": 1.490526759904563, + "learning_rate": 1.2466299002417036e-05, + "loss": 0.6533, + "step": 14293 + }, + { + "epoch": 0.43808998406276817, + "grad_norm": 1.6946457001195425, + "learning_rate": 1.2465337017363271e-05, + "loss": 0.7848, + "step": 14294 + }, + { + "epoch": 0.4381206325855094, + "grad_norm": 1.641897780249034, + "learning_rate": 1.2464375008017911e-05, + "loss": 0.6463, + "step": 14295 + }, + { + "epoch": 0.4381512811082506, + "grad_norm": 1.5698336370466162, + "learning_rate": 1.246341297439043e-05, + "loss": 0.7141, + "step": 14296 + }, + { + "epoch": 0.4381819296309918, + "grad_norm": 1.5496921173668474, + "learning_rate": 1.2462450916490314e-05, + "loss": 0.8612, + "step": 14297 + }, + { + "epoch": 0.438212578153733, + "grad_norm": 1.4815053695508138, + "learning_rate": 1.2461488834327038e-05, + "loss": 0.6381, + "step": 14298 + }, + { + "epoch": 0.4382432266764742, + "grad_norm": 1.4857143340069352, + "learning_rate": 1.246052672791008e-05, + "loss": 0.7826, + "step": 14299 + }, + { + "epoch": 0.4382738751992154, + "grad_norm": 1.4766498282843645, + "learning_rate": 1.2459564597248928e-05, + "loss": 0.7005, + "step": 14300 + }, + { + "epoch": 0.4383045237219566, + "grad_norm": 1.3194767304997979, + "learning_rate": 1.2458602442353053e-05, + "loss": 0.755, + "step": 14301 + }, + { + "epoch": 0.4383351722446978, + "grad_norm": 0.6914921518051406, + "learning_rate": 1.2457640263231943e-05, + "loss": 0.5773, + "step": 14302 + }, + { + "epoch": 0.438365820767439, + "grad_norm": 0.693194022451669, + "learning_rate": 1.2456678059895069e-05, + "loss": 0.559, + "step": 14303 + }, + { + "epoch": 0.4383964692901802, + "grad_norm": 0.6592743078916277, + "learning_rate": 1.2455715832351923e-05, + "loss": 0.5868, + "step": 14304 + }, + { + "epoch": 0.43842711781292143, + "grad_norm": 0.7056861442212641, + "learning_rate": 1.2454753580611977e-05, + "loss": 0.5903, + "step": 14305 + }, + { + "epoch": 0.43845776633566264, + "grad_norm": 1.7541025642283552, + "learning_rate": 1.2453791304684718e-05, + "loss": 0.8198, + "step": 14306 + }, + { + "epoch": 0.43848841485840384, + "grad_norm": 1.6755528777241981, + "learning_rate": 1.2452829004579622e-05, + "loss": 0.7915, + "step": 14307 + }, + { + "epoch": 0.43851906338114505, + "grad_norm": 1.303285923279656, + "learning_rate": 1.2451866680306179e-05, + "loss": 0.7273, + "step": 14308 + }, + { + "epoch": 0.43854971190388625, + "grad_norm": 0.7367456135539723, + "learning_rate": 1.2450904331873864e-05, + "loss": 0.6053, + "step": 14309 + }, + { + "epoch": 0.43858036042662746, + "grad_norm": 1.3181865700426558, + "learning_rate": 1.244994195929216e-05, + "loss": 0.6288, + "step": 14310 + }, + { + "epoch": 0.43861100894936866, + "grad_norm": 1.56307812868497, + "learning_rate": 1.2448979562570554e-05, + "loss": 0.6828, + "step": 14311 + }, + { + "epoch": 0.43864165747210987, + "grad_norm": 1.5291263793094199, + "learning_rate": 1.2448017141718524e-05, + "loss": 0.7525, + "step": 14312 + }, + { + "epoch": 0.4386723059948511, + "grad_norm": 1.526891823690279, + "learning_rate": 1.2447054696745556e-05, + "loss": 0.7393, + "step": 14313 + }, + { + "epoch": 0.4387029545175922, + "grad_norm": 1.3346325916741122, + "learning_rate": 1.2446092227661129e-05, + "loss": 0.6797, + "step": 14314 + }, + { + "epoch": 0.43873360304033343, + "grad_norm": 1.543994318233735, + "learning_rate": 1.2445129734474732e-05, + "loss": 0.8033, + "step": 14315 + }, + { + "epoch": 0.43876425156307464, + "grad_norm": 1.4277198126317736, + "learning_rate": 1.2444167217195846e-05, + "loss": 0.7644, + "step": 14316 + }, + { + "epoch": 0.43879490008581584, + "grad_norm": 1.4900288423283237, + "learning_rate": 1.2443204675833955e-05, + "loss": 0.7138, + "step": 14317 + }, + { + "epoch": 0.43882554860855705, + "grad_norm": 1.555067342161479, + "learning_rate": 1.2442242110398541e-05, + "loss": 0.6427, + "step": 14318 + }, + { + "epoch": 0.43885619713129825, + "grad_norm": 1.4571279761418208, + "learning_rate": 1.2441279520899094e-05, + "loss": 0.7993, + "step": 14319 + }, + { + "epoch": 0.43888684565403946, + "grad_norm": 1.8342219450809847, + "learning_rate": 1.2440316907345094e-05, + "loss": 0.8252, + "step": 14320 + }, + { + "epoch": 0.43891749417678066, + "grad_norm": 1.4340292208348862, + "learning_rate": 1.2439354269746027e-05, + "loss": 0.7304, + "step": 14321 + }, + { + "epoch": 0.43894814269952187, + "grad_norm": 1.5680526084176247, + "learning_rate": 1.2438391608111378e-05, + "loss": 0.7695, + "step": 14322 + }, + { + "epoch": 0.4389787912222631, + "grad_norm": 1.3886369014760338, + "learning_rate": 1.2437428922450632e-05, + "loss": 0.7597, + "step": 14323 + }, + { + "epoch": 0.4390094397450043, + "grad_norm": 1.7615650442518052, + "learning_rate": 1.2436466212773278e-05, + "loss": 0.755, + "step": 14324 + }, + { + "epoch": 0.4390400882677455, + "grad_norm": 1.474582364121481, + "learning_rate": 1.2435503479088792e-05, + "loss": 0.7504, + "step": 14325 + }, + { + "epoch": 0.4390707367904867, + "grad_norm": 1.3021150425049537, + "learning_rate": 1.2434540721406674e-05, + "loss": 0.6666, + "step": 14326 + }, + { + "epoch": 0.4391013853132279, + "grad_norm": 1.465991804962981, + "learning_rate": 1.2433577939736398e-05, + "loss": 0.7549, + "step": 14327 + }, + { + "epoch": 0.4391320338359691, + "grad_norm": 1.5625775467876408, + "learning_rate": 1.2432615134087465e-05, + "loss": 0.7995, + "step": 14328 + }, + { + "epoch": 0.4391626823587103, + "grad_norm": 0.7836797628864278, + "learning_rate": 1.2431652304469344e-05, + "loss": 0.6258, + "step": 14329 + }, + { + "epoch": 0.4391933308814515, + "grad_norm": 1.2909939324447313, + "learning_rate": 1.2430689450891533e-05, + "loss": 0.7288, + "step": 14330 + }, + { + "epoch": 0.4392239794041927, + "grad_norm": 1.687672099848065, + "learning_rate": 1.2429726573363517e-05, + "loss": 0.7748, + "step": 14331 + }, + { + "epoch": 0.4392546279269339, + "grad_norm": 1.2675327288748355, + "learning_rate": 1.2428763671894786e-05, + "loss": 0.6697, + "step": 14332 + }, + { + "epoch": 0.43928527644967513, + "grad_norm": 1.5648945941651915, + "learning_rate": 1.2427800746494822e-05, + "loss": 0.8705, + "step": 14333 + }, + { + "epoch": 0.43931592497241634, + "grad_norm": 1.58939787216483, + "learning_rate": 1.2426837797173117e-05, + "loss": 0.6374, + "step": 14334 + }, + { + "epoch": 0.43934657349515754, + "grad_norm": 1.4641192319730625, + "learning_rate": 1.2425874823939158e-05, + "loss": 0.6783, + "step": 14335 + }, + { + "epoch": 0.43937722201789875, + "grad_norm": 1.476859613274402, + "learning_rate": 1.2424911826802433e-05, + "loss": 0.7058, + "step": 14336 + }, + { + "epoch": 0.43940787054063996, + "grad_norm": 0.6956510777237987, + "learning_rate": 1.2423948805772435e-05, + "loss": 0.6282, + "step": 14337 + }, + { + "epoch": 0.43943851906338116, + "grad_norm": 1.4666412867640082, + "learning_rate": 1.2422985760858646e-05, + "loss": 0.7598, + "step": 14338 + }, + { + "epoch": 0.43946916758612237, + "grad_norm": 1.5929026226252028, + "learning_rate": 1.2422022692070557e-05, + "loss": 0.8042, + "step": 14339 + }, + { + "epoch": 0.4394998161088636, + "grad_norm": 1.4060169549166857, + "learning_rate": 1.2421059599417663e-05, + "loss": 0.7634, + "step": 14340 + }, + { + "epoch": 0.4395304646316048, + "grad_norm": 1.4973888091751195, + "learning_rate": 1.2420096482909445e-05, + "loss": 0.7196, + "step": 14341 + }, + { + "epoch": 0.439561113154346, + "grad_norm": 0.6554830193517986, + "learning_rate": 1.2419133342555399e-05, + "loss": 0.6037, + "step": 14342 + }, + { + "epoch": 0.4395917616770872, + "grad_norm": 1.39284498008494, + "learning_rate": 1.2418170178365014e-05, + "loss": 0.6637, + "step": 14343 + }, + { + "epoch": 0.4396224101998284, + "grad_norm": 0.6649013049695116, + "learning_rate": 1.241720699034778e-05, + "loss": 0.5658, + "step": 14344 + }, + { + "epoch": 0.43965305872256955, + "grad_norm": 1.415662297285009, + "learning_rate": 1.2416243778513183e-05, + "loss": 0.87, + "step": 14345 + }, + { + "epoch": 0.43968370724531075, + "grad_norm": 1.4170185374931101, + "learning_rate": 1.241528054287072e-05, + "loss": 0.6602, + "step": 14346 + }, + { + "epoch": 0.43971435576805196, + "grad_norm": 1.4023606317683706, + "learning_rate": 1.2414317283429884e-05, + "loss": 0.6832, + "step": 14347 + }, + { + "epoch": 0.43974500429079316, + "grad_norm": 1.2484675461274768, + "learning_rate": 1.2413354000200157e-05, + "loss": 0.6571, + "step": 14348 + }, + { + "epoch": 0.43977565281353437, + "grad_norm": 1.5813652859165719, + "learning_rate": 1.2412390693191036e-05, + "loss": 0.7929, + "step": 14349 + }, + { + "epoch": 0.4398063013362756, + "grad_norm": 1.354345596154528, + "learning_rate": 1.2411427362412015e-05, + "loss": 0.7769, + "step": 14350 + }, + { + "epoch": 0.4398369498590168, + "grad_norm": 1.567731003190385, + "learning_rate": 1.241046400787258e-05, + "loss": 0.6224, + "step": 14351 + }, + { + "epoch": 0.439867598381758, + "grad_norm": 1.3834562047308214, + "learning_rate": 1.240950062958223e-05, + "loss": 0.6996, + "step": 14352 + }, + { + "epoch": 0.4398982469044992, + "grad_norm": 1.5585240065702248, + "learning_rate": 1.240853722755045e-05, + "loss": 0.7861, + "step": 14353 + }, + { + "epoch": 0.4399288954272404, + "grad_norm": 1.6394362749046838, + "learning_rate": 1.2407573801786738e-05, + "loss": 0.6952, + "step": 14354 + }, + { + "epoch": 0.4399595439499816, + "grad_norm": 1.7605545380534713, + "learning_rate": 1.2406610352300586e-05, + "loss": 0.7038, + "step": 14355 + }, + { + "epoch": 0.4399901924727228, + "grad_norm": 1.4607885103891647, + "learning_rate": 1.2405646879101487e-05, + "loss": 0.715, + "step": 14356 + }, + { + "epoch": 0.440020840995464, + "grad_norm": 1.5065043814733838, + "learning_rate": 1.240468338219893e-05, + "loss": 0.7242, + "step": 14357 + }, + { + "epoch": 0.4400514895182052, + "grad_norm": 1.4151839229005978, + "learning_rate": 1.2403719861602417e-05, + "loss": 0.7073, + "step": 14358 + }, + { + "epoch": 0.4400821380409464, + "grad_norm": 1.3990290245720511, + "learning_rate": 1.2402756317321436e-05, + "loss": 0.6936, + "step": 14359 + }, + { + "epoch": 0.44011278656368763, + "grad_norm": 1.4303752980787179, + "learning_rate": 1.2401792749365481e-05, + "loss": 0.6944, + "step": 14360 + }, + { + "epoch": 0.44014343508642884, + "grad_norm": 0.7277467881666321, + "learning_rate": 1.2400829157744048e-05, + "loss": 0.6008, + "step": 14361 + }, + { + "epoch": 0.44017408360917004, + "grad_norm": 1.4672048305818295, + "learning_rate": 1.239986554246663e-05, + "loss": 0.6649, + "step": 14362 + }, + { + "epoch": 0.44020473213191125, + "grad_norm": 1.5589107070216834, + "learning_rate": 1.2398901903542727e-05, + "loss": 0.7479, + "step": 14363 + }, + { + "epoch": 0.44023538065465245, + "grad_norm": 1.4056935577620482, + "learning_rate": 1.2397938240981827e-05, + "loss": 0.6989, + "step": 14364 + }, + { + "epoch": 0.44026602917739366, + "grad_norm": 1.6667100709750198, + "learning_rate": 1.2396974554793423e-05, + "loss": 0.7788, + "step": 14365 + }, + { + "epoch": 0.44029667770013486, + "grad_norm": 1.4125665221632993, + "learning_rate": 1.2396010844987022e-05, + "loss": 0.665, + "step": 14366 + }, + { + "epoch": 0.44032732622287607, + "grad_norm": 1.4546231921821506, + "learning_rate": 1.2395047111572111e-05, + "loss": 0.7574, + "step": 14367 + }, + { + "epoch": 0.4403579747456173, + "grad_norm": 1.4330801918204772, + "learning_rate": 1.2394083354558189e-05, + "loss": 0.7443, + "step": 14368 + }, + { + "epoch": 0.4403886232683585, + "grad_norm": 1.3642449233904461, + "learning_rate": 1.2393119573954748e-05, + "loss": 0.6665, + "step": 14369 + }, + { + "epoch": 0.4404192717910997, + "grad_norm": 1.351403758537649, + "learning_rate": 1.2392155769771293e-05, + "loss": 0.6937, + "step": 14370 + }, + { + "epoch": 0.4404499203138409, + "grad_norm": 1.4750968483603584, + "learning_rate": 1.239119194201731e-05, + "loss": 0.7506, + "step": 14371 + }, + { + "epoch": 0.4404805688365821, + "grad_norm": 0.6614512230779255, + "learning_rate": 1.2390228090702303e-05, + "loss": 0.5476, + "step": 14372 + }, + { + "epoch": 0.4405112173593233, + "grad_norm": 1.3496241519157846, + "learning_rate": 1.2389264215835769e-05, + "loss": 0.6924, + "step": 14373 + }, + { + "epoch": 0.4405418658820645, + "grad_norm": 1.4186892420182926, + "learning_rate": 1.23883003174272e-05, + "loss": 0.6262, + "step": 14374 + }, + { + "epoch": 0.4405725144048057, + "grad_norm": 2.0490525803654456, + "learning_rate": 1.2387336395486101e-05, + "loss": 0.7047, + "step": 14375 + }, + { + "epoch": 0.44060316292754687, + "grad_norm": 1.452615730593293, + "learning_rate": 1.238637245002196e-05, + "loss": 0.8911, + "step": 14376 + }, + { + "epoch": 0.44063381145028807, + "grad_norm": 1.661595738581178, + "learning_rate": 1.2385408481044284e-05, + "loss": 0.7586, + "step": 14377 + }, + { + "epoch": 0.4406644599730293, + "grad_norm": 0.680270298245259, + "learning_rate": 1.2384444488562568e-05, + "loss": 0.5758, + "step": 14378 + }, + { + "epoch": 0.4406951084957705, + "grad_norm": 1.4947672125793379, + "learning_rate": 1.2383480472586308e-05, + "loss": 0.8304, + "step": 14379 + }, + { + "epoch": 0.4407257570185117, + "grad_norm": 1.3567327909896723, + "learning_rate": 1.2382516433125006e-05, + "loss": 0.6943, + "step": 14380 + }, + { + "epoch": 0.4407564055412529, + "grad_norm": 1.3565705918007165, + "learning_rate": 1.238155237018816e-05, + "loss": 0.7361, + "step": 14381 + }, + { + "epoch": 0.4407870540639941, + "grad_norm": 1.3837144628483484, + "learning_rate": 1.2380588283785274e-05, + "loss": 0.5587, + "step": 14382 + }, + { + "epoch": 0.4408177025867353, + "grad_norm": 1.4548212677116354, + "learning_rate": 1.2379624173925837e-05, + "loss": 0.6775, + "step": 14383 + }, + { + "epoch": 0.4408483511094765, + "grad_norm": 1.5221637160544044, + "learning_rate": 1.2378660040619356e-05, + "loss": 0.8283, + "step": 14384 + }, + { + "epoch": 0.4408789996322177, + "grad_norm": 0.7033168235617576, + "learning_rate": 1.2377695883875328e-05, + "loss": 0.5783, + "step": 14385 + }, + { + "epoch": 0.4409096481549589, + "grad_norm": 1.450493548783669, + "learning_rate": 1.2376731703703258e-05, + "loss": 0.7038, + "step": 14386 + }, + { + "epoch": 0.4409402966777001, + "grad_norm": 1.5128857186099234, + "learning_rate": 1.2375767500112637e-05, + "loss": 0.776, + "step": 14387 + }, + { + "epoch": 0.44097094520044133, + "grad_norm": 1.3935136728478317, + "learning_rate": 1.2374803273112974e-05, + "loss": 0.6196, + "step": 14388 + }, + { + "epoch": 0.44100159372318254, + "grad_norm": 1.3021051818944718, + "learning_rate": 1.2373839022713764e-05, + "loss": 0.7285, + "step": 14389 + }, + { + "epoch": 0.44103224224592374, + "grad_norm": 1.48030418456696, + "learning_rate": 1.2372874748924514e-05, + "loss": 0.8662, + "step": 14390 + }, + { + "epoch": 0.44106289076866495, + "grad_norm": 1.6560280816552893, + "learning_rate": 1.237191045175472e-05, + "loss": 0.7047, + "step": 14391 + }, + { + "epoch": 0.44109353929140616, + "grad_norm": 1.5705722916937006, + "learning_rate": 1.2370946131213889e-05, + "loss": 0.7107, + "step": 14392 + }, + { + "epoch": 0.44112418781414736, + "grad_norm": 1.5642983103824655, + "learning_rate": 1.2369981787311515e-05, + "loss": 0.7019, + "step": 14393 + }, + { + "epoch": 0.44115483633688857, + "grad_norm": 1.57223353190676, + "learning_rate": 1.2369017420057104e-05, + "loss": 0.8182, + "step": 14394 + }, + { + "epoch": 0.4411854848596298, + "grad_norm": 1.3983578801739047, + "learning_rate": 1.236805302946016e-05, + "loss": 0.7362, + "step": 14395 + }, + { + "epoch": 0.441216133382371, + "grad_norm": 1.5135909313235953, + "learning_rate": 1.236708861553018e-05, + "loss": 0.8253, + "step": 14396 + }, + { + "epoch": 0.4412467819051122, + "grad_norm": 1.393983941333065, + "learning_rate": 1.2366124178276677e-05, + "loss": 0.7325, + "step": 14397 + }, + { + "epoch": 0.4412774304278534, + "grad_norm": 1.5884530937092145, + "learning_rate": 1.2365159717709144e-05, + "loss": 0.8099, + "step": 14398 + }, + { + "epoch": 0.4413080789505946, + "grad_norm": 1.3934266282027168, + "learning_rate": 1.2364195233837086e-05, + "loss": 0.5937, + "step": 14399 + }, + { + "epoch": 0.4413387274733358, + "grad_norm": 1.7539884738208977, + "learning_rate": 1.2363230726670005e-05, + "loss": 0.7336, + "step": 14400 + }, + { + "epoch": 0.441369375996077, + "grad_norm": 1.636064355612588, + "learning_rate": 1.2362266196217414e-05, + "loss": 0.6314, + "step": 14401 + }, + { + "epoch": 0.4414000245188182, + "grad_norm": 1.3842688591248802, + "learning_rate": 1.2361301642488806e-05, + "loss": 0.7481, + "step": 14402 + }, + { + "epoch": 0.4414306730415594, + "grad_norm": 1.3292307264978132, + "learning_rate": 1.2360337065493689e-05, + "loss": 0.7179, + "step": 14403 + }, + { + "epoch": 0.4414613215643006, + "grad_norm": 1.6677701724180778, + "learning_rate": 1.2359372465241563e-05, + "loss": 0.8067, + "step": 14404 + }, + { + "epoch": 0.44149197008704183, + "grad_norm": 1.5976500747397613, + "learning_rate": 1.2358407841741942e-05, + "loss": 0.7684, + "step": 14405 + }, + { + "epoch": 0.44152261860978304, + "grad_norm": 1.3370882473584667, + "learning_rate": 1.2357443195004324e-05, + "loss": 0.6979, + "step": 14406 + }, + { + "epoch": 0.4415532671325242, + "grad_norm": 1.4245501034880261, + "learning_rate": 1.2356478525038211e-05, + "loss": 0.8355, + "step": 14407 + }, + { + "epoch": 0.4415839156552654, + "grad_norm": 1.575301942199594, + "learning_rate": 1.2355513831853117e-05, + "loss": 0.7694, + "step": 14408 + }, + { + "epoch": 0.4416145641780066, + "grad_norm": 1.5211149128584094, + "learning_rate": 1.235454911545854e-05, + "loss": 0.7268, + "step": 14409 + }, + { + "epoch": 0.4416452127007478, + "grad_norm": 1.4098998759691201, + "learning_rate": 1.235358437586399e-05, + "loss": 0.6487, + "step": 14410 + }, + { + "epoch": 0.441675861223489, + "grad_norm": 0.6954163151498842, + "learning_rate": 1.2352619613078969e-05, + "loss": 0.6263, + "step": 14411 + }, + { + "epoch": 0.4417065097462302, + "grad_norm": 1.4290952736145137, + "learning_rate": 1.2351654827112987e-05, + "loss": 0.7333, + "step": 14412 + }, + { + "epoch": 0.4417371582689714, + "grad_norm": 0.7052160637694505, + "learning_rate": 1.2350690017975546e-05, + "loss": 0.6141, + "step": 14413 + }, + { + "epoch": 0.4417678067917126, + "grad_norm": 1.5834212662321436, + "learning_rate": 1.2349725185676157e-05, + "loss": 0.7551, + "step": 14414 + }, + { + "epoch": 0.44179845531445383, + "grad_norm": 1.6190285583421054, + "learning_rate": 1.2348760330224322e-05, + "loss": 0.6876, + "step": 14415 + }, + { + "epoch": 0.44182910383719504, + "grad_norm": 1.6306304982433457, + "learning_rate": 1.2347795451629553e-05, + "loss": 0.7793, + "step": 14416 + }, + { + "epoch": 0.44185975235993624, + "grad_norm": 1.4744880255082955, + "learning_rate": 1.2346830549901354e-05, + "loss": 0.7492, + "step": 14417 + }, + { + "epoch": 0.44189040088267745, + "grad_norm": 1.4725062811710161, + "learning_rate": 1.2345865625049233e-05, + "loss": 0.7617, + "step": 14418 + }, + { + "epoch": 0.44192104940541865, + "grad_norm": 1.7129763032610252, + "learning_rate": 1.2344900677082696e-05, + "loss": 0.7914, + "step": 14419 + }, + { + "epoch": 0.44195169792815986, + "grad_norm": 0.7030962732379927, + "learning_rate": 1.2343935706011256e-05, + "loss": 0.5936, + "step": 14420 + }, + { + "epoch": 0.44198234645090106, + "grad_norm": 1.561013566067358, + "learning_rate": 1.2342970711844415e-05, + "loss": 0.6343, + "step": 14421 + }, + { + "epoch": 0.44201299497364227, + "grad_norm": 0.682869082091957, + "learning_rate": 1.2342005694591686e-05, + "loss": 0.598, + "step": 14422 + }, + { + "epoch": 0.4420436434963835, + "grad_norm": 1.942961651122071, + "learning_rate": 1.2341040654262576e-05, + "loss": 0.8332, + "step": 14423 + }, + { + "epoch": 0.4420742920191247, + "grad_norm": 1.6760837513601659, + "learning_rate": 1.2340075590866591e-05, + "loss": 0.6791, + "step": 14424 + }, + { + "epoch": 0.4421049405418659, + "grad_norm": 1.4731639653497608, + "learning_rate": 1.2339110504413245e-05, + "loss": 0.6647, + "step": 14425 + }, + { + "epoch": 0.4421355890646071, + "grad_norm": 1.4250545775126147, + "learning_rate": 1.2338145394912042e-05, + "loss": 0.7629, + "step": 14426 + }, + { + "epoch": 0.4421662375873483, + "grad_norm": 1.4777288193541636, + "learning_rate": 1.2337180262372494e-05, + "loss": 0.6557, + "step": 14427 + }, + { + "epoch": 0.4421968861100895, + "grad_norm": 1.616902795363029, + "learning_rate": 1.2336215106804114e-05, + "loss": 0.8752, + "step": 14428 + }, + { + "epoch": 0.4422275346328307, + "grad_norm": 0.6829417474217943, + "learning_rate": 1.233524992821641e-05, + "loss": 0.5929, + "step": 14429 + }, + { + "epoch": 0.4422581831555719, + "grad_norm": 1.5372476328404838, + "learning_rate": 1.2334284726618885e-05, + "loss": 0.6476, + "step": 14430 + }, + { + "epoch": 0.4422888316783131, + "grad_norm": 1.27242858894164, + "learning_rate": 1.2333319502021059e-05, + "loss": 0.6819, + "step": 14431 + }, + { + "epoch": 0.4423194802010543, + "grad_norm": 1.4372310934319954, + "learning_rate": 1.233235425443244e-05, + "loss": 0.738, + "step": 14432 + }, + { + "epoch": 0.44235012872379553, + "grad_norm": 1.5080872464166808, + "learning_rate": 1.2331388983862535e-05, + "loss": 0.6275, + "step": 14433 + }, + { + "epoch": 0.44238077724653674, + "grad_norm": 0.6722998326947808, + "learning_rate": 1.2330423690320859e-05, + "loss": 0.5764, + "step": 14434 + }, + { + "epoch": 0.44241142576927794, + "grad_norm": 1.4858437304837786, + "learning_rate": 1.2329458373816923e-05, + "loss": 0.7897, + "step": 14435 + }, + { + "epoch": 0.44244207429201915, + "grad_norm": 1.405280573222788, + "learning_rate": 1.2328493034360239e-05, + "loss": 0.7158, + "step": 14436 + }, + { + "epoch": 0.44247272281476036, + "grad_norm": 0.6985135463624453, + "learning_rate": 1.2327527671960313e-05, + "loss": 0.6162, + "step": 14437 + }, + { + "epoch": 0.4425033713375015, + "grad_norm": 1.396911056953573, + "learning_rate": 1.2326562286626665e-05, + "loss": 0.796, + "step": 14438 + }, + { + "epoch": 0.4425340198602427, + "grad_norm": 1.432814927313794, + "learning_rate": 1.2325596878368799e-05, + "loss": 0.7789, + "step": 14439 + }, + { + "epoch": 0.4425646683829839, + "grad_norm": 1.4703708988824051, + "learning_rate": 1.2324631447196239e-05, + "loss": 0.7271, + "step": 14440 + }, + { + "epoch": 0.4425953169057251, + "grad_norm": 1.6626226364115335, + "learning_rate": 1.2323665993118483e-05, + "loss": 0.7248, + "step": 14441 + }, + { + "epoch": 0.44262596542846633, + "grad_norm": 0.6789324110585139, + "learning_rate": 1.2322700516145056e-05, + "loss": 0.588, + "step": 14442 + }, + { + "epoch": 0.44265661395120753, + "grad_norm": 1.3982985704213202, + "learning_rate": 1.2321735016285465e-05, + "loss": 0.7154, + "step": 14443 + }, + { + "epoch": 0.44268726247394874, + "grad_norm": 1.506531380382821, + "learning_rate": 1.2320769493549228e-05, + "loss": 0.7291, + "step": 14444 + }, + { + "epoch": 0.44271791099668995, + "grad_norm": 1.4827100024344524, + "learning_rate": 1.2319803947945852e-05, + "loss": 0.7243, + "step": 14445 + }, + { + "epoch": 0.44274855951943115, + "grad_norm": 0.6757209220892889, + "learning_rate": 1.2318838379484854e-05, + "loss": 0.5784, + "step": 14446 + }, + { + "epoch": 0.44277920804217236, + "grad_norm": 1.3507493238319712, + "learning_rate": 1.2317872788175751e-05, + "loss": 0.7437, + "step": 14447 + }, + { + "epoch": 0.44280985656491356, + "grad_norm": 1.362986082435702, + "learning_rate": 1.231690717402805e-05, + "loss": 0.7266, + "step": 14448 + }, + { + "epoch": 0.44284050508765477, + "grad_norm": 1.6591203148012976, + "learning_rate": 1.2315941537051273e-05, + "loss": 0.843, + "step": 14449 + }, + { + "epoch": 0.442871153610396, + "grad_norm": 1.6502911244130112, + "learning_rate": 1.2314975877254928e-05, + "loss": 0.799, + "step": 14450 + }, + { + "epoch": 0.4429018021331372, + "grad_norm": 1.374429604464753, + "learning_rate": 1.231401019464854e-05, + "loss": 0.6934, + "step": 14451 + }, + { + "epoch": 0.4429324506558784, + "grad_norm": 1.5304571384076708, + "learning_rate": 1.2313044489241612e-05, + "loss": 0.8117, + "step": 14452 + }, + { + "epoch": 0.4429630991786196, + "grad_norm": 1.2962218302888886, + "learning_rate": 1.2312078761043667e-05, + "loss": 0.7665, + "step": 14453 + }, + { + "epoch": 0.4429937477013608, + "grad_norm": 1.5360905633935193, + "learning_rate": 1.2311113010064217e-05, + "loss": 0.7276, + "step": 14454 + }, + { + "epoch": 0.443024396224102, + "grad_norm": 1.597609306272546, + "learning_rate": 1.2310147236312781e-05, + "loss": 0.7295, + "step": 14455 + }, + { + "epoch": 0.4430550447468432, + "grad_norm": 0.6918086720245374, + "learning_rate": 1.2309181439798871e-05, + "loss": 0.6197, + "step": 14456 + }, + { + "epoch": 0.4430856932695844, + "grad_norm": 0.6986680623634531, + "learning_rate": 1.2308215620532008e-05, + "loss": 0.6145, + "step": 14457 + }, + { + "epoch": 0.4431163417923256, + "grad_norm": 1.5735315002390202, + "learning_rate": 1.2307249778521704e-05, + "loss": 0.6982, + "step": 14458 + }, + { + "epoch": 0.4431469903150668, + "grad_norm": 1.3314629828388878, + "learning_rate": 1.230628391377748e-05, + "loss": 0.7881, + "step": 14459 + }, + { + "epoch": 0.44317763883780803, + "grad_norm": 1.4110374765144331, + "learning_rate": 1.2305318026308848e-05, + "loss": 0.7663, + "step": 14460 + }, + { + "epoch": 0.44320828736054924, + "grad_norm": 1.3654624045945472, + "learning_rate": 1.230435211612533e-05, + "loss": 0.741, + "step": 14461 + }, + { + "epoch": 0.44323893588329044, + "grad_norm": 1.206158128705005, + "learning_rate": 1.2303386183236437e-05, + "loss": 0.5651, + "step": 14462 + }, + { + "epoch": 0.44326958440603165, + "grad_norm": 1.4688886950690867, + "learning_rate": 1.2302420227651693e-05, + "loss": 0.6284, + "step": 14463 + }, + { + "epoch": 0.44330023292877285, + "grad_norm": 1.4415950700135596, + "learning_rate": 1.2301454249380613e-05, + "loss": 0.6232, + "step": 14464 + }, + { + "epoch": 0.44333088145151406, + "grad_norm": 1.375642244173013, + "learning_rate": 1.2300488248432716e-05, + "loss": 0.6171, + "step": 14465 + }, + { + "epoch": 0.44336152997425526, + "grad_norm": 0.719655814382562, + "learning_rate": 1.229952222481752e-05, + "loss": 0.5884, + "step": 14466 + }, + { + "epoch": 0.44339217849699647, + "grad_norm": 1.6238623845152826, + "learning_rate": 1.2298556178544543e-05, + "loss": 0.7837, + "step": 14467 + }, + { + "epoch": 0.4434228270197377, + "grad_norm": 1.4315342892984544, + "learning_rate": 1.2297590109623304e-05, + "loss": 0.7127, + "step": 14468 + }, + { + "epoch": 0.4434534755424788, + "grad_norm": 1.6462216329377093, + "learning_rate": 1.2296624018063319e-05, + "loss": 0.8076, + "step": 14469 + }, + { + "epoch": 0.44348412406522003, + "grad_norm": 0.6819694789677113, + "learning_rate": 1.2295657903874114e-05, + "loss": 0.6093, + "step": 14470 + }, + { + "epoch": 0.44351477258796124, + "grad_norm": 1.5945359001832575, + "learning_rate": 1.2294691767065202e-05, + "loss": 0.8573, + "step": 14471 + }, + { + "epoch": 0.44354542111070244, + "grad_norm": 1.732232677589781, + "learning_rate": 1.2293725607646106e-05, + "loss": 0.7667, + "step": 14472 + }, + { + "epoch": 0.44357606963344365, + "grad_norm": 1.3089248513231477, + "learning_rate": 1.2292759425626341e-05, + "loss": 0.6165, + "step": 14473 + }, + { + "epoch": 0.44360671815618485, + "grad_norm": 1.4932697882723949, + "learning_rate": 1.2291793221015435e-05, + "loss": 0.7727, + "step": 14474 + }, + { + "epoch": 0.44363736667892606, + "grad_norm": 1.319389718516614, + "learning_rate": 1.2290826993822904e-05, + "loss": 0.6417, + "step": 14475 + }, + { + "epoch": 0.44366801520166727, + "grad_norm": 1.3956049461849522, + "learning_rate": 1.2289860744058265e-05, + "loss": 0.6534, + "step": 14476 + }, + { + "epoch": 0.44369866372440847, + "grad_norm": 1.5448577501147074, + "learning_rate": 1.2288894471731045e-05, + "loss": 0.806, + "step": 14477 + }, + { + "epoch": 0.4437293122471497, + "grad_norm": 1.5246363754856802, + "learning_rate": 1.2287928176850764e-05, + "loss": 0.6891, + "step": 14478 + }, + { + "epoch": 0.4437599607698909, + "grad_norm": 1.4370325097773922, + "learning_rate": 1.2286961859426938e-05, + "loss": 0.71, + "step": 14479 + }, + { + "epoch": 0.4437906092926321, + "grad_norm": 0.666912731458107, + "learning_rate": 1.2285995519469091e-05, + "loss": 0.5718, + "step": 14480 + }, + { + "epoch": 0.4438212578153733, + "grad_norm": 1.7843364926282135, + "learning_rate": 1.2285029156986748e-05, + "loss": 0.8246, + "step": 14481 + }, + { + "epoch": 0.4438519063381145, + "grad_norm": 1.4179342956503795, + "learning_rate": 1.2284062771989427e-05, + "loss": 0.6252, + "step": 14482 + }, + { + "epoch": 0.4438825548608557, + "grad_norm": 1.490102650018502, + "learning_rate": 1.2283096364486653e-05, + "loss": 0.6475, + "step": 14483 + }, + { + "epoch": 0.4439132033835969, + "grad_norm": 1.3067544341541428, + "learning_rate": 1.2282129934487944e-05, + "loss": 0.7099, + "step": 14484 + }, + { + "epoch": 0.4439438519063381, + "grad_norm": 1.2166820927482953, + "learning_rate": 1.2281163482002825e-05, + "loss": 0.6552, + "step": 14485 + }, + { + "epoch": 0.4439745004290793, + "grad_norm": 1.3721611494682346, + "learning_rate": 1.2280197007040819e-05, + "loss": 0.7659, + "step": 14486 + }, + { + "epoch": 0.4440051489518205, + "grad_norm": 1.3359277615692788, + "learning_rate": 1.2279230509611448e-05, + "loss": 0.7256, + "step": 14487 + }, + { + "epoch": 0.44403579747456173, + "grad_norm": 1.8993297590177232, + "learning_rate": 1.2278263989724236e-05, + "loss": 0.7541, + "step": 14488 + }, + { + "epoch": 0.44406644599730294, + "grad_norm": 1.4722806121360712, + "learning_rate": 1.2277297447388705e-05, + "loss": 0.7933, + "step": 14489 + }, + { + "epoch": 0.44409709452004414, + "grad_norm": 1.3312971615046314, + "learning_rate": 1.2276330882614382e-05, + "loss": 0.7127, + "step": 14490 + }, + { + "epoch": 0.44412774304278535, + "grad_norm": 1.2992620482426769, + "learning_rate": 1.2275364295410785e-05, + "loss": 0.6606, + "step": 14491 + }, + { + "epoch": 0.44415839156552656, + "grad_norm": 0.7026437832460798, + "learning_rate": 1.2274397685787443e-05, + "loss": 0.5954, + "step": 14492 + }, + { + "epoch": 0.44418904008826776, + "grad_norm": 1.4640156681719414, + "learning_rate": 1.2273431053753876e-05, + "loss": 0.7221, + "step": 14493 + }, + { + "epoch": 0.44421968861100897, + "grad_norm": 1.339320027861518, + "learning_rate": 1.2272464399319619e-05, + "loss": 0.6846, + "step": 14494 + }, + { + "epoch": 0.4442503371337502, + "grad_norm": 1.6555788353408218, + "learning_rate": 1.227149772249418e-05, + "loss": 0.7292, + "step": 14495 + }, + { + "epoch": 0.4442809856564914, + "grad_norm": 1.3889325845487817, + "learning_rate": 1.22705310232871e-05, + "loss": 0.6981, + "step": 14496 + }, + { + "epoch": 0.4443116341792326, + "grad_norm": 1.3867204374876188, + "learning_rate": 1.2269564301707893e-05, + "loss": 0.7689, + "step": 14497 + }, + { + "epoch": 0.4443422827019738, + "grad_norm": 1.3781365842376714, + "learning_rate": 1.2268597557766091e-05, + "loss": 0.716, + "step": 14498 + }, + { + "epoch": 0.444372931224715, + "grad_norm": 1.2993059908742186, + "learning_rate": 1.2267630791471213e-05, + "loss": 0.7814, + "step": 14499 + }, + { + "epoch": 0.44440357974745615, + "grad_norm": 1.6292798085370033, + "learning_rate": 1.2266664002832789e-05, + "loss": 0.7759, + "step": 14500 + }, + { + "epoch": 0.44443422827019735, + "grad_norm": 0.6475111106443218, + "learning_rate": 1.2265697191860349e-05, + "loss": 0.5833, + "step": 14501 + }, + { + "epoch": 0.44446487679293856, + "grad_norm": 1.4744466428302427, + "learning_rate": 1.2264730358563409e-05, + "loss": 0.6808, + "step": 14502 + }, + { + "epoch": 0.44449552531567976, + "grad_norm": 1.5415865006571832, + "learning_rate": 1.2263763502951504e-05, + "loss": 0.7425, + "step": 14503 + }, + { + "epoch": 0.44452617383842097, + "grad_norm": 1.3700455759720784, + "learning_rate": 1.2262796625034156e-05, + "loss": 0.7692, + "step": 14504 + }, + { + "epoch": 0.4445568223611622, + "grad_norm": 1.3744016256552005, + "learning_rate": 1.22618297248209e-05, + "loss": 0.7609, + "step": 14505 + }, + { + "epoch": 0.4445874708839034, + "grad_norm": 1.3656271119494523, + "learning_rate": 1.2260862802321252e-05, + "loss": 0.69, + "step": 14506 + }, + { + "epoch": 0.4446181194066446, + "grad_norm": 1.5822251293686895, + "learning_rate": 1.2259895857544745e-05, + "loss": 0.7736, + "step": 14507 + }, + { + "epoch": 0.4446487679293858, + "grad_norm": 1.3940530437909298, + "learning_rate": 1.2258928890500905e-05, + "loss": 0.6054, + "step": 14508 + }, + { + "epoch": 0.444679416452127, + "grad_norm": 1.3446559512711658, + "learning_rate": 1.2257961901199264e-05, + "loss": 0.6335, + "step": 14509 + }, + { + "epoch": 0.4447100649748682, + "grad_norm": 1.4318360005016166, + "learning_rate": 1.2256994889649343e-05, + "loss": 0.6569, + "step": 14510 + }, + { + "epoch": 0.4447407134976094, + "grad_norm": 1.491351863045355, + "learning_rate": 1.2256027855860677e-05, + "loss": 0.7856, + "step": 14511 + }, + { + "epoch": 0.4447713620203506, + "grad_norm": 1.4597777701255576, + "learning_rate": 1.2255060799842786e-05, + "loss": 0.728, + "step": 14512 + }, + { + "epoch": 0.4448020105430918, + "grad_norm": 1.8547832299190412, + "learning_rate": 1.225409372160521e-05, + "loss": 0.7714, + "step": 14513 + }, + { + "epoch": 0.444832659065833, + "grad_norm": 1.6063427131172165, + "learning_rate": 1.2253126621157469e-05, + "loss": 0.766, + "step": 14514 + }, + { + "epoch": 0.44486330758857423, + "grad_norm": 1.4001622424817046, + "learning_rate": 1.2252159498509097e-05, + "loss": 0.7078, + "step": 14515 + }, + { + "epoch": 0.44489395611131544, + "grad_norm": 1.5435234494252987, + "learning_rate": 1.2251192353669619e-05, + "loss": 0.7939, + "step": 14516 + }, + { + "epoch": 0.44492460463405664, + "grad_norm": 1.3256083618054861, + "learning_rate": 1.2250225186648565e-05, + "loss": 0.6402, + "step": 14517 + }, + { + "epoch": 0.44495525315679785, + "grad_norm": 1.3970550468745877, + "learning_rate": 1.224925799745547e-05, + "loss": 0.6604, + "step": 14518 + }, + { + "epoch": 0.44498590167953905, + "grad_norm": 1.3951294222303143, + "learning_rate": 1.2248290786099859e-05, + "loss": 0.7512, + "step": 14519 + }, + { + "epoch": 0.44501655020228026, + "grad_norm": 1.4612681441393993, + "learning_rate": 1.2247323552591264e-05, + "loss": 0.7153, + "step": 14520 + }, + { + "epoch": 0.44504719872502146, + "grad_norm": 1.4861393747220928, + "learning_rate": 1.2246356296939217e-05, + "loss": 0.781, + "step": 14521 + }, + { + "epoch": 0.44507784724776267, + "grad_norm": 1.3000629167934898, + "learning_rate": 1.2245389019153244e-05, + "loss": 0.6806, + "step": 14522 + }, + { + "epoch": 0.4451084957705039, + "grad_norm": 1.3619527188033034, + "learning_rate": 1.2244421719242879e-05, + "loss": 0.6792, + "step": 14523 + }, + { + "epoch": 0.4451391442932451, + "grad_norm": 1.347407901019743, + "learning_rate": 1.2243454397217654e-05, + "loss": 0.7502, + "step": 14524 + }, + { + "epoch": 0.4451697928159863, + "grad_norm": 1.4176744105538428, + "learning_rate": 1.2242487053087095e-05, + "loss": 0.63, + "step": 14525 + }, + { + "epoch": 0.4452004413387275, + "grad_norm": 1.4780490720047375, + "learning_rate": 1.2241519686860741e-05, + "loss": 0.7681, + "step": 14526 + }, + { + "epoch": 0.4452310898614687, + "grad_norm": 1.3954400151421784, + "learning_rate": 1.2240552298548119e-05, + "loss": 0.743, + "step": 14527 + }, + { + "epoch": 0.4452617383842099, + "grad_norm": 1.3488734614874893, + "learning_rate": 1.2239584888158762e-05, + "loss": 0.6357, + "step": 14528 + }, + { + "epoch": 0.4452923869069511, + "grad_norm": 0.6878746533930415, + "learning_rate": 1.2238617455702203e-05, + "loss": 0.5751, + "step": 14529 + }, + { + "epoch": 0.4453230354296923, + "grad_norm": 1.6416087548253782, + "learning_rate": 1.2237650001187968e-05, + "loss": 0.7629, + "step": 14530 + }, + { + "epoch": 0.44535368395243347, + "grad_norm": 1.433261832442542, + "learning_rate": 1.22366825246256e-05, + "loss": 0.6962, + "step": 14531 + }, + { + "epoch": 0.44538433247517467, + "grad_norm": 1.284751759558499, + "learning_rate": 1.2235715026024628e-05, + "loss": 0.694, + "step": 14532 + }, + { + "epoch": 0.4454149809979159, + "grad_norm": 1.5556914323340378, + "learning_rate": 1.2234747505394582e-05, + "loss": 0.8433, + "step": 14533 + }, + { + "epoch": 0.4454456295206571, + "grad_norm": 1.2747861081967768, + "learning_rate": 1.2233779962744996e-05, + "loss": 0.7899, + "step": 14534 + }, + { + "epoch": 0.4454762780433983, + "grad_norm": 1.5344731847567683, + "learning_rate": 1.2232812398085406e-05, + "loss": 0.7635, + "step": 14535 + }, + { + "epoch": 0.4455069265661395, + "grad_norm": 1.9105453088726538, + "learning_rate": 1.2231844811425345e-05, + "loss": 0.7215, + "step": 14536 + }, + { + "epoch": 0.4455375750888807, + "grad_norm": 0.7244673656192123, + "learning_rate": 1.2230877202774343e-05, + "loss": 0.611, + "step": 14537 + }, + { + "epoch": 0.4455682236116219, + "grad_norm": 1.6240661320331111, + "learning_rate": 1.2229909572141937e-05, + "loss": 0.727, + "step": 14538 + }, + { + "epoch": 0.4455988721343631, + "grad_norm": 1.4200736145790396, + "learning_rate": 1.2228941919537664e-05, + "loss": 0.6914, + "step": 14539 + }, + { + "epoch": 0.4456295206571043, + "grad_norm": 1.575933226480745, + "learning_rate": 1.2227974244971057e-05, + "loss": 0.7042, + "step": 14540 + }, + { + "epoch": 0.4456601691798455, + "grad_norm": 0.6429363153113271, + "learning_rate": 1.2227006548451648e-05, + "loss": 0.5877, + "step": 14541 + }, + { + "epoch": 0.44569081770258673, + "grad_norm": 1.5325622818139748, + "learning_rate": 1.222603882998897e-05, + "loss": 0.7564, + "step": 14542 + }, + { + "epoch": 0.44572146622532793, + "grad_norm": 1.397796033919156, + "learning_rate": 1.2225071089592569e-05, + "loss": 0.7462, + "step": 14543 + }, + { + "epoch": 0.44575211474806914, + "grad_norm": 0.6311640900472304, + "learning_rate": 1.222410332727197e-05, + "loss": 0.5835, + "step": 14544 + }, + { + "epoch": 0.44578276327081034, + "grad_norm": 1.5368938140494293, + "learning_rate": 1.2223135543036708e-05, + "loss": 0.8086, + "step": 14545 + }, + { + "epoch": 0.44581341179355155, + "grad_norm": 1.5513633824391782, + "learning_rate": 1.2222167736896329e-05, + "loss": 0.7792, + "step": 14546 + }, + { + "epoch": 0.44584406031629276, + "grad_norm": 1.5892419705498537, + "learning_rate": 1.222119990886036e-05, + "loss": 0.7772, + "step": 14547 + }, + { + "epoch": 0.44587470883903396, + "grad_norm": 1.4732394826685924, + "learning_rate": 1.222023205893834e-05, + "loss": 0.7676, + "step": 14548 + }, + { + "epoch": 0.44590535736177517, + "grad_norm": 1.4151952058809494, + "learning_rate": 1.2219264187139803e-05, + "loss": 0.6978, + "step": 14549 + }, + { + "epoch": 0.4459360058845164, + "grad_norm": 1.4328043655359073, + "learning_rate": 1.2218296293474292e-05, + "loss": 0.7974, + "step": 14550 + }, + { + "epoch": 0.4459666544072576, + "grad_norm": 1.4177179092220682, + "learning_rate": 1.2217328377951338e-05, + "loss": 0.7676, + "step": 14551 + }, + { + "epoch": 0.4459973029299988, + "grad_norm": 1.4169757904964089, + "learning_rate": 1.2216360440580482e-05, + "loss": 0.7327, + "step": 14552 + }, + { + "epoch": 0.44602795145274, + "grad_norm": 1.4153296478572852, + "learning_rate": 1.2215392481371255e-05, + "loss": 0.7255, + "step": 14553 + }, + { + "epoch": 0.4460585999754812, + "grad_norm": 1.3946283320355843, + "learning_rate": 1.2214424500333204e-05, + "loss": 0.5841, + "step": 14554 + }, + { + "epoch": 0.4460892484982224, + "grad_norm": 1.2945339690793622, + "learning_rate": 1.2213456497475859e-05, + "loss": 0.7817, + "step": 14555 + }, + { + "epoch": 0.4461198970209636, + "grad_norm": 1.6211949746567547, + "learning_rate": 1.221248847280876e-05, + "loss": 0.8321, + "step": 14556 + }, + { + "epoch": 0.4461505455437048, + "grad_norm": 1.5859680210531721, + "learning_rate": 1.221152042634145e-05, + "loss": 0.848, + "step": 14557 + }, + { + "epoch": 0.446181194066446, + "grad_norm": 1.3782663201291354, + "learning_rate": 1.2210552358083463e-05, + "loss": 0.6998, + "step": 14558 + }, + { + "epoch": 0.4462118425891872, + "grad_norm": 1.4207873693131552, + "learning_rate": 1.2209584268044337e-05, + "loss": 0.8253, + "step": 14559 + }, + { + "epoch": 0.44624249111192843, + "grad_norm": 1.5145068399096808, + "learning_rate": 1.2208616156233608e-05, + "loss": 0.6708, + "step": 14560 + }, + { + "epoch": 0.44627313963466964, + "grad_norm": 1.4943502913445401, + "learning_rate": 1.2207648022660823e-05, + "loss": 0.7304, + "step": 14561 + }, + { + "epoch": 0.4463037881574108, + "grad_norm": 0.680291205154374, + "learning_rate": 1.2206679867335514e-05, + "loss": 0.5259, + "step": 14562 + }, + { + "epoch": 0.446334436680152, + "grad_norm": 1.6723081640387876, + "learning_rate": 1.220571169026723e-05, + "loss": 0.8389, + "step": 14563 + }, + { + "epoch": 0.4463650852028932, + "grad_norm": 0.6519287720213289, + "learning_rate": 1.2204743491465499e-05, + "loss": 0.57, + "step": 14564 + }, + { + "epoch": 0.4463957337256344, + "grad_norm": 1.624042323149513, + "learning_rate": 1.2203775270939866e-05, + "loss": 0.6691, + "step": 14565 + }, + { + "epoch": 0.4464263822483756, + "grad_norm": 1.614097232290609, + "learning_rate": 1.2202807028699872e-05, + "loss": 0.7093, + "step": 14566 + }, + { + "epoch": 0.4464570307711168, + "grad_norm": 1.7012487346871923, + "learning_rate": 1.2201838764755061e-05, + "loss": 0.8063, + "step": 14567 + }, + { + "epoch": 0.446487679293858, + "grad_norm": 1.4644487399938797, + "learning_rate": 1.2200870479114964e-05, + "loss": 0.7975, + "step": 14568 + }, + { + "epoch": 0.4465183278165992, + "grad_norm": 1.495964242971714, + "learning_rate": 1.2199902171789129e-05, + "loss": 0.8, + "step": 14569 + }, + { + "epoch": 0.44654897633934043, + "grad_norm": 0.7575941634604103, + "learning_rate": 1.2198933842787093e-05, + "loss": 0.6208, + "step": 14570 + }, + { + "epoch": 0.44657962486208164, + "grad_norm": 1.844204397705221, + "learning_rate": 1.2197965492118402e-05, + "loss": 0.8719, + "step": 14571 + }, + { + "epoch": 0.44661027338482284, + "grad_norm": 1.3788071244556035, + "learning_rate": 1.2196997119792596e-05, + "loss": 0.6856, + "step": 14572 + }, + { + "epoch": 0.44664092190756405, + "grad_norm": 1.4144799711874319, + "learning_rate": 1.2196028725819212e-05, + "loss": 0.7612, + "step": 14573 + }, + { + "epoch": 0.44667157043030525, + "grad_norm": 0.6691386477663337, + "learning_rate": 1.2195060310207797e-05, + "loss": 0.5885, + "step": 14574 + }, + { + "epoch": 0.44670221895304646, + "grad_norm": 1.3388014080496269, + "learning_rate": 1.2194091872967888e-05, + "loss": 0.7444, + "step": 14575 + }, + { + "epoch": 0.44673286747578766, + "grad_norm": 1.5450335189720685, + "learning_rate": 1.2193123414109034e-05, + "loss": 0.7768, + "step": 14576 + }, + { + "epoch": 0.44676351599852887, + "grad_norm": 1.5517280290055997, + "learning_rate": 1.2192154933640773e-05, + "loss": 0.8164, + "step": 14577 + }, + { + "epoch": 0.4467941645212701, + "grad_norm": 1.388671853552459, + "learning_rate": 1.2191186431572647e-05, + "loss": 0.6829, + "step": 14578 + }, + { + "epoch": 0.4468248130440113, + "grad_norm": 1.3142429365083248, + "learning_rate": 1.2190217907914201e-05, + "loss": 0.675, + "step": 14579 + }, + { + "epoch": 0.4468554615667525, + "grad_norm": 1.4973341933119961, + "learning_rate": 1.218924936267498e-05, + "loss": 0.6504, + "step": 14580 + }, + { + "epoch": 0.4468861100894937, + "grad_norm": 1.4025871738190834, + "learning_rate": 1.218828079586452e-05, + "loss": 0.65, + "step": 14581 + }, + { + "epoch": 0.4469167586122349, + "grad_norm": 1.436218099028833, + "learning_rate": 1.2187312207492374e-05, + "loss": 0.8008, + "step": 14582 + }, + { + "epoch": 0.4469474071349761, + "grad_norm": 1.3792494729780014, + "learning_rate": 1.218634359756808e-05, + "loss": 0.7106, + "step": 14583 + }, + { + "epoch": 0.4469780556577173, + "grad_norm": 1.4056830493765633, + "learning_rate": 1.2185374966101184e-05, + "loss": 0.6629, + "step": 14584 + }, + { + "epoch": 0.4470087041804585, + "grad_norm": 1.467102381208654, + "learning_rate": 1.2184406313101227e-05, + "loss": 0.8107, + "step": 14585 + }, + { + "epoch": 0.4470393527031997, + "grad_norm": 1.553214906341445, + "learning_rate": 1.2183437638577758e-05, + "loss": 0.7448, + "step": 14586 + }, + { + "epoch": 0.4470700012259409, + "grad_norm": 1.489539119724657, + "learning_rate": 1.2182468942540318e-05, + "loss": 0.688, + "step": 14587 + }, + { + "epoch": 0.44710064974868213, + "grad_norm": 1.5181914822162959, + "learning_rate": 1.2181500224998451e-05, + "loss": 0.7077, + "step": 14588 + }, + { + "epoch": 0.44713129827142334, + "grad_norm": 1.4020434383532712, + "learning_rate": 1.2180531485961707e-05, + "loss": 0.6698, + "step": 14589 + }, + { + "epoch": 0.44716194679416454, + "grad_norm": 1.4769777953328227, + "learning_rate": 1.2179562725439627e-05, + "loss": 0.6239, + "step": 14590 + }, + { + "epoch": 0.44719259531690575, + "grad_norm": 1.4807661530390825, + "learning_rate": 1.217859394344176e-05, + "loss": 0.6785, + "step": 14591 + }, + { + "epoch": 0.44722324383964696, + "grad_norm": 0.7992637355411606, + "learning_rate": 1.2177625139977644e-05, + "loss": 0.5977, + "step": 14592 + }, + { + "epoch": 0.4472538923623881, + "grad_norm": 1.443505133015064, + "learning_rate": 1.2176656315056837e-05, + "loss": 0.7137, + "step": 14593 + }, + { + "epoch": 0.4472845408851293, + "grad_norm": 1.4303862541339323, + "learning_rate": 1.2175687468688875e-05, + "loss": 0.7441, + "step": 14594 + }, + { + "epoch": 0.4473151894078705, + "grad_norm": 1.4699185784939546, + "learning_rate": 1.217471860088331e-05, + "loss": 0.6845, + "step": 14595 + }, + { + "epoch": 0.4473458379306117, + "grad_norm": 0.6475196390225699, + "learning_rate": 1.2173749711649683e-05, + "loss": 0.6059, + "step": 14596 + }, + { + "epoch": 0.44737648645335293, + "grad_norm": 1.4539485545427344, + "learning_rate": 1.2172780800997545e-05, + "loss": 0.7643, + "step": 14597 + }, + { + "epoch": 0.44740713497609413, + "grad_norm": 1.526565400359548, + "learning_rate": 1.2171811868936443e-05, + "loss": 0.632, + "step": 14598 + }, + { + "epoch": 0.44743778349883534, + "grad_norm": 1.4361798420829786, + "learning_rate": 1.2170842915475922e-05, + "loss": 0.7541, + "step": 14599 + }, + { + "epoch": 0.44746843202157655, + "grad_norm": 1.559227291944203, + "learning_rate": 1.2169873940625529e-05, + "loss": 0.7863, + "step": 14600 + }, + { + "epoch": 0.44749908054431775, + "grad_norm": 1.3675312534625033, + "learning_rate": 1.2168904944394816e-05, + "loss": 0.7583, + "step": 14601 + }, + { + "epoch": 0.44752972906705896, + "grad_norm": 1.5359498597317418, + "learning_rate": 1.2167935926793327e-05, + "loss": 0.8259, + "step": 14602 + }, + { + "epoch": 0.44756037758980016, + "grad_norm": 0.7137543774439253, + "learning_rate": 1.2166966887830607e-05, + "loss": 0.5907, + "step": 14603 + }, + { + "epoch": 0.44759102611254137, + "grad_norm": 1.4963186336306065, + "learning_rate": 1.2165997827516212e-05, + "loss": 0.6586, + "step": 14604 + }, + { + "epoch": 0.4476216746352826, + "grad_norm": 0.6945965316219596, + "learning_rate": 1.2165028745859686e-05, + "loss": 0.6163, + "step": 14605 + }, + { + "epoch": 0.4476523231580238, + "grad_norm": 1.6512147048019454, + "learning_rate": 1.2164059642870575e-05, + "loss": 0.8154, + "step": 14606 + }, + { + "epoch": 0.447682971680765, + "grad_norm": 1.2673645372977418, + "learning_rate": 1.2163090518558432e-05, + "loss": 0.7127, + "step": 14607 + }, + { + "epoch": 0.4477136202035062, + "grad_norm": 1.5775812457656186, + "learning_rate": 1.2162121372932805e-05, + "loss": 0.669, + "step": 14608 + }, + { + "epoch": 0.4477442687262474, + "grad_norm": 1.4622951542377396, + "learning_rate": 1.2161152206003244e-05, + "loss": 0.7945, + "step": 14609 + }, + { + "epoch": 0.4477749172489886, + "grad_norm": 1.5741907219225943, + "learning_rate": 1.2160183017779297e-05, + "loss": 0.7749, + "step": 14610 + }, + { + "epoch": 0.4478055657717298, + "grad_norm": 1.3852822389167583, + "learning_rate": 1.2159213808270512e-05, + "loss": 0.6302, + "step": 14611 + }, + { + "epoch": 0.447836214294471, + "grad_norm": 1.5800460208464264, + "learning_rate": 1.2158244577486442e-05, + "loss": 0.8642, + "step": 14612 + }, + { + "epoch": 0.4478668628172122, + "grad_norm": 1.4939507393580553, + "learning_rate": 1.2157275325436638e-05, + "loss": 0.7392, + "step": 14613 + }, + { + "epoch": 0.4478975113399534, + "grad_norm": 0.6891572789985014, + "learning_rate": 1.2156306052130642e-05, + "loss": 0.5971, + "step": 14614 + }, + { + "epoch": 0.44792815986269463, + "grad_norm": 1.4242705182087922, + "learning_rate": 1.2155336757578015e-05, + "loss": 0.6521, + "step": 14615 + }, + { + "epoch": 0.44795880838543584, + "grad_norm": 1.5987660252922509, + "learning_rate": 1.2154367441788304e-05, + "loss": 0.693, + "step": 14616 + }, + { + "epoch": 0.44798945690817704, + "grad_norm": 1.7608611849227325, + "learning_rate": 1.2153398104771061e-05, + "loss": 0.7793, + "step": 14617 + }, + { + "epoch": 0.44802010543091825, + "grad_norm": 1.676369153507961, + "learning_rate": 1.2152428746535831e-05, + "loss": 0.7176, + "step": 14618 + }, + { + "epoch": 0.44805075395365945, + "grad_norm": 1.5664526177644302, + "learning_rate": 1.2151459367092173e-05, + "loss": 0.6507, + "step": 14619 + }, + { + "epoch": 0.44808140247640066, + "grad_norm": 1.462511435806246, + "learning_rate": 1.2150489966449632e-05, + "loss": 0.716, + "step": 14620 + }, + { + "epoch": 0.44811205099914186, + "grad_norm": 0.6716140061125041, + "learning_rate": 1.214952054461777e-05, + "loss": 0.621, + "step": 14621 + }, + { + "epoch": 0.44814269952188307, + "grad_norm": 1.4692195585031649, + "learning_rate": 1.2148551101606125e-05, + "loss": 0.8121, + "step": 14622 + }, + { + "epoch": 0.4481733480446243, + "grad_norm": 1.4856671099169885, + "learning_rate": 1.2147581637424262e-05, + "loss": 0.7497, + "step": 14623 + }, + { + "epoch": 0.4482039965673654, + "grad_norm": 1.7217429620713822, + "learning_rate": 1.2146612152081723e-05, + "loss": 0.794, + "step": 14624 + }, + { + "epoch": 0.44823464509010663, + "grad_norm": 1.504453709209832, + "learning_rate": 1.2145642645588068e-05, + "loss": 0.7016, + "step": 14625 + }, + { + "epoch": 0.44826529361284784, + "grad_norm": 1.4307672788523587, + "learning_rate": 1.2144673117952845e-05, + "loss": 0.6658, + "step": 14626 + }, + { + "epoch": 0.44829594213558904, + "grad_norm": 0.6472859332746734, + "learning_rate": 1.2143703569185607e-05, + "loss": 0.5704, + "step": 14627 + }, + { + "epoch": 0.44832659065833025, + "grad_norm": 1.4369955756108672, + "learning_rate": 1.2142733999295917e-05, + "loss": 0.7541, + "step": 14628 + }, + { + "epoch": 0.44835723918107145, + "grad_norm": 1.440917170902308, + "learning_rate": 1.2141764408293312e-05, + "loss": 0.7521, + "step": 14629 + }, + { + "epoch": 0.44838788770381266, + "grad_norm": 1.3505776722179494, + "learning_rate": 1.2140794796187361e-05, + "loss": 0.6733, + "step": 14630 + }, + { + "epoch": 0.44841853622655387, + "grad_norm": 1.4869494926790798, + "learning_rate": 1.2139825162987606e-05, + "loss": 0.6573, + "step": 14631 + }, + { + "epoch": 0.44844918474929507, + "grad_norm": 1.4144474238636693, + "learning_rate": 1.213885550870361e-05, + "loss": 0.642, + "step": 14632 + }, + { + "epoch": 0.4484798332720363, + "grad_norm": 1.8116367917344087, + "learning_rate": 1.2137885833344922e-05, + "loss": 0.8606, + "step": 14633 + }, + { + "epoch": 0.4485104817947775, + "grad_norm": 1.39354904651674, + "learning_rate": 1.2136916136921098e-05, + "loss": 0.7168, + "step": 14634 + }, + { + "epoch": 0.4485411303175187, + "grad_norm": 1.4718133805002414, + "learning_rate": 1.213594641944169e-05, + "loss": 0.6995, + "step": 14635 + }, + { + "epoch": 0.4485717788402599, + "grad_norm": 1.3766397374994215, + "learning_rate": 1.2134976680916262e-05, + "loss": 0.7405, + "step": 14636 + }, + { + "epoch": 0.4486024273630011, + "grad_norm": 1.5419846164743864, + "learning_rate": 1.2134006921354355e-05, + "loss": 0.7843, + "step": 14637 + }, + { + "epoch": 0.4486330758857423, + "grad_norm": 1.6048061293448845, + "learning_rate": 1.2133037140765536e-05, + "loss": 0.7845, + "step": 14638 + }, + { + "epoch": 0.4486637244084835, + "grad_norm": 0.7806754970794759, + "learning_rate": 1.2132067339159354e-05, + "loss": 0.5973, + "step": 14639 + }, + { + "epoch": 0.4486943729312247, + "grad_norm": 1.346315891096714, + "learning_rate": 1.2131097516545369e-05, + "loss": 0.6371, + "step": 14640 + }, + { + "epoch": 0.4487250214539659, + "grad_norm": 1.3103848249787156, + "learning_rate": 1.2130127672933134e-05, + "loss": 0.6904, + "step": 14641 + }, + { + "epoch": 0.44875566997670713, + "grad_norm": 1.6433963025463207, + "learning_rate": 1.2129157808332206e-05, + "loss": 0.7007, + "step": 14642 + }, + { + "epoch": 0.44878631849944833, + "grad_norm": 1.5965318707860088, + "learning_rate": 1.2128187922752141e-05, + "loss": 0.7424, + "step": 14643 + }, + { + "epoch": 0.44881696702218954, + "grad_norm": 1.320600064113939, + "learning_rate": 1.2127218016202497e-05, + "loss": 0.7338, + "step": 14644 + }, + { + "epoch": 0.44884761554493074, + "grad_norm": 1.3506264789559694, + "learning_rate": 1.212624808869283e-05, + "loss": 0.7884, + "step": 14645 + }, + { + "epoch": 0.44887826406767195, + "grad_norm": 1.3805287316217185, + "learning_rate": 1.2125278140232695e-05, + "loss": 0.664, + "step": 14646 + }, + { + "epoch": 0.44890891259041316, + "grad_norm": 1.3415173116897399, + "learning_rate": 1.2124308170831652e-05, + "loss": 0.701, + "step": 14647 + }, + { + "epoch": 0.44893956111315436, + "grad_norm": 0.7290178054502708, + "learning_rate": 1.2123338180499255e-05, + "loss": 0.5833, + "step": 14648 + }, + { + "epoch": 0.44897020963589557, + "grad_norm": 1.5836164335395893, + "learning_rate": 1.2122368169245067e-05, + "loss": 0.7006, + "step": 14649 + }, + { + "epoch": 0.4490008581586368, + "grad_norm": 1.5601601916311814, + "learning_rate": 1.2121398137078639e-05, + "loss": 0.7034, + "step": 14650 + }, + { + "epoch": 0.449031506681378, + "grad_norm": 1.5969490401447977, + "learning_rate": 1.2120428084009534e-05, + "loss": 0.6882, + "step": 14651 + }, + { + "epoch": 0.4490621552041192, + "grad_norm": 1.4048769452264862, + "learning_rate": 1.211945801004731e-05, + "loss": 0.6736, + "step": 14652 + }, + { + "epoch": 0.4490928037268604, + "grad_norm": 1.4327091130335892, + "learning_rate": 1.211848791520152e-05, + "loss": 0.6565, + "step": 14653 + }, + { + "epoch": 0.4491234522496016, + "grad_norm": 1.3019975982624084, + "learning_rate": 1.2117517799481729e-05, + "loss": 0.6553, + "step": 14654 + }, + { + "epoch": 0.44915410077234275, + "grad_norm": 1.5073146046990205, + "learning_rate": 1.2116547662897494e-05, + "loss": 0.7311, + "step": 14655 + }, + { + "epoch": 0.44918474929508395, + "grad_norm": 1.6337256793439947, + "learning_rate": 1.2115577505458373e-05, + "loss": 0.7013, + "step": 14656 + }, + { + "epoch": 0.44921539781782516, + "grad_norm": 1.413497078923511, + "learning_rate": 1.2114607327173925e-05, + "loss": 0.6216, + "step": 14657 + }, + { + "epoch": 0.44924604634056636, + "grad_norm": 1.3755553912971434, + "learning_rate": 1.211363712805371e-05, + "loss": 0.7692, + "step": 14658 + }, + { + "epoch": 0.44927669486330757, + "grad_norm": 1.4395384794200026, + "learning_rate": 1.211266690810729e-05, + "loss": 0.7254, + "step": 14659 + }, + { + "epoch": 0.4493073433860488, + "grad_norm": 1.4167027681345723, + "learning_rate": 1.211169666734422e-05, + "loss": 0.7711, + "step": 14660 + }, + { + "epoch": 0.44933799190879, + "grad_norm": 1.4406625950599208, + "learning_rate": 1.2110726405774061e-05, + "loss": 0.7774, + "step": 14661 + }, + { + "epoch": 0.4493686404315312, + "grad_norm": 1.5211241484971127, + "learning_rate": 1.2109756123406375e-05, + "loss": 0.6355, + "step": 14662 + }, + { + "epoch": 0.4493992889542724, + "grad_norm": 1.3128780268041305, + "learning_rate": 1.2108785820250723e-05, + "loss": 0.678, + "step": 14663 + }, + { + "epoch": 0.4494299374770136, + "grad_norm": 1.3192891783452032, + "learning_rate": 1.2107815496316666e-05, + "loss": 0.6324, + "step": 14664 + }, + { + "epoch": 0.4494605859997548, + "grad_norm": 1.3160036761895466, + "learning_rate": 1.2106845151613762e-05, + "loss": 0.6502, + "step": 14665 + }, + { + "epoch": 0.449491234522496, + "grad_norm": 1.4665428375379177, + "learning_rate": 1.2105874786151574e-05, + "loss": 0.8083, + "step": 14666 + }, + { + "epoch": 0.4495218830452372, + "grad_norm": 1.5242746519349766, + "learning_rate": 1.2104904399939663e-05, + "loss": 0.743, + "step": 14667 + }, + { + "epoch": 0.4495525315679784, + "grad_norm": 0.713156585226441, + "learning_rate": 1.210393399298759e-05, + "loss": 0.5905, + "step": 14668 + }, + { + "epoch": 0.4495831800907196, + "grad_norm": 0.69245363153172, + "learning_rate": 1.210296356530492e-05, + "loss": 0.6169, + "step": 14669 + }, + { + "epoch": 0.44961382861346083, + "grad_norm": 1.4976056788093868, + "learning_rate": 1.2101993116901207e-05, + "loss": 0.7405, + "step": 14670 + }, + { + "epoch": 0.44964447713620204, + "grad_norm": 1.5094080559456864, + "learning_rate": 1.2101022647786022e-05, + "loss": 0.7444, + "step": 14671 + }, + { + "epoch": 0.44967512565894324, + "grad_norm": 0.6618590704013981, + "learning_rate": 1.210005215796892e-05, + "loss": 0.5997, + "step": 14672 + }, + { + "epoch": 0.44970577418168445, + "grad_norm": 0.700691713324578, + "learning_rate": 1.209908164745947e-05, + "loss": 0.6053, + "step": 14673 + }, + { + "epoch": 0.44973642270442565, + "grad_norm": 1.673079041453172, + "learning_rate": 1.2098111116267227e-05, + "loss": 0.7636, + "step": 14674 + }, + { + "epoch": 0.44976707122716686, + "grad_norm": 1.453442538804626, + "learning_rate": 1.2097140564401765e-05, + "loss": 0.7106, + "step": 14675 + }, + { + "epoch": 0.44979771974990806, + "grad_norm": 1.5721138407908117, + "learning_rate": 1.2096169991872635e-05, + "loss": 0.7201, + "step": 14676 + }, + { + "epoch": 0.44982836827264927, + "grad_norm": 0.6570543825683277, + "learning_rate": 1.2095199398689407e-05, + "loss": 0.5902, + "step": 14677 + }, + { + "epoch": 0.4498590167953905, + "grad_norm": 1.4438094980708158, + "learning_rate": 1.2094228784861646e-05, + "loss": 0.7658, + "step": 14678 + }, + { + "epoch": 0.4498896653181317, + "grad_norm": 1.4681770918907677, + "learning_rate": 1.2093258150398913e-05, + "loss": 0.7629, + "step": 14679 + }, + { + "epoch": 0.4499203138408729, + "grad_norm": 1.4333424061393019, + "learning_rate": 1.2092287495310767e-05, + "loss": 0.7095, + "step": 14680 + }, + { + "epoch": 0.4499509623636141, + "grad_norm": 1.3067416675399324, + "learning_rate": 1.209131681960678e-05, + "loss": 0.6758, + "step": 14681 + }, + { + "epoch": 0.4499816108863553, + "grad_norm": 1.5249990260964288, + "learning_rate": 1.2090346123296512e-05, + "loss": 0.6778, + "step": 14682 + }, + { + "epoch": 0.4500122594090965, + "grad_norm": 1.445234513965032, + "learning_rate": 1.208937540638953e-05, + "loss": 0.6435, + "step": 14683 + }, + { + "epoch": 0.4500429079318377, + "grad_norm": 1.4348701657026321, + "learning_rate": 1.2088404668895397e-05, + "loss": 0.7117, + "step": 14684 + }, + { + "epoch": 0.4500735564545789, + "grad_norm": 1.3138878555750437, + "learning_rate": 1.2087433910823679e-05, + "loss": 0.7942, + "step": 14685 + }, + { + "epoch": 0.45010420497732007, + "grad_norm": 1.553273649626513, + "learning_rate": 1.208646313218394e-05, + "loss": 0.665, + "step": 14686 + }, + { + "epoch": 0.45013485350006127, + "grad_norm": 1.317500943655449, + "learning_rate": 1.2085492332985746e-05, + "loss": 0.7197, + "step": 14687 + }, + { + "epoch": 0.4501655020228025, + "grad_norm": 1.48212044407663, + "learning_rate": 1.2084521513238662e-05, + "loss": 0.7505, + "step": 14688 + }, + { + "epoch": 0.4501961505455437, + "grad_norm": 0.7093073561379754, + "learning_rate": 1.2083550672952256e-05, + "loss": 0.5734, + "step": 14689 + }, + { + "epoch": 0.4502267990682849, + "grad_norm": 0.6809504455279455, + "learning_rate": 1.2082579812136092e-05, + "loss": 0.5967, + "step": 14690 + }, + { + "epoch": 0.4502574475910261, + "grad_norm": 1.4904698644911674, + "learning_rate": 1.2081608930799733e-05, + "loss": 0.7418, + "step": 14691 + }, + { + "epoch": 0.4502880961137673, + "grad_norm": 1.4053629635822602, + "learning_rate": 1.2080638028952751e-05, + "loss": 0.7738, + "step": 14692 + }, + { + "epoch": 0.4503187446365085, + "grad_norm": 1.5661966465116377, + "learning_rate": 1.2079667106604709e-05, + "loss": 0.8126, + "step": 14693 + }, + { + "epoch": 0.4503493931592497, + "grad_norm": 1.4579587217140688, + "learning_rate": 1.2078696163765178e-05, + "loss": 0.72, + "step": 14694 + }, + { + "epoch": 0.4503800416819909, + "grad_norm": 1.3406707908621855, + "learning_rate": 1.207772520044372e-05, + "loss": 0.6872, + "step": 14695 + }, + { + "epoch": 0.4504106902047321, + "grad_norm": 1.5862130611754073, + "learning_rate": 1.2076754216649901e-05, + "loss": 0.7488, + "step": 14696 + }, + { + "epoch": 0.45044133872747333, + "grad_norm": 0.6894191193162633, + "learning_rate": 1.2075783212393295e-05, + "loss": 0.5835, + "step": 14697 + }, + { + "epoch": 0.45047198725021453, + "grad_norm": 1.683910053790703, + "learning_rate": 1.2074812187683464e-05, + "loss": 0.6874, + "step": 14698 + }, + { + "epoch": 0.45050263577295574, + "grad_norm": 1.2486220763480698, + "learning_rate": 1.207384114252998e-05, + "loss": 0.6187, + "step": 14699 + }, + { + "epoch": 0.45053328429569695, + "grad_norm": 0.6319652523653365, + "learning_rate": 1.2072870076942407e-05, + "loss": 0.59, + "step": 14700 + }, + { + "epoch": 0.45056393281843815, + "grad_norm": 1.3554115065673034, + "learning_rate": 1.2071898990930314e-05, + "loss": 0.7469, + "step": 14701 + }, + { + "epoch": 0.45059458134117936, + "grad_norm": 1.4498089799600893, + "learning_rate": 1.2070927884503271e-05, + "loss": 0.7918, + "step": 14702 + }, + { + "epoch": 0.45062522986392056, + "grad_norm": 1.766880063839518, + "learning_rate": 1.2069956757670847e-05, + "loss": 0.7541, + "step": 14703 + }, + { + "epoch": 0.45065587838666177, + "grad_norm": 1.4056733459184618, + "learning_rate": 1.2068985610442608e-05, + "loss": 0.7234, + "step": 14704 + }, + { + "epoch": 0.450686526909403, + "grad_norm": 1.260616579648182, + "learning_rate": 1.2068014442828127e-05, + "loss": 0.6732, + "step": 14705 + }, + { + "epoch": 0.4507171754321442, + "grad_norm": 1.4372253671866846, + "learning_rate": 1.2067043254836966e-05, + "loss": 0.868, + "step": 14706 + }, + { + "epoch": 0.4507478239548854, + "grad_norm": 1.4130249184190924, + "learning_rate": 1.2066072046478703e-05, + "loss": 0.8078, + "step": 14707 + }, + { + "epoch": 0.4507784724776266, + "grad_norm": 1.5280564495285156, + "learning_rate": 1.2065100817762898e-05, + "loss": 0.8242, + "step": 14708 + }, + { + "epoch": 0.4508091210003678, + "grad_norm": 1.4941128314921164, + "learning_rate": 1.2064129568699132e-05, + "loss": 0.7418, + "step": 14709 + }, + { + "epoch": 0.450839769523109, + "grad_norm": 1.4299617342742608, + "learning_rate": 1.2063158299296966e-05, + "loss": 0.7447, + "step": 14710 + }, + { + "epoch": 0.4508704180458502, + "grad_norm": 1.3529833996151959, + "learning_rate": 1.2062187009565974e-05, + "loss": 0.8786, + "step": 14711 + }, + { + "epoch": 0.4509010665685914, + "grad_norm": 1.4103142278890297, + "learning_rate": 1.2061215699515727e-05, + "loss": 0.7308, + "step": 14712 + }, + { + "epoch": 0.4509317150913326, + "grad_norm": 0.7139132115074882, + "learning_rate": 1.2060244369155794e-05, + "loss": 0.6121, + "step": 14713 + }, + { + "epoch": 0.4509623636140738, + "grad_norm": 1.454074222320883, + "learning_rate": 1.2059273018495748e-05, + "loss": 0.7481, + "step": 14714 + }, + { + "epoch": 0.45099301213681503, + "grad_norm": 1.3905459292440345, + "learning_rate": 1.2058301647545152e-05, + "loss": 0.8054, + "step": 14715 + }, + { + "epoch": 0.45102366065955624, + "grad_norm": 1.5690336485011416, + "learning_rate": 1.2057330256313589e-05, + "loss": 0.7477, + "step": 14716 + }, + { + "epoch": 0.4510543091822974, + "grad_norm": 1.4144164781034787, + "learning_rate": 1.2056358844810623e-05, + "loss": 0.6574, + "step": 14717 + }, + { + "epoch": 0.4510849577050386, + "grad_norm": 1.6183554840215189, + "learning_rate": 1.2055387413045828e-05, + "loss": 0.7455, + "step": 14718 + }, + { + "epoch": 0.4511156062277798, + "grad_norm": 1.484400985725435, + "learning_rate": 1.2054415961028771e-05, + "loss": 0.741, + "step": 14719 + }, + { + "epoch": 0.451146254750521, + "grad_norm": 1.611742407651326, + "learning_rate": 1.205344448876903e-05, + "loss": 0.8267, + "step": 14720 + }, + { + "epoch": 0.4511769032732622, + "grad_norm": 1.481595853280377, + "learning_rate": 1.2052472996276177e-05, + "loss": 0.7445, + "step": 14721 + }, + { + "epoch": 0.4512075517960034, + "grad_norm": 1.45129706531004, + "learning_rate": 1.205150148355978e-05, + "loss": 0.7355, + "step": 14722 + }, + { + "epoch": 0.4512382003187446, + "grad_norm": 1.3939059931563842, + "learning_rate": 1.2050529950629415e-05, + "loss": 0.6073, + "step": 14723 + }, + { + "epoch": 0.4512688488414858, + "grad_norm": 1.3956401369003435, + "learning_rate": 1.2049558397494653e-05, + "loss": 0.7171, + "step": 14724 + }, + { + "epoch": 0.45129949736422703, + "grad_norm": 1.5265891006697307, + "learning_rate": 1.2048586824165069e-05, + "loss": 0.7636, + "step": 14725 + }, + { + "epoch": 0.45133014588696824, + "grad_norm": 1.6187222549574214, + "learning_rate": 1.2047615230650233e-05, + "loss": 0.653, + "step": 14726 + }, + { + "epoch": 0.45136079440970944, + "grad_norm": 1.409753832645499, + "learning_rate": 1.2046643616959724e-05, + "loss": 0.7036, + "step": 14727 + }, + { + "epoch": 0.45139144293245065, + "grad_norm": 1.6237875816064464, + "learning_rate": 1.204567198310311e-05, + "loss": 0.6864, + "step": 14728 + }, + { + "epoch": 0.45142209145519185, + "grad_norm": 0.6723723397183019, + "learning_rate": 1.2044700329089964e-05, + "loss": 0.5649, + "step": 14729 + }, + { + "epoch": 0.45145273997793306, + "grad_norm": 1.4962786788815918, + "learning_rate": 1.2043728654929866e-05, + "loss": 0.8304, + "step": 14730 + }, + { + "epoch": 0.45148338850067427, + "grad_norm": 1.726020083393555, + "learning_rate": 1.2042756960632385e-05, + "loss": 0.8147, + "step": 14731 + }, + { + "epoch": 0.45151403702341547, + "grad_norm": 1.5028168875489851, + "learning_rate": 1.2041785246207097e-05, + "loss": 0.7377, + "step": 14732 + }, + { + "epoch": 0.4515446855461567, + "grad_norm": 1.512406411076732, + "learning_rate": 1.2040813511663576e-05, + "loss": 0.7734, + "step": 14733 + }, + { + "epoch": 0.4515753340688979, + "grad_norm": 1.370401397966825, + "learning_rate": 1.2039841757011397e-05, + "loss": 0.6866, + "step": 14734 + }, + { + "epoch": 0.4516059825916391, + "grad_norm": 1.6961383172872235, + "learning_rate": 1.2038869982260137e-05, + "loss": 0.7059, + "step": 14735 + }, + { + "epoch": 0.4516366311143803, + "grad_norm": 1.4911916163369425, + "learning_rate": 1.2037898187419368e-05, + "loss": 0.7644, + "step": 14736 + }, + { + "epoch": 0.4516672796371215, + "grad_norm": 1.378891817398288, + "learning_rate": 1.2036926372498666e-05, + "loss": 0.6409, + "step": 14737 + }, + { + "epoch": 0.4516979281598627, + "grad_norm": 1.4268801024875573, + "learning_rate": 1.203595453750761e-05, + "loss": 0.8071, + "step": 14738 + }, + { + "epoch": 0.4517285766826039, + "grad_norm": 1.5254257834185259, + "learning_rate": 1.2034982682455769e-05, + "loss": 0.6413, + "step": 14739 + }, + { + "epoch": 0.4517592252053451, + "grad_norm": 1.3051234147633322, + "learning_rate": 1.203401080735273e-05, + "loss": 0.7099, + "step": 14740 + }, + { + "epoch": 0.4517898737280863, + "grad_norm": 1.5809823787043125, + "learning_rate": 1.2033038912208055e-05, + "loss": 0.7366, + "step": 14741 + }, + { + "epoch": 0.4518205222508275, + "grad_norm": 1.4427402116178991, + "learning_rate": 1.203206699703133e-05, + "loss": 0.7101, + "step": 14742 + }, + { + "epoch": 0.45185117077356873, + "grad_norm": 1.5123864546401775, + "learning_rate": 1.2031095061832128e-05, + "loss": 0.6599, + "step": 14743 + }, + { + "epoch": 0.45188181929630994, + "grad_norm": 1.4695097433973614, + "learning_rate": 1.2030123106620031e-05, + "loss": 0.7107, + "step": 14744 + }, + { + "epoch": 0.45191246781905114, + "grad_norm": 1.6091141097256658, + "learning_rate": 1.2029151131404606e-05, + "loss": 0.7578, + "step": 14745 + }, + { + "epoch": 0.45194311634179235, + "grad_norm": 0.717181678057041, + "learning_rate": 1.202817913619544e-05, + "loss": 0.6118, + "step": 14746 + }, + { + "epoch": 0.45197376486453356, + "grad_norm": 1.287700847840747, + "learning_rate": 1.20272071210021e-05, + "loss": 0.7778, + "step": 14747 + }, + { + "epoch": 0.4520044133872747, + "grad_norm": 1.5751763994141559, + "learning_rate": 1.2026235085834174e-05, + "loss": 0.7954, + "step": 14748 + }, + { + "epoch": 0.4520350619100159, + "grad_norm": 1.4661784467719452, + "learning_rate": 1.2025263030701238e-05, + "loss": 0.6308, + "step": 14749 + }, + { + "epoch": 0.4520657104327571, + "grad_norm": 1.377817038310318, + "learning_rate": 1.2024290955612863e-05, + "loss": 0.683, + "step": 14750 + }, + { + "epoch": 0.4520963589554983, + "grad_norm": 1.5421224427305888, + "learning_rate": 1.2023318860578632e-05, + "loss": 0.7794, + "step": 14751 + }, + { + "epoch": 0.45212700747823953, + "grad_norm": 0.6864268455601135, + "learning_rate": 1.2022346745608122e-05, + "loss": 0.6209, + "step": 14752 + }, + { + "epoch": 0.45215765600098073, + "grad_norm": 1.852613887685595, + "learning_rate": 1.2021374610710915e-05, + "loss": 0.7442, + "step": 14753 + }, + { + "epoch": 0.45218830452372194, + "grad_norm": 1.392251443277267, + "learning_rate": 1.2020402455896583e-05, + "loss": 0.7217, + "step": 14754 + }, + { + "epoch": 0.45221895304646315, + "grad_norm": 1.5668772488369027, + "learning_rate": 1.2019430281174714e-05, + "loss": 0.7895, + "step": 14755 + }, + { + "epoch": 0.45224960156920435, + "grad_norm": 1.4422664175271966, + "learning_rate": 1.2018458086554877e-05, + "loss": 0.741, + "step": 14756 + }, + { + "epoch": 0.45228025009194556, + "grad_norm": 1.4795100199021278, + "learning_rate": 1.2017485872046656e-05, + "loss": 0.7081, + "step": 14757 + }, + { + "epoch": 0.45231089861468676, + "grad_norm": 1.4636567540849132, + "learning_rate": 1.201651363765963e-05, + "loss": 0.706, + "step": 14758 + }, + { + "epoch": 0.45234154713742797, + "grad_norm": 1.5295356615354234, + "learning_rate": 1.2015541383403384e-05, + "loss": 0.8108, + "step": 14759 + }, + { + "epoch": 0.4523721956601692, + "grad_norm": 0.6556063096180228, + "learning_rate": 1.2014569109287488e-05, + "loss": 0.5898, + "step": 14760 + }, + { + "epoch": 0.4524028441829104, + "grad_norm": 1.4703929551359158, + "learning_rate": 1.2013596815321528e-05, + "loss": 0.7281, + "step": 14761 + }, + { + "epoch": 0.4524334927056516, + "grad_norm": 2.4450540766107123, + "learning_rate": 1.2012624501515084e-05, + "loss": 0.7829, + "step": 14762 + }, + { + "epoch": 0.4524641412283928, + "grad_norm": 1.3408880160897716, + "learning_rate": 1.2011652167877737e-05, + "loss": 0.7661, + "step": 14763 + }, + { + "epoch": 0.452494789751134, + "grad_norm": 1.353415432946247, + "learning_rate": 1.2010679814419066e-05, + "loss": 0.6791, + "step": 14764 + }, + { + "epoch": 0.4525254382738752, + "grad_norm": 1.5109294745383341, + "learning_rate": 1.200970744114865e-05, + "loss": 0.8016, + "step": 14765 + }, + { + "epoch": 0.4525560867966164, + "grad_norm": 1.5114985411709485, + "learning_rate": 1.2008735048076077e-05, + "loss": 0.711, + "step": 14766 + }, + { + "epoch": 0.4525867353193576, + "grad_norm": 1.2644628458470533, + "learning_rate": 1.200776263521092e-05, + "loss": 0.6408, + "step": 14767 + }, + { + "epoch": 0.4526173838420988, + "grad_norm": 1.7580446055368406, + "learning_rate": 1.2006790202562765e-05, + "loss": 0.7634, + "step": 14768 + }, + { + "epoch": 0.45264803236484, + "grad_norm": 1.4763507869893757, + "learning_rate": 1.2005817750141193e-05, + "loss": 0.6818, + "step": 14769 + }, + { + "epoch": 0.45267868088758123, + "grad_norm": 1.5396452897356148, + "learning_rate": 1.2004845277955785e-05, + "loss": 0.7053, + "step": 14770 + }, + { + "epoch": 0.45270932941032244, + "grad_norm": 0.6664917739985844, + "learning_rate": 1.2003872786016125e-05, + "loss": 0.5867, + "step": 14771 + }, + { + "epoch": 0.45273997793306364, + "grad_norm": 1.604247659780217, + "learning_rate": 1.2002900274331793e-05, + "loss": 0.7501, + "step": 14772 + }, + { + "epoch": 0.45277062645580485, + "grad_norm": 1.493022571143192, + "learning_rate": 1.2001927742912369e-05, + "loss": 0.6944, + "step": 14773 + }, + { + "epoch": 0.45280127497854605, + "grad_norm": 1.4846191651131164, + "learning_rate": 1.2000955191767442e-05, + "loss": 0.7323, + "step": 14774 + }, + { + "epoch": 0.45283192350128726, + "grad_norm": 1.4803367234190483, + "learning_rate": 1.1999982620906592e-05, + "loss": 0.7629, + "step": 14775 + }, + { + "epoch": 0.45286257202402846, + "grad_norm": 1.450203340698677, + "learning_rate": 1.1999010030339403e-05, + "loss": 0.7008, + "step": 14776 + }, + { + "epoch": 0.45289322054676967, + "grad_norm": 1.576132462730257, + "learning_rate": 1.199803742007545e-05, + "loss": 0.7421, + "step": 14777 + }, + { + "epoch": 0.4529238690695109, + "grad_norm": 1.508898202282977, + "learning_rate": 1.199706479012433e-05, + "loss": 0.7208, + "step": 14778 + }, + { + "epoch": 0.452954517592252, + "grad_norm": 1.548160719933456, + "learning_rate": 1.1996092140495617e-05, + "loss": 0.8046, + "step": 14779 + }, + { + "epoch": 0.45298516611499323, + "grad_norm": 1.548051871442688, + "learning_rate": 1.1995119471198896e-05, + "loss": 0.7487, + "step": 14780 + }, + { + "epoch": 0.45301581463773444, + "grad_norm": 1.4264340641301805, + "learning_rate": 1.1994146782243751e-05, + "loss": 0.7171, + "step": 14781 + }, + { + "epoch": 0.45304646316047564, + "grad_norm": 1.3493668576107984, + "learning_rate": 1.1993174073639773e-05, + "loss": 0.7141, + "step": 14782 + }, + { + "epoch": 0.45307711168321685, + "grad_norm": 1.4614503759466655, + "learning_rate": 1.1992201345396539e-05, + "loss": 0.7706, + "step": 14783 + }, + { + "epoch": 0.45310776020595805, + "grad_norm": 1.563772218627302, + "learning_rate": 1.1991228597523632e-05, + "loss": 0.7671, + "step": 14784 + }, + { + "epoch": 0.45313840872869926, + "grad_norm": 1.2828642052795893, + "learning_rate": 1.1990255830030644e-05, + "loss": 0.7241, + "step": 14785 + }, + { + "epoch": 0.45316905725144047, + "grad_norm": 1.6333541857293135, + "learning_rate": 1.1989283042927156e-05, + "loss": 0.7353, + "step": 14786 + }, + { + "epoch": 0.45319970577418167, + "grad_norm": 1.3745934058321159, + "learning_rate": 1.1988310236222751e-05, + "loss": 0.7216, + "step": 14787 + }, + { + "epoch": 0.4532303542969229, + "grad_norm": 1.378747574417587, + "learning_rate": 1.1987337409927015e-05, + "loss": 0.7969, + "step": 14788 + }, + { + "epoch": 0.4532610028196641, + "grad_norm": 0.7151387965233853, + "learning_rate": 1.198636456404954e-05, + "loss": 0.5716, + "step": 14789 + }, + { + "epoch": 0.4532916513424053, + "grad_norm": 0.674141930464883, + "learning_rate": 1.1985391698599904e-05, + "loss": 0.5822, + "step": 14790 + }, + { + "epoch": 0.4533222998651465, + "grad_norm": 1.7184006854385097, + "learning_rate": 1.1984418813587695e-05, + "loss": 0.7793, + "step": 14791 + }, + { + "epoch": 0.4533529483878877, + "grad_norm": 1.5492225496301821, + "learning_rate": 1.19834459090225e-05, + "loss": 0.7254, + "step": 14792 + }, + { + "epoch": 0.4533835969106289, + "grad_norm": 1.414481616769939, + "learning_rate": 1.1982472984913905e-05, + "loss": 0.6774, + "step": 14793 + }, + { + "epoch": 0.4534142454333701, + "grad_norm": 1.317457045450902, + "learning_rate": 1.19815000412715e-05, + "loss": 0.7287, + "step": 14794 + }, + { + "epoch": 0.4534448939561113, + "grad_norm": 0.6924431705088101, + "learning_rate": 1.1980527078104863e-05, + "loss": 0.5968, + "step": 14795 + }, + { + "epoch": 0.4534755424788525, + "grad_norm": 1.4381823848348514, + "learning_rate": 1.1979554095423586e-05, + "loss": 0.788, + "step": 14796 + }, + { + "epoch": 0.45350619100159373, + "grad_norm": 1.4269427948515465, + "learning_rate": 1.1978581093237259e-05, + "loss": 0.616, + "step": 14797 + }, + { + "epoch": 0.45353683952433493, + "grad_norm": 1.2950971005243355, + "learning_rate": 1.1977608071555467e-05, + "loss": 0.7544, + "step": 14798 + }, + { + "epoch": 0.45356748804707614, + "grad_norm": 1.5874848379111102, + "learning_rate": 1.1976635030387794e-05, + "loss": 0.7016, + "step": 14799 + }, + { + "epoch": 0.45359813656981735, + "grad_norm": 1.4925551464078322, + "learning_rate": 1.197566196974383e-05, + "loss": 0.7828, + "step": 14800 + }, + { + "epoch": 0.45362878509255855, + "grad_norm": 1.5855100388578598, + "learning_rate": 1.1974688889633164e-05, + "loss": 0.8036, + "step": 14801 + }, + { + "epoch": 0.45365943361529976, + "grad_norm": 1.567550537459436, + "learning_rate": 1.1973715790065385e-05, + "loss": 0.8035, + "step": 14802 + }, + { + "epoch": 0.45369008213804096, + "grad_norm": 1.4170656688378354, + "learning_rate": 1.1972742671050077e-05, + "loss": 0.7024, + "step": 14803 + }, + { + "epoch": 0.45372073066078217, + "grad_norm": 1.366933079908977, + "learning_rate": 1.197176953259683e-05, + "loss": 0.7323, + "step": 14804 + }, + { + "epoch": 0.4537513791835234, + "grad_norm": 1.4943127896195871, + "learning_rate": 1.1970796374715236e-05, + "loss": 0.7461, + "step": 14805 + }, + { + "epoch": 0.4537820277062646, + "grad_norm": 1.3641808971716785, + "learning_rate": 1.1969823197414879e-05, + "loss": 0.6225, + "step": 14806 + }, + { + "epoch": 0.4538126762290058, + "grad_norm": 0.7410022412681938, + "learning_rate": 1.1968850000705353e-05, + "loss": 0.6073, + "step": 14807 + }, + { + "epoch": 0.453843324751747, + "grad_norm": 0.6924222959519748, + "learning_rate": 1.196787678459624e-05, + "loss": 0.6027, + "step": 14808 + }, + { + "epoch": 0.4538739732744882, + "grad_norm": 1.3374319117823958, + "learning_rate": 1.1966903549097137e-05, + "loss": 0.7258, + "step": 14809 + }, + { + "epoch": 0.45390462179722935, + "grad_norm": 1.4185986420376677, + "learning_rate": 1.1965930294217627e-05, + "loss": 0.645, + "step": 14810 + }, + { + "epoch": 0.45393527031997055, + "grad_norm": 0.6677528645184732, + "learning_rate": 1.1964957019967305e-05, + "loss": 0.5837, + "step": 14811 + }, + { + "epoch": 0.45396591884271176, + "grad_norm": 1.3320432579749268, + "learning_rate": 1.1963983726355756e-05, + "loss": 0.6538, + "step": 14812 + }, + { + "epoch": 0.45399656736545296, + "grad_norm": 1.5072662904483378, + "learning_rate": 1.196301041339258e-05, + "loss": 0.7079, + "step": 14813 + }, + { + "epoch": 0.45402721588819417, + "grad_norm": 1.384437345588303, + "learning_rate": 1.1962037081087351e-05, + "loss": 0.76, + "step": 14814 + }, + { + "epoch": 0.4540578644109354, + "grad_norm": 1.5169845399775503, + "learning_rate": 1.1961063729449675e-05, + "loss": 0.7159, + "step": 14815 + }, + { + "epoch": 0.4540885129336766, + "grad_norm": 1.6950276480074546, + "learning_rate": 1.1960090358489131e-05, + "loss": 0.7282, + "step": 14816 + }, + { + "epoch": 0.4541191614564178, + "grad_norm": 1.6953328182047052, + "learning_rate": 1.1959116968215321e-05, + "loss": 0.6716, + "step": 14817 + }, + { + "epoch": 0.454149809979159, + "grad_norm": 1.6452330623296056, + "learning_rate": 1.1958143558637827e-05, + "loss": 0.7745, + "step": 14818 + }, + { + "epoch": 0.4541804585019002, + "grad_norm": 1.4716130128493585, + "learning_rate": 1.1957170129766243e-05, + "loss": 0.7228, + "step": 14819 + }, + { + "epoch": 0.4542111070246414, + "grad_norm": 1.4673677712555497, + "learning_rate": 1.1956196681610162e-05, + "loss": 0.723, + "step": 14820 + }, + { + "epoch": 0.4542417555473826, + "grad_norm": 1.4641927475790812, + "learning_rate": 1.1955223214179175e-05, + "loss": 0.7777, + "step": 14821 + }, + { + "epoch": 0.4542724040701238, + "grad_norm": 1.5448213017578445, + "learning_rate": 1.1954249727482873e-05, + "loss": 0.7707, + "step": 14822 + }, + { + "epoch": 0.454303052592865, + "grad_norm": 1.5337185636287685, + "learning_rate": 1.1953276221530848e-05, + "loss": 0.7949, + "step": 14823 + }, + { + "epoch": 0.4543337011156062, + "grad_norm": 1.4135638724222268, + "learning_rate": 1.1952302696332694e-05, + "loss": 0.691, + "step": 14824 + }, + { + "epoch": 0.45436434963834743, + "grad_norm": 1.4091167278166001, + "learning_rate": 1.1951329151898001e-05, + "loss": 0.7771, + "step": 14825 + }, + { + "epoch": 0.45439499816108864, + "grad_norm": 1.4074076486105858, + "learning_rate": 1.1950355588236364e-05, + "loss": 0.7503, + "step": 14826 + }, + { + "epoch": 0.45442564668382984, + "grad_norm": 1.313833852433066, + "learning_rate": 1.194938200535737e-05, + "loss": 0.6844, + "step": 14827 + }, + { + "epoch": 0.45445629520657105, + "grad_norm": 1.5557597811111827, + "learning_rate": 1.1948408403270622e-05, + "loss": 0.6275, + "step": 14828 + }, + { + "epoch": 0.45448694372931225, + "grad_norm": 1.3895412862352228, + "learning_rate": 1.1947434781985706e-05, + "loss": 0.7885, + "step": 14829 + }, + { + "epoch": 0.45451759225205346, + "grad_norm": 1.594079655811942, + "learning_rate": 1.1946461141512215e-05, + "loss": 0.7319, + "step": 14830 + }, + { + "epoch": 0.45454824077479467, + "grad_norm": 0.7745652467021742, + "learning_rate": 1.1945487481859743e-05, + "loss": 0.5683, + "step": 14831 + }, + { + "epoch": 0.45457888929753587, + "grad_norm": 1.5071093122595023, + "learning_rate": 1.1944513803037888e-05, + "loss": 0.7025, + "step": 14832 + }, + { + "epoch": 0.4546095378202771, + "grad_norm": 1.47336889648062, + "learning_rate": 1.1943540105056239e-05, + "loss": 0.7771, + "step": 14833 + }, + { + "epoch": 0.4546401863430183, + "grad_norm": 1.4296260747236775, + "learning_rate": 1.1942566387924393e-05, + "loss": 0.6645, + "step": 14834 + }, + { + "epoch": 0.4546708348657595, + "grad_norm": 1.5634116370736524, + "learning_rate": 1.1941592651651942e-05, + "loss": 0.7853, + "step": 14835 + }, + { + "epoch": 0.4547014833885007, + "grad_norm": 1.4346470908399722, + "learning_rate": 1.1940618896248485e-05, + "loss": 0.693, + "step": 14836 + }, + { + "epoch": 0.4547321319112419, + "grad_norm": 1.3479637103011004, + "learning_rate": 1.1939645121723613e-05, + "loss": 0.6815, + "step": 14837 + }, + { + "epoch": 0.4547627804339831, + "grad_norm": 1.549698247391338, + "learning_rate": 1.1938671328086916e-05, + "loss": 0.7639, + "step": 14838 + }, + { + "epoch": 0.4547934289567243, + "grad_norm": 1.7110124214555913, + "learning_rate": 1.1937697515348e-05, + "loss": 0.7635, + "step": 14839 + }, + { + "epoch": 0.4548240774794655, + "grad_norm": 1.3068043368702782, + "learning_rate": 1.1936723683516456e-05, + "loss": 0.6872, + "step": 14840 + }, + { + "epoch": 0.45485472600220667, + "grad_norm": 1.6229488676285728, + "learning_rate": 1.1935749832601875e-05, + "loss": 0.8623, + "step": 14841 + }, + { + "epoch": 0.45488537452494787, + "grad_norm": 1.360944941101225, + "learning_rate": 1.1934775962613854e-05, + "loss": 0.7415, + "step": 14842 + }, + { + "epoch": 0.4549160230476891, + "grad_norm": 0.7274438408320216, + "learning_rate": 1.1933802073561994e-05, + "loss": 0.6081, + "step": 14843 + }, + { + "epoch": 0.4549466715704303, + "grad_norm": 1.5888625134252021, + "learning_rate": 1.1932828165455886e-05, + "loss": 0.7685, + "step": 14844 + }, + { + "epoch": 0.4549773200931715, + "grad_norm": 0.730642605147964, + "learning_rate": 1.1931854238305123e-05, + "loss": 0.5827, + "step": 14845 + }, + { + "epoch": 0.4550079686159127, + "grad_norm": 1.5922776641293512, + "learning_rate": 1.1930880292119312e-05, + "loss": 0.8194, + "step": 14846 + }, + { + "epoch": 0.4550386171386539, + "grad_norm": 1.4040760888337946, + "learning_rate": 1.192990632690804e-05, + "loss": 0.7785, + "step": 14847 + }, + { + "epoch": 0.4550692656613951, + "grad_norm": 1.4416990173187816, + "learning_rate": 1.192893234268091e-05, + "loss": 0.7705, + "step": 14848 + }, + { + "epoch": 0.4550999141841363, + "grad_norm": 1.3822875459066335, + "learning_rate": 1.1927958339447513e-05, + "loss": 0.6558, + "step": 14849 + }, + { + "epoch": 0.4551305627068775, + "grad_norm": 1.4233872667245095, + "learning_rate": 1.1926984317217451e-05, + "loss": 0.7346, + "step": 14850 + }, + { + "epoch": 0.4551612112296187, + "grad_norm": 1.4725630076404082, + "learning_rate": 1.1926010276000319e-05, + "loss": 0.7395, + "step": 14851 + }, + { + "epoch": 0.45519185975235993, + "grad_norm": 1.5431082686906317, + "learning_rate": 1.1925036215805718e-05, + "loss": 0.7185, + "step": 14852 + }, + { + "epoch": 0.45522250827510113, + "grad_norm": 1.486435464399042, + "learning_rate": 1.1924062136643237e-05, + "loss": 0.8922, + "step": 14853 + }, + { + "epoch": 0.45525315679784234, + "grad_norm": 1.3935187284995147, + "learning_rate": 1.1923088038522484e-05, + "loss": 0.6366, + "step": 14854 + }, + { + "epoch": 0.45528380532058355, + "grad_norm": 1.3755301428176794, + "learning_rate": 1.192211392145305e-05, + "loss": 0.7148, + "step": 14855 + }, + { + "epoch": 0.45531445384332475, + "grad_norm": 1.4556974102823304, + "learning_rate": 1.192113978544454e-05, + "loss": 0.7094, + "step": 14856 + }, + { + "epoch": 0.45534510236606596, + "grad_norm": 1.5589246147063809, + "learning_rate": 1.192016563050654e-05, + "loss": 0.7443, + "step": 14857 + }, + { + "epoch": 0.45537575088880716, + "grad_norm": 1.4559932201268142, + "learning_rate": 1.1919191456648665e-05, + "loss": 0.7727, + "step": 14858 + }, + { + "epoch": 0.45540639941154837, + "grad_norm": 1.392600112248236, + "learning_rate": 1.1918217263880503e-05, + "loss": 0.7366, + "step": 14859 + }, + { + "epoch": 0.4554370479342896, + "grad_norm": 1.40878036674036, + "learning_rate": 1.191724305221165e-05, + "loss": 0.7307, + "step": 14860 + }, + { + "epoch": 0.4554676964570308, + "grad_norm": 0.7229652717860472, + "learning_rate": 1.1916268821651717e-05, + "loss": 0.6027, + "step": 14861 + }, + { + "epoch": 0.455498344979772, + "grad_norm": 1.6636877639915861, + "learning_rate": 1.1915294572210295e-05, + "loss": 0.7396, + "step": 14862 + }, + { + "epoch": 0.4555289935025132, + "grad_norm": 1.3326169697954948, + "learning_rate": 1.1914320303896987e-05, + "loss": 0.693, + "step": 14863 + }, + { + "epoch": 0.4555596420252544, + "grad_norm": 1.5024714681115348, + "learning_rate": 1.191334601672139e-05, + "loss": 0.7826, + "step": 14864 + }, + { + "epoch": 0.4555902905479956, + "grad_norm": 1.4100160692447232, + "learning_rate": 1.1912371710693105e-05, + "loss": 0.6884, + "step": 14865 + }, + { + "epoch": 0.4556209390707368, + "grad_norm": 1.2866824308633702, + "learning_rate": 1.191139738582173e-05, + "loss": 0.6581, + "step": 14866 + }, + { + "epoch": 0.455651587593478, + "grad_norm": 1.6198230386684316, + "learning_rate": 1.1910423042116873e-05, + "loss": 0.7378, + "step": 14867 + }, + { + "epoch": 0.4556822361162192, + "grad_norm": 1.299694865580618, + "learning_rate": 1.1909448679588125e-05, + "loss": 0.5632, + "step": 14868 + }, + { + "epoch": 0.4557128846389604, + "grad_norm": 0.6789280091246244, + "learning_rate": 1.1908474298245092e-05, + "loss": 0.5792, + "step": 14869 + }, + { + "epoch": 0.45574353316170163, + "grad_norm": 1.505461305550094, + "learning_rate": 1.1907499898097372e-05, + "loss": 0.7708, + "step": 14870 + }, + { + "epoch": 0.45577418168444284, + "grad_norm": 1.5631847480553345, + "learning_rate": 1.1906525479154567e-05, + "loss": 0.7126, + "step": 14871 + }, + { + "epoch": 0.455804830207184, + "grad_norm": 0.631958864707102, + "learning_rate": 1.1905551041426282e-05, + "loss": 0.5435, + "step": 14872 + }, + { + "epoch": 0.4558354787299252, + "grad_norm": 1.5898423834273465, + "learning_rate": 1.1904576584922114e-05, + "loss": 0.8768, + "step": 14873 + }, + { + "epoch": 0.4558661272526664, + "grad_norm": 1.4264817432102443, + "learning_rate": 1.1903602109651662e-05, + "loss": 0.6366, + "step": 14874 + }, + { + "epoch": 0.4558967757754076, + "grad_norm": 1.7143818188719442, + "learning_rate": 1.1902627615624536e-05, + "loss": 0.7634, + "step": 14875 + }, + { + "epoch": 0.4559274242981488, + "grad_norm": 1.772988197317246, + "learning_rate": 1.190165310285033e-05, + "loss": 0.7529, + "step": 14876 + }, + { + "epoch": 0.45595807282089, + "grad_norm": 1.5187504777584178, + "learning_rate": 1.1900678571338649e-05, + "loss": 0.7529, + "step": 14877 + }, + { + "epoch": 0.4559887213436312, + "grad_norm": 1.5303074665187013, + "learning_rate": 1.18997040210991e-05, + "loss": 0.8072, + "step": 14878 + }, + { + "epoch": 0.4560193698663724, + "grad_norm": 1.6412788094480866, + "learning_rate": 1.1898729452141277e-05, + "loss": 0.7311, + "step": 14879 + }, + { + "epoch": 0.45605001838911363, + "grad_norm": 1.5271945356417687, + "learning_rate": 1.1897754864474791e-05, + "loss": 0.7795, + "step": 14880 + }, + { + "epoch": 0.45608066691185484, + "grad_norm": 1.5937523213902414, + "learning_rate": 1.1896780258109235e-05, + "loss": 0.8101, + "step": 14881 + }, + { + "epoch": 0.45611131543459604, + "grad_norm": 1.3546408238746677, + "learning_rate": 1.1895805633054225e-05, + "loss": 0.6761, + "step": 14882 + }, + { + "epoch": 0.45614196395733725, + "grad_norm": 1.592366209185557, + "learning_rate": 1.189483098931935e-05, + "loss": 0.6923, + "step": 14883 + }, + { + "epoch": 0.45617261248007845, + "grad_norm": 1.4852336303694926, + "learning_rate": 1.1893856326914225e-05, + "loss": 0.654, + "step": 14884 + }, + { + "epoch": 0.45620326100281966, + "grad_norm": 1.5638037005441403, + "learning_rate": 1.1892881645848447e-05, + "loss": 0.7372, + "step": 14885 + }, + { + "epoch": 0.45623390952556087, + "grad_norm": 1.5368687340891414, + "learning_rate": 1.1891906946131621e-05, + "loss": 0.7486, + "step": 14886 + }, + { + "epoch": 0.45626455804830207, + "grad_norm": 1.6767213159876462, + "learning_rate": 1.1890932227773356e-05, + "loss": 0.7122, + "step": 14887 + }, + { + "epoch": 0.4562952065710433, + "grad_norm": 1.3102654500563193, + "learning_rate": 1.1889957490783247e-05, + "loss": 0.7008, + "step": 14888 + }, + { + "epoch": 0.4563258550937845, + "grad_norm": 1.472710085081783, + "learning_rate": 1.1888982735170906e-05, + "loss": 0.7168, + "step": 14889 + }, + { + "epoch": 0.4563565036165257, + "grad_norm": 1.4621694196424695, + "learning_rate": 1.1888007960945935e-05, + "loss": 0.7202, + "step": 14890 + }, + { + "epoch": 0.4563871521392669, + "grad_norm": 1.6078040775334448, + "learning_rate": 1.1887033168117939e-05, + "loss": 0.7972, + "step": 14891 + }, + { + "epoch": 0.4564178006620081, + "grad_norm": 1.4271320747805274, + "learning_rate": 1.1886058356696519e-05, + "loss": 0.6403, + "step": 14892 + }, + { + "epoch": 0.4564484491847493, + "grad_norm": 0.6760570547131183, + "learning_rate": 1.1885083526691286e-05, + "loss": 0.6068, + "step": 14893 + }, + { + "epoch": 0.4564790977074905, + "grad_norm": 1.3535907988605367, + "learning_rate": 1.1884108678111844e-05, + "loss": 0.6931, + "step": 14894 + }, + { + "epoch": 0.4565097462302317, + "grad_norm": 1.4093408295607852, + "learning_rate": 1.1883133810967796e-05, + "loss": 0.6625, + "step": 14895 + }, + { + "epoch": 0.4565403947529729, + "grad_norm": 1.5646891358720145, + "learning_rate": 1.1882158925268745e-05, + "loss": 0.8252, + "step": 14896 + }, + { + "epoch": 0.45657104327571413, + "grad_norm": 0.6319678789289294, + "learning_rate": 1.1881184021024303e-05, + "loss": 0.5874, + "step": 14897 + }, + { + "epoch": 0.45660169179845533, + "grad_norm": 1.5766391622990585, + "learning_rate": 1.1880209098244078e-05, + "loss": 0.6758, + "step": 14898 + }, + { + "epoch": 0.45663234032119654, + "grad_norm": 1.3571451075207601, + "learning_rate": 1.1879234156937668e-05, + "loss": 0.652, + "step": 14899 + }, + { + "epoch": 0.45666298884393774, + "grad_norm": 1.3111586196972664, + "learning_rate": 1.1878259197114681e-05, + "loss": 0.7503, + "step": 14900 + }, + { + "epoch": 0.45669363736667895, + "grad_norm": 1.3298845911836457, + "learning_rate": 1.1877284218784728e-05, + "loss": 0.6856, + "step": 14901 + }, + { + "epoch": 0.45672428588942016, + "grad_norm": 1.4322493867369264, + "learning_rate": 1.1876309221957411e-05, + "loss": 0.7666, + "step": 14902 + }, + { + "epoch": 0.4567549344121613, + "grad_norm": 1.39613148872179, + "learning_rate": 1.1875334206642342e-05, + "loss": 0.7614, + "step": 14903 + }, + { + "epoch": 0.4567855829349025, + "grad_norm": 1.4487897151895945, + "learning_rate": 1.1874359172849123e-05, + "loss": 0.6945, + "step": 14904 + }, + { + "epoch": 0.4568162314576437, + "grad_norm": 1.547585344760623, + "learning_rate": 1.1873384120587363e-05, + "loss": 0.6222, + "step": 14905 + }, + { + "epoch": 0.4568468799803849, + "grad_norm": 1.5195189573883023, + "learning_rate": 1.1872409049866676e-05, + "loss": 0.8025, + "step": 14906 + }, + { + "epoch": 0.45687752850312613, + "grad_norm": 1.4159403059673061, + "learning_rate": 1.1871433960696657e-05, + "loss": 0.6688, + "step": 14907 + }, + { + "epoch": 0.45690817702586733, + "grad_norm": 1.3526960290003602, + "learning_rate": 1.1870458853086921e-05, + "loss": 0.6844, + "step": 14908 + }, + { + "epoch": 0.45693882554860854, + "grad_norm": 1.4576076099546313, + "learning_rate": 1.186948372704708e-05, + "loss": 0.7254, + "step": 14909 + }, + { + "epoch": 0.45696947407134975, + "grad_norm": 1.4624681114487303, + "learning_rate": 1.1868508582586734e-05, + "loss": 0.6825, + "step": 14910 + }, + { + "epoch": 0.45700012259409095, + "grad_norm": 1.6293487734130718, + "learning_rate": 1.1867533419715493e-05, + "loss": 0.8889, + "step": 14911 + }, + { + "epoch": 0.45703077111683216, + "grad_norm": 1.4835713532669355, + "learning_rate": 1.186655823844297e-05, + "loss": 0.735, + "step": 14912 + }, + { + "epoch": 0.45706141963957336, + "grad_norm": 0.6569086883143581, + "learning_rate": 1.1865583038778771e-05, + "loss": 0.5729, + "step": 14913 + }, + { + "epoch": 0.45709206816231457, + "grad_norm": 1.4834194154128395, + "learning_rate": 1.1864607820732504e-05, + "loss": 0.8575, + "step": 14914 + }, + { + "epoch": 0.4571227166850558, + "grad_norm": 1.427951833780981, + "learning_rate": 1.186363258431378e-05, + "loss": 0.7261, + "step": 14915 + }, + { + "epoch": 0.457153365207797, + "grad_norm": 1.4825972757125616, + "learning_rate": 1.1862657329532205e-05, + "loss": 0.6689, + "step": 14916 + }, + { + "epoch": 0.4571840137305382, + "grad_norm": 1.3905684266308664, + "learning_rate": 1.1861682056397396e-05, + "loss": 0.6576, + "step": 14917 + }, + { + "epoch": 0.4572146622532794, + "grad_norm": 1.356888708371648, + "learning_rate": 1.1860706764918952e-05, + "loss": 0.6735, + "step": 14918 + }, + { + "epoch": 0.4572453107760206, + "grad_norm": 0.6830181254906869, + "learning_rate": 1.1859731455106492e-05, + "loss": 0.6075, + "step": 14919 + }, + { + "epoch": 0.4572759592987618, + "grad_norm": 1.6352208660006051, + "learning_rate": 1.1858756126969619e-05, + "loss": 0.7646, + "step": 14920 + }, + { + "epoch": 0.457306607821503, + "grad_norm": 1.5253991907734985, + "learning_rate": 1.185778078051795e-05, + "loss": 0.6904, + "step": 14921 + }, + { + "epoch": 0.4573372563442442, + "grad_norm": 1.3010191878075796, + "learning_rate": 1.1856805415761087e-05, + "loss": 0.6777, + "step": 14922 + }, + { + "epoch": 0.4573679048669854, + "grad_norm": 1.4087961828187352, + "learning_rate": 1.1855830032708648e-05, + "loss": 0.7265, + "step": 14923 + }, + { + "epoch": 0.4573985533897266, + "grad_norm": 0.6424884320858536, + "learning_rate": 1.185485463137024e-05, + "loss": 0.5617, + "step": 14924 + }, + { + "epoch": 0.45742920191246783, + "grad_norm": 0.6658111980623436, + "learning_rate": 1.1853879211755477e-05, + "loss": 0.5905, + "step": 14925 + }, + { + "epoch": 0.45745985043520904, + "grad_norm": 1.226204559323302, + "learning_rate": 1.1852903773873966e-05, + "loss": 0.7399, + "step": 14926 + }, + { + "epoch": 0.45749049895795024, + "grad_norm": 1.300752552661637, + "learning_rate": 1.1851928317735319e-05, + "loss": 0.6445, + "step": 14927 + }, + { + "epoch": 0.45752114748069145, + "grad_norm": 1.4771622652863952, + "learning_rate": 1.1850952843349148e-05, + "loss": 0.7821, + "step": 14928 + }, + { + "epoch": 0.45755179600343265, + "grad_norm": 1.3466627483517308, + "learning_rate": 1.1849977350725068e-05, + "loss": 0.7365, + "step": 14929 + }, + { + "epoch": 0.45758244452617386, + "grad_norm": 1.4901330913969089, + "learning_rate": 1.1849001839872687e-05, + "loss": 0.7666, + "step": 14930 + }, + { + "epoch": 0.45761309304891506, + "grad_norm": 0.6717491074736079, + "learning_rate": 1.1848026310801615e-05, + "loss": 0.5635, + "step": 14931 + }, + { + "epoch": 0.45764374157165627, + "grad_norm": 1.5857761540293676, + "learning_rate": 1.184705076352147e-05, + "loss": 0.7631, + "step": 14932 + }, + { + "epoch": 0.4576743900943975, + "grad_norm": 1.7907363063588444, + "learning_rate": 1.184607519804186e-05, + "loss": 0.7869, + "step": 14933 + }, + { + "epoch": 0.4577050386171386, + "grad_norm": 1.4333644373550503, + "learning_rate": 1.1845099614372399e-05, + "loss": 0.6773, + "step": 14934 + }, + { + "epoch": 0.45773568713987983, + "grad_norm": 0.6932631227421178, + "learning_rate": 1.1844124012522697e-05, + "loss": 0.5912, + "step": 14935 + }, + { + "epoch": 0.45776633566262104, + "grad_norm": 1.3516043208753523, + "learning_rate": 1.1843148392502376e-05, + "loss": 0.6948, + "step": 14936 + }, + { + "epoch": 0.45779698418536224, + "grad_norm": 1.6381407939881523, + "learning_rate": 1.1842172754321037e-05, + "loss": 0.7512, + "step": 14937 + }, + { + "epoch": 0.45782763270810345, + "grad_norm": 1.3608281807094709, + "learning_rate": 1.18411970979883e-05, + "loss": 0.6675, + "step": 14938 + }, + { + "epoch": 0.45785828123084465, + "grad_norm": 1.540580972488369, + "learning_rate": 1.1840221423513773e-05, + "loss": 0.7458, + "step": 14939 + }, + { + "epoch": 0.45788892975358586, + "grad_norm": 1.6170634930519399, + "learning_rate": 1.1839245730907078e-05, + "loss": 0.7086, + "step": 14940 + }, + { + "epoch": 0.45791957827632707, + "grad_norm": 1.3743171744217981, + "learning_rate": 1.1838270020177825e-05, + "loss": 0.5754, + "step": 14941 + }, + { + "epoch": 0.45795022679906827, + "grad_norm": 1.5999534899908565, + "learning_rate": 1.1837294291335621e-05, + "loss": 0.7289, + "step": 14942 + }, + { + "epoch": 0.4579808753218095, + "grad_norm": 1.5443158877223282, + "learning_rate": 1.1836318544390093e-05, + "loss": 0.7713, + "step": 14943 + }, + { + "epoch": 0.4580115238445507, + "grad_norm": 1.5340308312295678, + "learning_rate": 1.1835342779350847e-05, + "loss": 0.6695, + "step": 14944 + }, + { + "epoch": 0.4580421723672919, + "grad_norm": 1.3736922867111818, + "learning_rate": 1.1834366996227498e-05, + "loss": 0.6377, + "step": 14945 + }, + { + "epoch": 0.4580728208900331, + "grad_norm": 1.4710091557885083, + "learning_rate": 1.183339119502966e-05, + "loss": 0.7679, + "step": 14946 + }, + { + "epoch": 0.4581034694127743, + "grad_norm": 1.4654602672775463, + "learning_rate": 1.1832415375766953e-05, + "loss": 0.7119, + "step": 14947 + }, + { + "epoch": 0.4581341179355155, + "grad_norm": 1.5898425237482092, + "learning_rate": 1.1831439538448985e-05, + "loss": 0.7713, + "step": 14948 + }, + { + "epoch": 0.4581647664582567, + "grad_norm": 1.3807781574195972, + "learning_rate": 1.1830463683085379e-05, + "loss": 0.7256, + "step": 14949 + }, + { + "epoch": 0.4581954149809979, + "grad_norm": 1.5919705712250538, + "learning_rate": 1.182948780968574e-05, + "loss": 0.666, + "step": 14950 + }, + { + "epoch": 0.4582260635037391, + "grad_norm": 0.6614002521436049, + "learning_rate": 1.1828511918259692e-05, + "loss": 0.5695, + "step": 14951 + }, + { + "epoch": 0.45825671202648033, + "grad_norm": 1.7112985604030058, + "learning_rate": 1.182753600881685e-05, + "loss": 0.768, + "step": 14952 + }, + { + "epoch": 0.45828736054922153, + "grad_norm": 1.5655005590366433, + "learning_rate": 1.1826560081366829e-05, + "loss": 0.7193, + "step": 14953 + }, + { + "epoch": 0.45831800907196274, + "grad_norm": 1.6808406681561807, + "learning_rate": 1.1825584135919239e-05, + "loss": 0.7706, + "step": 14954 + }, + { + "epoch": 0.45834865759470395, + "grad_norm": 1.4056657916194553, + "learning_rate": 1.1824608172483706e-05, + "loss": 0.7142, + "step": 14955 + }, + { + "epoch": 0.45837930611744515, + "grad_norm": 1.4138515805605323, + "learning_rate": 1.182363219106984e-05, + "loss": 0.704, + "step": 14956 + }, + { + "epoch": 0.45840995464018636, + "grad_norm": 1.6000987561707387, + "learning_rate": 1.1822656191687258e-05, + "loss": 0.7385, + "step": 14957 + }, + { + "epoch": 0.45844060316292756, + "grad_norm": 1.3488397187040957, + "learning_rate": 1.1821680174345582e-05, + "loss": 0.7642, + "step": 14958 + }, + { + "epoch": 0.45847125168566877, + "grad_norm": 1.539391361633716, + "learning_rate": 1.1820704139054422e-05, + "loss": 0.8719, + "step": 14959 + }, + { + "epoch": 0.45850190020841, + "grad_norm": 0.6964901201357617, + "learning_rate": 1.1819728085823404e-05, + "loss": 0.6013, + "step": 14960 + }, + { + "epoch": 0.4585325487311512, + "grad_norm": 0.6982946927053583, + "learning_rate": 1.1818752014662132e-05, + "loss": 0.5842, + "step": 14961 + }, + { + "epoch": 0.4585631972538924, + "grad_norm": 1.5457011262608171, + "learning_rate": 1.1817775925580234e-05, + "loss": 0.715, + "step": 14962 + }, + { + "epoch": 0.4585938457766336, + "grad_norm": 1.4407446062865097, + "learning_rate": 1.1816799818587325e-05, + "loss": 0.8149, + "step": 14963 + }, + { + "epoch": 0.4586244942993748, + "grad_norm": 1.6078712092288432, + "learning_rate": 1.1815823693693022e-05, + "loss": 0.675, + "step": 14964 + }, + { + "epoch": 0.45865514282211595, + "grad_norm": 1.6449550032862719, + "learning_rate": 1.1814847550906943e-05, + "loss": 0.8401, + "step": 14965 + }, + { + "epoch": 0.45868579134485715, + "grad_norm": 1.490276253454286, + "learning_rate": 1.1813871390238709e-05, + "loss": 0.759, + "step": 14966 + }, + { + "epoch": 0.45871643986759836, + "grad_norm": 0.7080544287587689, + "learning_rate": 1.1812895211697935e-05, + "loss": 0.5862, + "step": 14967 + }, + { + "epoch": 0.45874708839033956, + "grad_norm": 1.556955450792327, + "learning_rate": 1.181191901529424e-05, + "loss": 0.7298, + "step": 14968 + }, + { + "epoch": 0.45877773691308077, + "grad_norm": 1.5710807560797027, + "learning_rate": 1.1810942801037244e-05, + "loss": 0.8861, + "step": 14969 + }, + { + "epoch": 0.458808385435822, + "grad_norm": 1.4944492067083714, + "learning_rate": 1.1809966568936565e-05, + "loss": 0.7273, + "step": 14970 + }, + { + "epoch": 0.4588390339585632, + "grad_norm": 0.6626500061770191, + "learning_rate": 1.1808990319001823e-05, + "loss": 0.5867, + "step": 14971 + }, + { + "epoch": 0.4588696824813044, + "grad_norm": 1.5148071956043305, + "learning_rate": 1.1808014051242633e-05, + "loss": 0.7795, + "step": 14972 + }, + { + "epoch": 0.4589003310040456, + "grad_norm": 1.4730351312162777, + "learning_rate": 1.1807037765668623e-05, + "loss": 0.7015, + "step": 14973 + }, + { + "epoch": 0.4589309795267868, + "grad_norm": 1.5215452361545492, + "learning_rate": 1.1806061462289402e-05, + "loss": 0.7119, + "step": 14974 + }, + { + "epoch": 0.458961628049528, + "grad_norm": 1.405154553965842, + "learning_rate": 1.1805085141114604e-05, + "loss": 0.7499, + "step": 14975 + }, + { + "epoch": 0.4589922765722692, + "grad_norm": 1.4547157537481301, + "learning_rate": 1.1804108802153831e-05, + "loss": 0.7276, + "step": 14976 + }, + { + "epoch": 0.4590229250950104, + "grad_norm": 1.3364833843695803, + "learning_rate": 1.1803132445416719e-05, + "loss": 0.7907, + "step": 14977 + }, + { + "epoch": 0.4590535736177516, + "grad_norm": 1.6589762881907189, + "learning_rate": 1.1802156070912877e-05, + "loss": 0.8256, + "step": 14978 + }, + { + "epoch": 0.4590842221404928, + "grad_norm": 1.528069717139868, + "learning_rate": 1.1801179678651932e-05, + "loss": 0.7461, + "step": 14979 + }, + { + "epoch": 0.45911487066323403, + "grad_norm": 1.1581063545646064, + "learning_rate": 1.1800203268643502e-05, + "loss": 0.6806, + "step": 14980 + }, + { + "epoch": 0.45914551918597524, + "grad_norm": 1.4870988036189068, + "learning_rate": 1.1799226840897212e-05, + "loss": 0.736, + "step": 14981 + }, + { + "epoch": 0.45917616770871644, + "grad_norm": 1.7147511651303788, + "learning_rate": 1.1798250395422674e-05, + "loss": 0.7169, + "step": 14982 + }, + { + "epoch": 0.45920681623145765, + "grad_norm": 1.6967287908651865, + "learning_rate": 1.1797273932229518e-05, + "loss": 0.6883, + "step": 14983 + }, + { + "epoch": 0.45923746475419885, + "grad_norm": 1.5781068419978879, + "learning_rate": 1.1796297451327363e-05, + "loss": 0.7227, + "step": 14984 + }, + { + "epoch": 0.45926811327694006, + "grad_norm": 1.4451111653617061, + "learning_rate": 1.1795320952725827e-05, + "loss": 0.6566, + "step": 14985 + }, + { + "epoch": 0.45929876179968127, + "grad_norm": 1.5052367529342332, + "learning_rate": 1.1794344436434538e-05, + "loss": 0.764, + "step": 14986 + }, + { + "epoch": 0.45932941032242247, + "grad_norm": 0.7323307763286248, + "learning_rate": 1.1793367902463108e-05, + "loss": 0.5964, + "step": 14987 + }, + { + "epoch": 0.4593600588451637, + "grad_norm": 1.5726387786006686, + "learning_rate": 1.1792391350821171e-05, + "loss": 0.7193, + "step": 14988 + }, + { + "epoch": 0.4593907073679049, + "grad_norm": 1.3056884825207922, + "learning_rate": 1.179141478151834e-05, + "loss": 0.6489, + "step": 14989 + }, + { + "epoch": 0.4594213558906461, + "grad_norm": 1.447067154358931, + "learning_rate": 1.1790438194564246e-05, + "loss": 0.6837, + "step": 14990 + }, + { + "epoch": 0.4594520044133873, + "grad_norm": 1.8147078917811732, + "learning_rate": 1.17894615899685e-05, + "loss": 0.8068, + "step": 14991 + }, + { + "epoch": 0.4594826529361285, + "grad_norm": 1.4881221958773787, + "learning_rate": 1.1788484967740735e-05, + "loss": 0.7415, + "step": 14992 + }, + { + "epoch": 0.4595133014588697, + "grad_norm": 1.7247472283589793, + "learning_rate": 1.1787508327890566e-05, + "loss": 0.8271, + "step": 14993 + }, + { + "epoch": 0.4595439499816109, + "grad_norm": 1.5602180114511306, + "learning_rate": 1.1786531670427626e-05, + "loss": 0.7505, + "step": 14994 + }, + { + "epoch": 0.4595745985043521, + "grad_norm": 1.3920687814230288, + "learning_rate": 1.1785554995361527e-05, + "loss": 0.7183, + "step": 14995 + }, + { + "epoch": 0.45960524702709327, + "grad_norm": 1.5704906076476999, + "learning_rate": 1.1784578302701902e-05, + "loss": 0.7217, + "step": 14996 + }, + { + "epoch": 0.4596358955498345, + "grad_norm": 1.442874329018756, + "learning_rate": 1.1783601592458367e-05, + "loss": 0.6834, + "step": 14997 + }, + { + "epoch": 0.4596665440725757, + "grad_norm": 1.7278589296674292, + "learning_rate": 1.178262486464055e-05, + "loss": 0.7874, + "step": 14998 + }, + { + "epoch": 0.4596971925953169, + "grad_norm": 1.44199555618131, + "learning_rate": 1.1781648119258075e-05, + "loss": 0.6945, + "step": 14999 + }, + { + "epoch": 0.4597278411180581, + "grad_norm": 1.4469576381585079, + "learning_rate": 1.1780671356320563e-05, + "loss": 0.7242, + "step": 15000 + }, + { + "epoch": 0.4597584896407993, + "grad_norm": 1.6318018654642896, + "learning_rate": 1.1779694575837643e-05, + "loss": 0.6518, + "step": 15001 + }, + { + "epoch": 0.4597891381635405, + "grad_norm": 0.6716129025738173, + "learning_rate": 1.1778717777818937e-05, + "loss": 0.5748, + "step": 15002 + }, + { + "epoch": 0.4598197866862817, + "grad_norm": 1.235198196194769, + "learning_rate": 1.1777740962274072e-05, + "loss": 0.6588, + "step": 15003 + }, + { + "epoch": 0.4598504352090229, + "grad_norm": 1.4277064491865328, + "learning_rate": 1.1776764129212666e-05, + "loss": 0.7134, + "step": 15004 + }, + { + "epoch": 0.4598810837317641, + "grad_norm": 1.4077884842983008, + "learning_rate": 1.1775787278644349e-05, + "loss": 0.7019, + "step": 15005 + }, + { + "epoch": 0.4599117322545053, + "grad_norm": 1.382743750810091, + "learning_rate": 1.1774810410578747e-05, + "loss": 0.7197, + "step": 15006 + }, + { + "epoch": 0.45994238077724653, + "grad_norm": 1.4173252800987235, + "learning_rate": 1.1773833525025484e-05, + "loss": 0.6968, + "step": 15007 + }, + { + "epoch": 0.45997302929998773, + "grad_norm": 1.4913720670457131, + "learning_rate": 1.1772856621994184e-05, + "loss": 0.7136, + "step": 15008 + }, + { + "epoch": 0.46000367782272894, + "grad_norm": 1.6834583185743535, + "learning_rate": 1.1771879701494475e-05, + "loss": 0.7497, + "step": 15009 + }, + { + "epoch": 0.46003432634547015, + "grad_norm": 1.6222734357072996, + "learning_rate": 1.1770902763535981e-05, + "loss": 0.8214, + "step": 15010 + }, + { + "epoch": 0.46006497486821135, + "grad_norm": 1.4915827717741785, + "learning_rate": 1.1769925808128328e-05, + "loss": 0.5791, + "step": 15011 + }, + { + "epoch": 0.46009562339095256, + "grad_norm": 1.3802537495770324, + "learning_rate": 1.1768948835281146e-05, + "loss": 0.7468, + "step": 15012 + }, + { + "epoch": 0.46012627191369376, + "grad_norm": 1.4830153016695373, + "learning_rate": 1.1767971845004058e-05, + "loss": 0.726, + "step": 15013 + }, + { + "epoch": 0.46015692043643497, + "grad_norm": 1.3806653989326967, + "learning_rate": 1.1766994837306691e-05, + "loss": 0.7451, + "step": 15014 + }, + { + "epoch": 0.4601875689591762, + "grad_norm": 1.432738611115653, + "learning_rate": 1.1766017812198672e-05, + "loss": 0.7854, + "step": 15015 + }, + { + "epoch": 0.4602182174819174, + "grad_norm": 1.3608304048819424, + "learning_rate": 1.1765040769689626e-05, + "loss": 0.6866, + "step": 15016 + }, + { + "epoch": 0.4602488660046586, + "grad_norm": 0.6942386765237524, + "learning_rate": 1.1764063709789185e-05, + "loss": 0.5929, + "step": 15017 + }, + { + "epoch": 0.4602795145273998, + "grad_norm": 0.6818468776682427, + "learning_rate": 1.176308663250697e-05, + "loss": 0.5658, + "step": 15018 + }, + { + "epoch": 0.460310163050141, + "grad_norm": 1.4640318453445447, + "learning_rate": 1.1762109537852611e-05, + "loss": 0.6901, + "step": 15019 + }, + { + "epoch": 0.4603408115728822, + "grad_norm": 1.4005278015853517, + "learning_rate": 1.1761132425835735e-05, + "loss": 0.7242, + "step": 15020 + }, + { + "epoch": 0.4603714600956234, + "grad_norm": 1.469131148358626, + "learning_rate": 1.1760155296465973e-05, + "loss": 0.7146, + "step": 15021 + }, + { + "epoch": 0.4604021086183646, + "grad_norm": 0.6806052259656472, + "learning_rate": 1.1759178149752952e-05, + "loss": 0.6047, + "step": 15022 + }, + { + "epoch": 0.4604327571411058, + "grad_norm": 1.617434433483226, + "learning_rate": 1.1758200985706293e-05, + "loss": 0.5831, + "step": 15023 + }, + { + "epoch": 0.460463405663847, + "grad_norm": 1.572753164801552, + "learning_rate": 1.1757223804335635e-05, + "loss": 0.7634, + "step": 15024 + }, + { + "epoch": 0.46049405418658823, + "grad_norm": 1.4986467777664636, + "learning_rate": 1.17562466056506e-05, + "loss": 0.7075, + "step": 15025 + }, + { + "epoch": 0.46052470270932944, + "grad_norm": 1.3370448697575132, + "learning_rate": 1.1755269389660815e-05, + "loss": 0.7237, + "step": 15026 + }, + { + "epoch": 0.4605553512320706, + "grad_norm": 1.534638303602282, + "learning_rate": 1.1754292156375914e-05, + "loss": 0.7216, + "step": 15027 + }, + { + "epoch": 0.4605859997548118, + "grad_norm": 1.5394846987580169, + "learning_rate": 1.1753314905805524e-05, + "loss": 0.7367, + "step": 15028 + }, + { + "epoch": 0.460616648277553, + "grad_norm": 0.6688401330337409, + "learning_rate": 1.1752337637959275e-05, + "loss": 0.5551, + "step": 15029 + }, + { + "epoch": 0.4606472968002942, + "grad_norm": 1.4132089649442803, + "learning_rate": 1.1751360352846792e-05, + "loss": 0.7102, + "step": 15030 + }, + { + "epoch": 0.4606779453230354, + "grad_norm": 0.6549939757638976, + "learning_rate": 1.1750383050477709e-05, + "loss": 0.5993, + "step": 15031 + }, + { + "epoch": 0.4607085938457766, + "grad_norm": 1.4009051024211723, + "learning_rate": 1.1749405730861652e-05, + "loss": 0.7661, + "step": 15032 + }, + { + "epoch": 0.4607392423685178, + "grad_norm": 0.6775019581078202, + "learning_rate": 1.1748428394008256e-05, + "loss": 0.6109, + "step": 15033 + }, + { + "epoch": 0.460769890891259, + "grad_norm": 1.4855039204821312, + "learning_rate": 1.1747451039927144e-05, + "loss": 0.6762, + "step": 15034 + }, + { + "epoch": 0.46080053941400023, + "grad_norm": 0.686920349175541, + "learning_rate": 1.1746473668627952e-05, + "loss": 0.612, + "step": 15035 + }, + { + "epoch": 0.46083118793674144, + "grad_norm": 1.4914009723634756, + "learning_rate": 1.1745496280120305e-05, + "loss": 0.7136, + "step": 15036 + }, + { + "epoch": 0.46086183645948264, + "grad_norm": 1.5616987541455334, + "learning_rate": 1.1744518874413841e-05, + "loss": 0.7927, + "step": 15037 + }, + { + "epoch": 0.46089248498222385, + "grad_norm": 1.476143353871956, + "learning_rate": 1.1743541451518186e-05, + "loss": 0.7689, + "step": 15038 + }, + { + "epoch": 0.46092313350496505, + "grad_norm": 1.3750048985162133, + "learning_rate": 1.1742564011442968e-05, + "loss": 0.7799, + "step": 15039 + }, + { + "epoch": 0.46095378202770626, + "grad_norm": 1.6471835808381177, + "learning_rate": 1.1741586554197824e-05, + "loss": 0.7645, + "step": 15040 + }, + { + "epoch": 0.46098443055044747, + "grad_norm": 0.6521216543285581, + "learning_rate": 1.1740609079792378e-05, + "loss": 0.5803, + "step": 15041 + }, + { + "epoch": 0.46101507907318867, + "grad_norm": 1.466511050756524, + "learning_rate": 1.1739631588236269e-05, + "loss": 0.6556, + "step": 15042 + }, + { + "epoch": 0.4610457275959299, + "grad_norm": 0.6674873593216744, + "learning_rate": 1.1738654079539122e-05, + "loss": 0.5875, + "step": 15043 + }, + { + "epoch": 0.4610763761186711, + "grad_norm": 1.306064888154778, + "learning_rate": 1.1737676553710575e-05, + "loss": 0.7787, + "step": 15044 + }, + { + "epoch": 0.4611070246414123, + "grad_norm": 1.5515800036573733, + "learning_rate": 1.1736699010760254e-05, + "loss": 0.7395, + "step": 15045 + }, + { + "epoch": 0.4611376731641535, + "grad_norm": 1.4792495708263576, + "learning_rate": 1.1735721450697792e-05, + "loss": 0.6743, + "step": 15046 + }, + { + "epoch": 0.4611683216868947, + "grad_norm": 1.4008184862688018, + "learning_rate": 1.1734743873532824e-05, + "loss": 0.79, + "step": 15047 + }, + { + "epoch": 0.4611989702096359, + "grad_norm": 1.4812881395045503, + "learning_rate": 1.1733766279274984e-05, + "loss": 0.6596, + "step": 15048 + }, + { + "epoch": 0.4612296187323771, + "grad_norm": 1.6116767193701849, + "learning_rate": 1.1732788667933896e-05, + "loss": 0.6637, + "step": 15049 + }, + { + "epoch": 0.4612602672551183, + "grad_norm": 1.6216423119892216, + "learning_rate": 1.1731811039519202e-05, + "loss": 0.7112, + "step": 15050 + }, + { + "epoch": 0.4612909157778595, + "grad_norm": 0.6829690140877517, + "learning_rate": 1.1730833394040526e-05, + "loss": 0.5882, + "step": 15051 + }, + { + "epoch": 0.46132156430060073, + "grad_norm": 1.4608424309084944, + "learning_rate": 1.1729855731507509e-05, + "loss": 0.6767, + "step": 15052 + }, + { + "epoch": 0.46135221282334193, + "grad_norm": 1.4760713036302302, + "learning_rate": 1.172887805192978e-05, + "loss": 0.751, + "step": 15053 + }, + { + "epoch": 0.46138286134608314, + "grad_norm": 0.6873169557299631, + "learning_rate": 1.1727900355316972e-05, + "loss": 0.588, + "step": 15054 + }, + { + "epoch": 0.46141350986882435, + "grad_norm": 1.324736203425559, + "learning_rate": 1.1726922641678721e-05, + "loss": 0.7301, + "step": 15055 + }, + { + "epoch": 0.46144415839156555, + "grad_norm": 1.484956647905212, + "learning_rate": 1.1725944911024661e-05, + "loss": 0.6835, + "step": 15056 + }, + { + "epoch": 0.46147480691430676, + "grad_norm": 1.5592314482618925, + "learning_rate": 1.1724967163364422e-05, + "loss": 0.6703, + "step": 15057 + }, + { + "epoch": 0.4615054554370479, + "grad_norm": 0.6568595556267732, + "learning_rate": 1.172398939870764e-05, + "loss": 0.5972, + "step": 15058 + }, + { + "epoch": 0.4615361039597891, + "grad_norm": 1.3562906816291178, + "learning_rate": 1.172301161706395e-05, + "loss": 0.6725, + "step": 15059 + }, + { + "epoch": 0.4615667524825303, + "grad_norm": 1.3714734188689643, + "learning_rate": 1.1722033818442987e-05, + "loss": 0.676, + "step": 15060 + }, + { + "epoch": 0.4615974010052715, + "grad_norm": 1.4517302075367278, + "learning_rate": 1.1721056002854386e-05, + "loss": 0.7165, + "step": 15061 + }, + { + "epoch": 0.46162804952801273, + "grad_norm": 1.6738148425595476, + "learning_rate": 1.1720078170307775e-05, + "loss": 0.7316, + "step": 15062 + }, + { + "epoch": 0.46165869805075394, + "grad_norm": 0.7055600328963736, + "learning_rate": 1.1719100320812795e-05, + "loss": 0.5639, + "step": 15063 + }, + { + "epoch": 0.46168934657349514, + "grad_norm": 1.4301431408596237, + "learning_rate": 1.1718122454379082e-05, + "loss": 0.6254, + "step": 15064 + }, + { + "epoch": 0.46171999509623635, + "grad_norm": 1.5666263794369455, + "learning_rate": 1.1717144571016267e-05, + "loss": 0.7277, + "step": 15065 + }, + { + "epoch": 0.46175064361897755, + "grad_norm": 1.6118083941860513, + "learning_rate": 1.1716166670733986e-05, + "loss": 0.7896, + "step": 15066 + }, + { + "epoch": 0.46178129214171876, + "grad_norm": 1.5397147568953748, + "learning_rate": 1.1715188753541877e-05, + "loss": 0.7881, + "step": 15067 + }, + { + "epoch": 0.46181194066445996, + "grad_norm": 1.4020008021547543, + "learning_rate": 1.1714210819449576e-05, + "loss": 0.685, + "step": 15068 + }, + { + "epoch": 0.46184258918720117, + "grad_norm": 1.4885413358601407, + "learning_rate": 1.1713232868466715e-05, + "loss": 0.7118, + "step": 15069 + }, + { + "epoch": 0.4618732377099424, + "grad_norm": 1.5097437704034704, + "learning_rate": 1.1712254900602933e-05, + "loss": 0.7503, + "step": 15070 + }, + { + "epoch": 0.4619038862326836, + "grad_norm": 1.3288111791147281, + "learning_rate": 1.1711276915867866e-05, + "loss": 0.6849, + "step": 15071 + }, + { + "epoch": 0.4619345347554248, + "grad_norm": 1.4182677283075813, + "learning_rate": 1.171029891427115e-05, + "loss": 0.7308, + "step": 15072 + }, + { + "epoch": 0.461965183278166, + "grad_norm": 1.5328364562373726, + "learning_rate": 1.1709320895822417e-05, + "loss": 0.8117, + "step": 15073 + }, + { + "epoch": 0.4619958318009072, + "grad_norm": 1.6259211560180857, + "learning_rate": 1.1708342860531313e-05, + "loss": 0.5787, + "step": 15074 + }, + { + "epoch": 0.4620264803236484, + "grad_norm": 1.3869121351797244, + "learning_rate": 1.1707364808407466e-05, + "loss": 0.6348, + "step": 15075 + }, + { + "epoch": 0.4620571288463896, + "grad_norm": 1.3883802072055869, + "learning_rate": 1.170638673946052e-05, + "loss": 0.6358, + "step": 15076 + }, + { + "epoch": 0.4620877773691308, + "grad_norm": 1.4828154618481673, + "learning_rate": 1.1705408653700106e-05, + "loss": 0.6801, + "step": 15077 + }, + { + "epoch": 0.462118425891872, + "grad_norm": 1.6325898096149833, + "learning_rate": 1.1704430551135866e-05, + "loss": 0.728, + "step": 15078 + }, + { + "epoch": 0.4621490744146132, + "grad_norm": 1.8071623673434718, + "learning_rate": 1.1703452431777436e-05, + "loss": 0.689, + "step": 15079 + }, + { + "epoch": 0.46217972293735443, + "grad_norm": 1.4264520833858614, + "learning_rate": 1.1702474295634452e-05, + "loss": 0.822, + "step": 15080 + }, + { + "epoch": 0.46221037146009564, + "grad_norm": 1.3733799008020162, + "learning_rate": 1.1701496142716553e-05, + "loss": 0.7331, + "step": 15081 + }, + { + "epoch": 0.46224101998283684, + "grad_norm": 1.4631240934361753, + "learning_rate": 1.1700517973033376e-05, + "loss": 0.7291, + "step": 15082 + }, + { + "epoch": 0.46227166850557805, + "grad_norm": 1.368417093453645, + "learning_rate": 1.1699539786594565e-05, + "loss": 0.6799, + "step": 15083 + }, + { + "epoch": 0.46230231702831925, + "grad_norm": 1.509788063270724, + "learning_rate": 1.1698561583409748e-05, + "loss": 0.7756, + "step": 15084 + }, + { + "epoch": 0.46233296555106046, + "grad_norm": 0.6769439853066074, + "learning_rate": 1.1697583363488573e-05, + "loss": 0.5835, + "step": 15085 + }, + { + "epoch": 0.46236361407380167, + "grad_norm": 1.48834836422889, + "learning_rate": 1.1696605126840673e-05, + "loss": 0.7947, + "step": 15086 + }, + { + "epoch": 0.46239426259654287, + "grad_norm": 0.6576680161862548, + "learning_rate": 1.1695626873475691e-05, + "loss": 0.584, + "step": 15087 + }, + { + "epoch": 0.4624249111192841, + "grad_norm": 0.6807339707050668, + "learning_rate": 1.169464860340326e-05, + "loss": 0.5842, + "step": 15088 + }, + { + "epoch": 0.4624555596420252, + "grad_norm": 1.6023640695135326, + "learning_rate": 1.1693670316633026e-05, + "loss": 0.7817, + "step": 15089 + }, + { + "epoch": 0.46248620816476643, + "grad_norm": 1.3873377529012316, + "learning_rate": 1.1692692013174624e-05, + "loss": 0.7573, + "step": 15090 + }, + { + "epoch": 0.46251685668750764, + "grad_norm": 1.4386002587432178, + "learning_rate": 1.1691713693037694e-05, + "loss": 0.6424, + "step": 15091 + }, + { + "epoch": 0.46254750521024884, + "grad_norm": 0.6792037319417125, + "learning_rate": 1.1690735356231875e-05, + "loss": 0.5711, + "step": 15092 + }, + { + "epoch": 0.46257815373299005, + "grad_norm": 1.5763285708392307, + "learning_rate": 1.1689757002766811e-05, + "loss": 0.8586, + "step": 15093 + }, + { + "epoch": 0.46260880225573126, + "grad_norm": 1.5602934517346354, + "learning_rate": 1.1688778632652138e-05, + "loss": 0.7171, + "step": 15094 + }, + { + "epoch": 0.46263945077847246, + "grad_norm": 1.625151245840571, + "learning_rate": 1.1687800245897493e-05, + "loss": 0.7717, + "step": 15095 + }, + { + "epoch": 0.46267009930121367, + "grad_norm": 1.6046831816125577, + "learning_rate": 1.1686821842512524e-05, + "loss": 0.7299, + "step": 15096 + }, + { + "epoch": 0.46270074782395487, + "grad_norm": 1.356585404285298, + "learning_rate": 1.1685843422506866e-05, + "loss": 0.6794, + "step": 15097 + }, + { + "epoch": 0.4627313963466961, + "grad_norm": 1.4617433712265702, + "learning_rate": 1.1684864985890166e-05, + "loss": 0.6528, + "step": 15098 + }, + { + "epoch": 0.4627620448694373, + "grad_norm": 1.4747714997357049, + "learning_rate": 1.1683886532672054e-05, + "loss": 0.6982, + "step": 15099 + }, + { + "epoch": 0.4627926933921785, + "grad_norm": 0.662131620916823, + "learning_rate": 1.1682908062862182e-05, + "loss": 0.5692, + "step": 15100 + }, + { + "epoch": 0.4628233419149197, + "grad_norm": 1.6305961316621689, + "learning_rate": 1.1681929576470182e-05, + "loss": 0.7062, + "step": 15101 + }, + { + "epoch": 0.4628539904376609, + "grad_norm": 1.6734587535732655, + "learning_rate": 1.1680951073505704e-05, + "loss": 0.8383, + "step": 15102 + }, + { + "epoch": 0.4628846389604021, + "grad_norm": 1.3565984065292784, + "learning_rate": 1.1679972553978382e-05, + "loss": 0.6737, + "step": 15103 + }, + { + "epoch": 0.4629152874831433, + "grad_norm": 1.3183968098325232, + "learning_rate": 1.1678994017897862e-05, + "loss": 0.6904, + "step": 15104 + }, + { + "epoch": 0.4629459360058845, + "grad_norm": 0.6556072656936868, + "learning_rate": 1.1678015465273782e-05, + "loss": 0.5524, + "step": 15105 + }, + { + "epoch": 0.4629765845286257, + "grad_norm": 1.3709764289725401, + "learning_rate": 1.1677036896115788e-05, + "loss": 0.7419, + "step": 15106 + }, + { + "epoch": 0.46300723305136693, + "grad_norm": 1.9040783897576248, + "learning_rate": 1.167605831043352e-05, + "loss": 0.8276, + "step": 15107 + }, + { + "epoch": 0.46303788157410813, + "grad_norm": 1.514539399802347, + "learning_rate": 1.1675079708236621e-05, + "loss": 0.8099, + "step": 15108 + }, + { + "epoch": 0.46306853009684934, + "grad_norm": 0.6362309722552969, + "learning_rate": 1.1674101089534733e-05, + "loss": 0.6032, + "step": 15109 + }, + { + "epoch": 0.46309917861959055, + "grad_norm": 1.5822409453235735, + "learning_rate": 1.16731224543375e-05, + "loss": 0.794, + "step": 15110 + }, + { + "epoch": 0.46312982714233175, + "grad_norm": 1.6210637093863287, + "learning_rate": 1.1672143802654562e-05, + "loss": 0.8106, + "step": 15111 + }, + { + "epoch": 0.46316047566507296, + "grad_norm": 1.4783636841466323, + "learning_rate": 1.1671165134495561e-05, + "loss": 0.6962, + "step": 15112 + }, + { + "epoch": 0.46319112418781416, + "grad_norm": 1.41069687170277, + "learning_rate": 1.1670186449870146e-05, + "loss": 0.7206, + "step": 15113 + }, + { + "epoch": 0.46322177271055537, + "grad_norm": 1.4773635774548688, + "learning_rate": 1.1669207748787956e-05, + "loss": 0.598, + "step": 15114 + }, + { + "epoch": 0.4632524212332966, + "grad_norm": 1.3277694028556357, + "learning_rate": 1.1668229031258635e-05, + "loss": 0.6515, + "step": 15115 + }, + { + "epoch": 0.4632830697560378, + "grad_norm": 1.705554777616745, + "learning_rate": 1.1667250297291823e-05, + "loss": 0.7739, + "step": 15116 + }, + { + "epoch": 0.463313718278779, + "grad_norm": 1.4097222871960604, + "learning_rate": 1.1666271546897174e-05, + "loss": 0.6812, + "step": 15117 + }, + { + "epoch": 0.4633443668015202, + "grad_norm": 1.5320217127305074, + "learning_rate": 1.1665292780084318e-05, + "loss": 0.6267, + "step": 15118 + }, + { + "epoch": 0.4633750153242614, + "grad_norm": 1.5590333715456295, + "learning_rate": 1.1664313996862911e-05, + "loss": 0.6906, + "step": 15119 + }, + { + "epoch": 0.46340566384700255, + "grad_norm": 1.393622786105968, + "learning_rate": 1.1663335197242589e-05, + "loss": 0.7401, + "step": 15120 + }, + { + "epoch": 0.46343631236974375, + "grad_norm": 1.418251273159604, + "learning_rate": 1.1662356381233002e-05, + "loss": 0.7079, + "step": 15121 + }, + { + "epoch": 0.46346696089248496, + "grad_norm": 1.6158689621251685, + "learning_rate": 1.1661377548843792e-05, + "loss": 0.6596, + "step": 15122 + }, + { + "epoch": 0.46349760941522616, + "grad_norm": 1.4633501432182867, + "learning_rate": 1.16603987000846e-05, + "loss": 0.6531, + "step": 15123 + }, + { + "epoch": 0.46352825793796737, + "grad_norm": 1.4541062980991974, + "learning_rate": 1.165941983496508e-05, + "loss": 0.6782, + "step": 15124 + }, + { + "epoch": 0.4635589064607086, + "grad_norm": 1.4728421743289997, + "learning_rate": 1.1658440953494871e-05, + "loss": 0.6316, + "step": 15125 + }, + { + "epoch": 0.4635895549834498, + "grad_norm": 1.479662489983449, + "learning_rate": 1.165746205568362e-05, + "loss": 0.6767, + "step": 15126 + }, + { + "epoch": 0.463620203506191, + "grad_norm": 1.4432492906296288, + "learning_rate": 1.1656483141540967e-05, + "loss": 0.77, + "step": 15127 + }, + { + "epoch": 0.4636508520289322, + "grad_norm": 1.3466867909975, + "learning_rate": 1.1655504211076565e-05, + "loss": 0.7131, + "step": 15128 + }, + { + "epoch": 0.4636815005516734, + "grad_norm": 1.4597627243778895, + "learning_rate": 1.1654525264300056e-05, + "loss": 0.6625, + "step": 15129 + }, + { + "epoch": 0.4637121490744146, + "grad_norm": 1.4748144560334275, + "learning_rate": 1.1653546301221086e-05, + "loss": 0.7516, + "step": 15130 + }, + { + "epoch": 0.4637427975971558, + "grad_norm": 1.540233182276424, + "learning_rate": 1.16525673218493e-05, + "loss": 0.7157, + "step": 15131 + }, + { + "epoch": 0.463773446119897, + "grad_norm": 1.4501210827838216, + "learning_rate": 1.1651588326194346e-05, + "loss": 0.7087, + "step": 15132 + }, + { + "epoch": 0.4638040946426382, + "grad_norm": 1.497946617959136, + "learning_rate": 1.165060931426587e-05, + "loss": 0.7597, + "step": 15133 + }, + { + "epoch": 0.4638347431653794, + "grad_norm": 0.6871170180657182, + "learning_rate": 1.1649630286073516e-05, + "loss": 0.5866, + "step": 15134 + }, + { + "epoch": 0.46386539168812063, + "grad_norm": 1.4620060110380315, + "learning_rate": 1.1648651241626936e-05, + "loss": 0.6744, + "step": 15135 + }, + { + "epoch": 0.46389604021086184, + "grad_norm": 1.5603039088842565, + "learning_rate": 1.1647672180935774e-05, + "loss": 0.8398, + "step": 15136 + }, + { + "epoch": 0.46392668873360304, + "grad_norm": 1.1790518379079995, + "learning_rate": 1.1646693104009675e-05, + "loss": 0.691, + "step": 15137 + }, + { + "epoch": 0.46395733725634425, + "grad_norm": 1.4173744695785604, + "learning_rate": 1.1645714010858284e-05, + "loss": 0.8318, + "step": 15138 + }, + { + "epoch": 0.46398798577908545, + "grad_norm": 0.7064518352082131, + "learning_rate": 1.1644734901491257e-05, + "loss": 0.6107, + "step": 15139 + }, + { + "epoch": 0.46401863430182666, + "grad_norm": 1.417481019394878, + "learning_rate": 1.1643755775918235e-05, + "loss": 0.6888, + "step": 15140 + }, + { + "epoch": 0.46404928282456787, + "grad_norm": 0.6445115630128723, + "learning_rate": 1.1642776634148867e-05, + "loss": 0.5543, + "step": 15141 + }, + { + "epoch": 0.46407993134730907, + "grad_norm": 1.7105019233117469, + "learning_rate": 1.1641797476192798e-05, + "loss": 0.7734, + "step": 15142 + }, + { + "epoch": 0.4641105798700503, + "grad_norm": 1.3857269809412178, + "learning_rate": 1.164081830205968e-05, + "loss": 0.7205, + "step": 15143 + }, + { + "epoch": 0.4641412283927915, + "grad_norm": 1.4776608446623938, + "learning_rate": 1.1639839111759158e-05, + "loss": 0.6365, + "step": 15144 + }, + { + "epoch": 0.4641718769155327, + "grad_norm": 1.3861927420641136, + "learning_rate": 1.1638859905300885e-05, + "loss": 0.7524, + "step": 15145 + }, + { + "epoch": 0.4642025254382739, + "grad_norm": 1.5047371077105511, + "learning_rate": 1.1637880682694503e-05, + "loss": 0.6551, + "step": 15146 + }, + { + "epoch": 0.4642331739610151, + "grad_norm": 1.555475889373813, + "learning_rate": 1.1636901443949664e-05, + "loss": 0.7554, + "step": 15147 + }, + { + "epoch": 0.4642638224837563, + "grad_norm": 1.4224868241589368, + "learning_rate": 1.1635922189076016e-05, + "loss": 0.7124, + "step": 15148 + }, + { + "epoch": 0.4642944710064975, + "grad_norm": 1.6853683679651994, + "learning_rate": 1.163494291808321e-05, + "loss": 0.8871, + "step": 15149 + }, + { + "epoch": 0.4643251195292387, + "grad_norm": 1.5879081438534375, + "learning_rate": 1.1633963630980892e-05, + "loss": 0.7342, + "step": 15150 + }, + { + "epoch": 0.46435576805197987, + "grad_norm": 1.5448956431331997, + "learning_rate": 1.163298432777871e-05, + "loss": 0.8018, + "step": 15151 + }, + { + "epoch": 0.4643864165747211, + "grad_norm": 1.579714702503979, + "learning_rate": 1.163200500848632e-05, + "loss": 0.779, + "step": 15152 + }, + { + "epoch": 0.4644170650974623, + "grad_norm": 0.7334698465829086, + "learning_rate": 1.1631025673113366e-05, + "loss": 0.5949, + "step": 15153 + }, + { + "epoch": 0.4644477136202035, + "grad_norm": 1.4980213478614883, + "learning_rate": 1.1630046321669498e-05, + "loss": 0.7046, + "step": 15154 + }, + { + "epoch": 0.4644783621429447, + "grad_norm": 1.4150236637521543, + "learning_rate": 1.1629066954164364e-05, + "loss": 0.7426, + "step": 15155 + }, + { + "epoch": 0.4645090106656859, + "grad_norm": 0.6790422101125164, + "learning_rate": 1.162808757060762e-05, + "loss": 0.6163, + "step": 15156 + }, + { + "epoch": 0.4645396591884271, + "grad_norm": 1.5114413563656077, + "learning_rate": 1.162710817100891e-05, + "loss": 0.6701, + "step": 15157 + }, + { + "epoch": 0.4645703077111683, + "grad_norm": 1.656943734400071, + "learning_rate": 1.1626128755377887e-05, + "loss": 0.685, + "step": 15158 + }, + { + "epoch": 0.4646009562339095, + "grad_norm": 1.3974569127427572, + "learning_rate": 1.1625149323724202e-05, + "loss": 0.6786, + "step": 15159 + }, + { + "epoch": 0.4646316047566507, + "grad_norm": 1.5447397281915762, + "learning_rate": 1.1624169876057507e-05, + "loss": 0.7762, + "step": 15160 + }, + { + "epoch": 0.4646622532793919, + "grad_norm": 1.5281665533397977, + "learning_rate": 1.162319041238745e-05, + "loss": 0.6951, + "step": 15161 + }, + { + "epoch": 0.46469290180213313, + "grad_norm": 1.4488555219783656, + "learning_rate": 1.162221093272368e-05, + "loss": 0.7218, + "step": 15162 + }, + { + "epoch": 0.46472355032487433, + "grad_norm": 1.511010878969732, + "learning_rate": 1.1621231437075853e-05, + "loss": 0.7102, + "step": 15163 + }, + { + "epoch": 0.46475419884761554, + "grad_norm": 0.7041047616911024, + "learning_rate": 1.1620251925453616e-05, + "loss": 0.5884, + "step": 15164 + }, + { + "epoch": 0.46478484737035675, + "grad_norm": 1.4218808738774795, + "learning_rate": 1.1619272397866626e-05, + "loss": 0.6609, + "step": 15165 + }, + { + "epoch": 0.46481549589309795, + "grad_norm": 1.5746848887761709, + "learning_rate": 1.1618292854324524e-05, + "loss": 0.7751, + "step": 15166 + }, + { + "epoch": 0.46484614441583916, + "grad_norm": 1.2957019465320545, + "learning_rate": 1.1617313294836977e-05, + "loss": 0.6454, + "step": 15167 + }, + { + "epoch": 0.46487679293858036, + "grad_norm": 1.5340618019115466, + "learning_rate": 1.1616333719413622e-05, + "loss": 0.6537, + "step": 15168 + }, + { + "epoch": 0.46490744146132157, + "grad_norm": 1.3584961432296314, + "learning_rate": 1.161535412806412e-05, + "loss": 0.6993, + "step": 15169 + }, + { + "epoch": 0.4649380899840628, + "grad_norm": 0.6838213632682726, + "learning_rate": 1.1614374520798117e-05, + "loss": 0.5874, + "step": 15170 + }, + { + "epoch": 0.464968738506804, + "grad_norm": 1.3681899991821174, + "learning_rate": 1.1613394897625275e-05, + "loss": 0.7765, + "step": 15171 + }, + { + "epoch": 0.4649993870295452, + "grad_norm": 0.6660220703773719, + "learning_rate": 1.1612415258555234e-05, + "loss": 0.5823, + "step": 15172 + }, + { + "epoch": 0.4650300355522864, + "grad_norm": 1.5999014746163545, + "learning_rate": 1.1611435603597656e-05, + "loss": 0.6715, + "step": 15173 + }, + { + "epoch": 0.4650606840750276, + "grad_norm": 1.579886244384814, + "learning_rate": 1.161045593276219e-05, + "loss": 0.7778, + "step": 15174 + }, + { + "epoch": 0.4650913325977688, + "grad_norm": 1.6460866645884533, + "learning_rate": 1.1609476246058491e-05, + "loss": 0.6793, + "step": 15175 + }, + { + "epoch": 0.46512198112051, + "grad_norm": 1.4387513373648526, + "learning_rate": 1.1608496543496209e-05, + "loss": 0.7649, + "step": 15176 + }, + { + "epoch": 0.4651526296432512, + "grad_norm": 1.5876755466708066, + "learning_rate": 1.1607516825085e-05, + "loss": 0.7076, + "step": 15177 + }, + { + "epoch": 0.4651832781659924, + "grad_norm": 1.5866650568872285, + "learning_rate": 1.1606537090834515e-05, + "loss": 0.7896, + "step": 15178 + }, + { + "epoch": 0.4652139266887336, + "grad_norm": 0.6606341741629829, + "learning_rate": 1.160555734075441e-05, + "loss": 0.5895, + "step": 15179 + }, + { + "epoch": 0.46524457521147483, + "grad_norm": 0.6529270675638501, + "learning_rate": 1.1604577574854339e-05, + "loss": 0.597, + "step": 15180 + }, + { + "epoch": 0.46527522373421604, + "grad_norm": 1.4513959215872412, + "learning_rate": 1.160359779314395e-05, + "loss": 0.8087, + "step": 15181 + }, + { + "epoch": 0.4653058722569572, + "grad_norm": 1.5347961055873611, + "learning_rate": 1.1602617995632907e-05, + "loss": 0.7047, + "step": 15182 + }, + { + "epoch": 0.4653365207796984, + "grad_norm": 1.4376951769321096, + "learning_rate": 1.1601638182330857e-05, + "loss": 0.7174, + "step": 15183 + }, + { + "epoch": 0.4653671693024396, + "grad_norm": 1.6017574599295432, + "learning_rate": 1.1600658353247456e-05, + "loss": 0.8116, + "step": 15184 + }, + { + "epoch": 0.4653978178251808, + "grad_norm": 1.4240039684528194, + "learning_rate": 1.1599678508392358e-05, + "loss": 0.7483, + "step": 15185 + }, + { + "epoch": 0.465428466347922, + "grad_norm": 1.4317616867634313, + "learning_rate": 1.159869864777522e-05, + "loss": 0.7896, + "step": 15186 + }, + { + "epoch": 0.4654591148706632, + "grad_norm": 1.5454734722626555, + "learning_rate": 1.1597718771405695e-05, + "loss": 0.6899, + "step": 15187 + }, + { + "epoch": 0.4654897633934044, + "grad_norm": 1.2837961389610228, + "learning_rate": 1.1596738879293436e-05, + "loss": 0.6521, + "step": 15188 + }, + { + "epoch": 0.4655204119161456, + "grad_norm": 1.5396043111249442, + "learning_rate": 1.1595758971448101e-05, + "loss": 0.8089, + "step": 15189 + }, + { + "epoch": 0.46555106043888683, + "grad_norm": 1.5027955676681644, + "learning_rate": 1.1594779047879348e-05, + "loss": 0.8059, + "step": 15190 + }, + { + "epoch": 0.46558170896162804, + "grad_norm": 1.4714949115163662, + "learning_rate": 1.1593799108596827e-05, + "loss": 0.7195, + "step": 15191 + }, + { + "epoch": 0.46561235748436924, + "grad_norm": 1.4088741652597434, + "learning_rate": 1.1592819153610191e-05, + "loss": 0.7139, + "step": 15192 + }, + { + "epoch": 0.46564300600711045, + "grad_norm": 1.569906701397615, + "learning_rate": 1.1591839182929106e-05, + "loss": 0.813, + "step": 15193 + }, + { + "epoch": 0.46567365452985165, + "grad_norm": 1.4892927811904473, + "learning_rate": 1.159085919656322e-05, + "loss": 0.6411, + "step": 15194 + }, + { + "epoch": 0.46570430305259286, + "grad_norm": 1.4839431424098741, + "learning_rate": 1.1589879194522194e-05, + "loss": 0.7343, + "step": 15195 + }, + { + "epoch": 0.46573495157533407, + "grad_norm": 1.5866304404938842, + "learning_rate": 1.1588899176815675e-05, + "loss": 0.7014, + "step": 15196 + }, + { + "epoch": 0.46576560009807527, + "grad_norm": 1.3949339897246764, + "learning_rate": 1.1587919143453332e-05, + "loss": 0.7556, + "step": 15197 + }, + { + "epoch": 0.4657962486208165, + "grad_norm": 1.368428585368242, + "learning_rate": 1.1586939094444813e-05, + "loss": 0.6456, + "step": 15198 + }, + { + "epoch": 0.4658268971435577, + "grad_norm": 1.4503634395609037, + "learning_rate": 1.158595902979978e-05, + "loss": 0.7398, + "step": 15199 + }, + { + "epoch": 0.4658575456662989, + "grad_norm": 1.435812323625321, + "learning_rate": 1.1584978949527883e-05, + "loss": 0.8146, + "step": 15200 + }, + { + "epoch": 0.4658881941890401, + "grad_norm": 1.3957430411251905, + "learning_rate": 1.1583998853638785e-05, + "loss": 0.6655, + "step": 15201 + }, + { + "epoch": 0.4659188427117813, + "grad_norm": 1.429090338326244, + "learning_rate": 1.158301874214214e-05, + "loss": 0.6832, + "step": 15202 + }, + { + "epoch": 0.4659494912345225, + "grad_norm": 1.4626231978654722, + "learning_rate": 1.1582038615047607e-05, + "loss": 0.6845, + "step": 15203 + }, + { + "epoch": 0.4659801397572637, + "grad_norm": 1.4659570048099675, + "learning_rate": 1.1581058472364842e-05, + "loss": 0.797, + "step": 15204 + }, + { + "epoch": 0.4660107882800049, + "grad_norm": 1.334872575923329, + "learning_rate": 1.1580078314103501e-05, + "loss": 0.6682, + "step": 15205 + }, + { + "epoch": 0.4660414368027461, + "grad_norm": 1.3929836484020157, + "learning_rate": 1.157909814027325e-05, + "loss": 0.6328, + "step": 15206 + }, + { + "epoch": 0.46607208532548733, + "grad_norm": 1.3563630325419833, + "learning_rate": 1.1578117950883737e-05, + "loss": 0.7919, + "step": 15207 + }, + { + "epoch": 0.46610273384822853, + "grad_norm": 1.5126278549298218, + "learning_rate": 1.1577137745944624e-05, + "loss": 0.7054, + "step": 15208 + }, + { + "epoch": 0.46613338237096974, + "grad_norm": 1.6234255628732124, + "learning_rate": 1.157615752546557e-05, + "loss": 0.6505, + "step": 15209 + }, + { + "epoch": 0.46616403089371095, + "grad_norm": 1.5831925143117147, + "learning_rate": 1.1575177289456235e-05, + "loss": 0.7567, + "step": 15210 + }, + { + "epoch": 0.46619467941645215, + "grad_norm": 0.730357828340098, + "learning_rate": 1.1574197037926271e-05, + "loss": 0.6043, + "step": 15211 + }, + { + "epoch": 0.46622532793919336, + "grad_norm": 1.475301904189449, + "learning_rate": 1.1573216770885343e-05, + "loss": 0.6455, + "step": 15212 + }, + { + "epoch": 0.4662559764619345, + "grad_norm": 1.4391838458538866, + "learning_rate": 1.1572236488343104e-05, + "loss": 0.7452, + "step": 15213 + }, + { + "epoch": 0.4662866249846757, + "grad_norm": 1.4697503664053948, + "learning_rate": 1.1571256190309223e-05, + "loss": 0.6179, + "step": 15214 + }, + { + "epoch": 0.4663172735074169, + "grad_norm": 1.2952670295169841, + "learning_rate": 1.1570275876793348e-05, + "loss": 0.6953, + "step": 15215 + }, + { + "epoch": 0.4663479220301581, + "grad_norm": 1.6156894004243614, + "learning_rate": 1.1569295547805148e-05, + "loss": 0.7274, + "step": 15216 + }, + { + "epoch": 0.46637857055289933, + "grad_norm": 1.447705738303238, + "learning_rate": 1.1568315203354272e-05, + "loss": 0.7867, + "step": 15217 + }, + { + "epoch": 0.46640921907564054, + "grad_norm": 1.4620724955276394, + "learning_rate": 1.1567334843450389e-05, + "loss": 0.7177, + "step": 15218 + }, + { + "epoch": 0.46643986759838174, + "grad_norm": 1.488523728739732, + "learning_rate": 1.1566354468103152e-05, + "loss": 0.7674, + "step": 15219 + }, + { + "epoch": 0.46647051612112295, + "grad_norm": 0.7080270661560433, + "learning_rate": 1.1565374077322222e-05, + "loss": 0.5921, + "step": 15220 + }, + { + "epoch": 0.46650116464386415, + "grad_norm": 1.4269917434419863, + "learning_rate": 1.1564393671117267e-05, + "loss": 0.7154, + "step": 15221 + }, + { + "epoch": 0.46653181316660536, + "grad_norm": 1.384560749972437, + "learning_rate": 1.1563413249497936e-05, + "loss": 0.6848, + "step": 15222 + }, + { + "epoch": 0.46656246168934656, + "grad_norm": 1.5691616090304497, + "learning_rate": 1.1562432812473897e-05, + "loss": 0.7735, + "step": 15223 + }, + { + "epoch": 0.46659311021208777, + "grad_norm": 1.446874307430498, + "learning_rate": 1.1561452360054803e-05, + "loss": 0.7543, + "step": 15224 + }, + { + "epoch": 0.466623758734829, + "grad_norm": 0.6812726054786187, + "learning_rate": 1.1560471892250327e-05, + "loss": 0.6066, + "step": 15225 + }, + { + "epoch": 0.4666544072575702, + "grad_norm": 1.475481789531489, + "learning_rate": 1.1559491409070114e-05, + "loss": 0.7617, + "step": 15226 + }, + { + "epoch": 0.4666850557803114, + "grad_norm": 1.3405936211177583, + "learning_rate": 1.1558510910523837e-05, + "loss": 0.7278, + "step": 15227 + }, + { + "epoch": 0.4667157043030526, + "grad_norm": 1.5350413233345424, + "learning_rate": 1.1557530396621153e-05, + "loss": 0.78, + "step": 15228 + }, + { + "epoch": 0.4667463528257938, + "grad_norm": 1.5372033353731964, + "learning_rate": 1.1556549867371725e-05, + "loss": 0.657, + "step": 15229 + }, + { + "epoch": 0.466777001348535, + "grad_norm": 1.4812612325492385, + "learning_rate": 1.1555569322785212e-05, + "loss": 0.7508, + "step": 15230 + }, + { + "epoch": 0.4668076498712762, + "grad_norm": 1.4046079115559864, + "learning_rate": 1.1554588762871272e-05, + "loss": 0.7038, + "step": 15231 + }, + { + "epoch": 0.4668382983940174, + "grad_norm": 1.674679173586877, + "learning_rate": 1.1553608187639578e-05, + "loss": 0.7378, + "step": 15232 + }, + { + "epoch": 0.4668689469167586, + "grad_norm": 1.428985287006202, + "learning_rate": 1.1552627597099782e-05, + "loss": 0.8555, + "step": 15233 + }, + { + "epoch": 0.4668995954394998, + "grad_norm": 1.5082554632909928, + "learning_rate": 1.1551646991261549e-05, + "loss": 0.6961, + "step": 15234 + }, + { + "epoch": 0.46693024396224103, + "grad_norm": 1.4299820679046715, + "learning_rate": 1.155066637013454e-05, + "loss": 0.8106, + "step": 15235 + }, + { + "epoch": 0.46696089248498224, + "grad_norm": 1.4713444782099419, + "learning_rate": 1.1549685733728419e-05, + "loss": 0.7843, + "step": 15236 + }, + { + "epoch": 0.46699154100772344, + "grad_norm": 0.6677440724089048, + "learning_rate": 1.1548705082052851e-05, + "loss": 0.5928, + "step": 15237 + }, + { + "epoch": 0.46702218953046465, + "grad_norm": 1.4674129584676738, + "learning_rate": 1.1547724415117493e-05, + "loss": 0.7537, + "step": 15238 + }, + { + "epoch": 0.46705283805320585, + "grad_norm": 1.437989151384795, + "learning_rate": 1.1546743732932009e-05, + "loss": 0.7377, + "step": 15239 + }, + { + "epoch": 0.46708348657594706, + "grad_norm": 1.5724801039013478, + "learning_rate": 1.1545763035506065e-05, + "loss": 0.7859, + "step": 15240 + }, + { + "epoch": 0.46711413509868827, + "grad_norm": 0.657032051882459, + "learning_rate": 1.1544782322849324e-05, + "loss": 0.5929, + "step": 15241 + }, + { + "epoch": 0.46714478362142947, + "grad_norm": 1.6460851879021983, + "learning_rate": 1.1543801594971447e-05, + "loss": 0.7239, + "step": 15242 + }, + { + "epoch": 0.4671754321441707, + "grad_norm": 1.5006652980005724, + "learning_rate": 1.1542820851882094e-05, + "loss": 0.657, + "step": 15243 + }, + { + "epoch": 0.4672060806669118, + "grad_norm": 1.369568155593432, + "learning_rate": 1.1541840093590937e-05, + "loss": 0.5675, + "step": 15244 + }, + { + "epoch": 0.46723672918965303, + "grad_norm": 1.5942161447192595, + "learning_rate": 1.1540859320107633e-05, + "loss": 0.7183, + "step": 15245 + }, + { + "epoch": 0.46726737771239424, + "grad_norm": 1.381992478878508, + "learning_rate": 1.1539878531441847e-05, + "loss": 0.663, + "step": 15246 + }, + { + "epoch": 0.46729802623513544, + "grad_norm": 1.4820087662990056, + "learning_rate": 1.1538897727603244e-05, + "loss": 0.7398, + "step": 15247 + }, + { + "epoch": 0.46732867475787665, + "grad_norm": 1.4853237046723704, + "learning_rate": 1.1537916908601489e-05, + "loss": 0.7252, + "step": 15248 + }, + { + "epoch": 0.46735932328061786, + "grad_norm": 1.413812684861656, + "learning_rate": 1.1536936074446247e-05, + "loss": 0.7278, + "step": 15249 + }, + { + "epoch": 0.46738997180335906, + "grad_norm": 1.4162654176458431, + "learning_rate": 1.1535955225147178e-05, + "loss": 0.74, + "step": 15250 + }, + { + "epoch": 0.46742062032610027, + "grad_norm": 1.377699871488421, + "learning_rate": 1.1534974360713949e-05, + "loss": 0.7176, + "step": 15251 + }, + { + "epoch": 0.4674512688488415, + "grad_norm": 0.7151396396617852, + "learning_rate": 1.1533993481156226e-05, + "loss": 0.6024, + "step": 15252 + }, + { + "epoch": 0.4674819173715827, + "grad_norm": 1.5432331818896106, + "learning_rate": 1.1533012586483674e-05, + "loss": 0.7064, + "step": 15253 + }, + { + "epoch": 0.4675125658943239, + "grad_norm": 1.3604112166433524, + "learning_rate": 1.1532031676705952e-05, + "loss": 0.6789, + "step": 15254 + }, + { + "epoch": 0.4675432144170651, + "grad_norm": 1.519341651337795, + "learning_rate": 1.1531050751832736e-05, + "loss": 0.7307, + "step": 15255 + }, + { + "epoch": 0.4675738629398063, + "grad_norm": 1.460900646570048, + "learning_rate": 1.153006981187368e-05, + "loss": 0.7768, + "step": 15256 + }, + { + "epoch": 0.4676045114625475, + "grad_norm": 1.5605446808183931, + "learning_rate": 1.1529088856838458e-05, + "loss": 0.708, + "step": 15257 + }, + { + "epoch": 0.4676351599852887, + "grad_norm": 1.461057085923205, + "learning_rate": 1.152810788673673e-05, + "loss": 0.7257, + "step": 15258 + }, + { + "epoch": 0.4676658085080299, + "grad_norm": 1.4719873280568856, + "learning_rate": 1.1527126901578167e-05, + "loss": 0.7898, + "step": 15259 + }, + { + "epoch": 0.4676964570307711, + "grad_norm": 1.458084312403256, + "learning_rate": 1.152614590137243e-05, + "loss": 0.724, + "step": 15260 + }, + { + "epoch": 0.4677271055535123, + "grad_norm": 1.563117025865054, + "learning_rate": 1.1525164886129185e-05, + "loss": 0.7227, + "step": 15261 + }, + { + "epoch": 0.46775775407625353, + "grad_norm": 0.684891712423295, + "learning_rate": 1.1524183855858105e-05, + "loss": 0.587, + "step": 15262 + }, + { + "epoch": 0.46778840259899473, + "grad_norm": 0.7087874302702041, + "learning_rate": 1.1523202810568845e-05, + "loss": 0.6212, + "step": 15263 + }, + { + "epoch": 0.46781905112173594, + "grad_norm": 1.524375260997005, + "learning_rate": 1.1522221750271085e-05, + "loss": 0.6562, + "step": 15264 + }, + { + "epoch": 0.46784969964447715, + "grad_norm": 1.4378600256823184, + "learning_rate": 1.1521240674974479e-05, + "loss": 0.6969, + "step": 15265 + }, + { + "epoch": 0.46788034816721835, + "grad_norm": 1.5100020163954904, + "learning_rate": 1.1520259584688702e-05, + "loss": 0.7746, + "step": 15266 + }, + { + "epoch": 0.46791099668995956, + "grad_norm": 1.7668067282165418, + "learning_rate": 1.1519278479423418e-05, + "loss": 0.7459, + "step": 15267 + }, + { + "epoch": 0.46794164521270076, + "grad_norm": 1.3874682997516508, + "learning_rate": 1.1518297359188297e-05, + "loss": 0.6147, + "step": 15268 + }, + { + "epoch": 0.46797229373544197, + "grad_norm": 1.5431423514414009, + "learning_rate": 1.1517316223992999e-05, + "loss": 0.7017, + "step": 15269 + }, + { + "epoch": 0.4680029422581832, + "grad_norm": 1.534690273666851, + "learning_rate": 1.1516335073847198e-05, + "loss": 0.6351, + "step": 15270 + }, + { + "epoch": 0.4680335907809244, + "grad_norm": 0.7164505449851052, + "learning_rate": 1.1515353908760561e-05, + "loss": 0.6072, + "step": 15271 + }, + { + "epoch": 0.4680642393036656, + "grad_norm": 1.5011282634373617, + "learning_rate": 1.1514372728742751e-05, + "loss": 0.7289, + "step": 15272 + }, + { + "epoch": 0.4680948878264068, + "grad_norm": 1.7328084567482553, + "learning_rate": 1.1513391533803442e-05, + "loss": 0.7972, + "step": 15273 + }, + { + "epoch": 0.468125536349148, + "grad_norm": 1.3873158205517326, + "learning_rate": 1.1512410323952297e-05, + "loss": 0.7617, + "step": 15274 + }, + { + "epoch": 0.46815618487188915, + "grad_norm": 1.4256801369090366, + "learning_rate": 1.151142909919899e-05, + "loss": 0.7687, + "step": 15275 + }, + { + "epoch": 0.46818683339463035, + "grad_norm": 1.6048886396537556, + "learning_rate": 1.151044785955318e-05, + "loss": 0.7085, + "step": 15276 + }, + { + "epoch": 0.46821748191737156, + "grad_norm": 0.667515845061626, + "learning_rate": 1.1509466605024544e-05, + "loss": 0.6162, + "step": 15277 + }, + { + "epoch": 0.46824813044011276, + "grad_norm": 1.664679531601182, + "learning_rate": 1.1508485335622744e-05, + "loss": 0.7357, + "step": 15278 + }, + { + "epoch": 0.46827877896285397, + "grad_norm": 1.5024803340653576, + "learning_rate": 1.1507504051357456e-05, + "loss": 0.7356, + "step": 15279 + }, + { + "epoch": 0.4683094274855952, + "grad_norm": 1.5494174775975806, + "learning_rate": 1.1506522752238343e-05, + "loss": 0.8377, + "step": 15280 + }, + { + "epoch": 0.4683400760083364, + "grad_norm": 1.403028122851907, + "learning_rate": 1.1505541438275076e-05, + "loss": 0.7326, + "step": 15281 + }, + { + "epoch": 0.4683707245310776, + "grad_norm": 0.6623270044110793, + "learning_rate": 1.1504560109477323e-05, + "loss": 0.5833, + "step": 15282 + }, + { + "epoch": 0.4684013730538188, + "grad_norm": 0.655569413006665, + "learning_rate": 1.1503578765854757e-05, + "loss": 0.5582, + "step": 15283 + }, + { + "epoch": 0.46843202157656, + "grad_norm": 0.6678486858465812, + "learning_rate": 1.1502597407417045e-05, + "loss": 0.6127, + "step": 15284 + }, + { + "epoch": 0.4684626700993012, + "grad_norm": 1.36599030011501, + "learning_rate": 1.1501616034173855e-05, + "loss": 0.6667, + "step": 15285 + }, + { + "epoch": 0.4684933186220424, + "grad_norm": 1.5335961448055682, + "learning_rate": 1.1500634646134855e-05, + "loss": 0.7242, + "step": 15286 + }, + { + "epoch": 0.4685239671447836, + "grad_norm": 0.6573063089281606, + "learning_rate": 1.149965324330972e-05, + "loss": 0.552, + "step": 15287 + }, + { + "epoch": 0.4685546156675248, + "grad_norm": 1.403680636458845, + "learning_rate": 1.1498671825708118e-05, + "loss": 0.7139, + "step": 15288 + }, + { + "epoch": 0.468585264190266, + "grad_norm": 1.5011455007839145, + "learning_rate": 1.149769039333972e-05, + "loss": 0.7817, + "step": 15289 + }, + { + "epoch": 0.46861591271300723, + "grad_norm": 1.4520124630560056, + "learning_rate": 1.1496708946214195e-05, + "loss": 0.7227, + "step": 15290 + }, + { + "epoch": 0.46864656123574844, + "grad_norm": 1.465732360185528, + "learning_rate": 1.1495727484341215e-05, + "loss": 0.7081, + "step": 15291 + }, + { + "epoch": 0.46867720975848964, + "grad_norm": 1.357931938863274, + "learning_rate": 1.1494746007730449e-05, + "loss": 0.6356, + "step": 15292 + }, + { + "epoch": 0.46870785828123085, + "grad_norm": 1.43945510100953, + "learning_rate": 1.1493764516391564e-05, + "loss": 0.6846, + "step": 15293 + }, + { + "epoch": 0.46873850680397205, + "grad_norm": 1.3539974144006082, + "learning_rate": 1.1492783010334239e-05, + "loss": 0.6641, + "step": 15294 + }, + { + "epoch": 0.46876915532671326, + "grad_norm": 1.5953822206750907, + "learning_rate": 1.149180148956814e-05, + "loss": 0.7509, + "step": 15295 + }, + { + "epoch": 0.46879980384945447, + "grad_norm": 1.2671622038602566, + "learning_rate": 1.149081995410294e-05, + "loss": 0.61, + "step": 15296 + }, + { + "epoch": 0.46883045237219567, + "grad_norm": 1.531846340060236, + "learning_rate": 1.1489838403948309e-05, + "loss": 0.7337, + "step": 15297 + }, + { + "epoch": 0.4688611008949369, + "grad_norm": 1.6425278653037865, + "learning_rate": 1.1488856839113918e-05, + "loss": 0.8203, + "step": 15298 + }, + { + "epoch": 0.4688917494176781, + "grad_norm": 1.481082078460968, + "learning_rate": 1.1487875259609443e-05, + "loss": 0.7913, + "step": 15299 + }, + { + "epoch": 0.4689223979404193, + "grad_norm": 1.5547118949127436, + "learning_rate": 1.1486893665444548e-05, + "loss": 0.6826, + "step": 15300 + }, + { + "epoch": 0.4689530464631605, + "grad_norm": 1.508348870134533, + "learning_rate": 1.148591205662891e-05, + "loss": 0.6784, + "step": 15301 + }, + { + "epoch": 0.4689836949859017, + "grad_norm": 1.4722498377796984, + "learning_rate": 1.1484930433172203e-05, + "loss": 0.7163, + "step": 15302 + }, + { + "epoch": 0.4690143435086429, + "grad_norm": 1.3741018690686564, + "learning_rate": 1.1483948795084095e-05, + "loss": 0.7351, + "step": 15303 + }, + { + "epoch": 0.4690449920313841, + "grad_norm": 1.4513890753491974, + "learning_rate": 1.1482967142374258e-05, + "loss": 0.7658, + "step": 15304 + }, + { + "epoch": 0.4690756405541253, + "grad_norm": 1.2992559237191388, + "learning_rate": 1.1481985475052369e-05, + "loss": 0.6704, + "step": 15305 + }, + { + "epoch": 0.4691062890768665, + "grad_norm": 1.686474754090249, + "learning_rate": 1.1481003793128098e-05, + "loss": 0.7045, + "step": 15306 + }, + { + "epoch": 0.4691369375996077, + "grad_norm": 1.3843365623867478, + "learning_rate": 1.1480022096611116e-05, + "loss": 0.6998, + "step": 15307 + }, + { + "epoch": 0.4691675861223489, + "grad_norm": 1.4275782295761745, + "learning_rate": 1.1479040385511097e-05, + "loss": 0.6863, + "step": 15308 + }, + { + "epoch": 0.4691982346450901, + "grad_norm": 0.748110783138801, + "learning_rate": 1.1478058659837718e-05, + "loss": 0.6038, + "step": 15309 + }, + { + "epoch": 0.4692288831678313, + "grad_norm": 1.3879041497241706, + "learning_rate": 1.1477076919600647e-05, + "loss": 0.5709, + "step": 15310 + }, + { + "epoch": 0.4692595316905725, + "grad_norm": 1.3906049672008691, + "learning_rate": 1.147609516480956e-05, + "loss": 0.6921, + "step": 15311 + }, + { + "epoch": 0.4692901802133137, + "grad_norm": 0.6780516644898689, + "learning_rate": 1.1475113395474127e-05, + "loss": 0.5901, + "step": 15312 + }, + { + "epoch": 0.4693208287360549, + "grad_norm": 1.525424581375564, + "learning_rate": 1.1474131611604026e-05, + "loss": 0.7064, + "step": 15313 + }, + { + "epoch": 0.4693514772587961, + "grad_norm": 1.2908552181369186, + "learning_rate": 1.147314981320893e-05, + "loss": 0.809, + "step": 15314 + }, + { + "epoch": 0.4693821257815373, + "grad_norm": 1.7160655213294889, + "learning_rate": 1.1472168000298509e-05, + "loss": 0.7616, + "step": 15315 + }, + { + "epoch": 0.4694127743042785, + "grad_norm": 1.3772078957629645, + "learning_rate": 1.1471186172882443e-05, + "loss": 0.6324, + "step": 15316 + }, + { + "epoch": 0.46944342282701973, + "grad_norm": 1.6038919987027211, + "learning_rate": 1.1470204330970401e-05, + "loss": 0.674, + "step": 15317 + }, + { + "epoch": 0.46947407134976094, + "grad_norm": 0.6899843963286929, + "learning_rate": 1.1469222474572064e-05, + "loss": 0.5664, + "step": 15318 + }, + { + "epoch": 0.46950471987250214, + "grad_norm": 0.6938390679149461, + "learning_rate": 1.1468240603697096e-05, + "loss": 0.5695, + "step": 15319 + }, + { + "epoch": 0.46953536839524335, + "grad_norm": 1.3900017248666325, + "learning_rate": 1.1467258718355183e-05, + "loss": 0.6136, + "step": 15320 + }, + { + "epoch": 0.46956601691798455, + "grad_norm": 1.476212960595195, + "learning_rate": 1.1466276818555993e-05, + "loss": 0.8212, + "step": 15321 + }, + { + "epoch": 0.46959666544072576, + "grad_norm": 1.4319348640857295, + "learning_rate": 1.14652949043092e-05, + "loss": 0.7575, + "step": 15322 + }, + { + "epoch": 0.46962731396346696, + "grad_norm": 1.4840764969730957, + "learning_rate": 1.1464312975624482e-05, + "loss": 0.7158, + "step": 15323 + }, + { + "epoch": 0.46965796248620817, + "grad_norm": 1.5552367677697478, + "learning_rate": 1.1463331032511515e-05, + "loss": 0.7112, + "step": 15324 + }, + { + "epoch": 0.4696886110089494, + "grad_norm": 1.585381651781177, + "learning_rate": 1.1462349074979973e-05, + "loss": 0.7978, + "step": 15325 + }, + { + "epoch": 0.4697192595316906, + "grad_norm": 1.4715364186107627, + "learning_rate": 1.1461367103039528e-05, + "loss": 0.6436, + "step": 15326 + }, + { + "epoch": 0.4697499080544318, + "grad_norm": 1.41592994177346, + "learning_rate": 1.1460385116699863e-05, + "loss": 0.6465, + "step": 15327 + }, + { + "epoch": 0.469780556577173, + "grad_norm": 0.656065825379607, + "learning_rate": 1.145940311597065e-05, + "loss": 0.5849, + "step": 15328 + }, + { + "epoch": 0.4698112050999142, + "grad_norm": 1.3979201624160509, + "learning_rate": 1.1458421100861564e-05, + "loss": 0.644, + "step": 15329 + }, + { + "epoch": 0.4698418536226554, + "grad_norm": 1.3887904637680948, + "learning_rate": 1.1457439071382278e-05, + "loss": 0.7146, + "step": 15330 + }, + { + "epoch": 0.4698725021453966, + "grad_norm": 1.6889033373868636, + "learning_rate": 1.1456457027542476e-05, + "loss": 0.6888, + "step": 15331 + }, + { + "epoch": 0.4699031506681378, + "grad_norm": 1.650369196617172, + "learning_rate": 1.1455474969351828e-05, + "loss": 0.8138, + "step": 15332 + }, + { + "epoch": 0.469933799190879, + "grad_norm": 1.3942982453694552, + "learning_rate": 1.1454492896820016e-05, + "loss": 0.6912, + "step": 15333 + }, + { + "epoch": 0.4699644477136202, + "grad_norm": 1.4468798848095235, + "learning_rate": 1.145351080995671e-05, + "loss": 0.6734, + "step": 15334 + }, + { + "epoch": 0.46999509623636143, + "grad_norm": 1.580761118723029, + "learning_rate": 1.145252870877159e-05, + "loss": 0.7558, + "step": 15335 + }, + { + "epoch": 0.47002574475910264, + "grad_norm": 1.5060317084341968, + "learning_rate": 1.1451546593274334e-05, + "loss": 0.6476, + "step": 15336 + }, + { + "epoch": 0.47005639328184384, + "grad_norm": 1.4546388354753241, + "learning_rate": 1.1450564463474621e-05, + "loss": 0.6496, + "step": 15337 + }, + { + "epoch": 0.470087041804585, + "grad_norm": 1.3989227534729791, + "learning_rate": 1.1449582319382122e-05, + "loss": 0.6861, + "step": 15338 + }, + { + "epoch": 0.4701176903273262, + "grad_norm": 1.5854778780994756, + "learning_rate": 1.1448600161006517e-05, + "loss": 0.6971, + "step": 15339 + }, + { + "epoch": 0.4701483388500674, + "grad_norm": 1.5839992855374572, + "learning_rate": 1.1447617988357484e-05, + "loss": 0.7827, + "step": 15340 + }, + { + "epoch": 0.4701789873728086, + "grad_norm": 1.4843305774887905, + "learning_rate": 1.1446635801444703e-05, + "loss": 0.6748, + "step": 15341 + }, + { + "epoch": 0.4702096358955498, + "grad_norm": 1.4284529459818482, + "learning_rate": 1.1445653600277848e-05, + "loss": 0.733, + "step": 15342 + }, + { + "epoch": 0.470240284418291, + "grad_norm": 1.4353211177869614, + "learning_rate": 1.1444671384866597e-05, + "loss": 0.7004, + "step": 15343 + }, + { + "epoch": 0.4702709329410322, + "grad_norm": 1.5648771273451647, + "learning_rate": 1.144368915522063e-05, + "loss": 0.8091, + "step": 15344 + }, + { + "epoch": 0.47030158146377343, + "grad_norm": 1.5782356897016436, + "learning_rate": 1.1442706911349625e-05, + "loss": 0.7909, + "step": 15345 + }, + { + "epoch": 0.47033222998651464, + "grad_norm": 1.6500992627780333, + "learning_rate": 1.1441724653263259e-05, + "loss": 0.8614, + "step": 15346 + }, + { + "epoch": 0.47036287850925584, + "grad_norm": 0.7016747801972283, + "learning_rate": 1.144074238097121e-05, + "loss": 0.561, + "step": 15347 + }, + { + "epoch": 0.47039352703199705, + "grad_norm": 1.497810174117287, + "learning_rate": 1.1439760094483163e-05, + "loss": 0.6915, + "step": 15348 + }, + { + "epoch": 0.47042417555473826, + "grad_norm": 1.3086279139965047, + "learning_rate": 1.1438777793808787e-05, + "loss": 0.6767, + "step": 15349 + }, + { + "epoch": 0.47045482407747946, + "grad_norm": 1.4177085602524326, + "learning_rate": 1.1437795478957765e-05, + "loss": 0.6724, + "step": 15350 + }, + { + "epoch": 0.47048547260022067, + "grad_norm": 1.4006130871235067, + "learning_rate": 1.1436813149939776e-05, + "loss": 0.7757, + "step": 15351 + }, + { + "epoch": 0.4705161211229619, + "grad_norm": 1.4247646382652341, + "learning_rate": 1.1435830806764501e-05, + "loss": 0.7832, + "step": 15352 + }, + { + "epoch": 0.4705467696457031, + "grad_norm": 1.441933047756033, + "learning_rate": 1.1434848449441618e-05, + "loss": 0.8464, + "step": 15353 + }, + { + "epoch": 0.4705774181684443, + "grad_norm": 1.3388006845374243, + "learning_rate": 1.1433866077980804e-05, + "loss": 0.6449, + "step": 15354 + }, + { + "epoch": 0.4706080666911855, + "grad_norm": 1.556279596531543, + "learning_rate": 1.143288369239174e-05, + "loss": 0.7107, + "step": 15355 + }, + { + "epoch": 0.4706387152139267, + "grad_norm": 1.4178616976055873, + "learning_rate": 1.143190129268411e-05, + "loss": 0.7874, + "step": 15356 + }, + { + "epoch": 0.4706693637366679, + "grad_norm": 1.4285339500573964, + "learning_rate": 1.143091887886759e-05, + "loss": 0.7492, + "step": 15357 + }, + { + "epoch": 0.4707000122594091, + "grad_norm": 1.357149428497254, + "learning_rate": 1.1429936450951854e-05, + "loss": 0.7451, + "step": 15358 + }, + { + "epoch": 0.4707306607821503, + "grad_norm": 1.2629428203678523, + "learning_rate": 1.1428954008946595e-05, + "loss": 0.5933, + "step": 15359 + }, + { + "epoch": 0.4707613093048915, + "grad_norm": 1.4904230675298775, + "learning_rate": 1.1427971552861485e-05, + "loss": 0.6511, + "step": 15360 + }, + { + "epoch": 0.4707919578276327, + "grad_norm": 1.381294467524702, + "learning_rate": 1.1426989082706205e-05, + "loss": 0.7675, + "step": 15361 + }, + { + "epoch": 0.47082260635037393, + "grad_norm": 1.396757884574003, + "learning_rate": 1.1426006598490438e-05, + "loss": 0.784, + "step": 15362 + }, + { + "epoch": 0.47085325487311513, + "grad_norm": 1.3718024068126449, + "learning_rate": 1.1425024100223863e-05, + "loss": 0.6948, + "step": 15363 + }, + { + "epoch": 0.47088390339585634, + "grad_norm": 1.443701100843309, + "learning_rate": 1.142404158791616e-05, + "loss": 0.7444, + "step": 15364 + }, + { + "epoch": 0.47091455191859755, + "grad_norm": 0.6972397042854139, + "learning_rate": 1.142305906157701e-05, + "loss": 0.5784, + "step": 15365 + }, + { + "epoch": 0.47094520044133875, + "grad_norm": 1.332864003308867, + "learning_rate": 1.1422076521216094e-05, + "loss": 0.6304, + "step": 15366 + }, + { + "epoch": 0.47097584896407996, + "grad_norm": 1.2380032436508643, + "learning_rate": 1.1421093966843097e-05, + "loss": 0.6971, + "step": 15367 + }, + { + "epoch": 0.47100649748682116, + "grad_norm": 1.6141567323490729, + "learning_rate": 1.1420111398467696e-05, + "loss": 0.7091, + "step": 15368 + }, + { + "epoch": 0.4710371460095623, + "grad_norm": 1.5417696048976255, + "learning_rate": 1.1419128816099574e-05, + "loss": 0.7363, + "step": 15369 + }, + { + "epoch": 0.4710677945323035, + "grad_norm": 1.5288176609468866, + "learning_rate": 1.1418146219748415e-05, + "loss": 0.7927, + "step": 15370 + }, + { + "epoch": 0.4710984430550447, + "grad_norm": 1.61479143627389, + "learning_rate": 1.1417163609423894e-05, + "loss": 0.7647, + "step": 15371 + }, + { + "epoch": 0.47112909157778593, + "grad_norm": 1.4856988305248797, + "learning_rate": 1.1416180985135702e-05, + "loss": 0.794, + "step": 15372 + }, + { + "epoch": 0.47115974010052714, + "grad_norm": 0.6468037782443135, + "learning_rate": 1.1415198346893512e-05, + "loss": 0.5565, + "step": 15373 + }, + { + "epoch": 0.47119038862326834, + "grad_norm": 1.6013479054834077, + "learning_rate": 1.1414215694707015e-05, + "loss": 0.7019, + "step": 15374 + }, + { + "epoch": 0.47122103714600955, + "grad_norm": 1.5067342601829457, + "learning_rate": 1.1413233028585888e-05, + "loss": 0.6263, + "step": 15375 + }, + { + "epoch": 0.47125168566875075, + "grad_norm": 0.6675358688310162, + "learning_rate": 1.1412250348539813e-05, + "loss": 0.599, + "step": 15376 + }, + { + "epoch": 0.47128233419149196, + "grad_norm": 0.6775613736369648, + "learning_rate": 1.1411267654578473e-05, + "loss": 0.5628, + "step": 15377 + }, + { + "epoch": 0.47131298271423316, + "grad_norm": 1.515604784520146, + "learning_rate": 1.1410284946711553e-05, + "loss": 0.6826, + "step": 15378 + }, + { + "epoch": 0.47134363123697437, + "grad_norm": 1.5457942153183577, + "learning_rate": 1.1409302224948735e-05, + "loss": 0.7695, + "step": 15379 + }, + { + "epoch": 0.4713742797597156, + "grad_norm": 1.5179559578865707, + "learning_rate": 1.1408319489299701e-05, + "loss": 0.73, + "step": 15380 + }, + { + "epoch": 0.4714049282824568, + "grad_norm": 1.5693839678895842, + "learning_rate": 1.1407336739774136e-05, + "loss": 0.8104, + "step": 15381 + }, + { + "epoch": 0.471435576805198, + "grad_norm": 0.7328601476125741, + "learning_rate": 1.1406353976381722e-05, + "loss": 0.5835, + "step": 15382 + }, + { + "epoch": 0.4714662253279392, + "grad_norm": 1.621664572467515, + "learning_rate": 1.140537119913214e-05, + "loss": 0.6904, + "step": 15383 + }, + { + "epoch": 0.4714968738506804, + "grad_norm": 1.4226737015463018, + "learning_rate": 1.1404388408035077e-05, + "loss": 0.7286, + "step": 15384 + }, + { + "epoch": 0.4715275223734216, + "grad_norm": 1.4100877757277948, + "learning_rate": 1.1403405603100215e-05, + "loss": 0.7411, + "step": 15385 + }, + { + "epoch": 0.4715581708961628, + "grad_norm": 1.492226049739277, + "learning_rate": 1.1402422784337238e-05, + "loss": 0.7029, + "step": 15386 + }, + { + "epoch": 0.471588819418904, + "grad_norm": 1.4651231305889238, + "learning_rate": 1.1401439951755834e-05, + "loss": 0.6946, + "step": 15387 + }, + { + "epoch": 0.4716194679416452, + "grad_norm": 1.3825529278616042, + "learning_rate": 1.140045710536568e-05, + "loss": 0.7904, + "step": 15388 + }, + { + "epoch": 0.4716501164643864, + "grad_norm": 1.6646695644908927, + "learning_rate": 1.1399474245176467e-05, + "loss": 0.7503, + "step": 15389 + }, + { + "epoch": 0.47168076498712763, + "grad_norm": 0.6849001712796897, + "learning_rate": 1.1398491371197872e-05, + "loss": 0.5872, + "step": 15390 + }, + { + "epoch": 0.47171141350986884, + "grad_norm": 1.395656580424068, + "learning_rate": 1.139750848343959e-05, + "loss": 0.7822, + "step": 15391 + }, + { + "epoch": 0.47174206203261004, + "grad_norm": 1.3376861418059147, + "learning_rate": 1.1396525581911294e-05, + "loss": 0.7431, + "step": 15392 + }, + { + "epoch": 0.47177271055535125, + "grad_norm": 1.5415092835614226, + "learning_rate": 1.1395542666622676e-05, + "loss": 0.6718, + "step": 15393 + }, + { + "epoch": 0.47180335907809245, + "grad_norm": 1.5995363375906821, + "learning_rate": 1.1394559737583418e-05, + "loss": 0.7506, + "step": 15394 + }, + { + "epoch": 0.47183400760083366, + "grad_norm": 1.3521386364001051, + "learning_rate": 1.1393576794803207e-05, + "loss": 0.6452, + "step": 15395 + }, + { + "epoch": 0.47186465612357487, + "grad_norm": 1.8473798650381847, + "learning_rate": 1.1392593838291727e-05, + "loss": 0.7272, + "step": 15396 + }, + { + "epoch": 0.47189530464631607, + "grad_norm": 1.6005911692364976, + "learning_rate": 1.1391610868058662e-05, + "loss": 0.6821, + "step": 15397 + }, + { + "epoch": 0.4719259531690573, + "grad_norm": 1.5429105510852752, + "learning_rate": 1.1390627884113705e-05, + "loss": 0.7422, + "step": 15398 + }, + { + "epoch": 0.4719566016917985, + "grad_norm": 1.517454518195507, + "learning_rate": 1.1389644886466531e-05, + "loss": 0.731, + "step": 15399 + }, + { + "epoch": 0.47198725021453963, + "grad_norm": 1.3812648464742563, + "learning_rate": 1.138866187512683e-05, + "loss": 0.7128, + "step": 15400 + }, + { + "epoch": 0.47201789873728084, + "grad_norm": 1.421113765932972, + "learning_rate": 1.138767885010429e-05, + "loss": 0.7072, + "step": 15401 + }, + { + "epoch": 0.47204854726002204, + "grad_norm": 1.469089046747249, + "learning_rate": 1.1386695811408595e-05, + "loss": 0.7562, + "step": 15402 + }, + { + "epoch": 0.47207919578276325, + "grad_norm": 0.7105881946513052, + "learning_rate": 1.138571275904943e-05, + "loss": 0.6025, + "step": 15403 + }, + { + "epoch": 0.47210984430550446, + "grad_norm": 1.4836577059712837, + "learning_rate": 1.1384729693036483e-05, + "loss": 0.6066, + "step": 15404 + }, + { + "epoch": 0.47214049282824566, + "grad_norm": 1.5281888635165721, + "learning_rate": 1.1383746613379439e-05, + "loss": 0.7529, + "step": 15405 + }, + { + "epoch": 0.47217114135098687, + "grad_norm": 1.6664983319704567, + "learning_rate": 1.138276352008799e-05, + "loss": 0.6647, + "step": 15406 + }, + { + "epoch": 0.4722017898737281, + "grad_norm": 0.6770447055696389, + "learning_rate": 1.1381780413171813e-05, + "loss": 0.5829, + "step": 15407 + }, + { + "epoch": 0.4722324383964693, + "grad_norm": 1.383898272957737, + "learning_rate": 1.1380797292640605e-05, + "loss": 0.8098, + "step": 15408 + }, + { + "epoch": 0.4722630869192105, + "grad_norm": 1.6867049875897508, + "learning_rate": 1.1379814158504041e-05, + "loss": 0.7197, + "step": 15409 + }, + { + "epoch": 0.4722937354419517, + "grad_norm": 1.3264980032084466, + "learning_rate": 1.137883101077182e-05, + "loss": 0.7366, + "step": 15410 + }, + { + "epoch": 0.4723243839646929, + "grad_norm": 1.2046109018689244, + "learning_rate": 1.1377847849453625e-05, + "loss": 0.729, + "step": 15411 + }, + { + "epoch": 0.4723550324874341, + "grad_norm": 1.496682683934195, + "learning_rate": 1.137686467455914e-05, + "loss": 0.7053, + "step": 15412 + }, + { + "epoch": 0.4723856810101753, + "grad_norm": 1.4839378387686164, + "learning_rate": 1.1375881486098057e-05, + "loss": 0.5975, + "step": 15413 + }, + { + "epoch": 0.4724163295329165, + "grad_norm": 1.5621273062177454, + "learning_rate": 1.1374898284080061e-05, + "loss": 0.7847, + "step": 15414 + }, + { + "epoch": 0.4724469780556577, + "grad_norm": 1.4869213955526093, + "learning_rate": 1.137391506851484e-05, + "loss": 0.7657, + "step": 15415 + }, + { + "epoch": 0.4724776265783989, + "grad_norm": 0.706177882442147, + "learning_rate": 1.1372931839412082e-05, + "loss": 0.6067, + "step": 15416 + }, + { + "epoch": 0.47250827510114013, + "grad_norm": 1.7262032046505855, + "learning_rate": 1.1371948596781478e-05, + "loss": 0.7221, + "step": 15417 + }, + { + "epoch": 0.47253892362388134, + "grad_norm": 1.5145652360129325, + "learning_rate": 1.1370965340632712e-05, + "loss": 0.7563, + "step": 15418 + }, + { + "epoch": 0.47256957214662254, + "grad_norm": 1.5200630826011374, + "learning_rate": 1.1369982070975471e-05, + "loss": 0.777, + "step": 15419 + }, + { + "epoch": 0.47260022066936375, + "grad_norm": 1.4511791615866765, + "learning_rate": 1.1368998787819447e-05, + "loss": 0.6368, + "step": 15420 + }, + { + "epoch": 0.47263086919210495, + "grad_norm": 0.6767683498425211, + "learning_rate": 1.1368015491174331e-05, + "loss": 0.5896, + "step": 15421 + }, + { + "epoch": 0.47266151771484616, + "grad_norm": 1.426437067516083, + "learning_rate": 1.1367032181049807e-05, + "loss": 0.7062, + "step": 15422 + }, + { + "epoch": 0.47269216623758736, + "grad_norm": 1.516830958088618, + "learning_rate": 1.1366048857455563e-05, + "loss": 0.663, + "step": 15423 + }, + { + "epoch": 0.47272281476032857, + "grad_norm": 1.366694348547423, + "learning_rate": 1.1365065520401291e-05, + "loss": 0.6632, + "step": 15424 + }, + { + "epoch": 0.4727534632830698, + "grad_norm": 1.5537415641534944, + "learning_rate": 1.136408216989668e-05, + "loss": 0.8754, + "step": 15425 + }, + { + "epoch": 0.472784111805811, + "grad_norm": 1.7299063554062741, + "learning_rate": 1.1363098805951418e-05, + "loss": 0.8099, + "step": 15426 + }, + { + "epoch": 0.4728147603285522, + "grad_norm": 1.414104299047483, + "learning_rate": 1.1362115428575193e-05, + "loss": 0.6451, + "step": 15427 + }, + { + "epoch": 0.4728454088512934, + "grad_norm": 1.4867570977463629, + "learning_rate": 1.13611320377777e-05, + "loss": 0.7012, + "step": 15428 + }, + { + "epoch": 0.4728760573740346, + "grad_norm": 0.6897315927333414, + "learning_rate": 1.1360148633568625e-05, + "loss": 0.5683, + "step": 15429 + }, + { + "epoch": 0.4729067058967758, + "grad_norm": 1.3262922453946917, + "learning_rate": 1.1359165215957652e-05, + "loss": 0.6093, + "step": 15430 + }, + { + "epoch": 0.47293735441951695, + "grad_norm": 1.471201062019668, + "learning_rate": 1.1358181784954479e-05, + "loss": 0.783, + "step": 15431 + }, + { + "epoch": 0.47296800294225816, + "grad_norm": 0.6469309994252767, + "learning_rate": 1.1357198340568795e-05, + "loss": 0.5689, + "step": 15432 + }, + { + "epoch": 0.47299865146499936, + "grad_norm": 1.5040078007474558, + "learning_rate": 1.1356214882810289e-05, + "loss": 0.748, + "step": 15433 + }, + { + "epoch": 0.47302929998774057, + "grad_norm": 1.6559851855973302, + "learning_rate": 1.135523141168865e-05, + "loss": 0.7448, + "step": 15434 + }, + { + "epoch": 0.4730599485104818, + "grad_norm": 1.397467776435871, + "learning_rate": 1.1354247927213566e-05, + "loss": 0.6099, + "step": 15435 + }, + { + "epoch": 0.473090597033223, + "grad_norm": 1.2414757097057076, + "learning_rate": 1.1353264429394733e-05, + "loss": 0.6835, + "step": 15436 + }, + { + "epoch": 0.4731212455559642, + "grad_norm": 1.4962285317474737, + "learning_rate": 1.135228091824184e-05, + "loss": 0.7522, + "step": 15437 + }, + { + "epoch": 0.4731518940787054, + "grad_norm": 1.5140907576221165, + "learning_rate": 1.1351297393764576e-05, + "loss": 0.7605, + "step": 15438 + }, + { + "epoch": 0.4731825426014466, + "grad_norm": 1.4766608613220913, + "learning_rate": 1.1350313855972632e-05, + "loss": 0.7088, + "step": 15439 + }, + { + "epoch": 0.4732131911241878, + "grad_norm": 1.539118318466101, + "learning_rate": 1.1349330304875701e-05, + "loss": 0.6822, + "step": 15440 + }, + { + "epoch": 0.473243839646929, + "grad_norm": 0.738325336322908, + "learning_rate": 1.1348346740483475e-05, + "loss": 0.5741, + "step": 15441 + }, + { + "epoch": 0.4732744881696702, + "grad_norm": 1.4755457528500948, + "learning_rate": 1.1347363162805643e-05, + "loss": 0.7466, + "step": 15442 + }, + { + "epoch": 0.4733051366924114, + "grad_norm": 1.5477311315627034, + "learning_rate": 1.1346379571851895e-05, + "loss": 0.8068, + "step": 15443 + }, + { + "epoch": 0.4733357852151526, + "grad_norm": 1.4088476663740204, + "learning_rate": 1.1345395967631924e-05, + "loss": 0.707, + "step": 15444 + }, + { + "epoch": 0.47336643373789383, + "grad_norm": 0.6764928857176224, + "learning_rate": 1.134441235015543e-05, + "loss": 0.5718, + "step": 15445 + }, + { + "epoch": 0.47339708226063504, + "grad_norm": 1.5332713937339817, + "learning_rate": 1.1343428719432088e-05, + "loss": 0.7375, + "step": 15446 + }, + { + "epoch": 0.47342773078337624, + "grad_norm": 1.4848764506020378, + "learning_rate": 1.1342445075471604e-05, + "loss": 0.7814, + "step": 15447 + }, + { + "epoch": 0.47345837930611745, + "grad_norm": 1.4791762862109885, + "learning_rate": 1.1341461418283661e-05, + "loss": 0.7717, + "step": 15448 + }, + { + "epoch": 0.47348902782885866, + "grad_norm": 1.6093475539106135, + "learning_rate": 1.134047774787796e-05, + "loss": 0.7789, + "step": 15449 + }, + { + "epoch": 0.47351967635159986, + "grad_norm": 1.4702316457628173, + "learning_rate": 1.1339494064264187e-05, + "loss": 0.6569, + "step": 15450 + }, + { + "epoch": 0.47355032487434107, + "grad_norm": 1.590140495599507, + "learning_rate": 1.1338510367452038e-05, + "loss": 0.78, + "step": 15451 + }, + { + "epoch": 0.47358097339708227, + "grad_norm": 1.591583599210561, + "learning_rate": 1.13375266574512e-05, + "loss": 0.7927, + "step": 15452 + }, + { + "epoch": 0.4736116219198235, + "grad_norm": 0.7494553316819835, + "learning_rate": 1.1336542934271371e-05, + "loss": 0.6324, + "step": 15453 + }, + { + "epoch": 0.4736422704425647, + "grad_norm": 1.290604722331277, + "learning_rate": 1.1335559197922243e-05, + "loss": 0.6725, + "step": 15454 + }, + { + "epoch": 0.4736729189653059, + "grad_norm": 1.829733151530684, + "learning_rate": 1.1334575448413508e-05, + "loss": 0.7954, + "step": 15455 + }, + { + "epoch": 0.4737035674880471, + "grad_norm": 1.3848844932229123, + "learning_rate": 1.1333591685754863e-05, + "loss": 0.6648, + "step": 15456 + }, + { + "epoch": 0.4737342160107883, + "grad_norm": 1.512905674072228, + "learning_rate": 1.1332607909955996e-05, + "loss": 0.7231, + "step": 15457 + }, + { + "epoch": 0.4737648645335295, + "grad_norm": 1.3127299809425685, + "learning_rate": 1.1331624121026601e-05, + "loss": 0.701, + "step": 15458 + }, + { + "epoch": 0.4737955130562707, + "grad_norm": 1.4640470359765594, + "learning_rate": 1.1330640318976371e-05, + "loss": 0.7604, + "step": 15459 + }, + { + "epoch": 0.4738261615790119, + "grad_norm": 1.3657680679102173, + "learning_rate": 1.1329656503815008e-05, + "loss": 0.7402, + "step": 15460 + }, + { + "epoch": 0.4738568101017531, + "grad_norm": 1.5541593402635738, + "learning_rate": 1.1328672675552193e-05, + "loss": 0.7061, + "step": 15461 + }, + { + "epoch": 0.4738874586244943, + "grad_norm": 1.226392628516349, + "learning_rate": 1.1327688834197627e-05, + "loss": 0.676, + "step": 15462 + }, + { + "epoch": 0.4739181071472355, + "grad_norm": 0.6786070097692987, + "learning_rate": 1.1326704979761003e-05, + "loss": 0.6059, + "step": 15463 + }, + { + "epoch": 0.4739487556699767, + "grad_norm": 1.5185947216756113, + "learning_rate": 1.1325721112252018e-05, + "loss": 0.7348, + "step": 15464 + }, + { + "epoch": 0.4739794041927179, + "grad_norm": 1.316292880683451, + "learning_rate": 1.1324737231680363e-05, + "loss": 0.5613, + "step": 15465 + }, + { + "epoch": 0.4740100527154591, + "grad_norm": 0.6830513770102692, + "learning_rate": 1.1323753338055731e-05, + "loss": 0.584, + "step": 15466 + }, + { + "epoch": 0.4740407012382003, + "grad_norm": 1.435503121184611, + "learning_rate": 1.1322769431387822e-05, + "loss": 0.6264, + "step": 15467 + }, + { + "epoch": 0.4740713497609415, + "grad_norm": 1.5337513483767231, + "learning_rate": 1.1321785511686325e-05, + "loss": 0.7507, + "step": 15468 + }, + { + "epoch": 0.4741019982836827, + "grad_norm": 0.6673362254758946, + "learning_rate": 1.1320801578960939e-05, + "loss": 0.592, + "step": 15469 + }, + { + "epoch": 0.4741326468064239, + "grad_norm": 1.401102515614491, + "learning_rate": 1.1319817633221355e-05, + "loss": 0.7319, + "step": 15470 + }, + { + "epoch": 0.4741632953291651, + "grad_norm": 1.334229841246843, + "learning_rate": 1.1318833674477272e-05, + "loss": 0.7132, + "step": 15471 + }, + { + "epoch": 0.47419394385190633, + "grad_norm": 1.6243643354266688, + "learning_rate": 1.1317849702738382e-05, + "loss": 0.7266, + "step": 15472 + }, + { + "epoch": 0.47422459237464754, + "grad_norm": 1.4358636408998309, + "learning_rate": 1.1316865718014382e-05, + "loss": 0.7846, + "step": 15473 + }, + { + "epoch": 0.47425524089738874, + "grad_norm": 1.5793923953972333, + "learning_rate": 1.1315881720314968e-05, + "loss": 0.7424, + "step": 15474 + }, + { + "epoch": 0.47428588942012995, + "grad_norm": 1.4641856926784198, + "learning_rate": 1.1314897709649832e-05, + "loss": 0.6784, + "step": 15475 + }, + { + "epoch": 0.47431653794287115, + "grad_norm": 1.5884891058718484, + "learning_rate": 1.1313913686028676e-05, + "loss": 0.7394, + "step": 15476 + }, + { + "epoch": 0.47434718646561236, + "grad_norm": 1.5785193991219608, + "learning_rate": 1.131292964946119e-05, + "loss": 0.7827, + "step": 15477 + }, + { + "epoch": 0.47437783498835356, + "grad_norm": 1.3351233219137113, + "learning_rate": 1.1311945599957073e-05, + "loss": 0.6807, + "step": 15478 + }, + { + "epoch": 0.47440848351109477, + "grad_norm": 0.702453908613681, + "learning_rate": 1.1310961537526021e-05, + "loss": 0.6156, + "step": 15479 + }, + { + "epoch": 0.474439132033836, + "grad_norm": 0.662665447073647, + "learning_rate": 1.1309977462177728e-05, + "loss": 0.57, + "step": 15480 + }, + { + "epoch": 0.4744697805565772, + "grad_norm": 1.374641196650131, + "learning_rate": 1.1308993373921892e-05, + "loss": 0.773, + "step": 15481 + }, + { + "epoch": 0.4745004290793184, + "grad_norm": 0.673987220439863, + "learning_rate": 1.130800927276821e-05, + "loss": 0.5987, + "step": 15482 + }, + { + "epoch": 0.4745310776020596, + "grad_norm": 1.462087703834548, + "learning_rate": 1.1307025158726379e-05, + "loss": 0.7181, + "step": 15483 + }, + { + "epoch": 0.4745617261248008, + "grad_norm": 1.457995288577724, + "learning_rate": 1.1306041031806094e-05, + "loss": 0.7791, + "step": 15484 + }, + { + "epoch": 0.474592374647542, + "grad_norm": 0.6592858088201224, + "learning_rate": 1.1305056892017052e-05, + "loss": 0.5505, + "step": 15485 + }, + { + "epoch": 0.4746230231702832, + "grad_norm": 1.4468189179698223, + "learning_rate": 1.1304072739368952e-05, + "loss": 0.698, + "step": 15486 + }, + { + "epoch": 0.4746536716930244, + "grad_norm": 1.5630571674944138, + "learning_rate": 1.1303088573871489e-05, + "loss": 0.6921, + "step": 15487 + }, + { + "epoch": 0.4746843202157656, + "grad_norm": 1.7006372737167958, + "learning_rate": 1.130210439553436e-05, + "loss": 0.7563, + "step": 15488 + }, + { + "epoch": 0.4747149687385068, + "grad_norm": 1.5154668724488165, + "learning_rate": 1.1301120204367262e-05, + "loss": 0.6683, + "step": 15489 + }, + { + "epoch": 0.47474561726124803, + "grad_norm": 1.7677641566649307, + "learning_rate": 1.1300136000379895e-05, + "loss": 0.7785, + "step": 15490 + }, + { + "epoch": 0.47477626578398924, + "grad_norm": 1.3216802556812173, + "learning_rate": 1.1299151783581956e-05, + "loss": 0.7219, + "step": 15491 + }, + { + "epoch": 0.47480691430673044, + "grad_norm": 1.5011395102900758, + "learning_rate": 1.1298167553983142e-05, + "loss": 0.6793, + "step": 15492 + }, + { + "epoch": 0.4748375628294716, + "grad_norm": 0.6674444401892132, + "learning_rate": 1.1297183311593151e-05, + "loss": 0.5887, + "step": 15493 + }, + { + "epoch": 0.4748682113522128, + "grad_norm": 1.6784312665989791, + "learning_rate": 1.1296199056421679e-05, + "loss": 0.7104, + "step": 15494 + }, + { + "epoch": 0.474898859874954, + "grad_norm": 1.4970296634593319, + "learning_rate": 1.129521478847843e-05, + "loss": 0.7818, + "step": 15495 + }, + { + "epoch": 0.4749295083976952, + "grad_norm": 1.2886402614533792, + "learning_rate": 1.1294230507773094e-05, + "loss": 0.7628, + "step": 15496 + }, + { + "epoch": 0.4749601569204364, + "grad_norm": 1.395829230817951, + "learning_rate": 1.1293246214315376e-05, + "loss": 0.738, + "step": 15497 + }, + { + "epoch": 0.4749908054431776, + "grad_norm": 1.5049932861540063, + "learning_rate": 1.129226190811497e-05, + "loss": 0.782, + "step": 15498 + }, + { + "epoch": 0.4750214539659188, + "grad_norm": 1.4833509088844916, + "learning_rate": 1.1291277589181582e-05, + "loss": 0.7007, + "step": 15499 + }, + { + "epoch": 0.47505210248866003, + "grad_norm": 1.6224441444160502, + "learning_rate": 1.1290293257524901e-05, + "loss": 0.7094, + "step": 15500 + }, + { + "epoch": 0.47508275101140124, + "grad_norm": 0.6488118524789026, + "learning_rate": 1.128930891315463e-05, + "loss": 0.6199, + "step": 15501 + }, + { + "epoch": 0.47511339953414244, + "grad_norm": 1.4041401123705552, + "learning_rate": 1.1288324556080473e-05, + "loss": 0.7714, + "step": 15502 + }, + { + "epoch": 0.47514404805688365, + "grad_norm": 1.4252897510779567, + "learning_rate": 1.128734018631212e-05, + "loss": 0.8086, + "step": 15503 + }, + { + "epoch": 0.47517469657962486, + "grad_norm": 1.3920367314632192, + "learning_rate": 1.1286355803859274e-05, + "loss": 0.694, + "step": 15504 + }, + { + "epoch": 0.47520534510236606, + "grad_norm": 1.5255993731130835, + "learning_rate": 1.128537140873164e-05, + "loss": 0.7078, + "step": 15505 + }, + { + "epoch": 0.47523599362510727, + "grad_norm": 1.6038157343728505, + "learning_rate": 1.128438700093891e-05, + "loss": 0.7455, + "step": 15506 + }, + { + "epoch": 0.4752666421478485, + "grad_norm": 1.3578504283411212, + "learning_rate": 1.1283402580490783e-05, + "loss": 0.691, + "step": 15507 + }, + { + "epoch": 0.4752972906705897, + "grad_norm": 1.3624721033788998, + "learning_rate": 1.1282418147396967e-05, + "loss": 0.6643, + "step": 15508 + }, + { + "epoch": 0.4753279391933309, + "grad_norm": 1.6251161128601679, + "learning_rate": 1.1281433701667152e-05, + "loss": 0.8146, + "step": 15509 + }, + { + "epoch": 0.4753585877160721, + "grad_norm": 0.6850622413992017, + "learning_rate": 1.1280449243311051e-05, + "loss": 0.6031, + "step": 15510 + }, + { + "epoch": 0.4753892362388133, + "grad_norm": 0.6862776813153294, + "learning_rate": 1.1279464772338349e-05, + "loss": 0.6109, + "step": 15511 + }, + { + "epoch": 0.4754198847615545, + "grad_norm": 1.4969462317615494, + "learning_rate": 1.1278480288758755e-05, + "loss": 0.7177, + "step": 15512 + }, + { + "epoch": 0.4754505332842957, + "grad_norm": 1.403991963263495, + "learning_rate": 1.1277495792581968e-05, + "loss": 0.7476, + "step": 15513 + }, + { + "epoch": 0.4754811818070369, + "grad_norm": 1.4323838643776485, + "learning_rate": 1.1276511283817687e-05, + "loss": 0.6744, + "step": 15514 + }, + { + "epoch": 0.4755118303297781, + "grad_norm": 0.6521604861851075, + "learning_rate": 1.1275526762475615e-05, + "loss": 0.5731, + "step": 15515 + }, + { + "epoch": 0.4755424788525193, + "grad_norm": 1.3913789322695855, + "learning_rate": 1.1274542228565451e-05, + "loss": 0.6672, + "step": 15516 + }, + { + "epoch": 0.47557312737526053, + "grad_norm": 0.692487605330713, + "learning_rate": 1.1273557682096893e-05, + "loss": 0.6067, + "step": 15517 + }, + { + "epoch": 0.47560377589800173, + "grad_norm": 1.52343858318617, + "learning_rate": 1.1272573123079651e-05, + "loss": 0.6954, + "step": 15518 + }, + { + "epoch": 0.47563442442074294, + "grad_norm": 0.6560360605923498, + "learning_rate": 1.1271588551523418e-05, + "loss": 0.5777, + "step": 15519 + }, + { + "epoch": 0.47566507294348415, + "grad_norm": 1.6501794844175972, + "learning_rate": 1.1270603967437896e-05, + "loss": 0.6431, + "step": 15520 + }, + { + "epoch": 0.47569572146622535, + "grad_norm": 1.3132803101284616, + "learning_rate": 1.1269619370832791e-05, + "loss": 0.7142, + "step": 15521 + }, + { + "epoch": 0.47572636998896656, + "grad_norm": 1.4869752492110242, + "learning_rate": 1.12686347617178e-05, + "loss": 0.7228, + "step": 15522 + }, + { + "epoch": 0.47575701851170776, + "grad_norm": 1.424148921067821, + "learning_rate": 1.1267650140102628e-05, + "loss": 0.7382, + "step": 15523 + }, + { + "epoch": 0.4757876670344489, + "grad_norm": 1.533158854203945, + "learning_rate": 1.1266665505996972e-05, + "loss": 0.6449, + "step": 15524 + }, + { + "epoch": 0.4758183155571901, + "grad_norm": 1.2911412845693084, + "learning_rate": 1.1265680859410538e-05, + "loss": 0.7036, + "step": 15525 + }, + { + "epoch": 0.4758489640799313, + "grad_norm": 1.6172945670572954, + "learning_rate": 1.1264696200353026e-05, + "loss": 0.8287, + "step": 15526 + }, + { + "epoch": 0.47587961260267253, + "grad_norm": 0.7391959770718457, + "learning_rate": 1.126371152883414e-05, + "loss": 0.6158, + "step": 15527 + }, + { + "epoch": 0.47591026112541374, + "grad_norm": 1.4768335772465881, + "learning_rate": 1.1262726844863578e-05, + "loss": 0.6584, + "step": 15528 + }, + { + "epoch": 0.47594090964815494, + "grad_norm": 1.4813539302261007, + "learning_rate": 1.1261742148451051e-05, + "loss": 0.6893, + "step": 15529 + }, + { + "epoch": 0.47597155817089615, + "grad_norm": 1.4442021900820017, + "learning_rate": 1.1260757439606252e-05, + "loss": 0.7388, + "step": 15530 + }, + { + "epoch": 0.47600220669363735, + "grad_norm": 1.6150274990143347, + "learning_rate": 1.1259772718338887e-05, + "loss": 0.7383, + "step": 15531 + }, + { + "epoch": 0.47603285521637856, + "grad_norm": 1.3598098310542759, + "learning_rate": 1.125878798465866e-05, + "loss": 0.6614, + "step": 15532 + }, + { + "epoch": 0.47606350373911976, + "grad_norm": 1.3911288904701624, + "learning_rate": 1.1257803238575272e-05, + "loss": 0.6249, + "step": 15533 + }, + { + "epoch": 0.47609415226186097, + "grad_norm": 1.5598012797807121, + "learning_rate": 1.1256818480098428e-05, + "loss": 0.7418, + "step": 15534 + }, + { + "epoch": 0.4761248007846022, + "grad_norm": 1.4198307420004668, + "learning_rate": 1.1255833709237827e-05, + "loss": 0.7065, + "step": 15535 + }, + { + "epoch": 0.4761554493073434, + "grad_norm": 1.4691981118141058, + "learning_rate": 1.125484892600318e-05, + "loss": 0.7651, + "step": 15536 + }, + { + "epoch": 0.4761860978300846, + "grad_norm": 1.7779779497710668, + "learning_rate": 1.1253864130404182e-05, + "loss": 0.7119, + "step": 15537 + }, + { + "epoch": 0.4762167463528258, + "grad_norm": 1.503420959101321, + "learning_rate": 1.1252879322450543e-05, + "loss": 0.6532, + "step": 15538 + }, + { + "epoch": 0.476247394875567, + "grad_norm": 1.5950058563716991, + "learning_rate": 1.1251894502151958e-05, + "loss": 0.8116, + "step": 15539 + }, + { + "epoch": 0.4762780433983082, + "grad_norm": 1.452934682511801, + "learning_rate": 1.1250909669518139e-05, + "loss": 0.7334, + "step": 15540 + }, + { + "epoch": 0.4763086919210494, + "grad_norm": 1.4844889333801532, + "learning_rate": 1.124992482455879e-05, + "loss": 0.7286, + "step": 15541 + }, + { + "epoch": 0.4763393404437906, + "grad_norm": 1.5073182184715412, + "learning_rate": 1.124893996728361e-05, + "loss": 0.5749, + "step": 15542 + }, + { + "epoch": 0.4763699889665318, + "grad_norm": 1.3348696289037607, + "learning_rate": 1.1247955097702303e-05, + "loss": 0.6098, + "step": 15543 + }, + { + "epoch": 0.476400637489273, + "grad_norm": 1.4138331875414811, + "learning_rate": 1.1246970215824578e-05, + "loss": 0.676, + "step": 15544 + }, + { + "epoch": 0.47643128601201423, + "grad_norm": 1.5001715785211365, + "learning_rate": 1.1245985321660133e-05, + "loss": 0.7128, + "step": 15545 + }, + { + "epoch": 0.47646193453475544, + "grad_norm": 1.3963402259309328, + "learning_rate": 1.1245000415218676e-05, + "loss": 0.8148, + "step": 15546 + }, + { + "epoch": 0.47649258305749664, + "grad_norm": 0.6825600962130356, + "learning_rate": 1.1244015496509914e-05, + "loss": 0.5908, + "step": 15547 + }, + { + "epoch": 0.47652323158023785, + "grad_norm": 0.6934932949278357, + "learning_rate": 1.1243030565543549e-05, + "loss": 0.5922, + "step": 15548 + }, + { + "epoch": 0.47655388010297905, + "grad_norm": 1.5931913111134557, + "learning_rate": 1.1242045622329286e-05, + "loss": 0.7315, + "step": 15549 + }, + { + "epoch": 0.47658452862572026, + "grad_norm": 0.6598577815451083, + "learning_rate": 1.1241060666876826e-05, + "loss": 0.5744, + "step": 15550 + }, + { + "epoch": 0.47661517714846147, + "grad_norm": 1.840924172485478, + "learning_rate": 1.1240075699195883e-05, + "loss": 0.8042, + "step": 15551 + }, + { + "epoch": 0.47664582567120267, + "grad_norm": 1.4001040035751229, + "learning_rate": 1.123909071929615e-05, + "loss": 0.7108, + "step": 15552 + }, + { + "epoch": 0.4766764741939439, + "grad_norm": 0.6279783676282025, + "learning_rate": 1.1238105727187347e-05, + "loss": 0.5365, + "step": 15553 + }, + { + "epoch": 0.4767071227166851, + "grad_norm": 1.5090742315762644, + "learning_rate": 1.1237120722879167e-05, + "loss": 0.5967, + "step": 15554 + }, + { + "epoch": 0.47673777123942623, + "grad_norm": 1.6215516807251724, + "learning_rate": 1.1236135706381322e-05, + "loss": 0.7661, + "step": 15555 + }, + { + "epoch": 0.47676841976216744, + "grad_norm": 1.3976577982502087, + "learning_rate": 1.1235150677703514e-05, + "loss": 0.7157, + "step": 15556 + }, + { + "epoch": 0.47679906828490864, + "grad_norm": 1.507625315243662, + "learning_rate": 1.1234165636855453e-05, + "loss": 0.7389, + "step": 15557 + }, + { + "epoch": 0.47682971680764985, + "grad_norm": 1.4798431993961916, + "learning_rate": 1.1233180583846837e-05, + "loss": 0.755, + "step": 15558 + }, + { + "epoch": 0.47686036533039106, + "grad_norm": 1.3356524642579006, + "learning_rate": 1.1232195518687381e-05, + "loss": 0.6691, + "step": 15559 + }, + { + "epoch": 0.47689101385313226, + "grad_norm": 1.2806950240370532, + "learning_rate": 1.1231210441386786e-05, + "loss": 0.6202, + "step": 15560 + }, + { + "epoch": 0.47692166237587347, + "grad_norm": 1.4640064460449886, + "learning_rate": 1.1230225351954758e-05, + "loss": 0.7723, + "step": 15561 + }, + { + "epoch": 0.4769523108986147, + "grad_norm": 1.4420980380680442, + "learning_rate": 1.1229240250401008e-05, + "loss": 0.6713, + "step": 15562 + }, + { + "epoch": 0.4769829594213559, + "grad_norm": 1.3740506500690441, + "learning_rate": 1.1228255136735235e-05, + "loss": 0.7782, + "step": 15563 + }, + { + "epoch": 0.4770136079440971, + "grad_norm": 1.5432273229550646, + "learning_rate": 1.1227270010967157e-05, + "loss": 0.7036, + "step": 15564 + }, + { + "epoch": 0.4770442564668383, + "grad_norm": 1.3594337763351254, + "learning_rate": 1.1226284873106467e-05, + "loss": 0.7526, + "step": 15565 + }, + { + "epoch": 0.4770749049895795, + "grad_norm": 1.481603826522457, + "learning_rate": 1.1225299723162881e-05, + "loss": 0.7339, + "step": 15566 + }, + { + "epoch": 0.4771055535123207, + "grad_norm": 1.6062931793470747, + "learning_rate": 1.1224314561146104e-05, + "loss": 0.7813, + "step": 15567 + }, + { + "epoch": 0.4771362020350619, + "grad_norm": 1.2764150128300298, + "learning_rate": 1.1223329387065843e-05, + "loss": 0.6932, + "step": 15568 + }, + { + "epoch": 0.4771668505578031, + "grad_norm": 1.6743097393382889, + "learning_rate": 1.1222344200931804e-05, + "loss": 0.7732, + "step": 15569 + }, + { + "epoch": 0.4771974990805443, + "grad_norm": 1.4879054162618648, + "learning_rate": 1.1221359002753694e-05, + "loss": 0.6572, + "step": 15570 + }, + { + "epoch": 0.4772281476032855, + "grad_norm": 1.3903027017474148, + "learning_rate": 1.1220373792541217e-05, + "loss": 0.591, + "step": 15571 + }, + { + "epoch": 0.47725879612602673, + "grad_norm": 0.7363122310757483, + "learning_rate": 1.121938857030409e-05, + "loss": 0.5693, + "step": 15572 + }, + { + "epoch": 0.47728944464876794, + "grad_norm": 1.398787234936757, + "learning_rate": 1.1218403336052015e-05, + "loss": 0.6538, + "step": 15573 + }, + { + "epoch": 0.47732009317150914, + "grad_norm": 1.4164826942592972, + "learning_rate": 1.1217418089794701e-05, + "loss": 0.6753, + "step": 15574 + }, + { + "epoch": 0.47735074169425035, + "grad_norm": 1.580655787926514, + "learning_rate": 1.1216432831541852e-05, + "loss": 0.769, + "step": 15575 + }, + { + "epoch": 0.47738139021699155, + "grad_norm": 1.68077942084735, + "learning_rate": 1.121544756130318e-05, + "loss": 0.7429, + "step": 15576 + }, + { + "epoch": 0.47741203873973276, + "grad_norm": 1.7254439215537434, + "learning_rate": 1.1214462279088395e-05, + "loss": 0.7277, + "step": 15577 + }, + { + "epoch": 0.47744268726247396, + "grad_norm": 1.3949440186513524, + "learning_rate": 1.1213476984907198e-05, + "loss": 0.6885, + "step": 15578 + }, + { + "epoch": 0.47747333578521517, + "grad_norm": 1.483781283194262, + "learning_rate": 1.1212491678769305e-05, + "loss": 0.7744, + "step": 15579 + }, + { + "epoch": 0.4775039843079564, + "grad_norm": 1.2731647941134512, + "learning_rate": 1.121150636068442e-05, + "loss": 0.71, + "step": 15580 + }, + { + "epoch": 0.4775346328306976, + "grad_norm": 1.5617981243689165, + "learning_rate": 1.1210521030662255e-05, + "loss": 0.7042, + "step": 15581 + }, + { + "epoch": 0.4775652813534388, + "grad_norm": 0.6661641260350395, + "learning_rate": 1.1209535688712512e-05, + "loss": 0.5866, + "step": 15582 + }, + { + "epoch": 0.47759592987618, + "grad_norm": 1.5622543259554145, + "learning_rate": 1.120855033484491e-05, + "loss": 0.7033, + "step": 15583 + }, + { + "epoch": 0.4776265783989212, + "grad_norm": 1.565620811410088, + "learning_rate": 1.1207564969069149e-05, + "loss": 0.7245, + "step": 15584 + }, + { + "epoch": 0.4776572269216624, + "grad_norm": 1.6873377000682444, + "learning_rate": 1.120657959139494e-05, + "loss": 0.7191, + "step": 15585 + }, + { + "epoch": 0.47768787544440355, + "grad_norm": 1.3870501680827458, + "learning_rate": 1.1205594201831995e-05, + "loss": 0.6719, + "step": 15586 + }, + { + "epoch": 0.47771852396714476, + "grad_norm": 1.6967158508255773, + "learning_rate": 1.1204608800390024e-05, + "loss": 0.7714, + "step": 15587 + }, + { + "epoch": 0.47774917248988596, + "grad_norm": 1.5724971937801453, + "learning_rate": 1.1203623387078733e-05, + "loss": 0.7085, + "step": 15588 + }, + { + "epoch": 0.47777982101262717, + "grad_norm": 0.6884894073642305, + "learning_rate": 1.1202637961907831e-05, + "loss": 0.5927, + "step": 15589 + }, + { + "epoch": 0.4778104695353684, + "grad_norm": 1.5737543185399354, + "learning_rate": 1.1201652524887032e-05, + "loss": 0.6741, + "step": 15590 + }, + { + "epoch": 0.4778411180581096, + "grad_norm": 1.6603781902833992, + "learning_rate": 1.1200667076026041e-05, + "loss": 0.7003, + "step": 15591 + }, + { + "epoch": 0.4778717665808508, + "grad_norm": 1.603542915823476, + "learning_rate": 1.1199681615334573e-05, + "loss": 0.7536, + "step": 15592 + }, + { + "epoch": 0.477902415103592, + "grad_norm": 1.409178511527253, + "learning_rate": 1.1198696142822332e-05, + "loss": 0.6695, + "step": 15593 + }, + { + "epoch": 0.4779330636263332, + "grad_norm": 1.4247771498192885, + "learning_rate": 1.1197710658499033e-05, + "loss": 0.8148, + "step": 15594 + }, + { + "epoch": 0.4779637121490744, + "grad_norm": 1.4407089021730282, + "learning_rate": 1.1196725162374384e-05, + "loss": 0.6953, + "step": 15595 + }, + { + "epoch": 0.4779943606718156, + "grad_norm": 1.5820252562316262, + "learning_rate": 1.1195739654458096e-05, + "loss": 0.6815, + "step": 15596 + }, + { + "epoch": 0.4780250091945568, + "grad_norm": 1.5492038481555137, + "learning_rate": 1.1194754134759878e-05, + "loss": 0.6911, + "step": 15597 + }, + { + "epoch": 0.478055657717298, + "grad_norm": 1.4802614342014846, + "learning_rate": 1.1193768603289444e-05, + "loss": 0.6401, + "step": 15598 + }, + { + "epoch": 0.4780863062400392, + "grad_norm": 1.4050622539233029, + "learning_rate": 1.11927830600565e-05, + "loss": 0.6338, + "step": 15599 + }, + { + "epoch": 0.47811695476278043, + "grad_norm": 1.634972563106325, + "learning_rate": 1.1191797505070763e-05, + "loss": 0.7255, + "step": 15600 + }, + { + "epoch": 0.47814760328552164, + "grad_norm": 1.4973854192627354, + "learning_rate": 1.1190811938341935e-05, + "loss": 0.6995, + "step": 15601 + }, + { + "epoch": 0.47817825180826284, + "grad_norm": 1.762653510396281, + "learning_rate": 1.1189826359879736e-05, + "loss": 0.7299, + "step": 15602 + }, + { + "epoch": 0.47820890033100405, + "grad_norm": 1.3564044038630163, + "learning_rate": 1.1188840769693874e-05, + "loss": 0.6382, + "step": 15603 + }, + { + "epoch": 0.47823954885374526, + "grad_norm": 0.7382781396928879, + "learning_rate": 1.1187855167794054e-05, + "loss": 0.6236, + "step": 15604 + }, + { + "epoch": 0.47827019737648646, + "grad_norm": 1.3635018819093752, + "learning_rate": 1.1186869554190001e-05, + "loss": 0.5829, + "step": 15605 + }, + { + "epoch": 0.47830084589922767, + "grad_norm": 1.6950489374721895, + "learning_rate": 1.1185883928891415e-05, + "loss": 0.7016, + "step": 15606 + }, + { + "epoch": 0.4783314944219689, + "grad_norm": 0.6850220266831741, + "learning_rate": 1.1184898291908011e-05, + "loss": 0.5982, + "step": 15607 + }, + { + "epoch": 0.4783621429447101, + "grad_norm": 0.6692668350848842, + "learning_rate": 1.11839126432495e-05, + "loss": 0.5747, + "step": 15608 + }, + { + "epoch": 0.4783927914674513, + "grad_norm": 0.6833054991236711, + "learning_rate": 1.1182926982925598e-05, + "loss": 0.6087, + "step": 15609 + }, + { + "epoch": 0.4784234399901925, + "grad_norm": 1.4200502931284449, + "learning_rate": 1.1181941310946011e-05, + "loss": 0.6922, + "step": 15610 + }, + { + "epoch": 0.4784540885129337, + "grad_norm": 1.4323482636853666, + "learning_rate": 1.1180955627320455e-05, + "loss": 0.7239, + "step": 15611 + }, + { + "epoch": 0.4784847370356749, + "grad_norm": 1.4354869749987977, + "learning_rate": 1.1179969932058638e-05, + "loss": 0.732, + "step": 15612 + }, + { + "epoch": 0.4785153855584161, + "grad_norm": 0.6600281992938866, + "learning_rate": 1.1178984225170279e-05, + "loss": 0.5461, + "step": 15613 + }, + { + "epoch": 0.4785460340811573, + "grad_norm": 1.5352179001509116, + "learning_rate": 1.1177998506665087e-05, + "loss": 0.644, + "step": 15614 + }, + { + "epoch": 0.4785766826038985, + "grad_norm": 1.3410117258242318, + "learning_rate": 1.1177012776552772e-05, + "loss": 0.7489, + "step": 15615 + }, + { + "epoch": 0.4786073311266397, + "grad_norm": 1.5978579266017512, + "learning_rate": 1.1176027034843051e-05, + "loss": 0.7803, + "step": 15616 + }, + { + "epoch": 0.4786379796493809, + "grad_norm": 1.4370139933362116, + "learning_rate": 1.1175041281545631e-05, + "loss": 0.6692, + "step": 15617 + }, + { + "epoch": 0.4786686281721221, + "grad_norm": 1.3411908164068482, + "learning_rate": 1.1174055516670235e-05, + "loss": 0.6922, + "step": 15618 + }, + { + "epoch": 0.4786992766948633, + "grad_norm": 1.3445795367193376, + "learning_rate": 1.1173069740226563e-05, + "loss": 0.7569, + "step": 15619 + }, + { + "epoch": 0.4787299252176045, + "grad_norm": 1.4591519389181113, + "learning_rate": 1.117208395222434e-05, + "loss": 0.6696, + "step": 15620 + }, + { + "epoch": 0.4787605737403457, + "grad_norm": 1.5477558070552213, + "learning_rate": 1.117109815267327e-05, + "loss": 0.7134, + "step": 15621 + }, + { + "epoch": 0.4787912222630869, + "grad_norm": 1.6532812041791003, + "learning_rate": 1.1170112341583074e-05, + "loss": 0.6989, + "step": 15622 + }, + { + "epoch": 0.4788218707858281, + "grad_norm": 1.9862882295728543, + "learning_rate": 1.1169126518963459e-05, + "loss": 0.6777, + "step": 15623 + }, + { + "epoch": 0.4788525193085693, + "grad_norm": 1.313049283265135, + "learning_rate": 1.1168140684824142e-05, + "loss": 0.7013, + "step": 15624 + }, + { + "epoch": 0.4788831678313105, + "grad_norm": 1.5372887086034008, + "learning_rate": 1.1167154839174834e-05, + "loss": 0.6973, + "step": 15625 + }, + { + "epoch": 0.4789138163540517, + "grad_norm": 1.6626137138703267, + "learning_rate": 1.1166168982025256e-05, + "loss": 0.722, + "step": 15626 + }, + { + "epoch": 0.47894446487679293, + "grad_norm": 1.2106463440824522, + "learning_rate": 1.1165183113385112e-05, + "loss": 0.7523, + "step": 15627 + }, + { + "epoch": 0.47897511339953414, + "grad_norm": 1.70543686630794, + "learning_rate": 1.116419723326412e-05, + "loss": 0.7954, + "step": 15628 + }, + { + "epoch": 0.47900576192227534, + "grad_norm": 1.5597872366389143, + "learning_rate": 1.1163211341671995e-05, + "loss": 0.7689, + "step": 15629 + }, + { + "epoch": 0.47903641044501655, + "grad_norm": 1.326203335990933, + "learning_rate": 1.1162225438618454e-05, + "loss": 0.6823, + "step": 15630 + }, + { + "epoch": 0.47906705896775775, + "grad_norm": 1.3861390228403077, + "learning_rate": 1.1161239524113207e-05, + "loss": 0.6861, + "step": 15631 + }, + { + "epoch": 0.47909770749049896, + "grad_norm": 1.4527492146716365, + "learning_rate": 1.1160253598165969e-05, + "loss": 0.6005, + "step": 15632 + }, + { + "epoch": 0.47912835601324016, + "grad_norm": 1.6431996692908797, + "learning_rate": 1.115926766078646e-05, + "loss": 0.7407, + "step": 15633 + }, + { + "epoch": 0.47915900453598137, + "grad_norm": 1.4703435877933144, + "learning_rate": 1.1158281711984385e-05, + "loss": 0.6838, + "step": 15634 + }, + { + "epoch": 0.4791896530587226, + "grad_norm": 1.5715096617225097, + "learning_rate": 1.1157295751769466e-05, + "loss": 0.7457, + "step": 15635 + }, + { + "epoch": 0.4792203015814638, + "grad_norm": 0.6811363562175469, + "learning_rate": 1.1156309780151414e-05, + "loss": 0.6137, + "step": 15636 + }, + { + "epoch": 0.479250950104205, + "grad_norm": 1.284637503200356, + "learning_rate": 1.115532379713995e-05, + "loss": 0.7592, + "step": 15637 + }, + { + "epoch": 0.4792815986269462, + "grad_norm": 1.474235942705987, + "learning_rate": 1.115433780274478e-05, + "loss": 0.6396, + "step": 15638 + }, + { + "epoch": 0.4793122471496874, + "grad_norm": 0.664596663431826, + "learning_rate": 1.1153351796975626e-05, + "loss": 0.5725, + "step": 15639 + }, + { + "epoch": 0.4793428956724286, + "grad_norm": 1.4101865687469743, + "learning_rate": 1.11523657798422e-05, + "loss": 0.717, + "step": 15640 + }, + { + "epoch": 0.4793735441951698, + "grad_norm": 0.6673054247094409, + "learning_rate": 1.1151379751354224e-05, + "loss": 0.5982, + "step": 15641 + }, + { + "epoch": 0.479404192717911, + "grad_norm": 1.6244992432757117, + "learning_rate": 1.1150393711521406e-05, + "loss": 0.7296, + "step": 15642 + }, + { + "epoch": 0.4794348412406522, + "grad_norm": 1.4630431703812095, + "learning_rate": 1.1149407660353463e-05, + "loss": 0.6912, + "step": 15643 + }, + { + "epoch": 0.4794654897633934, + "grad_norm": 1.5573557624232472, + "learning_rate": 1.1148421597860112e-05, + "loss": 0.7951, + "step": 15644 + }, + { + "epoch": 0.47949613828613463, + "grad_norm": 1.4068308222157035, + "learning_rate": 1.1147435524051073e-05, + "loss": 0.7423, + "step": 15645 + }, + { + "epoch": 0.47952678680887584, + "grad_norm": 1.4449491264695753, + "learning_rate": 1.1146449438936056e-05, + "loss": 0.6327, + "step": 15646 + }, + { + "epoch": 0.47955743533161704, + "grad_norm": 0.6809468191439162, + "learning_rate": 1.1145463342524778e-05, + "loss": 0.5912, + "step": 15647 + }, + { + "epoch": 0.4795880838543582, + "grad_norm": 1.321559214835044, + "learning_rate": 1.1144477234826957e-05, + "loss": 0.6897, + "step": 15648 + }, + { + "epoch": 0.4796187323770994, + "grad_norm": 1.4060904996952737, + "learning_rate": 1.1143491115852311e-05, + "loss": 0.7044, + "step": 15649 + }, + { + "epoch": 0.4796493808998406, + "grad_norm": 1.4639487814264283, + "learning_rate": 1.1142504985610556e-05, + "loss": 0.7597, + "step": 15650 + }, + { + "epoch": 0.4796800294225818, + "grad_norm": 1.2993063186687133, + "learning_rate": 1.1141518844111401e-05, + "loss": 0.633, + "step": 15651 + }, + { + "epoch": 0.479710677945323, + "grad_norm": 1.4135003589875188, + "learning_rate": 1.1140532691364574e-05, + "loss": 0.6907, + "step": 15652 + }, + { + "epoch": 0.4797413264680642, + "grad_norm": 1.482304753483333, + "learning_rate": 1.1139546527379786e-05, + "loss": 0.6034, + "step": 15653 + }, + { + "epoch": 0.4797719749908054, + "grad_norm": 1.3255112168038423, + "learning_rate": 1.1138560352166753e-05, + "loss": 0.6132, + "step": 15654 + }, + { + "epoch": 0.47980262351354663, + "grad_norm": 1.273673332237862, + "learning_rate": 1.1137574165735192e-05, + "loss": 0.6276, + "step": 15655 + }, + { + "epoch": 0.47983327203628784, + "grad_norm": 0.6802589269959247, + "learning_rate": 1.1136587968094825e-05, + "loss": 0.5571, + "step": 15656 + }, + { + "epoch": 0.47986392055902904, + "grad_norm": 1.6785479401322163, + "learning_rate": 1.1135601759255363e-05, + "loss": 0.8427, + "step": 15657 + }, + { + "epoch": 0.47989456908177025, + "grad_norm": 1.4661324149694546, + "learning_rate": 1.1134615539226527e-05, + "loss": 0.7092, + "step": 15658 + }, + { + "epoch": 0.47992521760451146, + "grad_norm": 1.5106443911067144, + "learning_rate": 1.1133629308018035e-05, + "loss": 0.7911, + "step": 15659 + }, + { + "epoch": 0.47995586612725266, + "grad_norm": 1.6405023205981963, + "learning_rate": 1.1132643065639604e-05, + "loss": 0.8363, + "step": 15660 + }, + { + "epoch": 0.47998651464999387, + "grad_norm": 1.3662066964896746, + "learning_rate": 1.1131656812100951e-05, + "loss": 0.6426, + "step": 15661 + }, + { + "epoch": 0.4800171631727351, + "grad_norm": 1.3972354486222176, + "learning_rate": 1.1130670547411791e-05, + "loss": 0.7353, + "step": 15662 + }, + { + "epoch": 0.4800478116954763, + "grad_norm": 1.370152752782081, + "learning_rate": 1.1129684271581847e-05, + "loss": 0.607, + "step": 15663 + }, + { + "epoch": 0.4800784602182175, + "grad_norm": 1.561462377759944, + "learning_rate": 1.1128697984620835e-05, + "loss": 0.8272, + "step": 15664 + }, + { + "epoch": 0.4801091087409587, + "grad_norm": 1.5523850193190805, + "learning_rate": 1.1127711686538475e-05, + "loss": 0.7523, + "step": 15665 + }, + { + "epoch": 0.4801397572636999, + "grad_norm": 1.5563618016094505, + "learning_rate": 1.1126725377344475e-05, + "loss": 0.7407, + "step": 15666 + }, + { + "epoch": 0.4801704057864411, + "grad_norm": 1.4182223785758572, + "learning_rate": 1.112573905704857e-05, + "loss": 0.6948, + "step": 15667 + }, + { + "epoch": 0.4802010543091823, + "grad_norm": 1.237652994583824, + "learning_rate": 1.1124752725660469e-05, + "loss": 0.6106, + "step": 15668 + }, + { + "epoch": 0.4802317028319235, + "grad_norm": 1.487246836872521, + "learning_rate": 1.112376638318989e-05, + "loss": 0.6651, + "step": 15669 + }, + { + "epoch": 0.4802623513546647, + "grad_norm": 1.6772934737120304, + "learning_rate": 1.1122780029646551e-05, + "loss": 0.8394, + "step": 15670 + }, + { + "epoch": 0.4802929998774059, + "grad_norm": 1.4953208390156167, + "learning_rate": 1.1121793665040175e-05, + "loss": 0.6535, + "step": 15671 + }, + { + "epoch": 0.48032364840014713, + "grad_norm": 0.7248093899639423, + "learning_rate": 1.112080728938048e-05, + "loss": 0.613, + "step": 15672 + }, + { + "epoch": 0.48035429692288834, + "grad_norm": 1.4853758568074173, + "learning_rate": 1.111982090267718e-05, + "loss": 0.7238, + "step": 15673 + }, + { + "epoch": 0.48038494544562954, + "grad_norm": 0.6988379042894609, + "learning_rate": 1.1118834504940003e-05, + "loss": 0.5814, + "step": 15674 + }, + { + "epoch": 0.48041559396837075, + "grad_norm": 0.6524871816199036, + "learning_rate": 1.111784809617866e-05, + "loss": 0.5842, + "step": 15675 + }, + { + "epoch": 0.48044624249111195, + "grad_norm": 0.6751363868318222, + "learning_rate": 1.111686167640288e-05, + "loss": 0.585, + "step": 15676 + }, + { + "epoch": 0.48047689101385316, + "grad_norm": 0.6560169464754204, + "learning_rate": 1.111587524562237e-05, + "loss": 0.6097, + "step": 15677 + }, + { + "epoch": 0.48050753953659436, + "grad_norm": 1.6740945673179275, + "learning_rate": 1.1114888803846857e-05, + "loss": 0.6971, + "step": 15678 + }, + { + "epoch": 0.4805381880593355, + "grad_norm": 1.5100963432852301, + "learning_rate": 1.1113902351086059e-05, + "loss": 0.6897, + "step": 15679 + }, + { + "epoch": 0.4805688365820767, + "grad_norm": 1.384364436401669, + "learning_rate": 1.1112915887349697e-05, + "loss": 0.5998, + "step": 15680 + }, + { + "epoch": 0.4805994851048179, + "grad_norm": 1.7095595828966796, + "learning_rate": 1.1111929412647491e-05, + "loss": 0.7501, + "step": 15681 + }, + { + "epoch": 0.48063013362755913, + "grad_norm": 1.4735521154396525, + "learning_rate": 1.1110942926989158e-05, + "loss": 0.6408, + "step": 15682 + }, + { + "epoch": 0.48066078215030034, + "grad_norm": 1.4326063486490472, + "learning_rate": 1.1109956430384422e-05, + "loss": 0.7527, + "step": 15683 + }, + { + "epoch": 0.48069143067304154, + "grad_norm": 1.6307089673215813, + "learning_rate": 1.1108969922842997e-05, + "loss": 0.7006, + "step": 15684 + }, + { + "epoch": 0.48072207919578275, + "grad_norm": 0.7417332720260549, + "learning_rate": 1.1107983404374614e-05, + "loss": 0.5658, + "step": 15685 + }, + { + "epoch": 0.48075272771852395, + "grad_norm": 1.423441462522519, + "learning_rate": 1.110699687498898e-05, + "loss": 0.7093, + "step": 15686 + }, + { + "epoch": 0.48078337624126516, + "grad_norm": 1.3460775767151074, + "learning_rate": 1.1106010334695829e-05, + "loss": 0.7254, + "step": 15687 + }, + { + "epoch": 0.48081402476400636, + "grad_norm": 1.4499612602306144, + "learning_rate": 1.110502378350487e-05, + "loss": 0.6607, + "step": 15688 + }, + { + "epoch": 0.48084467328674757, + "grad_norm": 1.5282422068069974, + "learning_rate": 1.1104037221425834e-05, + "loss": 0.7561, + "step": 15689 + }, + { + "epoch": 0.4808753218094888, + "grad_norm": 1.45729331379763, + "learning_rate": 1.1103050648468431e-05, + "loss": 0.6681, + "step": 15690 + }, + { + "epoch": 0.48090597033223, + "grad_norm": 1.5274263046873824, + "learning_rate": 1.1102064064642395e-05, + "loss": 0.7816, + "step": 15691 + }, + { + "epoch": 0.4809366188549712, + "grad_norm": 0.6812590907213132, + "learning_rate": 1.1101077469957435e-05, + "loss": 0.5841, + "step": 15692 + }, + { + "epoch": 0.4809672673777124, + "grad_norm": 1.4760945036945445, + "learning_rate": 1.1100090864423279e-05, + "loss": 0.7282, + "step": 15693 + }, + { + "epoch": 0.4809979159004536, + "grad_norm": 1.455330613734005, + "learning_rate": 1.109910424804964e-05, + "loss": 0.7163, + "step": 15694 + }, + { + "epoch": 0.4810285644231948, + "grad_norm": 1.451946727889204, + "learning_rate": 1.1098117620846256e-05, + "loss": 0.7232, + "step": 15695 + }, + { + "epoch": 0.481059212945936, + "grad_norm": 1.6584680089327906, + "learning_rate": 1.109713098282283e-05, + "loss": 0.7487, + "step": 15696 + }, + { + "epoch": 0.4810898614686772, + "grad_norm": 1.4246918272462643, + "learning_rate": 1.1096144333989097e-05, + "loss": 0.5814, + "step": 15697 + }, + { + "epoch": 0.4811205099914184, + "grad_norm": 1.3275210530298285, + "learning_rate": 1.1095157674354768e-05, + "loss": 0.621, + "step": 15698 + }, + { + "epoch": 0.4811511585141596, + "grad_norm": 1.5408804623535588, + "learning_rate": 1.1094171003929574e-05, + "loss": 0.8072, + "step": 15699 + }, + { + "epoch": 0.48118180703690083, + "grad_norm": 1.356705357708147, + "learning_rate": 1.1093184322723231e-05, + "loss": 0.7865, + "step": 15700 + }, + { + "epoch": 0.48121245555964204, + "grad_norm": 1.3517745259599983, + "learning_rate": 1.1092197630745465e-05, + "loss": 0.6362, + "step": 15701 + }, + { + "epoch": 0.48124310408238324, + "grad_norm": 1.4179076515684417, + "learning_rate": 1.1091210928005996e-05, + "loss": 0.6904, + "step": 15702 + }, + { + "epoch": 0.48127375260512445, + "grad_norm": 1.5634441256223512, + "learning_rate": 1.1090224214514546e-05, + "loss": 0.7848, + "step": 15703 + }, + { + "epoch": 0.48130440112786566, + "grad_norm": 1.5149861879219282, + "learning_rate": 1.108923749028084e-05, + "loss": 0.5924, + "step": 15704 + }, + { + "epoch": 0.48133504965060686, + "grad_norm": 1.3077771733498014, + "learning_rate": 1.1088250755314594e-05, + "loss": 0.663, + "step": 15705 + }, + { + "epoch": 0.48136569817334807, + "grad_norm": 0.7085098308951373, + "learning_rate": 1.1087264009625538e-05, + "loss": 0.5867, + "step": 15706 + }, + { + "epoch": 0.4813963466960893, + "grad_norm": 1.2778780747414524, + "learning_rate": 1.1086277253223393e-05, + "loss": 0.6217, + "step": 15707 + }, + { + "epoch": 0.4814269952188305, + "grad_norm": 1.3305613849325593, + "learning_rate": 1.1085290486117876e-05, + "loss": 0.64, + "step": 15708 + }, + { + "epoch": 0.4814576437415717, + "grad_norm": 1.4707571887704032, + "learning_rate": 1.1084303708318715e-05, + "loss": 0.6604, + "step": 15709 + }, + { + "epoch": 0.48148829226431283, + "grad_norm": 1.4839576819070013, + "learning_rate": 1.1083316919835634e-05, + "loss": 0.7214, + "step": 15710 + }, + { + "epoch": 0.48151894078705404, + "grad_norm": 1.30634029940567, + "learning_rate": 1.1082330120678355e-05, + "loss": 0.6636, + "step": 15711 + }, + { + "epoch": 0.48154958930979525, + "grad_norm": 1.4926285313702288, + "learning_rate": 1.1081343310856597e-05, + "loss": 0.787, + "step": 15712 + }, + { + "epoch": 0.48158023783253645, + "grad_norm": 1.5744748848097614, + "learning_rate": 1.1080356490380088e-05, + "loss": 0.5931, + "step": 15713 + }, + { + "epoch": 0.48161088635527766, + "grad_norm": 1.599166616640824, + "learning_rate": 1.1079369659258551e-05, + "loss": 0.6437, + "step": 15714 + }, + { + "epoch": 0.48164153487801886, + "grad_norm": 1.6819524549324614, + "learning_rate": 1.1078382817501709e-05, + "loss": 0.7029, + "step": 15715 + }, + { + "epoch": 0.48167218340076007, + "grad_norm": 1.4471591447765089, + "learning_rate": 1.1077395965119284e-05, + "loss": 0.7008, + "step": 15716 + }, + { + "epoch": 0.4817028319235013, + "grad_norm": 1.42033814951118, + "learning_rate": 1.1076409102121002e-05, + "loss": 0.8072, + "step": 15717 + }, + { + "epoch": 0.4817334804462425, + "grad_norm": 1.4428015472951687, + "learning_rate": 1.1075422228516586e-05, + "loss": 0.7007, + "step": 15718 + }, + { + "epoch": 0.4817641289689837, + "grad_norm": 1.388683223646549, + "learning_rate": 1.107443534431576e-05, + "loss": 0.7028, + "step": 15719 + }, + { + "epoch": 0.4817947774917249, + "grad_norm": 1.5330735000521043, + "learning_rate": 1.1073448449528243e-05, + "loss": 0.7112, + "step": 15720 + }, + { + "epoch": 0.4818254260144661, + "grad_norm": 1.5147336042175323, + "learning_rate": 1.1072461544163768e-05, + "loss": 0.7421, + "step": 15721 + }, + { + "epoch": 0.4818560745372073, + "grad_norm": 1.7248779088686748, + "learning_rate": 1.1071474628232054e-05, + "loss": 0.8071, + "step": 15722 + }, + { + "epoch": 0.4818867230599485, + "grad_norm": 1.5765001791926012, + "learning_rate": 1.1070487701742829e-05, + "loss": 0.7165, + "step": 15723 + }, + { + "epoch": 0.4819173715826897, + "grad_norm": 1.431678450794307, + "learning_rate": 1.106950076470581e-05, + "loss": 0.6614, + "step": 15724 + }, + { + "epoch": 0.4819480201054309, + "grad_norm": 1.5909446396980946, + "learning_rate": 1.106851381713073e-05, + "loss": 0.7678, + "step": 15725 + }, + { + "epoch": 0.4819786686281721, + "grad_norm": 1.48179106338599, + "learning_rate": 1.106752685902731e-05, + "loss": 0.7236, + "step": 15726 + }, + { + "epoch": 0.48200931715091333, + "grad_norm": 1.392730081867201, + "learning_rate": 1.1066539890405271e-05, + "loss": 0.7092, + "step": 15727 + }, + { + "epoch": 0.48203996567365454, + "grad_norm": 1.8910600820691224, + "learning_rate": 1.1065552911274345e-05, + "loss": 0.7277, + "step": 15728 + }, + { + "epoch": 0.48207061419639574, + "grad_norm": 1.2979230320420223, + "learning_rate": 1.1064565921644251e-05, + "loss": 0.5527, + "step": 15729 + }, + { + "epoch": 0.48210126271913695, + "grad_norm": 1.3433834098509068, + "learning_rate": 1.106357892152472e-05, + "loss": 0.657, + "step": 15730 + }, + { + "epoch": 0.48213191124187815, + "grad_norm": 0.689259246832175, + "learning_rate": 1.106259191092547e-05, + "loss": 0.5685, + "step": 15731 + }, + { + "epoch": 0.48216255976461936, + "grad_norm": 1.5321396518937025, + "learning_rate": 1.1061604889856233e-05, + "loss": 0.8152, + "step": 15732 + }, + { + "epoch": 0.48219320828736056, + "grad_norm": 0.6480327511227367, + "learning_rate": 1.1060617858326728e-05, + "loss": 0.5576, + "step": 15733 + }, + { + "epoch": 0.48222385681010177, + "grad_norm": 1.4125083933747553, + "learning_rate": 1.1059630816346687e-05, + "loss": 0.6941, + "step": 15734 + }, + { + "epoch": 0.482254505332843, + "grad_norm": 1.4179492349494245, + "learning_rate": 1.1058643763925832e-05, + "loss": 0.776, + "step": 15735 + }, + { + "epoch": 0.4822851538555842, + "grad_norm": 1.4804072762587472, + "learning_rate": 1.1057656701073889e-05, + "loss": 0.6462, + "step": 15736 + }, + { + "epoch": 0.4823158023783254, + "grad_norm": 1.6077811670755966, + "learning_rate": 1.1056669627800582e-05, + "loss": 0.6926, + "step": 15737 + }, + { + "epoch": 0.4823464509010666, + "grad_norm": 1.7627766200790311, + "learning_rate": 1.105568254411564e-05, + "loss": 0.7533, + "step": 15738 + }, + { + "epoch": 0.4823770994238078, + "grad_norm": 1.4124728776433366, + "learning_rate": 1.105469545002879e-05, + "loss": 0.7123, + "step": 15739 + }, + { + "epoch": 0.482407747946549, + "grad_norm": 1.4113611730893536, + "learning_rate": 1.1053708345549755e-05, + "loss": 0.6949, + "step": 15740 + }, + { + "epoch": 0.48243839646929015, + "grad_norm": 1.7832595247429195, + "learning_rate": 1.1052721230688259e-05, + "loss": 0.7553, + "step": 15741 + }, + { + "epoch": 0.48246904499203136, + "grad_norm": 1.3795474375991148, + "learning_rate": 1.1051734105454032e-05, + "loss": 0.6775, + "step": 15742 + }, + { + "epoch": 0.48249969351477257, + "grad_norm": 0.6789790649189218, + "learning_rate": 1.1050746969856802e-05, + "loss": 0.5864, + "step": 15743 + }, + { + "epoch": 0.48253034203751377, + "grad_norm": 1.6094399279708196, + "learning_rate": 1.1049759823906291e-05, + "loss": 0.7133, + "step": 15744 + }, + { + "epoch": 0.482560990560255, + "grad_norm": 1.5321299956988226, + "learning_rate": 1.1048772667612233e-05, + "loss": 0.7291, + "step": 15745 + }, + { + "epoch": 0.4825916390829962, + "grad_norm": 1.5432240929725445, + "learning_rate": 1.1047785500984342e-05, + "loss": 0.6607, + "step": 15746 + }, + { + "epoch": 0.4826222876057374, + "grad_norm": 1.5128436682419772, + "learning_rate": 1.1046798324032358e-05, + "loss": 0.7629, + "step": 15747 + }, + { + "epoch": 0.4826529361284786, + "grad_norm": 1.796701212904075, + "learning_rate": 1.1045811136765999e-05, + "loss": 0.804, + "step": 15748 + }, + { + "epoch": 0.4826835846512198, + "grad_norm": 0.6656701767528711, + "learning_rate": 1.1044823939195e-05, + "loss": 0.5649, + "step": 15749 + }, + { + "epoch": 0.482714233173961, + "grad_norm": 1.588296004673238, + "learning_rate": 1.1043836731329078e-05, + "loss": 0.7258, + "step": 15750 + }, + { + "epoch": 0.4827448816967022, + "grad_norm": 1.3432115066755883, + "learning_rate": 1.1042849513177968e-05, + "loss": 0.632, + "step": 15751 + }, + { + "epoch": 0.4827755302194434, + "grad_norm": 0.6462725767844827, + "learning_rate": 1.1041862284751394e-05, + "loss": 0.5809, + "step": 15752 + }, + { + "epoch": 0.4828061787421846, + "grad_norm": 1.5610491023269284, + "learning_rate": 1.1040875046059085e-05, + "loss": 0.6815, + "step": 15753 + }, + { + "epoch": 0.4828368272649258, + "grad_norm": 1.4431348829262673, + "learning_rate": 1.1039887797110769e-05, + "loss": 0.6993, + "step": 15754 + }, + { + "epoch": 0.48286747578766703, + "grad_norm": 1.5107094133373389, + "learning_rate": 1.1038900537916168e-05, + "loss": 0.6076, + "step": 15755 + }, + { + "epoch": 0.48289812431040824, + "grad_norm": 1.4578263971605578, + "learning_rate": 1.1037913268485018e-05, + "loss": 0.7653, + "step": 15756 + }, + { + "epoch": 0.48292877283314944, + "grad_norm": 1.6090367590170156, + "learning_rate": 1.1036925988827045e-05, + "loss": 0.778, + "step": 15757 + }, + { + "epoch": 0.48295942135589065, + "grad_norm": 1.3983194286906862, + "learning_rate": 1.1035938698951974e-05, + "loss": 0.7464, + "step": 15758 + }, + { + "epoch": 0.48299006987863186, + "grad_norm": 1.4620822395658397, + "learning_rate": 1.1034951398869529e-05, + "loss": 0.7528, + "step": 15759 + }, + { + "epoch": 0.48302071840137306, + "grad_norm": 1.5420859549478756, + "learning_rate": 1.1033964088589451e-05, + "loss": 0.7775, + "step": 15760 + }, + { + "epoch": 0.48305136692411427, + "grad_norm": 1.5182016693045144, + "learning_rate": 1.1032976768121452e-05, + "loss": 0.7279, + "step": 15761 + }, + { + "epoch": 0.4830820154468555, + "grad_norm": 1.5132563871854308, + "learning_rate": 1.1031989437475274e-05, + "loss": 0.7443, + "step": 15762 + }, + { + "epoch": 0.4831126639695967, + "grad_norm": 1.4822444190307948, + "learning_rate": 1.1031002096660637e-05, + "loss": 0.7865, + "step": 15763 + }, + { + "epoch": 0.4831433124923379, + "grad_norm": 1.502907652621568, + "learning_rate": 1.1030014745687274e-05, + "loss": 0.7, + "step": 15764 + }, + { + "epoch": 0.4831739610150791, + "grad_norm": 1.4578120286278309, + "learning_rate": 1.1029027384564914e-05, + "loss": 0.7854, + "step": 15765 + }, + { + "epoch": 0.4832046095378203, + "grad_norm": 1.5565320278998152, + "learning_rate": 1.1028040013303282e-05, + "loss": 0.6539, + "step": 15766 + }, + { + "epoch": 0.4832352580605615, + "grad_norm": 1.5943533601074007, + "learning_rate": 1.1027052631912107e-05, + "loss": 0.6396, + "step": 15767 + }, + { + "epoch": 0.4832659065833027, + "grad_norm": 1.8116955057488464, + "learning_rate": 1.1026065240401122e-05, + "loss": 0.8296, + "step": 15768 + }, + { + "epoch": 0.4832965551060439, + "grad_norm": 1.7376772877344553, + "learning_rate": 1.1025077838780054e-05, + "loss": 0.7436, + "step": 15769 + }, + { + "epoch": 0.4833272036287851, + "grad_norm": 1.4965873076460892, + "learning_rate": 1.102409042705863e-05, + "loss": 0.6835, + "step": 15770 + }, + { + "epoch": 0.4833578521515263, + "grad_norm": 1.4426118513694237, + "learning_rate": 1.102310300524658e-05, + "loss": 0.7334, + "step": 15771 + }, + { + "epoch": 0.4833885006742675, + "grad_norm": 1.285996276550454, + "learning_rate": 1.1022115573353637e-05, + "loss": 0.6909, + "step": 15772 + }, + { + "epoch": 0.4834191491970087, + "grad_norm": 1.5041676338617265, + "learning_rate": 1.1021128131389528e-05, + "loss": 0.6514, + "step": 15773 + }, + { + "epoch": 0.4834497977197499, + "grad_norm": 1.4823355719914688, + "learning_rate": 1.1020140679363979e-05, + "loss": 0.744, + "step": 15774 + }, + { + "epoch": 0.4834804462424911, + "grad_norm": 1.415238709097431, + "learning_rate": 1.1019153217286727e-05, + "loss": 0.6199, + "step": 15775 + }, + { + "epoch": 0.4835110947652323, + "grad_norm": 1.6072194769764931, + "learning_rate": 1.1018165745167494e-05, + "loss": 0.7514, + "step": 15776 + }, + { + "epoch": 0.4835417432879735, + "grad_norm": 1.6166990706301705, + "learning_rate": 1.1017178263016017e-05, + "loss": 0.9019, + "step": 15777 + }, + { + "epoch": 0.4835723918107147, + "grad_norm": 1.299001169761225, + "learning_rate": 1.101619077084202e-05, + "loss": 0.6414, + "step": 15778 + }, + { + "epoch": 0.4836030403334559, + "grad_norm": 1.5710175574924639, + "learning_rate": 1.1015203268655235e-05, + "loss": 0.6674, + "step": 15779 + }, + { + "epoch": 0.4836336888561971, + "grad_norm": 1.3749383633388246, + "learning_rate": 1.1014215756465394e-05, + "loss": 0.6661, + "step": 15780 + }, + { + "epoch": 0.4836643373789383, + "grad_norm": 1.495711396285575, + "learning_rate": 1.1013228234282223e-05, + "loss": 0.8026, + "step": 15781 + }, + { + "epoch": 0.48369498590167953, + "grad_norm": 1.5002072076875623, + "learning_rate": 1.1012240702115458e-05, + "loss": 0.6975, + "step": 15782 + }, + { + "epoch": 0.48372563442442074, + "grad_norm": 1.5237758435958737, + "learning_rate": 1.1011253159974822e-05, + "loss": 0.7454, + "step": 15783 + }, + { + "epoch": 0.48375628294716194, + "grad_norm": 1.4644895620873952, + "learning_rate": 1.1010265607870057e-05, + "loss": 0.6794, + "step": 15784 + }, + { + "epoch": 0.48378693146990315, + "grad_norm": 1.4851548796538183, + "learning_rate": 1.100927804581088e-05, + "loss": 0.7577, + "step": 15785 + }, + { + "epoch": 0.48381757999264435, + "grad_norm": 1.4996538630624652, + "learning_rate": 1.100829047380703e-05, + "loss": 0.6556, + "step": 15786 + }, + { + "epoch": 0.48384822851538556, + "grad_norm": 1.4318258695429456, + "learning_rate": 1.1007302891868238e-05, + "loss": 0.6948, + "step": 15787 + }, + { + "epoch": 0.48387887703812676, + "grad_norm": 1.3706735055461832, + "learning_rate": 1.1006315300004231e-05, + "loss": 0.5921, + "step": 15788 + }, + { + "epoch": 0.48390952556086797, + "grad_norm": 1.4505032884935283, + "learning_rate": 1.1005327698224742e-05, + "loss": 0.7594, + "step": 15789 + }, + { + "epoch": 0.4839401740836092, + "grad_norm": 1.4002004724505432, + "learning_rate": 1.1004340086539503e-05, + "loss": 0.7573, + "step": 15790 + }, + { + "epoch": 0.4839708226063504, + "grad_norm": 1.3489541485408634, + "learning_rate": 1.1003352464958244e-05, + "loss": 0.6142, + "step": 15791 + }, + { + "epoch": 0.4840014711290916, + "grad_norm": 1.4096365844961816, + "learning_rate": 1.1002364833490694e-05, + "loss": 0.6893, + "step": 15792 + }, + { + "epoch": 0.4840321196518328, + "grad_norm": 1.3346678871692361, + "learning_rate": 1.100137719214659e-05, + "loss": 0.6122, + "step": 15793 + }, + { + "epoch": 0.484062768174574, + "grad_norm": 1.358398533642615, + "learning_rate": 1.100038954093566e-05, + "loss": 0.6028, + "step": 15794 + }, + { + "epoch": 0.4840934166973152, + "grad_norm": 1.5837121023931342, + "learning_rate": 1.0999401879867635e-05, + "loss": 0.7903, + "step": 15795 + }, + { + "epoch": 0.4841240652200564, + "grad_norm": 1.5643881297919409, + "learning_rate": 1.0998414208952247e-05, + "loss": 0.7639, + "step": 15796 + }, + { + "epoch": 0.4841547137427976, + "grad_norm": 0.6468169468199508, + "learning_rate": 1.099742652819923e-05, + "loss": 0.5484, + "step": 15797 + }, + { + "epoch": 0.4841853622655388, + "grad_norm": 0.6749020248598513, + "learning_rate": 1.0996438837618309e-05, + "loss": 0.5899, + "step": 15798 + }, + { + "epoch": 0.48421601078828, + "grad_norm": 1.720900685298305, + "learning_rate": 1.0995451137219228e-05, + "loss": 0.7299, + "step": 15799 + }, + { + "epoch": 0.48424665931102123, + "grad_norm": 1.5197288212831994, + "learning_rate": 1.0994463427011708e-05, + "loss": 0.6745, + "step": 15800 + }, + { + "epoch": 0.48427730783376244, + "grad_norm": 1.6454100207549098, + "learning_rate": 1.0993475707005488e-05, + "loss": 0.7175, + "step": 15801 + }, + { + "epoch": 0.48430795635650364, + "grad_norm": 1.3066473748324623, + "learning_rate": 1.0992487977210295e-05, + "loss": 0.6924, + "step": 15802 + }, + { + "epoch": 0.4843386048792448, + "grad_norm": 1.377991057319595, + "learning_rate": 1.0991500237635869e-05, + "loss": 0.6657, + "step": 15803 + }, + { + "epoch": 0.484369253401986, + "grad_norm": 1.4247701784198352, + "learning_rate": 1.0990512488291931e-05, + "loss": 0.6974, + "step": 15804 + }, + { + "epoch": 0.4843999019247272, + "grad_norm": 1.4572956855844483, + "learning_rate": 1.0989524729188224e-05, + "loss": 0.674, + "step": 15805 + }, + { + "epoch": 0.4844305504474684, + "grad_norm": 1.4283294641016016, + "learning_rate": 1.0988536960334475e-05, + "loss": 0.7743, + "step": 15806 + }, + { + "epoch": 0.4844611989702096, + "grad_norm": 1.5063085688903222, + "learning_rate": 1.0987549181740418e-05, + "loss": 0.791, + "step": 15807 + }, + { + "epoch": 0.4844918474929508, + "grad_norm": 1.47041518376188, + "learning_rate": 1.0986561393415788e-05, + "loss": 0.748, + "step": 15808 + }, + { + "epoch": 0.48452249601569203, + "grad_norm": 1.6550235189052556, + "learning_rate": 1.0985573595370314e-05, + "loss": 0.725, + "step": 15809 + }, + { + "epoch": 0.48455314453843323, + "grad_norm": 1.5896011189669068, + "learning_rate": 1.0984585787613732e-05, + "loss": 0.8557, + "step": 15810 + }, + { + "epoch": 0.48458379306117444, + "grad_norm": 1.3317500809232505, + "learning_rate": 1.0983597970155777e-05, + "loss": 0.6518, + "step": 15811 + }, + { + "epoch": 0.48461444158391564, + "grad_norm": 1.497911603772777, + "learning_rate": 1.0982610143006178e-05, + "loss": 0.716, + "step": 15812 + }, + { + "epoch": 0.48464509010665685, + "grad_norm": 1.483403020507696, + "learning_rate": 1.0981622306174669e-05, + "loss": 0.6709, + "step": 15813 + }, + { + "epoch": 0.48467573862939806, + "grad_norm": 1.4626668978926152, + "learning_rate": 1.0980634459670987e-05, + "loss": 0.5837, + "step": 15814 + }, + { + "epoch": 0.48470638715213926, + "grad_norm": 0.716801678625376, + "learning_rate": 1.097964660350486e-05, + "loss": 0.5625, + "step": 15815 + }, + { + "epoch": 0.48473703567488047, + "grad_norm": 1.5739625723180726, + "learning_rate": 1.0978658737686024e-05, + "loss": 0.6933, + "step": 15816 + }, + { + "epoch": 0.4847676841976217, + "grad_norm": 1.5196542946594578, + "learning_rate": 1.0977670862224212e-05, + "loss": 0.7674, + "step": 15817 + }, + { + "epoch": 0.4847983327203629, + "grad_norm": 1.5407905590512254, + "learning_rate": 1.0976682977129165e-05, + "loss": 0.8176, + "step": 15818 + }, + { + "epoch": 0.4848289812431041, + "grad_norm": 1.4275813936380906, + "learning_rate": 1.0975695082410604e-05, + "loss": 0.5715, + "step": 15819 + }, + { + "epoch": 0.4848596297658453, + "grad_norm": 1.5428450278689527, + "learning_rate": 1.0974707178078271e-05, + "loss": 0.7288, + "step": 15820 + }, + { + "epoch": 0.4848902782885865, + "grad_norm": 1.5312306958727862, + "learning_rate": 1.09737192641419e-05, + "loss": 0.7073, + "step": 15821 + }, + { + "epoch": 0.4849209268113277, + "grad_norm": 1.647719467669106, + "learning_rate": 1.0972731340611224e-05, + "loss": 0.7357, + "step": 15822 + }, + { + "epoch": 0.4849515753340689, + "grad_norm": 1.556061086420556, + "learning_rate": 1.0971743407495978e-05, + "loss": 0.7578, + "step": 15823 + }, + { + "epoch": 0.4849822238568101, + "grad_norm": 1.554570091468257, + "learning_rate": 1.0970755464805893e-05, + "loss": 0.6837, + "step": 15824 + }, + { + "epoch": 0.4850128723795513, + "grad_norm": 1.255263162251536, + "learning_rate": 1.0969767512550708e-05, + "loss": 0.6274, + "step": 15825 + }, + { + "epoch": 0.4850435209022925, + "grad_norm": 0.7040943463359869, + "learning_rate": 1.0968779550740157e-05, + "loss": 0.543, + "step": 15826 + }, + { + "epoch": 0.48507416942503373, + "grad_norm": 1.4746548153847134, + "learning_rate": 1.096779157938397e-05, + "loss": 0.7178, + "step": 15827 + }, + { + "epoch": 0.48510481794777494, + "grad_norm": 1.4973623854879696, + "learning_rate": 1.0966803598491886e-05, + "loss": 0.7872, + "step": 15828 + }, + { + "epoch": 0.48513546647051614, + "grad_norm": 1.7997766281053371, + "learning_rate": 1.096581560807364e-05, + "loss": 0.7757, + "step": 15829 + }, + { + "epoch": 0.48516611499325735, + "grad_norm": 1.4055294130804672, + "learning_rate": 1.0964827608138966e-05, + "loss": 0.8135, + "step": 15830 + }, + { + "epoch": 0.48519676351599855, + "grad_norm": 1.4593942337953596, + "learning_rate": 1.0963839598697598e-05, + "loss": 0.6742, + "step": 15831 + }, + { + "epoch": 0.48522741203873976, + "grad_norm": 1.4198449866484035, + "learning_rate": 1.096285157975927e-05, + "loss": 0.7357, + "step": 15832 + }, + { + "epoch": 0.48525806056148096, + "grad_norm": 1.4707574150079505, + "learning_rate": 1.096186355133372e-05, + "loss": 0.6697, + "step": 15833 + }, + { + "epoch": 0.4852887090842221, + "grad_norm": 1.4132361160817417, + "learning_rate": 1.0960875513430685e-05, + "loss": 0.5975, + "step": 15834 + }, + { + "epoch": 0.4853193576069633, + "grad_norm": 1.4765984093116364, + "learning_rate": 1.0959887466059894e-05, + "loss": 0.7947, + "step": 15835 + }, + { + "epoch": 0.4853500061297045, + "grad_norm": 1.554268293458807, + "learning_rate": 1.0958899409231087e-05, + "loss": 0.7264, + "step": 15836 + }, + { + "epoch": 0.48538065465244573, + "grad_norm": 1.439147493647531, + "learning_rate": 1.0957911342954e-05, + "loss": 0.6727, + "step": 15837 + }, + { + "epoch": 0.48541130317518694, + "grad_norm": 1.445473908830434, + "learning_rate": 1.095692326723837e-05, + "loss": 0.7386, + "step": 15838 + }, + { + "epoch": 0.48544195169792814, + "grad_norm": 0.7179568011929133, + "learning_rate": 1.0955935182093924e-05, + "loss": 0.5963, + "step": 15839 + }, + { + "epoch": 0.48547260022066935, + "grad_norm": 1.5398512684726187, + "learning_rate": 1.0954947087530407e-05, + "loss": 0.7402, + "step": 15840 + }, + { + "epoch": 0.48550324874341055, + "grad_norm": 1.4013026705754505, + "learning_rate": 1.0953958983557554e-05, + "loss": 0.7151, + "step": 15841 + }, + { + "epoch": 0.48553389726615176, + "grad_norm": 1.4984929734503927, + "learning_rate": 1.0952970870185098e-05, + "loss": 0.6812, + "step": 15842 + }, + { + "epoch": 0.48556454578889297, + "grad_norm": 1.4785806589588146, + "learning_rate": 1.0951982747422774e-05, + "loss": 0.7764, + "step": 15843 + }, + { + "epoch": 0.48559519431163417, + "grad_norm": 1.5312453935882282, + "learning_rate": 1.095099461528032e-05, + "loss": 0.8622, + "step": 15844 + }, + { + "epoch": 0.4856258428343754, + "grad_norm": 0.7113579064726351, + "learning_rate": 1.0950006473767476e-05, + "loss": 0.5939, + "step": 15845 + }, + { + "epoch": 0.4856564913571166, + "grad_norm": 1.3585597194809942, + "learning_rate": 1.0949018322893975e-05, + "loss": 0.7245, + "step": 15846 + }, + { + "epoch": 0.4856871398798578, + "grad_norm": 1.299151303368403, + "learning_rate": 1.0948030162669552e-05, + "loss": 0.6455, + "step": 15847 + }, + { + "epoch": 0.485717788402599, + "grad_norm": 0.6952620270406297, + "learning_rate": 1.0947041993103944e-05, + "loss": 0.5984, + "step": 15848 + }, + { + "epoch": 0.4857484369253402, + "grad_norm": 1.5427361056084175, + "learning_rate": 1.0946053814206892e-05, + "loss": 0.7918, + "step": 15849 + }, + { + "epoch": 0.4857790854480814, + "grad_norm": 1.3957101199724569, + "learning_rate": 1.0945065625988126e-05, + "loss": 0.6312, + "step": 15850 + }, + { + "epoch": 0.4858097339708226, + "grad_norm": 0.6614792860994428, + "learning_rate": 1.094407742845739e-05, + "loss": 0.5978, + "step": 15851 + }, + { + "epoch": 0.4858403824935638, + "grad_norm": 1.4348312561018752, + "learning_rate": 1.0943089221624414e-05, + "loss": 0.6696, + "step": 15852 + }, + { + "epoch": 0.485871031016305, + "grad_norm": 1.5403343484399272, + "learning_rate": 1.0942101005498944e-05, + "loss": 0.7088, + "step": 15853 + }, + { + "epoch": 0.4859016795390462, + "grad_norm": 1.5231778809028969, + "learning_rate": 1.0941112780090707e-05, + "loss": 0.7497, + "step": 15854 + }, + { + "epoch": 0.48593232806178743, + "grad_norm": 1.56972838107031, + "learning_rate": 1.0940124545409447e-05, + "loss": 0.7553, + "step": 15855 + }, + { + "epoch": 0.48596297658452864, + "grad_norm": 1.2661527035378228, + "learning_rate": 1.09391363014649e-05, + "loss": 0.6847, + "step": 15856 + }, + { + "epoch": 0.48599362510726984, + "grad_norm": 1.368659542510779, + "learning_rate": 1.0938148048266803e-05, + "loss": 0.6758, + "step": 15857 + }, + { + "epoch": 0.48602427363001105, + "grad_norm": 1.6463003858142797, + "learning_rate": 1.0937159785824892e-05, + "loss": 0.7214, + "step": 15858 + }, + { + "epoch": 0.48605492215275226, + "grad_norm": 1.657783863734666, + "learning_rate": 1.0936171514148905e-05, + "loss": 0.7789, + "step": 15859 + }, + { + "epoch": 0.48608557067549346, + "grad_norm": 1.415591217608323, + "learning_rate": 1.0935183233248581e-05, + "loss": 0.8059, + "step": 15860 + }, + { + "epoch": 0.48611621919823467, + "grad_norm": 1.5805856924918604, + "learning_rate": 1.0934194943133658e-05, + "loss": 0.6799, + "step": 15861 + }, + { + "epoch": 0.4861468677209759, + "grad_norm": 1.3740433355330168, + "learning_rate": 1.0933206643813874e-05, + "loss": 0.6175, + "step": 15862 + }, + { + "epoch": 0.4861775162437171, + "grad_norm": 1.4042170631633704, + "learning_rate": 1.0932218335298966e-05, + "loss": 0.6812, + "step": 15863 + }, + { + "epoch": 0.4862081647664583, + "grad_norm": 0.7029856870433603, + "learning_rate": 1.0931230017598671e-05, + "loss": 0.5875, + "step": 15864 + }, + { + "epoch": 0.48623881328919943, + "grad_norm": 1.3103647335841437, + "learning_rate": 1.0930241690722727e-05, + "loss": 0.6068, + "step": 15865 + }, + { + "epoch": 0.48626946181194064, + "grad_norm": 1.5728764531725337, + "learning_rate": 1.0929253354680876e-05, + "loss": 0.6349, + "step": 15866 + }, + { + "epoch": 0.48630011033468185, + "grad_norm": 1.2869196028118284, + "learning_rate": 1.0928265009482852e-05, + "loss": 0.6117, + "step": 15867 + }, + { + "epoch": 0.48633075885742305, + "grad_norm": 1.450764092955318, + "learning_rate": 1.09272766551384e-05, + "loss": 0.6676, + "step": 15868 + }, + { + "epoch": 0.48636140738016426, + "grad_norm": 1.563923180620401, + "learning_rate": 1.0926288291657248e-05, + "loss": 0.7392, + "step": 15869 + }, + { + "epoch": 0.48639205590290546, + "grad_norm": 1.524566602939217, + "learning_rate": 1.0925299919049144e-05, + "loss": 0.765, + "step": 15870 + }, + { + "epoch": 0.48642270442564667, + "grad_norm": 1.344513482272133, + "learning_rate": 1.092431153732382e-05, + "loss": 0.6642, + "step": 15871 + }, + { + "epoch": 0.4864533529483879, + "grad_norm": 0.6560512519056504, + "learning_rate": 1.0923323146491023e-05, + "loss": 0.5682, + "step": 15872 + }, + { + "epoch": 0.4864840014711291, + "grad_norm": 1.519135896808422, + "learning_rate": 1.0922334746560481e-05, + "loss": 0.7362, + "step": 15873 + }, + { + "epoch": 0.4865146499938703, + "grad_norm": 1.3806463735827754, + "learning_rate": 1.0921346337541942e-05, + "loss": 0.7381, + "step": 15874 + }, + { + "epoch": 0.4865452985166115, + "grad_norm": 1.388176054165488, + "learning_rate": 1.0920357919445142e-05, + "loss": 0.7542, + "step": 15875 + }, + { + "epoch": 0.4865759470393527, + "grad_norm": 1.4736146714785099, + "learning_rate": 1.0919369492279819e-05, + "loss": 0.708, + "step": 15876 + }, + { + "epoch": 0.4866065955620939, + "grad_norm": 0.6283679979896549, + "learning_rate": 1.0918381056055714e-05, + "loss": 0.5461, + "step": 15877 + }, + { + "epoch": 0.4866372440848351, + "grad_norm": 1.7419156979364254, + "learning_rate": 1.0917392610782563e-05, + "loss": 0.6248, + "step": 15878 + }, + { + "epoch": 0.4866678926075763, + "grad_norm": 0.6845856536943854, + "learning_rate": 1.0916404156470111e-05, + "loss": 0.5867, + "step": 15879 + }, + { + "epoch": 0.4866985411303175, + "grad_norm": 1.4454707907817852, + "learning_rate": 1.0915415693128092e-05, + "loss": 0.7201, + "step": 15880 + }, + { + "epoch": 0.4867291896530587, + "grad_norm": 1.519969667343708, + "learning_rate": 1.091442722076625e-05, + "loss": 0.799, + "step": 15881 + }, + { + "epoch": 0.48675983817579993, + "grad_norm": 0.6564955297810566, + "learning_rate": 1.0913438739394321e-05, + "loss": 0.5672, + "step": 15882 + }, + { + "epoch": 0.48679048669854114, + "grad_norm": 0.6504132720172181, + "learning_rate": 1.0912450249022048e-05, + "loss": 0.5985, + "step": 15883 + }, + { + "epoch": 0.48682113522128234, + "grad_norm": 1.324662721131657, + "learning_rate": 1.0911461749659168e-05, + "loss": 0.7421, + "step": 15884 + }, + { + "epoch": 0.48685178374402355, + "grad_norm": 1.4349909017856548, + "learning_rate": 1.0910473241315424e-05, + "loss": 0.7457, + "step": 15885 + }, + { + "epoch": 0.48688243226676475, + "grad_norm": 1.5222744195247402, + "learning_rate": 1.0909484724000552e-05, + "loss": 0.7509, + "step": 15886 + }, + { + "epoch": 0.48691308078950596, + "grad_norm": 1.5225371512537909, + "learning_rate": 1.0908496197724295e-05, + "loss": 0.7246, + "step": 15887 + }, + { + "epoch": 0.48694372931224716, + "grad_norm": 1.5144755739998301, + "learning_rate": 1.0907507662496392e-05, + "loss": 0.7111, + "step": 15888 + }, + { + "epoch": 0.48697437783498837, + "grad_norm": 1.4172227242748878, + "learning_rate": 1.0906519118326586e-05, + "loss": 0.7573, + "step": 15889 + }, + { + "epoch": 0.4870050263577296, + "grad_norm": 1.6389759628549194, + "learning_rate": 1.0905530565224611e-05, + "loss": 0.641, + "step": 15890 + }, + { + "epoch": 0.4870356748804708, + "grad_norm": 0.672893491784278, + "learning_rate": 1.0904542003200216e-05, + "loss": 0.5835, + "step": 15891 + }, + { + "epoch": 0.487066323403212, + "grad_norm": 1.3359291041984833, + "learning_rate": 1.0903553432263137e-05, + "loss": 0.6683, + "step": 15892 + }, + { + "epoch": 0.4870969719259532, + "grad_norm": 1.5751503591542881, + "learning_rate": 1.090256485242311e-05, + "loss": 0.7152, + "step": 15893 + }, + { + "epoch": 0.4871276204486944, + "grad_norm": 1.372378320286831, + "learning_rate": 1.0901576263689886e-05, + "loss": 0.7657, + "step": 15894 + }, + { + "epoch": 0.4871582689714356, + "grad_norm": 1.401220978381467, + "learning_rate": 1.0900587666073199e-05, + "loss": 0.6059, + "step": 15895 + }, + { + "epoch": 0.48718891749417675, + "grad_norm": 1.5934459943912342, + "learning_rate": 1.089959905958279e-05, + "loss": 0.6653, + "step": 15896 + }, + { + "epoch": 0.48721956601691796, + "grad_norm": 1.3741388077175904, + "learning_rate": 1.0898610444228401e-05, + "loss": 0.6935, + "step": 15897 + }, + { + "epoch": 0.48725021453965917, + "grad_norm": 1.464383558690087, + "learning_rate": 1.0897621820019775e-05, + "loss": 0.7077, + "step": 15898 + }, + { + "epoch": 0.48728086306240037, + "grad_norm": 1.4681605152064818, + "learning_rate": 1.089663318696665e-05, + "loss": 0.6514, + "step": 15899 + }, + { + "epoch": 0.4873115115851416, + "grad_norm": 0.6488018910654492, + "learning_rate": 1.0895644545078771e-05, + "loss": 0.5744, + "step": 15900 + }, + { + "epoch": 0.4873421601078828, + "grad_norm": 0.674258936278552, + "learning_rate": 1.0894655894365873e-05, + "loss": 0.5892, + "step": 15901 + }, + { + "epoch": 0.487372808630624, + "grad_norm": 1.4996619981082788, + "learning_rate": 1.0893667234837706e-05, + "loss": 0.7993, + "step": 15902 + }, + { + "epoch": 0.4874034571533652, + "grad_norm": 1.5612718482143342, + "learning_rate": 1.0892678566504007e-05, + "loss": 0.73, + "step": 15903 + }, + { + "epoch": 0.4874341056761064, + "grad_norm": 1.363618585911179, + "learning_rate": 1.0891689889374513e-05, + "loss": 0.6071, + "step": 15904 + }, + { + "epoch": 0.4874647541988476, + "grad_norm": 1.851936196103485, + "learning_rate": 1.0890701203458976e-05, + "loss": 0.6922, + "step": 15905 + }, + { + "epoch": 0.4874954027215888, + "grad_norm": 1.3985608951827788, + "learning_rate": 1.0889712508767127e-05, + "loss": 0.67, + "step": 15906 + }, + { + "epoch": 0.48752605124433, + "grad_norm": 1.2508994038307006, + "learning_rate": 1.0888723805308718e-05, + "loss": 0.6257, + "step": 15907 + }, + { + "epoch": 0.4875566997670712, + "grad_norm": 0.6766170915899656, + "learning_rate": 1.0887735093093481e-05, + "loss": 0.5687, + "step": 15908 + }, + { + "epoch": 0.48758734828981243, + "grad_norm": 1.7064537451889576, + "learning_rate": 1.0886746372131167e-05, + "loss": 0.715, + "step": 15909 + }, + { + "epoch": 0.48761799681255363, + "grad_norm": 1.5125894745106392, + "learning_rate": 1.0885757642431511e-05, + "loss": 0.6945, + "step": 15910 + }, + { + "epoch": 0.48764864533529484, + "grad_norm": 1.6450803763142359, + "learning_rate": 1.0884768904004263e-05, + "loss": 0.77, + "step": 15911 + }, + { + "epoch": 0.48767929385803604, + "grad_norm": 1.3984877278105745, + "learning_rate": 1.0883780156859156e-05, + "loss": 0.7575, + "step": 15912 + }, + { + "epoch": 0.48770994238077725, + "grad_norm": 1.429161811037103, + "learning_rate": 1.0882791401005938e-05, + "loss": 0.6685, + "step": 15913 + }, + { + "epoch": 0.48774059090351846, + "grad_norm": 1.467758908869141, + "learning_rate": 1.0881802636454353e-05, + "loss": 0.711, + "step": 15914 + }, + { + "epoch": 0.48777123942625966, + "grad_norm": 1.4706241393690922, + "learning_rate": 1.088081386321414e-05, + "loss": 0.7009, + "step": 15915 + }, + { + "epoch": 0.48780188794900087, + "grad_norm": 1.4073927005418554, + "learning_rate": 1.087982508129504e-05, + "loss": 0.7471, + "step": 15916 + }, + { + "epoch": 0.4878325364717421, + "grad_norm": 1.4418638313238927, + "learning_rate": 1.08788362907068e-05, + "loss": 0.658, + "step": 15917 + }, + { + "epoch": 0.4878631849944833, + "grad_norm": 1.47891120328023, + "learning_rate": 1.0877847491459161e-05, + "loss": 0.7636, + "step": 15918 + }, + { + "epoch": 0.4878938335172245, + "grad_norm": 1.4703587687255413, + "learning_rate": 1.0876858683561864e-05, + "loss": 0.748, + "step": 15919 + }, + { + "epoch": 0.4879244820399657, + "grad_norm": 1.7858988998860323, + "learning_rate": 1.0875869867024658e-05, + "loss": 0.766, + "step": 15920 + }, + { + "epoch": 0.4879551305627069, + "grad_norm": 1.5356020267668655, + "learning_rate": 1.087488104185728e-05, + "loss": 0.7323, + "step": 15921 + }, + { + "epoch": 0.4879857790854481, + "grad_norm": 1.552212855213763, + "learning_rate": 1.0873892208069477e-05, + "loss": 0.7377, + "step": 15922 + }, + { + "epoch": 0.4880164276081893, + "grad_norm": 1.5516025635130701, + "learning_rate": 1.0872903365670988e-05, + "loss": 0.7632, + "step": 15923 + }, + { + "epoch": 0.4880470761309305, + "grad_norm": 0.6863906808414659, + "learning_rate": 1.087191451467156e-05, + "loss": 0.5651, + "step": 15924 + }, + { + "epoch": 0.4880777246536717, + "grad_norm": 1.500598029405693, + "learning_rate": 1.0870925655080932e-05, + "loss": 0.7785, + "step": 15925 + }, + { + "epoch": 0.4881083731764129, + "grad_norm": 1.3022698032658044, + "learning_rate": 1.0869936786908859e-05, + "loss": 0.6218, + "step": 15926 + }, + { + "epoch": 0.4881390216991541, + "grad_norm": 1.475128429254323, + "learning_rate": 1.0868947910165068e-05, + "loss": 0.7474, + "step": 15927 + }, + { + "epoch": 0.4881696702218953, + "grad_norm": 1.714078585606198, + "learning_rate": 1.0867959024859315e-05, + "loss": 0.8162, + "step": 15928 + }, + { + "epoch": 0.4882003187446365, + "grad_norm": 1.4219680048047854, + "learning_rate": 1.0866970131001337e-05, + "loss": 0.6666, + "step": 15929 + }, + { + "epoch": 0.4882309672673777, + "grad_norm": 1.5098729970310272, + "learning_rate": 1.0865981228600884e-05, + "loss": 0.7389, + "step": 15930 + }, + { + "epoch": 0.4882616157901189, + "grad_norm": 0.6974058568889779, + "learning_rate": 1.0864992317667692e-05, + "loss": 0.5872, + "step": 15931 + }, + { + "epoch": 0.4882922643128601, + "grad_norm": 1.4796219639087467, + "learning_rate": 1.0864003398211511e-05, + "loss": 0.7402, + "step": 15932 + }, + { + "epoch": 0.4883229128356013, + "grad_norm": 1.429545325440885, + "learning_rate": 1.0863014470242086e-05, + "loss": 0.615, + "step": 15933 + }, + { + "epoch": 0.4883535613583425, + "grad_norm": 1.51303091647599, + "learning_rate": 1.0862025533769159e-05, + "loss": 0.6154, + "step": 15934 + }, + { + "epoch": 0.4883842098810837, + "grad_norm": 0.6830588166674716, + "learning_rate": 1.0861036588802471e-05, + "loss": 0.5775, + "step": 15935 + }, + { + "epoch": 0.4884148584038249, + "grad_norm": 1.4783027469172416, + "learning_rate": 1.0860047635351766e-05, + "loss": 0.8124, + "step": 15936 + }, + { + "epoch": 0.48844550692656613, + "grad_norm": 1.432962469016681, + "learning_rate": 1.0859058673426798e-05, + "loss": 0.5893, + "step": 15937 + }, + { + "epoch": 0.48847615544930734, + "grad_norm": 1.5528777568435, + "learning_rate": 1.0858069703037304e-05, + "loss": 0.6862, + "step": 15938 + }, + { + "epoch": 0.48850680397204854, + "grad_norm": 1.44066272991174, + "learning_rate": 1.0857080724193028e-05, + "loss": 0.6148, + "step": 15939 + }, + { + "epoch": 0.48853745249478975, + "grad_norm": 0.6535224324818837, + "learning_rate": 1.0856091736903715e-05, + "loss": 0.5756, + "step": 15940 + }, + { + "epoch": 0.48856810101753095, + "grad_norm": 1.369630170399053, + "learning_rate": 1.0855102741179115e-05, + "loss": 0.7282, + "step": 15941 + }, + { + "epoch": 0.48859874954027216, + "grad_norm": 1.547974592957697, + "learning_rate": 1.0854113737028967e-05, + "loss": 0.8257, + "step": 15942 + }, + { + "epoch": 0.48862939806301336, + "grad_norm": 1.552695476431273, + "learning_rate": 1.0853124724463018e-05, + "loss": 0.8625, + "step": 15943 + }, + { + "epoch": 0.48866004658575457, + "grad_norm": 1.5245435331005195, + "learning_rate": 1.0852135703491008e-05, + "loss": 0.7455, + "step": 15944 + }, + { + "epoch": 0.4886906951084958, + "grad_norm": 1.571770897868924, + "learning_rate": 1.0851146674122692e-05, + "loss": 0.7574, + "step": 15945 + }, + { + "epoch": 0.488721343631237, + "grad_norm": 1.5699199062576668, + "learning_rate": 1.085015763636781e-05, + "loss": 0.8116, + "step": 15946 + }, + { + "epoch": 0.4887519921539782, + "grad_norm": 1.4939221794017512, + "learning_rate": 1.0849168590236105e-05, + "loss": 0.6001, + "step": 15947 + }, + { + "epoch": 0.4887826406767194, + "grad_norm": 1.641638320410815, + "learning_rate": 1.0848179535737326e-05, + "loss": 0.7409, + "step": 15948 + }, + { + "epoch": 0.4888132891994606, + "grad_norm": 1.3331144624604525, + "learning_rate": 1.084719047288122e-05, + "loss": 0.6896, + "step": 15949 + }, + { + "epoch": 0.4888439377222018, + "grad_norm": 0.6612117323517396, + "learning_rate": 1.0846201401677525e-05, + "loss": 0.5801, + "step": 15950 + }, + { + "epoch": 0.488874586244943, + "grad_norm": 1.4746295937390803, + "learning_rate": 1.0845212322135992e-05, + "loss": 0.6331, + "step": 15951 + }, + { + "epoch": 0.4889052347676842, + "grad_norm": 0.7044819215268623, + "learning_rate": 1.0844223234266367e-05, + "loss": 0.6003, + "step": 15952 + }, + { + "epoch": 0.4889358832904254, + "grad_norm": 1.57024991047497, + "learning_rate": 1.0843234138078396e-05, + "loss": 0.7523, + "step": 15953 + }, + { + "epoch": 0.4889665318131666, + "grad_norm": 1.6282337354133665, + "learning_rate": 1.084224503358182e-05, + "loss": 0.7551, + "step": 15954 + }, + { + "epoch": 0.48899718033590783, + "grad_norm": 1.3811665273303746, + "learning_rate": 1.0841255920786389e-05, + "loss": 0.7194, + "step": 15955 + }, + { + "epoch": 0.48902782885864904, + "grad_norm": 1.5935799911987798, + "learning_rate": 1.0840266799701848e-05, + "loss": 0.6706, + "step": 15956 + }, + { + "epoch": 0.48905847738139024, + "grad_norm": 1.5092984985384439, + "learning_rate": 1.0839277670337944e-05, + "loss": 0.8236, + "step": 15957 + }, + { + "epoch": 0.4890891259041314, + "grad_norm": 1.4227410117226018, + "learning_rate": 1.0838288532704423e-05, + "loss": 0.7249, + "step": 15958 + }, + { + "epoch": 0.4891197744268726, + "grad_norm": 1.475748039615906, + "learning_rate": 1.0837299386811029e-05, + "loss": 0.7741, + "step": 15959 + }, + { + "epoch": 0.4891504229496138, + "grad_norm": 1.531217996854665, + "learning_rate": 1.083631023266751e-05, + "loss": 0.8201, + "step": 15960 + }, + { + "epoch": 0.489181071472355, + "grad_norm": 1.5993643190400688, + "learning_rate": 1.0835321070283613e-05, + "loss": 0.7614, + "step": 15961 + }, + { + "epoch": 0.4892117199950962, + "grad_norm": 1.4719286521357398, + "learning_rate": 1.0834331899669084e-05, + "loss": 0.7453, + "step": 15962 + }, + { + "epoch": 0.4892423685178374, + "grad_norm": 1.6042346547293427, + "learning_rate": 1.0833342720833668e-05, + "loss": 0.7459, + "step": 15963 + }, + { + "epoch": 0.48927301704057863, + "grad_norm": 1.4686253254750192, + "learning_rate": 1.0832353533787112e-05, + "loss": 0.6777, + "step": 15964 + }, + { + "epoch": 0.48930366556331983, + "grad_norm": 0.7021835565699622, + "learning_rate": 1.083136433853917e-05, + "loss": 0.5921, + "step": 15965 + }, + { + "epoch": 0.48933431408606104, + "grad_norm": 1.4440024924595247, + "learning_rate": 1.0830375135099575e-05, + "loss": 0.6764, + "step": 15966 + }, + { + "epoch": 0.48936496260880225, + "grad_norm": 1.3896037195699589, + "learning_rate": 1.0829385923478086e-05, + "loss": 0.581, + "step": 15967 + }, + { + "epoch": 0.48939561113154345, + "grad_norm": 1.4533173905199186, + "learning_rate": 1.0828396703684446e-05, + "loss": 0.7505, + "step": 15968 + }, + { + "epoch": 0.48942625965428466, + "grad_norm": 1.5575972761237225, + "learning_rate": 1.0827407475728398e-05, + "loss": 0.5211, + "step": 15969 + }, + { + "epoch": 0.48945690817702586, + "grad_norm": 1.3883335442874927, + "learning_rate": 1.0826418239619691e-05, + "loss": 0.6814, + "step": 15970 + }, + { + "epoch": 0.48948755669976707, + "grad_norm": 0.6773814491592217, + "learning_rate": 1.0825428995368077e-05, + "loss": 0.5856, + "step": 15971 + }, + { + "epoch": 0.4895182052225083, + "grad_norm": 1.260526336297325, + "learning_rate": 1.0824439742983299e-05, + "loss": 0.7117, + "step": 15972 + }, + { + "epoch": 0.4895488537452495, + "grad_norm": 1.4239059105948229, + "learning_rate": 1.0823450482475104e-05, + "loss": 0.7986, + "step": 15973 + }, + { + "epoch": 0.4895795022679907, + "grad_norm": 1.5772379965392567, + "learning_rate": 1.0822461213853244e-05, + "loss": 0.638, + "step": 15974 + }, + { + "epoch": 0.4896101507907319, + "grad_norm": 1.4431045115148582, + "learning_rate": 1.082147193712746e-05, + "loss": 0.7272, + "step": 15975 + }, + { + "epoch": 0.4896407993134731, + "grad_norm": 0.6563910461150017, + "learning_rate": 1.0820482652307506e-05, + "loss": 0.5873, + "step": 15976 + }, + { + "epoch": 0.4896714478362143, + "grad_norm": 0.6737849556185477, + "learning_rate": 1.0819493359403123e-05, + "loss": 0.5988, + "step": 15977 + }, + { + "epoch": 0.4897020963589555, + "grad_norm": 1.2545284733680122, + "learning_rate": 1.0818504058424064e-05, + "loss": 0.6296, + "step": 15978 + }, + { + "epoch": 0.4897327448816967, + "grad_norm": 1.5635313904480141, + "learning_rate": 1.0817514749380073e-05, + "loss": 0.7257, + "step": 15979 + }, + { + "epoch": 0.4897633934044379, + "grad_norm": 1.4255481338406824, + "learning_rate": 1.0816525432280904e-05, + "loss": 0.6651, + "step": 15980 + }, + { + "epoch": 0.4897940419271791, + "grad_norm": 0.6655210309932162, + "learning_rate": 1.0815536107136297e-05, + "loss": 0.5864, + "step": 15981 + }, + { + "epoch": 0.48982469044992033, + "grad_norm": 1.6503856709711402, + "learning_rate": 1.0814546773956007e-05, + "loss": 0.687, + "step": 15982 + }, + { + "epoch": 0.48985533897266154, + "grad_norm": 1.57805978938046, + "learning_rate": 1.0813557432749776e-05, + "loss": 0.7252, + "step": 15983 + }, + { + "epoch": 0.48988598749540274, + "grad_norm": 1.5206372397928916, + "learning_rate": 1.081256808352736e-05, + "loss": 0.8166, + "step": 15984 + }, + { + "epoch": 0.48991663601814395, + "grad_norm": 0.6432254614544055, + "learning_rate": 1.0811578726298502e-05, + "loss": 0.5624, + "step": 15985 + }, + { + "epoch": 0.48994728454088515, + "grad_norm": 1.522261331859176, + "learning_rate": 1.081058936107295e-05, + "loss": 0.8221, + "step": 15986 + }, + { + "epoch": 0.48997793306362636, + "grad_norm": 1.6028275741460736, + "learning_rate": 1.0809599987860452e-05, + "loss": 0.6797, + "step": 15987 + }, + { + "epoch": 0.49000858158636756, + "grad_norm": 1.215886162836149, + "learning_rate": 1.0808610606670758e-05, + "loss": 0.5747, + "step": 15988 + }, + { + "epoch": 0.4900392301091087, + "grad_norm": 1.494194556234007, + "learning_rate": 1.080762121751362e-05, + "loss": 0.7584, + "step": 15989 + }, + { + "epoch": 0.4900698786318499, + "grad_norm": 1.3750760035817915, + "learning_rate": 1.0806631820398778e-05, + "loss": 0.706, + "step": 15990 + }, + { + "epoch": 0.4901005271545911, + "grad_norm": 1.4468056338598736, + "learning_rate": 1.0805642415335996e-05, + "loss": 0.7321, + "step": 15991 + }, + { + "epoch": 0.49013117567733233, + "grad_norm": 0.6448227055538779, + "learning_rate": 1.0804653002335004e-05, + "loss": 0.5547, + "step": 15992 + }, + { + "epoch": 0.49016182420007354, + "grad_norm": 1.529525012590376, + "learning_rate": 1.0803663581405563e-05, + "loss": 0.7156, + "step": 15993 + }, + { + "epoch": 0.49019247272281474, + "grad_norm": 1.4214806194798297, + "learning_rate": 1.0802674152557418e-05, + "loss": 0.7373, + "step": 15994 + }, + { + "epoch": 0.49022312124555595, + "grad_norm": 0.6644128152707854, + "learning_rate": 1.0801684715800322e-05, + "loss": 0.6053, + "step": 15995 + }, + { + "epoch": 0.49025376976829715, + "grad_norm": 1.6280743507723103, + "learning_rate": 1.080069527114402e-05, + "loss": 0.8178, + "step": 15996 + }, + { + "epoch": 0.49028441829103836, + "grad_norm": 1.5918622832467888, + "learning_rate": 1.0799705818598263e-05, + "loss": 0.6366, + "step": 15997 + }, + { + "epoch": 0.49031506681377957, + "grad_norm": 1.5448107513058207, + "learning_rate": 1.0798716358172799e-05, + "loss": 0.8159, + "step": 15998 + }, + { + "epoch": 0.49034571533652077, + "grad_norm": 0.6522903004454583, + "learning_rate": 1.0797726889877377e-05, + "loss": 0.5677, + "step": 15999 + }, + { + "epoch": 0.490376363859262, + "grad_norm": 1.5645938246377815, + "learning_rate": 1.0796737413721751e-05, + "loss": 0.7281, + "step": 16000 + }, + { + "epoch": 0.4904070123820032, + "grad_norm": 1.4410617890848676, + "learning_rate": 1.0795747929715666e-05, + "loss": 0.7236, + "step": 16001 + }, + { + "epoch": 0.4904376609047444, + "grad_norm": 1.4195457555968203, + "learning_rate": 1.0794758437868873e-05, + "loss": 0.6843, + "step": 16002 + }, + { + "epoch": 0.4904683094274856, + "grad_norm": 1.2831946368457001, + "learning_rate": 1.0793768938191123e-05, + "loss": 0.6705, + "step": 16003 + }, + { + "epoch": 0.4904989579502268, + "grad_norm": 1.5395339990191215, + "learning_rate": 1.0792779430692164e-05, + "loss": 0.7664, + "step": 16004 + }, + { + "epoch": 0.490529606472968, + "grad_norm": 1.5282867708342447, + "learning_rate": 1.0791789915381742e-05, + "loss": 0.7293, + "step": 16005 + }, + { + "epoch": 0.4905602549957092, + "grad_norm": 1.4224774335528814, + "learning_rate": 1.0790800392269618e-05, + "loss": 0.6841, + "step": 16006 + }, + { + "epoch": 0.4905909035184504, + "grad_norm": 1.4978184835620847, + "learning_rate": 1.0789810861365533e-05, + "loss": 0.715, + "step": 16007 + }, + { + "epoch": 0.4906215520411916, + "grad_norm": 1.398834895719265, + "learning_rate": 1.0788821322679239e-05, + "loss": 0.7094, + "step": 16008 + }, + { + "epoch": 0.4906522005639328, + "grad_norm": 1.4514585259383308, + "learning_rate": 1.0787831776220485e-05, + "loss": 0.7165, + "step": 16009 + }, + { + "epoch": 0.49068284908667403, + "grad_norm": 1.4494838664429415, + "learning_rate": 1.0786842221999026e-05, + "loss": 0.5648, + "step": 16010 + }, + { + "epoch": 0.49071349760941524, + "grad_norm": 1.3608453047622415, + "learning_rate": 1.078585266002461e-05, + "loss": 0.7104, + "step": 16011 + }, + { + "epoch": 0.49074414613215644, + "grad_norm": 0.6995920846901001, + "learning_rate": 1.0784863090306983e-05, + "loss": 0.5485, + "step": 16012 + }, + { + "epoch": 0.49077479465489765, + "grad_norm": 1.5875567931594787, + "learning_rate": 1.07838735128559e-05, + "loss": 0.7331, + "step": 16013 + }, + { + "epoch": 0.49080544317763886, + "grad_norm": 1.6409886046033537, + "learning_rate": 1.0782883927681112e-05, + "loss": 0.5979, + "step": 16014 + }, + { + "epoch": 0.49083609170038006, + "grad_norm": 1.5455460680752273, + "learning_rate": 1.0781894334792369e-05, + "loss": 0.7714, + "step": 16015 + }, + { + "epoch": 0.49086674022312127, + "grad_norm": 1.3913857391569469, + "learning_rate": 1.0780904734199417e-05, + "loss": 0.7452, + "step": 16016 + }, + { + "epoch": 0.4908973887458625, + "grad_norm": 0.669782350873577, + "learning_rate": 1.0779915125912014e-05, + "loss": 0.5677, + "step": 16017 + }, + { + "epoch": 0.4909280372686037, + "grad_norm": 1.4357052161970774, + "learning_rate": 1.077892550993991e-05, + "loss": 0.6882, + "step": 16018 + }, + { + "epoch": 0.4909586857913449, + "grad_norm": 0.6918589900612406, + "learning_rate": 1.0777935886292851e-05, + "loss": 0.5741, + "step": 16019 + }, + { + "epoch": 0.49098933431408603, + "grad_norm": 1.7125752644192853, + "learning_rate": 1.077694625498059e-05, + "loss": 0.7739, + "step": 16020 + }, + { + "epoch": 0.49101998283682724, + "grad_norm": 1.4511314690174093, + "learning_rate": 1.0775956616012879e-05, + "loss": 0.6732, + "step": 16021 + }, + { + "epoch": 0.49105063135956845, + "grad_norm": 1.9220812965075977, + "learning_rate": 1.0774966969399472e-05, + "loss": 0.7752, + "step": 16022 + }, + { + "epoch": 0.49108127988230965, + "grad_norm": 0.6809622028922191, + "learning_rate": 1.0773977315150115e-05, + "loss": 0.5639, + "step": 16023 + }, + { + "epoch": 0.49111192840505086, + "grad_norm": 1.5580391309706485, + "learning_rate": 1.0772987653274558e-05, + "loss": 0.7657, + "step": 16024 + }, + { + "epoch": 0.49114257692779206, + "grad_norm": 0.6624833207369378, + "learning_rate": 1.077199798378256e-05, + "loss": 0.5682, + "step": 16025 + }, + { + "epoch": 0.49117322545053327, + "grad_norm": 0.6342231585762246, + "learning_rate": 1.0771008306683868e-05, + "loss": 0.5672, + "step": 16026 + }, + { + "epoch": 0.4912038739732745, + "grad_norm": 1.4025774712137946, + "learning_rate": 1.0770018621988232e-05, + "loss": 0.6061, + "step": 16027 + }, + { + "epoch": 0.4912345224960157, + "grad_norm": 1.3743789783832945, + "learning_rate": 1.0769028929705407e-05, + "loss": 0.6916, + "step": 16028 + }, + { + "epoch": 0.4912651710187569, + "grad_norm": 1.415136551154511, + "learning_rate": 1.0768039229845144e-05, + "loss": 0.7371, + "step": 16029 + }, + { + "epoch": 0.4912958195414981, + "grad_norm": 1.690511478230178, + "learning_rate": 1.0767049522417194e-05, + "loss": 0.789, + "step": 16030 + }, + { + "epoch": 0.4913264680642393, + "grad_norm": 1.5096824649809781, + "learning_rate": 1.0766059807431306e-05, + "loss": 0.7352, + "step": 16031 + }, + { + "epoch": 0.4913571165869805, + "grad_norm": 1.613352468723835, + "learning_rate": 1.0765070084897237e-05, + "loss": 0.6574, + "step": 16032 + }, + { + "epoch": 0.4913877651097217, + "grad_norm": 1.4058937316490077, + "learning_rate": 1.0764080354824735e-05, + "loss": 0.756, + "step": 16033 + }, + { + "epoch": 0.4914184136324629, + "grad_norm": 1.7821906796955818, + "learning_rate": 1.0763090617223557e-05, + "loss": 0.7682, + "step": 16034 + }, + { + "epoch": 0.4914490621552041, + "grad_norm": 0.669295971140072, + "learning_rate": 1.0762100872103449e-05, + "loss": 0.5594, + "step": 16035 + }, + { + "epoch": 0.4914797106779453, + "grad_norm": 1.4759174963251966, + "learning_rate": 1.0761111119474168e-05, + "loss": 0.7652, + "step": 16036 + }, + { + "epoch": 0.49151035920068653, + "grad_norm": 0.6916828608910579, + "learning_rate": 1.076012135934546e-05, + "loss": 0.5623, + "step": 16037 + }, + { + "epoch": 0.49154100772342774, + "grad_norm": 1.476964501060422, + "learning_rate": 1.075913159172709e-05, + "loss": 0.7281, + "step": 16038 + }, + { + "epoch": 0.49157165624616894, + "grad_norm": 1.3369177024289263, + "learning_rate": 1.0758141816628796e-05, + "loss": 0.7033, + "step": 16039 + }, + { + "epoch": 0.49160230476891015, + "grad_norm": 0.6600167293887583, + "learning_rate": 1.0757152034060336e-05, + "loss": 0.5663, + "step": 16040 + }, + { + "epoch": 0.49163295329165135, + "grad_norm": 0.6336230830039964, + "learning_rate": 1.0756162244031466e-05, + "loss": 0.5483, + "step": 16041 + }, + { + "epoch": 0.49166360181439256, + "grad_norm": 0.6639030163200116, + "learning_rate": 1.0755172446551936e-05, + "loss": 0.5653, + "step": 16042 + }, + { + "epoch": 0.49169425033713376, + "grad_norm": 1.4533117570967218, + "learning_rate": 1.0754182641631496e-05, + "loss": 0.6943, + "step": 16043 + }, + { + "epoch": 0.49172489885987497, + "grad_norm": 1.4360551571495557, + "learning_rate": 1.0753192829279905e-05, + "loss": 0.7344, + "step": 16044 + }, + { + "epoch": 0.4917555473826162, + "grad_norm": 0.6655363713548325, + "learning_rate": 1.0752203009506911e-05, + "loss": 0.5799, + "step": 16045 + }, + { + "epoch": 0.4917861959053574, + "grad_norm": 0.6742961682380824, + "learning_rate": 1.0751213182322267e-05, + "loss": 0.5533, + "step": 16046 + }, + { + "epoch": 0.4918168444280986, + "grad_norm": 1.4322559273857, + "learning_rate": 1.075022334773573e-05, + "loss": 0.7384, + "step": 16047 + }, + { + "epoch": 0.4918474929508398, + "grad_norm": 1.5818764740291267, + "learning_rate": 1.0749233505757046e-05, + "loss": 0.7053, + "step": 16048 + }, + { + "epoch": 0.491878141473581, + "grad_norm": 0.6663564720125255, + "learning_rate": 1.0748243656395978e-05, + "loss": 0.5921, + "step": 16049 + }, + { + "epoch": 0.4919087899963222, + "grad_norm": 0.6781564299108443, + "learning_rate": 1.074725379966227e-05, + "loss": 0.5888, + "step": 16050 + }, + { + "epoch": 0.49193943851906335, + "grad_norm": 0.6509075409073852, + "learning_rate": 1.074626393556568e-05, + "loss": 0.6051, + "step": 16051 + }, + { + "epoch": 0.49197008704180456, + "grad_norm": 1.6116893308856919, + "learning_rate": 1.074527406411596e-05, + "loss": 0.6817, + "step": 16052 + }, + { + "epoch": 0.49200073556454577, + "grad_norm": 1.5633528965852015, + "learning_rate": 1.0744284185322865e-05, + "loss": 0.8408, + "step": 16053 + }, + { + "epoch": 0.49203138408728697, + "grad_norm": 1.4167953090884005, + "learning_rate": 1.0743294299196148e-05, + "loss": 0.6685, + "step": 16054 + }, + { + "epoch": 0.4920620326100282, + "grad_norm": 0.6672167344357255, + "learning_rate": 1.0742304405745561e-05, + "loss": 0.5785, + "step": 16055 + }, + { + "epoch": 0.4920926811327694, + "grad_norm": 1.5316836094608042, + "learning_rate": 1.0741314504980858e-05, + "loss": 0.6937, + "step": 16056 + }, + { + "epoch": 0.4921233296555106, + "grad_norm": 1.7490835735782972, + "learning_rate": 1.0740324596911796e-05, + "loss": 0.7483, + "step": 16057 + }, + { + "epoch": 0.4921539781782518, + "grad_norm": 1.325387459677639, + "learning_rate": 1.0739334681548124e-05, + "loss": 0.6676, + "step": 16058 + }, + { + "epoch": 0.492184626700993, + "grad_norm": 1.406854169649975, + "learning_rate": 1.0738344758899597e-05, + "loss": 0.7858, + "step": 16059 + }, + { + "epoch": 0.4922152752237342, + "grad_norm": 1.458180184090181, + "learning_rate": 1.0737354828975974e-05, + "loss": 0.7884, + "step": 16060 + }, + { + "epoch": 0.4922459237464754, + "grad_norm": 1.590090691415962, + "learning_rate": 1.0736364891787003e-05, + "loss": 0.7545, + "step": 16061 + }, + { + "epoch": 0.4922765722692166, + "grad_norm": 1.4373907564473016, + "learning_rate": 1.0735374947342442e-05, + "loss": 0.7128, + "step": 16062 + }, + { + "epoch": 0.4923072207919578, + "grad_norm": 1.3687682808674273, + "learning_rate": 1.073438499565204e-05, + "loss": 0.7178, + "step": 16063 + }, + { + "epoch": 0.49233786931469903, + "grad_norm": 1.2664265265049262, + "learning_rate": 1.0733395036725557e-05, + "loss": 0.6906, + "step": 16064 + }, + { + "epoch": 0.49236851783744023, + "grad_norm": 1.5612296570346962, + "learning_rate": 1.0732405070572747e-05, + "loss": 0.7417, + "step": 16065 + }, + { + "epoch": 0.49239916636018144, + "grad_norm": 1.4323450440851924, + "learning_rate": 1.0731415097203361e-05, + "loss": 0.5745, + "step": 16066 + }, + { + "epoch": 0.49242981488292265, + "grad_norm": 0.6724588336496429, + "learning_rate": 1.0730425116627152e-05, + "loss": 0.5586, + "step": 16067 + }, + { + "epoch": 0.49246046340566385, + "grad_norm": 1.6300556855219985, + "learning_rate": 1.0729435128853881e-05, + "loss": 0.7423, + "step": 16068 + }, + { + "epoch": 0.49249111192840506, + "grad_norm": 1.5062396835634575, + "learning_rate": 1.0728445133893299e-05, + "loss": 0.7375, + "step": 16069 + }, + { + "epoch": 0.49252176045114626, + "grad_norm": 1.4949989903675642, + "learning_rate": 1.0727455131755157e-05, + "loss": 0.7509, + "step": 16070 + }, + { + "epoch": 0.49255240897388747, + "grad_norm": 0.6380813412293371, + "learning_rate": 1.0726465122449216e-05, + "loss": 0.5672, + "step": 16071 + }, + { + "epoch": 0.4925830574966287, + "grad_norm": 1.5449209837778475, + "learning_rate": 1.072547510598523e-05, + "loss": 0.6516, + "step": 16072 + }, + { + "epoch": 0.4926137060193699, + "grad_norm": 1.432805436837267, + "learning_rate": 1.072448508237295e-05, + "loss": 0.6733, + "step": 16073 + }, + { + "epoch": 0.4926443545421111, + "grad_norm": 1.3528121094205985, + "learning_rate": 1.0723495051622133e-05, + "loss": 0.6795, + "step": 16074 + }, + { + "epoch": 0.4926750030648523, + "grad_norm": 1.3498511337445154, + "learning_rate": 1.0722505013742535e-05, + "loss": 0.6, + "step": 16075 + }, + { + "epoch": 0.4927056515875935, + "grad_norm": 1.5199691641405837, + "learning_rate": 1.0721514968743912e-05, + "loss": 0.8104, + "step": 16076 + }, + { + "epoch": 0.4927363001103347, + "grad_norm": 1.6358144810685378, + "learning_rate": 1.0720524916636015e-05, + "loss": 0.6995, + "step": 16077 + }, + { + "epoch": 0.4927669486330759, + "grad_norm": 1.5051009588919677, + "learning_rate": 1.0719534857428599e-05, + "loss": 0.7814, + "step": 16078 + }, + { + "epoch": 0.4927975971558171, + "grad_norm": 1.6904632622609523, + "learning_rate": 1.0718544791131427e-05, + "loss": 0.8512, + "step": 16079 + }, + { + "epoch": 0.4928282456785583, + "grad_norm": 1.4609162111125882, + "learning_rate": 1.0717554717754249e-05, + "loss": 0.7436, + "step": 16080 + }, + { + "epoch": 0.4928588942012995, + "grad_norm": 1.325140155373423, + "learning_rate": 1.0716564637306819e-05, + "loss": 0.5836, + "step": 16081 + }, + { + "epoch": 0.4928895427240407, + "grad_norm": 1.5943643225503406, + "learning_rate": 1.0715574549798893e-05, + "loss": 0.6816, + "step": 16082 + }, + { + "epoch": 0.4929201912467819, + "grad_norm": 1.4225323838029085, + "learning_rate": 1.071458445524023e-05, + "loss": 0.6299, + "step": 16083 + }, + { + "epoch": 0.4929508397695231, + "grad_norm": 0.676506671086365, + "learning_rate": 1.0713594353640583e-05, + "loss": 0.5664, + "step": 16084 + }, + { + "epoch": 0.4929814882922643, + "grad_norm": 1.4228327369347993, + "learning_rate": 1.0712604245009705e-05, + "loss": 0.713, + "step": 16085 + }, + { + "epoch": 0.4930121368150055, + "grad_norm": 1.720941051621718, + "learning_rate": 1.071161412935736e-05, + "loss": 0.7694, + "step": 16086 + }, + { + "epoch": 0.4930427853377467, + "grad_norm": 1.5733225354108418, + "learning_rate": 1.0710624006693296e-05, + "loss": 0.757, + "step": 16087 + }, + { + "epoch": 0.4930734338604879, + "grad_norm": 1.4015319906329118, + "learning_rate": 1.0709633877027275e-05, + "loss": 0.6667, + "step": 16088 + }, + { + "epoch": 0.4931040823832291, + "grad_norm": 0.6939847438530954, + "learning_rate": 1.0708643740369045e-05, + "loss": 0.571, + "step": 16089 + }, + { + "epoch": 0.4931347309059703, + "grad_norm": 1.4563495372247357, + "learning_rate": 1.0707653596728371e-05, + "loss": 0.6664, + "step": 16090 + }, + { + "epoch": 0.4931653794287115, + "grad_norm": 1.359443864404601, + "learning_rate": 1.0706663446115002e-05, + "loss": 0.6904, + "step": 16091 + }, + { + "epoch": 0.49319602795145273, + "grad_norm": 1.6038103990967025, + "learning_rate": 1.07056732885387e-05, + "loss": 0.6246, + "step": 16092 + }, + { + "epoch": 0.49322667647419394, + "grad_norm": 1.6364206816186504, + "learning_rate": 1.0704683124009216e-05, + "loss": 0.5997, + "step": 16093 + }, + { + "epoch": 0.49325732499693514, + "grad_norm": 1.458933474960632, + "learning_rate": 1.0703692952536314e-05, + "loss": 0.7905, + "step": 16094 + }, + { + "epoch": 0.49328797351967635, + "grad_norm": 1.491724847411592, + "learning_rate": 1.070270277412974e-05, + "loss": 0.7048, + "step": 16095 + }, + { + "epoch": 0.49331862204241755, + "grad_norm": 1.4318468874964418, + "learning_rate": 1.0701712588799255e-05, + "loss": 0.6852, + "step": 16096 + }, + { + "epoch": 0.49334927056515876, + "grad_norm": 1.4630929297635735, + "learning_rate": 1.070072239655462e-05, + "loss": 0.8252, + "step": 16097 + }, + { + "epoch": 0.49337991908789997, + "grad_norm": 1.6679996165480557, + "learning_rate": 1.0699732197405585e-05, + "loss": 0.765, + "step": 16098 + }, + { + "epoch": 0.49341056761064117, + "grad_norm": 1.416083396294516, + "learning_rate": 1.0698741991361914e-05, + "loss": 0.7144, + "step": 16099 + }, + { + "epoch": 0.4934412161333824, + "grad_norm": 1.448287115238434, + "learning_rate": 1.0697751778433357e-05, + "loss": 0.704, + "step": 16100 + }, + { + "epoch": 0.4934718646561236, + "grad_norm": 1.4971093589371918, + "learning_rate": 1.0696761558629671e-05, + "loss": 0.6651, + "step": 16101 + }, + { + "epoch": 0.4935025131788648, + "grad_norm": 0.6871613106166132, + "learning_rate": 1.0695771331960615e-05, + "loss": 0.587, + "step": 16102 + }, + { + "epoch": 0.493533161701606, + "grad_norm": 1.6377520584214735, + "learning_rate": 1.0694781098435951e-05, + "loss": 0.6825, + "step": 16103 + }, + { + "epoch": 0.4935638102243472, + "grad_norm": 1.4973150715819437, + "learning_rate": 1.0693790858065428e-05, + "loss": 0.6274, + "step": 16104 + }, + { + "epoch": 0.4935944587470884, + "grad_norm": 0.6569452596790339, + "learning_rate": 1.0692800610858807e-05, + "loss": 0.5791, + "step": 16105 + }, + { + "epoch": 0.4936251072698296, + "grad_norm": 0.6397518120435828, + "learning_rate": 1.069181035682584e-05, + "loss": 0.5641, + "step": 16106 + }, + { + "epoch": 0.4936557557925708, + "grad_norm": 1.3549016718432954, + "learning_rate": 1.0690820095976296e-05, + "loss": 0.5727, + "step": 16107 + }, + { + "epoch": 0.493686404315312, + "grad_norm": 1.6285041442595165, + "learning_rate": 1.068982982831992e-05, + "loss": 0.6739, + "step": 16108 + }, + { + "epoch": 0.4937170528380532, + "grad_norm": 1.6090979026920038, + "learning_rate": 1.0688839553866474e-05, + "loss": 0.6307, + "step": 16109 + }, + { + "epoch": 0.49374770136079443, + "grad_norm": 1.5687815846492665, + "learning_rate": 1.0687849272625716e-05, + "loss": 0.643, + "step": 16110 + }, + { + "epoch": 0.49377834988353564, + "grad_norm": 1.5593118564870903, + "learning_rate": 1.0686858984607404e-05, + "loss": 0.76, + "step": 16111 + }, + { + "epoch": 0.49380899840627684, + "grad_norm": 1.295123383853013, + "learning_rate": 1.0685868689821296e-05, + "loss": 0.6992, + "step": 16112 + }, + { + "epoch": 0.493839646929018, + "grad_norm": 1.5525084700755614, + "learning_rate": 1.0684878388277145e-05, + "loss": 0.6768, + "step": 16113 + }, + { + "epoch": 0.4938702954517592, + "grad_norm": 0.7074818887960248, + "learning_rate": 1.0683888079984715e-05, + "loss": 0.6002, + "step": 16114 + }, + { + "epoch": 0.4939009439745004, + "grad_norm": 0.7024085675320532, + "learning_rate": 1.068289776495376e-05, + "loss": 0.5927, + "step": 16115 + }, + { + "epoch": 0.4939315924972416, + "grad_norm": 1.467453688005501, + "learning_rate": 1.0681907443194038e-05, + "loss": 0.785, + "step": 16116 + }, + { + "epoch": 0.4939622410199828, + "grad_norm": 1.5823254848747648, + "learning_rate": 1.0680917114715306e-05, + "loss": 0.6521, + "step": 16117 + }, + { + "epoch": 0.493992889542724, + "grad_norm": 1.449795100008598, + "learning_rate": 1.0679926779527325e-05, + "loss": 0.6404, + "step": 16118 + }, + { + "epoch": 0.49402353806546523, + "grad_norm": 1.387892760926317, + "learning_rate": 1.0678936437639852e-05, + "loss": 0.6798, + "step": 16119 + }, + { + "epoch": 0.49405418658820643, + "grad_norm": 1.336808785965644, + "learning_rate": 1.0677946089062645e-05, + "loss": 0.6452, + "step": 16120 + }, + { + "epoch": 0.49408483511094764, + "grad_norm": 1.4609600015806674, + "learning_rate": 1.067695573380546e-05, + "loss": 0.6437, + "step": 16121 + }, + { + "epoch": 0.49411548363368885, + "grad_norm": 1.4580438375244853, + "learning_rate": 1.0675965371878059e-05, + "loss": 0.6912, + "step": 16122 + }, + { + "epoch": 0.49414613215643005, + "grad_norm": 1.4191612082897789, + "learning_rate": 1.0674975003290198e-05, + "loss": 0.6015, + "step": 16123 + }, + { + "epoch": 0.49417678067917126, + "grad_norm": 1.4394806191712837, + "learning_rate": 1.0673984628051633e-05, + "loss": 0.7505, + "step": 16124 + }, + { + "epoch": 0.49420742920191246, + "grad_norm": 1.4328317769632566, + "learning_rate": 1.0672994246172126e-05, + "loss": 0.7153, + "step": 16125 + }, + { + "epoch": 0.49423807772465367, + "grad_norm": 1.4826395002051809, + "learning_rate": 1.0672003857661437e-05, + "loss": 0.6812, + "step": 16126 + }, + { + "epoch": 0.4942687262473949, + "grad_norm": 1.5743178124947992, + "learning_rate": 1.0671013462529321e-05, + "loss": 0.6652, + "step": 16127 + }, + { + "epoch": 0.4942993747701361, + "grad_norm": 1.6510564281132003, + "learning_rate": 1.0670023060785535e-05, + "loss": 0.7193, + "step": 16128 + }, + { + "epoch": 0.4943300232928773, + "grad_norm": 1.5811756255267548, + "learning_rate": 1.0669032652439841e-05, + "loss": 0.7994, + "step": 16129 + }, + { + "epoch": 0.4943606718156185, + "grad_norm": 1.4556372348209323, + "learning_rate": 1.0668042237502e-05, + "loss": 0.8063, + "step": 16130 + }, + { + "epoch": 0.4943913203383597, + "grad_norm": 1.355031868567245, + "learning_rate": 1.0667051815981769e-05, + "loss": 0.5819, + "step": 16131 + }, + { + "epoch": 0.4944219688611009, + "grad_norm": 1.3687998732883622, + "learning_rate": 1.06660613878889e-05, + "loss": 0.6985, + "step": 16132 + }, + { + "epoch": 0.4944526173838421, + "grad_norm": 1.6364593417217468, + "learning_rate": 1.066507095323316e-05, + "loss": 0.7395, + "step": 16133 + }, + { + "epoch": 0.4944832659065833, + "grad_norm": 1.4349747503125494, + "learning_rate": 1.0664080512024309e-05, + "loss": 0.7174, + "step": 16134 + }, + { + "epoch": 0.4945139144293245, + "grad_norm": 1.5848353245887377, + "learning_rate": 1.0663090064272098e-05, + "loss": 0.6904, + "step": 16135 + }, + { + "epoch": 0.4945445629520657, + "grad_norm": 1.56134756756536, + "learning_rate": 1.0662099609986294e-05, + "loss": 0.7624, + "step": 16136 + }, + { + "epoch": 0.49457521147480693, + "grad_norm": 1.5994251422508023, + "learning_rate": 1.0661109149176654e-05, + "loss": 0.7793, + "step": 16137 + }, + { + "epoch": 0.49460585999754814, + "grad_norm": 1.5994927956094833, + "learning_rate": 1.0660118681852933e-05, + "loss": 0.7865, + "step": 16138 + }, + { + "epoch": 0.49463650852028934, + "grad_norm": 1.3815153063252188, + "learning_rate": 1.0659128208024896e-05, + "loss": 0.6795, + "step": 16139 + }, + { + "epoch": 0.49466715704303055, + "grad_norm": 0.6755798199455766, + "learning_rate": 1.06581377277023e-05, + "loss": 0.5895, + "step": 16140 + }, + { + "epoch": 0.49469780556577175, + "grad_norm": 1.3796189179001341, + "learning_rate": 1.0657147240894903e-05, + "loss": 0.6718, + "step": 16141 + }, + { + "epoch": 0.49472845408851296, + "grad_norm": 1.5162293140730707, + "learning_rate": 1.065615674761247e-05, + "loss": 0.617, + "step": 16142 + }, + { + "epoch": 0.49475910261125416, + "grad_norm": 1.5298119062490168, + "learning_rate": 1.0655166247864752e-05, + "loss": 0.6571, + "step": 16143 + }, + { + "epoch": 0.4947897511339953, + "grad_norm": 0.6979032556363538, + "learning_rate": 1.0654175741661514e-05, + "loss": 0.5758, + "step": 16144 + }, + { + "epoch": 0.4948203996567365, + "grad_norm": 1.5751670096660104, + "learning_rate": 1.0653185229012517e-05, + "loss": 0.6831, + "step": 16145 + }, + { + "epoch": 0.4948510481794777, + "grad_norm": 1.4456264778068073, + "learning_rate": 1.0652194709927518e-05, + "loss": 0.7204, + "step": 16146 + }, + { + "epoch": 0.49488169670221893, + "grad_norm": 1.5776384013389688, + "learning_rate": 1.0651204184416277e-05, + "loss": 0.7736, + "step": 16147 + }, + { + "epoch": 0.49491234522496014, + "grad_norm": 1.4733017608523058, + "learning_rate": 1.0650213652488557e-05, + "loss": 0.6069, + "step": 16148 + }, + { + "epoch": 0.49494299374770134, + "grad_norm": 1.3323881552728396, + "learning_rate": 1.0649223114154114e-05, + "loss": 0.6286, + "step": 16149 + }, + { + "epoch": 0.49497364227044255, + "grad_norm": 1.4781231189179231, + "learning_rate": 1.064823256942271e-05, + "loss": 0.6999, + "step": 16150 + }, + { + "epoch": 0.49500429079318375, + "grad_norm": 1.3289483843188128, + "learning_rate": 1.0647242018304103e-05, + "loss": 0.6404, + "step": 16151 + }, + { + "epoch": 0.49503493931592496, + "grad_norm": 1.3140245279343226, + "learning_rate": 1.0646251460808057e-05, + "loss": 0.6622, + "step": 16152 + }, + { + "epoch": 0.49506558783866617, + "grad_norm": 1.5159093752436115, + "learning_rate": 1.064526089694433e-05, + "loss": 0.6811, + "step": 16153 + }, + { + "epoch": 0.49509623636140737, + "grad_norm": 1.2499454423570462, + "learning_rate": 1.0644270326722678e-05, + "loss": 0.7146, + "step": 16154 + }, + { + "epoch": 0.4951268848841486, + "grad_norm": 1.3485256849002363, + "learning_rate": 1.064327975015287e-05, + "loss": 0.7063, + "step": 16155 + }, + { + "epoch": 0.4951575334068898, + "grad_norm": 0.7272179164654046, + "learning_rate": 1.064228916724466e-05, + "loss": 0.554, + "step": 16156 + }, + { + "epoch": 0.495188181929631, + "grad_norm": 1.4766596663133786, + "learning_rate": 1.0641298578007813e-05, + "loss": 0.7256, + "step": 16157 + }, + { + "epoch": 0.4952188304523722, + "grad_norm": 1.507696521157049, + "learning_rate": 1.0640307982452085e-05, + "loss": 0.7432, + "step": 16158 + }, + { + "epoch": 0.4952494789751134, + "grad_norm": 1.460294730961105, + "learning_rate": 1.063931738058724e-05, + "loss": 0.7078, + "step": 16159 + }, + { + "epoch": 0.4952801274978546, + "grad_norm": 0.6969533552093422, + "learning_rate": 1.0638326772423033e-05, + "loss": 0.5603, + "step": 16160 + }, + { + "epoch": 0.4953107760205958, + "grad_norm": 1.6352131912474435, + "learning_rate": 1.0637336157969236e-05, + "loss": 0.7613, + "step": 16161 + }, + { + "epoch": 0.495341424543337, + "grad_norm": 1.523017067624451, + "learning_rate": 1.0636345537235597e-05, + "loss": 0.7531, + "step": 16162 + }, + { + "epoch": 0.4953720730660782, + "grad_norm": 1.6277970614328519, + "learning_rate": 1.0635354910231885e-05, + "loss": 0.7198, + "step": 16163 + }, + { + "epoch": 0.49540272158881943, + "grad_norm": 1.571993460767531, + "learning_rate": 1.0634364276967857e-05, + "loss": 0.7368, + "step": 16164 + }, + { + "epoch": 0.49543337011156063, + "grad_norm": 1.570306813328923, + "learning_rate": 1.0633373637453278e-05, + "loss": 0.6811, + "step": 16165 + }, + { + "epoch": 0.49546401863430184, + "grad_norm": 1.455074637310077, + "learning_rate": 1.0632382991697905e-05, + "loss": 0.7117, + "step": 16166 + }, + { + "epoch": 0.49549466715704304, + "grad_norm": 1.4578548504551643, + "learning_rate": 1.0631392339711499e-05, + "loss": 0.6871, + "step": 16167 + }, + { + "epoch": 0.49552531567978425, + "grad_norm": 1.5735307870848654, + "learning_rate": 1.0630401681503824e-05, + "loss": 0.6867, + "step": 16168 + }, + { + "epoch": 0.49555596420252546, + "grad_norm": 1.4346052889110514, + "learning_rate": 1.0629411017084641e-05, + "loss": 0.7384, + "step": 16169 + }, + { + "epoch": 0.49558661272526666, + "grad_norm": 0.7035560894435609, + "learning_rate": 1.062842034646371e-05, + "loss": 0.5795, + "step": 16170 + }, + { + "epoch": 0.49561726124800787, + "grad_norm": 1.418684388774647, + "learning_rate": 1.062742966965079e-05, + "loss": 0.7452, + "step": 16171 + }, + { + "epoch": 0.4956479097707491, + "grad_norm": 1.4927412082968645, + "learning_rate": 1.0626438986655652e-05, + "loss": 0.7461, + "step": 16172 + }, + { + "epoch": 0.4956785582934903, + "grad_norm": 1.6026673899466748, + "learning_rate": 1.0625448297488044e-05, + "loss": 0.7764, + "step": 16173 + }, + { + "epoch": 0.4957092068162315, + "grad_norm": 1.3876778268744714, + "learning_rate": 1.0624457602157733e-05, + "loss": 0.7032, + "step": 16174 + }, + { + "epoch": 0.49573985533897263, + "grad_norm": 1.4234453521154167, + "learning_rate": 1.0623466900674485e-05, + "loss": 0.8152, + "step": 16175 + }, + { + "epoch": 0.49577050386171384, + "grad_norm": 1.271786652280171, + "learning_rate": 1.0622476193048055e-05, + "loss": 0.7754, + "step": 16176 + }, + { + "epoch": 0.49580115238445505, + "grad_norm": 1.3095311676166894, + "learning_rate": 1.0621485479288212e-05, + "loss": 0.7016, + "step": 16177 + }, + { + "epoch": 0.49583180090719625, + "grad_norm": 1.386252072592076, + "learning_rate": 1.0620494759404712e-05, + "loss": 0.5851, + "step": 16178 + }, + { + "epoch": 0.49586244942993746, + "grad_norm": 1.4903095021462844, + "learning_rate": 1.0619504033407315e-05, + "loss": 0.6873, + "step": 16179 + }, + { + "epoch": 0.49589309795267866, + "grad_norm": 1.4749402031243295, + "learning_rate": 1.0618513301305788e-05, + "loss": 0.6943, + "step": 16180 + }, + { + "epoch": 0.49592374647541987, + "grad_norm": 1.3965130892826978, + "learning_rate": 1.0617522563109895e-05, + "loss": 0.7722, + "step": 16181 + }, + { + "epoch": 0.4959543949981611, + "grad_norm": 1.4410566034332208, + "learning_rate": 1.0616531818829388e-05, + "loss": 0.6945, + "step": 16182 + }, + { + "epoch": 0.4959850435209023, + "grad_norm": 1.5785877386650227, + "learning_rate": 1.0615541068474041e-05, + "loss": 0.7892, + "step": 16183 + }, + { + "epoch": 0.4960156920436435, + "grad_norm": 1.4722271975983625, + "learning_rate": 1.0614550312053607e-05, + "loss": 0.7531, + "step": 16184 + }, + { + "epoch": 0.4960463405663847, + "grad_norm": 1.6049118561629199, + "learning_rate": 1.0613559549577852e-05, + "loss": 0.752, + "step": 16185 + }, + { + "epoch": 0.4960769890891259, + "grad_norm": 0.6792149735246105, + "learning_rate": 1.0612568781056538e-05, + "loss": 0.5797, + "step": 16186 + }, + { + "epoch": 0.4961076376118671, + "grad_norm": 1.4465436313583815, + "learning_rate": 1.0611578006499428e-05, + "loss": 0.7358, + "step": 16187 + }, + { + "epoch": 0.4961382861346083, + "grad_norm": 1.6564387285533386, + "learning_rate": 1.0610587225916282e-05, + "loss": 0.7931, + "step": 16188 + }, + { + "epoch": 0.4961689346573495, + "grad_norm": 1.5157902723473111, + "learning_rate": 1.0609596439316865e-05, + "loss": 0.7539, + "step": 16189 + }, + { + "epoch": 0.4961995831800907, + "grad_norm": 1.5511158029433223, + "learning_rate": 1.0608605646710937e-05, + "loss": 0.7973, + "step": 16190 + }, + { + "epoch": 0.4962302317028319, + "grad_norm": 1.4846344260711752, + "learning_rate": 1.0607614848108262e-05, + "loss": 0.7124, + "step": 16191 + }, + { + "epoch": 0.49626088022557313, + "grad_norm": 1.6909592870685757, + "learning_rate": 1.0606624043518605e-05, + "loss": 0.8665, + "step": 16192 + }, + { + "epoch": 0.49629152874831434, + "grad_norm": 1.5046642740310303, + "learning_rate": 1.0605633232951722e-05, + "loss": 0.6815, + "step": 16193 + }, + { + "epoch": 0.49632217727105554, + "grad_norm": 1.4226626607007307, + "learning_rate": 1.0604642416417384e-05, + "loss": 0.7278, + "step": 16194 + }, + { + "epoch": 0.49635282579379675, + "grad_norm": 0.6807604366973997, + "learning_rate": 1.0603651593925344e-05, + "loss": 0.6125, + "step": 16195 + }, + { + "epoch": 0.49638347431653795, + "grad_norm": 1.4290591529147822, + "learning_rate": 1.0602660765485377e-05, + "loss": 0.7163, + "step": 16196 + }, + { + "epoch": 0.49641412283927916, + "grad_norm": 0.6579403646351797, + "learning_rate": 1.0601669931107234e-05, + "loss": 0.5761, + "step": 16197 + }, + { + "epoch": 0.49644477136202037, + "grad_norm": 0.6584775211945708, + "learning_rate": 1.0600679090800688e-05, + "loss": 0.5858, + "step": 16198 + }, + { + "epoch": 0.49647541988476157, + "grad_norm": 1.477282696080905, + "learning_rate": 1.0599688244575495e-05, + "loss": 0.7861, + "step": 16199 + }, + { + "epoch": 0.4965060684075028, + "grad_norm": 0.656473565326177, + "learning_rate": 1.0598697392441419e-05, + "loss": 0.6096, + "step": 16200 + }, + { + "epoch": 0.496536716930244, + "grad_norm": 1.5746219315377827, + "learning_rate": 1.0597706534408223e-05, + "loss": 0.7745, + "step": 16201 + }, + { + "epoch": 0.4965673654529852, + "grad_norm": 1.608113846383515, + "learning_rate": 1.0596715670485676e-05, + "loss": 0.7213, + "step": 16202 + }, + { + "epoch": 0.4965980139757264, + "grad_norm": 1.303003725735711, + "learning_rate": 1.0595724800683536e-05, + "loss": 0.6378, + "step": 16203 + }, + { + "epoch": 0.4966286624984676, + "grad_norm": 1.2287079910735403, + "learning_rate": 1.0594733925011565e-05, + "loss": 0.5538, + "step": 16204 + }, + { + "epoch": 0.4966593110212088, + "grad_norm": 1.5402253041942597, + "learning_rate": 1.0593743043479527e-05, + "loss": 0.7081, + "step": 16205 + }, + { + "epoch": 0.49668995954394995, + "grad_norm": 1.4369006471621448, + "learning_rate": 1.059275215609719e-05, + "loss": 0.6439, + "step": 16206 + }, + { + "epoch": 0.49672060806669116, + "grad_norm": 1.6715160503165742, + "learning_rate": 1.0591761262874316e-05, + "loss": 0.7444, + "step": 16207 + }, + { + "epoch": 0.49675125658943237, + "grad_norm": 1.5429708877908286, + "learning_rate": 1.0590770363820661e-05, + "loss": 0.6806, + "step": 16208 + }, + { + "epoch": 0.49678190511217357, + "grad_norm": 1.500360541477857, + "learning_rate": 1.0589779458945999e-05, + "loss": 0.7227, + "step": 16209 + }, + { + "epoch": 0.4968125536349148, + "grad_norm": 1.6636495495190688, + "learning_rate": 1.0588788548260088e-05, + "loss": 0.6938, + "step": 16210 + }, + { + "epoch": 0.496843202157656, + "grad_norm": 1.490796640516596, + "learning_rate": 1.0587797631772694e-05, + "loss": 0.7567, + "step": 16211 + }, + { + "epoch": 0.4968738506803972, + "grad_norm": 1.5782683248100449, + "learning_rate": 1.0586806709493578e-05, + "loss": 0.7787, + "step": 16212 + }, + { + "epoch": 0.4969044992031384, + "grad_norm": 1.736023207449275, + "learning_rate": 1.0585815781432504e-05, + "loss": 0.7768, + "step": 16213 + }, + { + "epoch": 0.4969351477258796, + "grad_norm": 1.5621027584013245, + "learning_rate": 1.0584824847599238e-05, + "loss": 0.8237, + "step": 16214 + }, + { + "epoch": 0.4969657962486208, + "grad_norm": 0.7164631432091958, + "learning_rate": 1.0583833908003546e-05, + "loss": 0.5912, + "step": 16215 + }, + { + "epoch": 0.496996444771362, + "grad_norm": 1.4984580320261491, + "learning_rate": 1.0582842962655187e-05, + "loss": 0.8037, + "step": 16216 + }, + { + "epoch": 0.4970270932941032, + "grad_norm": 0.6909459224014343, + "learning_rate": 1.0581852011563927e-05, + "loss": 0.5533, + "step": 16217 + }, + { + "epoch": 0.4970577418168444, + "grad_norm": 1.5785262874182662, + "learning_rate": 1.0580861054739529e-05, + "loss": 0.6676, + "step": 16218 + }, + { + "epoch": 0.49708839033958563, + "grad_norm": 1.6534762858949272, + "learning_rate": 1.057987009219176e-05, + "loss": 0.7832, + "step": 16219 + }, + { + "epoch": 0.49711903886232683, + "grad_norm": 1.589452122064042, + "learning_rate": 1.0578879123930384e-05, + "loss": 0.7245, + "step": 16220 + }, + { + "epoch": 0.49714968738506804, + "grad_norm": 1.429733326947149, + "learning_rate": 1.057788814996516e-05, + "loss": 0.7107, + "step": 16221 + }, + { + "epoch": 0.49718033590780925, + "grad_norm": 1.4189612263286413, + "learning_rate": 1.057689717030586e-05, + "loss": 0.6897, + "step": 16222 + }, + { + "epoch": 0.49721098443055045, + "grad_norm": 0.6911915323678528, + "learning_rate": 1.0575906184962244e-05, + "loss": 0.5678, + "step": 16223 + }, + { + "epoch": 0.49724163295329166, + "grad_norm": 1.5191154485368807, + "learning_rate": 1.0574915193944077e-05, + "loss": 0.7023, + "step": 16224 + }, + { + "epoch": 0.49727228147603286, + "grad_norm": 1.6227004957493603, + "learning_rate": 1.057392419726112e-05, + "loss": 0.6874, + "step": 16225 + }, + { + "epoch": 0.49730292999877407, + "grad_norm": 1.563871820691687, + "learning_rate": 1.0572933194923147e-05, + "loss": 0.7486, + "step": 16226 + }, + { + "epoch": 0.4973335785215153, + "grad_norm": 1.555056770100807, + "learning_rate": 1.0571942186939912e-05, + "loss": 0.6847, + "step": 16227 + }, + { + "epoch": 0.4973642270442565, + "grad_norm": 0.6744465095914134, + "learning_rate": 1.0570951173321186e-05, + "loss": 0.5508, + "step": 16228 + }, + { + "epoch": 0.4973948755669977, + "grad_norm": 1.4559116316957836, + "learning_rate": 1.056996015407673e-05, + "loss": 0.5946, + "step": 16229 + }, + { + "epoch": 0.4974255240897389, + "grad_norm": 1.6080937819651602, + "learning_rate": 1.0568969129216316e-05, + "loss": 0.6774, + "step": 16230 + }, + { + "epoch": 0.4974561726124801, + "grad_norm": 1.515933082003418, + "learning_rate": 1.0567978098749699e-05, + "loss": 0.7502, + "step": 16231 + }, + { + "epoch": 0.4974868211352213, + "grad_norm": 1.4398983639133276, + "learning_rate": 1.0566987062686649e-05, + "loss": 0.6125, + "step": 16232 + }, + { + "epoch": 0.4975174696579625, + "grad_norm": 1.4029428066436715, + "learning_rate": 1.056599602103693e-05, + "loss": 0.6863, + "step": 16233 + }, + { + "epoch": 0.4975481181807037, + "grad_norm": 1.53723101190538, + "learning_rate": 1.0565004973810309e-05, + "loss": 0.7405, + "step": 16234 + }, + { + "epoch": 0.4975787667034449, + "grad_norm": 1.5381794553087988, + "learning_rate": 1.056401392101655e-05, + "loss": 0.8295, + "step": 16235 + }, + { + "epoch": 0.4976094152261861, + "grad_norm": 0.653517526476839, + "learning_rate": 1.0563022862665413e-05, + "loss": 0.5584, + "step": 16236 + }, + { + "epoch": 0.4976400637489273, + "grad_norm": 1.6240864369614483, + "learning_rate": 1.0562031798766672e-05, + "loss": 0.8109, + "step": 16237 + }, + { + "epoch": 0.4976707122716685, + "grad_norm": 1.3506733083039597, + "learning_rate": 1.0561040729330088e-05, + "loss": 0.7323, + "step": 16238 + }, + { + "epoch": 0.4977013607944097, + "grad_norm": 1.6097884357521295, + "learning_rate": 1.0560049654365425e-05, + "loss": 0.6798, + "step": 16239 + }, + { + "epoch": 0.4977320093171509, + "grad_norm": 1.5826265071327614, + "learning_rate": 1.0559058573882447e-05, + "loss": 0.7377, + "step": 16240 + }, + { + "epoch": 0.4977626578398921, + "grad_norm": 1.400766964628672, + "learning_rate": 1.0558067487890926e-05, + "loss": 0.6927, + "step": 16241 + }, + { + "epoch": 0.4977933063626333, + "grad_norm": 1.491916398960621, + "learning_rate": 1.055707639640062e-05, + "loss": 0.6968, + "step": 16242 + }, + { + "epoch": 0.4978239548853745, + "grad_norm": 1.532441593119073, + "learning_rate": 1.0556085299421301e-05, + "loss": 0.675, + "step": 16243 + }, + { + "epoch": 0.4978546034081157, + "grad_norm": 1.4131510739682378, + "learning_rate": 1.0555094196962728e-05, + "loss": 0.6941, + "step": 16244 + }, + { + "epoch": 0.4978852519308569, + "grad_norm": 1.3727278188956096, + "learning_rate": 1.0554103089034673e-05, + "loss": 0.6431, + "step": 16245 + }, + { + "epoch": 0.4979159004535981, + "grad_norm": 1.6308072920479757, + "learning_rate": 1.0553111975646897e-05, + "loss": 0.7552, + "step": 16246 + }, + { + "epoch": 0.49794654897633933, + "grad_norm": 1.5328013058460312, + "learning_rate": 1.0552120856809164e-05, + "loss": 0.7162, + "step": 16247 + }, + { + "epoch": 0.49797719749908054, + "grad_norm": 1.524158465219117, + "learning_rate": 1.0551129732531248e-05, + "loss": 0.7692, + "step": 16248 + }, + { + "epoch": 0.49800784602182174, + "grad_norm": 1.487994690929301, + "learning_rate": 1.0550138602822908e-05, + "loss": 0.6937, + "step": 16249 + }, + { + "epoch": 0.49803849454456295, + "grad_norm": 1.4877266755041352, + "learning_rate": 1.0549147467693911e-05, + "loss": 0.6822, + "step": 16250 + }, + { + "epoch": 0.49806914306730415, + "grad_norm": 0.6868039334609282, + "learning_rate": 1.0548156327154023e-05, + "loss": 0.5497, + "step": 16251 + }, + { + "epoch": 0.49809979159004536, + "grad_norm": 1.366645619317383, + "learning_rate": 1.0547165181213013e-05, + "loss": 0.6823, + "step": 16252 + }, + { + "epoch": 0.49813044011278657, + "grad_norm": 1.5399584474273917, + "learning_rate": 1.0546174029880642e-05, + "loss": 0.7537, + "step": 16253 + }, + { + "epoch": 0.49816108863552777, + "grad_norm": 0.6739631169693833, + "learning_rate": 1.054518287316668e-05, + "loss": 0.5697, + "step": 16254 + }, + { + "epoch": 0.498191737158269, + "grad_norm": 1.4435471173224956, + "learning_rate": 1.0544191711080888e-05, + "loss": 0.7212, + "step": 16255 + }, + { + "epoch": 0.4982223856810102, + "grad_norm": 1.6941621560313924, + "learning_rate": 1.0543200543633041e-05, + "loss": 0.6718, + "step": 16256 + }, + { + "epoch": 0.4982530342037514, + "grad_norm": 0.6889186209237683, + "learning_rate": 1.0542209370832898e-05, + "loss": 0.5704, + "step": 16257 + }, + { + "epoch": 0.4982836827264926, + "grad_norm": 1.4386997764582932, + "learning_rate": 1.0541218192690228e-05, + "loss": 0.7448, + "step": 16258 + }, + { + "epoch": 0.4983143312492338, + "grad_norm": 1.5836666419603744, + "learning_rate": 1.0540227009214794e-05, + "loss": 0.7479, + "step": 16259 + }, + { + "epoch": 0.498344979771975, + "grad_norm": 1.4811078839679381, + "learning_rate": 1.0539235820416366e-05, + "loss": 0.6784, + "step": 16260 + }, + { + "epoch": 0.4983756282947162, + "grad_norm": 1.4989680343841554, + "learning_rate": 1.0538244626304712e-05, + "loss": 0.7383, + "step": 16261 + }, + { + "epoch": 0.4984062768174574, + "grad_norm": 1.5974061379448967, + "learning_rate": 1.0537253426889594e-05, + "loss": 0.7075, + "step": 16262 + }, + { + "epoch": 0.4984369253401986, + "grad_norm": 1.3890439302209387, + "learning_rate": 1.053626222218078e-05, + "loss": 0.6656, + "step": 16263 + }, + { + "epoch": 0.49846757386293983, + "grad_norm": 0.6741010711970614, + "learning_rate": 1.0535271012188038e-05, + "loss": 0.5683, + "step": 16264 + }, + { + "epoch": 0.49849822238568103, + "grad_norm": 0.6920917562328323, + "learning_rate": 1.0534279796921136e-05, + "loss": 0.5725, + "step": 16265 + }, + { + "epoch": 0.49852887090842224, + "grad_norm": 1.660925048180952, + "learning_rate": 1.0533288576389836e-05, + "loss": 0.8156, + "step": 16266 + }, + { + "epoch": 0.49855951943116344, + "grad_norm": 1.4581886964627722, + "learning_rate": 1.0532297350603906e-05, + "loss": 0.7097, + "step": 16267 + }, + { + "epoch": 0.4985901679539046, + "grad_norm": 0.6369785344457928, + "learning_rate": 1.0531306119573115e-05, + "loss": 0.5596, + "step": 16268 + }, + { + "epoch": 0.4986208164766458, + "grad_norm": 1.4060437829654544, + "learning_rate": 1.0530314883307231e-05, + "loss": 0.8013, + "step": 16269 + }, + { + "epoch": 0.498651464999387, + "grad_norm": 1.31110616856308, + "learning_rate": 1.0529323641816016e-05, + "loss": 0.6961, + "step": 16270 + }, + { + "epoch": 0.4986821135221282, + "grad_norm": 1.5764849816313737, + "learning_rate": 1.0528332395109241e-05, + "loss": 0.7051, + "step": 16271 + }, + { + "epoch": 0.4987127620448694, + "grad_norm": 1.5592332295981515, + "learning_rate": 1.052734114319667e-05, + "loss": 0.6764, + "step": 16272 + }, + { + "epoch": 0.4987434105676106, + "grad_norm": 1.4347351460538842, + "learning_rate": 1.0526349886088075e-05, + "loss": 0.7767, + "step": 16273 + }, + { + "epoch": 0.49877405909035183, + "grad_norm": 1.3667347753730417, + "learning_rate": 1.0525358623793219e-05, + "loss": 0.6234, + "step": 16274 + }, + { + "epoch": 0.49880470761309303, + "grad_norm": 1.5468731437706082, + "learning_rate": 1.052436735632187e-05, + "loss": 0.7728, + "step": 16275 + }, + { + "epoch": 0.49883535613583424, + "grad_norm": 1.5645052606702103, + "learning_rate": 1.0523376083683793e-05, + "loss": 0.6693, + "step": 16276 + }, + { + "epoch": 0.49886600465857545, + "grad_norm": 0.7481126568521049, + "learning_rate": 1.052238480588876e-05, + "loss": 0.6125, + "step": 16277 + }, + { + "epoch": 0.49889665318131665, + "grad_norm": 1.443204791634472, + "learning_rate": 1.0521393522946535e-05, + "loss": 0.7, + "step": 16278 + }, + { + "epoch": 0.49892730170405786, + "grad_norm": 1.4343758166961091, + "learning_rate": 1.0520402234866882e-05, + "loss": 0.5841, + "step": 16279 + }, + { + "epoch": 0.49895795022679906, + "grad_norm": 1.3019765961985248, + "learning_rate": 1.051941094165958e-05, + "loss": 0.6814, + "step": 16280 + }, + { + "epoch": 0.49898859874954027, + "grad_norm": 1.5510755817369386, + "learning_rate": 1.0518419643334386e-05, + "loss": 0.651, + "step": 16281 + }, + { + "epoch": 0.4990192472722815, + "grad_norm": 1.644640388937626, + "learning_rate": 1.0517428339901071e-05, + "loss": 0.8297, + "step": 16282 + }, + { + "epoch": 0.4990498957950227, + "grad_norm": 1.4786358213876318, + "learning_rate": 1.05164370313694e-05, + "loss": 0.6459, + "step": 16283 + }, + { + "epoch": 0.4990805443177639, + "grad_norm": 1.5315668287918, + "learning_rate": 1.0515445717749147e-05, + "loss": 0.6926, + "step": 16284 + }, + { + "epoch": 0.4991111928405051, + "grad_norm": 1.5856605176817318, + "learning_rate": 1.051445439905007e-05, + "loss": 0.7683, + "step": 16285 + }, + { + "epoch": 0.4991418413632463, + "grad_norm": 1.5034637414108123, + "learning_rate": 1.0513463075281946e-05, + "loss": 0.7414, + "step": 16286 + }, + { + "epoch": 0.4991724898859875, + "grad_norm": 1.3298494622333081, + "learning_rate": 1.0512471746454536e-05, + "loss": 0.6563, + "step": 16287 + }, + { + "epoch": 0.4992031384087287, + "grad_norm": 1.4944970312131212, + "learning_rate": 1.0511480412577615e-05, + "loss": 0.7827, + "step": 16288 + }, + { + "epoch": 0.4992337869314699, + "grad_norm": 1.794698056466515, + "learning_rate": 1.0510489073660943e-05, + "loss": 0.7248, + "step": 16289 + }, + { + "epoch": 0.4992644354542111, + "grad_norm": 1.515521154432849, + "learning_rate": 1.0509497729714293e-05, + "loss": 0.6896, + "step": 16290 + }, + { + "epoch": 0.4992950839769523, + "grad_norm": 1.528017383599072, + "learning_rate": 1.0508506380747431e-05, + "loss": 0.6822, + "step": 16291 + }, + { + "epoch": 0.49932573249969353, + "grad_norm": 1.6899046878274007, + "learning_rate": 1.0507515026770127e-05, + "loss": 0.7574, + "step": 16292 + }, + { + "epoch": 0.49935638102243474, + "grad_norm": 1.6889854560518522, + "learning_rate": 1.0506523667792147e-05, + "loss": 0.7312, + "step": 16293 + }, + { + "epoch": 0.49938702954517594, + "grad_norm": 0.7022923487323458, + "learning_rate": 1.0505532303823258e-05, + "loss": 0.5857, + "step": 16294 + }, + { + "epoch": 0.49941767806791715, + "grad_norm": 1.6408594823141016, + "learning_rate": 1.050454093487323e-05, + "loss": 0.7758, + "step": 16295 + }, + { + "epoch": 0.49944832659065835, + "grad_norm": 1.3381835940274083, + "learning_rate": 1.0503549560951833e-05, + "loss": 0.6592, + "step": 16296 + }, + { + "epoch": 0.49947897511339956, + "grad_norm": 1.5569215821294229, + "learning_rate": 1.0502558182068834e-05, + "loss": 0.7377, + "step": 16297 + }, + { + "epoch": 0.49950962363614076, + "grad_norm": 1.3959258017418494, + "learning_rate": 1.0501566798233997e-05, + "loss": 0.6185, + "step": 16298 + }, + { + "epoch": 0.4995402721588819, + "grad_norm": 1.369488454397396, + "learning_rate": 1.05005754094571e-05, + "loss": 0.6678, + "step": 16299 + }, + { + "epoch": 0.4995709206816231, + "grad_norm": 1.3542785249522171, + "learning_rate": 1.04995840157479e-05, + "loss": 0.6704, + "step": 16300 + }, + { + "epoch": 0.4996015692043643, + "grad_norm": 1.4918110606639592, + "learning_rate": 1.0498592617116172e-05, + "loss": 0.6815, + "step": 16301 + }, + { + "epoch": 0.49963221772710553, + "grad_norm": 1.5435405841700018, + "learning_rate": 1.0497601213571684e-05, + "loss": 0.7117, + "step": 16302 + }, + { + "epoch": 0.49966286624984674, + "grad_norm": 1.4661615036174078, + "learning_rate": 1.0496609805124205e-05, + "loss": 0.7338, + "step": 16303 + }, + { + "epoch": 0.49969351477258794, + "grad_norm": 1.390487772290877, + "learning_rate": 1.04956183917835e-05, + "loss": 0.6627, + "step": 16304 + }, + { + "epoch": 0.49972416329532915, + "grad_norm": 1.5232965004630925, + "learning_rate": 1.0494626973559341e-05, + "loss": 0.6207, + "step": 16305 + }, + { + "epoch": 0.49975481181807035, + "grad_norm": 1.630721677927865, + "learning_rate": 1.0493635550461496e-05, + "loss": 0.6833, + "step": 16306 + }, + { + "epoch": 0.49978546034081156, + "grad_norm": 1.4287461084125799, + "learning_rate": 1.0492644122499735e-05, + "loss": 0.7182, + "step": 16307 + }, + { + "epoch": 0.49981610886355277, + "grad_norm": 1.363033242980847, + "learning_rate": 1.0491652689683825e-05, + "loss": 0.6707, + "step": 16308 + }, + { + "epoch": 0.49984675738629397, + "grad_norm": 1.6253126483253406, + "learning_rate": 1.0490661252023533e-05, + "loss": 0.7758, + "step": 16309 + }, + { + "epoch": 0.4998774059090352, + "grad_norm": 1.272817723574739, + "learning_rate": 1.0489669809528633e-05, + "loss": 0.647, + "step": 16310 + }, + { + "epoch": 0.4999080544317764, + "grad_norm": 1.6519697353045495, + "learning_rate": 1.0488678362208891e-05, + "loss": 0.7344, + "step": 16311 + }, + { + "epoch": 0.4999387029545176, + "grad_norm": 1.4487452284072047, + "learning_rate": 1.0487686910074075e-05, + "loss": 0.5646, + "step": 16312 + }, + { + "epoch": 0.4999693514772588, + "grad_norm": 1.46782024048921, + "learning_rate": 1.0486695453133953e-05, + "loss": 0.7385, + "step": 16313 + }, + { + "epoch": 0.5, + "grad_norm": 1.4639823804786225, + "learning_rate": 1.0485703991398299e-05, + "loss": 0.7643, + "step": 16314 + }, + { + "epoch": 0.5000306485227412, + "grad_norm": 1.198548759954484, + "learning_rate": 1.0484712524876879e-05, + "loss": 0.5854, + "step": 16315 + }, + { + "epoch": 0.5000612970454824, + "grad_norm": 0.6763376504553658, + "learning_rate": 1.048372105357946e-05, + "loss": 0.5546, + "step": 16316 + }, + { + "epoch": 0.5000919455682236, + "grad_norm": 0.6729909199405779, + "learning_rate": 1.0482729577515815e-05, + "loss": 0.5813, + "step": 16317 + }, + { + "epoch": 0.5001225940909648, + "grad_norm": 1.4437644147940618, + "learning_rate": 1.0481738096695715e-05, + "loss": 0.8995, + "step": 16318 + }, + { + "epoch": 0.500153242613706, + "grad_norm": 0.6938437868561027, + "learning_rate": 1.0480746611128925e-05, + "loss": 0.6061, + "step": 16319 + }, + { + "epoch": 0.5001838911364472, + "grad_norm": 0.6884938463874062, + "learning_rate": 1.0479755120825212e-05, + "loss": 0.6027, + "step": 16320 + }, + { + "epoch": 0.5002145396591884, + "grad_norm": 1.528306278541044, + "learning_rate": 1.0478763625794353e-05, + "loss": 0.7092, + "step": 16321 + }, + { + "epoch": 0.5002451881819296, + "grad_norm": 1.5515532732599253, + "learning_rate": 1.047777212604611e-05, + "loss": 0.6998, + "step": 16322 + }, + { + "epoch": 0.5002758367046708, + "grad_norm": 0.6699039039709452, + "learning_rate": 1.0476780621590261e-05, + "loss": 0.5961, + "step": 16323 + }, + { + "epoch": 0.5003064852274121, + "grad_norm": 1.636388139097149, + "learning_rate": 1.0475789112436565e-05, + "loss": 0.7305, + "step": 16324 + }, + { + "epoch": 0.5003371337501532, + "grad_norm": 1.4493141347368588, + "learning_rate": 1.0474797598594801e-05, + "loss": 0.6683, + "step": 16325 + }, + { + "epoch": 0.5003677822728945, + "grad_norm": 0.693012311056706, + "learning_rate": 1.0473806080074732e-05, + "loss": 0.5831, + "step": 16326 + }, + { + "epoch": 0.5003984307956356, + "grad_norm": 0.6366518613043742, + "learning_rate": 1.0472814556886135e-05, + "loss": 0.5715, + "step": 16327 + }, + { + "epoch": 0.5004290793183769, + "grad_norm": 1.7200991243322186, + "learning_rate": 1.047182302903877e-05, + "loss": 0.6644, + "step": 16328 + }, + { + "epoch": 0.500459727841118, + "grad_norm": 1.8134361241111612, + "learning_rate": 1.0470831496542416e-05, + "loss": 0.7375, + "step": 16329 + }, + { + "epoch": 0.5004903763638593, + "grad_norm": 1.4505539250072155, + "learning_rate": 1.0469839959406837e-05, + "loss": 0.7112, + "step": 16330 + }, + { + "epoch": 0.5005210248866004, + "grad_norm": 1.4604341539457446, + "learning_rate": 1.0468848417641804e-05, + "loss": 0.7508, + "step": 16331 + }, + { + "epoch": 0.5005516734093417, + "grad_norm": 1.2843265846478775, + "learning_rate": 1.0467856871257086e-05, + "loss": 0.5891, + "step": 16332 + }, + { + "epoch": 0.5005823219320829, + "grad_norm": 1.380490192894999, + "learning_rate": 1.0466865320262457e-05, + "loss": 0.7757, + "step": 16333 + }, + { + "epoch": 0.5006129704548241, + "grad_norm": 0.6868326768354033, + "learning_rate": 1.0465873764667687e-05, + "loss": 0.5953, + "step": 16334 + }, + { + "epoch": 0.5006436189775653, + "grad_norm": 1.616582470555219, + "learning_rate": 1.0464882204482538e-05, + "loss": 0.7275, + "step": 16335 + }, + { + "epoch": 0.5006742675003065, + "grad_norm": 1.5841528346830944, + "learning_rate": 1.046389063971679e-05, + "loss": 0.7695, + "step": 16336 + }, + { + "epoch": 0.5007049160230477, + "grad_norm": 1.3795946974096254, + "learning_rate": 1.0462899070380206e-05, + "loss": 0.5824, + "step": 16337 + }, + { + "epoch": 0.5007355645457889, + "grad_norm": 1.321960795704562, + "learning_rate": 1.0461907496482565e-05, + "loss": 0.6707, + "step": 16338 + }, + { + "epoch": 0.5007662130685301, + "grad_norm": 1.6889795821897982, + "learning_rate": 1.0460915918033623e-05, + "loss": 0.798, + "step": 16339 + }, + { + "epoch": 0.5007968615912713, + "grad_norm": 1.428155182973066, + "learning_rate": 1.0459924335043164e-05, + "loss": 0.7287, + "step": 16340 + }, + { + "epoch": 0.5008275101140125, + "grad_norm": 0.6760435981543618, + "learning_rate": 1.0458932747520948e-05, + "loss": 0.5788, + "step": 16341 + }, + { + "epoch": 0.5008581586367538, + "grad_norm": 1.497731037570211, + "learning_rate": 1.0457941155476754e-05, + "loss": 0.7331, + "step": 16342 + }, + { + "epoch": 0.5008888071594949, + "grad_norm": 1.6663370343480919, + "learning_rate": 1.0456949558920349e-05, + "loss": 0.6946, + "step": 16343 + }, + { + "epoch": 0.5009194556822362, + "grad_norm": 1.64845436097737, + "learning_rate": 1.0455957957861503e-05, + "loss": 0.6516, + "step": 16344 + }, + { + "epoch": 0.5009501042049773, + "grad_norm": 0.651401196039726, + "learning_rate": 1.0454966352309982e-05, + "loss": 0.5907, + "step": 16345 + }, + { + "epoch": 0.5009807527277185, + "grad_norm": 1.5464111780477914, + "learning_rate": 1.0453974742275567e-05, + "loss": 0.6731, + "step": 16346 + }, + { + "epoch": 0.5010114012504597, + "grad_norm": 1.4982493695887107, + "learning_rate": 1.0452983127768022e-05, + "loss": 0.5826, + "step": 16347 + }, + { + "epoch": 0.5010420497732009, + "grad_norm": 1.5527657667053154, + "learning_rate": 1.0451991508797114e-05, + "loss": 0.7199, + "step": 16348 + }, + { + "epoch": 0.5010726982959421, + "grad_norm": 1.60428089126191, + "learning_rate": 1.045099988537262e-05, + "loss": 0.9044, + "step": 16349 + }, + { + "epoch": 0.5011033468186833, + "grad_norm": 0.6979548549879966, + "learning_rate": 1.0450008257504311e-05, + "loss": 0.5557, + "step": 16350 + }, + { + "epoch": 0.5011339953414246, + "grad_norm": 1.502189812090357, + "learning_rate": 1.0449016625201955e-05, + "loss": 0.65, + "step": 16351 + }, + { + "epoch": 0.5011646438641657, + "grad_norm": 1.5500231381147505, + "learning_rate": 1.0448024988475321e-05, + "loss": 0.6993, + "step": 16352 + }, + { + "epoch": 0.501195292386907, + "grad_norm": 1.548840950488176, + "learning_rate": 1.0447033347334185e-05, + "loss": 0.732, + "step": 16353 + }, + { + "epoch": 0.5012259409096481, + "grad_norm": 1.5829102862874158, + "learning_rate": 1.0446041701788315e-05, + "loss": 0.6898, + "step": 16354 + }, + { + "epoch": 0.5012565894323894, + "grad_norm": 1.646853152307529, + "learning_rate": 1.044505005184748e-05, + "loss": 0.6947, + "step": 16355 + }, + { + "epoch": 0.5012872379551305, + "grad_norm": 1.2207637970052316, + "learning_rate": 1.044405839752145e-05, + "loss": 0.701, + "step": 16356 + }, + { + "epoch": 0.5013178864778718, + "grad_norm": 1.4524006640787235, + "learning_rate": 1.0443066738820004e-05, + "loss": 0.6978, + "step": 16357 + }, + { + "epoch": 0.5013485350006129, + "grad_norm": 1.4157164517568837, + "learning_rate": 1.0442075075752909e-05, + "loss": 0.7243, + "step": 16358 + }, + { + "epoch": 0.5013791835233542, + "grad_norm": 1.3828977239006761, + "learning_rate": 1.0441083408329931e-05, + "loss": 0.7168, + "step": 16359 + }, + { + "epoch": 0.5014098320460953, + "grad_norm": 1.5774301959180126, + "learning_rate": 1.0440091736560848e-05, + "loss": 0.7341, + "step": 16360 + }, + { + "epoch": 0.5014404805688366, + "grad_norm": 1.5699633239770263, + "learning_rate": 1.0439100060455428e-05, + "loss": 0.7388, + "step": 16361 + }, + { + "epoch": 0.5014711290915778, + "grad_norm": 0.6931287761759055, + "learning_rate": 1.0438108380023442e-05, + "loss": 0.5799, + "step": 16362 + }, + { + "epoch": 0.501501777614319, + "grad_norm": 1.459281615918815, + "learning_rate": 1.0437116695274661e-05, + "loss": 0.6558, + "step": 16363 + }, + { + "epoch": 0.5015324261370602, + "grad_norm": 1.4024164608427407, + "learning_rate": 1.0436125006218858e-05, + "loss": 0.743, + "step": 16364 + }, + { + "epoch": 0.5015630746598014, + "grad_norm": 1.712751820824345, + "learning_rate": 1.0435133312865807e-05, + "loss": 0.7453, + "step": 16365 + }, + { + "epoch": 0.5015937231825426, + "grad_norm": 1.427472037216577, + "learning_rate": 1.0434141615225272e-05, + "loss": 0.6448, + "step": 16366 + }, + { + "epoch": 0.5016243717052838, + "grad_norm": 1.5384469399251426, + "learning_rate": 1.043314991330703e-05, + "loss": 0.6809, + "step": 16367 + }, + { + "epoch": 0.501655020228025, + "grad_norm": 1.556520885901485, + "learning_rate": 1.043215820712085e-05, + "loss": 0.6752, + "step": 16368 + }, + { + "epoch": 0.5016856687507663, + "grad_norm": 1.4953423263385164, + "learning_rate": 1.0431166496676508e-05, + "loss": 0.6819, + "step": 16369 + }, + { + "epoch": 0.5017163172735074, + "grad_norm": 1.5226846963394527, + "learning_rate": 1.043017478198377e-05, + "loss": 0.8221, + "step": 16370 + }, + { + "epoch": 0.5017469657962487, + "grad_norm": 1.525700246951889, + "learning_rate": 1.0429183063052408e-05, + "loss": 0.6992, + "step": 16371 + }, + { + "epoch": 0.5017776143189898, + "grad_norm": 1.4223749759580722, + "learning_rate": 1.0428191339892197e-05, + "loss": 0.7227, + "step": 16372 + }, + { + "epoch": 0.5018082628417311, + "grad_norm": 1.5604044534415382, + "learning_rate": 1.042719961251291e-05, + "loss": 0.7157, + "step": 16373 + }, + { + "epoch": 0.5018389113644722, + "grad_norm": 1.621741520500562, + "learning_rate": 1.042620788092431e-05, + "loss": 0.7758, + "step": 16374 + }, + { + "epoch": 0.5018695598872135, + "grad_norm": 1.3301995769784547, + "learning_rate": 1.0425216145136179e-05, + "loss": 0.7229, + "step": 16375 + }, + { + "epoch": 0.5019002084099546, + "grad_norm": 0.6979072289794045, + "learning_rate": 1.0424224405158283e-05, + "loss": 0.5827, + "step": 16376 + }, + { + "epoch": 0.5019308569326958, + "grad_norm": 1.7462228154047512, + "learning_rate": 1.04232326610004e-05, + "loss": 0.6338, + "step": 16377 + }, + { + "epoch": 0.501961505455437, + "grad_norm": 1.4139878548515887, + "learning_rate": 1.042224091267229e-05, + "loss": 0.7213, + "step": 16378 + }, + { + "epoch": 0.5019921539781782, + "grad_norm": 1.5012674957262007, + "learning_rate": 1.0421249160183737e-05, + "loss": 0.6877, + "step": 16379 + }, + { + "epoch": 0.5020228025009195, + "grad_norm": 1.5718141775223438, + "learning_rate": 1.0420257403544507e-05, + "loss": 0.6685, + "step": 16380 + }, + { + "epoch": 0.5020534510236606, + "grad_norm": 0.6572928051554398, + "learning_rate": 1.0419265642764374e-05, + "loss": 0.5815, + "step": 16381 + }, + { + "epoch": 0.5020840995464019, + "grad_norm": 1.415371672808789, + "learning_rate": 1.0418273877853106e-05, + "loss": 0.7899, + "step": 16382 + }, + { + "epoch": 0.502114748069143, + "grad_norm": 1.5433552661329966, + "learning_rate": 1.0417282108820481e-05, + "loss": 0.7226, + "step": 16383 + }, + { + "epoch": 0.5021453965918843, + "grad_norm": 1.4129636556595597, + "learning_rate": 1.0416290335676268e-05, + "loss": 0.7126, + "step": 16384 + }, + { + "epoch": 0.5021760451146254, + "grad_norm": 1.5258566701008316, + "learning_rate": 1.041529855843024e-05, + "loss": 0.6844, + "step": 16385 + }, + { + "epoch": 0.5022066936373667, + "grad_norm": 0.6745612258831875, + "learning_rate": 1.041430677709217e-05, + "loss": 0.5915, + "step": 16386 + }, + { + "epoch": 0.5022373421601078, + "grad_norm": 1.4992220698811474, + "learning_rate": 1.0413314991671828e-05, + "loss": 0.6871, + "step": 16387 + }, + { + "epoch": 0.5022679906828491, + "grad_norm": 0.6705997666009177, + "learning_rate": 1.041232320217899e-05, + "loss": 0.5947, + "step": 16388 + }, + { + "epoch": 0.5022986392055903, + "grad_norm": 1.3941782441915251, + "learning_rate": 1.0411331408623425e-05, + "loss": 0.7144, + "step": 16389 + }, + { + "epoch": 0.5023292877283315, + "grad_norm": 1.4843493885862917, + "learning_rate": 1.0410339611014905e-05, + "loss": 0.7278, + "step": 16390 + }, + { + "epoch": 0.5023599362510727, + "grad_norm": 1.433627458811069, + "learning_rate": 1.0409347809363202e-05, + "loss": 0.6563, + "step": 16391 + }, + { + "epoch": 0.5023905847738139, + "grad_norm": 1.616462962818081, + "learning_rate": 1.0408356003678098e-05, + "loss": 0.7257, + "step": 16392 + }, + { + "epoch": 0.5024212332965551, + "grad_norm": 1.5297115634731862, + "learning_rate": 1.0407364193969348e-05, + "loss": 0.7198, + "step": 16393 + }, + { + "epoch": 0.5024518818192963, + "grad_norm": 1.388666224543823, + "learning_rate": 1.0406372380246742e-05, + "loss": 0.6363, + "step": 16394 + }, + { + "epoch": 0.5024825303420375, + "grad_norm": 1.6224467435384926, + "learning_rate": 1.040538056252004e-05, + "loss": 0.8272, + "step": 16395 + }, + { + "epoch": 0.5025131788647788, + "grad_norm": 0.656353116521444, + "learning_rate": 1.0404388740799022e-05, + "loss": 0.5508, + "step": 16396 + }, + { + "epoch": 0.5025438273875199, + "grad_norm": 1.455301226563786, + "learning_rate": 1.0403396915093458e-05, + "loss": 0.8217, + "step": 16397 + }, + { + "epoch": 0.5025744759102612, + "grad_norm": 1.7124525529478283, + "learning_rate": 1.0402405085413121e-05, + "loss": 0.7993, + "step": 16398 + }, + { + "epoch": 0.5026051244330023, + "grad_norm": 0.6640165756836863, + "learning_rate": 1.0401413251767783e-05, + "loss": 0.5786, + "step": 16399 + }, + { + "epoch": 0.5026357729557436, + "grad_norm": 0.6578257823786473, + "learning_rate": 1.0400421414167219e-05, + "loss": 0.5629, + "step": 16400 + }, + { + "epoch": 0.5026664214784847, + "grad_norm": 1.6941864791327783, + "learning_rate": 1.0399429572621198e-05, + "loss": 0.7086, + "step": 16401 + }, + { + "epoch": 0.502697070001226, + "grad_norm": 0.6725582359514061, + "learning_rate": 1.0398437727139496e-05, + "loss": 0.5552, + "step": 16402 + }, + { + "epoch": 0.5027277185239671, + "grad_norm": 1.4577118511415363, + "learning_rate": 1.0397445877731887e-05, + "loss": 0.7103, + "step": 16403 + }, + { + "epoch": 0.5027583670467084, + "grad_norm": 1.4153608871391685, + "learning_rate": 1.039645402440814e-05, + "loss": 0.731, + "step": 16404 + }, + { + "epoch": 0.5027890155694495, + "grad_norm": 1.7001963869015673, + "learning_rate": 1.0395462167178032e-05, + "loss": 0.7843, + "step": 16405 + }, + { + "epoch": 0.5028196640921908, + "grad_norm": 1.4380857061966201, + "learning_rate": 1.0394470306051332e-05, + "loss": 0.6842, + "step": 16406 + }, + { + "epoch": 0.502850312614932, + "grad_norm": 1.4226929416764271, + "learning_rate": 1.0393478441037819e-05, + "loss": 0.7298, + "step": 16407 + }, + { + "epoch": 0.5028809611376731, + "grad_norm": 1.4095555315812727, + "learning_rate": 1.0392486572147258e-05, + "loss": 0.7494, + "step": 16408 + }, + { + "epoch": 0.5029116096604144, + "grad_norm": 1.6203577746854696, + "learning_rate": 1.0391494699389428e-05, + "loss": 0.6751, + "step": 16409 + }, + { + "epoch": 0.5029422581831555, + "grad_norm": 1.7510561064499273, + "learning_rate": 1.0390502822774098e-05, + "loss": 0.7454, + "step": 16410 + }, + { + "epoch": 0.5029729067058968, + "grad_norm": 1.4758708240140508, + "learning_rate": 1.0389510942311047e-05, + "loss": 0.7439, + "step": 16411 + }, + { + "epoch": 0.5030035552286379, + "grad_norm": 1.3925334671834666, + "learning_rate": 1.0388519058010045e-05, + "loss": 0.6993, + "step": 16412 + }, + { + "epoch": 0.5030342037513792, + "grad_norm": 0.6879463556182756, + "learning_rate": 1.0387527169880862e-05, + "loss": 0.57, + "step": 16413 + }, + { + "epoch": 0.5030648522741203, + "grad_norm": 1.6198707695502759, + "learning_rate": 1.0386535277933279e-05, + "loss": 0.7086, + "step": 16414 + }, + { + "epoch": 0.5030955007968616, + "grad_norm": 1.5831518039513124, + "learning_rate": 1.0385543382177063e-05, + "loss": 0.7966, + "step": 16415 + }, + { + "epoch": 0.5031261493196028, + "grad_norm": 0.6864565471340843, + "learning_rate": 1.038455148262199e-05, + "loss": 0.5734, + "step": 16416 + }, + { + "epoch": 0.503156797842344, + "grad_norm": 1.7478415369326046, + "learning_rate": 1.0383559579277831e-05, + "loss": 0.7406, + "step": 16417 + }, + { + "epoch": 0.5031874463650852, + "grad_norm": 1.552298781624519, + "learning_rate": 1.0382567672154362e-05, + "loss": 0.6826, + "step": 16418 + }, + { + "epoch": 0.5032180948878264, + "grad_norm": 0.6736876170782923, + "learning_rate": 1.0381575761261358e-05, + "loss": 0.5914, + "step": 16419 + }, + { + "epoch": 0.5032487434105676, + "grad_norm": 0.6524795902583354, + "learning_rate": 1.038058384660859e-05, + "loss": 0.5986, + "step": 16420 + }, + { + "epoch": 0.5032793919333088, + "grad_norm": 0.6577533941474565, + "learning_rate": 1.037959192820583e-05, + "loss": 0.6017, + "step": 16421 + }, + { + "epoch": 0.50331004045605, + "grad_norm": 1.4585821041819909, + "learning_rate": 1.0378600006062853e-05, + "loss": 0.6755, + "step": 16422 + }, + { + "epoch": 0.5033406889787913, + "grad_norm": 1.652462608827303, + "learning_rate": 1.0377608080189436e-05, + "loss": 0.8755, + "step": 16423 + }, + { + "epoch": 0.5033713375015324, + "grad_norm": 0.6645918303215348, + "learning_rate": 1.0376616150595348e-05, + "loss": 0.5894, + "step": 16424 + }, + { + "epoch": 0.5034019860242737, + "grad_norm": 1.4439014494354667, + "learning_rate": 1.0375624217290365e-05, + "loss": 0.7991, + "step": 16425 + }, + { + "epoch": 0.5034326345470148, + "grad_norm": 1.3523131372305326, + "learning_rate": 1.0374632280284263e-05, + "loss": 0.7497, + "step": 16426 + }, + { + "epoch": 0.5034632830697561, + "grad_norm": 1.4490452024109586, + "learning_rate": 1.0373640339586811e-05, + "loss": 0.6615, + "step": 16427 + }, + { + "epoch": 0.5034939315924972, + "grad_norm": 0.6821796764305083, + "learning_rate": 1.0372648395207783e-05, + "loss": 0.5782, + "step": 16428 + }, + { + "epoch": 0.5035245801152385, + "grad_norm": 1.3550829521882026, + "learning_rate": 1.0371656447156959e-05, + "loss": 0.6867, + "step": 16429 + }, + { + "epoch": 0.5035552286379796, + "grad_norm": 0.6614352592662501, + "learning_rate": 1.0370664495444106e-05, + "loss": 0.578, + "step": 16430 + }, + { + "epoch": 0.5035858771607209, + "grad_norm": 1.3862509345953895, + "learning_rate": 1.0369672540079005e-05, + "loss": 0.6348, + "step": 16431 + }, + { + "epoch": 0.503616525683462, + "grad_norm": 1.3310665103658965, + "learning_rate": 1.0368680581071422e-05, + "loss": 0.6743, + "step": 16432 + }, + { + "epoch": 0.5036471742062033, + "grad_norm": 1.517309642017647, + "learning_rate": 1.0367688618431135e-05, + "loss": 0.7525, + "step": 16433 + }, + { + "epoch": 0.5036778227289445, + "grad_norm": 0.6934251004843848, + "learning_rate": 1.036669665216792e-05, + "loss": 0.6249, + "step": 16434 + }, + { + "epoch": 0.5037084712516857, + "grad_norm": 1.5756135460358263, + "learning_rate": 1.0365704682291548e-05, + "loss": 0.6916, + "step": 16435 + }, + { + "epoch": 0.5037391197744269, + "grad_norm": 1.5666600725480833, + "learning_rate": 1.0364712708811792e-05, + "loss": 0.7082, + "step": 16436 + }, + { + "epoch": 0.5037697682971681, + "grad_norm": 1.3805392455203052, + "learning_rate": 1.0363720731738431e-05, + "loss": 0.5984, + "step": 16437 + }, + { + "epoch": 0.5038004168199093, + "grad_norm": 1.3752420594750607, + "learning_rate": 1.036272875108124e-05, + "loss": 0.6981, + "step": 16438 + }, + { + "epoch": 0.5038310653426504, + "grad_norm": 1.466897445833229, + "learning_rate": 1.036173676684998e-05, + "loss": 0.6988, + "step": 16439 + }, + { + "epoch": 0.5038617138653917, + "grad_norm": 1.4357158765547, + "learning_rate": 1.0360744779054443e-05, + "loss": 0.6951, + "step": 16440 + }, + { + "epoch": 0.5038923623881328, + "grad_norm": 1.344850411731442, + "learning_rate": 1.0359752787704395e-05, + "loss": 0.6522, + "step": 16441 + }, + { + "epoch": 0.5039230109108741, + "grad_norm": 1.541390693633448, + "learning_rate": 1.035876079280961e-05, + "loss": 0.7232, + "step": 16442 + }, + { + "epoch": 0.5039536594336153, + "grad_norm": 1.5148772522707428, + "learning_rate": 1.0357768794379862e-05, + "loss": 0.7671, + "step": 16443 + }, + { + "epoch": 0.5039843079563565, + "grad_norm": 1.642056121612524, + "learning_rate": 1.0356776792424924e-05, + "loss": 0.776, + "step": 16444 + }, + { + "epoch": 0.5040149564790977, + "grad_norm": 1.4799732302774131, + "learning_rate": 1.0355784786954577e-05, + "loss": 0.7212, + "step": 16445 + }, + { + "epoch": 0.5040456050018389, + "grad_norm": 1.6897019854771531, + "learning_rate": 1.0354792777978592e-05, + "loss": 0.7441, + "step": 16446 + }, + { + "epoch": 0.5040762535245801, + "grad_norm": 1.7488572216171785, + "learning_rate": 1.0353800765506738e-05, + "loss": 0.7764, + "step": 16447 + }, + { + "epoch": 0.5041069020473213, + "grad_norm": 0.6597698586187435, + "learning_rate": 1.03528087495488e-05, + "loss": 0.5561, + "step": 16448 + }, + { + "epoch": 0.5041375505700625, + "grad_norm": 1.3622976081798381, + "learning_rate": 1.0351816730114543e-05, + "loss": 0.6615, + "step": 16449 + }, + { + "epoch": 0.5041681990928037, + "grad_norm": 1.5459929807598496, + "learning_rate": 1.0350824707213752e-05, + "loss": 0.7393, + "step": 16450 + }, + { + "epoch": 0.5041988476155449, + "grad_norm": 1.528327896220028, + "learning_rate": 1.0349832680856189e-05, + "loss": 0.7447, + "step": 16451 + }, + { + "epoch": 0.5042294961382862, + "grad_norm": 1.610076905007372, + "learning_rate": 1.0348840651051637e-05, + "loss": 0.6503, + "step": 16452 + }, + { + "epoch": 0.5042601446610273, + "grad_norm": 1.5864457876235427, + "learning_rate": 1.0347848617809868e-05, + "loss": 0.6394, + "step": 16453 + }, + { + "epoch": 0.5042907931837686, + "grad_norm": 0.6459418841502308, + "learning_rate": 1.0346856581140659e-05, + "loss": 0.5366, + "step": 16454 + }, + { + "epoch": 0.5043214417065097, + "grad_norm": 1.4358935446082681, + "learning_rate": 1.0345864541053783e-05, + "loss": 0.6809, + "step": 16455 + }, + { + "epoch": 0.504352090229251, + "grad_norm": 1.5249192178599875, + "learning_rate": 1.0344872497559013e-05, + "loss": 0.8421, + "step": 16456 + }, + { + "epoch": 0.5043827387519921, + "grad_norm": 1.4019109999482697, + "learning_rate": 1.034388045066613e-05, + "loss": 0.6888, + "step": 16457 + }, + { + "epoch": 0.5044133872747334, + "grad_norm": 1.6214749340438024, + "learning_rate": 1.0342888400384903e-05, + "loss": 0.7563, + "step": 16458 + }, + { + "epoch": 0.5044440357974745, + "grad_norm": 1.513479484733614, + "learning_rate": 1.034189634672511e-05, + "loss": 0.6188, + "step": 16459 + }, + { + "epoch": 0.5044746843202158, + "grad_norm": 1.5613289822732108, + "learning_rate": 1.034090428969652e-05, + "loss": 0.6915, + "step": 16460 + }, + { + "epoch": 0.504505332842957, + "grad_norm": 1.441060217940491, + "learning_rate": 1.0339912229308919e-05, + "loss": 0.6632, + "step": 16461 + }, + { + "epoch": 0.5045359813656982, + "grad_norm": 1.4593764704822803, + "learning_rate": 1.0338920165572073e-05, + "loss": 0.7376, + "step": 16462 + }, + { + "epoch": 0.5045666298884394, + "grad_norm": 1.3603962187920826, + "learning_rate": 1.033792809849576e-05, + "loss": 0.6356, + "step": 16463 + }, + { + "epoch": 0.5045972784111806, + "grad_norm": 1.3522327161529397, + "learning_rate": 1.0336936028089755e-05, + "loss": 0.6654, + "step": 16464 + }, + { + "epoch": 0.5046279269339218, + "grad_norm": 1.6851452043729538, + "learning_rate": 1.0335943954363832e-05, + "loss": 0.7255, + "step": 16465 + }, + { + "epoch": 0.504658575456663, + "grad_norm": 1.4861227858355346, + "learning_rate": 1.033495187732777e-05, + "loss": 0.69, + "step": 16466 + }, + { + "epoch": 0.5046892239794042, + "grad_norm": 1.4469842840017688, + "learning_rate": 1.033395979699134e-05, + "loss": 0.6522, + "step": 16467 + }, + { + "epoch": 0.5047198725021455, + "grad_norm": 1.5147533122674153, + "learning_rate": 1.0332967713364317e-05, + "loss": 0.7154, + "step": 16468 + }, + { + "epoch": 0.5047505210248866, + "grad_norm": 0.6726581443741808, + "learning_rate": 1.0331975626456481e-05, + "loss": 0.5801, + "step": 16469 + }, + { + "epoch": 0.5047811695476278, + "grad_norm": 1.5088230676616927, + "learning_rate": 1.0330983536277603e-05, + "loss": 0.7212, + "step": 16470 + }, + { + "epoch": 0.504811818070369, + "grad_norm": 1.6049122676325287, + "learning_rate": 1.0329991442837458e-05, + "loss": 0.6541, + "step": 16471 + }, + { + "epoch": 0.5048424665931102, + "grad_norm": 1.5452158036073034, + "learning_rate": 1.0328999346145826e-05, + "loss": 0.7358, + "step": 16472 + }, + { + "epoch": 0.5048731151158514, + "grad_norm": 1.841453278405525, + "learning_rate": 1.0328007246212477e-05, + "loss": 0.732, + "step": 16473 + }, + { + "epoch": 0.5049037636385926, + "grad_norm": 1.3336859628349669, + "learning_rate": 1.032701514304719e-05, + "loss": 0.6892, + "step": 16474 + }, + { + "epoch": 0.5049344121613338, + "grad_norm": 1.537306817008843, + "learning_rate": 1.0326023036659735e-05, + "loss": 0.722, + "step": 16475 + }, + { + "epoch": 0.504965060684075, + "grad_norm": 1.3429071466846012, + "learning_rate": 1.0325030927059897e-05, + "loss": 0.7256, + "step": 16476 + }, + { + "epoch": 0.5049957092068162, + "grad_norm": 1.386384036602281, + "learning_rate": 1.0324038814257445e-05, + "loss": 0.7033, + "step": 16477 + }, + { + "epoch": 0.5050263577295574, + "grad_norm": 1.4493610227056213, + "learning_rate": 1.0323046698262156e-05, + "loss": 0.7369, + "step": 16478 + }, + { + "epoch": 0.5050570062522987, + "grad_norm": 1.628081732756835, + "learning_rate": 1.03220545790838e-05, + "loss": 0.7372, + "step": 16479 + }, + { + "epoch": 0.5050876547750398, + "grad_norm": 1.4940771234094177, + "learning_rate": 1.0321062456732162e-05, + "loss": 0.7714, + "step": 16480 + }, + { + "epoch": 0.5051183032977811, + "grad_norm": 1.3881548944034197, + "learning_rate": 1.0320070331217015e-05, + "loss": 0.6661, + "step": 16481 + }, + { + "epoch": 0.5051489518205222, + "grad_norm": 0.6746392421167166, + "learning_rate": 1.031907820254813e-05, + "loss": 0.5639, + "step": 16482 + }, + { + "epoch": 0.5051796003432635, + "grad_norm": 1.3676923139236654, + "learning_rate": 1.0318086070735286e-05, + "loss": 0.6902, + "step": 16483 + }, + { + "epoch": 0.5052102488660046, + "grad_norm": 1.6833315404883518, + "learning_rate": 1.0317093935788262e-05, + "loss": 0.7228, + "step": 16484 + }, + { + "epoch": 0.5052408973887459, + "grad_norm": 1.6565415457054373, + "learning_rate": 1.031610179771683e-05, + "loss": 0.7428, + "step": 16485 + }, + { + "epoch": 0.505271545911487, + "grad_norm": 1.3355908698550256, + "learning_rate": 1.0315109656530762e-05, + "loss": 0.7509, + "step": 16486 + }, + { + "epoch": 0.5053021944342283, + "grad_norm": 1.446554415205881, + "learning_rate": 1.031411751223984e-05, + "loss": 0.7171, + "step": 16487 + }, + { + "epoch": 0.5053328429569695, + "grad_norm": 1.4352712806546033, + "learning_rate": 1.0313125364853838e-05, + "loss": 0.6534, + "step": 16488 + }, + { + "epoch": 0.5053634914797107, + "grad_norm": 0.6749809630671346, + "learning_rate": 1.0312133214382532e-05, + "loss": 0.591, + "step": 16489 + }, + { + "epoch": 0.5053941400024519, + "grad_norm": 1.5776149169771099, + "learning_rate": 1.0311141060835696e-05, + "loss": 0.724, + "step": 16490 + }, + { + "epoch": 0.5054247885251931, + "grad_norm": 1.494468551048892, + "learning_rate": 1.031014890422311e-05, + "loss": 0.7131, + "step": 16491 + }, + { + "epoch": 0.5054554370479343, + "grad_norm": 1.4725604633178282, + "learning_rate": 1.0309156744554545e-05, + "loss": 0.6801, + "step": 16492 + }, + { + "epoch": 0.5054860855706755, + "grad_norm": 1.4184954960934726, + "learning_rate": 1.0308164581839781e-05, + "loss": 0.6107, + "step": 16493 + }, + { + "epoch": 0.5055167340934167, + "grad_norm": 1.5167958786541276, + "learning_rate": 1.0307172416088591e-05, + "loss": 0.7695, + "step": 16494 + }, + { + "epoch": 0.505547382616158, + "grad_norm": 1.444394702054213, + "learning_rate": 1.0306180247310756e-05, + "loss": 0.7681, + "step": 16495 + }, + { + "epoch": 0.5055780311388991, + "grad_norm": 1.583993882351942, + "learning_rate": 1.0305188075516046e-05, + "loss": 0.6845, + "step": 16496 + }, + { + "epoch": 0.5056086796616404, + "grad_norm": 1.6349006446638081, + "learning_rate": 1.0304195900714238e-05, + "loss": 0.6784, + "step": 16497 + }, + { + "epoch": 0.5056393281843815, + "grad_norm": 1.5997363264350402, + "learning_rate": 1.0303203722915114e-05, + "loss": 0.6793, + "step": 16498 + }, + { + "epoch": 0.5056699767071228, + "grad_norm": 1.689374749547493, + "learning_rate": 1.0302211542128441e-05, + "loss": 0.725, + "step": 16499 + }, + { + "epoch": 0.5057006252298639, + "grad_norm": 1.8368985507996822, + "learning_rate": 1.0301219358364008e-05, + "loss": 0.7178, + "step": 16500 + }, + { + "epoch": 0.5057312737526051, + "grad_norm": 1.6487992860813165, + "learning_rate": 1.0300227171631576e-05, + "loss": 0.747, + "step": 16501 + }, + { + "epoch": 0.5057619222753463, + "grad_norm": 1.5198410819498067, + "learning_rate": 1.0299234981940933e-05, + "loss": 0.7272, + "step": 16502 + }, + { + "epoch": 0.5057925707980875, + "grad_norm": 0.6958163158694946, + "learning_rate": 1.0298242789301849e-05, + "loss": 0.6089, + "step": 16503 + }, + { + "epoch": 0.5058232193208287, + "grad_norm": 0.677389838757409, + "learning_rate": 1.0297250593724106e-05, + "loss": 0.5859, + "step": 16504 + }, + { + "epoch": 0.5058538678435699, + "grad_norm": 1.5029238160139662, + "learning_rate": 1.0296258395217474e-05, + "loss": 0.6715, + "step": 16505 + }, + { + "epoch": 0.5058845163663112, + "grad_norm": 1.7236070075212175, + "learning_rate": 1.0295266193791733e-05, + "loss": 0.7176, + "step": 16506 + }, + { + "epoch": 0.5059151648890523, + "grad_norm": 1.7308070498516543, + "learning_rate": 1.029427398945666e-05, + "loss": 0.8237, + "step": 16507 + }, + { + "epoch": 0.5059458134117936, + "grad_norm": 1.6538972136833052, + "learning_rate": 1.0293281782222026e-05, + "loss": 0.8219, + "step": 16508 + }, + { + "epoch": 0.5059764619345347, + "grad_norm": 0.6682184611305316, + "learning_rate": 1.0292289572097616e-05, + "loss": 0.6064, + "step": 16509 + }, + { + "epoch": 0.506007110457276, + "grad_norm": 1.7448141804693846, + "learning_rate": 1.0291297359093197e-05, + "loss": 0.6853, + "step": 16510 + }, + { + "epoch": 0.5060377589800171, + "grad_norm": 1.4425126885429562, + "learning_rate": 1.0290305143218557e-05, + "loss": 0.6745, + "step": 16511 + }, + { + "epoch": 0.5060684075027584, + "grad_norm": 1.373536329884572, + "learning_rate": 1.028931292448346e-05, + "loss": 0.6997, + "step": 16512 + }, + { + "epoch": 0.5060990560254995, + "grad_norm": 1.4975448442445756, + "learning_rate": 1.0288320702897693e-05, + "loss": 0.6305, + "step": 16513 + }, + { + "epoch": 0.5061297045482408, + "grad_norm": 1.7508393311177328, + "learning_rate": 1.0287328478471026e-05, + "loss": 0.7858, + "step": 16514 + }, + { + "epoch": 0.506160353070982, + "grad_norm": 1.4327553026247555, + "learning_rate": 1.0286336251213242e-05, + "loss": 0.746, + "step": 16515 + }, + { + "epoch": 0.5061910015937232, + "grad_norm": 1.3681917643361894, + "learning_rate": 1.0285344021134109e-05, + "loss": 0.6197, + "step": 16516 + }, + { + "epoch": 0.5062216501164644, + "grad_norm": 1.4876437338704092, + "learning_rate": 1.0284351788243411e-05, + "loss": 0.7139, + "step": 16517 + }, + { + "epoch": 0.5062522986392056, + "grad_norm": 1.4253170834506441, + "learning_rate": 1.028335955255092e-05, + "loss": 0.6753, + "step": 16518 + }, + { + "epoch": 0.5062829471619468, + "grad_norm": 1.4103772326702961, + "learning_rate": 1.0282367314066417e-05, + "loss": 0.7065, + "step": 16519 + }, + { + "epoch": 0.506313595684688, + "grad_norm": 1.4599714222179634, + "learning_rate": 1.0281375072799676e-05, + "loss": 0.5675, + "step": 16520 + }, + { + "epoch": 0.5063442442074292, + "grad_norm": 1.4371518627793343, + "learning_rate": 1.0280382828760473e-05, + "loss": 0.7174, + "step": 16521 + }, + { + "epoch": 0.5063748927301704, + "grad_norm": 1.4992616149835507, + "learning_rate": 1.0279390581958585e-05, + "loss": 0.716, + "step": 16522 + }, + { + "epoch": 0.5064055412529116, + "grad_norm": 1.5337959752705417, + "learning_rate": 1.0278398332403793e-05, + "loss": 0.7498, + "step": 16523 + }, + { + "epoch": 0.5064361897756529, + "grad_norm": 1.3524808436717193, + "learning_rate": 1.0277406080105872e-05, + "loss": 0.7638, + "step": 16524 + }, + { + "epoch": 0.506466838298394, + "grad_norm": 1.5784906801958019, + "learning_rate": 1.0276413825074593e-05, + "loss": 0.734, + "step": 16525 + }, + { + "epoch": 0.5064974868211353, + "grad_norm": 1.6946388807161363, + "learning_rate": 1.0275421567319743e-05, + "loss": 0.761, + "step": 16526 + }, + { + "epoch": 0.5065281353438764, + "grad_norm": 1.355317191615109, + "learning_rate": 1.0274429306851092e-05, + "loss": 0.6425, + "step": 16527 + }, + { + "epoch": 0.5065587838666177, + "grad_norm": 1.5880642018755107, + "learning_rate": 1.027343704367842e-05, + "loss": 0.7846, + "step": 16528 + }, + { + "epoch": 0.5065894323893588, + "grad_norm": 1.4455050494382378, + "learning_rate": 1.02724447778115e-05, + "loss": 0.6567, + "step": 16529 + }, + { + "epoch": 0.5066200809121001, + "grad_norm": 1.4165415143691062, + "learning_rate": 1.0271452509260113e-05, + "loss": 0.6938, + "step": 16530 + }, + { + "epoch": 0.5066507294348412, + "grad_norm": 1.2980971391612321, + "learning_rate": 1.0270460238034037e-05, + "loss": 0.6444, + "step": 16531 + }, + { + "epoch": 0.5066813779575824, + "grad_norm": 1.4960680169390315, + "learning_rate": 1.0269467964143045e-05, + "loss": 0.719, + "step": 16532 + }, + { + "epoch": 0.5067120264803237, + "grad_norm": 1.4407538469561463, + "learning_rate": 1.0268475687596915e-05, + "loss": 0.7029, + "step": 16533 + }, + { + "epoch": 0.5067426750030648, + "grad_norm": 1.6405325575882714, + "learning_rate": 1.0267483408405428e-05, + "loss": 0.6646, + "step": 16534 + }, + { + "epoch": 0.5067733235258061, + "grad_norm": 1.503787151162866, + "learning_rate": 1.026649112657836e-05, + "loss": 0.8014, + "step": 16535 + }, + { + "epoch": 0.5068039720485472, + "grad_norm": 1.5496602148213638, + "learning_rate": 1.026549884212548e-05, + "loss": 0.7547, + "step": 16536 + }, + { + "epoch": 0.5068346205712885, + "grad_norm": 1.4716615186673172, + "learning_rate": 1.026450655505658e-05, + "loss": 0.5663, + "step": 16537 + }, + { + "epoch": 0.5068652690940296, + "grad_norm": 0.7161106028392908, + "learning_rate": 1.0263514265381425e-05, + "loss": 0.5802, + "step": 16538 + }, + { + "epoch": 0.5068959176167709, + "grad_norm": 1.5385625728312884, + "learning_rate": 1.0262521973109798e-05, + "loss": 0.6904, + "step": 16539 + }, + { + "epoch": 0.506926566139512, + "grad_norm": 1.436536580108732, + "learning_rate": 1.0261529678251472e-05, + "loss": 0.6752, + "step": 16540 + }, + { + "epoch": 0.5069572146622533, + "grad_norm": 0.694998733549122, + "learning_rate": 1.0260537380816229e-05, + "loss": 0.5954, + "step": 16541 + }, + { + "epoch": 0.5069878631849944, + "grad_norm": 1.504634189304696, + "learning_rate": 1.0259545080813847e-05, + "loss": 0.69, + "step": 16542 + }, + { + "epoch": 0.5070185117077357, + "grad_norm": 1.5435071814262074, + "learning_rate": 1.0258552778254098e-05, + "loss": 0.6904, + "step": 16543 + }, + { + "epoch": 0.5070491602304769, + "grad_norm": 0.6889227597880752, + "learning_rate": 1.0257560473146762e-05, + "loss": 0.5803, + "step": 16544 + }, + { + "epoch": 0.5070798087532181, + "grad_norm": 1.5872349330470497, + "learning_rate": 1.0256568165501617e-05, + "loss": 0.6346, + "step": 16545 + }, + { + "epoch": 0.5071104572759593, + "grad_norm": 0.6997251127714271, + "learning_rate": 1.0255575855328441e-05, + "loss": 0.5595, + "step": 16546 + }, + { + "epoch": 0.5071411057987005, + "grad_norm": 1.6604875904270957, + "learning_rate": 1.0254583542637011e-05, + "loss": 0.7918, + "step": 16547 + }, + { + "epoch": 0.5071717543214417, + "grad_norm": 1.2867143410875244, + "learning_rate": 1.0253591227437103e-05, + "loss": 0.744, + "step": 16548 + }, + { + "epoch": 0.5072024028441829, + "grad_norm": 0.6585796207654455, + "learning_rate": 1.0252598909738497e-05, + "loss": 0.5911, + "step": 16549 + }, + { + "epoch": 0.5072330513669241, + "grad_norm": 1.6462410407410188, + "learning_rate": 1.0251606589550969e-05, + "loss": 0.6817, + "step": 16550 + }, + { + "epoch": 0.5072636998896654, + "grad_norm": 1.5613426478034242, + "learning_rate": 1.0250614266884296e-05, + "loss": 0.6146, + "step": 16551 + }, + { + "epoch": 0.5072943484124065, + "grad_norm": 1.3302262883500557, + "learning_rate": 1.0249621941748258e-05, + "loss": 0.6898, + "step": 16552 + }, + { + "epoch": 0.5073249969351478, + "grad_norm": 1.5269954830934913, + "learning_rate": 1.0248629614152627e-05, + "loss": 0.6938, + "step": 16553 + }, + { + "epoch": 0.5073556454578889, + "grad_norm": 1.7285312528820491, + "learning_rate": 1.0247637284107193e-05, + "loss": 0.7625, + "step": 16554 + }, + { + "epoch": 0.5073862939806302, + "grad_norm": 1.4635176240021008, + "learning_rate": 1.0246644951621717e-05, + "loss": 0.7312, + "step": 16555 + }, + { + "epoch": 0.5074169425033713, + "grad_norm": 1.6573924151657558, + "learning_rate": 1.024565261670599e-05, + "loss": 0.7773, + "step": 16556 + }, + { + "epoch": 0.5074475910261126, + "grad_norm": 1.2791219934946676, + "learning_rate": 1.0244660279369783e-05, + "loss": 0.6862, + "step": 16557 + }, + { + "epoch": 0.5074782395488537, + "grad_norm": 0.6815768479299266, + "learning_rate": 1.0243667939622879e-05, + "loss": 0.5813, + "step": 16558 + }, + { + "epoch": 0.507508888071595, + "grad_norm": 0.668868286449007, + "learning_rate": 1.0242675597475046e-05, + "loss": 0.5643, + "step": 16559 + }, + { + "epoch": 0.5075395365943361, + "grad_norm": 1.3034245625716745, + "learning_rate": 1.0241683252936075e-05, + "loss": 0.6251, + "step": 16560 + }, + { + "epoch": 0.5075701851170774, + "grad_norm": 1.4144799556545418, + "learning_rate": 1.0240690906015734e-05, + "loss": 0.6638, + "step": 16561 + }, + { + "epoch": 0.5076008336398186, + "grad_norm": 1.366174236990851, + "learning_rate": 1.0239698556723803e-05, + "loss": 0.7001, + "step": 16562 + }, + { + "epoch": 0.5076314821625597, + "grad_norm": 0.7018970702854561, + "learning_rate": 1.0238706205070062e-05, + "loss": 0.5545, + "step": 16563 + }, + { + "epoch": 0.507662130685301, + "grad_norm": 1.4990524924917328, + "learning_rate": 1.0237713851064288e-05, + "loss": 0.7797, + "step": 16564 + }, + { + "epoch": 0.5076927792080421, + "grad_norm": 1.3006594298813332, + "learning_rate": 1.023672149471626e-05, + "loss": 0.7048, + "step": 16565 + }, + { + "epoch": 0.5077234277307834, + "grad_norm": 1.601838763322377, + "learning_rate": 1.023572913603575e-05, + "loss": 0.7531, + "step": 16566 + }, + { + "epoch": 0.5077540762535245, + "grad_norm": 1.7272272352954228, + "learning_rate": 1.0234736775032544e-05, + "loss": 0.7557, + "step": 16567 + }, + { + "epoch": 0.5077847247762658, + "grad_norm": 1.3986543950308783, + "learning_rate": 1.0233744411716414e-05, + "loss": 0.6886, + "step": 16568 + }, + { + "epoch": 0.507815373299007, + "grad_norm": 1.4263183995935944, + "learning_rate": 1.0232752046097146e-05, + "loss": 0.755, + "step": 16569 + }, + { + "epoch": 0.5078460218217482, + "grad_norm": 0.6520249668640612, + "learning_rate": 1.0231759678184505e-05, + "loss": 0.5981, + "step": 16570 + }, + { + "epoch": 0.5078766703444894, + "grad_norm": 1.555802642233135, + "learning_rate": 1.0230767307988281e-05, + "loss": 0.5849, + "step": 16571 + }, + { + "epoch": 0.5079073188672306, + "grad_norm": 1.387534352464527, + "learning_rate": 1.0229774935518246e-05, + "loss": 0.6544, + "step": 16572 + }, + { + "epoch": 0.5079379673899718, + "grad_norm": 1.363085874594562, + "learning_rate": 1.0228782560784184e-05, + "loss": 0.751, + "step": 16573 + }, + { + "epoch": 0.507968615912713, + "grad_norm": 0.6735094824228028, + "learning_rate": 1.0227790183795863e-05, + "loss": 0.5759, + "step": 16574 + }, + { + "epoch": 0.5079992644354542, + "grad_norm": 0.6508317311180902, + "learning_rate": 1.0226797804563071e-05, + "loss": 0.5679, + "step": 16575 + }, + { + "epoch": 0.5080299129581954, + "grad_norm": 0.676240949173494, + "learning_rate": 1.0225805423095578e-05, + "loss": 0.5825, + "step": 16576 + }, + { + "epoch": 0.5080605614809366, + "grad_norm": 1.348246098030767, + "learning_rate": 1.0224813039403173e-05, + "loss": 0.6608, + "step": 16577 + }, + { + "epoch": 0.5080912100036779, + "grad_norm": 1.6246356594699547, + "learning_rate": 1.0223820653495622e-05, + "loss": 0.7818, + "step": 16578 + }, + { + "epoch": 0.508121858526419, + "grad_norm": 1.3242209142641563, + "learning_rate": 1.022282826538271e-05, + "loss": 0.6551, + "step": 16579 + }, + { + "epoch": 0.5081525070491603, + "grad_norm": 1.398558143636908, + "learning_rate": 1.0221835875074215e-05, + "loss": 0.6908, + "step": 16580 + }, + { + "epoch": 0.5081831555719014, + "grad_norm": 1.3807814504507328, + "learning_rate": 1.0220843482579915e-05, + "loss": 0.7719, + "step": 16581 + }, + { + "epoch": 0.5082138040946427, + "grad_norm": 1.319126073280906, + "learning_rate": 1.0219851087909587e-05, + "loss": 0.6713, + "step": 16582 + }, + { + "epoch": 0.5082444526173838, + "grad_norm": 1.5618357286534699, + "learning_rate": 1.0218858691073007e-05, + "loss": 0.6986, + "step": 16583 + }, + { + "epoch": 0.5082751011401251, + "grad_norm": 1.4368584660150558, + "learning_rate": 1.0217866292079962e-05, + "loss": 0.7284, + "step": 16584 + }, + { + "epoch": 0.5083057496628662, + "grad_norm": 1.6068277318382496, + "learning_rate": 1.0216873890940221e-05, + "loss": 0.7725, + "step": 16585 + }, + { + "epoch": 0.5083363981856075, + "grad_norm": 1.7959415636725389, + "learning_rate": 1.0215881487663567e-05, + "loss": 0.7595, + "step": 16586 + }, + { + "epoch": 0.5083670467083486, + "grad_norm": 1.4575651750328495, + "learning_rate": 1.0214889082259778e-05, + "loss": 0.7785, + "step": 16587 + }, + { + "epoch": 0.5083976952310899, + "grad_norm": 0.6812414029007915, + "learning_rate": 1.021389667473863e-05, + "loss": 0.5712, + "step": 16588 + }, + { + "epoch": 0.5084283437538311, + "grad_norm": 1.4302593162716872, + "learning_rate": 1.0212904265109906e-05, + "loss": 0.7934, + "step": 16589 + }, + { + "epoch": 0.5084589922765723, + "grad_norm": 1.4353022876445496, + "learning_rate": 1.021191185338338e-05, + "loss": 0.7046, + "step": 16590 + }, + { + "epoch": 0.5084896407993135, + "grad_norm": 1.5535647263416208, + "learning_rate": 1.0210919439568832e-05, + "loss": 0.6596, + "step": 16591 + }, + { + "epoch": 0.5085202893220547, + "grad_norm": 1.5283164056417222, + "learning_rate": 1.0209927023676042e-05, + "loss": 0.6936, + "step": 16592 + }, + { + "epoch": 0.5085509378447959, + "grad_norm": 1.5797331193109914, + "learning_rate": 1.0208934605714786e-05, + "loss": 0.7368, + "step": 16593 + }, + { + "epoch": 0.508581586367537, + "grad_norm": 1.3893810573372118, + "learning_rate": 1.0207942185694844e-05, + "loss": 0.837, + "step": 16594 + }, + { + "epoch": 0.5086122348902783, + "grad_norm": 1.6962220662722252, + "learning_rate": 1.0206949763625995e-05, + "loss": 0.7823, + "step": 16595 + }, + { + "epoch": 0.5086428834130194, + "grad_norm": 1.5345079798482018, + "learning_rate": 1.0205957339518018e-05, + "loss": 0.7, + "step": 16596 + }, + { + "epoch": 0.5086735319357607, + "grad_norm": 1.557694899940798, + "learning_rate": 1.020496491338069e-05, + "loss": 0.735, + "step": 16597 + }, + { + "epoch": 0.5087041804585019, + "grad_norm": 1.3265568328153734, + "learning_rate": 1.020397248522379e-05, + "loss": 0.7013, + "step": 16598 + }, + { + "epoch": 0.5087348289812431, + "grad_norm": 1.538864180070631, + "learning_rate": 1.0202980055057097e-05, + "loss": 0.7227, + "step": 16599 + }, + { + "epoch": 0.5087654775039843, + "grad_norm": 1.6942277479706667, + "learning_rate": 1.020198762289039e-05, + "loss": 0.6935, + "step": 16600 + }, + { + "epoch": 0.5087961260267255, + "grad_norm": 1.4771334055429612, + "learning_rate": 1.0200995188733448e-05, + "loss": 0.8398, + "step": 16601 + }, + { + "epoch": 0.5088267745494667, + "grad_norm": 1.491067922759062, + "learning_rate": 1.0200002752596046e-05, + "loss": 0.7155, + "step": 16602 + }, + { + "epoch": 0.5088574230722079, + "grad_norm": 0.6889232244335162, + "learning_rate": 1.0199010314487967e-05, + "loss": 0.5596, + "step": 16603 + }, + { + "epoch": 0.5088880715949491, + "grad_norm": 1.8652293901252477, + "learning_rate": 1.019801787441899e-05, + "loss": 0.7248, + "step": 16604 + }, + { + "epoch": 0.5089187201176903, + "grad_norm": 1.3566031097819518, + "learning_rate": 1.019702543239889e-05, + "loss": 0.6737, + "step": 16605 + }, + { + "epoch": 0.5089493686404315, + "grad_norm": 0.6564925376306252, + "learning_rate": 1.019603298843745e-05, + "loss": 0.578, + "step": 16606 + }, + { + "epoch": 0.5089800171631728, + "grad_norm": 1.3564094513090266, + "learning_rate": 1.0195040542544446e-05, + "loss": 0.7532, + "step": 16607 + }, + { + "epoch": 0.5090106656859139, + "grad_norm": 1.5221318136704318, + "learning_rate": 1.0194048094729658e-05, + "loss": 0.7383, + "step": 16608 + }, + { + "epoch": 0.5090413142086552, + "grad_norm": 1.3959165179467479, + "learning_rate": 1.0193055645002863e-05, + "loss": 0.7309, + "step": 16609 + }, + { + "epoch": 0.5090719627313963, + "grad_norm": 1.6287198151378777, + "learning_rate": 1.0192063193373843e-05, + "loss": 0.8027, + "step": 16610 + }, + { + "epoch": 0.5091026112541376, + "grad_norm": 1.4502201077487866, + "learning_rate": 1.0191070739852376e-05, + "loss": 0.7064, + "step": 16611 + }, + { + "epoch": 0.5091332597768787, + "grad_norm": 0.6500352171145916, + "learning_rate": 1.019007828444824e-05, + "loss": 0.5712, + "step": 16612 + }, + { + "epoch": 0.50916390829962, + "grad_norm": 1.2819146994918544, + "learning_rate": 1.018908582717121e-05, + "loss": 0.6491, + "step": 16613 + }, + { + "epoch": 0.5091945568223611, + "grad_norm": 1.6677315510381543, + "learning_rate": 1.0188093368031071e-05, + "loss": 0.7545, + "step": 16614 + }, + { + "epoch": 0.5092252053451024, + "grad_norm": 1.802064664522453, + "learning_rate": 1.0187100907037601e-05, + "loss": 0.7295, + "step": 16615 + }, + { + "epoch": 0.5092558538678436, + "grad_norm": 1.486153883366152, + "learning_rate": 1.018610844420058e-05, + "loss": 0.7557, + "step": 16616 + }, + { + "epoch": 0.5092865023905848, + "grad_norm": 1.3966609249067286, + "learning_rate": 1.018511597952978e-05, + "loss": 0.7111, + "step": 16617 + }, + { + "epoch": 0.509317150913326, + "grad_norm": 1.3230487525340295, + "learning_rate": 1.0184123513034987e-05, + "loss": 0.708, + "step": 16618 + }, + { + "epoch": 0.5093477994360672, + "grad_norm": 1.4799915400524082, + "learning_rate": 1.018313104472598e-05, + "loss": 0.7151, + "step": 16619 + }, + { + "epoch": 0.5093784479588084, + "grad_norm": 1.498009448476446, + "learning_rate": 1.0182138574612533e-05, + "loss": 0.7504, + "step": 16620 + }, + { + "epoch": 0.5094090964815496, + "grad_norm": 1.6003767945955487, + "learning_rate": 1.018114610270443e-05, + "loss": 0.7655, + "step": 16621 + }, + { + "epoch": 0.5094397450042908, + "grad_norm": 1.5452556388485046, + "learning_rate": 1.0180153629011445e-05, + "loss": 0.6899, + "step": 16622 + }, + { + "epoch": 0.509470393527032, + "grad_norm": 0.6758914624227528, + "learning_rate": 1.0179161153543364e-05, + "loss": 0.5829, + "step": 16623 + }, + { + "epoch": 0.5095010420497732, + "grad_norm": 1.7651117795908393, + "learning_rate": 1.0178168676309961e-05, + "loss": 0.7324, + "step": 16624 + }, + { + "epoch": 0.5095316905725144, + "grad_norm": 1.5761964012568115, + "learning_rate": 1.0177176197321017e-05, + "loss": 0.7877, + "step": 16625 + }, + { + "epoch": 0.5095623390952556, + "grad_norm": 1.4359237798323643, + "learning_rate": 1.0176183716586307e-05, + "loss": 0.6833, + "step": 16626 + }, + { + "epoch": 0.5095929876179968, + "grad_norm": 1.6557021781609922, + "learning_rate": 1.017519123411562e-05, + "loss": 0.7019, + "step": 16627 + }, + { + "epoch": 0.509623636140738, + "grad_norm": 0.6865045403161083, + "learning_rate": 1.0174198749918724e-05, + "loss": 0.5743, + "step": 16628 + }, + { + "epoch": 0.5096542846634792, + "grad_norm": 0.6799849488472629, + "learning_rate": 1.0173206264005403e-05, + "loss": 0.58, + "step": 16629 + }, + { + "epoch": 0.5096849331862204, + "grad_norm": 0.6889655032203136, + "learning_rate": 1.0172213776385437e-05, + "loss": 0.5643, + "step": 16630 + }, + { + "epoch": 0.5097155817089616, + "grad_norm": 1.6151804607957394, + "learning_rate": 1.017122128706861e-05, + "loss": 0.6558, + "step": 16631 + }, + { + "epoch": 0.5097462302317028, + "grad_norm": 1.5510052482252863, + "learning_rate": 1.0170228796064689e-05, + "loss": 0.6618, + "step": 16632 + }, + { + "epoch": 0.509776878754444, + "grad_norm": 1.8299627168665429, + "learning_rate": 1.0169236303383465e-05, + "loss": 0.7313, + "step": 16633 + }, + { + "epoch": 0.5098075272771853, + "grad_norm": 1.4757357981309784, + "learning_rate": 1.0168243809034708e-05, + "loss": 0.7414, + "step": 16634 + }, + { + "epoch": 0.5098381757999264, + "grad_norm": 1.4862099808024523, + "learning_rate": 1.0167251313028203e-05, + "loss": 0.6848, + "step": 16635 + }, + { + "epoch": 0.5098688243226677, + "grad_norm": 0.6869057061053441, + "learning_rate": 1.016625881537373e-05, + "loss": 0.5799, + "step": 16636 + }, + { + "epoch": 0.5098994728454088, + "grad_norm": 1.3985763172112133, + "learning_rate": 1.0165266316081064e-05, + "loss": 0.6422, + "step": 16637 + }, + { + "epoch": 0.5099301213681501, + "grad_norm": 1.56047349596991, + "learning_rate": 1.016427381515999e-05, + "loss": 0.816, + "step": 16638 + }, + { + "epoch": 0.5099607698908912, + "grad_norm": 1.5408882855704198, + "learning_rate": 1.0163281312620282e-05, + "loss": 0.7369, + "step": 16639 + }, + { + "epoch": 0.5099914184136325, + "grad_norm": 1.809784802782445, + "learning_rate": 1.0162288808471721e-05, + "loss": 0.6611, + "step": 16640 + }, + { + "epoch": 0.5100220669363736, + "grad_norm": 1.5141817230446855, + "learning_rate": 1.0161296302724086e-05, + "loss": 0.6938, + "step": 16641 + }, + { + "epoch": 0.5100527154591149, + "grad_norm": 1.3617587135631335, + "learning_rate": 1.0160303795387161e-05, + "loss": 0.6394, + "step": 16642 + }, + { + "epoch": 0.510083363981856, + "grad_norm": 0.6644266903168207, + "learning_rate": 1.0159311286470716e-05, + "loss": 0.5797, + "step": 16643 + }, + { + "epoch": 0.5101140125045973, + "grad_norm": 1.7099735069075537, + "learning_rate": 1.0158318775984542e-05, + "loss": 0.7407, + "step": 16644 + }, + { + "epoch": 0.5101446610273385, + "grad_norm": 1.3676939239159205, + "learning_rate": 1.0157326263938407e-05, + "loss": 0.625, + "step": 16645 + }, + { + "epoch": 0.5101753095500797, + "grad_norm": 1.4698683098045024, + "learning_rate": 1.01563337503421e-05, + "loss": 0.7538, + "step": 16646 + }, + { + "epoch": 0.5102059580728209, + "grad_norm": 1.5510214152947557, + "learning_rate": 1.0155341235205396e-05, + "loss": 0.7107, + "step": 16647 + }, + { + "epoch": 0.5102366065955621, + "grad_norm": 1.5602191438074993, + "learning_rate": 1.0154348718538071e-05, + "loss": 0.7671, + "step": 16648 + }, + { + "epoch": 0.5102672551183033, + "grad_norm": 1.547675770580019, + "learning_rate": 1.0153356200349916e-05, + "loss": 0.75, + "step": 16649 + }, + { + "epoch": 0.5102979036410445, + "grad_norm": 0.6507978487692272, + "learning_rate": 1.0152363680650699e-05, + "loss": 0.5733, + "step": 16650 + }, + { + "epoch": 0.5103285521637857, + "grad_norm": 1.4194121269521012, + "learning_rate": 1.0151371159450203e-05, + "loss": 0.6688, + "step": 16651 + }, + { + "epoch": 0.510359200686527, + "grad_norm": 1.6252983699146142, + "learning_rate": 1.0150378636758209e-05, + "loss": 0.7719, + "step": 16652 + }, + { + "epoch": 0.5103898492092681, + "grad_norm": 1.4878064177582477, + "learning_rate": 1.0149386112584495e-05, + "loss": 0.6901, + "step": 16653 + }, + { + "epoch": 0.5104204977320094, + "grad_norm": 1.61903338495406, + "learning_rate": 1.0148393586938845e-05, + "loss": 0.7719, + "step": 16654 + }, + { + "epoch": 0.5104511462547505, + "grad_norm": 0.6821438332294465, + "learning_rate": 1.0147401059831033e-05, + "loss": 0.5558, + "step": 16655 + }, + { + "epoch": 0.5104817947774917, + "grad_norm": 1.4370195406340227, + "learning_rate": 1.0146408531270837e-05, + "loss": 0.6638, + "step": 16656 + }, + { + "epoch": 0.5105124433002329, + "grad_norm": 1.5652255432310762, + "learning_rate": 1.0145416001268045e-05, + "loss": 0.6823, + "step": 16657 + }, + { + "epoch": 0.5105430918229741, + "grad_norm": 1.5088309322400464, + "learning_rate": 1.014442346983243e-05, + "loss": 0.904, + "step": 16658 + }, + { + "epoch": 0.5105737403457153, + "grad_norm": 1.6121584243113631, + "learning_rate": 1.0143430936973774e-05, + "loss": 0.6395, + "step": 16659 + }, + { + "epoch": 0.5106043888684565, + "grad_norm": 1.6616545733898298, + "learning_rate": 1.0142438402701856e-05, + "loss": 0.7536, + "step": 16660 + }, + { + "epoch": 0.5106350373911978, + "grad_norm": 1.5565896702031594, + "learning_rate": 1.0141445867026456e-05, + "loss": 0.7468, + "step": 16661 + }, + { + "epoch": 0.5106656859139389, + "grad_norm": 1.5833576937548597, + "learning_rate": 1.0140453329957353e-05, + "loss": 0.7494, + "step": 16662 + }, + { + "epoch": 0.5106963344366802, + "grad_norm": 1.6290439140077901, + "learning_rate": 1.0139460791504327e-05, + "loss": 0.7795, + "step": 16663 + }, + { + "epoch": 0.5107269829594213, + "grad_norm": 1.5476624927963798, + "learning_rate": 1.013846825167716e-05, + "loss": 0.6227, + "step": 16664 + }, + { + "epoch": 0.5107576314821626, + "grad_norm": 1.569616285709509, + "learning_rate": 1.0137475710485631e-05, + "loss": 0.8399, + "step": 16665 + }, + { + "epoch": 0.5107882800049037, + "grad_norm": 1.4909858495203716, + "learning_rate": 1.0136483167939517e-05, + "loss": 0.6926, + "step": 16666 + }, + { + "epoch": 0.510818928527645, + "grad_norm": 1.515671567683578, + "learning_rate": 1.0135490624048599e-05, + "loss": 0.7669, + "step": 16667 + }, + { + "epoch": 0.5108495770503861, + "grad_norm": 0.7040949557531064, + "learning_rate": 1.0134498078822657e-05, + "loss": 0.5678, + "step": 16668 + }, + { + "epoch": 0.5108802255731274, + "grad_norm": 1.4073519794475875, + "learning_rate": 1.0133505532271473e-05, + "loss": 0.6572, + "step": 16669 + }, + { + "epoch": 0.5109108740958686, + "grad_norm": 1.5231228720707868, + "learning_rate": 1.0132512984404823e-05, + "loss": 0.7709, + "step": 16670 + }, + { + "epoch": 0.5109415226186098, + "grad_norm": 0.6468412458981558, + "learning_rate": 1.0131520435232487e-05, + "loss": 0.5848, + "step": 16671 + }, + { + "epoch": 0.510972171141351, + "grad_norm": 1.4133260122691234, + "learning_rate": 1.013052788476425e-05, + "loss": 0.6007, + "step": 16672 + }, + { + "epoch": 0.5110028196640922, + "grad_norm": 1.471797987228263, + "learning_rate": 1.0129535333009888e-05, + "loss": 0.7272, + "step": 16673 + }, + { + "epoch": 0.5110334681868334, + "grad_norm": 1.312533222526721, + "learning_rate": 1.0128542779979178e-05, + "loss": 0.73, + "step": 16674 + }, + { + "epoch": 0.5110641167095746, + "grad_norm": 1.7290980755962502, + "learning_rate": 1.0127550225681906e-05, + "loss": 0.7923, + "step": 16675 + }, + { + "epoch": 0.5110947652323158, + "grad_norm": 0.6496088887136412, + "learning_rate": 1.0126557670127846e-05, + "loss": 0.5901, + "step": 16676 + }, + { + "epoch": 0.511125413755057, + "grad_norm": 1.58705814671676, + "learning_rate": 1.0125565113326785e-05, + "loss": 0.725, + "step": 16677 + }, + { + "epoch": 0.5111560622777982, + "grad_norm": 1.5077836705589296, + "learning_rate": 1.0124572555288496e-05, + "loss": 0.7396, + "step": 16678 + }, + { + "epoch": 0.5111867108005395, + "grad_norm": 1.439984775149819, + "learning_rate": 1.0123579996022763e-05, + "loss": 0.7687, + "step": 16679 + }, + { + "epoch": 0.5112173593232806, + "grad_norm": 1.4092306608231722, + "learning_rate": 1.0122587435539364e-05, + "loss": 0.6259, + "step": 16680 + }, + { + "epoch": 0.5112480078460219, + "grad_norm": 1.5029320287398125, + "learning_rate": 1.0121594873848083e-05, + "loss": 0.7451, + "step": 16681 + }, + { + "epoch": 0.511278656368763, + "grad_norm": 1.3616384972443407, + "learning_rate": 1.0120602310958692e-05, + "loss": 0.6975, + "step": 16682 + }, + { + "epoch": 0.5113093048915043, + "grad_norm": 0.6778949102985155, + "learning_rate": 1.0119609746880976e-05, + "loss": 0.5733, + "step": 16683 + }, + { + "epoch": 0.5113399534142454, + "grad_norm": 0.6953332438939179, + "learning_rate": 1.0118617181624714e-05, + "loss": 0.6092, + "step": 16684 + }, + { + "epoch": 0.5113706019369867, + "grad_norm": 1.4738958541816016, + "learning_rate": 1.0117624615199693e-05, + "loss": 0.7716, + "step": 16685 + }, + { + "epoch": 0.5114012504597278, + "grad_norm": 1.3748842494692428, + "learning_rate": 1.011663204761568e-05, + "loss": 0.8037, + "step": 16686 + }, + { + "epoch": 0.511431898982469, + "grad_norm": 1.6368070927633667, + "learning_rate": 1.0115639478882462e-05, + "loss": 0.6504, + "step": 16687 + }, + { + "epoch": 0.5114625475052103, + "grad_norm": 1.4068203323110846, + "learning_rate": 1.0114646909009822e-05, + "loss": 0.634, + "step": 16688 + }, + { + "epoch": 0.5114931960279514, + "grad_norm": 0.6754637513208526, + "learning_rate": 1.0113654338007532e-05, + "loss": 0.5601, + "step": 16689 + }, + { + "epoch": 0.5115238445506927, + "grad_norm": 0.6094840393543328, + "learning_rate": 1.011266176588538e-05, + "loss": 0.5337, + "step": 16690 + }, + { + "epoch": 0.5115544930734338, + "grad_norm": 1.4970543340057287, + "learning_rate": 1.0111669192653141e-05, + "loss": 0.6708, + "step": 16691 + }, + { + "epoch": 0.5115851415961751, + "grad_norm": 1.502436790482199, + "learning_rate": 1.0110676618320601e-05, + "loss": 0.6972, + "step": 16692 + }, + { + "epoch": 0.5116157901189162, + "grad_norm": 1.2634326195649155, + "learning_rate": 1.0109684042897532e-05, + "loss": 0.6735, + "step": 16693 + }, + { + "epoch": 0.5116464386416575, + "grad_norm": 1.2904608212436035, + "learning_rate": 1.010869146639372e-05, + "loss": 0.6535, + "step": 16694 + }, + { + "epoch": 0.5116770871643986, + "grad_norm": 1.4543029615124006, + "learning_rate": 1.010769888881894e-05, + "loss": 0.6928, + "step": 16695 + }, + { + "epoch": 0.5117077356871399, + "grad_norm": 0.6531890155536328, + "learning_rate": 1.0106706310182982e-05, + "loss": 0.5728, + "step": 16696 + }, + { + "epoch": 0.511738384209881, + "grad_norm": 1.4096873312856617, + "learning_rate": 1.0105713730495613e-05, + "loss": 0.6746, + "step": 16697 + }, + { + "epoch": 0.5117690327326223, + "grad_norm": 1.5487096005971073, + "learning_rate": 1.010472114976662e-05, + "loss": 0.6396, + "step": 16698 + }, + { + "epoch": 0.5117996812553635, + "grad_norm": 1.5322907172259552, + "learning_rate": 1.0103728568005784e-05, + "loss": 0.7153, + "step": 16699 + }, + { + "epoch": 0.5118303297781047, + "grad_norm": 1.6277228716113334, + "learning_rate": 1.0102735985222884e-05, + "loss": 0.7599, + "step": 16700 + }, + { + "epoch": 0.5118609783008459, + "grad_norm": 1.684843168344128, + "learning_rate": 1.0101743401427702e-05, + "loss": 0.7238, + "step": 16701 + }, + { + "epoch": 0.5118916268235871, + "grad_norm": 1.6253471689682213, + "learning_rate": 1.0100750816630012e-05, + "loss": 0.7294, + "step": 16702 + }, + { + "epoch": 0.5119222753463283, + "grad_norm": 1.4851100986465786, + "learning_rate": 1.0099758230839602e-05, + "loss": 0.6866, + "step": 16703 + }, + { + "epoch": 0.5119529238690695, + "grad_norm": 1.4710974298844834, + "learning_rate": 1.0098765644066248e-05, + "loss": 0.7176, + "step": 16704 + }, + { + "epoch": 0.5119835723918107, + "grad_norm": 1.5800284566593294, + "learning_rate": 1.009777305631973e-05, + "loss": 0.7069, + "step": 16705 + }, + { + "epoch": 0.512014220914552, + "grad_norm": 1.5689204285137084, + "learning_rate": 1.0096780467609827e-05, + "loss": 0.7042, + "step": 16706 + }, + { + "epoch": 0.5120448694372931, + "grad_norm": 1.6362221014006342, + "learning_rate": 1.0095787877946326e-05, + "loss": 0.7603, + "step": 16707 + }, + { + "epoch": 0.5120755179600344, + "grad_norm": 1.6661714900167395, + "learning_rate": 1.0094795287339e-05, + "loss": 0.7427, + "step": 16708 + }, + { + "epoch": 0.5121061664827755, + "grad_norm": 0.7331107429358498, + "learning_rate": 1.0093802695797632e-05, + "loss": 0.5739, + "step": 16709 + }, + { + "epoch": 0.5121368150055168, + "grad_norm": 1.5675311886584495, + "learning_rate": 1.0092810103332002e-05, + "loss": 0.7727, + "step": 16710 + }, + { + "epoch": 0.5121674635282579, + "grad_norm": 1.5419269458307452, + "learning_rate": 1.0091817509951892e-05, + "loss": 0.6452, + "step": 16711 + }, + { + "epoch": 0.5121981120509992, + "grad_norm": 1.4381821168627293, + "learning_rate": 1.0090824915667079e-05, + "loss": 0.7251, + "step": 16712 + }, + { + "epoch": 0.5122287605737403, + "grad_norm": 1.7342271160976581, + "learning_rate": 1.0089832320487345e-05, + "loss": 0.7516, + "step": 16713 + }, + { + "epoch": 0.5122594090964816, + "grad_norm": 1.549701123416247, + "learning_rate": 1.0088839724422467e-05, + "loss": 0.7918, + "step": 16714 + }, + { + "epoch": 0.5122900576192227, + "grad_norm": 1.2931899058204612, + "learning_rate": 1.0087847127482233e-05, + "loss": 0.7883, + "step": 16715 + }, + { + "epoch": 0.512320706141964, + "grad_norm": 1.3765635555816054, + "learning_rate": 1.0086854529676418e-05, + "loss": 0.7373, + "step": 16716 + }, + { + "epoch": 0.5123513546647052, + "grad_norm": 1.2897910694798589, + "learning_rate": 1.00858619310148e-05, + "loss": 0.6559, + "step": 16717 + }, + { + "epoch": 0.5123820031874463, + "grad_norm": 1.6430316459987675, + "learning_rate": 1.0084869331507165e-05, + "loss": 0.7928, + "step": 16718 + }, + { + "epoch": 0.5124126517101876, + "grad_norm": 1.4455718870905323, + "learning_rate": 1.0083876731163292e-05, + "loss": 0.6213, + "step": 16719 + }, + { + "epoch": 0.5124433002329287, + "grad_norm": 1.3114704752711244, + "learning_rate": 1.0082884129992958e-05, + "loss": 0.6314, + "step": 16720 + }, + { + "epoch": 0.51247394875567, + "grad_norm": 1.4536326455017494, + "learning_rate": 1.0081891528005944e-05, + "loss": 0.7294, + "step": 16721 + }, + { + "epoch": 0.5125045972784111, + "grad_norm": 1.428446800763003, + "learning_rate": 1.0080898925212035e-05, + "loss": 0.6836, + "step": 16722 + }, + { + "epoch": 0.5125352458011524, + "grad_norm": 1.5244892512537702, + "learning_rate": 1.0079906321621008e-05, + "loss": 0.7278, + "step": 16723 + }, + { + "epoch": 0.5125658943238935, + "grad_norm": 1.3695483730030724, + "learning_rate": 1.0078913717242644e-05, + "loss": 0.6914, + "step": 16724 + }, + { + "epoch": 0.5125965428466348, + "grad_norm": 1.512356865729, + "learning_rate": 1.007792111208672e-05, + "loss": 0.6753, + "step": 16725 + }, + { + "epoch": 0.512627191369376, + "grad_norm": 1.576099455967707, + "learning_rate": 1.0076928506163022e-05, + "loss": 0.7719, + "step": 16726 + }, + { + "epoch": 0.5126578398921172, + "grad_norm": 1.3798084171237734, + "learning_rate": 1.0075935899481326e-05, + "loss": 0.647, + "step": 16727 + }, + { + "epoch": 0.5126884884148584, + "grad_norm": 1.463830616566871, + "learning_rate": 1.0074943292051414e-05, + "loss": 0.6041, + "step": 16728 + }, + { + "epoch": 0.5127191369375996, + "grad_norm": 0.6778031276021464, + "learning_rate": 1.0073950683883067e-05, + "loss": 0.5525, + "step": 16729 + }, + { + "epoch": 0.5127497854603408, + "grad_norm": 1.5599425578240291, + "learning_rate": 1.0072958074986068e-05, + "loss": 0.6604, + "step": 16730 + }, + { + "epoch": 0.512780433983082, + "grad_norm": 0.6744461452898796, + "learning_rate": 1.007196546537019e-05, + "loss": 0.5695, + "step": 16731 + }, + { + "epoch": 0.5128110825058232, + "grad_norm": 1.5723462049240569, + "learning_rate": 1.007097285504522e-05, + "loss": 0.6951, + "step": 16732 + }, + { + "epoch": 0.5128417310285645, + "grad_norm": 1.5356551882537972, + "learning_rate": 1.0069980244020936e-05, + "loss": 0.6462, + "step": 16733 + }, + { + "epoch": 0.5128723795513056, + "grad_norm": 1.3950871995917007, + "learning_rate": 1.0068987632307116e-05, + "loss": 0.6799, + "step": 16734 + }, + { + "epoch": 0.5129030280740469, + "grad_norm": 0.6681410802132887, + "learning_rate": 1.006799501991355e-05, + "loss": 0.5677, + "step": 16735 + }, + { + "epoch": 0.512933676596788, + "grad_norm": 1.6851736638261776, + "learning_rate": 1.0067002406850007e-05, + "loss": 0.7228, + "step": 16736 + }, + { + "epoch": 0.5129643251195293, + "grad_norm": 1.632965105544478, + "learning_rate": 1.0066009793126272e-05, + "loss": 0.7866, + "step": 16737 + }, + { + "epoch": 0.5129949736422704, + "grad_norm": 1.5101417405774185, + "learning_rate": 1.0065017178752125e-05, + "loss": 0.8157, + "step": 16738 + }, + { + "epoch": 0.5130256221650117, + "grad_norm": 0.6642137305733288, + "learning_rate": 1.0064024563737351e-05, + "loss": 0.5918, + "step": 16739 + }, + { + "epoch": 0.5130562706877528, + "grad_norm": 1.4718224793542933, + "learning_rate": 1.0063031948091721e-05, + "loss": 0.685, + "step": 16740 + }, + { + "epoch": 0.5130869192104941, + "grad_norm": 1.489464943424534, + "learning_rate": 1.0062039331825026e-05, + "loss": 0.7131, + "step": 16741 + }, + { + "epoch": 0.5131175677332352, + "grad_norm": 1.3436727474319217, + "learning_rate": 1.0061046714947041e-05, + "loss": 0.7704, + "step": 16742 + }, + { + "epoch": 0.5131482162559765, + "grad_norm": 0.6552153886835684, + "learning_rate": 1.0060054097467544e-05, + "loss": 0.5724, + "step": 16743 + }, + { + "epoch": 0.5131788647787177, + "grad_norm": 1.5359412551268774, + "learning_rate": 1.0059061479396321e-05, + "loss": 0.6558, + "step": 16744 + }, + { + "epoch": 0.5132095133014589, + "grad_norm": 0.6666409348774538, + "learning_rate": 1.0058068860743148e-05, + "loss": 0.5971, + "step": 16745 + }, + { + "epoch": 0.5132401618242001, + "grad_norm": 1.495626276767173, + "learning_rate": 1.0057076241517811e-05, + "loss": 0.7108, + "step": 16746 + }, + { + "epoch": 0.5132708103469413, + "grad_norm": 1.5201293301978676, + "learning_rate": 1.0056083621730085e-05, + "loss": 0.6605, + "step": 16747 + }, + { + "epoch": 0.5133014588696825, + "grad_norm": 1.4500739766650954, + "learning_rate": 1.0055091001389754e-05, + "loss": 0.641, + "step": 16748 + }, + { + "epoch": 0.5133321073924236, + "grad_norm": 1.4981010073968457, + "learning_rate": 1.0054098380506594e-05, + "loss": 0.6294, + "step": 16749 + }, + { + "epoch": 0.5133627559151649, + "grad_norm": 1.43268388643085, + "learning_rate": 1.0053105759090394e-05, + "loss": 0.7216, + "step": 16750 + }, + { + "epoch": 0.513393404437906, + "grad_norm": 0.7006660433570137, + "learning_rate": 1.0052113137150925e-05, + "loss": 0.5942, + "step": 16751 + }, + { + "epoch": 0.5134240529606473, + "grad_norm": 0.6821864774467005, + "learning_rate": 1.0051120514697974e-05, + "loss": 0.5975, + "step": 16752 + }, + { + "epoch": 0.5134547014833885, + "grad_norm": 1.6195174954263256, + "learning_rate": 1.0050127891741318e-05, + "loss": 0.6988, + "step": 16753 + }, + { + "epoch": 0.5134853500061297, + "grad_norm": 1.4850535353589729, + "learning_rate": 1.004913526829074e-05, + "loss": 0.7205, + "step": 16754 + }, + { + "epoch": 0.5135159985288709, + "grad_norm": 1.370839277598381, + "learning_rate": 1.0048142644356021e-05, + "loss": 0.7034, + "step": 16755 + }, + { + "epoch": 0.5135466470516121, + "grad_norm": 1.372126816000582, + "learning_rate": 1.0047150019946939e-05, + "loss": 0.6541, + "step": 16756 + }, + { + "epoch": 0.5135772955743533, + "grad_norm": 1.4715619038893724, + "learning_rate": 1.0046157395073274e-05, + "loss": 0.7403, + "step": 16757 + }, + { + "epoch": 0.5136079440970945, + "grad_norm": 1.5498354627508664, + "learning_rate": 1.0045164769744811e-05, + "loss": 0.7446, + "step": 16758 + }, + { + "epoch": 0.5136385926198357, + "grad_norm": 1.504949647999504, + "learning_rate": 1.0044172143971326e-05, + "loss": 0.6688, + "step": 16759 + }, + { + "epoch": 0.513669241142577, + "grad_norm": 1.2109471681900505, + "learning_rate": 1.0043179517762602e-05, + "loss": 0.6538, + "step": 16760 + }, + { + "epoch": 0.5136998896653181, + "grad_norm": 1.7360606067916666, + "learning_rate": 1.004218689112842e-05, + "loss": 0.7428, + "step": 16761 + }, + { + "epoch": 0.5137305381880594, + "grad_norm": 1.5772485357490738, + "learning_rate": 1.0041194264078562e-05, + "loss": 0.7368, + "step": 16762 + }, + { + "epoch": 0.5137611867108005, + "grad_norm": 1.9041746769648422, + "learning_rate": 1.0040201636622804e-05, + "loss": 0.6723, + "step": 16763 + }, + { + "epoch": 0.5137918352335418, + "grad_norm": 1.6140589635405884, + "learning_rate": 1.0039209008770928e-05, + "loss": 0.8193, + "step": 16764 + }, + { + "epoch": 0.5138224837562829, + "grad_norm": 1.605966714718329, + "learning_rate": 1.0038216380532716e-05, + "loss": 0.712, + "step": 16765 + }, + { + "epoch": 0.5138531322790242, + "grad_norm": 1.574914899352884, + "learning_rate": 1.0037223751917948e-05, + "loss": 0.7133, + "step": 16766 + }, + { + "epoch": 0.5138837808017653, + "grad_norm": 1.454617472291911, + "learning_rate": 1.0036231122936409e-05, + "loss": 0.6718, + "step": 16767 + }, + { + "epoch": 0.5139144293245066, + "grad_norm": 1.5499225693196441, + "learning_rate": 1.003523849359787e-05, + "loss": 0.7028, + "step": 16768 + }, + { + "epoch": 0.5139450778472477, + "grad_norm": 1.3502529317459055, + "learning_rate": 1.0034245863912118e-05, + "loss": 0.5961, + "step": 16769 + }, + { + "epoch": 0.513975726369989, + "grad_norm": 1.469881196758327, + "learning_rate": 1.0033253233888935e-05, + "loss": 0.702, + "step": 16770 + }, + { + "epoch": 0.5140063748927302, + "grad_norm": 1.4844365093181322, + "learning_rate": 1.0032260603538098e-05, + "loss": 0.693, + "step": 16771 + }, + { + "epoch": 0.5140370234154714, + "grad_norm": 1.4418743509265295, + "learning_rate": 1.003126797286939e-05, + "loss": 0.7957, + "step": 16772 + }, + { + "epoch": 0.5140676719382126, + "grad_norm": 1.7997782198639725, + "learning_rate": 1.003027534189259e-05, + "loss": 0.6941, + "step": 16773 + }, + { + "epoch": 0.5140983204609538, + "grad_norm": 1.5119517475953264, + "learning_rate": 1.0029282710617478e-05, + "loss": 0.7528, + "step": 16774 + }, + { + "epoch": 0.514128968983695, + "grad_norm": 1.6131333512716675, + "learning_rate": 1.0028290079053837e-05, + "loss": 0.7366, + "step": 16775 + }, + { + "epoch": 0.5141596175064362, + "grad_norm": 1.4061368515517452, + "learning_rate": 1.0027297447211448e-05, + "loss": 0.6608, + "step": 16776 + }, + { + "epoch": 0.5141902660291774, + "grad_norm": 1.5017828962307656, + "learning_rate": 1.002630481510009e-05, + "loss": 0.5791, + "step": 16777 + }, + { + "epoch": 0.5142209145519187, + "grad_norm": 1.306298169668982, + "learning_rate": 1.0025312182729543e-05, + "loss": 0.6332, + "step": 16778 + }, + { + "epoch": 0.5142515630746598, + "grad_norm": 1.484867367451606, + "learning_rate": 1.0024319550109586e-05, + "loss": 0.6295, + "step": 16779 + }, + { + "epoch": 0.514282211597401, + "grad_norm": 1.2923577435999032, + "learning_rate": 1.0023326917250007e-05, + "loss": 0.5568, + "step": 16780 + }, + { + "epoch": 0.5143128601201422, + "grad_norm": 1.4387641493953105, + "learning_rate": 1.002233428416058e-05, + "loss": 0.6452, + "step": 16781 + }, + { + "epoch": 0.5143435086428834, + "grad_norm": 1.4909614695316498, + "learning_rate": 1.0021341650851086e-05, + "loss": 0.6476, + "step": 16782 + }, + { + "epoch": 0.5143741571656246, + "grad_norm": 0.7293432029044125, + "learning_rate": 1.0020349017331307e-05, + "loss": 0.5845, + "step": 16783 + }, + { + "epoch": 0.5144048056883658, + "grad_norm": 1.4998033647886457, + "learning_rate": 1.0019356383611028e-05, + "loss": 0.6827, + "step": 16784 + }, + { + "epoch": 0.514435454211107, + "grad_norm": 1.4326890734776587, + "learning_rate": 1.0018363749700025e-05, + "loss": 0.6563, + "step": 16785 + }, + { + "epoch": 0.5144661027338482, + "grad_norm": 1.7025303716959135, + "learning_rate": 1.0017371115608075e-05, + "loss": 0.8276, + "step": 16786 + }, + { + "epoch": 0.5144967512565894, + "grad_norm": 1.5282489605728744, + "learning_rate": 1.0016378481344966e-05, + "loss": 0.7308, + "step": 16787 + }, + { + "epoch": 0.5145273997793306, + "grad_norm": 1.6330394173817406, + "learning_rate": 1.0015385846920473e-05, + "loss": 0.7631, + "step": 16788 + }, + { + "epoch": 0.5145580483020719, + "grad_norm": 1.528611531969361, + "learning_rate": 1.0014393212344385e-05, + "loss": 0.7362, + "step": 16789 + }, + { + "epoch": 0.514588696824813, + "grad_norm": 1.5557198657558793, + "learning_rate": 1.0013400577626471e-05, + "loss": 0.7254, + "step": 16790 + }, + { + "epoch": 0.5146193453475543, + "grad_norm": 1.547784150129049, + "learning_rate": 1.001240794277652e-05, + "loss": 0.8103, + "step": 16791 + }, + { + "epoch": 0.5146499938702954, + "grad_norm": 1.508747566957744, + "learning_rate": 1.001141530780431e-05, + "loss": 0.5746, + "step": 16792 + }, + { + "epoch": 0.5146806423930367, + "grad_norm": 1.4810763286564865, + "learning_rate": 1.0010422672719625e-05, + "loss": 0.6344, + "step": 16793 + }, + { + "epoch": 0.5147112909157778, + "grad_norm": 1.5166255529791695, + "learning_rate": 1.0009430037532238e-05, + "loss": 0.6981, + "step": 16794 + }, + { + "epoch": 0.5147419394385191, + "grad_norm": 1.2768744018558784, + "learning_rate": 1.0008437402251935e-05, + "loss": 0.6579, + "step": 16795 + }, + { + "epoch": 0.5147725879612602, + "grad_norm": 1.4368281275306358, + "learning_rate": 1.00074447668885e-05, + "loss": 0.609, + "step": 16796 + }, + { + "epoch": 0.5148032364840015, + "grad_norm": 1.5093577449862725, + "learning_rate": 1.0006452131451706e-05, + "loss": 0.7562, + "step": 16797 + }, + { + "epoch": 0.5148338850067427, + "grad_norm": 1.3379931519705475, + "learning_rate": 1.000545949595134e-05, + "loss": 0.5921, + "step": 16798 + }, + { + "epoch": 0.5148645335294839, + "grad_norm": 1.4673879509506251, + "learning_rate": 1.0004466860397176e-05, + "loss": 0.7541, + "step": 16799 + }, + { + "epoch": 0.5148951820522251, + "grad_norm": 1.6189732726811341, + "learning_rate": 1.0003474224799006e-05, + "loss": 0.7054, + "step": 16800 + }, + { + "epoch": 0.5149258305749663, + "grad_norm": 1.6775900323366857, + "learning_rate": 1.0002481589166597e-05, + "loss": 0.6642, + "step": 16801 + }, + { + "epoch": 0.5149564790977075, + "grad_norm": 1.4971576701781752, + "learning_rate": 1.0001488953509742e-05, + "loss": 0.8348, + "step": 16802 + }, + { + "epoch": 0.5149871276204487, + "grad_norm": 1.4591942808557121, + "learning_rate": 1.0000496317838211e-05, + "loss": 0.6563, + "step": 16803 + }, + { + "epoch": 0.5150177761431899, + "grad_norm": 1.6001349662414674, + "learning_rate": 9.99950368216179e-06, + "loss": 0.7446, + "step": 16804 + }, + { + "epoch": 0.5150484246659311, + "grad_norm": 1.5393829469129559, + "learning_rate": 9.998511046490263e-06, + "loss": 0.8175, + "step": 16805 + }, + { + "epoch": 0.5150790731886723, + "grad_norm": 1.3818495151319259, + "learning_rate": 9.997518410833405e-06, + "loss": 0.8043, + "step": 16806 + }, + { + "epoch": 0.5151097217114136, + "grad_norm": 1.6126792887590853, + "learning_rate": 9.996525775200997e-06, + "loss": 0.6129, + "step": 16807 + }, + { + "epoch": 0.5151403702341547, + "grad_norm": 1.5255756495599007, + "learning_rate": 9.995533139602825e-06, + "loss": 0.7251, + "step": 16808 + }, + { + "epoch": 0.515171018756896, + "grad_norm": 1.5114671653632243, + "learning_rate": 9.994540504048661e-06, + "loss": 0.7429, + "step": 16809 + }, + { + "epoch": 0.5152016672796371, + "grad_norm": 1.509340308786074, + "learning_rate": 9.993547868548296e-06, + "loss": 0.6514, + "step": 16810 + }, + { + "epoch": 0.5152323158023783, + "grad_norm": 1.5558478497093569, + "learning_rate": 9.992555233111506e-06, + "loss": 0.7612, + "step": 16811 + }, + { + "epoch": 0.5152629643251195, + "grad_norm": 0.746718336851063, + "learning_rate": 9.991562597748066e-06, + "loss": 0.5804, + "step": 16812 + }, + { + "epoch": 0.5152936128478607, + "grad_norm": 1.4056860730464684, + "learning_rate": 9.990569962467765e-06, + "loss": 0.7221, + "step": 16813 + }, + { + "epoch": 0.5153242613706019, + "grad_norm": 1.5054391518693346, + "learning_rate": 9.98957732728038e-06, + "loss": 0.6838, + "step": 16814 + }, + { + "epoch": 0.5153549098933431, + "grad_norm": 1.68471811554445, + "learning_rate": 9.988584692195691e-06, + "loss": 0.6828, + "step": 16815 + }, + { + "epoch": 0.5153855584160844, + "grad_norm": 1.342076087204365, + "learning_rate": 9.987592057223483e-06, + "loss": 0.6559, + "step": 16816 + }, + { + "epoch": 0.5154162069388255, + "grad_norm": 1.5592955373156656, + "learning_rate": 9.986599422373536e-06, + "loss": 0.6114, + "step": 16817 + }, + { + "epoch": 0.5154468554615668, + "grad_norm": 1.5368290641995637, + "learning_rate": 9.98560678765562e-06, + "loss": 0.6422, + "step": 16818 + }, + { + "epoch": 0.5154775039843079, + "grad_norm": 1.5334792217026858, + "learning_rate": 9.98461415307953e-06, + "loss": 0.685, + "step": 16819 + }, + { + "epoch": 0.5155081525070492, + "grad_norm": 1.5800626751168039, + "learning_rate": 9.983621518655036e-06, + "loss": 0.7611, + "step": 16820 + }, + { + "epoch": 0.5155388010297903, + "grad_norm": 0.6573121757548159, + "learning_rate": 9.982628884391928e-06, + "loss": 0.5741, + "step": 16821 + }, + { + "epoch": 0.5155694495525316, + "grad_norm": 1.3582945530836799, + "learning_rate": 9.981636250299982e-06, + "loss": 0.6425, + "step": 16822 + }, + { + "epoch": 0.5156000980752727, + "grad_norm": 0.6567994397665625, + "learning_rate": 9.980643616388976e-06, + "loss": 0.5766, + "step": 16823 + }, + { + "epoch": 0.515630746598014, + "grad_norm": 1.301747475673504, + "learning_rate": 9.979650982668694e-06, + "loss": 0.6736, + "step": 16824 + }, + { + "epoch": 0.5156613951207552, + "grad_norm": 0.6526430091459892, + "learning_rate": 9.978658349148917e-06, + "loss": 0.6077, + "step": 16825 + }, + { + "epoch": 0.5156920436434964, + "grad_norm": 1.5689215775488132, + "learning_rate": 9.977665715839423e-06, + "loss": 0.7345, + "step": 16826 + }, + { + "epoch": 0.5157226921662376, + "grad_norm": 1.568076303332861, + "learning_rate": 9.976673082749996e-06, + "loss": 0.6828, + "step": 16827 + }, + { + "epoch": 0.5157533406889788, + "grad_norm": 0.6255839359865609, + "learning_rate": 9.975680449890413e-06, + "loss": 0.5684, + "step": 16828 + }, + { + "epoch": 0.51578398921172, + "grad_norm": 1.4710202587756875, + "learning_rate": 9.974687817270462e-06, + "loss": 0.7031, + "step": 16829 + }, + { + "epoch": 0.5158146377344612, + "grad_norm": 1.5855469581104011, + "learning_rate": 9.973695184899917e-06, + "loss": 0.8154, + "step": 16830 + }, + { + "epoch": 0.5158452862572024, + "grad_norm": 1.6028428953347196, + "learning_rate": 9.972702552788554e-06, + "loss": 0.7376, + "step": 16831 + }, + { + "epoch": 0.5158759347799436, + "grad_norm": 1.5072225260802024, + "learning_rate": 9.971709920946166e-06, + "loss": 0.7917, + "step": 16832 + }, + { + "epoch": 0.5159065833026848, + "grad_norm": 1.352164008085111, + "learning_rate": 9.970717289382526e-06, + "loss": 0.6905, + "step": 16833 + }, + { + "epoch": 0.5159372318254261, + "grad_norm": 1.628329949238508, + "learning_rate": 9.969724658107413e-06, + "loss": 0.7999, + "step": 16834 + }, + { + "epoch": 0.5159678803481672, + "grad_norm": 1.5634609149793188, + "learning_rate": 9.968732027130614e-06, + "loss": 0.7553, + "step": 16835 + }, + { + "epoch": 0.5159985288709085, + "grad_norm": 1.6336752495799398, + "learning_rate": 9.967739396461904e-06, + "loss": 0.7339, + "step": 16836 + }, + { + "epoch": 0.5160291773936496, + "grad_norm": 1.3962247408927357, + "learning_rate": 9.966746766111067e-06, + "loss": 0.7074, + "step": 16837 + }, + { + "epoch": 0.5160598259163909, + "grad_norm": 1.4484150707088046, + "learning_rate": 9.965754136087884e-06, + "loss": 0.7296, + "step": 16838 + }, + { + "epoch": 0.516090474439132, + "grad_norm": 1.7296030933336737, + "learning_rate": 9.964761506402132e-06, + "loss": 0.6594, + "step": 16839 + }, + { + "epoch": 0.5161211229618733, + "grad_norm": 1.3507675254817562, + "learning_rate": 9.963768877063596e-06, + "loss": 0.6977, + "step": 16840 + }, + { + "epoch": 0.5161517714846144, + "grad_norm": 1.4781341191896713, + "learning_rate": 9.962776248082055e-06, + "loss": 0.6502, + "step": 16841 + }, + { + "epoch": 0.5161824200073556, + "grad_norm": 1.358368923226778, + "learning_rate": 9.961783619467285e-06, + "loss": 0.7253, + "step": 16842 + }, + { + "epoch": 0.5162130685300969, + "grad_norm": 0.7130230455989004, + "learning_rate": 9.960790991229075e-06, + "loss": 0.5828, + "step": 16843 + }, + { + "epoch": 0.516243717052838, + "grad_norm": 0.7076367857355151, + "learning_rate": 9.9597983633772e-06, + "loss": 0.5659, + "step": 16844 + }, + { + "epoch": 0.5162743655755793, + "grad_norm": 1.2947600693404924, + "learning_rate": 9.958805735921443e-06, + "loss": 0.6829, + "step": 16845 + }, + { + "epoch": 0.5163050140983204, + "grad_norm": 1.3123133865916967, + "learning_rate": 9.957813108871583e-06, + "loss": 0.6445, + "step": 16846 + }, + { + "epoch": 0.5163356626210617, + "grad_norm": 1.411213076461155, + "learning_rate": 9.956820482237398e-06, + "loss": 0.7168, + "step": 16847 + }, + { + "epoch": 0.5163663111438028, + "grad_norm": 1.4744958665297139, + "learning_rate": 9.955827856028675e-06, + "loss": 0.7285, + "step": 16848 + }, + { + "epoch": 0.5163969596665441, + "grad_norm": 0.6969410142957359, + "learning_rate": 9.954835230255192e-06, + "loss": 0.5731, + "step": 16849 + }, + { + "epoch": 0.5164276081892852, + "grad_norm": 0.7315798398774166, + "learning_rate": 9.953842604926727e-06, + "loss": 0.6042, + "step": 16850 + }, + { + "epoch": 0.5164582567120265, + "grad_norm": 1.4905806309782557, + "learning_rate": 9.952849980053064e-06, + "loss": 0.6483, + "step": 16851 + }, + { + "epoch": 0.5164889052347676, + "grad_norm": 0.6663818353610828, + "learning_rate": 9.951857355643984e-06, + "loss": 0.58, + "step": 16852 + }, + { + "epoch": 0.5165195537575089, + "grad_norm": 1.591302364501798, + "learning_rate": 9.950864731709262e-06, + "loss": 0.7961, + "step": 16853 + }, + { + "epoch": 0.5165502022802501, + "grad_norm": 1.4260067689971003, + "learning_rate": 9.949872108258686e-06, + "loss": 0.7392, + "step": 16854 + }, + { + "epoch": 0.5165808508029913, + "grad_norm": 1.4200833974848222, + "learning_rate": 9.948879485302028e-06, + "loss": 0.6366, + "step": 16855 + }, + { + "epoch": 0.5166114993257325, + "grad_norm": 1.4763816914103496, + "learning_rate": 9.947886862849077e-06, + "loss": 0.6507, + "step": 16856 + }, + { + "epoch": 0.5166421478484737, + "grad_norm": 1.4672897301833359, + "learning_rate": 9.94689424090961e-06, + "loss": 0.7485, + "step": 16857 + }, + { + "epoch": 0.5166727963712149, + "grad_norm": 1.3345456960892423, + "learning_rate": 9.945901619493406e-06, + "loss": 0.7175, + "step": 16858 + }, + { + "epoch": 0.5167034448939561, + "grad_norm": 0.676700399473343, + "learning_rate": 9.94490899861025e-06, + "loss": 0.5659, + "step": 16859 + }, + { + "epoch": 0.5167340934166973, + "grad_norm": 1.5229864761258571, + "learning_rate": 9.94391637826992e-06, + "loss": 0.7718, + "step": 16860 + }, + { + "epoch": 0.5167647419394386, + "grad_norm": 1.3870952755955928, + "learning_rate": 9.94292375848219e-06, + "loss": 0.6335, + "step": 16861 + }, + { + "epoch": 0.5167953904621797, + "grad_norm": 1.397144243576341, + "learning_rate": 9.941931139256855e-06, + "loss": 0.6659, + "step": 16862 + }, + { + "epoch": 0.516826038984921, + "grad_norm": 1.4966966272238003, + "learning_rate": 9.94093852060368e-06, + "loss": 0.6999, + "step": 16863 + }, + { + "epoch": 0.5168566875076621, + "grad_norm": 1.5068969045351985, + "learning_rate": 9.93994590253246e-06, + "loss": 0.6956, + "step": 16864 + }, + { + "epoch": 0.5168873360304034, + "grad_norm": 1.4944639910665607, + "learning_rate": 9.938953285052964e-06, + "loss": 0.6517, + "step": 16865 + }, + { + "epoch": 0.5169179845531445, + "grad_norm": 1.509292324905122, + "learning_rate": 9.937960668174977e-06, + "loss": 0.7242, + "step": 16866 + }, + { + "epoch": 0.5169486330758858, + "grad_norm": 1.3269222740037636, + "learning_rate": 9.93696805190828e-06, + "loss": 0.642, + "step": 16867 + }, + { + "epoch": 0.5169792815986269, + "grad_norm": 1.514872734559328, + "learning_rate": 9.935975436262654e-06, + "loss": 0.6997, + "step": 16868 + }, + { + "epoch": 0.5170099301213682, + "grad_norm": 0.7348155595372097, + "learning_rate": 9.934982821247877e-06, + "loss": 0.5991, + "step": 16869 + }, + { + "epoch": 0.5170405786441093, + "grad_norm": 1.5518952909782617, + "learning_rate": 9.933990206873731e-06, + "loss": 0.7033, + "step": 16870 + }, + { + "epoch": 0.5170712271668506, + "grad_norm": 0.7068214448407284, + "learning_rate": 9.93299759315e-06, + "loss": 0.5832, + "step": 16871 + }, + { + "epoch": 0.5171018756895918, + "grad_norm": 1.5122997364560042, + "learning_rate": 9.932004980086453e-06, + "loss": 0.7406, + "step": 16872 + }, + { + "epoch": 0.5171325242123329, + "grad_norm": 1.5651352281405033, + "learning_rate": 9.931012367692886e-06, + "loss": 0.7199, + "step": 16873 + }, + { + "epoch": 0.5171631727350742, + "grad_norm": 1.4856838175607074, + "learning_rate": 9.930019755979064e-06, + "loss": 0.7558, + "step": 16874 + }, + { + "epoch": 0.5171938212578153, + "grad_norm": 1.5182994468767144, + "learning_rate": 9.929027144954784e-06, + "loss": 0.7258, + "step": 16875 + }, + { + "epoch": 0.5172244697805566, + "grad_norm": 1.3787817821393409, + "learning_rate": 9.928034534629814e-06, + "loss": 0.767, + "step": 16876 + }, + { + "epoch": 0.5172551183032977, + "grad_norm": 1.5704784890100287, + "learning_rate": 9.927041925013937e-06, + "loss": 0.7018, + "step": 16877 + }, + { + "epoch": 0.517285766826039, + "grad_norm": 1.6980171270776259, + "learning_rate": 9.926049316116935e-06, + "loss": 0.766, + "step": 16878 + }, + { + "epoch": 0.5173164153487801, + "grad_norm": 1.6607612205709772, + "learning_rate": 9.92505670794859e-06, + "loss": 0.7079, + "step": 16879 + }, + { + "epoch": 0.5173470638715214, + "grad_norm": 0.6716715027676798, + "learning_rate": 9.924064100518677e-06, + "loss": 0.5517, + "step": 16880 + }, + { + "epoch": 0.5173777123942626, + "grad_norm": 0.6642722857715561, + "learning_rate": 9.923071493836982e-06, + "loss": 0.5997, + "step": 16881 + }, + { + "epoch": 0.5174083609170038, + "grad_norm": 1.4800308215647497, + "learning_rate": 9.92207888791328e-06, + "loss": 0.6562, + "step": 16882 + }, + { + "epoch": 0.517439009439745, + "grad_norm": 1.4012403118846273, + "learning_rate": 9.921086282757359e-06, + "loss": 0.6478, + "step": 16883 + }, + { + "epoch": 0.5174696579624862, + "grad_norm": 1.5772948588912792, + "learning_rate": 9.920093678378997e-06, + "loss": 0.7805, + "step": 16884 + }, + { + "epoch": 0.5175003064852274, + "grad_norm": 1.5900360068228105, + "learning_rate": 9.919101074787965e-06, + "loss": 0.679, + "step": 16885 + }, + { + "epoch": 0.5175309550079686, + "grad_norm": 1.5953048801344294, + "learning_rate": 9.918108471994057e-06, + "loss": 0.7288, + "step": 16886 + }, + { + "epoch": 0.5175616035307098, + "grad_norm": 1.4969403153588736, + "learning_rate": 9.917115870007045e-06, + "loss": 0.6918, + "step": 16887 + }, + { + "epoch": 0.517592252053451, + "grad_norm": 0.7082943513159613, + "learning_rate": 9.916123268836712e-06, + "loss": 0.5861, + "step": 16888 + }, + { + "epoch": 0.5176229005761922, + "grad_norm": 1.9615161388170654, + "learning_rate": 9.915130668492837e-06, + "loss": 0.8317, + "step": 16889 + }, + { + "epoch": 0.5176535490989335, + "grad_norm": 1.4923149813141263, + "learning_rate": 9.9141380689852e-06, + "loss": 0.7295, + "step": 16890 + }, + { + "epoch": 0.5176841976216746, + "grad_norm": 1.3789722950575547, + "learning_rate": 9.913145470323585e-06, + "loss": 0.8107, + "step": 16891 + }, + { + "epoch": 0.5177148461444159, + "grad_norm": 1.3896802098821812, + "learning_rate": 9.91215287251777e-06, + "loss": 0.7615, + "step": 16892 + }, + { + "epoch": 0.517745494667157, + "grad_norm": 1.5178118203515725, + "learning_rate": 9.911160275577533e-06, + "loss": 0.8278, + "step": 16893 + }, + { + "epoch": 0.5177761431898983, + "grad_norm": 1.467311703566351, + "learning_rate": 9.91016767951266e-06, + "loss": 0.7599, + "step": 16894 + }, + { + "epoch": 0.5178067917126394, + "grad_norm": 1.3453416454008316, + "learning_rate": 9.909175084332928e-06, + "loss": 0.6816, + "step": 16895 + }, + { + "epoch": 0.5178374402353807, + "grad_norm": 1.4333110436285557, + "learning_rate": 9.90818249004811e-06, + "loss": 0.7032, + "step": 16896 + }, + { + "epoch": 0.5178680887581218, + "grad_norm": 1.6428762996585835, + "learning_rate": 9.907189896668001e-06, + "loss": 0.7371, + "step": 16897 + }, + { + "epoch": 0.5178987372808631, + "grad_norm": 1.5337889454327047, + "learning_rate": 9.906197304202371e-06, + "loss": 0.7062, + "step": 16898 + }, + { + "epoch": 0.5179293858036043, + "grad_norm": 1.4302418080057875, + "learning_rate": 9.905204712661001e-06, + "loss": 0.7212, + "step": 16899 + }, + { + "epoch": 0.5179600343263455, + "grad_norm": 1.674966447232448, + "learning_rate": 9.904212122053677e-06, + "loss": 0.791, + "step": 16900 + }, + { + "epoch": 0.5179906828490867, + "grad_norm": 1.541388856477507, + "learning_rate": 9.903219532390173e-06, + "loss": 0.759, + "step": 16901 + }, + { + "epoch": 0.5180213313718279, + "grad_norm": 1.4510801399622046, + "learning_rate": 9.902226943680271e-06, + "loss": 0.6993, + "step": 16902 + }, + { + "epoch": 0.5180519798945691, + "grad_norm": 1.379678515375222, + "learning_rate": 9.901234355933755e-06, + "loss": 0.6011, + "step": 16903 + }, + { + "epoch": 0.5180826284173102, + "grad_norm": 0.6626019324146568, + "learning_rate": 9.9002417691604e-06, + "loss": 0.5858, + "step": 16904 + }, + { + "epoch": 0.5181132769400515, + "grad_norm": 1.4435471822320403, + "learning_rate": 9.899249183369991e-06, + "loss": 0.7078, + "step": 16905 + }, + { + "epoch": 0.5181439254627926, + "grad_norm": 0.673331695772459, + "learning_rate": 9.898256598572303e-06, + "loss": 0.5409, + "step": 16906 + }, + { + "epoch": 0.5181745739855339, + "grad_norm": 1.5396126531261256, + "learning_rate": 9.897264014777117e-06, + "loss": 0.671, + "step": 16907 + }, + { + "epoch": 0.518205222508275, + "grad_norm": 1.3943625754965274, + "learning_rate": 9.896271431994219e-06, + "loss": 0.6685, + "step": 16908 + }, + { + "epoch": 0.5182358710310163, + "grad_norm": 1.3475597453186252, + "learning_rate": 9.895278850233381e-06, + "loss": 0.6719, + "step": 16909 + }, + { + "epoch": 0.5182665195537575, + "grad_norm": 0.6706272665195059, + "learning_rate": 9.89428626950439e-06, + "loss": 0.5552, + "step": 16910 + }, + { + "epoch": 0.5182971680764987, + "grad_norm": 1.4069812141833697, + "learning_rate": 9.893293689817025e-06, + "loss": 0.713, + "step": 16911 + }, + { + "epoch": 0.5183278165992399, + "grad_norm": 1.5537166106629492, + "learning_rate": 9.892301111181061e-06, + "loss": 0.7516, + "step": 16912 + }, + { + "epoch": 0.5183584651219811, + "grad_norm": 1.4699835737623503, + "learning_rate": 9.891308533606282e-06, + "loss": 0.719, + "step": 16913 + }, + { + "epoch": 0.5183891136447223, + "grad_norm": 1.5672698930441307, + "learning_rate": 9.890315957102473e-06, + "loss": 0.7128, + "step": 16914 + }, + { + "epoch": 0.5184197621674635, + "grad_norm": 1.3250532992332171, + "learning_rate": 9.889323381679402e-06, + "loss": 0.6451, + "step": 16915 + }, + { + "epoch": 0.5184504106902047, + "grad_norm": 1.6124027208050957, + "learning_rate": 9.888330807346862e-06, + "loss": 0.6998, + "step": 16916 + }, + { + "epoch": 0.518481059212946, + "grad_norm": 1.5171806334183642, + "learning_rate": 9.88733823411462e-06, + "loss": 0.6379, + "step": 16917 + }, + { + "epoch": 0.5185117077356871, + "grad_norm": 1.3075461493556073, + "learning_rate": 9.886345661992471e-06, + "loss": 0.6247, + "step": 16918 + }, + { + "epoch": 0.5185423562584284, + "grad_norm": 1.6685035694437376, + "learning_rate": 9.885353090990183e-06, + "loss": 0.651, + "step": 16919 + }, + { + "epoch": 0.5185730047811695, + "grad_norm": 1.5238100986735141, + "learning_rate": 9.88436052111754e-06, + "loss": 0.7741, + "step": 16920 + }, + { + "epoch": 0.5186036533039108, + "grad_norm": 1.5652275723829345, + "learning_rate": 9.883367952384324e-06, + "loss": 0.719, + "step": 16921 + }, + { + "epoch": 0.5186343018266519, + "grad_norm": 1.7966410714388343, + "learning_rate": 9.882375384800314e-06, + "loss": 0.7216, + "step": 16922 + }, + { + "epoch": 0.5186649503493932, + "grad_norm": 1.656479005599566, + "learning_rate": 9.881382818375286e-06, + "loss": 0.7832, + "step": 16923 + }, + { + "epoch": 0.5186955988721343, + "grad_norm": 1.6482974065132177, + "learning_rate": 9.880390253119027e-06, + "loss": 0.6667, + "step": 16924 + }, + { + "epoch": 0.5187262473948756, + "grad_norm": 1.4926335176249763, + "learning_rate": 9.879397689041315e-06, + "loss": 0.7894, + "step": 16925 + }, + { + "epoch": 0.5187568959176168, + "grad_norm": 1.559782965087144, + "learning_rate": 9.87840512615192e-06, + "loss": 0.7203, + "step": 16926 + }, + { + "epoch": 0.518787544440358, + "grad_norm": 1.3947349973978662, + "learning_rate": 9.87741256446064e-06, + "loss": 0.621, + "step": 16927 + }, + { + "epoch": 0.5188181929630992, + "grad_norm": 1.4638132159591275, + "learning_rate": 9.876420003977237e-06, + "loss": 0.7247, + "step": 16928 + }, + { + "epoch": 0.5188488414858404, + "grad_norm": 1.551115270756926, + "learning_rate": 9.875427444711507e-06, + "loss": 0.7062, + "step": 16929 + }, + { + "epoch": 0.5188794900085816, + "grad_norm": 1.3690929019979394, + "learning_rate": 9.874434886673218e-06, + "loss": 0.6325, + "step": 16930 + }, + { + "epoch": 0.5189101385313228, + "grad_norm": 1.3737009666007383, + "learning_rate": 9.873442329872154e-06, + "loss": 0.6397, + "step": 16931 + }, + { + "epoch": 0.518940787054064, + "grad_norm": 1.4821850345849739, + "learning_rate": 9.872449774318097e-06, + "loss": 0.745, + "step": 16932 + }, + { + "epoch": 0.5189714355768053, + "grad_norm": 1.5059188403125727, + "learning_rate": 9.871457220020824e-06, + "loss": 0.7602, + "step": 16933 + }, + { + "epoch": 0.5190020840995464, + "grad_norm": 1.5427675574414166, + "learning_rate": 9.870464666990116e-06, + "loss": 0.7524, + "step": 16934 + }, + { + "epoch": 0.5190327326222876, + "grad_norm": 1.4534129853106559, + "learning_rate": 9.869472115235754e-06, + "loss": 0.7478, + "step": 16935 + }, + { + "epoch": 0.5190633811450288, + "grad_norm": 1.5401357777331435, + "learning_rate": 9.868479564767513e-06, + "loss": 0.6544, + "step": 16936 + }, + { + "epoch": 0.51909402966777, + "grad_norm": 1.33669729326008, + "learning_rate": 9.86748701559518e-06, + "loss": 0.6292, + "step": 16937 + }, + { + "epoch": 0.5191246781905112, + "grad_norm": 1.5731449101272543, + "learning_rate": 9.866494467728534e-06, + "loss": 0.7328, + "step": 16938 + }, + { + "epoch": 0.5191553267132524, + "grad_norm": 1.527473741125229, + "learning_rate": 9.865501921177344e-06, + "loss": 0.681, + "step": 16939 + }, + { + "epoch": 0.5191859752359936, + "grad_norm": 1.3797741427739618, + "learning_rate": 9.864509375951406e-06, + "loss": 0.7628, + "step": 16940 + }, + { + "epoch": 0.5192166237587348, + "grad_norm": 1.4760462929637403, + "learning_rate": 9.863516832060488e-06, + "loss": 0.7125, + "step": 16941 + }, + { + "epoch": 0.519247272281476, + "grad_norm": 1.5417780523520563, + "learning_rate": 9.862524289514372e-06, + "loss": 0.7654, + "step": 16942 + }, + { + "epoch": 0.5192779208042172, + "grad_norm": 1.5050589455401726, + "learning_rate": 9.861531748322843e-06, + "loss": 0.6315, + "step": 16943 + }, + { + "epoch": 0.5193085693269585, + "grad_norm": 1.3121962397292368, + "learning_rate": 9.860539208495672e-06, + "loss": 0.6963, + "step": 16944 + }, + { + "epoch": 0.5193392178496996, + "grad_norm": 0.7426297416675692, + "learning_rate": 9.859546670042648e-06, + "loss": 0.5855, + "step": 16945 + }, + { + "epoch": 0.5193698663724409, + "grad_norm": 1.5064928659142676, + "learning_rate": 9.858554132973547e-06, + "loss": 0.7992, + "step": 16946 + }, + { + "epoch": 0.519400514895182, + "grad_norm": 1.5849630074337615, + "learning_rate": 9.857561597298146e-06, + "loss": 0.7457, + "step": 16947 + }, + { + "epoch": 0.5194311634179233, + "grad_norm": 1.511698626282459, + "learning_rate": 9.856569063026227e-06, + "loss": 0.717, + "step": 16948 + }, + { + "epoch": 0.5194618119406644, + "grad_norm": 1.5490766958577458, + "learning_rate": 9.855576530167575e-06, + "loss": 0.7157, + "step": 16949 + }, + { + "epoch": 0.5194924604634057, + "grad_norm": 1.5254219145509738, + "learning_rate": 9.854583998731958e-06, + "loss": 0.8197, + "step": 16950 + }, + { + "epoch": 0.5195231089861468, + "grad_norm": 1.4145623906463198, + "learning_rate": 9.853591468729165e-06, + "loss": 0.6992, + "step": 16951 + }, + { + "epoch": 0.5195537575088881, + "grad_norm": 1.4270190771413964, + "learning_rate": 9.852598940168972e-06, + "loss": 0.629, + "step": 16952 + }, + { + "epoch": 0.5195844060316293, + "grad_norm": 1.4846447884908627, + "learning_rate": 9.851606413061158e-06, + "loss": 0.6981, + "step": 16953 + }, + { + "epoch": 0.5196150545543705, + "grad_norm": 1.4383749824262992, + "learning_rate": 9.850613887415506e-06, + "loss": 0.7298, + "step": 16954 + }, + { + "epoch": 0.5196457030771117, + "grad_norm": 1.4963964964152836, + "learning_rate": 9.849621363241793e-06, + "loss": 0.772, + "step": 16955 + }, + { + "epoch": 0.5196763515998529, + "grad_norm": 1.5277970524848645, + "learning_rate": 9.848628840549799e-06, + "loss": 0.6637, + "step": 16956 + }, + { + "epoch": 0.5197070001225941, + "grad_norm": 1.584317895349949, + "learning_rate": 9.847636319349306e-06, + "loss": 0.6515, + "step": 16957 + }, + { + "epoch": 0.5197376486453353, + "grad_norm": 0.6739193847709192, + "learning_rate": 9.846643799650086e-06, + "loss": 0.5792, + "step": 16958 + }, + { + "epoch": 0.5197682971680765, + "grad_norm": 1.6892685140600843, + "learning_rate": 9.84565128146193e-06, + "loss": 0.7683, + "step": 16959 + }, + { + "epoch": 0.5197989456908177, + "grad_norm": 0.6602120820572052, + "learning_rate": 9.844658764794609e-06, + "loss": 0.5655, + "step": 16960 + }, + { + "epoch": 0.5198295942135589, + "grad_norm": 1.5809804427799177, + "learning_rate": 9.843666249657903e-06, + "loss": 0.7174, + "step": 16961 + }, + { + "epoch": 0.5198602427363002, + "grad_norm": 1.449250591186478, + "learning_rate": 9.842673736061595e-06, + "loss": 0.7427, + "step": 16962 + }, + { + "epoch": 0.5198908912590413, + "grad_norm": 1.3536123642000684, + "learning_rate": 9.841681224015462e-06, + "loss": 0.7278, + "step": 16963 + }, + { + "epoch": 0.5199215397817826, + "grad_norm": 1.431313110699145, + "learning_rate": 9.840688713529287e-06, + "loss": 0.6776, + "step": 16964 + }, + { + "epoch": 0.5199521883045237, + "grad_norm": 1.482665355184169, + "learning_rate": 9.839696204612844e-06, + "loss": 0.5728, + "step": 16965 + }, + { + "epoch": 0.5199828368272649, + "grad_norm": 1.47897089578093, + "learning_rate": 9.838703697275916e-06, + "loss": 0.7125, + "step": 16966 + }, + { + "epoch": 0.5200134853500061, + "grad_norm": 1.5879188135529727, + "learning_rate": 9.837711191528282e-06, + "loss": 0.7078, + "step": 16967 + }, + { + "epoch": 0.5200441338727473, + "grad_norm": 1.4688014965531773, + "learning_rate": 9.836718687379723e-06, + "loss": 0.7347, + "step": 16968 + }, + { + "epoch": 0.5200747823954885, + "grad_norm": 1.5225665895723317, + "learning_rate": 9.835726184840012e-06, + "loss": 0.7574, + "step": 16969 + }, + { + "epoch": 0.5201054309182297, + "grad_norm": 1.455834429527129, + "learning_rate": 9.83473368391894e-06, + "loss": 0.6508, + "step": 16970 + }, + { + "epoch": 0.520136079440971, + "grad_norm": 1.5203227291281225, + "learning_rate": 9.83374118462627e-06, + "loss": 0.7942, + "step": 16971 + }, + { + "epoch": 0.5201667279637121, + "grad_norm": 1.1836974127671769, + "learning_rate": 9.832748686971799e-06, + "loss": 0.7391, + "step": 16972 + }, + { + "epoch": 0.5201973764864534, + "grad_norm": 1.4940697253693667, + "learning_rate": 9.831756190965295e-06, + "loss": 0.6585, + "step": 16973 + }, + { + "epoch": 0.5202280250091945, + "grad_norm": 1.5621508023929807, + "learning_rate": 9.830763696616538e-06, + "loss": 0.6724, + "step": 16974 + }, + { + "epoch": 0.5202586735319358, + "grad_norm": 1.4558017983807803, + "learning_rate": 9.829771203935313e-06, + "loss": 0.664, + "step": 16975 + }, + { + "epoch": 0.5202893220546769, + "grad_norm": 1.4172194275964731, + "learning_rate": 9.828778712931395e-06, + "loss": 0.7343, + "step": 16976 + }, + { + "epoch": 0.5203199705774182, + "grad_norm": 1.482196657537574, + "learning_rate": 9.827786223614561e-06, + "loss": 0.7146, + "step": 16977 + }, + { + "epoch": 0.5203506191001593, + "grad_norm": 1.6338499137545026, + "learning_rate": 9.826793735994598e-06, + "loss": 0.6899, + "step": 16978 + }, + { + "epoch": 0.5203812676229006, + "grad_norm": 1.3210674445616823, + "learning_rate": 9.825801250081281e-06, + "loss": 0.6961, + "step": 16979 + }, + { + "epoch": 0.5204119161456418, + "grad_norm": 1.5182969270305073, + "learning_rate": 9.824808765884382e-06, + "loss": 0.6419, + "step": 16980 + }, + { + "epoch": 0.520442564668383, + "grad_norm": 0.7114128888001148, + "learning_rate": 9.823816283413695e-06, + "loss": 0.5568, + "step": 16981 + }, + { + "epoch": 0.5204732131911242, + "grad_norm": 0.7230768074607964, + "learning_rate": 9.822823802678985e-06, + "loss": 0.5728, + "step": 16982 + }, + { + "epoch": 0.5205038617138654, + "grad_norm": 1.6008149961540157, + "learning_rate": 9.821831323690042e-06, + "loss": 0.6679, + "step": 16983 + }, + { + "epoch": 0.5205345102366066, + "grad_norm": 1.5250876143580647, + "learning_rate": 9.82083884645664e-06, + "loss": 0.6609, + "step": 16984 + }, + { + "epoch": 0.5205651587593478, + "grad_norm": 1.5205419279753905, + "learning_rate": 9.819846370988557e-06, + "loss": 0.7237, + "step": 16985 + }, + { + "epoch": 0.520595807282089, + "grad_norm": 1.6937271592720435, + "learning_rate": 9.818853897295574e-06, + "loss": 0.6803, + "step": 16986 + }, + { + "epoch": 0.5206264558048302, + "grad_norm": 1.6837585386966147, + "learning_rate": 9.81786142538747e-06, + "loss": 0.7414, + "step": 16987 + }, + { + "epoch": 0.5206571043275714, + "grad_norm": 1.441619201647016, + "learning_rate": 9.816868955274022e-06, + "loss": 0.701, + "step": 16988 + }, + { + "epoch": 0.5206877528503127, + "grad_norm": 1.4497676599792897, + "learning_rate": 9.815876486965014e-06, + "loss": 0.7405, + "step": 16989 + }, + { + "epoch": 0.5207184013730538, + "grad_norm": 1.3992660068398366, + "learning_rate": 9.81488402047022e-06, + "loss": 0.6763, + "step": 16990 + }, + { + "epoch": 0.5207490498957951, + "grad_norm": 1.6822036780051093, + "learning_rate": 9.813891555799425e-06, + "loss": 0.7486, + "step": 16991 + }, + { + "epoch": 0.5207796984185362, + "grad_norm": 1.5962190352273236, + "learning_rate": 9.812899092962402e-06, + "loss": 0.6642, + "step": 16992 + }, + { + "epoch": 0.5208103469412775, + "grad_norm": 1.4939930651876268, + "learning_rate": 9.81190663196893e-06, + "loss": 0.702, + "step": 16993 + }, + { + "epoch": 0.5208409954640186, + "grad_norm": 1.4572713708984082, + "learning_rate": 9.810914172828793e-06, + "loss": 0.765, + "step": 16994 + }, + { + "epoch": 0.5208716439867599, + "grad_norm": 0.6927541185970103, + "learning_rate": 9.809921715551767e-06, + "loss": 0.5864, + "step": 16995 + }, + { + "epoch": 0.520902292509501, + "grad_norm": 1.4289485259593613, + "learning_rate": 9.808929260147628e-06, + "loss": 0.7148, + "step": 16996 + }, + { + "epoch": 0.5209329410322422, + "grad_norm": 1.4646878547114714, + "learning_rate": 9.80793680662616e-06, + "loss": 0.6696, + "step": 16997 + }, + { + "epoch": 0.5209635895549835, + "grad_norm": 1.472858084550865, + "learning_rate": 9.80694435499714e-06, + "loss": 0.646, + "step": 16998 + }, + { + "epoch": 0.5209942380777246, + "grad_norm": 1.4694382794583634, + "learning_rate": 9.805951905270345e-06, + "loss": 0.7225, + "step": 16999 + }, + { + "epoch": 0.5210248866004659, + "grad_norm": 1.4792889950031956, + "learning_rate": 9.804959457455555e-06, + "loss": 0.6853, + "step": 17000 + }, + { + "epoch": 0.521055535123207, + "grad_norm": 0.647268831508439, + "learning_rate": 9.803967011562551e-06, + "loss": 0.5964, + "step": 17001 + }, + { + "epoch": 0.5210861836459483, + "grad_norm": 1.5229101087759926, + "learning_rate": 9.802974567601113e-06, + "loss": 0.6335, + "step": 17002 + }, + { + "epoch": 0.5211168321686894, + "grad_norm": 1.4664843350687131, + "learning_rate": 9.801982125581014e-06, + "loss": 0.7444, + "step": 17003 + }, + { + "epoch": 0.5211474806914307, + "grad_norm": 1.3484756962833395, + "learning_rate": 9.800989685512034e-06, + "loss": 0.7423, + "step": 17004 + }, + { + "epoch": 0.5211781292141718, + "grad_norm": 1.362297262149981, + "learning_rate": 9.799997247403958e-06, + "loss": 0.609, + "step": 17005 + }, + { + "epoch": 0.5212087777369131, + "grad_norm": 1.5596799747714163, + "learning_rate": 9.799004811266557e-06, + "loss": 0.6961, + "step": 17006 + }, + { + "epoch": 0.5212394262596542, + "grad_norm": 1.8125373679762242, + "learning_rate": 9.798012377109613e-06, + "loss": 0.6963, + "step": 17007 + }, + { + "epoch": 0.5212700747823955, + "grad_norm": 1.5162616771409951, + "learning_rate": 9.797019944942907e-06, + "loss": 0.6492, + "step": 17008 + }, + { + "epoch": 0.5213007233051367, + "grad_norm": 1.539740692007343, + "learning_rate": 9.796027514776211e-06, + "loss": 0.7224, + "step": 17009 + }, + { + "epoch": 0.5213313718278779, + "grad_norm": 0.6993655009423309, + "learning_rate": 9.795035086619311e-06, + "loss": 0.5904, + "step": 17010 + }, + { + "epoch": 0.5213620203506191, + "grad_norm": 1.4780219851176213, + "learning_rate": 9.794042660481985e-06, + "loss": 0.6823, + "step": 17011 + }, + { + "epoch": 0.5213926688733603, + "grad_norm": 1.6450946226025012, + "learning_rate": 9.793050236374005e-06, + "loss": 0.7081, + "step": 17012 + }, + { + "epoch": 0.5214233173961015, + "grad_norm": 1.4088818446608908, + "learning_rate": 9.792057814305157e-06, + "loss": 0.6142, + "step": 17013 + }, + { + "epoch": 0.5214539659188427, + "grad_norm": 1.3940185007599026, + "learning_rate": 9.791065394285217e-06, + "loss": 0.683, + "step": 17014 + }, + { + "epoch": 0.5214846144415839, + "grad_norm": 1.377313342240939, + "learning_rate": 9.790072976323961e-06, + "loss": 0.7676, + "step": 17015 + }, + { + "epoch": 0.5215152629643252, + "grad_norm": 1.4878139172210214, + "learning_rate": 9.789080560431172e-06, + "loss": 0.7337, + "step": 17016 + }, + { + "epoch": 0.5215459114870663, + "grad_norm": 1.51128762292098, + "learning_rate": 9.788088146616622e-06, + "loss": 0.6804, + "step": 17017 + }, + { + "epoch": 0.5215765600098076, + "grad_norm": 1.3871420586296428, + "learning_rate": 9.787095734890098e-06, + "loss": 0.6691, + "step": 17018 + }, + { + "epoch": 0.5216072085325487, + "grad_norm": 1.7532577785248848, + "learning_rate": 9.786103325261373e-06, + "loss": 0.6208, + "step": 17019 + }, + { + "epoch": 0.52163785705529, + "grad_norm": 1.4095639886923894, + "learning_rate": 9.785110917740223e-06, + "loss": 0.6297, + "step": 17020 + }, + { + "epoch": 0.5216685055780311, + "grad_norm": 1.4762506694093909, + "learning_rate": 9.784118512336434e-06, + "loss": 0.6924, + "step": 17021 + }, + { + "epoch": 0.5216991541007724, + "grad_norm": 1.6139148472155835, + "learning_rate": 9.783126109059784e-06, + "loss": 0.7975, + "step": 17022 + }, + { + "epoch": 0.5217298026235135, + "grad_norm": 1.6511834329294681, + "learning_rate": 9.78213370792004e-06, + "loss": 0.7676, + "step": 17023 + }, + { + "epoch": 0.5217604511462548, + "grad_norm": 0.709705146672868, + "learning_rate": 9.781141308926994e-06, + "loss": 0.5916, + "step": 17024 + }, + { + "epoch": 0.521791099668996, + "grad_norm": 1.5344446455396643, + "learning_rate": 9.780148912090418e-06, + "loss": 0.8111, + "step": 17025 + }, + { + "epoch": 0.5218217481917372, + "grad_norm": 2.3220233042307226, + "learning_rate": 9.779156517420087e-06, + "loss": 0.8485, + "step": 17026 + }, + { + "epoch": 0.5218523967144784, + "grad_norm": 0.6695844211983524, + "learning_rate": 9.778164124925788e-06, + "loss": 0.5678, + "step": 17027 + }, + { + "epoch": 0.5218830452372195, + "grad_norm": 1.4088568473994183, + "learning_rate": 9.777171734617292e-06, + "loss": 0.628, + "step": 17028 + }, + { + "epoch": 0.5219136937599608, + "grad_norm": 0.6451146128310888, + "learning_rate": 9.776179346504381e-06, + "loss": 0.574, + "step": 17029 + }, + { + "epoch": 0.5219443422827019, + "grad_norm": 1.4901749950519911, + "learning_rate": 9.775186960596832e-06, + "loss": 0.6217, + "step": 17030 + }, + { + "epoch": 0.5219749908054432, + "grad_norm": 1.5097685449650748, + "learning_rate": 9.77419457690442e-06, + "loss": 0.6846, + "step": 17031 + }, + { + "epoch": 0.5220056393281843, + "grad_norm": 1.5430120362146689, + "learning_rate": 9.773202195436932e-06, + "loss": 0.7287, + "step": 17032 + }, + { + "epoch": 0.5220362878509256, + "grad_norm": 1.6441488479431856, + "learning_rate": 9.772209816204142e-06, + "loss": 0.7613, + "step": 17033 + }, + { + "epoch": 0.5220669363736667, + "grad_norm": 1.5618070963936383, + "learning_rate": 9.771217439215818e-06, + "loss": 0.735, + "step": 17034 + }, + { + "epoch": 0.522097584896408, + "grad_norm": 1.5146354359457805, + "learning_rate": 9.770225064481757e-06, + "loss": 0.6957, + "step": 17035 + }, + { + "epoch": 0.5221282334191492, + "grad_norm": 0.7457134459684914, + "learning_rate": 9.769232692011719e-06, + "loss": 0.611, + "step": 17036 + }, + { + "epoch": 0.5221588819418904, + "grad_norm": 1.5098675327933366, + "learning_rate": 9.768240321815498e-06, + "loss": 0.7103, + "step": 17037 + }, + { + "epoch": 0.5221895304646316, + "grad_norm": 1.3907056393480697, + "learning_rate": 9.767247953902861e-06, + "loss": 0.6858, + "step": 17038 + }, + { + "epoch": 0.5222201789873728, + "grad_norm": 1.5341540408733956, + "learning_rate": 9.766255588283588e-06, + "loss": 0.767, + "step": 17039 + }, + { + "epoch": 0.522250827510114, + "grad_norm": 0.6906554485705398, + "learning_rate": 9.76526322496746e-06, + "loss": 0.5907, + "step": 17040 + }, + { + "epoch": 0.5222814760328552, + "grad_norm": 1.5160504892958389, + "learning_rate": 9.764270863964254e-06, + "loss": 0.7744, + "step": 17041 + }, + { + "epoch": 0.5223121245555964, + "grad_norm": 1.4236766190805983, + "learning_rate": 9.763278505283744e-06, + "loss": 0.7255, + "step": 17042 + }, + { + "epoch": 0.5223427730783377, + "grad_norm": 0.6382803620897559, + "learning_rate": 9.762286148935714e-06, + "loss": 0.5473, + "step": 17043 + }, + { + "epoch": 0.5223734216010788, + "grad_norm": 1.4084550964382998, + "learning_rate": 9.76129379492994e-06, + "loss": 0.7193, + "step": 17044 + }, + { + "epoch": 0.5224040701238201, + "grad_norm": 1.4475046186838754, + "learning_rate": 9.7603014432762e-06, + "loss": 0.6114, + "step": 17045 + }, + { + "epoch": 0.5224347186465612, + "grad_norm": 1.5087805553916591, + "learning_rate": 9.759309093984271e-06, + "loss": 0.7801, + "step": 17046 + }, + { + "epoch": 0.5224653671693025, + "grad_norm": 1.5640929478380965, + "learning_rate": 9.758316747063928e-06, + "loss": 0.7313, + "step": 17047 + }, + { + "epoch": 0.5224960156920436, + "grad_norm": 1.575783870235561, + "learning_rate": 9.757324402524955e-06, + "loss": 0.7879, + "step": 17048 + }, + { + "epoch": 0.5225266642147849, + "grad_norm": 1.4448689624720656, + "learning_rate": 9.756332060377128e-06, + "loss": 0.5599, + "step": 17049 + }, + { + "epoch": 0.522557312737526, + "grad_norm": 1.5343689731691958, + "learning_rate": 9.755339720630218e-06, + "loss": 0.7394, + "step": 17050 + }, + { + "epoch": 0.5225879612602673, + "grad_norm": 1.4081630231285596, + "learning_rate": 9.754347383294012e-06, + "loss": 0.7179, + "step": 17051 + }, + { + "epoch": 0.5226186097830084, + "grad_norm": 1.6006590741688451, + "learning_rate": 9.753355048378288e-06, + "loss": 0.8167, + "step": 17052 + }, + { + "epoch": 0.5226492583057497, + "grad_norm": 1.3865609460555883, + "learning_rate": 9.752362715892812e-06, + "loss": 0.7435, + "step": 17053 + }, + { + "epoch": 0.5226799068284909, + "grad_norm": 1.3875348147991171, + "learning_rate": 9.751370385847376e-06, + "loss": 0.6434, + "step": 17054 + }, + { + "epoch": 0.5227105553512321, + "grad_norm": 1.6249773735864206, + "learning_rate": 9.750378058251744e-06, + "loss": 0.7752, + "step": 17055 + }, + { + "epoch": 0.5227412038739733, + "grad_norm": 1.4779173137339712, + "learning_rate": 9.749385733115709e-06, + "loss": 0.7218, + "step": 17056 + }, + { + "epoch": 0.5227718523967145, + "grad_norm": 1.623253377363146, + "learning_rate": 9.748393410449036e-06, + "loss": 0.704, + "step": 17057 + }, + { + "epoch": 0.5228025009194557, + "grad_norm": 1.5431274521048373, + "learning_rate": 9.747401090261505e-06, + "loss": 0.6062, + "step": 17058 + }, + { + "epoch": 0.5228331494421968, + "grad_norm": 1.4264432129159215, + "learning_rate": 9.7464087725629e-06, + "loss": 0.7523, + "step": 17059 + }, + { + "epoch": 0.5228637979649381, + "grad_norm": 1.473190959896466, + "learning_rate": 9.745416457362994e-06, + "loss": 0.6797, + "step": 17060 + }, + { + "epoch": 0.5228944464876792, + "grad_norm": 1.4117170657373426, + "learning_rate": 9.744424144671562e-06, + "loss": 0.6577, + "step": 17061 + }, + { + "epoch": 0.5229250950104205, + "grad_norm": 1.433559232014655, + "learning_rate": 9.743431834498386e-06, + "loss": 0.7592, + "step": 17062 + }, + { + "epoch": 0.5229557435331617, + "grad_norm": 1.3798386166660692, + "learning_rate": 9.74243952685324e-06, + "loss": 0.6655, + "step": 17063 + }, + { + "epoch": 0.5229863920559029, + "grad_norm": 1.4926656277604267, + "learning_rate": 9.741447221745905e-06, + "loss": 0.7389, + "step": 17064 + }, + { + "epoch": 0.5230170405786441, + "grad_norm": 1.7427900431777676, + "learning_rate": 9.74045491918616e-06, + "loss": 0.6693, + "step": 17065 + }, + { + "epoch": 0.5230476891013853, + "grad_norm": 1.4216819181700848, + "learning_rate": 9.739462619183771e-06, + "loss": 0.7461, + "step": 17066 + }, + { + "epoch": 0.5230783376241265, + "grad_norm": 0.6969311469443585, + "learning_rate": 9.738470321748531e-06, + "loss": 0.5789, + "step": 17067 + }, + { + "epoch": 0.5231089861468677, + "grad_norm": 1.6529340274196582, + "learning_rate": 9.737478026890209e-06, + "loss": 0.6556, + "step": 17068 + }, + { + "epoch": 0.5231396346696089, + "grad_norm": 1.4272103420156144, + "learning_rate": 9.736485734618578e-06, + "loss": 0.6639, + "step": 17069 + }, + { + "epoch": 0.5231702831923501, + "grad_norm": 1.5087750004955283, + "learning_rate": 9.735493444943425e-06, + "loss": 0.7297, + "step": 17070 + }, + { + "epoch": 0.5232009317150913, + "grad_norm": 1.6460997739399295, + "learning_rate": 9.73450115787452e-06, + "loss": 0.7302, + "step": 17071 + }, + { + "epoch": 0.5232315802378326, + "grad_norm": 0.653042208626219, + "learning_rate": 9.733508873421645e-06, + "loss": 0.5841, + "step": 17072 + }, + { + "epoch": 0.5232622287605737, + "grad_norm": 1.5491230140138001, + "learning_rate": 9.732516591594574e-06, + "loss": 0.6917, + "step": 17073 + }, + { + "epoch": 0.523292877283315, + "grad_norm": 1.7024905360976956, + "learning_rate": 9.731524312403085e-06, + "loss": 0.8022, + "step": 17074 + }, + { + "epoch": 0.5233235258060561, + "grad_norm": 1.3155191499836254, + "learning_rate": 9.730532035856956e-06, + "loss": 0.6986, + "step": 17075 + }, + { + "epoch": 0.5233541743287974, + "grad_norm": 1.6453000205551886, + "learning_rate": 9.729539761965968e-06, + "loss": 0.733, + "step": 17076 + }, + { + "epoch": 0.5233848228515385, + "grad_norm": 0.7014611547277712, + "learning_rate": 9.728547490739887e-06, + "loss": 0.6071, + "step": 17077 + }, + { + "epoch": 0.5234154713742798, + "grad_norm": 1.5026037939200763, + "learning_rate": 9.727555222188502e-06, + "loss": 0.5965, + "step": 17078 + }, + { + "epoch": 0.523446119897021, + "grad_norm": 1.3997993368679031, + "learning_rate": 9.726562956321585e-06, + "loss": 0.6741, + "step": 17079 + }, + { + "epoch": 0.5234767684197622, + "grad_norm": 1.3058544383689095, + "learning_rate": 9.725570693148911e-06, + "loss": 0.5893, + "step": 17080 + }, + { + "epoch": 0.5235074169425034, + "grad_norm": 1.5373892598774797, + "learning_rate": 9.724578432680259e-06, + "loss": 0.7686, + "step": 17081 + }, + { + "epoch": 0.5235380654652446, + "grad_norm": 1.5570031203807169, + "learning_rate": 9.723586174925407e-06, + "loss": 0.8377, + "step": 17082 + }, + { + "epoch": 0.5235687139879858, + "grad_norm": 1.5090839659897872, + "learning_rate": 9.722593919894132e-06, + "loss": 0.6672, + "step": 17083 + }, + { + "epoch": 0.523599362510727, + "grad_norm": 1.374737852445765, + "learning_rate": 9.721601667596208e-06, + "loss": 0.629, + "step": 17084 + }, + { + "epoch": 0.5236300110334682, + "grad_norm": 1.4567526032965337, + "learning_rate": 9.720609418041415e-06, + "loss": 0.6567, + "step": 17085 + }, + { + "epoch": 0.5236606595562094, + "grad_norm": 1.3696070683684705, + "learning_rate": 9.719617171239529e-06, + "loss": 0.6885, + "step": 17086 + }, + { + "epoch": 0.5236913080789506, + "grad_norm": 1.5321173047535803, + "learning_rate": 9.71862492720033e-06, + "loss": 0.7036, + "step": 17087 + }, + { + "epoch": 0.5237219566016919, + "grad_norm": 1.6216577560100438, + "learning_rate": 9.717632685933585e-06, + "loss": 0.6913, + "step": 17088 + }, + { + "epoch": 0.523752605124433, + "grad_norm": 1.5062240964577314, + "learning_rate": 9.716640447449083e-06, + "loss": 0.8052, + "step": 17089 + }, + { + "epoch": 0.5237832536471742, + "grad_norm": 1.4662533317654425, + "learning_rate": 9.715648211756592e-06, + "loss": 0.6864, + "step": 17090 + }, + { + "epoch": 0.5238139021699154, + "grad_norm": 1.463612155146449, + "learning_rate": 9.714655978865893e-06, + "loss": 0.7948, + "step": 17091 + }, + { + "epoch": 0.5238445506926566, + "grad_norm": 1.3086681132152553, + "learning_rate": 9.713663748786763e-06, + "loss": 0.6397, + "step": 17092 + }, + { + "epoch": 0.5238751992153978, + "grad_norm": 1.5608454048997014, + "learning_rate": 9.712671521528975e-06, + "loss": 0.7243, + "step": 17093 + }, + { + "epoch": 0.523905847738139, + "grad_norm": 1.3027327569744602, + "learning_rate": 9.711679297102308e-06, + "loss": 0.6929, + "step": 17094 + }, + { + "epoch": 0.5239364962608802, + "grad_norm": 1.6604554777679745, + "learning_rate": 9.710687075516541e-06, + "loss": 0.6312, + "step": 17095 + }, + { + "epoch": 0.5239671447836214, + "grad_norm": 1.561470393063217, + "learning_rate": 9.709694856781446e-06, + "loss": 0.7261, + "step": 17096 + }, + { + "epoch": 0.5239977933063626, + "grad_norm": 1.4834427479373498, + "learning_rate": 9.708702640906805e-06, + "loss": 0.8015, + "step": 17097 + }, + { + "epoch": 0.5240284418291038, + "grad_norm": 1.4135913384774965, + "learning_rate": 9.707710427902386e-06, + "loss": 0.6676, + "step": 17098 + }, + { + "epoch": 0.5240590903518451, + "grad_norm": 1.3711444590354982, + "learning_rate": 9.706718217777977e-06, + "loss": 0.64, + "step": 17099 + }, + { + "epoch": 0.5240897388745862, + "grad_norm": 1.5834660695265723, + "learning_rate": 9.705726010543346e-06, + "loss": 0.7281, + "step": 17100 + }, + { + "epoch": 0.5241203873973275, + "grad_norm": 1.4799275608983007, + "learning_rate": 9.704733806208269e-06, + "loss": 0.6785, + "step": 17101 + }, + { + "epoch": 0.5241510359200686, + "grad_norm": 1.473687233691395, + "learning_rate": 9.703741604782528e-06, + "loss": 0.5905, + "step": 17102 + }, + { + "epoch": 0.5241816844428099, + "grad_norm": 0.6847705049545565, + "learning_rate": 9.702749406275897e-06, + "loss": 0.5901, + "step": 17103 + }, + { + "epoch": 0.524212332965551, + "grad_norm": 1.522679562487515, + "learning_rate": 9.701757210698151e-06, + "loss": 0.696, + "step": 17104 + }, + { + "epoch": 0.5242429814882923, + "grad_norm": 1.4608619322786978, + "learning_rate": 9.700765018059069e-06, + "loss": 0.646, + "step": 17105 + }, + { + "epoch": 0.5242736300110334, + "grad_norm": 1.4063830525639587, + "learning_rate": 9.699772828368427e-06, + "loss": 0.6354, + "step": 17106 + }, + { + "epoch": 0.5243042785337747, + "grad_norm": 0.6879446782637804, + "learning_rate": 9.698780641635995e-06, + "loss": 0.5726, + "step": 17107 + }, + { + "epoch": 0.5243349270565159, + "grad_norm": 1.3750986891316268, + "learning_rate": 9.69778845787156e-06, + "loss": 0.6771, + "step": 17108 + }, + { + "epoch": 0.5243655755792571, + "grad_norm": 1.7810219175107054, + "learning_rate": 9.696796277084888e-06, + "loss": 0.761, + "step": 17109 + }, + { + "epoch": 0.5243962241019983, + "grad_norm": 1.527593486721027, + "learning_rate": 9.695804099285764e-06, + "loss": 0.6638, + "step": 17110 + }, + { + "epoch": 0.5244268726247395, + "grad_norm": 1.5101431106291763, + "learning_rate": 9.694811924483959e-06, + "loss": 0.7369, + "step": 17111 + }, + { + "epoch": 0.5244575211474807, + "grad_norm": 1.4285703966831889, + "learning_rate": 9.693819752689248e-06, + "loss": 0.6584, + "step": 17112 + }, + { + "epoch": 0.5244881696702219, + "grad_norm": 1.4401880639776539, + "learning_rate": 9.692827583911412e-06, + "loss": 0.7723, + "step": 17113 + }, + { + "epoch": 0.5245188181929631, + "grad_norm": 0.6793043847309614, + "learning_rate": 9.691835418160222e-06, + "loss": 0.571, + "step": 17114 + }, + { + "epoch": 0.5245494667157043, + "grad_norm": 1.561540011027667, + "learning_rate": 9.690843255445457e-06, + "loss": 0.7045, + "step": 17115 + }, + { + "epoch": 0.5245801152384455, + "grad_norm": 0.6724029321559669, + "learning_rate": 9.689851095776893e-06, + "loss": 0.5659, + "step": 17116 + }, + { + "epoch": 0.5246107637611868, + "grad_norm": 1.5131745047616358, + "learning_rate": 9.688858939164306e-06, + "loss": 0.6931, + "step": 17117 + }, + { + "epoch": 0.5246414122839279, + "grad_norm": 1.4700141024794042, + "learning_rate": 9.68786678561747e-06, + "loss": 0.7374, + "step": 17118 + }, + { + "epoch": 0.5246720608066692, + "grad_norm": 1.5192348591763878, + "learning_rate": 9.686874635146166e-06, + "loss": 0.6694, + "step": 17119 + }, + { + "epoch": 0.5247027093294103, + "grad_norm": 0.6887064808248669, + "learning_rate": 9.68588248776016e-06, + "loss": 0.5614, + "step": 17120 + }, + { + "epoch": 0.5247333578521515, + "grad_norm": 1.566997349339948, + "learning_rate": 9.684890343469241e-06, + "loss": 0.7067, + "step": 17121 + }, + { + "epoch": 0.5247640063748927, + "grad_norm": 1.8339010060650316, + "learning_rate": 9.683898202283176e-06, + "loss": 0.6794, + "step": 17122 + }, + { + "epoch": 0.5247946548976339, + "grad_norm": 1.5434935067327413, + "learning_rate": 9.682906064211741e-06, + "loss": 0.6979, + "step": 17123 + }, + { + "epoch": 0.5248253034203751, + "grad_norm": 1.5295928644559624, + "learning_rate": 9.681913929264715e-06, + "loss": 0.698, + "step": 17124 + }, + { + "epoch": 0.5248559519431163, + "grad_norm": 1.439094758089656, + "learning_rate": 9.68092179745187e-06, + "loss": 0.6838, + "step": 17125 + }, + { + "epoch": 0.5248866004658576, + "grad_norm": 1.6148106774338198, + "learning_rate": 9.679929668782988e-06, + "loss": 0.8187, + "step": 17126 + }, + { + "epoch": 0.5249172489885987, + "grad_norm": 1.5108639539427489, + "learning_rate": 9.67893754326784e-06, + "loss": 0.8393, + "step": 17127 + }, + { + "epoch": 0.52494789751134, + "grad_norm": 1.419625578607156, + "learning_rate": 9.6779454209162e-06, + "loss": 0.679, + "step": 17128 + }, + { + "epoch": 0.5249785460340811, + "grad_norm": 1.4258467380986672, + "learning_rate": 9.676953301737848e-06, + "loss": 0.6292, + "step": 17129 + }, + { + "epoch": 0.5250091945568224, + "grad_norm": 1.3877051486669423, + "learning_rate": 9.67596118574256e-06, + "loss": 0.6043, + "step": 17130 + }, + { + "epoch": 0.5250398430795635, + "grad_norm": 0.6677714609370986, + "learning_rate": 9.674969072940104e-06, + "loss": 0.5381, + "step": 17131 + }, + { + "epoch": 0.5250704916023048, + "grad_norm": 1.5019917138631529, + "learning_rate": 9.673976963340266e-06, + "loss": 0.7299, + "step": 17132 + }, + { + "epoch": 0.5251011401250459, + "grad_norm": 1.4710417907619449, + "learning_rate": 9.672984856952814e-06, + "loss": 0.7098, + "step": 17133 + }, + { + "epoch": 0.5251317886477872, + "grad_norm": 1.544452484699158, + "learning_rate": 9.671992753787527e-06, + "loss": 0.6818, + "step": 17134 + }, + { + "epoch": 0.5251624371705284, + "grad_norm": 1.589352865823686, + "learning_rate": 9.671000653854178e-06, + "loss": 0.7483, + "step": 17135 + }, + { + "epoch": 0.5251930856932696, + "grad_norm": 1.3326264597149675, + "learning_rate": 9.670008557162542e-06, + "loss": 0.7131, + "step": 17136 + }, + { + "epoch": 0.5252237342160108, + "grad_norm": 1.547459233687493, + "learning_rate": 9.669016463722399e-06, + "loss": 0.7165, + "step": 17137 + }, + { + "epoch": 0.525254382738752, + "grad_norm": 1.5059245354493889, + "learning_rate": 9.668024373543522e-06, + "loss": 0.6812, + "step": 17138 + }, + { + "epoch": 0.5252850312614932, + "grad_norm": 1.5889174874930347, + "learning_rate": 9.667032286635682e-06, + "loss": 0.7888, + "step": 17139 + }, + { + "epoch": 0.5253156797842344, + "grad_norm": 0.6766456568498748, + "learning_rate": 9.666040203008662e-06, + "loss": 0.5511, + "step": 17140 + }, + { + "epoch": 0.5253463283069756, + "grad_norm": 1.5054014591062594, + "learning_rate": 9.665048122672235e-06, + "loss": 0.6569, + "step": 17141 + }, + { + "epoch": 0.5253769768297168, + "grad_norm": 0.6831581402141248, + "learning_rate": 9.66405604563617e-06, + "loss": 0.5583, + "step": 17142 + }, + { + "epoch": 0.525407625352458, + "grad_norm": 1.381449812388507, + "learning_rate": 9.663063971910248e-06, + "loss": 0.7255, + "step": 17143 + }, + { + "epoch": 0.5254382738751993, + "grad_norm": 1.3833648788204038, + "learning_rate": 9.662071901504241e-06, + "loss": 0.6522, + "step": 17144 + }, + { + "epoch": 0.5254689223979404, + "grad_norm": 1.636731267365941, + "learning_rate": 9.66107983442793e-06, + "loss": 0.6416, + "step": 17145 + }, + { + "epoch": 0.5254995709206817, + "grad_norm": 1.4170442875926237, + "learning_rate": 9.660087770691086e-06, + "loss": 0.5884, + "step": 17146 + }, + { + "epoch": 0.5255302194434228, + "grad_norm": 0.6788642706191454, + "learning_rate": 9.65909571030348e-06, + "loss": 0.5736, + "step": 17147 + }, + { + "epoch": 0.5255608679661641, + "grad_norm": 1.6060543244264733, + "learning_rate": 9.658103653274894e-06, + "loss": 0.6794, + "step": 17148 + }, + { + "epoch": 0.5255915164889052, + "grad_norm": 1.5770558676227415, + "learning_rate": 9.657111599615104e-06, + "loss": 0.7695, + "step": 17149 + }, + { + "epoch": 0.5256221650116465, + "grad_norm": 1.5522682075351875, + "learning_rate": 9.656119549333873e-06, + "loss": 0.6803, + "step": 17150 + }, + { + "epoch": 0.5256528135343876, + "grad_norm": 1.489641071679488, + "learning_rate": 9.65512750244099e-06, + "loss": 0.767, + "step": 17151 + }, + { + "epoch": 0.5256834620571288, + "grad_norm": 1.5269223487592647, + "learning_rate": 9.654135458946222e-06, + "loss": 0.69, + "step": 17152 + }, + { + "epoch": 0.52571411057987, + "grad_norm": 1.5278747175661247, + "learning_rate": 9.653143418859346e-06, + "loss": 0.6954, + "step": 17153 + }, + { + "epoch": 0.5257447591026112, + "grad_norm": 1.5176909493458535, + "learning_rate": 9.652151382190136e-06, + "loss": 0.7592, + "step": 17154 + }, + { + "epoch": 0.5257754076253525, + "grad_norm": 1.4011609036470516, + "learning_rate": 9.651159348948366e-06, + "loss": 0.6897, + "step": 17155 + }, + { + "epoch": 0.5258060561480936, + "grad_norm": 1.5347882299626003, + "learning_rate": 9.650167319143814e-06, + "loss": 0.7462, + "step": 17156 + }, + { + "epoch": 0.5258367046708349, + "grad_norm": 1.568292351121823, + "learning_rate": 9.649175292786255e-06, + "loss": 0.6974, + "step": 17157 + }, + { + "epoch": 0.525867353193576, + "grad_norm": 0.6500382878343128, + "learning_rate": 9.648183269885456e-06, + "loss": 0.5749, + "step": 17158 + }, + { + "epoch": 0.5258980017163173, + "grad_norm": 1.5240713941796376, + "learning_rate": 9.647191250451203e-06, + "loss": 0.6845, + "step": 17159 + }, + { + "epoch": 0.5259286502390584, + "grad_norm": 1.2492795052329884, + "learning_rate": 9.646199234493265e-06, + "loss": 0.7088, + "step": 17160 + }, + { + "epoch": 0.5259592987617997, + "grad_norm": 1.5957714882406437, + "learning_rate": 9.645207222021411e-06, + "loss": 0.7049, + "step": 17161 + }, + { + "epoch": 0.5259899472845408, + "grad_norm": 1.6314071330673559, + "learning_rate": 9.644215213045426e-06, + "loss": 0.7392, + "step": 17162 + }, + { + "epoch": 0.5260205958072821, + "grad_norm": 1.3595841233139878, + "learning_rate": 9.643223207575076e-06, + "loss": 0.6933, + "step": 17163 + }, + { + "epoch": 0.5260512443300233, + "grad_norm": 1.4257877396409573, + "learning_rate": 9.642231205620144e-06, + "loss": 0.5702, + "step": 17164 + }, + { + "epoch": 0.5260818928527645, + "grad_norm": 1.5167830798893902, + "learning_rate": 9.641239207190395e-06, + "loss": 0.6689, + "step": 17165 + }, + { + "epoch": 0.5261125413755057, + "grad_norm": 1.493027580835528, + "learning_rate": 9.640247212295608e-06, + "loss": 0.6745, + "step": 17166 + }, + { + "epoch": 0.5261431898982469, + "grad_norm": 1.423721735595542, + "learning_rate": 9.639255220945559e-06, + "loss": 0.596, + "step": 17167 + }, + { + "epoch": 0.5261738384209881, + "grad_norm": 1.5744868327767858, + "learning_rate": 9.638263233150021e-06, + "loss": 0.6867, + "step": 17168 + }, + { + "epoch": 0.5262044869437293, + "grad_norm": 1.506368394209122, + "learning_rate": 9.637271248918766e-06, + "loss": 0.7763, + "step": 17169 + }, + { + "epoch": 0.5262351354664705, + "grad_norm": 1.4006293105660281, + "learning_rate": 9.63627926826157e-06, + "loss": 0.7751, + "step": 17170 + }, + { + "epoch": 0.5262657839892118, + "grad_norm": 1.6572910765635913, + "learning_rate": 9.635287291188208e-06, + "loss": 0.7718, + "step": 17171 + }, + { + "epoch": 0.5262964325119529, + "grad_norm": 1.4082046435121454, + "learning_rate": 9.634295317708453e-06, + "loss": 0.6762, + "step": 17172 + }, + { + "epoch": 0.5263270810346942, + "grad_norm": 1.3890397741799105, + "learning_rate": 9.633303347832085e-06, + "loss": 0.6906, + "step": 17173 + }, + { + "epoch": 0.5263577295574353, + "grad_norm": 1.5206265299510515, + "learning_rate": 9.632311381568865e-06, + "loss": 0.7663, + "step": 17174 + }, + { + "epoch": 0.5263883780801766, + "grad_norm": 1.4658840540500968, + "learning_rate": 9.631319418928581e-06, + "loss": 0.8282, + "step": 17175 + }, + { + "epoch": 0.5264190266029177, + "grad_norm": 1.5998412812306735, + "learning_rate": 9.630327459921e-06, + "loss": 0.6714, + "step": 17176 + }, + { + "epoch": 0.526449675125659, + "grad_norm": 1.3951932702455219, + "learning_rate": 9.629335504555895e-06, + "loss": 0.6854, + "step": 17177 + }, + { + "epoch": 0.5264803236484001, + "grad_norm": 1.462517450698349, + "learning_rate": 9.628343552843043e-06, + "loss": 0.7083, + "step": 17178 + }, + { + "epoch": 0.5265109721711414, + "grad_norm": 1.4028145997821224, + "learning_rate": 9.627351604792219e-06, + "loss": 0.6221, + "step": 17179 + }, + { + "epoch": 0.5265416206938826, + "grad_norm": 1.5996133233171272, + "learning_rate": 9.62635966041319e-06, + "loss": 0.7381, + "step": 17180 + }, + { + "epoch": 0.5265722692166238, + "grad_norm": 1.572068993279815, + "learning_rate": 9.62536771971574e-06, + "loss": 0.6884, + "step": 17181 + }, + { + "epoch": 0.526602917739365, + "grad_norm": 1.4899507475257965, + "learning_rate": 9.624375782709635e-06, + "loss": 0.7264, + "step": 17182 + }, + { + "epoch": 0.5266335662621061, + "grad_norm": 1.2971761758422382, + "learning_rate": 9.623383849404653e-06, + "loss": 0.6963, + "step": 17183 + }, + { + "epoch": 0.5266642147848474, + "grad_norm": 1.4829460939623795, + "learning_rate": 9.622391919810569e-06, + "loss": 0.6939, + "step": 17184 + }, + { + "epoch": 0.5266948633075885, + "grad_norm": 1.5085093321760814, + "learning_rate": 9.621399993937146e-06, + "loss": 0.7105, + "step": 17185 + }, + { + "epoch": 0.5267255118303298, + "grad_norm": 1.5616330424656948, + "learning_rate": 9.620408071794174e-06, + "loss": 0.7304, + "step": 17186 + }, + { + "epoch": 0.5267561603530709, + "grad_norm": 0.6827718124739512, + "learning_rate": 9.619416153391416e-06, + "loss": 0.5661, + "step": 17187 + }, + { + "epoch": 0.5267868088758122, + "grad_norm": 1.554015223231804, + "learning_rate": 9.618424238738645e-06, + "loss": 0.7433, + "step": 17188 + }, + { + "epoch": 0.5268174573985533, + "grad_norm": 1.5034345807823253, + "learning_rate": 9.61743232784564e-06, + "loss": 0.7267, + "step": 17189 + }, + { + "epoch": 0.5268481059212946, + "grad_norm": 1.4320383592488026, + "learning_rate": 9.616440420722169e-06, + "loss": 0.6492, + "step": 17190 + }, + { + "epoch": 0.5268787544440358, + "grad_norm": 1.385032677692155, + "learning_rate": 9.615448517378011e-06, + "loss": 0.69, + "step": 17191 + }, + { + "epoch": 0.526909402966777, + "grad_norm": 1.5716730466386903, + "learning_rate": 9.614456617822939e-06, + "loss": 0.7951, + "step": 17192 + }, + { + "epoch": 0.5269400514895182, + "grad_norm": 0.6592499087881005, + "learning_rate": 9.613464722066723e-06, + "loss": 0.6012, + "step": 17193 + }, + { + "epoch": 0.5269707000122594, + "grad_norm": 1.5331076003394386, + "learning_rate": 9.612472830119141e-06, + "loss": 0.6669, + "step": 17194 + }, + { + "epoch": 0.5270013485350006, + "grad_norm": 1.213037969818151, + "learning_rate": 9.61148094198996e-06, + "loss": 0.5772, + "step": 17195 + }, + { + "epoch": 0.5270319970577418, + "grad_norm": 1.3622063581256696, + "learning_rate": 9.610489057688955e-06, + "loss": 0.669, + "step": 17196 + }, + { + "epoch": 0.527062645580483, + "grad_norm": 0.6610408238860758, + "learning_rate": 9.609497177225903e-06, + "loss": 0.6015, + "step": 17197 + }, + { + "epoch": 0.5270932941032243, + "grad_norm": 0.6584745781924385, + "learning_rate": 9.608505300610575e-06, + "loss": 0.5682, + "step": 17198 + }, + { + "epoch": 0.5271239426259654, + "grad_norm": 1.3980395990317471, + "learning_rate": 9.607513427852747e-06, + "loss": 0.7114, + "step": 17199 + }, + { + "epoch": 0.5271545911487067, + "grad_norm": 1.3840552197149514, + "learning_rate": 9.606521558962186e-06, + "loss": 0.7495, + "step": 17200 + }, + { + "epoch": 0.5271852396714478, + "grad_norm": 0.6670143113629781, + "learning_rate": 9.605529693948668e-06, + "loss": 0.5932, + "step": 17201 + }, + { + "epoch": 0.5272158881941891, + "grad_norm": 1.4120502895786424, + "learning_rate": 9.604537832821971e-06, + "loss": 0.6317, + "step": 17202 + }, + { + "epoch": 0.5272465367169302, + "grad_norm": 1.3783130522138558, + "learning_rate": 9.603545975591864e-06, + "loss": 0.7448, + "step": 17203 + }, + { + "epoch": 0.5272771852396715, + "grad_norm": 1.6238521109960133, + "learning_rate": 9.602554122268114e-06, + "loss": 0.7579, + "step": 17204 + }, + { + "epoch": 0.5273078337624126, + "grad_norm": 1.5564269515761397, + "learning_rate": 9.601562272860508e-06, + "loss": 0.6288, + "step": 17205 + }, + { + "epoch": 0.5273384822851539, + "grad_norm": 1.3451772991228454, + "learning_rate": 9.600570427378805e-06, + "loss": 0.764, + "step": 17206 + }, + { + "epoch": 0.527369130807895, + "grad_norm": 1.5372954819860185, + "learning_rate": 9.599578585832784e-06, + "loss": 0.6972, + "step": 17207 + }, + { + "epoch": 0.5273997793306363, + "grad_norm": 1.442780957279023, + "learning_rate": 9.59858674823222e-06, + "loss": 0.7185, + "step": 17208 + }, + { + "epoch": 0.5274304278533775, + "grad_norm": 1.4527140566940069, + "learning_rate": 9.597594914586882e-06, + "loss": 0.709, + "step": 17209 + }, + { + "epoch": 0.5274610763761187, + "grad_norm": 1.3920847992705785, + "learning_rate": 9.596603084906546e-06, + "loss": 0.6748, + "step": 17210 + }, + { + "epoch": 0.5274917248988599, + "grad_norm": 1.6780506695096935, + "learning_rate": 9.595611259200981e-06, + "loss": 0.7085, + "step": 17211 + }, + { + "epoch": 0.5275223734216011, + "grad_norm": 1.6073707312066217, + "learning_rate": 9.594619437479962e-06, + "loss": 0.7348, + "step": 17212 + }, + { + "epoch": 0.5275530219443423, + "grad_norm": 1.4838500189231, + "learning_rate": 9.593627619753262e-06, + "loss": 0.7046, + "step": 17213 + }, + { + "epoch": 0.5275836704670834, + "grad_norm": 0.7121141213405582, + "learning_rate": 9.592635806030655e-06, + "loss": 0.599, + "step": 17214 + }, + { + "epoch": 0.5276143189898247, + "grad_norm": 1.5811266072871846, + "learning_rate": 9.591643996321907e-06, + "loss": 0.6943, + "step": 17215 + }, + { + "epoch": 0.5276449675125658, + "grad_norm": 1.4789303317744122, + "learning_rate": 9.5906521906368e-06, + "loss": 0.8332, + "step": 17216 + }, + { + "epoch": 0.5276756160353071, + "grad_norm": 1.6061186097646898, + "learning_rate": 9.589660388985097e-06, + "loss": 0.8239, + "step": 17217 + }, + { + "epoch": 0.5277062645580483, + "grad_norm": 1.4346089671973314, + "learning_rate": 9.58866859137658e-06, + "loss": 0.6739, + "step": 17218 + }, + { + "epoch": 0.5277369130807895, + "grad_norm": 1.5307514217326594, + "learning_rate": 9.587676797821013e-06, + "loss": 0.7139, + "step": 17219 + }, + { + "epoch": 0.5277675616035307, + "grad_norm": 1.5752715687625638, + "learning_rate": 9.586685008328172e-06, + "loss": 0.753, + "step": 17220 + }, + { + "epoch": 0.5277982101262719, + "grad_norm": 1.426908125180882, + "learning_rate": 9.585693222907833e-06, + "loss": 0.7542, + "step": 17221 + }, + { + "epoch": 0.5278288586490131, + "grad_norm": 1.3842317082879732, + "learning_rate": 9.584701441569762e-06, + "loss": 0.696, + "step": 17222 + }, + { + "epoch": 0.5278595071717543, + "grad_norm": 1.5353292478794378, + "learning_rate": 9.583709664323733e-06, + "loss": 0.7643, + "step": 17223 + }, + { + "epoch": 0.5278901556944955, + "grad_norm": 1.462729832069513, + "learning_rate": 9.58271789117952e-06, + "loss": 0.7792, + "step": 17224 + }, + { + "epoch": 0.5279208042172367, + "grad_norm": 1.5342083181100186, + "learning_rate": 9.581726122146894e-06, + "loss": 0.7625, + "step": 17225 + }, + { + "epoch": 0.5279514527399779, + "grad_norm": 1.6491889121143637, + "learning_rate": 9.58073435723563e-06, + "loss": 0.6536, + "step": 17226 + }, + { + "epoch": 0.5279821012627192, + "grad_norm": 1.4436452433933222, + "learning_rate": 9.579742596455498e-06, + "loss": 0.673, + "step": 17227 + }, + { + "epoch": 0.5280127497854603, + "grad_norm": 1.5282411167042311, + "learning_rate": 9.578750839816264e-06, + "loss": 0.842, + "step": 17228 + }, + { + "epoch": 0.5280433983082016, + "grad_norm": 1.513610227456548, + "learning_rate": 9.577759087327712e-06, + "loss": 0.683, + "step": 17229 + }, + { + "epoch": 0.5280740468309427, + "grad_norm": 1.6047640302163704, + "learning_rate": 9.576767338999607e-06, + "loss": 0.8169, + "step": 17230 + }, + { + "epoch": 0.528104695353684, + "grad_norm": 1.3815940474751107, + "learning_rate": 9.575775594841717e-06, + "loss": 0.698, + "step": 17231 + }, + { + "epoch": 0.5281353438764251, + "grad_norm": 0.6741178043450519, + "learning_rate": 9.574783854863823e-06, + "loss": 0.5839, + "step": 17232 + }, + { + "epoch": 0.5281659923991664, + "grad_norm": 1.4861207357256805, + "learning_rate": 9.573792119075693e-06, + "loss": 0.6755, + "step": 17233 + }, + { + "epoch": 0.5281966409219075, + "grad_norm": 1.4982973127276138, + "learning_rate": 9.572800387487093e-06, + "loss": 0.5985, + "step": 17234 + }, + { + "epoch": 0.5282272894446488, + "grad_norm": 1.3879017301540777, + "learning_rate": 9.571808660107804e-06, + "loss": 0.7324, + "step": 17235 + }, + { + "epoch": 0.52825793796739, + "grad_norm": 1.6028008186622043, + "learning_rate": 9.570816936947592e-06, + "loss": 0.7153, + "step": 17236 + }, + { + "epoch": 0.5282885864901312, + "grad_norm": 1.7497722664337951, + "learning_rate": 9.569825218016233e-06, + "loss": 0.7672, + "step": 17237 + }, + { + "epoch": 0.5283192350128724, + "grad_norm": 1.4116011511456592, + "learning_rate": 9.568833503323499e-06, + "loss": 0.7164, + "step": 17238 + }, + { + "epoch": 0.5283498835356136, + "grad_norm": 1.3707922781628805, + "learning_rate": 9.567841792879152e-06, + "loss": 0.6485, + "step": 17239 + }, + { + "epoch": 0.5283805320583548, + "grad_norm": 1.3826532796708042, + "learning_rate": 9.566850086692973e-06, + "loss": 0.6241, + "step": 17240 + }, + { + "epoch": 0.528411180581096, + "grad_norm": 1.3978090410308524, + "learning_rate": 9.565858384774733e-06, + "loss": 0.6112, + "step": 17241 + }, + { + "epoch": 0.5284418291038372, + "grad_norm": 1.4287181766731478, + "learning_rate": 9.564866687134198e-06, + "loss": 0.6045, + "step": 17242 + }, + { + "epoch": 0.5284724776265785, + "grad_norm": 1.4546095437849766, + "learning_rate": 9.563874993781145e-06, + "loss": 0.6804, + "step": 17243 + }, + { + "epoch": 0.5285031261493196, + "grad_norm": 0.6518117490752885, + "learning_rate": 9.56288330472534e-06, + "loss": 0.5754, + "step": 17244 + }, + { + "epoch": 0.5285337746720608, + "grad_norm": 0.6330954667996216, + "learning_rate": 9.561891619976561e-06, + "loss": 0.5437, + "step": 17245 + }, + { + "epoch": 0.528564423194802, + "grad_norm": 1.4366172843370495, + "learning_rate": 9.560899939544579e-06, + "loss": 0.7813, + "step": 17246 + }, + { + "epoch": 0.5285950717175432, + "grad_norm": 1.5476715016597424, + "learning_rate": 9.559908263439154e-06, + "loss": 0.6634, + "step": 17247 + }, + { + "epoch": 0.5286257202402844, + "grad_norm": 0.6712161231356681, + "learning_rate": 9.558916591670074e-06, + "loss": 0.5873, + "step": 17248 + }, + { + "epoch": 0.5286563687630256, + "grad_norm": 1.6239722920300685, + "learning_rate": 9.557924924247098e-06, + "loss": 0.768, + "step": 17249 + }, + { + "epoch": 0.5286870172857668, + "grad_norm": 1.4295283828407384, + "learning_rate": 9.556933261179999e-06, + "loss": 0.6184, + "step": 17250 + }, + { + "epoch": 0.528717665808508, + "grad_norm": 1.597947749986356, + "learning_rate": 9.555941602478552e-06, + "loss": 0.68, + "step": 17251 + }, + { + "epoch": 0.5287483143312492, + "grad_norm": 1.2842619108897941, + "learning_rate": 9.554949948152523e-06, + "loss": 0.6813, + "step": 17252 + }, + { + "epoch": 0.5287789628539904, + "grad_norm": 1.49291660769466, + "learning_rate": 9.55395829821169e-06, + "loss": 0.7557, + "step": 17253 + }, + { + "epoch": 0.5288096113767317, + "grad_norm": 1.4572854364059113, + "learning_rate": 9.552966652665818e-06, + "loss": 0.7687, + "step": 17254 + }, + { + "epoch": 0.5288402598994728, + "grad_norm": 1.4651655122788434, + "learning_rate": 9.551975011524679e-06, + "loss": 0.6191, + "step": 17255 + }, + { + "epoch": 0.5288709084222141, + "grad_norm": 1.646705048323211, + "learning_rate": 9.550983374798048e-06, + "loss": 0.7977, + "step": 17256 + }, + { + "epoch": 0.5289015569449552, + "grad_norm": 1.3672956858571332, + "learning_rate": 9.549991742495694e-06, + "loss": 0.6541, + "step": 17257 + }, + { + "epoch": 0.5289322054676965, + "grad_norm": 1.4433784255093696, + "learning_rate": 9.54900011462738e-06, + "loss": 0.5791, + "step": 17258 + }, + { + "epoch": 0.5289628539904376, + "grad_norm": 1.4266139607752606, + "learning_rate": 9.548008491202888e-06, + "loss": 0.7281, + "step": 17259 + }, + { + "epoch": 0.5289935025131789, + "grad_norm": 1.3387914034832988, + "learning_rate": 9.547016872231983e-06, + "loss": 0.6682, + "step": 17260 + }, + { + "epoch": 0.52902415103592, + "grad_norm": 0.6782388752574793, + "learning_rate": 9.546025257724436e-06, + "loss": 0.565, + "step": 17261 + }, + { + "epoch": 0.5290547995586613, + "grad_norm": 1.5006284374722707, + "learning_rate": 9.545033647690019e-06, + "loss": 0.6848, + "step": 17262 + }, + { + "epoch": 0.5290854480814025, + "grad_norm": 1.5931986507389846, + "learning_rate": 9.544042042138499e-06, + "loss": 0.7099, + "step": 17263 + }, + { + "epoch": 0.5291160966041437, + "grad_norm": 0.6483985701868344, + "learning_rate": 9.543050441079653e-06, + "loss": 0.551, + "step": 17264 + }, + { + "epoch": 0.5291467451268849, + "grad_norm": 1.3988180889079704, + "learning_rate": 9.542058844523248e-06, + "loss": 0.5842, + "step": 17265 + }, + { + "epoch": 0.5291773936496261, + "grad_norm": 1.425685063432241, + "learning_rate": 9.541067252479052e-06, + "loss": 0.6671, + "step": 17266 + }, + { + "epoch": 0.5292080421723673, + "grad_norm": 1.7856161439961773, + "learning_rate": 9.540075664956839e-06, + "loss": 0.7439, + "step": 17267 + }, + { + "epoch": 0.5292386906951085, + "grad_norm": 1.4320609034183476, + "learning_rate": 9.539084081966382e-06, + "loss": 0.6469, + "step": 17268 + }, + { + "epoch": 0.5292693392178497, + "grad_norm": 0.6552630520557419, + "learning_rate": 9.53809250351744e-06, + "loss": 0.573, + "step": 17269 + }, + { + "epoch": 0.529299987740591, + "grad_norm": 1.6086204685667165, + "learning_rate": 9.537100929619797e-06, + "loss": 0.7082, + "step": 17270 + }, + { + "epoch": 0.5293306362633321, + "grad_norm": 1.4176465218837344, + "learning_rate": 9.53610936028321e-06, + "loss": 0.7455, + "step": 17271 + }, + { + "epoch": 0.5293612847860734, + "grad_norm": 1.2971283299284073, + "learning_rate": 9.535117795517463e-06, + "loss": 0.6969, + "step": 17272 + }, + { + "epoch": 0.5293919333088145, + "grad_norm": 0.6522276805135653, + "learning_rate": 9.534126235332318e-06, + "loss": 0.5562, + "step": 17273 + }, + { + "epoch": 0.5294225818315558, + "grad_norm": 1.313516677395838, + "learning_rate": 9.533134679737543e-06, + "loss": 0.6511, + "step": 17274 + }, + { + "epoch": 0.5294532303542969, + "grad_norm": 1.3665689921265172, + "learning_rate": 9.532143128742915e-06, + "loss": 0.6709, + "step": 17275 + }, + { + "epoch": 0.5294838788770381, + "grad_norm": 1.434712740248117, + "learning_rate": 9.5311515823582e-06, + "loss": 0.668, + "step": 17276 + }, + { + "epoch": 0.5295145273997793, + "grad_norm": 1.5777130496342842, + "learning_rate": 9.530160040593166e-06, + "loss": 0.7529, + "step": 17277 + }, + { + "epoch": 0.5295451759225205, + "grad_norm": 1.4280445508164428, + "learning_rate": 9.529168503457587e-06, + "loss": 0.6829, + "step": 17278 + }, + { + "epoch": 0.5295758244452617, + "grad_norm": 1.7413900275187943, + "learning_rate": 9.52817697096123e-06, + "loss": 0.7076, + "step": 17279 + }, + { + "epoch": 0.5296064729680029, + "grad_norm": 0.6610184096485492, + "learning_rate": 9.527185443113868e-06, + "loss": 0.5836, + "step": 17280 + }, + { + "epoch": 0.5296371214907442, + "grad_norm": 1.575933860860338, + "learning_rate": 9.52619391992527e-06, + "loss": 0.6393, + "step": 17281 + }, + { + "epoch": 0.5296677700134853, + "grad_norm": 1.5359687949025262, + "learning_rate": 9.5252024014052e-06, + "loss": 0.6813, + "step": 17282 + }, + { + "epoch": 0.5296984185362266, + "grad_norm": 1.5454505262198548, + "learning_rate": 9.524210887563438e-06, + "loss": 0.6737, + "step": 17283 + }, + { + "epoch": 0.5297290670589677, + "grad_norm": 1.5433785329489982, + "learning_rate": 9.523219378409744e-06, + "loss": 0.7533, + "step": 17284 + }, + { + "epoch": 0.529759715581709, + "grad_norm": 1.5027900527779123, + "learning_rate": 9.522227873953891e-06, + "loss": 0.7012, + "step": 17285 + }, + { + "epoch": 0.5297903641044501, + "grad_norm": 1.6302071466375847, + "learning_rate": 9.52123637420565e-06, + "loss": 0.8045, + "step": 17286 + }, + { + "epoch": 0.5298210126271914, + "grad_norm": 1.5360245917773352, + "learning_rate": 9.520244879174791e-06, + "loss": 0.723, + "step": 17287 + }, + { + "epoch": 0.5298516611499325, + "grad_norm": 1.3835073286385302, + "learning_rate": 9.51925338887108e-06, + "loss": 0.7762, + "step": 17288 + }, + { + "epoch": 0.5298823096726738, + "grad_norm": 1.4757160174630461, + "learning_rate": 9.518261903304289e-06, + "loss": 0.6679, + "step": 17289 + }, + { + "epoch": 0.529912958195415, + "grad_norm": 0.6560315791926896, + "learning_rate": 9.517270422484183e-06, + "loss": 0.5578, + "step": 17290 + }, + { + "epoch": 0.5299436067181562, + "grad_norm": 1.6860622338725972, + "learning_rate": 9.516278946420543e-06, + "loss": 0.5863, + "step": 17291 + }, + { + "epoch": 0.5299742552408974, + "grad_norm": 1.4449994329368885, + "learning_rate": 9.515287475123126e-06, + "loss": 0.5353, + "step": 17292 + }, + { + "epoch": 0.5300049037636386, + "grad_norm": 0.655822529658627, + "learning_rate": 9.514296008601705e-06, + "loss": 0.5873, + "step": 17293 + }, + { + "epoch": 0.5300355522863798, + "grad_norm": 0.6815839266271982, + "learning_rate": 9.51330454686605e-06, + "loss": 0.5672, + "step": 17294 + }, + { + "epoch": 0.530066200809121, + "grad_norm": 1.5097579892825659, + "learning_rate": 9.512313089925931e-06, + "loss": 0.626, + "step": 17295 + }, + { + "epoch": 0.5300968493318622, + "grad_norm": 1.384319245695237, + "learning_rate": 9.511321637791114e-06, + "loss": 0.7914, + "step": 17296 + }, + { + "epoch": 0.5301274978546034, + "grad_norm": 1.685979793702175, + "learning_rate": 9.51033019047137e-06, + "loss": 0.6998, + "step": 17297 + }, + { + "epoch": 0.5301581463773446, + "grad_norm": 1.1840127586271885, + "learning_rate": 9.509338747976467e-06, + "loss": 0.6673, + "step": 17298 + }, + { + "epoch": 0.5301887949000859, + "grad_norm": 1.4391746361694933, + "learning_rate": 9.508347310316177e-06, + "loss": 0.7059, + "step": 17299 + }, + { + "epoch": 0.530219443422827, + "grad_norm": 1.4981044085199589, + "learning_rate": 9.50735587750027e-06, + "loss": 0.6967, + "step": 17300 + }, + { + "epoch": 0.5302500919455683, + "grad_norm": 1.5095734736039161, + "learning_rate": 9.506364449538504e-06, + "loss": 0.6637, + "step": 17301 + }, + { + "epoch": 0.5302807404683094, + "grad_norm": 1.4709623404367085, + "learning_rate": 9.505373026440662e-06, + "loss": 0.793, + "step": 17302 + }, + { + "epoch": 0.5303113889910507, + "grad_norm": 0.671395390055451, + "learning_rate": 9.504381608216504e-06, + "loss": 0.5886, + "step": 17303 + }, + { + "epoch": 0.5303420375137918, + "grad_norm": 0.6651327559035197, + "learning_rate": 9.503390194875798e-06, + "loss": 0.5834, + "step": 17304 + }, + { + "epoch": 0.5303726860365331, + "grad_norm": 1.4744453884697561, + "learning_rate": 9.50239878642832e-06, + "loss": 0.7279, + "step": 17305 + }, + { + "epoch": 0.5304033345592742, + "grad_norm": 0.6727199885602249, + "learning_rate": 9.50140738288383e-06, + "loss": 0.5567, + "step": 17306 + }, + { + "epoch": 0.5304339830820154, + "grad_norm": 1.600724094284174, + "learning_rate": 9.500415984252103e-06, + "loss": 0.7818, + "step": 17307 + }, + { + "epoch": 0.5304646316047567, + "grad_norm": 1.4968028314492972, + "learning_rate": 9.499424590542905e-06, + "loss": 0.6897, + "step": 17308 + }, + { + "epoch": 0.5304952801274978, + "grad_norm": 1.5451092101515969, + "learning_rate": 9.498433201766003e-06, + "loss": 0.7697, + "step": 17309 + }, + { + "epoch": 0.5305259286502391, + "grad_norm": 1.4502590002906766, + "learning_rate": 9.497441817931167e-06, + "loss": 0.6977, + "step": 17310 + }, + { + "epoch": 0.5305565771729802, + "grad_norm": 1.5051261843395776, + "learning_rate": 9.49645043904817e-06, + "loss": 0.5808, + "step": 17311 + }, + { + "epoch": 0.5305872256957215, + "grad_norm": 1.4277428510858803, + "learning_rate": 9.495459065126768e-06, + "loss": 0.6776, + "step": 17312 + }, + { + "epoch": 0.5306178742184626, + "grad_norm": 1.4878851119895715, + "learning_rate": 9.494467696176745e-06, + "loss": 0.7346, + "step": 17313 + }, + { + "epoch": 0.5306485227412039, + "grad_norm": 1.5048867219051472, + "learning_rate": 9.493476332207858e-06, + "loss": 0.7064, + "step": 17314 + }, + { + "epoch": 0.530679171263945, + "grad_norm": 1.5177200814036882, + "learning_rate": 9.492484973229876e-06, + "loss": 0.664, + "step": 17315 + }, + { + "epoch": 0.5307098197866863, + "grad_norm": 1.7264159077923251, + "learning_rate": 9.491493619252572e-06, + "loss": 0.7759, + "step": 17316 + }, + { + "epoch": 0.5307404683094274, + "grad_norm": 0.6740538230430834, + "learning_rate": 9.490502270285708e-06, + "loss": 0.5693, + "step": 17317 + }, + { + "epoch": 0.5307711168321687, + "grad_norm": 1.4403339031178934, + "learning_rate": 9.489510926339058e-06, + "loss": 0.6855, + "step": 17318 + }, + { + "epoch": 0.5308017653549099, + "grad_norm": 1.472191541466087, + "learning_rate": 9.48851958742239e-06, + "loss": 0.6909, + "step": 17319 + }, + { + "epoch": 0.5308324138776511, + "grad_norm": 1.6117238973013106, + "learning_rate": 9.487528253545464e-06, + "loss": 0.7015, + "step": 17320 + }, + { + "epoch": 0.5308630624003923, + "grad_norm": 1.616999872115007, + "learning_rate": 9.486536924718057e-06, + "loss": 0.7586, + "step": 17321 + }, + { + "epoch": 0.5308937109231335, + "grad_norm": 1.4840339385384331, + "learning_rate": 9.485545600949934e-06, + "loss": 0.6723, + "step": 17322 + }, + { + "epoch": 0.5309243594458747, + "grad_norm": 1.3473679266164136, + "learning_rate": 9.484554282250856e-06, + "loss": 0.6338, + "step": 17323 + }, + { + "epoch": 0.5309550079686159, + "grad_norm": 1.390389183119398, + "learning_rate": 9.483562968630605e-06, + "loss": 0.7532, + "step": 17324 + }, + { + "epoch": 0.5309856564913571, + "grad_norm": 1.572805204727135, + "learning_rate": 9.48257166009893e-06, + "loss": 0.6802, + "step": 17325 + }, + { + "epoch": 0.5310163050140984, + "grad_norm": 1.3962571364276268, + "learning_rate": 9.481580356665619e-06, + "loss": 0.6691, + "step": 17326 + }, + { + "epoch": 0.5310469535368395, + "grad_norm": 1.5501274447615032, + "learning_rate": 9.480589058340424e-06, + "loss": 0.7191, + "step": 17327 + }, + { + "epoch": 0.5310776020595808, + "grad_norm": 1.6021300179809423, + "learning_rate": 9.479597765133116e-06, + "loss": 0.7881, + "step": 17328 + }, + { + "epoch": 0.5311082505823219, + "grad_norm": 1.4788533016891654, + "learning_rate": 9.47860647705347e-06, + "loss": 0.7392, + "step": 17329 + }, + { + "epoch": 0.5311388991050632, + "grad_norm": 1.5213429264702136, + "learning_rate": 9.477615194111245e-06, + "loss": 0.7694, + "step": 17330 + }, + { + "epoch": 0.5311695476278043, + "grad_norm": 1.5187655618467366, + "learning_rate": 9.476623916316208e-06, + "loss": 0.7289, + "step": 17331 + }, + { + "epoch": 0.5312001961505456, + "grad_norm": 1.7082112298193397, + "learning_rate": 9.475632643678135e-06, + "loss": 0.7632, + "step": 17332 + }, + { + "epoch": 0.5312308446732867, + "grad_norm": 1.3998230382672967, + "learning_rate": 9.474641376206788e-06, + "loss": 0.7811, + "step": 17333 + }, + { + "epoch": 0.531261493196028, + "grad_norm": 1.8134521748829477, + "learning_rate": 9.473650113911929e-06, + "loss": 0.7444, + "step": 17334 + }, + { + "epoch": 0.5312921417187692, + "grad_norm": 1.3038113439027936, + "learning_rate": 9.472658856803333e-06, + "loss": 0.6181, + "step": 17335 + }, + { + "epoch": 0.5313227902415104, + "grad_norm": 1.3477385827174682, + "learning_rate": 9.471667604890762e-06, + "loss": 0.6781, + "step": 17336 + }, + { + "epoch": 0.5313534387642516, + "grad_norm": 1.809424113787315, + "learning_rate": 9.470676358183987e-06, + "loss": 0.7733, + "step": 17337 + }, + { + "epoch": 0.5313840872869927, + "grad_norm": 1.538058746031308, + "learning_rate": 9.469685116692774e-06, + "loss": 0.6826, + "step": 17338 + }, + { + "epoch": 0.531414735809734, + "grad_norm": 0.672911858475262, + "learning_rate": 9.468693880426886e-06, + "loss": 0.5886, + "step": 17339 + }, + { + "epoch": 0.5314453843324751, + "grad_norm": 1.6139297829237393, + "learning_rate": 9.467702649396096e-06, + "loss": 0.6229, + "step": 17340 + }, + { + "epoch": 0.5314760328552164, + "grad_norm": 1.3761084979983274, + "learning_rate": 9.46671142361017e-06, + "loss": 0.725, + "step": 17341 + }, + { + "epoch": 0.5315066813779575, + "grad_norm": 1.648743344927092, + "learning_rate": 9.465720203078868e-06, + "loss": 0.7485, + "step": 17342 + }, + { + "epoch": 0.5315373299006988, + "grad_norm": 1.3819131779487632, + "learning_rate": 9.464728987811965e-06, + "loss": 0.6774, + "step": 17343 + }, + { + "epoch": 0.53156797842344, + "grad_norm": 1.3958118201771301, + "learning_rate": 9.46373777781922e-06, + "loss": 0.6527, + "step": 17344 + }, + { + "epoch": 0.5315986269461812, + "grad_norm": 1.4005067978493273, + "learning_rate": 9.46274657311041e-06, + "loss": 0.7012, + "step": 17345 + }, + { + "epoch": 0.5316292754689224, + "grad_norm": 0.656737447001725, + "learning_rate": 9.461755373695293e-06, + "loss": 0.5695, + "step": 17346 + }, + { + "epoch": 0.5316599239916636, + "grad_norm": 1.445542591588352, + "learning_rate": 9.460764179583635e-06, + "loss": 0.7324, + "step": 17347 + }, + { + "epoch": 0.5316905725144048, + "grad_norm": 0.6678430775013727, + "learning_rate": 9.459772990785208e-06, + "loss": 0.6095, + "step": 17348 + }, + { + "epoch": 0.531721221037146, + "grad_norm": 1.6184057332373423, + "learning_rate": 9.458781807309777e-06, + "loss": 0.7258, + "step": 17349 + }, + { + "epoch": 0.5317518695598872, + "grad_norm": 1.4213724348973231, + "learning_rate": 9.457790629167105e-06, + "loss": 0.6845, + "step": 17350 + }, + { + "epoch": 0.5317825180826284, + "grad_norm": 1.6772817571370833, + "learning_rate": 9.456799456366962e-06, + "loss": 0.6976, + "step": 17351 + }, + { + "epoch": 0.5318131666053696, + "grad_norm": 1.4588838231203811, + "learning_rate": 9.455808288919112e-06, + "loss": 0.7281, + "step": 17352 + }, + { + "epoch": 0.5318438151281109, + "grad_norm": 1.5150030313147944, + "learning_rate": 9.454817126833322e-06, + "loss": 0.7165, + "step": 17353 + }, + { + "epoch": 0.531874463650852, + "grad_norm": 1.3590644483195176, + "learning_rate": 9.453825970119363e-06, + "loss": 0.7236, + "step": 17354 + }, + { + "epoch": 0.5319051121735933, + "grad_norm": 0.6507863988992334, + "learning_rate": 9.452834818786989e-06, + "loss": 0.5486, + "step": 17355 + }, + { + "epoch": 0.5319357606963344, + "grad_norm": 1.5715577519728825, + "learning_rate": 9.451843672845979e-06, + "loss": 0.7467, + "step": 17356 + }, + { + "epoch": 0.5319664092190757, + "grad_norm": 1.6873342978680863, + "learning_rate": 9.450852532306092e-06, + "loss": 0.7602, + "step": 17357 + }, + { + "epoch": 0.5319970577418168, + "grad_norm": 1.570840869218866, + "learning_rate": 9.449861397177096e-06, + "loss": 0.7157, + "step": 17358 + }, + { + "epoch": 0.5320277062645581, + "grad_norm": 1.373033375597807, + "learning_rate": 9.448870267468754e-06, + "loss": 0.5987, + "step": 17359 + }, + { + "epoch": 0.5320583547872992, + "grad_norm": 1.4254883504458624, + "learning_rate": 9.447879143190837e-06, + "loss": 0.7208, + "step": 17360 + }, + { + "epoch": 0.5320890033100405, + "grad_norm": 1.3677805674601435, + "learning_rate": 9.446888024353105e-06, + "loss": 0.6048, + "step": 17361 + }, + { + "epoch": 0.5321196518327816, + "grad_norm": 0.6692897220086277, + "learning_rate": 9.44589691096533e-06, + "loss": 0.5527, + "step": 17362 + }, + { + "epoch": 0.5321503003555229, + "grad_norm": 1.5126663055971072, + "learning_rate": 9.444905803037272e-06, + "loss": 0.7393, + "step": 17363 + }, + { + "epoch": 0.5321809488782641, + "grad_norm": 1.5155177761597816, + "learning_rate": 9.443914700578702e-06, + "loss": 0.7324, + "step": 17364 + }, + { + "epoch": 0.5322115974010053, + "grad_norm": 1.4243778423150233, + "learning_rate": 9.442923603599383e-06, + "loss": 0.5704, + "step": 17365 + }, + { + "epoch": 0.5322422459237465, + "grad_norm": 1.8997138912140636, + "learning_rate": 9.441932512109075e-06, + "loss": 0.6767, + "step": 17366 + }, + { + "epoch": 0.5322728944464877, + "grad_norm": 0.6934684317441352, + "learning_rate": 9.440941426117554e-06, + "loss": 0.5992, + "step": 17367 + }, + { + "epoch": 0.5323035429692289, + "grad_norm": 1.5574016946619886, + "learning_rate": 9.43995034563458e-06, + "loss": 0.6843, + "step": 17368 + }, + { + "epoch": 0.53233419149197, + "grad_norm": 1.4164444369377591, + "learning_rate": 9.438959270669915e-06, + "loss": 0.6344, + "step": 17369 + }, + { + "epoch": 0.5323648400147113, + "grad_norm": 1.5281872910002217, + "learning_rate": 9.43796820123333e-06, + "loss": 0.6493, + "step": 17370 + }, + { + "epoch": 0.5323954885374524, + "grad_norm": 1.5065195082244187, + "learning_rate": 9.436977137334588e-06, + "loss": 0.7348, + "step": 17371 + }, + { + "epoch": 0.5324261370601937, + "grad_norm": 1.3717306049897777, + "learning_rate": 9.435986078983455e-06, + "loss": 0.6819, + "step": 17372 + }, + { + "epoch": 0.5324567855829349, + "grad_norm": 1.500159917802604, + "learning_rate": 9.434995026189695e-06, + "loss": 0.7323, + "step": 17373 + }, + { + "epoch": 0.5324874341056761, + "grad_norm": 1.6230123979494004, + "learning_rate": 9.434003978963072e-06, + "loss": 0.7352, + "step": 17374 + }, + { + "epoch": 0.5325180826284173, + "grad_norm": 0.6401700685458291, + "learning_rate": 9.433012937313355e-06, + "loss": 0.5566, + "step": 17375 + }, + { + "epoch": 0.5325487311511585, + "grad_norm": 1.5724573288781047, + "learning_rate": 9.432021901250306e-06, + "loss": 0.7341, + "step": 17376 + }, + { + "epoch": 0.5325793796738997, + "grad_norm": 1.3484991181481547, + "learning_rate": 9.431030870783687e-06, + "loss": 0.6176, + "step": 17377 + }, + { + "epoch": 0.5326100281966409, + "grad_norm": 1.9347717961842892, + "learning_rate": 9.430039845923273e-06, + "loss": 0.8118, + "step": 17378 + }, + { + "epoch": 0.5326406767193821, + "grad_norm": 1.4876146857973365, + "learning_rate": 9.429048826678817e-06, + "loss": 0.8106, + "step": 17379 + }, + { + "epoch": 0.5326713252421234, + "grad_norm": 0.6712493095042599, + "learning_rate": 9.428057813060092e-06, + "loss": 0.5881, + "step": 17380 + }, + { + "epoch": 0.5327019737648645, + "grad_norm": 0.6596607373719895, + "learning_rate": 9.427066805076858e-06, + "loss": 0.5619, + "step": 17381 + }, + { + "epoch": 0.5327326222876058, + "grad_norm": 1.4673858543534093, + "learning_rate": 9.42607580273888e-06, + "loss": 0.7427, + "step": 17382 + }, + { + "epoch": 0.5327632708103469, + "grad_norm": 0.6862246137811118, + "learning_rate": 9.425084806055926e-06, + "loss": 0.5781, + "step": 17383 + }, + { + "epoch": 0.5327939193330882, + "grad_norm": 1.4880536321402054, + "learning_rate": 9.42409381503776e-06, + "loss": 0.6039, + "step": 17384 + }, + { + "epoch": 0.5328245678558293, + "grad_norm": 1.3717772240074484, + "learning_rate": 9.42310282969414e-06, + "loss": 0.6937, + "step": 17385 + }, + { + "epoch": 0.5328552163785706, + "grad_norm": 0.676484584354918, + "learning_rate": 9.422111850034841e-06, + "loss": 0.5666, + "step": 17386 + }, + { + "epoch": 0.5328858649013117, + "grad_norm": 1.405431734068357, + "learning_rate": 9.421120876069621e-06, + "loss": 0.5911, + "step": 17387 + }, + { + "epoch": 0.532916513424053, + "grad_norm": 1.3998246144247235, + "learning_rate": 9.420129907808241e-06, + "loss": 0.6129, + "step": 17388 + }, + { + "epoch": 0.5329471619467941, + "grad_norm": 1.467568964065431, + "learning_rate": 9.419138945260473e-06, + "loss": 0.6658, + "step": 17389 + }, + { + "epoch": 0.5329778104695354, + "grad_norm": 1.689994425212845, + "learning_rate": 9.418147988436076e-06, + "loss": 0.7344, + "step": 17390 + }, + { + "epoch": 0.5330084589922766, + "grad_norm": 1.3366814247401992, + "learning_rate": 9.417157037344816e-06, + "loss": 0.6504, + "step": 17391 + }, + { + "epoch": 0.5330391075150178, + "grad_norm": 1.4427490693361043, + "learning_rate": 9.416166091996459e-06, + "loss": 0.6938, + "step": 17392 + }, + { + "epoch": 0.533069756037759, + "grad_norm": 1.6626967682925835, + "learning_rate": 9.415175152400762e-06, + "loss": 0.6946, + "step": 17393 + }, + { + "epoch": 0.5331004045605002, + "grad_norm": 1.466813157330706, + "learning_rate": 9.414184218567497e-06, + "loss": 0.6184, + "step": 17394 + }, + { + "epoch": 0.5331310530832414, + "grad_norm": 1.693203145468534, + "learning_rate": 9.413193290506428e-06, + "loss": 0.7334, + "step": 17395 + }, + { + "epoch": 0.5331617016059826, + "grad_norm": 1.4642984428534094, + "learning_rate": 9.41220236822731e-06, + "loss": 0.714, + "step": 17396 + }, + { + "epoch": 0.5331923501287238, + "grad_norm": 1.36542274216321, + "learning_rate": 9.411211451739917e-06, + "loss": 0.6502, + "step": 17397 + }, + { + "epoch": 0.533222998651465, + "grad_norm": 1.5293345683792559, + "learning_rate": 9.410220541054001e-06, + "loss": 0.7046, + "step": 17398 + }, + { + "epoch": 0.5332536471742062, + "grad_norm": 1.1768588651645693, + "learning_rate": 9.40922963617934e-06, + "loss": 0.6681, + "step": 17399 + }, + { + "epoch": 0.5332842956969474, + "grad_norm": 1.520387737695896, + "learning_rate": 9.40823873712569e-06, + "loss": 0.7552, + "step": 17400 + }, + { + "epoch": 0.5333149442196886, + "grad_norm": 1.4740084012327275, + "learning_rate": 9.407247843902812e-06, + "loss": 0.7023, + "step": 17401 + }, + { + "epoch": 0.5333455927424298, + "grad_norm": 1.623314390236203, + "learning_rate": 9.406256956520475e-06, + "loss": 0.669, + "step": 17402 + }, + { + "epoch": 0.533376241265171, + "grad_norm": 1.5751519681209787, + "learning_rate": 9.40526607498844e-06, + "loss": 0.7251, + "step": 17403 + }, + { + "epoch": 0.5334068897879122, + "grad_norm": 0.7157285007456815, + "learning_rate": 9.404275199316469e-06, + "loss": 0.5456, + "step": 17404 + }, + { + "epoch": 0.5334375383106534, + "grad_norm": 1.6076430115389637, + "learning_rate": 9.403284329514327e-06, + "loss": 0.763, + "step": 17405 + }, + { + "epoch": 0.5334681868333946, + "grad_norm": 1.597170501031246, + "learning_rate": 9.402293465591777e-06, + "loss": 0.7517, + "step": 17406 + }, + { + "epoch": 0.5334988353561358, + "grad_norm": 1.5789466925297622, + "learning_rate": 9.401302607558583e-06, + "loss": 0.7107, + "step": 17407 + }, + { + "epoch": 0.533529483878877, + "grad_norm": 1.5120240769861515, + "learning_rate": 9.400311755424512e-06, + "loss": 0.75, + "step": 17408 + }, + { + "epoch": 0.5335601324016183, + "grad_norm": 1.560813251878301, + "learning_rate": 9.399320909199314e-06, + "loss": 0.7249, + "step": 17409 + }, + { + "epoch": 0.5335907809243594, + "grad_norm": 0.6971045040856868, + "learning_rate": 9.398330068892767e-06, + "loss": 0.5888, + "step": 17410 + }, + { + "epoch": 0.5336214294471007, + "grad_norm": 1.4386014071625357, + "learning_rate": 9.397339234514628e-06, + "loss": 0.7121, + "step": 17411 + }, + { + "epoch": 0.5336520779698418, + "grad_norm": 1.48462585651876, + "learning_rate": 9.396348406074656e-06, + "loss": 0.5972, + "step": 17412 + }, + { + "epoch": 0.5336827264925831, + "grad_norm": 1.480462585631367, + "learning_rate": 9.395357583582621e-06, + "loss": 0.6886, + "step": 17413 + }, + { + "epoch": 0.5337133750153242, + "grad_norm": 1.458998306663727, + "learning_rate": 9.394366767048281e-06, + "loss": 0.6935, + "step": 17414 + }, + { + "epoch": 0.5337440235380655, + "grad_norm": 1.5953006451804475, + "learning_rate": 9.393375956481399e-06, + "loss": 0.7841, + "step": 17415 + }, + { + "epoch": 0.5337746720608066, + "grad_norm": 1.6909139555250268, + "learning_rate": 9.39238515189174e-06, + "loss": 0.7511, + "step": 17416 + }, + { + "epoch": 0.5338053205835479, + "grad_norm": 1.384451098513573, + "learning_rate": 9.391394353289063e-06, + "loss": 0.6038, + "step": 17417 + }, + { + "epoch": 0.533835969106289, + "grad_norm": 1.5145718320728563, + "learning_rate": 9.390403560683138e-06, + "loss": 0.6511, + "step": 17418 + }, + { + "epoch": 0.5338666176290303, + "grad_norm": 0.669659018182518, + "learning_rate": 9.389412774083722e-06, + "loss": 0.5509, + "step": 17419 + }, + { + "epoch": 0.5338972661517715, + "grad_norm": 1.7258772935251314, + "learning_rate": 9.388421993500574e-06, + "loss": 0.8072, + "step": 17420 + }, + { + "epoch": 0.5339279146745127, + "grad_norm": 1.485140453677007, + "learning_rate": 9.387431218943466e-06, + "loss": 0.6018, + "step": 17421 + }, + { + "epoch": 0.5339585631972539, + "grad_norm": 1.5605650400051974, + "learning_rate": 9.386440450422151e-06, + "loss": 0.7964, + "step": 17422 + }, + { + "epoch": 0.5339892117199951, + "grad_norm": 1.3939867658804226, + "learning_rate": 9.385449687946396e-06, + "loss": 0.6904, + "step": 17423 + }, + { + "epoch": 0.5340198602427363, + "grad_norm": 0.66969277732447, + "learning_rate": 9.384458931525964e-06, + "loss": 0.5536, + "step": 17424 + }, + { + "epoch": 0.5340505087654775, + "grad_norm": 1.7698406415737344, + "learning_rate": 9.383468181170612e-06, + "loss": 0.7321, + "step": 17425 + }, + { + "epoch": 0.5340811572882187, + "grad_norm": 1.7050946942096856, + "learning_rate": 9.38247743689011e-06, + "loss": 0.7048, + "step": 17426 + }, + { + "epoch": 0.53411180581096, + "grad_norm": 0.6673321080164291, + "learning_rate": 9.381486698694213e-06, + "loss": 0.5883, + "step": 17427 + }, + { + "epoch": 0.5341424543337011, + "grad_norm": 1.512009858704913, + "learning_rate": 9.380495966592685e-06, + "loss": 0.5997, + "step": 17428 + }, + { + "epoch": 0.5341731028564424, + "grad_norm": 1.6172762952936859, + "learning_rate": 9.379505240595293e-06, + "loss": 0.643, + "step": 17429 + }, + { + "epoch": 0.5342037513791835, + "grad_norm": 1.544774285405411, + "learning_rate": 9.378514520711795e-06, + "loss": 0.6896, + "step": 17430 + }, + { + "epoch": 0.5342343999019247, + "grad_norm": 1.4171930657352303, + "learning_rate": 9.377523806951947e-06, + "loss": 0.6135, + "step": 17431 + }, + { + "epoch": 0.5342650484246659, + "grad_norm": 1.797725843781496, + "learning_rate": 9.37653309932552e-06, + "loss": 0.6917, + "step": 17432 + }, + { + "epoch": 0.5342956969474071, + "grad_norm": 1.5630248802052162, + "learning_rate": 9.375542397842268e-06, + "loss": 0.6436, + "step": 17433 + }, + { + "epoch": 0.5343263454701483, + "grad_norm": 0.6753098900215576, + "learning_rate": 9.37455170251196e-06, + "loss": 0.5657, + "step": 17434 + }, + { + "epoch": 0.5343569939928895, + "grad_norm": 1.451153731029477, + "learning_rate": 9.373561013344355e-06, + "loss": 0.6805, + "step": 17435 + }, + { + "epoch": 0.5343876425156308, + "grad_norm": 0.691082870684396, + "learning_rate": 9.37257033034921e-06, + "loss": 0.6003, + "step": 17436 + }, + { + "epoch": 0.5344182910383719, + "grad_norm": 1.4748579930598817, + "learning_rate": 9.371579653536293e-06, + "loss": 0.7437, + "step": 17437 + }, + { + "epoch": 0.5344489395611132, + "grad_norm": 1.7084251712450305, + "learning_rate": 9.370588982915364e-06, + "loss": 0.8338, + "step": 17438 + }, + { + "epoch": 0.5344795880838543, + "grad_norm": 1.759594222804644, + "learning_rate": 9.369598318496176e-06, + "loss": 0.7249, + "step": 17439 + }, + { + "epoch": 0.5345102366065956, + "grad_norm": 1.6742134931031232, + "learning_rate": 9.368607660288504e-06, + "loss": 0.7414, + "step": 17440 + }, + { + "epoch": 0.5345408851293367, + "grad_norm": 1.6778279774893319, + "learning_rate": 9.367617008302102e-06, + "loss": 0.7395, + "step": 17441 + }, + { + "epoch": 0.534571533652078, + "grad_norm": 1.5222045707291403, + "learning_rate": 9.366626362546726e-06, + "loss": 0.6895, + "step": 17442 + }, + { + "epoch": 0.5346021821748191, + "grad_norm": 0.6583542894521935, + "learning_rate": 9.365635723032146e-06, + "loss": 0.5323, + "step": 17443 + }, + { + "epoch": 0.5346328306975604, + "grad_norm": 1.6357856283277963, + "learning_rate": 9.364645089768118e-06, + "loss": 0.79, + "step": 17444 + }, + { + "epoch": 0.5346634792203016, + "grad_norm": 1.745130717351641, + "learning_rate": 9.363654462764407e-06, + "loss": 0.7468, + "step": 17445 + }, + { + "epoch": 0.5346941277430428, + "grad_norm": 0.670885417311331, + "learning_rate": 9.36266384203077e-06, + "loss": 0.5623, + "step": 17446 + }, + { + "epoch": 0.534724776265784, + "grad_norm": 1.4806409920723864, + "learning_rate": 9.361673227576967e-06, + "loss": 0.5645, + "step": 17447 + }, + { + "epoch": 0.5347554247885252, + "grad_norm": 1.2823024500679774, + "learning_rate": 9.360682619412764e-06, + "loss": 0.5986, + "step": 17448 + }, + { + "epoch": 0.5347860733112664, + "grad_norm": 1.3696197494096383, + "learning_rate": 9.359692017547922e-06, + "loss": 0.6098, + "step": 17449 + }, + { + "epoch": 0.5348167218340076, + "grad_norm": 1.4760961662649201, + "learning_rate": 9.358701421992189e-06, + "loss": 0.7103, + "step": 17450 + }, + { + "epoch": 0.5348473703567488, + "grad_norm": 1.70198982696365, + "learning_rate": 9.357710832755344e-06, + "loss": 0.7919, + "step": 17451 + }, + { + "epoch": 0.53487801887949, + "grad_norm": 1.5286153078057265, + "learning_rate": 9.356720249847131e-06, + "loss": 0.7442, + "step": 17452 + }, + { + "epoch": 0.5349086674022312, + "grad_norm": 1.5184744676796254, + "learning_rate": 9.355729673277323e-06, + "loss": 0.7352, + "step": 17453 + }, + { + "epoch": 0.5349393159249725, + "grad_norm": 1.5040910718387392, + "learning_rate": 9.354739103055675e-06, + "loss": 0.7107, + "step": 17454 + }, + { + "epoch": 0.5349699644477136, + "grad_norm": 1.3613848401251092, + "learning_rate": 9.353748539191946e-06, + "loss": 0.7289, + "step": 17455 + }, + { + "epoch": 0.5350006129704549, + "grad_norm": 1.421183145800748, + "learning_rate": 9.352757981695899e-06, + "loss": 0.5896, + "step": 17456 + }, + { + "epoch": 0.535031261493196, + "grad_norm": 1.685159873426498, + "learning_rate": 9.351767430577295e-06, + "loss": 0.7282, + "step": 17457 + }, + { + "epoch": 0.5350619100159373, + "grad_norm": 1.7308923070590412, + "learning_rate": 9.350776885845889e-06, + "loss": 0.7259, + "step": 17458 + }, + { + "epoch": 0.5350925585386784, + "grad_norm": 1.5687841796195523, + "learning_rate": 9.349786347511446e-06, + "loss": 0.6322, + "step": 17459 + }, + { + "epoch": 0.5351232070614197, + "grad_norm": 1.401285820343926, + "learning_rate": 9.348795815583723e-06, + "loss": 0.6781, + "step": 17460 + }, + { + "epoch": 0.5351538555841608, + "grad_norm": 1.328159053502062, + "learning_rate": 9.347805290072483e-06, + "loss": 0.5863, + "step": 17461 + }, + { + "epoch": 0.535184504106902, + "grad_norm": 1.5974249216133223, + "learning_rate": 9.346814770987486e-06, + "loss": 0.7974, + "step": 17462 + }, + { + "epoch": 0.5352151526296433, + "grad_norm": 1.4914730622009484, + "learning_rate": 9.345824258338485e-06, + "loss": 0.6759, + "step": 17463 + }, + { + "epoch": 0.5352458011523844, + "grad_norm": 1.4853870347207596, + "learning_rate": 9.344833752135251e-06, + "loss": 0.6942, + "step": 17464 + }, + { + "epoch": 0.5352764496751257, + "grad_norm": 0.6820006092127561, + "learning_rate": 9.343843252387537e-06, + "loss": 0.5701, + "step": 17465 + }, + { + "epoch": 0.5353070981978668, + "grad_norm": 1.432363062483159, + "learning_rate": 9.342852759105098e-06, + "loss": 0.7008, + "step": 17466 + }, + { + "epoch": 0.5353377467206081, + "grad_norm": 0.6715017442768676, + "learning_rate": 9.341862272297703e-06, + "loss": 0.582, + "step": 17467 + }, + { + "epoch": 0.5353683952433492, + "grad_norm": 1.5981275811214168, + "learning_rate": 9.340871791975107e-06, + "loss": 0.6098, + "step": 17468 + }, + { + "epoch": 0.5353990437660905, + "grad_norm": 1.5749609754222191, + "learning_rate": 9.339881318147069e-06, + "loss": 0.7292, + "step": 17469 + }, + { + "epoch": 0.5354296922888316, + "grad_norm": 0.6763070766783648, + "learning_rate": 9.33889085082335e-06, + "loss": 0.5807, + "step": 17470 + }, + { + "epoch": 0.5354603408115729, + "grad_norm": 1.6311087063660157, + "learning_rate": 9.337900390013707e-06, + "loss": 0.6868, + "step": 17471 + }, + { + "epoch": 0.535490989334314, + "grad_norm": 1.4724257925291666, + "learning_rate": 9.336909935727903e-06, + "loss": 0.6944, + "step": 17472 + }, + { + "epoch": 0.5355216378570553, + "grad_norm": 1.4382382014482966, + "learning_rate": 9.335919487975696e-06, + "loss": 0.6986, + "step": 17473 + }, + { + "epoch": 0.5355522863797965, + "grad_norm": 1.813023876318392, + "learning_rate": 9.334929046766839e-06, + "loss": 0.6904, + "step": 17474 + }, + { + "epoch": 0.5355829349025377, + "grad_norm": 0.6847217316839219, + "learning_rate": 9.333938612111103e-06, + "loss": 0.5568, + "step": 17475 + }, + { + "epoch": 0.5356135834252789, + "grad_norm": 1.3186130802664058, + "learning_rate": 9.332948184018238e-06, + "loss": 0.6624, + "step": 17476 + }, + { + "epoch": 0.5356442319480201, + "grad_norm": 1.3909019625066152, + "learning_rate": 9.331957762498002e-06, + "loss": 0.6754, + "step": 17477 + }, + { + "epoch": 0.5356748804707613, + "grad_norm": 1.604151162694324, + "learning_rate": 9.33096734756016e-06, + "loss": 0.7767, + "step": 17478 + }, + { + "epoch": 0.5357055289935025, + "grad_norm": 1.3103140213964488, + "learning_rate": 9.329976939214465e-06, + "loss": 0.6303, + "step": 17479 + }, + { + "epoch": 0.5357361775162437, + "grad_norm": 1.507528804554905, + "learning_rate": 9.328986537470682e-06, + "loss": 0.7326, + "step": 17480 + }, + { + "epoch": 0.535766826038985, + "grad_norm": 1.4396267329918697, + "learning_rate": 9.327996142338566e-06, + "loss": 0.7035, + "step": 17481 + }, + { + "epoch": 0.5357974745617261, + "grad_norm": 1.3688254121655752, + "learning_rate": 9.327005753827874e-06, + "loss": 0.6367, + "step": 17482 + }, + { + "epoch": 0.5358281230844674, + "grad_norm": 1.5195292343148732, + "learning_rate": 9.32601537194837e-06, + "loss": 0.7321, + "step": 17483 + }, + { + "epoch": 0.5358587716072085, + "grad_norm": 1.3996074231662485, + "learning_rate": 9.325024996709809e-06, + "loss": 0.7115, + "step": 17484 + }, + { + "epoch": 0.5358894201299498, + "grad_norm": 1.6485275869746137, + "learning_rate": 9.324034628121945e-06, + "loss": 0.7774, + "step": 17485 + }, + { + "epoch": 0.5359200686526909, + "grad_norm": 1.4109296104974234, + "learning_rate": 9.323044266194543e-06, + "loss": 0.764, + "step": 17486 + }, + { + "epoch": 0.5359507171754322, + "grad_norm": 1.5718962716080973, + "learning_rate": 9.322053910937356e-06, + "loss": 0.7969, + "step": 17487 + }, + { + "epoch": 0.5359813656981733, + "grad_norm": 1.5077095354541739, + "learning_rate": 9.32106356236015e-06, + "loss": 0.7993, + "step": 17488 + }, + { + "epoch": 0.5360120142209146, + "grad_norm": 1.5487704024577873, + "learning_rate": 9.320073220472678e-06, + "loss": 0.601, + "step": 17489 + }, + { + "epoch": 0.5360426627436558, + "grad_norm": 1.5331304880211551, + "learning_rate": 9.319082885284695e-06, + "loss": 0.7121, + "step": 17490 + }, + { + "epoch": 0.536073311266397, + "grad_norm": 1.2586165852744977, + "learning_rate": 9.318092556805964e-06, + "loss": 0.6986, + "step": 17491 + }, + { + "epoch": 0.5361039597891382, + "grad_norm": 1.6828328045694587, + "learning_rate": 9.317102235046245e-06, + "loss": 0.7219, + "step": 17492 + }, + { + "epoch": 0.5361346083118793, + "grad_norm": 1.7718971740476612, + "learning_rate": 9.316111920015287e-06, + "loss": 0.6805, + "step": 17493 + }, + { + "epoch": 0.5361652568346206, + "grad_norm": 1.5344892276276176, + "learning_rate": 9.315121611722858e-06, + "loss": 0.7441, + "step": 17494 + }, + { + "epoch": 0.5361959053573617, + "grad_norm": 1.5253262590080907, + "learning_rate": 9.31413131017871e-06, + "loss": 0.7495, + "step": 17495 + }, + { + "epoch": 0.536226553880103, + "grad_norm": 1.6925580215684626, + "learning_rate": 9.313141015392599e-06, + "loss": 0.697, + "step": 17496 + }, + { + "epoch": 0.5362572024028441, + "grad_norm": 1.7602345039741991, + "learning_rate": 9.312150727374287e-06, + "loss": 0.786, + "step": 17497 + }, + { + "epoch": 0.5362878509255854, + "grad_norm": 1.4866046174015868, + "learning_rate": 9.311160446133528e-06, + "loss": 0.6189, + "step": 17498 + }, + { + "epoch": 0.5363184994483265, + "grad_norm": 1.6518311349549706, + "learning_rate": 9.310170171680084e-06, + "loss": 0.7136, + "step": 17499 + }, + { + "epoch": 0.5363491479710678, + "grad_norm": 1.4534248210317093, + "learning_rate": 9.309179904023709e-06, + "loss": 0.5814, + "step": 17500 + }, + { + "epoch": 0.536379796493809, + "grad_norm": 1.6118218151370471, + "learning_rate": 9.308189643174158e-06, + "loss": 0.8288, + "step": 17501 + }, + { + "epoch": 0.5364104450165502, + "grad_norm": 1.433477152434686, + "learning_rate": 9.307199389141197e-06, + "loss": 0.7017, + "step": 17502 + }, + { + "epoch": 0.5364410935392914, + "grad_norm": 1.3871023331009973, + "learning_rate": 9.306209141934577e-06, + "loss": 0.6729, + "step": 17503 + }, + { + "epoch": 0.5364717420620326, + "grad_norm": 1.448640040869704, + "learning_rate": 9.305218901564052e-06, + "loss": 0.7625, + "step": 17504 + }, + { + "epoch": 0.5365023905847738, + "grad_norm": 1.5159100181450353, + "learning_rate": 9.304228668039386e-06, + "loss": 0.6576, + "step": 17505 + }, + { + "epoch": 0.536533039107515, + "grad_norm": 1.389893906382828, + "learning_rate": 9.303238441370329e-06, + "loss": 0.7348, + "step": 17506 + }, + { + "epoch": 0.5365636876302562, + "grad_norm": 1.4172130849029725, + "learning_rate": 9.302248221566648e-06, + "loss": 0.6738, + "step": 17507 + }, + { + "epoch": 0.5365943361529975, + "grad_norm": 1.6028806021924673, + "learning_rate": 9.301258008638091e-06, + "loss": 0.6474, + "step": 17508 + }, + { + "epoch": 0.5366249846757386, + "grad_norm": 1.4824499888839062, + "learning_rate": 9.300267802594415e-06, + "loss": 0.6275, + "step": 17509 + }, + { + "epoch": 0.5366556331984799, + "grad_norm": 1.4378408873743016, + "learning_rate": 9.299277603445382e-06, + "loss": 0.6926, + "step": 17510 + }, + { + "epoch": 0.536686281721221, + "grad_norm": 1.573542853371192, + "learning_rate": 9.298287411200747e-06, + "loss": 0.617, + "step": 17511 + }, + { + "epoch": 0.5367169302439623, + "grad_norm": 1.5853824045717422, + "learning_rate": 9.297297225870261e-06, + "loss": 0.7579, + "step": 17512 + }, + { + "epoch": 0.5367475787667034, + "grad_norm": 1.4611464793478135, + "learning_rate": 9.296307047463691e-06, + "loss": 0.7156, + "step": 17513 + }, + { + "epoch": 0.5367782272894447, + "grad_norm": 1.4751083949431456, + "learning_rate": 9.295316875990787e-06, + "loss": 0.7071, + "step": 17514 + }, + { + "epoch": 0.5368088758121858, + "grad_norm": 1.5804773800033016, + "learning_rate": 9.294326711461302e-06, + "loss": 0.637, + "step": 17515 + }, + { + "epoch": 0.5368395243349271, + "grad_norm": 1.354737016128254, + "learning_rate": 9.293336553885e-06, + "loss": 0.5638, + "step": 17516 + }, + { + "epoch": 0.5368701728576682, + "grad_norm": 1.5993132783381852, + "learning_rate": 9.29234640327163e-06, + "loss": 0.726, + "step": 17517 + }, + { + "epoch": 0.5369008213804095, + "grad_norm": 1.4291638417906103, + "learning_rate": 9.291356259630957e-06, + "loss": 0.6395, + "step": 17518 + }, + { + "epoch": 0.5369314699031507, + "grad_norm": 1.478496526031984, + "learning_rate": 9.29036612297273e-06, + "loss": 0.6869, + "step": 17519 + }, + { + "epoch": 0.5369621184258919, + "grad_norm": 1.5826045726884543, + "learning_rate": 9.289375993306706e-06, + "loss": 0.7436, + "step": 17520 + }, + { + "epoch": 0.5369927669486331, + "grad_norm": 1.7094246298219091, + "learning_rate": 9.288385870642644e-06, + "loss": 0.6516, + "step": 17521 + }, + { + "epoch": 0.5370234154713743, + "grad_norm": 0.6847328800862479, + "learning_rate": 9.287395754990297e-06, + "loss": 0.5815, + "step": 17522 + }, + { + "epoch": 0.5370540639941155, + "grad_norm": 1.5074014165687397, + "learning_rate": 9.286405646359419e-06, + "loss": 0.7322, + "step": 17523 + }, + { + "epoch": 0.5370847125168566, + "grad_norm": 1.5251458620080875, + "learning_rate": 9.285415544759774e-06, + "loss": 0.7454, + "step": 17524 + }, + { + "epoch": 0.5371153610395979, + "grad_norm": 1.4484275104544064, + "learning_rate": 9.284425450201109e-06, + "loss": 0.768, + "step": 17525 + }, + { + "epoch": 0.537146009562339, + "grad_norm": 1.5386126743217694, + "learning_rate": 9.283435362693185e-06, + "loss": 0.7355, + "step": 17526 + }, + { + "epoch": 0.5371766580850803, + "grad_norm": 1.536977152774557, + "learning_rate": 9.282445282245756e-06, + "loss": 0.6946, + "step": 17527 + }, + { + "epoch": 0.5372073066078215, + "grad_norm": 1.5364455015544412, + "learning_rate": 9.281455208868577e-06, + "loss": 0.7683, + "step": 17528 + }, + { + "epoch": 0.5372379551305627, + "grad_norm": 1.4938200702727165, + "learning_rate": 9.280465142571403e-06, + "loss": 0.7496, + "step": 17529 + }, + { + "epoch": 0.5372686036533039, + "grad_norm": 1.5120713275341626, + "learning_rate": 9.27947508336399e-06, + "loss": 0.6662, + "step": 17530 + }, + { + "epoch": 0.5372992521760451, + "grad_norm": 0.6808674071719756, + "learning_rate": 9.278485031256092e-06, + "loss": 0.5812, + "step": 17531 + }, + { + "epoch": 0.5373299006987863, + "grad_norm": 1.5279064486473208, + "learning_rate": 9.277494986257467e-06, + "loss": 0.643, + "step": 17532 + }, + { + "epoch": 0.5373605492215275, + "grad_norm": 1.583059032475279, + "learning_rate": 9.276504948377869e-06, + "loss": 0.7467, + "step": 17533 + }, + { + "epoch": 0.5373911977442687, + "grad_norm": 1.581004935126492, + "learning_rate": 9.275514917627053e-06, + "loss": 0.648, + "step": 17534 + }, + { + "epoch": 0.53742184626701, + "grad_norm": 1.6388199629116715, + "learning_rate": 9.274524894014775e-06, + "loss": 0.7169, + "step": 17535 + }, + { + "epoch": 0.5374524947897511, + "grad_norm": 1.5242619979064553, + "learning_rate": 9.273534877550784e-06, + "loss": 0.6662, + "step": 17536 + }, + { + "epoch": 0.5374831433124924, + "grad_norm": 1.6772577857512667, + "learning_rate": 9.272544868244844e-06, + "loss": 0.7811, + "step": 17537 + }, + { + "epoch": 0.5375137918352335, + "grad_norm": 1.453585611596061, + "learning_rate": 9.271554866106707e-06, + "loss": 0.624, + "step": 17538 + }, + { + "epoch": 0.5375444403579748, + "grad_norm": 1.3388781007354655, + "learning_rate": 9.27056487114612e-06, + "loss": 0.6201, + "step": 17539 + }, + { + "epoch": 0.5375750888807159, + "grad_norm": 1.426959933901762, + "learning_rate": 9.26957488337285e-06, + "loss": 0.7096, + "step": 17540 + }, + { + "epoch": 0.5376057374034572, + "grad_norm": 1.5377900982229284, + "learning_rate": 9.268584902796644e-06, + "loss": 0.6785, + "step": 17541 + }, + { + "epoch": 0.5376363859261983, + "grad_norm": 0.6732951650591071, + "learning_rate": 9.267594929427257e-06, + "loss": 0.5508, + "step": 17542 + }, + { + "epoch": 0.5376670344489396, + "grad_norm": 1.3796025654894823, + "learning_rate": 9.266604963274444e-06, + "loss": 0.7277, + "step": 17543 + }, + { + "epoch": 0.5376976829716807, + "grad_norm": 1.8142204327523923, + "learning_rate": 9.26561500434796e-06, + "loss": 0.7993, + "step": 17544 + }, + { + "epoch": 0.537728331494422, + "grad_norm": 1.4003528716621148, + "learning_rate": 9.264625052657561e-06, + "loss": 0.6663, + "step": 17545 + }, + { + "epoch": 0.5377589800171632, + "grad_norm": 1.4897400047335545, + "learning_rate": 9.263635108213002e-06, + "loss": 0.7735, + "step": 17546 + }, + { + "epoch": 0.5377896285399044, + "grad_norm": 0.654905795788524, + "learning_rate": 9.262645171024027e-06, + "loss": 0.5619, + "step": 17547 + }, + { + "epoch": 0.5378202770626456, + "grad_norm": 1.4223426892717412, + "learning_rate": 9.261655241100405e-06, + "loss": 0.6396, + "step": 17548 + }, + { + "epoch": 0.5378509255853868, + "grad_norm": 1.318413238488459, + "learning_rate": 9.260665318451881e-06, + "loss": 0.527, + "step": 17549 + }, + { + "epoch": 0.537881574108128, + "grad_norm": 1.497119925347649, + "learning_rate": 9.259675403088208e-06, + "loss": 0.5906, + "step": 17550 + }, + { + "epoch": 0.5379122226308692, + "grad_norm": 1.56316759084549, + "learning_rate": 9.258685495019145e-06, + "loss": 0.7215, + "step": 17551 + }, + { + "epoch": 0.5379428711536104, + "grad_norm": 1.4116486488223967, + "learning_rate": 9.25769559425444e-06, + "loss": 0.7201, + "step": 17552 + }, + { + "epoch": 0.5379735196763517, + "grad_norm": 1.5602026159526934, + "learning_rate": 9.256705700803855e-06, + "loss": 0.6713, + "step": 17553 + }, + { + "epoch": 0.5380041681990928, + "grad_norm": 1.5133114462429842, + "learning_rate": 9.255715814677137e-06, + "loss": 0.673, + "step": 17554 + }, + { + "epoch": 0.538034816721834, + "grad_norm": 1.5908164867686592, + "learning_rate": 9.254725935884042e-06, + "loss": 0.7633, + "step": 17555 + }, + { + "epoch": 0.5380654652445752, + "grad_norm": 1.3569901154493416, + "learning_rate": 9.253736064434322e-06, + "loss": 0.6189, + "step": 17556 + }, + { + "epoch": 0.5380961137673164, + "grad_norm": 1.4374372677339935, + "learning_rate": 9.252746200337735e-06, + "loss": 0.6581, + "step": 17557 + }, + { + "epoch": 0.5381267622900576, + "grad_norm": 1.6880865876484967, + "learning_rate": 9.251756343604024e-06, + "loss": 0.7713, + "step": 17558 + }, + { + "epoch": 0.5381574108127988, + "grad_norm": 1.582341724161408, + "learning_rate": 9.250766494242957e-06, + "loss": 0.7828, + "step": 17559 + }, + { + "epoch": 0.53818805933554, + "grad_norm": 1.497005497513347, + "learning_rate": 9.249776652264272e-06, + "loss": 0.7635, + "step": 17560 + }, + { + "epoch": 0.5382187078582812, + "grad_norm": 1.5892298674082255, + "learning_rate": 9.248786817677737e-06, + "loss": 0.7086, + "step": 17561 + }, + { + "epoch": 0.5382493563810224, + "grad_norm": 1.6107449051439138, + "learning_rate": 9.247796990493094e-06, + "loss": 0.7471, + "step": 17562 + }, + { + "epoch": 0.5382800049037636, + "grad_norm": 0.6779219739435279, + "learning_rate": 9.246807170720097e-06, + "loss": 0.6023, + "step": 17563 + }, + { + "epoch": 0.5383106534265049, + "grad_norm": 1.4504224284134353, + "learning_rate": 9.245817358368505e-06, + "loss": 0.6722, + "step": 17564 + }, + { + "epoch": 0.538341301949246, + "grad_norm": 1.8007119570302599, + "learning_rate": 9.244827553448069e-06, + "loss": 0.6191, + "step": 17565 + }, + { + "epoch": 0.5383719504719873, + "grad_norm": 1.3691293321598852, + "learning_rate": 9.243837755968536e-06, + "loss": 0.7186, + "step": 17566 + }, + { + "epoch": 0.5384025989947284, + "grad_norm": 1.596547105697559, + "learning_rate": 9.242847965939665e-06, + "loss": 0.7067, + "step": 17567 + }, + { + "epoch": 0.5384332475174697, + "grad_norm": 1.3441439486477536, + "learning_rate": 9.24185818337121e-06, + "loss": 0.6922, + "step": 17568 + }, + { + "epoch": 0.5384638960402108, + "grad_norm": 1.7788675335152857, + "learning_rate": 9.240868408272914e-06, + "loss": 0.7422, + "step": 17569 + }, + { + "epoch": 0.5384945445629521, + "grad_norm": 1.6889322769914825, + "learning_rate": 9.239878640654541e-06, + "loss": 0.7586, + "step": 17570 + }, + { + "epoch": 0.5385251930856932, + "grad_norm": 1.6645814499160387, + "learning_rate": 9.238888880525836e-06, + "loss": 0.7507, + "step": 17571 + }, + { + "epoch": 0.5385558416084345, + "grad_norm": 1.431797197532269, + "learning_rate": 9.237899127896555e-06, + "loss": 0.8123, + "step": 17572 + }, + { + "epoch": 0.5385864901311757, + "grad_norm": 0.6639684395289712, + "learning_rate": 9.236909382776448e-06, + "loss": 0.5533, + "step": 17573 + }, + { + "epoch": 0.5386171386539169, + "grad_norm": 1.5545374236320724, + "learning_rate": 9.235919645175266e-06, + "loss": 0.7737, + "step": 17574 + }, + { + "epoch": 0.5386477871766581, + "grad_norm": 0.6815191986248595, + "learning_rate": 9.234929915102766e-06, + "loss": 0.5587, + "step": 17575 + }, + { + "epoch": 0.5386784356993993, + "grad_norm": 1.644620986968214, + "learning_rate": 9.233940192568696e-06, + "loss": 0.741, + "step": 17576 + }, + { + "epoch": 0.5387090842221405, + "grad_norm": 1.5526101568761526, + "learning_rate": 9.23295047758281e-06, + "loss": 0.7402, + "step": 17577 + }, + { + "epoch": 0.5387397327448817, + "grad_norm": 1.7543310971353416, + "learning_rate": 9.23196077015486e-06, + "loss": 0.7889, + "step": 17578 + }, + { + "epoch": 0.5387703812676229, + "grad_norm": 1.5332214451918655, + "learning_rate": 9.230971070294593e-06, + "loss": 0.7262, + "step": 17579 + }, + { + "epoch": 0.5388010297903641, + "grad_norm": 1.4330666951063884, + "learning_rate": 9.229981378011771e-06, + "loss": 0.6509, + "step": 17580 + }, + { + "epoch": 0.5388316783131053, + "grad_norm": 0.6503180801649161, + "learning_rate": 9.228991693316137e-06, + "loss": 0.5512, + "step": 17581 + }, + { + "epoch": 0.5388623268358466, + "grad_norm": 0.6885018265884122, + "learning_rate": 9.228002016217443e-06, + "loss": 0.58, + "step": 17582 + }, + { + "epoch": 0.5388929753585877, + "grad_norm": 1.6723056870162794, + "learning_rate": 9.227012346725444e-06, + "loss": 0.7506, + "step": 17583 + }, + { + "epoch": 0.538923623881329, + "grad_norm": 1.5063253507144654, + "learning_rate": 9.22602268484989e-06, + "loss": 0.6483, + "step": 17584 + }, + { + "epoch": 0.5389542724040701, + "grad_norm": 1.5800134136175827, + "learning_rate": 9.225033030600531e-06, + "loss": 0.7674, + "step": 17585 + }, + { + "epoch": 0.5389849209268113, + "grad_norm": 1.4419985184077881, + "learning_rate": 9.224043383987123e-06, + "loss": 0.7252, + "step": 17586 + }, + { + "epoch": 0.5390155694495525, + "grad_norm": 1.4478818447719788, + "learning_rate": 9.22305374501941e-06, + "loss": 0.7492, + "step": 17587 + }, + { + "epoch": 0.5390462179722937, + "grad_norm": 1.51255430247194, + "learning_rate": 9.22206411370715e-06, + "loss": 0.7488, + "step": 17588 + }, + { + "epoch": 0.539076866495035, + "grad_norm": 0.6244847296126251, + "learning_rate": 9.221074490060096e-06, + "loss": 0.5383, + "step": 17589 + }, + { + "epoch": 0.5391075150177761, + "grad_norm": 1.5494672636245397, + "learning_rate": 9.220084874087986e-06, + "loss": 0.7331, + "step": 17590 + }, + { + "epoch": 0.5391381635405174, + "grad_norm": 0.6699461668759679, + "learning_rate": 9.219095265800585e-06, + "loss": 0.575, + "step": 17591 + }, + { + "epoch": 0.5391688120632585, + "grad_norm": 1.8691292783636502, + "learning_rate": 9.218105665207636e-06, + "loss": 0.7862, + "step": 17592 + }, + { + "epoch": 0.5391994605859998, + "grad_norm": 1.488239590315606, + "learning_rate": 9.21711607231889e-06, + "loss": 0.6495, + "step": 17593 + }, + { + "epoch": 0.5392301091087409, + "grad_norm": 1.74794592098713, + "learning_rate": 9.216126487144102e-06, + "loss": 0.7665, + "step": 17594 + }, + { + "epoch": 0.5392607576314822, + "grad_norm": 1.2977156869450057, + "learning_rate": 9.21513690969302e-06, + "loss": 0.6898, + "step": 17595 + }, + { + "epoch": 0.5392914061542233, + "grad_norm": 1.5267605471505268, + "learning_rate": 9.214147339975395e-06, + "loss": 0.7412, + "step": 17596 + }, + { + "epoch": 0.5393220546769646, + "grad_norm": 0.6508168420935693, + "learning_rate": 9.213157778000978e-06, + "loss": 0.5586, + "step": 17597 + }, + { + "epoch": 0.5393527031997057, + "grad_norm": 1.4579284459788064, + "learning_rate": 9.212168223779515e-06, + "loss": 0.6647, + "step": 17598 + }, + { + "epoch": 0.539383351722447, + "grad_norm": 1.3414834117999297, + "learning_rate": 9.211178677320764e-06, + "loss": 0.577, + "step": 17599 + }, + { + "epoch": 0.5394140002451882, + "grad_norm": 1.4154852292378903, + "learning_rate": 9.210189138634472e-06, + "loss": 0.6343, + "step": 17600 + }, + { + "epoch": 0.5394446487679294, + "grad_norm": 1.7189580989346613, + "learning_rate": 9.209199607730384e-06, + "loss": 0.6601, + "step": 17601 + }, + { + "epoch": 0.5394752972906706, + "grad_norm": 1.5205558705630275, + "learning_rate": 9.20821008461826e-06, + "loss": 0.6963, + "step": 17602 + }, + { + "epoch": 0.5395059458134118, + "grad_norm": 0.6888082716874245, + "learning_rate": 9.207220569307842e-06, + "loss": 0.583, + "step": 17603 + }, + { + "epoch": 0.539536594336153, + "grad_norm": 1.757151521308909, + "learning_rate": 9.206231061808882e-06, + "loss": 0.7241, + "step": 17604 + }, + { + "epoch": 0.5395672428588942, + "grad_norm": 1.502641385039958, + "learning_rate": 9.20524156213113e-06, + "loss": 0.7361, + "step": 17605 + }, + { + "epoch": 0.5395978913816354, + "grad_norm": 0.6734504585487268, + "learning_rate": 9.204252070284336e-06, + "loss": 0.5712, + "step": 17606 + }, + { + "epoch": 0.5396285399043766, + "grad_norm": 1.6600001081760234, + "learning_rate": 9.203262586278252e-06, + "loss": 0.7323, + "step": 17607 + }, + { + "epoch": 0.5396591884271178, + "grad_norm": 1.462036565604159, + "learning_rate": 9.202273110122624e-06, + "loss": 0.6942, + "step": 17608 + }, + { + "epoch": 0.5396898369498591, + "grad_norm": 1.4424325010391312, + "learning_rate": 9.201283641827203e-06, + "loss": 0.6874, + "step": 17609 + }, + { + "epoch": 0.5397204854726002, + "grad_norm": 1.37748531872572, + "learning_rate": 9.20029418140174e-06, + "loss": 0.7176, + "step": 17610 + }, + { + "epoch": 0.5397511339953415, + "grad_norm": 1.2626181538885959, + "learning_rate": 9.199304728855986e-06, + "loss": 0.5797, + "step": 17611 + }, + { + "epoch": 0.5397817825180826, + "grad_norm": 1.427186596913172, + "learning_rate": 9.19831528419968e-06, + "loss": 0.6489, + "step": 17612 + }, + { + "epoch": 0.5398124310408239, + "grad_norm": 1.5795774474986386, + "learning_rate": 9.197325847442585e-06, + "loss": 0.6453, + "step": 17613 + }, + { + "epoch": 0.539843079563565, + "grad_norm": 1.4547546855270794, + "learning_rate": 9.196336418594437e-06, + "loss": 0.6348, + "step": 17614 + }, + { + "epoch": 0.5398737280863063, + "grad_norm": 1.5997467321843764, + "learning_rate": 9.195346997665e-06, + "loss": 0.7304, + "step": 17615 + }, + { + "epoch": 0.5399043766090474, + "grad_norm": 1.5463925357676658, + "learning_rate": 9.194357584664011e-06, + "loss": 0.7582, + "step": 17616 + }, + { + "epoch": 0.5399350251317886, + "grad_norm": 1.3664283327202822, + "learning_rate": 9.19336817960122e-06, + "loss": 0.7711, + "step": 17617 + }, + { + "epoch": 0.5399656736545299, + "grad_norm": 1.448830538962837, + "learning_rate": 9.192378782486384e-06, + "loss": 0.6765, + "step": 17618 + }, + { + "epoch": 0.539996322177271, + "grad_norm": 1.433623823787945, + "learning_rate": 9.191389393329244e-06, + "loss": 0.679, + "step": 17619 + }, + { + "epoch": 0.5400269707000123, + "grad_norm": 1.305646006292708, + "learning_rate": 9.19040001213955e-06, + "loss": 0.789, + "step": 17620 + }, + { + "epoch": 0.5400576192227534, + "grad_norm": 1.3815420593582288, + "learning_rate": 9.189410638927055e-06, + "loss": 0.7035, + "step": 17621 + }, + { + "epoch": 0.5400882677454947, + "grad_norm": 0.6833832645387339, + "learning_rate": 9.188421273701505e-06, + "loss": 0.5471, + "step": 17622 + }, + { + "epoch": 0.5401189162682358, + "grad_norm": 0.6843539617277528, + "learning_rate": 9.187431916472643e-06, + "loss": 0.5721, + "step": 17623 + }, + { + "epoch": 0.5401495647909771, + "grad_norm": 1.6501008886260364, + "learning_rate": 9.186442567250225e-06, + "loss": 0.8236, + "step": 17624 + }, + { + "epoch": 0.5401802133137182, + "grad_norm": 1.4842303984452265, + "learning_rate": 9.185453226043995e-06, + "loss": 0.6916, + "step": 17625 + }, + { + "epoch": 0.5402108618364595, + "grad_norm": 1.6002424467539547, + "learning_rate": 9.184463892863705e-06, + "loss": 0.7731, + "step": 17626 + }, + { + "epoch": 0.5402415103592006, + "grad_norm": 1.5860997635185323, + "learning_rate": 9.1834745677191e-06, + "loss": 0.7269, + "step": 17627 + }, + { + "epoch": 0.5402721588819419, + "grad_norm": 1.4062645367215132, + "learning_rate": 9.182485250619927e-06, + "loss": 0.7156, + "step": 17628 + }, + { + "epoch": 0.5403028074046831, + "grad_norm": 0.6985565570427996, + "learning_rate": 9.181495941575939e-06, + "loss": 0.5629, + "step": 17629 + }, + { + "epoch": 0.5403334559274243, + "grad_norm": 1.5693718473204539, + "learning_rate": 9.18050664059688e-06, + "loss": 0.7259, + "step": 17630 + }, + { + "epoch": 0.5403641044501655, + "grad_norm": 0.7059420563180463, + "learning_rate": 9.179517347692497e-06, + "loss": 0.5822, + "step": 17631 + }, + { + "epoch": 0.5403947529729067, + "grad_norm": 1.5518687264318558, + "learning_rate": 9.178528062872544e-06, + "loss": 0.6905, + "step": 17632 + }, + { + "epoch": 0.5404254014956479, + "grad_norm": 1.5245181725554338, + "learning_rate": 9.177538786146757e-06, + "loss": 0.6694, + "step": 17633 + }, + { + "epoch": 0.5404560500183891, + "grad_norm": 0.6435250259963143, + "learning_rate": 9.176549517524898e-06, + "loss": 0.5504, + "step": 17634 + }, + { + "epoch": 0.5404866985411303, + "grad_norm": 1.2565122966173246, + "learning_rate": 9.175560257016704e-06, + "loss": 0.6571, + "step": 17635 + }, + { + "epoch": 0.5405173470638716, + "grad_norm": 1.4812672498047663, + "learning_rate": 9.174571004631926e-06, + "loss": 0.6849, + "step": 17636 + }, + { + "epoch": 0.5405479955866127, + "grad_norm": 1.3024380363619832, + "learning_rate": 9.17358176038031e-06, + "loss": 0.6276, + "step": 17637 + }, + { + "epoch": 0.540578644109354, + "grad_norm": 1.3637423796428911, + "learning_rate": 9.172592524271608e-06, + "loss": 0.6214, + "step": 17638 + }, + { + "epoch": 0.5406092926320951, + "grad_norm": 1.5359013022323382, + "learning_rate": 9.17160329631556e-06, + "loss": 0.7508, + "step": 17639 + }, + { + "epoch": 0.5406399411548364, + "grad_norm": 0.6843080754989194, + "learning_rate": 9.170614076521917e-06, + "loss": 0.58, + "step": 17640 + }, + { + "epoch": 0.5406705896775775, + "grad_norm": 1.4616189582111843, + "learning_rate": 9.169624864900425e-06, + "loss": 0.713, + "step": 17641 + }, + { + "epoch": 0.5407012382003188, + "grad_norm": 1.4303385312807755, + "learning_rate": 9.168635661460834e-06, + "loss": 0.6653, + "step": 17642 + }, + { + "epoch": 0.5407318867230599, + "grad_norm": 1.6148312769704933, + "learning_rate": 9.16764646621289e-06, + "loss": 0.7478, + "step": 17643 + }, + { + "epoch": 0.5407625352458012, + "grad_norm": 1.4144243994988157, + "learning_rate": 9.166657279166332e-06, + "loss": 0.6312, + "step": 17644 + }, + { + "epoch": 0.5407931837685424, + "grad_norm": 1.6222483347635581, + "learning_rate": 9.16566810033092e-06, + "loss": 0.7212, + "step": 17645 + }, + { + "epoch": 0.5408238322912836, + "grad_norm": 1.427780444359441, + "learning_rate": 9.16467892971639e-06, + "loss": 0.6108, + "step": 17646 + }, + { + "epoch": 0.5408544808140248, + "grad_norm": 1.338055729655982, + "learning_rate": 9.163689767332492e-06, + "loss": 0.6785, + "step": 17647 + }, + { + "epoch": 0.5408851293367659, + "grad_norm": 1.446456692064376, + "learning_rate": 9.162700613188975e-06, + "loss": 0.6892, + "step": 17648 + }, + { + "epoch": 0.5409157778595072, + "grad_norm": 1.4187751634474113, + "learning_rate": 9.161711467295582e-06, + "loss": 0.6656, + "step": 17649 + }, + { + "epoch": 0.5409464263822483, + "grad_norm": 1.5172153242001551, + "learning_rate": 9.160722329662059e-06, + "loss": 0.7566, + "step": 17650 + }, + { + "epoch": 0.5409770749049896, + "grad_norm": 1.4409183406652712, + "learning_rate": 9.159733200298154e-06, + "loss": 0.7104, + "step": 17651 + }, + { + "epoch": 0.5410077234277307, + "grad_norm": 1.811658765458034, + "learning_rate": 9.158744079213613e-06, + "loss": 0.791, + "step": 17652 + }, + { + "epoch": 0.541038371950472, + "grad_norm": 1.5588938645103354, + "learning_rate": 9.157754966418182e-06, + "loss": 0.6672, + "step": 17653 + }, + { + "epoch": 0.5410690204732131, + "grad_norm": 1.6136686883811626, + "learning_rate": 9.15676586192161e-06, + "loss": 0.6562, + "step": 17654 + }, + { + "epoch": 0.5410996689959544, + "grad_norm": 1.6676822748494498, + "learning_rate": 9.155776765733633e-06, + "loss": 0.843, + "step": 17655 + }, + { + "epoch": 0.5411303175186956, + "grad_norm": 1.4888230084600016, + "learning_rate": 9.154787677864012e-06, + "loss": 0.7946, + "step": 17656 + }, + { + "epoch": 0.5411609660414368, + "grad_norm": 1.7403123373610998, + "learning_rate": 9.153798598322478e-06, + "loss": 0.6382, + "step": 17657 + }, + { + "epoch": 0.541191614564178, + "grad_norm": 1.4499771079790944, + "learning_rate": 9.152809527118784e-06, + "loss": 0.7056, + "step": 17658 + }, + { + "epoch": 0.5412222630869192, + "grad_norm": 1.5986772799268743, + "learning_rate": 9.151820464262676e-06, + "loss": 0.641, + "step": 17659 + }, + { + "epoch": 0.5412529116096604, + "grad_norm": 1.511466520657227, + "learning_rate": 9.150831409763895e-06, + "loss": 0.6159, + "step": 17660 + }, + { + "epoch": 0.5412835601324016, + "grad_norm": 1.430777775424582, + "learning_rate": 9.149842363632193e-06, + "loss": 0.6017, + "step": 17661 + }, + { + "epoch": 0.5413142086551428, + "grad_norm": 0.6876418386438347, + "learning_rate": 9.14885332587731e-06, + "loss": 0.5746, + "step": 17662 + }, + { + "epoch": 0.541344857177884, + "grad_norm": 0.6991505045865688, + "learning_rate": 9.147864296508992e-06, + "loss": 0.5745, + "step": 17663 + }, + { + "epoch": 0.5413755057006252, + "grad_norm": 1.5183659464616832, + "learning_rate": 9.146875275536986e-06, + "loss": 0.6409, + "step": 17664 + }, + { + "epoch": 0.5414061542233665, + "grad_norm": 1.3569464078339928, + "learning_rate": 9.14588626297104e-06, + "loss": 0.6076, + "step": 17665 + }, + { + "epoch": 0.5414368027461076, + "grad_norm": 0.6325959440838653, + "learning_rate": 9.144897258820888e-06, + "loss": 0.5543, + "step": 17666 + }, + { + "epoch": 0.5414674512688489, + "grad_norm": 1.541714405589309, + "learning_rate": 9.143908263096288e-06, + "loss": 0.6817, + "step": 17667 + }, + { + "epoch": 0.54149809979159, + "grad_norm": 1.5498578075844296, + "learning_rate": 9.142919275806977e-06, + "loss": 0.645, + "step": 17668 + }, + { + "epoch": 0.5415287483143313, + "grad_norm": 0.6648826257993525, + "learning_rate": 9.1419302969627e-06, + "loss": 0.5627, + "step": 17669 + }, + { + "epoch": 0.5415593968370724, + "grad_norm": 1.3842094407518362, + "learning_rate": 9.140941326573205e-06, + "loss": 0.6619, + "step": 17670 + }, + { + "epoch": 0.5415900453598137, + "grad_norm": 1.4140609481920146, + "learning_rate": 9.139952364648232e-06, + "loss": 0.7302, + "step": 17671 + }, + { + "epoch": 0.5416206938825548, + "grad_norm": 1.4499156433150269, + "learning_rate": 9.138963411197532e-06, + "loss": 0.7352, + "step": 17672 + }, + { + "epoch": 0.5416513424052961, + "grad_norm": 1.4808256359817094, + "learning_rate": 9.137974466230846e-06, + "loss": 0.7163, + "step": 17673 + }, + { + "epoch": 0.5416819909280373, + "grad_norm": 0.6741006338113689, + "learning_rate": 9.136985529757915e-06, + "loss": 0.5814, + "step": 17674 + }, + { + "epoch": 0.5417126394507785, + "grad_norm": 1.3867314027951343, + "learning_rate": 9.13599660178849e-06, + "loss": 0.5992, + "step": 17675 + }, + { + "epoch": 0.5417432879735197, + "grad_norm": 1.3522595426669684, + "learning_rate": 9.13500768233231e-06, + "loss": 0.6468, + "step": 17676 + }, + { + "epoch": 0.5417739364962609, + "grad_norm": 0.691451818831164, + "learning_rate": 9.13401877139912e-06, + "loss": 0.5886, + "step": 17677 + }, + { + "epoch": 0.5418045850190021, + "grad_norm": 1.5523196030429953, + "learning_rate": 9.133029868998666e-06, + "loss": 0.7176, + "step": 17678 + }, + { + "epoch": 0.5418352335417432, + "grad_norm": 0.6784296282831855, + "learning_rate": 9.132040975140688e-06, + "loss": 0.5779, + "step": 17679 + }, + { + "epoch": 0.5418658820644845, + "grad_norm": 0.6708741176597921, + "learning_rate": 9.131052089834934e-06, + "loss": 0.5809, + "step": 17680 + }, + { + "epoch": 0.5418965305872256, + "grad_norm": 1.3855680952110432, + "learning_rate": 9.130063213091148e-06, + "loss": 0.6641, + "step": 17681 + }, + { + "epoch": 0.5419271791099669, + "grad_norm": 1.5674214924513898, + "learning_rate": 9.129074344919066e-06, + "loss": 0.7177, + "step": 17682 + }, + { + "epoch": 0.5419578276327081, + "grad_norm": 1.60228545410641, + "learning_rate": 9.128085485328443e-06, + "loss": 0.6543, + "step": 17683 + }, + { + "epoch": 0.5419884761554493, + "grad_norm": 1.4873621512007211, + "learning_rate": 9.127096634329019e-06, + "loss": 0.7079, + "step": 17684 + }, + { + "epoch": 0.5420191246781905, + "grad_norm": 0.6416049909111243, + "learning_rate": 9.126107791930526e-06, + "loss": 0.5673, + "step": 17685 + }, + { + "epoch": 0.5420497732009317, + "grad_norm": 0.6538858798813517, + "learning_rate": 9.125118958142724e-06, + "loss": 0.5685, + "step": 17686 + }, + { + "epoch": 0.5420804217236729, + "grad_norm": 1.5407730539398923, + "learning_rate": 9.124130132975343e-06, + "loss": 0.7008, + "step": 17687 + }, + { + "epoch": 0.5421110702464141, + "grad_norm": 1.5123879467434165, + "learning_rate": 9.123141316438137e-06, + "loss": 0.6391, + "step": 17688 + }, + { + "epoch": 0.5421417187691553, + "grad_norm": 1.5898844313068976, + "learning_rate": 9.122152508540844e-06, + "loss": 0.8098, + "step": 17689 + }, + { + "epoch": 0.5421723672918966, + "grad_norm": 1.422910483750236, + "learning_rate": 9.121163709293203e-06, + "loss": 0.7776, + "step": 17690 + }, + { + "epoch": 0.5422030158146377, + "grad_norm": 0.6342973139364657, + "learning_rate": 9.120174918704964e-06, + "loss": 0.5616, + "step": 17691 + }, + { + "epoch": 0.542233664337379, + "grad_norm": 1.405409863788141, + "learning_rate": 9.119186136785866e-06, + "loss": 0.8069, + "step": 17692 + }, + { + "epoch": 0.5422643128601201, + "grad_norm": 0.642394055498334, + "learning_rate": 9.118197363545652e-06, + "loss": 0.575, + "step": 17693 + }, + { + "epoch": 0.5422949613828614, + "grad_norm": 1.5615669999428918, + "learning_rate": 9.117208598994063e-06, + "loss": 0.7214, + "step": 17694 + }, + { + "epoch": 0.5423256099056025, + "grad_norm": 1.3326138757397454, + "learning_rate": 9.11621984314085e-06, + "loss": 0.6403, + "step": 17695 + }, + { + "epoch": 0.5423562584283438, + "grad_norm": 1.423684456616128, + "learning_rate": 9.11523109599574e-06, + "loss": 0.683, + "step": 17696 + }, + { + "epoch": 0.5423869069510849, + "grad_norm": 1.3175633744731818, + "learning_rate": 9.114242357568492e-06, + "loss": 0.6697, + "step": 17697 + }, + { + "epoch": 0.5424175554738262, + "grad_norm": 1.6726023125980278, + "learning_rate": 9.113253627868835e-06, + "loss": 0.8409, + "step": 17698 + }, + { + "epoch": 0.5424482039965673, + "grad_norm": 1.533918257565086, + "learning_rate": 9.11226490690652e-06, + "loss": 0.725, + "step": 17699 + }, + { + "epoch": 0.5424788525193086, + "grad_norm": 1.7278555633973036, + "learning_rate": 9.111276194691288e-06, + "loss": 0.887, + "step": 17700 + }, + { + "epoch": 0.5425095010420498, + "grad_norm": 1.3213617695107507, + "learning_rate": 9.110287491232874e-06, + "loss": 0.6116, + "step": 17701 + }, + { + "epoch": 0.542540149564791, + "grad_norm": 1.3497573155722258, + "learning_rate": 9.10929879654103e-06, + "loss": 0.7931, + "step": 17702 + }, + { + "epoch": 0.5425707980875322, + "grad_norm": 1.4642380848684613, + "learning_rate": 9.108310110625489e-06, + "loss": 0.7198, + "step": 17703 + }, + { + "epoch": 0.5426014466102734, + "grad_norm": 0.6841565715254485, + "learning_rate": 9.107321433495996e-06, + "loss": 0.5652, + "step": 17704 + }, + { + "epoch": 0.5426320951330146, + "grad_norm": 1.4965584630588695, + "learning_rate": 9.106332765162297e-06, + "loss": 0.7628, + "step": 17705 + }, + { + "epoch": 0.5426627436557558, + "grad_norm": 1.4205109878824316, + "learning_rate": 9.105344105634127e-06, + "loss": 0.7184, + "step": 17706 + }, + { + "epoch": 0.542693392178497, + "grad_norm": 0.6822751871449648, + "learning_rate": 9.104355454921232e-06, + "loss": 0.5802, + "step": 17707 + }, + { + "epoch": 0.5427240407012383, + "grad_norm": 1.4962277598486293, + "learning_rate": 9.103366813033353e-06, + "loss": 0.7471, + "step": 17708 + }, + { + "epoch": 0.5427546892239794, + "grad_norm": 1.4830976342377291, + "learning_rate": 9.102378179980226e-06, + "loss": 0.6848, + "step": 17709 + }, + { + "epoch": 0.5427853377467206, + "grad_norm": 1.4959541593510715, + "learning_rate": 9.101389555771602e-06, + "loss": 0.6498, + "step": 17710 + }, + { + "epoch": 0.5428159862694618, + "grad_norm": 0.6870356945187251, + "learning_rate": 9.100400940417215e-06, + "loss": 0.5982, + "step": 17711 + }, + { + "epoch": 0.542846634792203, + "grad_norm": 1.479504326560945, + "learning_rate": 9.099412333926804e-06, + "loss": 0.7103, + "step": 17712 + }, + { + "epoch": 0.5428772833149442, + "grad_norm": 1.3006861195623796, + "learning_rate": 9.098423736310119e-06, + "loss": 0.7159, + "step": 17713 + }, + { + "epoch": 0.5429079318376854, + "grad_norm": 1.526584426569433, + "learning_rate": 9.09743514757689e-06, + "loss": 0.6449, + "step": 17714 + }, + { + "epoch": 0.5429385803604266, + "grad_norm": 1.4844289833926763, + "learning_rate": 9.096446567736866e-06, + "loss": 0.7314, + "step": 17715 + }, + { + "epoch": 0.5429692288831678, + "grad_norm": 1.392306883743097, + "learning_rate": 9.095457996799787e-06, + "loss": 0.7372, + "step": 17716 + }, + { + "epoch": 0.542999877405909, + "grad_norm": 1.4149803953313973, + "learning_rate": 9.094469434775387e-06, + "loss": 0.6186, + "step": 17717 + }, + { + "epoch": 0.5430305259286502, + "grad_norm": 1.421274162557077, + "learning_rate": 9.093480881673417e-06, + "loss": 0.6905, + "step": 17718 + }, + { + "epoch": 0.5430611744513915, + "grad_norm": 0.6631990646471567, + "learning_rate": 9.092492337503611e-06, + "loss": 0.571, + "step": 17719 + }, + { + "epoch": 0.5430918229741326, + "grad_norm": 0.6643893622515776, + "learning_rate": 9.091503802275707e-06, + "loss": 0.5833, + "step": 17720 + }, + { + "epoch": 0.5431224714968739, + "grad_norm": 1.5219136938329736, + "learning_rate": 9.090515275999452e-06, + "loss": 0.6359, + "step": 17721 + }, + { + "epoch": 0.543153120019615, + "grad_norm": 1.6823074413825074, + "learning_rate": 9.089526758684581e-06, + "loss": 0.6428, + "step": 17722 + }, + { + "epoch": 0.5431837685423563, + "grad_norm": 1.5528015405715374, + "learning_rate": 9.088538250340833e-06, + "loss": 0.6789, + "step": 17723 + }, + { + "epoch": 0.5432144170650974, + "grad_norm": 1.7333960843240346, + "learning_rate": 9.087549750977956e-06, + "loss": 0.6314, + "step": 17724 + }, + { + "epoch": 0.5432450655878387, + "grad_norm": 1.5596557885249218, + "learning_rate": 9.08656126060568e-06, + "loss": 0.7755, + "step": 17725 + }, + { + "epoch": 0.5432757141105798, + "grad_norm": 1.4684710136773524, + "learning_rate": 9.085572779233752e-06, + "loss": 0.6825, + "step": 17726 + }, + { + "epoch": 0.5433063626333211, + "grad_norm": 1.6795149453298555, + "learning_rate": 9.084584306871913e-06, + "loss": 0.6602, + "step": 17727 + }, + { + "epoch": 0.5433370111560623, + "grad_norm": 1.3554199530193238, + "learning_rate": 9.08359584352989e-06, + "loss": 0.7309, + "step": 17728 + }, + { + "epoch": 0.5433676596788035, + "grad_norm": 1.8015042717215495, + "learning_rate": 9.082607389217439e-06, + "loss": 0.8074, + "step": 17729 + }, + { + "epoch": 0.5433983082015447, + "grad_norm": 0.669720740105757, + "learning_rate": 9.081618943944291e-06, + "loss": 0.5689, + "step": 17730 + }, + { + "epoch": 0.5434289567242859, + "grad_norm": 1.484153626583734, + "learning_rate": 9.080630507720184e-06, + "loss": 0.7302, + "step": 17731 + }, + { + "epoch": 0.5434596052470271, + "grad_norm": 1.4325144593381491, + "learning_rate": 9.079642080554863e-06, + "loss": 0.6809, + "step": 17732 + }, + { + "epoch": 0.5434902537697683, + "grad_norm": 0.649207430292617, + "learning_rate": 9.078653662458061e-06, + "loss": 0.5127, + "step": 17733 + }, + { + "epoch": 0.5435209022925095, + "grad_norm": 1.6793548097745405, + "learning_rate": 9.07766525343952e-06, + "loss": 0.8734, + "step": 17734 + }, + { + "epoch": 0.5435515508152508, + "grad_norm": 1.6687188915151363, + "learning_rate": 9.076676853508982e-06, + "loss": 0.8495, + "step": 17735 + }, + { + "epoch": 0.5435821993379919, + "grad_norm": 1.5619508346362345, + "learning_rate": 9.075688462676181e-06, + "loss": 0.6131, + "step": 17736 + }, + { + "epoch": 0.5436128478607332, + "grad_norm": 0.6726486722037474, + "learning_rate": 9.07470008095086e-06, + "loss": 0.5509, + "step": 17737 + }, + { + "epoch": 0.5436434963834743, + "grad_norm": 1.675180877695481, + "learning_rate": 9.073711708342757e-06, + "loss": 0.842, + "step": 17738 + }, + { + "epoch": 0.5436741449062156, + "grad_norm": 1.5070396997884103, + "learning_rate": 9.072723344861604e-06, + "loss": 0.7061, + "step": 17739 + }, + { + "epoch": 0.5437047934289567, + "grad_norm": 1.382008771813685, + "learning_rate": 9.071734990517152e-06, + "loss": 0.7029, + "step": 17740 + }, + { + "epoch": 0.5437354419516979, + "grad_norm": 1.5166710585828855, + "learning_rate": 9.070746645319126e-06, + "loss": 0.6995, + "step": 17741 + }, + { + "epoch": 0.5437660904744391, + "grad_norm": 1.566005701104469, + "learning_rate": 9.069758309277275e-06, + "loss": 0.6818, + "step": 17742 + }, + { + "epoch": 0.5437967389971803, + "grad_norm": 1.5036667115496911, + "learning_rate": 9.068769982401334e-06, + "loss": 0.7484, + "step": 17743 + }, + { + "epoch": 0.5438273875199215, + "grad_norm": 1.5916893878769516, + "learning_rate": 9.067781664701038e-06, + "loss": 0.7671, + "step": 17744 + }, + { + "epoch": 0.5438580360426627, + "grad_norm": 1.4716917913406902, + "learning_rate": 9.06679335618613e-06, + "loss": 0.8285, + "step": 17745 + }, + { + "epoch": 0.543888684565404, + "grad_norm": 1.5908292311450576, + "learning_rate": 9.065805056866346e-06, + "loss": 0.6961, + "step": 17746 + }, + { + "epoch": 0.5439193330881451, + "grad_norm": 1.5148930314426166, + "learning_rate": 9.06481676675142e-06, + "loss": 0.8198, + "step": 17747 + }, + { + "epoch": 0.5439499816108864, + "grad_norm": 1.6031394340795084, + "learning_rate": 9.063828485851096e-06, + "loss": 0.7168, + "step": 17748 + }, + { + "epoch": 0.5439806301336275, + "grad_norm": 1.3629669356811587, + "learning_rate": 9.062840214175113e-06, + "loss": 0.68, + "step": 17749 + }, + { + "epoch": 0.5440112786563688, + "grad_norm": 1.5707004235918887, + "learning_rate": 9.061851951733199e-06, + "loss": 0.7127, + "step": 17750 + }, + { + "epoch": 0.5440419271791099, + "grad_norm": 1.5661555697213114, + "learning_rate": 9.060863698535104e-06, + "loss": 0.6378, + "step": 17751 + }, + { + "epoch": 0.5440725757018512, + "grad_norm": 0.7050988894515405, + "learning_rate": 9.059875454590553e-06, + "loss": 0.5722, + "step": 17752 + }, + { + "epoch": 0.5441032242245923, + "grad_norm": 0.696285536372195, + "learning_rate": 9.058887219909294e-06, + "loss": 0.5494, + "step": 17753 + }, + { + "epoch": 0.5441338727473336, + "grad_norm": 1.4225595154761845, + "learning_rate": 9.05789899450106e-06, + "loss": 0.7169, + "step": 17754 + }, + { + "epoch": 0.5441645212700748, + "grad_norm": 1.456704469542516, + "learning_rate": 9.056910778375584e-06, + "loss": 0.74, + "step": 17755 + }, + { + "epoch": 0.544195169792816, + "grad_norm": 1.4623995171096744, + "learning_rate": 9.055922571542612e-06, + "loss": 0.6891, + "step": 17756 + }, + { + "epoch": 0.5442258183155572, + "grad_norm": 1.4768045199189634, + "learning_rate": 9.054934374011875e-06, + "loss": 0.6709, + "step": 17757 + }, + { + "epoch": 0.5442564668382984, + "grad_norm": 1.7381999540505695, + "learning_rate": 9.053946185793112e-06, + "loss": 0.6752, + "step": 17758 + }, + { + "epoch": 0.5442871153610396, + "grad_norm": 1.6318037904385003, + "learning_rate": 9.052958006896057e-06, + "loss": 0.7595, + "step": 17759 + }, + { + "epoch": 0.5443177638837808, + "grad_norm": 1.3965834128600125, + "learning_rate": 9.05196983733045e-06, + "loss": 0.671, + "step": 17760 + }, + { + "epoch": 0.544348412406522, + "grad_norm": 1.4444455340083557, + "learning_rate": 9.050981677106027e-06, + "loss": 0.6872, + "step": 17761 + }, + { + "epoch": 0.5443790609292632, + "grad_norm": 1.3475487614976225, + "learning_rate": 9.04999352623253e-06, + "loss": 0.6797, + "step": 17762 + }, + { + "epoch": 0.5444097094520044, + "grad_norm": 1.5154124034464862, + "learning_rate": 9.049005384719679e-06, + "loss": 0.7153, + "step": 17763 + }, + { + "epoch": 0.5444403579747457, + "grad_norm": 1.5766671776081216, + "learning_rate": 9.048017252577231e-06, + "loss": 0.7304, + "step": 17764 + }, + { + "epoch": 0.5444710064974868, + "grad_norm": 1.594134237751191, + "learning_rate": 9.047029129814908e-06, + "loss": 0.6913, + "step": 17765 + }, + { + "epoch": 0.5445016550202281, + "grad_norm": 1.3849255814436447, + "learning_rate": 9.04604101644245e-06, + "loss": 0.7739, + "step": 17766 + }, + { + "epoch": 0.5445323035429692, + "grad_norm": 1.4479282803214546, + "learning_rate": 9.045052912469595e-06, + "loss": 0.6901, + "step": 17767 + }, + { + "epoch": 0.5445629520657105, + "grad_norm": 1.4629587429735111, + "learning_rate": 9.044064817906078e-06, + "loss": 0.6323, + "step": 17768 + }, + { + "epoch": 0.5445936005884516, + "grad_norm": 1.6882375350478154, + "learning_rate": 9.043076732761636e-06, + "loss": 0.801, + "step": 17769 + }, + { + "epoch": 0.5446242491111929, + "grad_norm": 1.4639351517137174, + "learning_rate": 9.042088657046002e-06, + "loss": 0.7176, + "step": 17770 + }, + { + "epoch": 0.544654897633934, + "grad_norm": 1.4494511102562908, + "learning_rate": 9.041100590768913e-06, + "loss": 0.7586, + "step": 17771 + }, + { + "epoch": 0.5446855461566752, + "grad_norm": 1.2991108753158926, + "learning_rate": 9.04011253394011e-06, + "loss": 0.7809, + "step": 17772 + }, + { + "epoch": 0.5447161946794165, + "grad_norm": 1.4863140151125915, + "learning_rate": 9.03912448656932e-06, + "loss": 0.7222, + "step": 17773 + }, + { + "epoch": 0.5447468432021576, + "grad_norm": 1.5337069960092944, + "learning_rate": 9.038136448666282e-06, + "loss": 0.7588, + "step": 17774 + }, + { + "epoch": 0.5447774917248989, + "grad_norm": 0.7194123044677897, + "learning_rate": 9.037148420240733e-06, + "loss": 0.5834, + "step": 17775 + }, + { + "epoch": 0.54480814024764, + "grad_norm": 1.7065308807043469, + "learning_rate": 9.036160401302407e-06, + "loss": 0.7637, + "step": 17776 + }, + { + "epoch": 0.5448387887703813, + "grad_norm": 1.3570501809202231, + "learning_rate": 9.035172391861038e-06, + "loss": 0.6084, + "step": 17777 + }, + { + "epoch": 0.5448694372931224, + "grad_norm": 1.350500604200895, + "learning_rate": 9.034184391926363e-06, + "loss": 0.5999, + "step": 17778 + }, + { + "epoch": 0.5449000858158637, + "grad_norm": 1.5661620051636027, + "learning_rate": 9.033196401508114e-06, + "loss": 0.7202, + "step": 17779 + }, + { + "epoch": 0.5449307343386048, + "grad_norm": 1.3918543625320556, + "learning_rate": 9.032208420616031e-06, + "loss": 0.7797, + "step": 17780 + }, + { + "epoch": 0.5449613828613461, + "grad_norm": 1.5275388406543025, + "learning_rate": 9.031220449259849e-06, + "loss": 0.7024, + "step": 17781 + }, + { + "epoch": 0.5449920313840872, + "grad_norm": 1.433254005817578, + "learning_rate": 9.030232487449292e-06, + "loss": 0.69, + "step": 17782 + }, + { + "epoch": 0.5450226799068285, + "grad_norm": 0.6677153484798488, + "learning_rate": 9.029244535194109e-06, + "loss": 0.5841, + "step": 17783 + }, + { + "epoch": 0.5450533284295697, + "grad_norm": 1.448632922334323, + "learning_rate": 9.028256592504027e-06, + "loss": 0.6786, + "step": 17784 + }, + { + "epoch": 0.5450839769523109, + "grad_norm": 1.6088221243065584, + "learning_rate": 9.027268659388778e-06, + "loss": 0.7587, + "step": 17785 + }, + { + "epoch": 0.5451146254750521, + "grad_norm": 1.439195070194275, + "learning_rate": 9.026280735858103e-06, + "loss": 0.627, + "step": 17786 + }, + { + "epoch": 0.5451452739977933, + "grad_norm": 1.3048451375860959, + "learning_rate": 9.02529282192173e-06, + "loss": 0.7027, + "step": 17787 + }, + { + "epoch": 0.5451759225205345, + "grad_norm": 1.3029694845902597, + "learning_rate": 9.0243049175894e-06, + "loss": 0.6186, + "step": 17788 + }, + { + "epoch": 0.5452065710432757, + "grad_norm": 1.4750318033090306, + "learning_rate": 9.02331702287084e-06, + "loss": 0.7195, + "step": 17789 + }, + { + "epoch": 0.5452372195660169, + "grad_norm": 0.706735229584846, + "learning_rate": 9.022329137775788e-06, + "loss": 0.5548, + "step": 17790 + }, + { + "epoch": 0.5452678680887582, + "grad_norm": 1.4992547407670633, + "learning_rate": 9.021341262313978e-06, + "loss": 0.5943, + "step": 17791 + }, + { + "epoch": 0.5452985166114993, + "grad_norm": 1.4836278521073116, + "learning_rate": 9.020353396495146e-06, + "loss": 0.7648, + "step": 17792 + }, + { + "epoch": 0.5453291651342406, + "grad_norm": 1.6077916197338964, + "learning_rate": 9.019365540329017e-06, + "loss": 0.7344, + "step": 17793 + }, + { + "epoch": 0.5453598136569817, + "grad_norm": 1.538009518907855, + "learning_rate": 9.018377693825335e-06, + "loss": 0.6528, + "step": 17794 + }, + { + "epoch": 0.545390462179723, + "grad_norm": 1.560042751090898, + "learning_rate": 9.017389856993822e-06, + "loss": 0.6686, + "step": 17795 + }, + { + "epoch": 0.5454211107024641, + "grad_norm": 1.5810270558634545, + "learning_rate": 9.016402029844226e-06, + "loss": 0.6227, + "step": 17796 + }, + { + "epoch": 0.5454517592252054, + "grad_norm": 1.568567706666832, + "learning_rate": 9.01541421238627e-06, + "loss": 0.7043, + "step": 17797 + }, + { + "epoch": 0.5454824077479465, + "grad_norm": 1.5652050618152331, + "learning_rate": 9.014426404629686e-06, + "loss": 0.6889, + "step": 17798 + }, + { + "epoch": 0.5455130562706878, + "grad_norm": 1.8072558625601913, + "learning_rate": 9.013438606584213e-06, + "loss": 0.785, + "step": 17799 + }, + { + "epoch": 0.545543704793429, + "grad_norm": 0.6675809098041192, + "learning_rate": 9.012450818259584e-06, + "loss": 0.5728, + "step": 17800 + }, + { + "epoch": 0.5455743533161702, + "grad_norm": 1.6823286981420513, + "learning_rate": 9.011463039665527e-06, + "loss": 0.7288, + "step": 17801 + }, + { + "epoch": 0.5456050018389114, + "grad_norm": 1.6496773997342629, + "learning_rate": 9.01047527081178e-06, + "loss": 0.7075, + "step": 17802 + }, + { + "epoch": 0.5456356503616525, + "grad_norm": 1.362448315373319, + "learning_rate": 9.009487511708074e-06, + "loss": 0.6535, + "step": 17803 + }, + { + "epoch": 0.5456662988843938, + "grad_norm": 1.439853423606339, + "learning_rate": 9.008499762364134e-06, + "loss": 0.6521, + "step": 17804 + }, + { + "epoch": 0.5456969474071349, + "grad_norm": 1.5674380513192838, + "learning_rate": 9.007512022789709e-06, + "loss": 0.7299, + "step": 17805 + }, + { + "epoch": 0.5457275959298762, + "grad_norm": 1.5295905546253115, + "learning_rate": 9.006524292994512e-06, + "loss": 0.6647, + "step": 17806 + }, + { + "epoch": 0.5457582444526173, + "grad_norm": 1.4903906320176608, + "learning_rate": 9.005536572988296e-06, + "loss": 0.6656, + "step": 17807 + }, + { + "epoch": 0.5457888929753586, + "grad_norm": 1.6274408229315198, + "learning_rate": 9.004548862780777e-06, + "loss": 0.7387, + "step": 17808 + }, + { + "epoch": 0.5458195414980997, + "grad_norm": 1.5106136048049086, + "learning_rate": 9.00356116238169e-06, + "loss": 0.778, + "step": 17809 + }, + { + "epoch": 0.545850190020841, + "grad_norm": 1.394795715176498, + "learning_rate": 9.002573471800776e-06, + "loss": 0.6316, + "step": 17810 + }, + { + "epoch": 0.5458808385435822, + "grad_norm": 1.507783547958544, + "learning_rate": 9.001585791047758e-06, + "loss": 0.6277, + "step": 17811 + }, + { + "epoch": 0.5459114870663234, + "grad_norm": 1.5761275696479582, + "learning_rate": 9.000598120132368e-06, + "loss": 0.6859, + "step": 17812 + }, + { + "epoch": 0.5459421355890646, + "grad_norm": 1.6288541198241484, + "learning_rate": 8.999610459064344e-06, + "loss": 0.7388, + "step": 17813 + }, + { + "epoch": 0.5459727841118058, + "grad_norm": 0.6719559263841359, + "learning_rate": 8.99862280785341e-06, + "loss": 0.5488, + "step": 17814 + }, + { + "epoch": 0.546003432634547, + "grad_norm": 1.5925486436167782, + "learning_rate": 8.997635166509307e-06, + "loss": 0.7132, + "step": 17815 + }, + { + "epoch": 0.5460340811572882, + "grad_norm": 0.6709557187597575, + "learning_rate": 8.996647535041761e-06, + "loss": 0.5719, + "step": 17816 + }, + { + "epoch": 0.5460647296800294, + "grad_norm": 1.3546006485859028, + "learning_rate": 8.9956599134605e-06, + "loss": 0.748, + "step": 17817 + }, + { + "epoch": 0.5460953782027707, + "grad_norm": 0.6820550995824674, + "learning_rate": 8.994672301775261e-06, + "loss": 0.5749, + "step": 17818 + }, + { + "epoch": 0.5461260267255118, + "grad_norm": 1.3321595405160833, + "learning_rate": 8.993684699995772e-06, + "loss": 0.6518, + "step": 17819 + }, + { + "epoch": 0.5461566752482531, + "grad_norm": 1.7106000627283044, + "learning_rate": 8.992697108131765e-06, + "loss": 0.6957, + "step": 17820 + }, + { + "epoch": 0.5461873237709942, + "grad_norm": 1.3546855322887164, + "learning_rate": 8.991709526192973e-06, + "loss": 0.651, + "step": 17821 + }, + { + "epoch": 0.5462179722937355, + "grad_norm": 1.4010345623980849, + "learning_rate": 8.990721954189121e-06, + "loss": 0.7827, + "step": 17822 + }, + { + "epoch": 0.5462486208164766, + "grad_norm": 1.5649798010083475, + "learning_rate": 8.989734392129947e-06, + "loss": 0.6722, + "step": 17823 + }, + { + "epoch": 0.5462792693392179, + "grad_norm": 1.6126142123969314, + "learning_rate": 8.98874684002518e-06, + "loss": 0.825, + "step": 17824 + }, + { + "epoch": 0.546309917861959, + "grad_norm": 1.4542508486336658, + "learning_rate": 8.987759297884544e-06, + "loss": 0.5747, + "step": 17825 + }, + { + "epoch": 0.5463405663847003, + "grad_norm": 0.6690604049660022, + "learning_rate": 8.98677176571778e-06, + "loss": 0.5406, + "step": 17826 + }, + { + "epoch": 0.5463712149074414, + "grad_norm": 1.6142265243729688, + "learning_rate": 8.985784243534611e-06, + "loss": 0.7067, + "step": 17827 + }, + { + "epoch": 0.5464018634301827, + "grad_norm": 1.69107543372973, + "learning_rate": 8.984796731344769e-06, + "loss": 0.6646, + "step": 17828 + }, + { + "epoch": 0.5464325119529239, + "grad_norm": 1.5827851729784594, + "learning_rate": 8.983809229157984e-06, + "loss": 0.7031, + "step": 17829 + }, + { + "epoch": 0.5464631604756651, + "grad_norm": 1.4826435424045021, + "learning_rate": 8.982821736983988e-06, + "loss": 0.6868, + "step": 17830 + }, + { + "epoch": 0.5464938089984063, + "grad_norm": 0.6548574739000047, + "learning_rate": 8.981834254832507e-06, + "loss": 0.5437, + "step": 17831 + }, + { + "epoch": 0.5465244575211475, + "grad_norm": 1.3806833127074676, + "learning_rate": 8.980846782713276e-06, + "loss": 0.7421, + "step": 17832 + }, + { + "epoch": 0.5465551060438887, + "grad_norm": 1.4240247661897203, + "learning_rate": 8.979859320636021e-06, + "loss": 0.7326, + "step": 17833 + }, + { + "epoch": 0.5465857545666298, + "grad_norm": 0.6583022118557664, + "learning_rate": 8.978871868610475e-06, + "loss": 0.5593, + "step": 17834 + }, + { + "epoch": 0.5466164030893711, + "grad_norm": 1.6966686317124704, + "learning_rate": 8.977884426646368e-06, + "loss": 0.7947, + "step": 17835 + }, + { + "epoch": 0.5466470516121122, + "grad_norm": 1.5152703665057243, + "learning_rate": 8.97689699475342e-06, + "loss": 0.7266, + "step": 17836 + }, + { + "epoch": 0.5466777001348535, + "grad_norm": 1.415695799719325, + "learning_rate": 8.975909572941374e-06, + "loss": 0.6393, + "step": 17837 + }, + { + "epoch": 0.5467083486575947, + "grad_norm": 1.7253423137672061, + "learning_rate": 8.974922161219951e-06, + "loss": 0.7187, + "step": 17838 + }, + { + "epoch": 0.5467389971803359, + "grad_norm": 1.5052907595175191, + "learning_rate": 8.973934759598881e-06, + "loss": 0.7098, + "step": 17839 + }, + { + "epoch": 0.5467696457030771, + "grad_norm": 1.4152031304526036, + "learning_rate": 8.972947368087897e-06, + "loss": 0.7312, + "step": 17840 + }, + { + "epoch": 0.5468002942258183, + "grad_norm": 1.756181002712126, + "learning_rate": 8.971959986696721e-06, + "loss": 0.8075, + "step": 17841 + }, + { + "epoch": 0.5468309427485595, + "grad_norm": 1.7327284369795555, + "learning_rate": 8.970972615435089e-06, + "loss": 0.6893, + "step": 17842 + }, + { + "epoch": 0.5468615912713007, + "grad_norm": 1.4767372895934892, + "learning_rate": 8.96998525431273e-06, + "loss": 0.7663, + "step": 17843 + }, + { + "epoch": 0.5468922397940419, + "grad_norm": 1.6255690928912654, + "learning_rate": 8.968997903339364e-06, + "loss": 0.7279, + "step": 17844 + }, + { + "epoch": 0.5469228883167832, + "grad_norm": 1.5383916871070866, + "learning_rate": 8.968010562524728e-06, + "loss": 0.67, + "step": 17845 + }, + { + "epoch": 0.5469535368395243, + "grad_norm": 1.4162398442566149, + "learning_rate": 8.967023231878553e-06, + "loss": 0.6173, + "step": 17846 + }, + { + "epoch": 0.5469841853622656, + "grad_norm": 1.8431739495912993, + "learning_rate": 8.966035911410554e-06, + "loss": 0.6968, + "step": 17847 + }, + { + "epoch": 0.5470148338850067, + "grad_norm": 0.7004461190191242, + "learning_rate": 8.965048601130473e-06, + "loss": 0.5552, + "step": 17848 + }, + { + "epoch": 0.547045482407748, + "grad_norm": 1.2310070721076563, + "learning_rate": 8.964061301048033e-06, + "loss": 0.6118, + "step": 17849 + }, + { + "epoch": 0.5470761309304891, + "grad_norm": 1.3986635146038078, + "learning_rate": 8.963074011172957e-06, + "loss": 0.715, + "step": 17850 + }, + { + "epoch": 0.5471067794532304, + "grad_norm": 0.6749842783656504, + "learning_rate": 8.962086731514984e-06, + "loss": 0.556, + "step": 17851 + }, + { + "epoch": 0.5471374279759715, + "grad_norm": 1.567281895764866, + "learning_rate": 8.96109946208383e-06, + "loss": 0.6359, + "step": 17852 + }, + { + "epoch": 0.5471680764987128, + "grad_norm": 0.6717760880326735, + "learning_rate": 8.960112202889235e-06, + "loss": 0.5641, + "step": 17853 + }, + { + "epoch": 0.547198725021454, + "grad_norm": 1.5340014906078738, + "learning_rate": 8.959124953940917e-06, + "loss": 0.7098, + "step": 17854 + }, + { + "epoch": 0.5472293735441952, + "grad_norm": 1.5732351375558427, + "learning_rate": 8.958137715248608e-06, + "loss": 0.6532, + "step": 17855 + }, + { + "epoch": 0.5472600220669364, + "grad_norm": 1.556049703175008, + "learning_rate": 8.957150486822033e-06, + "loss": 0.6554, + "step": 17856 + }, + { + "epoch": 0.5472906705896776, + "grad_norm": 1.8666504621186173, + "learning_rate": 8.956163268670927e-06, + "loss": 0.7297, + "step": 17857 + }, + { + "epoch": 0.5473213191124188, + "grad_norm": 1.4556256266407641, + "learning_rate": 8.955176060805003e-06, + "loss": 0.7056, + "step": 17858 + }, + { + "epoch": 0.54735196763516, + "grad_norm": 1.6285070058236069, + "learning_rate": 8.954188863234003e-06, + "loss": 0.6129, + "step": 17859 + }, + { + "epoch": 0.5473826161579012, + "grad_norm": 1.5079132137255524, + "learning_rate": 8.953201675967642e-06, + "loss": 0.6987, + "step": 17860 + }, + { + "epoch": 0.5474132646806424, + "grad_norm": 0.7030346990859792, + "learning_rate": 8.95221449901566e-06, + "loss": 0.5683, + "step": 17861 + }, + { + "epoch": 0.5474439132033836, + "grad_norm": 0.6787023349725504, + "learning_rate": 8.951227332387774e-06, + "loss": 0.5643, + "step": 17862 + }, + { + "epoch": 0.5474745617261249, + "grad_norm": 1.5076363652755782, + "learning_rate": 8.95024017609371e-06, + "loss": 0.7066, + "step": 17863 + }, + { + "epoch": 0.547505210248866, + "grad_norm": 1.425264182038557, + "learning_rate": 8.9492530301432e-06, + "loss": 0.6792, + "step": 17864 + }, + { + "epoch": 0.5475358587716072, + "grad_norm": 0.6288338954193764, + "learning_rate": 8.94826589454597e-06, + "loss": 0.5309, + "step": 17865 + }, + { + "epoch": 0.5475665072943484, + "grad_norm": 1.6752311726768923, + "learning_rate": 8.947278769311743e-06, + "loss": 0.8115, + "step": 17866 + }, + { + "epoch": 0.5475971558170896, + "grad_norm": 1.5636047028539344, + "learning_rate": 8.94629165445025e-06, + "loss": 0.7553, + "step": 17867 + }, + { + "epoch": 0.5476278043398308, + "grad_norm": 1.5289831864210555, + "learning_rate": 8.945304549971211e-06, + "loss": 0.7599, + "step": 17868 + }, + { + "epoch": 0.547658452862572, + "grad_norm": 1.5648201909061983, + "learning_rate": 8.944317455884362e-06, + "loss": 0.6054, + "step": 17869 + }, + { + "epoch": 0.5476891013853132, + "grad_norm": 1.5314545943064934, + "learning_rate": 8.943330372199421e-06, + "loss": 0.7717, + "step": 17870 + }, + { + "epoch": 0.5477197499080544, + "grad_norm": 1.473093199455989, + "learning_rate": 8.942343298926115e-06, + "loss": 0.6567, + "step": 17871 + }, + { + "epoch": 0.5477503984307956, + "grad_norm": 1.5795790237500955, + "learning_rate": 8.941356236074173e-06, + "loss": 0.7913, + "step": 17872 + }, + { + "epoch": 0.5477810469535368, + "grad_norm": 1.595319129335057, + "learning_rate": 8.940369183653316e-06, + "loss": 0.7303, + "step": 17873 + }, + { + "epoch": 0.5478116954762781, + "grad_norm": 1.6306160379418557, + "learning_rate": 8.939382141673274e-06, + "loss": 0.7222, + "step": 17874 + }, + { + "epoch": 0.5478423439990192, + "grad_norm": 1.764378740949484, + "learning_rate": 8.938395110143772e-06, + "loss": 0.8162, + "step": 17875 + }, + { + "epoch": 0.5478729925217605, + "grad_norm": 1.5414027396808352, + "learning_rate": 8.937408089074536e-06, + "loss": 0.7403, + "step": 17876 + }, + { + "epoch": 0.5479036410445016, + "grad_norm": 1.527705100038951, + "learning_rate": 8.936421078475284e-06, + "loss": 0.8049, + "step": 17877 + }, + { + "epoch": 0.5479342895672429, + "grad_norm": 1.4195826615333123, + "learning_rate": 8.935434078355754e-06, + "loss": 0.6133, + "step": 17878 + }, + { + "epoch": 0.547964938089984, + "grad_norm": 1.397347946894345, + "learning_rate": 8.934447088725657e-06, + "loss": 0.699, + "step": 17879 + }, + { + "epoch": 0.5479955866127253, + "grad_norm": 1.372558193175049, + "learning_rate": 8.933460109594732e-06, + "loss": 0.6924, + "step": 17880 + }, + { + "epoch": 0.5480262351354664, + "grad_norm": 1.3463721198701881, + "learning_rate": 8.932473140972695e-06, + "loss": 0.6533, + "step": 17881 + }, + { + "epoch": 0.5480568836582077, + "grad_norm": 1.3403286574097875, + "learning_rate": 8.931486182869273e-06, + "loss": 0.7091, + "step": 17882 + }, + { + "epoch": 0.5480875321809489, + "grad_norm": 1.5909641645256256, + "learning_rate": 8.930499235294192e-06, + "loss": 0.6898, + "step": 17883 + }, + { + "epoch": 0.5481181807036901, + "grad_norm": 0.757440576641692, + "learning_rate": 8.929512298257176e-06, + "loss": 0.5733, + "step": 17884 + }, + { + "epoch": 0.5481488292264313, + "grad_norm": 1.73838348227584, + "learning_rate": 8.928525371767947e-06, + "loss": 0.7332, + "step": 17885 + }, + { + "epoch": 0.5481794777491725, + "grad_norm": 1.616949189227796, + "learning_rate": 8.927538455836235e-06, + "loss": 0.7891, + "step": 17886 + }, + { + "epoch": 0.5482101262719137, + "grad_norm": 1.4522743410767696, + "learning_rate": 8.926551550471757e-06, + "loss": 0.6789, + "step": 17887 + }, + { + "epoch": 0.5482407747946549, + "grad_norm": 0.6736221131403133, + "learning_rate": 8.925564655684243e-06, + "loss": 0.6038, + "step": 17888 + }, + { + "epoch": 0.5482714233173961, + "grad_norm": 1.6701850832060328, + "learning_rate": 8.924577771483419e-06, + "loss": 0.7488, + "step": 17889 + }, + { + "epoch": 0.5483020718401374, + "grad_norm": 1.2700757406419558, + "learning_rate": 8.923590897878998e-06, + "loss": 0.5827, + "step": 17890 + }, + { + "epoch": 0.5483327203628785, + "grad_norm": 0.6677720213375988, + "learning_rate": 8.92260403488072e-06, + "loss": 0.5892, + "step": 17891 + }, + { + "epoch": 0.5483633688856198, + "grad_norm": 1.7574415047030112, + "learning_rate": 8.921617182498294e-06, + "loss": 0.7248, + "step": 17892 + }, + { + "epoch": 0.5483940174083609, + "grad_norm": 0.6531922494368191, + "learning_rate": 8.92063034074145e-06, + "loss": 0.5794, + "step": 17893 + }, + { + "epoch": 0.5484246659311022, + "grad_norm": 1.6342866385015307, + "learning_rate": 8.919643509619915e-06, + "loss": 0.7521, + "step": 17894 + }, + { + "epoch": 0.5484553144538433, + "grad_norm": 1.473873717032797, + "learning_rate": 8.918656689143403e-06, + "loss": 0.7871, + "step": 17895 + }, + { + "epoch": 0.5484859629765845, + "grad_norm": 1.4019984477589331, + "learning_rate": 8.917669879321648e-06, + "loss": 0.687, + "step": 17896 + }, + { + "epoch": 0.5485166114993257, + "grad_norm": 1.5953558684428715, + "learning_rate": 8.91668308016437e-06, + "loss": 0.7697, + "step": 17897 + }, + { + "epoch": 0.5485472600220669, + "grad_norm": 0.7112970050128348, + "learning_rate": 8.915696291681285e-06, + "loss": 0.5896, + "step": 17898 + }, + { + "epoch": 0.5485779085448081, + "grad_norm": 1.720672597713547, + "learning_rate": 8.914709513882127e-06, + "loss": 0.6937, + "step": 17899 + }, + { + "epoch": 0.5486085570675493, + "grad_norm": 1.4245640239951167, + "learning_rate": 8.913722746776614e-06, + "loss": 0.5993, + "step": 17900 + }, + { + "epoch": 0.5486392055902906, + "grad_norm": 1.5482983335520266, + "learning_rate": 8.912735990374462e-06, + "loss": 0.6527, + "step": 17901 + }, + { + "epoch": 0.5486698541130317, + "grad_norm": 1.576387766356038, + "learning_rate": 8.91174924468541e-06, + "loss": 0.7147, + "step": 17902 + }, + { + "epoch": 0.548700502635773, + "grad_norm": 1.5188607276221744, + "learning_rate": 8.910762509719166e-06, + "loss": 0.7523, + "step": 17903 + }, + { + "epoch": 0.5487311511585141, + "grad_norm": 1.4049693339659268, + "learning_rate": 8.909775785485455e-06, + "loss": 0.6775, + "step": 17904 + }, + { + "epoch": 0.5487617996812554, + "grad_norm": 1.4214439498174747, + "learning_rate": 8.908789071994008e-06, + "loss": 0.6748, + "step": 17905 + }, + { + "epoch": 0.5487924482039965, + "grad_norm": 1.6681734685473695, + "learning_rate": 8.907802369254537e-06, + "loss": 0.6565, + "step": 17906 + }, + { + "epoch": 0.5488230967267378, + "grad_norm": 1.530061883253603, + "learning_rate": 8.90681567727677e-06, + "loss": 0.7197, + "step": 17907 + }, + { + "epoch": 0.5488537452494789, + "grad_norm": 1.5060958203026897, + "learning_rate": 8.90582899607043e-06, + "loss": 0.7471, + "step": 17908 + }, + { + "epoch": 0.5488843937722202, + "grad_norm": 1.3865348568213962, + "learning_rate": 8.904842325645232e-06, + "loss": 0.6401, + "step": 17909 + }, + { + "epoch": 0.5489150422949614, + "grad_norm": 1.7517623803525642, + "learning_rate": 8.903855666010907e-06, + "loss": 0.6924, + "step": 17910 + }, + { + "epoch": 0.5489456908177026, + "grad_norm": 0.6779607773713847, + "learning_rate": 8.902869017177174e-06, + "loss": 0.5885, + "step": 17911 + }, + { + "epoch": 0.5489763393404438, + "grad_norm": 1.7326101584788467, + "learning_rate": 8.901882379153747e-06, + "loss": 0.7036, + "step": 17912 + }, + { + "epoch": 0.549006987863185, + "grad_norm": 1.5632164342771764, + "learning_rate": 8.900895751950361e-06, + "loss": 0.6249, + "step": 17913 + }, + { + "epoch": 0.5490376363859262, + "grad_norm": 1.3719892197631036, + "learning_rate": 8.899909135576726e-06, + "loss": 0.6084, + "step": 17914 + }, + { + "epoch": 0.5490682849086674, + "grad_norm": 1.4057241595227554, + "learning_rate": 8.898922530042568e-06, + "loss": 0.716, + "step": 17915 + }, + { + "epoch": 0.5490989334314086, + "grad_norm": 0.6718108790393528, + "learning_rate": 8.89793593535761e-06, + "loss": 0.5693, + "step": 17916 + }, + { + "epoch": 0.5491295819541498, + "grad_norm": 1.568508120193709, + "learning_rate": 8.896949351531567e-06, + "loss": 0.6802, + "step": 17917 + }, + { + "epoch": 0.549160230476891, + "grad_norm": 1.534800072249084, + "learning_rate": 8.89596277857417e-06, + "loss": 0.6242, + "step": 17918 + }, + { + "epoch": 0.5491908789996323, + "grad_norm": 1.5604117908976005, + "learning_rate": 8.894976216495131e-06, + "loss": 0.808, + "step": 17919 + }, + { + "epoch": 0.5492215275223734, + "grad_norm": 1.3603544411215422, + "learning_rate": 8.893989665304173e-06, + "loss": 0.6828, + "step": 17920 + }, + { + "epoch": 0.5492521760451147, + "grad_norm": 1.364926677270344, + "learning_rate": 8.893003125011022e-06, + "loss": 0.7322, + "step": 17921 + }, + { + "epoch": 0.5492828245678558, + "grad_norm": 1.57360861332483, + "learning_rate": 8.892016595625387e-06, + "loss": 0.7095, + "step": 17922 + }, + { + "epoch": 0.5493134730905971, + "grad_norm": 1.4095658789223513, + "learning_rate": 8.891030077157004e-06, + "loss": 0.6821, + "step": 17923 + }, + { + "epoch": 0.5493441216133382, + "grad_norm": 1.5240921890224637, + "learning_rate": 8.890043569615583e-06, + "loss": 0.727, + "step": 17924 + }, + { + "epoch": 0.5493747701360795, + "grad_norm": 1.4783456321312796, + "learning_rate": 8.889057073010845e-06, + "loss": 0.6456, + "step": 17925 + }, + { + "epoch": 0.5494054186588206, + "grad_norm": 1.3580514527439176, + "learning_rate": 8.888070587352514e-06, + "loss": 0.6857, + "step": 17926 + }, + { + "epoch": 0.5494360671815618, + "grad_norm": 1.4293141573295622, + "learning_rate": 8.887084112650306e-06, + "loss": 0.5931, + "step": 17927 + }, + { + "epoch": 0.549466715704303, + "grad_norm": 0.6661798322876888, + "learning_rate": 8.886097648913943e-06, + "loss": 0.5524, + "step": 17928 + }, + { + "epoch": 0.5494973642270442, + "grad_norm": 1.527489620712117, + "learning_rate": 8.885111196153146e-06, + "loss": 0.785, + "step": 17929 + }, + { + "epoch": 0.5495280127497855, + "grad_norm": 1.53395527182944, + "learning_rate": 8.884124754377635e-06, + "loss": 0.6646, + "step": 17930 + }, + { + "epoch": 0.5495586612725266, + "grad_norm": 1.410642603600151, + "learning_rate": 8.883138323597123e-06, + "loss": 0.7137, + "step": 17931 + }, + { + "epoch": 0.5495893097952679, + "grad_norm": 1.45262436685441, + "learning_rate": 8.882151903821342e-06, + "loss": 0.6572, + "step": 17932 + }, + { + "epoch": 0.549619958318009, + "grad_norm": 1.8405225628099227, + "learning_rate": 8.881165495059997e-06, + "loss": 0.8256, + "step": 17933 + }, + { + "epoch": 0.5496506068407503, + "grad_norm": 1.552279110832474, + "learning_rate": 8.880179097322821e-06, + "loss": 0.6779, + "step": 17934 + }, + { + "epoch": 0.5496812553634914, + "grad_norm": 1.4498165619044305, + "learning_rate": 8.879192710619525e-06, + "loss": 0.6989, + "step": 17935 + }, + { + "epoch": 0.5497119038862327, + "grad_norm": 0.6363289897178258, + "learning_rate": 8.878206334959827e-06, + "loss": 0.5564, + "step": 17936 + }, + { + "epoch": 0.5497425524089739, + "grad_norm": 1.6106086689091126, + "learning_rate": 8.877219970353452e-06, + "loss": 0.7758, + "step": 17937 + }, + { + "epoch": 0.5497732009317151, + "grad_norm": 1.4520586009876046, + "learning_rate": 8.876233616810116e-06, + "loss": 0.7322, + "step": 17938 + }, + { + "epoch": 0.5498038494544563, + "grad_norm": 1.6447233362425622, + "learning_rate": 8.875247274339536e-06, + "loss": 0.7243, + "step": 17939 + }, + { + "epoch": 0.5498344979771975, + "grad_norm": 1.523006961051186, + "learning_rate": 8.874260942951434e-06, + "loss": 0.662, + "step": 17940 + }, + { + "epoch": 0.5498651464999387, + "grad_norm": 1.8919169479053541, + "learning_rate": 8.873274622655523e-06, + "loss": 0.7378, + "step": 17941 + }, + { + "epoch": 0.5498957950226799, + "grad_norm": 1.4466024043008172, + "learning_rate": 8.87228831346153e-06, + "loss": 0.6294, + "step": 17942 + }, + { + "epoch": 0.5499264435454211, + "grad_norm": 1.446767143408429, + "learning_rate": 8.87130201537917e-06, + "loss": 0.7229, + "step": 17943 + }, + { + "epoch": 0.5499570920681623, + "grad_norm": 0.6555218278992495, + "learning_rate": 8.870315728418155e-06, + "loss": 0.5702, + "step": 17944 + }, + { + "epoch": 0.5499877405909035, + "grad_norm": 1.390948633801489, + "learning_rate": 8.869329452588212e-06, + "loss": 0.7641, + "step": 17945 + }, + { + "epoch": 0.5500183891136448, + "grad_norm": 0.6580838056529758, + "learning_rate": 8.868343187899054e-06, + "loss": 0.5956, + "step": 17946 + }, + { + "epoch": 0.5500490376363859, + "grad_norm": 1.5140346713653046, + "learning_rate": 8.8673569343604e-06, + "loss": 0.7211, + "step": 17947 + }, + { + "epoch": 0.5500796861591272, + "grad_norm": 1.5903361857393807, + "learning_rate": 8.866370691981969e-06, + "loss": 0.725, + "step": 17948 + }, + { + "epoch": 0.5501103346818683, + "grad_norm": 1.6676562393927086, + "learning_rate": 8.865384460773475e-06, + "loss": 0.6391, + "step": 17949 + }, + { + "epoch": 0.5501409832046096, + "grad_norm": 1.458790708551465, + "learning_rate": 8.864398240744638e-06, + "loss": 0.7063, + "step": 17950 + }, + { + "epoch": 0.5501716317273507, + "grad_norm": 1.4639199065121808, + "learning_rate": 8.863412031905178e-06, + "loss": 0.5963, + "step": 17951 + }, + { + "epoch": 0.550202280250092, + "grad_norm": 1.6249243021984296, + "learning_rate": 8.862425834264808e-06, + "loss": 0.7042, + "step": 17952 + }, + { + "epoch": 0.5502329287728331, + "grad_norm": 1.4362103275973366, + "learning_rate": 8.861439647833249e-06, + "loss": 0.6799, + "step": 17953 + }, + { + "epoch": 0.5502635772955744, + "grad_norm": 1.4491683892252643, + "learning_rate": 8.86045347262022e-06, + "loss": 0.7237, + "step": 17954 + }, + { + "epoch": 0.5502942258183156, + "grad_norm": 1.6452995983875978, + "learning_rate": 8.859467308635426e-06, + "loss": 0.6582, + "step": 17955 + }, + { + "epoch": 0.5503248743410568, + "grad_norm": 1.4522313056554559, + "learning_rate": 8.8584811558886e-06, + "loss": 0.6709, + "step": 17956 + }, + { + "epoch": 0.550355522863798, + "grad_norm": 0.6655805918854141, + "learning_rate": 8.85749501438945e-06, + "loss": 0.5739, + "step": 17957 + }, + { + "epoch": 0.5503861713865391, + "grad_norm": 1.5824566257902966, + "learning_rate": 8.85650888414769e-06, + "loss": 0.6823, + "step": 17958 + }, + { + "epoch": 0.5504168199092804, + "grad_norm": 1.9643954286555025, + "learning_rate": 8.855522765173044e-06, + "loss": 0.7543, + "step": 17959 + }, + { + "epoch": 0.5504474684320215, + "grad_norm": 0.651091354020095, + "learning_rate": 8.854536657475222e-06, + "loss": 0.5602, + "step": 17960 + }, + { + "epoch": 0.5504781169547628, + "grad_norm": 1.559959540324451, + "learning_rate": 8.853550561063946e-06, + "loss": 0.7517, + "step": 17961 + }, + { + "epoch": 0.5505087654775039, + "grad_norm": 0.6534716410459388, + "learning_rate": 8.85256447594893e-06, + "loss": 0.5666, + "step": 17962 + }, + { + "epoch": 0.5505394140002452, + "grad_norm": 1.3094072580122442, + "learning_rate": 8.851578402139886e-06, + "loss": 0.665, + "step": 17963 + }, + { + "epoch": 0.5505700625229863, + "grad_norm": 1.4604743191438165, + "learning_rate": 8.850592339646538e-06, + "loss": 0.6892, + "step": 17964 + }, + { + "epoch": 0.5506007110457276, + "grad_norm": 1.4833689791327416, + "learning_rate": 8.849606288478599e-06, + "loss": 0.6475, + "step": 17965 + }, + { + "epoch": 0.5506313595684688, + "grad_norm": 1.513122684670422, + "learning_rate": 8.84862024864578e-06, + "loss": 0.6715, + "step": 17966 + }, + { + "epoch": 0.55066200809121, + "grad_norm": 1.5625769568495473, + "learning_rate": 8.847634220157801e-06, + "loss": 0.6705, + "step": 17967 + }, + { + "epoch": 0.5506926566139512, + "grad_norm": 1.5109228986176697, + "learning_rate": 8.846648203024376e-06, + "loss": 0.6701, + "step": 17968 + }, + { + "epoch": 0.5507233051366924, + "grad_norm": 1.513016263968323, + "learning_rate": 8.845662197255222e-06, + "loss": 0.6512, + "step": 17969 + }, + { + "epoch": 0.5507539536594336, + "grad_norm": 1.3588805683412548, + "learning_rate": 8.844676202860057e-06, + "loss": 0.5848, + "step": 17970 + }, + { + "epoch": 0.5507846021821748, + "grad_norm": 1.7928905383327023, + "learning_rate": 8.843690219848588e-06, + "loss": 0.7091, + "step": 17971 + }, + { + "epoch": 0.550815250704916, + "grad_norm": 1.5051191375678685, + "learning_rate": 8.842704248230537e-06, + "loss": 0.7858, + "step": 17972 + }, + { + "epoch": 0.5508458992276573, + "grad_norm": 1.4044167701916115, + "learning_rate": 8.84171828801562e-06, + "loss": 0.7309, + "step": 17973 + }, + { + "epoch": 0.5508765477503984, + "grad_norm": 1.5124775886522381, + "learning_rate": 8.840732339213543e-06, + "loss": 0.6941, + "step": 17974 + }, + { + "epoch": 0.5509071962731397, + "grad_norm": 1.4335458065128064, + "learning_rate": 8.839746401834033e-06, + "loss": 0.7456, + "step": 17975 + }, + { + "epoch": 0.5509378447958808, + "grad_norm": 1.5802480676663215, + "learning_rate": 8.838760475886793e-06, + "loss": 0.7381, + "step": 17976 + }, + { + "epoch": 0.5509684933186221, + "grad_norm": 2.085938362681148, + "learning_rate": 8.837774561381548e-06, + "loss": 0.7264, + "step": 17977 + }, + { + "epoch": 0.5509991418413632, + "grad_norm": 1.4686705550235748, + "learning_rate": 8.836788658328007e-06, + "loss": 0.6993, + "step": 17978 + }, + { + "epoch": 0.5510297903641045, + "grad_norm": 1.3339480727956536, + "learning_rate": 8.835802766735882e-06, + "loss": 0.6639, + "step": 17979 + }, + { + "epoch": 0.5510604388868456, + "grad_norm": 1.443400443277159, + "learning_rate": 8.834816886614893e-06, + "loss": 0.5855, + "step": 17980 + }, + { + "epoch": 0.5510910874095869, + "grad_norm": 1.5865444888711349, + "learning_rate": 8.83383101797475e-06, + "loss": 0.7739, + "step": 17981 + }, + { + "epoch": 0.551121735932328, + "grad_norm": 1.336024856575836, + "learning_rate": 8.832845160825168e-06, + "loss": 0.6768, + "step": 17982 + }, + { + "epoch": 0.5511523844550693, + "grad_norm": 1.3924405239449615, + "learning_rate": 8.831859315175861e-06, + "loss": 0.672, + "step": 17983 + }, + { + "epoch": 0.5511830329778105, + "grad_norm": 1.3542467578586779, + "learning_rate": 8.830873481036546e-06, + "loss": 0.6693, + "step": 17984 + }, + { + "epoch": 0.5512136815005517, + "grad_norm": 1.6881353556383991, + "learning_rate": 8.829887658416929e-06, + "loss": 0.7786, + "step": 17985 + }, + { + "epoch": 0.5512443300232929, + "grad_norm": 1.4967873072827178, + "learning_rate": 8.828901847326734e-06, + "loss": 0.7992, + "step": 17986 + }, + { + "epoch": 0.5512749785460341, + "grad_norm": 1.3843733835969083, + "learning_rate": 8.827916047775661e-06, + "loss": 0.7002, + "step": 17987 + }, + { + "epoch": 0.5513056270687753, + "grad_norm": 1.4180727094360106, + "learning_rate": 8.826930259773438e-06, + "loss": 0.7423, + "step": 17988 + }, + { + "epoch": 0.5513362755915164, + "grad_norm": 1.4065292859775318, + "learning_rate": 8.82594448332977e-06, + "loss": 0.7324, + "step": 17989 + }, + { + "epoch": 0.5513669241142577, + "grad_norm": 1.5826088232518492, + "learning_rate": 8.82495871845437e-06, + "loss": 0.6823, + "step": 17990 + }, + { + "epoch": 0.5513975726369988, + "grad_norm": 0.6863895368943328, + "learning_rate": 8.823972965156952e-06, + "loss": 0.5392, + "step": 17991 + }, + { + "epoch": 0.5514282211597401, + "grad_norm": 1.3370278619427696, + "learning_rate": 8.822987223447232e-06, + "loss": 0.6874, + "step": 17992 + }, + { + "epoch": 0.5514588696824813, + "grad_norm": 1.4514811580367648, + "learning_rate": 8.822001493334915e-06, + "loss": 0.7301, + "step": 17993 + }, + { + "epoch": 0.5514895182052225, + "grad_norm": 1.5823281356756482, + "learning_rate": 8.821015774829723e-06, + "loss": 0.6793, + "step": 17994 + }, + { + "epoch": 0.5515201667279637, + "grad_norm": 0.6718094921565694, + "learning_rate": 8.820030067941362e-06, + "loss": 0.5774, + "step": 17995 + }, + { + "epoch": 0.5515508152507049, + "grad_norm": 1.442493687162219, + "learning_rate": 8.819044372679548e-06, + "loss": 0.7507, + "step": 17996 + }, + { + "epoch": 0.5515814637734461, + "grad_norm": 1.5244471966622952, + "learning_rate": 8.818058689053994e-06, + "loss": 0.7002, + "step": 17997 + }, + { + "epoch": 0.5516121122961873, + "grad_norm": 0.6467446413003636, + "learning_rate": 8.817073017074404e-06, + "loss": 0.5343, + "step": 17998 + }, + { + "epoch": 0.5516427608189285, + "grad_norm": 0.6571355687149243, + "learning_rate": 8.816087356750502e-06, + "loss": 0.5673, + "step": 17999 + }, + { + "epoch": 0.5516734093416698, + "grad_norm": 1.497981025658501, + "learning_rate": 8.815101708091992e-06, + "loss": 0.6367, + "step": 18000 + }, + { + "epoch": 0.5517040578644109, + "grad_norm": 1.4763742470775403, + "learning_rate": 8.814116071108588e-06, + "loss": 0.7325, + "step": 18001 + }, + { + "epoch": 0.5517347063871522, + "grad_norm": 1.5849049118548737, + "learning_rate": 8.813130445810004e-06, + "loss": 0.7092, + "step": 18002 + }, + { + "epoch": 0.5517653549098933, + "grad_norm": 1.5889432336523355, + "learning_rate": 8.812144832205947e-06, + "loss": 0.7368, + "step": 18003 + }, + { + "epoch": 0.5517960034326346, + "grad_norm": 1.714304297718346, + "learning_rate": 8.81115923030613e-06, + "loss": 0.7532, + "step": 18004 + }, + { + "epoch": 0.5518266519553757, + "grad_norm": 1.538508379425004, + "learning_rate": 8.810173640120266e-06, + "loss": 0.7544, + "step": 18005 + }, + { + "epoch": 0.551857300478117, + "grad_norm": 1.6114183498209194, + "learning_rate": 8.809188061658065e-06, + "loss": 0.6201, + "step": 18006 + }, + { + "epoch": 0.5518879490008581, + "grad_norm": 1.5524732676468658, + "learning_rate": 8.80820249492924e-06, + "loss": 0.8443, + "step": 18007 + }, + { + "epoch": 0.5519185975235994, + "grad_norm": 1.7061531024238095, + "learning_rate": 8.807216939943503e-06, + "loss": 0.6606, + "step": 18008 + }, + { + "epoch": 0.5519492460463405, + "grad_norm": 0.6480315862802689, + "learning_rate": 8.80623139671056e-06, + "loss": 0.5557, + "step": 18009 + }, + { + "epoch": 0.5519798945690818, + "grad_norm": 0.6691293428186216, + "learning_rate": 8.805245865240125e-06, + "loss": 0.5798, + "step": 18010 + }, + { + "epoch": 0.552010543091823, + "grad_norm": 1.5352895453434208, + "learning_rate": 8.804260345541909e-06, + "loss": 0.6134, + "step": 18011 + }, + { + "epoch": 0.5520411916145642, + "grad_norm": 1.4460625617467349, + "learning_rate": 8.803274837625618e-06, + "loss": 0.6983, + "step": 18012 + }, + { + "epoch": 0.5520718401373054, + "grad_norm": 1.5499867014289124, + "learning_rate": 8.80228934150097e-06, + "loss": 0.7867, + "step": 18013 + }, + { + "epoch": 0.5521024886600466, + "grad_norm": 0.6498361420089719, + "learning_rate": 8.80130385717767e-06, + "loss": 0.5532, + "step": 18014 + }, + { + "epoch": 0.5521331371827878, + "grad_norm": 1.6844953212934848, + "learning_rate": 8.800318384665429e-06, + "loss": 0.7931, + "step": 18015 + }, + { + "epoch": 0.552163785705529, + "grad_norm": 0.6791393899588559, + "learning_rate": 8.799332923973964e-06, + "loss": 0.5868, + "step": 18016 + }, + { + "epoch": 0.5521944342282702, + "grad_norm": 1.5618408440901168, + "learning_rate": 8.79834747511297e-06, + "loss": 0.683, + "step": 18017 + }, + { + "epoch": 0.5522250827510115, + "grad_norm": 1.609116674432833, + "learning_rate": 8.797362038092172e-06, + "loss": 0.7868, + "step": 18018 + }, + { + "epoch": 0.5522557312737526, + "grad_norm": 1.4241872592757934, + "learning_rate": 8.79637661292127e-06, + "loss": 0.7291, + "step": 18019 + }, + { + "epoch": 0.5522863797964938, + "grad_norm": 1.4045918363167924, + "learning_rate": 8.79539119960998e-06, + "loss": 0.6149, + "step": 18020 + }, + { + "epoch": 0.552317028319235, + "grad_norm": 1.3719799243136048, + "learning_rate": 8.794405798168007e-06, + "loss": 0.6496, + "step": 18021 + }, + { + "epoch": 0.5523476768419762, + "grad_norm": 1.4976465865601196, + "learning_rate": 8.793420408605061e-06, + "loss": 0.6988, + "step": 18022 + }, + { + "epoch": 0.5523783253647174, + "grad_norm": 1.7035847775236084, + "learning_rate": 8.792435030930853e-06, + "loss": 0.697, + "step": 18023 + }, + { + "epoch": 0.5524089738874586, + "grad_norm": 1.5436390931170294, + "learning_rate": 8.791449665155095e-06, + "loss": 0.735, + "step": 18024 + }, + { + "epoch": 0.5524396224101998, + "grad_norm": 0.6864032525759286, + "learning_rate": 8.790464311287488e-06, + "loss": 0.5417, + "step": 18025 + }, + { + "epoch": 0.552470270932941, + "grad_norm": 1.5153097558611226, + "learning_rate": 8.789478969337748e-06, + "loss": 0.6258, + "step": 18026 + }, + { + "epoch": 0.5525009194556822, + "grad_norm": 0.6685311225592404, + "learning_rate": 8.788493639315584e-06, + "loss": 0.5559, + "step": 18027 + }, + { + "epoch": 0.5525315679784234, + "grad_norm": 1.5694793327066594, + "learning_rate": 8.787508321230696e-06, + "loss": 0.7397, + "step": 18028 + }, + { + "epoch": 0.5525622165011647, + "grad_norm": 1.4335636220648915, + "learning_rate": 8.786523015092805e-06, + "loss": 0.7193, + "step": 18029 + }, + { + "epoch": 0.5525928650239058, + "grad_norm": 0.666846060739267, + "learning_rate": 8.78553772091161e-06, + "loss": 0.5476, + "step": 18030 + }, + { + "epoch": 0.5526235135466471, + "grad_norm": 1.3246087513853628, + "learning_rate": 8.784552438696821e-06, + "loss": 0.7143, + "step": 18031 + }, + { + "epoch": 0.5526541620693882, + "grad_norm": 1.4609829571843431, + "learning_rate": 8.783567168458151e-06, + "loss": 0.7011, + "step": 18032 + }, + { + "epoch": 0.5526848105921295, + "grad_norm": 1.395194964045527, + "learning_rate": 8.782581910205302e-06, + "loss": 0.6623, + "step": 18033 + }, + { + "epoch": 0.5527154591148706, + "grad_norm": 0.6476079794879598, + "learning_rate": 8.781596663947988e-06, + "loss": 0.5385, + "step": 18034 + }, + { + "epoch": 0.5527461076376119, + "grad_norm": 1.5509720513573524, + "learning_rate": 8.780611429695911e-06, + "loss": 0.6776, + "step": 18035 + }, + { + "epoch": 0.552776756160353, + "grad_norm": 1.5874618752701108, + "learning_rate": 8.779626207458783e-06, + "loss": 0.7543, + "step": 18036 + }, + { + "epoch": 0.5528074046830943, + "grad_norm": 1.5471431572879002, + "learning_rate": 8.778640997246311e-06, + "loss": 0.5813, + "step": 18037 + }, + { + "epoch": 0.5528380532058355, + "grad_norm": 1.5606288423195835, + "learning_rate": 8.777655799068203e-06, + "loss": 0.71, + "step": 18038 + }, + { + "epoch": 0.5528687017285767, + "grad_norm": 1.5250589185614258, + "learning_rate": 8.776670612934159e-06, + "loss": 0.628, + "step": 18039 + }, + { + "epoch": 0.5528993502513179, + "grad_norm": 1.4696219567976287, + "learning_rate": 8.775685438853901e-06, + "loss": 0.797, + "step": 18040 + }, + { + "epoch": 0.5529299987740591, + "grad_norm": 1.5505076987356232, + "learning_rate": 8.774700276837117e-06, + "loss": 0.6645, + "step": 18041 + }, + { + "epoch": 0.5529606472968003, + "grad_norm": 1.546452951851921, + "learning_rate": 8.773715126893535e-06, + "loss": 0.7701, + "step": 18042 + }, + { + "epoch": 0.5529912958195415, + "grad_norm": 0.6926377630962454, + "learning_rate": 8.772729989032848e-06, + "loss": 0.5772, + "step": 18043 + }, + { + "epoch": 0.5530219443422827, + "grad_norm": 0.6898877608381421, + "learning_rate": 8.771744863264765e-06, + "loss": 0.5676, + "step": 18044 + }, + { + "epoch": 0.553052592865024, + "grad_norm": 1.4976304157413012, + "learning_rate": 8.770759749598995e-06, + "loss": 0.7125, + "step": 18045 + }, + { + "epoch": 0.5530832413877651, + "grad_norm": 1.4595337523625054, + "learning_rate": 8.769774648045244e-06, + "loss": 0.6649, + "step": 18046 + }, + { + "epoch": 0.5531138899105064, + "grad_norm": 0.6428773243553926, + "learning_rate": 8.768789558613217e-06, + "loss": 0.5551, + "step": 18047 + }, + { + "epoch": 0.5531445384332475, + "grad_norm": 1.4470595964534894, + "learning_rate": 8.767804481312624e-06, + "loss": 0.6547, + "step": 18048 + }, + { + "epoch": 0.5531751869559888, + "grad_norm": 1.5759355349432471, + "learning_rate": 8.766819416153165e-06, + "loss": 0.7051, + "step": 18049 + }, + { + "epoch": 0.5532058354787299, + "grad_norm": 0.6416210514378818, + "learning_rate": 8.765834363144552e-06, + "loss": 0.5396, + "step": 18050 + }, + { + "epoch": 0.5532364840014711, + "grad_norm": 1.6297256181861357, + "learning_rate": 8.76484932229649e-06, + "loss": 0.6927, + "step": 18051 + }, + { + "epoch": 0.5532671325242123, + "grad_norm": 1.4004402562762934, + "learning_rate": 8.76386429361868e-06, + "loss": 0.6579, + "step": 18052 + }, + { + "epoch": 0.5532977810469535, + "grad_norm": 1.4304330117496837, + "learning_rate": 8.762879277120837e-06, + "loss": 0.8142, + "step": 18053 + }, + { + "epoch": 0.5533284295696947, + "grad_norm": 1.5216698113424547, + "learning_rate": 8.761894272812658e-06, + "loss": 0.6388, + "step": 18054 + }, + { + "epoch": 0.5533590780924359, + "grad_norm": 1.6077024790882464, + "learning_rate": 8.760909280703848e-06, + "loss": 0.7225, + "step": 18055 + }, + { + "epoch": 0.5533897266151772, + "grad_norm": 1.443740032449191, + "learning_rate": 8.759924300804122e-06, + "loss": 0.6213, + "step": 18056 + }, + { + "epoch": 0.5534203751379183, + "grad_norm": 1.5690848727448379, + "learning_rate": 8.758939333123176e-06, + "loss": 0.6424, + "step": 18057 + }, + { + "epoch": 0.5534510236606596, + "grad_norm": 1.6677101881669238, + "learning_rate": 8.757954377670716e-06, + "loss": 0.6944, + "step": 18058 + }, + { + "epoch": 0.5534816721834007, + "grad_norm": 1.52559374085292, + "learning_rate": 8.756969434456453e-06, + "loss": 0.727, + "step": 18059 + }, + { + "epoch": 0.553512320706142, + "grad_norm": 0.6603121104252595, + "learning_rate": 8.755984503490086e-06, + "loss": 0.5374, + "step": 18060 + }, + { + "epoch": 0.5535429692288831, + "grad_norm": 1.373140019288182, + "learning_rate": 8.754999584781325e-06, + "loss": 0.7124, + "step": 18061 + }, + { + "epoch": 0.5535736177516244, + "grad_norm": 1.563221453233187, + "learning_rate": 8.75401467833987e-06, + "loss": 0.6315, + "step": 18062 + }, + { + "epoch": 0.5536042662743655, + "grad_norm": 1.6073380428439625, + "learning_rate": 8.753029784175427e-06, + "loss": 0.7201, + "step": 18063 + }, + { + "epoch": 0.5536349147971068, + "grad_norm": 0.6632467948073939, + "learning_rate": 8.7520449022977e-06, + "loss": 0.5831, + "step": 18064 + }, + { + "epoch": 0.553665563319848, + "grad_norm": 1.5348552404185007, + "learning_rate": 8.751060032716396e-06, + "loss": 0.6944, + "step": 18065 + }, + { + "epoch": 0.5536962118425892, + "grad_norm": 1.6773034726106781, + "learning_rate": 8.750075175441212e-06, + "loss": 0.7818, + "step": 18066 + }, + { + "epoch": 0.5537268603653304, + "grad_norm": 1.61458134048961, + "learning_rate": 8.749090330481863e-06, + "loss": 0.7094, + "step": 18067 + }, + { + "epoch": 0.5537575088880716, + "grad_norm": 1.5574457818152994, + "learning_rate": 8.748105497848044e-06, + "loss": 0.767, + "step": 18068 + }, + { + "epoch": 0.5537881574108128, + "grad_norm": 0.6353678309862485, + "learning_rate": 8.747120677549462e-06, + "loss": 0.5636, + "step": 18069 + }, + { + "epoch": 0.553818805933554, + "grad_norm": 1.4232021285602985, + "learning_rate": 8.746135869595823e-06, + "loss": 0.7763, + "step": 18070 + }, + { + "epoch": 0.5538494544562952, + "grad_norm": 1.5455441950221964, + "learning_rate": 8.745151073996822e-06, + "loss": 0.5959, + "step": 18071 + }, + { + "epoch": 0.5538801029790364, + "grad_norm": 0.6583945646639638, + "learning_rate": 8.744166290762174e-06, + "loss": 0.513, + "step": 18072 + }, + { + "epoch": 0.5539107515017776, + "grad_norm": 1.9460584676676147, + "learning_rate": 8.743181519901578e-06, + "loss": 0.6519, + "step": 18073 + }, + { + "epoch": 0.5539414000245189, + "grad_norm": 1.580203797839588, + "learning_rate": 8.742196761424731e-06, + "loss": 0.7521, + "step": 18074 + }, + { + "epoch": 0.55397204854726, + "grad_norm": 0.6324610850140923, + "learning_rate": 8.741212015341345e-06, + "loss": 0.5491, + "step": 18075 + }, + { + "epoch": 0.5540026970700013, + "grad_norm": 1.4836252916170598, + "learning_rate": 8.740227281661115e-06, + "loss": 0.7115, + "step": 18076 + }, + { + "epoch": 0.5540333455927424, + "grad_norm": 1.4667267814001057, + "learning_rate": 8.739242560393753e-06, + "loss": 0.6946, + "step": 18077 + }, + { + "epoch": 0.5540639941154837, + "grad_norm": 1.3416432284329995, + "learning_rate": 8.738257851548954e-06, + "loss": 0.631, + "step": 18078 + }, + { + "epoch": 0.5540946426382248, + "grad_norm": 1.4122780108043655, + "learning_rate": 8.737273155136422e-06, + "loss": 0.6541, + "step": 18079 + }, + { + "epoch": 0.5541252911609661, + "grad_norm": 1.5104228509758297, + "learning_rate": 8.736288471165862e-06, + "loss": 0.7064, + "step": 18080 + }, + { + "epoch": 0.5541559396837072, + "grad_norm": 1.6362685313678749, + "learning_rate": 8.735303799646977e-06, + "loss": 0.7519, + "step": 18081 + }, + { + "epoch": 0.5541865882064484, + "grad_norm": 1.522025436594386, + "learning_rate": 8.734319140589462e-06, + "loss": 0.7408, + "step": 18082 + }, + { + "epoch": 0.5542172367291897, + "grad_norm": 1.5274313230848007, + "learning_rate": 8.733334494003031e-06, + "loss": 0.7375, + "step": 18083 + }, + { + "epoch": 0.5542478852519308, + "grad_norm": 1.5060599888737067, + "learning_rate": 8.732349859897377e-06, + "loss": 0.7218, + "step": 18084 + }, + { + "epoch": 0.5542785337746721, + "grad_norm": 1.576716173103841, + "learning_rate": 8.731365238282203e-06, + "loss": 0.7035, + "step": 18085 + }, + { + "epoch": 0.5543091822974132, + "grad_norm": 0.7143553039008085, + "learning_rate": 8.730380629167212e-06, + "loss": 0.5565, + "step": 18086 + }, + { + "epoch": 0.5543398308201545, + "grad_norm": 1.5152364564607663, + "learning_rate": 8.729396032562104e-06, + "loss": 0.7489, + "step": 18087 + }, + { + "epoch": 0.5543704793428956, + "grad_norm": 1.2816643758148187, + "learning_rate": 8.728411448476584e-06, + "loss": 0.6807, + "step": 18088 + }, + { + "epoch": 0.5544011278656369, + "grad_norm": 0.6504700557576577, + "learning_rate": 8.727426876920352e-06, + "loss": 0.5496, + "step": 18089 + }, + { + "epoch": 0.554431776388378, + "grad_norm": 1.4583251059676967, + "learning_rate": 8.726442317903105e-06, + "loss": 0.6804, + "step": 18090 + }, + { + "epoch": 0.5544624249111193, + "grad_norm": 1.3973819792187536, + "learning_rate": 8.72545777143455e-06, + "loss": 0.7322, + "step": 18091 + }, + { + "epoch": 0.5544930734338605, + "grad_norm": 0.6646388987859604, + "learning_rate": 8.72447323752439e-06, + "loss": 0.5458, + "step": 18092 + }, + { + "epoch": 0.5545237219566017, + "grad_norm": 1.491973984680533, + "learning_rate": 8.723488716182314e-06, + "loss": 0.7344, + "step": 18093 + }, + { + "epoch": 0.5545543704793429, + "grad_norm": 1.5506563787056309, + "learning_rate": 8.722504207418036e-06, + "loss": 0.6556, + "step": 18094 + }, + { + "epoch": 0.5545850190020841, + "grad_norm": 1.6046597470464037, + "learning_rate": 8.721519711241245e-06, + "loss": 0.7221, + "step": 18095 + }, + { + "epoch": 0.5546156675248253, + "grad_norm": 1.4720802816734546, + "learning_rate": 8.720535227661654e-06, + "loss": 0.7492, + "step": 18096 + }, + { + "epoch": 0.5546463160475665, + "grad_norm": 1.601096058618694, + "learning_rate": 8.719550756688955e-06, + "loss": 0.6646, + "step": 18097 + }, + { + "epoch": 0.5546769645703077, + "grad_norm": 1.5303396571391115, + "learning_rate": 8.718566298332846e-06, + "loss": 0.762, + "step": 18098 + }, + { + "epoch": 0.554707613093049, + "grad_norm": 1.4344387651987562, + "learning_rate": 8.717581852603037e-06, + "loss": 0.6616, + "step": 18099 + }, + { + "epoch": 0.5547382616157901, + "grad_norm": 1.4241662093580718, + "learning_rate": 8.716597419509219e-06, + "loss": 0.6147, + "step": 18100 + }, + { + "epoch": 0.5547689101385314, + "grad_norm": 1.431929708320065, + "learning_rate": 8.715612999061093e-06, + "loss": 0.6496, + "step": 18101 + }, + { + "epoch": 0.5547995586612725, + "grad_norm": 1.672444220512899, + "learning_rate": 8.714628591268363e-06, + "loss": 0.7024, + "step": 18102 + }, + { + "epoch": 0.5548302071840138, + "grad_norm": 1.5901070020714594, + "learning_rate": 8.713644196140724e-06, + "loss": 0.7978, + "step": 18103 + }, + { + "epoch": 0.5548608557067549, + "grad_norm": 1.330174256666385, + "learning_rate": 8.712659813687882e-06, + "loss": 0.7075, + "step": 18104 + }, + { + "epoch": 0.5548915042294962, + "grad_norm": 0.657914854758406, + "learning_rate": 8.711675443919532e-06, + "loss": 0.6019, + "step": 18105 + }, + { + "epoch": 0.5549221527522373, + "grad_norm": 1.3614617204643968, + "learning_rate": 8.710691086845371e-06, + "loss": 0.704, + "step": 18106 + }, + { + "epoch": 0.5549528012749786, + "grad_norm": 1.446988333653154, + "learning_rate": 8.709706742475102e-06, + "loss": 0.6024, + "step": 18107 + }, + { + "epoch": 0.5549834497977197, + "grad_norm": 0.7034279855859324, + "learning_rate": 8.708722410818423e-06, + "loss": 0.5506, + "step": 18108 + }, + { + "epoch": 0.555014098320461, + "grad_norm": 0.6935261611801271, + "learning_rate": 8.70773809188503e-06, + "loss": 0.5707, + "step": 18109 + }, + { + "epoch": 0.5550447468432022, + "grad_norm": 0.647295449288221, + "learning_rate": 8.706753785684627e-06, + "loss": 0.5461, + "step": 18110 + }, + { + "epoch": 0.5550753953659434, + "grad_norm": 1.4442507326980523, + "learning_rate": 8.705769492226908e-06, + "loss": 0.6502, + "step": 18111 + }, + { + "epoch": 0.5551060438886846, + "grad_norm": 1.5279802801899875, + "learning_rate": 8.704785211521573e-06, + "loss": 0.6852, + "step": 18112 + }, + { + "epoch": 0.5551366924114257, + "grad_norm": 0.6476306874062846, + "learning_rate": 8.703800943578325e-06, + "loss": 0.5526, + "step": 18113 + }, + { + "epoch": 0.555167340934167, + "grad_norm": 1.3267275799590994, + "learning_rate": 8.70281668840685e-06, + "loss": 0.5994, + "step": 18114 + }, + { + "epoch": 0.5551979894569081, + "grad_norm": 1.5168568759599554, + "learning_rate": 8.701832446016861e-06, + "loss": 0.7253, + "step": 18115 + }, + { + "epoch": 0.5552286379796494, + "grad_norm": 1.4351879026718368, + "learning_rate": 8.700848216418047e-06, + "loss": 0.5536, + "step": 18116 + }, + { + "epoch": 0.5552592865023905, + "grad_norm": 1.4744773927695733, + "learning_rate": 8.699863999620107e-06, + "loss": 0.5552, + "step": 18117 + }, + { + "epoch": 0.5552899350251318, + "grad_norm": 1.4965716344809559, + "learning_rate": 8.698879795632742e-06, + "loss": 0.802, + "step": 18118 + }, + { + "epoch": 0.555320583547873, + "grad_norm": 1.767920643470295, + "learning_rate": 8.697895604465645e-06, + "loss": 0.7077, + "step": 18119 + }, + { + "epoch": 0.5553512320706142, + "grad_norm": 1.490522952370684, + "learning_rate": 8.696911426128515e-06, + "loss": 0.7878, + "step": 18120 + }, + { + "epoch": 0.5553818805933554, + "grad_norm": 1.554441986956554, + "learning_rate": 8.695927260631052e-06, + "loss": 0.7362, + "step": 18121 + }, + { + "epoch": 0.5554125291160966, + "grad_norm": 1.4940958981035344, + "learning_rate": 8.69494310798295e-06, + "loss": 0.6702, + "step": 18122 + }, + { + "epoch": 0.5554431776388378, + "grad_norm": 1.4762627926482912, + "learning_rate": 8.693958968193907e-06, + "loss": 0.6857, + "step": 18123 + }, + { + "epoch": 0.555473826161579, + "grad_norm": 1.4655707078483993, + "learning_rate": 8.692974841273625e-06, + "loss": 0.6163, + "step": 18124 + }, + { + "epoch": 0.5555044746843202, + "grad_norm": 0.6957838975136327, + "learning_rate": 8.691990727231789e-06, + "loss": 0.5645, + "step": 18125 + }, + { + "epoch": 0.5555351232070614, + "grad_norm": 1.3747762717673657, + "learning_rate": 8.691006626078111e-06, + "loss": 0.614, + "step": 18126 + }, + { + "epoch": 0.5555657717298026, + "grad_norm": 1.5195649838987015, + "learning_rate": 8.690022537822276e-06, + "loss": 0.6612, + "step": 18127 + }, + { + "epoch": 0.5555964202525439, + "grad_norm": 0.6973400760695928, + "learning_rate": 8.689038462473982e-06, + "loss": 0.5985, + "step": 18128 + }, + { + "epoch": 0.555627068775285, + "grad_norm": 0.6867202689787535, + "learning_rate": 8.68805440004293e-06, + "loss": 0.5828, + "step": 18129 + }, + { + "epoch": 0.5556577172980263, + "grad_norm": 1.5308355538171274, + "learning_rate": 8.687070350538812e-06, + "loss": 0.6739, + "step": 18130 + }, + { + "epoch": 0.5556883658207674, + "grad_norm": 1.570869040631811, + "learning_rate": 8.686086313971327e-06, + "loss": 0.6284, + "step": 18131 + }, + { + "epoch": 0.5557190143435087, + "grad_norm": 1.4969228776551278, + "learning_rate": 8.68510229035017e-06, + "loss": 0.6767, + "step": 18132 + }, + { + "epoch": 0.5557496628662498, + "grad_norm": 1.4714390421782952, + "learning_rate": 8.684118279685034e-06, + "loss": 0.6242, + "step": 18133 + }, + { + "epoch": 0.5557803113889911, + "grad_norm": 1.5769324690994573, + "learning_rate": 8.68313428198562e-06, + "loss": 0.7569, + "step": 18134 + }, + { + "epoch": 0.5558109599117322, + "grad_norm": 1.4997843434573992, + "learning_rate": 8.682150297261623e-06, + "loss": 0.6966, + "step": 18135 + }, + { + "epoch": 0.5558416084344735, + "grad_norm": 1.4460887356519598, + "learning_rate": 8.68116632552273e-06, + "loss": 0.666, + "step": 18136 + }, + { + "epoch": 0.5558722569572146, + "grad_norm": 1.5233765933887427, + "learning_rate": 8.680182366778649e-06, + "loss": 0.7023, + "step": 18137 + }, + { + "epoch": 0.5559029054799559, + "grad_norm": 1.6256037099156948, + "learning_rate": 8.679198421039066e-06, + "loss": 0.8128, + "step": 18138 + }, + { + "epoch": 0.5559335540026971, + "grad_norm": 1.4946443656515784, + "learning_rate": 8.678214488313677e-06, + "loss": 0.7407, + "step": 18139 + }, + { + "epoch": 0.5559642025254383, + "grad_norm": 1.5449586071147932, + "learning_rate": 8.677230568612182e-06, + "loss": 0.7274, + "step": 18140 + }, + { + "epoch": 0.5559948510481795, + "grad_norm": 0.7176155462239036, + "learning_rate": 8.67624666194427e-06, + "loss": 0.5498, + "step": 18141 + }, + { + "epoch": 0.5560254995709207, + "grad_norm": 1.5610945821277133, + "learning_rate": 8.675262768319638e-06, + "loss": 0.5731, + "step": 18142 + }, + { + "epoch": 0.5560561480936619, + "grad_norm": 1.5249466276193682, + "learning_rate": 8.674278887747984e-06, + "loss": 0.6469, + "step": 18143 + }, + { + "epoch": 0.556086796616403, + "grad_norm": 1.3716152659986292, + "learning_rate": 8.673295020238997e-06, + "loss": 0.7344, + "step": 18144 + }, + { + "epoch": 0.5561174451391443, + "grad_norm": 1.2511832256181568, + "learning_rate": 8.672311165802375e-06, + "loss": 0.5632, + "step": 18145 + }, + { + "epoch": 0.5561480936618854, + "grad_norm": 1.8236335444107925, + "learning_rate": 8.671327324447814e-06, + "loss": 0.7837, + "step": 18146 + }, + { + "epoch": 0.5561787421846267, + "grad_norm": 0.6788938108475919, + "learning_rate": 8.670343496184997e-06, + "loss": 0.6033, + "step": 18147 + }, + { + "epoch": 0.5562093907073679, + "grad_norm": 1.5535147224408623, + "learning_rate": 8.669359681023632e-06, + "loss": 0.6887, + "step": 18148 + }, + { + "epoch": 0.5562400392301091, + "grad_norm": 1.4868100937271418, + "learning_rate": 8.6683758789734e-06, + "loss": 0.7737, + "step": 18149 + }, + { + "epoch": 0.5562706877528503, + "grad_norm": 0.6628174528082852, + "learning_rate": 8.66739209004401e-06, + "loss": 0.5428, + "step": 18150 + }, + { + "epoch": 0.5563013362755915, + "grad_norm": 1.5034225799162386, + "learning_rate": 8.666408314245142e-06, + "loss": 0.6716, + "step": 18151 + }, + { + "epoch": 0.5563319847983327, + "grad_norm": 1.4640631641128539, + "learning_rate": 8.665424551586492e-06, + "loss": 0.5854, + "step": 18152 + }, + { + "epoch": 0.5563626333210739, + "grad_norm": 1.4942569220020117, + "learning_rate": 8.664440802077758e-06, + "loss": 0.6719, + "step": 18153 + }, + { + "epoch": 0.5563932818438151, + "grad_norm": 1.4591111783708839, + "learning_rate": 8.66345706572863e-06, + "loss": 0.697, + "step": 18154 + }, + { + "epoch": 0.5564239303665564, + "grad_norm": 1.4773723976673359, + "learning_rate": 8.6624733425488e-06, + "loss": 0.6041, + "step": 18155 + }, + { + "epoch": 0.5564545788892975, + "grad_norm": 1.6752398572550833, + "learning_rate": 8.661489632547966e-06, + "loss": 0.7901, + "step": 18156 + }, + { + "epoch": 0.5564852274120388, + "grad_norm": 1.5303198862720815, + "learning_rate": 8.660505935735813e-06, + "loss": 0.7182, + "step": 18157 + }, + { + "epoch": 0.5565158759347799, + "grad_norm": 1.6368456399870508, + "learning_rate": 8.659522252122043e-06, + "loss": 0.7005, + "step": 18158 + }, + { + "epoch": 0.5565465244575212, + "grad_norm": 0.6800356977532096, + "learning_rate": 8.658538581716342e-06, + "loss": 0.5842, + "step": 18159 + }, + { + "epoch": 0.5565771729802623, + "grad_norm": 1.439129047716948, + "learning_rate": 8.657554924528399e-06, + "loss": 0.7076, + "step": 18160 + }, + { + "epoch": 0.5566078215030036, + "grad_norm": 1.3666264650929583, + "learning_rate": 8.656571280567914e-06, + "loss": 0.622, + "step": 18161 + }, + { + "epoch": 0.5566384700257447, + "grad_norm": 1.7802956520075048, + "learning_rate": 8.655587649844577e-06, + "loss": 0.7229, + "step": 18162 + }, + { + "epoch": 0.556669118548486, + "grad_norm": 0.6763412769233602, + "learning_rate": 8.654604032368074e-06, + "loss": 0.5808, + "step": 18163 + }, + { + "epoch": 0.5566997670712271, + "grad_norm": 1.6244350944131052, + "learning_rate": 8.653620428148107e-06, + "loss": 0.7356, + "step": 18164 + }, + { + "epoch": 0.5567304155939684, + "grad_norm": 1.6935628053429306, + "learning_rate": 8.652636837194362e-06, + "loss": 0.7376, + "step": 18165 + }, + { + "epoch": 0.5567610641167096, + "grad_norm": 1.392564424064034, + "learning_rate": 8.651653259516526e-06, + "loss": 0.7383, + "step": 18166 + }, + { + "epoch": 0.5567917126394508, + "grad_norm": 1.682128517693494, + "learning_rate": 8.650669695124302e-06, + "loss": 0.724, + "step": 18167 + }, + { + "epoch": 0.556822361162192, + "grad_norm": 1.3886391963200833, + "learning_rate": 8.649686144027368e-06, + "loss": 0.6405, + "step": 18168 + }, + { + "epoch": 0.5568530096849332, + "grad_norm": 1.6438526115549918, + "learning_rate": 8.648702606235429e-06, + "loss": 0.6719, + "step": 18169 + }, + { + "epoch": 0.5568836582076744, + "grad_norm": 0.6454013603433175, + "learning_rate": 8.647719081758165e-06, + "loss": 0.5578, + "step": 18170 + }, + { + "epoch": 0.5569143067304156, + "grad_norm": 0.659906885837621, + "learning_rate": 8.646735570605268e-06, + "loss": 0.567, + "step": 18171 + }, + { + "epoch": 0.5569449552531568, + "grad_norm": 1.4414591277209696, + "learning_rate": 8.645752072786437e-06, + "loss": 0.6088, + "step": 18172 + }, + { + "epoch": 0.556975603775898, + "grad_norm": 1.526778208341273, + "learning_rate": 8.644768588311356e-06, + "loss": 0.7243, + "step": 18173 + }, + { + "epoch": 0.5570062522986392, + "grad_norm": 0.6402308029552137, + "learning_rate": 8.643785117189714e-06, + "loss": 0.5578, + "step": 18174 + }, + { + "epoch": 0.5570369008213804, + "grad_norm": 1.6705536888176264, + "learning_rate": 8.642801659431208e-06, + "loss": 0.8098, + "step": 18175 + }, + { + "epoch": 0.5570675493441216, + "grad_norm": 1.563225558670188, + "learning_rate": 8.641818215045521e-06, + "loss": 0.7957, + "step": 18176 + }, + { + "epoch": 0.5570981978668628, + "grad_norm": 1.3890939334571133, + "learning_rate": 8.64083478404235e-06, + "loss": 0.6416, + "step": 18177 + }, + { + "epoch": 0.557128846389604, + "grad_norm": 1.4087434788587343, + "learning_rate": 8.639851366431382e-06, + "loss": 0.7285, + "step": 18178 + }, + { + "epoch": 0.5571594949123452, + "grad_norm": 1.4265066862899407, + "learning_rate": 8.638867962222302e-06, + "loss": 0.7088, + "step": 18179 + }, + { + "epoch": 0.5571901434350864, + "grad_norm": 1.5510539485953319, + "learning_rate": 8.637884571424808e-06, + "loss": 0.6949, + "step": 18180 + }, + { + "epoch": 0.5572207919578276, + "grad_norm": 1.3788073073985068, + "learning_rate": 8.636901194048585e-06, + "loss": 0.69, + "step": 18181 + }, + { + "epoch": 0.5572514404805688, + "grad_norm": 1.5521298233827332, + "learning_rate": 8.635917830103321e-06, + "loss": 0.7675, + "step": 18182 + }, + { + "epoch": 0.55728208900331, + "grad_norm": 1.4738729637914958, + "learning_rate": 8.63493447959871e-06, + "loss": 0.6863, + "step": 18183 + }, + { + "epoch": 0.5573127375260513, + "grad_norm": 1.489857644775268, + "learning_rate": 8.63395114254444e-06, + "loss": 0.7374, + "step": 18184 + }, + { + "epoch": 0.5573433860487924, + "grad_norm": 1.40590048232767, + "learning_rate": 8.632967818950197e-06, + "loss": 0.7186, + "step": 18185 + }, + { + "epoch": 0.5573740345715337, + "grad_norm": 1.4531646106785694, + "learning_rate": 8.631984508825672e-06, + "loss": 0.6429, + "step": 18186 + }, + { + "epoch": 0.5574046830942748, + "grad_norm": 1.3379499273954294, + "learning_rate": 8.631001212180552e-06, + "loss": 0.685, + "step": 18187 + }, + { + "epoch": 0.5574353316170161, + "grad_norm": 0.6590943194778833, + "learning_rate": 8.63001792902453e-06, + "loss": 0.5399, + "step": 18188 + }, + { + "epoch": 0.5574659801397572, + "grad_norm": 0.6923381768576506, + "learning_rate": 8.629034659367295e-06, + "loss": 0.5692, + "step": 18189 + }, + { + "epoch": 0.5574966286624985, + "grad_norm": 0.6778903939334248, + "learning_rate": 8.628051403218524e-06, + "loss": 0.5608, + "step": 18190 + }, + { + "epoch": 0.5575272771852396, + "grad_norm": 1.60662385716432, + "learning_rate": 8.627068160587921e-06, + "loss": 0.7035, + "step": 18191 + }, + { + "epoch": 0.5575579257079809, + "grad_norm": 0.6388530953273526, + "learning_rate": 8.626084931485164e-06, + "loss": 0.5374, + "step": 18192 + }, + { + "epoch": 0.5575885742307221, + "grad_norm": 1.5829270952233112, + "learning_rate": 8.62510171591994e-06, + "loss": 0.7355, + "step": 18193 + }, + { + "epoch": 0.5576192227534633, + "grad_norm": 0.6828601063890144, + "learning_rate": 8.624118513901947e-06, + "loss": 0.5483, + "step": 18194 + }, + { + "epoch": 0.5576498712762045, + "grad_norm": 1.38885592073282, + "learning_rate": 8.623135325440861e-06, + "loss": 0.7086, + "step": 18195 + }, + { + "epoch": 0.5576805197989457, + "grad_norm": 1.5405634410597997, + "learning_rate": 8.622152150546378e-06, + "loss": 0.6542, + "step": 18196 + }, + { + "epoch": 0.5577111683216869, + "grad_norm": 1.521681323914842, + "learning_rate": 8.621168989228182e-06, + "loss": 0.7748, + "step": 18197 + }, + { + "epoch": 0.5577418168444281, + "grad_norm": 1.5265048979334224, + "learning_rate": 8.620185841495959e-06, + "loss": 0.7335, + "step": 18198 + }, + { + "epoch": 0.5577724653671693, + "grad_norm": 1.6391582010175965, + "learning_rate": 8.6192027073594e-06, + "loss": 0.6002, + "step": 18199 + }, + { + "epoch": 0.5578031138899106, + "grad_norm": 1.4423984328890584, + "learning_rate": 8.618219586828192e-06, + "loss": 0.6955, + "step": 18200 + }, + { + "epoch": 0.5578337624126517, + "grad_norm": 1.3928910505432623, + "learning_rate": 8.617236479912012e-06, + "loss": 0.7708, + "step": 18201 + }, + { + "epoch": 0.557864410935393, + "grad_norm": 0.7008612368211489, + "learning_rate": 8.616253386620563e-06, + "loss": 0.5531, + "step": 18202 + }, + { + "epoch": 0.5578950594581341, + "grad_norm": 1.4792925150657104, + "learning_rate": 8.615270306963519e-06, + "loss": 0.7832, + "step": 18203 + }, + { + "epoch": 0.5579257079808754, + "grad_norm": 1.423407776565722, + "learning_rate": 8.614287240950574e-06, + "loss": 0.7115, + "step": 18204 + }, + { + "epoch": 0.5579563565036165, + "grad_norm": 1.6138488857309192, + "learning_rate": 8.61330418859141e-06, + "loss": 0.7171, + "step": 18205 + }, + { + "epoch": 0.5579870050263577, + "grad_norm": 0.6942278764563496, + "learning_rate": 8.612321149895712e-06, + "loss": 0.5902, + "step": 18206 + }, + { + "epoch": 0.5580176535490989, + "grad_norm": 1.497284572064816, + "learning_rate": 8.611338124873172e-06, + "loss": 0.6851, + "step": 18207 + }, + { + "epoch": 0.5580483020718401, + "grad_norm": 1.6886724803941593, + "learning_rate": 8.610355113533472e-06, + "loss": 0.7045, + "step": 18208 + }, + { + "epoch": 0.5580789505945813, + "grad_norm": 1.387773391334353, + "learning_rate": 8.609372115886297e-06, + "loss": 0.7675, + "step": 18209 + }, + { + "epoch": 0.5581095991173225, + "grad_norm": 1.5840083766479536, + "learning_rate": 8.60838913194134e-06, + "loss": 0.7345, + "step": 18210 + }, + { + "epoch": 0.5581402476400638, + "grad_norm": 1.7618603406893592, + "learning_rate": 8.607406161708276e-06, + "loss": 0.6588, + "step": 18211 + }, + { + "epoch": 0.5581708961628049, + "grad_norm": 1.5706184630350641, + "learning_rate": 8.606423205196795e-06, + "loss": 0.7207, + "step": 18212 + }, + { + "epoch": 0.5582015446855462, + "grad_norm": 1.2924793667148837, + "learning_rate": 8.605440262416584e-06, + "loss": 0.576, + "step": 18213 + }, + { + "epoch": 0.5582321932082873, + "grad_norm": 0.6741272957058517, + "learning_rate": 8.604457333377326e-06, + "loss": 0.5681, + "step": 18214 + }, + { + "epoch": 0.5582628417310286, + "grad_norm": 1.5736663600070542, + "learning_rate": 8.603474418088709e-06, + "loss": 0.6755, + "step": 18215 + }, + { + "epoch": 0.5582934902537697, + "grad_norm": 1.606492216017997, + "learning_rate": 8.602491516560415e-06, + "loss": 0.5183, + "step": 18216 + }, + { + "epoch": 0.558324138776511, + "grad_norm": 0.6705369825931777, + "learning_rate": 8.601508628802128e-06, + "loss": 0.5587, + "step": 18217 + }, + { + "epoch": 0.5583547872992521, + "grad_norm": 1.3484607090303609, + "learning_rate": 8.600525754823535e-06, + "loss": 0.6198, + "step": 18218 + }, + { + "epoch": 0.5583854358219934, + "grad_norm": 1.4283123798444115, + "learning_rate": 8.599542894634325e-06, + "loss": 0.6815, + "step": 18219 + }, + { + "epoch": 0.5584160843447346, + "grad_norm": 1.3792271375097669, + "learning_rate": 8.598560048244167e-06, + "loss": 0.7144, + "step": 18220 + }, + { + "epoch": 0.5584467328674758, + "grad_norm": 1.5937892945371157, + "learning_rate": 8.597577215662765e-06, + "loss": 0.7235, + "step": 18221 + }, + { + "epoch": 0.558477381390217, + "grad_norm": 1.5341303495795275, + "learning_rate": 8.596594396899785e-06, + "loss": 0.6981, + "step": 18222 + }, + { + "epoch": 0.5585080299129582, + "grad_norm": 1.5829183664879938, + "learning_rate": 8.595611591964928e-06, + "loss": 0.6634, + "step": 18223 + }, + { + "epoch": 0.5585386784356994, + "grad_norm": 1.522636010002662, + "learning_rate": 8.594628800867865e-06, + "loss": 0.7355, + "step": 18224 + }, + { + "epoch": 0.5585693269584406, + "grad_norm": 1.5243487185148743, + "learning_rate": 8.593646023618283e-06, + "loss": 0.766, + "step": 18225 + }, + { + "epoch": 0.5585999754811818, + "grad_norm": 1.4633480641908763, + "learning_rate": 8.592663260225869e-06, + "loss": 0.7272, + "step": 18226 + }, + { + "epoch": 0.558630624003923, + "grad_norm": 1.4417783536145814, + "learning_rate": 8.591680510700302e-06, + "loss": 0.7018, + "step": 18227 + }, + { + "epoch": 0.5586612725266642, + "grad_norm": 1.5742790898950183, + "learning_rate": 8.590697775051267e-06, + "loss": 0.7513, + "step": 18228 + }, + { + "epoch": 0.5586919210494055, + "grad_norm": 1.5533156574190286, + "learning_rate": 8.58971505328845e-06, + "loss": 0.6501, + "step": 18229 + }, + { + "epoch": 0.5587225695721466, + "grad_norm": 1.6863838241222755, + "learning_rate": 8.588732345421527e-06, + "loss": 0.7309, + "step": 18230 + }, + { + "epoch": 0.5587532180948879, + "grad_norm": 1.6916045687595438, + "learning_rate": 8.58774965146019e-06, + "loss": 0.6593, + "step": 18231 + }, + { + "epoch": 0.558783866617629, + "grad_norm": 1.574195306658624, + "learning_rate": 8.586766971414117e-06, + "loss": 0.6459, + "step": 18232 + }, + { + "epoch": 0.5588145151403703, + "grad_norm": 1.8525354679313595, + "learning_rate": 8.585784305292986e-06, + "loss": 0.8148, + "step": 18233 + }, + { + "epoch": 0.5588451636631114, + "grad_norm": 1.475959414705162, + "learning_rate": 8.58480165310649e-06, + "loss": 0.6437, + "step": 18234 + }, + { + "epoch": 0.5588758121858527, + "grad_norm": 1.3502923328338337, + "learning_rate": 8.583819014864303e-06, + "loss": 0.7195, + "step": 18235 + }, + { + "epoch": 0.5589064607085938, + "grad_norm": 1.7679389454404195, + "learning_rate": 8.582836390576106e-06, + "loss": 0.7823, + "step": 18236 + }, + { + "epoch": 0.558937109231335, + "grad_norm": 1.6581876376210372, + "learning_rate": 8.581853780251589e-06, + "loss": 0.8319, + "step": 18237 + }, + { + "epoch": 0.5589677577540763, + "grad_norm": 1.4364263234166441, + "learning_rate": 8.58087118390043e-06, + "loss": 0.6639, + "step": 18238 + }, + { + "epoch": 0.5589984062768174, + "grad_norm": 1.5911125182439256, + "learning_rate": 8.579888601532305e-06, + "loss": 0.7495, + "step": 18239 + }, + { + "epoch": 0.5590290547995587, + "grad_norm": 1.4861186630644734, + "learning_rate": 8.578906033156906e-06, + "loss": 0.6442, + "step": 18240 + }, + { + "epoch": 0.5590597033222998, + "grad_norm": 1.4757010739178251, + "learning_rate": 8.577923478783906e-06, + "loss": 0.6661, + "step": 18241 + }, + { + "epoch": 0.5590903518450411, + "grad_norm": 0.6708178827203013, + "learning_rate": 8.576940938422993e-06, + "loss": 0.5586, + "step": 18242 + }, + { + "epoch": 0.5591210003677822, + "grad_norm": 1.6569468474205338, + "learning_rate": 8.575958412083845e-06, + "loss": 0.7424, + "step": 18243 + }, + { + "epoch": 0.5591516488905235, + "grad_norm": 0.67405164457874, + "learning_rate": 8.574975899776139e-06, + "loss": 0.5546, + "step": 18244 + }, + { + "epoch": 0.5591822974132646, + "grad_norm": 2.087890888609623, + "learning_rate": 8.573993401509565e-06, + "loss": 0.7088, + "step": 18245 + }, + { + "epoch": 0.5592129459360059, + "grad_norm": 1.6838519340141893, + "learning_rate": 8.573010917293798e-06, + "loss": 0.7094, + "step": 18246 + }, + { + "epoch": 0.559243594458747, + "grad_norm": 1.445823862153841, + "learning_rate": 8.572028447138517e-06, + "loss": 0.7573, + "step": 18247 + }, + { + "epoch": 0.5592742429814883, + "grad_norm": 1.4890482979074227, + "learning_rate": 8.571045991053407e-06, + "loss": 0.7331, + "step": 18248 + }, + { + "epoch": 0.5593048915042295, + "grad_norm": 0.6507949057682119, + "learning_rate": 8.570063549048144e-06, + "loss": 0.554, + "step": 18249 + }, + { + "epoch": 0.5593355400269707, + "grad_norm": 1.546324587617014, + "learning_rate": 8.569081121132414e-06, + "loss": 0.6455, + "step": 18250 + }, + { + "epoch": 0.5593661885497119, + "grad_norm": 1.482148069365438, + "learning_rate": 8.568098707315892e-06, + "loss": 0.694, + "step": 18251 + }, + { + "epoch": 0.5593968370724531, + "grad_norm": 1.3724032789354885, + "learning_rate": 8.56711630760826e-06, + "loss": 0.6134, + "step": 18252 + }, + { + "epoch": 0.5594274855951943, + "grad_norm": 1.5530715323032793, + "learning_rate": 8.566133922019198e-06, + "loss": 0.7834, + "step": 18253 + }, + { + "epoch": 0.5594581341179355, + "grad_norm": 1.417171420217321, + "learning_rate": 8.565151550558388e-06, + "loss": 0.6569, + "step": 18254 + }, + { + "epoch": 0.5594887826406767, + "grad_norm": 1.75159525233229, + "learning_rate": 8.564169193235504e-06, + "loss": 0.6826, + "step": 18255 + }, + { + "epoch": 0.559519431163418, + "grad_norm": 1.527034201058781, + "learning_rate": 8.563186850060227e-06, + "loss": 0.755, + "step": 18256 + }, + { + "epoch": 0.5595500796861591, + "grad_norm": 1.5031142152239485, + "learning_rate": 8.562204521042238e-06, + "loss": 0.6023, + "step": 18257 + }, + { + "epoch": 0.5595807282089004, + "grad_norm": 1.7160653432775397, + "learning_rate": 8.561222206191218e-06, + "loss": 0.7588, + "step": 18258 + }, + { + "epoch": 0.5596113767316415, + "grad_norm": 1.607503514135677, + "learning_rate": 8.560239905516843e-06, + "loss": 0.7297, + "step": 18259 + }, + { + "epoch": 0.5596420252543828, + "grad_norm": 1.390817701501886, + "learning_rate": 8.55925761902879e-06, + "loss": 0.6513, + "step": 18260 + }, + { + "epoch": 0.5596726737771239, + "grad_norm": 1.534394188958312, + "learning_rate": 8.558275346736742e-06, + "loss": 0.6183, + "step": 18261 + }, + { + "epoch": 0.5597033222998652, + "grad_norm": 1.4744206931601322, + "learning_rate": 8.55729308865038e-06, + "loss": 0.7129, + "step": 18262 + }, + { + "epoch": 0.5597339708226063, + "grad_norm": 1.761665876255541, + "learning_rate": 8.55631084477937e-06, + "loss": 0.7088, + "step": 18263 + }, + { + "epoch": 0.5597646193453476, + "grad_norm": 1.3244123443988407, + "learning_rate": 8.555328615133406e-06, + "loss": 0.7363, + "step": 18264 + }, + { + "epoch": 0.5597952678680888, + "grad_norm": 1.6491526264452299, + "learning_rate": 8.554346399722157e-06, + "loss": 0.7235, + "step": 18265 + }, + { + "epoch": 0.55982591639083, + "grad_norm": 1.6222651389129785, + "learning_rate": 8.5533641985553e-06, + "loss": 0.7486, + "step": 18266 + }, + { + "epoch": 0.5598565649135712, + "grad_norm": 1.4418470814532016, + "learning_rate": 8.552382011642519e-06, + "loss": 0.678, + "step": 18267 + }, + { + "epoch": 0.5598872134363123, + "grad_norm": 1.5610658442301382, + "learning_rate": 8.551399838993485e-06, + "loss": 0.7562, + "step": 18268 + }, + { + "epoch": 0.5599178619590536, + "grad_norm": 1.5636530037197587, + "learning_rate": 8.550417680617882e-06, + "loss": 0.7928, + "step": 18269 + }, + { + "epoch": 0.5599485104817947, + "grad_norm": 0.6898592183620015, + "learning_rate": 8.549435536525384e-06, + "loss": 0.5922, + "step": 18270 + }, + { + "epoch": 0.559979159004536, + "grad_norm": 0.6790318454798276, + "learning_rate": 8.548453406725666e-06, + "loss": 0.5479, + "step": 18271 + }, + { + "epoch": 0.5600098075272771, + "grad_norm": 1.4776063950269558, + "learning_rate": 8.547471291228413e-06, + "loss": 0.7301, + "step": 18272 + }, + { + "epoch": 0.5600404560500184, + "grad_norm": 1.5661744617675806, + "learning_rate": 8.546489190043295e-06, + "loss": 0.762, + "step": 18273 + }, + { + "epoch": 0.5600711045727595, + "grad_norm": 1.4270098489212315, + "learning_rate": 8.545507103179986e-06, + "loss": 0.724, + "step": 18274 + }, + { + "epoch": 0.5601017530955008, + "grad_norm": 1.5064820984500145, + "learning_rate": 8.544525030648175e-06, + "loss": 0.726, + "step": 18275 + }, + { + "epoch": 0.560132401618242, + "grad_norm": 1.2541702393300034, + "learning_rate": 8.543542972457524e-06, + "loss": 0.6123, + "step": 18276 + }, + { + "epoch": 0.5601630501409832, + "grad_norm": 0.6572810951287895, + "learning_rate": 8.542560928617725e-06, + "loss": 0.5612, + "step": 18277 + }, + { + "epoch": 0.5601936986637244, + "grad_norm": 1.5539038955597593, + "learning_rate": 8.541578899138441e-06, + "loss": 0.6749, + "step": 18278 + }, + { + "epoch": 0.5602243471864656, + "grad_norm": 1.6446304629200643, + "learning_rate": 8.540596884029354e-06, + "loss": 0.7248, + "step": 18279 + }, + { + "epoch": 0.5602549957092068, + "grad_norm": 1.44264580537393, + "learning_rate": 8.53961488330014e-06, + "loss": 0.6311, + "step": 18280 + }, + { + "epoch": 0.560285644231948, + "grad_norm": 1.4441019679025924, + "learning_rate": 8.538632896960473e-06, + "loss": 0.7754, + "step": 18281 + }, + { + "epoch": 0.5603162927546892, + "grad_norm": 1.7147746119483862, + "learning_rate": 8.53765092502003e-06, + "loss": 0.738, + "step": 18282 + }, + { + "epoch": 0.5603469412774305, + "grad_norm": 1.6046496018027063, + "learning_rate": 8.536668967488488e-06, + "loss": 0.7468, + "step": 18283 + }, + { + "epoch": 0.5603775898001716, + "grad_norm": 1.6230685454601022, + "learning_rate": 8.53568702437552e-06, + "loss": 0.7571, + "step": 18284 + }, + { + "epoch": 0.5604082383229129, + "grad_norm": 1.7382132747354555, + "learning_rate": 8.534705095690801e-06, + "loss": 0.7311, + "step": 18285 + }, + { + "epoch": 0.560438886845654, + "grad_norm": 1.7088014245771643, + "learning_rate": 8.533723181444014e-06, + "loss": 0.7179, + "step": 18286 + }, + { + "epoch": 0.5604695353683953, + "grad_norm": 1.551400015905421, + "learning_rate": 8.532741281644819e-06, + "loss": 0.7728, + "step": 18287 + }, + { + "epoch": 0.5605001838911364, + "grad_norm": 1.6057281644710912, + "learning_rate": 8.531759396302906e-06, + "loss": 0.6829, + "step": 18288 + }, + { + "epoch": 0.5605308324138777, + "grad_norm": 1.633207495323697, + "learning_rate": 8.53077752542794e-06, + "loss": 0.7049, + "step": 18289 + }, + { + "epoch": 0.5605614809366188, + "grad_norm": 1.5130270759849949, + "learning_rate": 8.529795669029599e-06, + "loss": 0.7283, + "step": 18290 + }, + { + "epoch": 0.5605921294593601, + "grad_norm": 1.3540405796918582, + "learning_rate": 8.528813827117559e-06, + "loss": 0.6835, + "step": 18291 + }, + { + "epoch": 0.5606227779821013, + "grad_norm": 0.6643078339694259, + "learning_rate": 8.527831999701493e-06, + "loss": 0.5465, + "step": 18292 + }, + { + "epoch": 0.5606534265048425, + "grad_norm": 0.6769134031122933, + "learning_rate": 8.526850186791073e-06, + "loss": 0.5466, + "step": 18293 + }, + { + "epoch": 0.5606840750275837, + "grad_norm": 0.8340468021241668, + "learning_rate": 8.525868388395977e-06, + "loss": 0.5562, + "step": 18294 + }, + { + "epoch": 0.5607147235503249, + "grad_norm": 1.464973528947935, + "learning_rate": 8.524886604525873e-06, + "loss": 0.754, + "step": 18295 + }, + { + "epoch": 0.5607453720730661, + "grad_norm": 1.6153014054683723, + "learning_rate": 8.523904835190443e-06, + "loss": 0.7373, + "step": 18296 + }, + { + "epoch": 0.5607760205958073, + "grad_norm": 1.7702594552321635, + "learning_rate": 8.522923080399358e-06, + "loss": 0.7166, + "step": 18297 + }, + { + "epoch": 0.5608066691185485, + "grad_norm": 1.4967846472882782, + "learning_rate": 8.521941340162285e-06, + "loss": 0.5852, + "step": 18298 + }, + { + "epoch": 0.5608373176412896, + "grad_norm": 1.4206972156894855, + "learning_rate": 8.520959614488905e-06, + "loss": 0.6464, + "step": 18299 + }, + { + "epoch": 0.5608679661640309, + "grad_norm": 0.6704046719260643, + "learning_rate": 8.519977903388887e-06, + "loss": 0.5539, + "step": 18300 + }, + { + "epoch": 0.560898614686772, + "grad_norm": 1.4724310933584015, + "learning_rate": 8.518996206871905e-06, + "loss": 0.6636, + "step": 18301 + }, + { + "epoch": 0.5609292632095133, + "grad_norm": 1.576980788415698, + "learning_rate": 8.518014524947634e-06, + "loss": 0.6541, + "step": 18302 + }, + { + "epoch": 0.5609599117322545, + "grad_norm": 0.6872656963691308, + "learning_rate": 8.517032857625742e-06, + "loss": 0.5603, + "step": 18303 + }, + { + "epoch": 0.5609905602549957, + "grad_norm": 1.4074441639249402, + "learning_rate": 8.516051204915909e-06, + "loss": 0.7124, + "step": 18304 + }, + { + "epoch": 0.5610212087777369, + "grad_norm": 1.5599280429471076, + "learning_rate": 8.5150695668278e-06, + "loss": 0.6643, + "step": 18305 + }, + { + "epoch": 0.5610518573004781, + "grad_norm": 1.389784645662653, + "learning_rate": 8.51408794337109e-06, + "loss": 0.621, + "step": 18306 + }, + { + "epoch": 0.5610825058232193, + "grad_norm": 1.7526885569375852, + "learning_rate": 8.513106334555457e-06, + "loss": 0.7022, + "step": 18307 + }, + { + "epoch": 0.5611131543459605, + "grad_norm": 1.6556139716980824, + "learning_rate": 8.512124740390564e-06, + "loss": 0.7388, + "step": 18308 + }, + { + "epoch": 0.5611438028687017, + "grad_norm": 1.407959873567385, + "learning_rate": 8.511143160886085e-06, + "loss": 0.6729, + "step": 18309 + }, + { + "epoch": 0.561174451391443, + "grad_norm": 1.6171817777495126, + "learning_rate": 8.510161596051696e-06, + "loss": 0.7194, + "step": 18310 + }, + { + "epoch": 0.5612050999141841, + "grad_norm": 0.6811160736457083, + "learning_rate": 8.509180045897063e-06, + "loss": 0.5993, + "step": 18311 + }, + { + "epoch": 0.5612357484369254, + "grad_norm": 1.638310427821708, + "learning_rate": 8.508198510431861e-06, + "loss": 0.7061, + "step": 18312 + }, + { + "epoch": 0.5612663969596665, + "grad_norm": 1.5666188610484848, + "learning_rate": 8.507216989665765e-06, + "loss": 0.5939, + "step": 18313 + }, + { + "epoch": 0.5612970454824078, + "grad_norm": 1.5038644236114806, + "learning_rate": 8.506235483608437e-06, + "loss": 0.684, + "step": 18314 + }, + { + "epoch": 0.5613276940051489, + "grad_norm": 1.5002854717308935, + "learning_rate": 8.505253992269556e-06, + "loss": 0.6859, + "step": 18315 + }, + { + "epoch": 0.5613583425278902, + "grad_norm": 1.4334967251676227, + "learning_rate": 8.504272515658792e-06, + "loss": 0.7605, + "step": 18316 + }, + { + "epoch": 0.5613889910506313, + "grad_norm": 0.7010810654171087, + "learning_rate": 8.503291053785805e-06, + "loss": 0.5877, + "step": 18317 + }, + { + "epoch": 0.5614196395733726, + "grad_norm": 0.6797402423079001, + "learning_rate": 8.502309606660284e-06, + "loss": 0.5628, + "step": 18318 + }, + { + "epoch": 0.5614502880961137, + "grad_norm": 1.544081572446351, + "learning_rate": 8.501328174291885e-06, + "loss": 0.7042, + "step": 18319 + }, + { + "epoch": 0.561480936618855, + "grad_norm": 1.4577577495395506, + "learning_rate": 8.500346756690281e-06, + "loss": 0.7052, + "step": 18320 + }, + { + "epoch": 0.5615115851415962, + "grad_norm": 1.4721643307987828, + "learning_rate": 8.499365353865147e-06, + "loss": 0.6478, + "step": 18321 + }, + { + "epoch": 0.5615422336643374, + "grad_norm": 1.5104052549571385, + "learning_rate": 8.498383965826148e-06, + "loss": 0.7027, + "step": 18322 + }, + { + "epoch": 0.5615728821870786, + "grad_norm": 1.480568729079561, + "learning_rate": 8.497402592582959e-06, + "loss": 0.7314, + "step": 18323 + }, + { + "epoch": 0.5616035307098198, + "grad_norm": 1.4150268329849691, + "learning_rate": 8.496421234145246e-06, + "loss": 0.663, + "step": 18324 + }, + { + "epoch": 0.561634179232561, + "grad_norm": 1.3674200911091128, + "learning_rate": 8.495439890522677e-06, + "loss": 0.7311, + "step": 18325 + }, + { + "epoch": 0.5616648277553022, + "grad_norm": 1.4673999552340506, + "learning_rate": 8.494458561724925e-06, + "loss": 0.66, + "step": 18326 + }, + { + "epoch": 0.5616954762780434, + "grad_norm": 1.6238059491541146, + "learning_rate": 8.493477247761662e-06, + "loss": 0.7467, + "step": 18327 + }, + { + "epoch": 0.5617261248007847, + "grad_norm": 1.5172397041927197, + "learning_rate": 8.492495948642545e-06, + "loss": 0.7628, + "step": 18328 + }, + { + "epoch": 0.5617567733235258, + "grad_norm": 1.62587369699703, + "learning_rate": 8.491514664377258e-06, + "loss": 0.6924, + "step": 18329 + }, + { + "epoch": 0.561787421846267, + "grad_norm": 1.7467220568155633, + "learning_rate": 8.490533394975458e-06, + "loss": 0.7357, + "step": 18330 + }, + { + "epoch": 0.5618180703690082, + "grad_norm": 1.584235678200569, + "learning_rate": 8.489552140446824e-06, + "loss": 0.7593, + "step": 18331 + }, + { + "epoch": 0.5618487188917494, + "grad_norm": 1.6060593780689323, + "learning_rate": 8.488570900801016e-06, + "loss": 0.7837, + "step": 18332 + }, + { + "epoch": 0.5618793674144906, + "grad_norm": 1.5484660746063212, + "learning_rate": 8.487589676047705e-06, + "loss": 0.7089, + "step": 18333 + }, + { + "epoch": 0.5619100159372318, + "grad_norm": 0.6971337693482001, + "learning_rate": 8.486608466196561e-06, + "loss": 0.5679, + "step": 18334 + }, + { + "epoch": 0.561940664459973, + "grad_norm": 1.4359211980931592, + "learning_rate": 8.485627271257252e-06, + "loss": 0.5957, + "step": 18335 + }, + { + "epoch": 0.5619713129827142, + "grad_norm": 0.6779997421856973, + "learning_rate": 8.484646091239442e-06, + "loss": 0.5656, + "step": 18336 + }, + { + "epoch": 0.5620019615054554, + "grad_norm": 1.5158809829216873, + "learning_rate": 8.483664926152804e-06, + "loss": 0.7247, + "step": 18337 + }, + { + "epoch": 0.5620326100281966, + "grad_norm": 1.9001542876119086, + "learning_rate": 8.482683776007001e-06, + "loss": 0.7429, + "step": 18338 + }, + { + "epoch": 0.5620632585509379, + "grad_norm": 1.5534180879349564, + "learning_rate": 8.481702640811706e-06, + "loss": 0.698, + "step": 18339 + }, + { + "epoch": 0.562093907073679, + "grad_norm": 1.4006811158061852, + "learning_rate": 8.480721520576586e-06, + "loss": 0.6186, + "step": 18340 + }, + { + "epoch": 0.5621245555964203, + "grad_norm": 1.4976412539320583, + "learning_rate": 8.479740415311297e-06, + "loss": 0.6927, + "step": 18341 + }, + { + "epoch": 0.5621552041191614, + "grad_norm": 1.528625108737052, + "learning_rate": 8.478759325025523e-06, + "loss": 0.6801, + "step": 18342 + }, + { + "epoch": 0.5621858526419027, + "grad_norm": 1.554186833406659, + "learning_rate": 8.477778249728922e-06, + "loss": 0.7218, + "step": 18343 + }, + { + "epoch": 0.5622165011646438, + "grad_norm": 1.7167732770832551, + "learning_rate": 8.476797189431155e-06, + "loss": 0.8448, + "step": 18344 + }, + { + "epoch": 0.5622471496873851, + "grad_norm": 1.6063369380855772, + "learning_rate": 8.4758161441419e-06, + "loss": 0.7018, + "step": 18345 + }, + { + "epoch": 0.5622777982101262, + "grad_norm": 1.4691896290351563, + "learning_rate": 8.474835113870818e-06, + "loss": 0.6482, + "step": 18346 + }, + { + "epoch": 0.5623084467328675, + "grad_norm": 1.6306491350646326, + "learning_rate": 8.473854098627572e-06, + "loss": 0.6419, + "step": 18347 + }, + { + "epoch": 0.5623390952556087, + "grad_norm": 1.4732452985199247, + "learning_rate": 8.472873098421836e-06, + "loss": 0.6106, + "step": 18348 + }, + { + "epoch": 0.5623697437783499, + "grad_norm": 1.5080755461760649, + "learning_rate": 8.47189211326327e-06, + "loss": 0.7056, + "step": 18349 + }, + { + "epoch": 0.5624003923010911, + "grad_norm": 1.413391281758868, + "learning_rate": 8.470911143161547e-06, + "loss": 0.6897, + "step": 18350 + }, + { + "epoch": 0.5624310408238323, + "grad_norm": 1.7472481212775295, + "learning_rate": 8.469930188126323e-06, + "loss": 0.7092, + "step": 18351 + }, + { + "epoch": 0.5624616893465735, + "grad_norm": 1.3668462744086671, + "learning_rate": 8.468949248167269e-06, + "loss": 0.6978, + "step": 18352 + }, + { + "epoch": 0.5624923378693147, + "grad_norm": 1.6044635604757331, + "learning_rate": 8.46796832329405e-06, + "loss": 0.7112, + "step": 18353 + }, + { + "epoch": 0.5625229863920559, + "grad_norm": 1.5871671842287265, + "learning_rate": 8.466987413516331e-06, + "loss": 0.7044, + "step": 18354 + }, + { + "epoch": 0.5625536349147972, + "grad_norm": 0.7587702884292503, + "learning_rate": 8.466006518843777e-06, + "loss": 0.5781, + "step": 18355 + }, + { + "epoch": 0.5625842834375383, + "grad_norm": 1.7299615915604574, + "learning_rate": 8.465025639286053e-06, + "loss": 0.7366, + "step": 18356 + }, + { + "epoch": 0.5626149319602796, + "grad_norm": 1.508022351789155, + "learning_rate": 8.464044774852824e-06, + "loss": 0.7222, + "step": 18357 + }, + { + "epoch": 0.5626455804830207, + "grad_norm": 1.46862047389862, + "learning_rate": 8.463063925553756e-06, + "loss": 0.6557, + "step": 18358 + }, + { + "epoch": 0.562676229005762, + "grad_norm": 1.4062172416380017, + "learning_rate": 8.462083091398514e-06, + "loss": 0.6511, + "step": 18359 + }, + { + "epoch": 0.5627068775285031, + "grad_norm": 1.4554247144518415, + "learning_rate": 8.461102272396754e-06, + "loss": 0.7154, + "step": 18360 + }, + { + "epoch": 0.5627375260512444, + "grad_norm": 1.5659956546677083, + "learning_rate": 8.460121468558157e-06, + "loss": 0.6779, + "step": 18361 + }, + { + "epoch": 0.5627681745739855, + "grad_norm": 1.454121447098287, + "learning_rate": 8.459140679892372e-06, + "loss": 0.7446, + "step": 18362 + }, + { + "epoch": 0.5627988230967267, + "grad_norm": 1.430900802435012, + "learning_rate": 8.458159906409067e-06, + "loss": 0.6896, + "step": 18363 + }, + { + "epoch": 0.562829471619468, + "grad_norm": 1.5870751763653186, + "learning_rate": 8.457179148117907e-06, + "loss": 0.7721, + "step": 18364 + }, + { + "epoch": 0.5628601201422091, + "grad_norm": 0.699703189820915, + "learning_rate": 8.456198405028558e-06, + "loss": 0.5298, + "step": 18365 + }, + { + "epoch": 0.5628907686649504, + "grad_norm": 1.499605251843779, + "learning_rate": 8.455217677150679e-06, + "loss": 0.7341, + "step": 18366 + }, + { + "epoch": 0.5629214171876915, + "grad_norm": 1.5735118079850388, + "learning_rate": 8.454236964493936e-06, + "loss": 0.6809, + "step": 18367 + }, + { + "epoch": 0.5629520657104328, + "grad_norm": 1.3546947581034743, + "learning_rate": 8.453256267067991e-06, + "loss": 0.6973, + "step": 18368 + }, + { + "epoch": 0.5629827142331739, + "grad_norm": 1.2523459458659776, + "learning_rate": 8.452275584882508e-06, + "loss": 0.6778, + "step": 18369 + }, + { + "epoch": 0.5630133627559152, + "grad_norm": 1.327299281854057, + "learning_rate": 8.451294917947156e-06, + "loss": 0.7427, + "step": 18370 + }, + { + "epoch": 0.5630440112786563, + "grad_norm": 1.3915755203624585, + "learning_rate": 8.45031426627158e-06, + "loss": 0.6653, + "step": 18371 + }, + { + "epoch": 0.5630746598013976, + "grad_norm": 1.5697523214725007, + "learning_rate": 8.449333629865462e-06, + "loss": 0.7598, + "step": 18372 + }, + { + "epoch": 0.5631053083241387, + "grad_norm": 1.5352481447632613, + "learning_rate": 8.448353008738456e-06, + "loss": 0.6856, + "step": 18373 + }, + { + "epoch": 0.56313595684688, + "grad_norm": 1.6456366277542824, + "learning_rate": 8.447372402900222e-06, + "loss": 0.7077, + "step": 18374 + }, + { + "epoch": 0.5631666053696212, + "grad_norm": 1.3902662541986892, + "learning_rate": 8.446391812360426e-06, + "loss": 0.691, + "step": 18375 + }, + { + "epoch": 0.5631972538923624, + "grad_norm": 1.543791425572837, + "learning_rate": 8.445411237128727e-06, + "loss": 0.7229, + "step": 18376 + }, + { + "epoch": 0.5632279024151036, + "grad_norm": 1.4386738055425612, + "learning_rate": 8.444430677214792e-06, + "loss": 0.6673, + "step": 18377 + }, + { + "epoch": 0.5632585509378448, + "grad_norm": 1.62619496858721, + "learning_rate": 8.44345013262828e-06, + "loss": 0.7213, + "step": 18378 + }, + { + "epoch": 0.563289199460586, + "grad_norm": 1.6563239157654022, + "learning_rate": 8.442469603378847e-06, + "loss": 0.7563, + "step": 18379 + }, + { + "epoch": 0.5633198479833272, + "grad_norm": 1.3127463014015253, + "learning_rate": 8.441489089476165e-06, + "loss": 0.6488, + "step": 18380 + }, + { + "epoch": 0.5633504965060684, + "grad_norm": 1.7852001692909139, + "learning_rate": 8.44050859092989e-06, + "loss": 0.8409, + "step": 18381 + }, + { + "epoch": 0.5633811450288096, + "grad_norm": 0.6667894945439418, + "learning_rate": 8.439528107749677e-06, + "loss": 0.5827, + "step": 18382 + }, + { + "epoch": 0.5634117935515508, + "grad_norm": 1.4311958976337942, + "learning_rate": 8.4385476399452e-06, + "loss": 0.6599, + "step": 18383 + }, + { + "epoch": 0.5634424420742921, + "grad_norm": 1.4870184759841516, + "learning_rate": 8.437567187526105e-06, + "loss": 0.7243, + "step": 18384 + }, + { + "epoch": 0.5634730905970332, + "grad_norm": 1.4371100076735144, + "learning_rate": 8.436586750502067e-06, + "loss": 0.6666, + "step": 18385 + }, + { + "epoch": 0.5635037391197745, + "grad_norm": 1.64250868246954, + "learning_rate": 8.435606328882738e-06, + "loss": 0.6886, + "step": 18386 + }, + { + "epoch": 0.5635343876425156, + "grad_norm": 1.5311452753463144, + "learning_rate": 8.434625922677777e-06, + "loss": 0.614, + "step": 18387 + }, + { + "epoch": 0.5635650361652569, + "grad_norm": 0.6526793578768546, + "learning_rate": 8.43364553189685e-06, + "loss": 0.5562, + "step": 18388 + }, + { + "epoch": 0.563595684687998, + "grad_norm": 1.6723849888021949, + "learning_rate": 8.432665156549616e-06, + "loss": 0.783, + "step": 18389 + }, + { + "epoch": 0.5636263332107393, + "grad_norm": 0.6611230982777131, + "learning_rate": 8.43168479664573e-06, + "loss": 0.5592, + "step": 18390 + }, + { + "epoch": 0.5636569817334804, + "grad_norm": 0.6446161941672763, + "learning_rate": 8.430704452194856e-06, + "loss": 0.5508, + "step": 18391 + }, + { + "epoch": 0.5636876302562217, + "grad_norm": 1.3455238142497892, + "learning_rate": 8.429724123206655e-06, + "loss": 0.735, + "step": 18392 + }, + { + "epoch": 0.5637182787789629, + "grad_norm": 1.4988945976959702, + "learning_rate": 8.428743809690779e-06, + "loss": 0.7081, + "step": 18393 + }, + { + "epoch": 0.563748927301704, + "grad_norm": 1.4900417649244222, + "learning_rate": 8.427763511656897e-06, + "loss": 0.6537, + "step": 18394 + }, + { + "epoch": 0.5637795758244453, + "grad_norm": 1.4747891059858471, + "learning_rate": 8.426783229114659e-06, + "loss": 0.5922, + "step": 18395 + }, + { + "epoch": 0.5638102243471864, + "grad_norm": 1.3171191007356196, + "learning_rate": 8.425802962073732e-06, + "loss": 0.6873, + "step": 18396 + }, + { + "epoch": 0.5638408728699277, + "grad_norm": 1.4452424087517612, + "learning_rate": 8.42482271054377e-06, + "loss": 0.695, + "step": 18397 + }, + { + "epoch": 0.5638715213926688, + "grad_norm": 1.537611154406471, + "learning_rate": 8.423842474534432e-06, + "loss": 0.7376, + "step": 18398 + }, + { + "epoch": 0.5639021699154101, + "grad_norm": 1.5661162720726483, + "learning_rate": 8.422862254055379e-06, + "loss": 0.709, + "step": 18399 + }, + { + "epoch": 0.5639328184381512, + "grad_norm": 1.6061958571413624, + "learning_rate": 8.421882049116266e-06, + "loss": 0.6384, + "step": 18400 + }, + { + "epoch": 0.5639634669608925, + "grad_norm": 1.3107695558691586, + "learning_rate": 8.420901859726753e-06, + "loss": 0.7866, + "step": 18401 + }, + { + "epoch": 0.5639941154836337, + "grad_norm": 1.5007123389771948, + "learning_rate": 8.4199216858965e-06, + "loss": 0.7494, + "step": 18402 + }, + { + "epoch": 0.5640247640063749, + "grad_norm": 1.524021806260327, + "learning_rate": 8.41894152763516e-06, + "loss": 0.636, + "step": 18403 + }, + { + "epoch": 0.5640554125291161, + "grad_norm": 1.4381486111395454, + "learning_rate": 8.417961384952398e-06, + "loss": 0.5999, + "step": 18404 + }, + { + "epoch": 0.5640860610518573, + "grad_norm": 0.7296447780274532, + "learning_rate": 8.416981257857865e-06, + "loss": 0.5633, + "step": 18405 + }, + { + "epoch": 0.5641167095745985, + "grad_norm": 1.6531207598996247, + "learning_rate": 8.41600114636122e-06, + "loss": 0.6952, + "step": 18406 + }, + { + "epoch": 0.5641473580973397, + "grad_norm": 1.354354456192897, + "learning_rate": 8.41502105047212e-06, + "loss": 0.6692, + "step": 18407 + }, + { + "epoch": 0.5641780066200809, + "grad_norm": 1.455850109778316, + "learning_rate": 8.414040970200225e-06, + "loss": 0.7318, + "step": 18408 + }, + { + "epoch": 0.5642086551428221, + "grad_norm": 1.5497898293579069, + "learning_rate": 8.413060905555189e-06, + "loss": 0.6234, + "step": 18409 + }, + { + "epoch": 0.5642393036655633, + "grad_norm": 1.4943841438942709, + "learning_rate": 8.412080856546671e-06, + "loss": 0.6613, + "step": 18410 + }, + { + "epoch": 0.5642699521883046, + "grad_norm": 0.6583431797287855, + "learning_rate": 8.411100823184324e-06, + "loss": 0.5525, + "step": 18411 + }, + { + "epoch": 0.5643006007110457, + "grad_norm": 1.485031163424748, + "learning_rate": 8.41012080547781e-06, + "loss": 0.6722, + "step": 18412 + }, + { + "epoch": 0.564331249233787, + "grad_norm": 1.7481115052784966, + "learning_rate": 8.409140803436785e-06, + "loss": 0.7249, + "step": 18413 + }, + { + "epoch": 0.5643618977565281, + "grad_norm": 1.4808919113130097, + "learning_rate": 8.408160817070896e-06, + "loss": 0.6964, + "step": 18414 + }, + { + "epoch": 0.5643925462792694, + "grad_norm": 1.3280660164852542, + "learning_rate": 8.40718084638981e-06, + "loss": 0.6513, + "step": 18415 + }, + { + "epoch": 0.5644231948020105, + "grad_norm": 1.4749186064269821, + "learning_rate": 8.40620089140318e-06, + "loss": 0.6766, + "step": 18416 + }, + { + "epoch": 0.5644538433247518, + "grad_norm": 1.4569214656175589, + "learning_rate": 8.405220952120656e-06, + "loss": 0.645, + "step": 18417 + }, + { + "epoch": 0.5644844918474929, + "grad_norm": 1.3856992890109954, + "learning_rate": 8.404241028551902e-06, + "loss": 0.6862, + "step": 18418 + }, + { + "epoch": 0.5645151403702342, + "grad_norm": 0.6825654290243922, + "learning_rate": 8.403261120706567e-06, + "loss": 0.5829, + "step": 18419 + }, + { + "epoch": 0.5645457888929754, + "grad_norm": 1.4522750519609438, + "learning_rate": 8.40228122859431e-06, + "loss": 0.7681, + "step": 18420 + }, + { + "epoch": 0.5645764374157166, + "grad_norm": 0.6923669487678963, + "learning_rate": 8.401301352224783e-06, + "loss": 0.5494, + "step": 18421 + }, + { + "epoch": 0.5646070859384578, + "grad_norm": 1.6000551722217682, + "learning_rate": 8.400321491607642e-06, + "loss": 0.7843, + "step": 18422 + }, + { + "epoch": 0.564637734461199, + "grad_norm": 0.6583187278777216, + "learning_rate": 8.399341646752545e-06, + "loss": 0.5526, + "step": 18423 + }, + { + "epoch": 0.5646683829839402, + "grad_norm": 1.5588515307949655, + "learning_rate": 8.398361817669147e-06, + "loss": 0.5541, + "step": 18424 + }, + { + "epoch": 0.5646990315066813, + "grad_norm": 1.4356320073078317, + "learning_rate": 8.397382004367095e-06, + "loss": 0.6346, + "step": 18425 + }, + { + "epoch": 0.5647296800294226, + "grad_norm": 1.51038284339785, + "learning_rate": 8.39640220685605e-06, + "loss": 0.7464, + "step": 18426 + }, + { + "epoch": 0.5647603285521637, + "grad_norm": 1.6223205401955036, + "learning_rate": 8.395422425145668e-06, + "loss": 0.6075, + "step": 18427 + }, + { + "epoch": 0.564790977074905, + "grad_norm": 1.3716455557094716, + "learning_rate": 8.394442659245592e-06, + "loss": 0.6046, + "step": 18428 + }, + { + "epoch": 0.5648216255976461, + "grad_norm": 1.4930691058201102, + "learning_rate": 8.393462909165488e-06, + "loss": 0.6766, + "step": 18429 + }, + { + "epoch": 0.5648522741203874, + "grad_norm": 1.9244681598481619, + "learning_rate": 8.392483174915002e-06, + "loss": 0.6918, + "step": 18430 + }, + { + "epoch": 0.5648829226431286, + "grad_norm": 1.579923170426291, + "learning_rate": 8.391503456503793e-06, + "loss": 0.7108, + "step": 18431 + }, + { + "epoch": 0.5649135711658698, + "grad_norm": 1.5418774759434333, + "learning_rate": 8.390523753941512e-06, + "loss": 0.7193, + "step": 18432 + }, + { + "epoch": 0.564944219688611, + "grad_norm": 1.9625439878568347, + "learning_rate": 8.389544067237811e-06, + "loss": 0.7565, + "step": 18433 + }, + { + "epoch": 0.5649748682113522, + "grad_norm": 1.5874940704873788, + "learning_rate": 8.388564396402347e-06, + "loss": 0.7355, + "step": 18434 + }, + { + "epoch": 0.5650055167340934, + "grad_norm": 1.5054248465664695, + "learning_rate": 8.387584741444771e-06, + "loss": 0.6498, + "step": 18435 + }, + { + "epoch": 0.5650361652568346, + "grad_norm": 1.4927753252451816, + "learning_rate": 8.386605102374729e-06, + "loss": 0.6596, + "step": 18436 + }, + { + "epoch": 0.5650668137795758, + "grad_norm": 1.51587813901546, + "learning_rate": 8.385625479201885e-06, + "loss": 0.703, + "step": 18437 + }, + { + "epoch": 0.565097462302317, + "grad_norm": 1.5362525875865758, + "learning_rate": 8.384645871935881e-06, + "loss": 0.6908, + "step": 18438 + }, + { + "epoch": 0.5651281108250582, + "grad_norm": 2.037969231986258, + "learning_rate": 8.383666280586382e-06, + "loss": 0.8097, + "step": 18439 + }, + { + "epoch": 0.5651587593477995, + "grad_norm": 1.410287538335948, + "learning_rate": 8.382686705163028e-06, + "loss": 0.7155, + "step": 18440 + }, + { + "epoch": 0.5651894078705406, + "grad_norm": 1.8864611914319986, + "learning_rate": 8.381707145675475e-06, + "loss": 0.8311, + "step": 18441 + }, + { + "epoch": 0.5652200563932819, + "grad_norm": 1.7170184622351083, + "learning_rate": 8.380727602133379e-06, + "loss": 0.671, + "step": 18442 + }, + { + "epoch": 0.565250704916023, + "grad_norm": 1.4001672800995844, + "learning_rate": 8.379748074546385e-06, + "loss": 0.6889, + "step": 18443 + }, + { + "epoch": 0.5652813534387643, + "grad_norm": 1.5922752319155298, + "learning_rate": 8.378768562924149e-06, + "loss": 0.7986, + "step": 18444 + }, + { + "epoch": 0.5653120019615054, + "grad_norm": 1.3861095676782265, + "learning_rate": 8.377789067276322e-06, + "loss": 0.6618, + "step": 18445 + }, + { + "epoch": 0.5653426504842467, + "grad_norm": 1.4176365755019003, + "learning_rate": 8.376809587612555e-06, + "loss": 0.6423, + "step": 18446 + }, + { + "epoch": 0.5653732990069879, + "grad_norm": 0.7014589797009776, + "learning_rate": 8.375830123942497e-06, + "loss": 0.5575, + "step": 18447 + }, + { + "epoch": 0.5654039475297291, + "grad_norm": 1.6260097280733545, + "learning_rate": 8.3748506762758e-06, + "loss": 0.7188, + "step": 18448 + }, + { + "epoch": 0.5654345960524703, + "grad_norm": 1.4782024925372812, + "learning_rate": 8.373871244622114e-06, + "loss": 0.7501, + "step": 18449 + }, + { + "epoch": 0.5654652445752115, + "grad_norm": 1.7587118189644118, + "learning_rate": 8.372891828991092e-06, + "loss": 0.7023, + "step": 18450 + }, + { + "epoch": 0.5654958930979527, + "grad_norm": 1.5418384435013228, + "learning_rate": 8.371912429392385e-06, + "loss": 0.7553, + "step": 18451 + }, + { + "epoch": 0.5655265416206939, + "grad_norm": 1.4338956364615603, + "learning_rate": 8.370933045835638e-06, + "loss": 0.5386, + "step": 18452 + }, + { + "epoch": 0.5655571901434351, + "grad_norm": 1.3505912298954348, + "learning_rate": 8.369953678330507e-06, + "loss": 0.734, + "step": 18453 + }, + { + "epoch": 0.5655878386661763, + "grad_norm": 1.6625507972473355, + "learning_rate": 8.368974326886641e-06, + "loss": 0.7556, + "step": 18454 + }, + { + "epoch": 0.5656184871889175, + "grad_norm": 1.1811374173492315, + "learning_rate": 8.367994991513682e-06, + "loss": 0.5421, + "step": 18455 + }, + { + "epoch": 0.5656491357116586, + "grad_norm": 1.543610649098747, + "learning_rate": 8.367015672221292e-06, + "loss": 0.7145, + "step": 18456 + }, + { + "epoch": 0.5656797842343999, + "grad_norm": 1.6322426867273818, + "learning_rate": 8.36603636901911e-06, + "loss": 0.6921, + "step": 18457 + }, + { + "epoch": 0.5657104327571411, + "grad_norm": 1.6642032526895563, + "learning_rate": 8.365057081916795e-06, + "loss": 0.743, + "step": 18458 + }, + { + "epoch": 0.5657410812798823, + "grad_norm": 1.4561439403701382, + "learning_rate": 8.364077810923987e-06, + "loss": 0.5873, + "step": 18459 + }, + { + "epoch": 0.5657717298026235, + "grad_norm": 1.4738673439770642, + "learning_rate": 8.363098556050339e-06, + "loss": 0.7923, + "step": 18460 + }, + { + "epoch": 0.5658023783253647, + "grad_norm": 1.3502749679413206, + "learning_rate": 8.362119317305502e-06, + "loss": 0.59, + "step": 18461 + }, + { + "epoch": 0.5658330268481059, + "grad_norm": 1.5547399736476766, + "learning_rate": 8.36114009469912e-06, + "loss": 0.6454, + "step": 18462 + }, + { + "epoch": 0.5658636753708471, + "grad_norm": 1.4181398520202289, + "learning_rate": 8.360160888240843e-06, + "loss": 0.6685, + "step": 18463 + }, + { + "epoch": 0.5658943238935883, + "grad_norm": 1.628631948256468, + "learning_rate": 8.359181697940325e-06, + "loss": 0.7526, + "step": 18464 + }, + { + "epoch": 0.5659249724163296, + "grad_norm": 1.3831691923031355, + "learning_rate": 8.358202523807204e-06, + "loss": 0.6049, + "step": 18465 + }, + { + "epoch": 0.5659556209390707, + "grad_norm": 1.46512805931202, + "learning_rate": 8.357223365851138e-06, + "loss": 0.5897, + "step": 18466 + }, + { + "epoch": 0.565986269461812, + "grad_norm": 1.4123864213253716, + "learning_rate": 8.356244224081772e-06, + "loss": 0.6253, + "step": 18467 + }, + { + "epoch": 0.5660169179845531, + "grad_norm": 1.5480927357276335, + "learning_rate": 8.355265098508745e-06, + "loss": 0.7941, + "step": 18468 + }, + { + "epoch": 0.5660475665072944, + "grad_norm": 1.5669816797784943, + "learning_rate": 8.354285989141718e-06, + "loss": 0.6572, + "step": 18469 + }, + { + "epoch": 0.5660782150300355, + "grad_norm": 1.689706936441605, + "learning_rate": 8.35330689599033e-06, + "loss": 0.793, + "step": 18470 + }, + { + "epoch": 0.5661088635527768, + "grad_norm": 1.3909261324172533, + "learning_rate": 8.35232781906423e-06, + "loss": 0.7083, + "step": 18471 + }, + { + "epoch": 0.5661395120755179, + "grad_norm": 1.4752633742849675, + "learning_rate": 8.351348758373067e-06, + "loss": 0.6985, + "step": 18472 + }, + { + "epoch": 0.5661701605982592, + "grad_norm": 1.36033481748972, + "learning_rate": 8.350369713926486e-06, + "loss": 0.6537, + "step": 18473 + }, + { + "epoch": 0.5662008091210003, + "grad_norm": 1.4504966851009242, + "learning_rate": 8.349390685734133e-06, + "loss": 0.7069, + "step": 18474 + }, + { + "epoch": 0.5662314576437416, + "grad_norm": 0.738018287844589, + "learning_rate": 8.348411673805656e-06, + "loss": 0.5811, + "step": 18475 + }, + { + "epoch": 0.5662621061664828, + "grad_norm": 1.7699676648752478, + "learning_rate": 8.347432678150702e-06, + "loss": 0.8737, + "step": 18476 + }, + { + "epoch": 0.566292754689224, + "grad_norm": 1.583916679368825, + "learning_rate": 8.346453698778917e-06, + "loss": 0.7261, + "step": 18477 + }, + { + "epoch": 0.5663234032119652, + "grad_norm": 1.4966224389309066, + "learning_rate": 8.34547473569995e-06, + "loss": 0.6677, + "step": 18478 + }, + { + "epoch": 0.5663540517347064, + "grad_norm": 1.622836331136983, + "learning_rate": 8.344495788923437e-06, + "loss": 0.693, + "step": 18479 + }, + { + "epoch": 0.5663847002574476, + "grad_norm": 0.6767741184432681, + "learning_rate": 8.343516858459037e-06, + "loss": 0.5644, + "step": 18480 + }, + { + "epoch": 0.5664153487801888, + "grad_norm": 1.601475034098218, + "learning_rate": 8.342537944316385e-06, + "loss": 0.7435, + "step": 18481 + }, + { + "epoch": 0.56644599730293, + "grad_norm": 1.5257545078376626, + "learning_rate": 8.34155904650513e-06, + "loss": 0.7162, + "step": 18482 + }, + { + "epoch": 0.5664766458256713, + "grad_norm": 0.6861035913529037, + "learning_rate": 8.340580165034922e-06, + "loss": 0.5739, + "step": 18483 + }, + { + "epoch": 0.5665072943484124, + "grad_norm": 0.7109997150824664, + "learning_rate": 8.339601299915398e-06, + "loss": 0.5728, + "step": 18484 + }, + { + "epoch": 0.5665379428711537, + "grad_norm": 1.299554304476257, + "learning_rate": 8.338622451156211e-06, + "loss": 0.63, + "step": 18485 + }, + { + "epoch": 0.5665685913938948, + "grad_norm": 0.6968841059782942, + "learning_rate": 8.337643618767001e-06, + "loss": 0.5676, + "step": 18486 + }, + { + "epoch": 0.566599239916636, + "grad_norm": 1.5666298182304743, + "learning_rate": 8.336664802757411e-06, + "loss": 0.7317, + "step": 18487 + }, + { + "epoch": 0.5666298884393772, + "grad_norm": 1.5188208881867156, + "learning_rate": 8.33568600313709e-06, + "loss": 0.7181, + "step": 18488 + }, + { + "epoch": 0.5666605369621184, + "grad_norm": 1.7411908961171336, + "learning_rate": 8.334707219915685e-06, + "loss": 0.7124, + "step": 18489 + }, + { + "epoch": 0.5666911854848596, + "grad_norm": 1.4170838297641535, + "learning_rate": 8.333728453102829e-06, + "loss": 0.6616, + "step": 18490 + }, + { + "epoch": 0.5667218340076008, + "grad_norm": 1.6719133959500523, + "learning_rate": 8.332749702708179e-06, + "loss": 0.754, + "step": 18491 + }, + { + "epoch": 0.566752482530342, + "grad_norm": 1.5822224937812785, + "learning_rate": 8.331770968741368e-06, + "loss": 0.803, + "step": 18492 + }, + { + "epoch": 0.5667831310530832, + "grad_norm": 1.4642785242714123, + "learning_rate": 8.330792251212047e-06, + "loss": 0.6243, + "step": 18493 + }, + { + "epoch": 0.5668137795758245, + "grad_norm": 1.4997668039522525, + "learning_rate": 8.329813550129857e-06, + "loss": 0.6811, + "step": 18494 + }, + { + "epoch": 0.5668444280985656, + "grad_norm": 1.443921342557474, + "learning_rate": 8.328834865504439e-06, + "loss": 0.6329, + "step": 18495 + }, + { + "epoch": 0.5668750766213069, + "grad_norm": 1.5230405049773983, + "learning_rate": 8.327856197345441e-06, + "loss": 0.6894, + "step": 18496 + }, + { + "epoch": 0.566905725144048, + "grad_norm": 1.541370045826342, + "learning_rate": 8.326877545662503e-06, + "loss": 0.7228, + "step": 18497 + }, + { + "epoch": 0.5669363736667893, + "grad_norm": 0.7859934062214113, + "learning_rate": 8.325898910465268e-06, + "loss": 0.5739, + "step": 18498 + }, + { + "epoch": 0.5669670221895304, + "grad_norm": 0.7509638216528164, + "learning_rate": 8.324920291763382e-06, + "loss": 0.5875, + "step": 18499 + }, + { + "epoch": 0.5669976707122717, + "grad_norm": 1.5675629814471221, + "learning_rate": 8.323941689566484e-06, + "loss": 0.7466, + "step": 18500 + }, + { + "epoch": 0.5670283192350128, + "grad_norm": 1.471541294117256, + "learning_rate": 8.322963103884214e-06, + "loss": 0.6846, + "step": 18501 + }, + { + "epoch": 0.5670589677577541, + "grad_norm": 0.682266810508187, + "learning_rate": 8.32198453472622e-06, + "loss": 0.5868, + "step": 18502 + }, + { + "epoch": 0.5670896162804953, + "grad_norm": 1.6748454785541778, + "learning_rate": 8.321005982102142e-06, + "loss": 0.7233, + "step": 18503 + }, + { + "epoch": 0.5671202648032365, + "grad_norm": 1.3294848058285123, + "learning_rate": 8.320027446021621e-06, + "loss": 0.6458, + "step": 18504 + }, + { + "epoch": 0.5671509133259777, + "grad_norm": 1.5316871807785337, + "learning_rate": 8.319048926494301e-06, + "loss": 0.6615, + "step": 18505 + }, + { + "epoch": 0.5671815618487189, + "grad_norm": 1.886207049221564, + "learning_rate": 8.318070423529818e-06, + "loss": 0.6326, + "step": 18506 + }, + { + "epoch": 0.5672122103714601, + "grad_norm": 1.4142885673450634, + "learning_rate": 8.317091937137821e-06, + "loss": 0.8087, + "step": 18507 + }, + { + "epoch": 0.5672428588942013, + "grad_norm": 1.5308850164553012, + "learning_rate": 8.31611346732795e-06, + "loss": 0.6302, + "step": 18508 + }, + { + "epoch": 0.5672735074169425, + "grad_norm": 1.4123166040794384, + "learning_rate": 8.315135014109838e-06, + "loss": 0.672, + "step": 18509 + }, + { + "epoch": 0.5673041559396838, + "grad_norm": 1.5659957866682057, + "learning_rate": 8.314156577493137e-06, + "loss": 0.772, + "step": 18510 + }, + { + "epoch": 0.5673348044624249, + "grad_norm": 1.7540353104136668, + "learning_rate": 8.313178157487476e-06, + "loss": 0.8027, + "step": 18511 + }, + { + "epoch": 0.5673654529851662, + "grad_norm": 1.4082379262547664, + "learning_rate": 8.312199754102508e-06, + "loss": 0.6345, + "step": 18512 + }, + { + "epoch": 0.5673961015079073, + "grad_norm": 1.536523870905137, + "learning_rate": 8.311221367347867e-06, + "loss": 0.6575, + "step": 18513 + }, + { + "epoch": 0.5674267500306486, + "grad_norm": 1.425313333699687, + "learning_rate": 8.310242997233192e-06, + "loss": 0.6427, + "step": 18514 + }, + { + "epoch": 0.5674573985533897, + "grad_norm": 0.7037477980381847, + "learning_rate": 8.309264643768127e-06, + "loss": 0.5764, + "step": 18515 + }, + { + "epoch": 0.567488047076131, + "grad_norm": 1.3945708566540593, + "learning_rate": 8.30828630696231e-06, + "loss": 0.6548, + "step": 18516 + }, + { + "epoch": 0.5675186955988721, + "grad_norm": 1.51176349105446, + "learning_rate": 8.30730798682538e-06, + "loss": 0.6186, + "step": 18517 + }, + { + "epoch": 0.5675493441216133, + "grad_norm": 1.4785893553232048, + "learning_rate": 8.306329683366976e-06, + "loss": 0.6456, + "step": 18518 + }, + { + "epoch": 0.5675799926443545, + "grad_norm": 1.404293897769064, + "learning_rate": 8.305351396596743e-06, + "loss": 0.7798, + "step": 18519 + }, + { + "epoch": 0.5676106411670957, + "grad_norm": 1.5108272681257406, + "learning_rate": 8.30437312652431e-06, + "loss": 0.6489, + "step": 18520 + }, + { + "epoch": 0.567641289689837, + "grad_norm": 1.5135117997638254, + "learning_rate": 8.303394873159329e-06, + "loss": 0.6714, + "step": 18521 + }, + { + "epoch": 0.5676719382125781, + "grad_norm": 1.3992098445518948, + "learning_rate": 8.302416636511427e-06, + "loss": 0.6454, + "step": 18522 + }, + { + "epoch": 0.5677025867353194, + "grad_norm": 1.7216833701754934, + "learning_rate": 8.301438416590253e-06, + "loss": 0.6586, + "step": 18523 + }, + { + "epoch": 0.5677332352580605, + "grad_norm": 1.4704606004256908, + "learning_rate": 8.30046021340544e-06, + "loss": 0.6511, + "step": 18524 + }, + { + "epoch": 0.5677638837808018, + "grad_norm": 0.6829141193897305, + "learning_rate": 8.299482026966624e-06, + "loss": 0.5588, + "step": 18525 + }, + { + "epoch": 0.5677945323035429, + "grad_norm": 1.5542870096931098, + "learning_rate": 8.29850385728345e-06, + "loss": 0.6225, + "step": 18526 + }, + { + "epoch": 0.5678251808262842, + "grad_norm": 1.6595238992434174, + "learning_rate": 8.297525704365553e-06, + "loss": 0.7154, + "step": 18527 + }, + { + "epoch": 0.5678558293490253, + "grad_norm": 1.4184090918715264, + "learning_rate": 8.296547568222567e-06, + "loss": 0.7212, + "step": 18528 + }, + { + "epoch": 0.5678864778717666, + "grad_norm": 1.6917120390809133, + "learning_rate": 8.295569448864137e-06, + "loss": 0.7328, + "step": 18529 + }, + { + "epoch": 0.5679171263945078, + "grad_norm": 0.6801194448173125, + "learning_rate": 8.294591346299894e-06, + "loss": 0.5583, + "step": 18530 + }, + { + "epoch": 0.567947774917249, + "grad_norm": 1.7312249400188333, + "learning_rate": 8.293613260539483e-06, + "loss": 0.7015, + "step": 18531 + }, + { + "epoch": 0.5679784234399902, + "grad_norm": 1.7124972707007307, + "learning_rate": 8.292635191592537e-06, + "loss": 0.8243, + "step": 18532 + }, + { + "epoch": 0.5680090719627314, + "grad_norm": 1.5499077509432235, + "learning_rate": 8.291657139468689e-06, + "loss": 0.7234, + "step": 18533 + }, + { + "epoch": 0.5680397204854726, + "grad_norm": 1.6385169214500623, + "learning_rate": 8.290679104177586e-06, + "loss": 0.7342, + "step": 18534 + }, + { + "epoch": 0.5680703690082138, + "grad_norm": 0.6488997003439035, + "learning_rate": 8.289701085728856e-06, + "loss": 0.5682, + "step": 18535 + }, + { + "epoch": 0.568101017530955, + "grad_norm": 1.4106520720892468, + "learning_rate": 8.288723084132137e-06, + "loss": 0.6474, + "step": 18536 + }, + { + "epoch": 0.5681316660536962, + "grad_norm": 1.4859961586736228, + "learning_rate": 8.28774509939707e-06, + "loss": 0.6502, + "step": 18537 + }, + { + "epoch": 0.5681623145764374, + "grad_norm": 1.4552002624282792, + "learning_rate": 8.286767131533286e-06, + "loss": 0.7535, + "step": 18538 + }, + { + "epoch": 0.5681929630991787, + "grad_norm": 1.4611631259965394, + "learning_rate": 8.285789180550427e-06, + "loss": 0.5475, + "step": 18539 + }, + { + "epoch": 0.5682236116219198, + "grad_norm": 1.5045723063503296, + "learning_rate": 8.284811246458125e-06, + "loss": 0.6971, + "step": 18540 + }, + { + "epoch": 0.5682542601446611, + "grad_norm": 1.563883921480955, + "learning_rate": 8.283833329266015e-06, + "loss": 0.7622, + "step": 18541 + }, + { + "epoch": 0.5682849086674022, + "grad_norm": 1.441698321181385, + "learning_rate": 8.282855428983736e-06, + "loss": 0.6629, + "step": 18542 + }, + { + "epoch": 0.5683155571901435, + "grad_norm": 1.4651150290837744, + "learning_rate": 8.281877545620923e-06, + "loss": 0.6138, + "step": 18543 + }, + { + "epoch": 0.5683462057128846, + "grad_norm": 1.4775243653110877, + "learning_rate": 8.280899679187207e-06, + "loss": 0.6903, + "step": 18544 + }, + { + "epoch": 0.5683768542356259, + "grad_norm": 1.408474726375368, + "learning_rate": 8.279921829692227e-06, + "loss": 0.7309, + "step": 18545 + }, + { + "epoch": 0.568407502758367, + "grad_norm": 1.650849750838441, + "learning_rate": 8.27894399714562e-06, + "loss": 0.6844, + "step": 18546 + }, + { + "epoch": 0.5684381512811083, + "grad_norm": 1.2939795075389275, + "learning_rate": 8.277966181557014e-06, + "loss": 0.7089, + "step": 18547 + }, + { + "epoch": 0.5684687998038495, + "grad_norm": 1.380243320983113, + "learning_rate": 8.276988382936051e-06, + "loss": 0.6176, + "step": 18548 + }, + { + "epoch": 0.5684994483265906, + "grad_norm": 0.6681337907354316, + "learning_rate": 8.276010601292361e-06, + "loss": 0.5367, + "step": 18549 + }, + { + "epoch": 0.5685300968493319, + "grad_norm": 1.4698062392618063, + "learning_rate": 8.27503283663558e-06, + "loss": 0.7964, + "step": 18550 + }, + { + "epoch": 0.568560745372073, + "grad_norm": 1.5522172694759033, + "learning_rate": 8.274055088975344e-06, + "loss": 0.7527, + "step": 18551 + }, + { + "epoch": 0.5685913938948143, + "grad_norm": 1.4181671899561756, + "learning_rate": 8.273077358321279e-06, + "loss": 0.7332, + "step": 18552 + }, + { + "epoch": 0.5686220424175554, + "grad_norm": 1.4040874648259343, + "learning_rate": 8.272099644683031e-06, + "loss": 0.6751, + "step": 18553 + }, + { + "epoch": 0.5686526909402967, + "grad_norm": 1.3559899704348017, + "learning_rate": 8.271121948070224e-06, + "loss": 0.6112, + "step": 18554 + }, + { + "epoch": 0.5686833394630378, + "grad_norm": 1.525997480071152, + "learning_rate": 8.270144268492494e-06, + "loss": 0.6996, + "step": 18555 + }, + { + "epoch": 0.5687139879857791, + "grad_norm": 1.460741174474764, + "learning_rate": 8.269166605959479e-06, + "loss": 0.6632, + "step": 18556 + }, + { + "epoch": 0.5687446365085203, + "grad_norm": 0.6463134100902738, + "learning_rate": 8.268188960480803e-06, + "loss": 0.5521, + "step": 18557 + }, + { + "epoch": 0.5687752850312615, + "grad_norm": 1.4939414385065837, + "learning_rate": 8.267211332066107e-06, + "loss": 0.7469, + "step": 18558 + }, + { + "epoch": 0.5688059335540027, + "grad_norm": 0.6629109352543495, + "learning_rate": 8.266233720725021e-06, + "loss": 0.5869, + "step": 18559 + }, + { + "epoch": 0.5688365820767439, + "grad_norm": 1.4361905641207842, + "learning_rate": 8.265256126467177e-06, + "loss": 0.657, + "step": 18560 + }, + { + "epoch": 0.5688672305994851, + "grad_norm": 0.6602387051907451, + "learning_rate": 8.26427854930221e-06, + "loss": 0.5395, + "step": 18561 + }, + { + "epoch": 0.5688978791222263, + "grad_norm": 1.568458107850387, + "learning_rate": 8.263300989239751e-06, + "loss": 0.7402, + "step": 18562 + }, + { + "epoch": 0.5689285276449675, + "grad_norm": 1.4860521892678566, + "learning_rate": 8.262323446289427e-06, + "loss": 0.7157, + "step": 18563 + }, + { + "epoch": 0.5689591761677087, + "grad_norm": 0.6682478744650804, + "learning_rate": 8.261345920460881e-06, + "loss": 0.5916, + "step": 18564 + }, + { + "epoch": 0.5689898246904499, + "grad_norm": 0.6342220088702485, + "learning_rate": 8.260368411763733e-06, + "loss": 0.5208, + "step": 18565 + }, + { + "epoch": 0.5690204732131912, + "grad_norm": 1.5556151006801187, + "learning_rate": 8.259390920207626e-06, + "loss": 0.675, + "step": 18566 + }, + { + "epoch": 0.5690511217359323, + "grad_norm": 1.521387564191401, + "learning_rate": 8.25841344580218e-06, + "loss": 0.7013, + "step": 18567 + }, + { + "epoch": 0.5690817702586736, + "grad_norm": 1.6572529080139138, + "learning_rate": 8.257435988557034e-06, + "loss": 0.6508, + "step": 18568 + }, + { + "epoch": 0.5691124187814147, + "grad_norm": 1.455375033652063, + "learning_rate": 8.256458548481819e-06, + "loss": 0.6318, + "step": 18569 + }, + { + "epoch": 0.569143067304156, + "grad_norm": 1.692640290544645, + "learning_rate": 8.255481125586162e-06, + "loss": 0.6503, + "step": 18570 + }, + { + "epoch": 0.5691737158268971, + "grad_norm": 1.6047482990292845, + "learning_rate": 8.254503719879694e-06, + "loss": 0.7023, + "step": 18571 + }, + { + "epoch": 0.5692043643496384, + "grad_norm": 1.5665006520914688, + "learning_rate": 8.25352633137205e-06, + "loss": 0.769, + "step": 18572 + }, + { + "epoch": 0.5692350128723795, + "grad_norm": 1.4319066461321275, + "learning_rate": 8.25254896007286e-06, + "loss": 0.6795, + "step": 18573 + }, + { + "epoch": 0.5692656613951208, + "grad_norm": 0.6789152806213236, + "learning_rate": 8.251571605991748e-06, + "loss": 0.5188, + "step": 18574 + }, + { + "epoch": 0.569296309917862, + "grad_norm": 1.6642270739799458, + "learning_rate": 8.250594269138352e-06, + "loss": 0.7547, + "step": 18575 + }, + { + "epoch": 0.5693269584406032, + "grad_norm": 1.6130172468374309, + "learning_rate": 8.249616949522293e-06, + "loss": 0.7337, + "step": 18576 + }, + { + "epoch": 0.5693576069633444, + "grad_norm": 1.611601189138616, + "learning_rate": 8.248639647153212e-06, + "loss": 0.7495, + "step": 18577 + }, + { + "epoch": 0.5693882554860856, + "grad_norm": 1.5427014077732804, + "learning_rate": 8.24766236204073e-06, + "loss": 0.6558, + "step": 18578 + }, + { + "epoch": 0.5694189040088268, + "grad_norm": 1.5354686287097592, + "learning_rate": 8.246685094194478e-06, + "loss": 0.6891, + "step": 18579 + }, + { + "epoch": 0.5694495525315679, + "grad_norm": 1.5009540264539325, + "learning_rate": 8.245707843624087e-06, + "loss": 0.6689, + "step": 18580 + }, + { + "epoch": 0.5694802010543092, + "grad_norm": 1.741600085218037, + "learning_rate": 8.244730610339187e-06, + "loss": 0.7075, + "step": 18581 + }, + { + "epoch": 0.5695108495770503, + "grad_norm": 1.4781123923958421, + "learning_rate": 8.243753394349403e-06, + "loss": 0.6564, + "step": 18582 + }, + { + "epoch": 0.5695414980997916, + "grad_norm": 1.5850788503214543, + "learning_rate": 8.242776195664368e-06, + "loss": 0.6935, + "step": 18583 + }, + { + "epoch": 0.5695721466225327, + "grad_norm": 1.7060112524094253, + "learning_rate": 8.241799014293707e-06, + "loss": 0.8096, + "step": 18584 + }, + { + "epoch": 0.569602795145274, + "grad_norm": 1.3747379078467403, + "learning_rate": 8.240821850247051e-06, + "loss": 0.5678, + "step": 18585 + }, + { + "epoch": 0.5696334436680152, + "grad_norm": 1.5423469582173965, + "learning_rate": 8.239844703534032e-06, + "loss": 0.6904, + "step": 18586 + }, + { + "epoch": 0.5696640921907564, + "grad_norm": 0.6597417914148558, + "learning_rate": 8.238867574164266e-06, + "loss": 0.5389, + "step": 18587 + }, + { + "epoch": 0.5696947407134976, + "grad_norm": 1.7854956373253232, + "learning_rate": 8.237890462147394e-06, + "loss": 0.7158, + "step": 18588 + }, + { + "epoch": 0.5697253892362388, + "grad_norm": 1.6255992248076174, + "learning_rate": 8.236913367493036e-06, + "loss": 0.7498, + "step": 18589 + }, + { + "epoch": 0.56975603775898, + "grad_norm": 1.6892831032869682, + "learning_rate": 8.23593629021082e-06, + "loss": 0.6814, + "step": 18590 + }, + { + "epoch": 0.5697866862817212, + "grad_norm": 1.4028772145855657, + "learning_rate": 8.234959230310377e-06, + "loss": 0.7748, + "step": 18591 + }, + { + "epoch": 0.5698173348044624, + "grad_norm": 1.295685594556496, + "learning_rate": 8.23398218780133e-06, + "loss": 0.6558, + "step": 18592 + }, + { + "epoch": 0.5698479833272037, + "grad_norm": 1.564563119790171, + "learning_rate": 8.23300516269331e-06, + "loss": 0.6396, + "step": 18593 + }, + { + "epoch": 0.5698786318499448, + "grad_norm": 1.5143830232159852, + "learning_rate": 8.232028154995943e-06, + "loss": 0.7767, + "step": 18594 + }, + { + "epoch": 0.5699092803726861, + "grad_norm": 1.5138183044051423, + "learning_rate": 8.231051164718854e-06, + "loss": 0.6416, + "step": 18595 + }, + { + "epoch": 0.5699399288954272, + "grad_norm": 1.677461911520808, + "learning_rate": 8.230074191871673e-06, + "loss": 0.7496, + "step": 18596 + }, + { + "epoch": 0.5699705774181685, + "grad_norm": 1.386684433245598, + "learning_rate": 8.229097236464024e-06, + "loss": 0.8525, + "step": 18597 + }, + { + "epoch": 0.5700012259409096, + "grad_norm": 1.6713525420581197, + "learning_rate": 8.228120298505529e-06, + "loss": 0.7076, + "step": 18598 + }, + { + "epoch": 0.5700318744636509, + "grad_norm": 1.516282059324289, + "learning_rate": 8.227143378005819e-06, + "loss": 0.7365, + "step": 18599 + }, + { + "epoch": 0.570062522986392, + "grad_norm": 1.3405250169053944, + "learning_rate": 8.226166474974521e-06, + "loss": 0.6447, + "step": 18600 + }, + { + "epoch": 0.5700931715091333, + "grad_norm": 1.5474882384586963, + "learning_rate": 8.225189589421256e-06, + "loss": 0.678, + "step": 18601 + }, + { + "epoch": 0.5701238200318745, + "grad_norm": 1.578004282920303, + "learning_rate": 8.224212721355653e-06, + "loss": 0.6276, + "step": 18602 + }, + { + "epoch": 0.5701544685546157, + "grad_norm": 1.9059166987970486, + "learning_rate": 8.223235870787336e-06, + "loss": 0.5547, + "step": 18603 + }, + { + "epoch": 0.5701851170773569, + "grad_norm": 1.673109136703312, + "learning_rate": 8.222259037725933e-06, + "loss": 0.7454, + "step": 18604 + }, + { + "epoch": 0.5702157656000981, + "grad_norm": 1.304736667594932, + "learning_rate": 8.221282222181066e-06, + "loss": 0.5885, + "step": 18605 + }, + { + "epoch": 0.5702464141228393, + "grad_norm": 1.5856547146422193, + "learning_rate": 8.220305424162357e-06, + "loss": 0.6821, + "step": 18606 + }, + { + "epoch": 0.5702770626455805, + "grad_norm": 1.5956553874263926, + "learning_rate": 8.219328643679438e-06, + "loss": 0.7223, + "step": 18607 + }, + { + "epoch": 0.5703077111683217, + "grad_norm": 1.6028147467178806, + "learning_rate": 8.21835188074193e-06, + "loss": 0.7531, + "step": 18608 + }, + { + "epoch": 0.570338359691063, + "grad_norm": 0.6784783421445706, + "learning_rate": 8.217375135359452e-06, + "loss": 0.5844, + "step": 18609 + }, + { + "epoch": 0.5703690082138041, + "grad_norm": 1.5044254997496023, + "learning_rate": 8.216398407541637e-06, + "loss": 0.7208, + "step": 18610 + }, + { + "epoch": 0.5703996567365452, + "grad_norm": 1.357577955707925, + "learning_rate": 8.2154216972981e-06, + "loss": 0.7374, + "step": 18611 + }, + { + "epoch": 0.5704303052592865, + "grad_norm": 0.6489922006454194, + "learning_rate": 8.214445004638475e-06, + "loss": 0.5345, + "step": 18612 + }, + { + "epoch": 0.5704609537820277, + "grad_norm": 1.7508309542448832, + "learning_rate": 8.213468329572377e-06, + "loss": 0.7225, + "step": 18613 + }, + { + "epoch": 0.5704916023047689, + "grad_norm": 1.5640097137022715, + "learning_rate": 8.212491672109434e-06, + "loss": 0.7375, + "step": 18614 + }, + { + "epoch": 0.5705222508275101, + "grad_norm": 1.4737095166678187, + "learning_rate": 8.211515032259267e-06, + "loss": 0.7163, + "step": 18615 + }, + { + "epoch": 0.5705528993502513, + "grad_norm": 1.4457143048186831, + "learning_rate": 8.210538410031505e-06, + "loss": 0.6538, + "step": 18616 + }, + { + "epoch": 0.5705835478729925, + "grad_norm": 1.4017465071176651, + "learning_rate": 8.209561805435757e-06, + "loss": 0.7298, + "step": 18617 + }, + { + "epoch": 0.5706141963957337, + "grad_norm": 1.3547053631496633, + "learning_rate": 8.208585218481663e-06, + "loss": 0.5517, + "step": 18618 + }, + { + "epoch": 0.5706448449184749, + "grad_norm": 1.5649993714205537, + "learning_rate": 8.207608649178828e-06, + "loss": 0.6518, + "step": 18619 + }, + { + "epoch": 0.5706754934412162, + "grad_norm": 1.5072800802053845, + "learning_rate": 8.206632097536894e-06, + "loss": 0.6476, + "step": 18620 + }, + { + "epoch": 0.5707061419639573, + "grad_norm": 0.6662858676256008, + "learning_rate": 8.205655563565467e-06, + "loss": 0.5758, + "step": 18621 + }, + { + "epoch": 0.5707367904866986, + "grad_norm": 1.5302787494004961, + "learning_rate": 8.204679047274175e-06, + "loss": 0.738, + "step": 18622 + }, + { + "epoch": 0.5707674390094397, + "grad_norm": 1.556001090494105, + "learning_rate": 8.20370254867264e-06, + "loss": 0.6765, + "step": 18623 + }, + { + "epoch": 0.570798087532181, + "grad_norm": 1.575910565650838, + "learning_rate": 8.202726067770484e-06, + "loss": 0.6388, + "step": 18624 + }, + { + "epoch": 0.5708287360549221, + "grad_norm": 1.592268830934311, + "learning_rate": 8.201749604577327e-06, + "loss": 0.7241, + "step": 18625 + }, + { + "epoch": 0.5708593845776634, + "grad_norm": 1.5245607620105377, + "learning_rate": 8.200773159102793e-06, + "loss": 0.6729, + "step": 18626 + }, + { + "epoch": 0.5708900331004045, + "grad_norm": 0.6804015132578565, + "learning_rate": 8.199796731356503e-06, + "loss": 0.5494, + "step": 18627 + }, + { + "epoch": 0.5709206816231458, + "grad_norm": 1.761206309965673, + "learning_rate": 8.19882032134807e-06, + "loss": 0.7344, + "step": 18628 + }, + { + "epoch": 0.570951330145887, + "grad_norm": 1.3803933263813664, + "learning_rate": 8.197843929087127e-06, + "loss": 0.5632, + "step": 18629 + }, + { + "epoch": 0.5709819786686282, + "grad_norm": 1.389360425669526, + "learning_rate": 8.196867554583283e-06, + "loss": 0.6307, + "step": 18630 + }, + { + "epoch": 0.5710126271913694, + "grad_norm": 1.571270992807752, + "learning_rate": 8.19589119784617e-06, + "loss": 0.7519, + "step": 18631 + }, + { + "epoch": 0.5710432757141106, + "grad_norm": 1.5123796072642401, + "learning_rate": 8.194914858885403e-06, + "loss": 0.7542, + "step": 18632 + }, + { + "epoch": 0.5710739242368518, + "grad_norm": 0.6366352553759617, + "learning_rate": 8.193938537710598e-06, + "loss": 0.5278, + "step": 18633 + }, + { + "epoch": 0.571104572759593, + "grad_norm": 1.4721007507077786, + "learning_rate": 8.19296223433138e-06, + "loss": 0.6914, + "step": 18634 + }, + { + "epoch": 0.5711352212823342, + "grad_norm": 1.412711129909046, + "learning_rate": 8.191985948757369e-06, + "loss": 0.7237, + "step": 18635 + }, + { + "epoch": 0.5711658698050754, + "grad_norm": 1.4825362113367053, + "learning_rate": 8.19100968099818e-06, + "loss": 0.737, + "step": 18636 + }, + { + "epoch": 0.5711965183278166, + "grad_norm": 1.441251631409465, + "learning_rate": 8.190033431063437e-06, + "loss": 0.575, + "step": 18637 + }, + { + "epoch": 0.5712271668505579, + "grad_norm": 1.3853828782399722, + "learning_rate": 8.189057198962757e-06, + "loss": 0.6434, + "step": 18638 + }, + { + "epoch": 0.571257815373299, + "grad_norm": 1.5232261199326342, + "learning_rate": 8.188080984705765e-06, + "loss": 0.6391, + "step": 18639 + }, + { + "epoch": 0.5712884638960403, + "grad_norm": 1.603841650889524, + "learning_rate": 8.187104788302069e-06, + "loss": 0.7144, + "step": 18640 + }, + { + "epoch": 0.5713191124187814, + "grad_norm": 1.6261519646114095, + "learning_rate": 8.186128609761293e-06, + "loss": 0.7264, + "step": 18641 + }, + { + "epoch": 0.5713497609415226, + "grad_norm": 1.4656845540400953, + "learning_rate": 8.185152449093058e-06, + "loss": 0.6034, + "step": 18642 + }, + { + "epoch": 0.5713804094642638, + "grad_norm": 1.3019970777987724, + "learning_rate": 8.184176306306981e-06, + "loss": 0.6124, + "step": 18643 + }, + { + "epoch": 0.571411057987005, + "grad_norm": 1.537882566350982, + "learning_rate": 8.183200181412677e-06, + "loss": 0.6476, + "step": 18644 + }, + { + "epoch": 0.5714417065097462, + "grad_norm": 1.4700539103709547, + "learning_rate": 8.18222407441977e-06, + "loss": 0.667, + "step": 18645 + }, + { + "epoch": 0.5714723550324874, + "grad_norm": 1.4837876331438746, + "learning_rate": 8.181247985337868e-06, + "loss": 0.6054, + "step": 18646 + }, + { + "epoch": 0.5715030035552287, + "grad_norm": 1.4992052143091135, + "learning_rate": 8.180271914176601e-06, + "loss": 0.7174, + "step": 18647 + }, + { + "epoch": 0.5715336520779698, + "grad_norm": 1.456292068933293, + "learning_rate": 8.179295860945581e-06, + "loss": 0.6362, + "step": 18648 + }, + { + "epoch": 0.5715643006007111, + "grad_norm": 1.3372888218496464, + "learning_rate": 8.178319825654418e-06, + "loss": 0.6339, + "step": 18649 + }, + { + "epoch": 0.5715949491234522, + "grad_norm": 1.4270394800124113, + "learning_rate": 8.177343808312743e-06, + "loss": 0.6311, + "step": 18650 + }, + { + "epoch": 0.5716255976461935, + "grad_norm": 1.5136564483092694, + "learning_rate": 8.176367808930163e-06, + "loss": 0.8272, + "step": 18651 + }, + { + "epoch": 0.5716562461689346, + "grad_norm": 1.6330169507641081, + "learning_rate": 8.175391827516297e-06, + "loss": 0.6588, + "step": 18652 + }, + { + "epoch": 0.5716868946916759, + "grad_norm": 1.5581439032467115, + "learning_rate": 8.174415864080763e-06, + "loss": 0.7012, + "step": 18653 + }, + { + "epoch": 0.571717543214417, + "grad_norm": 0.6888252455619135, + "learning_rate": 8.173439918633176e-06, + "loss": 0.5611, + "step": 18654 + }, + { + "epoch": 0.5717481917371583, + "grad_norm": 1.5689932019746917, + "learning_rate": 8.172463991183151e-06, + "loss": 0.7408, + "step": 18655 + }, + { + "epoch": 0.5717788402598994, + "grad_norm": 0.6689481935873681, + "learning_rate": 8.17148808174031e-06, + "loss": 0.561, + "step": 18656 + }, + { + "epoch": 0.5718094887826407, + "grad_norm": 0.6575215571918619, + "learning_rate": 8.17051219031426e-06, + "loss": 0.5171, + "step": 18657 + }, + { + "epoch": 0.5718401373053819, + "grad_norm": 1.6121033069717101, + "learning_rate": 8.169536316914627e-06, + "loss": 0.6668, + "step": 18658 + }, + { + "epoch": 0.5718707858281231, + "grad_norm": 1.7342275117757102, + "learning_rate": 8.16856046155102e-06, + "loss": 0.7482, + "step": 18659 + }, + { + "epoch": 0.5719014343508643, + "grad_norm": 1.2776677721801215, + "learning_rate": 8.167584624233049e-06, + "loss": 0.6544, + "step": 18660 + }, + { + "epoch": 0.5719320828736055, + "grad_norm": 0.6751816425941907, + "learning_rate": 8.166608804970342e-06, + "loss": 0.5679, + "step": 18661 + }, + { + "epoch": 0.5719627313963467, + "grad_norm": 1.620610207316759, + "learning_rate": 8.165633003772507e-06, + "loss": 0.6412, + "step": 18662 + }, + { + "epoch": 0.5719933799190879, + "grad_norm": 1.8462426654112114, + "learning_rate": 8.164657220649158e-06, + "loss": 0.8362, + "step": 18663 + }, + { + "epoch": 0.5720240284418291, + "grad_norm": 1.4282749216150037, + "learning_rate": 8.163681455609909e-06, + "loss": 0.6291, + "step": 18664 + }, + { + "epoch": 0.5720546769645704, + "grad_norm": 1.4676041201493735, + "learning_rate": 8.162705708664379e-06, + "loss": 0.7356, + "step": 18665 + }, + { + "epoch": 0.5720853254873115, + "grad_norm": 1.4777667619869255, + "learning_rate": 8.16172997982218e-06, + "loss": 0.7725, + "step": 18666 + }, + { + "epoch": 0.5721159740100528, + "grad_norm": 1.4923356057479773, + "learning_rate": 8.160754269092924e-06, + "loss": 0.75, + "step": 18667 + }, + { + "epoch": 0.5721466225327939, + "grad_norm": 1.4804621368018924, + "learning_rate": 8.159778576486227e-06, + "loss": 0.7148, + "step": 18668 + }, + { + "epoch": 0.5721772710555352, + "grad_norm": 1.8454986233329438, + "learning_rate": 8.158802902011704e-06, + "loss": 0.7274, + "step": 18669 + }, + { + "epoch": 0.5722079195782763, + "grad_norm": 1.6293617197673287, + "learning_rate": 8.15782724567897e-06, + "loss": 0.7149, + "step": 18670 + }, + { + "epoch": 0.5722385681010176, + "grad_norm": 1.6551799205329814, + "learning_rate": 8.156851607497626e-06, + "loss": 0.7314, + "step": 18671 + }, + { + "epoch": 0.5722692166237587, + "grad_norm": 0.6545873303980768, + "learning_rate": 8.155875987477304e-06, + "loss": 0.5735, + "step": 18672 + }, + { + "epoch": 0.5722998651464999, + "grad_norm": 1.4664660581048126, + "learning_rate": 8.154900385627601e-06, + "loss": 0.6842, + "step": 18673 + }, + { + "epoch": 0.5723305136692411, + "grad_norm": 1.482995026827649, + "learning_rate": 8.153924801958142e-06, + "loss": 0.7728, + "step": 18674 + }, + { + "epoch": 0.5723611621919823, + "grad_norm": 0.6756872639956886, + "learning_rate": 8.152949236478533e-06, + "loss": 0.5876, + "step": 18675 + }, + { + "epoch": 0.5723918107147236, + "grad_norm": 1.6813891208600364, + "learning_rate": 8.151973689198385e-06, + "loss": 0.6552, + "step": 18676 + }, + { + "epoch": 0.5724224592374647, + "grad_norm": 1.604255570960063, + "learning_rate": 8.150998160127316e-06, + "loss": 0.7206, + "step": 18677 + }, + { + "epoch": 0.572453107760206, + "grad_norm": 1.5974286429526676, + "learning_rate": 8.150022649274935e-06, + "loss": 0.806, + "step": 18678 + }, + { + "epoch": 0.5724837562829471, + "grad_norm": 1.4391978385532125, + "learning_rate": 8.149047156650852e-06, + "loss": 0.6678, + "step": 18679 + }, + { + "epoch": 0.5725144048056884, + "grad_norm": 1.6232916187785882, + "learning_rate": 8.148071682264683e-06, + "loss": 0.7353, + "step": 18680 + }, + { + "epoch": 0.5725450533284295, + "grad_norm": 1.5404490673975706, + "learning_rate": 8.14709622612604e-06, + "loss": 0.6947, + "step": 18681 + }, + { + "epoch": 0.5725757018511708, + "grad_norm": 1.4617889932235633, + "learning_rate": 8.146120788244525e-06, + "loss": 0.6607, + "step": 18682 + }, + { + "epoch": 0.5726063503739119, + "grad_norm": 1.5355584354505303, + "learning_rate": 8.145145368629763e-06, + "loss": 0.6702, + "step": 18683 + }, + { + "epoch": 0.5726369988966532, + "grad_norm": 0.6514762908062294, + "learning_rate": 8.144169967291354e-06, + "loss": 0.5603, + "step": 18684 + }, + { + "epoch": 0.5726676474193944, + "grad_norm": 1.2950084142213794, + "learning_rate": 8.143194584238914e-06, + "loss": 0.5396, + "step": 18685 + }, + { + "epoch": 0.5726982959421356, + "grad_norm": 1.465368916731751, + "learning_rate": 8.142219219482054e-06, + "loss": 0.706, + "step": 18686 + }, + { + "epoch": 0.5727289444648768, + "grad_norm": 1.5295992475212683, + "learning_rate": 8.141243873030383e-06, + "loss": 0.7046, + "step": 18687 + }, + { + "epoch": 0.572759592987618, + "grad_norm": 1.4934298051952115, + "learning_rate": 8.14026854489351e-06, + "loss": 0.7165, + "step": 18688 + }, + { + "epoch": 0.5727902415103592, + "grad_norm": 1.5341710890864304, + "learning_rate": 8.13929323508105e-06, + "loss": 0.6547, + "step": 18689 + }, + { + "epoch": 0.5728208900331004, + "grad_norm": 0.6715059864334642, + "learning_rate": 8.138317943602607e-06, + "loss": 0.5518, + "step": 18690 + }, + { + "epoch": 0.5728515385558416, + "grad_norm": 1.5911314486149655, + "learning_rate": 8.137342670467797e-06, + "loss": 0.7328, + "step": 18691 + }, + { + "epoch": 0.5728821870785828, + "grad_norm": 0.650341414458291, + "learning_rate": 8.13636741568622e-06, + "loss": 0.5229, + "step": 18692 + }, + { + "epoch": 0.572912835601324, + "grad_norm": 1.775723302686104, + "learning_rate": 8.135392179267498e-06, + "loss": 0.7765, + "step": 18693 + }, + { + "epoch": 0.5729434841240653, + "grad_norm": 1.441490498293941, + "learning_rate": 8.134416961221234e-06, + "loss": 0.6638, + "step": 18694 + }, + { + "epoch": 0.5729741326468064, + "grad_norm": 1.6012801849061558, + "learning_rate": 8.133441761557033e-06, + "loss": 0.7955, + "step": 18695 + }, + { + "epoch": 0.5730047811695477, + "grad_norm": 0.6818409953661931, + "learning_rate": 8.132466580284509e-06, + "loss": 0.5612, + "step": 18696 + }, + { + "epoch": 0.5730354296922888, + "grad_norm": 1.5418784040143931, + "learning_rate": 8.131491417413271e-06, + "loss": 0.7906, + "step": 18697 + }, + { + "epoch": 0.5730660782150301, + "grad_norm": 1.3852614131161542, + "learning_rate": 8.130516272952925e-06, + "loss": 0.6761, + "step": 18698 + }, + { + "epoch": 0.5730967267377712, + "grad_norm": 1.2989297167820784, + "learning_rate": 8.12954114691308e-06, + "loss": 0.6015, + "step": 18699 + }, + { + "epoch": 0.5731273752605125, + "grad_norm": 1.4496702937920245, + "learning_rate": 8.128566039303348e-06, + "loss": 0.7023, + "step": 18700 + }, + { + "epoch": 0.5731580237832536, + "grad_norm": 0.662119328137614, + "learning_rate": 8.127590950133328e-06, + "loss": 0.5361, + "step": 18701 + }, + { + "epoch": 0.5731886723059949, + "grad_norm": 1.5841025226906045, + "learning_rate": 8.126615879412639e-06, + "loss": 0.7091, + "step": 18702 + }, + { + "epoch": 0.5732193208287361, + "grad_norm": 1.6124810565809817, + "learning_rate": 8.125640827150877e-06, + "loss": 0.6483, + "step": 18703 + }, + { + "epoch": 0.5732499693514772, + "grad_norm": 1.7025527857342941, + "learning_rate": 8.124665793357662e-06, + "loss": 0.6246, + "step": 18704 + }, + { + "epoch": 0.5732806178742185, + "grad_norm": 1.5363628598441024, + "learning_rate": 8.123690778042592e-06, + "loss": 0.6458, + "step": 18705 + }, + { + "epoch": 0.5733112663969596, + "grad_norm": 0.6714013759935078, + "learning_rate": 8.122715781215276e-06, + "loss": 0.5551, + "step": 18706 + }, + { + "epoch": 0.5733419149197009, + "grad_norm": 0.6746687611747073, + "learning_rate": 8.121740802885322e-06, + "loss": 0.5391, + "step": 18707 + }, + { + "epoch": 0.573372563442442, + "grad_norm": 0.6459877748433202, + "learning_rate": 8.120765843062338e-06, + "loss": 0.5405, + "step": 18708 + }, + { + "epoch": 0.5734032119651833, + "grad_norm": 1.503246489546637, + "learning_rate": 8.119790901755927e-06, + "loss": 0.6655, + "step": 18709 + }, + { + "epoch": 0.5734338604879244, + "grad_norm": 1.6072730557615122, + "learning_rate": 8.118815978975698e-06, + "loss": 0.6633, + "step": 18710 + }, + { + "epoch": 0.5734645090106657, + "grad_norm": 1.5097277151023125, + "learning_rate": 8.117841074731255e-06, + "loss": 0.5932, + "step": 18711 + }, + { + "epoch": 0.5734951575334069, + "grad_norm": 1.5106103162080275, + "learning_rate": 8.11686618903221e-06, + "loss": 0.648, + "step": 18712 + }, + { + "epoch": 0.5735258060561481, + "grad_norm": 1.3150073962122826, + "learning_rate": 8.115891321888161e-06, + "loss": 0.6981, + "step": 18713 + }, + { + "epoch": 0.5735564545788893, + "grad_norm": 0.6539810811521063, + "learning_rate": 8.114916473308716e-06, + "loss": 0.5345, + "step": 18714 + }, + { + "epoch": 0.5735871031016305, + "grad_norm": 1.4364089348767648, + "learning_rate": 8.113941643303484e-06, + "loss": 0.7322, + "step": 18715 + }, + { + "epoch": 0.5736177516243717, + "grad_norm": 1.4557590712843897, + "learning_rate": 8.112966831882066e-06, + "loss": 0.6498, + "step": 18716 + }, + { + "epoch": 0.5736484001471129, + "grad_norm": 1.6515802881707464, + "learning_rate": 8.111992039054068e-06, + "loss": 0.764, + "step": 18717 + }, + { + "epoch": 0.5736790486698541, + "grad_norm": 1.6347778639163546, + "learning_rate": 8.111017264829097e-06, + "loss": 0.6843, + "step": 18718 + }, + { + "epoch": 0.5737096971925953, + "grad_norm": 1.6038918336989882, + "learning_rate": 8.110042509216753e-06, + "loss": 0.716, + "step": 18719 + }, + { + "epoch": 0.5737403457153365, + "grad_norm": 1.607907932897738, + "learning_rate": 8.109067772226648e-06, + "loss": 0.7931, + "step": 18720 + }, + { + "epoch": 0.5737709942380778, + "grad_norm": 1.3542382379466753, + "learning_rate": 8.10809305386838e-06, + "loss": 0.7237, + "step": 18721 + }, + { + "epoch": 0.5738016427608189, + "grad_norm": 1.5329337632903381, + "learning_rate": 8.107118354151555e-06, + "loss": 0.7735, + "step": 18722 + }, + { + "epoch": 0.5738322912835602, + "grad_norm": 1.36251677871958, + "learning_rate": 8.106143673085778e-06, + "loss": 0.6806, + "step": 18723 + }, + { + "epoch": 0.5738629398063013, + "grad_norm": 1.4678098705303582, + "learning_rate": 8.105169010680654e-06, + "loss": 0.6022, + "step": 18724 + }, + { + "epoch": 0.5738935883290426, + "grad_norm": 1.422187837919065, + "learning_rate": 8.104194366945779e-06, + "loss": 0.6289, + "step": 18725 + }, + { + "epoch": 0.5739242368517837, + "grad_norm": 1.4377944607059128, + "learning_rate": 8.103219741890767e-06, + "loss": 0.6376, + "step": 18726 + }, + { + "epoch": 0.573954885374525, + "grad_norm": 1.3660185002889687, + "learning_rate": 8.102245135525216e-06, + "loss": 0.6686, + "step": 18727 + }, + { + "epoch": 0.5739855338972661, + "grad_norm": 1.4915289196849315, + "learning_rate": 8.101270547858724e-06, + "loss": 0.6433, + "step": 18728 + }, + { + "epoch": 0.5740161824200074, + "grad_norm": 1.496166932659716, + "learning_rate": 8.100295978900904e-06, + "loss": 0.7247, + "step": 18729 + }, + { + "epoch": 0.5740468309427486, + "grad_norm": 1.6248730979690045, + "learning_rate": 8.099321428661351e-06, + "loss": 0.675, + "step": 18730 + }, + { + "epoch": 0.5740774794654898, + "grad_norm": 1.3712582986980464, + "learning_rate": 8.098346897149672e-06, + "loss": 0.6341, + "step": 18731 + }, + { + "epoch": 0.574108127988231, + "grad_norm": 1.4230305213371273, + "learning_rate": 8.097372384375469e-06, + "loss": 0.6619, + "step": 18732 + }, + { + "epoch": 0.5741387765109722, + "grad_norm": 0.6992150657009722, + "learning_rate": 8.096397890348338e-06, + "loss": 0.577, + "step": 18733 + }, + { + "epoch": 0.5741694250337134, + "grad_norm": 1.5954978532255393, + "learning_rate": 8.09542341507789e-06, + "loss": 0.7464, + "step": 18734 + }, + { + "epoch": 0.5742000735564545, + "grad_norm": 1.3952617703992942, + "learning_rate": 8.094448958573723e-06, + "loss": 0.6987, + "step": 18735 + }, + { + "epoch": 0.5742307220791958, + "grad_norm": 1.6302787671242203, + "learning_rate": 8.093474520845435e-06, + "loss": 0.6495, + "step": 18736 + }, + { + "epoch": 0.5742613706019369, + "grad_norm": 1.6906339830633585, + "learning_rate": 8.092500101902632e-06, + "loss": 0.7809, + "step": 18737 + }, + { + "epoch": 0.5742920191246782, + "grad_norm": 1.6731434478954854, + "learning_rate": 8.091525701754912e-06, + "loss": 0.6902, + "step": 18738 + }, + { + "epoch": 0.5743226676474193, + "grad_norm": 1.853307384707915, + "learning_rate": 8.090551320411879e-06, + "loss": 0.7514, + "step": 18739 + }, + { + "epoch": 0.5743533161701606, + "grad_norm": 1.5916010128312958, + "learning_rate": 8.089576957883132e-06, + "loss": 0.7792, + "step": 18740 + }, + { + "epoch": 0.5743839646929018, + "grad_norm": 1.7015886219683627, + "learning_rate": 8.088602614178269e-06, + "loss": 0.7047, + "step": 18741 + }, + { + "epoch": 0.574414613215643, + "grad_norm": 1.4115813465534863, + "learning_rate": 8.087628289306899e-06, + "loss": 0.5884, + "step": 18742 + }, + { + "epoch": 0.5744452617383842, + "grad_norm": 0.6527737066224288, + "learning_rate": 8.086653983278617e-06, + "loss": 0.5489, + "step": 18743 + }, + { + "epoch": 0.5744759102611254, + "grad_norm": 0.6338803107252567, + "learning_rate": 8.085679696103015e-06, + "loss": 0.5408, + "step": 18744 + }, + { + "epoch": 0.5745065587838666, + "grad_norm": 1.5175925975739522, + "learning_rate": 8.084705427789708e-06, + "loss": 0.7683, + "step": 18745 + }, + { + "epoch": 0.5745372073066078, + "grad_norm": 0.6609891256551997, + "learning_rate": 8.083731178348283e-06, + "loss": 0.5604, + "step": 18746 + }, + { + "epoch": 0.574567855829349, + "grad_norm": 1.6086439943683812, + "learning_rate": 8.082756947788351e-06, + "loss": 0.7215, + "step": 18747 + }, + { + "epoch": 0.5745985043520903, + "grad_norm": 1.521048402192021, + "learning_rate": 8.081782736119504e-06, + "loss": 0.7273, + "step": 18748 + }, + { + "epoch": 0.5746291528748314, + "grad_norm": 1.588009626136895, + "learning_rate": 8.080808543351338e-06, + "loss": 0.6749, + "step": 18749 + }, + { + "epoch": 0.5746598013975727, + "grad_norm": 1.6020150159773603, + "learning_rate": 8.07983436949346e-06, + "loss": 0.6272, + "step": 18750 + }, + { + "epoch": 0.5746904499203138, + "grad_norm": 1.405156569381895, + "learning_rate": 8.078860214555467e-06, + "loss": 0.641, + "step": 18751 + }, + { + "epoch": 0.5747210984430551, + "grad_norm": 1.4699508637745555, + "learning_rate": 8.077886078546952e-06, + "loss": 0.6463, + "step": 18752 + }, + { + "epoch": 0.5747517469657962, + "grad_norm": 0.6826097053743547, + "learning_rate": 8.076911961477518e-06, + "loss": 0.5827, + "step": 18753 + }, + { + "epoch": 0.5747823954885375, + "grad_norm": 1.6317008374624626, + "learning_rate": 8.075937863356766e-06, + "loss": 0.7335, + "step": 18754 + }, + { + "epoch": 0.5748130440112786, + "grad_norm": 1.5560796427205692, + "learning_rate": 8.074963784194285e-06, + "loss": 0.6606, + "step": 18755 + }, + { + "epoch": 0.5748436925340199, + "grad_norm": 0.6703657618903582, + "learning_rate": 8.073989723999685e-06, + "loss": 0.5518, + "step": 18756 + }, + { + "epoch": 0.574874341056761, + "grad_norm": 1.5272792214677762, + "learning_rate": 8.073015682782549e-06, + "loss": 0.6568, + "step": 18757 + }, + { + "epoch": 0.5749049895795023, + "grad_norm": 1.472310685665657, + "learning_rate": 8.07204166055249e-06, + "loss": 0.7383, + "step": 18758 + }, + { + "epoch": 0.5749356381022435, + "grad_norm": 1.3372022415894091, + "learning_rate": 8.071067657319093e-06, + "loss": 0.6492, + "step": 18759 + }, + { + "epoch": 0.5749662866249847, + "grad_norm": 1.4782085486948984, + "learning_rate": 8.070093673091962e-06, + "loss": 0.6793, + "step": 18760 + }, + { + "epoch": 0.5749969351477259, + "grad_norm": 0.6642858125644299, + "learning_rate": 8.069119707880691e-06, + "loss": 0.5683, + "step": 18761 + }, + { + "epoch": 0.5750275836704671, + "grad_norm": 1.6697587222052606, + "learning_rate": 8.068145761694879e-06, + "loss": 0.723, + "step": 18762 + }, + { + "epoch": 0.5750582321932083, + "grad_norm": 1.6708736268545132, + "learning_rate": 8.06717183454412e-06, + "loss": 0.6972, + "step": 18763 + }, + { + "epoch": 0.5750888807159495, + "grad_norm": 1.4672907342742303, + "learning_rate": 8.066197926438011e-06, + "loss": 0.6829, + "step": 18764 + }, + { + "epoch": 0.5751195292386907, + "grad_norm": 1.451564455806992, + "learning_rate": 8.065224037386146e-06, + "loss": 0.7066, + "step": 18765 + }, + { + "epoch": 0.5751501777614318, + "grad_norm": 1.6408439194834112, + "learning_rate": 8.064250167398129e-06, + "loss": 0.6891, + "step": 18766 + }, + { + "epoch": 0.5751808262841731, + "grad_norm": 1.4863956721600442, + "learning_rate": 8.06327631648355e-06, + "loss": 0.7232, + "step": 18767 + }, + { + "epoch": 0.5752114748069143, + "grad_norm": 1.7086861853483883, + "learning_rate": 8.062302484652e-06, + "loss": 0.7247, + "step": 18768 + }, + { + "epoch": 0.5752421233296555, + "grad_norm": 1.5448599313654305, + "learning_rate": 8.061328671913085e-06, + "loss": 0.6978, + "step": 18769 + }, + { + "epoch": 0.5752727718523967, + "grad_norm": 1.5408520488880457, + "learning_rate": 8.060354878276394e-06, + "loss": 0.7223, + "step": 18770 + }, + { + "epoch": 0.5753034203751379, + "grad_norm": 0.69904925322122, + "learning_rate": 8.059381103751518e-06, + "loss": 0.5805, + "step": 18771 + }, + { + "epoch": 0.5753340688978791, + "grad_norm": 1.646468706543708, + "learning_rate": 8.05840734834806e-06, + "loss": 0.6671, + "step": 18772 + }, + { + "epoch": 0.5753647174206203, + "grad_norm": 1.7837065832225065, + "learning_rate": 8.057433612075608e-06, + "loss": 0.7043, + "step": 18773 + }, + { + "epoch": 0.5753953659433615, + "grad_norm": 1.5040429147365404, + "learning_rate": 8.056459894943763e-06, + "loss": 0.6587, + "step": 18774 + }, + { + "epoch": 0.5754260144661028, + "grad_norm": 0.6505487325452289, + "learning_rate": 8.055486196962116e-06, + "loss": 0.5421, + "step": 18775 + }, + { + "epoch": 0.5754566629888439, + "grad_norm": 1.5156341716868258, + "learning_rate": 8.054512518140259e-06, + "loss": 0.6568, + "step": 18776 + }, + { + "epoch": 0.5754873115115852, + "grad_norm": 1.5616628279010443, + "learning_rate": 8.053538858487788e-06, + "loss": 0.6992, + "step": 18777 + }, + { + "epoch": 0.5755179600343263, + "grad_norm": 1.4664095259640193, + "learning_rate": 8.052565218014301e-06, + "loss": 0.6308, + "step": 18778 + }, + { + "epoch": 0.5755486085570676, + "grad_norm": 1.4836494621955123, + "learning_rate": 8.05159159672938e-06, + "loss": 0.7398, + "step": 18779 + }, + { + "epoch": 0.5755792570798087, + "grad_norm": 1.9236396827193192, + "learning_rate": 8.050617994642632e-06, + "loss": 0.6912, + "step": 18780 + }, + { + "epoch": 0.57560990560255, + "grad_norm": 1.4179267315436677, + "learning_rate": 8.049644411763641e-06, + "loss": 0.681, + "step": 18781 + }, + { + "epoch": 0.5756405541252911, + "grad_norm": 1.59524174734303, + "learning_rate": 8.048670848102002e-06, + "loss": 0.6302, + "step": 18782 + }, + { + "epoch": 0.5756712026480324, + "grad_norm": 1.8168517020114952, + "learning_rate": 8.04769730366731e-06, + "loss": 0.7637, + "step": 18783 + }, + { + "epoch": 0.5757018511707735, + "grad_norm": 1.8384494489006409, + "learning_rate": 8.046723778469152e-06, + "loss": 0.6096, + "step": 18784 + }, + { + "epoch": 0.5757324996935148, + "grad_norm": 1.5653457446624832, + "learning_rate": 8.045750272517128e-06, + "loss": 0.6517, + "step": 18785 + }, + { + "epoch": 0.575763148216256, + "grad_norm": 1.4804037671999215, + "learning_rate": 8.044776785820826e-06, + "loss": 0.7317, + "step": 18786 + }, + { + "epoch": 0.5757937967389972, + "grad_norm": 1.5155436550328762, + "learning_rate": 8.043803318389838e-06, + "loss": 0.6993, + "step": 18787 + }, + { + "epoch": 0.5758244452617384, + "grad_norm": 1.6618232897279486, + "learning_rate": 8.04282987023376e-06, + "loss": 0.7829, + "step": 18788 + }, + { + "epoch": 0.5758550937844796, + "grad_norm": 1.633193238006354, + "learning_rate": 8.041856441362178e-06, + "loss": 0.5625, + "step": 18789 + }, + { + "epoch": 0.5758857423072208, + "grad_norm": 1.5719381834715707, + "learning_rate": 8.040883031784682e-06, + "loss": 0.6652, + "step": 18790 + }, + { + "epoch": 0.575916390829962, + "grad_norm": 1.7708929994499534, + "learning_rate": 8.03990964151087e-06, + "loss": 0.8074, + "step": 18791 + }, + { + "epoch": 0.5759470393527032, + "grad_norm": 1.6373565947364455, + "learning_rate": 8.038936270550328e-06, + "loss": 0.6304, + "step": 18792 + }, + { + "epoch": 0.5759776878754445, + "grad_norm": 1.7611848261644572, + "learning_rate": 8.03796291891265e-06, + "loss": 0.7274, + "step": 18793 + }, + { + "epoch": 0.5760083363981856, + "grad_norm": 1.5674157496482333, + "learning_rate": 8.036989586607427e-06, + "loss": 0.7762, + "step": 18794 + }, + { + "epoch": 0.5760389849209269, + "grad_norm": 1.6326922962502415, + "learning_rate": 8.036016273644244e-06, + "loss": 0.6914, + "step": 18795 + }, + { + "epoch": 0.576069633443668, + "grad_norm": 1.5731988583127388, + "learning_rate": 8.035042980032697e-06, + "loss": 0.6695, + "step": 18796 + }, + { + "epoch": 0.5761002819664092, + "grad_norm": 0.6484313374991162, + "learning_rate": 8.034069705782378e-06, + "loss": 0.5406, + "step": 18797 + }, + { + "epoch": 0.5761309304891504, + "grad_norm": 1.6615744114136968, + "learning_rate": 8.033096450902865e-06, + "loss": 0.7552, + "step": 18798 + }, + { + "epoch": 0.5761615790118916, + "grad_norm": 1.6969977416520146, + "learning_rate": 8.032123215403765e-06, + "loss": 0.7524, + "step": 18799 + }, + { + "epoch": 0.5761922275346328, + "grad_norm": 1.429399248438648, + "learning_rate": 8.031149999294649e-06, + "loss": 0.6192, + "step": 18800 + }, + { + "epoch": 0.576222876057374, + "grad_norm": 1.3687633778613217, + "learning_rate": 8.030176802585123e-06, + "loss": 0.6421, + "step": 18801 + }, + { + "epoch": 0.5762535245801153, + "grad_norm": 1.3681356554983308, + "learning_rate": 8.029203625284767e-06, + "loss": 0.6977, + "step": 18802 + }, + { + "epoch": 0.5762841731028564, + "grad_norm": 1.4276317010740749, + "learning_rate": 8.028230467403171e-06, + "loss": 0.6522, + "step": 18803 + }, + { + "epoch": 0.5763148216255977, + "grad_norm": 1.4556590921607038, + "learning_rate": 8.027257328949927e-06, + "loss": 0.7212, + "step": 18804 + }, + { + "epoch": 0.5763454701483388, + "grad_norm": 1.5050627816793833, + "learning_rate": 8.02628420993462e-06, + "loss": 0.6879, + "step": 18805 + }, + { + "epoch": 0.5763761186710801, + "grad_norm": 1.7514298012258978, + "learning_rate": 8.025311110366837e-06, + "loss": 0.6944, + "step": 18806 + }, + { + "epoch": 0.5764067671938212, + "grad_norm": 1.4965822133711695, + "learning_rate": 8.024338030256172e-06, + "loss": 0.6928, + "step": 18807 + }, + { + "epoch": 0.5764374157165625, + "grad_norm": 1.4027973359468702, + "learning_rate": 8.023364969612213e-06, + "loss": 0.6766, + "step": 18808 + }, + { + "epoch": 0.5764680642393036, + "grad_norm": 1.3968302219977107, + "learning_rate": 8.022391928444536e-06, + "loss": 0.5546, + "step": 18809 + }, + { + "epoch": 0.5764987127620449, + "grad_norm": 1.5111870944437964, + "learning_rate": 8.021418906762746e-06, + "loss": 0.6125, + "step": 18810 + }, + { + "epoch": 0.576529361284786, + "grad_norm": 1.449060469920183, + "learning_rate": 8.020445904576414e-06, + "loss": 0.6078, + "step": 18811 + }, + { + "epoch": 0.5765600098075273, + "grad_norm": 1.7191298809364592, + "learning_rate": 8.019472921895142e-06, + "loss": 0.7019, + "step": 18812 + }, + { + "epoch": 0.5765906583302685, + "grad_norm": 1.4577250499663965, + "learning_rate": 8.018499958728507e-06, + "loss": 0.6016, + "step": 18813 + }, + { + "epoch": 0.5766213068530097, + "grad_norm": 1.4087707055545977, + "learning_rate": 8.017527015086097e-06, + "loss": 0.6503, + "step": 18814 + }, + { + "epoch": 0.5766519553757509, + "grad_norm": 1.6316517879459285, + "learning_rate": 8.016554090977503e-06, + "loss": 0.683, + "step": 18815 + }, + { + "epoch": 0.5766826038984921, + "grad_norm": 1.779890459925806, + "learning_rate": 8.015581186412309e-06, + "loss": 0.7118, + "step": 18816 + }, + { + "epoch": 0.5767132524212333, + "grad_norm": 1.658948627834076, + "learning_rate": 8.0146083014001e-06, + "loss": 0.7617, + "step": 18817 + }, + { + "epoch": 0.5767439009439745, + "grad_norm": 1.384242592829995, + "learning_rate": 8.013635435950465e-06, + "loss": 0.7287, + "step": 18818 + }, + { + "epoch": 0.5767745494667157, + "grad_norm": 1.6217740368960754, + "learning_rate": 8.012662590072985e-06, + "loss": 0.726, + "step": 18819 + }, + { + "epoch": 0.576805197989457, + "grad_norm": 1.3992842211087704, + "learning_rate": 8.011689763777252e-06, + "loss": 0.7056, + "step": 18820 + }, + { + "epoch": 0.5768358465121981, + "grad_norm": 1.39521467295111, + "learning_rate": 8.01071695707285e-06, + "loss": 0.6432, + "step": 18821 + }, + { + "epoch": 0.5768664950349394, + "grad_norm": 1.5794708298632116, + "learning_rate": 8.009744169969357e-06, + "loss": 0.6935, + "step": 18822 + }, + { + "epoch": 0.5768971435576805, + "grad_norm": 1.4373962169644572, + "learning_rate": 8.008771402476371e-06, + "loss": 0.666, + "step": 18823 + }, + { + "epoch": 0.5769277920804218, + "grad_norm": 1.5089534652180037, + "learning_rate": 8.007798654603466e-06, + "loss": 0.7792, + "step": 18824 + }, + { + "epoch": 0.5769584406031629, + "grad_norm": 1.5657089102783366, + "learning_rate": 8.00682592636023e-06, + "loss": 0.6486, + "step": 18825 + }, + { + "epoch": 0.5769890891259042, + "grad_norm": 1.6428024351867452, + "learning_rate": 8.00585321775625e-06, + "loss": 0.5769, + "step": 18826 + }, + { + "epoch": 0.5770197376486453, + "grad_norm": 1.6268735681442639, + "learning_rate": 8.004880528801106e-06, + "loss": 0.7448, + "step": 18827 + }, + { + "epoch": 0.5770503861713865, + "grad_norm": 1.7692793448344963, + "learning_rate": 8.003907859504386e-06, + "loss": 0.7471, + "step": 18828 + }, + { + "epoch": 0.5770810346941277, + "grad_norm": 0.7115969792976401, + "learning_rate": 8.002935209875674e-06, + "loss": 0.5578, + "step": 18829 + }, + { + "epoch": 0.5771116832168689, + "grad_norm": 1.4644169652882564, + "learning_rate": 8.00196257992455e-06, + "loss": 0.6603, + "step": 18830 + }, + { + "epoch": 0.5771423317396102, + "grad_norm": 1.5184675136027694, + "learning_rate": 8.000989969660602e-06, + "loss": 0.7017, + "step": 18831 + }, + { + "epoch": 0.5771729802623513, + "grad_norm": 1.3587622212516268, + "learning_rate": 8.000017379093413e-06, + "loss": 0.6803, + "step": 18832 + }, + { + "epoch": 0.5772036287850926, + "grad_norm": 1.60193803827004, + "learning_rate": 7.99904480823256e-06, + "loss": 0.7776, + "step": 18833 + }, + { + "epoch": 0.5772342773078337, + "grad_norm": 1.467317884017783, + "learning_rate": 7.998072257087634e-06, + "loss": 0.7552, + "step": 18834 + }, + { + "epoch": 0.577264925830575, + "grad_norm": 1.5636721225224701, + "learning_rate": 7.997099725668212e-06, + "loss": 0.7477, + "step": 18835 + }, + { + "epoch": 0.5772955743533161, + "grad_norm": 1.3383373794839546, + "learning_rate": 7.996127213983879e-06, + "loss": 0.594, + "step": 18836 + }, + { + "epoch": 0.5773262228760574, + "grad_norm": 1.4309012951734457, + "learning_rate": 7.995154722044218e-06, + "loss": 0.6654, + "step": 18837 + }, + { + "epoch": 0.5773568713987985, + "grad_norm": 1.4614897450129594, + "learning_rate": 7.994182249858808e-06, + "loss": 0.6977, + "step": 18838 + }, + { + "epoch": 0.5773875199215398, + "grad_norm": 1.5251950437110013, + "learning_rate": 7.993209797437237e-06, + "loss": 0.7337, + "step": 18839 + }, + { + "epoch": 0.577418168444281, + "grad_norm": 1.4916019243128933, + "learning_rate": 7.992237364789085e-06, + "loss": 0.6177, + "step": 18840 + }, + { + "epoch": 0.5774488169670222, + "grad_norm": 1.4780106690228623, + "learning_rate": 7.991264951923925e-06, + "loss": 0.6537, + "step": 18841 + }, + { + "epoch": 0.5774794654897634, + "grad_norm": 1.5565836200771201, + "learning_rate": 7.990292558851353e-06, + "loss": 0.7545, + "step": 18842 + }, + { + "epoch": 0.5775101140125046, + "grad_norm": 1.5784995991082718, + "learning_rate": 7.989320185580939e-06, + "loss": 0.8042, + "step": 18843 + }, + { + "epoch": 0.5775407625352458, + "grad_norm": 1.4897231995914701, + "learning_rate": 7.988347832122267e-06, + "loss": 0.7535, + "step": 18844 + }, + { + "epoch": 0.577571411057987, + "grad_norm": 1.4587593738860234, + "learning_rate": 7.987375498484918e-06, + "loss": 0.7191, + "step": 18845 + }, + { + "epoch": 0.5776020595807282, + "grad_norm": 1.461628739436991, + "learning_rate": 7.986403184678473e-06, + "loss": 0.7065, + "step": 18846 + }, + { + "epoch": 0.5776327081034694, + "grad_norm": 1.636709397842647, + "learning_rate": 7.985430890712515e-06, + "loss": 0.7087, + "step": 18847 + }, + { + "epoch": 0.5776633566262106, + "grad_norm": 1.633402034329233, + "learning_rate": 7.984458616596622e-06, + "loss": 0.6531, + "step": 18848 + }, + { + "epoch": 0.5776940051489519, + "grad_norm": 1.5885481395934318, + "learning_rate": 7.983486362340372e-06, + "loss": 0.7107, + "step": 18849 + }, + { + "epoch": 0.577724653671693, + "grad_norm": 1.7032798134762004, + "learning_rate": 7.982514127953346e-06, + "loss": 0.7362, + "step": 18850 + }, + { + "epoch": 0.5777553021944343, + "grad_norm": 1.4463705775755562, + "learning_rate": 7.98154191344513e-06, + "loss": 0.5845, + "step": 18851 + }, + { + "epoch": 0.5777859507171754, + "grad_norm": 1.5407357213456232, + "learning_rate": 7.980569718825291e-06, + "loss": 0.7001, + "step": 18852 + }, + { + "epoch": 0.5778165992399167, + "grad_norm": 1.5652068627057878, + "learning_rate": 7.979597544103422e-06, + "loss": 0.6954, + "step": 18853 + }, + { + "epoch": 0.5778472477626578, + "grad_norm": 1.3046971208541076, + "learning_rate": 7.978625389289087e-06, + "loss": 0.6355, + "step": 18854 + }, + { + "epoch": 0.5778778962853991, + "grad_norm": 0.7071281003910994, + "learning_rate": 7.97765325439188e-06, + "loss": 0.5832, + "step": 18855 + }, + { + "epoch": 0.5779085448081402, + "grad_norm": 1.7839119948961197, + "learning_rate": 7.976681139421371e-06, + "loss": 0.8128, + "step": 18856 + }, + { + "epoch": 0.5779391933308815, + "grad_norm": 1.559575984635521, + "learning_rate": 7.97570904438714e-06, + "loss": 0.6957, + "step": 18857 + }, + { + "epoch": 0.5779698418536227, + "grad_norm": 1.573901265083865, + "learning_rate": 7.974736969298767e-06, + "loss": 0.6004, + "step": 18858 + }, + { + "epoch": 0.5780004903763638, + "grad_norm": 1.6033128946186601, + "learning_rate": 7.973764914165827e-06, + "loss": 0.6707, + "step": 18859 + }, + { + "epoch": 0.5780311388991051, + "grad_norm": 1.3106683316397114, + "learning_rate": 7.9727928789979e-06, + "loss": 0.6588, + "step": 18860 + }, + { + "epoch": 0.5780617874218462, + "grad_norm": 1.778147180930964, + "learning_rate": 7.971820863804564e-06, + "loss": 0.641, + "step": 18861 + }, + { + "epoch": 0.5780924359445875, + "grad_norm": 0.6633027741519225, + "learning_rate": 7.970848868595399e-06, + "loss": 0.5224, + "step": 18862 + }, + { + "epoch": 0.5781230844673286, + "grad_norm": 1.696953063636863, + "learning_rate": 7.969876893379974e-06, + "loss": 0.6315, + "step": 18863 + }, + { + "epoch": 0.5781537329900699, + "grad_norm": 1.7829776595888545, + "learning_rate": 7.968904938167875e-06, + "loss": 0.7417, + "step": 18864 + }, + { + "epoch": 0.578184381512811, + "grad_norm": 1.42388580330585, + "learning_rate": 7.967933002968672e-06, + "loss": 0.7103, + "step": 18865 + }, + { + "epoch": 0.5782150300355523, + "grad_norm": 1.631264624852216, + "learning_rate": 7.966961087791948e-06, + "loss": 0.6827, + "step": 18866 + }, + { + "epoch": 0.5782456785582935, + "grad_norm": 1.4768020108003501, + "learning_rate": 7.965989192647276e-06, + "loss": 0.6333, + "step": 18867 + }, + { + "epoch": 0.5782763270810347, + "grad_norm": 1.6922667055711011, + "learning_rate": 7.965017317544231e-06, + "loss": 0.7578, + "step": 18868 + }, + { + "epoch": 0.5783069756037759, + "grad_norm": 1.4792638112586425, + "learning_rate": 7.964045462492393e-06, + "loss": 0.6688, + "step": 18869 + }, + { + "epoch": 0.5783376241265171, + "grad_norm": 1.7284442030032978, + "learning_rate": 7.963073627501336e-06, + "loss": 0.8059, + "step": 18870 + }, + { + "epoch": 0.5783682726492583, + "grad_norm": 0.6867392663877722, + "learning_rate": 7.962101812580633e-06, + "loss": 0.5972, + "step": 18871 + }, + { + "epoch": 0.5783989211719995, + "grad_norm": 1.4435447296467423, + "learning_rate": 7.961130017739866e-06, + "loss": 0.713, + "step": 18872 + }, + { + "epoch": 0.5784295696947407, + "grad_norm": 1.469791612943538, + "learning_rate": 7.960158242988603e-06, + "loss": 0.6174, + "step": 18873 + }, + { + "epoch": 0.578460218217482, + "grad_norm": 1.6578636118347425, + "learning_rate": 7.959186488336427e-06, + "loss": 0.6657, + "step": 18874 + }, + { + "epoch": 0.5784908667402231, + "grad_norm": 1.4832500556144277, + "learning_rate": 7.958214753792908e-06, + "loss": 0.7587, + "step": 18875 + }, + { + "epoch": 0.5785215152629644, + "grad_norm": 1.5871715743868187, + "learning_rate": 7.957243039367616e-06, + "loss": 0.619, + "step": 18876 + }, + { + "epoch": 0.5785521637857055, + "grad_norm": 1.4119284864174877, + "learning_rate": 7.95627134507014e-06, + "loss": 0.6534, + "step": 18877 + }, + { + "epoch": 0.5785828123084468, + "grad_norm": 1.4894803950180662, + "learning_rate": 7.95529967091004e-06, + "loss": 0.6361, + "step": 18878 + }, + { + "epoch": 0.5786134608311879, + "grad_norm": 1.4332598520833424, + "learning_rate": 7.954328016896894e-06, + "loss": 0.6481, + "step": 18879 + }, + { + "epoch": 0.5786441093539292, + "grad_norm": 1.33938903926675, + "learning_rate": 7.953356383040281e-06, + "loss": 0.6815, + "step": 18880 + }, + { + "epoch": 0.5786747578766703, + "grad_norm": 1.510271222085551, + "learning_rate": 7.952384769349768e-06, + "loss": 0.6516, + "step": 18881 + }, + { + "epoch": 0.5787054063994116, + "grad_norm": 1.5246468311018004, + "learning_rate": 7.951413175834933e-06, + "loss": 0.7852, + "step": 18882 + }, + { + "epoch": 0.5787360549221527, + "grad_norm": 1.3492792493121526, + "learning_rate": 7.950441602505348e-06, + "loss": 0.6185, + "step": 18883 + }, + { + "epoch": 0.578766703444894, + "grad_norm": 1.5542677393019588, + "learning_rate": 7.949470049370586e-06, + "loss": 0.8187, + "step": 18884 + }, + { + "epoch": 0.5787973519676352, + "grad_norm": 1.5656939101561707, + "learning_rate": 7.948498516440225e-06, + "loss": 0.67, + "step": 18885 + }, + { + "epoch": 0.5788280004903764, + "grad_norm": 1.4423049965161476, + "learning_rate": 7.947527003723828e-06, + "loss": 0.6164, + "step": 18886 + }, + { + "epoch": 0.5788586490131176, + "grad_norm": 1.7140233003584648, + "learning_rate": 7.946555511230972e-06, + "loss": 0.6637, + "step": 18887 + }, + { + "epoch": 0.5788892975358588, + "grad_norm": 1.5850814849713297, + "learning_rate": 7.945584038971232e-06, + "loss": 0.6303, + "step": 18888 + }, + { + "epoch": 0.5789199460586, + "grad_norm": 1.5287576896635264, + "learning_rate": 7.944612586954179e-06, + "loss": 0.5876, + "step": 18889 + }, + { + "epoch": 0.5789505945813411, + "grad_norm": 1.6130139260370309, + "learning_rate": 7.94364115518938e-06, + "loss": 0.689, + "step": 18890 + }, + { + "epoch": 0.5789812431040824, + "grad_norm": 1.4842438093018007, + "learning_rate": 7.942669743686415e-06, + "loss": 0.776, + "step": 18891 + }, + { + "epoch": 0.5790118916268235, + "grad_norm": 1.6093113421998926, + "learning_rate": 7.941698352454848e-06, + "loss": 0.7469, + "step": 18892 + }, + { + "epoch": 0.5790425401495648, + "grad_norm": 1.5968374477532334, + "learning_rate": 7.940726981504257e-06, + "loss": 0.7356, + "step": 18893 + }, + { + "epoch": 0.579073188672306, + "grad_norm": 1.5844433156084492, + "learning_rate": 7.939755630844211e-06, + "loss": 0.7336, + "step": 18894 + }, + { + "epoch": 0.5791038371950472, + "grad_norm": 1.4778452123535464, + "learning_rate": 7.938784300484273e-06, + "loss": 0.6511, + "step": 18895 + }, + { + "epoch": 0.5791344857177884, + "grad_norm": 1.569946549574349, + "learning_rate": 7.937812990434028e-06, + "loss": 0.6909, + "step": 18896 + }, + { + "epoch": 0.5791651342405296, + "grad_norm": 1.4055365513534404, + "learning_rate": 7.936841700703037e-06, + "loss": 0.7155, + "step": 18897 + }, + { + "epoch": 0.5791957827632708, + "grad_norm": 0.6609164790230102, + "learning_rate": 7.935870431300872e-06, + "loss": 0.543, + "step": 18898 + }, + { + "epoch": 0.579226431286012, + "grad_norm": 0.6530172189468901, + "learning_rate": 7.934899182237104e-06, + "loss": 0.5663, + "step": 18899 + }, + { + "epoch": 0.5792570798087532, + "grad_norm": 1.5987813813983554, + "learning_rate": 7.933927953521302e-06, + "loss": 0.6838, + "step": 18900 + }, + { + "epoch": 0.5792877283314944, + "grad_norm": 1.517335604961334, + "learning_rate": 7.932956745163035e-06, + "loss": 0.6368, + "step": 18901 + }, + { + "epoch": 0.5793183768542356, + "grad_norm": 1.4222382586099165, + "learning_rate": 7.931985557171878e-06, + "loss": 0.6574, + "step": 18902 + }, + { + "epoch": 0.5793490253769769, + "grad_norm": 1.5404687158687742, + "learning_rate": 7.931014389557394e-06, + "loss": 0.7304, + "step": 18903 + }, + { + "epoch": 0.579379673899718, + "grad_norm": 1.2563980030840163, + "learning_rate": 7.930043242329155e-06, + "loss": 0.6237, + "step": 18904 + }, + { + "epoch": 0.5794103224224593, + "grad_norm": 1.3646970297600252, + "learning_rate": 7.929072115496732e-06, + "loss": 0.6133, + "step": 18905 + }, + { + "epoch": 0.5794409709452004, + "grad_norm": 1.439759320842329, + "learning_rate": 7.928101009069687e-06, + "loss": 0.5933, + "step": 18906 + }, + { + "epoch": 0.5794716194679417, + "grad_norm": 1.4509265574614079, + "learning_rate": 7.927129923057597e-06, + "loss": 0.6308, + "step": 18907 + }, + { + "epoch": 0.5795022679906828, + "grad_norm": 1.5070425331138926, + "learning_rate": 7.926158857470025e-06, + "loss": 0.6899, + "step": 18908 + }, + { + "epoch": 0.5795329165134241, + "grad_norm": 1.362329169925009, + "learning_rate": 7.925187812316537e-06, + "loss": 0.6397, + "step": 18909 + }, + { + "epoch": 0.5795635650361652, + "grad_norm": 1.4901280238263512, + "learning_rate": 7.924216787606708e-06, + "loss": 0.7252, + "step": 18910 + }, + { + "epoch": 0.5795942135589065, + "grad_norm": 1.4910609367735994, + "learning_rate": 7.9232457833501e-06, + "loss": 0.6206, + "step": 18911 + }, + { + "epoch": 0.5796248620816477, + "grad_norm": 1.5170117949759943, + "learning_rate": 7.922274799556284e-06, + "loss": 0.633, + "step": 18912 + }, + { + "epoch": 0.5796555106043889, + "grad_norm": 1.431050849362777, + "learning_rate": 7.921303836234825e-06, + "loss": 0.8135, + "step": 18913 + }, + { + "epoch": 0.5796861591271301, + "grad_norm": 1.5769202458837575, + "learning_rate": 7.92033289339529e-06, + "loss": 0.6957, + "step": 18914 + }, + { + "epoch": 0.5797168076498713, + "grad_norm": 1.608610808435481, + "learning_rate": 7.91936197104725e-06, + "loss": 0.6572, + "step": 18915 + }, + { + "epoch": 0.5797474561726125, + "grad_norm": 1.5252444432936616, + "learning_rate": 7.918391069200272e-06, + "loss": 0.7318, + "step": 18916 + }, + { + "epoch": 0.5797781046953537, + "grad_norm": 1.5549138178752233, + "learning_rate": 7.917420187863911e-06, + "loss": 0.8047, + "step": 18917 + }, + { + "epoch": 0.5798087532180949, + "grad_norm": 1.4687363453335558, + "learning_rate": 7.916449327047749e-06, + "loss": 0.7302, + "step": 18918 + }, + { + "epoch": 0.5798394017408361, + "grad_norm": 1.7293853100870737, + "learning_rate": 7.915478486761338e-06, + "loss": 0.7553, + "step": 18919 + }, + { + "epoch": 0.5798700502635773, + "grad_norm": 1.6204763807943712, + "learning_rate": 7.914507667014257e-06, + "loss": 0.6999, + "step": 18920 + }, + { + "epoch": 0.5799006987863184, + "grad_norm": 1.81080067011037, + "learning_rate": 7.913536867816063e-06, + "loss": 0.798, + "step": 18921 + }, + { + "epoch": 0.5799313473090597, + "grad_norm": 1.4017858510602261, + "learning_rate": 7.912566089176323e-06, + "loss": 0.7147, + "step": 18922 + }, + { + "epoch": 0.5799619958318009, + "grad_norm": 1.4797311654827292, + "learning_rate": 7.911595331104605e-06, + "loss": 0.6952, + "step": 18923 + }, + { + "epoch": 0.5799926443545421, + "grad_norm": 1.5240729304683152, + "learning_rate": 7.910624593610473e-06, + "loss": 0.6743, + "step": 18924 + }, + { + "epoch": 0.5800232928772833, + "grad_norm": 1.5615030063176984, + "learning_rate": 7.90965387670349e-06, + "loss": 0.625, + "step": 18925 + }, + { + "epoch": 0.5800539414000245, + "grad_norm": 1.443891890236922, + "learning_rate": 7.908683180393223e-06, + "loss": 0.6863, + "step": 18926 + }, + { + "epoch": 0.5800845899227657, + "grad_norm": 1.5759817776445526, + "learning_rate": 7.907712504689233e-06, + "loss": 0.74, + "step": 18927 + }, + { + "epoch": 0.5801152384455069, + "grad_norm": 1.5783361142146763, + "learning_rate": 7.906741849601092e-06, + "loss": 0.7425, + "step": 18928 + }, + { + "epoch": 0.5801458869682481, + "grad_norm": 1.5084331735301848, + "learning_rate": 7.905771215138358e-06, + "loss": 0.7388, + "step": 18929 + }, + { + "epoch": 0.5801765354909894, + "grad_norm": 1.4895808237281374, + "learning_rate": 7.904800601310594e-06, + "loss": 0.7634, + "step": 18930 + }, + { + "epoch": 0.5802071840137305, + "grad_norm": 1.4083985438323643, + "learning_rate": 7.903830008127367e-06, + "loss": 0.5956, + "step": 18931 + }, + { + "epoch": 0.5802378325364718, + "grad_norm": 1.395819828788821, + "learning_rate": 7.90285943559824e-06, + "loss": 0.5972, + "step": 18932 + }, + { + "epoch": 0.5802684810592129, + "grad_norm": 1.5458949591290163, + "learning_rate": 7.901888883732773e-06, + "loss": 0.6918, + "step": 18933 + }, + { + "epoch": 0.5802991295819542, + "grad_norm": 1.5666042960964326, + "learning_rate": 7.900918352540534e-06, + "loss": 0.6605, + "step": 18934 + }, + { + "epoch": 0.5803297781046953, + "grad_norm": 1.6071747512929417, + "learning_rate": 7.899947842031081e-06, + "loss": 0.6359, + "step": 18935 + }, + { + "epoch": 0.5803604266274366, + "grad_norm": 1.4734352544950322, + "learning_rate": 7.89897735221398e-06, + "loss": 0.7074, + "step": 18936 + }, + { + "epoch": 0.5803910751501777, + "grad_norm": 1.4380370721553548, + "learning_rate": 7.898006883098796e-06, + "loss": 0.5893, + "step": 18937 + }, + { + "epoch": 0.580421723672919, + "grad_norm": 1.480603174943988, + "learning_rate": 7.897036434695082e-06, + "loss": 0.6913, + "step": 18938 + }, + { + "epoch": 0.5804523721956601, + "grad_norm": 1.4795242589407003, + "learning_rate": 7.896066007012412e-06, + "loss": 0.7003, + "step": 18939 + }, + { + "epoch": 0.5804830207184014, + "grad_norm": 1.5905518385883066, + "learning_rate": 7.89509560006034e-06, + "loss": 0.7847, + "step": 18940 + }, + { + "epoch": 0.5805136692411426, + "grad_norm": 1.5143799609748094, + "learning_rate": 7.894125213848429e-06, + "loss": 0.7496, + "step": 18941 + }, + { + "epoch": 0.5805443177638838, + "grad_norm": 1.5796537117922855, + "learning_rate": 7.893154848386242e-06, + "loss": 0.7764, + "step": 18942 + }, + { + "epoch": 0.580574966286625, + "grad_norm": 1.8705644463544508, + "learning_rate": 7.89218450368334e-06, + "loss": 0.7341, + "step": 18943 + }, + { + "epoch": 0.5806056148093662, + "grad_norm": 1.8199868898953813, + "learning_rate": 7.891214179749278e-06, + "loss": 0.7612, + "step": 18944 + }, + { + "epoch": 0.5806362633321074, + "grad_norm": 1.407215347690578, + "learning_rate": 7.890243876593628e-06, + "loss": 0.6203, + "step": 18945 + }, + { + "epoch": 0.5806669118548486, + "grad_norm": 1.5931988902224699, + "learning_rate": 7.88927359422594e-06, + "loss": 0.7967, + "step": 18946 + }, + { + "epoch": 0.5806975603775898, + "grad_norm": 1.5342700120047637, + "learning_rate": 7.888303332655785e-06, + "loss": 0.7646, + "step": 18947 + }, + { + "epoch": 0.580728208900331, + "grad_norm": 1.3948967829630914, + "learning_rate": 7.887333091892717e-06, + "loss": 0.6148, + "step": 18948 + }, + { + "epoch": 0.5807588574230722, + "grad_norm": 1.5677740097288495, + "learning_rate": 7.886362871946291e-06, + "loss": 0.6584, + "step": 18949 + }, + { + "epoch": 0.5807895059458135, + "grad_norm": 1.5939867650952293, + "learning_rate": 7.885392672826079e-06, + "loss": 0.6618, + "step": 18950 + }, + { + "epoch": 0.5808201544685546, + "grad_norm": 1.515329537203328, + "learning_rate": 7.884422494541632e-06, + "loss": 0.7173, + "step": 18951 + }, + { + "epoch": 0.5808508029912958, + "grad_norm": 0.7286214539748554, + "learning_rate": 7.883452337102508e-06, + "loss": 0.5753, + "step": 18952 + }, + { + "epoch": 0.580881451514037, + "grad_norm": 1.6107537172067148, + "learning_rate": 7.882482200518272e-06, + "loss": 0.7301, + "step": 18953 + }, + { + "epoch": 0.5809121000367782, + "grad_norm": 1.4522165239923208, + "learning_rate": 7.881512084798481e-06, + "loss": 0.6554, + "step": 18954 + }, + { + "epoch": 0.5809427485595194, + "grad_norm": 1.8449052418646645, + "learning_rate": 7.880541989952693e-06, + "loss": 0.7387, + "step": 18955 + }, + { + "epoch": 0.5809733970822606, + "grad_norm": 1.4503877011280282, + "learning_rate": 7.879571915990468e-06, + "loss": 0.6722, + "step": 18956 + }, + { + "epoch": 0.5810040456050019, + "grad_norm": 1.5637250402010332, + "learning_rate": 7.878601862921363e-06, + "loss": 0.7829, + "step": 18957 + }, + { + "epoch": 0.581034694127743, + "grad_norm": 1.5380760349832392, + "learning_rate": 7.877631830754936e-06, + "loss": 0.7285, + "step": 18958 + }, + { + "epoch": 0.5810653426504843, + "grad_norm": 1.5925874818879537, + "learning_rate": 7.876661819500748e-06, + "loss": 0.6758, + "step": 18959 + }, + { + "epoch": 0.5810959911732254, + "grad_norm": 0.7006921733175828, + "learning_rate": 7.87569182916835e-06, + "loss": 0.5757, + "step": 18960 + }, + { + "epoch": 0.5811266396959667, + "grad_norm": 0.6971674903631726, + "learning_rate": 7.874721859767308e-06, + "loss": 0.5716, + "step": 18961 + }, + { + "epoch": 0.5811572882187078, + "grad_norm": 1.7819832208887776, + "learning_rate": 7.873751911307174e-06, + "loss": 0.6632, + "step": 18962 + }, + { + "epoch": 0.5811879367414491, + "grad_norm": 0.6727057653160511, + "learning_rate": 7.872781983797504e-06, + "loss": 0.5796, + "step": 18963 + }, + { + "epoch": 0.5812185852641902, + "grad_norm": 1.5670227088953141, + "learning_rate": 7.87181207724786e-06, + "loss": 0.7881, + "step": 18964 + }, + { + "epoch": 0.5812492337869315, + "grad_norm": 1.514042451629577, + "learning_rate": 7.870842191667795e-06, + "loss": 0.6436, + "step": 18965 + }, + { + "epoch": 0.5812798823096726, + "grad_norm": 1.4217526601264519, + "learning_rate": 7.869872327066867e-06, + "loss": 0.7678, + "step": 18966 + }, + { + "epoch": 0.5813105308324139, + "grad_norm": 0.6886808818537763, + "learning_rate": 7.868902483454633e-06, + "loss": 0.5928, + "step": 18967 + }, + { + "epoch": 0.5813411793551551, + "grad_norm": 0.6711376629607609, + "learning_rate": 7.867932660840647e-06, + "loss": 0.537, + "step": 18968 + }, + { + "epoch": 0.5813718278778963, + "grad_norm": 1.65185997321605, + "learning_rate": 7.866962859234466e-06, + "loss": 0.7203, + "step": 18969 + }, + { + "epoch": 0.5814024764006375, + "grad_norm": 1.6141285466496447, + "learning_rate": 7.86599307864565e-06, + "loss": 0.7258, + "step": 18970 + }, + { + "epoch": 0.5814331249233787, + "grad_norm": 1.8219235677150518, + "learning_rate": 7.865023319083742e-06, + "loss": 0.64, + "step": 18971 + }, + { + "epoch": 0.5814637734461199, + "grad_norm": 1.661943012305781, + "learning_rate": 7.864053580558313e-06, + "loss": 0.6628, + "step": 18972 + }, + { + "epoch": 0.5814944219688611, + "grad_norm": 1.5388864103169126, + "learning_rate": 7.863083863078905e-06, + "loss": 0.6605, + "step": 18973 + }, + { + "epoch": 0.5815250704916023, + "grad_norm": 1.5854217410385836, + "learning_rate": 7.862114166655081e-06, + "loss": 0.7529, + "step": 18974 + }, + { + "epoch": 0.5815557190143436, + "grad_norm": 1.7051897558037, + "learning_rate": 7.861144491296394e-06, + "loss": 0.6507, + "step": 18975 + }, + { + "epoch": 0.5815863675370847, + "grad_norm": 1.46820112812971, + "learning_rate": 7.860174837012395e-06, + "loss": 0.5911, + "step": 18976 + }, + { + "epoch": 0.581617016059826, + "grad_norm": 1.5754195462469032, + "learning_rate": 7.859205203812644e-06, + "loss": 0.7489, + "step": 18977 + }, + { + "epoch": 0.5816476645825671, + "grad_norm": 1.454855893819157, + "learning_rate": 7.85823559170669e-06, + "loss": 0.7122, + "step": 18978 + }, + { + "epoch": 0.5816783131053084, + "grad_norm": 1.614228156560003, + "learning_rate": 7.857266000704086e-06, + "loss": 0.6958, + "step": 18979 + }, + { + "epoch": 0.5817089616280495, + "grad_norm": 1.4605399922541153, + "learning_rate": 7.856296430814395e-06, + "loss": 0.6763, + "step": 18980 + }, + { + "epoch": 0.5817396101507908, + "grad_norm": 1.748423737381484, + "learning_rate": 7.855326882047157e-06, + "loss": 0.7576, + "step": 18981 + }, + { + "epoch": 0.5817702586735319, + "grad_norm": 1.6288342222780046, + "learning_rate": 7.854357354411937e-06, + "loss": 0.6064, + "step": 18982 + }, + { + "epoch": 0.5818009071962731, + "grad_norm": 1.6331719531314826, + "learning_rate": 7.85338784791828e-06, + "loss": 0.691, + "step": 18983 + }, + { + "epoch": 0.5818315557190143, + "grad_norm": 2.1478757669781614, + "learning_rate": 7.852418362575742e-06, + "loss": 0.7826, + "step": 18984 + }, + { + "epoch": 0.5818622042417555, + "grad_norm": 1.4122428090551855, + "learning_rate": 7.851448898393876e-06, + "loss": 0.7874, + "step": 18985 + }, + { + "epoch": 0.5818928527644968, + "grad_norm": 1.2897773107039208, + "learning_rate": 7.850479455382236e-06, + "loss": 0.679, + "step": 18986 + }, + { + "epoch": 0.5819235012872379, + "grad_norm": 1.6352437213765427, + "learning_rate": 7.849510033550368e-06, + "loss": 0.8167, + "step": 18987 + }, + { + "epoch": 0.5819541498099792, + "grad_norm": 1.8808165908446173, + "learning_rate": 7.84854063290783e-06, + "loss": 0.7269, + "step": 18988 + }, + { + "epoch": 0.5819847983327203, + "grad_norm": 1.5305687903999998, + "learning_rate": 7.847571253464174e-06, + "loss": 0.7311, + "step": 18989 + }, + { + "epoch": 0.5820154468554616, + "grad_norm": 1.5473067279774277, + "learning_rate": 7.846601895228942e-06, + "loss": 0.6954, + "step": 18990 + }, + { + "epoch": 0.5820460953782027, + "grad_norm": 1.7305336336086041, + "learning_rate": 7.8456325582117e-06, + "loss": 0.6907, + "step": 18991 + }, + { + "epoch": 0.582076743900944, + "grad_norm": 0.7004820840597591, + "learning_rate": 7.844663242421983e-06, + "loss": 0.5678, + "step": 18992 + }, + { + "epoch": 0.5821073924236851, + "grad_norm": 1.4009585688723325, + "learning_rate": 7.84369394786936e-06, + "loss": 0.613, + "step": 18993 + }, + { + "epoch": 0.5821380409464264, + "grad_norm": 1.4803286706424785, + "learning_rate": 7.842724674563369e-06, + "loss": 0.7453, + "step": 18994 + }, + { + "epoch": 0.5821686894691676, + "grad_norm": 1.6157680719963052, + "learning_rate": 7.841755422513561e-06, + "loss": 0.7313, + "step": 18995 + }, + { + "epoch": 0.5821993379919088, + "grad_norm": 1.4940595547637996, + "learning_rate": 7.840786191729492e-06, + "loss": 0.6883, + "step": 18996 + }, + { + "epoch": 0.58222998651465, + "grad_norm": 1.467937610113521, + "learning_rate": 7.839816982220708e-06, + "loss": 0.767, + "step": 18997 + }, + { + "epoch": 0.5822606350373912, + "grad_norm": 1.467843688763293, + "learning_rate": 7.838847793996759e-06, + "loss": 0.629, + "step": 18998 + }, + { + "epoch": 0.5822912835601324, + "grad_norm": 1.4802300371894508, + "learning_rate": 7.837878627067196e-06, + "loss": 0.723, + "step": 18999 + }, + { + "epoch": 0.5823219320828736, + "grad_norm": 1.5848291040053433, + "learning_rate": 7.836909481441568e-06, + "loss": 0.8127, + "step": 19000 + }, + { + "epoch": 0.5823525806056148, + "grad_norm": 1.668032810781762, + "learning_rate": 7.835940357129426e-06, + "loss": 0.6795, + "step": 19001 + }, + { + "epoch": 0.582383229128356, + "grad_norm": 1.4281512515664407, + "learning_rate": 7.83497125414032e-06, + "loss": 0.75, + "step": 19002 + }, + { + "epoch": 0.5824138776510972, + "grad_norm": 1.5753007089800524, + "learning_rate": 7.83400217248379e-06, + "loss": 0.689, + "step": 19003 + }, + { + "epoch": 0.5824445261738385, + "grad_norm": 1.5331259681718017, + "learning_rate": 7.833033112169395e-06, + "loss": 0.7009, + "step": 19004 + }, + { + "epoch": 0.5824751746965796, + "grad_norm": 1.527592152131558, + "learning_rate": 7.832064073206678e-06, + "loss": 0.6822, + "step": 19005 + }, + { + "epoch": 0.5825058232193209, + "grad_norm": 1.5338819174522114, + "learning_rate": 7.831095055605187e-06, + "loss": 0.7104, + "step": 19006 + }, + { + "epoch": 0.582536471742062, + "grad_norm": 0.6531292444643746, + "learning_rate": 7.830126059374473e-06, + "loss": 0.5645, + "step": 19007 + }, + { + "epoch": 0.5825671202648033, + "grad_norm": 1.4696067791693281, + "learning_rate": 7.82915708452408e-06, + "loss": 0.7377, + "step": 19008 + }, + { + "epoch": 0.5825977687875444, + "grad_norm": 0.6561651973574144, + "learning_rate": 7.828188131063559e-06, + "loss": 0.5521, + "step": 19009 + }, + { + "epoch": 0.5826284173102857, + "grad_norm": 1.5679098585691886, + "learning_rate": 7.827219199002456e-06, + "loss": 0.725, + "step": 19010 + }, + { + "epoch": 0.5826590658330268, + "grad_norm": 1.5625088777538947, + "learning_rate": 7.826250288350318e-06, + "loss": 0.7598, + "step": 19011 + }, + { + "epoch": 0.5826897143557681, + "grad_norm": 1.7492531197300403, + "learning_rate": 7.825281399116693e-06, + "loss": 0.7053, + "step": 19012 + }, + { + "epoch": 0.5827203628785093, + "grad_norm": 1.665307301247666, + "learning_rate": 7.824312531311128e-06, + "loss": 0.8198, + "step": 19013 + }, + { + "epoch": 0.5827510114012504, + "grad_norm": 0.6890395659257528, + "learning_rate": 7.823343684943165e-06, + "loss": 0.585, + "step": 19014 + }, + { + "epoch": 0.5827816599239917, + "grad_norm": 1.6605341891921317, + "learning_rate": 7.822374860022357e-06, + "loss": 0.6827, + "step": 19015 + }, + { + "epoch": 0.5828123084467328, + "grad_norm": 1.7614491715982077, + "learning_rate": 7.821406056558246e-06, + "loss": 0.7949, + "step": 19016 + }, + { + "epoch": 0.5828429569694741, + "grad_norm": 1.5834709984985957, + "learning_rate": 7.820437274560375e-06, + "loss": 0.6712, + "step": 19017 + }, + { + "epoch": 0.5828736054922152, + "grad_norm": 1.4943477322758976, + "learning_rate": 7.819468514038296e-06, + "loss": 0.6123, + "step": 19018 + }, + { + "epoch": 0.5829042540149565, + "grad_norm": 1.5766190429993905, + "learning_rate": 7.81849977500155e-06, + "loss": 0.7189, + "step": 19019 + }, + { + "epoch": 0.5829349025376976, + "grad_norm": 1.4588998859727618, + "learning_rate": 7.817531057459687e-06, + "loss": 0.7115, + "step": 19020 + }, + { + "epoch": 0.5829655510604389, + "grad_norm": 1.5039092350170058, + "learning_rate": 7.816562361422247e-06, + "loss": 0.6068, + "step": 19021 + }, + { + "epoch": 0.58299619958318, + "grad_norm": 1.6075121803710346, + "learning_rate": 7.815593686898774e-06, + "loss": 0.762, + "step": 19022 + }, + { + "epoch": 0.5830268481059213, + "grad_norm": 1.5742891223358106, + "learning_rate": 7.814625033898819e-06, + "loss": 0.7235, + "step": 19023 + }, + { + "epoch": 0.5830574966286625, + "grad_norm": 0.6875930730952908, + "learning_rate": 7.813656402431925e-06, + "loss": 0.6092, + "step": 19024 + }, + { + "epoch": 0.5830881451514037, + "grad_norm": 1.4104860134588582, + "learning_rate": 7.812687792507629e-06, + "loss": 0.6591, + "step": 19025 + }, + { + "epoch": 0.5831187936741449, + "grad_norm": 0.6688565618395794, + "learning_rate": 7.811719204135481e-06, + "loss": 0.5648, + "step": 19026 + }, + { + "epoch": 0.5831494421968861, + "grad_norm": 1.442398977414983, + "learning_rate": 7.810750637325023e-06, + "loss": 0.6487, + "step": 19027 + }, + { + "epoch": 0.5831800907196273, + "grad_norm": 1.696217944790546, + "learning_rate": 7.8097820920858e-06, + "loss": 0.7431, + "step": 19028 + }, + { + "epoch": 0.5832107392423685, + "grad_norm": 1.5359333578014487, + "learning_rate": 7.808813568427356e-06, + "loss": 0.6059, + "step": 19029 + }, + { + "epoch": 0.5832413877651097, + "grad_norm": 0.6691528265719874, + "learning_rate": 7.807845066359229e-06, + "loss": 0.5698, + "step": 19030 + }, + { + "epoch": 0.583272036287851, + "grad_norm": 1.299970186732457, + "learning_rate": 7.80687658589097e-06, + "loss": 0.6352, + "step": 19031 + }, + { + "epoch": 0.5833026848105921, + "grad_norm": 1.7717147075381956, + "learning_rate": 7.805908127032116e-06, + "loss": 0.6994, + "step": 19032 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 1.2600154179366174, + "learning_rate": 7.804939689792206e-06, + "loss": 0.7604, + "step": 19033 + }, + { + "epoch": 0.5833639818560745, + "grad_norm": 1.61795727123116, + "learning_rate": 7.803971274180793e-06, + "loss": 0.7302, + "step": 19034 + }, + { + "epoch": 0.5833946303788158, + "grad_norm": 1.559455785910454, + "learning_rate": 7.803002880207411e-06, + "loss": 0.7071, + "step": 19035 + }, + { + "epoch": 0.5834252789015569, + "grad_norm": 1.4596473983453657, + "learning_rate": 7.802034507881601e-06, + "loss": 0.6385, + "step": 19036 + }, + { + "epoch": 0.5834559274242982, + "grad_norm": 1.7371871668460026, + "learning_rate": 7.801066157212909e-06, + "loss": 0.6624, + "step": 19037 + }, + { + "epoch": 0.5834865759470393, + "grad_norm": 0.6449255584865332, + "learning_rate": 7.800097828210872e-06, + "loss": 0.5689, + "step": 19038 + }, + { + "epoch": 0.5835172244697806, + "grad_norm": 1.4349658117208837, + "learning_rate": 7.79912952088504e-06, + "loss": 0.5931, + "step": 19039 + }, + { + "epoch": 0.5835478729925218, + "grad_norm": 1.3964424123966153, + "learning_rate": 7.798161235244944e-06, + "loss": 0.6848, + "step": 19040 + }, + { + "epoch": 0.583578521515263, + "grad_norm": 0.6972335105682366, + "learning_rate": 7.79719297130013e-06, + "loss": 0.5703, + "step": 19041 + }, + { + "epoch": 0.5836091700380042, + "grad_norm": 1.5668778877333978, + "learning_rate": 7.796224729060135e-06, + "loss": 0.7888, + "step": 19042 + }, + { + "epoch": 0.5836398185607454, + "grad_norm": 1.5178406208502777, + "learning_rate": 7.795256508534508e-06, + "loss": 0.695, + "step": 19043 + }, + { + "epoch": 0.5836704670834866, + "grad_norm": 1.6918177658853473, + "learning_rate": 7.794288309732774e-06, + "loss": 0.7679, + "step": 19044 + }, + { + "epoch": 0.5837011156062277, + "grad_norm": 1.4972589401821481, + "learning_rate": 7.793320132664487e-06, + "loss": 0.708, + "step": 19045 + }, + { + "epoch": 0.583731764128969, + "grad_norm": 1.6681524679206563, + "learning_rate": 7.792351977339177e-06, + "loss": 0.7697, + "step": 19046 + }, + { + "epoch": 0.5837624126517101, + "grad_norm": 0.6903252629570655, + "learning_rate": 7.791383843766395e-06, + "loss": 0.5646, + "step": 19047 + }, + { + "epoch": 0.5837930611744514, + "grad_norm": 1.481981001189213, + "learning_rate": 7.79041573195567e-06, + "loss": 0.5262, + "step": 19048 + }, + { + "epoch": 0.5838237096971925, + "grad_norm": 1.4304861576436947, + "learning_rate": 7.78944764191654e-06, + "loss": 0.7665, + "step": 19049 + }, + { + "epoch": 0.5838543582199338, + "grad_norm": 1.6052720573237536, + "learning_rate": 7.788479573658553e-06, + "loss": 0.7322, + "step": 19050 + }, + { + "epoch": 0.583885006742675, + "grad_norm": 1.6407606811486557, + "learning_rate": 7.787511527191241e-06, + "loss": 0.6339, + "step": 19051 + }, + { + "epoch": 0.5839156552654162, + "grad_norm": 1.3933495823258368, + "learning_rate": 7.786543502524143e-06, + "loss": 0.5838, + "step": 19052 + }, + { + "epoch": 0.5839463037881574, + "grad_norm": 1.4671701243887711, + "learning_rate": 7.7855754996668e-06, + "loss": 0.6547, + "step": 19053 + }, + { + "epoch": 0.5839769523108986, + "grad_norm": 1.4519935556467667, + "learning_rate": 7.784607518628744e-06, + "loss": 0.6822, + "step": 19054 + }, + { + "epoch": 0.5840076008336398, + "grad_norm": 1.5308234510975143, + "learning_rate": 7.783639559419521e-06, + "loss": 0.6673, + "step": 19055 + }, + { + "epoch": 0.584038249356381, + "grad_norm": 1.2557118763521968, + "learning_rate": 7.782671622048667e-06, + "loss": 0.7539, + "step": 19056 + }, + { + "epoch": 0.5840688978791222, + "grad_norm": 1.3447328003215844, + "learning_rate": 7.78170370652571e-06, + "loss": 0.6883, + "step": 19057 + }, + { + "epoch": 0.5840995464018635, + "grad_norm": 1.5328220312792393, + "learning_rate": 7.7807358128602e-06, + "loss": 0.8359, + "step": 19058 + }, + { + "epoch": 0.5841301949246046, + "grad_norm": 0.6543724033128213, + "learning_rate": 7.779767941061666e-06, + "loss": 0.5634, + "step": 19059 + }, + { + "epoch": 0.5841608434473459, + "grad_norm": 1.3724646440640782, + "learning_rate": 7.778800091139645e-06, + "loss": 0.6935, + "step": 19060 + }, + { + "epoch": 0.584191491970087, + "grad_norm": 1.5595740057248182, + "learning_rate": 7.777832263103674e-06, + "loss": 0.8399, + "step": 19061 + }, + { + "epoch": 0.5842221404928283, + "grad_norm": 1.4846877693352813, + "learning_rate": 7.776864456963294e-06, + "loss": 0.6819, + "step": 19062 + }, + { + "epoch": 0.5842527890155694, + "grad_norm": 1.5718294038607425, + "learning_rate": 7.775896672728034e-06, + "loss": 0.7109, + "step": 19063 + }, + { + "epoch": 0.5842834375383107, + "grad_norm": 1.4850446418715453, + "learning_rate": 7.774928910407435e-06, + "loss": 0.771, + "step": 19064 + }, + { + "epoch": 0.5843140860610518, + "grad_norm": 1.4978692637095525, + "learning_rate": 7.77396117001103e-06, + "loss": 0.7096, + "step": 19065 + }, + { + "epoch": 0.5843447345837931, + "grad_norm": 1.4218832522552363, + "learning_rate": 7.772993451548356e-06, + "loss": 0.6506, + "step": 19066 + }, + { + "epoch": 0.5843753831065343, + "grad_norm": 1.494392455559259, + "learning_rate": 7.77202575502895e-06, + "loss": 0.7186, + "step": 19067 + }, + { + "epoch": 0.5844060316292755, + "grad_norm": 1.5897741132039906, + "learning_rate": 7.771058080462336e-06, + "loss": 0.7569, + "step": 19068 + }, + { + "epoch": 0.5844366801520167, + "grad_norm": 1.4963990014675874, + "learning_rate": 7.770090427858064e-06, + "loss": 0.6767, + "step": 19069 + }, + { + "epoch": 0.5844673286747579, + "grad_norm": 1.3969232362132786, + "learning_rate": 7.769122797225662e-06, + "loss": 0.7187, + "step": 19070 + }, + { + "epoch": 0.5844979771974991, + "grad_norm": 0.6787769531908519, + "learning_rate": 7.76815518857466e-06, + "loss": 0.5736, + "step": 19071 + }, + { + "epoch": 0.5845286257202403, + "grad_norm": 1.6239145071794188, + "learning_rate": 7.767187601914598e-06, + "loss": 0.7637, + "step": 19072 + }, + { + "epoch": 0.5845592742429815, + "grad_norm": 1.6322571158332644, + "learning_rate": 7.766220037255006e-06, + "loss": 0.6851, + "step": 19073 + }, + { + "epoch": 0.5845899227657227, + "grad_norm": 1.5175729163375271, + "learning_rate": 7.76525249460542e-06, + "loss": 0.6816, + "step": 19074 + }, + { + "epoch": 0.5846205712884639, + "grad_norm": 1.5731472418337746, + "learning_rate": 7.764284973975375e-06, + "loss": 0.7279, + "step": 19075 + }, + { + "epoch": 0.584651219811205, + "grad_norm": 1.4611917945663566, + "learning_rate": 7.7633174753744e-06, + "loss": 0.6451, + "step": 19076 + }, + { + "epoch": 0.5846818683339463, + "grad_norm": 0.6539989376433986, + "learning_rate": 7.762349998812033e-06, + "loss": 0.5646, + "step": 19077 + }, + { + "epoch": 0.5847125168566875, + "grad_norm": 1.6375210350980798, + "learning_rate": 7.761382544297804e-06, + "loss": 0.6993, + "step": 19078 + }, + { + "epoch": 0.5847431653794287, + "grad_norm": 1.3939511850620498, + "learning_rate": 7.760415111841241e-06, + "loss": 0.6596, + "step": 19079 + }, + { + "epoch": 0.5847738139021699, + "grad_norm": 1.511917316939742, + "learning_rate": 7.759447701451886e-06, + "loss": 0.7046, + "step": 19080 + }, + { + "epoch": 0.5848044624249111, + "grad_norm": 1.5290211671280505, + "learning_rate": 7.758480313139262e-06, + "loss": 0.6097, + "step": 19081 + }, + { + "epoch": 0.5848351109476523, + "grad_norm": 1.5449151636224376, + "learning_rate": 7.757512946912907e-06, + "loss": 0.7385, + "step": 19082 + }, + { + "epoch": 0.5848657594703935, + "grad_norm": 1.8105276828935901, + "learning_rate": 7.756545602782351e-06, + "loss": 0.67, + "step": 19083 + }, + { + "epoch": 0.5848964079931347, + "grad_norm": 1.57332300207716, + "learning_rate": 7.755578280757123e-06, + "loss": 0.7169, + "step": 19084 + }, + { + "epoch": 0.584927056515876, + "grad_norm": 1.4979950456798268, + "learning_rate": 7.75461098084676e-06, + "loss": 0.7099, + "step": 19085 + }, + { + "epoch": 0.5849577050386171, + "grad_norm": 1.601436232329099, + "learning_rate": 7.75364370306079e-06, + "loss": 0.6732, + "step": 19086 + }, + { + "epoch": 0.5849883535613584, + "grad_norm": 1.4838860906773743, + "learning_rate": 7.752676447408736e-06, + "loss": 0.6869, + "step": 19087 + }, + { + "epoch": 0.5850190020840995, + "grad_norm": 1.4056575332327643, + "learning_rate": 7.751709213900145e-06, + "loss": 0.6986, + "step": 19088 + }, + { + "epoch": 0.5850496506068408, + "grad_norm": 1.5160614964908308, + "learning_rate": 7.750742002544533e-06, + "loss": 0.6899, + "step": 19089 + }, + { + "epoch": 0.5850802991295819, + "grad_norm": 0.7119573966619451, + "learning_rate": 7.749774813351436e-06, + "loss": 0.5883, + "step": 19090 + }, + { + "epoch": 0.5851109476523232, + "grad_norm": 1.6522426822879486, + "learning_rate": 7.748807646330385e-06, + "loss": 0.7035, + "step": 19091 + }, + { + "epoch": 0.5851415961750643, + "grad_norm": 1.3395608184149366, + "learning_rate": 7.747840501490906e-06, + "loss": 0.6954, + "step": 19092 + }, + { + "epoch": 0.5851722446978056, + "grad_norm": 1.597632084055949, + "learning_rate": 7.746873378842533e-06, + "loss": 0.6912, + "step": 19093 + }, + { + "epoch": 0.5852028932205467, + "grad_norm": 1.511318307717049, + "learning_rate": 7.745906278394794e-06, + "loss": 0.721, + "step": 19094 + }, + { + "epoch": 0.585233541743288, + "grad_norm": 0.6512784546526011, + "learning_rate": 7.744939200157214e-06, + "loss": 0.5582, + "step": 19095 + }, + { + "epoch": 0.5852641902660292, + "grad_norm": 1.454498795937971, + "learning_rate": 7.743972144139326e-06, + "loss": 0.6692, + "step": 19096 + }, + { + "epoch": 0.5852948387887704, + "grad_norm": 1.525259564655128, + "learning_rate": 7.743005110350662e-06, + "loss": 0.7837, + "step": 19097 + }, + { + "epoch": 0.5853254873115116, + "grad_norm": 1.494877046490788, + "learning_rate": 7.742038098800739e-06, + "loss": 0.6373, + "step": 19098 + }, + { + "epoch": 0.5853561358342528, + "grad_norm": 1.3919851643557701, + "learning_rate": 7.741071109499098e-06, + "loss": 0.5827, + "step": 19099 + }, + { + "epoch": 0.585386784356994, + "grad_norm": 1.5989618880617997, + "learning_rate": 7.740104142455256e-06, + "loss": 0.6734, + "step": 19100 + }, + { + "epoch": 0.5854174328797352, + "grad_norm": 1.5499028627411364, + "learning_rate": 7.739137197678752e-06, + "loss": 0.7525, + "step": 19101 + }, + { + "epoch": 0.5854480814024764, + "grad_norm": 0.6792842939974657, + "learning_rate": 7.738170275179105e-06, + "loss": 0.5715, + "step": 19102 + }, + { + "epoch": 0.5854787299252177, + "grad_norm": 1.3767825438883514, + "learning_rate": 7.737203374965844e-06, + "loss": 0.6812, + "step": 19103 + }, + { + "epoch": 0.5855093784479588, + "grad_norm": 1.553375189837766, + "learning_rate": 7.736236497048499e-06, + "loss": 0.6201, + "step": 19104 + }, + { + "epoch": 0.5855400269707001, + "grad_norm": 1.615379175310635, + "learning_rate": 7.735269641436594e-06, + "loss": 0.6591, + "step": 19105 + }, + { + "epoch": 0.5855706754934412, + "grad_norm": 1.5939681591059285, + "learning_rate": 7.734302808139656e-06, + "loss": 0.6823, + "step": 19106 + }, + { + "epoch": 0.5856013240161824, + "grad_norm": 0.6425045543021116, + "learning_rate": 7.733335997167213e-06, + "loss": 0.5507, + "step": 19107 + }, + { + "epoch": 0.5856319725389236, + "grad_norm": 1.628132254416192, + "learning_rate": 7.732369208528789e-06, + "loss": 0.8025, + "step": 19108 + }, + { + "epoch": 0.5856626210616648, + "grad_norm": 1.519183131682318, + "learning_rate": 7.731402442233914e-06, + "loss": 0.7247, + "step": 19109 + }, + { + "epoch": 0.585693269584406, + "grad_norm": 1.4849931384116681, + "learning_rate": 7.73043569829211e-06, + "loss": 0.7724, + "step": 19110 + }, + { + "epoch": 0.5857239181071472, + "grad_norm": 1.532535944928544, + "learning_rate": 7.729468976712902e-06, + "loss": 0.8312, + "step": 19111 + }, + { + "epoch": 0.5857545666298885, + "grad_norm": 1.3849214079823486, + "learning_rate": 7.728502277505821e-06, + "loss": 0.7414, + "step": 19112 + }, + { + "epoch": 0.5857852151526296, + "grad_norm": 1.6020694207418689, + "learning_rate": 7.727535600680387e-06, + "loss": 0.6079, + "step": 19113 + }, + { + "epoch": 0.5858158636753709, + "grad_norm": 1.4363252135019424, + "learning_rate": 7.726568946246122e-06, + "loss": 0.7004, + "step": 19114 + }, + { + "epoch": 0.585846512198112, + "grad_norm": 1.6278092593559488, + "learning_rate": 7.725602314212559e-06, + "loss": 0.6464, + "step": 19115 + }, + { + "epoch": 0.5858771607208533, + "grad_norm": 1.3995332165084406, + "learning_rate": 7.724635704589219e-06, + "loss": 0.6255, + "step": 19116 + }, + { + "epoch": 0.5859078092435944, + "grad_norm": 1.3398650921121775, + "learning_rate": 7.723669117385621e-06, + "loss": 0.6292, + "step": 19117 + }, + { + "epoch": 0.5859384577663357, + "grad_norm": 1.4757792980154845, + "learning_rate": 7.722702552611298e-06, + "loss": 0.7336, + "step": 19118 + }, + { + "epoch": 0.5859691062890768, + "grad_norm": 1.4648411468476754, + "learning_rate": 7.721736010275766e-06, + "loss": 0.7488, + "step": 19119 + }, + { + "epoch": 0.5859997548118181, + "grad_norm": 1.5678032530946493, + "learning_rate": 7.720769490388555e-06, + "loss": 0.6475, + "step": 19120 + }, + { + "epoch": 0.5860304033345592, + "grad_norm": 1.5851329090430895, + "learning_rate": 7.719802992959186e-06, + "loss": 0.7025, + "step": 19121 + }, + { + "epoch": 0.5860610518573005, + "grad_norm": 1.607613926443543, + "learning_rate": 7.71883651799718e-06, + "loss": 0.6508, + "step": 19122 + }, + { + "epoch": 0.5860917003800417, + "grad_norm": 1.411406457357128, + "learning_rate": 7.717870065512061e-06, + "loss": 0.6795, + "step": 19123 + }, + { + "epoch": 0.5861223489027829, + "grad_norm": 1.6287240091007156, + "learning_rate": 7.716903635513352e-06, + "loss": 0.7046, + "step": 19124 + }, + { + "epoch": 0.5861529974255241, + "grad_norm": 1.5825318025968964, + "learning_rate": 7.715937228010574e-06, + "loss": 0.6489, + "step": 19125 + }, + { + "epoch": 0.5861836459482653, + "grad_norm": 1.4814071584906328, + "learning_rate": 7.714970843013254e-06, + "loss": 0.6301, + "step": 19126 + }, + { + "epoch": 0.5862142944710065, + "grad_norm": 1.333583349801243, + "learning_rate": 7.714004480530909e-06, + "loss": 0.6298, + "step": 19127 + }, + { + "epoch": 0.5862449429937477, + "grad_norm": 1.5173812472876511, + "learning_rate": 7.713038140573064e-06, + "loss": 0.6394, + "step": 19128 + }, + { + "epoch": 0.5862755915164889, + "grad_norm": 1.3507297560431402, + "learning_rate": 7.712071823149241e-06, + "loss": 0.6011, + "step": 19129 + }, + { + "epoch": 0.5863062400392302, + "grad_norm": 1.5364737757001437, + "learning_rate": 7.711105528268955e-06, + "loss": 0.6316, + "step": 19130 + }, + { + "epoch": 0.5863368885619713, + "grad_norm": 1.5407323031556492, + "learning_rate": 7.710139255941738e-06, + "loss": 0.6873, + "step": 19131 + }, + { + "epoch": 0.5863675370847126, + "grad_norm": 1.4707968299287475, + "learning_rate": 7.709173006177101e-06, + "loss": 0.6935, + "step": 19132 + }, + { + "epoch": 0.5863981856074537, + "grad_norm": 1.6686371443351713, + "learning_rate": 7.708206778984567e-06, + "loss": 0.6186, + "step": 19133 + }, + { + "epoch": 0.586428834130195, + "grad_norm": 1.4318690540630912, + "learning_rate": 7.70724057437366e-06, + "loss": 0.7066, + "step": 19134 + }, + { + "epoch": 0.5864594826529361, + "grad_norm": 0.6575433534113001, + "learning_rate": 7.706274392353898e-06, + "loss": 0.5487, + "step": 19135 + }, + { + "epoch": 0.5864901311756774, + "grad_norm": 1.5986026317898059, + "learning_rate": 7.705308232934802e-06, + "loss": 0.5942, + "step": 19136 + }, + { + "epoch": 0.5865207796984185, + "grad_norm": 1.7197534724537833, + "learning_rate": 7.70434209612589e-06, + "loss": 0.7218, + "step": 19137 + }, + { + "epoch": 0.5865514282211597, + "grad_norm": 1.5610762976473196, + "learning_rate": 7.703375981936683e-06, + "loss": 0.6833, + "step": 19138 + }, + { + "epoch": 0.586582076743901, + "grad_norm": 1.5260658691885736, + "learning_rate": 7.7024098903767e-06, + "loss": 0.6765, + "step": 19139 + }, + { + "epoch": 0.5866127252666421, + "grad_norm": 1.5319142942193353, + "learning_rate": 7.701443821455462e-06, + "loss": 0.693, + "step": 19140 + }, + { + "epoch": 0.5866433737893834, + "grad_norm": 1.4357796146757416, + "learning_rate": 7.700477775182482e-06, + "loss": 0.7263, + "step": 19141 + }, + { + "epoch": 0.5866740223121245, + "grad_norm": 1.4228395157708686, + "learning_rate": 7.699511751567287e-06, + "loss": 0.5361, + "step": 19142 + }, + { + "epoch": 0.5867046708348658, + "grad_norm": 1.6255165046726847, + "learning_rate": 7.698545750619392e-06, + "loss": 0.6975, + "step": 19143 + }, + { + "epoch": 0.5867353193576069, + "grad_norm": 1.3591230749625094, + "learning_rate": 7.69757977234831e-06, + "loss": 0.7094, + "step": 19144 + }, + { + "epoch": 0.5867659678803482, + "grad_norm": 0.6810945878135557, + "learning_rate": 7.696613816763567e-06, + "loss": 0.598, + "step": 19145 + }, + { + "epoch": 0.5867966164030893, + "grad_norm": 1.6935947158346212, + "learning_rate": 7.695647883874676e-06, + "loss": 0.6631, + "step": 19146 + }, + { + "epoch": 0.5868272649258306, + "grad_norm": 1.578313589697297, + "learning_rate": 7.694681973691157e-06, + "loss": 0.7042, + "step": 19147 + }, + { + "epoch": 0.5868579134485717, + "grad_norm": 1.4389189651202265, + "learning_rate": 7.693716086222524e-06, + "loss": 0.6635, + "step": 19148 + }, + { + "epoch": 0.586888561971313, + "grad_norm": 1.523501057021693, + "learning_rate": 7.692750221478297e-06, + "loss": 0.7243, + "step": 19149 + }, + { + "epoch": 0.5869192104940542, + "grad_norm": 1.799876870417357, + "learning_rate": 7.691784379467995e-06, + "loss": 0.703, + "step": 19150 + }, + { + "epoch": 0.5869498590167954, + "grad_norm": 1.6327422284970425, + "learning_rate": 7.690818560201134e-06, + "loss": 0.5996, + "step": 19151 + }, + { + "epoch": 0.5869805075395366, + "grad_norm": 1.5214039159060153, + "learning_rate": 7.68985276368722e-06, + "loss": 0.6926, + "step": 19152 + }, + { + "epoch": 0.5870111560622778, + "grad_norm": 1.6180052024011926, + "learning_rate": 7.688886989935786e-06, + "loss": 0.6279, + "step": 19153 + }, + { + "epoch": 0.587041804585019, + "grad_norm": 1.8496297004594255, + "learning_rate": 7.687921238956333e-06, + "loss": 0.7397, + "step": 19154 + }, + { + "epoch": 0.5870724531077602, + "grad_norm": 1.6205253687136947, + "learning_rate": 7.68695551075839e-06, + "loss": 0.7733, + "step": 19155 + }, + { + "epoch": 0.5871031016305014, + "grad_norm": 1.4526403827876417, + "learning_rate": 7.685989805351464e-06, + "loss": 0.7473, + "step": 19156 + }, + { + "epoch": 0.5871337501532427, + "grad_norm": 1.6043587613896948, + "learning_rate": 7.68502412274507e-06, + "loss": 0.724, + "step": 19157 + }, + { + "epoch": 0.5871643986759838, + "grad_norm": 1.5995424674196128, + "learning_rate": 7.684058462948729e-06, + "loss": 0.6999, + "step": 19158 + }, + { + "epoch": 0.5871950471987251, + "grad_norm": 1.4048084557291993, + "learning_rate": 7.683092825971953e-06, + "loss": 0.5766, + "step": 19159 + }, + { + "epoch": 0.5872256957214662, + "grad_norm": 1.552189259271342, + "learning_rate": 7.682127211824252e-06, + "loss": 0.7314, + "step": 19160 + }, + { + "epoch": 0.5872563442442075, + "grad_norm": 1.544961191529943, + "learning_rate": 7.681161620515148e-06, + "loss": 0.6856, + "step": 19161 + }, + { + "epoch": 0.5872869927669486, + "grad_norm": 1.5205815794979485, + "learning_rate": 7.68019605205415e-06, + "loss": 0.6963, + "step": 19162 + }, + { + "epoch": 0.5873176412896899, + "grad_norm": 1.4919893694643127, + "learning_rate": 7.679230506450774e-06, + "loss": 0.7476, + "step": 19163 + }, + { + "epoch": 0.587348289812431, + "grad_norm": 1.437925025572937, + "learning_rate": 7.678264983714538e-06, + "loss": 0.7016, + "step": 19164 + }, + { + "epoch": 0.5873789383351723, + "grad_norm": 1.6291790815623024, + "learning_rate": 7.677299483854944e-06, + "loss": 0.7444, + "step": 19165 + }, + { + "epoch": 0.5874095868579134, + "grad_norm": 1.405588881665758, + "learning_rate": 7.676334006881519e-06, + "loss": 0.6243, + "step": 19166 + }, + { + "epoch": 0.5874402353806547, + "grad_norm": 1.597664777884718, + "learning_rate": 7.675368552803766e-06, + "loss": 0.7582, + "step": 19167 + }, + { + "epoch": 0.5874708839033959, + "grad_norm": 1.5429761761836247, + "learning_rate": 7.674403121631203e-06, + "loss": 0.6691, + "step": 19168 + }, + { + "epoch": 0.587501532426137, + "grad_norm": 1.5010494364946634, + "learning_rate": 7.67343771337334e-06, + "loss": 0.7013, + "step": 19169 + }, + { + "epoch": 0.5875321809488783, + "grad_norm": 1.5404759845102547, + "learning_rate": 7.67247232803969e-06, + "loss": 0.7585, + "step": 19170 + }, + { + "epoch": 0.5875628294716194, + "grad_norm": 1.4802407630730896, + "learning_rate": 7.671506965639766e-06, + "loss": 0.5757, + "step": 19171 + }, + { + "epoch": 0.5875934779943607, + "grad_norm": 1.3695029925336002, + "learning_rate": 7.670541626183078e-06, + "loss": 0.6864, + "step": 19172 + }, + { + "epoch": 0.5876241265171018, + "grad_norm": 1.502099714047463, + "learning_rate": 7.669576309679141e-06, + "loss": 0.7392, + "step": 19173 + }, + { + "epoch": 0.5876547750398431, + "grad_norm": 1.5753218197946028, + "learning_rate": 7.668611016137468e-06, + "loss": 0.6558, + "step": 19174 + }, + { + "epoch": 0.5876854235625842, + "grad_norm": 0.6698747732124287, + "learning_rate": 7.667645745567564e-06, + "loss": 0.5586, + "step": 19175 + }, + { + "epoch": 0.5877160720853255, + "grad_norm": 1.489691378546661, + "learning_rate": 7.666680497978943e-06, + "loss": 0.6088, + "step": 19176 + }, + { + "epoch": 0.5877467206080667, + "grad_norm": 1.4768530352111087, + "learning_rate": 7.665715273381118e-06, + "loss": 0.6395, + "step": 19177 + }, + { + "epoch": 0.5877773691308079, + "grad_norm": 1.584230352254044, + "learning_rate": 7.664750071783596e-06, + "loss": 0.5858, + "step": 19178 + }, + { + "epoch": 0.5878080176535491, + "grad_norm": 1.5122504457608088, + "learning_rate": 7.663784893195888e-06, + "loss": 0.666, + "step": 19179 + }, + { + "epoch": 0.5878386661762903, + "grad_norm": 1.5509528585377153, + "learning_rate": 7.662819737627508e-06, + "loss": 0.764, + "step": 19180 + }, + { + "epoch": 0.5878693146990315, + "grad_norm": 1.4316597811651828, + "learning_rate": 7.66185460508796e-06, + "loss": 0.6101, + "step": 19181 + }, + { + "epoch": 0.5878999632217727, + "grad_norm": 1.6427388400558436, + "learning_rate": 7.660889495586758e-06, + "loss": 0.6432, + "step": 19182 + }, + { + "epoch": 0.5879306117445139, + "grad_norm": 1.466833819841435, + "learning_rate": 7.659924409133414e-06, + "loss": 0.6577, + "step": 19183 + }, + { + "epoch": 0.5879612602672551, + "grad_norm": 1.481188087451447, + "learning_rate": 7.658959345737426e-06, + "loss": 0.7409, + "step": 19184 + }, + { + "epoch": 0.5879919087899963, + "grad_norm": 0.6457251326346969, + "learning_rate": 7.657994305408318e-06, + "loss": 0.5427, + "step": 19185 + }, + { + "epoch": 0.5880225573127376, + "grad_norm": 1.3882586638301393, + "learning_rate": 7.657029288155588e-06, + "loss": 0.5969, + "step": 19186 + }, + { + "epoch": 0.5880532058354787, + "grad_norm": 1.4498583387823882, + "learning_rate": 7.656064293988747e-06, + "loss": 0.6292, + "step": 19187 + }, + { + "epoch": 0.58808385435822, + "grad_norm": 1.5755048199671091, + "learning_rate": 7.655099322917306e-06, + "loss": 0.7157, + "step": 19188 + }, + { + "epoch": 0.5881145028809611, + "grad_norm": 2.053538628951583, + "learning_rate": 7.654134374950769e-06, + "loss": 0.724, + "step": 19189 + }, + { + "epoch": 0.5881451514037024, + "grad_norm": 1.499991718509801, + "learning_rate": 7.65316945009865e-06, + "loss": 0.7606, + "step": 19190 + }, + { + "epoch": 0.5881757999264435, + "grad_norm": 1.4719447695865413, + "learning_rate": 7.65220454837045e-06, + "loss": 0.6981, + "step": 19191 + }, + { + "epoch": 0.5882064484491848, + "grad_norm": 1.2865152332853858, + "learning_rate": 7.65123966977568e-06, + "loss": 0.6774, + "step": 19192 + }, + { + "epoch": 0.5882370969719259, + "grad_norm": 1.4845226278608674, + "learning_rate": 7.650274814323846e-06, + "loss": 0.7159, + "step": 19193 + }, + { + "epoch": 0.5882677454946672, + "grad_norm": 1.5759278523549725, + "learning_rate": 7.649309982024457e-06, + "loss": 0.7208, + "step": 19194 + }, + { + "epoch": 0.5882983940174084, + "grad_norm": 1.6906762343611423, + "learning_rate": 7.648345172887015e-06, + "loss": 0.6966, + "step": 19195 + }, + { + "epoch": 0.5883290425401496, + "grad_norm": 1.4667837674672033, + "learning_rate": 7.647380386921034e-06, + "loss": 0.7678, + "step": 19196 + }, + { + "epoch": 0.5883596910628908, + "grad_norm": 1.6048599996150792, + "learning_rate": 7.646415624136015e-06, + "loss": 0.7634, + "step": 19197 + }, + { + "epoch": 0.588390339585632, + "grad_norm": 1.5444555619450961, + "learning_rate": 7.645450884541462e-06, + "loss": 0.745, + "step": 19198 + }, + { + "epoch": 0.5884209881083732, + "grad_norm": 1.29151627149218, + "learning_rate": 7.644486168146887e-06, + "loss": 0.6461, + "step": 19199 + }, + { + "epoch": 0.5884516366311143, + "grad_norm": 1.578783021771008, + "learning_rate": 7.643521474961788e-06, + "loss": 0.6329, + "step": 19200 + }, + { + "epoch": 0.5884822851538556, + "grad_norm": 1.427019112468825, + "learning_rate": 7.64255680499568e-06, + "loss": 0.6947, + "step": 19201 + }, + { + "epoch": 0.5885129336765967, + "grad_norm": 1.5552188937669966, + "learning_rate": 7.641592158258062e-06, + "loss": 0.7036, + "step": 19202 + }, + { + "epoch": 0.588543582199338, + "grad_norm": 0.6876720541864383, + "learning_rate": 7.640627534758437e-06, + "loss": 0.5305, + "step": 19203 + }, + { + "epoch": 0.5885742307220792, + "grad_norm": 0.6550876877035178, + "learning_rate": 7.639662934506316e-06, + "loss": 0.5515, + "step": 19204 + }, + { + "epoch": 0.5886048792448204, + "grad_norm": 1.5385107154324902, + "learning_rate": 7.6386983575112e-06, + "loss": 0.685, + "step": 19205 + }, + { + "epoch": 0.5886355277675616, + "grad_norm": 0.6994818259723435, + "learning_rate": 7.63773380378259e-06, + "loss": 0.561, + "step": 19206 + }, + { + "epoch": 0.5886661762903028, + "grad_norm": 1.536103870070272, + "learning_rate": 7.636769273329997e-06, + "loss": 0.7415, + "step": 19207 + }, + { + "epoch": 0.588696824813044, + "grad_norm": 1.5186586122585177, + "learning_rate": 7.635804766162915e-06, + "loss": 0.6689, + "step": 19208 + }, + { + "epoch": 0.5887274733357852, + "grad_norm": 1.55801969526857, + "learning_rate": 7.634840282290861e-06, + "loss": 0.7041, + "step": 19209 + }, + { + "epoch": 0.5887581218585264, + "grad_norm": 1.513310778206089, + "learning_rate": 7.633875821723326e-06, + "loss": 0.6813, + "step": 19210 + }, + { + "epoch": 0.5887887703812676, + "grad_norm": 1.499306847044677, + "learning_rate": 7.63291138446982e-06, + "loss": 0.5831, + "step": 19211 + }, + { + "epoch": 0.5888194189040088, + "grad_norm": 1.4748003436823698, + "learning_rate": 7.631946970539843e-06, + "loss": 0.6198, + "step": 19212 + }, + { + "epoch": 0.5888500674267501, + "grad_norm": 1.721046945487973, + "learning_rate": 7.630982579942897e-06, + "loss": 0.7752, + "step": 19213 + }, + { + "epoch": 0.5888807159494912, + "grad_norm": 1.4950847647271517, + "learning_rate": 7.630018212688488e-06, + "loss": 0.6651, + "step": 19214 + }, + { + "epoch": 0.5889113644722325, + "grad_norm": 1.4722423648063045, + "learning_rate": 7.629053868786116e-06, + "loss": 0.6817, + "step": 19215 + }, + { + "epoch": 0.5889420129949736, + "grad_norm": 1.757663525945242, + "learning_rate": 7.628089548245284e-06, + "loss": 0.6255, + "step": 19216 + }, + { + "epoch": 0.5889726615177149, + "grad_norm": 0.6761169292979906, + "learning_rate": 7.627125251075486e-06, + "loss": 0.5465, + "step": 19217 + }, + { + "epoch": 0.589003310040456, + "grad_norm": 1.52690757821099, + "learning_rate": 7.626160977286239e-06, + "loss": 0.5924, + "step": 19218 + }, + { + "epoch": 0.5890339585631973, + "grad_norm": 0.6835900287976351, + "learning_rate": 7.6251967268870295e-06, + "loss": 0.5805, + "step": 19219 + }, + { + "epoch": 0.5890646070859384, + "grad_norm": 1.4280880450977758, + "learning_rate": 7.624232499887366e-06, + "loss": 0.7464, + "step": 19220 + }, + { + "epoch": 0.5890952556086797, + "grad_norm": 1.47133664527952, + "learning_rate": 7.6232682962967475e-06, + "loss": 0.6421, + "step": 19221 + }, + { + "epoch": 0.5891259041314209, + "grad_norm": 1.6069924451744544, + "learning_rate": 7.622304116124674e-06, + "loss": 0.7619, + "step": 19222 + }, + { + "epoch": 0.5891565526541621, + "grad_norm": 1.5535441553863611, + "learning_rate": 7.621339959380647e-06, + "loss": 0.7064, + "step": 19223 + }, + { + "epoch": 0.5891872011769033, + "grad_norm": 1.78415246708314, + "learning_rate": 7.6203758260741655e-06, + "loss": 0.5876, + "step": 19224 + }, + { + "epoch": 0.5892178496996445, + "grad_norm": 1.4137889342304286, + "learning_rate": 7.619411716214729e-06, + "loss": 0.665, + "step": 19225 + }, + { + "epoch": 0.5892484982223857, + "grad_norm": 1.4596477941117527, + "learning_rate": 7.618447629811842e-06, + "loss": 0.5867, + "step": 19226 + }, + { + "epoch": 0.5892791467451269, + "grad_norm": 1.4400959061775396, + "learning_rate": 7.617483566874993e-06, + "loss": 0.5959, + "step": 19227 + }, + { + "epoch": 0.5893097952678681, + "grad_norm": 1.6130391346177324, + "learning_rate": 7.616519527413695e-06, + "loss": 0.6806, + "step": 19228 + }, + { + "epoch": 0.5893404437906093, + "grad_norm": 1.7050563059813255, + "learning_rate": 7.615555511437437e-06, + "loss": 0.6305, + "step": 19229 + }, + { + "epoch": 0.5893710923133505, + "grad_norm": 1.5223864401281366, + "learning_rate": 7.614591518955718e-06, + "loss": 0.7833, + "step": 19230 + }, + { + "epoch": 0.5894017408360916, + "grad_norm": 1.404312246770645, + "learning_rate": 7.613627549978043e-06, + "loss": 0.7155, + "step": 19231 + }, + { + "epoch": 0.5894323893588329, + "grad_norm": 0.6587646387384616, + "learning_rate": 7.6126636045139056e-06, + "loss": 0.5637, + "step": 19232 + }, + { + "epoch": 0.5894630378815741, + "grad_norm": 1.612292878153644, + "learning_rate": 7.611699682572803e-06, + "loss": 0.6833, + "step": 19233 + }, + { + "epoch": 0.5894936864043153, + "grad_norm": 1.4420812671694567, + "learning_rate": 7.610735784164236e-06, + "loss": 0.6594, + "step": 19234 + }, + { + "epoch": 0.5895243349270565, + "grad_norm": 1.615808805859539, + "learning_rate": 7.609771909297698e-06, + "loss": 0.6905, + "step": 19235 + }, + { + "epoch": 0.5895549834497977, + "grad_norm": 1.4498834750230658, + "learning_rate": 7.608808057982692e-06, + "loss": 0.6203, + "step": 19236 + }, + { + "epoch": 0.5895856319725389, + "grad_norm": 1.4772760660736837, + "learning_rate": 7.607844230228713e-06, + "loss": 0.6929, + "step": 19237 + }, + { + "epoch": 0.5896162804952801, + "grad_norm": 0.6548912843313487, + "learning_rate": 7.606880426045251e-06, + "loss": 0.5502, + "step": 19238 + }, + { + "epoch": 0.5896469290180213, + "grad_norm": 1.5597645956089956, + "learning_rate": 7.605916645441815e-06, + "loss": 0.7131, + "step": 19239 + }, + { + "epoch": 0.5896775775407626, + "grad_norm": 1.5939039514169844, + "learning_rate": 7.604952888427893e-06, + "loss": 0.7565, + "step": 19240 + }, + { + "epoch": 0.5897082260635037, + "grad_norm": 1.2863375499791985, + "learning_rate": 7.603989155012981e-06, + "loss": 0.4862, + "step": 19241 + }, + { + "epoch": 0.589738874586245, + "grad_norm": 1.371508364649856, + "learning_rate": 7.6030254452065775e-06, + "loss": 0.5659, + "step": 19242 + }, + { + "epoch": 0.5897695231089861, + "grad_norm": 1.5718684074475986, + "learning_rate": 7.60206175901818e-06, + "loss": 0.652, + "step": 19243 + }, + { + "epoch": 0.5898001716317274, + "grad_norm": 0.6503443438931719, + "learning_rate": 7.601098096457278e-06, + "loss": 0.5732, + "step": 19244 + }, + { + "epoch": 0.5898308201544685, + "grad_norm": 1.5475266537127952, + "learning_rate": 7.600134457533373e-06, + "loss": 0.6961, + "step": 19245 + }, + { + "epoch": 0.5898614686772098, + "grad_norm": 1.4715755005155602, + "learning_rate": 7.599170842255954e-06, + "loss": 0.682, + "step": 19246 + }, + { + "epoch": 0.5898921171999509, + "grad_norm": 1.532949357754149, + "learning_rate": 7.598207250634522e-06, + "loss": 0.5879, + "step": 19247 + }, + { + "epoch": 0.5899227657226922, + "grad_norm": 1.3696136594762414, + "learning_rate": 7.597243682678569e-06, + "loss": 0.6698, + "step": 19248 + }, + { + "epoch": 0.5899534142454333, + "grad_norm": 1.376559166779651, + "learning_rate": 7.596280138397584e-06, + "loss": 0.6614, + "step": 19249 + }, + { + "epoch": 0.5899840627681746, + "grad_norm": 1.724805508788286, + "learning_rate": 7.595316617801072e-06, + "loss": 0.605, + "step": 19250 + }, + { + "epoch": 0.5900147112909158, + "grad_norm": 1.5422290833935342, + "learning_rate": 7.594353120898518e-06, + "loss": 0.6653, + "step": 19251 + }, + { + "epoch": 0.590045359813657, + "grad_norm": 1.4331741286376243, + "learning_rate": 7.5933896476994165e-06, + "loss": 0.6763, + "step": 19252 + }, + { + "epoch": 0.5900760083363982, + "grad_norm": 1.5002119366623363, + "learning_rate": 7.592426198213265e-06, + "loss": 0.7254, + "step": 19253 + }, + { + "epoch": 0.5901066568591394, + "grad_norm": 1.480892357288663, + "learning_rate": 7.591462772449552e-06, + "loss": 0.7599, + "step": 19254 + }, + { + "epoch": 0.5901373053818806, + "grad_norm": 1.3663837619112968, + "learning_rate": 7.590499370417774e-06, + "loss": 0.6349, + "step": 19255 + }, + { + "epoch": 0.5901679539046218, + "grad_norm": 1.5341128076899853, + "learning_rate": 7.589535992127423e-06, + "loss": 0.7099, + "step": 19256 + }, + { + "epoch": 0.590198602427363, + "grad_norm": 1.5750563716005628, + "learning_rate": 7.588572637587988e-06, + "loss": 0.6942, + "step": 19257 + }, + { + "epoch": 0.5902292509501043, + "grad_norm": 1.66367729224791, + "learning_rate": 7.587609306808965e-06, + "loss": 0.7272, + "step": 19258 + }, + { + "epoch": 0.5902598994728454, + "grad_norm": 1.6613922366765, + "learning_rate": 7.586645999799847e-06, + "loss": 0.7183, + "step": 19259 + }, + { + "epoch": 0.5902905479955867, + "grad_norm": 1.3751094476698127, + "learning_rate": 7.585682716570119e-06, + "loss": 0.5746, + "step": 19260 + }, + { + "epoch": 0.5903211965183278, + "grad_norm": 1.44929763275143, + "learning_rate": 7.584719457129281e-06, + "loss": 0.6797, + "step": 19261 + }, + { + "epoch": 0.590351845041069, + "grad_norm": 1.4946125192796218, + "learning_rate": 7.583756221486817e-06, + "loss": 0.6179, + "step": 19262 + }, + { + "epoch": 0.5903824935638102, + "grad_norm": 1.637980324901248, + "learning_rate": 7.582793009652225e-06, + "loss": 0.6533, + "step": 19263 + }, + { + "epoch": 0.5904131420865514, + "grad_norm": 0.6831217855574979, + "learning_rate": 7.58182982163499e-06, + "loss": 0.5764, + "step": 19264 + }, + { + "epoch": 0.5904437906092926, + "grad_norm": 1.5234580960192237, + "learning_rate": 7.580866657444602e-06, + "loss": 0.5792, + "step": 19265 + }, + { + "epoch": 0.5904744391320338, + "grad_norm": 1.5329023523838403, + "learning_rate": 7.579903517090556e-06, + "loss": 0.756, + "step": 19266 + }, + { + "epoch": 0.590505087654775, + "grad_norm": 1.4550154313798216, + "learning_rate": 7.578940400582342e-06, + "loss": 0.6435, + "step": 19267 + }, + { + "epoch": 0.5905357361775162, + "grad_norm": 0.6969571481938198, + "learning_rate": 7.577977307929444e-06, + "loss": 0.5878, + "step": 19268 + }, + { + "epoch": 0.5905663847002575, + "grad_norm": 1.2966279380406756, + "learning_rate": 7.577014239141357e-06, + "loss": 0.5307, + "step": 19269 + }, + { + "epoch": 0.5905970332229986, + "grad_norm": 1.4188779760949606, + "learning_rate": 7.5760511942275715e-06, + "loss": 0.6638, + "step": 19270 + }, + { + "epoch": 0.5906276817457399, + "grad_norm": 1.4604598507528759, + "learning_rate": 7.575088173197569e-06, + "loss": 0.6492, + "step": 19271 + }, + { + "epoch": 0.590658330268481, + "grad_norm": 1.6525758447821473, + "learning_rate": 7.574125176060846e-06, + "loss": 0.6674, + "step": 19272 + }, + { + "epoch": 0.5906889787912223, + "grad_norm": 1.562350238623631, + "learning_rate": 7.573162202826885e-06, + "loss": 0.6839, + "step": 19273 + }, + { + "epoch": 0.5907196273139634, + "grad_norm": 1.594306623516508, + "learning_rate": 7.572199253505181e-06, + "loss": 0.7217, + "step": 19274 + }, + { + "epoch": 0.5907502758367047, + "grad_norm": 1.4541037256581986, + "learning_rate": 7.5712363281052185e-06, + "loss": 0.6072, + "step": 19275 + }, + { + "epoch": 0.5907809243594458, + "grad_norm": 1.266525957892572, + "learning_rate": 7.570273426636483e-06, + "loss": 0.598, + "step": 19276 + }, + { + "epoch": 0.5908115728821871, + "grad_norm": 1.5123547822390733, + "learning_rate": 7.569310549108468e-06, + "loss": 0.7329, + "step": 19277 + }, + { + "epoch": 0.5908422214049283, + "grad_norm": 1.524676303646475, + "learning_rate": 7.568347695530661e-06, + "loss": 0.6485, + "step": 19278 + }, + { + "epoch": 0.5908728699276695, + "grad_norm": 1.540406804150299, + "learning_rate": 7.567384865912539e-06, + "loss": 0.7349, + "step": 19279 + }, + { + "epoch": 0.5909035184504107, + "grad_norm": 1.51258209042236, + "learning_rate": 7.566422060263603e-06, + "loss": 0.5894, + "step": 19280 + }, + { + "epoch": 0.5909341669731519, + "grad_norm": 0.6631173518745538, + "learning_rate": 7.565459278593327e-06, + "loss": 0.542, + "step": 19281 + }, + { + "epoch": 0.5909648154958931, + "grad_norm": 1.5919571866253295, + "learning_rate": 7.564496520911209e-06, + "loss": 0.6662, + "step": 19282 + }, + { + "epoch": 0.5909954640186343, + "grad_norm": 1.6452371740421368, + "learning_rate": 7.563533787226729e-06, + "loss": 0.6432, + "step": 19283 + }, + { + "epoch": 0.5910261125413755, + "grad_norm": 1.9483727167914635, + "learning_rate": 7.562571077549371e-06, + "loss": 0.6503, + "step": 19284 + }, + { + "epoch": 0.5910567610641168, + "grad_norm": 1.5629839357417077, + "learning_rate": 7.561608391888626e-06, + "loss": 0.6793, + "step": 19285 + }, + { + "epoch": 0.5910874095868579, + "grad_norm": 0.6571151302028144, + "learning_rate": 7.5606457302539775e-06, + "loss": 0.5847, + "step": 19286 + }, + { + "epoch": 0.5911180581095992, + "grad_norm": 1.4990909031241069, + "learning_rate": 7.559683092654909e-06, + "loss": 0.661, + "step": 19287 + }, + { + "epoch": 0.5911487066323403, + "grad_norm": 1.396842837221419, + "learning_rate": 7.558720479100909e-06, + "loss": 0.6084, + "step": 19288 + }, + { + "epoch": 0.5911793551550816, + "grad_norm": 1.675826002049127, + "learning_rate": 7.557757889601459e-06, + "loss": 0.8044, + "step": 19289 + }, + { + "epoch": 0.5912100036778227, + "grad_norm": 0.6597390040478799, + "learning_rate": 7.556795324166047e-06, + "loss": 0.5887, + "step": 19290 + }, + { + "epoch": 0.591240652200564, + "grad_norm": 1.809443463718629, + "learning_rate": 7.555832782804159e-06, + "loss": 0.5798, + "step": 19291 + }, + { + "epoch": 0.5912713007233051, + "grad_norm": 0.6546589256052785, + "learning_rate": 7.554870265525268e-06, + "loss": 0.557, + "step": 19292 + }, + { + "epoch": 0.5913019492460463, + "grad_norm": 1.5520052215684035, + "learning_rate": 7.553907772338873e-06, + "loss": 0.7354, + "step": 19293 + }, + { + "epoch": 0.5913325977687875, + "grad_norm": 1.4561862011064892, + "learning_rate": 7.5529453032544485e-06, + "loss": 0.6784, + "step": 19294 + }, + { + "epoch": 0.5913632462915287, + "grad_norm": 1.482362645180337, + "learning_rate": 7.551982858281479e-06, + "loss": 0.6025, + "step": 19295 + }, + { + "epoch": 0.59139389481427, + "grad_norm": 1.7051366151052276, + "learning_rate": 7.55102043742945e-06, + "loss": 0.7876, + "step": 19296 + }, + { + "epoch": 0.5914245433370111, + "grad_norm": 1.5720945929273595, + "learning_rate": 7.550058040707843e-06, + "loss": 0.7174, + "step": 19297 + }, + { + "epoch": 0.5914551918597524, + "grad_norm": 1.7628184352703067, + "learning_rate": 7.549095668126139e-06, + "loss": 0.7882, + "step": 19298 + }, + { + "epoch": 0.5914858403824935, + "grad_norm": 1.599249749149988, + "learning_rate": 7.548133319693824e-06, + "loss": 0.6777, + "step": 19299 + }, + { + "epoch": 0.5915164889052348, + "grad_norm": 1.5583823471828073, + "learning_rate": 7.547170995420378e-06, + "loss": 0.7133, + "step": 19300 + }, + { + "epoch": 0.5915471374279759, + "grad_norm": 1.5886743079638739, + "learning_rate": 7.546208695315285e-06, + "loss": 0.6231, + "step": 19301 + }, + { + "epoch": 0.5915777859507172, + "grad_norm": 1.5124584237590595, + "learning_rate": 7.545246419388027e-06, + "loss": 0.6722, + "step": 19302 + }, + { + "epoch": 0.5916084344734583, + "grad_norm": 1.6113658322577264, + "learning_rate": 7.544284167648078e-06, + "loss": 0.7382, + "step": 19303 + }, + { + "epoch": 0.5916390829961996, + "grad_norm": 1.6126435453778416, + "learning_rate": 7.543321940104933e-06, + "loss": 0.6711, + "step": 19304 + }, + { + "epoch": 0.5916697315189408, + "grad_norm": 1.5267465003246719, + "learning_rate": 7.542359736768062e-06, + "loss": 0.6537, + "step": 19305 + }, + { + "epoch": 0.591700380041682, + "grad_norm": 1.5656501752695342, + "learning_rate": 7.5413975576469475e-06, + "loss": 0.6348, + "step": 19306 + }, + { + "epoch": 0.5917310285644232, + "grad_norm": 0.6442289483963938, + "learning_rate": 7.540435402751075e-06, + "loss": 0.5358, + "step": 19307 + }, + { + "epoch": 0.5917616770871644, + "grad_norm": 0.6977922308789113, + "learning_rate": 7.5394732720899185e-06, + "loss": 0.5578, + "step": 19308 + }, + { + "epoch": 0.5917923256099056, + "grad_norm": 1.4863444447266063, + "learning_rate": 7.538511165672965e-06, + "loss": 0.712, + "step": 19309 + }, + { + "epoch": 0.5918229741326468, + "grad_norm": 1.4093972855860877, + "learning_rate": 7.53754908350969e-06, + "loss": 0.7252, + "step": 19310 + }, + { + "epoch": 0.591853622655388, + "grad_norm": 1.4069837327741728, + "learning_rate": 7.536587025609572e-06, + "loss": 0.6379, + "step": 19311 + }, + { + "epoch": 0.5918842711781293, + "grad_norm": 1.5633831348057197, + "learning_rate": 7.535624991982093e-06, + "loss": 0.687, + "step": 19312 + }, + { + "epoch": 0.5919149197008704, + "grad_norm": 1.4747055754722913, + "learning_rate": 7.534662982636736e-06, + "loss": 0.6566, + "step": 19313 + }, + { + "epoch": 0.5919455682236117, + "grad_norm": 1.5785809374656046, + "learning_rate": 7.533700997582969e-06, + "loss": 0.727, + "step": 19314 + }, + { + "epoch": 0.5919762167463528, + "grad_norm": 1.4342415208566524, + "learning_rate": 7.53273903683028e-06, + "loss": 0.6467, + "step": 19315 + }, + { + "epoch": 0.5920068652690941, + "grad_norm": 1.480021768290735, + "learning_rate": 7.531777100388143e-06, + "loss": 0.6287, + "step": 19316 + }, + { + "epoch": 0.5920375137918352, + "grad_norm": 1.6307212700616982, + "learning_rate": 7.530815188266038e-06, + "loss": 0.6164, + "step": 19317 + }, + { + "epoch": 0.5920681623145765, + "grad_norm": 1.6237617641895539, + "learning_rate": 7.529853300473445e-06, + "loss": 0.7546, + "step": 19318 + }, + { + "epoch": 0.5920988108373176, + "grad_norm": 1.5840635418329683, + "learning_rate": 7.528891437019836e-06, + "loss": 0.7441, + "step": 19319 + }, + { + "epoch": 0.5921294593600589, + "grad_norm": 1.6538729115828519, + "learning_rate": 7.527929597914695e-06, + "loss": 0.6808, + "step": 19320 + }, + { + "epoch": 0.5921601078828, + "grad_norm": 1.6998605078675406, + "learning_rate": 7.5269677831674955e-06, + "loss": 0.6993, + "step": 19321 + }, + { + "epoch": 0.5921907564055413, + "grad_norm": 1.3036676025884177, + "learning_rate": 7.526005992787714e-06, + "loss": 0.6099, + "step": 19322 + }, + { + "epoch": 0.5922214049282825, + "grad_norm": 1.6246771970022138, + "learning_rate": 7.525044226784831e-06, + "loss": 0.7761, + "step": 19323 + }, + { + "epoch": 0.5922520534510236, + "grad_norm": 0.7253147226269999, + "learning_rate": 7.52408248516832e-06, + "loss": 0.5203, + "step": 19324 + }, + { + "epoch": 0.5922827019737649, + "grad_norm": 0.6955709768064745, + "learning_rate": 7.523120767947655e-06, + "loss": 0.5468, + "step": 19325 + }, + { + "epoch": 0.592313350496506, + "grad_norm": 1.8206728490143684, + "learning_rate": 7.522159075132316e-06, + "loss": 0.8091, + "step": 19326 + }, + { + "epoch": 0.5923439990192473, + "grad_norm": 1.5864430520740938, + "learning_rate": 7.521197406731777e-06, + "loss": 0.6439, + "step": 19327 + }, + { + "epoch": 0.5923746475419884, + "grad_norm": 1.7246524872342262, + "learning_rate": 7.520235762755516e-06, + "loss": 0.6622, + "step": 19328 + }, + { + "epoch": 0.5924052960647297, + "grad_norm": 1.2944065070588362, + "learning_rate": 7.519274143213006e-06, + "loss": 0.6475, + "step": 19329 + }, + { + "epoch": 0.5924359445874708, + "grad_norm": 1.4781609768664916, + "learning_rate": 7.51831254811372e-06, + "loss": 0.721, + "step": 19330 + }, + { + "epoch": 0.5924665931102121, + "grad_norm": 1.690813026611834, + "learning_rate": 7.517350977467138e-06, + "loss": 0.7124, + "step": 19331 + }, + { + "epoch": 0.5924972416329533, + "grad_norm": 1.5700476865648147, + "learning_rate": 7.5163894312827346e-06, + "loss": 0.7161, + "step": 19332 + }, + { + "epoch": 0.5925278901556945, + "grad_norm": 1.575968900960146, + "learning_rate": 7.515427909569976e-06, + "loss": 0.7391, + "step": 19333 + }, + { + "epoch": 0.5925585386784357, + "grad_norm": 1.5789841600396537, + "learning_rate": 7.514466412338346e-06, + "loss": 0.7362, + "step": 19334 + }, + { + "epoch": 0.5925891872011769, + "grad_norm": 1.4242785839004206, + "learning_rate": 7.513504939597309e-06, + "loss": 0.7649, + "step": 19335 + }, + { + "epoch": 0.5926198357239181, + "grad_norm": 0.6951180740521663, + "learning_rate": 7.512543491356351e-06, + "loss": 0.5237, + "step": 19336 + }, + { + "epoch": 0.5926504842466593, + "grad_norm": 1.7133843840099248, + "learning_rate": 7.511582067624936e-06, + "loss": 0.618, + "step": 19337 + }, + { + "epoch": 0.5926811327694005, + "grad_norm": 1.397529119049225, + "learning_rate": 7.510620668412538e-06, + "loss": 0.7023, + "step": 19338 + }, + { + "epoch": 0.5927117812921417, + "grad_norm": 1.7352013708894287, + "learning_rate": 7.509659293728633e-06, + "loss": 0.6338, + "step": 19339 + }, + { + "epoch": 0.5927424298148829, + "grad_norm": 1.3849995117957794, + "learning_rate": 7.508697943582692e-06, + "loss": 0.6874, + "step": 19340 + }, + { + "epoch": 0.5927730783376242, + "grad_norm": 1.5183675270281245, + "learning_rate": 7.507736617984186e-06, + "loss": 0.5843, + "step": 19341 + }, + { + "epoch": 0.5928037268603653, + "grad_norm": 1.5633442523993377, + "learning_rate": 7.506775316942591e-06, + "loss": 0.6664, + "step": 19342 + }, + { + "epoch": 0.5928343753831066, + "grad_norm": 1.597080873564904, + "learning_rate": 7.505814040467373e-06, + "loss": 0.665, + "step": 19343 + }, + { + "epoch": 0.5928650239058477, + "grad_norm": 1.708186724063109, + "learning_rate": 7.504852788568011e-06, + "loss": 0.7786, + "step": 19344 + }, + { + "epoch": 0.592895672428589, + "grad_norm": 1.4953162671050224, + "learning_rate": 7.503891561253976e-06, + "loss": 0.6212, + "step": 19345 + }, + { + "epoch": 0.5929263209513301, + "grad_norm": 1.7039777218090855, + "learning_rate": 7.502930358534727e-06, + "loss": 0.6772, + "step": 19346 + }, + { + "epoch": 0.5929569694740714, + "grad_norm": 1.612656674128449, + "learning_rate": 7.501969180419752e-06, + "loss": 0.7282, + "step": 19347 + }, + { + "epoch": 0.5929876179968125, + "grad_norm": 1.4958640949349096, + "learning_rate": 7.5010080269185115e-06, + "loss": 0.5988, + "step": 19348 + }, + { + "epoch": 0.5930182665195538, + "grad_norm": 1.52074118662946, + "learning_rate": 7.500046898040476e-06, + "loss": 0.6543, + "step": 19349 + }, + { + "epoch": 0.593048915042295, + "grad_norm": 0.6566214330723942, + "learning_rate": 7.499085793795121e-06, + "loss": 0.5244, + "step": 19350 + }, + { + "epoch": 0.5930795635650362, + "grad_norm": 1.797640349650068, + "learning_rate": 7.498124714191912e-06, + "loss": 0.811, + "step": 19351 + }, + { + "epoch": 0.5931102120877774, + "grad_norm": 1.4011414291265638, + "learning_rate": 7.497163659240321e-06, + "loss": 0.6603, + "step": 19352 + }, + { + "epoch": 0.5931408606105186, + "grad_norm": 1.5454110660799927, + "learning_rate": 7.4962026289498154e-06, + "loss": 0.6103, + "step": 19353 + }, + { + "epoch": 0.5931715091332598, + "grad_norm": 1.6356210309492312, + "learning_rate": 7.4952416233298665e-06, + "loss": 0.7102, + "step": 19354 + }, + { + "epoch": 0.5932021576560009, + "grad_norm": 1.6852214080694103, + "learning_rate": 7.494280642389944e-06, + "loss": 0.714, + "step": 19355 + }, + { + "epoch": 0.5932328061787422, + "grad_norm": 1.5147055515786418, + "learning_rate": 7.493319686139518e-06, + "loss": 0.7459, + "step": 19356 + }, + { + "epoch": 0.5932634547014833, + "grad_norm": 1.5083787347475048, + "learning_rate": 7.492358754588047e-06, + "loss": 0.7315, + "step": 19357 + }, + { + "epoch": 0.5932941032242246, + "grad_norm": 0.6910436365726809, + "learning_rate": 7.491397847745014e-06, + "loss": 0.5301, + "step": 19358 + }, + { + "epoch": 0.5933247517469658, + "grad_norm": 1.5767361770754946, + "learning_rate": 7.490436965619877e-06, + "loss": 0.6745, + "step": 19359 + }, + { + "epoch": 0.593355400269707, + "grad_norm": 1.5546448199546046, + "learning_rate": 7.489476108222106e-06, + "loss": 0.7108, + "step": 19360 + }, + { + "epoch": 0.5933860487924482, + "grad_norm": 1.5714396089636748, + "learning_rate": 7.48851527556117e-06, + "loss": 0.5934, + "step": 19361 + }, + { + "epoch": 0.5934166973151894, + "grad_norm": 1.5905440671402447, + "learning_rate": 7.487554467646534e-06, + "loss": 0.7225, + "step": 19362 + }, + { + "epoch": 0.5934473458379306, + "grad_norm": 0.6588448717621208, + "learning_rate": 7.486593684487668e-06, + "loss": 0.5463, + "step": 19363 + }, + { + "epoch": 0.5934779943606718, + "grad_norm": 1.5391914687572044, + "learning_rate": 7.485632926094039e-06, + "loss": 0.642, + "step": 19364 + }, + { + "epoch": 0.593508642883413, + "grad_norm": 1.4852116812816258, + "learning_rate": 7.484672192475109e-06, + "loss": 0.6522, + "step": 19365 + }, + { + "epoch": 0.5935392914061542, + "grad_norm": 1.5489749151098597, + "learning_rate": 7.483711483640352e-06, + "loss": 0.759, + "step": 19366 + }, + { + "epoch": 0.5935699399288954, + "grad_norm": 1.5681202344771392, + "learning_rate": 7.482750799599228e-06, + "loss": 0.6989, + "step": 19367 + }, + { + "epoch": 0.5936005884516367, + "grad_norm": 1.411588395730194, + "learning_rate": 7.481790140361201e-06, + "loss": 0.6502, + "step": 19368 + }, + { + "epoch": 0.5936312369743778, + "grad_norm": 1.591866181312311, + "learning_rate": 7.480829505935743e-06, + "loss": 0.6254, + "step": 19369 + }, + { + "epoch": 0.5936618854971191, + "grad_norm": 1.3462508965382347, + "learning_rate": 7.4798688963323164e-06, + "loss": 0.602, + "step": 19370 + }, + { + "epoch": 0.5936925340198602, + "grad_norm": 0.6557737357826936, + "learning_rate": 7.478908311560384e-06, + "loss": 0.57, + "step": 19371 + }, + { + "epoch": 0.5937231825426015, + "grad_norm": 0.67819658534387, + "learning_rate": 7.477947751629415e-06, + "loss": 0.5566, + "step": 19372 + }, + { + "epoch": 0.5937538310653426, + "grad_norm": 1.7331588148060955, + "learning_rate": 7.47698721654887e-06, + "loss": 0.6811, + "step": 19373 + }, + { + "epoch": 0.5937844795880839, + "grad_norm": 1.3348300961753392, + "learning_rate": 7.476026706328219e-06, + "loss": 0.6848, + "step": 19374 + }, + { + "epoch": 0.593815128110825, + "grad_norm": 1.4193337959206958, + "learning_rate": 7.475066220976923e-06, + "loss": 0.6015, + "step": 19375 + }, + { + "epoch": 0.5938457766335663, + "grad_norm": 1.50533090391078, + "learning_rate": 7.47410576050444e-06, + "loss": 0.6346, + "step": 19376 + }, + { + "epoch": 0.5938764251563075, + "grad_norm": 0.6684669162653128, + "learning_rate": 7.4731453249202456e-06, + "loss": 0.5364, + "step": 19377 + }, + { + "epoch": 0.5939070736790487, + "grad_norm": 1.6766160151189895, + "learning_rate": 7.472184914233794e-06, + "loss": 0.7526, + "step": 19378 + }, + { + "epoch": 0.5939377222017899, + "grad_norm": 1.6032449532609947, + "learning_rate": 7.471224528454551e-06, + "loss": 0.8168, + "step": 19379 + }, + { + "epoch": 0.5939683707245311, + "grad_norm": 1.7460683804027464, + "learning_rate": 7.47026416759198e-06, + "loss": 0.6797, + "step": 19380 + }, + { + "epoch": 0.5939990192472723, + "grad_norm": 0.6829189522484219, + "learning_rate": 7.4693038316555415e-06, + "loss": 0.5962, + "step": 19381 + }, + { + "epoch": 0.5940296677700135, + "grad_norm": 1.719113804543492, + "learning_rate": 7.468343520654702e-06, + "loss": 0.6914, + "step": 19382 + }, + { + "epoch": 0.5940603162927547, + "grad_norm": 1.6244007069398474, + "learning_rate": 7.4673832345989216e-06, + "loss": 0.6276, + "step": 19383 + }, + { + "epoch": 0.594090964815496, + "grad_norm": 1.7192789039501761, + "learning_rate": 7.46642297349766e-06, + "loss": 0.8158, + "step": 19384 + }, + { + "epoch": 0.5941216133382371, + "grad_norm": 1.5401739486208557, + "learning_rate": 7.465462737360385e-06, + "loss": 0.6995, + "step": 19385 + }, + { + "epoch": 0.5941522618609782, + "grad_norm": 1.5500889208197992, + "learning_rate": 7.464502526196554e-06, + "loss": 0.6937, + "step": 19386 + }, + { + "epoch": 0.5941829103837195, + "grad_norm": 0.6800964780461218, + "learning_rate": 7.463542340015622e-06, + "loss": 0.5422, + "step": 19387 + }, + { + "epoch": 0.5942135589064607, + "grad_norm": 0.6617219927923538, + "learning_rate": 7.462582178827065e-06, + "loss": 0.5625, + "step": 19388 + }, + { + "epoch": 0.5942442074292019, + "grad_norm": 1.583372500382257, + "learning_rate": 7.461622042640326e-06, + "loss": 0.6508, + "step": 19389 + }, + { + "epoch": 0.5942748559519431, + "grad_norm": 1.2958842041030103, + "learning_rate": 7.460661931464882e-06, + "loss": 0.7165, + "step": 19390 + }, + { + "epoch": 0.5943055044746843, + "grad_norm": 1.6798639066208205, + "learning_rate": 7.459701845310183e-06, + "loss": 0.7328, + "step": 19391 + }, + { + "epoch": 0.5943361529974255, + "grad_norm": 1.469290263356057, + "learning_rate": 7.45874178418569e-06, + "loss": 0.659, + "step": 19392 + }, + { + "epoch": 0.5943668015201667, + "grad_norm": 0.6427746685974021, + "learning_rate": 7.4577817481008675e-06, + "loss": 0.5469, + "step": 19393 + }, + { + "epoch": 0.5943974500429079, + "grad_norm": 0.6296161331167129, + "learning_rate": 7.456821737065171e-06, + "loss": 0.562, + "step": 19394 + }, + { + "epoch": 0.5944280985656492, + "grad_norm": 1.5133497552510962, + "learning_rate": 7.455861751088058e-06, + "loss": 0.7447, + "step": 19395 + }, + { + "epoch": 0.5944587470883903, + "grad_norm": 1.5516006021372626, + "learning_rate": 7.454901790178994e-06, + "loss": 0.5673, + "step": 19396 + }, + { + "epoch": 0.5944893956111316, + "grad_norm": 0.6614065736413535, + "learning_rate": 7.453941854347434e-06, + "loss": 0.5546, + "step": 19397 + }, + { + "epoch": 0.5945200441338727, + "grad_norm": 1.5868997657416763, + "learning_rate": 7.452981943602831e-06, + "loss": 0.6393, + "step": 19398 + }, + { + "epoch": 0.594550692656614, + "grad_norm": 1.5792791006359133, + "learning_rate": 7.452022057954654e-06, + "loss": 0.731, + "step": 19399 + }, + { + "epoch": 0.5945813411793551, + "grad_norm": 1.7256815376083106, + "learning_rate": 7.45106219741235e-06, + "loss": 0.6948, + "step": 19400 + }, + { + "epoch": 0.5946119897020964, + "grad_norm": 1.580505246137133, + "learning_rate": 7.450102361985389e-06, + "loss": 0.6535, + "step": 19401 + }, + { + "epoch": 0.5946426382248375, + "grad_norm": 1.4884799735363357, + "learning_rate": 7.44914255168322e-06, + "loss": 0.7117, + "step": 19402 + }, + { + "epoch": 0.5946732867475788, + "grad_norm": 1.5348834590012839, + "learning_rate": 7.448182766515298e-06, + "loss": 0.7187, + "step": 19403 + }, + { + "epoch": 0.59470393527032, + "grad_norm": 1.8354271200972578, + "learning_rate": 7.447223006491088e-06, + "loss": 0.6529, + "step": 19404 + }, + { + "epoch": 0.5947345837930612, + "grad_norm": 1.523638916819525, + "learning_rate": 7.446263271620042e-06, + "loss": 0.5921, + "step": 19405 + }, + { + "epoch": 0.5947652323158024, + "grad_norm": 1.4076260444728215, + "learning_rate": 7.445303561911617e-06, + "loss": 0.6021, + "step": 19406 + }, + { + "epoch": 0.5947958808385436, + "grad_norm": 1.3350682149352044, + "learning_rate": 7.4443438773752685e-06, + "loss": 0.6615, + "step": 19407 + }, + { + "epoch": 0.5948265293612848, + "grad_norm": 1.4570350591463408, + "learning_rate": 7.443384218020454e-06, + "loss": 0.6515, + "step": 19408 + }, + { + "epoch": 0.594857177884026, + "grad_norm": 1.3866159945516723, + "learning_rate": 7.4424245838566315e-06, + "loss": 0.5292, + "step": 19409 + }, + { + "epoch": 0.5948878264067672, + "grad_norm": 1.4489175762048785, + "learning_rate": 7.441464974893255e-06, + "loss": 0.6823, + "step": 19410 + }, + { + "epoch": 0.5949184749295084, + "grad_norm": 1.4504289425245676, + "learning_rate": 7.440505391139774e-06, + "loss": 0.5923, + "step": 19411 + }, + { + "epoch": 0.5949491234522496, + "grad_norm": 1.4780179104564224, + "learning_rate": 7.4395458326056505e-06, + "loss": 0.6905, + "step": 19412 + }, + { + "epoch": 0.5949797719749909, + "grad_norm": 1.641957431069855, + "learning_rate": 7.438586299300337e-06, + "loss": 0.6048, + "step": 19413 + }, + { + "epoch": 0.595010420497732, + "grad_norm": 0.6750436986482347, + "learning_rate": 7.437626791233288e-06, + "loss": 0.5603, + "step": 19414 + }, + { + "epoch": 0.5950410690204733, + "grad_norm": 1.523295967935075, + "learning_rate": 7.4366673084139584e-06, + "loss": 0.6542, + "step": 19415 + }, + { + "epoch": 0.5950717175432144, + "grad_norm": 1.5519587273189843, + "learning_rate": 7.4357078508517985e-06, + "loss": 0.6739, + "step": 19416 + }, + { + "epoch": 0.5951023660659556, + "grad_norm": 1.621332831780025, + "learning_rate": 7.434748418556269e-06, + "loss": 0.6512, + "step": 19417 + }, + { + "epoch": 0.5951330145886968, + "grad_norm": 0.6961083404817319, + "learning_rate": 7.433789011536821e-06, + "loss": 0.5949, + "step": 19418 + }, + { + "epoch": 0.595163663111438, + "grad_norm": 1.6285325885329605, + "learning_rate": 7.4328296298029e-06, + "loss": 0.7374, + "step": 19419 + }, + { + "epoch": 0.5951943116341792, + "grad_norm": 1.3372902635249904, + "learning_rate": 7.431870273363973e-06, + "loss": 0.5885, + "step": 19420 + }, + { + "epoch": 0.5952249601569204, + "grad_norm": 1.4376890442564834, + "learning_rate": 7.430910942229481e-06, + "loss": 0.7868, + "step": 19421 + }, + { + "epoch": 0.5952556086796617, + "grad_norm": 1.5374407571212, + "learning_rate": 7.429951636408881e-06, + "loss": 0.718, + "step": 19422 + }, + { + "epoch": 0.5952862572024028, + "grad_norm": 1.5066463104961494, + "learning_rate": 7.428992355911626e-06, + "loss": 0.6905, + "step": 19423 + }, + { + "epoch": 0.5953169057251441, + "grad_norm": 1.4973069749366652, + "learning_rate": 7.428033100747167e-06, + "loss": 0.7175, + "step": 19424 + }, + { + "epoch": 0.5953475542478852, + "grad_norm": 1.3766349576992267, + "learning_rate": 7.427073870924955e-06, + "loss": 0.7052, + "step": 19425 + }, + { + "epoch": 0.5953782027706265, + "grad_norm": 1.6166017171589877, + "learning_rate": 7.426114666454444e-06, + "loss": 0.7657, + "step": 19426 + }, + { + "epoch": 0.5954088512933676, + "grad_norm": 1.446909866918492, + "learning_rate": 7.425155487345082e-06, + "loss": 0.6514, + "step": 19427 + }, + { + "epoch": 0.5954394998161089, + "grad_norm": 1.3075124578875341, + "learning_rate": 7.4241963336063216e-06, + "loss": 0.6606, + "step": 19428 + }, + { + "epoch": 0.59547014833885, + "grad_norm": 1.669352253294743, + "learning_rate": 7.423237205247619e-06, + "loss": 0.8057, + "step": 19429 + }, + { + "epoch": 0.5955007968615913, + "grad_norm": 1.648360350209407, + "learning_rate": 7.422278102278411e-06, + "loss": 0.7838, + "step": 19430 + }, + { + "epoch": 0.5955314453843324, + "grad_norm": 0.7249866337498531, + "learning_rate": 7.4213190247081636e-06, + "loss": 0.5641, + "step": 19431 + }, + { + "epoch": 0.5955620939070737, + "grad_norm": 1.6935679421448777, + "learning_rate": 7.420359972546318e-06, + "loss": 0.705, + "step": 19432 + }, + { + "epoch": 0.5955927424298149, + "grad_norm": 1.5214522922076081, + "learning_rate": 7.419400945802322e-06, + "loss": 0.755, + "step": 19433 + }, + { + "epoch": 0.5956233909525561, + "grad_norm": 1.457666734020036, + "learning_rate": 7.4184419444856325e-06, + "loss": 0.6099, + "step": 19434 + }, + { + "epoch": 0.5956540394752973, + "grad_norm": 1.4562005278004797, + "learning_rate": 7.417482968605692e-06, + "loss": 0.627, + "step": 19435 + }, + { + "epoch": 0.5956846879980385, + "grad_norm": 1.3820605770382262, + "learning_rate": 7.416524018171956e-06, + "loss": 0.7345, + "step": 19436 + }, + { + "epoch": 0.5957153365207797, + "grad_norm": 1.4956882443540631, + "learning_rate": 7.415565093193868e-06, + "loss": 0.6761, + "step": 19437 + }, + { + "epoch": 0.5957459850435209, + "grad_norm": 1.741265927946281, + "learning_rate": 7.4146061936808765e-06, + "loss": 0.6858, + "step": 19438 + }, + { + "epoch": 0.5957766335662621, + "grad_norm": 1.5640551557343005, + "learning_rate": 7.413647319642434e-06, + "loss": 0.6355, + "step": 19439 + }, + { + "epoch": 0.5958072820890034, + "grad_norm": 1.5006236493835956, + "learning_rate": 7.41268847108799e-06, + "loss": 0.6661, + "step": 19440 + }, + { + "epoch": 0.5958379306117445, + "grad_norm": 1.5330862154508744, + "learning_rate": 7.411729648026979e-06, + "loss": 0.7563, + "step": 19441 + }, + { + "epoch": 0.5958685791344858, + "grad_norm": 1.443451661299582, + "learning_rate": 7.410770850468867e-06, + "loss": 0.6666, + "step": 19442 + }, + { + "epoch": 0.5958992276572269, + "grad_norm": 0.6868713533940343, + "learning_rate": 7.409812078423085e-06, + "loss": 0.551, + "step": 19443 + }, + { + "epoch": 0.5959298761799682, + "grad_norm": 1.3870958075243345, + "learning_rate": 7.408853331899094e-06, + "loss": 0.6667, + "step": 19444 + }, + { + "epoch": 0.5959605247027093, + "grad_norm": 1.59266986025613, + "learning_rate": 7.4078946109063324e-06, + "loss": 0.7077, + "step": 19445 + }, + { + "epoch": 0.5959911732254506, + "grad_norm": 1.5758759830234443, + "learning_rate": 7.406935915454245e-06, + "loss": 0.7225, + "step": 19446 + }, + { + "epoch": 0.5960218217481917, + "grad_norm": 1.8759330097118028, + "learning_rate": 7.405977245552285e-06, + "loss": 0.7747, + "step": 19447 + }, + { + "epoch": 0.5960524702709329, + "grad_norm": 1.5133769565294386, + "learning_rate": 7.405018601209893e-06, + "loss": 0.6789, + "step": 19448 + }, + { + "epoch": 0.5960831187936741, + "grad_norm": 0.6438988078582124, + "learning_rate": 7.404059982436516e-06, + "loss": 0.5532, + "step": 19449 + }, + { + "epoch": 0.5961137673164153, + "grad_norm": 1.4878043998520485, + "learning_rate": 7.403101389241603e-06, + "loss": 0.7223, + "step": 19450 + }, + { + "epoch": 0.5961444158391566, + "grad_norm": 1.432028059061156, + "learning_rate": 7.402142821634597e-06, + "loss": 0.644, + "step": 19451 + }, + { + "epoch": 0.5961750643618977, + "grad_norm": 1.4900302109618089, + "learning_rate": 7.4011842796249365e-06, + "loss": 0.6257, + "step": 19452 + }, + { + "epoch": 0.596205712884639, + "grad_norm": 1.7409244790706608, + "learning_rate": 7.40022576322208e-06, + "loss": 0.7267, + "step": 19453 + }, + { + "epoch": 0.5962363614073801, + "grad_norm": 1.5999194442276592, + "learning_rate": 7.399267272435455e-06, + "loss": 0.6517, + "step": 19454 + }, + { + "epoch": 0.5962670099301214, + "grad_norm": 1.5339008849649456, + "learning_rate": 7.398308807274524e-06, + "loss": 0.6636, + "step": 19455 + }, + { + "epoch": 0.5962976584528625, + "grad_norm": 0.6569244039321176, + "learning_rate": 7.397350367748719e-06, + "loss": 0.5543, + "step": 19456 + }, + { + "epoch": 0.5963283069756038, + "grad_norm": 1.3779911364072428, + "learning_rate": 7.3963919538674845e-06, + "loss": 0.6572, + "step": 19457 + }, + { + "epoch": 0.596358955498345, + "grad_norm": 1.3935137486223002, + "learning_rate": 7.395433565640269e-06, + "loss": 0.6511, + "step": 19458 + }, + { + "epoch": 0.5963896040210862, + "grad_norm": 1.43700681087627, + "learning_rate": 7.3944752030765125e-06, + "loss": 0.6404, + "step": 19459 + }, + { + "epoch": 0.5964202525438274, + "grad_norm": 1.478530583865065, + "learning_rate": 7.393516866185655e-06, + "loss": 0.6788, + "step": 19460 + }, + { + "epoch": 0.5964509010665686, + "grad_norm": 1.4948788704121083, + "learning_rate": 7.392558554977147e-06, + "loss": 0.7439, + "step": 19461 + }, + { + "epoch": 0.5964815495893098, + "grad_norm": 1.7638201201668522, + "learning_rate": 7.391600269460424e-06, + "loss": 0.8121, + "step": 19462 + }, + { + "epoch": 0.596512198112051, + "grad_norm": 1.6191750701522805, + "learning_rate": 7.390642009644934e-06, + "loss": 0.6048, + "step": 19463 + }, + { + "epoch": 0.5965428466347922, + "grad_norm": 0.6942929842771487, + "learning_rate": 7.3896837755401155e-06, + "loss": 0.5752, + "step": 19464 + }, + { + "epoch": 0.5965734951575334, + "grad_norm": 1.5368882424963626, + "learning_rate": 7.388725567155407e-06, + "loss": 0.6429, + "step": 19465 + }, + { + "epoch": 0.5966041436802746, + "grad_norm": 1.556102472056869, + "learning_rate": 7.387767384500256e-06, + "loss": 0.7099, + "step": 19466 + }, + { + "epoch": 0.5966347922030159, + "grad_norm": 1.4994541197516904, + "learning_rate": 7.386809227584102e-06, + "loss": 0.6758, + "step": 19467 + }, + { + "epoch": 0.596665440725757, + "grad_norm": 1.3648338082119338, + "learning_rate": 7.385851096416383e-06, + "loss": 0.649, + "step": 19468 + }, + { + "epoch": 0.5966960892484983, + "grad_norm": 1.4687535881284348, + "learning_rate": 7.384892991006544e-06, + "loss": 0.6858, + "step": 19469 + }, + { + "epoch": 0.5967267377712394, + "grad_norm": 1.547502974431357, + "learning_rate": 7.3839349113640216e-06, + "loss": 0.749, + "step": 19470 + }, + { + "epoch": 0.5967573862939807, + "grad_norm": 0.6746149163115099, + "learning_rate": 7.382976857498258e-06, + "loss": 0.5717, + "step": 19471 + }, + { + "epoch": 0.5967880348167218, + "grad_norm": 2.072551779381937, + "learning_rate": 7.382018829418698e-06, + "loss": 0.6416, + "step": 19472 + }, + { + "epoch": 0.5968186833394631, + "grad_norm": 1.6702438422554176, + "learning_rate": 7.3810608271347695e-06, + "loss": 0.6351, + "step": 19473 + }, + { + "epoch": 0.5968493318622042, + "grad_norm": 1.472694422691959, + "learning_rate": 7.3801028506559235e-06, + "loss": 0.6232, + "step": 19474 + }, + { + "epoch": 0.5968799803849455, + "grad_norm": 0.6310320477546969, + "learning_rate": 7.379144899991594e-06, + "loss": 0.5597, + "step": 19475 + }, + { + "epoch": 0.5969106289076866, + "grad_norm": 1.488180451939898, + "learning_rate": 7.378186975151217e-06, + "loss": 0.7686, + "step": 19476 + }, + { + "epoch": 0.5969412774304279, + "grad_norm": 1.3729831276152833, + "learning_rate": 7.3772290761442365e-06, + "loss": 0.6906, + "step": 19477 + }, + { + "epoch": 0.5969719259531691, + "grad_norm": 1.6031813751652586, + "learning_rate": 7.3762712029800895e-06, + "loss": 0.6702, + "step": 19478 + }, + { + "epoch": 0.5970025744759102, + "grad_norm": 1.4995643839331898, + "learning_rate": 7.375313355668212e-06, + "loss": 0.651, + "step": 19479 + }, + { + "epoch": 0.5970332229986515, + "grad_norm": 1.4860784825668683, + "learning_rate": 7.3743555342180465e-06, + "loss": 0.6773, + "step": 19480 + }, + { + "epoch": 0.5970638715213926, + "grad_norm": 1.4862835822149134, + "learning_rate": 7.373397738639024e-06, + "loss": 0.7106, + "step": 19481 + }, + { + "epoch": 0.5970945200441339, + "grad_norm": 1.5989558603604843, + "learning_rate": 7.372439968940588e-06, + "loss": 0.6659, + "step": 19482 + }, + { + "epoch": 0.597125168566875, + "grad_norm": 1.493262447236341, + "learning_rate": 7.371482225132176e-06, + "loss": 0.695, + "step": 19483 + }, + { + "epoch": 0.5971558170896163, + "grad_norm": 1.618087881949641, + "learning_rate": 7.370524507223215e-06, + "loss": 0.7435, + "step": 19484 + }, + { + "epoch": 0.5971864656123574, + "grad_norm": 1.4091549742150895, + "learning_rate": 7.369566815223156e-06, + "loss": 0.6457, + "step": 19485 + }, + { + "epoch": 0.5972171141350987, + "grad_norm": 0.6854047077534564, + "learning_rate": 7.368609149141426e-06, + "loss": 0.5849, + "step": 19486 + }, + { + "epoch": 0.5972477626578399, + "grad_norm": 1.660179340435935, + "learning_rate": 7.367651508987461e-06, + "loss": 0.7187, + "step": 19487 + }, + { + "epoch": 0.5972784111805811, + "grad_norm": 1.5752743014136912, + "learning_rate": 7.3666938947707e-06, + "loss": 0.6096, + "step": 19488 + }, + { + "epoch": 0.5973090597033223, + "grad_norm": 1.7020604053222206, + "learning_rate": 7.365736306500577e-06, + "loss": 0.717, + "step": 19489 + }, + { + "epoch": 0.5973397082260635, + "grad_norm": 1.6065787091173218, + "learning_rate": 7.364778744186531e-06, + "loss": 0.7179, + "step": 19490 + }, + { + "epoch": 0.5973703567488047, + "grad_norm": 1.3645988567975313, + "learning_rate": 7.3638212078379935e-06, + "loss": 0.7661, + "step": 19491 + }, + { + "epoch": 0.5974010052715459, + "grad_norm": 1.514176597088081, + "learning_rate": 7.362863697464398e-06, + "loss": 0.7747, + "step": 19492 + }, + { + "epoch": 0.5974316537942871, + "grad_norm": 1.6353208735956064, + "learning_rate": 7.361906213075183e-06, + "loss": 0.7198, + "step": 19493 + }, + { + "epoch": 0.5974623023170283, + "grad_norm": 1.644953761123787, + "learning_rate": 7.360948754679784e-06, + "loss": 0.6719, + "step": 19494 + }, + { + "epoch": 0.5974929508397695, + "grad_norm": 1.6129090977924334, + "learning_rate": 7.359991322287625e-06, + "loss": 0.7858, + "step": 19495 + }, + { + "epoch": 0.5975235993625108, + "grad_norm": 1.532694749381395, + "learning_rate": 7.359033915908154e-06, + "loss": 0.6892, + "step": 19496 + }, + { + "epoch": 0.5975542478852519, + "grad_norm": 1.5045358303744554, + "learning_rate": 7.358076535550791e-06, + "loss": 0.7082, + "step": 19497 + }, + { + "epoch": 0.5975848964079932, + "grad_norm": 1.6894343264158453, + "learning_rate": 7.357119181224981e-06, + "loss": 0.73, + "step": 19498 + }, + { + "epoch": 0.5976155449307343, + "grad_norm": 1.4593111843051234, + "learning_rate": 7.356161852940152e-06, + "loss": 0.6141, + "step": 19499 + }, + { + "epoch": 0.5976461934534756, + "grad_norm": 1.8757451460614574, + "learning_rate": 7.355204550705733e-06, + "loss": 0.6604, + "step": 19500 + }, + { + "epoch": 0.5976768419762167, + "grad_norm": 1.8260939489030414, + "learning_rate": 7.354247274531163e-06, + "loss": 0.6908, + "step": 19501 + }, + { + "epoch": 0.597707490498958, + "grad_norm": 1.5580999136489773, + "learning_rate": 7.353290024425871e-06, + "loss": 0.6039, + "step": 19502 + }, + { + "epoch": 0.5977381390216991, + "grad_norm": 1.4568834365379422, + "learning_rate": 7.352332800399287e-06, + "loss": 0.5955, + "step": 19503 + }, + { + "epoch": 0.5977687875444404, + "grad_norm": 1.6194156229081251, + "learning_rate": 7.3513756024608484e-06, + "loss": 0.6519, + "step": 19504 + }, + { + "epoch": 0.5977994360671816, + "grad_norm": 0.678622874172812, + "learning_rate": 7.350418430619987e-06, + "loss": 0.5422, + "step": 19505 + }, + { + "epoch": 0.5978300845899228, + "grad_norm": 0.6835496643082416, + "learning_rate": 7.349461284886122e-06, + "loss": 0.5556, + "step": 19506 + }, + { + "epoch": 0.597860733112664, + "grad_norm": 1.5632423061403926, + "learning_rate": 7.3485041652687015e-06, + "loss": 0.7502, + "step": 19507 + }, + { + "epoch": 0.5978913816354052, + "grad_norm": 1.6280359854668096, + "learning_rate": 7.347547071777142e-06, + "loss": 0.7062, + "step": 19508 + }, + { + "epoch": 0.5979220301581464, + "grad_norm": 1.5400733850481012, + "learning_rate": 7.346590004420884e-06, + "loss": 0.669, + "step": 19509 + }, + { + "epoch": 0.5979526786808875, + "grad_norm": 1.4760400831018405, + "learning_rate": 7.345632963209352e-06, + "loss": 0.6797, + "step": 19510 + }, + { + "epoch": 0.5979833272036288, + "grad_norm": 1.5076167127356002, + "learning_rate": 7.344675948151976e-06, + "loss": 0.744, + "step": 19511 + }, + { + "epoch": 0.5980139757263699, + "grad_norm": 1.5456147969125535, + "learning_rate": 7.343718959258188e-06, + "loss": 0.7898, + "step": 19512 + }, + { + "epoch": 0.5980446242491112, + "grad_norm": 1.5001218754893382, + "learning_rate": 7.342761996537418e-06, + "loss": 0.6125, + "step": 19513 + }, + { + "epoch": 0.5980752727718524, + "grad_norm": 1.3941709670084736, + "learning_rate": 7.341805059999092e-06, + "loss": 0.6551, + "step": 19514 + }, + { + "epoch": 0.5981059212945936, + "grad_norm": 1.4530143553979673, + "learning_rate": 7.340848149652644e-06, + "loss": 0.6491, + "step": 19515 + }, + { + "epoch": 0.5981365698173348, + "grad_norm": 1.5125539268427592, + "learning_rate": 7.339891265507495e-06, + "loss": 0.693, + "step": 19516 + }, + { + "epoch": 0.598167218340076, + "grad_norm": 1.5457219523049732, + "learning_rate": 7.338934407573083e-06, + "loss": 0.6335, + "step": 19517 + }, + { + "epoch": 0.5981978668628172, + "grad_norm": 1.5299125342578688, + "learning_rate": 7.337977575858829e-06, + "loss": 0.5892, + "step": 19518 + }, + { + "epoch": 0.5982285153855584, + "grad_norm": 1.253799390417553, + "learning_rate": 7.3370207703741615e-06, + "loss": 0.5916, + "step": 19519 + }, + { + "epoch": 0.5982591639082996, + "grad_norm": 0.700363407527966, + "learning_rate": 7.336063991128511e-06, + "loss": 0.5487, + "step": 19520 + }, + { + "epoch": 0.5982898124310408, + "grad_norm": 1.4633772099939228, + "learning_rate": 7.335107238131305e-06, + "loss": 0.7153, + "step": 19521 + }, + { + "epoch": 0.598320460953782, + "grad_norm": 1.3427759389320593, + "learning_rate": 7.334150511391967e-06, + "loss": 0.6373, + "step": 19522 + }, + { + "epoch": 0.5983511094765233, + "grad_norm": 0.7004002675043853, + "learning_rate": 7.333193810919927e-06, + "loss": 0.5935, + "step": 19523 + }, + { + "epoch": 0.5983817579992644, + "grad_norm": 1.4632886309404243, + "learning_rate": 7.3322371367246095e-06, + "loss": 0.6043, + "step": 19524 + }, + { + "epoch": 0.5984124065220057, + "grad_norm": 1.8896621929791158, + "learning_rate": 7.331280488815442e-06, + "loss": 0.6361, + "step": 19525 + }, + { + "epoch": 0.5984430550447468, + "grad_norm": 1.7306836072537068, + "learning_rate": 7.330323867201855e-06, + "loss": 0.7339, + "step": 19526 + }, + { + "epoch": 0.5984737035674881, + "grad_norm": 1.7238333933474792, + "learning_rate": 7.329367271893264e-06, + "loss": 0.6874, + "step": 19527 + }, + { + "epoch": 0.5985043520902292, + "grad_norm": 1.5393052916635228, + "learning_rate": 7.328410702899106e-06, + "loss": 0.6642, + "step": 19528 + }, + { + "epoch": 0.5985350006129705, + "grad_norm": 0.6769139815333279, + "learning_rate": 7.327454160228798e-06, + "loss": 0.5618, + "step": 19529 + }, + { + "epoch": 0.5985656491357116, + "grad_norm": 0.6704413314722062, + "learning_rate": 7.326497643891768e-06, + "loss": 0.5388, + "step": 19530 + }, + { + "epoch": 0.5985962976584529, + "grad_norm": 0.6782257478694705, + "learning_rate": 7.325541153897441e-06, + "loss": 0.5857, + "step": 19531 + }, + { + "epoch": 0.598626946181194, + "grad_norm": 1.505382753691279, + "learning_rate": 7.324584690255242e-06, + "loss": 0.6382, + "step": 19532 + }, + { + "epoch": 0.5986575947039353, + "grad_norm": 1.6940657040672749, + "learning_rate": 7.323628252974593e-06, + "loss": 0.7251, + "step": 19533 + }, + { + "epoch": 0.5986882432266765, + "grad_norm": 1.4284192610822615, + "learning_rate": 7.322671842064921e-06, + "loss": 0.7261, + "step": 19534 + }, + { + "epoch": 0.5987188917494177, + "grad_norm": 1.481080924141451, + "learning_rate": 7.321715457535645e-06, + "loss": 0.8154, + "step": 19535 + }, + { + "epoch": 0.5987495402721589, + "grad_norm": 1.4790950450330156, + "learning_rate": 7.3207590993961965e-06, + "loss": 0.6726, + "step": 19536 + }, + { + "epoch": 0.5987801887949001, + "grad_norm": 1.5833035132884488, + "learning_rate": 7.319802767655995e-06, + "loss": 0.6836, + "step": 19537 + }, + { + "epoch": 0.5988108373176413, + "grad_norm": 1.4890292231187723, + "learning_rate": 7.318846462324456e-06, + "loss": 0.6523, + "step": 19538 + }, + { + "epoch": 0.5988414858403825, + "grad_norm": 1.5040736154369618, + "learning_rate": 7.317890183411016e-06, + "loss": 0.7185, + "step": 19539 + }, + { + "epoch": 0.5988721343631237, + "grad_norm": 1.6560461537648232, + "learning_rate": 7.316933930925087e-06, + "loss": 0.6223, + "step": 19540 + }, + { + "epoch": 0.5989027828858648, + "grad_norm": 1.4833507788829423, + "learning_rate": 7.315977704876094e-06, + "loss": 0.7116, + "step": 19541 + }, + { + "epoch": 0.5989334314086061, + "grad_norm": 1.5950111035201087, + "learning_rate": 7.315021505273459e-06, + "loss": 0.6503, + "step": 19542 + }, + { + "epoch": 0.5989640799313473, + "grad_norm": 0.6851083941135129, + "learning_rate": 7.314065332126604e-06, + "loss": 0.5621, + "step": 19543 + }, + { + "epoch": 0.5989947284540885, + "grad_norm": 1.392624457402743, + "learning_rate": 7.3131091854449524e-06, + "loss": 0.6488, + "step": 19544 + }, + { + "epoch": 0.5990253769768297, + "grad_norm": 1.359601701994785, + "learning_rate": 7.3121530652379235e-06, + "loss": 0.5843, + "step": 19545 + }, + { + "epoch": 0.5990560254995709, + "grad_norm": 1.7579367527696599, + "learning_rate": 7.311196971514936e-06, + "loss": 0.7695, + "step": 19546 + }, + { + "epoch": 0.5990866740223121, + "grad_norm": 1.6750067561487756, + "learning_rate": 7.310240904285414e-06, + "loss": 0.7859, + "step": 19547 + }, + { + "epoch": 0.5991173225450533, + "grad_norm": 1.4137509968445034, + "learning_rate": 7.309284863558779e-06, + "loss": 0.652, + "step": 19548 + }, + { + "epoch": 0.5991479710677945, + "grad_norm": 1.5728594401631588, + "learning_rate": 7.3083288493444425e-06, + "loss": 0.6393, + "step": 19549 + }, + { + "epoch": 0.5991786195905358, + "grad_norm": 0.6596454296107419, + "learning_rate": 7.307372861651838e-06, + "loss": 0.5534, + "step": 19550 + }, + { + "epoch": 0.5992092681132769, + "grad_norm": 1.561302740896166, + "learning_rate": 7.306416900490374e-06, + "loss": 0.639, + "step": 19551 + }, + { + "epoch": 0.5992399166360182, + "grad_norm": 1.59571884558023, + "learning_rate": 7.305460965869471e-06, + "loss": 0.6383, + "step": 19552 + }, + { + "epoch": 0.5992705651587593, + "grad_norm": 1.4983728528156786, + "learning_rate": 7.304505057798554e-06, + "loss": 0.6837, + "step": 19553 + }, + { + "epoch": 0.5993012136815006, + "grad_norm": 0.6681772402043583, + "learning_rate": 7.303549176287036e-06, + "loss": 0.557, + "step": 19554 + }, + { + "epoch": 0.5993318622042417, + "grad_norm": 1.405531032660534, + "learning_rate": 7.30259332134434e-06, + "loss": 0.6623, + "step": 19555 + }, + { + "epoch": 0.599362510726983, + "grad_norm": 0.6726753746155022, + "learning_rate": 7.3016374929798805e-06, + "loss": 0.5433, + "step": 19556 + }, + { + "epoch": 0.5993931592497241, + "grad_norm": 0.6739730287231669, + "learning_rate": 7.300681691203078e-06, + "loss": 0.5356, + "step": 19557 + }, + { + "epoch": 0.5994238077724654, + "grad_norm": 1.601771254786481, + "learning_rate": 7.2997259160233495e-06, + "loss": 0.584, + "step": 19558 + }, + { + "epoch": 0.5994544562952066, + "grad_norm": 1.7977767036328647, + "learning_rate": 7.298770167450115e-06, + "loss": 0.775, + "step": 19559 + }, + { + "epoch": 0.5994851048179478, + "grad_norm": 1.371816061744544, + "learning_rate": 7.297814445492785e-06, + "loss": 0.7305, + "step": 19560 + }, + { + "epoch": 0.599515753340689, + "grad_norm": 1.595631479110983, + "learning_rate": 7.296858750160782e-06, + "loss": 0.6757, + "step": 19561 + }, + { + "epoch": 0.5995464018634302, + "grad_norm": 1.4663875237488826, + "learning_rate": 7.2959030814635205e-06, + "loss": 0.6792, + "step": 19562 + }, + { + "epoch": 0.5995770503861714, + "grad_norm": 1.5950351801976526, + "learning_rate": 7.294947439410419e-06, + "loss": 0.6075, + "step": 19563 + }, + { + "epoch": 0.5996076989089126, + "grad_norm": 1.569454923487274, + "learning_rate": 7.293991824010893e-06, + "loss": 0.713, + "step": 19564 + }, + { + "epoch": 0.5996383474316538, + "grad_norm": 1.6302573743561677, + "learning_rate": 7.293036235274355e-06, + "loss": 0.6377, + "step": 19565 + }, + { + "epoch": 0.599668995954395, + "grad_norm": 1.575217689742268, + "learning_rate": 7.2920806732102265e-06, + "loss": 0.6763, + "step": 19566 + }, + { + "epoch": 0.5996996444771362, + "grad_norm": 0.6781780011524279, + "learning_rate": 7.2911251378279234e-06, + "loss": 0.5164, + "step": 19567 + }, + { + "epoch": 0.5997302929998775, + "grad_norm": 1.689218684971553, + "learning_rate": 7.29016962913685e-06, + "loss": 0.6372, + "step": 19568 + }, + { + "epoch": 0.5997609415226186, + "grad_norm": 1.4572438226620663, + "learning_rate": 7.2892141471464336e-06, + "loss": 0.7472, + "step": 19569 + }, + { + "epoch": 0.5997915900453599, + "grad_norm": 0.6729243628017043, + "learning_rate": 7.288258691866079e-06, + "loss": 0.5724, + "step": 19570 + }, + { + "epoch": 0.599822238568101, + "grad_norm": 1.5903696105922467, + "learning_rate": 7.287303263305211e-06, + "loss": 0.6981, + "step": 19571 + }, + { + "epoch": 0.5998528870908422, + "grad_norm": 1.7199821881291264, + "learning_rate": 7.286347861473236e-06, + "loss": 0.6527, + "step": 19572 + }, + { + "epoch": 0.5998835356135834, + "grad_norm": 1.5408629740207491, + "learning_rate": 7.285392486379568e-06, + "loss": 0.8119, + "step": 19573 + }, + { + "epoch": 0.5999141841363246, + "grad_norm": 0.6972170751582701, + "learning_rate": 7.284437138033625e-06, + "loss": 0.5851, + "step": 19574 + }, + { + "epoch": 0.5999448326590658, + "grad_norm": 0.6683227388613618, + "learning_rate": 7.283481816444816e-06, + "loss": 0.5711, + "step": 19575 + }, + { + "epoch": 0.599975481181807, + "grad_norm": 1.63516254765768, + "learning_rate": 7.282526521622555e-06, + "loss": 0.7503, + "step": 19576 + }, + { + "epoch": 0.6000061297045483, + "grad_norm": 1.758993148199661, + "learning_rate": 7.2815712535762565e-06, + "loss": 0.7939, + "step": 19577 + }, + { + "epoch": 0.6000367782272894, + "grad_norm": 1.3769836795390793, + "learning_rate": 7.280616012315335e-06, + "loss": 0.6306, + "step": 19578 + }, + { + "epoch": 0.6000674267500307, + "grad_norm": 1.5656775940288914, + "learning_rate": 7.279660797849193e-06, + "loss": 0.6585, + "step": 19579 + }, + { + "epoch": 0.6000980752727718, + "grad_norm": 1.5246609083798301, + "learning_rate": 7.278705610187255e-06, + "loss": 0.6928, + "step": 19580 + }, + { + "epoch": 0.6001287237955131, + "grad_norm": 1.510898616084218, + "learning_rate": 7.277750449338923e-06, + "loss": 0.7094, + "step": 19581 + }, + { + "epoch": 0.6001593723182542, + "grad_norm": 1.5158659429253143, + "learning_rate": 7.276795315313616e-06, + "loss": 0.7295, + "step": 19582 + }, + { + "epoch": 0.6001900208409955, + "grad_norm": 1.6226081813826778, + "learning_rate": 7.27584020812074e-06, + "loss": 0.7561, + "step": 19583 + }, + { + "epoch": 0.6002206693637366, + "grad_norm": 1.3453947323709026, + "learning_rate": 7.274885127769706e-06, + "loss": 0.6676, + "step": 19584 + }, + { + "epoch": 0.6002513178864779, + "grad_norm": 1.3990250538737048, + "learning_rate": 7.273930074269928e-06, + "loss": 0.7063, + "step": 19585 + }, + { + "epoch": 0.600281966409219, + "grad_norm": 1.5500115047531058, + "learning_rate": 7.2729750476308145e-06, + "loss": 0.6303, + "step": 19586 + }, + { + "epoch": 0.6003126149319603, + "grad_norm": 1.6790622454530215, + "learning_rate": 7.272020047861773e-06, + "loss": 0.7027, + "step": 19587 + }, + { + "epoch": 0.6003432634547015, + "grad_norm": 0.6891725119058518, + "learning_rate": 7.271065074972219e-06, + "loss": 0.5569, + "step": 19588 + }, + { + "epoch": 0.6003739119774427, + "grad_norm": 1.3201278303772443, + "learning_rate": 7.270110128971556e-06, + "loss": 0.6203, + "step": 19589 + }, + { + "epoch": 0.6004045605001839, + "grad_norm": 1.5855800053062281, + "learning_rate": 7.269155209869198e-06, + "loss": 0.6928, + "step": 19590 + }, + { + "epoch": 0.6004352090229251, + "grad_norm": 1.4273573593891646, + "learning_rate": 7.268200317674556e-06, + "loss": 0.7635, + "step": 19591 + }, + { + "epoch": 0.6004658575456663, + "grad_norm": 1.4893420658722418, + "learning_rate": 7.267245452397028e-06, + "loss": 0.6411, + "step": 19592 + }, + { + "epoch": 0.6004965060684075, + "grad_norm": 1.507864816024488, + "learning_rate": 7.2662906140460365e-06, + "loss": 0.7056, + "step": 19593 + }, + { + "epoch": 0.6005271545911487, + "grad_norm": 1.5695785864225733, + "learning_rate": 7.265335802630981e-06, + "loss": 0.6329, + "step": 19594 + }, + { + "epoch": 0.60055780311389, + "grad_norm": 1.4506103496841107, + "learning_rate": 7.264381018161268e-06, + "loss": 0.7187, + "step": 19595 + }, + { + "epoch": 0.6005884516366311, + "grad_norm": 1.8162044413863494, + "learning_rate": 7.263426260646314e-06, + "loss": 0.6893, + "step": 19596 + }, + { + "epoch": 0.6006191001593724, + "grad_norm": 1.573394566180205, + "learning_rate": 7.262471530095516e-06, + "loss": 0.626, + "step": 19597 + }, + { + "epoch": 0.6006497486821135, + "grad_norm": 1.528943997062597, + "learning_rate": 7.261516826518289e-06, + "loss": 0.7186, + "step": 19598 + }, + { + "epoch": 0.6006803972048548, + "grad_norm": 1.5499052020170927, + "learning_rate": 7.260562149924039e-06, + "loss": 0.6034, + "step": 19599 + }, + { + "epoch": 0.6007110457275959, + "grad_norm": 1.4921764311986085, + "learning_rate": 7.259607500322168e-06, + "loss": 0.6887, + "step": 19600 + }, + { + "epoch": 0.6007416942503372, + "grad_norm": 1.5967484270384495, + "learning_rate": 7.258652877722088e-06, + "loss": 0.7266, + "step": 19601 + }, + { + "epoch": 0.6007723427730783, + "grad_norm": 1.7729259184872623, + "learning_rate": 7.257698282133203e-06, + "loss": 0.6102, + "step": 19602 + }, + { + "epoch": 0.6008029912958195, + "grad_norm": 1.412712819869554, + "learning_rate": 7.256743713564915e-06, + "loss": 0.61, + "step": 19603 + }, + { + "epoch": 0.6008336398185607, + "grad_norm": 1.611880675216647, + "learning_rate": 7.255789172026637e-06, + "loss": 0.6699, + "step": 19604 + }, + { + "epoch": 0.6008642883413019, + "grad_norm": 1.6320834746416428, + "learning_rate": 7.2548346575277695e-06, + "loss": 0.6642, + "step": 19605 + }, + { + "epoch": 0.6008949368640432, + "grad_norm": 0.6690271325989998, + "learning_rate": 7.253880170077716e-06, + "loss": 0.5687, + "step": 19606 + }, + { + "epoch": 0.6009255853867843, + "grad_norm": 1.7193388269248464, + "learning_rate": 7.252925709685885e-06, + "loss": 0.7377, + "step": 19607 + }, + { + "epoch": 0.6009562339095256, + "grad_norm": 1.4164725180376467, + "learning_rate": 7.25197127636168e-06, + "loss": 0.6522, + "step": 19608 + }, + { + "epoch": 0.6009868824322667, + "grad_norm": 0.6602850010202143, + "learning_rate": 7.2510168701145046e-06, + "loss": 0.5561, + "step": 19609 + }, + { + "epoch": 0.601017530955008, + "grad_norm": 1.5412598253502363, + "learning_rate": 7.250062490953765e-06, + "loss": 0.6336, + "step": 19610 + }, + { + "epoch": 0.6010481794777491, + "grad_norm": 1.5433739618729607, + "learning_rate": 7.2491081388888606e-06, + "loss": 0.6598, + "step": 19611 + }, + { + "epoch": 0.6010788280004904, + "grad_norm": 0.6803159304068914, + "learning_rate": 7.248153813929203e-06, + "loss": 0.5881, + "step": 19612 + }, + { + "epoch": 0.6011094765232315, + "grad_norm": 1.6696719657042618, + "learning_rate": 7.247199516084187e-06, + "loss": 0.7239, + "step": 19613 + }, + { + "epoch": 0.6011401250459728, + "grad_norm": 1.718243583043798, + "learning_rate": 7.246245245363216e-06, + "loss": 0.703, + "step": 19614 + }, + { + "epoch": 0.601170773568714, + "grad_norm": 1.4035934166949475, + "learning_rate": 7.245291001775697e-06, + "loss": 0.6293, + "step": 19615 + }, + { + "epoch": 0.6012014220914552, + "grad_norm": 1.7211615273338934, + "learning_rate": 7.24433678533103e-06, + "loss": 0.6443, + "step": 19616 + }, + { + "epoch": 0.6012320706141964, + "grad_norm": 1.5911635642546318, + "learning_rate": 7.243382596038619e-06, + "loss": 0.7456, + "step": 19617 + }, + { + "epoch": 0.6012627191369376, + "grad_norm": 1.5452273197536954, + "learning_rate": 7.242428433907864e-06, + "loss": 0.777, + "step": 19618 + }, + { + "epoch": 0.6012933676596788, + "grad_norm": 1.3354838446089417, + "learning_rate": 7.241474298948166e-06, + "loss": 0.6994, + "step": 19619 + }, + { + "epoch": 0.60132401618242, + "grad_norm": 1.6616108234814455, + "learning_rate": 7.2405201911689285e-06, + "loss": 0.7297, + "step": 19620 + }, + { + "epoch": 0.6013546647051612, + "grad_norm": 1.736172723142796, + "learning_rate": 7.2395661105795545e-06, + "loss": 0.6364, + "step": 19621 + }, + { + "epoch": 0.6013853132279025, + "grad_norm": 1.7996455129629712, + "learning_rate": 7.238612057189436e-06, + "loss": 0.7169, + "step": 19622 + }, + { + "epoch": 0.6014159617506436, + "grad_norm": 1.5717431427585447, + "learning_rate": 7.237658031007985e-06, + "loss": 0.7046, + "step": 19623 + }, + { + "epoch": 0.6014466102733849, + "grad_norm": 1.5557928063655955, + "learning_rate": 7.23670403204459e-06, + "loss": 0.7027, + "step": 19624 + }, + { + "epoch": 0.601477258796126, + "grad_norm": 1.506937984698558, + "learning_rate": 7.235750060308664e-06, + "loss": 0.6328, + "step": 19625 + }, + { + "epoch": 0.6015079073188673, + "grad_norm": 1.4890856888375672, + "learning_rate": 7.234796115809597e-06, + "loss": 0.7198, + "step": 19626 + }, + { + "epoch": 0.6015385558416084, + "grad_norm": 1.4203611098032602, + "learning_rate": 7.2338421985567896e-06, + "loss": 0.6482, + "step": 19627 + }, + { + "epoch": 0.6015692043643497, + "grad_norm": 1.8971291623186401, + "learning_rate": 7.232888308559645e-06, + "loss": 0.6885, + "step": 19628 + }, + { + "epoch": 0.6015998528870908, + "grad_norm": 1.59507627022314, + "learning_rate": 7.23193444582756e-06, + "loss": 0.6162, + "step": 19629 + }, + { + "epoch": 0.6016305014098321, + "grad_norm": 1.5785503498714981, + "learning_rate": 7.230980610369931e-06, + "loss": 0.6633, + "step": 19630 + }, + { + "epoch": 0.6016611499325732, + "grad_norm": 1.5371344391030874, + "learning_rate": 7.230026802196159e-06, + "loss": 0.7624, + "step": 19631 + }, + { + "epoch": 0.6016917984553145, + "grad_norm": 1.6814476043145308, + "learning_rate": 7.229073021315647e-06, + "loss": 0.6691, + "step": 19632 + }, + { + "epoch": 0.6017224469780557, + "grad_norm": 1.3793867281647727, + "learning_rate": 7.228119267737778e-06, + "loss": 0.6921, + "step": 19633 + }, + { + "epoch": 0.6017530955007968, + "grad_norm": 1.5168893200779618, + "learning_rate": 7.227165541471968e-06, + "loss": 0.8376, + "step": 19634 + }, + { + "epoch": 0.6017837440235381, + "grad_norm": 0.6346793103675381, + "learning_rate": 7.226211842527597e-06, + "loss": 0.5128, + "step": 19635 + }, + { + "epoch": 0.6018143925462792, + "grad_norm": 1.7249090894105241, + "learning_rate": 7.225258170914078e-06, + "loss": 0.7901, + "step": 19636 + }, + { + "epoch": 0.6018450410690205, + "grad_norm": 1.45666216695861, + "learning_rate": 7.2243045266407975e-06, + "loss": 0.6378, + "step": 19637 + }, + { + "epoch": 0.6018756895917616, + "grad_norm": 1.6167161301919197, + "learning_rate": 7.223350909717153e-06, + "loss": 0.7111, + "step": 19638 + }, + { + "epoch": 0.6019063381145029, + "grad_norm": 1.6070864515376555, + "learning_rate": 7.222397320152546e-06, + "loss": 0.6955, + "step": 19639 + }, + { + "epoch": 0.601936986637244, + "grad_norm": 1.5041893122867724, + "learning_rate": 7.221443757956366e-06, + "loss": 0.6743, + "step": 19640 + }, + { + "epoch": 0.6019676351599853, + "grad_norm": 1.8201280251699443, + "learning_rate": 7.22049022313801e-06, + "loss": 0.6996, + "step": 19641 + }, + { + "epoch": 0.6019982836827265, + "grad_norm": 1.4925938863660664, + "learning_rate": 7.219536715706878e-06, + "loss": 0.7518, + "step": 19642 + }, + { + "epoch": 0.6020289322054677, + "grad_norm": 1.4943815525736193, + "learning_rate": 7.2185832356723604e-06, + "loss": 0.7013, + "step": 19643 + }, + { + "epoch": 0.6020595807282089, + "grad_norm": 1.416534749709553, + "learning_rate": 7.2176297830438554e-06, + "loss": 0.735, + "step": 19644 + }, + { + "epoch": 0.6020902292509501, + "grad_norm": 1.4637861416424789, + "learning_rate": 7.2166763578307585e-06, + "loss": 0.5711, + "step": 19645 + }, + { + "epoch": 0.6021208777736913, + "grad_norm": 0.6944423641931954, + "learning_rate": 7.215722960042455e-06, + "loss": 0.5812, + "step": 19646 + }, + { + "epoch": 0.6021515262964325, + "grad_norm": 1.6990783501428948, + "learning_rate": 7.214769589688351e-06, + "loss": 0.6791, + "step": 19647 + }, + { + "epoch": 0.6021821748191737, + "grad_norm": 0.6501943253117487, + "learning_rate": 7.213816246777834e-06, + "loss": 0.5511, + "step": 19648 + }, + { + "epoch": 0.602212823341915, + "grad_norm": 1.515976420253902, + "learning_rate": 7.212862931320296e-06, + "loss": 0.568, + "step": 19649 + }, + { + "epoch": 0.6022434718646561, + "grad_norm": 0.6658262749936604, + "learning_rate": 7.211909643325134e-06, + "loss": 0.5763, + "step": 19650 + }, + { + "epoch": 0.6022741203873974, + "grad_norm": 1.537274447280411, + "learning_rate": 7.210956382801739e-06, + "loss": 0.6906, + "step": 19651 + }, + { + "epoch": 0.6023047689101385, + "grad_norm": 1.5867430418809856, + "learning_rate": 7.2100031497595055e-06, + "loss": 0.7079, + "step": 19652 + }, + { + "epoch": 0.6023354174328798, + "grad_norm": 1.6000488925637621, + "learning_rate": 7.2090499442078244e-06, + "loss": 0.8505, + "step": 19653 + }, + { + "epoch": 0.6023660659556209, + "grad_norm": 1.6100316739854754, + "learning_rate": 7.208096766156088e-06, + "loss": 0.6929, + "step": 19654 + }, + { + "epoch": 0.6023967144783622, + "grad_norm": 1.6139643009494693, + "learning_rate": 7.207143615613691e-06, + "loss": 0.7401, + "step": 19655 + }, + { + "epoch": 0.6024273630011033, + "grad_norm": 1.5679517273791233, + "learning_rate": 7.206190492590021e-06, + "loss": 0.6679, + "step": 19656 + }, + { + "epoch": 0.6024580115238446, + "grad_norm": 1.5398366894012125, + "learning_rate": 7.205237397094469e-06, + "loss": 0.7745, + "step": 19657 + }, + { + "epoch": 0.6024886600465857, + "grad_norm": 1.419522439455116, + "learning_rate": 7.204284329136428e-06, + "loss": 0.6227, + "step": 19658 + }, + { + "epoch": 0.602519308569327, + "grad_norm": 1.4798718069600874, + "learning_rate": 7.2033312887252916e-06, + "loss": 0.7227, + "step": 19659 + }, + { + "epoch": 0.6025499570920682, + "grad_norm": 1.5702336554169654, + "learning_rate": 7.202378275870445e-06, + "loss": 0.7221, + "step": 19660 + }, + { + "epoch": 0.6025806056148094, + "grad_norm": 1.622008744064973, + "learning_rate": 7.201425290581282e-06, + "loss": 0.7323, + "step": 19661 + }, + { + "epoch": 0.6026112541375506, + "grad_norm": 1.6358205638516838, + "learning_rate": 7.20047233286719e-06, + "loss": 0.7768, + "step": 19662 + }, + { + "epoch": 0.6026419026602918, + "grad_norm": 1.4994573237183921, + "learning_rate": 7.1995194027375625e-06, + "loss": 0.6949, + "step": 19663 + }, + { + "epoch": 0.602672551183033, + "grad_norm": 1.5193094755833452, + "learning_rate": 7.198566500201789e-06, + "loss": 0.7882, + "step": 19664 + }, + { + "epoch": 0.6027031997057741, + "grad_norm": 1.5874959909940067, + "learning_rate": 7.197613625269251e-06, + "loss": 0.671, + "step": 19665 + }, + { + "epoch": 0.6027338482285154, + "grad_norm": 1.5838674575370921, + "learning_rate": 7.196660777949349e-06, + "loss": 0.6537, + "step": 19666 + }, + { + "epoch": 0.6027644967512565, + "grad_norm": 0.6732577418348713, + "learning_rate": 7.195707958251464e-06, + "loss": 0.5461, + "step": 19667 + }, + { + "epoch": 0.6027951452739978, + "grad_norm": 1.5521949238677226, + "learning_rate": 7.194755166184981e-06, + "loss": 0.6477, + "step": 19668 + }, + { + "epoch": 0.602825793796739, + "grad_norm": 1.5509976794180722, + "learning_rate": 7.1938024017592975e-06, + "loss": 0.6734, + "step": 19669 + }, + { + "epoch": 0.6028564423194802, + "grad_norm": 1.5780872613543515, + "learning_rate": 7.1928496649837955e-06, + "loss": 0.6656, + "step": 19670 + }, + { + "epoch": 0.6028870908422214, + "grad_norm": 1.5025146493788386, + "learning_rate": 7.1918969558678655e-06, + "loss": 0.6913, + "step": 19671 + }, + { + "epoch": 0.6029177393649626, + "grad_norm": 1.5278061424200995, + "learning_rate": 7.190944274420893e-06, + "loss": 0.6808, + "step": 19672 + }, + { + "epoch": 0.6029483878877038, + "grad_norm": 1.5471518430723028, + "learning_rate": 7.189991620652264e-06, + "loss": 0.6584, + "step": 19673 + }, + { + "epoch": 0.602979036410445, + "grad_norm": 1.4884363334067654, + "learning_rate": 7.189038994571367e-06, + "loss": 0.6076, + "step": 19674 + }, + { + "epoch": 0.6030096849331862, + "grad_norm": 1.5939099600210822, + "learning_rate": 7.18808639618759e-06, + "loss": 0.6544, + "step": 19675 + }, + { + "epoch": 0.6030403334559274, + "grad_norm": 1.564781409647611, + "learning_rate": 7.187133825510313e-06, + "loss": 0.6956, + "step": 19676 + }, + { + "epoch": 0.6030709819786686, + "grad_norm": 1.5804377033438768, + "learning_rate": 7.186181282548931e-06, + "loss": 0.7147, + "step": 19677 + }, + { + "epoch": 0.6031016305014099, + "grad_norm": 1.5006806604719072, + "learning_rate": 7.185228767312819e-06, + "loss": 0.7941, + "step": 19678 + }, + { + "epoch": 0.603132279024151, + "grad_norm": 1.6839772138091271, + "learning_rate": 7.184276279811373e-06, + "loss": 0.6705, + "step": 19679 + }, + { + "epoch": 0.6031629275468923, + "grad_norm": 0.6843631774146994, + "learning_rate": 7.183323820053974e-06, + "loss": 0.55, + "step": 19680 + }, + { + "epoch": 0.6031935760696334, + "grad_norm": 1.6232340428856256, + "learning_rate": 7.182371388050001e-06, + "loss": 0.6409, + "step": 19681 + }, + { + "epoch": 0.6032242245923747, + "grad_norm": 1.2892563602770628, + "learning_rate": 7.181418983808847e-06, + "loss": 0.5855, + "step": 19682 + }, + { + "epoch": 0.6032548731151158, + "grad_norm": 0.6699455837985291, + "learning_rate": 7.180466607339893e-06, + "loss": 0.5843, + "step": 19683 + }, + { + "epoch": 0.6032855216378571, + "grad_norm": 1.712330824206786, + "learning_rate": 7.17951425865252e-06, + "loss": 0.7984, + "step": 19684 + }, + { + "epoch": 0.6033161701605982, + "grad_norm": 1.4104301894356908, + "learning_rate": 7.178561937756119e-06, + "loss": 0.637, + "step": 19685 + }, + { + "epoch": 0.6033468186833395, + "grad_norm": 1.5825418139010816, + "learning_rate": 7.1776096446600686e-06, + "loss": 0.7225, + "step": 19686 + }, + { + "epoch": 0.6033774672060807, + "grad_norm": 1.408907296118667, + "learning_rate": 7.176657379373748e-06, + "loss": 0.6456, + "step": 19687 + }, + { + "epoch": 0.6034081157288219, + "grad_norm": 1.6521417306670692, + "learning_rate": 7.17570514190655e-06, + "loss": 0.7015, + "step": 19688 + }, + { + "epoch": 0.6034387642515631, + "grad_norm": 1.8540511573932532, + "learning_rate": 7.174752932267846e-06, + "loss": 0.7098, + "step": 19689 + }, + { + "epoch": 0.6034694127743043, + "grad_norm": 0.648148907481475, + "learning_rate": 7.1738007504670305e-06, + "loss": 0.5499, + "step": 19690 + }, + { + "epoch": 0.6035000612970455, + "grad_norm": 1.369018896298334, + "learning_rate": 7.172848596513477e-06, + "loss": 0.7231, + "step": 19691 + }, + { + "epoch": 0.6035307098197867, + "grad_norm": 0.6271260950045283, + "learning_rate": 7.171896470416567e-06, + "loss": 0.5116, + "step": 19692 + }, + { + "epoch": 0.6035613583425279, + "grad_norm": 0.663207106111667, + "learning_rate": 7.170944372185687e-06, + "loss": 0.5521, + "step": 19693 + }, + { + "epoch": 0.6035920068652691, + "grad_norm": 1.5340952957755036, + "learning_rate": 7.1699923018302175e-06, + "loss": 0.738, + "step": 19694 + }, + { + "epoch": 0.6036226553880103, + "grad_norm": 1.7997814809980412, + "learning_rate": 7.169040259359534e-06, + "loss": 0.6764, + "step": 19695 + }, + { + "epoch": 0.6036533039107514, + "grad_norm": 1.7072515440600124, + "learning_rate": 7.1680882447830245e-06, + "loss": 0.6124, + "step": 19696 + }, + { + "epoch": 0.6036839524334927, + "grad_norm": 1.5096423733223505, + "learning_rate": 7.167136258110063e-06, + "loss": 0.701, + "step": 19697 + }, + { + "epoch": 0.6037146009562339, + "grad_norm": 1.6554780891344854, + "learning_rate": 7.1661842993500355e-06, + "loss": 0.7369, + "step": 19698 + }, + { + "epoch": 0.6037452494789751, + "grad_norm": 1.369572036718105, + "learning_rate": 7.16523236851232e-06, + "loss": 0.7908, + "step": 19699 + }, + { + "epoch": 0.6037758980017163, + "grad_norm": 1.6171631066296634, + "learning_rate": 7.1642804656062926e-06, + "loss": 0.6924, + "step": 19700 + }, + { + "epoch": 0.6038065465244575, + "grad_norm": 1.5966925814196342, + "learning_rate": 7.163328590641337e-06, + "loss": 0.6895, + "step": 19701 + }, + { + "epoch": 0.6038371950471987, + "grad_norm": 1.5509347840298724, + "learning_rate": 7.162376743626831e-06, + "loss": 0.6361, + "step": 19702 + }, + { + "epoch": 0.6038678435699399, + "grad_norm": 1.503257466068707, + "learning_rate": 7.161424924572151e-06, + "loss": 0.6559, + "step": 19703 + }, + { + "epoch": 0.6038984920926811, + "grad_norm": 1.4361584117900852, + "learning_rate": 7.160473133486678e-06, + "loss": 0.7097, + "step": 19704 + }, + { + "epoch": 0.6039291406154224, + "grad_norm": 1.4673594194918091, + "learning_rate": 7.159521370379789e-06, + "loss": 0.5923, + "step": 19705 + }, + { + "epoch": 0.6039597891381635, + "grad_norm": 0.6927402414675734, + "learning_rate": 7.1585696352608646e-06, + "loss": 0.5483, + "step": 19706 + }, + { + "epoch": 0.6039904376609048, + "grad_norm": 0.6923455770735184, + "learning_rate": 7.157617928139282e-06, + "loss": 0.5659, + "step": 19707 + }, + { + "epoch": 0.6040210861836459, + "grad_norm": 1.9925837051829034, + "learning_rate": 7.156666249024412e-06, + "loss": 0.6801, + "step": 19708 + }, + { + "epoch": 0.6040517347063872, + "grad_norm": 1.5339622376343522, + "learning_rate": 7.155714597925643e-06, + "loss": 0.7454, + "step": 19709 + }, + { + "epoch": 0.6040823832291283, + "grad_norm": 1.3466676530614474, + "learning_rate": 7.154762974852343e-06, + "loss": 0.758, + "step": 19710 + }, + { + "epoch": 0.6041130317518696, + "grad_norm": 1.656054400777906, + "learning_rate": 7.153811379813891e-06, + "loss": 0.6693, + "step": 19711 + }, + { + "epoch": 0.6041436802746107, + "grad_norm": 1.7608354915978501, + "learning_rate": 7.152859812819664e-06, + "loss": 0.5975, + "step": 19712 + }, + { + "epoch": 0.604174328797352, + "grad_norm": 1.4404937887943623, + "learning_rate": 7.151908273879038e-06, + "loss": 0.6094, + "step": 19713 + }, + { + "epoch": 0.6042049773200932, + "grad_norm": 0.6821092380449004, + "learning_rate": 7.150956763001386e-06, + "loss": 0.5635, + "step": 19714 + }, + { + "epoch": 0.6042356258428344, + "grad_norm": 1.4671104823136005, + "learning_rate": 7.15000528019609e-06, + "loss": 0.6268, + "step": 19715 + }, + { + "epoch": 0.6042662743655756, + "grad_norm": 1.6368806529255695, + "learning_rate": 7.149053825472517e-06, + "loss": 0.6608, + "step": 19716 + }, + { + "epoch": 0.6042969228883168, + "grad_norm": 1.69748246531903, + "learning_rate": 7.148102398840049e-06, + "loss": 0.6493, + "step": 19717 + }, + { + "epoch": 0.604327571411058, + "grad_norm": 0.6910071521034978, + "learning_rate": 7.14715100030806e-06, + "loss": 0.5337, + "step": 19718 + }, + { + "epoch": 0.6043582199337992, + "grad_norm": 1.4841940201678916, + "learning_rate": 7.146199629885916e-06, + "loss": 0.6709, + "step": 19719 + }, + { + "epoch": 0.6043888684565404, + "grad_norm": 1.4544428112104717, + "learning_rate": 7.145248287583003e-06, + "loss": 0.6734, + "step": 19720 + }, + { + "epoch": 0.6044195169792816, + "grad_norm": 1.5256914696516426, + "learning_rate": 7.144296973408688e-06, + "loss": 0.7377, + "step": 19721 + }, + { + "epoch": 0.6044501655020228, + "grad_norm": 1.369858451401933, + "learning_rate": 7.143345687372343e-06, + "loss": 0.6042, + "step": 19722 + }, + { + "epoch": 0.6044808140247641, + "grad_norm": 1.5959636116699907, + "learning_rate": 7.1423944294833445e-06, + "loss": 0.6171, + "step": 19723 + }, + { + "epoch": 0.6045114625475052, + "grad_norm": 1.563195016718978, + "learning_rate": 7.141443199751064e-06, + "loss": 0.7332, + "step": 19724 + }, + { + "epoch": 0.6045421110702465, + "grad_norm": 1.3988458428897073, + "learning_rate": 7.140491998184877e-06, + "loss": 0.7789, + "step": 19725 + }, + { + "epoch": 0.6045727595929876, + "grad_norm": 1.7911651996130065, + "learning_rate": 7.139540824794153e-06, + "loss": 0.773, + "step": 19726 + }, + { + "epoch": 0.6046034081157288, + "grad_norm": 1.3199052671945635, + "learning_rate": 7.1385896795882645e-06, + "loss": 0.7045, + "step": 19727 + }, + { + "epoch": 0.60463405663847, + "grad_norm": 1.5595196157757327, + "learning_rate": 7.1376385625765855e-06, + "loss": 0.7163, + "step": 19728 + }, + { + "epoch": 0.6046647051612112, + "grad_norm": 1.459052867536339, + "learning_rate": 7.136687473768489e-06, + "loss": 0.7648, + "step": 19729 + }, + { + "epoch": 0.6046953536839524, + "grad_norm": 1.5835781462564722, + "learning_rate": 7.135736413173337e-06, + "loss": 0.6717, + "step": 19730 + }, + { + "epoch": 0.6047260022066936, + "grad_norm": 1.3930485110719482, + "learning_rate": 7.134785380800512e-06, + "loss": 0.6617, + "step": 19731 + }, + { + "epoch": 0.6047566507294349, + "grad_norm": 0.6936382156750842, + "learning_rate": 7.133834376659379e-06, + "loss": 0.5583, + "step": 19732 + }, + { + "epoch": 0.604787299252176, + "grad_norm": 1.8081830766015743, + "learning_rate": 7.132883400759305e-06, + "loss": 0.7174, + "step": 19733 + }, + { + "epoch": 0.6048179477749173, + "grad_norm": 1.4837363951028062, + "learning_rate": 7.131932453109669e-06, + "loss": 0.7062, + "step": 19734 + }, + { + "epoch": 0.6048485962976584, + "grad_norm": 1.6280968412419854, + "learning_rate": 7.130981533719833e-06, + "loss": 0.6154, + "step": 19735 + }, + { + "epoch": 0.6048792448203997, + "grad_norm": 1.5066924213686435, + "learning_rate": 7.130030642599173e-06, + "loss": 0.6341, + "step": 19736 + }, + { + "epoch": 0.6049098933431408, + "grad_norm": 1.551843171086108, + "learning_rate": 7.129079779757054e-06, + "loss": 0.7658, + "step": 19737 + }, + { + "epoch": 0.6049405418658821, + "grad_norm": 1.5396062230872438, + "learning_rate": 7.128128945202846e-06, + "loss": 0.6902, + "step": 19738 + }, + { + "epoch": 0.6049711903886232, + "grad_norm": 1.5800763556822297, + "learning_rate": 7.127178138945919e-06, + "loss": 0.6695, + "step": 19739 + }, + { + "epoch": 0.6050018389113645, + "grad_norm": 1.7169638452094869, + "learning_rate": 7.126227360995643e-06, + "loss": 0.639, + "step": 19740 + }, + { + "epoch": 0.6050324874341056, + "grad_norm": 1.4131304655877253, + "learning_rate": 7.125276611361379e-06, + "loss": 0.713, + "step": 19741 + }, + { + "epoch": 0.6050631359568469, + "grad_norm": 1.684799881135552, + "learning_rate": 7.124325890052506e-06, + "loss": 0.6183, + "step": 19742 + }, + { + "epoch": 0.6050937844795881, + "grad_norm": 1.345803142347132, + "learning_rate": 7.123375197078379e-06, + "loss": 0.6638, + "step": 19743 + }, + { + "epoch": 0.6051244330023293, + "grad_norm": 1.7868368961580634, + "learning_rate": 7.122424532448379e-06, + "loss": 0.7209, + "step": 19744 + }, + { + "epoch": 0.6051550815250705, + "grad_norm": 1.4281914961354405, + "learning_rate": 7.121473896171864e-06, + "loss": 0.6047, + "step": 19745 + }, + { + "epoch": 0.6051857300478117, + "grad_norm": 1.4056285110678357, + "learning_rate": 7.120523288258201e-06, + "loss": 0.6111, + "step": 19746 + }, + { + "epoch": 0.6052163785705529, + "grad_norm": 1.6784330667312675, + "learning_rate": 7.11957270871676e-06, + "loss": 0.7325, + "step": 19747 + }, + { + "epoch": 0.6052470270932941, + "grad_norm": 1.4556826121191093, + "learning_rate": 7.118622157556907e-06, + "loss": 0.7448, + "step": 19748 + }, + { + "epoch": 0.6052776756160353, + "grad_norm": 0.6876166921995269, + "learning_rate": 7.117671634788006e-06, + "loss": 0.5765, + "step": 19749 + }, + { + "epoch": 0.6053083241387766, + "grad_norm": 1.8521782630653918, + "learning_rate": 7.1167211404194245e-06, + "loss": 0.6502, + "step": 19750 + }, + { + "epoch": 0.6053389726615177, + "grad_norm": 1.5067671212987297, + "learning_rate": 7.115770674460526e-06, + "loss": 0.6891, + "step": 19751 + }, + { + "epoch": 0.605369621184259, + "grad_norm": 1.6399770744454885, + "learning_rate": 7.114820236920681e-06, + "loss": 0.7199, + "step": 19752 + }, + { + "epoch": 0.6054002697070001, + "grad_norm": 1.3800481845885668, + "learning_rate": 7.113869827809247e-06, + "loss": 0.6035, + "step": 19753 + }, + { + "epoch": 0.6054309182297414, + "grad_norm": 1.5809973147940788, + "learning_rate": 7.112919447135592e-06, + "loss": 0.6942, + "step": 19754 + }, + { + "epoch": 0.6054615667524825, + "grad_norm": 1.9188355962860182, + "learning_rate": 7.111969094909081e-06, + "loss": 0.6856, + "step": 19755 + }, + { + "epoch": 0.6054922152752238, + "grad_norm": 1.532558429106949, + "learning_rate": 7.111018771139079e-06, + "loss": 0.6602, + "step": 19756 + }, + { + "epoch": 0.6055228637979649, + "grad_norm": 1.559636500899365, + "learning_rate": 7.110068475834945e-06, + "loss": 0.7211, + "step": 19757 + }, + { + "epoch": 0.6055535123207061, + "grad_norm": 1.4974407463565527, + "learning_rate": 7.1091182090060475e-06, + "loss": 0.6931, + "step": 19758 + }, + { + "epoch": 0.6055841608434473, + "grad_norm": 1.4674574863690637, + "learning_rate": 7.108167970661751e-06, + "loss": 0.6241, + "step": 19759 + }, + { + "epoch": 0.6056148093661885, + "grad_norm": 1.6671452912317408, + "learning_rate": 7.107217760811409e-06, + "loss": 0.7259, + "step": 19760 + }, + { + "epoch": 0.6056454578889298, + "grad_norm": 1.6780631545471631, + "learning_rate": 7.106267579464396e-06, + "loss": 0.7152, + "step": 19761 + }, + { + "epoch": 0.6056761064116709, + "grad_norm": 1.6234626707046058, + "learning_rate": 7.105317426630063e-06, + "loss": 0.7132, + "step": 19762 + }, + { + "epoch": 0.6057067549344122, + "grad_norm": 1.6384237622301323, + "learning_rate": 7.104367302317785e-06, + "loss": 0.7812, + "step": 19763 + }, + { + "epoch": 0.6057374034571533, + "grad_norm": 1.5231828175641904, + "learning_rate": 7.103417206536913e-06, + "loss": 0.655, + "step": 19764 + }, + { + "epoch": 0.6057680519798946, + "grad_norm": 1.5682787383980934, + "learning_rate": 7.102467139296813e-06, + "loss": 0.6742, + "step": 19765 + }, + { + "epoch": 0.6057987005026357, + "grad_norm": 1.6531578744373912, + "learning_rate": 7.101517100606846e-06, + "loss": 0.7629, + "step": 19766 + }, + { + "epoch": 0.605829349025377, + "grad_norm": 1.3353500139304582, + "learning_rate": 7.100567090476373e-06, + "loss": 0.704, + "step": 19767 + }, + { + "epoch": 0.6058599975481181, + "grad_norm": 1.418564267412485, + "learning_rate": 7.099617108914751e-06, + "loss": 0.5928, + "step": 19768 + }, + { + "epoch": 0.6058906460708594, + "grad_norm": 1.6923565298993848, + "learning_rate": 7.098667155931348e-06, + "loss": 0.7632, + "step": 19769 + }, + { + "epoch": 0.6059212945936006, + "grad_norm": 0.7063888000660001, + "learning_rate": 7.097717231535517e-06, + "loss": 0.5778, + "step": 19770 + }, + { + "epoch": 0.6059519431163418, + "grad_norm": 1.589334498480037, + "learning_rate": 7.0967673357366215e-06, + "loss": 0.616, + "step": 19771 + }, + { + "epoch": 0.605982591639083, + "grad_norm": 0.6800808301395388, + "learning_rate": 7.095817468544024e-06, + "loss": 0.5665, + "step": 19772 + }, + { + "epoch": 0.6060132401618242, + "grad_norm": 0.6342612262070805, + "learning_rate": 7.094867629967073e-06, + "loss": 0.5235, + "step": 19773 + }, + { + "epoch": 0.6060438886845654, + "grad_norm": 1.5804243490383336, + "learning_rate": 7.093917820015141e-06, + "loss": 0.7134, + "step": 19774 + }, + { + "epoch": 0.6060745372073066, + "grad_norm": 1.478200262704804, + "learning_rate": 7.092968038697578e-06, + "loss": 0.6685, + "step": 19775 + }, + { + "epoch": 0.6061051857300478, + "grad_norm": 1.5669557906708445, + "learning_rate": 7.092018286023743e-06, + "loss": 0.7147, + "step": 19776 + }, + { + "epoch": 0.606135834252789, + "grad_norm": 1.4113118351913208, + "learning_rate": 7.0910685620029975e-06, + "loss": 0.658, + "step": 19777 + }, + { + "epoch": 0.6061664827755302, + "grad_norm": 1.3974218201429862, + "learning_rate": 7.090118866644695e-06, + "loss": 0.6479, + "step": 19778 + }, + { + "epoch": 0.6061971312982715, + "grad_norm": 1.5908578726682514, + "learning_rate": 7.089169199958199e-06, + "loss": 0.6734, + "step": 19779 + }, + { + "epoch": 0.6062277798210126, + "grad_norm": 1.4596251079837737, + "learning_rate": 7.088219561952864e-06, + "loss": 0.7378, + "step": 19780 + }, + { + "epoch": 0.6062584283437539, + "grad_norm": 1.5492576526261892, + "learning_rate": 7.087269952638044e-06, + "loss": 0.6094, + "step": 19781 + }, + { + "epoch": 0.606289076866495, + "grad_norm": 1.8073623845984732, + "learning_rate": 7.0863203720231e-06, + "loss": 0.6481, + "step": 19782 + }, + { + "epoch": 0.6063197253892363, + "grad_norm": 0.7123556034713515, + "learning_rate": 7.08537082011739e-06, + "loss": 0.5242, + "step": 19783 + }, + { + "epoch": 0.6063503739119774, + "grad_norm": 1.485753774693069, + "learning_rate": 7.0844212969302595e-06, + "loss": 0.5579, + "step": 19784 + }, + { + "epoch": 0.6063810224347187, + "grad_norm": 1.446365898536027, + "learning_rate": 7.083471802471079e-06, + "loss": 0.7115, + "step": 19785 + }, + { + "epoch": 0.6064116709574598, + "grad_norm": 1.6833842622578563, + "learning_rate": 7.082522336749196e-06, + "loss": 0.7694, + "step": 19786 + }, + { + "epoch": 0.6064423194802011, + "grad_norm": 1.5799172794886354, + "learning_rate": 7.081572899773963e-06, + "loss": 0.7463, + "step": 19787 + }, + { + "epoch": 0.6064729680029423, + "grad_norm": 1.7171687743316, + "learning_rate": 7.0806234915547416e-06, + "loss": 0.743, + "step": 19788 + }, + { + "epoch": 0.6065036165256834, + "grad_norm": 1.6593182923120708, + "learning_rate": 7.079674112100882e-06, + "loss": 0.7302, + "step": 19789 + }, + { + "epoch": 0.6065342650484247, + "grad_norm": 1.4391007646282898, + "learning_rate": 7.078724761421743e-06, + "loss": 0.5051, + "step": 19790 + }, + { + "epoch": 0.6065649135711658, + "grad_norm": 0.6956026036830545, + "learning_rate": 7.0777754395266755e-06, + "loss": 0.558, + "step": 19791 + }, + { + "epoch": 0.6065955620939071, + "grad_norm": 0.6578488381023916, + "learning_rate": 7.076826146425033e-06, + "loss": 0.511, + "step": 19792 + }, + { + "epoch": 0.6066262106166482, + "grad_norm": 1.4810030484878256, + "learning_rate": 7.0758768821261716e-06, + "loss": 0.6735, + "step": 19793 + }, + { + "epoch": 0.6066568591393895, + "grad_norm": 0.6604402715344473, + "learning_rate": 7.074927646639447e-06, + "loss": 0.5385, + "step": 19794 + }, + { + "epoch": 0.6066875076621306, + "grad_norm": 1.78682275307531, + "learning_rate": 7.0739784399742e-06, + "loss": 0.7534, + "step": 19795 + }, + { + "epoch": 0.6067181561848719, + "grad_norm": 1.485543550946255, + "learning_rate": 7.0730292621398014e-06, + "loss": 0.6451, + "step": 19796 + }, + { + "epoch": 0.606748804707613, + "grad_norm": 1.575857774396402, + "learning_rate": 7.072080113145588e-06, + "loss": 0.5479, + "step": 19797 + }, + { + "epoch": 0.6067794532303543, + "grad_norm": 0.6799640107835287, + "learning_rate": 7.071130993000921e-06, + "loss": 0.5373, + "step": 19798 + }, + { + "epoch": 0.6068101017530955, + "grad_norm": 1.7625575872717931, + "learning_rate": 7.07018190171515e-06, + "loss": 0.6705, + "step": 19799 + }, + { + "epoch": 0.6068407502758367, + "grad_norm": 1.5329246282938305, + "learning_rate": 7.069232839297624e-06, + "loss": 0.729, + "step": 19800 + }, + { + "epoch": 0.6068713987985779, + "grad_norm": 1.4762751067229536, + "learning_rate": 7.068283805757698e-06, + "loss": 0.7552, + "step": 19801 + }, + { + "epoch": 0.6069020473213191, + "grad_norm": 1.5309228385395441, + "learning_rate": 7.067334801104724e-06, + "loss": 0.7615, + "step": 19802 + }, + { + "epoch": 0.6069326958440603, + "grad_norm": 1.5357431824872299, + "learning_rate": 7.066385825348046e-06, + "loss": 0.74, + "step": 19803 + }, + { + "epoch": 0.6069633443668015, + "grad_norm": 1.5318668305441554, + "learning_rate": 7.065436878497025e-06, + "loss": 0.6978, + "step": 19804 + }, + { + "epoch": 0.6069939928895427, + "grad_norm": 1.7952155372582133, + "learning_rate": 7.064487960560999e-06, + "loss": 0.6803, + "step": 19805 + }, + { + "epoch": 0.607024641412284, + "grad_norm": 1.6274540649407947, + "learning_rate": 7.063539071549329e-06, + "loss": 0.7231, + "step": 19806 + }, + { + "epoch": 0.6070552899350251, + "grad_norm": 1.5277507167406983, + "learning_rate": 7.062590211471359e-06, + "loss": 0.7255, + "step": 19807 + }, + { + "epoch": 0.6070859384577664, + "grad_norm": 1.4115183940795037, + "learning_rate": 7.061641380336437e-06, + "loss": 0.7017, + "step": 19808 + }, + { + "epoch": 0.6071165869805075, + "grad_norm": 1.6340906259645476, + "learning_rate": 7.060692578153916e-06, + "loss": 0.6661, + "step": 19809 + }, + { + "epoch": 0.6071472355032488, + "grad_norm": 1.5397973643631064, + "learning_rate": 7.059743804933144e-06, + "loss": 0.6009, + "step": 19810 + }, + { + "epoch": 0.6071778840259899, + "grad_norm": 1.6603275286595682, + "learning_rate": 7.0587950606834645e-06, + "loss": 0.6473, + "step": 19811 + }, + { + "epoch": 0.6072085325487312, + "grad_norm": 0.6883465739406659, + "learning_rate": 7.057846345414233e-06, + "loss": 0.5465, + "step": 19812 + }, + { + "epoch": 0.6072391810714723, + "grad_norm": 1.6277579814782186, + "learning_rate": 7.056897659134796e-06, + "loss": 0.7369, + "step": 19813 + }, + { + "epoch": 0.6072698295942136, + "grad_norm": 1.5393144281333884, + "learning_rate": 7.055949001854494e-06, + "loss": 0.6848, + "step": 19814 + }, + { + "epoch": 0.6073004781169548, + "grad_norm": 1.6496976265458265, + "learning_rate": 7.055000373582686e-06, + "loss": 0.7952, + "step": 19815 + }, + { + "epoch": 0.607331126639696, + "grad_norm": 1.6728927151417414, + "learning_rate": 7.054051774328705e-06, + "loss": 0.8254, + "step": 19816 + }, + { + "epoch": 0.6073617751624372, + "grad_norm": 1.6671229607966216, + "learning_rate": 7.053103204101915e-06, + "loss": 0.6892, + "step": 19817 + }, + { + "epoch": 0.6073924236851784, + "grad_norm": 1.3744715367016822, + "learning_rate": 7.052154662911648e-06, + "loss": 0.6667, + "step": 19818 + }, + { + "epoch": 0.6074230722079196, + "grad_norm": 1.3667522449667693, + "learning_rate": 7.0512061507672535e-06, + "loss": 0.6734, + "step": 19819 + }, + { + "epoch": 0.6074537207306607, + "grad_norm": 1.4796311801963666, + "learning_rate": 7.050257667678082e-06, + "loss": 0.623, + "step": 19820 + }, + { + "epoch": 0.607484369253402, + "grad_norm": 1.6477923215731651, + "learning_rate": 7.0493092136534765e-06, + "loss": 0.5785, + "step": 19821 + }, + { + "epoch": 0.6075150177761431, + "grad_norm": 0.7066939366556354, + "learning_rate": 7.048360788702781e-06, + "loss": 0.5795, + "step": 19822 + }, + { + "epoch": 0.6075456662988844, + "grad_norm": 1.6755325845819904, + "learning_rate": 7.047412392835344e-06, + "loss": 0.6861, + "step": 19823 + }, + { + "epoch": 0.6075763148216256, + "grad_norm": 1.4399539935332428, + "learning_rate": 7.046464026060504e-06, + "loss": 0.6551, + "step": 19824 + }, + { + "epoch": 0.6076069633443668, + "grad_norm": 1.5261021115352942, + "learning_rate": 7.045515688387614e-06, + "loss": 0.6183, + "step": 19825 + }, + { + "epoch": 0.607637611867108, + "grad_norm": 1.4832419182658563, + "learning_rate": 7.044567379826015e-06, + "loss": 0.6715, + "step": 19826 + }, + { + "epoch": 0.6076682603898492, + "grad_norm": 1.3082759580998886, + "learning_rate": 7.043619100385044e-06, + "loss": 0.6169, + "step": 19827 + }, + { + "epoch": 0.6076989089125904, + "grad_norm": 1.4551906339762544, + "learning_rate": 7.0426708500740555e-06, + "loss": 0.7179, + "step": 19828 + }, + { + "epoch": 0.6077295574353316, + "grad_norm": 1.334201421047857, + "learning_rate": 7.041722628902387e-06, + "loss": 0.7185, + "step": 19829 + }, + { + "epoch": 0.6077602059580728, + "grad_norm": 1.3354913860001627, + "learning_rate": 7.040774436879378e-06, + "loss": 0.6279, + "step": 19830 + }, + { + "epoch": 0.607790854480814, + "grad_norm": 1.5615741966407333, + "learning_rate": 7.039826274014381e-06, + "loss": 0.6741, + "step": 19831 + }, + { + "epoch": 0.6078215030035552, + "grad_norm": 1.694303355277099, + "learning_rate": 7.03887814031673e-06, + "loss": 0.7649, + "step": 19832 + }, + { + "epoch": 0.6078521515262965, + "grad_norm": 0.6919577082958721, + "learning_rate": 7.03793003579577e-06, + "loss": 0.5552, + "step": 19833 + }, + { + "epoch": 0.6078828000490376, + "grad_norm": 1.7963604881110433, + "learning_rate": 7.0369819604608456e-06, + "loss": 0.7698, + "step": 19834 + }, + { + "epoch": 0.6079134485717789, + "grad_norm": 1.467317010332212, + "learning_rate": 7.036033914321294e-06, + "loss": 0.71, + "step": 19835 + }, + { + "epoch": 0.60794409709452, + "grad_norm": 1.593761212611128, + "learning_rate": 7.03508589738646e-06, + "loss": 0.709, + "step": 19836 + }, + { + "epoch": 0.6079747456172613, + "grad_norm": 1.6253460752219613, + "learning_rate": 7.034137909665686e-06, + "loss": 0.7992, + "step": 19837 + }, + { + "epoch": 0.6080053941400024, + "grad_norm": 1.5079811846358906, + "learning_rate": 7.033189951168302e-06, + "loss": 0.6916, + "step": 19838 + }, + { + "epoch": 0.6080360426627437, + "grad_norm": 1.5470803451856379, + "learning_rate": 7.032242021903664e-06, + "loss": 0.5664, + "step": 19839 + }, + { + "epoch": 0.6080666911854848, + "grad_norm": 1.3443403668244471, + "learning_rate": 7.031294121881102e-06, + "loss": 0.6774, + "step": 19840 + }, + { + "epoch": 0.6080973397082261, + "grad_norm": 1.5554142765408037, + "learning_rate": 7.030346251109959e-06, + "loss": 0.7235, + "step": 19841 + }, + { + "epoch": 0.6081279882309673, + "grad_norm": 1.7468876580284927, + "learning_rate": 7.029398409599573e-06, + "loss": 0.6574, + "step": 19842 + }, + { + "epoch": 0.6081586367537085, + "grad_norm": 1.8261030341358562, + "learning_rate": 7.028450597359284e-06, + "loss": 0.6587, + "step": 19843 + }, + { + "epoch": 0.6081892852764497, + "grad_norm": 1.6044499188692434, + "learning_rate": 7.027502814398434e-06, + "loss": 0.7446, + "step": 19844 + }, + { + "epoch": 0.6082199337991909, + "grad_norm": 1.5366942242019492, + "learning_rate": 7.0265550607263585e-06, + "loss": 0.7888, + "step": 19845 + }, + { + "epoch": 0.6082505823219321, + "grad_norm": 0.661724549993557, + "learning_rate": 7.025607336352395e-06, + "loss": 0.56, + "step": 19846 + }, + { + "epoch": 0.6082812308446733, + "grad_norm": 1.5088201698981454, + "learning_rate": 7.024659641285885e-06, + "loss": 0.6202, + "step": 19847 + }, + { + "epoch": 0.6083118793674145, + "grad_norm": 1.7527897778328922, + "learning_rate": 7.023711975536167e-06, + "loss": 0.7307, + "step": 19848 + }, + { + "epoch": 0.6083425278901557, + "grad_norm": 1.3538688349753862, + "learning_rate": 7.0227643391125735e-06, + "loss": 0.734, + "step": 19849 + }, + { + "epoch": 0.6083731764128969, + "grad_norm": 1.6016303101696225, + "learning_rate": 7.021816732024447e-06, + "loss": 0.615, + "step": 19850 + }, + { + "epoch": 0.608403824935638, + "grad_norm": 1.7472297044246519, + "learning_rate": 7.020869154281118e-06, + "loss": 0.7111, + "step": 19851 + }, + { + "epoch": 0.6084344734583793, + "grad_norm": 0.6482861295955069, + "learning_rate": 7.019921605891931e-06, + "loss": 0.564, + "step": 19852 + }, + { + "epoch": 0.6084651219811205, + "grad_norm": 1.6988728579459078, + "learning_rate": 7.0189740868662185e-06, + "loss": 0.6876, + "step": 19853 + }, + { + "epoch": 0.6084957705038617, + "grad_norm": 1.5633622524762512, + "learning_rate": 7.0180265972133144e-06, + "loss": 0.6844, + "step": 19854 + }, + { + "epoch": 0.6085264190266029, + "grad_norm": 1.7260830426575335, + "learning_rate": 7.01707913694256e-06, + "loss": 0.6771, + "step": 19855 + }, + { + "epoch": 0.6085570675493441, + "grad_norm": 0.6659412174991048, + "learning_rate": 7.01613170606329e-06, + "loss": 0.5549, + "step": 19856 + }, + { + "epoch": 0.6085877160720853, + "grad_norm": 1.6499282175812024, + "learning_rate": 7.015184304584832e-06, + "loss": 0.7217, + "step": 19857 + }, + { + "epoch": 0.6086183645948265, + "grad_norm": 1.5512218496581127, + "learning_rate": 7.014236932516533e-06, + "loss": 0.7052, + "step": 19858 + }, + { + "epoch": 0.6086490131175677, + "grad_norm": 1.6140530019093375, + "learning_rate": 7.013289589867715e-06, + "loss": 0.7168, + "step": 19859 + }, + { + "epoch": 0.608679661640309, + "grad_norm": 1.4852779727845484, + "learning_rate": 7.012342276647725e-06, + "loss": 0.7895, + "step": 19860 + }, + { + "epoch": 0.6087103101630501, + "grad_norm": 1.506234041360132, + "learning_rate": 7.011394992865889e-06, + "loss": 0.712, + "step": 19861 + }, + { + "epoch": 0.6087409586857914, + "grad_norm": 1.5370574638013157, + "learning_rate": 7.01044773853154e-06, + "loss": 0.6538, + "step": 19862 + }, + { + "epoch": 0.6087716072085325, + "grad_norm": 0.6519388866042904, + "learning_rate": 7.009500513654017e-06, + "loss": 0.5638, + "step": 19863 + }, + { + "epoch": 0.6088022557312738, + "grad_norm": 0.6559819451307753, + "learning_rate": 7.00855331824265e-06, + "loss": 0.5167, + "step": 19864 + }, + { + "epoch": 0.6088329042540149, + "grad_norm": 1.697310506394305, + "learning_rate": 7.0076061523067715e-06, + "loss": 0.7444, + "step": 19865 + }, + { + "epoch": 0.6088635527767562, + "grad_norm": 1.4235318516749373, + "learning_rate": 7.006659015855717e-06, + "loss": 0.6011, + "step": 19866 + }, + { + "epoch": 0.6088942012994973, + "grad_norm": 1.6910280863637461, + "learning_rate": 7.005711908898819e-06, + "loss": 0.6359, + "step": 19867 + }, + { + "epoch": 0.6089248498222386, + "grad_norm": 1.628903201365918, + "learning_rate": 7.004764831445401e-06, + "loss": 0.6464, + "step": 19868 + }, + { + "epoch": 0.6089554983449798, + "grad_norm": 0.6728853379257645, + "learning_rate": 7.003817783504808e-06, + "loss": 0.5469, + "step": 19869 + }, + { + "epoch": 0.608986146867721, + "grad_norm": 1.6416231418575833, + "learning_rate": 7.002870765086359e-06, + "loss": 0.7761, + "step": 19870 + }, + { + "epoch": 0.6090167953904622, + "grad_norm": 1.553575532400217, + "learning_rate": 7.001923776199397e-06, + "loss": 0.6691, + "step": 19871 + }, + { + "epoch": 0.6090474439132034, + "grad_norm": 1.737215770879627, + "learning_rate": 7.000976816853247e-06, + "loss": 0.7255, + "step": 19872 + }, + { + "epoch": 0.6090780924359446, + "grad_norm": 1.5157476222487472, + "learning_rate": 7.0000298870572344e-06, + "loss": 0.7409, + "step": 19873 + }, + { + "epoch": 0.6091087409586858, + "grad_norm": 1.6513319333800827, + "learning_rate": 6.9990829868207e-06, + "loss": 0.7488, + "step": 19874 + }, + { + "epoch": 0.609139389481427, + "grad_norm": 1.430237062557326, + "learning_rate": 6.9981361161529675e-06, + "loss": 0.6526, + "step": 19875 + }, + { + "epoch": 0.6091700380041682, + "grad_norm": 1.3659354244583657, + "learning_rate": 6.9971892750633655e-06, + "loss": 0.6428, + "step": 19876 + }, + { + "epoch": 0.6092006865269094, + "grad_norm": 1.4325748168323504, + "learning_rate": 6.996242463561227e-06, + "loss": 0.7126, + "step": 19877 + }, + { + "epoch": 0.6092313350496507, + "grad_norm": 1.5052787423982261, + "learning_rate": 6.99529568165588e-06, + "loss": 0.7104, + "step": 19878 + }, + { + "epoch": 0.6092619835723918, + "grad_norm": 0.6830545299140264, + "learning_rate": 6.994348929356653e-06, + "loss": 0.569, + "step": 19879 + }, + { + "epoch": 0.6092926320951331, + "grad_norm": 1.3893140810441809, + "learning_rate": 6.99340220667288e-06, + "loss": 0.6779, + "step": 19880 + }, + { + "epoch": 0.6093232806178742, + "grad_norm": 1.5250833690776535, + "learning_rate": 6.992455513613876e-06, + "loss": 0.767, + "step": 19881 + }, + { + "epoch": 0.6093539291406154, + "grad_norm": 1.6519474426494059, + "learning_rate": 6.991508850188986e-06, + "loss": 0.7272, + "step": 19882 + }, + { + "epoch": 0.6093845776633566, + "grad_norm": 1.6165750971850883, + "learning_rate": 6.990562216407525e-06, + "loss": 0.6076, + "step": 19883 + }, + { + "epoch": 0.6094152261860978, + "grad_norm": 1.5394985585293748, + "learning_rate": 6.989615612278823e-06, + "loss": 0.7043, + "step": 19884 + }, + { + "epoch": 0.609445874708839, + "grad_norm": 1.7072560630470917, + "learning_rate": 6.9886690378122105e-06, + "loss": 0.7131, + "step": 19885 + }, + { + "epoch": 0.6094765232315802, + "grad_norm": 1.4710410197495094, + "learning_rate": 6.987722493017012e-06, + "loss": 0.5303, + "step": 19886 + }, + { + "epoch": 0.6095071717543215, + "grad_norm": 1.5060083616901898, + "learning_rate": 6.986775977902554e-06, + "loss": 0.6394, + "step": 19887 + }, + { + "epoch": 0.6095378202770626, + "grad_norm": 1.5984324375713301, + "learning_rate": 6.985829492478162e-06, + "loss": 0.7276, + "step": 19888 + }, + { + "epoch": 0.6095684687998039, + "grad_norm": 1.583720338263387, + "learning_rate": 6.984883036753165e-06, + "loss": 0.7179, + "step": 19889 + }, + { + "epoch": 0.609599117322545, + "grad_norm": 1.6019444095535023, + "learning_rate": 6.983936610736886e-06, + "loss": 0.8334, + "step": 19890 + }, + { + "epoch": 0.6096297658452863, + "grad_norm": 1.4608520152793438, + "learning_rate": 6.982990214438655e-06, + "loss": 0.5782, + "step": 19891 + }, + { + "epoch": 0.6096604143680274, + "grad_norm": 1.6230756724582427, + "learning_rate": 6.9820438478677875e-06, + "loss": 0.6806, + "step": 19892 + }, + { + "epoch": 0.6096910628907687, + "grad_norm": 1.3885111001382902, + "learning_rate": 6.981097511033619e-06, + "loss": 0.6445, + "step": 19893 + }, + { + "epoch": 0.6097217114135098, + "grad_norm": 1.4909105670446305, + "learning_rate": 6.980151203945468e-06, + "loss": 0.6824, + "step": 19894 + }, + { + "epoch": 0.6097523599362511, + "grad_norm": 1.579324214435533, + "learning_rate": 6.9792049266126576e-06, + "loss": 0.6869, + "step": 19895 + }, + { + "epoch": 0.6097830084589922, + "grad_norm": 1.4120580460975454, + "learning_rate": 6.978258679044516e-06, + "loss": 0.6457, + "step": 19896 + }, + { + "epoch": 0.6098136569817335, + "grad_norm": 1.3267385197196035, + "learning_rate": 6.977312461250363e-06, + "loss": 0.5959, + "step": 19897 + }, + { + "epoch": 0.6098443055044747, + "grad_norm": 1.4280892345953422, + "learning_rate": 6.9763662732395254e-06, + "loss": 0.7073, + "step": 19898 + }, + { + "epoch": 0.6098749540272159, + "grad_norm": 1.6777479865718599, + "learning_rate": 6.9754201150213244e-06, + "loss": 0.6424, + "step": 19899 + }, + { + "epoch": 0.6099056025499571, + "grad_norm": 1.4861093918403456, + "learning_rate": 6.974473986605081e-06, + "loss": 0.6219, + "step": 19900 + }, + { + "epoch": 0.6099362510726983, + "grad_norm": 1.3248037203009364, + "learning_rate": 6.973527888000123e-06, + "loss": 0.6117, + "step": 19901 + }, + { + "epoch": 0.6099668995954395, + "grad_norm": 1.6939273664830912, + "learning_rate": 6.972581819215768e-06, + "loss": 0.7204, + "step": 19902 + }, + { + "epoch": 0.6099975481181807, + "grad_norm": 1.5465452684810084, + "learning_rate": 6.971635780261337e-06, + "loss": 0.6208, + "step": 19903 + }, + { + "epoch": 0.6100281966409219, + "grad_norm": 1.3522514162753871, + "learning_rate": 6.970689771146155e-06, + "loss": 0.5733, + "step": 19904 + }, + { + "epoch": 0.6100588451636632, + "grad_norm": 1.5776504611878077, + "learning_rate": 6.96974379187954e-06, + "loss": 0.6981, + "step": 19905 + }, + { + "epoch": 0.6100894936864043, + "grad_norm": 1.7622839643636896, + "learning_rate": 6.968797842470816e-06, + "loss": 0.6101, + "step": 19906 + }, + { + "epoch": 0.6101201422091456, + "grad_norm": 1.7968038634225934, + "learning_rate": 6.967851922929303e-06, + "loss": 0.8195, + "step": 19907 + }, + { + "epoch": 0.6101507907318867, + "grad_norm": 1.8252011245302349, + "learning_rate": 6.966906033264318e-06, + "loss": 0.6932, + "step": 19908 + }, + { + "epoch": 0.610181439254628, + "grad_norm": 1.4960307617472126, + "learning_rate": 6.9659601734851865e-06, + "loss": 0.6611, + "step": 19909 + }, + { + "epoch": 0.6102120877773691, + "grad_norm": 1.5178744975232554, + "learning_rate": 6.9650143436012285e-06, + "loss": 0.7264, + "step": 19910 + }, + { + "epoch": 0.6102427363001104, + "grad_norm": 1.5207592645195698, + "learning_rate": 6.964068543621753e-06, + "loss": 0.6595, + "step": 19911 + }, + { + "epoch": 0.6102733848228515, + "grad_norm": 1.4820923988400498, + "learning_rate": 6.963122773556095e-06, + "loss": 0.7292, + "step": 19912 + }, + { + "epoch": 0.6103040333455927, + "grad_norm": 1.6780442604566435, + "learning_rate": 6.962177033413562e-06, + "loss": 0.7377, + "step": 19913 + }, + { + "epoch": 0.610334681868334, + "grad_norm": 0.6850714416799089, + "learning_rate": 6.961231323203475e-06, + "loss": 0.5817, + "step": 19914 + }, + { + "epoch": 0.6103653303910751, + "grad_norm": 1.5289478636512444, + "learning_rate": 6.960285642935154e-06, + "loss": 0.6869, + "step": 19915 + }, + { + "epoch": 0.6103959789138164, + "grad_norm": 1.6317868781577658, + "learning_rate": 6.9593399926179154e-06, + "loss": 0.7004, + "step": 19916 + }, + { + "epoch": 0.6104266274365575, + "grad_norm": 1.528532717850323, + "learning_rate": 6.958394372261079e-06, + "loss": 0.604, + "step": 19917 + }, + { + "epoch": 0.6104572759592988, + "grad_norm": 0.6794518395590222, + "learning_rate": 6.957448781873961e-06, + "loss": 0.566, + "step": 19918 + }, + { + "epoch": 0.6104879244820399, + "grad_norm": 0.6760717059182711, + "learning_rate": 6.956503221465878e-06, + "loss": 0.5498, + "step": 19919 + }, + { + "epoch": 0.6105185730047812, + "grad_norm": 2.012166125770665, + "learning_rate": 6.955557691046149e-06, + "loss": 0.7038, + "step": 19920 + }, + { + "epoch": 0.6105492215275223, + "grad_norm": 1.3121752450368163, + "learning_rate": 6.95461219062409e-06, + "loss": 0.5689, + "step": 19921 + }, + { + "epoch": 0.6105798700502636, + "grad_norm": 0.6739601475243695, + "learning_rate": 6.95366672020901e-06, + "loss": 0.5547, + "step": 19922 + }, + { + "epoch": 0.6106105185730047, + "grad_norm": 1.319200916215164, + "learning_rate": 6.952721279810238e-06, + "loss": 0.7148, + "step": 19923 + }, + { + "epoch": 0.610641167095746, + "grad_norm": 1.5062314879270928, + "learning_rate": 6.951775869437077e-06, + "loss": 0.6964, + "step": 19924 + }, + { + "epoch": 0.6106718156184872, + "grad_norm": 1.824633279387881, + "learning_rate": 6.950830489098854e-06, + "loss": 0.6186, + "step": 19925 + }, + { + "epoch": 0.6107024641412284, + "grad_norm": 1.6061664814875438, + "learning_rate": 6.949885138804877e-06, + "loss": 0.724, + "step": 19926 + }, + { + "epoch": 0.6107331126639696, + "grad_norm": 1.6095197586643013, + "learning_rate": 6.948939818564459e-06, + "loss": 0.621, + "step": 19927 + }, + { + "epoch": 0.6107637611867108, + "grad_norm": 1.512470511036822, + "learning_rate": 6.947994528386921e-06, + "loss": 0.627, + "step": 19928 + }, + { + "epoch": 0.610794409709452, + "grad_norm": 1.4149880100586614, + "learning_rate": 6.947049268281573e-06, + "loss": 0.7526, + "step": 19929 + }, + { + "epoch": 0.6108250582321932, + "grad_norm": 1.6636875539403548, + "learning_rate": 6.946104038257728e-06, + "loss": 0.7906, + "step": 19930 + }, + { + "epoch": 0.6108557067549344, + "grad_norm": 1.4940824703615254, + "learning_rate": 6.945158838324704e-06, + "loss": 0.6425, + "step": 19931 + }, + { + "epoch": 0.6108863552776757, + "grad_norm": 1.6016070355443754, + "learning_rate": 6.944213668491808e-06, + "loss": 0.7219, + "step": 19932 + }, + { + "epoch": 0.6109170038004168, + "grad_norm": 1.605378748450522, + "learning_rate": 6.943268528768359e-06, + "loss": 0.7806, + "step": 19933 + }, + { + "epoch": 0.6109476523231581, + "grad_norm": 0.6445233634781216, + "learning_rate": 6.94232341916367e-06, + "loss": 0.5544, + "step": 19934 + }, + { + "epoch": 0.6109783008458992, + "grad_norm": 1.4808953040438078, + "learning_rate": 6.941378339687044e-06, + "loss": 0.5257, + "step": 19935 + }, + { + "epoch": 0.6110089493686405, + "grad_norm": 1.4326521904198035, + "learning_rate": 6.940433290347805e-06, + "loss": 0.6938, + "step": 19936 + }, + { + "epoch": 0.6110395978913816, + "grad_norm": 1.4626498613724157, + "learning_rate": 6.939488271155259e-06, + "loss": 0.6861, + "step": 19937 + }, + { + "epoch": 0.6110702464141229, + "grad_norm": 1.5681242636342734, + "learning_rate": 6.938543282118717e-06, + "loss": 0.6746, + "step": 19938 + }, + { + "epoch": 0.611100894936864, + "grad_norm": 1.6486592136179512, + "learning_rate": 6.937598323247492e-06, + "loss": 0.7646, + "step": 19939 + }, + { + "epoch": 0.6111315434596053, + "grad_norm": 0.6566644803809627, + "learning_rate": 6.936653394550894e-06, + "loss": 0.554, + "step": 19940 + }, + { + "epoch": 0.6111621919823464, + "grad_norm": 1.72661845358788, + "learning_rate": 6.935708496038232e-06, + "loss": 0.8151, + "step": 19941 + }, + { + "epoch": 0.6111928405050877, + "grad_norm": 0.6853877369770539, + "learning_rate": 6.934763627718821e-06, + "loss": 0.5492, + "step": 19942 + }, + { + "epoch": 0.6112234890278289, + "grad_norm": 0.6689152743643126, + "learning_rate": 6.933818789601966e-06, + "loss": 0.5554, + "step": 19943 + }, + { + "epoch": 0.61125413755057, + "grad_norm": 1.4664816111690238, + "learning_rate": 6.9328739816969824e-06, + "loss": 0.5257, + "step": 19944 + }, + { + "epoch": 0.6112847860733113, + "grad_norm": 1.603612206429526, + "learning_rate": 6.931929204013175e-06, + "loss": 0.7915, + "step": 19945 + }, + { + "epoch": 0.6113154345960524, + "grad_norm": 1.8249861474355462, + "learning_rate": 6.930984456559851e-06, + "loss": 0.7202, + "step": 19946 + }, + { + "epoch": 0.6113460831187937, + "grad_norm": 1.4558625113848855, + "learning_rate": 6.9300397393463255e-06, + "loss": 0.6817, + "step": 19947 + }, + { + "epoch": 0.6113767316415348, + "grad_norm": 1.6790708939764338, + "learning_rate": 6.929095052381905e-06, + "loss": 0.757, + "step": 19948 + }, + { + "epoch": 0.6114073801642761, + "grad_norm": 1.5706311563177415, + "learning_rate": 6.928150395675892e-06, + "loss": 0.7684, + "step": 19949 + }, + { + "epoch": 0.6114380286870172, + "grad_norm": 1.666367587020437, + "learning_rate": 6.927205769237602e-06, + "loss": 0.6846, + "step": 19950 + }, + { + "epoch": 0.6114686772097585, + "grad_norm": 1.6016971870594434, + "learning_rate": 6.926261173076339e-06, + "loss": 0.7262, + "step": 19951 + }, + { + "epoch": 0.6114993257324997, + "grad_norm": 1.52871164561777, + "learning_rate": 6.925316607201411e-06, + "loss": 0.71, + "step": 19952 + }, + { + "epoch": 0.6115299742552409, + "grad_norm": 1.536471778594829, + "learning_rate": 6.92437207162213e-06, + "loss": 0.6554, + "step": 19953 + }, + { + "epoch": 0.6115606227779821, + "grad_norm": 1.5578143698058087, + "learning_rate": 6.923427566347789e-06, + "loss": 0.6899, + "step": 19954 + }, + { + "epoch": 0.6115912713007233, + "grad_norm": 1.4654790221876655, + "learning_rate": 6.922483091387711e-06, + "loss": 0.6409, + "step": 19955 + }, + { + "epoch": 0.6116219198234645, + "grad_norm": 1.5188224548694103, + "learning_rate": 6.9215386467511915e-06, + "loss": 0.5289, + "step": 19956 + }, + { + "epoch": 0.6116525683462057, + "grad_norm": 1.4345359405897402, + "learning_rate": 6.920594232447538e-06, + "loss": 0.7112, + "step": 19957 + }, + { + "epoch": 0.6116832168689469, + "grad_norm": 1.634406772797302, + "learning_rate": 6.919649848486061e-06, + "loss": 0.6382, + "step": 19958 + }, + { + "epoch": 0.6117138653916881, + "grad_norm": 1.6381790531115645, + "learning_rate": 6.9187054948760575e-06, + "loss": 0.7912, + "step": 19959 + }, + { + "epoch": 0.6117445139144293, + "grad_norm": 1.5232585522878, + "learning_rate": 6.91776117162684e-06, + "loss": 0.6899, + "step": 19960 + }, + { + "epoch": 0.6117751624371706, + "grad_norm": 1.5891681770235841, + "learning_rate": 6.916816878747712e-06, + "loss": 0.6862, + "step": 19961 + }, + { + "epoch": 0.6118058109599117, + "grad_norm": 1.4969462680820376, + "learning_rate": 6.915872616247971e-06, + "loss": 0.6175, + "step": 19962 + }, + { + "epoch": 0.611836459482653, + "grad_norm": 1.412258030087347, + "learning_rate": 6.914928384136931e-06, + "loss": 0.6664, + "step": 19963 + }, + { + "epoch": 0.6118671080053941, + "grad_norm": 1.4229378067553753, + "learning_rate": 6.9139841824238915e-06, + "loss": 0.7151, + "step": 19964 + }, + { + "epoch": 0.6118977565281354, + "grad_norm": 1.5977356385592492, + "learning_rate": 6.91304001111815e-06, + "loss": 0.5848, + "step": 19965 + }, + { + "epoch": 0.6119284050508765, + "grad_norm": 1.4884133399296968, + "learning_rate": 6.912095870229021e-06, + "loss": 0.6723, + "step": 19966 + }, + { + "epoch": 0.6119590535736178, + "grad_norm": 0.6893721616357381, + "learning_rate": 6.9111517597658e-06, + "loss": 0.5515, + "step": 19967 + }, + { + "epoch": 0.611989702096359, + "grad_norm": 1.472219712548373, + "learning_rate": 6.9102076797377885e-06, + "loss": 0.6962, + "step": 19968 + }, + { + "epoch": 0.6120203506191002, + "grad_norm": 1.6173256642918914, + "learning_rate": 6.909263630154293e-06, + "loss": 0.7275, + "step": 19969 + }, + { + "epoch": 0.6120509991418414, + "grad_norm": 1.5775183857447854, + "learning_rate": 6.908319611024612e-06, + "loss": 0.5681, + "step": 19970 + }, + { + "epoch": 0.6120816476645826, + "grad_norm": 1.700921614305141, + "learning_rate": 6.90737562235805e-06, + "loss": 0.6553, + "step": 19971 + }, + { + "epoch": 0.6121122961873238, + "grad_norm": 1.5455532607062246, + "learning_rate": 6.906431664163909e-06, + "loss": 0.7367, + "step": 19972 + }, + { + "epoch": 0.612142944710065, + "grad_norm": 1.7947956999235963, + "learning_rate": 6.905487736451486e-06, + "loss": 0.6787, + "step": 19973 + }, + { + "epoch": 0.6121735932328062, + "grad_norm": 0.6626519173264737, + "learning_rate": 6.904543839230085e-06, + "loss": 0.5475, + "step": 19974 + }, + { + "epoch": 0.6122042417555473, + "grad_norm": 1.4309749390083644, + "learning_rate": 6.903599972509009e-06, + "loss": 0.6648, + "step": 19975 + }, + { + "epoch": 0.6122348902782886, + "grad_norm": 1.7590410710315783, + "learning_rate": 6.9026561362975476e-06, + "loss": 0.7678, + "step": 19976 + }, + { + "epoch": 0.6122655388010297, + "grad_norm": 1.5377112574293674, + "learning_rate": 6.901712330605015e-06, + "loss": 0.6524, + "step": 19977 + }, + { + "epoch": 0.612296187323771, + "grad_norm": 1.5881827183753787, + "learning_rate": 6.900768555440696e-06, + "loss": 0.6209, + "step": 19978 + }, + { + "epoch": 0.6123268358465122, + "grad_norm": 1.4661969615892458, + "learning_rate": 6.899824810813904e-06, + "loss": 0.7038, + "step": 19979 + }, + { + "epoch": 0.6123574843692534, + "grad_norm": 1.6121203003977387, + "learning_rate": 6.89888109673393e-06, + "loss": 0.7678, + "step": 19980 + }, + { + "epoch": 0.6123881328919946, + "grad_norm": 1.5035326552191153, + "learning_rate": 6.897937413210071e-06, + "loss": 0.6737, + "step": 19981 + }, + { + "epoch": 0.6124187814147358, + "grad_norm": 1.4274371268052515, + "learning_rate": 6.89699376025163e-06, + "loss": 0.6607, + "step": 19982 + }, + { + "epoch": 0.612449429937477, + "grad_norm": 1.7169341907549165, + "learning_rate": 6.8960501378679045e-06, + "loss": 0.7138, + "step": 19983 + }, + { + "epoch": 0.6124800784602182, + "grad_norm": 1.6130021595812365, + "learning_rate": 6.895106546068189e-06, + "loss": 0.5982, + "step": 19984 + }, + { + "epoch": 0.6125107269829594, + "grad_norm": 1.6549231118669734, + "learning_rate": 6.894162984861785e-06, + "loss": 0.8239, + "step": 19985 + }, + { + "epoch": 0.6125413755057006, + "grad_norm": 1.646514110416527, + "learning_rate": 6.893219454257986e-06, + "loss": 0.6129, + "step": 19986 + }, + { + "epoch": 0.6125720240284418, + "grad_norm": 1.6378916736552436, + "learning_rate": 6.892275954266092e-06, + "loss": 0.6554, + "step": 19987 + }, + { + "epoch": 0.6126026725511831, + "grad_norm": 1.7134789370713341, + "learning_rate": 6.891332484895401e-06, + "loss": 0.6615, + "step": 19988 + }, + { + "epoch": 0.6126333210739242, + "grad_norm": 1.6033811793464945, + "learning_rate": 6.890389046155201e-06, + "loss": 0.7317, + "step": 19989 + }, + { + "epoch": 0.6126639695966655, + "grad_norm": 1.5871506435166525, + "learning_rate": 6.889445638054797e-06, + "loss": 0.768, + "step": 19990 + }, + { + "epoch": 0.6126946181194066, + "grad_norm": 1.5169500682071857, + "learning_rate": 6.88850226060348e-06, + "loss": 0.5837, + "step": 19991 + }, + { + "epoch": 0.6127252666421479, + "grad_norm": 1.4146175341368115, + "learning_rate": 6.887558913810545e-06, + "loss": 0.7027, + "step": 19992 + }, + { + "epoch": 0.612755915164889, + "grad_norm": 1.578090387074225, + "learning_rate": 6.88661559768529e-06, + "loss": 0.725, + "step": 19993 + }, + { + "epoch": 0.6127865636876303, + "grad_norm": 1.281930544834727, + "learning_rate": 6.885672312237009e-06, + "loss": 0.6237, + "step": 19994 + }, + { + "epoch": 0.6128172122103714, + "grad_norm": 1.5655471277788071, + "learning_rate": 6.884729057474992e-06, + "loss": 0.7132, + "step": 19995 + }, + { + "epoch": 0.6128478607331127, + "grad_norm": 1.596890582295688, + "learning_rate": 6.883785833408541e-06, + "loss": 0.7187, + "step": 19996 + }, + { + "epoch": 0.6128785092558539, + "grad_norm": 1.5781007766736066, + "learning_rate": 6.882842640046939e-06, + "loss": 0.6336, + "step": 19997 + }, + { + "epoch": 0.6129091577785951, + "grad_norm": 1.4958440616155644, + "learning_rate": 6.8818994773994944e-06, + "loss": 0.6925, + "step": 19998 + }, + { + "epoch": 0.6129398063013363, + "grad_norm": 1.5486824628386122, + "learning_rate": 6.880956345475488e-06, + "loss": 0.7035, + "step": 19999 + }, + { + "epoch": 0.6129704548240775, + "grad_norm": 0.6621950635813836, + "learning_rate": 6.880013244284215e-06, + "loss": 0.5569, + "step": 20000 + }, + { + "epoch": 0.6130011033468187, + "grad_norm": 1.3119155460750487, + "learning_rate": 6.879070173834972e-06, + "loss": 0.5982, + "step": 20001 + }, + { + "epoch": 0.6130317518695599, + "grad_norm": 1.4374856447196345, + "learning_rate": 6.878127134137049e-06, + "loss": 0.6875, + "step": 20002 + }, + { + "epoch": 0.6130624003923011, + "grad_norm": 0.6612964446944551, + "learning_rate": 6.877184125199736e-06, + "loss": 0.5449, + "step": 20003 + }, + { + "epoch": 0.6130930489150423, + "grad_norm": 1.397217142586499, + "learning_rate": 6.87624114703233e-06, + "loss": 0.6536, + "step": 20004 + }, + { + "epoch": 0.6131236974377835, + "grad_norm": 1.4860601428834719, + "learning_rate": 6.875298199644116e-06, + "loss": 0.6957, + "step": 20005 + }, + { + "epoch": 0.6131543459605246, + "grad_norm": 1.577389278465823, + "learning_rate": 6.874355283044392e-06, + "loss": 0.7752, + "step": 20006 + }, + { + "epoch": 0.6131849944832659, + "grad_norm": 1.5849730392487393, + "learning_rate": 6.873412397242445e-06, + "loss": 0.6795, + "step": 20007 + }, + { + "epoch": 0.6132156430060071, + "grad_norm": 1.6540898728397495, + "learning_rate": 6.8724695422475595e-06, + "loss": 0.7915, + "step": 20008 + }, + { + "epoch": 0.6132462915287483, + "grad_norm": 1.617781668032986, + "learning_rate": 6.871526718069039e-06, + "loss": 0.6898, + "step": 20009 + }, + { + "epoch": 0.6132769400514895, + "grad_norm": 1.4684696073514603, + "learning_rate": 6.870583924716164e-06, + "loss": 0.6272, + "step": 20010 + }, + { + "epoch": 0.6133075885742307, + "grad_norm": 1.398546882475319, + "learning_rate": 6.869641162198224e-06, + "loss": 0.6563, + "step": 20011 + }, + { + "epoch": 0.6133382370969719, + "grad_norm": 1.3993101068853995, + "learning_rate": 6.868698430524513e-06, + "loss": 0.7035, + "step": 20012 + }, + { + "epoch": 0.6133688856197131, + "grad_norm": 1.4522499823008783, + "learning_rate": 6.867755729704315e-06, + "loss": 0.7196, + "step": 20013 + }, + { + "epoch": 0.6133995341424543, + "grad_norm": 1.5429549809679786, + "learning_rate": 6.866813059746924e-06, + "loss": 0.7286, + "step": 20014 + }, + { + "epoch": 0.6134301826651956, + "grad_norm": 1.645581333262101, + "learning_rate": 6.865870420661625e-06, + "loss": 0.7663, + "step": 20015 + }, + { + "epoch": 0.6134608311879367, + "grad_norm": 1.3536970496990925, + "learning_rate": 6.864927812457704e-06, + "loss": 0.5413, + "step": 20016 + }, + { + "epoch": 0.613491479710678, + "grad_norm": 1.8570396053951734, + "learning_rate": 6.8639852351444544e-06, + "loss": 0.7396, + "step": 20017 + }, + { + "epoch": 0.6135221282334191, + "grad_norm": 1.3561917688102643, + "learning_rate": 6.863042688731163e-06, + "loss": 0.6136, + "step": 20018 + }, + { + "epoch": 0.6135527767561604, + "grad_norm": 1.3833722563208974, + "learning_rate": 6.862100173227109e-06, + "loss": 0.7028, + "step": 20019 + }, + { + "epoch": 0.6135834252789015, + "grad_norm": 1.5129163874680882, + "learning_rate": 6.861157688641589e-06, + "loss": 0.6648, + "step": 20020 + }, + { + "epoch": 0.6136140738016428, + "grad_norm": 0.6650816784223753, + "learning_rate": 6.860215234983885e-06, + "loss": 0.5406, + "step": 20021 + }, + { + "epoch": 0.6136447223243839, + "grad_norm": 1.564588961286012, + "learning_rate": 6.8592728122632805e-06, + "loss": 0.7367, + "step": 20022 + }, + { + "epoch": 0.6136753708471252, + "grad_norm": 1.4771079491134234, + "learning_rate": 6.858330420489067e-06, + "loss": 0.7268, + "step": 20023 + }, + { + "epoch": 0.6137060193698664, + "grad_norm": 0.6943463778919071, + "learning_rate": 6.8573880596705254e-06, + "loss": 0.5782, + "step": 20024 + }, + { + "epoch": 0.6137366678926076, + "grad_norm": 1.707573567105043, + "learning_rate": 6.856445729816947e-06, + "loss": 0.6837, + "step": 20025 + }, + { + "epoch": 0.6137673164153488, + "grad_norm": 1.4598410528711374, + "learning_rate": 6.855503430937611e-06, + "loss": 0.6762, + "step": 20026 + }, + { + "epoch": 0.61379796493809, + "grad_norm": 1.4274759031344617, + "learning_rate": 6.854561163041803e-06, + "loss": 0.7314, + "step": 20027 + }, + { + "epoch": 0.6138286134608312, + "grad_norm": 1.3848605863211692, + "learning_rate": 6.853618926138809e-06, + "loss": 0.7553, + "step": 20028 + }, + { + "epoch": 0.6138592619835724, + "grad_norm": 1.5841399409245631, + "learning_rate": 6.852676720237919e-06, + "loss": 0.7008, + "step": 20029 + }, + { + "epoch": 0.6138899105063136, + "grad_norm": 1.4954765143769781, + "learning_rate": 6.851734545348401e-06, + "loss": 0.6615, + "step": 20030 + }, + { + "epoch": 0.6139205590290548, + "grad_norm": 1.6336204444314555, + "learning_rate": 6.850792401479556e-06, + "loss": 0.6751, + "step": 20031 + }, + { + "epoch": 0.613951207551796, + "grad_norm": 0.6879625509727845, + "learning_rate": 6.849850288640651e-06, + "loss": 0.5477, + "step": 20032 + }, + { + "epoch": 0.6139818560745373, + "grad_norm": 1.3219440306178354, + "learning_rate": 6.848908206840985e-06, + "loss": 0.6152, + "step": 20033 + }, + { + "epoch": 0.6140125045972784, + "grad_norm": 1.6414042695477604, + "learning_rate": 6.8479661560898295e-06, + "loss": 0.6719, + "step": 20034 + }, + { + "epoch": 0.6140431531200197, + "grad_norm": 1.4178177154760259, + "learning_rate": 6.847024136396468e-06, + "loss": 0.685, + "step": 20035 + }, + { + "epoch": 0.6140738016427608, + "grad_norm": 1.5148639374136037, + "learning_rate": 6.846082147770188e-06, + "loss": 0.5627, + "step": 20036 + }, + { + "epoch": 0.614104450165502, + "grad_norm": 1.5013482818622639, + "learning_rate": 6.845140190220266e-06, + "loss": 0.5867, + "step": 20037 + }, + { + "epoch": 0.6141350986882432, + "grad_norm": 1.5508154126501474, + "learning_rate": 6.8441982637559835e-06, + "loss": 0.7423, + "step": 20038 + }, + { + "epoch": 0.6141657472109844, + "grad_norm": 1.7080299164810098, + "learning_rate": 6.843256368386625e-06, + "loss": 0.7374, + "step": 20039 + }, + { + "epoch": 0.6141963957337256, + "grad_norm": 1.3837060734097726, + "learning_rate": 6.842314504121467e-06, + "loss": 0.5438, + "step": 20040 + }, + { + "epoch": 0.6142270442564668, + "grad_norm": 0.655449840704737, + "learning_rate": 6.8413726709697956e-06, + "loss": 0.5213, + "step": 20041 + }, + { + "epoch": 0.614257692779208, + "grad_norm": 1.678723836130477, + "learning_rate": 6.840430868940886e-06, + "loss": 0.67, + "step": 20042 + }, + { + "epoch": 0.6142883413019492, + "grad_norm": 1.500658973624241, + "learning_rate": 6.839489098044017e-06, + "loss": 0.7267, + "step": 20043 + }, + { + "epoch": 0.6143189898246905, + "grad_norm": 1.497672276703136, + "learning_rate": 6.838547358288474e-06, + "loss": 0.7638, + "step": 20044 + }, + { + "epoch": 0.6143496383474316, + "grad_norm": 1.4023251873642004, + "learning_rate": 6.837605649683532e-06, + "loss": 0.6147, + "step": 20045 + }, + { + "epoch": 0.6143802868701729, + "grad_norm": 1.3401295093756742, + "learning_rate": 6.836663972238469e-06, + "loss": 0.7159, + "step": 20046 + }, + { + "epoch": 0.614410935392914, + "grad_norm": 1.4377576285436924, + "learning_rate": 6.835722325962566e-06, + "loss": 0.5685, + "step": 20047 + }, + { + "epoch": 0.6144415839156553, + "grad_norm": 1.5473637740524508, + "learning_rate": 6.8347807108651034e-06, + "loss": 0.6299, + "step": 20048 + }, + { + "epoch": 0.6144722324383964, + "grad_norm": 1.7397804768173006, + "learning_rate": 6.833839126955349e-06, + "loss": 0.6641, + "step": 20049 + }, + { + "epoch": 0.6145028809611377, + "grad_norm": 0.6587585336703903, + "learning_rate": 6.832897574242596e-06, + "loss": 0.5249, + "step": 20050 + }, + { + "epoch": 0.6145335294838788, + "grad_norm": 1.534637099918774, + "learning_rate": 6.831956052736107e-06, + "loss": 0.6937, + "step": 20051 + }, + { + "epoch": 0.6145641780066201, + "grad_norm": 1.5990525128684316, + "learning_rate": 6.8310145624451704e-06, + "loss": 0.6985, + "step": 20052 + }, + { + "epoch": 0.6145948265293613, + "grad_norm": 1.5971947660789936, + "learning_rate": 6.830073103379057e-06, + "loss": 0.7494, + "step": 20053 + }, + { + "epoch": 0.6146254750521025, + "grad_norm": 0.6902751650719113, + "learning_rate": 6.829131675547041e-06, + "loss": 0.5612, + "step": 20054 + }, + { + "epoch": 0.6146561235748437, + "grad_norm": 1.5976847276810147, + "learning_rate": 6.8281902789584066e-06, + "loss": 0.6186, + "step": 20055 + }, + { + "epoch": 0.6146867720975849, + "grad_norm": 1.7234515362700056, + "learning_rate": 6.827248913622423e-06, + "loss": 0.6341, + "step": 20056 + }, + { + "epoch": 0.6147174206203261, + "grad_norm": 1.578410339914226, + "learning_rate": 6.8263075795483656e-06, + "loss": 0.7472, + "step": 20057 + }, + { + "epoch": 0.6147480691430673, + "grad_norm": 1.4933790703868028, + "learning_rate": 6.825366276745514e-06, + "loss": 0.6059, + "step": 20058 + }, + { + "epoch": 0.6147787176658085, + "grad_norm": 1.5513479450147558, + "learning_rate": 6.824425005223138e-06, + "loss": 0.6778, + "step": 20059 + }, + { + "epoch": 0.6148093661885498, + "grad_norm": 1.4973126508406611, + "learning_rate": 6.8234837649905194e-06, + "loss": 0.6099, + "step": 20060 + }, + { + "epoch": 0.6148400147112909, + "grad_norm": 1.5853522486229386, + "learning_rate": 6.822542556056928e-06, + "loss": 0.723, + "step": 20061 + }, + { + "epoch": 0.6148706632340322, + "grad_norm": 1.6243377091834688, + "learning_rate": 6.8216013784316325e-06, + "loss": 0.6405, + "step": 20062 + }, + { + "epoch": 0.6149013117567733, + "grad_norm": 1.6084907952150231, + "learning_rate": 6.820660232123917e-06, + "loss": 0.6632, + "step": 20063 + }, + { + "epoch": 0.6149319602795146, + "grad_norm": 1.4720843288426535, + "learning_rate": 6.8197191171430485e-06, + "loss": 0.6809, + "step": 20064 + }, + { + "epoch": 0.6149626088022557, + "grad_norm": 1.3764864418193106, + "learning_rate": 6.8187780334982986e-06, + "loss": 0.5555, + "step": 20065 + }, + { + "epoch": 0.614993257324997, + "grad_norm": 1.6550921966475673, + "learning_rate": 6.817836981198944e-06, + "loss": 0.7408, + "step": 20066 + }, + { + "epoch": 0.6150239058477381, + "grad_norm": 1.505377799889338, + "learning_rate": 6.816895960254257e-06, + "loss": 0.7124, + "step": 20067 + }, + { + "epoch": 0.6150545543704793, + "grad_norm": 1.7141192743417883, + "learning_rate": 6.815954970673508e-06, + "loss": 0.6938, + "step": 20068 + }, + { + "epoch": 0.6150852028932206, + "grad_norm": 1.546809228597742, + "learning_rate": 6.815014012465969e-06, + "loss": 0.6405, + "step": 20069 + }, + { + "epoch": 0.6151158514159617, + "grad_norm": 1.4592940824074172, + "learning_rate": 6.814073085640911e-06, + "loss": 0.5874, + "step": 20070 + }, + { + "epoch": 0.615146499938703, + "grad_norm": 1.4102283409162544, + "learning_rate": 6.813132190207608e-06, + "loss": 0.6117, + "step": 20071 + }, + { + "epoch": 0.6151771484614441, + "grad_norm": 1.6014908758099862, + "learning_rate": 6.812191326175331e-06, + "loss": 0.7235, + "step": 20072 + }, + { + "epoch": 0.6152077969841854, + "grad_norm": 1.5128913371086175, + "learning_rate": 6.8112504935533406e-06, + "loss": 0.7052, + "step": 20073 + }, + { + "epoch": 0.6152384455069265, + "grad_norm": 1.767974773568382, + "learning_rate": 6.810309692350923e-06, + "loss": 0.7287, + "step": 20074 + }, + { + "epoch": 0.6152690940296678, + "grad_norm": 1.4798333055979376, + "learning_rate": 6.809368922577338e-06, + "loss": 0.6702, + "step": 20075 + }, + { + "epoch": 0.6152997425524089, + "grad_norm": 1.7163216069523453, + "learning_rate": 6.808428184241853e-06, + "loss": 0.7496, + "step": 20076 + }, + { + "epoch": 0.6153303910751502, + "grad_norm": 1.3544684737779833, + "learning_rate": 6.807487477353747e-06, + "loss": 0.5968, + "step": 20077 + }, + { + "epoch": 0.6153610395978913, + "grad_norm": 0.6785262855276243, + "learning_rate": 6.806546801922281e-06, + "loss": 0.5455, + "step": 20078 + }, + { + "epoch": 0.6153916881206326, + "grad_norm": 1.4623198634331969, + "learning_rate": 6.805606157956727e-06, + "loss": 0.7369, + "step": 20079 + }, + { + "epoch": 0.6154223366433738, + "grad_norm": 1.6132706082673054, + "learning_rate": 6.8046655454663536e-06, + "loss": 0.7093, + "step": 20080 + }, + { + "epoch": 0.615452985166115, + "grad_norm": 1.455471273782701, + "learning_rate": 6.803724964460425e-06, + "loss": 0.6906, + "step": 20081 + }, + { + "epoch": 0.6154836336888562, + "grad_norm": 1.5025333184345262, + "learning_rate": 6.802784414948216e-06, + "loss": 0.7086, + "step": 20082 + }, + { + "epoch": 0.6155142822115974, + "grad_norm": 1.599915732756521, + "learning_rate": 6.801843896938991e-06, + "loss": 0.7679, + "step": 20083 + }, + { + "epoch": 0.6155449307343386, + "grad_norm": 0.6547426283682947, + "learning_rate": 6.800903410442011e-06, + "loss": 0.5634, + "step": 20084 + }, + { + "epoch": 0.6155755792570798, + "grad_norm": 1.573460694170909, + "learning_rate": 6.799962955466555e-06, + "loss": 0.6241, + "step": 20085 + }, + { + "epoch": 0.615606227779821, + "grad_norm": 1.6381801148236446, + "learning_rate": 6.799022532021878e-06, + "loss": 0.6434, + "step": 20086 + }, + { + "epoch": 0.6156368763025623, + "grad_norm": 1.475415923720358, + "learning_rate": 6.7980821401172524e-06, + "loss": 0.6447, + "step": 20087 + }, + { + "epoch": 0.6156675248253034, + "grad_norm": 1.755952666374995, + "learning_rate": 6.797141779761942e-06, + "loss": 0.7215, + "step": 20088 + }, + { + "epoch": 0.6156981733480447, + "grad_norm": 0.6665255014164116, + "learning_rate": 6.796201450965213e-06, + "loss": 0.5262, + "step": 20089 + }, + { + "epoch": 0.6157288218707858, + "grad_norm": 1.7233553791294591, + "learning_rate": 6.7952611537363325e-06, + "loss": 0.769, + "step": 20090 + }, + { + "epoch": 0.6157594703935271, + "grad_norm": 1.516854571121119, + "learning_rate": 6.7943208880845625e-06, + "loss": 0.6281, + "step": 20091 + }, + { + "epoch": 0.6157901189162682, + "grad_norm": 1.5684692197335113, + "learning_rate": 6.793380654019168e-06, + "loss": 0.6526, + "step": 20092 + }, + { + "epoch": 0.6158207674390095, + "grad_norm": 1.5555180312656216, + "learning_rate": 6.792440451549418e-06, + "loss": 0.6544, + "step": 20093 + }, + { + "epoch": 0.6158514159617506, + "grad_norm": 0.6527372879811435, + "learning_rate": 6.791500280684572e-06, + "loss": 0.5648, + "step": 20094 + }, + { + "epoch": 0.6158820644844919, + "grad_norm": 1.5684508995552606, + "learning_rate": 6.790560141433892e-06, + "loss": 0.7052, + "step": 20095 + }, + { + "epoch": 0.615912713007233, + "grad_norm": 0.6684800652965804, + "learning_rate": 6.789620033806645e-06, + "loss": 0.5674, + "step": 20096 + }, + { + "epoch": 0.6159433615299743, + "grad_norm": 1.6040541007876699, + "learning_rate": 6.788679957812092e-06, + "loss": 0.6783, + "step": 20097 + }, + { + "epoch": 0.6159740100527155, + "grad_norm": 1.3968515534890862, + "learning_rate": 6.7877399134595e-06, + "loss": 0.7213, + "step": 20098 + }, + { + "epoch": 0.6160046585754566, + "grad_norm": 1.5687439058626602, + "learning_rate": 6.7867999007581276e-06, + "loss": 0.5999, + "step": 20099 + }, + { + "epoch": 0.6160353070981979, + "grad_norm": 1.5974449209270747, + "learning_rate": 6.785859919717237e-06, + "loss": 0.7817, + "step": 20100 + }, + { + "epoch": 0.616065955620939, + "grad_norm": 1.5734735628776315, + "learning_rate": 6.784919970346091e-06, + "loss": 0.5468, + "step": 20101 + }, + { + "epoch": 0.6160966041436803, + "grad_norm": 1.5618895543955158, + "learning_rate": 6.783980052653954e-06, + "loss": 0.7598, + "step": 20102 + }, + { + "epoch": 0.6161272526664214, + "grad_norm": 1.4020817712045994, + "learning_rate": 6.783040166650079e-06, + "loss": 0.6277, + "step": 20103 + }, + { + "epoch": 0.6161579011891627, + "grad_norm": 1.347228652751559, + "learning_rate": 6.782100312343738e-06, + "loss": 0.6128, + "step": 20104 + }, + { + "epoch": 0.6161885497119038, + "grad_norm": 1.4266951986717646, + "learning_rate": 6.78116048974418e-06, + "loss": 0.6776, + "step": 20105 + }, + { + "epoch": 0.6162191982346451, + "grad_norm": 1.6865282933246777, + "learning_rate": 6.780220698860678e-06, + "loss": 0.6846, + "step": 20106 + }, + { + "epoch": 0.6162498467573863, + "grad_norm": 1.553169190883153, + "learning_rate": 6.779280939702482e-06, + "loss": 0.6317, + "step": 20107 + }, + { + "epoch": 0.6162804952801275, + "grad_norm": 1.4854535508055404, + "learning_rate": 6.7783412122788525e-06, + "loss": 0.6329, + "step": 20108 + }, + { + "epoch": 0.6163111438028687, + "grad_norm": 0.6690978365722924, + "learning_rate": 6.777401516599054e-06, + "loss": 0.5855, + "step": 20109 + }, + { + "epoch": 0.6163417923256099, + "grad_norm": 1.6459449464599478, + "learning_rate": 6.776461852672344e-06, + "loss": 0.7549, + "step": 20110 + }, + { + "epoch": 0.6163724408483511, + "grad_norm": 1.36607091255858, + "learning_rate": 6.775522220507977e-06, + "loss": 0.6552, + "step": 20111 + }, + { + "epoch": 0.6164030893710923, + "grad_norm": 0.6921990931850521, + "learning_rate": 6.774582620115216e-06, + "loss": 0.5582, + "step": 20112 + }, + { + "epoch": 0.6164337378938335, + "grad_norm": 1.4420545641789506, + "learning_rate": 6.7736430515033165e-06, + "loss": 0.6805, + "step": 20113 + }, + { + "epoch": 0.6164643864165747, + "grad_norm": 1.4620595507860052, + "learning_rate": 6.77270351468154e-06, + "loss": 0.5635, + "step": 20114 + }, + { + "epoch": 0.6164950349393159, + "grad_norm": 1.7020590576143393, + "learning_rate": 6.771764009659143e-06, + "loss": 0.7187, + "step": 20115 + }, + { + "epoch": 0.6165256834620572, + "grad_norm": 1.5236257708611136, + "learning_rate": 6.770824536445375e-06, + "loss": 0.7427, + "step": 20116 + }, + { + "epoch": 0.6165563319847983, + "grad_norm": 1.5280716095149476, + "learning_rate": 6.7698850950495065e-06, + "loss": 0.7083, + "step": 20117 + }, + { + "epoch": 0.6165869805075396, + "grad_norm": 1.5355862213956892, + "learning_rate": 6.768945685480784e-06, + "loss": 0.7609, + "step": 20118 + }, + { + "epoch": 0.6166176290302807, + "grad_norm": 1.525408953833142, + "learning_rate": 6.768006307748462e-06, + "loss": 0.7055, + "step": 20119 + }, + { + "epoch": 0.616648277553022, + "grad_norm": 1.3328884740835563, + "learning_rate": 6.767066961861806e-06, + "loss": 0.6527, + "step": 20120 + }, + { + "epoch": 0.6166789260757631, + "grad_norm": 1.6603901754262633, + "learning_rate": 6.766127647830064e-06, + "loss": 0.7418, + "step": 20121 + }, + { + "epoch": 0.6167095745985044, + "grad_norm": 1.5647232884263262, + "learning_rate": 6.7651883656624925e-06, + "loss": 0.5678, + "step": 20122 + }, + { + "epoch": 0.6167402231212455, + "grad_norm": 1.6469928057556487, + "learning_rate": 6.76424911536835e-06, + "loss": 0.7202, + "step": 20123 + }, + { + "epoch": 0.6167708716439868, + "grad_norm": 0.669875934841697, + "learning_rate": 6.763309896956887e-06, + "loss": 0.5449, + "step": 20124 + }, + { + "epoch": 0.616801520166728, + "grad_norm": 1.626305083710048, + "learning_rate": 6.76237071043736e-06, + "loss": 0.6117, + "step": 20125 + }, + { + "epoch": 0.6168321686894692, + "grad_norm": 1.5622560484812338, + "learning_rate": 6.761431555819027e-06, + "loss": 0.6679, + "step": 20126 + }, + { + "epoch": 0.6168628172122104, + "grad_norm": 1.7256253286943308, + "learning_rate": 6.760492433111131e-06, + "loss": 0.7567, + "step": 20127 + }, + { + "epoch": 0.6168934657349516, + "grad_norm": 1.5973924052288706, + "learning_rate": 6.759553342322937e-06, + "loss": 0.7736, + "step": 20128 + }, + { + "epoch": 0.6169241142576928, + "grad_norm": 1.6833835827510057, + "learning_rate": 6.758614283463692e-06, + "loss": 0.6715, + "step": 20129 + }, + { + "epoch": 0.6169547627804339, + "grad_norm": 1.4502647619771485, + "learning_rate": 6.757675256542649e-06, + "loss": 0.6819, + "step": 20130 + }, + { + "epoch": 0.6169854113031752, + "grad_norm": 1.596606205163505, + "learning_rate": 6.7567362615690615e-06, + "loss": 0.6546, + "step": 20131 + }, + { + "epoch": 0.6170160598259163, + "grad_norm": 1.78431842655337, + "learning_rate": 6.755797298552179e-06, + "loss": 0.6657, + "step": 20132 + }, + { + "epoch": 0.6170467083486576, + "grad_norm": 1.4717264839947024, + "learning_rate": 6.754858367501258e-06, + "loss": 0.6312, + "step": 20133 + }, + { + "epoch": 0.6170773568713988, + "grad_norm": 0.6716946698066197, + "learning_rate": 6.753919468425549e-06, + "loss": 0.5838, + "step": 20134 + }, + { + "epoch": 0.61710800539414, + "grad_norm": 1.3548665645260203, + "learning_rate": 6.752980601334299e-06, + "loss": 0.643, + "step": 20135 + }, + { + "epoch": 0.6171386539168812, + "grad_norm": 1.5141821484978861, + "learning_rate": 6.752041766236764e-06, + "loss": 0.7093, + "step": 20136 + }, + { + "epoch": 0.6171693024396224, + "grad_norm": 1.4857767618159505, + "learning_rate": 6.751102963142195e-06, + "loss": 0.7162, + "step": 20137 + }, + { + "epoch": 0.6171999509623636, + "grad_norm": 1.6041926125532378, + "learning_rate": 6.750164192059836e-06, + "loss": 0.6058, + "step": 20138 + }, + { + "epoch": 0.6172305994851048, + "grad_norm": 1.387280074268519, + "learning_rate": 6.749225452998942e-06, + "loss": 0.59, + "step": 20139 + }, + { + "epoch": 0.617261248007846, + "grad_norm": 0.6699926932363898, + "learning_rate": 6.748286745968759e-06, + "loss": 0.5709, + "step": 20140 + }, + { + "epoch": 0.6172918965305872, + "grad_norm": 1.6000429055787357, + "learning_rate": 6.7473480709785414e-06, + "loss": 0.6519, + "step": 20141 + }, + { + "epoch": 0.6173225450533284, + "grad_norm": 1.9623722306975582, + "learning_rate": 6.746409428037536e-06, + "loss": 0.6564, + "step": 20142 + }, + { + "epoch": 0.6173531935760697, + "grad_norm": 1.5328401282273947, + "learning_rate": 6.745470817154989e-06, + "loss": 0.7608, + "step": 20143 + }, + { + "epoch": 0.6173838420988108, + "grad_norm": 1.6788001941190358, + "learning_rate": 6.744532238340151e-06, + "loss": 0.618, + "step": 20144 + }, + { + "epoch": 0.6174144906215521, + "grad_norm": 1.5725778925108338, + "learning_rate": 6.743593691602273e-06, + "loss": 0.6585, + "step": 20145 + }, + { + "epoch": 0.6174451391442932, + "grad_norm": 1.4840269884122703, + "learning_rate": 6.742655176950594e-06, + "loss": 0.6413, + "step": 20146 + }, + { + "epoch": 0.6174757876670345, + "grad_norm": 1.6761801957316955, + "learning_rate": 6.741716694394371e-06, + "loss": 0.7493, + "step": 20147 + }, + { + "epoch": 0.6175064361897756, + "grad_norm": 1.7453778201186056, + "learning_rate": 6.7407782439428475e-06, + "loss": 0.6331, + "step": 20148 + }, + { + "epoch": 0.6175370847125169, + "grad_norm": 1.50322467216451, + "learning_rate": 6.739839825605266e-06, + "loss": 0.6302, + "step": 20149 + }, + { + "epoch": 0.617567733235258, + "grad_norm": 1.5133118100413032, + "learning_rate": 6.73890143939088e-06, + "loss": 0.6605, + "step": 20150 + }, + { + "epoch": 0.6175983817579993, + "grad_norm": 1.6070414548741578, + "learning_rate": 6.73796308530893e-06, + "loss": 0.7188, + "step": 20151 + }, + { + "epoch": 0.6176290302807405, + "grad_norm": 1.6450007744625046, + "learning_rate": 6.737024763368667e-06, + "loss": 0.7276, + "step": 20152 + }, + { + "epoch": 0.6176596788034817, + "grad_norm": 1.4960832226171343, + "learning_rate": 6.736086473579333e-06, + "loss": 0.6414, + "step": 20153 + }, + { + "epoch": 0.6176903273262229, + "grad_norm": 1.5108710013808675, + "learning_rate": 6.735148215950174e-06, + "loss": 0.6945, + "step": 20154 + }, + { + "epoch": 0.6177209758489641, + "grad_norm": 1.5350706160662246, + "learning_rate": 6.7342099904904345e-06, + "loss": 0.6268, + "step": 20155 + }, + { + "epoch": 0.6177516243717053, + "grad_norm": 1.9578170487783084, + "learning_rate": 6.733271797209362e-06, + "loss": 0.7421, + "step": 20156 + }, + { + "epoch": 0.6177822728944465, + "grad_norm": 1.5056948722473453, + "learning_rate": 6.732333636116193e-06, + "loss": 0.7376, + "step": 20157 + }, + { + "epoch": 0.6178129214171877, + "grad_norm": 1.3414531409168513, + "learning_rate": 6.731395507220183e-06, + "loss": 0.5524, + "step": 20158 + }, + { + "epoch": 0.617843569939929, + "grad_norm": 1.771733775447582, + "learning_rate": 6.730457410530563e-06, + "loss": 0.6259, + "step": 20159 + }, + { + "epoch": 0.6178742184626701, + "grad_norm": 1.5731942820000147, + "learning_rate": 6.729519346056589e-06, + "loss": 0.7746, + "step": 20160 + }, + { + "epoch": 0.6179048669854112, + "grad_norm": 1.5977604482688372, + "learning_rate": 6.728581313807495e-06, + "loss": 0.6753, + "step": 20161 + }, + { + "epoch": 0.6179355155081525, + "grad_norm": 1.4867018017745834, + "learning_rate": 6.727643313792524e-06, + "loss": 0.723, + "step": 20162 + }, + { + "epoch": 0.6179661640308937, + "grad_norm": 1.5579786863191019, + "learning_rate": 6.726705346020924e-06, + "loss": 0.6274, + "step": 20163 + }, + { + "epoch": 0.6179968125536349, + "grad_norm": 1.5687715898017975, + "learning_rate": 6.725767410501933e-06, + "loss": 0.7355, + "step": 20164 + }, + { + "epoch": 0.6180274610763761, + "grad_norm": 1.4442866842870246, + "learning_rate": 6.7248295072447925e-06, + "loss": 0.7145, + "step": 20165 + }, + { + "epoch": 0.6180581095991173, + "grad_norm": 1.6703621080373767, + "learning_rate": 6.7238916362587455e-06, + "loss": 0.6277, + "step": 20166 + }, + { + "epoch": 0.6180887581218585, + "grad_norm": 1.7449149049307806, + "learning_rate": 6.722953797553031e-06, + "loss": 0.7061, + "step": 20167 + }, + { + "epoch": 0.6181194066445997, + "grad_norm": 1.5555726843874764, + "learning_rate": 6.722015991136892e-06, + "loss": 0.7154, + "step": 20168 + }, + { + "epoch": 0.6181500551673409, + "grad_norm": 0.7170205943502834, + "learning_rate": 6.721078217019572e-06, + "loss": 0.5452, + "step": 20169 + }, + { + "epoch": 0.6181807036900822, + "grad_norm": 1.3755296388655534, + "learning_rate": 6.7201404752102994e-06, + "loss": 0.5977, + "step": 20170 + }, + { + "epoch": 0.6182113522128233, + "grad_norm": 1.397589729284568, + "learning_rate": 6.71920276571833e-06, + "loss": 0.6048, + "step": 20171 + }, + { + "epoch": 0.6182420007355646, + "grad_norm": 1.587271096825645, + "learning_rate": 6.718265088552892e-06, + "loss": 0.7438, + "step": 20172 + }, + { + "epoch": 0.6182726492583057, + "grad_norm": 1.710337874508297, + "learning_rate": 6.717327443723226e-06, + "loss": 0.7102, + "step": 20173 + }, + { + "epoch": 0.618303297781047, + "grad_norm": 1.404274376251755, + "learning_rate": 6.716389831238574e-06, + "loss": 0.6344, + "step": 20174 + }, + { + "epoch": 0.6183339463037881, + "grad_norm": 1.3600665711773032, + "learning_rate": 6.715452251108175e-06, + "loss": 0.7544, + "step": 20175 + }, + { + "epoch": 0.6183645948265294, + "grad_norm": 1.5575781309073842, + "learning_rate": 6.7145147033412614e-06, + "loss": 0.6203, + "step": 20176 + }, + { + "epoch": 0.6183952433492705, + "grad_norm": 1.346913513758687, + "learning_rate": 6.713577187947078e-06, + "loss": 0.673, + "step": 20177 + }, + { + "epoch": 0.6184258918720118, + "grad_norm": 1.6747472946278426, + "learning_rate": 6.712639704934856e-06, + "loss": 0.6058, + "step": 20178 + }, + { + "epoch": 0.618456540394753, + "grad_norm": 1.620283445054918, + "learning_rate": 6.711702254313839e-06, + "loss": 0.7335, + "step": 20179 + }, + { + "epoch": 0.6184871889174942, + "grad_norm": 0.6608238735001218, + "learning_rate": 6.710764836093264e-06, + "loss": 0.5541, + "step": 20180 + }, + { + "epoch": 0.6185178374402354, + "grad_norm": 1.522504176769043, + "learning_rate": 6.7098274502823575e-06, + "loss": 0.7293, + "step": 20181 + }, + { + "epoch": 0.6185484859629766, + "grad_norm": 1.3659173398857603, + "learning_rate": 6.70889009689037e-06, + "loss": 0.6579, + "step": 20182 + }, + { + "epoch": 0.6185791344857178, + "grad_norm": 0.6810353644249932, + "learning_rate": 6.707952775926527e-06, + "loss": 0.5623, + "step": 20183 + }, + { + "epoch": 0.618609783008459, + "grad_norm": 1.7381812514431232, + "learning_rate": 6.707015487400066e-06, + "loss": 0.6672, + "step": 20184 + }, + { + "epoch": 0.6186404315312002, + "grad_norm": 1.7214468279948334, + "learning_rate": 6.706078231320226e-06, + "loss": 0.5856, + "step": 20185 + }, + { + "epoch": 0.6186710800539414, + "grad_norm": 1.4592913023640015, + "learning_rate": 6.705141007696239e-06, + "loss": 0.6581, + "step": 20186 + }, + { + "epoch": 0.6187017285766826, + "grad_norm": 1.577291947860212, + "learning_rate": 6.704203816537342e-06, + "loss": 0.631, + "step": 20187 + }, + { + "epoch": 0.6187323770994239, + "grad_norm": 1.6491110535022209, + "learning_rate": 6.7032666578527685e-06, + "loss": 0.6059, + "step": 20188 + }, + { + "epoch": 0.618763025622165, + "grad_norm": 1.8353981625345799, + "learning_rate": 6.702329531651749e-06, + "loss": 0.6578, + "step": 20189 + }, + { + "epoch": 0.6187936741449063, + "grad_norm": 1.663297058669749, + "learning_rate": 6.7013924379435256e-06, + "loss": 0.6837, + "step": 20190 + }, + { + "epoch": 0.6188243226676474, + "grad_norm": 1.416722856965891, + "learning_rate": 6.700455376737324e-06, + "loss": 0.6608, + "step": 20191 + }, + { + "epoch": 0.6188549711903886, + "grad_norm": 1.5310131919223255, + "learning_rate": 6.699518348042378e-06, + "loss": 0.6693, + "step": 20192 + }, + { + "epoch": 0.6188856197131298, + "grad_norm": 1.7106226798304802, + "learning_rate": 6.698581351867924e-06, + "loss": 0.7288, + "step": 20193 + }, + { + "epoch": 0.618916268235871, + "grad_norm": 0.6586111883971044, + "learning_rate": 6.69764438822319e-06, + "loss": 0.5385, + "step": 20194 + }, + { + "epoch": 0.6189469167586122, + "grad_norm": 1.4934041288107887, + "learning_rate": 6.696707457117413e-06, + "loss": 0.7316, + "step": 20195 + }, + { + "epoch": 0.6189775652813534, + "grad_norm": 1.5672036136749044, + "learning_rate": 6.695770558559823e-06, + "loss": 0.7433, + "step": 20196 + }, + { + "epoch": 0.6190082138040947, + "grad_norm": 1.3735024150950759, + "learning_rate": 6.694833692559649e-06, + "loss": 0.6295, + "step": 20197 + }, + { + "epoch": 0.6190388623268358, + "grad_norm": 1.6634046050503883, + "learning_rate": 6.693896859126127e-06, + "loss": 0.6708, + "step": 20198 + }, + { + "epoch": 0.6190695108495771, + "grad_norm": 0.670686277994272, + "learning_rate": 6.6929600582684864e-06, + "loss": 0.5756, + "step": 20199 + }, + { + "epoch": 0.6191001593723182, + "grad_norm": 1.6893053840822847, + "learning_rate": 6.69202328999595e-06, + "loss": 0.6823, + "step": 20200 + }, + { + "epoch": 0.6191308078950595, + "grad_norm": 1.520960858962224, + "learning_rate": 6.6910865543177605e-06, + "loss": 0.6233, + "step": 20201 + }, + { + "epoch": 0.6191614564178006, + "grad_norm": 1.5261108918632458, + "learning_rate": 6.690149851243142e-06, + "loss": 0.6897, + "step": 20202 + }, + { + "epoch": 0.6191921049405419, + "grad_norm": 0.660414248940497, + "learning_rate": 6.6892131807813195e-06, + "loss": 0.5446, + "step": 20203 + }, + { + "epoch": 0.619222753463283, + "grad_norm": 0.6750604885140457, + "learning_rate": 6.6882765429415294e-06, + "loss": 0.5591, + "step": 20204 + }, + { + "epoch": 0.6192534019860243, + "grad_norm": 1.6606670516124773, + "learning_rate": 6.687339937732995e-06, + "loss": 0.7599, + "step": 20205 + }, + { + "epoch": 0.6192840505087654, + "grad_norm": 1.5961542430784728, + "learning_rate": 6.686403365164951e-06, + "loss": 0.7538, + "step": 20206 + }, + { + "epoch": 0.6193146990315067, + "grad_norm": 1.272799539686569, + "learning_rate": 6.685466825246623e-06, + "loss": 0.5995, + "step": 20207 + }, + { + "epoch": 0.6193453475542479, + "grad_norm": 0.6708360054548828, + "learning_rate": 6.6845303179872346e-06, + "loss": 0.5378, + "step": 20208 + }, + { + "epoch": 0.6193759960769891, + "grad_norm": 1.5226975554483237, + "learning_rate": 6.683593843396021e-06, + "loss": 0.7316, + "step": 20209 + }, + { + "epoch": 0.6194066445997303, + "grad_norm": 1.7918503911356172, + "learning_rate": 6.682657401482207e-06, + "loss": 0.6993, + "step": 20210 + }, + { + "epoch": 0.6194372931224715, + "grad_norm": 1.3730779538241602, + "learning_rate": 6.681720992255012e-06, + "loss": 0.6074, + "step": 20211 + }, + { + "epoch": 0.6194679416452127, + "grad_norm": 1.5541013841628524, + "learning_rate": 6.6807846157236756e-06, + "loss": 0.5556, + "step": 20212 + }, + { + "epoch": 0.6194985901679539, + "grad_norm": 0.6942588833912849, + "learning_rate": 6.679848271897411e-06, + "loss": 0.5436, + "step": 20213 + }, + { + "epoch": 0.6195292386906951, + "grad_norm": 1.3265003843105345, + "learning_rate": 6.678911960785458e-06, + "loss": 0.6876, + "step": 20214 + }, + { + "epoch": 0.6195598872134364, + "grad_norm": 1.5121007568108573, + "learning_rate": 6.677975682397033e-06, + "loss": 0.8159, + "step": 20215 + }, + { + "epoch": 0.6195905357361775, + "grad_norm": 1.5993659407489027, + "learning_rate": 6.677039436741361e-06, + "loss": 0.6785, + "step": 20216 + }, + { + "epoch": 0.6196211842589188, + "grad_norm": 1.8103808321765849, + "learning_rate": 6.676103223827671e-06, + "loss": 0.6883, + "step": 20217 + }, + { + "epoch": 0.6196518327816599, + "grad_norm": 1.5329239399248022, + "learning_rate": 6.675167043665187e-06, + "loss": 0.7036, + "step": 20218 + }, + { + "epoch": 0.6196824813044012, + "grad_norm": 1.573643382231649, + "learning_rate": 6.674230896263132e-06, + "loss": 0.6589, + "step": 20219 + }, + { + "epoch": 0.6197131298271423, + "grad_norm": 1.6040694816744114, + "learning_rate": 6.673294781630732e-06, + "loss": 0.6234, + "step": 20220 + }, + { + "epoch": 0.6197437783498836, + "grad_norm": 1.7491151493863284, + "learning_rate": 6.672358699777207e-06, + "loss": 0.7592, + "step": 20221 + }, + { + "epoch": 0.6197744268726247, + "grad_norm": 1.6018059921083803, + "learning_rate": 6.6714226507117855e-06, + "loss": 0.7526, + "step": 20222 + }, + { + "epoch": 0.6198050753953659, + "grad_norm": 1.6402035243747615, + "learning_rate": 6.6704866344436915e-06, + "loss": 0.6656, + "step": 20223 + }, + { + "epoch": 0.6198357239181072, + "grad_norm": 1.3272422621485391, + "learning_rate": 6.669550650982137e-06, + "loss": 0.5637, + "step": 20224 + }, + { + "epoch": 0.6198663724408483, + "grad_norm": 1.5474004847519653, + "learning_rate": 6.668614700336359e-06, + "loss": 0.7322, + "step": 20225 + }, + { + "epoch": 0.6198970209635896, + "grad_norm": 1.5704365556754505, + "learning_rate": 6.6676787825155695e-06, + "loss": 0.5565, + "step": 20226 + }, + { + "epoch": 0.6199276694863307, + "grad_norm": 1.7606805885572652, + "learning_rate": 6.6667428975289925e-06, + "loss": 0.6418, + "step": 20227 + }, + { + "epoch": 0.619958318009072, + "grad_norm": 1.4374309270270649, + "learning_rate": 6.665807045385853e-06, + "loss": 0.6633, + "step": 20228 + }, + { + "epoch": 0.6199889665318131, + "grad_norm": 1.6540736344789306, + "learning_rate": 6.66487122609537e-06, + "loss": 0.7438, + "step": 20229 + }, + { + "epoch": 0.6200196150545544, + "grad_norm": 1.5018976924476912, + "learning_rate": 6.663935439666761e-06, + "loss": 0.6544, + "step": 20230 + }, + { + "epoch": 0.6200502635772955, + "grad_norm": 1.6007504847269234, + "learning_rate": 6.662999686109252e-06, + "loss": 0.7305, + "step": 20231 + }, + { + "epoch": 0.6200809121000368, + "grad_norm": 1.5224182725480675, + "learning_rate": 6.662063965432059e-06, + "loss": 0.7051, + "step": 20232 + }, + { + "epoch": 0.620111560622778, + "grad_norm": 1.7947179707641707, + "learning_rate": 6.661128277644406e-06, + "loss": 0.7755, + "step": 20233 + }, + { + "epoch": 0.6201422091455192, + "grad_norm": 1.7561490717458221, + "learning_rate": 6.660192622755513e-06, + "loss": 0.6755, + "step": 20234 + }, + { + "epoch": 0.6201728576682604, + "grad_norm": 1.4765489738443969, + "learning_rate": 6.65925700077459e-06, + "loss": 0.6649, + "step": 20235 + }, + { + "epoch": 0.6202035061910016, + "grad_norm": 0.6890405940963228, + "learning_rate": 6.658321411710868e-06, + "loss": 0.5535, + "step": 20236 + }, + { + "epoch": 0.6202341547137428, + "grad_norm": 1.798175631641451, + "learning_rate": 6.657385855573558e-06, + "loss": 0.7355, + "step": 20237 + }, + { + "epoch": 0.620264803236484, + "grad_norm": 1.6119382524815726, + "learning_rate": 6.65645033237188e-06, + "loss": 0.6489, + "step": 20238 + }, + { + "epoch": 0.6202954517592252, + "grad_norm": 1.5895236464963298, + "learning_rate": 6.655514842115052e-06, + "loss": 0.673, + "step": 20239 + }, + { + "epoch": 0.6203261002819664, + "grad_norm": 1.361384179165321, + "learning_rate": 6.654579384812292e-06, + "loss": 0.5996, + "step": 20240 + }, + { + "epoch": 0.6203567488047076, + "grad_norm": 1.536289846675398, + "learning_rate": 6.6536439604728175e-06, + "loss": 0.7945, + "step": 20241 + }, + { + "epoch": 0.6203873973274489, + "grad_norm": 1.5962794689737954, + "learning_rate": 6.652708569105849e-06, + "loss": 0.7522, + "step": 20242 + }, + { + "epoch": 0.62041804585019, + "grad_norm": 0.659040625077863, + "learning_rate": 6.651773210720593e-06, + "loss": 0.5432, + "step": 20243 + }, + { + "epoch": 0.6204486943729313, + "grad_norm": 1.472749766248122, + "learning_rate": 6.650837885326278e-06, + "loss": 0.6566, + "step": 20244 + }, + { + "epoch": 0.6204793428956724, + "grad_norm": 1.6685115008498528, + "learning_rate": 6.649902592932111e-06, + "loss": 0.612, + "step": 20245 + }, + { + "epoch": 0.6205099914184137, + "grad_norm": 0.6707884693338827, + "learning_rate": 6.648967333547311e-06, + "loss": 0.5594, + "step": 20246 + }, + { + "epoch": 0.6205406399411548, + "grad_norm": 1.379178796699741, + "learning_rate": 6.648032107181095e-06, + "loss": 0.7196, + "step": 20247 + }, + { + "epoch": 0.6205712884638961, + "grad_norm": 1.2535265848404402, + "learning_rate": 6.6470969138426745e-06, + "loss": 0.624, + "step": 20248 + }, + { + "epoch": 0.6206019369866372, + "grad_norm": 1.4808203226682726, + "learning_rate": 6.6461617535412656e-06, + "loss": 0.6742, + "step": 20249 + }, + { + "epoch": 0.6206325855093785, + "grad_norm": 1.5444412597077872, + "learning_rate": 6.6452266262860855e-06, + "loss": 0.5921, + "step": 20250 + }, + { + "epoch": 0.6206632340321196, + "grad_norm": 1.4246488604283074, + "learning_rate": 6.6442915320863426e-06, + "loss": 0.6875, + "step": 20251 + }, + { + "epoch": 0.6206938825548609, + "grad_norm": 1.592750376907717, + "learning_rate": 6.643356470951256e-06, + "loss": 0.7463, + "step": 20252 + }, + { + "epoch": 0.6207245310776021, + "grad_norm": 1.566644725807803, + "learning_rate": 6.642421442890039e-06, + "loss": 0.8121, + "step": 20253 + }, + { + "epoch": 0.6207551796003432, + "grad_norm": 0.6611985428128073, + "learning_rate": 6.641486447911896e-06, + "loss": 0.5445, + "step": 20254 + }, + { + "epoch": 0.6207858281230845, + "grad_norm": 0.6538449028839386, + "learning_rate": 6.640551486026053e-06, + "loss": 0.5622, + "step": 20255 + }, + { + "epoch": 0.6208164766458256, + "grad_norm": 1.4975679165910156, + "learning_rate": 6.639616557241715e-06, + "loss": 0.7177, + "step": 20256 + }, + { + "epoch": 0.6208471251685669, + "grad_norm": 0.647730422521044, + "learning_rate": 6.6386816615680905e-06, + "loss": 0.5292, + "step": 20257 + }, + { + "epoch": 0.620877773691308, + "grad_norm": 1.4756054588182066, + "learning_rate": 6.6377467990144e-06, + "loss": 0.6239, + "step": 20258 + }, + { + "epoch": 0.6209084222140493, + "grad_norm": 1.3074559645024497, + "learning_rate": 6.636811969589847e-06, + "loss": 0.5851, + "step": 20259 + }, + { + "epoch": 0.6209390707367904, + "grad_norm": 1.8332026353355617, + "learning_rate": 6.635877173303647e-06, + "loss": 0.7711, + "step": 20260 + }, + { + "epoch": 0.6209697192595317, + "grad_norm": 1.6941315975624485, + "learning_rate": 6.634942410165013e-06, + "loss": 0.6887, + "step": 20261 + }, + { + "epoch": 0.6210003677822729, + "grad_norm": 1.5066273684331395, + "learning_rate": 6.634007680183147e-06, + "loss": 0.6689, + "step": 20262 + }, + { + "epoch": 0.6210310163050141, + "grad_norm": 1.5633664907014915, + "learning_rate": 6.633072983367269e-06, + "loss": 0.6766, + "step": 20263 + }, + { + "epoch": 0.6210616648277553, + "grad_norm": 0.6899363806295801, + "learning_rate": 6.632138319726587e-06, + "loss": 0.533, + "step": 20264 + }, + { + "epoch": 0.6210923133504965, + "grad_norm": 1.4686215289679214, + "learning_rate": 6.6312036892703e-06, + "loss": 0.6272, + "step": 20265 + }, + { + "epoch": 0.6211229618732377, + "grad_norm": 1.6483028637193264, + "learning_rate": 6.630269092007631e-06, + "loss": 0.6679, + "step": 20266 + }, + { + "epoch": 0.6211536103959789, + "grad_norm": 1.6586686209369614, + "learning_rate": 6.629334527947777e-06, + "loss": 0.6497, + "step": 20267 + }, + { + "epoch": 0.6211842589187201, + "grad_norm": 1.5315549890689937, + "learning_rate": 6.628399997099959e-06, + "loss": 0.6801, + "step": 20268 + }, + { + "epoch": 0.6212149074414614, + "grad_norm": 1.5524377004598169, + "learning_rate": 6.627465499473377e-06, + "loss": 0.7387, + "step": 20269 + }, + { + "epoch": 0.6212455559642025, + "grad_norm": 1.5914661099991252, + "learning_rate": 6.6265310350772376e-06, + "loss": 0.693, + "step": 20270 + }, + { + "epoch": 0.6212762044869438, + "grad_norm": 1.6446976312806623, + "learning_rate": 6.625596603920752e-06, + "loss": 0.7752, + "step": 20271 + }, + { + "epoch": 0.6213068530096849, + "grad_norm": 1.488103878026059, + "learning_rate": 6.624662206013128e-06, + "loss": 0.6471, + "step": 20272 + }, + { + "epoch": 0.6213375015324262, + "grad_norm": 1.6453414694055624, + "learning_rate": 6.623727841363567e-06, + "loss": 0.6586, + "step": 20273 + }, + { + "epoch": 0.6213681500551673, + "grad_norm": 1.5325117701867492, + "learning_rate": 6.622793509981285e-06, + "loss": 0.674, + "step": 20274 + }, + { + "epoch": 0.6213987985779086, + "grad_norm": 1.625103729207637, + "learning_rate": 6.621859211875481e-06, + "loss": 0.6845, + "step": 20275 + }, + { + "epoch": 0.6214294471006497, + "grad_norm": 1.5276979498364402, + "learning_rate": 6.620924947055358e-06, + "loss": 0.717, + "step": 20276 + }, + { + "epoch": 0.621460095623391, + "grad_norm": 0.6873710390191601, + "learning_rate": 6.619990715530132e-06, + "loss": 0.5941, + "step": 20277 + }, + { + "epoch": 0.6214907441461321, + "grad_norm": 1.68775195311996, + "learning_rate": 6.619056517309e-06, + "loss": 0.6898, + "step": 20278 + }, + { + "epoch": 0.6215213926688734, + "grad_norm": 1.6102466194987664, + "learning_rate": 6.618122352401168e-06, + "loss": 0.717, + "step": 20279 + }, + { + "epoch": 0.6215520411916146, + "grad_norm": 1.3786467681614667, + "learning_rate": 6.6171882208158435e-06, + "loss": 0.5988, + "step": 20280 + }, + { + "epoch": 0.6215826897143558, + "grad_norm": 1.565327855556298, + "learning_rate": 6.6162541225622265e-06, + "loss": 0.6542, + "step": 20281 + }, + { + "epoch": 0.621613338237097, + "grad_norm": 1.5218197490972605, + "learning_rate": 6.6153200576495254e-06, + "loss": 0.6501, + "step": 20282 + }, + { + "epoch": 0.6216439867598382, + "grad_norm": 1.4343155855978587, + "learning_rate": 6.614386026086943e-06, + "loss": 0.5839, + "step": 20283 + }, + { + "epoch": 0.6216746352825794, + "grad_norm": 1.3675726289437384, + "learning_rate": 6.613452027883678e-06, + "loss": 0.629, + "step": 20284 + }, + { + "epoch": 0.6217052838053205, + "grad_norm": 1.615691462734954, + "learning_rate": 6.612518063048938e-06, + "loss": 0.6991, + "step": 20285 + }, + { + "epoch": 0.6217359323280618, + "grad_norm": 1.4888996856207426, + "learning_rate": 6.611584131591924e-06, + "loss": 0.6432, + "step": 20286 + }, + { + "epoch": 0.6217665808508029, + "grad_norm": 1.5566729515054916, + "learning_rate": 6.610650233521843e-06, + "loss": 0.7004, + "step": 20287 + }, + { + "epoch": 0.6217972293735442, + "grad_norm": 1.6264969615441494, + "learning_rate": 6.609716368847887e-06, + "loss": 0.8237, + "step": 20288 + }, + { + "epoch": 0.6218278778962854, + "grad_norm": 1.4310819178069696, + "learning_rate": 6.608782537579264e-06, + "loss": 0.6324, + "step": 20289 + }, + { + "epoch": 0.6218585264190266, + "grad_norm": 1.5603048151491223, + "learning_rate": 6.607848739725176e-06, + "loss": 0.6744, + "step": 20290 + }, + { + "epoch": 0.6218891749417678, + "grad_norm": 1.7118143737450207, + "learning_rate": 6.6069149752948225e-06, + "loss": 0.7375, + "step": 20291 + }, + { + "epoch": 0.621919823464509, + "grad_norm": 1.6779277647863795, + "learning_rate": 6.6059812442974e-06, + "loss": 0.7937, + "step": 20292 + }, + { + "epoch": 0.6219504719872502, + "grad_norm": 1.6410665885825373, + "learning_rate": 6.605047546742116e-06, + "loss": 0.6437, + "step": 20293 + }, + { + "epoch": 0.6219811205099914, + "grad_norm": 1.4448732906747845, + "learning_rate": 6.604113882638166e-06, + "loss": 0.6513, + "step": 20294 + }, + { + "epoch": 0.6220117690327326, + "grad_norm": 1.576356783417336, + "learning_rate": 6.603180251994752e-06, + "loss": 0.5257, + "step": 20295 + }, + { + "epoch": 0.6220424175554738, + "grad_norm": 1.6670213714043165, + "learning_rate": 6.602246654821074e-06, + "loss": 0.7574, + "step": 20296 + }, + { + "epoch": 0.622073066078215, + "grad_norm": 1.6419737089076765, + "learning_rate": 6.601313091126322e-06, + "loss": 0.6587, + "step": 20297 + }, + { + "epoch": 0.6221037146009563, + "grad_norm": 0.6589514008288921, + "learning_rate": 6.60037956091971e-06, + "loss": 0.5161, + "step": 20298 + }, + { + "epoch": 0.6221343631236974, + "grad_norm": 1.4193400274856711, + "learning_rate": 6.599446064210424e-06, + "loss": 0.6494, + "step": 20299 + }, + { + "epoch": 0.6221650116464387, + "grad_norm": 1.4401541645574325, + "learning_rate": 6.598512601007665e-06, + "loss": 0.6708, + "step": 20300 + }, + { + "epoch": 0.6221956601691798, + "grad_norm": 0.652728236055465, + "learning_rate": 6.597579171320634e-06, + "loss": 0.5652, + "step": 20301 + }, + { + "epoch": 0.6222263086919211, + "grad_norm": 1.6575283185631597, + "learning_rate": 6.596645775158526e-06, + "loss": 0.6374, + "step": 20302 + }, + { + "epoch": 0.6222569572146622, + "grad_norm": 1.5320997352051877, + "learning_rate": 6.595712412530535e-06, + "loss": 0.7251, + "step": 20303 + }, + { + "epoch": 0.6222876057374035, + "grad_norm": 1.6416941426226896, + "learning_rate": 6.5947790834458625e-06, + "loss": 0.704, + "step": 20304 + }, + { + "epoch": 0.6223182542601446, + "grad_norm": 1.5501248192220614, + "learning_rate": 6.593845787913702e-06, + "loss": 0.6994, + "step": 20305 + }, + { + "epoch": 0.6223489027828859, + "grad_norm": 1.4745234543303631, + "learning_rate": 6.592912525943251e-06, + "loss": 0.6403, + "step": 20306 + }, + { + "epoch": 0.622379551305627, + "grad_norm": 0.6798295871095034, + "learning_rate": 6.591979297543708e-06, + "loss": 0.5488, + "step": 20307 + }, + { + "epoch": 0.6224101998283683, + "grad_norm": 0.637668269316976, + "learning_rate": 6.591046102724259e-06, + "loss": 0.5483, + "step": 20308 + }, + { + "epoch": 0.6224408483511095, + "grad_norm": 1.564554546325645, + "learning_rate": 6.59011294149411e-06, + "loss": 0.6339, + "step": 20309 + }, + { + "epoch": 0.6224714968738507, + "grad_norm": 1.42449611299935, + "learning_rate": 6.58917981386245e-06, + "loss": 0.7031, + "step": 20310 + }, + { + "epoch": 0.6225021453965919, + "grad_norm": 1.6404804807198712, + "learning_rate": 6.58824671983847e-06, + "loss": 0.755, + "step": 20311 + }, + { + "epoch": 0.6225327939193331, + "grad_norm": 1.6126044908495527, + "learning_rate": 6.587313659431371e-06, + "loss": 0.6958, + "step": 20312 + }, + { + "epoch": 0.6225634424420743, + "grad_norm": 1.5648763505643077, + "learning_rate": 6.586380632650342e-06, + "loss": 0.6529, + "step": 20313 + }, + { + "epoch": 0.6225940909648155, + "grad_norm": 0.6426163654546201, + "learning_rate": 6.5854476395045794e-06, + "loss": 0.5465, + "step": 20314 + }, + { + "epoch": 0.6226247394875567, + "grad_norm": 1.5259822592370036, + "learning_rate": 6.584514680003276e-06, + "loss": 0.6355, + "step": 20315 + }, + { + "epoch": 0.6226553880102978, + "grad_norm": 1.5782646096995845, + "learning_rate": 6.58358175415562e-06, + "loss": 0.6556, + "step": 20316 + }, + { + "epoch": 0.6226860365330391, + "grad_norm": 0.6559168948903105, + "learning_rate": 6.58264886197081e-06, + "loss": 0.5111, + "step": 20317 + }, + { + "epoch": 0.6227166850557803, + "grad_norm": 1.4959611069760914, + "learning_rate": 6.581716003458037e-06, + "loss": 0.6801, + "step": 20318 + }, + { + "epoch": 0.6227473335785215, + "grad_norm": 1.6297182349451653, + "learning_rate": 6.5807831786264845e-06, + "loss": 0.6558, + "step": 20319 + }, + { + "epoch": 0.6227779821012627, + "grad_norm": 1.4573638693064348, + "learning_rate": 6.579850387485357e-06, + "loss": 0.5983, + "step": 20320 + }, + { + "epoch": 0.6228086306240039, + "grad_norm": 1.3744676666251192, + "learning_rate": 6.578917630043832e-06, + "loss": 0.6195, + "step": 20321 + }, + { + "epoch": 0.6228392791467451, + "grad_norm": 1.6626491277631956, + "learning_rate": 6.577984906311112e-06, + "loss": 0.6708, + "step": 20322 + }, + { + "epoch": 0.6228699276694863, + "grad_norm": 1.7355187083135197, + "learning_rate": 6.577052216296382e-06, + "loss": 0.6858, + "step": 20323 + }, + { + "epoch": 0.6229005761922275, + "grad_norm": 1.4699629062536603, + "learning_rate": 6.576119560008829e-06, + "loss": 0.599, + "step": 20324 + }, + { + "epoch": 0.6229312247149688, + "grad_norm": 1.7792331655726485, + "learning_rate": 6.575186937457649e-06, + "loss": 0.7823, + "step": 20325 + }, + { + "epoch": 0.6229618732377099, + "grad_norm": 0.7093968572897426, + "learning_rate": 6.574254348652028e-06, + "loss": 0.5715, + "step": 20326 + }, + { + "epoch": 0.6229925217604512, + "grad_norm": 1.6137311530209897, + "learning_rate": 6.573321793601154e-06, + "loss": 0.695, + "step": 20327 + }, + { + "epoch": 0.6230231702831923, + "grad_norm": 1.6155586589745932, + "learning_rate": 6.572389272314219e-06, + "loss": 0.5777, + "step": 20328 + }, + { + "epoch": 0.6230538188059336, + "grad_norm": 1.392909511004704, + "learning_rate": 6.571456784800411e-06, + "loss": 0.6921, + "step": 20329 + }, + { + "epoch": 0.6230844673286747, + "grad_norm": 1.600319819704911, + "learning_rate": 6.570524331068912e-06, + "loss": 0.6679, + "step": 20330 + }, + { + "epoch": 0.623115115851416, + "grad_norm": 0.7064025880032178, + "learning_rate": 6.5695919111289165e-06, + "loss": 0.5702, + "step": 20331 + }, + { + "epoch": 0.6231457643741571, + "grad_norm": 1.600063435731269, + "learning_rate": 6.568659524989608e-06, + "loss": 0.7072, + "step": 20332 + }, + { + "epoch": 0.6231764128968984, + "grad_norm": 0.6786052880657855, + "learning_rate": 6.567727172660176e-06, + "loss": 0.5655, + "step": 20333 + }, + { + "epoch": 0.6232070614196396, + "grad_norm": 0.6727224715265722, + "learning_rate": 6.566794854149809e-06, + "loss": 0.5591, + "step": 20334 + }, + { + "epoch": 0.6232377099423808, + "grad_norm": 1.3367885952659422, + "learning_rate": 6.565862569467687e-06, + "loss": 0.5483, + "step": 20335 + }, + { + "epoch": 0.623268358465122, + "grad_norm": 1.642540491038002, + "learning_rate": 6.564930318623002e-06, + "loss": 0.6818, + "step": 20336 + }, + { + "epoch": 0.6232990069878632, + "grad_norm": 1.663128180689824, + "learning_rate": 6.56399810162494e-06, + "loss": 0.7069, + "step": 20337 + }, + { + "epoch": 0.6233296555106044, + "grad_norm": 1.6132614634342604, + "learning_rate": 6.563065918482676e-06, + "loss": 0.5842, + "step": 20338 + }, + { + "epoch": 0.6233603040333456, + "grad_norm": 1.4127650777414036, + "learning_rate": 6.56213376920541e-06, + "loss": 0.651, + "step": 20339 + }, + { + "epoch": 0.6233909525560868, + "grad_norm": 1.5203298288269462, + "learning_rate": 6.561201653802314e-06, + "loss": 0.659, + "step": 20340 + }, + { + "epoch": 0.623421601078828, + "grad_norm": 1.4463982952934697, + "learning_rate": 6.560269572282584e-06, + "loss": 0.7121, + "step": 20341 + }, + { + "epoch": 0.6234522496015692, + "grad_norm": 1.4663852583638541, + "learning_rate": 6.559337524655396e-06, + "loss": 0.7182, + "step": 20342 + }, + { + "epoch": 0.6234828981243105, + "grad_norm": 1.428908234498874, + "learning_rate": 6.5584055109299325e-06, + "loss": 0.6484, + "step": 20343 + }, + { + "epoch": 0.6235135466470516, + "grad_norm": 1.5489209335382155, + "learning_rate": 6.557473531115384e-06, + "loss": 0.6493, + "step": 20344 + }, + { + "epoch": 0.6235441951697929, + "grad_norm": 1.7294123337138154, + "learning_rate": 6.556541585220928e-06, + "loss": 0.7043, + "step": 20345 + }, + { + "epoch": 0.623574843692534, + "grad_norm": 1.4664519766443451, + "learning_rate": 6.555609673255747e-06, + "loss": 0.6598, + "step": 20346 + }, + { + "epoch": 0.6236054922152752, + "grad_norm": 1.485079463672838, + "learning_rate": 6.554677795229028e-06, + "loss": 0.6524, + "step": 20347 + }, + { + "epoch": 0.6236361407380164, + "grad_norm": 1.4672205033625192, + "learning_rate": 6.553745951149947e-06, + "loss": 0.6563, + "step": 20348 + }, + { + "epoch": 0.6236667892607576, + "grad_norm": 0.6750116235319655, + "learning_rate": 6.552814141027693e-06, + "loss": 0.5424, + "step": 20349 + }, + { + "epoch": 0.6236974377834988, + "grad_norm": 1.4002607080166223, + "learning_rate": 6.551882364871443e-06, + "loss": 0.5277, + "step": 20350 + }, + { + "epoch": 0.62372808630624, + "grad_norm": 1.6307525560621694, + "learning_rate": 6.550950622690373e-06, + "loss": 0.7089, + "step": 20351 + }, + { + "epoch": 0.6237587348289813, + "grad_norm": 1.3976458707616437, + "learning_rate": 6.550018914493674e-06, + "loss": 0.588, + "step": 20352 + }, + { + "epoch": 0.6237893833517224, + "grad_norm": 0.6658725732200785, + "learning_rate": 6.549087240290521e-06, + "loss": 0.5413, + "step": 20353 + }, + { + "epoch": 0.6238200318744637, + "grad_norm": 1.5497632363020064, + "learning_rate": 6.548155600090092e-06, + "loss": 0.6893, + "step": 20354 + }, + { + "epoch": 0.6238506803972048, + "grad_norm": 1.5863331809537087, + "learning_rate": 6.5472239939015716e-06, + "loss": 0.6756, + "step": 20355 + }, + { + "epoch": 0.6238813289199461, + "grad_norm": 1.4822053756007492, + "learning_rate": 6.546292421734135e-06, + "loss": 0.6641, + "step": 20356 + }, + { + "epoch": 0.6239119774426872, + "grad_norm": 1.44147456890053, + "learning_rate": 6.545360883596963e-06, + "loss": 0.7608, + "step": 20357 + }, + { + "epoch": 0.6239426259654285, + "grad_norm": 0.6542882936691466, + "learning_rate": 6.544429379499236e-06, + "loss": 0.5285, + "step": 20358 + }, + { + "epoch": 0.6239732744881696, + "grad_norm": 1.6610556834859724, + "learning_rate": 6.543497909450126e-06, + "loss": 0.7731, + "step": 20359 + }, + { + "epoch": 0.6240039230109109, + "grad_norm": 1.5959099277320903, + "learning_rate": 6.542566473458819e-06, + "loss": 0.7652, + "step": 20360 + }, + { + "epoch": 0.624034571533652, + "grad_norm": 1.6766586091460156, + "learning_rate": 6.541635071534491e-06, + "loss": 0.6561, + "step": 20361 + }, + { + "epoch": 0.6240652200563933, + "grad_norm": 1.6020908520413313, + "learning_rate": 6.5407037036863105e-06, + "loss": 0.668, + "step": 20362 + }, + { + "epoch": 0.6240958685791345, + "grad_norm": 1.819559362418363, + "learning_rate": 6.53977236992347e-06, + "loss": 0.7999, + "step": 20363 + }, + { + "epoch": 0.6241265171018757, + "grad_norm": 1.5905581930639412, + "learning_rate": 6.538841070255133e-06, + "loss": 0.6826, + "step": 20364 + }, + { + "epoch": 0.6241571656246169, + "grad_norm": 1.4482864633549022, + "learning_rate": 6.537909804690481e-06, + "loss": 0.7923, + "step": 20365 + }, + { + "epoch": 0.6241878141473581, + "grad_norm": 1.617637996566245, + "learning_rate": 6.53697857323869e-06, + "loss": 0.7205, + "step": 20366 + }, + { + "epoch": 0.6242184626700993, + "grad_norm": 1.423613126748398, + "learning_rate": 6.5360473759089335e-06, + "loss": 0.6011, + "step": 20367 + }, + { + "epoch": 0.6242491111928405, + "grad_norm": 1.4206673177044042, + "learning_rate": 6.535116212710391e-06, + "loss": 0.6682, + "step": 20368 + }, + { + "epoch": 0.6242797597155817, + "grad_norm": 1.5934147617150942, + "learning_rate": 6.534185083652233e-06, + "loss": 0.6483, + "step": 20369 + }, + { + "epoch": 0.624310408238323, + "grad_norm": 1.8106144617354543, + "learning_rate": 6.533253988743635e-06, + "loss": 0.6707, + "step": 20370 + }, + { + "epoch": 0.6243410567610641, + "grad_norm": 1.4601369490859162, + "learning_rate": 6.532322927993776e-06, + "loss": 0.7066, + "step": 20371 + }, + { + "epoch": 0.6243717052838054, + "grad_norm": 1.7912859574694595, + "learning_rate": 6.531391901411827e-06, + "loss": 0.7526, + "step": 20372 + }, + { + "epoch": 0.6244023538065465, + "grad_norm": 1.544321865311888, + "learning_rate": 6.530460909006956e-06, + "loss": 0.692, + "step": 20373 + }, + { + "epoch": 0.6244330023292878, + "grad_norm": 1.6570304518353003, + "learning_rate": 6.529529950788347e-06, + "loss": 0.5276, + "step": 20374 + }, + { + "epoch": 0.6244636508520289, + "grad_norm": 1.5774266641919674, + "learning_rate": 6.528599026765163e-06, + "loss": 0.7948, + "step": 20375 + }, + { + "epoch": 0.6244942993747702, + "grad_norm": 1.5606666943571448, + "learning_rate": 6.527668136946584e-06, + "loss": 0.6873, + "step": 20376 + }, + { + "epoch": 0.6245249478975113, + "grad_norm": 1.5213997401214785, + "learning_rate": 6.5267372813417775e-06, + "loss": 0.6601, + "step": 20377 + }, + { + "epoch": 0.6245555964202525, + "grad_norm": 1.3800535742652422, + "learning_rate": 6.525806459959915e-06, + "loss": 0.6335, + "step": 20378 + }, + { + "epoch": 0.6245862449429938, + "grad_norm": 1.6212654342324697, + "learning_rate": 6.524875672810176e-06, + "loss": 0.689, + "step": 20379 + }, + { + "epoch": 0.6246168934657349, + "grad_norm": 1.4992294452098724, + "learning_rate": 6.523944919901724e-06, + "loss": 0.6788, + "step": 20380 + }, + { + "epoch": 0.6246475419884762, + "grad_norm": 1.7336673306547947, + "learning_rate": 6.523014201243729e-06, + "loss": 0.6372, + "step": 20381 + }, + { + "epoch": 0.6246781905112173, + "grad_norm": 1.42708426056012, + "learning_rate": 6.52208351684537e-06, + "loss": 0.5967, + "step": 20382 + }, + { + "epoch": 0.6247088390339586, + "grad_norm": 1.5714209639191103, + "learning_rate": 6.52115286671581e-06, + "loss": 0.6672, + "step": 20383 + }, + { + "epoch": 0.6247394875566997, + "grad_norm": 1.4243725572735202, + "learning_rate": 6.520222250864217e-06, + "loss": 0.6406, + "step": 20384 + }, + { + "epoch": 0.624770136079441, + "grad_norm": 1.595257864789403, + "learning_rate": 6.519291669299767e-06, + "loss": 0.6923, + "step": 20385 + }, + { + "epoch": 0.6248007846021821, + "grad_norm": 1.7270496959814243, + "learning_rate": 6.518361122031627e-06, + "loss": 0.6039, + "step": 20386 + }, + { + "epoch": 0.6248314331249234, + "grad_norm": 1.6815857243676646, + "learning_rate": 6.517430609068966e-06, + "loss": 0.6285, + "step": 20387 + }, + { + "epoch": 0.6248620816476645, + "grad_norm": 0.6459317463868108, + "learning_rate": 6.516500130420953e-06, + "loss": 0.5485, + "step": 20388 + }, + { + "epoch": 0.6248927301704058, + "grad_norm": 1.7587538291913187, + "learning_rate": 6.5155696860967535e-06, + "loss": 0.7312, + "step": 20389 + }, + { + "epoch": 0.624923378693147, + "grad_norm": 1.5768891356938775, + "learning_rate": 6.514639276105539e-06, + "loss": 0.5984, + "step": 20390 + }, + { + "epoch": 0.6249540272158882, + "grad_norm": 1.5323606781852561, + "learning_rate": 6.513708900456477e-06, + "loss": 0.6546, + "step": 20391 + }, + { + "epoch": 0.6249846757386294, + "grad_norm": 0.6583760788067692, + "learning_rate": 6.512778559158728e-06, + "loss": 0.5601, + "step": 20392 + }, + { + "epoch": 0.6250153242613706, + "grad_norm": 1.5845635775212803, + "learning_rate": 6.51184825222147e-06, + "loss": 0.6258, + "step": 20393 + }, + { + "epoch": 0.6250459727841118, + "grad_norm": 0.6997686680197832, + "learning_rate": 6.510917979653857e-06, + "loss": 0.5655, + "step": 20394 + }, + { + "epoch": 0.625076621306853, + "grad_norm": 1.6916806407004084, + "learning_rate": 6.509987741465069e-06, + "loss": 0.6814, + "step": 20395 + }, + { + "epoch": 0.6251072698295942, + "grad_norm": 0.6898632394516132, + "learning_rate": 6.5090575376642615e-06, + "loss": 0.55, + "step": 20396 + }, + { + "epoch": 0.6251379183523355, + "grad_norm": 1.458334475041395, + "learning_rate": 6.508127368260601e-06, + "loss": 0.7467, + "step": 20397 + }, + { + "epoch": 0.6251685668750766, + "grad_norm": 0.6833876395842812, + "learning_rate": 6.5071972332632584e-06, + "loss": 0.5534, + "step": 20398 + }, + { + "epoch": 0.6251992153978179, + "grad_norm": 1.6707089236493418, + "learning_rate": 6.506267132681395e-06, + "loss": 0.7054, + "step": 20399 + }, + { + "epoch": 0.625229863920559, + "grad_norm": 0.6610176004188592, + "learning_rate": 6.505337066524173e-06, + "loss": 0.5591, + "step": 20400 + }, + { + "epoch": 0.6252605124433003, + "grad_norm": 1.2699480108177532, + "learning_rate": 6.504407034800762e-06, + "loss": 0.6221, + "step": 20401 + }, + { + "epoch": 0.6252911609660414, + "grad_norm": 1.6361508893909071, + "learning_rate": 6.503477037520322e-06, + "loss": 0.7684, + "step": 20402 + }, + { + "epoch": 0.6253218094887827, + "grad_norm": 1.5556409074494784, + "learning_rate": 6.5025470746920135e-06, + "loss": 0.7595, + "step": 20403 + }, + { + "epoch": 0.6253524580115238, + "grad_norm": 0.6479779757792516, + "learning_rate": 6.50161714632501e-06, + "loss": 0.5496, + "step": 20404 + }, + { + "epoch": 0.6253831065342651, + "grad_norm": 1.64998564096918, + "learning_rate": 6.500687252428462e-06, + "loss": 0.7434, + "step": 20405 + }, + { + "epoch": 0.6254137550570062, + "grad_norm": 1.5357761568796264, + "learning_rate": 6.499757393011543e-06, + "loss": 0.6768, + "step": 20406 + }, + { + "epoch": 0.6254444035797475, + "grad_norm": 1.405528387459327, + "learning_rate": 6.498827568083408e-06, + "loss": 0.7712, + "step": 20407 + }, + { + "epoch": 0.6254750521024887, + "grad_norm": 1.4495265929082113, + "learning_rate": 6.497897777653218e-06, + "loss": 0.6914, + "step": 20408 + }, + { + "epoch": 0.6255057006252298, + "grad_norm": 1.4819835350421895, + "learning_rate": 6.496968021730141e-06, + "loss": 0.7431, + "step": 20409 + }, + { + "epoch": 0.6255363491479711, + "grad_norm": 1.4368782504322437, + "learning_rate": 6.4960383003233325e-06, + "loss": 0.6808, + "step": 20410 + }, + { + "epoch": 0.6255669976707122, + "grad_norm": 1.592828118033865, + "learning_rate": 6.4951086134419535e-06, + "loss": 0.683, + "step": 20411 + }, + { + "epoch": 0.6255976461934535, + "grad_norm": 1.5013754287775987, + "learning_rate": 6.49417896109517e-06, + "loss": 0.7113, + "step": 20412 + }, + { + "epoch": 0.6256282947161946, + "grad_norm": 1.5912149607760673, + "learning_rate": 6.493249343292134e-06, + "loss": 0.6242, + "step": 20413 + }, + { + "epoch": 0.6256589432389359, + "grad_norm": 1.7044765186736144, + "learning_rate": 6.492319760042013e-06, + "loss": 0.671, + "step": 20414 + }, + { + "epoch": 0.625689591761677, + "grad_norm": 0.6898510335012483, + "learning_rate": 6.491390211353964e-06, + "loss": 0.5715, + "step": 20415 + }, + { + "epoch": 0.6257202402844183, + "grad_norm": 1.4685724695528206, + "learning_rate": 6.4904606972371396e-06, + "loss": 0.6171, + "step": 20416 + }, + { + "epoch": 0.6257508888071595, + "grad_norm": 1.8555889851933631, + "learning_rate": 6.489531217700708e-06, + "loss": 0.7473, + "step": 20417 + }, + { + "epoch": 0.6257815373299007, + "grad_norm": 1.4591816437088778, + "learning_rate": 6.488601772753824e-06, + "loss": 0.6903, + "step": 20418 + }, + { + "epoch": 0.6258121858526419, + "grad_norm": 1.70131784224216, + "learning_rate": 6.4876723624056424e-06, + "loss": 0.5911, + "step": 20419 + }, + { + "epoch": 0.6258428343753831, + "grad_norm": 1.4100989292821617, + "learning_rate": 6.486742986665326e-06, + "loss": 0.6624, + "step": 20420 + }, + { + "epoch": 0.6258734828981243, + "grad_norm": 1.521557603065078, + "learning_rate": 6.4858136455420275e-06, + "loss": 0.6507, + "step": 20421 + }, + { + "epoch": 0.6259041314208655, + "grad_norm": 1.4525438281804754, + "learning_rate": 6.4848843390449076e-06, + "loss": 0.6729, + "step": 20422 + }, + { + "epoch": 0.6259347799436067, + "grad_norm": 1.6129160619243244, + "learning_rate": 6.483955067183122e-06, + "loss": 0.6194, + "step": 20423 + }, + { + "epoch": 0.625965428466348, + "grad_norm": 1.434942006056121, + "learning_rate": 6.483025829965826e-06, + "loss": 0.6306, + "step": 20424 + }, + { + "epoch": 0.6259960769890891, + "grad_norm": 1.516320283688757, + "learning_rate": 6.482096627402177e-06, + "loss": 0.636, + "step": 20425 + }, + { + "epoch": 0.6260267255118304, + "grad_norm": 1.4823873392491016, + "learning_rate": 6.481167459501332e-06, + "loss": 0.6494, + "step": 20426 + }, + { + "epoch": 0.6260573740345715, + "grad_norm": 1.5083995154181151, + "learning_rate": 6.48023832627244e-06, + "loss": 0.6694, + "step": 20427 + }, + { + "epoch": 0.6260880225573128, + "grad_norm": 1.5738471001799068, + "learning_rate": 6.479309227724663e-06, + "loss": 0.6959, + "step": 20428 + }, + { + "epoch": 0.6261186710800539, + "grad_norm": 1.820357137265977, + "learning_rate": 6.478380163867153e-06, + "loss": 0.7201, + "step": 20429 + }, + { + "epoch": 0.6261493196027952, + "grad_norm": 1.687673804563619, + "learning_rate": 6.477451134709063e-06, + "loss": 0.7069, + "step": 20430 + }, + { + "epoch": 0.6261799681255363, + "grad_norm": 1.5571282018690962, + "learning_rate": 6.476522140259549e-06, + "loss": 0.7154, + "step": 20431 + }, + { + "epoch": 0.6262106166482776, + "grad_norm": 1.6074399805602266, + "learning_rate": 6.475593180527761e-06, + "loss": 0.691, + "step": 20432 + }, + { + "epoch": 0.6262412651710187, + "grad_norm": 1.549780425519838, + "learning_rate": 6.47466425552286e-06, + "loss": 0.6807, + "step": 20433 + }, + { + "epoch": 0.62627191369376, + "grad_norm": 1.5193097935188746, + "learning_rate": 6.4737353652539945e-06, + "loss": 0.6598, + "step": 20434 + }, + { + "epoch": 0.6263025622165012, + "grad_norm": 1.6414849794863131, + "learning_rate": 6.472806509730311e-06, + "loss": 0.6368, + "step": 20435 + }, + { + "epoch": 0.6263332107392424, + "grad_norm": 1.5575847010460064, + "learning_rate": 6.471877688960973e-06, + "loss": 0.7407, + "step": 20436 + }, + { + "epoch": 0.6263638592619836, + "grad_norm": 1.8073738941024693, + "learning_rate": 6.470948902955125e-06, + "loss": 0.6783, + "step": 20437 + }, + { + "epoch": 0.6263945077847248, + "grad_norm": 1.58976171649073, + "learning_rate": 6.470020151721918e-06, + "loss": 0.6172, + "step": 20438 + }, + { + "epoch": 0.626425156307466, + "grad_norm": 1.7337160433660834, + "learning_rate": 6.469091435270509e-06, + "loss": 0.6408, + "step": 20439 + }, + { + "epoch": 0.6264558048302071, + "grad_norm": 1.493603380289223, + "learning_rate": 6.4681627536100425e-06, + "loss": 0.6924, + "step": 20440 + }, + { + "epoch": 0.6264864533529484, + "grad_norm": 1.5713797807589711, + "learning_rate": 6.467234106749674e-06, + "loss": 0.6925, + "step": 20441 + }, + { + "epoch": 0.6265171018756895, + "grad_norm": 0.6558447340909304, + "learning_rate": 6.466305494698552e-06, + "loss": 0.5357, + "step": 20442 + }, + { + "epoch": 0.6265477503984308, + "grad_norm": 1.5352205605527185, + "learning_rate": 6.465376917465824e-06, + "loss": 0.5869, + "step": 20443 + }, + { + "epoch": 0.626578398921172, + "grad_norm": 1.5761424663341108, + "learning_rate": 6.4644483750606435e-06, + "loss": 0.7242, + "step": 20444 + }, + { + "epoch": 0.6266090474439132, + "grad_norm": 1.5840555166025359, + "learning_rate": 6.46351986749216e-06, + "loss": 0.6982, + "step": 20445 + }, + { + "epoch": 0.6266396959666544, + "grad_norm": 1.4287783524433206, + "learning_rate": 6.462591394769514e-06, + "loss": 0.6242, + "step": 20446 + }, + { + "epoch": 0.6266703444893956, + "grad_norm": 1.6490820269295945, + "learning_rate": 6.461662956901867e-06, + "loss": 0.7677, + "step": 20447 + }, + { + "epoch": 0.6267009930121368, + "grad_norm": 1.5836729533995695, + "learning_rate": 6.460734553898352e-06, + "loss": 0.5955, + "step": 20448 + }, + { + "epoch": 0.626731641534878, + "grad_norm": 1.4839333402923283, + "learning_rate": 6.459806185768133e-06, + "loss": 0.6615, + "step": 20449 + }, + { + "epoch": 0.6267622900576192, + "grad_norm": 1.8649139778458796, + "learning_rate": 6.4588778525203466e-06, + "loss": 0.7082, + "step": 20450 + }, + { + "epoch": 0.6267929385803604, + "grad_norm": 1.5114287159024087, + "learning_rate": 6.45794955416414e-06, + "loss": 0.6845, + "step": 20451 + }, + { + "epoch": 0.6268235871031016, + "grad_norm": 1.6572166726712623, + "learning_rate": 6.457021290708666e-06, + "loss": 0.6895, + "step": 20452 + }, + { + "epoch": 0.6268542356258429, + "grad_norm": 1.764463400628119, + "learning_rate": 6.456093062163067e-06, + "loss": 0.7301, + "step": 20453 + }, + { + "epoch": 0.626884884148584, + "grad_norm": 1.5173030606439295, + "learning_rate": 6.455164868536488e-06, + "loss": 0.7174, + "step": 20454 + }, + { + "epoch": 0.6269155326713253, + "grad_norm": 1.6029141840138916, + "learning_rate": 6.45423670983808e-06, + "loss": 0.6025, + "step": 20455 + }, + { + "epoch": 0.6269461811940664, + "grad_norm": 1.7037028308160254, + "learning_rate": 6.453308586076985e-06, + "loss": 0.6071, + "step": 20456 + }, + { + "epoch": 0.6269768297168077, + "grad_norm": 1.4378795327965315, + "learning_rate": 6.452380497262342e-06, + "loss": 0.6379, + "step": 20457 + }, + { + "epoch": 0.6270074782395488, + "grad_norm": 1.500072708793301, + "learning_rate": 6.451452443403309e-06, + "loss": 0.6654, + "step": 20458 + }, + { + "epoch": 0.6270381267622901, + "grad_norm": 0.6519586646535853, + "learning_rate": 6.450524424509015e-06, + "loss": 0.5458, + "step": 20459 + }, + { + "epoch": 0.6270687752850312, + "grad_norm": 1.5826543401138387, + "learning_rate": 6.449596440588619e-06, + "loss": 0.6757, + "step": 20460 + }, + { + "epoch": 0.6270994238077725, + "grad_norm": 1.7092573547365122, + "learning_rate": 6.448668491651257e-06, + "loss": 0.7665, + "step": 20461 + }, + { + "epoch": 0.6271300723305137, + "grad_norm": 1.5723334162038625, + "learning_rate": 6.44774057770607e-06, + "loss": 0.6432, + "step": 20462 + }, + { + "epoch": 0.6271607208532549, + "grad_norm": 1.7397025524405698, + "learning_rate": 6.446812698762206e-06, + "loss": 0.7552, + "step": 20463 + }, + { + "epoch": 0.6271913693759961, + "grad_norm": 1.562379399557194, + "learning_rate": 6.4458848548288055e-06, + "loss": 0.575, + "step": 20464 + }, + { + "epoch": 0.6272220178987373, + "grad_norm": 1.8219244248912803, + "learning_rate": 6.444957045915008e-06, + "loss": 0.6525, + "step": 20465 + }, + { + "epoch": 0.6272526664214785, + "grad_norm": 1.7040375274416315, + "learning_rate": 6.444029272029961e-06, + "loss": 0.7169, + "step": 20466 + }, + { + "epoch": 0.6272833149442197, + "grad_norm": 0.6863460507743873, + "learning_rate": 6.443101533182803e-06, + "loss": 0.5447, + "step": 20467 + }, + { + "epoch": 0.6273139634669609, + "grad_norm": 1.6144513326585173, + "learning_rate": 6.442173829382675e-06, + "loss": 0.8047, + "step": 20468 + }, + { + "epoch": 0.6273446119897022, + "grad_norm": 1.523349504906947, + "learning_rate": 6.441246160638722e-06, + "loss": 0.6869, + "step": 20469 + }, + { + "epoch": 0.6273752605124433, + "grad_norm": 1.7594336721462551, + "learning_rate": 6.440318526960075e-06, + "loss": 0.7413, + "step": 20470 + }, + { + "epoch": 0.6274059090351845, + "grad_norm": 1.6620141329143567, + "learning_rate": 6.439390928355887e-06, + "loss": 0.7099, + "step": 20471 + }, + { + "epoch": 0.6274365575579257, + "grad_norm": 1.6302186806892245, + "learning_rate": 6.438463364835288e-06, + "loss": 0.7415, + "step": 20472 + }, + { + "epoch": 0.6274672060806669, + "grad_norm": 1.445271278143562, + "learning_rate": 6.437535836407419e-06, + "loss": 0.7175, + "step": 20473 + }, + { + "epoch": 0.6274978546034081, + "grad_norm": 1.5383905613443165, + "learning_rate": 6.436608343081423e-06, + "loss": 0.6668, + "step": 20474 + }, + { + "epoch": 0.6275285031261493, + "grad_norm": 1.555203494534656, + "learning_rate": 6.435680884866436e-06, + "loss": 0.7025, + "step": 20475 + }, + { + "epoch": 0.6275591516488905, + "grad_norm": 1.5662902489325188, + "learning_rate": 6.4347534617715965e-06, + "loss": 0.6947, + "step": 20476 + }, + { + "epoch": 0.6275898001716317, + "grad_norm": 1.449958486974468, + "learning_rate": 6.433826073806047e-06, + "loss": 0.6384, + "step": 20477 + }, + { + "epoch": 0.627620448694373, + "grad_norm": 1.6385409677522256, + "learning_rate": 6.432898720978916e-06, + "loss": 0.7296, + "step": 20478 + }, + { + "epoch": 0.6276510972171141, + "grad_norm": 1.326043922166129, + "learning_rate": 6.431971403299353e-06, + "loss": 0.6651, + "step": 20479 + }, + { + "epoch": 0.6276817457398554, + "grad_norm": 1.4037668504769067, + "learning_rate": 6.431044120776486e-06, + "loss": 0.6119, + "step": 20480 + }, + { + "epoch": 0.6277123942625965, + "grad_norm": 0.6810173291438633, + "learning_rate": 6.430116873419452e-06, + "loss": 0.5677, + "step": 20481 + }, + { + "epoch": 0.6277430427853378, + "grad_norm": 1.4699743984617646, + "learning_rate": 6.429189661237392e-06, + "loss": 0.6906, + "step": 20482 + }, + { + "epoch": 0.6277736913080789, + "grad_norm": 1.6276358492837872, + "learning_rate": 6.42826248423944e-06, + "loss": 0.8513, + "step": 20483 + }, + { + "epoch": 0.6278043398308202, + "grad_norm": 1.6249919702257454, + "learning_rate": 6.4273353424347294e-06, + "loss": 0.6939, + "step": 20484 + }, + { + "epoch": 0.6278349883535613, + "grad_norm": 1.6253394515660087, + "learning_rate": 6.4264082358324e-06, + "loss": 0.7169, + "step": 20485 + }, + { + "epoch": 0.6278656368763026, + "grad_norm": 1.6159473466299683, + "learning_rate": 6.425481164441582e-06, + "loss": 0.5626, + "step": 20486 + }, + { + "epoch": 0.6278962853990437, + "grad_norm": 1.504255860467519, + "learning_rate": 6.424554128271416e-06, + "loss": 0.7181, + "step": 20487 + }, + { + "epoch": 0.627926933921785, + "grad_norm": 1.622087111413921, + "learning_rate": 6.423627127331034e-06, + "loss": 0.725, + "step": 20488 + }, + { + "epoch": 0.6279575824445262, + "grad_norm": 0.6977774134282022, + "learning_rate": 6.422700161629563e-06, + "loss": 0.5855, + "step": 20489 + }, + { + "epoch": 0.6279882309672674, + "grad_norm": 1.592679575096105, + "learning_rate": 6.421773231176149e-06, + "loss": 0.728, + "step": 20490 + }, + { + "epoch": 0.6280188794900086, + "grad_norm": 1.5091254539956807, + "learning_rate": 6.420846335979917e-06, + "loss": 0.6856, + "step": 20491 + }, + { + "epoch": 0.6280495280127498, + "grad_norm": 1.4787386715477013, + "learning_rate": 6.4199194760499996e-06, + "loss": 0.6889, + "step": 20492 + }, + { + "epoch": 0.628080176535491, + "grad_norm": 1.4639593440734366, + "learning_rate": 6.418992651395533e-06, + "loss": 0.6893, + "step": 20493 + }, + { + "epoch": 0.6281108250582322, + "grad_norm": 0.6583055572987524, + "learning_rate": 6.418065862025646e-06, + "loss": 0.5724, + "step": 20494 + }, + { + "epoch": 0.6281414735809734, + "grad_norm": 1.6877896902053124, + "learning_rate": 6.417139107949476e-06, + "loss": 0.6253, + "step": 20495 + }, + { + "epoch": 0.6281721221037146, + "grad_norm": 1.3822238835257925, + "learning_rate": 6.416212389176151e-06, + "loss": 0.5773, + "step": 20496 + }, + { + "epoch": 0.6282027706264558, + "grad_norm": 1.4303694485080163, + "learning_rate": 6.415285705714798e-06, + "loss": 0.7123, + "step": 20497 + }, + { + "epoch": 0.6282334191491971, + "grad_norm": 1.4121679200851138, + "learning_rate": 6.414359057574556e-06, + "loss": 0.5985, + "step": 20498 + }, + { + "epoch": 0.6282640676719382, + "grad_norm": 1.5887446589246816, + "learning_rate": 6.413432444764554e-06, + "loss": 0.705, + "step": 20499 + }, + { + "epoch": 0.6282947161946795, + "grad_norm": 1.4497887936454381, + "learning_rate": 6.412505867293912e-06, + "loss": 0.6033, + "step": 20500 + }, + { + "epoch": 0.6283253647174206, + "grad_norm": 1.4956146992297803, + "learning_rate": 6.411579325171775e-06, + "loss": 0.657, + "step": 20501 + }, + { + "epoch": 0.6283560132401618, + "grad_norm": 1.33244379468328, + "learning_rate": 6.410652818407259e-06, + "loss": 0.5968, + "step": 20502 + }, + { + "epoch": 0.628386661762903, + "grad_norm": 0.6693172455425734, + "learning_rate": 6.409726347009504e-06, + "loss": 0.5513, + "step": 20503 + }, + { + "epoch": 0.6284173102856442, + "grad_norm": 1.7302571456514284, + "learning_rate": 6.408799910987633e-06, + "loss": 0.6913, + "step": 20504 + }, + { + "epoch": 0.6284479588083854, + "grad_norm": 1.4865528933440866, + "learning_rate": 6.407873510350772e-06, + "loss": 0.6686, + "step": 20505 + }, + { + "epoch": 0.6284786073311266, + "grad_norm": 1.4762638549206275, + "learning_rate": 6.406947145108057e-06, + "loss": 0.6985, + "step": 20506 + }, + { + "epoch": 0.6285092558538679, + "grad_norm": 0.6800989445442792, + "learning_rate": 6.40602081526861e-06, + "loss": 0.5677, + "step": 20507 + }, + { + "epoch": 0.628539904376609, + "grad_norm": 1.4375348446897527, + "learning_rate": 6.405094520841556e-06, + "loss": 0.6642, + "step": 20508 + }, + { + "epoch": 0.6285705528993503, + "grad_norm": 1.4478485769660725, + "learning_rate": 6.404168261836028e-06, + "loss": 0.5997, + "step": 20509 + }, + { + "epoch": 0.6286012014220914, + "grad_norm": 1.7353697287471055, + "learning_rate": 6.403242038261152e-06, + "loss": 0.7733, + "step": 20510 + }, + { + "epoch": 0.6286318499448327, + "grad_norm": 1.650785150004164, + "learning_rate": 6.402315850126049e-06, + "loss": 0.7094, + "step": 20511 + }, + { + "epoch": 0.6286624984675738, + "grad_norm": 1.466104334733927, + "learning_rate": 6.401389697439853e-06, + "loss": 0.6528, + "step": 20512 + }, + { + "epoch": 0.6286931469903151, + "grad_norm": 0.6349584615071462, + "learning_rate": 6.400463580211677e-06, + "loss": 0.561, + "step": 20513 + }, + { + "epoch": 0.6287237955130562, + "grad_norm": 1.581433418460555, + "learning_rate": 6.399537498450662e-06, + "loss": 0.6731, + "step": 20514 + }, + { + "epoch": 0.6287544440357975, + "grad_norm": 1.651744943678964, + "learning_rate": 6.398611452165924e-06, + "loss": 0.7182, + "step": 20515 + }, + { + "epoch": 0.6287850925585386, + "grad_norm": 1.5770821810191826, + "learning_rate": 6.3976854413665855e-06, + "loss": 0.7572, + "step": 20516 + }, + { + "epoch": 0.6288157410812799, + "grad_norm": 1.5105776094597496, + "learning_rate": 6.396759466061777e-06, + "loss": 0.6718, + "step": 20517 + }, + { + "epoch": 0.6288463896040211, + "grad_norm": 1.4993916962267209, + "learning_rate": 6.395833526260617e-06, + "loss": 0.7144, + "step": 20518 + }, + { + "epoch": 0.6288770381267623, + "grad_norm": 1.5224292768237364, + "learning_rate": 6.394907621972233e-06, + "loss": 0.7066, + "step": 20519 + }, + { + "epoch": 0.6289076866495035, + "grad_norm": 1.4676640800201757, + "learning_rate": 6.393981753205747e-06, + "loss": 0.6861, + "step": 20520 + }, + { + "epoch": 0.6289383351722447, + "grad_norm": 1.3617501417934321, + "learning_rate": 6.393055919970279e-06, + "loss": 0.6782, + "step": 20521 + }, + { + "epoch": 0.6289689836949859, + "grad_norm": 1.6772148847504915, + "learning_rate": 6.392130122274955e-06, + "loss": 0.6339, + "step": 20522 + }, + { + "epoch": 0.6289996322177271, + "grad_norm": 1.581539910016727, + "learning_rate": 6.391204360128899e-06, + "loss": 0.6641, + "step": 20523 + }, + { + "epoch": 0.6290302807404683, + "grad_norm": 0.64720135678067, + "learning_rate": 6.390278633541227e-06, + "loss": 0.5489, + "step": 20524 + }, + { + "epoch": 0.6290609292632096, + "grad_norm": 1.5272169451300517, + "learning_rate": 6.389352942521066e-06, + "loss": 0.6542, + "step": 20525 + }, + { + "epoch": 0.6290915777859507, + "grad_norm": 1.8193858989704563, + "learning_rate": 6.388427287077532e-06, + "loss": 0.743, + "step": 20526 + }, + { + "epoch": 0.629122226308692, + "grad_norm": 1.4689640636682446, + "learning_rate": 6.387501667219746e-06, + "loss": 0.619, + "step": 20527 + }, + { + "epoch": 0.6291528748314331, + "grad_norm": 1.5500159348286577, + "learning_rate": 6.386576082956832e-06, + "loss": 0.6705, + "step": 20528 + }, + { + "epoch": 0.6291835233541744, + "grad_norm": 1.4866729365365088, + "learning_rate": 6.385650534297908e-06, + "loss": 0.6778, + "step": 20529 + }, + { + "epoch": 0.6292141718769155, + "grad_norm": 1.8740978604133929, + "learning_rate": 6.3847250212520966e-06, + "loss": 0.708, + "step": 20530 + }, + { + "epoch": 0.6292448203996568, + "grad_norm": 1.5118591100360186, + "learning_rate": 6.383799543828515e-06, + "loss": 0.6134, + "step": 20531 + }, + { + "epoch": 0.6292754689223979, + "grad_norm": 0.6695804026970043, + "learning_rate": 6.3828741020362765e-06, + "loss": 0.5484, + "step": 20532 + }, + { + "epoch": 0.6293061174451391, + "grad_norm": 1.839689707817923, + "learning_rate": 6.38194869588451e-06, + "loss": 0.6891, + "step": 20533 + }, + { + "epoch": 0.6293367659678804, + "grad_norm": 1.4946431133166405, + "learning_rate": 6.381023325382327e-06, + "loss": 0.6634, + "step": 20534 + }, + { + "epoch": 0.6293674144906215, + "grad_norm": 1.59833323349268, + "learning_rate": 6.380097990538845e-06, + "loss": 0.6908, + "step": 20535 + }, + { + "epoch": 0.6293980630133628, + "grad_norm": 1.6687076355122978, + "learning_rate": 6.3791726913631865e-06, + "loss": 0.7163, + "step": 20536 + }, + { + "epoch": 0.6294287115361039, + "grad_norm": 1.5444875866370664, + "learning_rate": 6.378247427864466e-06, + "loss": 0.7528, + "step": 20537 + }, + { + "epoch": 0.6294593600588452, + "grad_norm": 0.6626426748527536, + "learning_rate": 6.377322200051797e-06, + "loss": 0.526, + "step": 20538 + }, + { + "epoch": 0.6294900085815863, + "grad_norm": 1.6327530237932195, + "learning_rate": 6.376397007934303e-06, + "loss": 0.7887, + "step": 20539 + }, + { + "epoch": 0.6295206571043276, + "grad_norm": 1.5486442619184626, + "learning_rate": 6.375471851521094e-06, + "loss": 0.6445, + "step": 20540 + }, + { + "epoch": 0.6295513056270687, + "grad_norm": 0.6539785542851991, + "learning_rate": 6.374546730821289e-06, + "loss": 0.5885, + "step": 20541 + }, + { + "epoch": 0.62958195414981, + "grad_norm": 1.6961584321838605, + "learning_rate": 6.373621645844005e-06, + "loss": 0.7291, + "step": 20542 + }, + { + "epoch": 0.6296126026725511, + "grad_norm": 1.6754514242404688, + "learning_rate": 6.372696596598349e-06, + "loss": 0.7285, + "step": 20543 + }, + { + "epoch": 0.6296432511952924, + "grad_norm": 1.509430346046819, + "learning_rate": 6.371771583093447e-06, + "loss": 0.7775, + "step": 20544 + }, + { + "epoch": 0.6296738997180336, + "grad_norm": 1.6168751456829138, + "learning_rate": 6.370846605338408e-06, + "loss": 0.6858, + "step": 20545 + }, + { + "epoch": 0.6297045482407748, + "grad_norm": 1.5433375168090417, + "learning_rate": 6.369921663342342e-06, + "loss": 0.528, + "step": 20546 + }, + { + "epoch": 0.629735196763516, + "grad_norm": 1.655622159714192, + "learning_rate": 6.368996757114368e-06, + "loss": 0.6582, + "step": 20547 + }, + { + "epoch": 0.6297658452862572, + "grad_norm": 1.6753942519902236, + "learning_rate": 6.368071886663599e-06, + "loss": 0.787, + "step": 20548 + }, + { + "epoch": 0.6297964938089984, + "grad_norm": 1.5648254739850214, + "learning_rate": 6.367147051999145e-06, + "loss": 0.6633, + "step": 20549 + }, + { + "epoch": 0.6298271423317396, + "grad_norm": 1.6366083793814605, + "learning_rate": 6.366222253130123e-06, + "loss": 0.7625, + "step": 20550 + }, + { + "epoch": 0.6298577908544808, + "grad_norm": 0.680414906168465, + "learning_rate": 6.365297490065641e-06, + "loss": 0.5658, + "step": 20551 + }, + { + "epoch": 0.629888439377222, + "grad_norm": 1.5600728043125172, + "learning_rate": 6.364372762814814e-06, + "loss": 0.7296, + "step": 20552 + }, + { + "epoch": 0.6299190878999632, + "grad_norm": 1.687512522817973, + "learning_rate": 6.363448071386756e-06, + "loss": 0.6647, + "step": 20553 + }, + { + "epoch": 0.6299497364227045, + "grad_norm": 1.569491411795535, + "learning_rate": 6.362523415790567e-06, + "loss": 0.7358, + "step": 20554 + }, + { + "epoch": 0.6299803849454456, + "grad_norm": 1.63003444118982, + "learning_rate": 6.361598796035371e-06, + "loss": 0.7065, + "step": 20555 + }, + { + "epoch": 0.6300110334681869, + "grad_norm": 1.6054262537854795, + "learning_rate": 6.3606742121302686e-06, + "loss": 0.6394, + "step": 20556 + }, + { + "epoch": 0.630041681990928, + "grad_norm": 1.4122059176451252, + "learning_rate": 6.359749664084379e-06, + "loss": 0.6606, + "step": 20557 + }, + { + "epoch": 0.6300723305136693, + "grad_norm": 1.6378978569415679, + "learning_rate": 6.358825151906807e-06, + "loss": 0.6498, + "step": 20558 + }, + { + "epoch": 0.6301029790364104, + "grad_norm": 1.5589073044004838, + "learning_rate": 6.357900675606658e-06, + "loss": 0.6926, + "step": 20559 + }, + { + "epoch": 0.6301336275591517, + "grad_norm": 1.314948161613144, + "learning_rate": 6.3569762351930496e-06, + "loss": 0.6522, + "step": 20560 + }, + { + "epoch": 0.6301642760818928, + "grad_norm": 1.8140574535132097, + "learning_rate": 6.356051830675085e-06, + "loss": 0.7542, + "step": 20561 + }, + { + "epoch": 0.6301949246046341, + "grad_norm": 1.4672525649480912, + "learning_rate": 6.355127462061874e-06, + "loss": 0.6915, + "step": 20562 + }, + { + "epoch": 0.6302255731273753, + "grad_norm": 1.4866670606227157, + "learning_rate": 6.354203129362525e-06, + "loss": 0.6475, + "step": 20563 + }, + { + "epoch": 0.6302562216501164, + "grad_norm": 1.4002901289299392, + "learning_rate": 6.353278832586147e-06, + "loss": 0.6774, + "step": 20564 + }, + { + "epoch": 0.6302868701728577, + "grad_norm": 1.3889812944565043, + "learning_rate": 6.352354571741841e-06, + "loss": 0.6772, + "step": 20565 + }, + { + "epoch": 0.6303175186955988, + "grad_norm": 1.6096743605633819, + "learning_rate": 6.351430346838725e-06, + "loss": 0.7411, + "step": 20566 + }, + { + "epoch": 0.6303481672183401, + "grad_norm": 1.5801254274885486, + "learning_rate": 6.350506157885894e-06, + "loss": 0.652, + "step": 20567 + }, + { + "epoch": 0.6303788157410812, + "grad_norm": 1.8147745509396358, + "learning_rate": 6.349582004892462e-06, + "loss": 0.6736, + "step": 20568 + }, + { + "epoch": 0.6304094642638225, + "grad_norm": 1.476463853917971, + "learning_rate": 6.348657887867533e-06, + "loss": 0.5746, + "step": 20569 + }, + { + "epoch": 0.6304401127865636, + "grad_norm": 1.3990913373480178, + "learning_rate": 6.34773380682021e-06, + "loss": 0.6633, + "step": 20570 + }, + { + "epoch": 0.6304707613093049, + "grad_norm": 0.6894839365497161, + "learning_rate": 6.346809761759602e-06, + "loss": 0.5688, + "step": 20571 + }, + { + "epoch": 0.6305014098320461, + "grad_norm": 1.4255651246143561, + "learning_rate": 6.3458857526948115e-06, + "loss": 0.672, + "step": 20572 + }, + { + "epoch": 0.6305320583547873, + "grad_norm": 1.5438740223577503, + "learning_rate": 6.3449617796349424e-06, + "loss": 0.6341, + "step": 20573 + }, + { + "epoch": 0.6305627068775285, + "grad_norm": 1.639032154168159, + "learning_rate": 6.3440378425891025e-06, + "loss": 0.674, + "step": 20574 + }, + { + "epoch": 0.6305933554002697, + "grad_norm": 0.6583731878553466, + "learning_rate": 6.34311394156639e-06, + "loss": 0.5652, + "step": 20575 + }, + { + "epoch": 0.6306240039230109, + "grad_norm": 1.80427175005163, + "learning_rate": 6.342190076575917e-06, + "loss": 0.686, + "step": 20576 + }, + { + "epoch": 0.6306546524457521, + "grad_norm": 1.8890586937661462, + "learning_rate": 6.341266247626778e-06, + "loss": 0.6079, + "step": 20577 + }, + { + "epoch": 0.6306853009684933, + "grad_norm": 1.59468120807964, + "learning_rate": 6.340342454728077e-06, + "loss": 0.6905, + "step": 20578 + }, + { + "epoch": 0.6307159494912346, + "grad_norm": 0.656848974568067, + "learning_rate": 6.33941869788892e-06, + "loss": 0.5285, + "step": 20579 + }, + { + "epoch": 0.6307465980139757, + "grad_norm": 1.505963891845391, + "learning_rate": 6.338494977118408e-06, + "loss": 0.6162, + "step": 20580 + }, + { + "epoch": 0.630777246536717, + "grad_norm": 1.5829999614397665, + "learning_rate": 6.337571292425638e-06, + "loss": 0.7532, + "step": 20581 + }, + { + "epoch": 0.6308078950594581, + "grad_norm": 0.7063008463197087, + "learning_rate": 6.336647643819719e-06, + "loss": 0.5597, + "step": 20582 + }, + { + "epoch": 0.6308385435821994, + "grad_norm": 1.417873950536795, + "learning_rate": 6.335724031309749e-06, + "loss": 0.5726, + "step": 20583 + }, + { + "epoch": 0.6308691921049405, + "grad_norm": 1.6103130923278752, + "learning_rate": 6.334800454904822e-06, + "loss": 0.7372, + "step": 20584 + }, + { + "epoch": 0.6308998406276818, + "grad_norm": 1.8398883772762125, + "learning_rate": 6.33387691461405e-06, + "loss": 0.747, + "step": 20585 + }, + { + "epoch": 0.6309304891504229, + "grad_norm": 1.8750525398346338, + "learning_rate": 6.3329534104465206e-06, + "loss": 0.6606, + "step": 20586 + }, + { + "epoch": 0.6309611376731642, + "grad_norm": 1.354264393762694, + "learning_rate": 6.3320299424113455e-06, + "loss": 0.722, + "step": 20587 + }, + { + "epoch": 0.6309917861959053, + "grad_norm": 1.7930779266881818, + "learning_rate": 6.331106510517615e-06, + "loss": 0.6854, + "step": 20588 + }, + { + "epoch": 0.6310224347186466, + "grad_norm": 1.544120385393103, + "learning_rate": 6.330183114774431e-06, + "loss": 0.6094, + "step": 20589 + }, + { + "epoch": 0.6310530832413878, + "grad_norm": 1.37896301626367, + "learning_rate": 6.329259755190892e-06, + "loss": 0.6467, + "step": 20590 + }, + { + "epoch": 0.631083731764129, + "grad_norm": 1.438872407690848, + "learning_rate": 6.328336431776096e-06, + "loss": 0.7326, + "step": 20591 + }, + { + "epoch": 0.6311143802868702, + "grad_norm": 1.6104684258538602, + "learning_rate": 6.327413144539138e-06, + "loss": 0.6851, + "step": 20592 + }, + { + "epoch": 0.6311450288096114, + "grad_norm": 1.7199673391459926, + "learning_rate": 6.326489893489122e-06, + "loss": 0.597, + "step": 20593 + }, + { + "epoch": 0.6311756773323526, + "grad_norm": 0.6676200433921952, + "learning_rate": 6.325566678635138e-06, + "loss": 0.5578, + "step": 20594 + }, + { + "epoch": 0.6312063258550937, + "grad_norm": 1.5195428016785384, + "learning_rate": 6.324643499986287e-06, + "loss": 0.6426, + "step": 20595 + }, + { + "epoch": 0.631236974377835, + "grad_norm": 1.7622966605958426, + "learning_rate": 6.323720357551666e-06, + "loss": 0.7308, + "step": 20596 + }, + { + "epoch": 0.6312676229005761, + "grad_norm": 1.3517339215584512, + "learning_rate": 6.322797251340364e-06, + "loss": 0.7368, + "step": 20597 + }, + { + "epoch": 0.6312982714233174, + "grad_norm": 1.7276477224536022, + "learning_rate": 6.321874181361487e-06, + "loss": 0.6872, + "step": 20598 + }, + { + "epoch": 0.6313289199460586, + "grad_norm": 1.3571904244368327, + "learning_rate": 6.320951147624123e-06, + "loss": 0.654, + "step": 20599 + }, + { + "epoch": 0.6313595684687998, + "grad_norm": 1.6573119627777455, + "learning_rate": 6.320028150137365e-06, + "loss": 0.6843, + "step": 20600 + }, + { + "epoch": 0.631390216991541, + "grad_norm": 1.829819299816131, + "learning_rate": 6.319105188910315e-06, + "loss": 0.7625, + "step": 20601 + }, + { + "epoch": 0.6314208655142822, + "grad_norm": 1.600757306553174, + "learning_rate": 6.318182263952062e-06, + "loss": 0.6497, + "step": 20602 + }, + { + "epoch": 0.6314515140370234, + "grad_norm": 1.6935512373727544, + "learning_rate": 6.317259375271701e-06, + "loss": 0.8137, + "step": 20603 + }, + { + "epoch": 0.6314821625597646, + "grad_norm": 1.4257956172997721, + "learning_rate": 6.316336522878327e-06, + "loss": 0.5482, + "step": 20604 + }, + { + "epoch": 0.6315128110825058, + "grad_norm": 1.8506971492423199, + "learning_rate": 6.31541370678103e-06, + "loss": 0.6876, + "step": 20605 + }, + { + "epoch": 0.631543459605247, + "grad_norm": 1.4859496054310617, + "learning_rate": 6.314490926988906e-06, + "loss": 0.6795, + "step": 20606 + }, + { + "epoch": 0.6315741081279882, + "grad_norm": 1.3999136517803281, + "learning_rate": 6.3135681835110475e-06, + "loss": 0.6628, + "step": 20607 + }, + { + "epoch": 0.6316047566507295, + "grad_norm": 1.5408811559838098, + "learning_rate": 6.31264547635654e-06, + "loss": 0.727, + "step": 20608 + }, + { + "epoch": 0.6316354051734706, + "grad_norm": 1.6142278852143717, + "learning_rate": 6.311722805534483e-06, + "loss": 0.6647, + "step": 20609 + }, + { + "epoch": 0.6316660536962119, + "grad_norm": 1.5878816674740732, + "learning_rate": 6.310800171053967e-06, + "loss": 0.7022, + "step": 20610 + }, + { + "epoch": 0.631696702218953, + "grad_norm": 1.5986506905679363, + "learning_rate": 6.309877572924077e-06, + "loss": 0.7098, + "step": 20611 + }, + { + "epoch": 0.6317273507416943, + "grad_norm": 1.5244889360279843, + "learning_rate": 6.30895501115391e-06, + "loss": 0.6574, + "step": 20612 + }, + { + "epoch": 0.6317579992644354, + "grad_norm": 1.4643069468414065, + "learning_rate": 6.308032485752551e-06, + "loss": 0.7015, + "step": 20613 + }, + { + "epoch": 0.6317886477871767, + "grad_norm": 1.489370456082716, + "learning_rate": 6.307109996729094e-06, + "loss": 0.6318, + "step": 20614 + }, + { + "epoch": 0.6318192963099178, + "grad_norm": 0.6761378286008671, + "learning_rate": 6.306187544092628e-06, + "loss": 0.5375, + "step": 20615 + }, + { + "epoch": 0.6318499448326591, + "grad_norm": 1.4089497341423485, + "learning_rate": 6.305265127852238e-06, + "loss": 0.6077, + "step": 20616 + }, + { + "epoch": 0.6318805933554003, + "grad_norm": 1.3702003971393146, + "learning_rate": 6.304342748017021e-06, + "loss": 0.6426, + "step": 20617 + }, + { + "epoch": 0.6319112418781415, + "grad_norm": 1.5016269988851423, + "learning_rate": 6.303420404596059e-06, + "loss": 0.6709, + "step": 20618 + }, + { + "epoch": 0.6319418904008827, + "grad_norm": 1.7098885606656962, + "learning_rate": 6.302498097598439e-06, + "loss": 0.7607, + "step": 20619 + }, + { + "epoch": 0.6319725389236239, + "grad_norm": 1.3984090819506911, + "learning_rate": 6.301575827033254e-06, + "loss": 0.6203, + "step": 20620 + }, + { + "epoch": 0.6320031874463651, + "grad_norm": 1.4770052496484731, + "learning_rate": 6.300653592909585e-06, + "loss": 0.6023, + "step": 20621 + }, + { + "epoch": 0.6320338359691063, + "grad_norm": 1.4695504528005465, + "learning_rate": 6.299731395236526e-06, + "loss": 0.6378, + "step": 20622 + }, + { + "epoch": 0.6320644844918475, + "grad_norm": 1.5917320714455077, + "learning_rate": 6.2988092340231596e-06, + "loss": 0.7274, + "step": 20623 + }, + { + "epoch": 0.6320951330145888, + "grad_norm": 1.522117818402559, + "learning_rate": 6.297887109278572e-06, + "loss": 0.63, + "step": 20624 + }, + { + "epoch": 0.6321257815373299, + "grad_norm": 1.7303868151235537, + "learning_rate": 6.296965021011852e-06, + "loss": 0.6987, + "step": 20625 + }, + { + "epoch": 0.632156430060071, + "grad_norm": 1.7941658210524163, + "learning_rate": 6.296042969232081e-06, + "loss": 0.7031, + "step": 20626 + }, + { + "epoch": 0.6321870785828123, + "grad_norm": 0.6464051307214423, + "learning_rate": 6.295120953948346e-06, + "loss": 0.5299, + "step": 20627 + }, + { + "epoch": 0.6322177271055535, + "grad_norm": 1.4711966920621597, + "learning_rate": 6.294198975169736e-06, + "loss": 0.7198, + "step": 20628 + }, + { + "epoch": 0.6322483756282947, + "grad_norm": 1.5383511805229406, + "learning_rate": 6.293277032905325e-06, + "loss": 0.711, + "step": 20629 + }, + { + "epoch": 0.6322790241510359, + "grad_norm": 1.6302610789585492, + "learning_rate": 6.2923551271642105e-06, + "loss": 0.656, + "step": 20630 + }, + { + "epoch": 0.6323096726737771, + "grad_norm": 1.5864382406968482, + "learning_rate": 6.291433257955467e-06, + "loss": 0.7405, + "step": 20631 + }, + { + "epoch": 0.6323403211965183, + "grad_norm": 1.468902640405448, + "learning_rate": 6.29051142528818e-06, + "loss": 0.6254, + "step": 20632 + }, + { + "epoch": 0.6323709697192595, + "grad_norm": 1.7102272364354247, + "learning_rate": 6.289589629171433e-06, + "loss": 0.7906, + "step": 20633 + }, + { + "epoch": 0.6324016182420007, + "grad_norm": 0.6946737441208144, + "learning_rate": 6.288667869614309e-06, + "loss": 0.5628, + "step": 20634 + }, + { + "epoch": 0.632432266764742, + "grad_norm": 1.57265889588438, + "learning_rate": 6.287746146625889e-06, + "loss": 0.6722, + "step": 20635 + }, + { + "epoch": 0.6324629152874831, + "grad_norm": 1.7078936849693538, + "learning_rate": 6.286824460215257e-06, + "loss": 0.7083, + "step": 20636 + }, + { + "epoch": 0.6324935638102244, + "grad_norm": 0.6813308500907805, + "learning_rate": 6.285902810391498e-06, + "loss": 0.5529, + "step": 20637 + }, + { + "epoch": 0.6325242123329655, + "grad_norm": 1.3520470838914074, + "learning_rate": 6.28498119716368e-06, + "loss": 0.7288, + "step": 20638 + }, + { + "epoch": 0.6325548608557068, + "grad_norm": 1.2981311617196685, + "learning_rate": 6.284059620540901e-06, + "loss": 0.7006, + "step": 20639 + }, + { + "epoch": 0.6325855093784479, + "grad_norm": 1.6498788224402294, + "learning_rate": 6.283138080532225e-06, + "loss": 0.7063, + "step": 20640 + }, + { + "epoch": 0.6326161579011892, + "grad_norm": 1.5218259374275185, + "learning_rate": 6.282216577146749e-06, + "loss": 0.6512, + "step": 20641 + }, + { + "epoch": 0.6326468064239303, + "grad_norm": 1.3781365689952922, + "learning_rate": 6.2812951103935406e-06, + "loss": 0.6788, + "step": 20642 + }, + { + "epoch": 0.6326774549466716, + "grad_norm": 1.472575492803477, + "learning_rate": 6.280373680281682e-06, + "loss": 0.7136, + "step": 20643 + }, + { + "epoch": 0.6327081034694128, + "grad_norm": 1.460252450079271, + "learning_rate": 6.279452286820254e-06, + "loss": 0.6251, + "step": 20644 + }, + { + "epoch": 0.632738751992154, + "grad_norm": 1.4763725677412054, + "learning_rate": 6.278530930018336e-06, + "loss": 0.5935, + "step": 20645 + }, + { + "epoch": 0.6327694005148952, + "grad_norm": 1.7416558829979532, + "learning_rate": 6.2776096098850015e-06, + "loss": 0.7511, + "step": 20646 + }, + { + "epoch": 0.6328000490376364, + "grad_norm": 1.4995479993925511, + "learning_rate": 6.2766883264293345e-06, + "loss": 0.6573, + "step": 20647 + }, + { + "epoch": 0.6328306975603776, + "grad_norm": 1.5499378918674336, + "learning_rate": 6.2757670796604085e-06, + "loss": 0.6734, + "step": 20648 + }, + { + "epoch": 0.6328613460831188, + "grad_norm": 1.7359527563468173, + "learning_rate": 6.274845869587304e-06, + "loss": 0.6579, + "step": 20649 + }, + { + "epoch": 0.63289199460586, + "grad_norm": 1.5459333852062687, + "learning_rate": 6.273924696219098e-06, + "loss": 0.643, + "step": 20650 + }, + { + "epoch": 0.6329226431286012, + "grad_norm": 1.4824910811902547, + "learning_rate": 6.27300355956486e-06, + "loss": 0.6585, + "step": 20651 + }, + { + "epoch": 0.6329532916513424, + "grad_norm": 0.6508024286311961, + "learning_rate": 6.272082459633677e-06, + "loss": 0.5419, + "step": 20652 + }, + { + "epoch": 0.6329839401740837, + "grad_norm": 1.5863370402356434, + "learning_rate": 6.271161396434617e-06, + "loss": 0.6791, + "step": 20653 + }, + { + "epoch": 0.6330145886968248, + "grad_norm": 1.365177477808655, + "learning_rate": 6.270240369976757e-06, + "loss": 0.6173, + "step": 20654 + }, + { + "epoch": 0.6330452372195661, + "grad_norm": 1.7478322390787309, + "learning_rate": 6.269319380269174e-06, + "loss": 0.5916, + "step": 20655 + }, + { + "epoch": 0.6330758857423072, + "grad_norm": 1.2787549223616723, + "learning_rate": 6.268398427320941e-06, + "loss": 0.6864, + "step": 20656 + }, + { + "epoch": 0.6331065342650484, + "grad_norm": 1.6150809362713445, + "learning_rate": 6.2674775111411335e-06, + "loss": 0.7071, + "step": 20657 + }, + { + "epoch": 0.6331371827877896, + "grad_norm": 1.5399721439435585, + "learning_rate": 6.266556631738825e-06, + "loss": 0.6598, + "step": 20658 + }, + { + "epoch": 0.6331678313105308, + "grad_norm": 0.6555021617611447, + "learning_rate": 6.265635789123088e-06, + "loss": 0.5263, + "step": 20659 + }, + { + "epoch": 0.633198479833272, + "grad_norm": 1.4161885764651592, + "learning_rate": 6.264714983303e-06, + "loss": 0.6067, + "step": 20660 + }, + { + "epoch": 0.6332291283560132, + "grad_norm": 1.7648381780892495, + "learning_rate": 6.263794214287631e-06, + "loss": 0.7033, + "step": 20661 + }, + { + "epoch": 0.6332597768787545, + "grad_norm": 1.5270208596501058, + "learning_rate": 6.262873482086048e-06, + "loss": 0.668, + "step": 20662 + }, + { + "epoch": 0.6332904254014956, + "grad_norm": 1.5049124234492999, + "learning_rate": 6.261952786707336e-06, + "loss": 0.6318, + "step": 20663 + }, + { + "epoch": 0.6333210739242369, + "grad_norm": 1.431164121835766, + "learning_rate": 6.261032128160557e-06, + "loss": 0.6267, + "step": 20664 + }, + { + "epoch": 0.633351722446978, + "grad_norm": 0.6705067583028356, + "learning_rate": 6.260111506454783e-06, + "loss": 0.5365, + "step": 20665 + }, + { + "epoch": 0.6333823709697193, + "grad_norm": 1.669852187316319, + "learning_rate": 6.259190921599088e-06, + "loss": 0.6845, + "step": 20666 + }, + { + "epoch": 0.6334130194924604, + "grad_norm": 1.4656416074063863, + "learning_rate": 6.258270373602542e-06, + "loss": 0.7152, + "step": 20667 + }, + { + "epoch": 0.6334436680152017, + "grad_norm": 0.6815365120883047, + "learning_rate": 6.257349862474216e-06, + "loss": 0.5222, + "step": 20668 + }, + { + "epoch": 0.6334743165379428, + "grad_norm": 0.6637349990175855, + "learning_rate": 6.25642938822318e-06, + "loss": 0.5636, + "step": 20669 + }, + { + "epoch": 0.6335049650606841, + "grad_norm": 1.507754236373089, + "learning_rate": 6.255508950858501e-06, + "loss": 0.7031, + "step": 20670 + }, + { + "epoch": 0.6335356135834253, + "grad_norm": 1.7286880305535506, + "learning_rate": 6.254588550389254e-06, + "loss": 0.7323, + "step": 20671 + }, + { + "epoch": 0.6335662621061665, + "grad_norm": 1.5249804515741279, + "learning_rate": 6.253668186824503e-06, + "loss": 0.6844, + "step": 20672 + }, + { + "epoch": 0.6335969106289077, + "grad_norm": 1.6232500322203103, + "learning_rate": 6.252747860173316e-06, + "loss": 0.7767, + "step": 20673 + }, + { + "epoch": 0.6336275591516489, + "grad_norm": 1.3520360834362937, + "learning_rate": 6.251827570444764e-06, + "loss": 0.6133, + "step": 20674 + }, + { + "epoch": 0.6336582076743901, + "grad_norm": 1.7267484743138597, + "learning_rate": 6.250907317647913e-06, + "loss": 0.6491, + "step": 20675 + }, + { + "epoch": 0.6336888561971313, + "grad_norm": 1.5325495611337636, + "learning_rate": 6.249987101791833e-06, + "loss": 0.7052, + "step": 20676 + }, + { + "epoch": 0.6337195047198725, + "grad_norm": 1.4776996248823988, + "learning_rate": 6.249066922885589e-06, + "loss": 0.6109, + "step": 20677 + }, + { + "epoch": 0.6337501532426137, + "grad_norm": 1.6108175024460605, + "learning_rate": 6.248146780938247e-06, + "loss": 0.6612, + "step": 20678 + }, + { + "epoch": 0.6337808017653549, + "grad_norm": 1.4190003971287055, + "learning_rate": 6.247226675958877e-06, + "loss": 0.6803, + "step": 20679 + }, + { + "epoch": 0.6338114502880962, + "grad_norm": 1.5767957737151617, + "learning_rate": 6.246306607956545e-06, + "loss": 0.6566, + "step": 20680 + }, + { + "epoch": 0.6338420988108373, + "grad_norm": 1.817349200149597, + "learning_rate": 6.245386576940307e-06, + "loss": 0.7287, + "step": 20681 + }, + { + "epoch": 0.6338727473335786, + "grad_norm": 1.572178459201932, + "learning_rate": 6.244466582919243e-06, + "loss": 0.7234, + "step": 20682 + }, + { + "epoch": 0.6339033958563197, + "grad_norm": 1.467331095012271, + "learning_rate": 6.243546625902404e-06, + "loss": 0.6942, + "step": 20683 + }, + { + "epoch": 0.633934044379061, + "grad_norm": 1.4925707150154388, + "learning_rate": 6.242626705898868e-06, + "loss": 0.7437, + "step": 20684 + }, + { + "epoch": 0.6339646929018021, + "grad_norm": 1.4876275657935256, + "learning_rate": 6.24170682291769e-06, + "loss": 0.5619, + "step": 20685 + }, + { + "epoch": 0.6339953414245434, + "grad_norm": 1.4171763997389848, + "learning_rate": 6.240786976967934e-06, + "loss": 0.6293, + "step": 20686 + }, + { + "epoch": 0.6340259899472845, + "grad_norm": 0.6691289687257127, + "learning_rate": 6.239867168058668e-06, + "loss": 0.5644, + "step": 20687 + }, + { + "epoch": 0.6340566384700257, + "grad_norm": 1.4518939473644197, + "learning_rate": 6.238947396198953e-06, + "loss": 0.6317, + "step": 20688 + }, + { + "epoch": 0.634087286992767, + "grad_norm": 1.446229850934083, + "learning_rate": 6.238027661397849e-06, + "loss": 0.6685, + "step": 20689 + }, + { + "epoch": 0.6341179355155081, + "grad_norm": 1.9174803513524805, + "learning_rate": 6.237107963664424e-06, + "loss": 0.6952, + "step": 20690 + }, + { + "epoch": 0.6341485840382494, + "grad_norm": 1.5418913699138992, + "learning_rate": 6.236188303007738e-06, + "loss": 0.6731, + "step": 20691 + }, + { + "epoch": 0.6341792325609905, + "grad_norm": 1.668651523360916, + "learning_rate": 6.235268679436845e-06, + "loss": 0.7294, + "step": 20692 + }, + { + "epoch": 0.6342098810837318, + "grad_norm": 1.7080899745601972, + "learning_rate": 6.234349092960821e-06, + "loss": 0.6166, + "step": 20693 + }, + { + "epoch": 0.6342405296064729, + "grad_norm": 1.5597918769912211, + "learning_rate": 6.233429543588711e-06, + "loss": 0.6693, + "step": 20694 + }, + { + "epoch": 0.6342711781292142, + "grad_norm": 1.56524617691123, + "learning_rate": 6.23251003132959e-06, + "loss": 0.6871, + "step": 20695 + }, + { + "epoch": 0.6343018266519553, + "grad_norm": 1.5586840310446162, + "learning_rate": 6.231590556192511e-06, + "loss": 0.6765, + "step": 20696 + }, + { + "epoch": 0.6343324751746966, + "grad_norm": 1.6216408736298382, + "learning_rate": 6.230671118186531e-06, + "loss": 0.6745, + "step": 20697 + }, + { + "epoch": 0.6343631236974377, + "grad_norm": 1.449866101004825, + "learning_rate": 6.229751717320716e-06, + "loss": 0.6574, + "step": 20698 + }, + { + "epoch": 0.634393772220179, + "grad_norm": 1.5918769223855742, + "learning_rate": 6.22883235360412e-06, + "loss": 0.784, + "step": 20699 + }, + { + "epoch": 0.6344244207429202, + "grad_norm": 1.4219759197653787, + "learning_rate": 6.227913027045804e-06, + "loss": 0.6406, + "step": 20700 + }, + { + "epoch": 0.6344550692656614, + "grad_norm": 0.6517759892565592, + "learning_rate": 6.226993737654827e-06, + "loss": 0.5536, + "step": 20701 + }, + { + "epoch": 0.6344857177884026, + "grad_norm": 1.5140545508168568, + "learning_rate": 6.226074485440243e-06, + "loss": 0.6191, + "step": 20702 + }, + { + "epoch": 0.6345163663111438, + "grad_norm": 1.7474012397387992, + "learning_rate": 6.225155270411117e-06, + "loss": 0.7173, + "step": 20703 + }, + { + "epoch": 0.634547014833885, + "grad_norm": 0.7051218584821288, + "learning_rate": 6.224236092576502e-06, + "loss": 0.5852, + "step": 20704 + }, + { + "epoch": 0.6345776633566262, + "grad_norm": 1.5175224324871934, + "learning_rate": 6.223316951945451e-06, + "loss": 0.6935, + "step": 20705 + }, + { + "epoch": 0.6346083118793674, + "grad_norm": 0.6629045280908807, + "learning_rate": 6.222397848527029e-06, + "loss": 0.5629, + "step": 20706 + }, + { + "epoch": 0.6346389604021087, + "grad_norm": 1.786005042372572, + "learning_rate": 6.221478782330284e-06, + "loss": 0.6585, + "step": 20707 + }, + { + "epoch": 0.6346696089248498, + "grad_norm": 1.6567154029588034, + "learning_rate": 6.220559753364274e-06, + "loss": 0.6609, + "step": 20708 + }, + { + "epoch": 0.6347002574475911, + "grad_norm": 1.6237987925564272, + "learning_rate": 6.219640761638059e-06, + "loss": 0.7505, + "step": 20709 + }, + { + "epoch": 0.6347309059703322, + "grad_norm": 1.5721343538451058, + "learning_rate": 6.218721807160689e-06, + "loss": 0.699, + "step": 20710 + }, + { + "epoch": 0.6347615544930735, + "grad_norm": 1.6139004570605047, + "learning_rate": 6.217802889941223e-06, + "loss": 0.5917, + "step": 20711 + }, + { + "epoch": 0.6347922030158146, + "grad_norm": 1.5931971966919685, + "learning_rate": 6.216884009988711e-06, + "loss": 0.649, + "step": 20712 + }, + { + "epoch": 0.6348228515385559, + "grad_norm": 0.6894710257390768, + "learning_rate": 6.215965167312208e-06, + "loss": 0.5972, + "step": 20713 + }, + { + "epoch": 0.634853500061297, + "grad_norm": 1.504251031342349, + "learning_rate": 6.2150463619207694e-06, + "loss": 0.7132, + "step": 20714 + }, + { + "epoch": 0.6348841485840383, + "grad_norm": 1.590693243676309, + "learning_rate": 6.21412759382345e-06, + "loss": 0.6289, + "step": 20715 + }, + { + "epoch": 0.6349147971067794, + "grad_norm": 1.458298342497124, + "learning_rate": 6.213208863029296e-06, + "loss": 0.6005, + "step": 20716 + }, + { + "epoch": 0.6349454456295207, + "grad_norm": 1.7171374939236073, + "learning_rate": 6.212290169547366e-06, + "loss": 0.7864, + "step": 20717 + }, + { + "epoch": 0.6349760941522619, + "grad_norm": 1.2949032670326026, + "learning_rate": 6.21137151338671e-06, + "loss": 0.6823, + "step": 20718 + }, + { + "epoch": 0.635006742675003, + "grad_norm": 1.6311484362781712, + "learning_rate": 6.210452894556378e-06, + "loss": 0.6762, + "step": 20719 + }, + { + "epoch": 0.6350373911977443, + "grad_norm": 1.7510217123942184, + "learning_rate": 6.209534313065426e-06, + "loss": 0.6841, + "step": 20720 + }, + { + "epoch": 0.6350680397204854, + "grad_norm": 1.6105665294229945, + "learning_rate": 6.208615768922899e-06, + "loss": 0.6373, + "step": 20721 + }, + { + "epoch": 0.6350986882432267, + "grad_norm": 1.4778950471342276, + "learning_rate": 6.207697262137853e-06, + "loss": 0.7289, + "step": 20722 + }, + { + "epoch": 0.6351293367659678, + "grad_norm": 2.2386038950639717, + "learning_rate": 6.206778792719339e-06, + "loss": 0.6834, + "step": 20723 + }, + { + "epoch": 0.6351599852887091, + "grad_norm": 1.4769389972141256, + "learning_rate": 6.205860360676397e-06, + "loss": 0.6025, + "step": 20724 + }, + { + "epoch": 0.6351906338114502, + "grad_norm": 1.4991264887496856, + "learning_rate": 6.2049419660180906e-06, + "loss": 0.6903, + "step": 20725 + }, + { + "epoch": 0.6352212823341915, + "grad_norm": 1.3277372829369554, + "learning_rate": 6.20402360875346e-06, + "loss": 0.5972, + "step": 20726 + }, + { + "epoch": 0.6352519308569327, + "grad_norm": 1.538826007687714, + "learning_rate": 6.2031052888915535e-06, + "loss": 0.6506, + "step": 20727 + }, + { + "epoch": 0.6352825793796739, + "grad_norm": 1.5814275686817698, + "learning_rate": 6.202187006441425e-06, + "loss": 0.7143, + "step": 20728 + }, + { + "epoch": 0.6353132279024151, + "grad_norm": 1.4528229830513637, + "learning_rate": 6.201268761412116e-06, + "loss": 0.6905, + "step": 20729 + }, + { + "epoch": 0.6353438764251563, + "grad_norm": 1.5785535719699275, + "learning_rate": 6.20035055381268e-06, + "loss": 0.7465, + "step": 20730 + }, + { + "epoch": 0.6353745249478975, + "grad_norm": 1.5919723780480477, + "learning_rate": 6.199432383652164e-06, + "loss": 0.7059, + "step": 20731 + }, + { + "epoch": 0.6354051734706387, + "grad_norm": 1.3524548169054866, + "learning_rate": 6.19851425093961e-06, + "loss": 0.5728, + "step": 20732 + }, + { + "epoch": 0.6354358219933799, + "grad_norm": 1.6280993162025377, + "learning_rate": 6.197596155684069e-06, + "loss": 0.7184, + "step": 20733 + }, + { + "epoch": 0.6354664705161212, + "grad_norm": 1.4943330251145548, + "learning_rate": 6.1966780978945896e-06, + "loss": 0.5817, + "step": 20734 + }, + { + "epoch": 0.6354971190388623, + "grad_norm": 1.6607636179530862, + "learning_rate": 6.1957600775802065e-06, + "loss": 0.6421, + "step": 20735 + }, + { + "epoch": 0.6355277675616036, + "grad_norm": 1.4735314301510003, + "learning_rate": 6.19484209474998e-06, + "loss": 0.6792, + "step": 20736 + }, + { + "epoch": 0.6355584160843447, + "grad_norm": 1.6569030340425364, + "learning_rate": 6.193924149412941e-06, + "loss": 0.7518, + "step": 20737 + }, + { + "epoch": 0.635589064607086, + "grad_norm": 1.6961237504191535, + "learning_rate": 6.193006241578148e-06, + "loss": 0.7198, + "step": 20738 + }, + { + "epoch": 0.6356197131298271, + "grad_norm": 1.4731532627125277, + "learning_rate": 6.1920883712546366e-06, + "loss": 0.6154, + "step": 20739 + }, + { + "epoch": 0.6356503616525684, + "grad_norm": 1.584119446936356, + "learning_rate": 6.19117053845145e-06, + "loss": 0.718, + "step": 20740 + }, + { + "epoch": 0.6356810101753095, + "grad_norm": 1.4575160491022938, + "learning_rate": 6.190252743177636e-06, + "loss": 0.6616, + "step": 20741 + }, + { + "epoch": 0.6357116586980508, + "grad_norm": 0.6746937018566515, + "learning_rate": 6.189334985442237e-06, + "loss": 0.5442, + "step": 20742 + }, + { + "epoch": 0.635742307220792, + "grad_norm": 1.393645416598255, + "learning_rate": 6.188417265254294e-06, + "loss": 0.6531, + "step": 20743 + }, + { + "epoch": 0.6357729557435332, + "grad_norm": 1.5877444121289548, + "learning_rate": 6.187499582622854e-06, + "loss": 0.6723, + "step": 20744 + }, + { + "epoch": 0.6358036042662744, + "grad_norm": 1.4881420495208417, + "learning_rate": 6.186581937556956e-06, + "loss": 0.6504, + "step": 20745 + }, + { + "epoch": 0.6358342527890156, + "grad_norm": 1.5072372803150096, + "learning_rate": 6.185664330065637e-06, + "loss": 0.6792, + "step": 20746 + }, + { + "epoch": 0.6358649013117568, + "grad_norm": 0.6693581142821361, + "learning_rate": 6.184746760157948e-06, + "loss": 0.5377, + "step": 20747 + }, + { + "epoch": 0.635895549834498, + "grad_norm": 1.4143882542900097, + "learning_rate": 6.183829227842922e-06, + "loss": 0.6084, + "step": 20748 + }, + { + "epoch": 0.6359261983572392, + "grad_norm": 1.735253457952785, + "learning_rate": 6.182911733129606e-06, + "loss": 0.6944, + "step": 20749 + }, + { + "epoch": 0.6359568468799803, + "grad_norm": 1.399060395253127, + "learning_rate": 6.181994276027037e-06, + "loss": 0.7411, + "step": 20750 + }, + { + "epoch": 0.6359874954027216, + "grad_norm": 1.6429671185679595, + "learning_rate": 6.1810768565442524e-06, + "loss": 0.6558, + "step": 20751 + }, + { + "epoch": 0.6360181439254627, + "grad_norm": 1.512504970144562, + "learning_rate": 6.180159474690297e-06, + "loss": 0.6389, + "step": 20752 + }, + { + "epoch": 0.636048792448204, + "grad_norm": 1.7297419203642068, + "learning_rate": 6.179242130474208e-06, + "loss": 0.7064, + "step": 20753 + }, + { + "epoch": 0.6360794409709452, + "grad_norm": 1.4627211292483704, + "learning_rate": 6.178324823905022e-06, + "loss": 0.6954, + "step": 20754 + }, + { + "epoch": 0.6361100894936864, + "grad_norm": 1.6161356464503387, + "learning_rate": 6.177407554991781e-06, + "loss": 0.7172, + "step": 20755 + }, + { + "epoch": 0.6361407380164276, + "grad_norm": 1.404315984971427, + "learning_rate": 6.176490323743518e-06, + "loss": 0.6651, + "step": 20756 + }, + { + "epoch": 0.6361713865391688, + "grad_norm": 1.4986412720354456, + "learning_rate": 6.175573130169279e-06, + "loss": 0.7144, + "step": 20757 + }, + { + "epoch": 0.63620203506191, + "grad_norm": 1.7005445883233277, + "learning_rate": 6.174655974278096e-06, + "loss": 0.6726, + "step": 20758 + }, + { + "epoch": 0.6362326835846512, + "grad_norm": 0.7001761574402816, + "learning_rate": 6.173738856079001e-06, + "loss": 0.5548, + "step": 20759 + }, + { + "epoch": 0.6362633321073924, + "grad_norm": 1.4756229319308003, + "learning_rate": 6.172821775581044e-06, + "loss": 0.6515, + "step": 20760 + }, + { + "epoch": 0.6362939806301336, + "grad_norm": 1.4895892775582584, + "learning_rate": 6.171904732793249e-06, + "loss": 0.6485, + "step": 20761 + }, + { + "epoch": 0.6363246291528748, + "grad_norm": 1.3319422451224914, + "learning_rate": 6.170987727724655e-06, + "loss": 0.5792, + "step": 20762 + }, + { + "epoch": 0.6363552776756161, + "grad_norm": 1.4128411952169624, + "learning_rate": 6.1700707603843e-06, + "loss": 0.66, + "step": 20763 + }, + { + "epoch": 0.6363859261983572, + "grad_norm": 0.6553706182533355, + "learning_rate": 6.169153830781218e-06, + "loss": 0.5102, + "step": 20764 + }, + { + "epoch": 0.6364165747210985, + "grad_norm": 1.4433401280248617, + "learning_rate": 6.168236938924442e-06, + "loss": 0.6404, + "step": 20765 + }, + { + "epoch": 0.6364472232438396, + "grad_norm": 1.7133637576583824, + "learning_rate": 6.16732008482301e-06, + "loss": 0.683, + "step": 20766 + }, + { + "epoch": 0.6364778717665809, + "grad_norm": 1.4826858618992413, + "learning_rate": 6.166403268485951e-06, + "loss": 0.6709, + "step": 20767 + }, + { + "epoch": 0.636508520289322, + "grad_norm": 1.797873580842827, + "learning_rate": 6.1654864899223055e-06, + "loss": 0.7647, + "step": 20768 + }, + { + "epoch": 0.6365391688120633, + "grad_norm": 1.531033254857654, + "learning_rate": 6.164569749141102e-06, + "loss": 0.659, + "step": 20769 + }, + { + "epoch": 0.6365698173348044, + "grad_norm": 1.6838437479277488, + "learning_rate": 6.16365304615137e-06, + "loss": 0.6641, + "step": 20770 + }, + { + "epoch": 0.6366004658575457, + "grad_norm": 1.4865810276733542, + "learning_rate": 6.1627363809621495e-06, + "loss": 0.6538, + "step": 20771 + }, + { + "epoch": 0.6366311143802869, + "grad_norm": 1.4594241567704211, + "learning_rate": 6.16181975358247e-06, + "loss": 0.6272, + "step": 20772 + }, + { + "epoch": 0.6366617629030281, + "grad_norm": 1.4440456861263433, + "learning_rate": 6.160903164021359e-06, + "loss": 0.6191, + "step": 20773 + }, + { + "epoch": 0.6366924114257693, + "grad_norm": 1.3801871869997993, + "learning_rate": 6.159986612287854e-06, + "loss": 0.6199, + "step": 20774 + }, + { + "epoch": 0.6367230599485105, + "grad_norm": 1.614277445310602, + "learning_rate": 6.159070098390981e-06, + "loss": 0.7089, + "step": 20775 + }, + { + "epoch": 0.6367537084712517, + "grad_norm": 1.4694811834504151, + "learning_rate": 6.158153622339776e-06, + "loss": 0.6339, + "step": 20776 + }, + { + "epoch": 0.6367843569939929, + "grad_norm": 0.6681576656099246, + "learning_rate": 6.1572371841432675e-06, + "loss": 0.5232, + "step": 20777 + }, + { + "epoch": 0.6368150055167341, + "grad_norm": 1.7908122794493688, + "learning_rate": 6.156320783810479e-06, + "loss": 0.7121, + "step": 20778 + }, + { + "epoch": 0.6368456540394754, + "grad_norm": 1.7781879418148978, + "learning_rate": 6.155404421350451e-06, + "loss": 0.7035, + "step": 20779 + }, + { + "epoch": 0.6368763025622165, + "grad_norm": 1.6773707111260288, + "learning_rate": 6.1544880967722045e-06, + "loss": 0.7055, + "step": 20780 + }, + { + "epoch": 0.6369069510849577, + "grad_norm": 1.5726462041900702, + "learning_rate": 6.153571810084768e-06, + "loss": 0.6809, + "step": 20781 + }, + { + "epoch": 0.6369375996076989, + "grad_norm": 1.74460352432644, + "learning_rate": 6.152655561297176e-06, + "loss": 0.7269, + "step": 20782 + }, + { + "epoch": 0.6369682481304401, + "grad_norm": 1.587444502762165, + "learning_rate": 6.151739350418451e-06, + "loss": 0.6362, + "step": 20783 + }, + { + "epoch": 0.6369988966531813, + "grad_norm": 1.7176583079026129, + "learning_rate": 6.150823177457623e-06, + "loss": 0.6769, + "step": 20784 + }, + { + "epoch": 0.6370295451759225, + "grad_norm": 1.6609780420827265, + "learning_rate": 6.1499070424237216e-06, + "loss": 0.5564, + "step": 20785 + }, + { + "epoch": 0.6370601936986637, + "grad_norm": 0.6600077535592971, + "learning_rate": 6.148990945325768e-06, + "loss": 0.5675, + "step": 20786 + }, + { + "epoch": 0.6370908422214049, + "grad_norm": 1.5666677300624725, + "learning_rate": 6.148074886172793e-06, + "loss": 0.6932, + "step": 20787 + }, + { + "epoch": 0.6371214907441461, + "grad_norm": 1.533648159313058, + "learning_rate": 6.147158864973825e-06, + "loss": 0.686, + "step": 20788 + }, + { + "epoch": 0.6371521392668873, + "grad_norm": 1.56924921577438, + "learning_rate": 6.146242881737881e-06, + "loss": 0.6678, + "step": 20789 + }, + { + "epoch": 0.6371827877896286, + "grad_norm": 1.6231165941361787, + "learning_rate": 6.145326936473997e-06, + "loss": 0.5301, + "step": 20790 + }, + { + "epoch": 0.6372134363123697, + "grad_norm": 1.6378650504742678, + "learning_rate": 6.144411029191191e-06, + "loss": 0.6735, + "step": 20791 + }, + { + "epoch": 0.637244084835111, + "grad_norm": 1.6322268094427475, + "learning_rate": 6.143495159898487e-06, + "loss": 0.7605, + "step": 20792 + }, + { + "epoch": 0.6372747333578521, + "grad_norm": 1.6416470426934209, + "learning_rate": 6.142579328604915e-06, + "loss": 0.7253, + "step": 20793 + }, + { + "epoch": 0.6373053818805934, + "grad_norm": 0.6558449883564541, + "learning_rate": 6.141663535319493e-06, + "loss": 0.5447, + "step": 20794 + }, + { + "epoch": 0.6373360304033345, + "grad_norm": 1.453263589336194, + "learning_rate": 6.14074778005125e-06, + "loss": 0.6394, + "step": 20795 + }, + { + "epoch": 0.6373666789260758, + "grad_norm": 0.6640760640404498, + "learning_rate": 6.139832062809207e-06, + "loss": 0.5578, + "step": 20796 + }, + { + "epoch": 0.6373973274488169, + "grad_norm": 0.6709924755710326, + "learning_rate": 6.138916383602383e-06, + "loss": 0.5327, + "step": 20797 + }, + { + "epoch": 0.6374279759715582, + "grad_norm": 1.3719511851650614, + "learning_rate": 6.138000742439807e-06, + "loss": 0.5773, + "step": 20798 + }, + { + "epoch": 0.6374586244942994, + "grad_norm": 1.5026869814115476, + "learning_rate": 6.137085139330498e-06, + "loss": 0.5776, + "step": 20799 + }, + { + "epoch": 0.6374892730170406, + "grad_norm": 1.5426673571736313, + "learning_rate": 6.1361695742834746e-06, + "loss": 0.8003, + "step": 20800 + }, + { + "epoch": 0.6375199215397818, + "grad_norm": 0.6627520984263412, + "learning_rate": 6.1352540473077646e-06, + "loss": 0.5258, + "step": 20801 + }, + { + "epoch": 0.637550570062523, + "grad_norm": 1.7931703065124307, + "learning_rate": 6.134338558412381e-06, + "loss": 0.668, + "step": 20802 + }, + { + "epoch": 0.6375812185852642, + "grad_norm": 1.535517111003723, + "learning_rate": 6.133423107606353e-06, + "loss": 0.7341, + "step": 20803 + }, + { + "epoch": 0.6376118671080054, + "grad_norm": 1.7788222422255349, + "learning_rate": 6.132507694898695e-06, + "loss": 0.6054, + "step": 20804 + }, + { + "epoch": 0.6376425156307466, + "grad_norm": 1.5668006949099633, + "learning_rate": 6.131592320298427e-06, + "loss": 0.6453, + "step": 20805 + }, + { + "epoch": 0.6376731641534878, + "grad_norm": 1.711917661330371, + "learning_rate": 6.13067698381457e-06, + "loss": 0.6533, + "step": 20806 + }, + { + "epoch": 0.637703812676229, + "grad_norm": 1.4582477556089086, + "learning_rate": 6.129761685456143e-06, + "loss": 0.5738, + "step": 20807 + }, + { + "epoch": 0.6377344611989703, + "grad_norm": 1.7587663626896186, + "learning_rate": 6.128846425232163e-06, + "loss": 0.6783, + "step": 20808 + }, + { + "epoch": 0.6377651097217114, + "grad_norm": 1.7052504247410725, + "learning_rate": 6.127931203151651e-06, + "loss": 0.6783, + "step": 20809 + }, + { + "epoch": 0.6377957582444527, + "grad_norm": 1.5598842742158732, + "learning_rate": 6.127016019223624e-06, + "loss": 0.6537, + "step": 20810 + }, + { + "epoch": 0.6378264067671938, + "grad_norm": 1.4729347911840678, + "learning_rate": 6.1261008734570986e-06, + "loss": 0.6949, + "step": 20811 + }, + { + "epoch": 0.637857055289935, + "grad_norm": 1.6437729354172887, + "learning_rate": 6.125185765861095e-06, + "loss": 0.7007, + "step": 20812 + }, + { + "epoch": 0.6378877038126762, + "grad_norm": 1.4753987415360887, + "learning_rate": 6.124270696444623e-06, + "loss": 0.6347, + "step": 20813 + }, + { + "epoch": 0.6379183523354174, + "grad_norm": 1.4030667734904068, + "learning_rate": 6.123355665216706e-06, + "loss": 0.6768, + "step": 20814 + }, + { + "epoch": 0.6379490008581586, + "grad_norm": 1.7567469014545223, + "learning_rate": 6.1224406721863584e-06, + "loss": 0.6304, + "step": 20815 + }, + { + "epoch": 0.6379796493808998, + "grad_norm": 1.590954642601854, + "learning_rate": 6.121525717362592e-06, + "loss": 0.6706, + "step": 20816 + }, + { + "epoch": 0.638010297903641, + "grad_norm": 1.749046998697389, + "learning_rate": 6.120610800754427e-06, + "loss": 0.8365, + "step": 20817 + }, + { + "epoch": 0.6380409464263822, + "grad_norm": 1.5009381655417975, + "learning_rate": 6.119695922370876e-06, + "loss": 0.7076, + "step": 20818 + }, + { + "epoch": 0.6380715949491235, + "grad_norm": 1.456844176347099, + "learning_rate": 6.118781082220952e-06, + "loss": 0.6623, + "step": 20819 + }, + { + "epoch": 0.6381022434718646, + "grad_norm": 1.5829642273558588, + "learning_rate": 6.117866280313677e-06, + "loss": 0.6583, + "step": 20820 + }, + { + "epoch": 0.6381328919946059, + "grad_norm": 1.6227467427520768, + "learning_rate": 6.116951516658051e-06, + "loss": 0.6367, + "step": 20821 + }, + { + "epoch": 0.638163540517347, + "grad_norm": 0.705831152287806, + "learning_rate": 6.1160367912631025e-06, + "loss": 0.5482, + "step": 20822 + }, + { + "epoch": 0.6381941890400883, + "grad_norm": 1.6213291947050232, + "learning_rate": 6.115122104137834e-06, + "loss": 0.7404, + "step": 20823 + }, + { + "epoch": 0.6382248375628294, + "grad_norm": 1.5104688909332737, + "learning_rate": 6.1142074552912585e-06, + "loss": 0.6417, + "step": 20824 + }, + { + "epoch": 0.6382554860855707, + "grad_norm": 1.5323522131058132, + "learning_rate": 6.113292844732395e-06, + "loss": 0.5728, + "step": 20825 + }, + { + "epoch": 0.6382861346083119, + "grad_norm": 1.4706421913834147, + "learning_rate": 6.112378272470252e-06, + "loss": 0.6695, + "step": 20826 + }, + { + "epoch": 0.6383167831310531, + "grad_norm": 1.8219415408040105, + "learning_rate": 6.111463738513837e-06, + "loss": 0.6985, + "step": 20827 + }, + { + "epoch": 0.6383474316537943, + "grad_norm": 0.656888519882392, + "learning_rate": 6.110549242872167e-06, + "loss": 0.5442, + "step": 20828 + }, + { + "epoch": 0.6383780801765355, + "grad_norm": 0.6434241909592933, + "learning_rate": 6.109634785554248e-06, + "loss": 0.5385, + "step": 20829 + }, + { + "epoch": 0.6384087286992767, + "grad_norm": 1.7463701790861685, + "learning_rate": 6.108720366569096e-06, + "loss": 0.6662, + "step": 20830 + }, + { + "epoch": 0.6384393772220179, + "grad_norm": 0.6833038783288145, + "learning_rate": 6.107805985925719e-06, + "loss": 0.5602, + "step": 20831 + }, + { + "epoch": 0.6384700257447591, + "grad_norm": 1.559267522675864, + "learning_rate": 6.10689164363312e-06, + "loss": 0.7161, + "step": 20832 + }, + { + "epoch": 0.6385006742675003, + "grad_norm": 1.5656425810812422, + "learning_rate": 6.10597733970032e-06, + "loss": 0.7106, + "step": 20833 + }, + { + "epoch": 0.6385313227902415, + "grad_norm": 1.3669101322038542, + "learning_rate": 6.10506307413632e-06, + "loss": 0.5878, + "step": 20834 + }, + { + "epoch": 0.6385619713129828, + "grad_norm": 0.6562841846195376, + "learning_rate": 6.104148846950126e-06, + "loss": 0.5492, + "step": 20835 + }, + { + "epoch": 0.6385926198357239, + "grad_norm": 1.420274550417557, + "learning_rate": 6.103234658150754e-06, + "loss": 0.6512, + "step": 20836 + }, + { + "epoch": 0.6386232683584652, + "grad_norm": 1.6817648372349772, + "learning_rate": 6.102320507747206e-06, + "loss": 0.6346, + "step": 20837 + }, + { + "epoch": 0.6386539168812063, + "grad_norm": 1.4726368443928912, + "learning_rate": 6.101406395748493e-06, + "loss": 0.606, + "step": 20838 + }, + { + "epoch": 0.6386845654039476, + "grad_norm": 0.6665769473692191, + "learning_rate": 6.100492322163619e-06, + "loss": 0.5531, + "step": 20839 + }, + { + "epoch": 0.6387152139266887, + "grad_norm": 1.4373420224718045, + "learning_rate": 6.0995782870015904e-06, + "loss": 0.6854, + "step": 20840 + }, + { + "epoch": 0.63874586244943, + "grad_norm": 1.8789887420416072, + "learning_rate": 6.098664290271419e-06, + "loss": 0.6595, + "step": 20841 + }, + { + "epoch": 0.6387765109721711, + "grad_norm": 1.7328722553751805, + "learning_rate": 6.0977503319821066e-06, + "loss": 0.7411, + "step": 20842 + }, + { + "epoch": 0.6388071594949123, + "grad_norm": 1.701435693280379, + "learning_rate": 6.096836412142652e-06, + "loss": 0.6751, + "step": 20843 + }, + { + "epoch": 0.6388378080176536, + "grad_norm": 1.6575904274513185, + "learning_rate": 6.095922530762075e-06, + "loss": 0.7026, + "step": 20844 + }, + { + "epoch": 0.6388684565403947, + "grad_norm": 0.6709480278074088, + "learning_rate": 6.095008687849369e-06, + "loss": 0.5486, + "step": 20845 + }, + { + "epoch": 0.638899105063136, + "grad_norm": 1.5853152610754895, + "learning_rate": 6.094094883413539e-06, + "loss": 0.6104, + "step": 20846 + }, + { + "epoch": 0.6389297535858771, + "grad_norm": 0.6467242852392526, + "learning_rate": 6.093181117463593e-06, + "loss": 0.5022, + "step": 20847 + }, + { + "epoch": 0.6389604021086184, + "grad_norm": 1.6433051846448754, + "learning_rate": 6.092267390008533e-06, + "loss": 0.6168, + "step": 20848 + }, + { + "epoch": 0.6389910506313595, + "grad_norm": 1.5907758099883806, + "learning_rate": 6.091353701057363e-06, + "loss": 0.6276, + "step": 20849 + }, + { + "epoch": 0.6390216991541008, + "grad_norm": 1.5589032501163838, + "learning_rate": 6.090440050619087e-06, + "loss": 0.6923, + "step": 20850 + }, + { + "epoch": 0.6390523476768419, + "grad_norm": 1.8702330316727804, + "learning_rate": 6.089526438702702e-06, + "loss": 0.7626, + "step": 20851 + }, + { + "epoch": 0.6390829961995832, + "grad_norm": 1.8246935328222094, + "learning_rate": 6.088612865317214e-06, + "loss": 0.7668, + "step": 20852 + }, + { + "epoch": 0.6391136447223243, + "grad_norm": 1.3552269545434785, + "learning_rate": 6.087699330471628e-06, + "loss": 0.5963, + "step": 20853 + }, + { + "epoch": 0.6391442932450656, + "grad_norm": 1.598439278925159, + "learning_rate": 6.086785834174935e-06, + "loss": 0.6913, + "step": 20854 + }, + { + "epoch": 0.6391749417678068, + "grad_norm": 1.6501138858057156, + "learning_rate": 6.085872376436149e-06, + "loss": 0.7168, + "step": 20855 + }, + { + "epoch": 0.639205590290548, + "grad_norm": 1.4999608783532155, + "learning_rate": 6.084958957264258e-06, + "loss": 0.7474, + "step": 20856 + }, + { + "epoch": 0.6392362388132892, + "grad_norm": 1.480781165305329, + "learning_rate": 6.084045576668274e-06, + "loss": 0.592, + "step": 20857 + }, + { + "epoch": 0.6392668873360304, + "grad_norm": 1.530379347984527, + "learning_rate": 6.0831322346571875e-06, + "loss": 0.7255, + "step": 20858 + }, + { + "epoch": 0.6392975358587716, + "grad_norm": 0.6559451769101925, + "learning_rate": 6.08221893124e-06, + "loss": 0.5442, + "step": 20859 + }, + { + "epoch": 0.6393281843815128, + "grad_norm": 1.5983536577188941, + "learning_rate": 6.081305666425714e-06, + "loss": 0.6664, + "step": 20860 + }, + { + "epoch": 0.639358832904254, + "grad_norm": 1.5401222656511655, + "learning_rate": 6.080392440223326e-06, + "loss": 0.7771, + "step": 20861 + }, + { + "epoch": 0.6393894814269953, + "grad_norm": 0.6465638009471003, + "learning_rate": 6.079479252641833e-06, + "loss": 0.5488, + "step": 20862 + }, + { + "epoch": 0.6394201299497364, + "grad_norm": 1.4864406320942631, + "learning_rate": 6.078566103690235e-06, + "loss": 0.5362, + "step": 20863 + }, + { + "epoch": 0.6394507784724777, + "grad_norm": 1.6801088707149014, + "learning_rate": 6.077652993377527e-06, + "loss": 0.7428, + "step": 20864 + }, + { + "epoch": 0.6394814269952188, + "grad_norm": 1.537585295670197, + "learning_rate": 6.076739921712711e-06, + "loss": 0.7608, + "step": 20865 + }, + { + "epoch": 0.6395120755179601, + "grad_norm": 1.5091375800566214, + "learning_rate": 6.0758268887047785e-06, + "loss": 0.6415, + "step": 20866 + }, + { + "epoch": 0.6395427240407012, + "grad_norm": 0.7047587996988406, + "learning_rate": 6.0749138943627265e-06, + "loss": 0.5209, + "step": 20867 + }, + { + "epoch": 0.6395733725634425, + "grad_norm": 1.5740477819204801, + "learning_rate": 6.074000938695553e-06, + "loss": 0.5847, + "step": 20868 + }, + { + "epoch": 0.6396040210861836, + "grad_norm": 1.5472450168099545, + "learning_rate": 6.073088021712253e-06, + "loss": 0.7007, + "step": 20869 + }, + { + "epoch": 0.6396346696089249, + "grad_norm": 1.4695837358284614, + "learning_rate": 6.07217514342182e-06, + "loss": 0.7018, + "step": 20870 + }, + { + "epoch": 0.639665318131666, + "grad_norm": 1.5732931852720136, + "learning_rate": 6.071262303833252e-06, + "loss": 0.6755, + "step": 20871 + }, + { + "epoch": 0.6396959666544073, + "grad_norm": 1.5566479194058913, + "learning_rate": 6.070349502955543e-06, + "loss": 0.5419, + "step": 20872 + }, + { + "epoch": 0.6397266151771485, + "grad_norm": 1.5007905472951577, + "learning_rate": 6.069436740797682e-06, + "loss": 0.6302, + "step": 20873 + }, + { + "epoch": 0.6397572636998896, + "grad_norm": 1.4566823267294098, + "learning_rate": 6.068524017368671e-06, + "loss": 0.5942, + "step": 20874 + }, + { + "epoch": 0.6397879122226309, + "grad_norm": 1.7603086079309767, + "learning_rate": 6.067611332677492e-06, + "loss": 0.6888, + "step": 20875 + }, + { + "epoch": 0.639818560745372, + "grad_norm": 1.635004444543182, + "learning_rate": 6.066698686733152e-06, + "loss": 0.7345, + "step": 20876 + }, + { + "epoch": 0.6398492092681133, + "grad_norm": 1.527691053406359, + "learning_rate": 6.065786079544633e-06, + "loss": 0.5085, + "step": 20877 + }, + { + "epoch": 0.6398798577908544, + "grad_norm": 1.4849080376536796, + "learning_rate": 6.06487351112093e-06, + "loss": 0.6536, + "step": 20878 + }, + { + "epoch": 0.6399105063135957, + "grad_norm": 1.6697311370105772, + "learning_rate": 6.063960981471036e-06, + "loss": 0.6173, + "step": 20879 + }, + { + "epoch": 0.6399411548363368, + "grad_norm": 0.7167783141291483, + "learning_rate": 6.063048490603942e-06, + "loss": 0.5564, + "step": 20880 + }, + { + "epoch": 0.6399718033590781, + "grad_norm": 1.7967073660755857, + "learning_rate": 6.062136038528636e-06, + "loss": 0.747, + "step": 20881 + }, + { + "epoch": 0.6400024518818193, + "grad_norm": 1.4707680141512618, + "learning_rate": 6.061223625254113e-06, + "loss": 0.6356, + "step": 20882 + }, + { + "epoch": 0.6400331004045605, + "grad_norm": 1.727161961483516, + "learning_rate": 6.060311250789361e-06, + "loss": 0.7122, + "step": 20883 + }, + { + "epoch": 0.6400637489273017, + "grad_norm": 1.547406269829438, + "learning_rate": 6.059398915143371e-06, + "loss": 0.6935, + "step": 20884 + }, + { + "epoch": 0.6400943974500429, + "grad_norm": 1.3969033505006185, + "learning_rate": 6.0584866183251345e-06, + "loss": 0.6875, + "step": 20885 + }, + { + "epoch": 0.6401250459727841, + "grad_norm": 1.5539162721361022, + "learning_rate": 6.05757436034363e-06, + "loss": 0.6935, + "step": 20886 + }, + { + "epoch": 0.6401556944955253, + "grad_norm": 1.702393183930322, + "learning_rate": 6.056662141207862e-06, + "loss": 0.688, + "step": 20887 + }, + { + "epoch": 0.6401863430182665, + "grad_norm": 1.530608744091386, + "learning_rate": 6.055749960926808e-06, + "loss": 0.7083, + "step": 20888 + }, + { + "epoch": 0.6402169915410078, + "grad_norm": 1.3881402072211364, + "learning_rate": 6.054837819509457e-06, + "loss": 0.6041, + "step": 20889 + }, + { + "epoch": 0.6402476400637489, + "grad_norm": 1.4863652799674956, + "learning_rate": 6.0539257169648005e-06, + "loss": 0.6392, + "step": 20890 + }, + { + "epoch": 0.6402782885864902, + "grad_norm": 1.670356790616474, + "learning_rate": 6.053013653301821e-06, + "loss": 0.7629, + "step": 20891 + }, + { + "epoch": 0.6403089371092313, + "grad_norm": 1.5717218943293405, + "learning_rate": 6.0521016285295095e-06, + "loss": 0.6762, + "step": 20892 + }, + { + "epoch": 0.6403395856319726, + "grad_norm": 0.6514848551622995, + "learning_rate": 6.051189642656852e-06, + "loss": 0.537, + "step": 20893 + }, + { + "epoch": 0.6403702341547137, + "grad_norm": 1.4960609235713827, + "learning_rate": 6.050277695692831e-06, + "loss": 0.6931, + "step": 20894 + }, + { + "epoch": 0.640400882677455, + "grad_norm": 0.6724181660400625, + "learning_rate": 6.049365787646437e-06, + "loss": 0.545, + "step": 20895 + }, + { + "epoch": 0.6404315312001961, + "grad_norm": 0.6591470832123095, + "learning_rate": 6.0484539185266534e-06, + "loss": 0.5136, + "step": 20896 + }, + { + "epoch": 0.6404621797229374, + "grad_norm": 1.4822599463468593, + "learning_rate": 6.04754208834246e-06, + "loss": 0.6062, + "step": 20897 + }, + { + "epoch": 0.6404928282456785, + "grad_norm": 1.5436726341345153, + "learning_rate": 6.046630297102849e-06, + "loss": 0.6398, + "step": 20898 + }, + { + "epoch": 0.6405234767684198, + "grad_norm": 0.6292458076443849, + "learning_rate": 6.0457185448168006e-06, + "loss": 0.5341, + "step": 20899 + }, + { + "epoch": 0.640554125291161, + "grad_norm": 1.7688174495191546, + "learning_rate": 6.044806831493298e-06, + "loss": 0.7346, + "step": 20900 + }, + { + "epoch": 0.6405847738139022, + "grad_norm": 1.5276260537323005, + "learning_rate": 6.0438951571413266e-06, + "loss": 0.7034, + "step": 20901 + }, + { + "epoch": 0.6406154223366434, + "grad_norm": 0.6589969359798256, + "learning_rate": 6.042983521769868e-06, + "loss": 0.5568, + "step": 20902 + }, + { + "epoch": 0.6406460708593846, + "grad_norm": 1.754795754059124, + "learning_rate": 6.0420719253879045e-06, + "loss": 0.7893, + "step": 20903 + }, + { + "epoch": 0.6406767193821258, + "grad_norm": 0.6560678402442323, + "learning_rate": 6.041160368004422e-06, + "loss": 0.5378, + "step": 20904 + }, + { + "epoch": 0.6407073679048669, + "grad_norm": 1.421200165430242, + "learning_rate": 6.040248849628395e-06, + "loss": 0.6663, + "step": 20905 + }, + { + "epoch": 0.6407380164276082, + "grad_norm": 1.41317640833129, + "learning_rate": 6.039337370268812e-06, + "loss": 0.6967, + "step": 20906 + }, + { + "epoch": 0.6407686649503493, + "grad_norm": 1.5521443622898463, + "learning_rate": 6.0384259299346534e-06, + "loss": 0.7522, + "step": 20907 + }, + { + "epoch": 0.6407993134730906, + "grad_norm": 1.7164933033836562, + "learning_rate": 6.037514528634893e-06, + "loss": 0.7661, + "step": 20908 + }, + { + "epoch": 0.6408299619958318, + "grad_norm": 1.8339477009581175, + "learning_rate": 6.0366031663785185e-06, + "loss": 0.6227, + "step": 20909 + }, + { + "epoch": 0.640860610518573, + "grad_norm": 1.6400112359149974, + "learning_rate": 6.0356918431745055e-06, + "loss": 0.7476, + "step": 20910 + }, + { + "epoch": 0.6408912590413142, + "grad_norm": 2.524753898106661, + "learning_rate": 6.034780559031836e-06, + "loss": 0.714, + "step": 20911 + }, + { + "epoch": 0.6409219075640554, + "grad_norm": 1.3638431121856756, + "learning_rate": 6.033869313959489e-06, + "loss": 0.6086, + "step": 20912 + }, + { + "epoch": 0.6409525560867966, + "grad_norm": 1.4462324880945339, + "learning_rate": 6.03295810796644e-06, + "loss": 0.6543, + "step": 20913 + }, + { + "epoch": 0.6409832046095378, + "grad_norm": 1.5689679419493046, + "learning_rate": 6.032046941061673e-06, + "loss": 0.7279, + "step": 20914 + }, + { + "epoch": 0.641013853132279, + "grad_norm": 1.545978761985977, + "learning_rate": 6.031135813254161e-06, + "loss": 0.6282, + "step": 20915 + }, + { + "epoch": 0.6410445016550202, + "grad_norm": 1.2673323762981066, + "learning_rate": 6.030224724552882e-06, + "loss": 0.5779, + "step": 20916 + }, + { + "epoch": 0.6410751501777614, + "grad_norm": 1.6717437590909412, + "learning_rate": 6.029313674966819e-06, + "loss": 0.7133, + "step": 20917 + }, + { + "epoch": 0.6411057987005027, + "grad_norm": 1.7721673363327803, + "learning_rate": 6.028402664504942e-06, + "loss": 0.6134, + "step": 20918 + }, + { + "epoch": 0.6411364472232438, + "grad_norm": 1.5922902287203264, + "learning_rate": 6.027491693176228e-06, + "loss": 0.5901, + "step": 20919 + }, + { + "epoch": 0.6411670957459851, + "grad_norm": 1.7675946731370202, + "learning_rate": 6.026580760989655e-06, + "loss": 0.7651, + "step": 20920 + }, + { + "epoch": 0.6411977442687262, + "grad_norm": 1.6298455819420257, + "learning_rate": 6.025669867954198e-06, + "loss": 0.7064, + "step": 20921 + }, + { + "epoch": 0.6412283927914675, + "grad_norm": 1.8017447515279008, + "learning_rate": 6.024759014078836e-06, + "loss": 0.7335, + "step": 20922 + }, + { + "epoch": 0.6412590413142086, + "grad_norm": 1.5031536013306008, + "learning_rate": 6.0238481993725385e-06, + "loss": 0.6275, + "step": 20923 + }, + { + "epoch": 0.6412896898369499, + "grad_norm": 1.6460470972607575, + "learning_rate": 6.0229374238442795e-06, + "loss": 0.7427, + "step": 20924 + }, + { + "epoch": 0.641320338359691, + "grad_norm": 1.7336154643530308, + "learning_rate": 6.022026687503039e-06, + "loss": 0.6745, + "step": 20925 + }, + { + "epoch": 0.6413509868824323, + "grad_norm": 2.6661672260383202, + "learning_rate": 6.021115990357789e-06, + "loss": 0.7227, + "step": 20926 + }, + { + "epoch": 0.6413816354051735, + "grad_norm": 1.6942462360414885, + "learning_rate": 6.020205332417495e-06, + "loss": 0.6633, + "step": 20927 + }, + { + "epoch": 0.6414122839279147, + "grad_norm": 0.6860983178472633, + "learning_rate": 6.019294713691143e-06, + "loss": 0.5375, + "step": 20928 + }, + { + "epoch": 0.6414429324506559, + "grad_norm": 1.5384857145074142, + "learning_rate": 6.018384134187692e-06, + "loss": 0.6825, + "step": 20929 + }, + { + "epoch": 0.6414735809733971, + "grad_norm": 0.7108123629614369, + "learning_rate": 6.017473593916127e-06, + "loss": 0.5961, + "step": 20930 + }, + { + "epoch": 0.6415042294961383, + "grad_norm": 1.6804036560434197, + "learning_rate": 6.016563092885412e-06, + "loss": 0.6821, + "step": 20931 + }, + { + "epoch": 0.6415348780188795, + "grad_norm": 0.6802714969913191, + "learning_rate": 6.015652631104516e-06, + "loss": 0.5591, + "step": 20932 + }, + { + "epoch": 0.6415655265416207, + "grad_norm": 1.7425338650222502, + "learning_rate": 6.014742208582418e-06, + "loss": 0.8655, + "step": 20933 + }, + { + "epoch": 0.641596175064362, + "grad_norm": 1.761770904343287, + "learning_rate": 6.013831825328085e-06, + "loss": 0.7555, + "step": 20934 + }, + { + "epoch": 0.6416268235871031, + "grad_norm": 0.6323151080622063, + "learning_rate": 6.012921481350484e-06, + "loss": 0.5475, + "step": 20935 + }, + { + "epoch": 0.6416574721098443, + "grad_norm": 1.6204975412429246, + "learning_rate": 6.012011176658589e-06, + "loss": 0.7327, + "step": 20936 + }, + { + "epoch": 0.6416881206325855, + "grad_norm": 1.4612414712009445, + "learning_rate": 6.0111009112613685e-06, + "loss": 0.579, + "step": 20937 + }, + { + "epoch": 0.6417187691553267, + "grad_norm": 1.7318728244015067, + "learning_rate": 6.010190685167792e-06, + "loss": 0.5996, + "step": 20938 + }, + { + "epoch": 0.6417494176780679, + "grad_norm": 1.6155572437367, + "learning_rate": 6.009280498386829e-06, + "loss": 0.6655, + "step": 20939 + }, + { + "epoch": 0.6417800662008091, + "grad_norm": 0.6841085089098059, + "learning_rate": 6.008370350927442e-06, + "loss": 0.5332, + "step": 20940 + }, + { + "epoch": 0.6418107147235503, + "grad_norm": 1.5280551687924404, + "learning_rate": 6.007460242798608e-06, + "loss": 0.6674, + "step": 20941 + }, + { + "epoch": 0.6418413632462915, + "grad_norm": 1.68087725262877, + "learning_rate": 6.006550174009287e-06, + "loss": 0.6602, + "step": 20942 + }, + { + "epoch": 0.6418720117690327, + "grad_norm": 1.5891362641129811, + "learning_rate": 6.0056401445684486e-06, + "loss": 0.7503, + "step": 20943 + }, + { + "epoch": 0.6419026602917739, + "grad_norm": 1.677965963863771, + "learning_rate": 6.004730154485061e-06, + "loss": 0.7004, + "step": 20944 + }, + { + "epoch": 0.6419333088145152, + "grad_norm": 1.8753644966144842, + "learning_rate": 6.003820203768089e-06, + "loss": 0.7012, + "step": 20945 + }, + { + "epoch": 0.6419639573372563, + "grad_norm": 1.393937449011912, + "learning_rate": 6.002910292426498e-06, + "loss": 0.593, + "step": 20946 + }, + { + "epoch": 0.6419946058599976, + "grad_norm": 1.5726113696870023, + "learning_rate": 6.002000420469256e-06, + "loss": 0.7397, + "step": 20947 + }, + { + "epoch": 0.6420252543827387, + "grad_norm": 1.731861134663581, + "learning_rate": 6.001090587905325e-06, + "loss": 0.6329, + "step": 20948 + }, + { + "epoch": 0.64205590290548, + "grad_norm": 1.6744584610255728, + "learning_rate": 6.000180794743673e-06, + "loss": 0.7093, + "step": 20949 + }, + { + "epoch": 0.6420865514282211, + "grad_norm": 1.4230241307119265, + "learning_rate": 5.999271040993267e-06, + "loss": 0.5964, + "step": 20950 + }, + { + "epoch": 0.6421171999509624, + "grad_norm": 1.578352421321063, + "learning_rate": 5.998361326663058e-06, + "loss": 0.7267, + "step": 20951 + }, + { + "epoch": 0.6421478484737035, + "grad_norm": 2.003156788937655, + "learning_rate": 5.997451651762027e-06, + "loss": 0.8481, + "step": 20952 + }, + { + "epoch": 0.6421784969964448, + "grad_norm": 1.652170548395225, + "learning_rate": 5.996542016299126e-06, + "loss": 0.6526, + "step": 20953 + }, + { + "epoch": 0.642209145519186, + "grad_norm": 1.6255156455417399, + "learning_rate": 5.995632420283319e-06, + "loss": 0.602, + "step": 20954 + }, + { + "epoch": 0.6422397940419272, + "grad_norm": 1.555931174317358, + "learning_rate": 5.994722863723572e-06, + "loss": 0.67, + "step": 20955 + }, + { + "epoch": 0.6422704425646684, + "grad_norm": 1.5982851254923098, + "learning_rate": 5.993813346628845e-06, + "loss": 0.7498, + "step": 20956 + }, + { + "epoch": 0.6423010910874096, + "grad_norm": 1.4705151332567306, + "learning_rate": 5.992903869008101e-06, + "loss": 0.6919, + "step": 20957 + }, + { + "epoch": 0.6423317396101508, + "grad_norm": 1.8539001529112766, + "learning_rate": 5.991994430870301e-06, + "loss": 0.678, + "step": 20958 + }, + { + "epoch": 0.642362388132892, + "grad_norm": 1.6493595379713137, + "learning_rate": 5.991085032224402e-06, + "loss": 0.7191, + "step": 20959 + }, + { + "epoch": 0.6423930366556332, + "grad_norm": 1.5157786812459748, + "learning_rate": 5.990175673079373e-06, + "loss": 0.64, + "step": 20960 + }, + { + "epoch": 0.6424236851783744, + "grad_norm": 1.4222032629606671, + "learning_rate": 5.989266353444166e-06, + "loss": 0.6011, + "step": 20961 + }, + { + "epoch": 0.6424543337011156, + "grad_norm": 0.6682187266185489, + "learning_rate": 5.988357073327743e-06, + "loss": 0.5477, + "step": 20962 + }, + { + "epoch": 0.6424849822238569, + "grad_norm": 1.688060800023355, + "learning_rate": 5.987447832739066e-06, + "loss": 0.6208, + "step": 20963 + }, + { + "epoch": 0.642515630746598, + "grad_norm": 1.567816338395705, + "learning_rate": 5.986538631687089e-06, + "loss": 0.6215, + "step": 20964 + }, + { + "epoch": 0.6425462792693393, + "grad_norm": 1.6953809777124735, + "learning_rate": 5.9856294701807775e-06, + "loss": 0.6903, + "step": 20965 + }, + { + "epoch": 0.6425769277920804, + "grad_norm": 1.6462368398848892, + "learning_rate": 5.984720348229085e-06, + "loss": 0.7576, + "step": 20966 + }, + { + "epoch": 0.6426075763148216, + "grad_norm": 0.6674138150954821, + "learning_rate": 5.983811265840969e-06, + "loss": 0.531, + "step": 20967 + }, + { + "epoch": 0.6426382248375628, + "grad_norm": 1.7149130175381122, + "learning_rate": 5.982902223025388e-06, + "loss": 0.7392, + "step": 20968 + }, + { + "epoch": 0.642668873360304, + "grad_norm": 1.5224285883620945, + "learning_rate": 5.981993219791303e-06, + "loss": 0.6324, + "step": 20969 + }, + { + "epoch": 0.6426995218830452, + "grad_norm": 1.644393774123452, + "learning_rate": 5.981084256147661e-06, + "loss": 0.7283, + "step": 20970 + }, + { + "epoch": 0.6427301704057864, + "grad_norm": 1.551159775864122, + "learning_rate": 5.980175332103431e-06, + "loss": 0.6574, + "step": 20971 + }, + { + "epoch": 0.6427608189285277, + "grad_norm": 1.6804541493604488, + "learning_rate": 5.979266447667558e-06, + "loss": 0.6829, + "step": 20972 + }, + { + "epoch": 0.6427914674512688, + "grad_norm": 1.674254219931632, + "learning_rate": 5.978357602849e-06, + "loss": 0.6594, + "step": 20973 + }, + { + "epoch": 0.6428221159740101, + "grad_norm": 1.4461565872037734, + "learning_rate": 5.977448797656715e-06, + "loss": 0.7022, + "step": 20974 + }, + { + "epoch": 0.6428527644967512, + "grad_norm": 1.5674495291159154, + "learning_rate": 5.976540032099656e-06, + "loss": 0.6384, + "step": 20975 + }, + { + "epoch": 0.6428834130194925, + "grad_norm": 1.6076690106357912, + "learning_rate": 5.975631306186777e-06, + "loss": 0.7578, + "step": 20976 + }, + { + "epoch": 0.6429140615422336, + "grad_norm": 1.6276695717778487, + "learning_rate": 5.974722619927033e-06, + "loss": 0.6255, + "step": 20977 + }, + { + "epoch": 0.6429447100649749, + "grad_norm": 1.476517650345777, + "learning_rate": 5.9738139733293764e-06, + "loss": 0.7453, + "step": 20978 + }, + { + "epoch": 0.642975358587716, + "grad_norm": 1.3765752243691627, + "learning_rate": 5.972905366402763e-06, + "loss": 0.66, + "step": 20979 + }, + { + "epoch": 0.6430060071104573, + "grad_norm": 1.4899405516561457, + "learning_rate": 5.971996799156144e-06, + "loss": 0.6784, + "step": 20980 + }, + { + "epoch": 0.6430366556331985, + "grad_norm": 1.8894101100184069, + "learning_rate": 5.971088271598467e-06, + "loss": 0.7552, + "step": 20981 + }, + { + "epoch": 0.6430673041559397, + "grad_norm": 1.6488762834586692, + "learning_rate": 5.970179783738692e-06, + "loss": 0.6548, + "step": 20982 + }, + { + "epoch": 0.6430979526786809, + "grad_norm": 1.7820023793726874, + "learning_rate": 5.969271335585761e-06, + "loss": 0.7422, + "step": 20983 + }, + { + "epoch": 0.6431286012014221, + "grad_norm": 1.6127442025522039, + "learning_rate": 5.9683629271486375e-06, + "loss": 0.7087, + "step": 20984 + }, + { + "epoch": 0.6431592497241633, + "grad_norm": 1.675178993044596, + "learning_rate": 5.967454558436263e-06, + "loss": 0.6354, + "step": 20985 + }, + { + "epoch": 0.6431898982469045, + "grad_norm": 1.882041366139825, + "learning_rate": 5.96654622945759e-06, + "loss": 0.7341, + "step": 20986 + }, + { + "epoch": 0.6432205467696457, + "grad_norm": 1.6241865296292937, + "learning_rate": 5.9656379402215695e-06, + "loss": 0.7142, + "step": 20987 + }, + { + "epoch": 0.643251195292387, + "grad_norm": 1.7080881918627544, + "learning_rate": 5.964729690737152e-06, + "loss": 0.7215, + "step": 20988 + }, + { + "epoch": 0.6432818438151281, + "grad_norm": 1.7801642088153573, + "learning_rate": 5.963821481013281e-06, + "loss": 0.6359, + "step": 20989 + }, + { + "epoch": 0.6433124923378694, + "grad_norm": 1.7766662396742272, + "learning_rate": 5.9629133110589135e-06, + "loss": 0.743, + "step": 20990 + }, + { + "epoch": 0.6433431408606105, + "grad_norm": 1.4946658222577087, + "learning_rate": 5.9620051808829925e-06, + "loss": 0.6649, + "step": 20991 + }, + { + "epoch": 0.6433737893833518, + "grad_norm": 1.5801458242608444, + "learning_rate": 5.961097090494468e-06, + "loss": 0.6584, + "step": 20992 + }, + { + "epoch": 0.6434044379060929, + "grad_norm": 1.6056082989328517, + "learning_rate": 5.960189039902291e-06, + "loss": 0.6938, + "step": 20993 + }, + { + "epoch": 0.6434350864288342, + "grad_norm": 1.6835399567400102, + "learning_rate": 5.959281029115398e-06, + "loss": 0.7386, + "step": 20994 + }, + { + "epoch": 0.6434657349515753, + "grad_norm": 0.6742076410563711, + "learning_rate": 5.958373058142748e-06, + "loss": 0.5453, + "step": 20995 + }, + { + "epoch": 0.6434963834743166, + "grad_norm": 1.6312555748929134, + "learning_rate": 5.957465126993282e-06, + "loss": 0.7048, + "step": 20996 + }, + { + "epoch": 0.6435270319970577, + "grad_norm": 1.6282030440958841, + "learning_rate": 5.956557235675944e-06, + "loss": 0.6532, + "step": 20997 + }, + { + "epoch": 0.6435576805197989, + "grad_norm": 1.7253974267070473, + "learning_rate": 5.9556493841996836e-06, + "loss": 0.5475, + "step": 20998 + }, + { + "epoch": 0.6435883290425402, + "grad_norm": 1.4986655743455972, + "learning_rate": 5.954741572573443e-06, + "loss": 0.6112, + "step": 20999 + }, + { + "epoch": 0.6436189775652813, + "grad_norm": 1.4861555067513845, + "learning_rate": 5.95383380080617e-06, + "loss": 0.6735, + "step": 21000 + }, + { + "epoch": 0.6436496260880226, + "grad_norm": 1.4966793528364486, + "learning_rate": 5.952926068906808e-06, + "loss": 0.6763, + "step": 21001 + }, + { + "epoch": 0.6436802746107637, + "grad_norm": 1.5797239240288785, + "learning_rate": 5.952018376884299e-06, + "loss": 0.6815, + "step": 21002 + }, + { + "epoch": 0.643710923133505, + "grad_norm": 1.308273759360435, + "learning_rate": 5.9511107247475904e-06, + "loss": 0.5936, + "step": 21003 + }, + { + "epoch": 0.6437415716562461, + "grad_norm": 1.6416294966692868, + "learning_rate": 5.950203112505628e-06, + "loss": 0.656, + "step": 21004 + }, + { + "epoch": 0.6437722201789874, + "grad_norm": 0.6401228420528612, + "learning_rate": 5.9492955401673435e-06, + "loss": 0.538, + "step": 21005 + }, + { + "epoch": 0.6438028687017285, + "grad_norm": 1.505030557326839, + "learning_rate": 5.94838800774169e-06, + "loss": 0.744, + "step": 21006 + }, + { + "epoch": 0.6438335172244698, + "grad_norm": 1.5564364265068553, + "learning_rate": 5.947480515237607e-06, + "loss": 0.7146, + "step": 21007 + }, + { + "epoch": 0.643864165747211, + "grad_norm": 1.4936291714539807, + "learning_rate": 5.946573062664031e-06, + "loss": 0.6241, + "step": 21008 + }, + { + "epoch": 0.6438948142699522, + "grad_norm": 1.5616187566246427, + "learning_rate": 5.9456656500299115e-06, + "loss": 0.6166, + "step": 21009 + }, + { + "epoch": 0.6439254627926934, + "grad_norm": 1.611477678001425, + "learning_rate": 5.944758277344183e-06, + "loss": 0.7489, + "step": 21010 + }, + { + "epoch": 0.6439561113154346, + "grad_norm": 1.5206444346277137, + "learning_rate": 5.943850944615791e-06, + "loss": 0.6258, + "step": 21011 + }, + { + "epoch": 0.6439867598381758, + "grad_norm": 1.66170137562776, + "learning_rate": 5.942943651853677e-06, + "loss": 0.6814, + "step": 21012 + }, + { + "epoch": 0.644017408360917, + "grad_norm": 1.4921037700410047, + "learning_rate": 5.942036399066769e-06, + "loss": 0.6903, + "step": 21013 + }, + { + "epoch": 0.6440480568836582, + "grad_norm": 1.4538144558997104, + "learning_rate": 5.9411291862640205e-06, + "loss": 0.5841, + "step": 21014 + }, + { + "epoch": 0.6440787054063994, + "grad_norm": 0.6673029071981396, + "learning_rate": 5.940222013454364e-06, + "loss": 0.5486, + "step": 21015 + }, + { + "epoch": 0.6441093539291406, + "grad_norm": 1.8920898630066463, + "learning_rate": 5.939314880646736e-06, + "loss": 0.6873, + "step": 21016 + }, + { + "epoch": 0.6441400024518819, + "grad_norm": 1.5972731976739807, + "learning_rate": 5.93840778785008e-06, + "loss": 0.7199, + "step": 21017 + }, + { + "epoch": 0.644170650974623, + "grad_norm": 0.709989527213017, + "learning_rate": 5.937500735073329e-06, + "loss": 0.5252, + "step": 21018 + }, + { + "epoch": 0.6442012994973643, + "grad_norm": 1.4683704719109505, + "learning_rate": 5.936593722325423e-06, + "loss": 0.6512, + "step": 21019 + }, + { + "epoch": 0.6442319480201054, + "grad_norm": 1.8280958133834515, + "learning_rate": 5.9356867496153015e-06, + "loss": 0.744, + "step": 21020 + }, + { + "epoch": 0.6442625965428467, + "grad_norm": 1.548409889416176, + "learning_rate": 5.934779816951895e-06, + "loss": 0.7791, + "step": 21021 + }, + { + "epoch": 0.6442932450655878, + "grad_norm": 1.6108676355918465, + "learning_rate": 5.933872924344145e-06, + "loss": 0.6976, + "step": 21022 + }, + { + "epoch": 0.6443238935883291, + "grad_norm": 1.5210505692832725, + "learning_rate": 5.9329660718009874e-06, + "loss": 0.6717, + "step": 21023 + }, + { + "epoch": 0.6443545421110702, + "grad_norm": 1.421601973470985, + "learning_rate": 5.932059259331351e-06, + "loss": 0.6869, + "step": 21024 + }, + { + "epoch": 0.6443851906338115, + "grad_norm": 0.6620164080133205, + "learning_rate": 5.931152486944181e-06, + "loss": 0.5494, + "step": 21025 + }, + { + "epoch": 0.6444158391565527, + "grad_norm": 1.5233886523655027, + "learning_rate": 5.930245754648403e-06, + "loss": 0.5597, + "step": 21026 + }, + { + "epoch": 0.6444464876792939, + "grad_norm": 1.875666039646687, + "learning_rate": 5.929339062452955e-06, + "loss": 0.6826, + "step": 21027 + }, + { + "epoch": 0.6444771362020351, + "grad_norm": 1.5781774445360157, + "learning_rate": 5.9284324103667715e-06, + "loss": 0.6683, + "step": 21028 + }, + { + "epoch": 0.6445077847247762, + "grad_norm": 1.5010748538036156, + "learning_rate": 5.927525798398783e-06, + "loss": 0.7235, + "step": 21029 + }, + { + "epoch": 0.6445384332475175, + "grad_norm": 1.5304898701782328, + "learning_rate": 5.926619226557927e-06, + "loss": 0.6416, + "step": 21030 + }, + { + "epoch": 0.6445690817702586, + "grad_norm": 0.6648195939517817, + "learning_rate": 5.925712694853134e-06, + "loss": 0.5576, + "step": 21031 + }, + { + "epoch": 0.6445997302929999, + "grad_norm": 1.460297707723285, + "learning_rate": 5.924806203293334e-06, + "loss": 0.5852, + "step": 21032 + }, + { + "epoch": 0.644630378815741, + "grad_norm": 0.6794622128479709, + "learning_rate": 5.923899751887465e-06, + "loss": 0.5609, + "step": 21033 + }, + { + "epoch": 0.6446610273384823, + "grad_norm": 1.7422342470861478, + "learning_rate": 5.922993340644455e-06, + "loss": 0.7481, + "step": 21034 + }, + { + "epoch": 0.6446916758612234, + "grad_norm": 1.3275336731934733, + "learning_rate": 5.922086969573229e-06, + "loss": 0.6619, + "step": 21035 + }, + { + "epoch": 0.6447223243839647, + "grad_norm": 1.498217919163818, + "learning_rate": 5.921180638682729e-06, + "loss": 0.6944, + "step": 21036 + }, + { + "epoch": 0.6447529729067059, + "grad_norm": 1.4165971933881398, + "learning_rate": 5.920274347981875e-06, + "loss": 0.592, + "step": 21037 + }, + { + "epoch": 0.6447836214294471, + "grad_norm": 0.6381116374816139, + "learning_rate": 5.919368097479607e-06, + "loss": 0.521, + "step": 21038 + }, + { + "epoch": 0.6448142699521883, + "grad_norm": 1.70979776259592, + "learning_rate": 5.918461887184848e-06, + "loss": 0.6864, + "step": 21039 + }, + { + "epoch": 0.6448449184749295, + "grad_norm": 1.61962946898723, + "learning_rate": 5.917555717106525e-06, + "loss": 0.6693, + "step": 21040 + }, + { + "epoch": 0.6448755669976707, + "grad_norm": 1.4407246147396044, + "learning_rate": 5.916649587253573e-06, + "loss": 0.5854, + "step": 21041 + }, + { + "epoch": 0.6449062155204119, + "grad_norm": 1.3325557744028083, + "learning_rate": 5.915743497634916e-06, + "loss": 0.6628, + "step": 21042 + }, + { + "epoch": 0.6449368640431531, + "grad_norm": 1.6622467765905609, + "learning_rate": 5.914837448259483e-06, + "loss": 0.7356, + "step": 21043 + }, + { + "epoch": 0.6449675125658944, + "grad_norm": 1.8331174546847375, + "learning_rate": 5.9139314391362025e-06, + "loss": 0.8481, + "step": 21044 + }, + { + "epoch": 0.6449981610886355, + "grad_norm": 1.6183956333383263, + "learning_rate": 5.913025470274001e-06, + "loss": 0.7316, + "step": 21045 + }, + { + "epoch": 0.6450288096113768, + "grad_norm": 1.5585042231335888, + "learning_rate": 5.912119541681804e-06, + "loss": 0.6194, + "step": 21046 + }, + { + "epoch": 0.6450594581341179, + "grad_norm": 1.5107980950418742, + "learning_rate": 5.911213653368544e-06, + "loss": 0.7173, + "step": 21047 + }, + { + "epoch": 0.6450901066568592, + "grad_norm": 0.699858269140051, + "learning_rate": 5.910307805343135e-06, + "loss": 0.5684, + "step": 21048 + }, + { + "epoch": 0.6451207551796003, + "grad_norm": 1.6225587621254067, + "learning_rate": 5.909401997614516e-06, + "loss": 0.7854, + "step": 21049 + }, + { + "epoch": 0.6451514037023416, + "grad_norm": 1.347774727641953, + "learning_rate": 5.908496230191603e-06, + "loss": 0.6366, + "step": 21050 + }, + { + "epoch": 0.6451820522250827, + "grad_norm": 1.6398483087418685, + "learning_rate": 5.907590503083323e-06, + "loss": 0.648, + "step": 21051 + }, + { + "epoch": 0.645212700747824, + "grad_norm": 1.550446396890499, + "learning_rate": 5.9066848162986e-06, + "loss": 0.625, + "step": 21052 + }, + { + "epoch": 0.6452433492705651, + "grad_norm": 1.3895201674284776, + "learning_rate": 5.905779169846362e-06, + "loss": 0.6807, + "step": 21053 + }, + { + "epoch": 0.6452739977933064, + "grad_norm": 0.6648989666444184, + "learning_rate": 5.904873563735524e-06, + "loss": 0.5525, + "step": 21054 + }, + { + "epoch": 0.6453046463160476, + "grad_norm": 1.533606920027652, + "learning_rate": 5.90396799797502e-06, + "loss": 0.7629, + "step": 21055 + }, + { + "epoch": 0.6453352948387888, + "grad_norm": 1.5573125474238527, + "learning_rate": 5.903062472573764e-06, + "loss": 0.6123, + "step": 21056 + }, + { + "epoch": 0.64536594336153, + "grad_norm": 0.6567229144323633, + "learning_rate": 5.902156987540686e-06, + "loss": 0.5481, + "step": 21057 + }, + { + "epoch": 0.6453965918842712, + "grad_norm": 0.6468043310880724, + "learning_rate": 5.901251542884701e-06, + "loss": 0.5407, + "step": 21058 + }, + { + "epoch": 0.6454272404070124, + "grad_norm": 1.482508086997479, + "learning_rate": 5.900346138614731e-06, + "loss": 0.6731, + "step": 21059 + }, + { + "epoch": 0.6454578889297535, + "grad_norm": 1.5365524938677135, + "learning_rate": 5.899440774739702e-06, + "loss": 0.7598, + "step": 21060 + }, + { + "epoch": 0.6454885374524948, + "grad_norm": 0.6570084891831653, + "learning_rate": 5.898535451268533e-06, + "loss": 0.5524, + "step": 21061 + }, + { + "epoch": 0.6455191859752359, + "grad_norm": 1.3855063536882968, + "learning_rate": 5.89763016821014e-06, + "loss": 0.6018, + "step": 21062 + }, + { + "epoch": 0.6455498344979772, + "grad_norm": 1.6599706042603641, + "learning_rate": 5.896724925573449e-06, + "loss": 0.7068, + "step": 21063 + }, + { + "epoch": 0.6455804830207184, + "grad_norm": 1.5530835458158467, + "learning_rate": 5.895819723367375e-06, + "loss": 0.6483, + "step": 21064 + }, + { + "epoch": 0.6456111315434596, + "grad_norm": 1.7699775228058563, + "learning_rate": 5.894914561600842e-06, + "loss": 0.7958, + "step": 21065 + }, + { + "epoch": 0.6456417800662008, + "grad_norm": 0.6368548160266532, + "learning_rate": 5.8940094402827686e-06, + "loss": 0.5147, + "step": 21066 + }, + { + "epoch": 0.645672428588942, + "grad_norm": 1.8737559223530413, + "learning_rate": 5.893104359422064e-06, + "loss": 0.6835, + "step": 21067 + }, + { + "epoch": 0.6457030771116832, + "grad_norm": 1.488015574198934, + "learning_rate": 5.89219931902766e-06, + "loss": 0.6099, + "step": 21068 + }, + { + "epoch": 0.6457337256344244, + "grad_norm": 1.658438788520401, + "learning_rate": 5.8912943191084635e-06, + "loss": 0.669, + "step": 21069 + }, + { + "epoch": 0.6457643741571656, + "grad_norm": 1.3898701427400937, + "learning_rate": 5.890389359673394e-06, + "loss": 0.5886, + "step": 21070 + }, + { + "epoch": 0.6457950226799068, + "grad_norm": 1.6194082866616144, + "learning_rate": 5.889484440731372e-06, + "loss": 0.7346, + "step": 21071 + }, + { + "epoch": 0.645825671202648, + "grad_norm": 1.4702586991096396, + "learning_rate": 5.888579562291309e-06, + "loss": 0.5989, + "step": 21072 + }, + { + "epoch": 0.6458563197253893, + "grad_norm": 1.381255224716636, + "learning_rate": 5.887674724362126e-06, + "loss": 0.6433, + "step": 21073 + }, + { + "epoch": 0.6458869682481304, + "grad_norm": 1.708761869304109, + "learning_rate": 5.8867699269527355e-06, + "loss": 0.7801, + "step": 21074 + }, + { + "epoch": 0.6459176167708717, + "grad_norm": 1.6458562463089663, + "learning_rate": 5.8858651700720515e-06, + "loss": 0.6553, + "step": 21075 + }, + { + "epoch": 0.6459482652936128, + "grad_norm": 1.5606582095260282, + "learning_rate": 5.884960453728994e-06, + "loss": 0.6918, + "step": 21076 + }, + { + "epoch": 0.6459789138163541, + "grad_norm": 1.4646198680425242, + "learning_rate": 5.884055777932473e-06, + "loss": 0.7192, + "step": 21077 + }, + { + "epoch": 0.6460095623390952, + "grad_norm": 1.494651853020942, + "learning_rate": 5.8831511426914015e-06, + "loss": 0.6345, + "step": 21078 + }, + { + "epoch": 0.6460402108618365, + "grad_norm": 1.66663660805919, + "learning_rate": 5.882246548014699e-06, + "loss": 0.7256, + "step": 21079 + }, + { + "epoch": 0.6460708593845776, + "grad_norm": 1.721422643326697, + "learning_rate": 5.881341993911271e-06, + "loss": 0.6466, + "step": 21080 + }, + { + "epoch": 0.6461015079073189, + "grad_norm": 1.764135895859917, + "learning_rate": 5.880437480390036e-06, + "loss": 0.6744, + "step": 21081 + }, + { + "epoch": 0.6461321564300601, + "grad_norm": 1.6477770578724495, + "learning_rate": 5.8795330074599035e-06, + "loss": 0.6511, + "step": 21082 + }, + { + "epoch": 0.6461628049528013, + "grad_norm": 0.6894835569102907, + "learning_rate": 5.878628575129786e-06, + "loss": 0.5594, + "step": 21083 + }, + { + "epoch": 0.6461934534755425, + "grad_norm": 1.6519023062518652, + "learning_rate": 5.8777241834085975e-06, + "loss": 0.6366, + "step": 21084 + }, + { + "epoch": 0.6462241019982837, + "grad_norm": 1.630266528777278, + "learning_rate": 5.876819832305247e-06, + "loss": 0.6944, + "step": 21085 + }, + { + "epoch": 0.6462547505210249, + "grad_norm": 1.6586467069404223, + "learning_rate": 5.875915521828644e-06, + "loss": 0.7251, + "step": 21086 + }, + { + "epoch": 0.6462853990437661, + "grad_norm": 1.7524096573073829, + "learning_rate": 5.875011251987701e-06, + "loss": 0.7084, + "step": 21087 + }, + { + "epoch": 0.6463160475665073, + "grad_norm": 1.377363630866509, + "learning_rate": 5.874107022791331e-06, + "loss": 0.6773, + "step": 21088 + }, + { + "epoch": 0.6463466960892486, + "grad_norm": 1.5933868467246655, + "learning_rate": 5.873202834248435e-06, + "loss": 0.6461, + "step": 21089 + }, + { + "epoch": 0.6463773446119897, + "grad_norm": 1.5464865991051573, + "learning_rate": 5.872298686367932e-06, + "loss": 0.6536, + "step": 21090 + }, + { + "epoch": 0.6464079931347309, + "grad_norm": 1.4042724271213063, + "learning_rate": 5.87139457915872e-06, + "loss": 0.6889, + "step": 21091 + }, + { + "epoch": 0.6464386416574721, + "grad_norm": 1.7908858197245154, + "learning_rate": 5.870490512629721e-06, + "loss": 0.5891, + "step": 21092 + }, + { + "epoch": 0.6464692901802133, + "grad_norm": 1.5725595381040716, + "learning_rate": 5.869586486789832e-06, + "loss": 0.6381, + "step": 21093 + }, + { + "epoch": 0.6464999387029545, + "grad_norm": 1.565757079776689, + "learning_rate": 5.8686825016479634e-06, + "loss": 0.61, + "step": 21094 + }, + { + "epoch": 0.6465305872256957, + "grad_norm": 1.6371460809825797, + "learning_rate": 5.8677785572130245e-06, + "loss": 0.6164, + "step": 21095 + }, + { + "epoch": 0.6465612357484369, + "grad_norm": 1.5305555327420797, + "learning_rate": 5.86687465349392e-06, + "loss": 0.6873, + "step": 21096 + }, + { + "epoch": 0.6465918842711781, + "grad_norm": 1.5476150648569982, + "learning_rate": 5.865970790499556e-06, + "loss": 0.6805, + "step": 21097 + }, + { + "epoch": 0.6466225327939193, + "grad_norm": 1.6223462216989923, + "learning_rate": 5.865066968238842e-06, + "loss": 0.6798, + "step": 21098 + }, + { + "epoch": 0.6466531813166605, + "grad_norm": 1.4734562445563903, + "learning_rate": 5.864163186720682e-06, + "loss": 0.6079, + "step": 21099 + }, + { + "epoch": 0.6466838298394018, + "grad_norm": 1.643865811647817, + "learning_rate": 5.863259445953975e-06, + "loss": 0.7195, + "step": 21100 + }, + { + "epoch": 0.6467144783621429, + "grad_norm": 1.7109858880796613, + "learning_rate": 5.862355745947637e-06, + "loss": 0.643, + "step": 21101 + }, + { + "epoch": 0.6467451268848842, + "grad_norm": 0.6682673717892675, + "learning_rate": 5.861452086710562e-06, + "loss": 0.5616, + "step": 21102 + }, + { + "epoch": 0.6467757754076253, + "grad_norm": 1.5747305226876203, + "learning_rate": 5.860548468251661e-06, + "loss": 0.7092, + "step": 21103 + }, + { + "epoch": 0.6468064239303666, + "grad_norm": 1.443519336817446, + "learning_rate": 5.859644890579835e-06, + "loss": 0.7047, + "step": 21104 + }, + { + "epoch": 0.6468370724531077, + "grad_norm": 1.427574331368691, + "learning_rate": 5.858741353703985e-06, + "loss": 0.6638, + "step": 21105 + }, + { + "epoch": 0.646867720975849, + "grad_norm": 1.6425574833437677, + "learning_rate": 5.8578378576330195e-06, + "loss": 0.6427, + "step": 21106 + }, + { + "epoch": 0.6468983694985901, + "grad_norm": 1.7810843737387692, + "learning_rate": 5.856934402375836e-06, + "loss": 0.6561, + "step": 21107 + }, + { + "epoch": 0.6469290180213314, + "grad_norm": 0.6614269680740922, + "learning_rate": 5.856030987941336e-06, + "loss": 0.5528, + "step": 21108 + }, + { + "epoch": 0.6469596665440726, + "grad_norm": 0.6793721119992407, + "learning_rate": 5.8551276143384274e-06, + "loss": 0.5531, + "step": 21109 + }, + { + "epoch": 0.6469903150668138, + "grad_norm": 1.3143435881928172, + "learning_rate": 5.8542242815759994e-06, + "loss": 0.656, + "step": 21110 + }, + { + "epoch": 0.647020963589555, + "grad_norm": 1.522092862323668, + "learning_rate": 5.853320989662969e-06, + "loss": 0.7593, + "step": 21111 + }, + { + "epoch": 0.6470516121122962, + "grad_norm": 1.6357442483867939, + "learning_rate": 5.852417738608223e-06, + "loss": 0.6808, + "step": 21112 + }, + { + "epoch": 0.6470822606350374, + "grad_norm": 1.6217988331726032, + "learning_rate": 5.851514528420665e-06, + "loss": 0.7388, + "step": 21113 + }, + { + "epoch": 0.6471129091577786, + "grad_norm": 1.641169135839272, + "learning_rate": 5.850611359109199e-06, + "loss": 0.699, + "step": 21114 + }, + { + "epoch": 0.6471435576805198, + "grad_norm": 1.5097327482554943, + "learning_rate": 5.84970823068272e-06, + "loss": 0.6228, + "step": 21115 + }, + { + "epoch": 0.647174206203261, + "grad_norm": 1.5354290070323804, + "learning_rate": 5.848805143150127e-06, + "loss": 0.7209, + "step": 21116 + }, + { + "epoch": 0.6472048547260022, + "grad_norm": 1.5674281860220158, + "learning_rate": 5.847902096520319e-06, + "loss": 0.7262, + "step": 21117 + }, + { + "epoch": 0.6472355032487435, + "grad_norm": 0.6807927886150392, + "learning_rate": 5.8469990908021935e-06, + "loss": 0.5763, + "step": 21118 + }, + { + "epoch": 0.6472661517714846, + "grad_norm": 0.6560880492517747, + "learning_rate": 5.84609612600465e-06, + "loss": 0.5445, + "step": 21119 + }, + { + "epoch": 0.6472968002942259, + "grad_norm": 1.8051570676457065, + "learning_rate": 5.845193202136587e-06, + "loss": 0.6563, + "step": 21120 + }, + { + "epoch": 0.647327448816967, + "grad_norm": 1.6715356648432471, + "learning_rate": 5.8442903192068914e-06, + "loss": 0.745, + "step": 21121 + }, + { + "epoch": 0.6473580973397082, + "grad_norm": 1.858014377758246, + "learning_rate": 5.843387477224472e-06, + "loss": 0.7074, + "step": 21122 + }, + { + "epoch": 0.6473887458624494, + "grad_norm": 1.4780835011372413, + "learning_rate": 5.842484676198219e-06, + "loss": 0.6621, + "step": 21123 + }, + { + "epoch": 0.6474193943851906, + "grad_norm": 1.4737267540658396, + "learning_rate": 5.841581916137025e-06, + "loss": 0.6617, + "step": 21124 + }, + { + "epoch": 0.6474500429079318, + "grad_norm": 1.6112922221433104, + "learning_rate": 5.840679197049791e-06, + "loss": 0.5938, + "step": 21125 + }, + { + "epoch": 0.647480691430673, + "grad_norm": 1.8897600364837115, + "learning_rate": 5.839776518945408e-06, + "loss": 0.7184, + "step": 21126 + }, + { + "epoch": 0.6475113399534143, + "grad_norm": 1.387899774716116, + "learning_rate": 5.838873881832772e-06, + "loss": 0.6535, + "step": 21127 + }, + { + "epoch": 0.6475419884761554, + "grad_norm": 1.438833322487765, + "learning_rate": 5.837971285720776e-06, + "loss": 0.7083, + "step": 21128 + }, + { + "epoch": 0.6475726369988967, + "grad_norm": 1.52168144885185, + "learning_rate": 5.8370687306183114e-06, + "loss": 0.5765, + "step": 21129 + }, + { + "epoch": 0.6476032855216378, + "grad_norm": 1.5978618750538256, + "learning_rate": 5.836166216534279e-06, + "loss": 0.6957, + "step": 21130 + }, + { + "epoch": 0.6476339340443791, + "grad_norm": 0.6819394798508448, + "learning_rate": 5.8352637434775616e-06, + "loss": 0.5281, + "step": 21131 + }, + { + "epoch": 0.6476645825671202, + "grad_norm": 1.3861096091922207, + "learning_rate": 5.834361311457058e-06, + "loss": 0.6971, + "step": 21132 + }, + { + "epoch": 0.6476952310898615, + "grad_norm": 1.572628655257945, + "learning_rate": 5.83345892048166e-06, + "loss": 0.6073, + "step": 21133 + }, + { + "epoch": 0.6477258796126026, + "grad_norm": 1.6195248436111158, + "learning_rate": 5.8325565705602535e-06, + "loss": 0.6312, + "step": 21134 + }, + { + "epoch": 0.6477565281353439, + "grad_norm": 1.5800892543344434, + "learning_rate": 5.831654261701733e-06, + "loss": 0.7425, + "step": 21135 + }, + { + "epoch": 0.647787176658085, + "grad_norm": 0.6720416446954273, + "learning_rate": 5.830751993914996e-06, + "loss": 0.5372, + "step": 21136 + }, + { + "epoch": 0.6478178251808263, + "grad_norm": 1.4730926390336623, + "learning_rate": 5.82984976720892e-06, + "loss": 0.678, + "step": 21137 + }, + { + "epoch": 0.6478484737035675, + "grad_norm": 1.4356812440086253, + "learning_rate": 5.828947581592407e-06, + "loss": 0.7368, + "step": 21138 + }, + { + "epoch": 0.6478791222263087, + "grad_norm": 1.6989010115581893, + "learning_rate": 5.828045437074336e-06, + "loss": 0.6352, + "step": 21139 + }, + { + "epoch": 0.6479097707490499, + "grad_norm": 1.6137358101838268, + "learning_rate": 5.8271433336636e-06, + "loss": 0.7667, + "step": 21140 + }, + { + "epoch": 0.6479404192717911, + "grad_norm": 0.6754117092278576, + "learning_rate": 5.826241271369093e-06, + "loss": 0.539, + "step": 21141 + }, + { + "epoch": 0.6479710677945323, + "grad_norm": 0.6502078285971911, + "learning_rate": 5.825339250199694e-06, + "loss": 0.5601, + "step": 21142 + }, + { + "epoch": 0.6480017163172735, + "grad_norm": 1.6136302080861615, + "learning_rate": 5.824437270164296e-06, + "loss": 0.6084, + "step": 21143 + }, + { + "epoch": 0.6480323648400147, + "grad_norm": 1.420327938254674, + "learning_rate": 5.82353533127179e-06, + "loss": 0.6837, + "step": 21144 + }, + { + "epoch": 0.648063013362756, + "grad_norm": 1.6356794228262561, + "learning_rate": 5.822633433531055e-06, + "loss": 0.7256, + "step": 21145 + }, + { + "epoch": 0.6480936618854971, + "grad_norm": 1.4921919945667137, + "learning_rate": 5.8217315769509815e-06, + "loss": 0.7208, + "step": 21146 + }, + { + "epoch": 0.6481243104082384, + "grad_norm": 1.8061252165002435, + "learning_rate": 5.8208297615404605e-06, + "loss": 0.7983, + "step": 21147 + }, + { + "epoch": 0.6481549589309795, + "grad_norm": 1.54619019673687, + "learning_rate": 5.819927987308369e-06, + "loss": 0.7398, + "step": 21148 + }, + { + "epoch": 0.6481856074537208, + "grad_norm": 1.4873296111603538, + "learning_rate": 5.8190262542636e-06, + "loss": 0.5306, + "step": 21149 + }, + { + "epoch": 0.6482162559764619, + "grad_norm": 1.5333690539498874, + "learning_rate": 5.81812456241503e-06, + "loss": 0.6863, + "step": 21150 + }, + { + "epoch": 0.6482469044992032, + "grad_norm": 1.7639424108719195, + "learning_rate": 5.81722291177155e-06, + "loss": 0.7208, + "step": 21151 + }, + { + "epoch": 0.6482775530219443, + "grad_norm": 1.4724054129589565, + "learning_rate": 5.816321302342047e-06, + "loss": 0.6561, + "step": 21152 + }, + { + "epoch": 0.6483082015446855, + "grad_norm": 1.5016301747581224, + "learning_rate": 5.815419734135397e-06, + "loss": 0.675, + "step": 21153 + }, + { + "epoch": 0.6483388500674268, + "grad_norm": 1.5059671923321838, + "learning_rate": 5.814518207160487e-06, + "loss": 0.6289, + "step": 21154 + }, + { + "epoch": 0.6483694985901679, + "grad_norm": 1.631517263996803, + "learning_rate": 5.813616721426203e-06, + "loss": 0.6299, + "step": 21155 + }, + { + "epoch": 0.6484001471129092, + "grad_norm": 1.5592329292436222, + "learning_rate": 5.8127152769414206e-06, + "loss": 0.6611, + "step": 21156 + }, + { + "epoch": 0.6484307956356503, + "grad_norm": 1.6318500669231217, + "learning_rate": 5.811813873715026e-06, + "loss": 0.7498, + "step": 21157 + }, + { + "epoch": 0.6484614441583916, + "grad_norm": 1.5498147926254853, + "learning_rate": 5.810912511755905e-06, + "loss": 0.7839, + "step": 21158 + }, + { + "epoch": 0.6484920926811327, + "grad_norm": 1.7563570010747913, + "learning_rate": 5.81001119107293e-06, + "loss": 0.7201, + "step": 21159 + }, + { + "epoch": 0.648522741203874, + "grad_norm": 1.6165745187997926, + "learning_rate": 5.809109911674993e-06, + "loss": 0.5982, + "step": 21160 + }, + { + "epoch": 0.6485533897266151, + "grad_norm": 1.5005352321152134, + "learning_rate": 5.808208673570963e-06, + "loss": 0.6579, + "step": 21161 + }, + { + "epoch": 0.6485840382493564, + "grad_norm": 1.8314718795104896, + "learning_rate": 5.807307476769726e-06, + "loss": 0.7504, + "step": 21162 + }, + { + "epoch": 0.6486146867720975, + "grad_norm": 1.6046379187789275, + "learning_rate": 5.806406321280165e-06, + "loss": 0.7, + "step": 21163 + }, + { + "epoch": 0.6486453352948388, + "grad_norm": 1.521658566024875, + "learning_rate": 5.805505207111151e-06, + "loss": 0.5716, + "step": 21164 + }, + { + "epoch": 0.64867598381758, + "grad_norm": 1.3372888568233312, + "learning_rate": 5.8046041342715675e-06, + "loss": 0.6565, + "step": 21165 + }, + { + "epoch": 0.6487066323403212, + "grad_norm": 1.5665896268276531, + "learning_rate": 5.803703102770297e-06, + "loss": 0.6544, + "step": 21166 + }, + { + "epoch": 0.6487372808630624, + "grad_norm": 1.6631871371531235, + "learning_rate": 5.80280211261621e-06, + "loss": 0.7899, + "step": 21167 + }, + { + "epoch": 0.6487679293858036, + "grad_norm": 1.6381491000145358, + "learning_rate": 5.801901163818187e-06, + "loss": 0.7299, + "step": 21168 + }, + { + "epoch": 0.6487985779085448, + "grad_norm": 1.4504458166681173, + "learning_rate": 5.8010002563851096e-06, + "loss": 0.595, + "step": 21169 + }, + { + "epoch": 0.648829226431286, + "grad_norm": 1.546825978438198, + "learning_rate": 5.800099390325849e-06, + "loss": 0.7548, + "step": 21170 + }, + { + "epoch": 0.6488598749540272, + "grad_norm": 1.568873393706024, + "learning_rate": 5.7991985656492856e-06, + "loss": 0.6621, + "step": 21171 + }, + { + "epoch": 0.6488905234767685, + "grad_norm": 1.6253897829127097, + "learning_rate": 5.798297782364291e-06, + "loss": 0.6796, + "step": 21172 + }, + { + "epoch": 0.6489211719995096, + "grad_norm": 1.5348505575915528, + "learning_rate": 5.797397040479742e-06, + "loss": 0.7106, + "step": 21173 + }, + { + "epoch": 0.6489518205222509, + "grad_norm": 1.5381454908806047, + "learning_rate": 5.796496340004521e-06, + "loss": 0.7165, + "step": 21174 + }, + { + "epoch": 0.648982469044992, + "grad_norm": 1.5089398241664727, + "learning_rate": 5.7955956809474915e-06, + "loss": 0.6867, + "step": 21175 + }, + { + "epoch": 0.6490131175677333, + "grad_norm": 1.7163233289193542, + "learning_rate": 5.794695063317533e-06, + "loss": 0.6878, + "step": 21176 + }, + { + "epoch": 0.6490437660904744, + "grad_norm": 1.5213157924119112, + "learning_rate": 5.793794487123525e-06, + "loss": 0.7031, + "step": 21177 + }, + { + "epoch": 0.6490744146132157, + "grad_norm": 1.6854065208111657, + "learning_rate": 5.792893952374332e-06, + "loss": 0.6654, + "step": 21178 + }, + { + "epoch": 0.6491050631359568, + "grad_norm": 0.7380257247747879, + "learning_rate": 5.791993459078837e-06, + "loss": 0.5754, + "step": 21179 + }, + { + "epoch": 0.6491357116586981, + "grad_norm": 1.602567762882245, + "learning_rate": 5.7910930072459005e-06, + "loss": 0.6799, + "step": 21180 + }, + { + "epoch": 0.6491663601814393, + "grad_norm": 1.3284522815309987, + "learning_rate": 5.790192596884403e-06, + "loss": 0.6214, + "step": 21181 + }, + { + "epoch": 0.6491970087041805, + "grad_norm": 1.360327271020817, + "learning_rate": 5.789292228003218e-06, + "loss": 0.4703, + "step": 21182 + }, + { + "epoch": 0.6492276572269217, + "grad_norm": 1.4384914124484955, + "learning_rate": 5.788391900611211e-06, + "loss": 0.6299, + "step": 21183 + }, + { + "epoch": 0.6492583057496628, + "grad_norm": 1.6278684530475351, + "learning_rate": 5.787491614717255e-06, + "loss": 0.6864, + "step": 21184 + }, + { + "epoch": 0.6492889542724041, + "grad_norm": 1.5643279899416165, + "learning_rate": 5.786591370330228e-06, + "loss": 0.6766, + "step": 21185 + }, + { + "epoch": 0.6493196027951452, + "grad_norm": 1.6223920512980288, + "learning_rate": 5.785691167458989e-06, + "loss": 0.5983, + "step": 21186 + }, + { + "epoch": 0.6493502513178865, + "grad_norm": 1.7441499216409733, + "learning_rate": 5.784791006112414e-06, + "loss": 0.665, + "step": 21187 + }, + { + "epoch": 0.6493808998406276, + "grad_norm": 0.6827957225106501, + "learning_rate": 5.783890886299374e-06, + "loss": 0.527, + "step": 21188 + }, + { + "epoch": 0.6494115483633689, + "grad_norm": 1.4685977102919088, + "learning_rate": 5.782990808028732e-06, + "loss": 0.5384, + "step": 21189 + }, + { + "epoch": 0.64944219688611, + "grad_norm": 1.5200579219664394, + "learning_rate": 5.782090771309366e-06, + "loss": 0.6587, + "step": 21190 + }, + { + "epoch": 0.6494728454088513, + "grad_norm": 1.3944763385375214, + "learning_rate": 5.781190776150129e-06, + "loss": 0.6841, + "step": 21191 + }, + { + "epoch": 0.6495034939315925, + "grad_norm": 0.6722218016162903, + "learning_rate": 5.780290822559909e-06, + "loss": 0.5558, + "step": 21192 + }, + { + "epoch": 0.6495341424543337, + "grad_norm": 1.5222726135656108, + "learning_rate": 5.779390910547562e-06, + "loss": 0.6403, + "step": 21193 + }, + { + "epoch": 0.6495647909770749, + "grad_norm": 1.6052295762783528, + "learning_rate": 5.778491040121952e-06, + "loss": 0.6888, + "step": 21194 + }, + { + "epoch": 0.6495954394998161, + "grad_norm": 1.467110088482705, + "learning_rate": 5.777591211291951e-06, + "loss": 0.6235, + "step": 21195 + }, + { + "epoch": 0.6496260880225573, + "grad_norm": 0.6437889999854035, + "learning_rate": 5.776691424066427e-06, + "loss": 0.5487, + "step": 21196 + }, + { + "epoch": 0.6496567365452985, + "grad_norm": 1.6186751955898844, + "learning_rate": 5.775791678454239e-06, + "loss": 0.5985, + "step": 21197 + }, + { + "epoch": 0.6496873850680397, + "grad_norm": 0.675091412863427, + "learning_rate": 5.7748919744642565e-06, + "loss": 0.553, + "step": 21198 + }, + { + "epoch": 0.649718033590781, + "grad_norm": 1.5821976860040117, + "learning_rate": 5.773992312105346e-06, + "loss": 0.6679, + "step": 21199 + }, + { + "epoch": 0.6497486821135221, + "grad_norm": 1.5838749809526, + "learning_rate": 5.773092691386373e-06, + "loss": 0.7168, + "step": 21200 + }, + { + "epoch": 0.6497793306362634, + "grad_norm": 1.7017634170396143, + "learning_rate": 5.772193112316198e-06, + "loss": 0.6425, + "step": 21201 + }, + { + "epoch": 0.6498099791590045, + "grad_norm": 1.7003302748913283, + "learning_rate": 5.77129357490368e-06, + "loss": 0.6792, + "step": 21202 + }, + { + "epoch": 0.6498406276817458, + "grad_norm": 1.4905435272465042, + "learning_rate": 5.770394079157695e-06, + "loss": 0.6713, + "step": 21203 + }, + { + "epoch": 0.6498712762044869, + "grad_norm": 1.3599838512871891, + "learning_rate": 5.769494625087099e-06, + "loss": 0.6808, + "step": 21204 + }, + { + "epoch": 0.6499019247272282, + "grad_norm": 1.5228545962875215, + "learning_rate": 5.768595212700754e-06, + "loss": 0.6339, + "step": 21205 + }, + { + "epoch": 0.6499325732499693, + "grad_norm": 1.5096465450165089, + "learning_rate": 5.767695842007521e-06, + "loss": 0.6817, + "step": 21206 + }, + { + "epoch": 0.6499632217727106, + "grad_norm": 1.4829807588491788, + "learning_rate": 5.766796513016266e-06, + "loss": 0.5954, + "step": 21207 + }, + { + "epoch": 0.6499938702954517, + "grad_norm": 1.6048578374262459, + "learning_rate": 5.765897225735847e-06, + "loss": 0.7133, + "step": 21208 + }, + { + "epoch": 0.650024518818193, + "grad_norm": 1.4888159577858764, + "learning_rate": 5.764997980175125e-06, + "loss": 0.6393, + "step": 21209 + }, + { + "epoch": 0.6500551673409342, + "grad_norm": 1.3936183626255298, + "learning_rate": 5.764098776342961e-06, + "loss": 0.6573, + "step": 21210 + }, + { + "epoch": 0.6500858158636754, + "grad_norm": 1.3752557318223064, + "learning_rate": 5.7631996142482194e-06, + "loss": 0.5421, + "step": 21211 + }, + { + "epoch": 0.6501164643864166, + "grad_norm": 1.812602977698109, + "learning_rate": 5.762300493899756e-06, + "loss": 0.7235, + "step": 21212 + }, + { + "epoch": 0.6501471129091578, + "grad_norm": 1.49271706779753, + "learning_rate": 5.761401415306422e-06, + "loss": 0.6205, + "step": 21213 + }, + { + "epoch": 0.650177761431899, + "grad_norm": 1.5615509813597277, + "learning_rate": 5.760502378477093e-06, + "loss": 0.7144, + "step": 21214 + }, + { + "epoch": 0.6502084099546401, + "grad_norm": 1.4744952811604275, + "learning_rate": 5.75960338342062e-06, + "loss": 0.6463, + "step": 21215 + }, + { + "epoch": 0.6502390584773814, + "grad_norm": 1.6202558253859756, + "learning_rate": 5.758704430145854e-06, + "loss": 0.6599, + "step": 21216 + }, + { + "epoch": 0.6502697070001225, + "grad_norm": 0.7141515511786253, + "learning_rate": 5.757805518661659e-06, + "loss": 0.5661, + "step": 21217 + }, + { + "epoch": 0.6503003555228638, + "grad_norm": 1.4615369692922604, + "learning_rate": 5.756906648976892e-06, + "loss": 0.6706, + "step": 21218 + }, + { + "epoch": 0.650331004045605, + "grad_norm": 0.6918672410327424, + "learning_rate": 5.756007821100412e-06, + "loss": 0.5632, + "step": 21219 + }, + { + "epoch": 0.6503616525683462, + "grad_norm": 1.5193125685289237, + "learning_rate": 5.75510903504107e-06, + "loss": 0.6131, + "step": 21220 + }, + { + "epoch": 0.6503923010910874, + "grad_norm": 1.3987361673932976, + "learning_rate": 5.7542102908077244e-06, + "loss": 0.5907, + "step": 21221 + }, + { + "epoch": 0.6504229496138286, + "grad_norm": 1.7665914254421005, + "learning_rate": 5.753311588409236e-06, + "loss": 0.694, + "step": 21222 + }, + { + "epoch": 0.6504535981365698, + "grad_norm": 1.5745302649342579, + "learning_rate": 5.752412927854454e-06, + "loss": 0.6224, + "step": 21223 + }, + { + "epoch": 0.650484246659311, + "grad_norm": 1.5303420225532887, + "learning_rate": 5.7515143091522305e-06, + "loss": 0.6431, + "step": 21224 + }, + { + "epoch": 0.6505148951820522, + "grad_norm": 1.4430168393367535, + "learning_rate": 5.750615732311424e-06, + "loss": 0.7452, + "step": 21225 + }, + { + "epoch": 0.6505455437047934, + "grad_norm": 1.566316305809485, + "learning_rate": 5.749717197340887e-06, + "loss": 0.666, + "step": 21226 + }, + { + "epoch": 0.6505761922275346, + "grad_norm": 1.7432767969891547, + "learning_rate": 5.748818704249479e-06, + "loss": 0.8003, + "step": 21227 + }, + { + "epoch": 0.6506068407502759, + "grad_norm": 1.6455302252689332, + "learning_rate": 5.747920253046043e-06, + "loss": 0.7625, + "step": 21228 + }, + { + "epoch": 0.650637489273017, + "grad_norm": 1.4594554835925222, + "learning_rate": 5.747021843739438e-06, + "loss": 0.6665, + "step": 21229 + }, + { + "epoch": 0.6506681377957583, + "grad_norm": 1.6503223197862087, + "learning_rate": 5.746123476338517e-06, + "loss": 0.5928, + "step": 21230 + }, + { + "epoch": 0.6506987863184994, + "grad_norm": 1.6285219737501735, + "learning_rate": 5.745225150852132e-06, + "loss": 0.7794, + "step": 21231 + }, + { + "epoch": 0.6507294348412407, + "grad_norm": 1.4703886255414573, + "learning_rate": 5.744326867289123e-06, + "loss": 0.649, + "step": 21232 + }, + { + "epoch": 0.6507600833639818, + "grad_norm": 1.6989104588893933, + "learning_rate": 5.743428625658358e-06, + "loss": 0.7085, + "step": 21233 + }, + { + "epoch": 0.6507907318867231, + "grad_norm": 1.5710704288895705, + "learning_rate": 5.74253042596868e-06, + "loss": 0.6477, + "step": 21234 + }, + { + "epoch": 0.6508213804094642, + "grad_norm": 1.6319154822315878, + "learning_rate": 5.741632268228936e-06, + "loss": 0.7467, + "step": 21235 + }, + { + "epoch": 0.6508520289322055, + "grad_norm": 1.6170247707984713, + "learning_rate": 5.740734152447977e-06, + "loss": 0.6135, + "step": 21236 + }, + { + "epoch": 0.6508826774549467, + "grad_norm": 1.7109672907914821, + "learning_rate": 5.739836078634655e-06, + "loss": 0.7134, + "step": 21237 + }, + { + "epoch": 0.6509133259776879, + "grad_norm": 1.5972436671747665, + "learning_rate": 5.738938046797823e-06, + "loss": 0.7393, + "step": 21238 + }, + { + "epoch": 0.6509439745004291, + "grad_norm": 0.6718762487267693, + "learning_rate": 5.73804005694632e-06, + "loss": 0.5414, + "step": 21239 + }, + { + "epoch": 0.6509746230231703, + "grad_norm": 1.755648920320009, + "learning_rate": 5.737142109088999e-06, + "loss": 0.6652, + "step": 21240 + }, + { + "epoch": 0.6510052715459115, + "grad_norm": 1.5669025325855408, + "learning_rate": 5.736244203234711e-06, + "loss": 0.6232, + "step": 21241 + }, + { + "epoch": 0.6510359200686527, + "grad_norm": 1.4783044846591524, + "learning_rate": 5.7353463393923e-06, + "loss": 0.7268, + "step": 21242 + }, + { + "epoch": 0.6510665685913939, + "grad_norm": 1.3614287607044415, + "learning_rate": 5.734448517570606e-06, + "loss": 0.8488, + "step": 21243 + }, + { + "epoch": 0.6510972171141352, + "grad_norm": 1.5129673962559131, + "learning_rate": 5.7335507377784885e-06, + "loss": 0.6493, + "step": 21244 + }, + { + "epoch": 0.6511278656368763, + "grad_norm": 1.51932545205709, + "learning_rate": 5.732653000024784e-06, + "loss": 0.7718, + "step": 21245 + }, + { + "epoch": 0.6511585141596175, + "grad_norm": 0.6407592305695353, + "learning_rate": 5.731755304318344e-06, + "loss": 0.5083, + "step": 21246 + }, + { + "epoch": 0.6511891626823587, + "grad_norm": 1.732506738225777, + "learning_rate": 5.730857650668008e-06, + "loss": 0.6966, + "step": 21247 + }, + { + "epoch": 0.6512198112050999, + "grad_norm": 1.4949156409606026, + "learning_rate": 5.729960039082624e-06, + "loss": 0.654, + "step": 21248 + }, + { + "epoch": 0.6512504597278411, + "grad_norm": 1.5634610762728116, + "learning_rate": 5.729062469571041e-06, + "loss": 0.7383, + "step": 21249 + }, + { + "epoch": 0.6512811082505823, + "grad_norm": 1.7965889637733516, + "learning_rate": 5.728164942142093e-06, + "loss": 0.6964, + "step": 21250 + }, + { + "epoch": 0.6513117567733235, + "grad_norm": 1.5313600607726316, + "learning_rate": 5.727267456804629e-06, + "loss": 0.6114, + "step": 21251 + }, + { + "epoch": 0.6513424052960647, + "grad_norm": 1.7077973770093449, + "learning_rate": 5.726370013567496e-06, + "loss": 0.6525, + "step": 21252 + }, + { + "epoch": 0.651373053818806, + "grad_norm": 1.5252106656651805, + "learning_rate": 5.725472612439533e-06, + "loss": 0.6322, + "step": 21253 + }, + { + "epoch": 0.6514037023415471, + "grad_norm": 1.6279105532283413, + "learning_rate": 5.724575253429574e-06, + "loss": 0.6933, + "step": 21254 + }, + { + "epoch": 0.6514343508642884, + "grad_norm": 1.6416404529258777, + "learning_rate": 5.723677936546476e-06, + "loss": 0.7101, + "step": 21255 + }, + { + "epoch": 0.6514649993870295, + "grad_norm": 1.5817922106823132, + "learning_rate": 5.722780661799071e-06, + "loss": 0.6724, + "step": 21256 + }, + { + "epoch": 0.6514956479097708, + "grad_norm": 1.5547994745530893, + "learning_rate": 5.721883429196207e-06, + "loss": 0.6619, + "step": 21257 + }, + { + "epoch": 0.6515262964325119, + "grad_norm": 1.5620019775885652, + "learning_rate": 5.720986238746714e-06, + "loss": 0.6514, + "step": 21258 + }, + { + "epoch": 0.6515569449552532, + "grad_norm": 1.612878319893483, + "learning_rate": 5.72008909045944e-06, + "loss": 0.6042, + "step": 21259 + }, + { + "epoch": 0.6515875934779943, + "grad_norm": 1.5678663073989256, + "learning_rate": 5.719191984343226e-06, + "loss": 0.6502, + "step": 21260 + }, + { + "epoch": 0.6516182420007356, + "grad_norm": 1.5832039062412187, + "learning_rate": 5.718294920406906e-06, + "loss": 0.6428, + "step": 21261 + }, + { + "epoch": 0.6516488905234767, + "grad_norm": 1.5751168039784815, + "learning_rate": 5.717397898659321e-06, + "loss": 0.6967, + "step": 21262 + }, + { + "epoch": 0.651679539046218, + "grad_norm": 1.4447483905406941, + "learning_rate": 5.716500919109314e-06, + "loss": 0.6164, + "step": 21263 + }, + { + "epoch": 0.6517101875689592, + "grad_norm": 0.6579781384866558, + "learning_rate": 5.715603981765716e-06, + "loss": 0.5452, + "step": 21264 + }, + { + "epoch": 0.6517408360917004, + "grad_norm": 1.7043084988189203, + "learning_rate": 5.714707086637368e-06, + "loss": 0.7657, + "step": 21265 + }, + { + "epoch": 0.6517714846144416, + "grad_norm": 1.847936261188444, + "learning_rate": 5.713810233733112e-06, + "loss": 0.7499, + "step": 21266 + }, + { + "epoch": 0.6518021331371828, + "grad_norm": 1.5626804112916937, + "learning_rate": 5.712913423061774e-06, + "loss": 0.6472, + "step": 21267 + }, + { + "epoch": 0.651832781659924, + "grad_norm": 1.4388599221319909, + "learning_rate": 5.712016654632204e-06, + "loss": 0.7079, + "step": 21268 + }, + { + "epoch": 0.6518634301826652, + "grad_norm": 1.9859713153379912, + "learning_rate": 5.711119928453226e-06, + "loss": 0.6277, + "step": 21269 + }, + { + "epoch": 0.6518940787054064, + "grad_norm": 1.5681595097417502, + "learning_rate": 5.71022324453368e-06, + "loss": 0.6786, + "step": 21270 + }, + { + "epoch": 0.6519247272281476, + "grad_norm": 1.3547995996455098, + "learning_rate": 5.709326602882407e-06, + "loss": 0.6002, + "step": 21271 + }, + { + "epoch": 0.6519553757508888, + "grad_norm": 1.330658364034522, + "learning_rate": 5.7084300035082316e-06, + "loss": 0.6185, + "step": 21272 + }, + { + "epoch": 0.6519860242736301, + "grad_norm": 1.712726179609623, + "learning_rate": 5.707533446419995e-06, + "loss": 0.6586, + "step": 21273 + }, + { + "epoch": 0.6520166727963712, + "grad_norm": 1.5184458464228558, + "learning_rate": 5.7066369316265324e-06, + "loss": 0.6865, + "step": 21274 + }, + { + "epoch": 0.6520473213191125, + "grad_norm": 1.4980136685084664, + "learning_rate": 5.70574045913667e-06, + "loss": 0.6282, + "step": 21275 + }, + { + "epoch": 0.6520779698418536, + "grad_norm": 1.3567580600534799, + "learning_rate": 5.704844028959251e-06, + "loss": 0.5629, + "step": 21276 + }, + { + "epoch": 0.6521086183645948, + "grad_norm": 1.5701083035076473, + "learning_rate": 5.703947641103098e-06, + "loss": 0.6083, + "step": 21277 + }, + { + "epoch": 0.652139266887336, + "grad_norm": 1.4229994147770704, + "learning_rate": 5.703051295577049e-06, + "loss": 0.6517, + "step": 21278 + }, + { + "epoch": 0.6521699154100772, + "grad_norm": 1.7716061551150402, + "learning_rate": 5.702154992389939e-06, + "loss": 0.6871, + "step": 21279 + }, + { + "epoch": 0.6522005639328184, + "grad_norm": 1.4094186156156399, + "learning_rate": 5.7012587315505895e-06, + "loss": 0.671, + "step": 21280 + }, + { + "epoch": 0.6522312124555596, + "grad_norm": 1.4197608551486334, + "learning_rate": 5.70036251306784e-06, + "loss": 0.6612, + "step": 21281 + }, + { + "epoch": 0.6522618609783009, + "grad_norm": 1.5972575599983525, + "learning_rate": 5.699466336950521e-06, + "loss": 0.7202, + "step": 21282 + }, + { + "epoch": 0.652292509501042, + "grad_norm": 1.3040895626052107, + "learning_rate": 5.698570203207458e-06, + "loss": 0.613, + "step": 21283 + }, + { + "epoch": 0.6523231580237833, + "grad_norm": 1.495686029430019, + "learning_rate": 5.697674111847482e-06, + "loss": 0.6311, + "step": 21284 + }, + { + "epoch": 0.6523538065465244, + "grad_norm": 1.6502554020496656, + "learning_rate": 5.696778062879429e-06, + "loss": 0.6756, + "step": 21285 + }, + { + "epoch": 0.6523844550692657, + "grad_norm": 0.6975857465977716, + "learning_rate": 5.695882056312119e-06, + "loss": 0.5743, + "step": 21286 + }, + { + "epoch": 0.6524151035920068, + "grad_norm": 1.5074440272618044, + "learning_rate": 5.694986092154387e-06, + "loss": 0.6583, + "step": 21287 + }, + { + "epoch": 0.6524457521147481, + "grad_norm": 4.4672515766525045, + "learning_rate": 5.6940901704150546e-06, + "loss": 0.6534, + "step": 21288 + }, + { + "epoch": 0.6524764006374892, + "grad_norm": 1.8307017100533676, + "learning_rate": 5.693194291102955e-06, + "loss": 0.68, + "step": 21289 + }, + { + "epoch": 0.6525070491602305, + "grad_norm": 0.6666217031479159, + "learning_rate": 5.692298454226917e-06, + "loss": 0.5613, + "step": 21290 + }, + { + "epoch": 0.6525376976829717, + "grad_norm": 1.7126630405065582, + "learning_rate": 5.691402659795759e-06, + "loss": 0.6124, + "step": 21291 + }, + { + "epoch": 0.6525683462057129, + "grad_norm": 1.4197541437781769, + "learning_rate": 5.690506907818315e-06, + "loss": 0.5743, + "step": 21292 + }, + { + "epoch": 0.6525989947284541, + "grad_norm": 1.596783541590359, + "learning_rate": 5.689611198303413e-06, + "loss": 0.6711, + "step": 21293 + }, + { + "epoch": 0.6526296432511953, + "grad_norm": 1.6120228711549196, + "learning_rate": 5.68871553125987e-06, + "loss": 0.5894, + "step": 21294 + }, + { + "epoch": 0.6526602917739365, + "grad_norm": 1.5616689186243526, + "learning_rate": 5.687819906696516e-06, + "loss": 0.6234, + "step": 21295 + }, + { + "epoch": 0.6526909402966777, + "grad_norm": 1.6138284179212705, + "learning_rate": 5.686924324622181e-06, + "loss": 0.6797, + "step": 21296 + }, + { + "epoch": 0.6527215888194189, + "grad_norm": 1.4870425658390527, + "learning_rate": 5.686028785045679e-06, + "loss": 0.6277, + "step": 21297 + }, + { + "epoch": 0.6527522373421601, + "grad_norm": 0.6632195338765648, + "learning_rate": 5.685133287975841e-06, + "loss": 0.5587, + "step": 21298 + }, + { + "epoch": 0.6527828858649013, + "grad_norm": 1.4744875973961327, + "learning_rate": 5.6842378334214845e-06, + "loss": 0.6407, + "step": 21299 + }, + { + "epoch": 0.6528135343876426, + "grad_norm": 1.5083307406860471, + "learning_rate": 5.683342421391443e-06, + "loss": 0.6702, + "step": 21300 + }, + { + "epoch": 0.6528441829103837, + "grad_norm": 1.518816044217789, + "learning_rate": 5.6824470518945326e-06, + "loss": 0.724, + "step": 21301 + }, + { + "epoch": 0.652874831433125, + "grad_norm": 1.7004555062476845, + "learning_rate": 5.681551724939574e-06, + "loss": 0.8045, + "step": 21302 + }, + { + "epoch": 0.6529054799558661, + "grad_norm": 1.4531234287245762, + "learning_rate": 5.68065644053539e-06, + "loss": 0.6241, + "step": 21303 + }, + { + "epoch": 0.6529361284786074, + "grad_norm": 1.3903476041542893, + "learning_rate": 5.679761198690807e-06, + "loss": 0.6805, + "step": 21304 + }, + { + "epoch": 0.6529667770013485, + "grad_norm": 1.4846249562757234, + "learning_rate": 5.678865999414639e-06, + "loss": 0.6857, + "step": 21305 + }, + { + "epoch": 0.6529974255240898, + "grad_norm": 1.4660003072013033, + "learning_rate": 5.67797084271571e-06, + "loss": 0.6262, + "step": 21306 + }, + { + "epoch": 0.6530280740468309, + "grad_norm": 1.7459901062551562, + "learning_rate": 5.677075728602843e-06, + "loss": 0.6703, + "step": 21307 + }, + { + "epoch": 0.6530587225695721, + "grad_norm": 1.5666793220298953, + "learning_rate": 5.676180657084852e-06, + "loss": 0.6694, + "step": 21308 + }, + { + "epoch": 0.6530893710923134, + "grad_norm": 1.6269747575034348, + "learning_rate": 5.6752856281705624e-06, + "loss": 0.773, + "step": 21309 + }, + { + "epoch": 0.6531200196150545, + "grad_norm": 1.622151749099076, + "learning_rate": 5.6743906418687836e-06, + "loss": 0.7083, + "step": 21310 + }, + { + "epoch": 0.6531506681377958, + "grad_norm": 1.4490486185218208, + "learning_rate": 5.673495698188347e-06, + "loss": 0.6926, + "step": 21311 + }, + { + "epoch": 0.6531813166605369, + "grad_norm": 1.4348244337341227, + "learning_rate": 5.672600797138065e-06, + "loss": 0.5762, + "step": 21312 + }, + { + "epoch": 0.6532119651832782, + "grad_norm": 0.6768903684224395, + "learning_rate": 5.6717059387267504e-06, + "loss": 0.5535, + "step": 21313 + }, + { + "epoch": 0.6532426137060193, + "grad_norm": 1.644364748458146, + "learning_rate": 5.670811122963224e-06, + "loss": 0.6599, + "step": 21314 + }, + { + "epoch": 0.6532732622287606, + "grad_norm": 1.6881078090762098, + "learning_rate": 5.669916349856308e-06, + "loss": 0.6667, + "step": 21315 + }, + { + "epoch": 0.6533039107515017, + "grad_norm": 1.4853212925545145, + "learning_rate": 5.66902161941481e-06, + "loss": 0.6193, + "step": 21316 + }, + { + "epoch": 0.653334559274243, + "grad_norm": 1.7389286357314977, + "learning_rate": 5.6681269316475494e-06, + "loss": 0.8354, + "step": 21317 + }, + { + "epoch": 0.6533652077969841, + "grad_norm": 0.655904522375108, + "learning_rate": 5.667232286563343e-06, + "loss": 0.544, + "step": 21318 + }, + { + "epoch": 0.6533958563197254, + "grad_norm": 1.6144628962686909, + "learning_rate": 5.66633768417101e-06, + "loss": 0.6801, + "step": 21319 + }, + { + "epoch": 0.6534265048424666, + "grad_norm": 1.8205357260979529, + "learning_rate": 5.665443124479361e-06, + "loss": 0.7138, + "step": 21320 + }, + { + "epoch": 0.6534571533652078, + "grad_norm": 1.6428539859303606, + "learning_rate": 5.6645486074972045e-06, + "loss": 0.6128, + "step": 21321 + }, + { + "epoch": 0.653487801887949, + "grad_norm": 1.5640983273049909, + "learning_rate": 5.66365413323336e-06, + "loss": 0.6886, + "step": 21322 + }, + { + "epoch": 0.6535184504106902, + "grad_norm": 1.6095867798705235, + "learning_rate": 5.662759701696645e-06, + "loss": 0.7097, + "step": 21323 + }, + { + "epoch": 0.6535490989334314, + "grad_norm": 1.54652613191878, + "learning_rate": 5.6618653128958656e-06, + "loss": 0.7663, + "step": 21324 + }, + { + "epoch": 0.6535797474561726, + "grad_norm": 1.5566337756275765, + "learning_rate": 5.660970966839836e-06, + "loss": 0.6379, + "step": 21325 + }, + { + "epoch": 0.6536103959789138, + "grad_norm": 1.4740519896923274, + "learning_rate": 5.66007666353737e-06, + "loss": 0.6218, + "step": 21326 + }, + { + "epoch": 0.653641044501655, + "grad_norm": 1.5840722768917592, + "learning_rate": 5.659182402997283e-06, + "loss": 0.6295, + "step": 21327 + }, + { + "epoch": 0.6536716930243962, + "grad_norm": 1.8386629404715777, + "learning_rate": 5.6582881852283824e-06, + "loss": 0.7615, + "step": 21328 + }, + { + "epoch": 0.6537023415471375, + "grad_norm": 1.4224273643623988, + "learning_rate": 5.657394010239472e-06, + "loss": 0.6311, + "step": 21329 + }, + { + "epoch": 0.6537329900698786, + "grad_norm": 1.6166260534657524, + "learning_rate": 5.656499878039377e-06, + "loss": 0.7468, + "step": 21330 + }, + { + "epoch": 0.6537636385926199, + "grad_norm": 1.6516622065985416, + "learning_rate": 5.6556057886369e-06, + "loss": 0.7321, + "step": 21331 + }, + { + "epoch": 0.653794287115361, + "grad_norm": 1.589731586188044, + "learning_rate": 5.654711742040846e-06, + "loss": 0.7616, + "step": 21332 + }, + { + "epoch": 0.6538249356381023, + "grad_norm": 1.496613930626085, + "learning_rate": 5.65381773826003e-06, + "loss": 0.6229, + "step": 21333 + }, + { + "epoch": 0.6538555841608434, + "grad_norm": 1.362603476700039, + "learning_rate": 5.652923777303263e-06, + "loss": 0.5436, + "step": 21334 + }, + { + "epoch": 0.6538862326835847, + "grad_norm": 1.6644866138393861, + "learning_rate": 5.652029859179347e-06, + "loss": 0.7042, + "step": 21335 + }, + { + "epoch": 0.6539168812063259, + "grad_norm": 1.6611528616181168, + "learning_rate": 5.651135983897092e-06, + "loss": 0.6944, + "step": 21336 + }, + { + "epoch": 0.6539475297290671, + "grad_norm": 0.6729006526497928, + "learning_rate": 5.650242151465308e-06, + "loss": 0.5642, + "step": 21337 + }, + { + "epoch": 0.6539781782518083, + "grad_norm": 1.6993443792755571, + "learning_rate": 5.649348361892805e-06, + "loss": 0.6048, + "step": 21338 + }, + { + "epoch": 0.6540088267745494, + "grad_norm": 1.4379884217076602, + "learning_rate": 5.648454615188386e-06, + "loss": 0.6953, + "step": 21339 + }, + { + "epoch": 0.6540394752972907, + "grad_norm": 1.5769663202161928, + "learning_rate": 5.647560911360848e-06, + "loss": 0.7227, + "step": 21340 + }, + { + "epoch": 0.6540701238200318, + "grad_norm": 1.6711765268703775, + "learning_rate": 5.6466672504190146e-06, + "loss": 0.6595, + "step": 21341 + }, + { + "epoch": 0.6541007723427731, + "grad_norm": 1.5846374939419476, + "learning_rate": 5.645773632371683e-06, + "loss": 0.6737, + "step": 21342 + }, + { + "epoch": 0.6541314208655142, + "grad_norm": 1.4514725481650887, + "learning_rate": 5.644880057227653e-06, + "loss": 0.6531, + "step": 21343 + }, + { + "epoch": 0.6541620693882555, + "grad_norm": 1.4584979812155248, + "learning_rate": 5.643986524995735e-06, + "loss": 0.7063, + "step": 21344 + }, + { + "epoch": 0.6541927179109966, + "grad_norm": 1.439127966691631, + "learning_rate": 5.643093035684733e-06, + "loss": 0.6696, + "step": 21345 + }, + { + "epoch": 0.6542233664337379, + "grad_norm": 1.5585878030576577, + "learning_rate": 5.642199589303452e-06, + "loss": 0.6344, + "step": 21346 + }, + { + "epoch": 0.6542540149564791, + "grad_norm": 0.6717708360261465, + "learning_rate": 5.64130618586069e-06, + "loss": 0.5412, + "step": 21347 + }, + { + "epoch": 0.6542846634792203, + "grad_norm": 1.729111352301401, + "learning_rate": 5.640412825365254e-06, + "loss": 0.6291, + "step": 21348 + }, + { + "epoch": 0.6543153120019615, + "grad_norm": 1.8167441614811384, + "learning_rate": 5.63951950782595e-06, + "loss": 0.7, + "step": 21349 + }, + { + "epoch": 0.6543459605247027, + "grad_norm": 1.5670274946191494, + "learning_rate": 5.638626233251575e-06, + "loss": 0.6792, + "step": 21350 + }, + { + "epoch": 0.6543766090474439, + "grad_norm": 1.6790608276558356, + "learning_rate": 5.6377330016509245e-06, + "loss": 0.8119, + "step": 21351 + }, + { + "epoch": 0.6544072575701851, + "grad_norm": 1.5391887076598734, + "learning_rate": 5.636839813032815e-06, + "loss": 0.6566, + "step": 21352 + }, + { + "epoch": 0.6544379060929263, + "grad_norm": 0.6518910230165516, + "learning_rate": 5.635946667406033e-06, + "loss": 0.5552, + "step": 21353 + }, + { + "epoch": 0.6544685546156676, + "grad_norm": 1.8366172128831348, + "learning_rate": 5.635053564779392e-06, + "loss": 0.8016, + "step": 21354 + }, + { + "epoch": 0.6544992031384087, + "grad_norm": 1.5462231507089763, + "learning_rate": 5.6341605051616795e-06, + "loss": 0.6466, + "step": 21355 + }, + { + "epoch": 0.65452985166115, + "grad_norm": 1.6915287982711318, + "learning_rate": 5.633267488561702e-06, + "loss": 0.6581, + "step": 21356 + }, + { + "epoch": 0.6545605001838911, + "grad_norm": 1.4877580280801599, + "learning_rate": 5.632374514988259e-06, + "loss": 0.6607, + "step": 21357 + }, + { + "epoch": 0.6545911487066324, + "grad_norm": 1.7742848474326511, + "learning_rate": 5.631481584450145e-06, + "loss": 0.7749, + "step": 21358 + }, + { + "epoch": 0.6546217972293735, + "grad_norm": 1.4679409739965708, + "learning_rate": 5.630588696956161e-06, + "loss": 0.7162, + "step": 21359 + }, + { + "epoch": 0.6546524457521148, + "grad_norm": 0.6793198879388851, + "learning_rate": 5.629695852515107e-06, + "loss": 0.5386, + "step": 21360 + }, + { + "epoch": 0.6546830942748559, + "grad_norm": 1.5754362827168684, + "learning_rate": 5.62880305113578e-06, + "loss": 0.6616, + "step": 21361 + }, + { + "epoch": 0.6547137427975972, + "grad_norm": 1.6735283664469336, + "learning_rate": 5.6279102928269655e-06, + "loss": 0.654, + "step": 21362 + }, + { + "epoch": 0.6547443913203383, + "grad_norm": 1.6695634317087282, + "learning_rate": 5.627017577597478e-06, + "loss": 0.678, + "step": 21363 + }, + { + "epoch": 0.6547750398430796, + "grad_norm": 0.6519519708882767, + "learning_rate": 5.6261249054561e-06, + "loss": 0.5501, + "step": 21364 + }, + { + "epoch": 0.6548056883658208, + "grad_norm": 1.7008472945463726, + "learning_rate": 5.625232276411638e-06, + "loss": 0.698, + "step": 21365 + }, + { + "epoch": 0.654836336888562, + "grad_norm": 1.7453268193469553, + "learning_rate": 5.624339690472878e-06, + "loss": 0.5411, + "step": 21366 + }, + { + "epoch": 0.6548669854113032, + "grad_norm": 1.6842964322420613, + "learning_rate": 5.6234471476486174e-06, + "loss": 0.7186, + "step": 21367 + }, + { + "epoch": 0.6548976339340444, + "grad_norm": 1.80034513379286, + "learning_rate": 5.622554647947656e-06, + "loss": 0.6431, + "step": 21368 + }, + { + "epoch": 0.6549282824567856, + "grad_norm": 1.6024034508061995, + "learning_rate": 5.621662191378779e-06, + "loss": 0.6395, + "step": 21369 + }, + { + "epoch": 0.6549589309795267, + "grad_norm": 1.6897095356652936, + "learning_rate": 5.620769777950786e-06, + "loss": 0.6021, + "step": 21370 + }, + { + "epoch": 0.654989579502268, + "grad_norm": 1.5059874513845763, + "learning_rate": 5.619877407672471e-06, + "loss": 0.7025, + "step": 21371 + }, + { + "epoch": 0.6550202280250091, + "grad_norm": 1.7106770827489781, + "learning_rate": 5.618985080552624e-06, + "loss": 0.636, + "step": 21372 + }, + { + "epoch": 0.6550508765477504, + "grad_norm": 1.73190046816103, + "learning_rate": 5.618092796600038e-06, + "loss": 0.7143, + "step": 21373 + }, + { + "epoch": 0.6550815250704916, + "grad_norm": 1.5989381091075292, + "learning_rate": 5.617200555823503e-06, + "loss": 0.7355, + "step": 21374 + }, + { + "epoch": 0.6551121735932328, + "grad_norm": 1.5948321229912035, + "learning_rate": 5.6163083582318125e-06, + "loss": 0.7238, + "step": 21375 + }, + { + "epoch": 0.655142822115974, + "grad_norm": 1.5975837102741448, + "learning_rate": 5.615416203833761e-06, + "loss": 0.6527, + "step": 21376 + }, + { + "epoch": 0.6551734706387152, + "grad_norm": 1.4622922547235897, + "learning_rate": 5.614524092638132e-06, + "loss": 0.6368, + "step": 21377 + }, + { + "epoch": 0.6552041191614564, + "grad_norm": 1.5383649862874478, + "learning_rate": 5.613632024653718e-06, + "loss": 0.6223, + "step": 21378 + }, + { + "epoch": 0.6552347676841976, + "grad_norm": 1.6513145284757749, + "learning_rate": 5.612739999889314e-06, + "loss": 0.7627, + "step": 21379 + }, + { + "epoch": 0.6552654162069388, + "grad_norm": 1.419166193577394, + "learning_rate": 5.611848018353703e-06, + "loss": 0.5955, + "step": 21380 + }, + { + "epoch": 0.65529606472968, + "grad_norm": 0.6930787593853754, + "learning_rate": 5.610956080055674e-06, + "loss": 0.5296, + "step": 21381 + }, + { + "epoch": 0.6553267132524212, + "grad_norm": 1.6135554245499169, + "learning_rate": 5.6100641850040224e-06, + "loss": 0.5843, + "step": 21382 + }, + { + "epoch": 0.6553573617751625, + "grad_norm": 1.4885609818143395, + "learning_rate": 5.609172333207529e-06, + "loss": 0.6118, + "step": 21383 + }, + { + "epoch": 0.6553880102979036, + "grad_norm": 1.63606978292898, + "learning_rate": 5.608280524674987e-06, + "loss": 0.6187, + "step": 21384 + }, + { + "epoch": 0.6554186588206449, + "grad_norm": 1.5823484700929666, + "learning_rate": 5.607388759415177e-06, + "loss": 0.6805, + "step": 21385 + }, + { + "epoch": 0.655449307343386, + "grad_norm": 1.6100725545942762, + "learning_rate": 5.606497037436889e-06, + "loss": 0.6116, + "step": 21386 + }, + { + "epoch": 0.6554799558661273, + "grad_norm": 1.463653988599714, + "learning_rate": 5.605605358748914e-06, + "loss": 0.6744, + "step": 21387 + }, + { + "epoch": 0.6555106043888684, + "grad_norm": 0.6918993449592155, + "learning_rate": 5.6047137233600295e-06, + "loss": 0.5679, + "step": 21388 + }, + { + "epoch": 0.6555412529116097, + "grad_norm": 0.6574405462016206, + "learning_rate": 5.603822131279025e-06, + "loss": 0.5113, + "step": 21389 + }, + { + "epoch": 0.6555719014343508, + "grad_norm": 1.492359873701369, + "learning_rate": 5.602930582514691e-06, + "loss": 0.6629, + "step": 21390 + }, + { + "epoch": 0.6556025499570921, + "grad_norm": 0.6918164177765019, + "learning_rate": 5.602039077075803e-06, + "loss": 0.5616, + "step": 21391 + }, + { + "epoch": 0.6556331984798333, + "grad_norm": 1.6587358336054463, + "learning_rate": 5.601147614971148e-06, + "loss": 0.6651, + "step": 21392 + }, + { + "epoch": 0.6556638470025745, + "grad_norm": 1.5004730340739103, + "learning_rate": 5.600256196209515e-06, + "loss": 0.6348, + "step": 21393 + }, + { + "epoch": 0.6556944955253157, + "grad_norm": 0.675465939146438, + "learning_rate": 5.5993648207996796e-06, + "loss": 0.5376, + "step": 21394 + }, + { + "epoch": 0.6557251440480569, + "grad_norm": 1.6563844242689276, + "learning_rate": 5.598473488750433e-06, + "loss": 0.6049, + "step": 21395 + }, + { + "epoch": 0.6557557925707981, + "grad_norm": 1.56356996903418, + "learning_rate": 5.5975822000705504e-06, + "loss": 0.5662, + "step": 21396 + }, + { + "epoch": 0.6557864410935393, + "grad_norm": 1.5369437307372833, + "learning_rate": 5.5966909547688155e-06, + "loss": 0.6665, + "step": 21397 + }, + { + "epoch": 0.6558170896162805, + "grad_norm": 1.680810333097002, + "learning_rate": 5.595799752854016e-06, + "loss": 0.7236, + "step": 21398 + }, + { + "epoch": 0.6558477381390218, + "grad_norm": 0.649335807353236, + "learning_rate": 5.594908594334923e-06, + "loss": 0.5283, + "step": 21399 + }, + { + "epoch": 0.6558783866617629, + "grad_norm": 1.6352893488699118, + "learning_rate": 5.594017479220324e-06, + "loss": 0.722, + "step": 21400 + }, + { + "epoch": 0.655909035184504, + "grad_norm": 1.8728616368861308, + "learning_rate": 5.5931264075190004e-06, + "loss": 0.7279, + "step": 21401 + }, + { + "epoch": 0.6559396837072453, + "grad_norm": 1.4681578546197211, + "learning_rate": 5.592235379239727e-06, + "loss": 0.6523, + "step": 21402 + }, + { + "epoch": 0.6559703322299865, + "grad_norm": 1.5944535099336155, + "learning_rate": 5.591344394391287e-06, + "loss": 0.6805, + "step": 21403 + }, + { + "epoch": 0.6560009807527277, + "grad_norm": 1.4442480026485423, + "learning_rate": 5.590453452982463e-06, + "loss": 0.6262, + "step": 21404 + }, + { + "epoch": 0.6560316292754689, + "grad_norm": 1.6541032981236998, + "learning_rate": 5.589562555022023e-06, + "loss": 0.807, + "step": 21405 + }, + { + "epoch": 0.6560622777982101, + "grad_norm": 1.4604770661148536, + "learning_rate": 5.5886717005187575e-06, + "loss": 0.6334, + "step": 21406 + }, + { + "epoch": 0.6560929263209513, + "grad_norm": 1.563216343842757, + "learning_rate": 5.58778088948143e-06, + "loss": 0.6494, + "step": 21407 + }, + { + "epoch": 0.6561235748436925, + "grad_norm": 1.5229630016204065, + "learning_rate": 5.586890121918834e-06, + "loss": 0.6749, + "step": 21408 + }, + { + "epoch": 0.6561542233664337, + "grad_norm": 1.68786147151283, + "learning_rate": 5.585999397839739e-06, + "loss": 0.8346, + "step": 21409 + }, + { + "epoch": 0.656184871889175, + "grad_norm": 1.5430474893716377, + "learning_rate": 5.5851087172529175e-06, + "loss": 0.6714, + "step": 21410 + }, + { + "epoch": 0.6562155204119161, + "grad_norm": 1.6074116750831564, + "learning_rate": 5.5842180801671494e-06, + "loss": 0.6673, + "step": 21411 + }, + { + "epoch": 0.6562461689346574, + "grad_norm": 1.9332734177798527, + "learning_rate": 5.583327486591213e-06, + "loss": 0.7557, + "step": 21412 + }, + { + "epoch": 0.6562768174573985, + "grad_norm": 1.4652178947718855, + "learning_rate": 5.582436936533879e-06, + "loss": 0.6278, + "step": 21413 + }, + { + "epoch": 0.6563074659801398, + "grad_norm": 1.746362816646124, + "learning_rate": 5.581546430003923e-06, + "loss": 0.5702, + "step": 21414 + }, + { + "epoch": 0.6563381145028809, + "grad_norm": 1.6118586733617752, + "learning_rate": 5.580655967010124e-06, + "loss": 0.7621, + "step": 21415 + }, + { + "epoch": 0.6563687630256222, + "grad_norm": 1.5139863108736855, + "learning_rate": 5.57976554756125e-06, + "loss": 0.6645, + "step": 21416 + }, + { + "epoch": 0.6563994115483633, + "grad_norm": 1.5703299287688515, + "learning_rate": 5.57887517166608e-06, + "loss": 0.6336, + "step": 21417 + }, + { + "epoch": 0.6564300600711046, + "grad_norm": 1.5545229438606751, + "learning_rate": 5.5779848393333815e-06, + "loss": 0.6719, + "step": 21418 + }, + { + "epoch": 0.6564607085938458, + "grad_norm": 0.6839111206232105, + "learning_rate": 5.577094550571928e-06, + "loss": 0.551, + "step": 21419 + }, + { + "epoch": 0.656491357116587, + "grad_norm": 0.6829278296272105, + "learning_rate": 5.576204305390498e-06, + "loss": 0.5486, + "step": 21420 + }, + { + "epoch": 0.6565220056393282, + "grad_norm": 1.741974995807316, + "learning_rate": 5.575314103797856e-06, + "loss": 0.7184, + "step": 21421 + }, + { + "epoch": 0.6565526541620694, + "grad_norm": 1.665129424531364, + "learning_rate": 5.574423945802774e-06, + "loss": 0.7498, + "step": 21422 + }, + { + "epoch": 0.6565833026848106, + "grad_norm": 1.4989894280170075, + "learning_rate": 5.573533831414031e-06, + "loss": 0.6641, + "step": 21423 + }, + { + "epoch": 0.6566139512075518, + "grad_norm": 1.5139623177860382, + "learning_rate": 5.5726437606403876e-06, + "loss": 0.6869, + "step": 21424 + }, + { + "epoch": 0.656644599730293, + "grad_norm": 1.7936304329262585, + "learning_rate": 5.571753733490621e-06, + "loss": 0.769, + "step": 21425 + }, + { + "epoch": 0.6566752482530342, + "grad_norm": 1.5101312202214545, + "learning_rate": 5.570863749973491e-06, + "loss": 0.6344, + "step": 21426 + }, + { + "epoch": 0.6567058967757754, + "grad_norm": 1.6230688440341687, + "learning_rate": 5.569973810097782e-06, + "loss": 0.6858, + "step": 21427 + }, + { + "epoch": 0.6567365452985167, + "grad_norm": 1.5311298658010462, + "learning_rate": 5.569083913872253e-06, + "loss": 0.6729, + "step": 21428 + }, + { + "epoch": 0.6567671938212578, + "grad_norm": 1.8197980523634245, + "learning_rate": 5.56819406130567e-06, + "loss": 0.6661, + "step": 21429 + }, + { + "epoch": 0.6567978423439991, + "grad_norm": 1.6020357195238664, + "learning_rate": 5.567304252406807e-06, + "loss": 0.6013, + "step": 21430 + }, + { + "epoch": 0.6568284908667402, + "grad_norm": 0.6726781262731792, + "learning_rate": 5.566414487184431e-06, + "loss": 0.5352, + "step": 21431 + }, + { + "epoch": 0.6568591393894814, + "grad_norm": 1.4706284967619172, + "learning_rate": 5.5655247656473045e-06, + "loss": 0.6461, + "step": 21432 + }, + { + "epoch": 0.6568897879122226, + "grad_norm": 1.6438956573790875, + "learning_rate": 5.564635087804197e-06, + "loss": 0.6957, + "step": 21433 + }, + { + "epoch": 0.6569204364349638, + "grad_norm": 1.485453281098958, + "learning_rate": 5.563745453663878e-06, + "loss": 0.6308, + "step": 21434 + }, + { + "epoch": 0.656951084957705, + "grad_norm": 1.537386217955091, + "learning_rate": 5.562855863235108e-06, + "loss": 0.7231, + "step": 21435 + }, + { + "epoch": 0.6569817334804462, + "grad_norm": 1.6900770483536212, + "learning_rate": 5.561966316526657e-06, + "loss": 0.7476, + "step": 21436 + }, + { + "epoch": 0.6570123820031875, + "grad_norm": 1.3926823269700364, + "learning_rate": 5.5610768135472795e-06, + "loss": 0.641, + "step": 21437 + }, + { + "epoch": 0.6570430305259286, + "grad_norm": 0.6594487186463128, + "learning_rate": 5.560187354305756e-06, + "loss": 0.5426, + "step": 21438 + }, + { + "epoch": 0.6570736790486699, + "grad_norm": 1.5389287804240328, + "learning_rate": 5.559297938810843e-06, + "loss": 0.5653, + "step": 21439 + }, + { + "epoch": 0.657104327571411, + "grad_norm": 1.7617105377422435, + "learning_rate": 5.5584085670712984e-06, + "loss": 0.7016, + "step": 21440 + }, + { + "epoch": 0.6571349760941523, + "grad_norm": 1.4543862677561346, + "learning_rate": 5.557519239095892e-06, + "loss": 0.6128, + "step": 21441 + }, + { + "epoch": 0.6571656246168934, + "grad_norm": 1.4312914108338648, + "learning_rate": 5.556629954893389e-06, + "loss": 0.6018, + "step": 21442 + }, + { + "epoch": 0.6571962731396347, + "grad_norm": 0.6627572598036398, + "learning_rate": 5.555740714472543e-06, + "loss": 0.5643, + "step": 21443 + }, + { + "epoch": 0.6572269216623758, + "grad_norm": 1.5192549067762884, + "learning_rate": 5.554851517842121e-06, + "loss": 0.65, + "step": 21444 + }, + { + "epoch": 0.6572575701851171, + "grad_norm": 1.5037905886929326, + "learning_rate": 5.5539623650108855e-06, + "loss": 0.6376, + "step": 21445 + }, + { + "epoch": 0.6572882187078583, + "grad_norm": 1.6346494090113486, + "learning_rate": 5.5530732559876e-06, + "loss": 0.7187, + "step": 21446 + }, + { + "epoch": 0.6573188672305995, + "grad_norm": 1.6805304852814922, + "learning_rate": 5.552184190781021e-06, + "loss": 0.7633, + "step": 21447 + }, + { + "epoch": 0.6573495157533407, + "grad_norm": 0.6523671561193096, + "learning_rate": 5.551295169399901e-06, + "loss": 0.5353, + "step": 21448 + }, + { + "epoch": 0.6573801642760819, + "grad_norm": 1.5677233996645699, + "learning_rate": 5.550406191853016e-06, + "loss": 0.6581, + "step": 21449 + }, + { + "epoch": 0.6574108127988231, + "grad_norm": 1.581815542242007, + "learning_rate": 5.549517258149117e-06, + "loss": 0.6456, + "step": 21450 + }, + { + "epoch": 0.6574414613215643, + "grad_norm": 1.5327065974372187, + "learning_rate": 5.54862836829696e-06, + "loss": 0.7565, + "step": 21451 + }, + { + "epoch": 0.6574721098443055, + "grad_norm": 1.500037332322226, + "learning_rate": 5.5477395223053065e-06, + "loss": 0.6239, + "step": 21452 + }, + { + "epoch": 0.6575027583670467, + "grad_norm": 1.7012180731429012, + "learning_rate": 5.546850720182914e-06, + "loss": 0.7818, + "step": 21453 + }, + { + "epoch": 0.6575334068897879, + "grad_norm": 1.4066038511915386, + "learning_rate": 5.545961961938547e-06, + "loss": 0.6536, + "step": 21454 + }, + { + "epoch": 0.6575640554125292, + "grad_norm": 0.67586288328741, + "learning_rate": 5.54507324758095e-06, + "loss": 0.5761, + "step": 21455 + }, + { + "epoch": 0.6575947039352703, + "grad_norm": 1.6367059893763942, + "learning_rate": 5.544184577118887e-06, + "loss": 0.7489, + "step": 21456 + }, + { + "epoch": 0.6576253524580116, + "grad_norm": 1.7552947956097518, + "learning_rate": 5.543295950561116e-06, + "loss": 0.6306, + "step": 21457 + }, + { + "epoch": 0.6576560009807527, + "grad_norm": 1.6836517927458374, + "learning_rate": 5.542407367916391e-06, + "loss": 0.6759, + "step": 21458 + }, + { + "epoch": 0.657686649503494, + "grad_norm": 1.7214535561067608, + "learning_rate": 5.54151882919346e-06, + "loss": 0.7235, + "step": 21459 + }, + { + "epoch": 0.6577172980262351, + "grad_norm": 0.6926898652690779, + "learning_rate": 5.540630334401091e-06, + "loss": 0.5391, + "step": 21460 + }, + { + "epoch": 0.6577479465489764, + "grad_norm": 1.6475712184499975, + "learning_rate": 5.539741883548033e-06, + "loss": 0.641, + "step": 21461 + }, + { + "epoch": 0.6577785950717175, + "grad_norm": 1.4092995324297883, + "learning_rate": 5.538853476643036e-06, + "loss": 0.6943, + "step": 21462 + }, + { + "epoch": 0.6578092435944587, + "grad_norm": 1.4108120893479412, + "learning_rate": 5.537965113694858e-06, + "loss": 0.6988, + "step": 21463 + }, + { + "epoch": 0.6578398921172, + "grad_norm": 1.7354960537025326, + "learning_rate": 5.53707679471225e-06, + "loss": 0.7258, + "step": 21464 + }, + { + "epoch": 0.6578705406399411, + "grad_norm": 1.6871870558954027, + "learning_rate": 5.53618851970397e-06, + "loss": 0.7223, + "step": 21465 + }, + { + "epoch": 0.6579011891626824, + "grad_norm": 1.774103028493881, + "learning_rate": 5.535300288678762e-06, + "loss": 0.6677, + "step": 21466 + }, + { + "epoch": 0.6579318376854235, + "grad_norm": 1.6044068764816826, + "learning_rate": 5.5344121016453845e-06, + "loss": 0.6382, + "step": 21467 + }, + { + "epoch": 0.6579624862081648, + "grad_norm": 1.4829660488335876, + "learning_rate": 5.53352395861259e-06, + "loss": 0.6344, + "step": 21468 + }, + { + "epoch": 0.6579931347309059, + "grad_norm": 1.383190996024128, + "learning_rate": 5.5326358595891274e-06, + "loss": 0.6622, + "step": 21469 + }, + { + "epoch": 0.6580237832536472, + "grad_norm": 1.3723896251718046, + "learning_rate": 5.531747804583742e-06, + "loss": 0.5965, + "step": 21470 + }, + { + "epoch": 0.6580544317763883, + "grad_norm": 1.7001624148241419, + "learning_rate": 5.53085979360519e-06, + "loss": 0.6269, + "step": 21471 + }, + { + "epoch": 0.6580850802991296, + "grad_norm": 1.7526907128011886, + "learning_rate": 5.5299718266622185e-06, + "loss": 0.7051, + "step": 21472 + }, + { + "epoch": 0.6581157288218707, + "grad_norm": 0.6953443685874975, + "learning_rate": 5.529083903763582e-06, + "loss": 0.5537, + "step": 21473 + }, + { + "epoch": 0.658146377344612, + "grad_norm": 1.7895310576011008, + "learning_rate": 5.528196024918023e-06, + "loss": 0.7229, + "step": 21474 + }, + { + "epoch": 0.6581770258673532, + "grad_norm": 0.6789360583486828, + "learning_rate": 5.527308190134293e-06, + "loss": 0.566, + "step": 21475 + }, + { + "epoch": 0.6582076743900944, + "grad_norm": 1.635124742961975, + "learning_rate": 5.5264203994211415e-06, + "loss": 0.6494, + "step": 21476 + }, + { + "epoch": 0.6582383229128356, + "grad_norm": 1.5754574955467189, + "learning_rate": 5.5255326527873164e-06, + "loss": 0.658, + "step": 21477 + }, + { + "epoch": 0.6582689714355768, + "grad_norm": 1.590578243971749, + "learning_rate": 5.5246449502415545e-06, + "loss": 0.7274, + "step": 21478 + }, + { + "epoch": 0.658299619958318, + "grad_norm": 1.6871358024459118, + "learning_rate": 5.523757291792619e-06, + "loss": 0.6421, + "step": 21479 + }, + { + "epoch": 0.6583302684810592, + "grad_norm": 1.4433791261784137, + "learning_rate": 5.522869677449244e-06, + "loss": 0.6788, + "step": 21480 + }, + { + "epoch": 0.6583609170038004, + "grad_norm": 1.6062499706383184, + "learning_rate": 5.521982107220184e-06, + "loss": 0.6994, + "step": 21481 + }, + { + "epoch": 0.6583915655265417, + "grad_norm": 1.9443241701352816, + "learning_rate": 5.521094581114175e-06, + "loss": 0.7225, + "step": 21482 + }, + { + "epoch": 0.6584222140492828, + "grad_norm": 1.7776884452335031, + "learning_rate": 5.5202070991399685e-06, + "loss": 0.7055, + "step": 21483 + }, + { + "epoch": 0.6584528625720241, + "grad_norm": 1.6596414109362256, + "learning_rate": 5.519319661306311e-06, + "loss": 0.6589, + "step": 21484 + }, + { + "epoch": 0.6584835110947652, + "grad_norm": 1.4579876853798344, + "learning_rate": 5.51843226762194e-06, + "loss": 0.6507, + "step": 21485 + }, + { + "epoch": 0.6585141596175065, + "grad_norm": 1.7402456471448586, + "learning_rate": 5.517544918095601e-06, + "loss": 0.6759, + "step": 21486 + }, + { + "epoch": 0.6585448081402476, + "grad_norm": 1.5402353951210825, + "learning_rate": 5.516657612736043e-06, + "loss": 0.621, + "step": 21487 + }, + { + "epoch": 0.6585754566629889, + "grad_norm": 1.4959498075889608, + "learning_rate": 5.515770351552006e-06, + "loss": 0.637, + "step": 21488 + }, + { + "epoch": 0.65860610518573, + "grad_norm": 1.4843532714663366, + "learning_rate": 5.514883134552223e-06, + "loss": 0.6653, + "step": 21489 + }, + { + "epoch": 0.6586367537084713, + "grad_norm": 1.5635426676295445, + "learning_rate": 5.513995961745451e-06, + "loss": 0.5991, + "step": 21490 + }, + { + "epoch": 0.6586674022312125, + "grad_norm": 1.5513238478722273, + "learning_rate": 5.51310883314042e-06, + "loss": 0.5772, + "step": 21491 + }, + { + "epoch": 0.6586980507539537, + "grad_norm": 0.7056885188295804, + "learning_rate": 5.51222174874588e-06, + "loss": 0.5363, + "step": 21492 + }, + { + "epoch": 0.6587286992766949, + "grad_norm": 1.9261768305084763, + "learning_rate": 5.511334708570565e-06, + "loss": 0.705, + "step": 21493 + }, + { + "epoch": 0.658759347799436, + "grad_norm": 1.6019322147013217, + "learning_rate": 5.510447712623217e-06, + "loss": 0.7535, + "step": 21494 + }, + { + "epoch": 0.6587899963221773, + "grad_norm": 1.6396139287608975, + "learning_rate": 5.50956076091258e-06, + "loss": 0.7626, + "step": 21495 + }, + { + "epoch": 0.6588206448449184, + "grad_norm": 1.6301970081742043, + "learning_rate": 5.508673853447386e-06, + "loss": 0.6675, + "step": 21496 + }, + { + "epoch": 0.6588512933676597, + "grad_norm": 1.591229515861554, + "learning_rate": 5.507786990236377e-06, + "loss": 0.734, + "step": 21497 + }, + { + "epoch": 0.6588819418904008, + "grad_norm": 1.4993616611044365, + "learning_rate": 5.506900171288297e-06, + "loss": 0.6747, + "step": 21498 + }, + { + "epoch": 0.6589125904131421, + "grad_norm": 1.453913200579675, + "learning_rate": 5.506013396611873e-06, + "loss": 0.7649, + "step": 21499 + }, + { + "epoch": 0.6589432389358832, + "grad_norm": 1.4156638661769065, + "learning_rate": 5.505126666215852e-06, + "loss": 0.6908, + "step": 21500 + }, + { + "epoch": 0.6589738874586245, + "grad_norm": 1.5478784535640604, + "learning_rate": 5.5042399801089695e-06, + "loss": 0.6902, + "step": 21501 + }, + { + "epoch": 0.6590045359813657, + "grad_norm": 1.58008266396409, + "learning_rate": 5.503353338299959e-06, + "loss": 0.6525, + "step": 21502 + }, + { + "epoch": 0.6590351845041069, + "grad_norm": 0.6346341858786851, + "learning_rate": 5.502466740797561e-06, + "loss": 0.5828, + "step": 21503 + }, + { + "epoch": 0.6590658330268481, + "grad_norm": 0.678368578022773, + "learning_rate": 5.501580187610506e-06, + "loss": 0.5392, + "step": 21504 + }, + { + "epoch": 0.6590964815495893, + "grad_norm": 1.9340564712022388, + "learning_rate": 5.500693678747532e-06, + "loss": 0.6357, + "step": 21505 + }, + { + "epoch": 0.6591271300723305, + "grad_norm": 1.6145299091237992, + "learning_rate": 5.499807214217379e-06, + "loss": 0.6883, + "step": 21506 + }, + { + "epoch": 0.6591577785950717, + "grad_norm": 1.4639037186243762, + "learning_rate": 5.49892079402877e-06, + "loss": 0.5478, + "step": 21507 + }, + { + "epoch": 0.6591884271178129, + "grad_norm": 1.3689198202728041, + "learning_rate": 5.49803441819045e-06, + "loss": 0.6255, + "step": 21508 + }, + { + "epoch": 0.6592190756405542, + "grad_norm": 1.7597248865310118, + "learning_rate": 5.497148086711151e-06, + "loss": 0.5769, + "step": 21509 + }, + { + "epoch": 0.6592497241632953, + "grad_norm": 1.7943652449578515, + "learning_rate": 5.4962617995996e-06, + "loss": 0.6377, + "step": 21510 + }, + { + "epoch": 0.6592803726860366, + "grad_norm": 0.660657811623441, + "learning_rate": 5.4953755568645324e-06, + "loss": 0.5409, + "step": 21511 + }, + { + "epoch": 0.6593110212087777, + "grad_norm": 1.760469590049515, + "learning_rate": 5.494489358514687e-06, + "loss": 0.7331, + "step": 21512 + }, + { + "epoch": 0.659341669731519, + "grad_norm": 1.8616229797637887, + "learning_rate": 5.493603204558788e-06, + "loss": 0.5722, + "step": 21513 + }, + { + "epoch": 0.6593723182542601, + "grad_norm": 1.4512953719542863, + "learning_rate": 5.492717095005573e-06, + "loss": 0.5955, + "step": 21514 + }, + { + "epoch": 0.6594029667770014, + "grad_norm": 1.8173301482181274, + "learning_rate": 5.4918310298637655e-06, + "loss": 0.6479, + "step": 21515 + }, + { + "epoch": 0.6594336152997425, + "grad_norm": 1.7048377088367317, + "learning_rate": 5.4909450091421e-06, + "loss": 0.6895, + "step": 21516 + }, + { + "epoch": 0.6594642638224838, + "grad_norm": 1.3861505050708047, + "learning_rate": 5.490059032849311e-06, + "loss": 0.6155, + "step": 21517 + }, + { + "epoch": 0.659494912345225, + "grad_norm": 1.6541578171320217, + "learning_rate": 5.48917310099412e-06, + "loss": 0.6193, + "step": 21518 + }, + { + "epoch": 0.6595255608679662, + "grad_norm": 1.6156754749313373, + "learning_rate": 5.488287213585261e-06, + "loss": 0.7131, + "step": 21519 + }, + { + "epoch": 0.6595562093907074, + "grad_norm": 1.3950991900028318, + "learning_rate": 5.487401370631468e-06, + "loss": 0.6433, + "step": 21520 + }, + { + "epoch": 0.6595868579134486, + "grad_norm": 1.3609001324234158, + "learning_rate": 5.486515572141458e-06, + "loss": 0.5969, + "step": 21521 + }, + { + "epoch": 0.6596175064361898, + "grad_norm": 0.6797078488547025, + "learning_rate": 5.48562981812397e-06, + "loss": 0.5726, + "step": 21522 + }, + { + "epoch": 0.659648154958931, + "grad_norm": 1.5276494437836492, + "learning_rate": 5.48474410858772e-06, + "loss": 0.6901, + "step": 21523 + }, + { + "epoch": 0.6596788034816722, + "grad_norm": 1.438246807907416, + "learning_rate": 5.483858443541446e-06, + "loss": 0.5976, + "step": 21524 + }, + { + "epoch": 0.6597094520044133, + "grad_norm": 1.6070171170957301, + "learning_rate": 5.482972822993871e-06, + "loss": 0.7271, + "step": 21525 + }, + { + "epoch": 0.6597401005271546, + "grad_norm": 1.6342167232502003, + "learning_rate": 5.482087246953717e-06, + "loss": 0.6923, + "step": 21526 + }, + { + "epoch": 0.6597707490498957, + "grad_norm": 1.4907285243241692, + "learning_rate": 5.481201715429714e-06, + "loss": 0.6403, + "step": 21527 + }, + { + "epoch": 0.659801397572637, + "grad_norm": 1.5830371521308881, + "learning_rate": 5.480316228430589e-06, + "loss": 0.7057, + "step": 21528 + }, + { + "epoch": 0.6598320460953782, + "grad_norm": 1.6419766950478076, + "learning_rate": 5.479430785965063e-06, + "loss": 0.684, + "step": 21529 + }, + { + "epoch": 0.6598626946181194, + "grad_norm": 1.6822882259420966, + "learning_rate": 5.47854538804186e-06, + "loss": 0.6496, + "step": 21530 + }, + { + "epoch": 0.6598933431408606, + "grad_norm": 1.485931814790461, + "learning_rate": 5.4776600346697114e-06, + "loss": 0.6019, + "step": 21531 + }, + { + "epoch": 0.6599239916636018, + "grad_norm": 1.3260433151255333, + "learning_rate": 5.4767747258573315e-06, + "loss": 0.6455, + "step": 21532 + }, + { + "epoch": 0.659954640186343, + "grad_norm": 1.6776875248042624, + "learning_rate": 5.47588946161345e-06, + "loss": 0.601, + "step": 21533 + }, + { + "epoch": 0.6599852887090842, + "grad_norm": 1.5468710057559845, + "learning_rate": 5.475004241946782e-06, + "loss": 0.6935, + "step": 21534 + }, + { + "epoch": 0.6600159372318254, + "grad_norm": 1.5243714562365018, + "learning_rate": 5.474119066866062e-06, + "loss": 0.7495, + "step": 21535 + }, + { + "epoch": 0.6600465857545667, + "grad_norm": 1.6309810324656495, + "learning_rate": 5.4732339363800025e-06, + "loss": 0.7374, + "step": 21536 + }, + { + "epoch": 0.6600772342773078, + "grad_norm": 1.4013502702339518, + "learning_rate": 5.472348850497325e-06, + "loss": 0.6811, + "step": 21537 + }, + { + "epoch": 0.6601078828000491, + "grad_norm": 1.8062071186969963, + "learning_rate": 5.471463809226754e-06, + "loss": 0.6872, + "step": 21538 + }, + { + "epoch": 0.6601385313227902, + "grad_norm": 0.6727201926331411, + "learning_rate": 5.47057881257701e-06, + "loss": 0.5386, + "step": 21539 + }, + { + "epoch": 0.6601691798455315, + "grad_norm": 0.6804505523427161, + "learning_rate": 5.46969386055681e-06, + "loss": 0.5494, + "step": 21540 + }, + { + "epoch": 0.6601998283682726, + "grad_norm": 1.4548384431446644, + "learning_rate": 5.468808953174876e-06, + "loss": 0.6513, + "step": 21541 + }, + { + "epoch": 0.6602304768910139, + "grad_norm": 0.6813270356842106, + "learning_rate": 5.467924090439929e-06, + "loss": 0.5259, + "step": 21542 + }, + { + "epoch": 0.660261125413755, + "grad_norm": 1.335738044684967, + "learning_rate": 5.4670392723606815e-06, + "loss": 0.6241, + "step": 21543 + }, + { + "epoch": 0.6602917739364963, + "grad_norm": 1.655840084243379, + "learning_rate": 5.466154498945861e-06, + "loss": 0.7395, + "step": 21544 + }, + { + "epoch": 0.6603224224592374, + "grad_norm": 1.519730230452517, + "learning_rate": 5.465269770204172e-06, + "loss": 0.6881, + "step": 21545 + }, + { + "epoch": 0.6603530709819787, + "grad_norm": 1.6031597524213967, + "learning_rate": 5.464385086144348e-06, + "loss": 0.6192, + "step": 21546 + }, + { + "epoch": 0.6603837195047199, + "grad_norm": 1.5057330114875604, + "learning_rate": 5.463500446775097e-06, + "loss": 0.6519, + "step": 21547 + }, + { + "epoch": 0.6604143680274611, + "grad_norm": 1.5905983343413552, + "learning_rate": 5.462615852105136e-06, + "loss": 0.6937, + "step": 21548 + }, + { + "epoch": 0.6604450165502023, + "grad_norm": 1.5817250069285647, + "learning_rate": 5.461731302143178e-06, + "loss": 0.6643, + "step": 21549 + }, + { + "epoch": 0.6604756650729435, + "grad_norm": 1.455662036948295, + "learning_rate": 5.460846796897949e-06, + "loss": 0.6449, + "step": 21550 + }, + { + "epoch": 0.6605063135956847, + "grad_norm": 1.5076769749449068, + "learning_rate": 5.459962336378153e-06, + "loss": 0.6625, + "step": 21551 + }, + { + "epoch": 0.6605369621184259, + "grad_norm": 1.8535328811011749, + "learning_rate": 5.45907792059251e-06, + "loss": 0.6108, + "step": 21552 + }, + { + "epoch": 0.6605676106411671, + "grad_norm": 1.4634994830091013, + "learning_rate": 5.458193549549735e-06, + "loss": 0.6731, + "step": 21553 + }, + { + "epoch": 0.6605982591639084, + "grad_norm": 0.6944406551431653, + "learning_rate": 5.4573092232585445e-06, + "loss": 0.5715, + "step": 21554 + }, + { + "epoch": 0.6606289076866495, + "grad_norm": 1.7462805665935048, + "learning_rate": 5.4564249417276484e-06, + "loss": 0.7433, + "step": 21555 + }, + { + "epoch": 0.6606595562093907, + "grad_norm": 2.0448862547061486, + "learning_rate": 5.455540704965751e-06, + "loss": 0.6404, + "step": 21556 + }, + { + "epoch": 0.6606902047321319, + "grad_norm": 1.6685718666881855, + "learning_rate": 5.4546565129815836e-06, + "loss": 0.7479, + "step": 21557 + }, + { + "epoch": 0.6607208532548731, + "grad_norm": 0.6924067183552789, + "learning_rate": 5.4537723657838475e-06, + "loss": 0.538, + "step": 21558 + }, + { + "epoch": 0.6607515017776143, + "grad_norm": 1.4562148700636843, + "learning_rate": 5.452888263381251e-06, + "loss": 0.6439, + "step": 21559 + }, + { + "epoch": 0.6607821503003555, + "grad_norm": 1.6327763900320256, + "learning_rate": 5.452004205782511e-06, + "loss": 0.7064, + "step": 21560 + }, + { + "epoch": 0.6608127988230967, + "grad_norm": 1.4749382695267463, + "learning_rate": 5.451120192996337e-06, + "loss": 0.6198, + "step": 21561 + }, + { + "epoch": 0.6608434473458379, + "grad_norm": 1.673978617927063, + "learning_rate": 5.450236225031442e-06, + "loss": 0.702, + "step": 21562 + }, + { + "epoch": 0.6608740958685791, + "grad_norm": 1.6794779646822686, + "learning_rate": 5.449352301896531e-06, + "loss": 0.7042, + "step": 21563 + }, + { + "epoch": 0.6609047443913203, + "grad_norm": 1.5246683744577882, + "learning_rate": 5.448468423600317e-06, + "loss": 0.597, + "step": 21564 + }, + { + "epoch": 0.6609353929140616, + "grad_norm": 1.622179561269672, + "learning_rate": 5.447584590151511e-06, + "loss": 0.673, + "step": 21565 + }, + { + "epoch": 0.6609660414368027, + "grad_norm": 1.5514217972543998, + "learning_rate": 5.446700801558819e-06, + "loss": 0.6944, + "step": 21566 + }, + { + "epoch": 0.660996689959544, + "grad_norm": 1.630644499012842, + "learning_rate": 5.445817057830944e-06, + "loss": 0.6778, + "step": 21567 + }, + { + "epoch": 0.6610273384822851, + "grad_norm": 0.6620951961920817, + "learning_rate": 5.4449333589766004e-06, + "loss": 0.5242, + "step": 21568 + }, + { + "epoch": 0.6610579870050264, + "grad_norm": 1.5281399024292817, + "learning_rate": 5.444049705004497e-06, + "loss": 0.6083, + "step": 21569 + }, + { + "epoch": 0.6610886355277675, + "grad_norm": 1.6416866101292602, + "learning_rate": 5.443166095923334e-06, + "loss": 0.6607, + "step": 21570 + }, + { + "epoch": 0.6611192840505088, + "grad_norm": 1.6247810321822276, + "learning_rate": 5.44228253174182e-06, + "loss": 0.6675, + "step": 21571 + }, + { + "epoch": 0.6611499325732499, + "grad_norm": 1.2267734074447816, + "learning_rate": 5.4413990124686645e-06, + "loss": 0.5795, + "step": 21572 + }, + { + "epoch": 0.6611805810959912, + "grad_norm": 0.6757090812055492, + "learning_rate": 5.440515538112574e-06, + "loss": 0.5698, + "step": 21573 + }, + { + "epoch": 0.6612112296187324, + "grad_norm": 1.5369217482680144, + "learning_rate": 5.43963210868225e-06, + "loss": 0.6733, + "step": 21574 + }, + { + "epoch": 0.6612418781414736, + "grad_norm": 1.6502907230893151, + "learning_rate": 5.43874872418639e-06, + "loss": 0.6804, + "step": 21575 + }, + { + "epoch": 0.6612725266642148, + "grad_norm": 1.5223459045034167, + "learning_rate": 5.4378653846337135e-06, + "loss": 0.6743, + "step": 21576 + }, + { + "epoch": 0.661303175186956, + "grad_norm": 1.4651714473175939, + "learning_rate": 5.436982090032917e-06, + "loss": 0.6704, + "step": 21577 + }, + { + "epoch": 0.6613338237096972, + "grad_norm": 1.5430274595264482, + "learning_rate": 5.436098840392701e-06, + "loss": 0.6693, + "step": 21578 + }, + { + "epoch": 0.6613644722324384, + "grad_norm": 1.8320930826937516, + "learning_rate": 5.435215635721769e-06, + "loss": 0.6622, + "step": 21579 + }, + { + "epoch": 0.6613951207551796, + "grad_norm": 1.558249857048238, + "learning_rate": 5.434332476028825e-06, + "loss": 0.6587, + "step": 21580 + }, + { + "epoch": 0.6614257692779208, + "grad_norm": 1.4635693616521226, + "learning_rate": 5.433449361322576e-06, + "loss": 0.5669, + "step": 21581 + }, + { + "epoch": 0.661456417800662, + "grad_norm": 1.5477328267913888, + "learning_rate": 5.432566291611715e-06, + "loss": 0.6571, + "step": 21582 + }, + { + "epoch": 0.6614870663234033, + "grad_norm": 1.654759904249839, + "learning_rate": 5.4316832669049455e-06, + "loss": 0.667, + "step": 21583 + }, + { + "epoch": 0.6615177148461444, + "grad_norm": 1.3748552217638004, + "learning_rate": 5.430800287210975e-06, + "loss": 0.6326, + "step": 21584 + }, + { + "epoch": 0.6615483633688857, + "grad_norm": 1.5166127036072188, + "learning_rate": 5.429917352538498e-06, + "loss": 0.6934, + "step": 21585 + }, + { + "epoch": 0.6615790118916268, + "grad_norm": 1.579070703337489, + "learning_rate": 5.429034462896207e-06, + "loss": 0.5814, + "step": 21586 + }, + { + "epoch": 0.661609660414368, + "grad_norm": 1.6072716581290223, + "learning_rate": 5.428151618292818e-06, + "loss": 0.6097, + "step": 21587 + }, + { + "epoch": 0.6616403089371092, + "grad_norm": 1.8113803194961697, + "learning_rate": 5.427268818737015e-06, + "loss": 0.7371, + "step": 21588 + }, + { + "epoch": 0.6616709574598504, + "grad_norm": 0.6566354725051218, + "learning_rate": 5.426386064237508e-06, + "loss": 0.5538, + "step": 21589 + }, + { + "epoch": 0.6617016059825916, + "grad_norm": 1.727245065129431, + "learning_rate": 5.425503354802983e-06, + "loss": 0.7249, + "step": 21590 + }, + { + "epoch": 0.6617322545053328, + "grad_norm": 1.5168877966117782, + "learning_rate": 5.424620690442146e-06, + "loss": 0.5995, + "step": 21591 + }, + { + "epoch": 0.6617629030280741, + "grad_norm": 1.7244208092591082, + "learning_rate": 5.423738071163696e-06, + "loss": 0.6493, + "step": 21592 + }, + { + "epoch": 0.6617935515508152, + "grad_norm": 1.6383726545467279, + "learning_rate": 5.42285549697632e-06, + "loss": 0.6758, + "step": 21593 + }, + { + "epoch": 0.6618242000735565, + "grad_norm": 1.5435942030227243, + "learning_rate": 5.42197296788872e-06, + "loss": 0.6401, + "step": 21594 + }, + { + "epoch": 0.6618548485962976, + "grad_norm": 1.6498622809929897, + "learning_rate": 5.4210904839095965e-06, + "loss": 0.6525, + "step": 21595 + }, + { + "epoch": 0.6618854971190389, + "grad_norm": 1.5836857471466406, + "learning_rate": 5.420208045047641e-06, + "loss": 0.6915, + "step": 21596 + }, + { + "epoch": 0.66191614564178, + "grad_norm": 1.517447502715607, + "learning_rate": 5.419325651311538e-06, + "loss": 0.6998, + "step": 21597 + }, + { + "epoch": 0.6619467941645213, + "grad_norm": 1.6275556786530627, + "learning_rate": 5.418443302709999e-06, + "loss": 0.7272, + "step": 21598 + }, + { + "epoch": 0.6619774426872624, + "grad_norm": 1.6165396046404867, + "learning_rate": 5.417560999251708e-06, + "loss": 0.6501, + "step": 21599 + }, + { + "epoch": 0.6620080912100037, + "grad_norm": 1.6855591306390583, + "learning_rate": 5.416678740945365e-06, + "loss": 0.6851, + "step": 21600 + }, + { + "epoch": 0.6620387397327449, + "grad_norm": 1.4975304078849034, + "learning_rate": 5.415796527799653e-06, + "loss": 0.6196, + "step": 21601 + }, + { + "epoch": 0.6620693882554861, + "grad_norm": 1.841346832006278, + "learning_rate": 5.414914359823271e-06, + "loss": 0.7126, + "step": 21602 + }, + { + "epoch": 0.6621000367782273, + "grad_norm": 1.385866340097297, + "learning_rate": 5.4140322370249164e-06, + "loss": 0.5909, + "step": 21603 + }, + { + "epoch": 0.6621306853009685, + "grad_norm": 1.57334485144882, + "learning_rate": 5.413150159413272e-06, + "loss": 0.5728, + "step": 21604 + }, + { + "epoch": 0.6621613338237097, + "grad_norm": 1.6812153696160803, + "learning_rate": 5.412268126997031e-06, + "loss": 0.6748, + "step": 21605 + }, + { + "epoch": 0.6621919823464509, + "grad_norm": 1.637342600766577, + "learning_rate": 5.411386139784891e-06, + "loss": 0.6724, + "step": 21606 + }, + { + "epoch": 0.6622226308691921, + "grad_norm": 1.5287839541867747, + "learning_rate": 5.410504197785533e-06, + "loss": 0.7104, + "step": 21607 + }, + { + "epoch": 0.6622532793919333, + "grad_norm": 1.5220882125136455, + "learning_rate": 5.4096223010076506e-06, + "loss": 0.6761, + "step": 21608 + }, + { + "epoch": 0.6622839279146745, + "grad_norm": 1.6608367341753523, + "learning_rate": 5.408740449459939e-06, + "loss": 0.6573, + "step": 21609 + }, + { + "epoch": 0.6623145764374158, + "grad_norm": 1.5520424282824294, + "learning_rate": 5.407858643151078e-06, + "loss": 0.6722, + "step": 21610 + }, + { + "epoch": 0.6623452249601569, + "grad_norm": 1.6258114312801557, + "learning_rate": 5.406976882089766e-06, + "loss": 0.7034, + "step": 21611 + }, + { + "epoch": 0.6623758734828982, + "grad_norm": 1.4602786035230355, + "learning_rate": 5.406095166284681e-06, + "loss": 0.6396, + "step": 21612 + }, + { + "epoch": 0.6624065220056393, + "grad_norm": 1.9634254677603142, + "learning_rate": 5.405213495744516e-06, + "loss": 0.7777, + "step": 21613 + }, + { + "epoch": 0.6624371705283806, + "grad_norm": 1.4573156629096329, + "learning_rate": 5.404331870477963e-06, + "loss": 0.601, + "step": 21614 + }, + { + "epoch": 0.6624678190511217, + "grad_norm": 1.6528837242403207, + "learning_rate": 5.403450290493698e-06, + "loss": 0.6991, + "step": 21615 + }, + { + "epoch": 0.662498467573863, + "grad_norm": 1.6630279747873233, + "learning_rate": 5.402568755800415e-06, + "loss": 0.6527, + "step": 21616 + }, + { + "epoch": 0.6625291160966041, + "grad_norm": 1.6153038398820112, + "learning_rate": 5.401687266406801e-06, + "loss": 0.7252, + "step": 21617 + }, + { + "epoch": 0.6625597646193453, + "grad_norm": 1.7558838893890354, + "learning_rate": 5.400805822321536e-06, + "loss": 0.6662, + "step": 21618 + }, + { + "epoch": 0.6625904131420866, + "grad_norm": 1.7253880883418347, + "learning_rate": 5.399924423553311e-06, + "loss": 0.6056, + "step": 21619 + }, + { + "epoch": 0.6626210616648277, + "grad_norm": 1.548692031130166, + "learning_rate": 5.399043070110803e-06, + "loss": 0.6973, + "step": 21620 + }, + { + "epoch": 0.662651710187569, + "grad_norm": 0.678715957119655, + "learning_rate": 5.398161762002702e-06, + "loss": 0.5694, + "step": 21621 + }, + { + "epoch": 0.6626823587103101, + "grad_norm": 1.5573899380990281, + "learning_rate": 5.397280499237696e-06, + "loss": 0.6155, + "step": 21622 + }, + { + "epoch": 0.6627130072330514, + "grad_norm": 1.7353938587993278, + "learning_rate": 5.396399281824457e-06, + "loss": 0.6631, + "step": 21623 + }, + { + "epoch": 0.6627436557557925, + "grad_norm": 1.7638606927198803, + "learning_rate": 5.395518109771674e-06, + "loss": 0.7539, + "step": 21624 + }, + { + "epoch": 0.6627743042785338, + "grad_norm": 1.549509031317298, + "learning_rate": 5.394636983088033e-06, + "loss": 0.613, + "step": 21625 + }, + { + "epoch": 0.6628049528012749, + "grad_norm": 0.6844531464765313, + "learning_rate": 5.3937559017822095e-06, + "loss": 0.5601, + "step": 21626 + }, + { + "epoch": 0.6628356013240162, + "grad_norm": 1.5552509441181088, + "learning_rate": 5.392874865862886e-06, + "loss": 0.6176, + "step": 21627 + }, + { + "epoch": 0.6628662498467573, + "grad_norm": 1.8515634118431819, + "learning_rate": 5.39199387533875e-06, + "loss": 0.6504, + "step": 21628 + }, + { + "epoch": 0.6628968983694986, + "grad_norm": 0.6657672842865721, + "learning_rate": 5.3911129302184736e-06, + "loss": 0.5295, + "step": 21629 + }, + { + "epoch": 0.6629275468922398, + "grad_norm": 0.6546384075298977, + "learning_rate": 5.390232030510745e-06, + "loss": 0.5273, + "step": 21630 + }, + { + "epoch": 0.662958195414981, + "grad_norm": 1.9142867060977835, + "learning_rate": 5.389351176224234e-06, + "loss": 0.7135, + "step": 21631 + }, + { + "epoch": 0.6629888439377222, + "grad_norm": 1.6627509995165926, + "learning_rate": 5.388470367367627e-06, + "loss": 0.6747, + "step": 21632 + }, + { + "epoch": 0.6630194924604634, + "grad_norm": 1.5959073069710312, + "learning_rate": 5.387589603949605e-06, + "loss": 0.6942, + "step": 21633 + }, + { + "epoch": 0.6630501409832046, + "grad_norm": 0.6774714407422572, + "learning_rate": 5.3867088859788384e-06, + "loss": 0.5408, + "step": 21634 + }, + { + "epoch": 0.6630807895059458, + "grad_norm": 1.5111745269915593, + "learning_rate": 5.3858282134640105e-06, + "loss": 0.6434, + "step": 21635 + }, + { + "epoch": 0.663111438028687, + "grad_norm": 1.508425302821457, + "learning_rate": 5.3849475864138005e-06, + "loss": 0.673, + "step": 21636 + }, + { + "epoch": 0.6631420865514283, + "grad_norm": 1.7684219938776038, + "learning_rate": 5.3840670048368796e-06, + "loss": 0.7095, + "step": 21637 + }, + { + "epoch": 0.6631727350741694, + "grad_norm": 1.660123886180481, + "learning_rate": 5.383186468741928e-06, + "loss": 0.6802, + "step": 21638 + }, + { + "epoch": 0.6632033835969107, + "grad_norm": 1.4922708070475785, + "learning_rate": 5.382305978137624e-06, + "loss": 0.7377, + "step": 21639 + }, + { + "epoch": 0.6632340321196518, + "grad_norm": 1.5748896848194047, + "learning_rate": 5.381425533032638e-06, + "loss": 0.6014, + "step": 21640 + }, + { + "epoch": 0.6632646806423931, + "grad_norm": 0.6647367202440773, + "learning_rate": 5.380545133435651e-06, + "loss": 0.5573, + "step": 21641 + }, + { + "epoch": 0.6632953291651342, + "grad_norm": 1.7049753663051228, + "learning_rate": 5.379664779355332e-06, + "loss": 0.7536, + "step": 21642 + }, + { + "epoch": 0.6633259776878755, + "grad_norm": 1.6963667198388097, + "learning_rate": 5.378784470800355e-06, + "loss": 0.6557, + "step": 21643 + }, + { + "epoch": 0.6633566262106166, + "grad_norm": 1.7741058077993284, + "learning_rate": 5.3779042077794045e-06, + "loss": 0.6838, + "step": 21644 + }, + { + "epoch": 0.6633872747333579, + "grad_norm": 1.7146574869616342, + "learning_rate": 5.37702399030114e-06, + "loss": 0.6606, + "step": 21645 + }, + { + "epoch": 0.663417923256099, + "grad_norm": 1.715708940812616, + "learning_rate": 5.3761438183742424e-06, + "loss": 0.7832, + "step": 21646 + }, + { + "epoch": 0.6634485717788403, + "grad_norm": 0.6938186772080397, + "learning_rate": 5.375263692007386e-06, + "loss": 0.5876, + "step": 21647 + }, + { + "epoch": 0.6634792203015815, + "grad_norm": 1.6755861553054603, + "learning_rate": 5.374383611209237e-06, + "loss": 0.6358, + "step": 21648 + }, + { + "epoch": 0.6635098688243226, + "grad_norm": 1.7089237804075208, + "learning_rate": 5.373503575988469e-06, + "loss": 0.6821, + "step": 21649 + }, + { + "epoch": 0.6635405173470639, + "grad_norm": 1.5285810104788249, + "learning_rate": 5.372623586353758e-06, + "loss": 0.6467, + "step": 21650 + }, + { + "epoch": 0.663571165869805, + "grad_norm": 1.48172219039065, + "learning_rate": 5.371743642313767e-06, + "loss": 0.6621, + "step": 21651 + }, + { + "epoch": 0.6636018143925463, + "grad_norm": 1.5621698749618003, + "learning_rate": 5.370863743877174e-06, + "loss": 0.6319, + "step": 21652 + }, + { + "epoch": 0.6636324629152874, + "grad_norm": 1.5320765613895626, + "learning_rate": 5.369983891052637e-06, + "loss": 0.6254, + "step": 21653 + }, + { + "epoch": 0.6636631114380287, + "grad_norm": 1.6327676500112351, + "learning_rate": 5.369104083848842e-06, + "loss": 0.6143, + "step": 21654 + }, + { + "epoch": 0.6636937599607698, + "grad_norm": 1.55878847876413, + "learning_rate": 5.368224322274447e-06, + "loss": 0.6744, + "step": 21655 + }, + { + "epoch": 0.6637244084835111, + "grad_norm": 0.692769649984985, + "learning_rate": 5.367344606338121e-06, + "loss": 0.5366, + "step": 21656 + }, + { + "epoch": 0.6637550570062523, + "grad_norm": 1.5818182792889701, + "learning_rate": 5.366464936048533e-06, + "loss": 0.6845, + "step": 21657 + }, + { + "epoch": 0.6637857055289935, + "grad_norm": 1.6305217679394919, + "learning_rate": 5.365585311414356e-06, + "loss": 0.6107, + "step": 21658 + }, + { + "epoch": 0.6638163540517347, + "grad_norm": 1.53134426799279, + "learning_rate": 5.364705732444249e-06, + "loss": 0.6045, + "step": 21659 + }, + { + "epoch": 0.6638470025744759, + "grad_norm": 1.762488847369886, + "learning_rate": 5.363826199146882e-06, + "loss": 0.7227, + "step": 21660 + }, + { + "epoch": 0.6638776510972171, + "grad_norm": 1.71642606441017, + "learning_rate": 5.362946711530921e-06, + "loss": 0.6296, + "step": 21661 + }, + { + "epoch": 0.6639082996199583, + "grad_norm": 0.6137908796109671, + "learning_rate": 5.362067269605037e-06, + "loss": 0.5113, + "step": 21662 + }, + { + "epoch": 0.6639389481426995, + "grad_norm": 1.521517318246867, + "learning_rate": 5.361187873377891e-06, + "loss": 0.7419, + "step": 21663 + }, + { + "epoch": 0.6639695966654408, + "grad_norm": 1.6913412250542075, + "learning_rate": 5.360308522858144e-06, + "loss": 0.6126, + "step": 21664 + }, + { + "epoch": 0.6640002451881819, + "grad_norm": 1.594447950669572, + "learning_rate": 5.359429218054464e-06, + "loss": 0.6593, + "step": 21665 + }, + { + "epoch": 0.6640308937109232, + "grad_norm": 0.6767970146995597, + "learning_rate": 5.358549958975518e-06, + "loss": 0.547, + "step": 21666 + }, + { + "epoch": 0.6640615422336643, + "grad_norm": 0.7174461125200954, + "learning_rate": 5.357670745629965e-06, + "loss": 0.545, + "step": 21667 + }, + { + "epoch": 0.6640921907564056, + "grad_norm": 1.5943827880073345, + "learning_rate": 5.356791578026469e-06, + "loss": 0.6794, + "step": 21668 + }, + { + "epoch": 0.6641228392791467, + "grad_norm": 1.8417134988327248, + "learning_rate": 5.355912456173698e-06, + "loss": 0.7186, + "step": 21669 + }, + { + "epoch": 0.664153487801888, + "grad_norm": 0.6818043391555318, + "learning_rate": 5.3550333800803054e-06, + "loss": 0.5467, + "step": 21670 + }, + { + "epoch": 0.6641841363246291, + "grad_norm": 1.6284684539194263, + "learning_rate": 5.354154349754961e-06, + "loss": 0.6636, + "step": 21671 + }, + { + "epoch": 0.6642147848473704, + "grad_norm": 0.6593874582738108, + "learning_rate": 5.353275365206314e-06, + "loss": 0.5763, + "step": 21672 + }, + { + "epoch": 0.6642454333701115, + "grad_norm": 1.578688927625426, + "learning_rate": 5.3523964264430424e-06, + "loss": 0.7696, + "step": 21673 + }, + { + "epoch": 0.6642760818928528, + "grad_norm": 1.3522975770099885, + "learning_rate": 5.351517533473799e-06, + "loss": 0.5909, + "step": 21674 + }, + { + "epoch": 0.664306730415594, + "grad_norm": 0.6649002268323944, + "learning_rate": 5.350638686307238e-06, + "loss": 0.5497, + "step": 21675 + }, + { + "epoch": 0.6643373789383352, + "grad_norm": 1.5155760383758534, + "learning_rate": 5.349759884952024e-06, + "loss": 0.6761, + "step": 21676 + }, + { + "epoch": 0.6643680274610764, + "grad_norm": 1.7649508989824763, + "learning_rate": 5.348881129416818e-06, + "loss": 0.7945, + "step": 21677 + }, + { + "epoch": 0.6643986759838176, + "grad_norm": 1.722843974953676, + "learning_rate": 5.348002419710274e-06, + "loss": 0.708, + "step": 21678 + }, + { + "epoch": 0.6644293245065588, + "grad_norm": 1.4145547458968386, + "learning_rate": 5.347123755841052e-06, + "loss": 0.627, + "step": 21679 + }, + { + "epoch": 0.6644599730292999, + "grad_norm": 0.6681039265548155, + "learning_rate": 5.346245137817809e-06, + "loss": 0.5238, + "step": 21680 + }, + { + "epoch": 0.6644906215520412, + "grad_norm": 1.469424041791024, + "learning_rate": 5.345366565649208e-06, + "loss": 0.611, + "step": 21681 + }, + { + "epoch": 0.6645212700747823, + "grad_norm": 1.785245943005652, + "learning_rate": 5.344488039343903e-06, + "loss": 0.733, + "step": 21682 + }, + { + "epoch": 0.6645519185975236, + "grad_norm": 1.6901737775494499, + "learning_rate": 5.3436095589105385e-06, + "loss": 0.6517, + "step": 21683 + }, + { + "epoch": 0.6645825671202648, + "grad_norm": 1.6111450960535998, + "learning_rate": 5.342731124357789e-06, + "loss": 0.7183, + "step": 21684 + }, + { + "epoch": 0.664613215643006, + "grad_norm": 1.6801803778529154, + "learning_rate": 5.341852735694301e-06, + "loss": 0.7456, + "step": 21685 + }, + { + "epoch": 0.6646438641657472, + "grad_norm": 1.8030785020258837, + "learning_rate": 5.340974392928726e-06, + "loss": 0.6657, + "step": 21686 + }, + { + "epoch": 0.6646745126884884, + "grad_norm": 0.6480611169434035, + "learning_rate": 5.340096096069722e-06, + "loss": 0.5008, + "step": 21687 + }, + { + "epoch": 0.6647051612112296, + "grad_norm": 1.7228855245529457, + "learning_rate": 5.3392178451259435e-06, + "loss": 0.6763, + "step": 21688 + }, + { + "epoch": 0.6647358097339708, + "grad_norm": 1.4829955938462818, + "learning_rate": 5.338339640106049e-06, + "loss": 0.578, + "step": 21689 + }, + { + "epoch": 0.664766458256712, + "grad_norm": 1.476376213992438, + "learning_rate": 5.337461481018682e-06, + "loss": 0.6467, + "step": 21690 + }, + { + "epoch": 0.6647971067794533, + "grad_norm": 1.759467616975466, + "learning_rate": 5.336583367872501e-06, + "loss": 0.6066, + "step": 21691 + }, + { + "epoch": 0.6648277553021944, + "grad_norm": 1.4680672591899528, + "learning_rate": 5.335705300676159e-06, + "loss": 0.625, + "step": 21692 + }, + { + "epoch": 0.6648584038249357, + "grad_norm": 1.521008277250692, + "learning_rate": 5.334827279438308e-06, + "loss": 0.696, + "step": 21693 + }, + { + "epoch": 0.6648890523476768, + "grad_norm": 1.58608056540398, + "learning_rate": 5.33394930416759e-06, + "loss": 0.6936, + "step": 21694 + }, + { + "epoch": 0.6649197008704181, + "grad_norm": 0.6443279443137537, + "learning_rate": 5.33307137487267e-06, + "loss": 0.5593, + "step": 21695 + }, + { + "epoch": 0.6649503493931592, + "grad_norm": 0.6428710660053406, + "learning_rate": 5.332193491562192e-06, + "loss": 0.5296, + "step": 21696 + }, + { + "epoch": 0.6649809979159005, + "grad_norm": 1.5344502261144344, + "learning_rate": 5.331315654244802e-06, + "loss": 0.6425, + "step": 21697 + }, + { + "epoch": 0.6650116464386416, + "grad_norm": 1.6652898993162928, + "learning_rate": 5.330437862929154e-06, + "loss": 0.7196, + "step": 21698 + }, + { + "epoch": 0.6650422949613829, + "grad_norm": 1.7508456015027827, + "learning_rate": 5.3295601176238955e-06, + "loss": 0.6522, + "step": 21699 + }, + { + "epoch": 0.665072943484124, + "grad_norm": 1.5160560189846286, + "learning_rate": 5.3286824183376806e-06, + "loss": 0.6028, + "step": 21700 + }, + { + "epoch": 0.6651035920068653, + "grad_norm": 1.483968460829544, + "learning_rate": 5.32780476507915e-06, + "loss": 0.6644, + "step": 21701 + }, + { + "epoch": 0.6651342405296065, + "grad_norm": 1.6962459155447924, + "learning_rate": 5.3269271578569525e-06, + "loss": 0.775, + "step": 21702 + }, + { + "epoch": 0.6651648890523477, + "grad_norm": 1.4186196919148597, + "learning_rate": 5.326049596679743e-06, + "loss": 0.6461, + "step": 21703 + }, + { + "epoch": 0.6651955375750889, + "grad_norm": 1.7177586075020457, + "learning_rate": 5.325172081556161e-06, + "loss": 0.7517, + "step": 21704 + }, + { + "epoch": 0.6652261860978301, + "grad_norm": 1.5431617840309217, + "learning_rate": 5.324294612494849e-06, + "loss": 0.7624, + "step": 21705 + }, + { + "epoch": 0.6652568346205713, + "grad_norm": 1.7837354363764173, + "learning_rate": 5.323417189504465e-06, + "loss": 0.8122, + "step": 21706 + }, + { + "epoch": 0.6652874831433125, + "grad_norm": 1.5433802264274317, + "learning_rate": 5.322539812593643e-06, + "loss": 0.7093, + "step": 21707 + }, + { + "epoch": 0.6653181316660537, + "grad_norm": 1.580031071668623, + "learning_rate": 5.32166248177104e-06, + "loss": 0.6339, + "step": 21708 + }, + { + "epoch": 0.665348780188795, + "grad_norm": 1.6187571184209781, + "learning_rate": 5.320785197045286e-06, + "loss": 0.7091, + "step": 21709 + }, + { + "epoch": 0.6653794287115361, + "grad_norm": 1.3850077315169693, + "learning_rate": 5.319907958425034e-06, + "loss": 0.5868, + "step": 21710 + }, + { + "epoch": 0.6654100772342773, + "grad_norm": 1.560692582214815, + "learning_rate": 5.319030765918931e-06, + "loss": 0.6496, + "step": 21711 + }, + { + "epoch": 0.6654407257570185, + "grad_norm": 1.6334302151397508, + "learning_rate": 5.318153619535612e-06, + "loss": 0.7076, + "step": 21712 + }, + { + "epoch": 0.6654713742797597, + "grad_norm": 1.5207571566771698, + "learning_rate": 5.317276519283723e-06, + "loss": 0.6305, + "step": 21713 + }, + { + "epoch": 0.6655020228025009, + "grad_norm": 1.4503960567225143, + "learning_rate": 5.31639946517191e-06, + "loss": 0.5849, + "step": 21714 + }, + { + "epoch": 0.6655326713252421, + "grad_norm": 1.4742242086044794, + "learning_rate": 5.315522457208808e-06, + "loss": 0.6771, + "step": 21715 + }, + { + "epoch": 0.6655633198479833, + "grad_norm": 1.5272835094147372, + "learning_rate": 5.314645495403064e-06, + "loss": 0.5948, + "step": 21716 + }, + { + "epoch": 0.6655939683707245, + "grad_norm": 1.5322928728088367, + "learning_rate": 5.313768579763314e-06, + "loss": 0.5689, + "step": 21717 + }, + { + "epoch": 0.6656246168934657, + "grad_norm": 1.6263947222462198, + "learning_rate": 5.312891710298202e-06, + "loss": 0.6412, + "step": 21718 + }, + { + "epoch": 0.6656552654162069, + "grad_norm": 1.442184726659187, + "learning_rate": 5.31201488701637e-06, + "loss": 0.621, + "step": 21719 + }, + { + "epoch": 0.6656859139389482, + "grad_norm": 1.6763710174100295, + "learning_rate": 5.311138109926452e-06, + "loss": 0.6168, + "step": 21720 + }, + { + "epoch": 0.6657165624616893, + "grad_norm": 2.0898734931396685, + "learning_rate": 5.3102613790370894e-06, + "loss": 0.7364, + "step": 21721 + }, + { + "epoch": 0.6657472109844306, + "grad_norm": 1.5569619367863725, + "learning_rate": 5.3093846943569245e-06, + "loss": 0.6662, + "step": 21722 + }, + { + "epoch": 0.6657778595071717, + "grad_norm": 1.5673040564732956, + "learning_rate": 5.308508055894595e-06, + "loss": 0.6705, + "step": 21723 + }, + { + "epoch": 0.665808508029913, + "grad_norm": 1.6773788247819612, + "learning_rate": 5.307631463658724e-06, + "loss": 0.577, + "step": 21724 + }, + { + "epoch": 0.6658391565526541, + "grad_norm": 1.460486036699476, + "learning_rate": 5.306754917657972e-06, + "loss": 0.6788, + "step": 21725 + }, + { + "epoch": 0.6658698050753954, + "grad_norm": 1.6076015789228555, + "learning_rate": 5.3058784179009596e-06, + "loss": 0.6135, + "step": 21726 + }, + { + "epoch": 0.6659004535981365, + "grad_norm": 1.4720518587361766, + "learning_rate": 5.305001964396333e-06, + "loss": 0.5553, + "step": 21727 + }, + { + "epoch": 0.6659311021208778, + "grad_norm": 1.543256226427469, + "learning_rate": 5.3041255571527175e-06, + "loss": 0.7215, + "step": 21728 + }, + { + "epoch": 0.665961750643619, + "grad_norm": 1.583985781835748, + "learning_rate": 5.303249196178755e-06, + "loss": 0.6549, + "step": 21729 + }, + { + "epoch": 0.6659923991663602, + "grad_norm": 1.6136748692487508, + "learning_rate": 5.3023728814830845e-06, + "loss": 0.6961, + "step": 21730 + }, + { + "epoch": 0.6660230476891014, + "grad_norm": 0.6884239589271834, + "learning_rate": 5.301496613074331e-06, + "loss": 0.5566, + "step": 21731 + }, + { + "epoch": 0.6660536962118426, + "grad_norm": 1.7308987809820036, + "learning_rate": 5.300620390961134e-06, + "loss": 0.7194, + "step": 21732 + }, + { + "epoch": 0.6660843447345838, + "grad_norm": 1.5890392196268144, + "learning_rate": 5.299744215152132e-06, + "loss": 0.7078, + "step": 21733 + }, + { + "epoch": 0.666114993257325, + "grad_norm": 1.5848450349816594, + "learning_rate": 5.298868085655946e-06, + "loss": 0.6169, + "step": 21734 + }, + { + "epoch": 0.6661456417800662, + "grad_norm": 1.3945433191317025, + "learning_rate": 5.297992002481218e-06, + "loss": 0.6042, + "step": 21735 + }, + { + "epoch": 0.6661762903028075, + "grad_norm": 1.3852950325414481, + "learning_rate": 5.2971159656365815e-06, + "loss": 0.618, + "step": 21736 + }, + { + "epoch": 0.6662069388255486, + "grad_norm": 1.5609636272107341, + "learning_rate": 5.296239975130659e-06, + "loss": 0.6346, + "step": 21737 + }, + { + "epoch": 0.6662375873482899, + "grad_norm": 0.6899841998837208, + "learning_rate": 5.2953640309720935e-06, + "loss": 0.5507, + "step": 21738 + }, + { + "epoch": 0.666268235871031, + "grad_norm": 0.6391918292254413, + "learning_rate": 5.294488133169506e-06, + "loss": 0.5129, + "step": 21739 + }, + { + "epoch": 0.6662988843937723, + "grad_norm": 1.7186970520027798, + "learning_rate": 5.293612281731529e-06, + "loss": 0.6496, + "step": 21740 + }, + { + "epoch": 0.6663295329165134, + "grad_norm": 1.5843172450770544, + "learning_rate": 5.2927364766667995e-06, + "loss": 0.6601, + "step": 21741 + }, + { + "epoch": 0.6663601814392546, + "grad_norm": 1.5598914487270956, + "learning_rate": 5.291860717983939e-06, + "loss": 0.68, + "step": 21742 + }, + { + "epoch": 0.6663908299619958, + "grad_norm": 1.6742094629529778, + "learning_rate": 5.290985005691578e-06, + "loss": 0.6456, + "step": 21743 + }, + { + "epoch": 0.666421478484737, + "grad_norm": 1.5702733949266774, + "learning_rate": 5.29010933979835e-06, + "loss": 0.6176, + "step": 21744 + }, + { + "epoch": 0.6664521270074782, + "grad_norm": 1.6944185319932745, + "learning_rate": 5.2892337203128775e-06, + "loss": 0.701, + "step": 21745 + }, + { + "epoch": 0.6664827755302194, + "grad_norm": 1.523760773836325, + "learning_rate": 5.28835814724379e-06, + "loss": 0.5955, + "step": 21746 + }, + { + "epoch": 0.6665134240529607, + "grad_norm": 1.6163000064661124, + "learning_rate": 5.287482620599718e-06, + "loss": 0.654, + "step": 21747 + }, + { + "epoch": 0.6665440725757018, + "grad_norm": 1.5553438031940663, + "learning_rate": 5.286607140389282e-06, + "loss": 0.593, + "step": 21748 + }, + { + "epoch": 0.6665747210984431, + "grad_norm": 1.5935369605901877, + "learning_rate": 5.285731706621117e-06, + "loss": 0.7153, + "step": 21749 + }, + { + "epoch": 0.6666053696211842, + "grad_norm": 1.6741853528001454, + "learning_rate": 5.28485631930384e-06, + "loss": 0.6695, + "step": 21750 + }, + { + "epoch": 0.6666360181439255, + "grad_norm": 1.8243000548560846, + "learning_rate": 5.283980978446077e-06, + "loss": 0.732, + "step": 21751 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.3271512390738502, + "learning_rate": 5.283105684056462e-06, + "loss": 0.6734, + "step": 21752 + }, + { + "epoch": 0.6666973151894079, + "grad_norm": 1.7067764419837552, + "learning_rate": 5.282230436143609e-06, + "loss": 0.7551, + "step": 21753 + }, + { + "epoch": 0.666727963712149, + "grad_norm": 1.8750495039616775, + "learning_rate": 5.281355234716148e-06, + "loss": 0.6905, + "step": 21754 + }, + { + "epoch": 0.6667586122348903, + "grad_norm": 1.4860942958469798, + "learning_rate": 5.280480079782705e-06, + "loss": 0.6895, + "step": 21755 + }, + { + "epoch": 0.6667892607576315, + "grad_norm": 1.592518559265332, + "learning_rate": 5.2796049713518945e-06, + "loss": 0.5707, + "step": 21756 + }, + { + "epoch": 0.6668199092803727, + "grad_norm": 1.3854918002988572, + "learning_rate": 5.278729909432344e-06, + "loss": 0.5892, + "step": 21757 + }, + { + "epoch": 0.6668505578031139, + "grad_norm": 0.6560672556179716, + "learning_rate": 5.27785489403268e-06, + "loss": 0.5465, + "step": 21758 + }, + { + "epoch": 0.6668812063258551, + "grad_norm": 1.5718133961190117, + "learning_rate": 5.276979925161516e-06, + "loss": 0.564, + "step": 21759 + }, + { + "epoch": 0.6669118548485963, + "grad_norm": 1.9139455798617329, + "learning_rate": 5.27610500282748e-06, + "loss": 0.7258, + "step": 21760 + }, + { + "epoch": 0.6669425033713375, + "grad_norm": 1.8016375239205598, + "learning_rate": 5.2752301270391884e-06, + "loss": 0.7758, + "step": 21761 + }, + { + "epoch": 0.6669731518940787, + "grad_norm": 1.6194027572734258, + "learning_rate": 5.274355297805261e-06, + "loss": 0.6128, + "step": 21762 + }, + { + "epoch": 0.66700380041682, + "grad_norm": 1.7605875546525223, + "learning_rate": 5.273480515134326e-06, + "loss": 0.6829, + "step": 21763 + }, + { + "epoch": 0.6670344489395611, + "grad_norm": 1.6331209351329983, + "learning_rate": 5.272605779034992e-06, + "loss": 0.6368, + "step": 21764 + }, + { + "epoch": 0.6670650974623024, + "grad_norm": 1.6861270288478962, + "learning_rate": 5.2717310895158815e-06, + "loss": 0.616, + "step": 21765 + }, + { + "epoch": 0.6670957459850435, + "grad_norm": 1.4694830752727464, + "learning_rate": 5.2708564465856195e-06, + "loss": 0.7146, + "step": 21766 + }, + { + "epoch": 0.6671263945077848, + "grad_norm": 1.7004442155217128, + "learning_rate": 5.269981850252814e-06, + "loss": 0.696, + "step": 21767 + }, + { + "epoch": 0.6671570430305259, + "grad_norm": 1.5261398813407863, + "learning_rate": 5.269107300526093e-06, + "loss": 0.6697, + "step": 21768 + }, + { + "epoch": 0.6671876915532672, + "grad_norm": 1.6986977367534062, + "learning_rate": 5.268232797414064e-06, + "loss": 0.6861, + "step": 21769 + }, + { + "epoch": 0.6672183400760083, + "grad_norm": 1.6396735663151434, + "learning_rate": 5.267358340925348e-06, + "loss": 0.7746, + "step": 21770 + }, + { + "epoch": 0.6672489885987496, + "grad_norm": 1.7421795703890706, + "learning_rate": 5.2664839310685645e-06, + "loss": 0.7474, + "step": 21771 + }, + { + "epoch": 0.6672796371214907, + "grad_norm": 1.5039621820411522, + "learning_rate": 5.2656095678523215e-06, + "loss": 0.7284, + "step": 21772 + }, + { + "epoch": 0.6673102856442319, + "grad_norm": 1.8889375439293248, + "learning_rate": 5.26473525128524e-06, + "loss": 0.7454, + "step": 21773 + }, + { + "epoch": 0.6673409341669732, + "grad_norm": 1.593863254183436, + "learning_rate": 5.2638609813759364e-06, + "loss": 0.6914, + "step": 21774 + }, + { + "epoch": 0.6673715826897143, + "grad_norm": 1.620140211816671, + "learning_rate": 5.26298675813302e-06, + "loss": 0.5911, + "step": 21775 + }, + { + "epoch": 0.6674022312124556, + "grad_norm": 1.4661796398383014, + "learning_rate": 5.262112581565106e-06, + "loss": 0.6922, + "step": 21776 + }, + { + "epoch": 0.6674328797351967, + "grad_norm": 0.6625052727310046, + "learning_rate": 5.2612384516808124e-06, + "loss": 0.5638, + "step": 21777 + }, + { + "epoch": 0.667463528257938, + "grad_norm": 0.7037843005820257, + "learning_rate": 5.2603643684887465e-06, + "loss": 0.5434, + "step": 21778 + }, + { + "epoch": 0.6674941767806791, + "grad_norm": 1.6405248964199395, + "learning_rate": 5.259490331997525e-06, + "loss": 0.6947, + "step": 21779 + }, + { + "epoch": 0.6675248253034204, + "grad_norm": 1.7204712819588937, + "learning_rate": 5.258616342215752e-06, + "loss": 0.6662, + "step": 21780 + }, + { + "epoch": 0.6675554738261615, + "grad_norm": 1.5471815525528472, + "learning_rate": 5.257742399152052e-06, + "loss": 0.6874, + "step": 21781 + }, + { + "epoch": 0.6675861223489028, + "grad_norm": 0.6732081597771289, + "learning_rate": 5.256868502815031e-06, + "loss": 0.5505, + "step": 21782 + }, + { + "epoch": 0.667616770871644, + "grad_norm": 1.675028668658028, + "learning_rate": 5.255994653213292e-06, + "loss": 0.6822, + "step": 21783 + }, + { + "epoch": 0.6676474193943852, + "grad_norm": 1.4615513621063663, + "learning_rate": 5.255120850355453e-06, + "loss": 0.5858, + "step": 21784 + }, + { + "epoch": 0.6676780679171264, + "grad_norm": 1.6138134898754117, + "learning_rate": 5.254247094250127e-06, + "loss": 0.6791, + "step": 21785 + }, + { + "epoch": 0.6677087164398676, + "grad_norm": 1.7272011080841305, + "learning_rate": 5.253373384905913e-06, + "loss": 0.7381, + "step": 21786 + }, + { + "epoch": 0.6677393649626088, + "grad_norm": 1.6517431937417646, + "learning_rate": 5.252499722331427e-06, + "loss": 0.6575, + "step": 21787 + }, + { + "epoch": 0.66777001348535, + "grad_norm": 1.4891283336353154, + "learning_rate": 5.251626106535274e-06, + "loss": 0.6776, + "step": 21788 + }, + { + "epoch": 0.6678006620080912, + "grad_norm": 0.6616854194366854, + "learning_rate": 5.25075253752607e-06, + "loss": 0.5143, + "step": 21789 + }, + { + "epoch": 0.6678313105308324, + "grad_norm": 1.704647740983644, + "learning_rate": 5.2498790153124155e-06, + "loss": 0.6702, + "step": 21790 + }, + { + "epoch": 0.6678619590535736, + "grad_norm": 1.4803045081808714, + "learning_rate": 5.249005539902911e-06, + "loss": 0.6725, + "step": 21791 + }, + { + "epoch": 0.6678926075763149, + "grad_norm": 1.722384104955294, + "learning_rate": 5.248132111306178e-06, + "loss": 0.648, + "step": 21792 + }, + { + "epoch": 0.667923256099056, + "grad_norm": 1.4733885219313727, + "learning_rate": 5.2472587295308155e-06, + "loss": 0.632, + "step": 21793 + }, + { + "epoch": 0.6679539046217973, + "grad_norm": 1.5654142957217758, + "learning_rate": 5.246385394585424e-06, + "loss": 0.7208, + "step": 21794 + }, + { + "epoch": 0.6679845531445384, + "grad_norm": 1.57905800105444, + "learning_rate": 5.245512106478614e-06, + "loss": 0.6812, + "step": 21795 + }, + { + "epoch": 0.6680152016672797, + "grad_norm": 1.5629866640755998, + "learning_rate": 5.244638865218993e-06, + "loss": 0.7364, + "step": 21796 + }, + { + "epoch": 0.6680458501900208, + "grad_norm": 1.4677579527980207, + "learning_rate": 5.243765670815158e-06, + "loss": 0.6091, + "step": 21797 + }, + { + "epoch": 0.6680764987127621, + "grad_norm": 1.4759887735430295, + "learning_rate": 5.242892523275718e-06, + "loss": 0.6117, + "step": 21798 + }, + { + "epoch": 0.6681071472355032, + "grad_norm": 1.4769071364252846, + "learning_rate": 5.2420194226092745e-06, + "loss": 0.7142, + "step": 21799 + }, + { + "epoch": 0.6681377957582445, + "grad_norm": 0.6932488042804548, + "learning_rate": 5.241146368824434e-06, + "loss": 0.5507, + "step": 21800 + }, + { + "epoch": 0.6681684442809857, + "grad_norm": 1.7541579220943018, + "learning_rate": 5.240273361929797e-06, + "loss": 0.6554, + "step": 21801 + }, + { + "epoch": 0.6681990928037269, + "grad_norm": 1.5632034312092418, + "learning_rate": 5.239400401933961e-06, + "loss": 0.5962, + "step": 21802 + }, + { + "epoch": 0.6682297413264681, + "grad_norm": 1.5076662191998602, + "learning_rate": 5.238527488845529e-06, + "loss": 0.7386, + "step": 21803 + }, + { + "epoch": 0.6682603898492092, + "grad_norm": 1.6486737925657753, + "learning_rate": 5.23765462267311e-06, + "loss": 0.6228, + "step": 21804 + }, + { + "epoch": 0.6682910383719505, + "grad_norm": 1.7898595422363341, + "learning_rate": 5.2367818034252924e-06, + "loss": 0.6902, + "step": 21805 + }, + { + "epoch": 0.6683216868946916, + "grad_norm": 1.6536254961210448, + "learning_rate": 5.235909031110684e-06, + "loss": 0.6881, + "step": 21806 + }, + { + "epoch": 0.6683523354174329, + "grad_norm": 0.6730394986123953, + "learning_rate": 5.235036305737883e-06, + "loss": 0.5508, + "step": 21807 + }, + { + "epoch": 0.668382983940174, + "grad_norm": 1.6418665022903107, + "learning_rate": 5.234163627315492e-06, + "loss": 0.6686, + "step": 21808 + }, + { + "epoch": 0.6684136324629153, + "grad_norm": 1.5801352150422443, + "learning_rate": 5.233290995852105e-06, + "loss": 0.5663, + "step": 21809 + }, + { + "epoch": 0.6684442809856564, + "grad_norm": 1.5441072024969593, + "learning_rate": 5.232418411356315e-06, + "loss": 0.7817, + "step": 21810 + }, + { + "epoch": 0.6684749295083977, + "grad_norm": 1.6161653181565818, + "learning_rate": 5.231545873836734e-06, + "loss": 0.6045, + "step": 21811 + }, + { + "epoch": 0.6685055780311389, + "grad_norm": 1.7844785884007452, + "learning_rate": 5.2306733833019514e-06, + "loss": 0.6995, + "step": 21812 + }, + { + "epoch": 0.6685362265538801, + "grad_norm": 1.4031529734532462, + "learning_rate": 5.22980093976056e-06, + "loss": 0.5487, + "step": 21813 + }, + { + "epoch": 0.6685668750766213, + "grad_norm": 1.321471020626161, + "learning_rate": 5.228928543221161e-06, + "loss": 0.6781, + "step": 21814 + }, + { + "epoch": 0.6685975235993625, + "grad_norm": 1.4348487190479038, + "learning_rate": 5.228056193692349e-06, + "loss": 0.5137, + "step": 21815 + }, + { + "epoch": 0.6686281721221037, + "grad_norm": 1.5716408966097657, + "learning_rate": 5.227183891182724e-06, + "loss": 0.605, + "step": 21816 + }, + { + "epoch": 0.6686588206448449, + "grad_norm": 1.4959429540661302, + "learning_rate": 5.226311635700875e-06, + "loss": 0.6596, + "step": 21817 + }, + { + "epoch": 0.6686894691675861, + "grad_norm": 1.661865644670245, + "learning_rate": 5.225439427255397e-06, + "loss": 0.6531, + "step": 21818 + }, + { + "epoch": 0.6687201176903274, + "grad_norm": 1.6556235434783313, + "learning_rate": 5.224567265854892e-06, + "loss": 0.7281, + "step": 21819 + }, + { + "epoch": 0.6687507662130685, + "grad_norm": 0.6876364701402398, + "learning_rate": 5.223695151507946e-06, + "loss": 0.5892, + "step": 21820 + }, + { + "epoch": 0.6687814147358098, + "grad_norm": 1.574081831564705, + "learning_rate": 5.222823084223145e-06, + "loss": 0.6858, + "step": 21821 + }, + { + "epoch": 0.6688120632585509, + "grad_norm": 1.7711088896421623, + "learning_rate": 5.221951064009101e-06, + "loss": 0.6586, + "step": 21822 + }, + { + "epoch": 0.6688427117812922, + "grad_norm": 1.3819315969494521, + "learning_rate": 5.221079090874392e-06, + "loss": 0.6159, + "step": 21823 + }, + { + "epoch": 0.6688733603040333, + "grad_norm": 1.5015315813787853, + "learning_rate": 5.220207164827613e-06, + "loss": 0.7058, + "step": 21824 + }, + { + "epoch": 0.6689040088267746, + "grad_norm": 1.7709218429403468, + "learning_rate": 5.2193352858773535e-06, + "loss": 0.6543, + "step": 21825 + }, + { + "epoch": 0.6689346573495157, + "grad_norm": 1.5700045782414414, + "learning_rate": 5.2184634540322075e-06, + "loss": 0.6577, + "step": 21826 + }, + { + "epoch": 0.668965305872257, + "grad_norm": 1.4782167329860727, + "learning_rate": 5.21759166930077e-06, + "loss": 0.6899, + "step": 21827 + }, + { + "epoch": 0.6689959543949981, + "grad_norm": 1.5476348906458832, + "learning_rate": 5.216719931691619e-06, + "loss": 0.6283, + "step": 21828 + }, + { + "epoch": 0.6690266029177394, + "grad_norm": 0.6660950302249253, + "learning_rate": 5.215848241213352e-06, + "loss": 0.5335, + "step": 21829 + }, + { + "epoch": 0.6690572514404806, + "grad_norm": 1.6663101195538195, + "learning_rate": 5.2149765978745596e-06, + "loss": 0.6305, + "step": 21830 + }, + { + "epoch": 0.6690878999632218, + "grad_norm": 1.6302247035292003, + "learning_rate": 5.214105001683827e-06, + "loss": 0.7272, + "step": 21831 + }, + { + "epoch": 0.669118548485963, + "grad_norm": 1.560260457957343, + "learning_rate": 5.213233452649735e-06, + "loss": 0.702, + "step": 21832 + }, + { + "epoch": 0.6691491970087042, + "grad_norm": 1.582750981535187, + "learning_rate": 5.212361950780885e-06, + "loss": 0.6435, + "step": 21833 + }, + { + "epoch": 0.6691798455314454, + "grad_norm": 1.6726295992773346, + "learning_rate": 5.2114904960858555e-06, + "loss": 0.6979, + "step": 21834 + }, + { + "epoch": 0.6692104940541865, + "grad_norm": 1.4836311658923595, + "learning_rate": 5.210619088573239e-06, + "loss": 0.6231, + "step": 21835 + }, + { + "epoch": 0.6692411425769278, + "grad_norm": 0.6557347245603194, + "learning_rate": 5.209747728251613e-06, + "loss": 0.5472, + "step": 21836 + }, + { + "epoch": 0.669271791099669, + "grad_norm": 1.6211701009264838, + "learning_rate": 5.208876415129569e-06, + "loss": 0.7842, + "step": 21837 + }, + { + "epoch": 0.6693024396224102, + "grad_norm": 1.774901385242316, + "learning_rate": 5.208005149215694e-06, + "loss": 0.6527, + "step": 21838 + }, + { + "epoch": 0.6693330881451514, + "grad_norm": 1.7032488869951727, + "learning_rate": 5.2071339305185685e-06, + "loss": 0.7495, + "step": 21839 + }, + { + "epoch": 0.6693637366678926, + "grad_norm": 0.6583677835006292, + "learning_rate": 5.206262759046779e-06, + "loss": 0.5458, + "step": 21840 + }, + { + "epoch": 0.6693943851906338, + "grad_norm": 1.46359741631159, + "learning_rate": 5.2053916348089115e-06, + "loss": 0.7379, + "step": 21841 + }, + { + "epoch": 0.669425033713375, + "grad_norm": 1.5750382725015564, + "learning_rate": 5.204520557813544e-06, + "loss": 0.611, + "step": 21842 + }, + { + "epoch": 0.6694556822361162, + "grad_norm": 1.5385528388578795, + "learning_rate": 5.203649528069261e-06, + "loss": 0.6146, + "step": 21843 + }, + { + "epoch": 0.6694863307588574, + "grad_norm": 1.5406264402597023, + "learning_rate": 5.202778545584652e-06, + "loss": 0.6319, + "step": 21844 + }, + { + "epoch": 0.6695169792815986, + "grad_norm": 1.6308004314217264, + "learning_rate": 5.201907610368289e-06, + "loss": 0.6996, + "step": 21845 + }, + { + "epoch": 0.6695476278043399, + "grad_norm": 1.7104772807251445, + "learning_rate": 5.20103672242876e-06, + "loss": 0.592, + "step": 21846 + }, + { + "epoch": 0.669578276327081, + "grad_norm": 1.4591648735484617, + "learning_rate": 5.200165881774642e-06, + "loss": 0.6566, + "step": 21847 + }, + { + "epoch": 0.6696089248498223, + "grad_norm": 1.5394655924505058, + "learning_rate": 5.199295088414518e-06, + "loss": 0.7592, + "step": 21848 + }, + { + "epoch": 0.6696395733725634, + "grad_norm": 1.5961858216085951, + "learning_rate": 5.1984243423569715e-06, + "loss": 0.6684, + "step": 21849 + }, + { + "epoch": 0.6696702218953047, + "grad_norm": 1.441262071713039, + "learning_rate": 5.197553643610573e-06, + "loss": 0.6583, + "step": 21850 + }, + { + "epoch": 0.6697008704180458, + "grad_norm": 1.5354307867643404, + "learning_rate": 5.196682992183909e-06, + "loss": 0.7865, + "step": 21851 + }, + { + "epoch": 0.6697315189407871, + "grad_norm": 1.5810255190917453, + "learning_rate": 5.1958123880855596e-06, + "loss": 0.6801, + "step": 21852 + }, + { + "epoch": 0.6697621674635282, + "grad_norm": 1.6027214761139523, + "learning_rate": 5.194941831324097e-06, + "loss": 0.6715, + "step": 21853 + }, + { + "epoch": 0.6697928159862695, + "grad_norm": 1.5560156152667282, + "learning_rate": 5.1940713219081044e-06, + "loss": 0.646, + "step": 21854 + }, + { + "epoch": 0.6698234645090106, + "grad_norm": 1.5794538290584357, + "learning_rate": 5.193200859846152e-06, + "loss": 0.7121, + "step": 21855 + }, + { + "epoch": 0.6698541130317519, + "grad_norm": 1.9490564671779689, + "learning_rate": 5.192330445146825e-06, + "loss": 0.6995, + "step": 21856 + }, + { + "epoch": 0.6698847615544931, + "grad_norm": 0.6466093620771405, + "learning_rate": 5.191460077818697e-06, + "loss": 0.5074, + "step": 21857 + }, + { + "epoch": 0.6699154100772343, + "grad_norm": 0.7056748725334523, + "learning_rate": 5.1905897578703415e-06, + "loss": 0.5346, + "step": 21858 + }, + { + "epoch": 0.6699460585999755, + "grad_norm": 1.3657535821025444, + "learning_rate": 5.189719485310334e-06, + "loss": 0.6784, + "step": 21859 + }, + { + "epoch": 0.6699767071227167, + "grad_norm": 1.5512121813184476, + "learning_rate": 5.188849260147255e-06, + "loss": 0.6207, + "step": 21860 + }, + { + "epoch": 0.6700073556454579, + "grad_norm": 1.6254871026012094, + "learning_rate": 5.187979082389671e-06, + "loss": 0.6532, + "step": 21861 + }, + { + "epoch": 0.6700380041681991, + "grad_norm": 1.7171393662644394, + "learning_rate": 5.187108952046163e-06, + "loss": 0.5866, + "step": 21862 + }, + { + "epoch": 0.6700686526909403, + "grad_norm": 1.4814561802261097, + "learning_rate": 5.186238869125303e-06, + "loss": 0.6811, + "step": 21863 + }, + { + "epoch": 0.6700993012136816, + "grad_norm": 1.7003075664582679, + "learning_rate": 5.18536883363566e-06, + "loss": 0.7361, + "step": 21864 + }, + { + "epoch": 0.6701299497364227, + "grad_norm": 1.5761557763682652, + "learning_rate": 5.184498845585814e-06, + "loss": 0.5662, + "step": 21865 + }, + { + "epoch": 0.6701605982591639, + "grad_norm": 1.6544964698891436, + "learning_rate": 5.183628904984328e-06, + "loss": 0.6944, + "step": 21866 + }, + { + "epoch": 0.6701912467819051, + "grad_norm": 0.6499371473872187, + "learning_rate": 5.18275901183978e-06, + "loss": 0.5579, + "step": 21867 + }, + { + "epoch": 0.6702218953046463, + "grad_norm": 1.6122176483145778, + "learning_rate": 5.181889166160744e-06, + "loss": 0.6979, + "step": 21868 + }, + { + "epoch": 0.6702525438273875, + "grad_norm": 1.4689913451364929, + "learning_rate": 5.1810193679557815e-06, + "loss": 0.6519, + "step": 21869 + }, + { + "epoch": 0.6702831923501287, + "grad_norm": 1.555322199523315, + "learning_rate": 5.18014961723347e-06, + "loss": 0.6752, + "step": 21870 + }, + { + "epoch": 0.6703138408728699, + "grad_norm": 1.6364134209815773, + "learning_rate": 5.17927991400238e-06, + "loss": 0.6953, + "step": 21871 + }, + { + "epoch": 0.6703444893956111, + "grad_norm": 0.6716782381302986, + "learning_rate": 5.178410258271076e-06, + "loss": 0.5523, + "step": 21872 + }, + { + "epoch": 0.6703751379183523, + "grad_norm": 1.6198379188816217, + "learning_rate": 5.177540650048127e-06, + "loss": 0.6389, + "step": 21873 + }, + { + "epoch": 0.6704057864410935, + "grad_norm": 1.9064490611928697, + "learning_rate": 5.176671089342109e-06, + "loss": 0.6822, + "step": 21874 + }, + { + "epoch": 0.6704364349638348, + "grad_norm": 1.583810611819562, + "learning_rate": 5.175801576161582e-06, + "loss": 0.7215, + "step": 21875 + }, + { + "epoch": 0.6704670834865759, + "grad_norm": 1.2808958856466546, + "learning_rate": 5.17493211051512e-06, + "loss": 0.5658, + "step": 21876 + }, + { + "epoch": 0.6704977320093172, + "grad_norm": 0.6679494506081847, + "learning_rate": 5.174062692411281e-06, + "loss": 0.5426, + "step": 21877 + }, + { + "epoch": 0.6705283805320583, + "grad_norm": 1.4185865896180099, + "learning_rate": 5.173193321858639e-06, + "loss": 0.5548, + "step": 21878 + }, + { + "epoch": 0.6705590290547996, + "grad_norm": 1.7504445650182392, + "learning_rate": 5.1723239988657605e-06, + "loss": 0.7384, + "step": 21879 + }, + { + "epoch": 0.6705896775775407, + "grad_norm": 1.5172516742280149, + "learning_rate": 5.171454723441205e-06, + "loss": 0.5802, + "step": 21880 + }, + { + "epoch": 0.670620326100282, + "grad_norm": 1.3788286808470473, + "learning_rate": 5.170585495593543e-06, + "loss": 0.706, + "step": 21881 + }, + { + "epoch": 0.6706509746230231, + "grad_norm": 1.757934827009014, + "learning_rate": 5.169716315331341e-06, + "loss": 0.6799, + "step": 21882 + }, + { + "epoch": 0.6706816231457644, + "grad_norm": 1.7019551887887068, + "learning_rate": 5.168847182663155e-06, + "loss": 0.6169, + "step": 21883 + }, + { + "epoch": 0.6707122716685056, + "grad_norm": 1.3501730635773492, + "learning_rate": 5.167978097597555e-06, + "loss": 0.6122, + "step": 21884 + }, + { + "epoch": 0.6707429201912468, + "grad_norm": 1.7274627478113513, + "learning_rate": 5.167109060143107e-06, + "loss": 0.7461, + "step": 21885 + }, + { + "epoch": 0.670773568713988, + "grad_norm": 1.721110836251699, + "learning_rate": 5.166240070308366e-06, + "loss": 0.719, + "step": 21886 + }, + { + "epoch": 0.6708042172367292, + "grad_norm": 1.593351125533496, + "learning_rate": 5.1653711281019015e-06, + "loss": 0.6791, + "step": 21887 + }, + { + "epoch": 0.6708348657594704, + "grad_norm": 1.698778518059956, + "learning_rate": 5.1645022335322656e-06, + "loss": 0.7443, + "step": 21888 + }, + { + "epoch": 0.6708655142822116, + "grad_norm": 1.6512475441206431, + "learning_rate": 5.163633386608034e-06, + "loss": 0.692, + "step": 21889 + }, + { + "epoch": 0.6708961628049528, + "grad_norm": 1.8005951426285032, + "learning_rate": 5.16276458733776e-06, + "loss": 0.681, + "step": 21890 + }, + { + "epoch": 0.670926811327694, + "grad_norm": 1.6829539466423697, + "learning_rate": 5.16189583573e-06, + "loss": 0.6229, + "step": 21891 + }, + { + "epoch": 0.6709574598504352, + "grad_norm": 0.683160581744616, + "learning_rate": 5.161027131793318e-06, + "loss": 0.5603, + "step": 21892 + }, + { + "epoch": 0.6709881083731765, + "grad_norm": 0.6664355943164375, + "learning_rate": 5.160158475536277e-06, + "loss": 0.5395, + "step": 21893 + }, + { + "epoch": 0.6710187568959176, + "grad_norm": 1.8644662797145164, + "learning_rate": 5.15928986696743e-06, + "loss": 0.7376, + "step": 21894 + }, + { + "epoch": 0.6710494054186589, + "grad_norm": 1.5539637441584557, + "learning_rate": 5.158421306095339e-06, + "loss": 0.6261, + "step": 21895 + }, + { + "epoch": 0.6710800539414, + "grad_norm": 1.4818512540953503, + "learning_rate": 5.157552792928562e-06, + "loss": 0.6984, + "step": 21896 + }, + { + "epoch": 0.6711107024641412, + "grad_norm": 0.6743106123714572, + "learning_rate": 5.156684327475659e-06, + "loss": 0.5716, + "step": 21897 + }, + { + "epoch": 0.6711413509868824, + "grad_norm": 1.6981100978529642, + "learning_rate": 5.155815909745185e-06, + "loss": 0.6191, + "step": 21898 + }, + { + "epoch": 0.6711719995096236, + "grad_norm": 1.4536555784163574, + "learning_rate": 5.1549475397456915e-06, + "loss": 0.6239, + "step": 21899 + }, + { + "epoch": 0.6712026480323648, + "grad_norm": 1.484796772492433, + "learning_rate": 5.15407921748574e-06, + "loss": 0.6923, + "step": 21900 + }, + { + "epoch": 0.671233296555106, + "grad_norm": 1.633702722407245, + "learning_rate": 5.15321094297389e-06, + "loss": 0.7347, + "step": 21901 + }, + { + "epoch": 0.6712639450778473, + "grad_norm": 1.406966989350746, + "learning_rate": 5.152342716218689e-06, + "loss": 0.5574, + "step": 21902 + }, + { + "epoch": 0.6712945936005884, + "grad_norm": 1.4536703426066702, + "learning_rate": 5.1514745372286955e-06, + "loss": 0.5139, + "step": 21903 + }, + { + "epoch": 0.6713252421233297, + "grad_norm": 0.67363185343483, + "learning_rate": 5.1506064060124675e-06, + "loss": 0.5449, + "step": 21904 + }, + { + "epoch": 0.6713558906460708, + "grad_norm": 1.610975031960066, + "learning_rate": 5.149738322578551e-06, + "loss": 0.6716, + "step": 21905 + }, + { + "epoch": 0.6713865391688121, + "grad_norm": 1.6544942266667317, + "learning_rate": 5.148870286935509e-06, + "loss": 0.6541, + "step": 21906 + }, + { + "epoch": 0.6714171876915532, + "grad_norm": 1.8199508372381612, + "learning_rate": 5.148002299091881e-06, + "loss": 0.7212, + "step": 21907 + }, + { + "epoch": 0.6714478362142945, + "grad_norm": 1.8311239984999306, + "learning_rate": 5.147134359056235e-06, + "loss": 0.7333, + "step": 21908 + }, + { + "epoch": 0.6714784847370356, + "grad_norm": 1.5742476404794346, + "learning_rate": 5.146266466837115e-06, + "loss": 0.5957, + "step": 21909 + }, + { + "epoch": 0.6715091332597769, + "grad_norm": 1.637017455829124, + "learning_rate": 5.145398622443072e-06, + "loss": 0.6612, + "step": 21910 + }, + { + "epoch": 0.671539781782518, + "grad_norm": 1.6765600252092743, + "learning_rate": 5.1445308258826566e-06, + "loss": 0.6432, + "step": 21911 + }, + { + "epoch": 0.6715704303052593, + "grad_norm": 1.5442403311670476, + "learning_rate": 5.143663077164426e-06, + "loss": 0.6396, + "step": 21912 + }, + { + "epoch": 0.6716010788280005, + "grad_norm": 1.455455641073124, + "learning_rate": 5.142795376296921e-06, + "loss": 0.6751, + "step": 21913 + }, + { + "epoch": 0.6716317273507417, + "grad_norm": 1.8074716582121968, + "learning_rate": 5.1419277232886965e-06, + "loss": 0.7541, + "step": 21914 + }, + { + "epoch": 0.6716623758734829, + "grad_norm": 1.7583384280121872, + "learning_rate": 5.141060118148302e-06, + "loss": 0.6479, + "step": 21915 + }, + { + "epoch": 0.6716930243962241, + "grad_norm": 1.5928496774926921, + "learning_rate": 5.140192560884288e-06, + "loss": 0.5582, + "step": 21916 + }, + { + "epoch": 0.6717236729189653, + "grad_norm": 1.697393271605074, + "learning_rate": 5.1393250515052e-06, + "loss": 0.6944, + "step": 21917 + }, + { + "epoch": 0.6717543214417065, + "grad_norm": 1.6567183499156521, + "learning_rate": 5.138457590019579e-06, + "loss": 0.6169, + "step": 21918 + }, + { + "epoch": 0.6717849699644477, + "grad_norm": 1.4649880021237423, + "learning_rate": 5.137590176435987e-06, + "loss": 0.7197, + "step": 21919 + }, + { + "epoch": 0.671815618487189, + "grad_norm": 1.4696048503401642, + "learning_rate": 5.136722810762962e-06, + "loss": 0.6539, + "step": 21920 + }, + { + "epoch": 0.6718462670099301, + "grad_norm": 1.9827153960040116, + "learning_rate": 5.135855493009048e-06, + "loss": 0.7692, + "step": 21921 + }, + { + "epoch": 0.6718769155326714, + "grad_norm": 1.580665370965905, + "learning_rate": 5.134988223182795e-06, + "loss": 0.6256, + "step": 21922 + }, + { + "epoch": 0.6719075640554125, + "grad_norm": 1.5638107468126605, + "learning_rate": 5.134121001292746e-06, + "loss": 0.6282, + "step": 21923 + }, + { + "epoch": 0.6719382125781538, + "grad_norm": 1.5620422095459465, + "learning_rate": 5.133253827347455e-06, + "loss": 0.5526, + "step": 21924 + }, + { + "epoch": 0.6719688611008949, + "grad_norm": 1.689491457065471, + "learning_rate": 5.132386701355453e-06, + "loss": 0.7448, + "step": 21925 + }, + { + "epoch": 0.6719995096236362, + "grad_norm": 1.711199481331027, + "learning_rate": 5.131519623325291e-06, + "loss": 0.7205, + "step": 21926 + }, + { + "epoch": 0.6720301581463773, + "grad_norm": 0.6724782786400131, + "learning_rate": 5.1306525932655145e-06, + "loss": 0.5675, + "step": 21927 + }, + { + "epoch": 0.6720608066691185, + "grad_norm": 1.5892584761955486, + "learning_rate": 5.129785611184666e-06, + "loss": 0.6763, + "step": 21928 + }, + { + "epoch": 0.6720914551918598, + "grad_norm": 1.3538854957462603, + "learning_rate": 5.128918677091277e-06, + "loss": 0.6216, + "step": 21929 + }, + { + "epoch": 0.6721221037146009, + "grad_norm": 1.5035234294915107, + "learning_rate": 5.128051790993907e-06, + "loss": 0.7618, + "step": 21930 + }, + { + "epoch": 0.6721527522373422, + "grad_norm": 1.4931880513425055, + "learning_rate": 5.1271849529010875e-06, + "loss": 0.6512, + "step": 21931 + }, + { + "epoch": 0.6721834007600833, + "grad_norm": 0.6702094341058278, + "learning_rate": 5.1263181628213585e-06, + "loss": 0.5688, + "step": 21932 + }, + { + "epoch": 0.6722140492828246, + "grad_norm": 1.502540459313064, + "learning_rate": 5.125451420763263e-06, + "loss": 0.6547, + "step": 21933 + }, + { + "epoch": 0.6722446978055657, + "grad_norm": 1.7984813505336041, + "learning_rate": 5.124584726735343e-06, + "loss": 0.6454, + "step": 21934 + }, + { + "epoch": 0.672275346328307, + "grad_norm": 1.5201094846781273, + "learning_rate": 5.1237180807461404e-06, + "loss": 0.6677, + "step": 21935 + }, + { + "epoch": 0.6723059948510481, + "grad_norm": 1.7616366784545403, + "learning_rate": 5.122851482804187e-06, + "loss": 0.642, + "step": 21936 + }, + { + "epoch": 0.6723366433737894, + "grad_norm": 1.8213696563596493, + "learning_rate": 5.121984932918027e-06, + "loss": 0.7483, + "step": 21937 + }, + { + "epoch": 0.6723672918965306, + "grad_norm": 1.8928656800249037, + "learning_rate": 5.121118431096201e-06, + "loss": 0.7115, + "step": 21938 + }, + { + "epoch": 0.6723979404192718, + "grad_norm": 1.5672839156146792, + "learning_rate": 5.120251977347243e-06, + "loss": 0.6852, + "step": 21939 + }, + { + "epoch": 0.672428588942013, + "grad_norm": 1.32557702736489, + "learning_rate": 5.119385571679684e-06, + "loss": 0.6806, + "step": 21940 + }, + { + "epoch": 0.6724592374647542, + "grad_norm": 1.5922260470059741, + "learning_rate": 5.118519214102075e-06, + "loss": 0.7204, + "step": 21941 + }, + { + "epoch": 0.6724898859874954, + "grad_norm": 1.655319403296525, + "learning_rate": 5.117652904622941e-06, + "loss": 0.6908, + "step": 21942 + }, + { + "epoch": 0.6725205345102366, + "grad_norm": 1.6597373771402701, + "learning_rate": 5.116786643250827e-06, + "loss": 0.6637, + "step": 21943 + }, + { + "epoch": 0.6725511830329778, + "grad_norm": 1.5597521564787706, + "learning_rate": 5.1159204299942565e-06, + "loss": 0.6857, + "step": 21944 + }, + { + "epoch": 0.672581831555719, + "grad_norm": 1.867017885301953, + "learning_rate": 5.115054264861775e-06, + "loss": 0.7678, + "step": 21945 + }, + { + "epoch": 0.6726124800784602, + "grad_norm": 0.7220574131115886, + "learning_rate": 5.114188147861916e-06, + "loss": 0.5771, + "step": 21946 + }, + { + "epoch": 0.6726431286012015, + "grad_norm": 0.6875141310767923, + "learning_rate": 5.113322079003209e-06, + "loss": 0.5467, + "step": 21947 + }, + { + "epoch": 0.6726737771239426, + "grad_norm": 1.9662700077860353, + "learning_rate": 5.112456058294188e-06, + "loss": 0.7818, + "step": 21948 + }, + { + "epoch": 0.6727044256466839, + "grad_norm": 1.6386206918912463, + "learning_rate": 5.111590085743392e-06, + "loss": 0.6993, + "step": 21949 + }, + { + "epoch": 0.672735074169425, + "grad_norm": 1.5344866902176784, + "learning_rate": 5.11072416135935e-06, + "loss": 0.6883, + "step": 21950 + }, + { + "epoch": 0.6727657226921663, + "grad_norm": 0.6817738828663154, + "learning_rate": 5.109858285150591e-06, + "loss": 0.548, + "step": 21951 + }, + { + "epoch": 0.6727963712149074, + "grad_norm": 1.6872313081044354, + "learning_rate": 5.108992457125649e-06, + "loss": 0.7463, + "step": 21952 + }, + { + "epoch": 0.6728270197376487, + "grad_norm": 1.7020252609280002, + "learning_rate": 5.108126677293055e-06, + "loss": 0.6636, + "step": 21953 + }, + { + "epoch": 0.6728576682603898, + "grad_norm": 1.5301488183199148, + "learning_rate": 5.107260945661345e-06, + "loss": 0.6435, + "step": 21954 + }, + { + "epoch": 0.6728883167831311, + "grad_norm": 1.6436882390643333, + "learning_rate": 5.106395262239041e-06, + "loss": 0.6296, + "step": 21955 + }, + { + "epoch": 0.6729189653058723, + "grad_norm": 1.514726211025667, + "learning_rate": 5.1055296270346755e-06, + "loss": 0.6604, + "step": 21956 + }, + { + "epoch": 0.6729496138286135, + "grad_norm": 1.4101971195768623, + "learning_rate": 5.104664040056784e-06, + "loss": 0.5621, + "step": 21957 + }, + { + "epoch": 0.6729802623513547, + "grad_norm": 1.6874506929646187, + "learning_rate": 5.103798501313891e-06, + "loss": 0.7333, + "step": 21958 + }, + { + "epoch": 0.6730109108740958, + "grad_norm": 1.6298730766079499, + "learning_rate": 5.1029330108145145e-06, + "loss": 0.6612, + "step": 21959 + }, + { + "epoch": 0.6730415593968371, + "grad_norm": 0.6898668369178174, + "learning_rate": 5.1020675685671994e-06, + "loss": 0.5538, + "step": 21960 + }, + { + "epoch": 0.6730722079195782, + "grad_norm": 1.676141171913708, + "learning_rate": 5.101202174580464e-06, + "loss": 0.6709, + "step": 21961 + }, + { + "epoch": 0.6731028564423195, + "grad_norm": 1.5952522885782636, + "learning_rate": 5.10033682886284e-06, + "loss": 0.7358, + "step": 21962 + }, + { + "epoch": 0.6731335049650606, + "grad_norm": 0.6617204861332241, + "learning_rate": 5.099471531422846e-06, + "loss": 0.5433, + "step": 21963 + }, + { + "epoch": 0.6731641534878019, + "grad_norm": 1.6096315936947618, + "learning_rate": 5.098606282269014e-06, + "loss": 0.6394, + "step": 21964 + }, + { + "epoch": 0.673194802010543, + "grad_norm": 1.6974956779473191, + "learning_rate": 5.0977410814098705e-06, + "loss": 0.6335, + "step": 21965 + }, + { + "epoch": 0.6732254505332843, + "grad_norm": 1.6087822682681292, + "learning_rate": 5.096875928853937e-06, + "loss": 0.5935, + "step": 21966 + }, + { + "epoch": 0.6732560990560255, + "grad_norm": 1.7207792882564885, + "learning_rate": 5.096010824609739e-06, + "loss": 0.7057, + "step": 21967 + }, + { + "epoch": 0.6732867475787667, + "grad_norm": 1.6341307307536066, + "learning_rate": 5.095145768685803e-06, + "loss": 0.6839, + "step": 21968 + }, + { + "epoch": 0.6733173961015079, + "grad_norm": 0.6820993132903486, + "learning_rate": 5.094280761090648e-06, + "loss": 0.559, + "step": 21969 + }, + { + "epoch": 0.6733480446242491, + "grad_norm": 0.6446672429760242, + "learning_rate": 5.0934158018328e-06, + "loss": 0.5428, + "step": 21970 + }, + { + "epoch": 0.6733786931469903, + "grad_norm": 1.6216461889927756, + "learning_rate": 5.0925508909207855e-06, + "loss": 0.6494, + "step": 21971 + }, + { + "epoch": 0.6734093416697315, + "grad_norm": 1.7372655969541058, + "learning_rate": 5.091686028363118e-06, + "loss": 0.728, + "step": 21972 + }, + { + "epoch": 0.6734399901924727, + "grad_norm": 1.4687156980564533, + "learning_rate": 5.090821214168329e-06, + "loss": 0.7276, + "step": 21973 + }, + { + "epoch": 0.673470638715214, + "grad_norm": 1.7967322900401632, + "learning_rate": 5.08995644834493e-06, + "loss": 0.7455, + "step": 21974 + }, + { + "epoch": 0.6735012872379551, + "grad_norm": 1.6391060127844568, + "learning_rate": 5.089091730901448e-06, + "loss": 0.7119, + "step": 21975 + }, + { + "epoch": 0.6735319357606964, + "grad_norm": 1.7313859613049973, + "learning_rate": 5.088227061846402e-06, + "loss": 0.6627, + "step": 21976 + }, + { + "epoch": 0.6735625842834375, + "grad_norm": 1.6086543245958078, + "learning_rate": 5.08736244118831e-06, + "loss": 0.6803, + "step": 21977 + }, + { + "epoch": 0.6735932328061788, + "grad_norm": 1.5437775779681986, + "learning_rate": 5.086497868935693e-06, + "loss": 0.6837, + "step": 21978 + }, + { + "epoch": 0.6736238813289199, + "grad_norm": 1.5807791011111958, + "learning_rate": 5.0856333450970744e-06, + "loss": 0.5643, + "step": 21979 + }, + { + "epoch": 0.6736545298516612, + "grad_norm": 1.59639624118692, + "learning_rate": 5.0847688696809624e-06, + "loss": 0.7692, + "step": 21980 + }, + { + "epoch": 0.6736851783744023, + "grad_norm": 1.507292071268539, + "learning_rate": 5.08390444269588e-06, + "loss": 0.5602, + "step": 21981 + }, + { + "epoch": 0.6737158268971436, + "grad_norm": 1.6210212269011819, + "learning_rate": 5.083040064150351e-06, + "loss": 0.56, + "step": 21982 + }, + { + "epoch": 0.6737464754198847, + "grad_norm": 1.6072996523788896, + "learning_rate": 5.08217573405288e-06, + "loss": 0.7307, + "step": 21983 + }, + { + "epoch": 0.673777123942626, + "grad_norm": 1.6755300648378542, + "learning_rate": 5.081311452411995e-06, + "loss": 0.6135, + "step": 21984 + }, + { + "epoch": 0.6738077724653672, + "grad_norm": 1.864904487747338, + "learning_rate": 5.080447219236202e-06, + "loss": 0.7973, + "step": 21985 + }, + { + "epoch": 0.6738384209881084, + "grad_norm": 1.5018375930612249, + "learning_rate": 5.079583034534021e-06, + "loss": 0.6891, + "step": 21986 + }, + { + "epoch": 0.6738690695108496, + "grad_norm": 1.5273056291768468, + "learning_rate": 5.078718898313972e-06, + "loss": 0.695, + "step": 21987 + }, + { + "epoch": 0.6738997180335908, + "grad_norm": 0.6589871938200924, + "learning_rate": 5.0778548105845615e-06, + "loss": 0.538, + "step": 21988 + }, + { + "epoch": 0.673930366556332, + "grad_norm": 0.6377475657963998, + "learning_rate": 5.076990771354307e-06, + "loss": 0.5202, + "step": 21989 + }, + { + "epoch": 0.6739610150790731, + "grad_norm": 1.7060024783539889, + "learning_rate": 5.0761267806317245e-06, + "loss": 0.7045, + "step": 21990 + }, + { + "epoch": 0.6739916636018144, + "grad_norm": 1.6178592046433422, + "learning_rate": 5.075262838425322e-06, + "loss": 0.6236, + "step": 21991 + }, + { + "epoch": 0.6740223121245555, + "grad_norm": 1.442852712299575, + "learning_rate": 5.074398944743615e-06, + "loss": 0.5674, + "step": 21992 + }, + { + "epoch": 0.6740529606472968, + "grad_norm": 1.4879301340365696, + "learning_rate": 5.073535099595118e-06, + "loss": 0.6205, + "step": 21993 + }, + { + "epoch": 0.674083609170038, + "grad_norm": 1.4594081509098078, + "learning_rate": 5.072671302988337e-06, + "loss": 0.5835, + "step": 21994 + }, + { + "epoch": 0.6741142576927792, + "grad_norm": 1.452005324599664, + "learning_rate": 5.07180755493179e-06, + "loss": 0.5936, + "step": 21995 + }, + { + "epoch": 0.6741449062155204, + "grad_norm": 1.7099673058118718, + "learning_rate": 5.070943855433981e-06, + "loss": 0.7004, + "step": 21996 + }, + { + "epoch": 0.6741755547382616, + "grad_norm": 1.340522897180536, + "learning_rate": 5.070080204503423e-06, + "loss": 0.6255, + "step": 21997 + }, + { + "epoch": 0.6742062032610028, + "grad_norm": 1.4850122111625843, + "learning_rate": 5.06921660214863e-06, + "loss": 0.6358, + "step": 21998 + }, + { + "epoch": 0.674236851783744, + "grad_norm": 1.6438091295680068, + "learning_rate": 5.068353048378103e-06, + "loss": 0.6913, + "step": 21999 + }, + { + "epoch": 0.6742675003064852, + "grad_norm": 1.7176122846281117, + "learning_rate": 5.067489543200355e-06, + "loss": 0.6909, + "step": 22000 + }, + { + "epoch": 0.6742981488292265, + "grad_norm": 0.6680066485181699, + "learning_rate": 5.066626086623899e-06, + "loss": 0.5293, + "step": 22001 + }, + { + "epoch": 0.6743287973519676, + "grad_norm": 1.6114418983799328, + "learning_rate": 5.065762678657234e-06, + "loss": 0.6861, + "step": 22002 + }, + { + "epoch": 0.6743594458747089, + "grad_norm": 1.5583956582342835, + "learning_rate": 5.064899319308877e-06, + "loss": 0.6361, + "step": 22003 + }, + { + "epoch": 0.67439009439745, + "grad_norm": 1.579046018669795, + "learning_rate": 5.064036008587325e-06, + "loss": 0.6205, + "step": 22004 + }, + { + "epoch": 0.6744207429201913, + "grad_norm": 1.7911764356216866, + "learning_rate": 5.063172746501088e-06, + "loss": 0.7378, + "step": 22005 + }, + { + "epoch": 0.6744513914429324, + "grad_norm": 1.576465879304519, + "learning_rate": 5.0623095330586794e-06, + "loss": 0.7119, + "step": 22006 + }, + { + "epoch": 0.6744820399656737, + "grad_norm": 1.517245600099827, + "learning_rate": 5.0614463682685925e-06, + "loss": 0.6434, + "step": 22007 + }, + { + "epoch": 0.6745126884884148, + "grad_norm": 1.8490772571801823, + "learning_rate": 5.0605832521393396e-06, + "loss": 0.6563, + "step": 22008 + }, + { + "epoch": 0.6745433370111561, + "grad_norm": 0.655588197596335, + "learning_rate": 5.059720184679427e-06, + "loss": 0.5342, + "step": 22009 + }, + { + "epoch": 0.6745739855338972, + "grad_norm": 1.538931168976699, + "learning_rate": 5.05885716589735e-06, + "loss": 0.6312, + "step": 22010 + }, + { + "epoch": 0.6746046340566385, + "grad_norm": 1.6816565824473062, + "learning_rate": 5.05799419580162e-06, + "loss": 0.6643, + "step": 22011 + }, + { + "epoch": 0.6746352825793797, + "grad_norm": 1.5936603562813871, + "learning_rate": 5.05713127440074e-06, + "loss": 0.6526, + "step": 22012 + }, + { + "epoch": 0.6746659311021209, + "grad_norm": 1.3322328542503703, + "learning_rate": 5.056268401703207e-06, + "loss": 0.6693, + "step": 22013 + }, + { + "epoch": 0.6746965796248621, + "grad_norm": 1.6397513145779186, + "learning_rate": 5.05540557771753e-06, + "loss": 0.7102, + "step": 22014 + }, + { + "epoch": 0.6747272281476033, + "grad_norm": 1.412880380117517, + "learning_rate": 5.054542802452199e-06, + "loss": 0.6815, + "step": 22015 + }, + { + "epoch": 0.6747578766703445, + "grad_norm": 1.718574774487479, + "learning_rate": 5.053680075915733e-06, + "loss": 0.6461, + "step": 22016 + }, + { + "epoch": 0.6747885251930857, + "grad_norm": 1.4146434939859376, + "learning_rate": 5.0528173981166194e-06, + "loss": 0.6495, + "step": 22017 + }, + { + "epoch": 0.6748191737158269, + "grad_norm": 1.5667748612755434, + "learning_rate": 5.0519547690633596e-06, + "loss": 0.7339, + "step": 22018 + }, + { + "epoch": 0.6748498222385682, + "grad_norm": 1.6144311664247193, + "learning_rate": 5.051092188764455e-06, + "loss": 0.6293, + "step": 22019 + }, + { + "epoch": 0.6748804707613093, + "grad_norm": 1.6319528178874856, + "learning_rate": 5.050229657228409e-06, + "loss": 0.6942, + "step": 22020 + }, + { + "epoch": 0.6749111192840505, + "grad_norm": 1.9202510532822565, + "learning_rate": 5.049367174463714e-06, + "loss": 0.6954, + "step": 22021 + }, + { + "epoch": 0.6749417678067917, + "grad_norm": 1.688953471633113, + "learning_rate": 5.04850474047887e-06, + "loss": 0.6859, + "step": 22022 + }, + { + "epoch": 0.6749724163295329, + "grad_norm": 1.6407247943665255, + "learning_rate": 5.047642355282376e-06, + "loss": 0.6317, + "step": 22023 + }, + { + "epoch": 0.6750030648522741, + "grad_norm": 1.6255797467946531, + "learning_rate": 5.0467800188827335e-06, + "loss": 0.6342, + "step": 22024 + }, + { + "epoch": 0.6750337133750153, + "grad_norm": 1.6220809039560893, + "learning_rate": 5.045917731288434e-06, + "loss": 0.7518, + "step": 22025 + }, + { + "epoch": 0.6750643618977565, + "grad_norm": 1.7491651137736377, + "learning_rate": 5.045055492507967e-06, + "loss": 0.647, + "step": 22026 + }, + { + "epoch": 0.6750950104204977, + "grad_norm": 1.9291492782908208, + "learning_rate": 5.0441933025498425e-06, + "loss": 0.6912, + "step": 22027 + }, + { + "epoch": 0.675125658943239, + "grad_norm": 0.6410216544421945, + "learning_rate": 5.043331161422551e-06, + "loss": 0.5218, + "step": 22028 + }, + { + "epoch": 0.6751563074659801, + "grad_norm": 1.5412321490305512, + "learning_rate": 5.042469069134582e-06, + "loss": 0.7376, + "step": 22029 + }, + { + "epoch": 0.6751869559887214, + "grad_norm": 1.7428238954526878, + "learning_rate": 5.041607025694433e-06, + "loss": 0.6982, + "step": 22030 + }, + { + "epoch": 0.6752176045114625, + "grad_norm": 2.0315519692085475, + "learning_rate": 5.0407450311106024e-06, + "loss": 0.6282, + "step": 22031 + }, + { + "epoch": 0.6752482530342038, + "grad_norm": 1.4603602160833649, + "learning_rate": 5.039883085391576e-06, + "loss": 0.7355, + "step": 22032 + }, + { + "epoch": 0.6752789015569449, + "grad_norm": 1.4477510293995195, + "learning_rate": 5.0390211885458515e-06, + "loss": 0.6384, + "step": 22033 + }, + { + "epoch": 0.6753095500796862, + "grad_norm": 1.6485612864133883, + "learning_rate": 5.03815934058192e-06, + "loss": 0.7386, + "step": 22034 + }, + { + "epoch": 0.6753401986024273, + "grad_norm": 1.5780645837840057, + "learning_rate": 5.037297541508277e-06, + "loss": 0.5876, + "step": 22035 + }, + { + "epoch": 0.6753708471251686, + "grad_norm": 0.6636825536101469, + "learning_rate": 5.036435791333411e-06, + "loss": 0.5262, + "step": 22036 + }, + { + "epoch": 0.6754014956479097, + "grad_norm": 1.8936632946496579, + "learning_rate": 5.035574090065808e-06, + "loss": 0.7032, + "step": 22037 + }, + { + "epoch": 0.675432144170651, + "grad_norm": 1.7410180105973068, + "learning_rate": 5.034712437713969e-06, + "loss": 0.7193, + "step": 22038 + }, + { + "epoch": 0.6754627926933922, + "grad_norm": 1.3001101762636291, + "learning_rate": 5.0338508342863805e-06, + "loss": 0.6321, + "step": 22039 + }, + { + "epoch": 0.6754934412161334, + "grad_norm": 1.6139341345064848, + "learning_rate": 5.032989279791525e-06, + "loss": 0.645, + "step": 22040 + }, + { + "epoch": 0.6755240897388746, + "grad_norm": 1.5558783852325635, + "learning_rate": 5.032127774237898e-06, + "loss": 0.639, + "step": 22041 + }, + { + "epoch": 0.6755547382616158, + "grad_norm": 1.661801337051237, + "learning_rate": 5.031266317633987e-06, + "loss": 0.8145, + "step": 22042 + }, + { + "epoch": 0.675585386784357, + "grad_norm": 1.644661762070942, + "learning_rate": 5.030404909988283e-06, + "loss": 0.6861, + "step": 22043 + }, + { + "epoch": 0.6756160353070982, + "grad_norm": 1.4756877881961281, + "learning_rate": 5.029543551309269e-06, + "loss": 0.6483, + "step": 22044 + }, + { + "epoch": 0.6756466838298394, + "grad_norm": 0.6491112013555121, + "learning_rate": 5.028682241605433e-06, + "loss": 0.5199, + "step": 22045 + }, + { + "epoch": 0.6756773323525807, + "grad_norm": 1.5431324423772739, + "learning_rate": 5.027820980885266e-06, + "loss": 0.6475, + "step": 22046 + }, + { + "epoch": 0.6757079808753218, + "grad_norm": 1.5704074445940281, + "learning_rate": 5.026959769157252e-06, + "loss": 0.5621, + "step": 22047 + }, + { + "epoch": 0.6757386293980631, + "grad_norm": 1.50924729291351, + "learning_rate": 5.026098606429872e-06, + "loss": 0.6937, + "step": 22048 + }, + { + "epoch": 0.6757692779208042, + "grad_norm": 1.7085076033202293, + "learning_rate": 5.025237492711614e-06, + "loss": 0.6962, + "step": 22049 + }, + { + "epoch": 0.6757999264435455, + "grad_norm": 1.4231284767974006, + "learning_rate": 5.024376428010967e-06, + "loss": 0.5846, + "step": 22050 + }, + { + "epoch": 0.6758305749662866, + "grad_norm": 1.4482311894214748, + "learning_rate": 5.0235154123364125e-06, + "loss": 0.6395, + "step": 22051 + }, + { + "epoch": 0.6758612234890278, + "grad_norm": 0.6740318393607515, + "learning_rate": 5.022654445696431e-06, + "loss": 0.5215, + "step": 22052 + }, + { + "epoch": 0.675891872011769, + "grad_norm": 1.6681936203421397, + "learning_rate": 5.021793528099509e-06, + "loss": 0.7496, + "step": 22053 + }, + { + "epoch": 0.6759225205345102, + "grad_norm": 1.7806952124977535, + "learning_rate": 5.020932659554133e-06, + "loss": 0.8361, + "step": 22054 + }, + { + "epoch": 0.6759531690572514, + "grad_norm": 1.6636393812683277, + "learning_rate": 5.020071840068781e-06, + "loss": 0.7403, + "step": 22055 + }, + { + "epoch": 0.6759838175799926, + "grad_norm": 1.5838954516549077, + "learning_rate": 5.019211069651928e-06, + "loss": 0.6609, + "step": 22056 + }, + { + "epoch": 0.6760144661027339, + "grad_norm": 1.6457798742804937, + "learning_rate": 5.018350348312071e-06, + "loss": 0.7156, + "step": 22057 + }, + { + "epoch": 0.676045114625475, + "grad_norm": 1.5679283639741988, + "learning_rate": 5.017489676057682e-06, + "loss": 0.5955, + "step": 22058 + }, + { + "epoch": 0.6760757631482163, + "grad_norm": 1.73910979473911, + "learning_rate": 5.016629052897239e-06, + "loss": 0.678, + "step": 22059 + }, + { + "epoch": 0.6761064116709574, + "grad_norm": 1.5030678148937406, + "learning_rate": 5.015768478839224e-06, + "loss": 0.6506, + "step": 22060 + }, + { + "epoch": 0.6761370601936987, + "grad_norm": 1.4899201134685562, + "learning_rate": 5.0149079538921175e-06, + "loss": 0.6547, + "step": 22061 + }, + { + "epoch": 0.6761677087164398, + "grad_norm": 1.6500038741814083, + "learning_rate": 5.014047478064402e-06, + "loss": 0.6457, + "step": 22062 + }, + { + "epoch": 0.6761983572391811, + "grad_norm": 1.730311216312237, + "learning_rate": 5.01318705136455e-06, + "loss": 0.7861, + "step": 22063 + }, + { + "epoch": 0.6762290057619222, + "grad_norm": 1.6571987837807844, + "learning_rate": 5.01232667380104e-06, + "loss": 0.6604, + "step": 22064 + }, + { + "epoch": 0.6762596542846635, + "grad_norm": 1.6905575694632946, + "learning_rate": 5.011466345382356e-06, + "loss": 0.6671, + "step": 22065 + }, + { + "epoch": 0.6762903028074047, + "grad_norm": 1.6371303895140712, + "learning_rate": 5.0106060661169716e-06, + "loss": 0.726, + "step": 22066 + }, + { + "epoch": 0.6763209513301459, + "grad_norm": 1.748362465745301, + "learning_rate": 5.009745836013353e-06, + "loss": 0.644, + "step": 22067 + }, + { + "epoch": 0.6763515998528871, + "grad_norm": 1.4927287252706463, + "learning_rate": 5.0088856550799935e-06, + "loss": 0.6487, + "step": 22068 + }, + { + "epoch": 0.6763822483756283, + "grad_norm": 0.6901558518008114, + "learning_rate": 5.008025523325357e-06, + "loss": 0.5297, + "step": 22069 + }, + { + "epoch": 0.6764128968983695, + "grad_norm": 1.570800509562612, + "learning_rate": 5.007165440757928e-06, + "loss": 0.6846, + "step": 22070 + }, + { + "epoch": 0.6764435454211107, + "grad_norm": 1.6684284302250452, + "learning_rate": 5.00630540738617e-06, + "loss": 0.7712, + "step": 22071 + }, + { + "epoch": 0.6764741939438519, + "grad_norm": 1.518314453597865, + "learning_rate": 5.005445423218561e-06, + "loss": 0.6977, + "step": 22072 + }, + { + "epoch": 0.6765048424665931, + "grad_norm": 0.6766104504986586, + "learning_rate": 5.0045854882635825e-06, + "loss": 0.5454, + "step": 22073 + }, + { + "epoch": 0.6765354909893343, + "grad_norm": 1.5953457863033438, + "learning_rate": 5.003725602529696e-06, + "loss": 0.6416, + "step": 22074 + }, + { + "epoch": 0.6765661395120756, + "grad_norm": 1.6030945664486016, + "learning_rate": 5.00286576602538e-06, + "loss": 0.7509, + "step": 22075 + }, + { + "epoch": 0.6765967880348167, + "grad_norm": 1.8099401532205344, + "learning_rate": 5.002005978759109e-06, + "loss": 0.7151, + "step": 22076 + }, + { + "epoch": 0.676627436557558, + "grad_norm": 0.6740004090577815, + "learning_rate": 5.00114624073935e-06, + "loss": 0.5582, + "step": 22077 + }, + { + "epoch": 0.6766580850802991, + "grad_norm": 1.5779141178690281, + "learning_rate": 5.0002865519745735e-06, + "loss": 0.6221, + "step": 22078 + }, + { + "epoch": 0.6766887336030404, + "grad_norm": 1.674443488405802, + "learning_rate": 4.999426912473259e-06, + "loss": 0.7276, + "step": 22079 + }, + { + "epoch": 0.6767193821257815, + "grad_norm": 1.4553132783765874, + "learning_rate": 4.998567322243866e-06, + "loss": 0.629, + "step": 22080 + }, + { + "epoch": 0.6767500306485228, + "grad_norm": 1.3596566108742092, + "learning_rate": 4.997707781294871e-06, + "loss": 0.6187, + "step": 22081 + }, + { + "epoch": 0.6767806791712639, + "grad_norm": 0.641695967506551, + "learning_rate": 4.9968482896347406e-06, + "loss": 0.5264, + "step": 22082 + }, + { + "epoch": 0.6768113276940051, + "grad_norm": 0.6670986508766943, + "learning_rate": 4.995988847271942e-06, + "loss": 0.5301, + "step": 22083 + }, + { + "epoch": 0.6768419762167464, + "grad_norm": 1.4903690812512704, + "learning_rate": 4.99512945421495e-06, + "loss": 0.6276, + "step": 22084 + }, + { + "epoch": 0.6768726247394875, + "grad_norm": 1.7429231526244209, + "learning_rate": 4.994270110472223e-06, + "loss": 0.7382, + "step": 22085 + }, + { + "epoch": 0.6769032732622288, + "grad_norm": 0.6927126665788867, + "learning_rate": 4.993410816052235e-06, + "loss": 0.5157, + "step": 22086 + }, + { + "epoch": 0.6769339217849699, + "grad_norm": 0.6343111398721241, + "learning_rate": 4.992551570963454e-06, + "loss": 0.5368, + "step": 22087 + }, + { + "epoch": 0.6769645703077112, + "grad_norm": 1.56891720138786, + "learning_rate": 4.991692375214341e-06, + "loss": 0.6708, + "step": 22088 + }, + { + "epoch": 0.6769952188304523, + "grad_norm": 1.5864841243494996, + "learning_rate": 4.990833228813363e-06, + "loss": 0.6867, + "step": 22089 + }, + { + "epoch": 0.6770258673531936, + "grad_norm": 1.5390522973584924, + "learning_rate": 4.989974131768991e-06, + "loss": 0.6887, + "step": 22090 + }, + { + "epoch": 0.6770565158759347, + "grad_norm": 0.6447529087443677, + "learning_rate": 4.989115084089683e-06, + "loss": 0.5082, + "step": 22091 + }, + { + "epoch": 0.677087164398676, + "grad_norm": 1.7708933236529718, + "learning_rate": 4.988256085783909e-06, + "loss": 0.6623, + "step": 22092 + }, + { + "epoch": 0.6771178129214172, + "grad_norm": 1.4580341170100715, + "learning_rate": 4.987397136860126e-06, + "loss": 0.6992, + "step": 22093 + }, + { + "epoch": 0.6771484614441584, + "grad_norm": 1.450618342260998, + "learning_rate": 4.986538237326802e-06, + "loss": 0.6832, + "step": 22094 + }, + { + "epoch": 0.6771791099668996, + "grad_norm": 1.5002630283539, + "learning_rate": 4.985679387192404e-06, + "loss": 0.5787, + "step": 22095 + }, + { + "epoch": 0.6772097584896408, + "grad_norm": 1.4559636962005102, + "learning_rate": 4.984820586465385e-06, + "loss": 0.714, + "step": 22096 + }, + { + "epoch": 0.677240407012382, + "grad_norm": 1.6241946199669262, + "learning_rate": 4.983961835154213e-06, + "loss": 0.7421, + "step": 22097 + }, + { + "epoch": 0.6772710555351232, + "grad_norm": 1.716383378178834, + "learning_rate": 4.9831031332673516e-06, + "loss": 0.7068, + "step": 22098 + }, + { + "epoch": 0.6773017040578644, + "grad_norm": 1.6676560624423535, + "learning_rate": 4.982244480813255e-06, + "loss": 0.583, + "step": 22099 + }, + { + "epoch": 0.6773323525806056, + "grad_norm": 1.633421932159125, + "learning_rate": 4.981385877800391e-06, + "loss": 0.715, + "step": 22100 + }, + { + "epoch": 0.6773630011033468, + "grad_norm": 1.5488232118345027, + "learning_rate": 4.980527324237212e-06, + "loss": 0.615, + "step": 22101 + }, + { + "epoch": 0.6773936496260881, + "grad_norm": 1.6369215864693833, + "learning_rate": 4.979668820132182e-06, + "loss": 0.6922, + "step": 22102 + }, + { + "epoch": 0.6774242981488292, + "grad_norm": 1.6977648850164633, + "learning_rate": 4.978810365493763e-06, + "loss": 0.7415, + "step": 22103 + }, + { + "epoch": 0.6774549466715705, + "grad_norm": 1.6846317322844513, + "learning_rate": 4.977951960330407e-06, + "loss": 0.7638, + "step": 22104 + }, + { + "epoch": 0.6774855951943116, + "grad_norm": 1.6726729289560436, + "learning_rate": 4.977093604650576e-06, + "loss": 0.702, + "step": 22105 + }, + { + "epoch": 0.6775162437170529, + "grad_norm": 1.586781454406802, + "learning_rate": 4.97623529846273e-06, + "loss": 0.6004, + "step": 22106 + }, + { + "epoch": 0.677546892239794, + "grad_norm": 1.5094839404434615, + "learning_rate": 4.975377041775318e-06, + "loss": 0.648, + "step": 22107 + }, + { + "epoch": 0.6775775407625353, + "grad_norm": 1.5903099770359832, + "learning_rate": 4.974518834596802e-06, + "loss": 0.7354, + "step": 22108 + }, + { + "epoch": 0.6776081892852764, + "grad_norm": 0.6772963362766024, + "learning_rate": 4.973660676935643e-06, + "loss": 0.5564, + "step": 22109 + }, + { + "epoch": 0.6776388378080177, + "grad_norm": 0.6746239469102702, + "learning_rate": 4.972802568800287e-06, + "loss": 0.5272, + "step": 22110 + }, + { + "epoch": 0.6776694863307589, + "grad_norm": 1.335754954776511, + "learning_rate": 4.9719445101991956e-06, + "loss": 0.5738, + "step": 22111 + }, + { + "epoch": 0.6777001348535001, + "grad_norm": 1.7508832974120558, + "learning_rate": 4.971086501140819e-06, + "loss": 0.584, + "step": 22112 + }, + { + "epoch": 0.6777307833762413, + "grad_norm": 1.6640307366446638, + "learning_rate": 4.970228541633615e-06, + "loss": 0.8459, + "step": 22113 + }, + { + "epoch": 0.6777614318989824, + "grad_norm": 1.4935741073364786, + "learning_rate": 4.969370631686038e-06, + "loss": 0.587, + "step": 22114 + }, + { + "epoch": 0.6777920804217237, + "grad_norm": 1.5622832982114234, + "learning_rate": 4.968512771306536e-06, + "loss": 0.7021, + "step": 22115 + }, + { + "epoch": 0.6778227289444648, + "grad_norm": 1.5770870790195652, + "learning_rate": 4.967654960503566e-06, + "loss": 0.5856, + "step": 22116 + }, + { + "epoch": 0.6778533774672061, + "grad_norm": 1.5932239766329495, + "learning_rate": 4.966797199285582e-06, + "loss": 0.662, + "step": 22117 + }, + { + "epoch": 0.6778840259899472, + "grad_norm": 1.8558853479586725, + "learning_rate": 4.96593948766103e-06, + "loss": 0.7207, + "step": 22118 + }, + { + "epoch": 0.6779146745126885, + "grad_norm": 1.5481826948776367, + "learning_rate": 4.9650818256383636e-06, + "loss": 0.6399, + "step": 22119 + }, + { + "epoch": 0.6779453230354296, + "grad_norm": 0.6926350487588521, + "learning_rate": 4.964224213226038e-06, + "loss": 0.5631, + "step": 22120 + }, + { + "epoch": 0.6779759715581709, + "grad_norm": 1.499165109084416, + "learning_rate": 4.9633666504324964e-06, + "loss": 0.6356, + "step": 22121 + }, + { + "epoch": 0.6780066200809121, + "grad_norm": 1.586695087144395, + "learning_rate": 4.962509137266195e-06, + "loss": 0.6109, + "step": 22122 + }, + { + "epoch": 0.6780372686036533, + "grad_norm": 1.5905687283167138, + "learning_rate": 4.9616516737355725e-06, + "loss": 0.6709, + "step": 22123 + }, + { + "epoch": 0.6780679171263945, + "grad_norm": 0.6806473938355767, + "learning_rate": 4.960794259849093e-06, + "loss": 0.5869, + "step": 22124 + }, + { + "epoch": 0.6780985656491357, + "grad_norm": 1.8148373767596542, + "learning_rate": 4.959936895615197e-06, + "loss": 0.7732, + "step": 22125 + }, + { + "epoch": 0.6781292141718769, + "grad_norm": 1.5879303294971168, + "learning_rate": 4.959079581042329e-06, + "loss": 0.6069, + "step": 22126 + }, + { + "epoch": 0.6781598626946181, + "grad_norm": 1.4589825297838568, + "learning_rate": 4.958222316138938e-06, + "loss": 0.7064, + "step": 22127 + }, + { + "epoch": 0.6781905112173593, + "grad_norm": 1.7258143583800785, + "learning_rate": 4.957365100913478e-06, + "loss": 0.6132, + "step": 22128 + }, + { + "epoch": 0.6782211597401006, + "grad_norm": 1.5636449370361984, + "learning_rate": 4.9565079353743864e-06, + "loss": 0.6718, + "step": 22129 + }, + { + "epoch": 0.6782518082628417, + "grad_norm": 1.6817035226336357, + "learning_rate": 4.955650819530112e-06, + "loss": 0.7125, + "step": 22130 + }, + { + "epoch": 0.678282456785583, + "grad_norm": 1.5106572725152798, + "learning_rate": 4.954793753389103e-06, + "loss": 0.5508, + "step": 22131 + }, + { + "epoch": 0.6783131053083241, + "grad_norm": 1.5367406708583227, + "learning_rate": 4.9539367369598005e-06, + "loss": 0.6982, + "step": 22132 + }, + { + "epoch": 0.6783437538310654, + "grad_norm": 1.733397460453495, + "learning_rate": 4.9530797702506525e-06, + "loss": 0.7126, + "step": 22133 + }, + { + "epoch": 0.6783744023538065, + "grad_norm": 1.9033354791595443, + "learning_rate": 4.952222853270095e-06, + "loss": 0.6651, + "step": 22134 + }, + { + "epoch": 0.6784050508765478, + "grad_norm": 0.665727643816541, + "learning_rate": 4.951365986026583e-06, + "loss": 0.5463, + "step": 22135 + }, + { + "epoch": 0.6784356993992889, + "grad_norm": 1.444824302057982, + "learning_rate": 4.950509168528554e-06, + "loss": 0.6286, + "step": 22136 + }, + { + "epoch": 0.6784663479220302, + "grad_norm": 1.8446209645137246, + "learning_rate": 4.949652400784447e-06, + "loss": 0.6248, + "step": 22137 + }, + { + "epoch": 0.6784969964447713, + "grad_norm": 1.5712206252448133, + "learning_rate": 4.948795682802707e-06, + "loss": 0.6611, + "step": 22138 + }, + { + "epoch": 0.6785276449675126, + "grad_norm": 1.5254792402170498, + "learning_rate": 4.9479390145917795e-06, + "loss": 0.7166, + "step": 22139 + }, + { + "epoch": 0.6785582934902538, + "grad_norm": 1.6890146821192213, + "learning_rate": 4.9470823961600966e-06, + "loss": 0.6302, + "step": 22140 + }, + { + "epoch": 0.678588942012995, + "grad_norm": 1.5845699049830464, + "learning_rate": 4.946225827516105e-06, + "loss": 0.7006, + "step": 22141 + }, + { + "epoch": 0.6786195905357362, + "grad_norm": 1.756886228825968, + "learning_rate": 4.945369308668243e-06, + "loss": 0.6327, + "step": 22142 + }, + { + "epoch": 0.6786502390584774, + "grad_norm": 1.5654156841924953, + "learning_rate": 4.944512839624954e-06, + "loss": 0.5674, + "step": 22143 + }, + { + "epoch": 0.6786808875812186, + "grad_norm": 1.6163978021721999, + "learning_rate": 4.943656420394674e-06, + "loss": 0.6141, + "step": 22144 + }, + { + "epoch": 0.6787115361039597, + "grad_norm": 1.6720095978821279, + "learning_rate": 4.9428000509858366e-06, + "loss": 0.6226, + "step": 22145 + }, + { + "epoch": 0.678742184626701, + "grad_norm": 1.8044305300930943, + "learning_rate": 4.941943731406884e-06, + "loss": 0.6665, + "step": 22146 + }, + { + "epoch": 0.6787728331494421, + "grad_norm": 1.5866258238579238, + "learning_rate": 4.9410874616662585e-06, + "loss": 0.6324, + "step": 22147 + }, + { + "epoch": 0.6788034816721834, + "grad_norm": 1.3542195044735026, + "learning_rate": 4.940231241772389e-06, + "loss": 0.6433, + "step": 22148 + }, + { + "epoch": 0.6788341301949246, + "grad_norm": 1.5970217351999711, + "learning_rate": 4.939375071733716e-06, + "loss": 0.6784, + "step": 22149 + }, + { + "epoch": 0.6788647787176658, + "grad_norm": 1.6934478313581505, + "learning_rate": 4.938518951558674e-06, + "loss": 0.6115, + "step": 22150 + }, + { + "epoch": 0.678895427240407, + "grad_norm": 0.6513294773930727, + "learning_rate": 4.937662881255704e-06, + "loss": 0.5289, + "step": 22151 + }, + { + "epoch": 0.6789260757631482, + "grad_norm": 1.7737736179601515, + "learning_rate": 4.936806860833236e-06, + "loss": 0.7977, + "step": 22152 + }, + { + "epoch": 0.6789567242858894, + "grad_norm": 1.6683455141453918, + "learning_rate": 4.9359508902997e-06, + "loss": 0.6769, + "step": 22153 + }, + { + "epoch": 0.6789873728086306, + "grad_norm": 1.6044881696547106, + "learning_rate": 4.935094969663542e-06, + "loss": 0.6508, + "step": 22154 + }, + { + "epoch": 0.6790180213313718, + "grad_norm": 1.5093522738200333, + "learning_rate": 4.934239098933189e-06, + "loss": 0.7368, + "step": 22155 + }, + { + "epoch": 0.679048669854113, + "grad_norm": 1.5593082937111333, + "learning_rate": 4.933383278117071e-06, + "loss": 0.5971, + "step": 22156 + }, + { + "epoch": 0.6790793183768542, + "grad_norm": 0.6582891839204901, + "learning_rate": 4.932527507223623e-06, + "loss": 0.5496, + "step": 22157 + }, + { + "epoch": 0.6791099668995955, + "grad_norm": 1.5708691438959568, + "learning_rate": 4.931671786261283e-06, + "loss": 0.6525, + "step": 22158 + }, + { + "epoch": 0.6791406154223366, + "grad_norm": 1.5190644038337835, + "learning_rate": 4.930816115238474e-06, + "loss": 0.6314, + "step": 22159 + }, + { + "epoch": 0.6791712639450779, + "grad_norm": 1.6472168312410473, + "learning_rate": 4.929960494163629e-06, + "loss": 0.5384, + "step": 22160 + }, + { + "epoch": 0.679201912467819, + "grad_norm": 1.6009129419797763, + "learning_rate": 4.929104923045182e-06, + "loss": 0.6218, + "step": 22161 + }, + { + "epoch": 0.6792325609905603, + "grad_norm": 1.6301723891147166, + "learning_rate": 4.928249401891565e-06, + "loss": 0.7509, + "step": 22162 + }, + { + "epoch": 0.6792632095133014, + "grad_norm": 1.888195289394873, + "learning_rate": 4.927393930711204e-06, + "loss": 0.6808, + "step": 22163 + }, + { + "epoch": 0.6792938580360427, + "grad_norm": 1.6457607701175296, + "learning_rate": 4.926538509512522e-06, + "loss": 0.7274, + "step": 22164 + }, + { + "epoch": 0.6793245065587838, + "grad_norm": 1.5158135851794776, + "learning_rate": 4.925683138303961e-06, + "loss": 0.6852, + "step": 22165 + }, + { + "epoch": 0.6793551550815251, + "grad_norm": 1.6245454734937472, + "learning_rate": 4.924827817093942e-06, + "loss": 0.7072, + "step": 22166 + }, + { + "epoch": 0.6793858036042663, + "grad_norm": 1.6845901536192678, + "learning_rate": 4.923972545890889e-06, + "loss": 0.7939, + "step": 22167 + }, + { + "epoch": 0.6794164521270075, + "grad_norm": 1.6315550706705422, + "learning_rate": 4.923117324703235e-06, + "loss": 0.7054, + "step": 22168 + }, + { + "epoch": 0.6794471006497487, + "grad_norm": 0.6807531249000913, + "learning_rate": 4.922262153539403e-06, + "loss": 0.5745, + "step": 22169 + }, + { + "epoch": 0.6794777491724899, + "grad_norm": 1.565663615973595, + "learning_rate": 4.921407032407827e-06, + "loss": 0.7059, + "step": 22170 + }, + { + "epoch": 0.6795083976952311, + "grad_norm": 1.453274857824083, + "learning_rate": 4.920551961316922e-06, + "loss": 0.5575, + "step": 22171 + }, + { + "epoch": 0.6795390462179723, + "grad_norm": 1.7265174409404007, + "learning_rate": 4.919696940275118e-06, + "loss": 0.6762, + "step": 22172 + }, + { + "epoch": 0.6795696947407135, + "grad_norm": 0.6720045558187698, + "learning_rate": 4.918841969290844e-06, + "loss": 0.5536, + "step": 22173 + }, + { + "epoch": 0.6796003432634548, + "grad_norm": 1.651496534922737, + "learning_rate": 4.91798704837252e-06, + "loss": 0.7016, + "step": 22174 + }, + { + "epoch": 0.6796309917861959, + "grad_norm": 1.6162812515777958, + "learning_rate": 4.917132177528562e-06, + "loss": 0.6308, + "step": 22175 + }, + { + "epoch": 0.679661640308937, + "grad_norm": 1.543514031467949, + "learning_rate": 4.91627735676741e-06, + "loss": 0.618, + "step": 22176 + }, + { + "epoch": 0.6796922888316783, + "grad_norm": 1.891954360438131, + "learning_rate": 4.915422586097472e-06, + "loss": 0.7107, + "step": 22177 + }, + { + "epoch": 0.6797229373544195, + "grad_norm": 1.5231071064393038, + "learning_rate": 4.914567865527181e-06, + "loss": 0.5661, + "step": 22178 + }, + { + "epoch": 0.6797535858771607, + "grad_norm": 1.490005412601828, + "learning_rate": 4.913713195064951e-06, + "loss": 0.6656, + "step": 22179 + }, + { + "epoch": 0.6797842343999019, + "grad_norm": 1.5374350445425908, + "learning_rate": 4.912858574719206e-06, + "loss": 0.6215, + "step": 22180 + }, + { + "epoch": 0.6798148829226431, + "grad_norm": 1.6153590234123496, + "learning_rate": 4.91200400449837e-06, + "loss": 0.7024, + "step": 22181 + }, + { + "epoch": 0.6798455314453843, + "grad_norm": 0.661020521934575, + "learning_rate": 4.911149484410857e-06, + "loss": 0.5439, + "step": 22182 + }, + { + "epoch": 0.6798761799681255, + "grad_norm": 1.6273207702818047, + "learning_rate": 4.910295014465091e-06, + "loss": 0.7131, + "step": 22183 + }, + { + "epoch": 0.6799068284908667, + "grad_norm": 1.6086305194226305, + "learning_rate": 4.909440594669494e-06, + "loss": 0.5705, + "step": 22184 + }, + { + "epoch": 0.679937477013608, + "grad_norm": 1.6894325217241961, + "learning_rate": 4.90858622503248e-06, + "loss": 0.6187, + "step": 22185 + }, + { + "epoch": 0.6799681255363491, + "grad_norm": 1.5420416697251373, + "learning_rate": 4.907731905562462e-06, + "loss": 0.6246, + "step": 22186 + }, + { + "epoch": 0.6799987740590904, + "grad_norm": 1.8408405967726078, + "learning_rate": 4.906877636267872e-06, + "loss": 0.7395, + "step": 22187 + }, + { + "epoch": 0.6800294225818315, + "grad_norm": 1.543398351915239, + "learning_rate": 4.906023417157115e-06, + "loss": 0.6684, + "step": 22188 + }, + { + "epoch": 0.6800600711045728, + "grad_norm": 1.5947210659514506, + "learning_rate": 4.905169248238618e-06, + "loss": 0.616, + "step": 22189 + }, + { + "epoch": 0.6800907196273139, + "grad_norm": 1.4281677630418714, + "learning_rate": 4.904315129520787e-06, + "loss": 0.5836, + "step": 22190 + }, + { + "epoch": 0.6801213681500552, + "grad_norm": 1.6857232315224222, + "learning_rate": 4.903461061012044e-06, + "loss": 0.742, + "step": 22191 + }, + { + "epoch": 0.6801520166727963, + "grad_norm": 1.890944346788099, + "learning_rate": 4.902607042720806e-06, + "loss": 0.6358, + "step": 22192 + }, + { + "epoch": 0.6801826651955376, + "grad_norm": 1.5974908393271015, + "learning_rate": 4.9017530746554824e-06, + "loss": 0.7619, + "step": 22193 + }, + { + "epoch": 0.6802133137182788, + "grad_norm": 1.5256603027464533, + "learning_rate": 4.900899156824488e-06, + "loss": 0.6476, + "step": 22194 + }, + { + "epoch": 0.68024396224102, + "grad_norm": 1.6638707042036947, + "learning_rate": 4.900045289236243e-06, + "loss": 0.7231, + "step": 22195 + }, + { + "epoch": 0.6802746107637612, + "grad_norm": 1.5555106158058853, + "learning_rate": 4.899191471899155e-06, + "loss": 0.5547, + "step": 22196 + }, + { + "epoch": 0.6803052592865024, + "grad_norm": 1.5726772300992407, + "learning_rate": 4.898337704821642e-06, + "loss": 0.6702, + "step": 22197 + }, + { + "epoch": 0.6803359078092436, + "grad_norm": 1.6203958168069408, + "learning_rate": 4.8974839880121075e-06, + "loss": 0.716, + "step": 22198 + }, + { + "epoch": 0.6803665563319848, + "grad_norm": 0.6928957624737287, + "learning_rate": 4.89663032147897e-06, + "loss": 0.5463, + "step": 22199 + }, + { + "epoch": 0.680397204854726, + "grad_norm": 1.6412564096101192, + "learning_rate": 4.895776705230642e-06, + "loss": 0.6538, + "step": 22200 + }, + { + "epoch": 0.6804278533774673, + "grad_norm": 1.4364940312241576, + "learning_rate": 4.89492313927553e-06, + "loss": 0.6989, + "step": 22201 + }, + { + "epoch": 0.6804585019002084, + "grad_norm": 1.7017950465438538, + "learning_rate": 4.894069623622046e-06, + "loss": 0.7311, + "step": 22202 + }, + { + "epoch": 0.6804891504229497, + "grad_norm": 1.4851262331602837, + "learning_rate": 4.893216158278604e-06, + "loss": 0.6226, + "step": 22203 + }, + { + "epoch": 0.6805197989456908, + "grad_norm": 1.5306220376560542, + "learning_rate": 4.892362743253606e-06, + "loss": 0.6108, + "step": 22204 + }, + { + "epoch": 0.6805504474684321, + "grad_norm": 1.5396240820563276, + "learning_rate": 4.891509378555464e-06, + "loss": 0.6819, + "step": 22205 + }, + { + "epoch": 0.6805810959911732, + "grad_norm": 1.516101559709982, + "learning_rate": 4.890656064192593e-06, + "loss": 0.6551, + "step": 22206 + }, + { + "epoch": 0.6806117445139144, + "grad_norm": 0.6480184147094541, + "learning_rate": 4.8898028001733895e-06, + "loss": 0.5638, + "step": 22207 + }, + { + "epoch": 0.6806423930366556, + "grad_norm": 1.6845247386683344, + "learning_rate": 4.888949586506271e-06, + "loss": 0.7056, + "step": 22208 + }, + { + "epoch": 0.6806730415593968, + "grad_norm": 1.5688038789782828, + "learning_rate": 4.8880964231996364e-06, + "loss": 0.6678, + "step": 22209 + }, + { + "epoch": 0.680703690082138, + "grad_norm": 1.8476706761992154, + "learning_rate": 4.887243310261894e-06, + "loss": 0.769, + "step": 22210 + }, + { + "epoch": 0.6807343386048792, + "grad_norm": 1.6299191078209017, + "learning_rate": 4.886390247701457e-06, + "loss": 0.6847, + "step": 22211 + }, + { + "epoch": 0.6807649871276205, + "grad_norm": 1.7903162040838099, + "learning_rate": 4.885537235526722e-06, + "loss": 0.5778, + "step": 22212 + }, + { + "epoch": 0.6807956356503616, + "grad_norm": 1.637085699574747, + "learning_rate": 4.8846842737460954e-06, + "loss": 0.7049, + "step": 22213 + }, + { + "epoch": 0.6808262841731029, + "grad_norm": 0.6834365015972343, + "learning_rate": 4.883831362367988e-06, + "loss": 0.5546, + "step": 22214 + }, + { + "epoch": 0.680856932695844, + "grad_norm": 1.5173965137842005, + "learning_rate": 4.882978501400796e-06, + "loss": 0.5764, + "step": 22215 + }, + { + "epoch": 0.6808875812185853, + "grad_norm": 0.6569790517269661, + "learning_rate": 4.882125690852925e-06, + "loss": 0.5169, + "step": 22216 + }, + { + "epoch": 0.6809182297413264, + "grad_norm": 1.7161311357342068, + "learning_rate": 4.8812729307327835e-06, + "loss": 0.7289, + "step": 22217 + }, + { + "epoch": 0.6809488782640677, + "grad_norm": 0.6677605934839912, + "learning_rate": 4.880420221048765e-06, + "loss": 0.577, + "step": 22218 + }, + { + "epoch": 0.6809795267868088, + "grad_norm": 1.6211725711637013, + "learning_rate": 4.879567561809281e-06, + "loss": 0.6727, + "step": 22219 + }, + { + "epoch": 0.6810101753095501, + "grad_norm": 1.4693044673519018, + "learning_rate": 4.878714953022723e-06, + "loss": 0.5987, + "step": 22220 + }, + { + "epoch": 0.6810408238322913, + "grad_norm": 1.8464897374014118, + "learning_rate": 4.877862394697498e-06, + "loss": 0.6897, + "step": 22221 + }, + { + "epoch": 0.6810714723550325, + "grad_norm": 0.6838119245112281, + "learning_rate": 4.877009886842008e-06, + "loss": 0.5435, + "step": 22222 + }, + { + "epoch": 0.6811021208777737, + "grad_norm": 1.6019709535042301, + "learning_rate": 4.876157429464647e-06, + "loss": 0.6601, + "step": 22223 + }, + { + "epoch": 0.6811327694005149, + "grad_norm": 0.6605334902502425, + "learning_rate": 4.875305022573818e-06, + "loss": 0.5443, + "step": 22224 + }, + { + "epoch": 0.6811634179232561, + "grad_norm": 1.7560612011483163, + "learning_rate": 4.874452666177923e-06, + "loss": 0.8045, + "step": 22225 + }, + { + "epoch": 0.6811940664459973, + "grad_norm": 1.5475787735207667, + "learning_rate": 4.873600360285354e-06, + "loss": 0.6384, + "step": 22226 + }, + { + "epoch": 0.6812247149687385, + "grad_norm": 1.6269321808555652, + "learning_rate": 4.872748104904513e-06, + "loss": 0.6961, + "step": 22227 + }, + { + "epoch": 0.6812553634914797, + "grad_norm": 1.4837221177370772, + "learning_rate": 4.871895900043799e-06, + "loss": 0.6317, + "step": 22228 + }, + { + "epoch": 0.6812860120142209, + "grad_norm": 1.6193296952408671, + "learning_rate": 4.8710437457116045e-06, + "loss": 0.6177, + "step": 22229 + }, + { + "epoch": 0.6813166605369622, + "grad_norm": 1.6237332091123031, + "learning_rate": 4.870191641916332e-06, + "loss": 0.6205, + "step": 22230 + }, + { + "epoch": 0.6813473090597033, + "grad_norm": 1.43433843741923, + "learning_rate": 4.869339588666365e-06, + "loss": 0.6302, + "step": 22231 + }, + { + "epoch": 0.6813779575824446, + "grad_norm": 1.597197684645679, + "learning_rate": 4.868487585970116e-06, + "loss": 0.7455, + "step": 22232 + }, + { + "epoch": 0.6814086061051857, + "grad_norm": 1.7163027368788337, + "learning_rate": 4.867635633835972e-06, + "loss": 0.6821, + "step": 22233 + }, + { + "epoch": 0.681439254627927, + "grad_norm": 0.6801273070408135, + "learning_rate": 4.866783732272323e-06, + "loss": 0.5482, + "step": 22234 + }, + { + "epoch": 0.6814699031506681, + "grad_norm": 0.6435643294883915, + "learning_rate": 4.865931881287568e-06, + "loss": 0.517, + "step": 22235 + }, + { + "epoch": 0.6815005516734094, + "grad_norm": 1.557366698788726, + "learning_rate": 4.865080080890104e-06, + "loss": 0.68, + "step": 22236 + }, + { + "epoch": 0.6815312001961505, + "grad_norm": 1.5649379484879224, + "learning_rate": 4.8642283310883145e-06, + "loss": 0.616, + "step": 22237 + }, + { + "epoch": 0.6815618487188917, + "grad_norm": 1.408040700301867, + "learning_rate": 4.863376631890597e-06, + "loss": 0.5563, + "step": 22238 + }, + { + "epoch": 0.681592497241633, + "grad_norm": 1.7939025826703479, + "learning_rate": 4.862524983305349e-06, + "loss": 0.7185, + "step": 22239 + }, + { + "epoch": 0.6816231457643741, + "grad_norm": 1.5880077249757165, + "learning_rate": 4.861673385340953e-06, + "loss": 0.6024, + "step": 22240 + }, + { + "epoch": 0.6816537942871154, + "grad_norm": 1.5385729782731252, + "learning_rate": 4.860821838005807e-06, + "loss": 0.6584, + "step": 22241 + }, + { + "epoch": 0.6816844428098565, + "grad_norm": 1.4468946080945944, + "learning_rate": 4.8599703413082945e-06, + "loss": 0.7435, + "step": 22242 + }, + { + "epoch": 0.6817150913325978, + "grad_norm": 0.6612376980200441, + "learning_rate": 4.859118895256809e-06, + "loss": 0.5399, + "step": 22243 + }, + { + "epoch": 0.6817457398553389, + "grad_norm": 0.6753935168858025, + "learning_rate": 4.858267499859746e-06, + "loss": 0.5424, + "step": 22244 + }, + { + "epoch": 0.6817763883780802, + "grad_norm": 1.515445912401204, + "learning_rate": 4.8574161551254825e-06, + "loss": 0.6069, + "step": 22245 + }, + { + "epoch": 0.6818070369008213, + "grad_norm": 1.4143596657008994, + "learning_rate": 4.856564861062415e-06, + "loss": 0.6127, + "step": 22246 + }, + { + "epoch": 0.6818376854235626, + "grad_norm": 1.5414507279736767, + "learning_rate": 4.855713617678935e-06, + "loss": 0.6374, + "step": 22247 + }, + { + "epoch": 0.6818683339463038, + "grad_norm": 1.839183452999274, + "learning_rate": 4.854862424983419e-06, + "loss": 0.6504, + "step": 22248 + }, + { + "epoch": 0.681898982469045, + "grad_norm": 1.4689916729946566, + "learning_rate": 4.854011282984264e-06, + "loss": 0.6175, + "step": 22249 + }, + { + "epoch": 0.6819296309917862, + "grad_norm": 1.657825311550659, + "learning_rate": 4.853160191689845e-06, + "loss": 0.6417, + "step": 22250 + }, + { + "epoch": 0.6819602795145274, + "grad_norm": 2.0056206331183692, + "learning_rate": 4.852309151108564e-06, + "loss": 0.6696, + "step": 22251 + }, + { + "epoch": 0.6819909280372686, + "grad_norm": 1.5540760288146205, + "learning_rate": 4.851458161248797e-06, + "loss": 0.6364, + "step": 22252 + }, + { + "epoch": 0.6820215765600098, + "grad_norm": 1.483436720347164, + "learning_rate": 4.850607222118927e-06, + "loss": 0.5621, + "step": 22253 + }, + { + "epoch": 0.682052225082751, + "grad_norm": 1.5001889546448224, + "learning_rate": 4.849756333727341e-06, + "loss": 0.7322, + "step": 22254 + }, + { + "epoch": 0.6820828736054922, + "grad_norm": 1.674580235347907, + "learning_rate": 4.848905496082428e-06, + "loss": 0.6682, + "step": 22255 + }, + { + "epoch": 0.6821135221282334, + "grad_norm": 1.5086962846206353, + "learning_rate": 4.848054709192562e-06, + "loss": 0.8414, + "step": 22256 + }, + { + "epoch": 0.6821441706509747, + "grad_norm": 0.6561588755789608, + "learning_rate": 4.847203973066133e-06, + "loss": 0.5374, + "step": 22257 + }, + { + "epoch": 0.6821748191737158, + "grad_norm": 1.602558666814667, + "learning_rate": 4.846353287711521e-06, + "loss": 0.6251, + "step": 22258 + }, + { + "epoch": 0.6822054676964571, + "grad_norm": 1.5066349970949142, + "learning_rate": 4.8455026531371116e-06, + "loss": 0.561, + "step": 22259 + }, + { + "epoch": 0.6822361162191982, + "grad_norm": 1.6118503484025577, + "learning_rate": 4.844652069351283e-06, + "loss": 0.7424, + "step": 22260 + }, + { + "epoch": 0.6822667647419395, + "grad_norm": 1.5898474716186013, + "learning_rate": 4.84380153636241e-06, + "loss": 0.6567, + "step": 22261 + }, + { + "epoch": 0.6822974132646806, + "grad_norm": 1.5902257625976455, + "learning_rate": 4.842951054178888e-06, + "loss": 0.62, + "step": 22262 + }, + { + "epoch": 0.6823280617874219, + "grad_norm": 1.5125699315001044, + "learning_rate": 4.842100622809088e-06, + "loss": 0.6871, + "step": 22263 + }, + { + "epoch": 0.682358710310163, + "grad_norm": 1.6904683151612492, + "learning_rate": 4.841250242261387e-06, + "loss": 0.6652, + "step": 22264 + }, + { + "epoch": 0.6823893588329043, + "grad_norm": 0.6820520075936598, + "learning_rate": 4.840399912544167e-06, + "loss": 0.5621, + "step": 22265 + }, + { + "epoch": 0.6824200073556455, + "grad_norm": 1.3854289708630738, + "learning_rate": 4.83954963366581e-06, + "loss": 0.568, + "step": 22266 + }, + { + "epoch": 0.6824506558783867, + "grad_norm": 1.5419257385400007, + "learning_rate": 4.838699405634687e-06, + "loss": 0.7192, + "step": 22267 + }, + { + "epoch": 0.6824813044011279, + "grad_norm": 1.5723895601351385, + "learning_rate": 4.837849228459181e-06, + "loss": 0.6233, + "step": 22268 + }, + { + "epoch": 0.682511952923869, + "grad_norm": 1.4168436632164723, + "learning_rate": 4.836999102147666e-06, + "loss": 0.6815, + "step": 22269 + }, + { + "epoch": 0.6825426014466103, + "grad_norm": 1.5212193103745106, + "learning_rate": 4.8361490267085235e-06, + "loss": 0.6227, + "step": 22270 + }, + { + "epoch": 0.6825732499693514, + "grad_norm": 1.612535923146062, + "learning_rate": 4.835299002150125e-06, + "loss": 0.6832, + "step": 22271 + }, + { + "epoch": 0.6826038984920927, + "grad_norm": 1.7333300767012239, + "learning_rate": 4.834449028480841e-06, + "loss": 0.5707, + "step": 22272 + }, + { + "epoch": 0.6826345470148338, + "grad_norm": 1.4885595169034187, + "learning_rate": 4.833599105709059e-06, + "loss": 0.642, + "step": 22273 + }, + { + "epoch": 0.6826651955375751, + "grad_norm": 1.567746615966763, + "learning_rate": 4.832749233843148e-06, + "loss": 0.6593, + "step": 22274 + }, + { + "epoch": 0.6826958440603162, + "grad_norm": 1.530909239261712, + "learning_rate": 4.831899412891476e-06, + "loss": 0.6837, + "step": 22275 + }, + { + "epoch": 0.6827264925830575, + "grad_norm": 1.7102176838590075, + "learning_rate": 4.831049642862422e-06, + "loss": 0.6704, + "step": 22276 + }, + { + "epoch": 0.6827571411057987, + "grad_norm": 1.6094489841021893, + "learning_rate": 4.830199923764358e-06, + "loss": 0.6478, + "step": 22277 + }, + { + "epoch": 0.6827877896285399, + "grad_norm": 1.6337908101495742, + "learning_rate": 4.829350255605661e-06, + "loss": 0.6397, + "step": 22278 + }, + { + "epoch": 0.6828184381512811, + "grad_norm": 1.6504413712776802, + "learning_rate": 4.828500638394695e-06, + "loss": 0.5525, + "step": 22279 + }, + { + "epoch": 0.6828490866740223, + "grad_norm": 1.7895962870910112, + "learning_rate": 4.827651072139837e-06, + "loss": 0.6367, + "step": 22280 + }, + { + "epoch": 0.6828797351967635, + "grad_norm": 1.4127556430100607, + "learning_rate": 4.826801556849457e-06, + "loss": 0.6144, + "step": 22281 + }, + { + "epoch": 0.6829103837195047, + "grad_norm": 1.6907285901161873, + "learning_rate": 4.825952092531927e-06, + "loss": 0.7445, + "step": 22282 + }, + { + "epoch": 0.6829410322422459, + "grad_norm": 1.4940368934085344, + "learning_rate": 4.825102679195607e-06, + "loss": 0.6777, + "step": 22283 + }, + { + "epoch": 0.6829716807649872, + "grad_norm": 1.6811543758600356, + "learning_rate": 4.824253316848881e-06, + "loss": 0.6579, + "step": 22284 + }, + { + "epoch": 0.6830023292877283, + "grad_norm": 2.050105209509793, + "learning_rate": 4.823404005500112e-06, + "loss": 0.6772, + "step": 22285 + }, + { + "epoch": 0.6830329778104696, + "grad_norm": 0.6682105536279106, + "learning_rate": 4.822554745157665e-06, + "loss": 0.5267, + "step": 22286 + }, + { + "epoch": 0.6830636263332107, + "grad_norm": 1.5933265836412387, + "learning_rate": 4.8217055358299095e-06, + "loss": 0.656, + "step": 22287 + }, + { + "epoch": 0.683094274855952, + "grad_norm": 0.6931300608985526, + "learning_rate": 4.820856377525215e-06, + "loss": 0.5478, + "step": 22288 + }, + { + "epoch": 0.6831249233786931, + "grad_norm": 1.6725939767154574, + "learning_rate": 4.820007270251951e-06, + "loss": 0.8017, + "step": 22289 + }, + { + "epoch": 0.6831555719014344, + "grad_norm": 1.6995515077442904, + "learning_rate": 4.819158214018477e-06, + "loss": 0.6401, + "step": 22290 + }, + { + "epoch": 0.6831862204241755, + "grad_norm": 0.7020929359160037, + "learning_rate": 4.818309208833163e-06, + "loss": 0.5573, + "step": 22291 + }, + { + "epoch": 0.6832168689469168, + "grad_norm": 1.6982530990444047, + "learning_rate": 4.8174602547043766e-06, + "loss": 0.7385, + "step": 22292 + }, + { + "epoch": 0.683247517469658, + "grad_norm": 1.5876105026719018, + "learning_rate": 4.816611351640482e-06, + "loss": 0.6644, + "step": 22293 + }, + { + "epoch": 0.6832781659923992, + "grad_norm": 1.6830625711788132, + "learning_rate": 4.815762499649838e-06, + "loss": 0.7195, + "step": 22294 + }, + { + "epoch": 0.6833088145151404, + "grad_norm": 1.5604642534040432, + "learning_rate": 4.814913698740812e-06, + "loss": 0.7187, + "step": 22295 + }, + { + "epoch": 0.6833394630378816, + "grad_norm": 0.6830165594005446, + "learning_rate": 4.814064948921768e-06, + "loss": 0.5362, + "step": 22296 + }, + { + "epoch": 0.6833701115606228, + "grad_norm": 1.7252771097394612, + "learning_rate": 4.813216250201072e-06, + "loss": 0.71, + "step": 22297 + }, + { + "epoch": 0.683400760083364, + "grad_norm": 1.8292954620227757, + "learning_rate": 4.812367602587081e-06, + "loss": 0.7099, + "step": 22298 + }, + { + "epoch": 0.6834314086061052, + "grad_norm": 1.8144404877761349, + "learning_rate": 4.81151900608816e-06, + "loss": 0.7796, + "step": 22299 + }, + { + "epoch": 0.6834620571288463, + "grad_norm": 1.7686602817771444, + "learning_rate": 4.810670460712672e-06, + "loss": 0.6464, + "step": 22300 + }, + { + "epoch": 0.6834927056515876, + "grad_norm": 1.584233130883642, + "learning_rate": 4.809821966468976e-06, + "loss": 0.6865, + "step": 22301 + }, + { + "epoch": 0.6835233541743287, + "grad_norm": 1.7424282601049839, + "learning_rate": 4.808973523365424e-06, + "loss": 0.6487, + "step": 22302 + }, + { + "epoch": 0.68355400269707, + "grad_norm": 1.5830766934450249, + "learning_rate": 4.808125131410393e-06, + "loss": 0.6235, + "step": 22303 + }, + { + "epoch": 0.6835846512198112, + "grad_norm": 1.689968120340029, + "learning_rate": 4.807276790612228e-06, + "loss": 0.6245, + "step": 22304 + }, + { + "epoch": 0.6836152997425524, + "grad_norm": 1.5687850994316561, + "learning_rate": 4.806428500979299e-06, + "loss": 0.7184, + "step": 22305 + }, + { + "epoch": 0.6836459482652936, + "grad_norm": 1.472304314803923, + "learning_rate": 4.8055802625199545e-06, + "loss": 0.6083, + "step": 22306 + }, + { + "epoch": 0.6836765967880348, + "grad_norm": 1.56444899253307, + "learning_rate": 4.804732075242557e-06, + "loss": 0.649, + "step": 22307 + }, + { + "epoch": 0.683707245310776, + "grad_norm": 1.600330411151809, + "learning_rate": 4.803883939155466e-06, + "loss": 0.6056, + "step": 22308 + }, + { + "epoch": 0.6837378938335172, + "grad_norm": 1.3911455797215142, + "learning_rate": 4.803035854267033e-06, + "loss": 0.5798, + "step": 22309 + }, + { + "epoch": 0.6837685423562584, + "grad_norm": 0.7072708062614822, + "learning_rate": 4.802187820585617e-06, + "loss": 0.5471, + "step": 22310 + }, + { + "epoch": 0.6837991908789997, + "grad_norm": 1.821146410106103, + "learning_rate": 4.801339838119579e-06, + "loss": 0.7475, + "step": 22311 + }, + { + "epoch": 0.6838298394017408, + "grad_norm": 1.4293188503692278, + "learning_rate": 4.80049190687727e-06, + "loss": 0.7552, + "step": 22312 + }, + { + "epoch": 0.6838604879244821, + "grad_norm": 0.6395806752012881, + "learning_rate": 4.799644026867036e-06, + "loss": 0.5349, + "step": 22313 + }, + { + "epoch": 0.6838911364472232, + "grad_norm": 1.3712851457040502, + "learning_rate": 4.7987961980972475e-06, + "loss": 0.6094, + "step": 22314 + }, + { + "epoch": 0.6839217849699645, + "grad_norm": 1.6621267029938303, + "learning_rate": 4.797948420576247e-06, + "loss": 0.7591, + "step": 22315 + }, + { + "epoch": 0.6839524334927056, + "grad_norm": 1.7215422361335735, + "learning_rate": 4.797100694312396e-06, + "loss": 0.5271, + "step": 22316 + }, + { + "epoch": 0.6839830820154469, + "grad_norm": 1.6799757098222692, + "learning_rate": 4.79625301931404e-06, + "loss": 0.591, + "step": 22317 + }, + { + "epoch": 0.684013730538188, + "grad_norm": 1.6490503318756435, + "learning_rate": 4.795405395589533e-06, + "loss": 0.6621, + "step": 22318 + }, + { + "epoch": 0.6840443790609293, + "grad_norm": 0.6807116471429596, + "learning_rate": 4.794557823147234e-06, + "loss": 0.555, + "step": 22319 + }, + { + "epoch": 0.6840750275836704, + "grad_norm": 1.3574410187555173, + "learning_rate": 4.793710301995483e-06, + "loss": 0.5517, + "step": 22320 + }, + { + "epoch": 0.6841056761064117, + "grad_norm": 1.639997742551937, + "learning_rate": 4.792862832142636e-06, + "loss": 0.689, + "step": 22321 + }, + { + "epoch": 0.6841363246291529, + "grad_norm": 1.7290675999122664, + "learning_rate": 4.79201541359705e-06, + "loss": 0.6137, + "step": 22322 + }, + { + "epoch": 0.6841669731518941, + "grad_norm": 1.8376134231374517, + "learning_rate": 4.791168046367063e-06, + "loss": 0.7364, + "step": 22323 + }, + { + "epoch": 0.6841976216746353, + "grad_norm": 1.5115362177820797, + "learning_rate": 4.79032073046103e-06, + "loss": 0.5502, + "step": 22324 + }, + { + "epoch": 0.6842282701973765, + "grad_norm": 1.5903714900693233, + "learning_rate": 4.7894734658873045e-06, + "loss": 0.599, + "step": 22325 + }, + { + "epoch": 0.6842589187201177, + "grad_norm": 0.6968223388081795, + "learning_rate": 4.788626252654226e-06, + "loss": 0.5184, + "step": 22326 + }, + { + "epoch": 0.6842895672428589, + "grad_norm": 1.3619439449781783, + "learning_rate": 4.787779090770151e-06, + "loss": 0.5869, + "step": 22327 + }, + { + "epoch": 0.6843202157656001, + "grad_norm": 0.6561007568052077, + "learning_rate": 4.786931980243416e-06, + "loss": 0.5202, + "step": 22328 + }, + { + "epoch": 0.6843508642883414, + "grad_norm": 0.6728696864852866, + "learning_rate": 4.786084921082377e-06, + "loss": 0.5118, + "step": 22329 + }, + { + "epoch": 0.6843815128110825, + "grad_norm": 1.5307729919522859, + "learning_rate": 4.785237913295378e-06, + "loss": 0.6524, + "step": 22330 + }, + { + "epoch": 0.6844121613338237, + "grad_norm": 1.5496656083826772, + "learning_rate": 4.784390956890763e-06, + "loss": 0.5953, + "step": 22331 + }, + { + "epoch": 0.6844428098565649, + "grad_norm": 1.6626217728659018, + "learning_rate": 4.783544051876877e-06, + "loss": 0.6524, + "step": 22332 + }, + { + "epoch": 0.6844734583793061, + "grad_norm": 1.6639612087259994, + "learning_rate": 4.7826971982620705e-06, + "loss": 0.7291, + "step": 22333 + }, + { + "epoch": 0.6845041069020473, + "grad_norm": 1.5660283922203115, + "learning_rate": 4.781850396054679e-06, + "loss": 0.6324, + "step": 22334 + }, + { + "epoch": 0.6845347554247885, + "grad_norm": 1.5415341607669863, + "learning_rate": 4.781003645263051e-06, + "loss": 0.6031, + "step": 22335 + }, + { + "epoch": 0.6845654039475297, + "grad_norm": 1.7000815947310264, + "learning_rate": 4.7801569458955345e-06, + "loss": 0.606, + "step": 22336 + }, + { + "epoch": 0.6845960524702709, + "grad_norm": 1.7450892466750063, + "learning_rate": 4.779310297960461e-06, + "loss": 0.7117, + "step": 22337 + }, + { + "epoch": 0.6846267009930121, + "grad_norm": 1.600851713047572, + "learning_rate": 4.778463701466184e-06, + "loss": 0.7288, + "step": 22338 + }, + { + "epoch": 0.6846573495157533, + "grad_norm": 1.902733914697765, + "learning_rate": 4.777617156421036e-06, + "loss": 0.6862, + "step": 22339 + }, + { + "epoch": 0.6846879980384946, + "grad_norm": 0.6786538181001422, + "learning_rate": 4.776770662833363e-06, + "loss": 0.5525, + "step": 22340 + }, + { + "epoch": 0.6847186465612357, + "grad_norm": 1.5257696289320064, + "learning_rate": 4.775924220711509e-06, + "loss": 0.6547, + "step": 22341 + }, + { + "epoch": 0.684749295083977, + "grad_norm": 0.6658407425261975, + "learning_rate": 4.775077830063806e-06, + "loss": 0.5472, + "step": 22342 + }, + { + "epoch": 0.6847799436067181, + "grad_norm": 1.8070641004995485, + "learning_rate": 4.774231490898597e-06, + "loss": 0.6689, + "step": 22343 + }, + { + "epoch": 0.6848105921294594, + "grad_norm": 0.6807044914009125, + "learning_rate": 4.773385203224228e-06, + "loss": 0.5284, + "step": 22344 + }, + { + "epoch": 0.6848412406522005, + "grad_norm": 1.3621390952379446, + "learning_rate": 4.772538967049026e-06, + "loss": 0.586, + "step": 22345 + }, + { + "epoch": 0.6848718891749418, + "grad_norm": 0.6716029295863161, + "learning_rate": 4.771692782381341e-06, + "loss": 0.568, + "step": 22346 + }, + { + "epoch": 0.684902537697683, + "grad_norm": 1.4985934910696155, + "learning_rate": 4.770846649229499e-06, + "loss": 0.6005, + "step": 22347 + }, + { + "epoch": 0.6849331862204242, + "grad_norm": 1.5397605851706246, + "learning_rate": 4.770000567601843e-06, + "loss": 0.6981, + "step": 22348 + }, + { + "epoch": 0.6849638347431654, + "grad_norm": 1.4043487600482227, + "learning_rate": 4.769154537506715e-06, + "loss": 0.5308, + "step": 22349 + }, + { + "epoch": 0.6849944832659066, + "grad_norm": 1.4450024652820062, + "learning_rate": 4.768308558952442e-06, + "loss": 0.6216, + "step": 22350 + }, + { + "epoch": 0.6850251317886478, + "grad_norm": 1.7272792365689493, + "learning_rate": 4.767462631947362e-06, + "loss": 0.6272, + "step": 22351 + }, + { + "epoch": 0.685055780311389, + "grad_norm": 1.4846420002335061, + "learning_rate": 4.766616756499814e-06, + "loss": 0.6642, + "step": 22352 + }, + { + "epoch": 0.6850864288341302, + "grad_norm": 0.6588884092980309, + "learning_rate": 4.765770932618129e-06, + "loss": 0.5353, + "step": 22353 + }, + { + "epoch": 0.6851170773568714, + "grad_norm": 1.776265284682184, + "learning_rate": 4.7649251603106405e-06, + "loss": 0.5988, + "step": 22354 + }, + { + "epoch": 0.6851477258796126, + "grad_norm": 1.5782879293305843, + "learning_rate": 4.764079439585688e-06, + "loss": 0.6774, + "step": 22355 + }, + { + "epoch": 0.6851783744023539, + "grad_norm": 1.4482363150443047, + "learning_rate": 4.763233770451597e-06, + "loss": 0.721, + "step": 22356 + }, + { + "epoch": 0.685209022925095, + "grad_norm": 1.9341645840341897, + "learning_rate": 4.762388152916708e-06, + "loss": 0.748, + "step": 22357 + }, + { + "epoch": 0.6852396714478363, + "grad_norm": 1.4088982653313422, + "learning_rate": 4.761542586989341e-06, + "loss": 0.6248, + "step": 22358 + }, + { + "epoch": 0.6852703199705774, + "grad_norm": 1.6845274311121872, + "learning_rate": 4.760697072677841e-06, + "loss": 0.7099, + "step": 22359 + }, + { + "epoch": 0.6853009684933187, + "grad_norm": 1.6278049237062073, + "learning_rate": 4.759851609990535e-06, + "loss": 0.5866, + "step": 22360 + }, + { + "epoch": 0.6853316170160598, + "grad_norm": 1.5477161037133642, + "learning_rate": 4.759006198935747e-06, + "loss": 0.7282, + "step": 22361 + }, + { + "epoch": 0.685362265538801, + "grad_norm": 1.7299003229287861, + "learning_rate": 4.7581608395218125e-06, + "loss": 0.6687, + "step": 22362 + }, + { + "epoch": 0.6853929140615422, + "grad_norm": 1.6937367408050235, + "learning_rate": 4.757315531757064e-06, + "loss": 0.5348, + "step": 22363 + }, + { + "epoch": 0.6854235625842834, + "grad_norm": 0.6572941483357391, + "learning_rate": 4.756470275649824e-06, + "loss": 0.543, + "step": 22364 + }, + { + "epoch": 0.6854542111070246, + "grad_norm": 1.6669075861238365, + "learning_rate": 4.7556250712084225e-06, + "loss": 0.6553, + "step": 22365 + }, + { + "epoch": 0.6854848596297658, + "grad_norm": 1.5369002541933972, + "learning_rate": 4.754779918441193e-06, + "loss": 0.6533, + "step": 22366 + }, + { + "epoch": 0.6855155081525071, + "grad_norm": 1.7614613423389696, + "learning_rate": 4.753934817356457e-06, + "loss": 0.7207, + "step": 22367 + }, + { + "epoch": 0.6855461566752482, + "grad_norm": 1.5961161140448652, + "learning_rate": 4.7530897679625455e-06, + "loss": 0.6744, + "step": 22368 + }, + { + "epoch": 0.6855768051979895, + "grad_norm": 1.8492133718375823, + "learning_rate": 4.752244770267776e-06, + "loss": 0.6394, + "step": 22369 + }, + { + "epoch": 0.6856074537207306, + "grad_norm": 0.6439182156508563, + "learning_rate": 4.751399824280489e-06, + "loss": 0.5154, + "step": 22370 + }, + { + "epoch": 0.6856381022434719, + "grad_norm": 0.6688990627485946, + "learning_rate": 4.750554930009003e-06, + "loss": 0.5356, + "step": 22371 + }, + { + "epoch": 0.685668750766213, + "grad_norm": 1.6731866336785897, + "learning_rate": 4.7497100874616375e-06, + "loss": 0.634, + "step": 22372 + }, + { + "epoch": 0.6856993992889543, + "grad_norm": 1.5517134117457867, + "learning_rate": 4.748865296646723e-06, + "loss": 0.6172, + "step": 22373 + }, + { + "epoch": 0.6857300478116954, + "grad_norm": 0.6913799266229922, + "learning_rate": 4.748020557572585e-06, + "loss": 0.561, + "step": 22374 + }, + { + "epoch": 0.6857606963344367, + "grad_norm": 1.5544567371896523, + "learning_rate": 4.747175870247541e-06, + "loss": 0.7005, + "step": 22375 + }, + { + "epoch": 0.6857913448571779, + "grad_norm": 0.6760722551900714, + "learning_rate": 4.746331234679917e-06, + "loss": 0.5197, + "step": 22376 + }, + { + "epoch": 0.6858219933799191, + "grad_norm": 1.611273472606536, + "learning_rate": 4.745486650878036e-06, + "loss": 0.5829, + "step": 22377 + }, + { + "epoch": 0.6858526419026603, + "grad_norm": 1.5603284435909512, + "learning_rate": 4.744642118850222e-06, + "loss": 0.7319, + "step": 22378 + }, + { + "epoch": 0.6858832904254015, + "grad_norm": 0.671967853887353, + "learning_rate": 4.743797638604795e-06, + "loss": 0.554, + "step": 22379 + }, + { + "epoch": 0.6859139389481427, + "grad_norm": 1.4437886662452273, + "learning_rate": 4.742953210150071e-06, + "loss": 0.709, + "step": 22380 + }, + { + "epoch": 0.6859445874708839, + "grad_norm": 1.6938877042686213, + "learning_rate": 4.742108833494373e-06, + "loss": 0.6825, + "step": 22381 + }, + { + "epoch": 0.6859752359936251, + "grad_norm": 1.8798007603534923, + "learning_rate": 4.741264508646027e-06, + "loss": 0.6046, + "step": 22382 + }, + { + "epoch": 0.6860058845163663, + "grad_norm": 1.4709618421410382, + "learning_rate": 4.7404202356133435e-06, + "loss": 0.6279, + "step": 22383 + }, + { + "epoch": 0.6860365330391075, + "grad_norm": 1.544291068986656, + "learning_rate": 4.7395760144046445e-06, + "loss": 0.624, + "step": 22384 + }, + { + "epoch": 0.6860671815618488, + "grad_norm": 1.5660858749760984, + "learning_rate": 4.73873184502825e-06, + "loss": 0.6638, + "step": 22385 + }, + { + "epoch": 0.6860978300845899, + "grad_norm": 1.4980547819191554, + "learning_rate": 4.7378877274924786e-06, + "loss": 0.7094, + "step": 22386 + }, + { + "epoch": 0.6861284786073312, + "grad_norm": 2.2918702357702343, + "learning_rate": 4.737043661805644e-06, + "loss": 0.7583, + "step": 22387 + }, + { + "epoch": 0.6861591271300723, + "grad_norm": 1.7874944401021065, + "learning_rate": 4.736199647976063e-06, + "loss": 0.6093, + "step": 22388 + }, + { + "epoch": 0.6861897756528136, + "grad_norm": 0.6628201012754652, + "learning_rate": 4.735355686012058e-06, + "loss": 0.5328, + "step": 22389 + }, + { + "epoch": 0.6862204241755547, + "grad_norm": 1.529604018468916, + "learning_rate": 4.734511775921941e-06, + "loss": 0.6667, + "step": 22390 + }, + { + "epoch": 0.686251072698296, + "grad_norm": 1.7655930022560127, + "learning_rate": 4.733667917714023e-06, + "loss": 0.704, + "step": 22391 + }, + { + "epoch": 0.6862817212210371, + "grad_norm": 1.6259915179321502, + "learning_rate": 4.732824111396622e-06, + "loss": 0.7074, + "step": 22392 + }, + { + "epoch": 0.6863123697437783, + "grad_norm": 0.6346699072551141, + "learning_rate": 4.731980356978056e-06, + "loss": 0.531, + "step": 22393 + }, + { + "epoch": 0.6863430182665196, + "grad_norm": 1.7636367991319761, + "learning_rate": 4.731136654466633e-06, + "loss": 0.7393, + "step": 22394 + }, + { + "epoch": 0.6863736667892607, + "grad_norm": 1.7547347616019755, + "learning_rate": 4.730293003870668e-06, + "loss": 0.6305, + "step": 22395 + }, + { + "epoch": 0.686404315312002, + "grad_norm": 1.5780755888850673, + "learning_rate": 4.729449405198474e-06, + "loss": 0.6156, + "step": 22396 + }, + { + "epoch": 0.6864349638347431, + "grad_norm": 1.8139593294428191, + "learning_rate": 4.728605858458368e-06, + "loss": 0.7203, + "step": 22397 + }, + { + "epoch": 0.6864656123574844, + "grad_norm": 1.6067951283886475, + "learning_rate": 4.727762363658657e-06, + "loss": 0.6435, + "step": 22398 + }, + { + "epoch": 0.6864962608802255, + "grad_norm": 0.6377961770794615, + "learning_rate": 4.726918920807644e-06, + "loss": 0.5214, + "step": 22399 + }, + { + "epoch": 0.6865269094029668, + "grad_norm": 1.5576729499151665, + "learning_rate": 4.726075529913656e-06, + "loss": 0.6571, + "step": 22400 + }, + { + "epoch": 0.6865575579257079, + "grad_norm": 1.4300971089569094, + "learning_rate": 4.725232190984996e-06, + "loss": 0.6463, + "step": 22401 + }, + { + "epoch": 0.6865882064484492, + "grad_norm": 1.5333033732477153, + "learning_rate": 4.7243889040299685e-06, + "loss": 0.5745, + "step": 22402 + }, + { + "epoch": 0.6866188549711904, + "grad_norm": 1.6305916891161083, + "learning_rate": 4.723545669056887e-06, + "loss": 0.675, + "step": 22403 + }, + { + "epoch": 0.6866495034939316, + "grad_norm": 1.7214500014353453, + "learning_rate": 4.72270248607406e-06, + "loss": 0.7409, + "step": 22404 + }, + { + "epoch": 0.6866801520166728, + "grad_norm": 1.674322894795349, + "learning_rate": 4.7218593550897996e-06, + "loss": 0.656, + "step": 22405 + }, + { + "epoch": 0.686710800539414, + "grad_norm": 0.6640075210184413, + "learning_rate": 4.721016276112406e-06, + "loss": 0.5421, + "step": 22406 + }, + { + "epoch": 0.6867414490621552, + "grad_norm": 1.7402952654310138, + "learning_rate": 4.720173249150188e-06, + "loss": 0.7548, + "step": 22407 + }, + { + "epoch": 0.6867720975848964, + "grad_norm": 1.5994705981739559, + "learning_rate": 4.719330274211459e-06, + "loss": 0.6994, + "step": 22408 + }, + { + "epoch": 0.6868027461076376, + "grad_norm": 1.7427002541875034, + "learning_rate": 4.71848735130452e-06, + "loss": 0.6867, + "step": 22409 + }, + { + "epoch": 0.6868333946303788, + "grad_norm": 1.5324399788154457, + "learning_rate": 4.717644480437669e-06, + "loss": 0.6814, + "step": 22410 + }, + { + "epoch": 0.68686404315312, + "grad_norm": 1.6820242753222738, + "learning_rate": 4.7168016616192254e-06, + "loss": 0.6114, + "step": 22411 + }, + { + "epoch": 0.6868946916758613, + "grad_norm": 1.654394579142449, + "learning_rate": 4.715958894857483e-06, + "loss": 0.7072, + "step": 22412 + }, + { + "epoch": 0.6869253401986024, + "grad_norm": 1.560799459066264, + "learning_rate": 4.715116180160754e-06, + "loss": 0.6688, + "step": 22413 + }, + { + "epoch": 0.6869559887213437, + "grad_norm": 1.4895593289355342, + "learning_rate": 4.7142735175373334e-06, + "loss": 0.6933, + "step": 22414 + }, + { + "epoch": 0.6869866372440848, + "grad_norm": 1.4619590385360945, + "learning_rate": 4.7134309069955286e-06, + "loss": 0.52, + "step": 22415 + }, + { + "epoch": 0.6870172857668261, + "grad_norm": 2.1552846315513117, + "learning_rate": 4.712588348543645e-06, + "loss": 0.6712, + "step": 22416 + }, + { + "epoch": 0.6870479342895672, + "grad_norm": 1.6573464253390238, + "learning_rate": 4.711745842189978e-06, + "loss": 0.6256, + "step": 22417 + }, + { + "epoch": 0.6870785828123085, + "grad_norm": 1.7249440494737678, + "learning_rate": 4.710903387942831e-06, + "loss": 0.7161, + "step": 22418 + }, + { + "epoch": 0.6871092313350496, + "grad_norm": 1.8252648754628307, + "learning_rate": 4.710060985810512e-06, + "loss": 0.6819, + "step": 22419 + }, + { + "epoch": 0.6871398798577909, + "grad_norm": 1.8211093167878563, + "learning_rate": 4.709218635801314e-06, + "loss": 0.6323, + "step": 22420 + }, + { + "epoch": 0.687170528380532, + "grad_norm": 1.5993625012336874, + "learning_rate": 4.708376337923532e-06, + "loss": 0.6604, + "step": 22421 + }, + { + "epoch": 0.6872011769032733, + "grad_norm": 1.6377512530774703, + "learning_rate": 4.707534092185478e-06, + "loss": 0.6755, + "step": 22422 + }, + { + "epoch": 0.6872318254260145, + "grad_norm": 1.453473679488336, + "learning_rate": 4.7066918985954415e-06, + "loss": 0.6654, + "step": 22423 + }, + { + "epoch": 0.6872624739487556, + "grad_norm": 1.4912092259632663, + "learning_rate": 4.705849757161728e-06, + "loss": 0.6667, + "step": 22424 + }, + { + "epoch": 0.6872931224714969, + "grad_norm": 1.605266497485239, + "learning_rate": 4.7050076678926285e-06, + "loss": 0.6625, + "step": 22425 + }, + { + "epoch": 0.687323770994238, + "grad_norm": 0.6644147298515063, + "learning_rate": 4.704165630796442e-06, + "loss": 0.5543, + "step": 22426 + }, + { + "epoch": 0.6873544195169793, + "grad_norm": 1.8055343295252713, + "learning_rate": 4.703323645881471e-06, + "loss": 0.6791, + "step": 22427 + }, + { + "epoch": 0.6873850680397204, + "grad_norm": 1.7649816183784441, + "learning_rate": 4.702481713156003e-06, + "loss": 0.7155, + "step": 22428 + }, + { + "epoch": 0.6874157165624617, + "grad_norm": 1.5295373579066593, + "learning_rate": 4.701639832628339e-06, + "loss": 0.6766, + "step": 22429 + }, + { + "epoch": 0.6874463650852028, + "grad_norm": 1.983640097557952, + "learning_rate": 4.700798004306776e-06, + "loss": 0.696, + "step": 22430 + }, + { + "epoch": 0.6874770136079441, + "grad_norm": 0.6889681982881596, + "learning_rate": 4.699956228199603e-06, + "loss": 0.5517, + "step": 22431 + }, + { + "epoch": 0.6875076621306853, + "grad_norm": 1.753035471346137, + "learning_rate": 4.6991145043151205e-06, + "loss": 0.5386, + "step": 22432 + }, + { + "epoch": 0.6875383106534265, + "grad_norm": 1.5262939864415506, + "learning_rate": 4.698272832661617e-06, + "loss": 0.6402, + "step": 22433 + }, + { + "epoch": 0.6875689591761677, + "grad_norm": 1.5719015261956424, + "learning_rate": 4.697431213247387e-06, + "loss": 0.6819, + "step": 22434 + }, + { + "epoch": 0.6875996076989089, + "grad_norm": 1.8365694054780783, + "learning_rate": 4.696589646080727e-06, + "loss": 0.6783, + "step": 22435 + }, + { + "epoch": 0.6876302562216501, + "grad_norm": 1.936421595754591, + "learning_rate": 4.6957481311699224e-06, + "loss": 0.7873, + "step": 22436 + }, + { + "epoch": 0.6876609047443913, + "grad_norm": 1.5824052766097312, + "learning_rate": 4.694906668523269e-06, + "loss": 0.5918, + "step": 22437 + }, + { + "epoch": 0.6876915532671325, + "grad_norm": 0.6582522977180068, + "learning_rate": 4.6940652581490605e-06, + "loss": 0.5231, + "step": 22438 + }, + { + "epoch": 0.6877222017898738, + "grad_norm": 1.4652683681848298, + "learning_rate": 4.693223900055582e-06, + "loss": 0.7567, + "step": 22439 + }, + { + "epoch": 0.6877528503126149, + "grad_norm": 1.690104491952852, + "learning_rate": 4.692382594251127e-06, + "loss": 0.6502, + "step": 22440 + }, + { + "epoch": 0.6877834988353562, + "grad_norm": 1.7512490170197856, + "learning_rate": 4.691541340743986e-06, + "loss": 0.6665, + "step": 22441 + }, + { + "epoch": 0.6878141473580973, + "grad_norm": 1.8372252914672642, + "learning_rate": 4.690700139542444e-06, + "loss": 0.802, + "step": 22442 + }, + { + "epoch": 0.6878447958808386, + "grad_norm": 0.6903163563304061, + "learning_rate": 4.689858990654796e-06, + "loss": 0.5061, + "step": 22443 + }, + { + "epoch": 0.6878754444035797, + "grad_norm": 0.6653816906774717, + "learning_rate": 4.689017894089321e-06, + "loss": 0.5201, + "step": 22444 + }, + { + "epoch": 0.687906092926321, + "grad_norm": 1.7746755182513905, + "learning_rate": 4.688176849854312e-06, + "loss": 0.6766, + "step": 22445 + }, + { + "epoch": 0.6879367414490621, + "grad_norm": 1.5501240325131356, + "learning_rate": 4.6873358579580594e-06, + "loss": 0.5958, + "step": 22446 + }, + { + "epoch": 0.6879673899718034, + "grad_norm": 1.5376548722964578, + "learning_rate": 4.686494918408843e-06, + "loss": 0.6255, + "step": 22447 + }, + { + "epoch": 0.6879980384945446, + "grad_norm": 1.4298111953328956, + "learning_rate": 4.68565403121495e-06, + "loss": 0.6386, + "step": 22448 + }, + { + "epoch": 0.6880286870172858, + "grad_norm": 1.8653328294292824, + "learning_rate": 4.684813196384672e-06, + "loss": 0.6555, + "step": 22449 + }, + { + "epoch": 0.688059335540027, + "grad_norm": 1.8072989793314773, + "learning_rate": 4.683972413926287e-06, + "loss": 0.6417, + "step": 22450 + }, + { + "epoch": 0.6880899840627682, + "grad_norm": 0.6463066104073697, + "learning_rate": 4.68313168384808e-06, + "loss": 0.5219, + "step": 22451 + }, + { + "epoch": 0.6881206325855094, + "grad_norm": 1.6112523281055824, + "learning_rate": 4.682291006158342e-06, + "loss": 0.6648, + "step": 22452 + }, + { + "epoch": 0.6881512811082506, + "grad_norm": 1.5648561600469095, + "learning_rate": 4.681450380865347e-06, + "loss": 0.5881, + "step": 22453 + }, + { + "epoch": 0.6881819296309918, + "grad_norm": 1.5986265103125041, + "learning_rate": 4.6806098079773865e-06, + "loss": 0.6735, + "step": 22454 + }, + { + "epoch": 0.688212578153733, + "grad_norm": 1.580197641855564, + "learning_rate": 4.679769287502734e-06, + "loss": 0.5514, + "step": 22455 + }, + { + "epoch": 0.6882432266764742, + "grad_norm": 1.4447393057867284, + "learning_rate": 4.678928819449676e-06, + "loss": 0.6188, + "step": 22456 + }, + { + "epoch": 0.6882738751992153, + "grad_norm": 1.7442194426485775, + "learning_rate": 4.678088403826498e-06, + "loss": 0.6894, + "step": 22457 + }, + { + "epoch": 0.6883045237219566, + "grad_norm": 1.4945068959200707, + "learning_rate": 4.677248040641473e-06, + "loss": 0.6361, + "step": 22458 + }, + { + "epoch": 0.6883351722446978, + "grad_norm": 1.6810649786237408, + "learning_rate": 4.676407729902886e-06, + "loss": 0.6462, + "step": 22459 + }, + { + "epoch": 0.688365820767439, + "grad_norm": 1.6071209330370977, + "learning_rate": 4.675567471619018e-06, + "loss": 0.6575, + "step": 22460 + }, + { + "epoch": 0.6883964692901802, + "grad_norm": 0.6643468237352981, + "learning_rate": 4.674727265798143e-06, + "loss": 0.5275, + "step": 22461 + }, + { + "epoch": 0.6884271178129214, + "grad_norm": 1.5705302806394799, + "learning_rate": 4.673887112448542e-06, + "loss": 0.5933, + "step": 22462 + }, + { + "epoch": 0.6884577663356626, + "grad_norm": 1.7628899134478087, + "learning_rate": 4.673047011578498e-06, + "loss": 0.5854, + "step": 22463 + }, + { + "epoch": 0.6884884148584038, + "grad_norm": 1.7687086892976829, + "learning_rate": 4.672206963196281e-06, + "loss": 0.7719, + "step": 22464 + }, + { + "epoch": 0.688519063381145, + "grad_norm": 1.7136236146999098, + "learning_rate": 4.671366967310176e-06, + "loss": 0.7135, + "step": 22465 + }, + { + "epoch": 0.6885497119038863, + "grad_norm": 1.7018556766623965, + "learning_rate": 4.6705270239284505e-06, + "loss": 0.5845, + "step": 22466 + }, + { + "epoch": 0.6885803604266274, + "grad_norm": 1.6110051242183108, + "learning_rate": 4.669687133059387e-06, + "loss": 0.7991, + "step": 22467 + }, + { + "epoch": 0.6886110089493687, + "grad_norm": 1.4733547508183318, + "learning_rate": 4.668847294711264e-06, + "loss": 0.7076, + "step": 22468 + }, + { + "epoch": 0.6886416574721098, + "grad_norm": 1.5261664770355456, + "learning_rate": 4.668007508892349e-06, + "loss": 0.6125, + "step": 22469 + }, + { + "epoch": 0.6886723059948511, + "grad_norm": 1.6163444666955016, + "learning_rate": 4.6671677756109205e-06, + "loss": 0.6058, + "step": 22470 + }, + { + "epoch": 0.6887029545175922, + "grad_norm": 0.6894236960006458, + "learning_rate": 4.666328094875255e-06, + "loss": 0.5491, + "step": 22471 + }, + { + "epoch": 0.6887336030403335, + "grad_norm": 1.6669835858385953, + "learning_rate": 4.665488466693621e-06, + "loss": 0.6254, + "step": 22472 + }, + { + "epoch": 0.6887642515630746, + "grad_norm": 0.712834509202756, + "learning_rate": 4.664648891074293e-06, + "loss": 0.5406, + "step": 22473 + }, + { + "epoch": 0.6887949000858159, + "grad_norm": 1.6883041297011043, + "learning_rate": 4.6638093680255484e-06, + "loss": 0.6515, + "step": 22474 + }, + { + "epoch": 0.688825548608557, + "grad_norm": 0.6385715390867209, + "learning_rate": 4.6629698975556515e-06, + "loss": 0.5008, + "step": 22475 + }, + { + "epoch": 0.6888561971312983, + "grad_norm": 1.7742858571068967, + "learning_rate": 4.662130479672883e-06, + "loss": 0.7058, + "step": 22476 + }, + { + "epoch": 0.6888868456540395, + "grad_norm": 1.6034842049736386, + "learning_rate": 4.661291114385504e-06, + "loss": 0.718, + "step": 22477 + }, + { + "epoch": 0.6889174941767807, + "grad_norm": 0.6631093653527189, + "learning_rate": 4.6604518017017885e-06, + "loss": 0.5392, + "step": 22478 + }, + { + "epoch": 0.6889481426995219, + "grad_norm": 1.5775172736108758, + "learning_rate": 4.659612541630012e-06, + "loss": 0.682, + "step": 22479 + }, + { + "epoch": 0.6889787912222631, + "grad_norm": 1.4948792701087394, + "learning_rate": 4.658773334178437e-06, + "loss": 0.5367, + "step": 22480 + }, + { + "epoch": 0.6890094397450043, + "grad_norm": 1.5207048954666176, + "learning_rate": 4.657934179355333e-06, + "loss": 0.7241, + "step": 22481 + }, + { + "epoch": 0.6890400882677455, + "grad_norm": 1.4979767982085634, + "learning_rate": 4.657095077168975e-06, + "loss": 0.6632, + "step": 22482 + }, + { + "epoch": 0.6890707367904867, + "grad_norm": 1.6800122374369566, + "learning_rate": 4.656256027627622e-06, + "loss": 0.6821, + "step": 22483 + }, + { + "epoch": 0.689101385313228, + "grad_norm": 0.6770966306904882, + "learning_rate": 4.655417030739551e-06, + "loss": 0.5224, + "step": 22484 + }, + { + "epoch": 0.6891320338359691, + "grad_norm": 1.555467680194033, + "learning_rate": 4.6545780865130155e-06, + "loss": 0.6435, + "step": 22485 + }, + { + "epoch": 0.6891626823587104, + "grad_norm": 1.6345457987008005, + "learning_rate": 4.653739194956296e-06, + "loss": 0.8129, + "step": 22486 + }, + { + "epoch": 0.6891933308814515, + "grad_norm": 0.6778891441408549, + "learning_rate": 4.652900356077653e-06, + "loss": 0.5268, + "step": 22487 + }, + { + "epoch": 0.6892239794041927, + "grad_norm": 0.6683947971083041, + "learning_rate": 4.6520615698853465e-06, + "loss": 0.5351, + "step": 22488 + }, + { + "epoch": 0.6892546279269339, + "grad_norm": 1.5362895113461996, + "learning_rate": 4.651222836387646e-06, + "loss": 0.7153, + "step": 22489 + }, + { + "epoch": 0.6892852764496751, + "grad_norm": 1.7084871729629039, + "learning_rate": 4.6503841555928195e-06, + "loss": 0.6546, + "step": 22490 + }, + { + "epoch": 0.6893159249724163, + "grad_norm": 1.6995026839157188, + "learning_rate": 4.6495455275091225e-06, + "loss": 0.6695, + "step": 22491 + }, + { + "epoch": 0.6893465734951575, + "grad_norm": 0.6787698705261084, + "learning_rate": 4.648706952144824e-06, + "loss": 0.5436, + "step": 22492 + }, + { + "epoch": 0.6893772220178987, + "grad_norm": 1.5859046111292188, + "learning_rate": 4.6478684295081865e-06, + "loss": 0.7172, + "step": 22493 + }, + { + "epoch": 0.6894078705406399, + "grad_norm": 0.6695405933613207, + "learning_rate": 4.647029959607469e-06, + "loss": 0.541, + "step": 22494 + }, + { + "epoch": 0.6894385190633812, + "grad_norm": 0.6767900748591805, + "learning_rate": 4.646191542450937e-06, + "loss": 0.5401, + "step": 22495 + }, + { + "epoch": 0.6894691675861223, + "grad_norm": 1.5490835959275076, + "learning_rate": 4.645353178046843e-06, + "loss": 0.659, + "step": 22496 + }, + { + "epoch": 0.6894998161088636, + "grad_norm": 1.510195653356313, + "learning_rate": 4.644514866403461e-06, + "loss": 0.736, + "step": 22497 + }, + { + "epoch": 0.6895304646316047, + "grad_norm": 1.6317344825952926, + "learning_rate": 4.643676607529045e-06, + "loss": 0.7781, + "step": 22498 + }, + { + "epoch": 0.689561113154346, + "grad_norm": 1.6491550863154114, + "learning_rate": 4.642838401431849e-06, + "loss": 0.7086, + "step": 22499 + }, + { + "epoch": 0.6895917616770871, + "grad_norm": 1.3290671934742873, + "learning_rate": 4.642000248120139e-06, + "loss": 0.6079, + "step": 22500 + }, + { + "epoch": 0.6896224101998284, + "grad_norm": 1.5006433597669433, + "learning_rate": 4.641162147602173e-06, + "loss": 0.6268, + "step": 22501 + }, + { + "epoch": 0.6896530587225695, + "grad_norm": 1.624914197908833, + "learning_rate": 4.640324099886205e-06, + "loss": 0.6778, + "step": 22502 + }, + { + "epoch": 0.6896837072453108, + "grad_norm": 1.5291782263769924, + "learning_rate": 4.6394861049804955e-06, + "loss": 0.6204, + "step": 22503 + }, + { + "epoch": 0.689714355768052, + "grad_norm": 0.6475984884259047, + "learning_rate": 4.638648162893299e-06, + "loss": 0.5169, + "step": 22504 + }, + { + "epoch": 0.6897450042907932, + "grad_norm": 1.677135363310006, + "learning_rate": 4.637810273632879e-06, + "loss": 0.6293, + "step": 22505 + }, + { + "epoch": 0.6897756528135344, + "grad_norm": 1.767474655783596, + "learning_rate": 4.636972437207486e-06, + "loss": 0.7242, + "step": 22506 + }, + { + "epoch": 0.6898063013362756, + "grad_norm": 1.5345932596319896, + "learning_rate": 4.6361346536253684e-06, + "loss": 0.742, + "step": 22507 + }, + { + "epoch": 0.6898369498590168, + "grad_norm": 1.7918042777101066, + "learning_rate": 4.635296922894796e-06, + "loss": 0.6202, + "step": 22508 + }, + { + "epoch": 0.689867598381758, + "grad_norm": 1.4100658461278777, + "learning_rate": 4.634459245024016e-06, + "loss": 0.6093, + "step": 22509 + }, + { + "epoch": 0.6898982469044992, + "grad_norm": 1.4530680272889658, + "learning_rate": 4.633621620021277e-06, + "loss": 0.6121, + "step": 22510 + }, + { + "epoch": 0.6899288954272405, + "grad_norm": 1.4897203711727265, + "learning_rate": 4.632784047894838e-06, + "loss": 0.6569, + "step": 22511 + }, + { + "epoch": 0.6899595439499816, + "grad_norm": 1.5057429989515363, + "learning_rate": 4.6319465286529505e-06, + "loss": 0.5843, + "step": 22512 + }, + { + "epoch": 0.6899901924727229, + "grad_norm": 1.7939313998842088, + "learning_rate": 4.631109062303873e-06, + "loss": 0.7679, + "step": 22513 + }, + { + "epoch": 0.690020840995464, + "grad_norm": 1.921179587361474, + "learning_rate": 4.6302716488558455e-06, + "loss": 0.7248, + "step": 22514 + }, + { + "epoch": 0.6900514895182053, + "grad_norm": 1.821420998673597, + "learning_rate": 4.6294342883171266e-06, + "loss": 0.7072, + "step": 22515 + }, + { + "epoch": 0.6900821380409464, + "grad_norm": 1.594746962869948, + "learning_rate": 4.628596980695969e-06, + "loss": 0.5839, + "step": 22516 + }, + { + "epoch": 0.6901127865636877, + "grad_norm": 1.4473750363779014, + "learning_rate": 4.62775972600062e-06, + "loss": 0.6077, + "step": 22517 + }, + { + "epoch": 0.6901434350864288, + "grad_norm": 1.658307789137873, + "learning_rate": 4.626922524239321e-06, + "loss": 0.6454, + "step": 22518 + }, + { + "epoch": 0.69017408360917, + "grad_norm": 1.6094082830837892, + "learning_rate": 4.626085375420337e-06, + "loss": 0.6686, + "step": 22519 + }, + { + "epoch": 0.6902047321319112, + "grad_norm": 1.5391035314616361, + "learning_rate": 4.625248279551909e-06, + "loss": 0.5834, + "step": 22520 + }, + { + "epoch": 0.6902353806546524, + "grad_norm": 1.6431167894066578, + "learning_rate": 4.624411236642281e-06, + "loss": 0.7116, + "step": 22521 + }, + { + "epoch": 0.6902660291773937, + "grad_norm": 1.4690579473366665, + "learning_rate": 4.623574246699704e-06, + "loss": 0.5982, + "step": 22522 + }, + { + "epoch": 0.6902966777001348, + "grad_norm": 2.4117100082188285, + "learning_rate": 4.6227373097324255e-06, + "loss": 0.6195, + "step": 22523 + }, + { + "epoch": 0.6903273262228761, + "grad_norm": 1.5594434227645344, + "learning_rate": 4.6219004257486966e-06, + "loss": 0.6999, + "step": 22524 + }, + { + "epoch": 0.6903579747456172, + "grad_norm": 1.9113453178541675, + "learning_rate": 4.621063594756755e-06, + "loss": 0.8129, + "step": 22525 + }, + { + "epoch": 0.6903886232683585, + "grad_norm": 1.4637396051514084, + "learning_rate": 4.62022681676485e-06, + "loss": 0.6425, + "step": 22526 + }, + { + "epoch": 0.6904192717910996, + "grad_norm": 1.6966494350139147, + "learning_rate": 4.61939009178123e-06, + "loss": 0.723, + "step": 22527 + }, + { + "epoch": 0.6904499203138409, + "grad_norm": 1.797742529928451, + "learning_rate": 4.6185534198141366e-06, + "loss": 0.6841, + "step": 22528 + }, + { + "epoch": 0.690480568836582, + "grad_norm": 1.5796667391967583, + "learning_rate": 4.61771680087181e-06, + "loss": 0.6445, + "step": 22529 + }, + { + "epoch": 0.6905112173593233, + "grad_norm": 1.5718609677341784, + "learning_rate": 4.616880234962495e-06, + "loss": 0.6348, + "step": 22530 + }, + { + "epoch": 0.6905418658820645, + "grad_norm": 1.4665377419877959, + "learning_rate": 4.616043722094438e-06, + "loss": 0.5363, + "step": 22531 + }, + { + "epoch": 0.6905725144048057, + "grad_norm": 1.8669443420733163, + "learning_rate": 4.615207262275883e-06, + "loss": 0.6475, + "step": 22532 + }, + { + "epoch": 0.6906031629275469, + "grad_norm": 1.6078429194051258, + "learning_rate": 4.614370855515065e-06, + "loss": 0.6439, + "step": 22533 + }, + { + "epoch": 0.6906338114502881, + "grad_norm": 0.650028192506346, + "learning_rate": 4.613534501820228e-06, + "loss": 0.5477, + "step": 22534 + }, + { + "epoch": 0.6906644599730293, + "grad_norm": 0.6486955494900585, + "learning_rate": 4.612698201199619e-06, + "loss": 0.5356, + "step": 22535 + }, + { + "epoch": 0.6906951084957705, + "grad_norm": 1.3726320477852814, + "learning_rate": 4.611861953661473e-06, + "loss": 0.5757, + "step": 22536 + }, + { + "epoch": 0.6907257570185117, + "grad_norm": 1.4009459082421234, + "learning_rate": 4.611025759214021e-06, + "loss": 0.5735, + "step": 22537 + }, + { + "epoch": 0.690756405541253, + "grad_norm": 1.6109431980322302, + "learning_rate": 4.610189617865519e-06, + "loss": 0.6732, + "step": 22538 + }, + { + "epoch": 0.6907870540639941, + "grad_norm": 1.522703054572605, + "learning_rate": 4.609353529624194e-06, + "loss": 0.613, + "step": 22539 + }, + { + "epoch": 0.6908177025867354, + "grad_norm": 1.562348457456303, + "learning_rate": 4.608517494498293e-06, + "loss": 0.615, + "step": 22540 + }, + { + "epoch": 0.6908483511094765, + "grad_norm": 1.4957259305501995, + "learning_rate": 4.607681512496043e-06, + "loss": 0.7009, + "step": 22541 + }, + { + "epoch": 0.6908789996322178, + "grad_norm": 1.6470264367037268, + "learning_rate": 4.6068455836256875e-06, + "loss": 0.5799, + "step": 22542 + }, + { + "epoch": 0.6909096481549589, + "grad_norm": 1.554818656947422, + "learning_rate": 4.606009707895466e-06, + "loss": 0.6025, + "step": 22543 + }, + { + "epoch": 0.6909402966777002, + "grad_norm": 1.5781396100531968, + "learning_rate": 4.605173885313606e-06, + "loss": 0.6226, + "step": 22544 + }, + { + "epoch": 0.6909709452004413, + "grad_norm": 1.5493919701781587, + "learning_rate": 4.604338115888351e-06, + "loss": 0.6368, + "step": 22545 + }, + { + "epoch": 0.6910015937231826, + "grad_norm": 1.487412862598171, + "learning_rate": 4.6035023996279334e-06, + "loss": 0.5557, + "step": 22546 + }, + { + "epoch": 0.6910322422459237, + "grad_norm": 0.6988919802281834, + "learning_rate": 4.60266673654059e-06, + "loss": 0.5288, + "step": 22547 + }, + { + "epoch": 0.691062890768665, + "grad_norm": 1.5414826939389397, + "learning_rate": 4.601831126634544e-06, + "loss": 0.6977, + "step": 22548 + }, + { + "epoch": 0.6910935392914062, + "grad_norm": 0.7228388687848489, + "learning_rate": 4.600995569918044e-06, + "loss": 0.5514, + "step": 22549 + }, + { + "epoch": 0.6911241878141473, + "grad_norm": 1.5144982613665394, + "learning_rate": 4.600160066399313e-06, + "loss": 0.7048, + "step": 22550 + }, + { + "epoch": 0.6911548363368886, + "grad_norm": 1.3430444416020415, + "learning_rate": 4.59932461608659e-06, + "loss": 0.5869, + "step": 22551 + }, + { + "epoch": 0.6911854848596297, + "grad_norm": 1.5415889292732838, + "learning_rate": 4.598489218988099e-06, + "loss": 0.6274, + "step": 22552 + }, + { + "epoch": 0.691216133382371, + "grad_norm": 1.6418456275560822, + "learning_rate": 4.5976538751120766e-06, + "loss": 0.6393, + "step": 22553 + }, + { + "epoch": 0.6912467819051121, + "grad_norm": 0.6675764831406132, + "learning_rate": 4.596818584466756e-06, + "loss": 0.5576, + "step": 22554 + }, + { + "epoch": 0.6912774304278534, + "grad_norm": 1.5769314620650066, + "learning_rate": 4.595983347060361e-06, + "loss": 0.6935, + "step": 22555 + }, + { + "epoch": 0.6913080789505945, + "grad_norm": 1.5035113325197058, + "learning_rate": 4.595148162901126e-06, + "loss": 0.5652, + "step": 22556 + }, + { + "epoch": 0.6913387274733358, + "grad_norm": 1.5866144245560712, + "learning_rate": 4.59431303199728e-06, + "loss": 0.6433, + "step": 22557 + }, + { + "epoch": 0.691369375996077, + "grad_norm": 1.8212681370855692, + "learning_rate": 4.5934779543570485e-06, + "loss": 0.6425, + "step": 22558 + }, + { + "epoch": 0.6914000245188182, + "grad_norm": 1.7107156271468047, + "learning_rate": 4.592642929988662e-06, + "loss": 0.8067, + "step": 22559 + }, + { + "epoch": 0.6914306730415594, + "grad_norm": 1.8292386174339887, + "learning_rate": 4.591807958900352e-06, + "loss": 0.6974, + "step": 22560 + }, + { + "epoch": 0.6914613215643006, + "grad_norm": 1.6042106355298373, + "learning_rate": 4.590973041100338e-06, + "loss": 0.7135, + "step": 22561 + }, + { + "epoch": 0.6914919700870418, + "grad_norm": 0.6773829777740781, + "learning_rate": 4.590138176596855e-06, + "loss": 0.5263, + "step": 22562 + }, + { + "epoch": 0.691522618609783, + "grad_norm": 1.5670877187345171, + "learning_rate": 4.58930336539812e-06, + "loss": 0.5611, + "step": 22563 + }, + { + "epoch": 0.6915532671325242, + "grad_norm": 1.7154239083467993, + "learning_rate": 4.588468607512364e-06, + "loss": 0.6928, + "step": 22564 + }, + { + "epoch": 0.6915839156552654, + "grad_norm": 1.5798542412052057, + "learning_rate": 4.587633902947816e-06, + "loss": 0.6906, + "step": 22565 + }, + { + "epoch": 0.6916145641780066, + "grad_norm": 1.5584179623445436, + "learning_rate": 4.58679925171269e-06, + "loss": 0.6511, + "step": 22566 + }, + { + "epoch": 0.6916452127007479, + "grad_norm": 1.4336206048577727, + "learning_rate": 4.585964653815217e-06, + "loss": 0.6192, + "step": 22567 + }, + { + "epoch": 0.691675861223489, + "grad_norm": 0.7054749459338704, + "learning_rate": 4.585130109263624e-06, + "loss": 0.5516, + "step": 22568 + }, + { + "epoch": 0.6917065097462303, + "grad_norm": 0.6641345332032101, + "learning_rate": 4.584295618066125e-06, + "loss": 0.5442, + "step": 22569 + }, + { + "epoch": 0.6917371582689714, + "grad_norm": 1.5098186125947999, + "learning_rate": 4.583461180230947e-06, + "loss": 0.6458, + "step": 22570 + }, + { + "epoch": 0.6917678067917127, + "grad_norm": 1.628801171350947, + "learning_rate": 4.5826267957663165e-06, + "loss": 0.7023, + "step": 22571 + }, + { + "epoch": 0.6917984553144538, + "grad_norm": 1.6293459152905698, + "learning_rate": 4.581792464680446e-06, + "loss": 0.7234, + "step": 22572 + }, + { + "epoch": 0.6918291038371951, + "grad_norm": 0.6686905923896603, + "learning_rate": 4.580958186981563e-06, + "loss": 0.5387, + "step": 22573 + }, + { + "epoch": 0.6918597523599362, + "grad_norm": 1.4333253025980197, + "learning_rate": 4.580123962677884e-06, + "loss": 0.6683, + "step": 22574 + }, + { + "epoch": 0.6918904008826775, + "grad_norm": 1.8374759611903657, + "learning_rate": 4.579289791777629e-06, + "loss": 0.6886, + "step": 22575 + }, + { + "epoch": 0.6919210494054187, + "grad_norm": 1.6855940525745359, + "learning_rate": 4.578455674289021e-06, + "loss": 0.6788, + "step": 22576 + }, + { + "epoch": 0.6919516979281599, + "grad_norm": 0.6775702230776915, + "learning_rate": 4.577621610220275e-06, + "loss": 0.5522, + "step": 22577 + }, + { + "epoch": 0.6919823464509011, + "grad_norm": 1.5509828595348776, + "learning_rate": 4.576787599579611e-06, + "loss": 0.6818, + "step": 22578 + }, + { + "epoch": 0.6920129949736423, + "grad_norm": 1.6044839442177938, + "learning_rate": 4.575953642375248e-06, + "loss": 0.6793, + "step": 22579 + }, + { + "epoch": 0.6920436434963835, + "grad_norm": 1.6067578269043281, + "learning_rate": 4.575119738615399e-06, + "loss": 0.7185, + "step": 22580 + }, + { + "epoch": 0.6920742920191246, + "grad_norm": 0.6511468299681988, + "learning_rate": 4.574285888308288e-06, + "loss": 0.527, + "step": 22581 + }, + { + "epoch": 0.6921049405418659, + "grad_norm": 1.5268926845755926, + "learning_rate": 4.57345209146212e-06, + "loss": 0.6497, + "step": 22582 + }, + { + "epoch": 0.692135589064607, + "grad_norm": 1.5841339161799846, + "learning_rate": 4.572618348085119e-06, + "loss": 0.5953, + "step": 22583 + }, + { + "epoch": 0.6921662375873483, + "grad_norm": 1.53434346601183, + "learning_rate": 4.571784658185502e-06, + "loss": 0.6108, + "step": 22584 + }, + { + "epoch": 0.6921968861100894, + "grad_norm": 1.6836022006055287, + "learning_rate": 4.570951021771475e-06, + "loss": 0.7197, + "step": 22585 + }, + { + "epoch": 0.6922275346328307, + "grad_norm": 1.5778099630545657, + "learning_rate": 4.570117438851257e-06, + "loss": 0.6061, + "step": 22586 + }, + { + "epoch": 0.6922581831555719, + "grad_norm": 1.5126531532775835, + "learning_rate": 4.569283909433065e-06, + "loss": 0.7206, + "step": 22587 + }, + { + "epoch": 0.6922888316783131, + "grad_norm": 1.760745153067174, + "learning_rate": 4.568450433525103e-06, + "loss": 0.6376, + "step": 22588 + }, + { + "epoch": 0.6923194802010543, + "grad_norm": 1.696473982815756, + "learning_rate": 4.56761701113559e-06, + "loss": 0.6643, + "step": 22589 + }, + { + "epoch": 0.6923501287237955, + "grad_norm": 1.50215108766446, + "learning_rate": 4.566783642272741e-06, + "loss": 0.6474, + "step": 22590 + }, + { + "epoch": 0.6923807772465367, + "grad_norm": 1.6498930459471237, + "learning_rate": 4.565950326944757e-06, + "loss": 0.6801, + "step": 22591 + }, + { + "epoch": 0.6924114257692779, + "grad_norm": 0.6782268783169245, + "learning_rate": 4.56511706515986e-06, + "loss": 0.5569, + "step": 22592 + }, + { + "epoch": 0.6924420742920191, + "grad_norm": 1.4988657941447465, + "learning_rate": 4.564283856926247e-06, + "loss": 0.6795, + "step": 22593 + }, + { + "epoch": 0.6924727228147604, + "grad_norm": 1.5350733484467662, + "learning_rate": 4.5634507022521445e-06, + "loss": 0.6109, + "step": 22594 + }, + { + "epoch": 0.6925033713375015, + "grad_norm": 1.8556894142372522, + "learning_rate": 4.562617601145752e-06, + "loss": 0.6924, + "step": 22595 + }, + { + "epoch": 0.6925340198602428, + "grad_norm": 1.3878478833879722, + "learning_rate": 4.561784553615277e-06, + "loss": 0.6127, + "step": 22596 + }, + { + "epoch": 0.6925646683829839, + "grad_norm": 1.8743082687736374, + "learning_rate": 4.560951559668929e-06, + "loss": 0.6786, + "step": 22597 + }, + { + "epoch": 0.6925953169057252, + "grad_norm": 0.6838094833236812, + "learning_rate": 4.560118619314921e-06, + "loss": 0.5546, + "step": 22598 + }, + { + "epoch": 0.6926259654284663, + "grad_norm": 1.6954022065400751, + "learning_rate": 4.5592857325614524e-06, + "loss": 0.6754, + "step": 22599 + }, + { + "epoch": 0.6926566139512076, + "grad_norm": 1.5329830744399615, + "learning_rate": 4.558452899416734e-06, + "loss": 0.6944, + "step": 22600 + }, + { + "epoch": 0.6926872624739487, + "grad_norm": 0.681079103671707, + "learning_rate": 4.557620119888975e-06, + "loss": 0.5383, + "step": 22601 + }, + { + "epoch": 0.69271791099669, + "grad_norm": 0.6742246442477853, + "learning_rate": 4.556787393986374e-06, + "loss": 0.5515, + "step": 22602 + }, + { + "epoch": 0.6927485595194312, + "grad_norm": 1.737814815374047, + "learning_rate": 4.555954721717143e-06, + "loss": 0.6932, + "step": 22603 + }, + { + "epoch": 0.6927792080421724, + "grad_norm": 1.7216098052926023, + "learning_rate": 4.555122103089475e-06, + "loss": 0.6556, + "step": 22604 + }, + { + "epoch": 0.6928098565649136, + "grad_norm": 1.7237908116289287, + "learning_rate": 4.5542895381115895e-06, + "loss": 0.6912, + "step": 22605 + }, + { + "epoch": 0.6928405050876548, + "grad_norm": 1.646204292301625, + "learning_rate": 4.553457026791683e-06, + "loss": 0.6428, + "step": 22606 + }, + { + "epoch": 0.692871153610396, + "grad_norm": 1.5324364899075085, + "learning_rate": 4.5526245691379545e-06, + "loss": 0.6398, + "step": 22607 + }, + { + "epoch": 0.6929018021331372, + "grad_norm": 1.6947121217539936, + "learning_rate": 4.551792165158609e-06, + "loss": 0.6146, + "step": 22608 + }, + { + "epoch": 0.6929324506558784, + "grad_norm": 1.4842263465468968, + "learning_rate": 4.550959814861854e-06, + "loss": 0.5246, + "step": 22609 + }, + { + "epoch": 0.6929630991786196, + "grad_norm": 1.585689818861801, + "learning_rate": 4.550127518255883e-06, + "loss": 0.736, + "step": 22610 + }, + { + "epoch": 0.6929937477013608, + "grad_norm": 1.6592202277435757, + "learning_rate": 4.5492952753488985e-06, + "loss": 0.7081, + "step": 22611 + }, + { + "epoch": 0.693024396224102, + "grad_norm": 0.6385935835301356, + "learning_rate": 4.548463086149102e-06, + "loss": 0.5343, + "step": 22612 + }, + { + "epoch": 0.6930550447468432, + "grad_norm": 1.609184959876922, + "learning_rate": 4.547630950664699e-06, + "loss": 0.7668, + "step": 22613 + }, + { + "epoch": 0.6930856932695844, + "grad_norm": 1.4296127648019126, + "learning_rate": 4.546798868903882e-06, + "loss": 0.6276, + "step": 22614 + }, + { + "epoch": 0.6931163417923256, + "grad_norm": 0.6427799147037877, + "learning_rate": 4.545966840874844e-06, + "loss": 0.5295, + "step": 22615 + }, + { + "epoch": 0.6931469903150668, + "grad_norm": 1.6145093269051076, + "learning_rate": 4.545134866585798e-06, + "loss": 0.7126, + "step": 22616 + }, + { + "epoch": 0.693177638837808, + "grad_norm": 1.4661431055272256, + "learning_rate": 4.544302946044933e-06, + "loss": 0.6421, + "step": 22617 + }, + { + "epoch": 0.6932082873605492, + "grad_norm": 1.52809567447954, + "learning_rate": 4.543471079260443e-06, + "loss": 0.7466, + "step": 22618 + }, + { + "epoch": 0.6932389358832904, + "grad_norm": 1.5856384136977693, + "learning_rate": 4.54263926624053e-06, + "loss": 0.624, + "step": 22619 + }, + { + "epoch": 0.6932695844060316, + "grad_norm": 1.4044489110736997, + "learning_rate": 4.541807506993388e-06, + "loss": 0.5651, + "step": 22620 + }, + { + "epoch": 0.6933002329287729, + "grad_norm": 1.6338199686820105, + "learning_rate": 4.540975801527215e-06, + "loss": 0.6734, + "step": 22621 + }, + { + "epoch": 0.693330881451514, + "grad_norm": 1.5049561587445501, + "learning_rate": 4.540144149850203e-06, + "loss": 0.6802, + "step": 22622 + }, + { + "epoch": 0.6933615299742553, + "grad_norm": 1.8864502158317815, + "learning_rate": 4.5393125519705475e-06, + "loss": 0.7437, + "step": 22623 + }, + { + "epoch": 0.6933921784969964, + "grad_norm": 1.6922983785593617, + "learning_rate": 4.538481007896445e-06, + "loss": 0.6918, + "step": 22624 + }, + { + "epoch": 0.6934228270197377, + "grad_norm": 1.5289235633749658, + "learning_rate": 4.5376495176360865e-06, + "loss": 0.6666, + "step": 22625 + }, + { + "epoch": 0.6934534755424788, + "grad_norm": 0.6602899852805894, + "learning_rate": 4.536818081197663e-06, + "loss": 0.5236, + "step": 22626 + }, + { + "epoch": 0.6934841240652201, + "grad_norm": 1.6589852820698805, + "learning_rate": 4.535986698589367e-06, + "loss": 0.6318, + "step": 22627 + }, + { + "epoch": 0.6935147725879612, + "grad_norm": 1.5845495497080206, + "learning_rate": 4.535155369819396e-06, + "loss": 0.7128, + "step": 22628 + }, + { + "epoch": 0.6935454211107025, + "grad_norm": 0.6480967104897992, + "learning_rate": 4.534324094895934e-06, + "loss": 0.5553, + "step": 22629 + }, + { + "epoch": 0.6935760696334436, + "grad_norm": 1.4527491280927745, + "learning_rate": 4.533492873827176e-06, + "loss": 0.6392, + "step": 22630 + }, + { + "epoch": 0.6936067181561849, + "grad_norm": 1.7765218471579922, + "learning_rate": 4.532661706621311e-06, + "loss": 0.6074, + "step": 22631 + }, + { + "epoch": 0.6936373666789261, + "grad_norm": 1.7639203266732877, + "learning_rate": 4.531830593286532e-06, + "loss": 0.5783, + "step": 22632 + }, + { + "epoch": 0.6936680152016673, + "grad_norm": 1.3869470984231742, + "learning_rate": 4.530999533831025e-06, + "loss": 0.567, + "step": 22633 + }, + { + "epoch": 0.6936986637244085, + "grad_norm": 0.6534917957230452, + "learning_rate": 4.530168528262973e-06, + "loss": 0.5187, + "step": 22634 + }, + { + "epoch": 0.6937293122471497, + "grad_norm": 1.712721191794932, + "learning_rate": 4.529337576590577e-06, + "loss": 0.7554, + "step": 22635 + }, + { + "epoch": 0.6937599607698909, + "grad_norm": 1.6416271751949776, + "learning_rate": 4.5285066788220165e-06, + "loss": 0.6411, + "step": 22636 + }, + { + "epoch": 0.6937906092926321, + "grad_norm": 1.5078521401482885, + "learning_rate": 4.527675834965477e-06, + "loss": 0.6343, + "step": 22637 + }, + { + "epoch": 0.6938212578153733, + "grad_norm": 1.4922858323795998, + "learning_rate": 4.526845045029147e-06, + "loss": 0.619, + "step": 22638 + }, + { + "epoch": 0.6938519063381146, + "grad_norm": 1.6650613230717368, + "learning_rate": 4.526014309021213e-06, + "loss": 0.6842, + "step": 22639 + }, + { + "epoch": 0.6938825548608557, + "grad_norm": 1.663553624794193, + "learning_rate": 4.525183626949865e-06, + "loss": 0.6855, + "step": 22640 + }, + { + "epoch": 0.693913203383597, + "grad_norm": 0.6767281540872356, + "learning_rate": 4.524352998823279e-06, + "loss": 0.5453, + "step": 22641 + }, + { + "epoch": 0.6939438519063381, + "grad_norm": 1.5190996483734536, + "learning_rate": 4.523522424649645e-06, + "loss": 0.7035, + "step": 22642 + }, + { + "epoch": 0.6939745004290793, + "grad_norm": 1.5511007221893276, + "learning_rate": 4.522691904437149e-06, + "loss": 0.6859, + "step": 22643 + }, + { + "epoch": 0.6940051489518205, + "grad_norm": 0.6858498358213436, + "learning_rate": 4.5218614381939705e-06, + "loss": 0.5401, + "step": 22644 + }, + { + "epoch": 0.6940357974745617, + "grad_norm": 1.6762053042002727, + "learning_rate": 4.521031025928286e-06, + "loss": 0.7135, + "step": 22645 + }, + { + "epoch": 0.6940664459973029, + "grad_norm": 1.586619574626875, + "learning_rate": 4.520200667648292e-06, + "loss": 0.6572, + "step": 22646 + }, + { + "epoch": 0.6940970945200441, + "grad_norm": 1.5131636883990858, + "learning_rate": 4.519370363362163e-06, + "loss": 0.6426, + "step": 22647 + }, + { + "epoch": 0.6941277430427854, + "grad_norm": 1.471410103695104, + "learning_rate": 4.518540113078076e-06, + "loss": 0.7189, + "step": 22648 + }, + { + "epoch": 0.6941583915655265, + "grad_norm": 1.5957974610684946, + "learning_rate": 4.517709916804216e-06, + "loss": 0.6448, + "step": 22649 + }, + { + "epoch": 0.6941890400882678, + "grad_norm": 1.8095464249001818, + "learning_rate": 4.5168797745487634e-06, + "loss": 0.7034, + "step": 22650 + }, + { + "epoch": 0.6942196886110089, + "grad_norm": 1.5832079602324118, + "learning_rate": 4.5160496863199e-06, + "loss": 0.7226, + "step": 22651 + }, + { + "epoch": 0.6942503371337502, + "grad_norm": 0.6268643173273677, + "learning_rate": 4.5152196521258e-06, + "loss": 0.4815, + "step": 22652 + }, + { + "epoch": 0.6942809856564913, + "grad_norm": 1.6411995587010992, + "learning_rate": 4.5143896719746425e-06, + "loss": 0.6852, + "step": 22653 + }, + { + "epoch": 0.6943116341792326, + "grad_norm": 1.5451276709884527, + "learning_rate": 4.513559745874612e-06, + "loss": 0.6249, + "step": 22654 + }, + { + "epoch": 0.6943422827019737, + "grad_norm": 1.5987149907869453, + "learning_rate": 4.51272987383388e-06, + "loss": 0.6444, + "step": 22655 + }, + { + "epoch": 0.694372931224715, + "grad_norm": 1.7985416005005133, + "learning_rate": 4.5119000558606175e-06, + "loss": 0.6802, + "step": 22656 + }, + { + "epoch": 0.6944035797474561, + "grad_norm": 0.6603501846527358, + "learning_rate": 4.511070291963015e-06, + "loss": 0.5556, + "step": 22657 + }, + { + "epoch": 0.6944342282701974, + "grad_norm": 1.5388160247993445, + "learning_rate": 4.510240582149239e-06, + "loss": 0.7605, + "step": 22658 + }, + { + "epoch": 0.6944648767929386, + "grad_norm": 1.5752080584176735, + "learning_rate": 4.50941092642747e-06, + "loss": 0.613, + "step": 22659 + }, + { + "epoch": 0.6944955253156798, + "grad_norm": 1.6889707660755475, + "learning_rate": 4.508581324805876e-06, + "loss": 0.6972, + "step": 22660 + }, + { + "epoch": 0.694526173838421, + "grad_norm": 0.6502001244464553, + "learning_rate": 4.507751777292635e-06, + "loss": 0.5503, + "step": 22661 + }, + { + "epoch": 0.6945568223611622, + "grad_norm": 1.4098870449937715, + "learning_rate": 4.506922283895926e-06, + "loss": 0.7534, + "step": 22662 + }, + { + "epoch": 0.6945874708839034, + "grad_norm": 1.5911635807037239, + "learning_rate": 4.506092844623912e-06, + "loss": 0.5949, + "step": 22663 + }, + { + "epoch": 0.6946181194066446, + "grad_norm": 1.5789808930406417, + "learning_rate": 4.505263459484772e-06, + "loss": 0.6392, + "step": 22664 + }, + { + "epoch": 0.6946487679293858, + "grad_norm": 1.4551141759494859, + "learning_rate": 4.50443412848668e-06, + "loss": 0.7026, + "step": 22665 + }, + { + "epoch": 0.694679416452127, + "grad_norm": 1.875253769093784, + "learning_rate": 4.503604851637801e-06, + "loss": 0.7459, + "step": 22666 + }, + { + "epoch": 0.6947100649748682, + "grad_norm": 1.6273460326833475, + "learning_rate": 4.50277562894631e-06, + "loss": 0.732, + "step": 22667 + }, + { + "epoch": 0.6947407134976095, + "grad_norm": 1.5434151288841933, + "learning_rate": 4.501946460420381e-06, + "loss": 0.603, + "step": 22668 + }, + { + "epoch": 0.6947713620203506, + "grad_norm": 1.575599691975568, + "learning_rate": 4.501117346068177e-06, + "loss": 0.5956, + "step": 22669 + }, + { + "epoch": 0.6948020105430919, + "grad_norm": 1.496933705914044, + "learning_rate": 4.500288285897873e-06, + "loss": 0.5704, + "step": 22670 + }, + { + "epoch": 0.694832659065833, + "grad_norm": 1.6252755441837698, + "learning_rate": 4.499459279917633e-06, + "loss": 0.6662, + "step": 22671 + }, + { + "epoch": 0.6948633075885743, + "grad_norm": 1.6892199964524783, + "learning_rate": 4.498630328135628e-06, + "loss": 0.7031, + "step": 22672 + }, + { + "epoch": 0.6948939561113154, + "grad_norm": 1.649186505054161, + "learning_rate": 4.497801430560029e-06, + "loss": 0.627, + "step": 22673 + }, + { + "epoch": 0.6949246046340566, + "grad_norm": 1.5828293150196224, + "learning_rate": 4.496972587198998e-06, + "loss": 0.5887, + "step": 22674 + }, + { + "epoch": 0.6949552531567978, + "grad_norm": 1.6276778887245504, + "learning_rate": 4.496143798060703e-06, + "loss": 0.6035, + "step": 22675 + }, + { + "epoch": 0.694985901679539, + "grad_norm": 1.5260112942688526, + "learning_rate": 4.495315063153316e-06, + "loss": 0.643, + "step": 22676 + }, + { + "epoch": 0.6950165502022803, + "grad_norm": 1.816324866822364, + "learning_rate": 4.494486382484994e-06, + "loss": 0.6974, + "step": 22677 + }, + { + "epoch": 0.6950471987250214, + "grad_norm": 1.5293968592677072, + "learning_rate": 4.49365775606391e-06, + "loss": 0.6256, + "step": 22678 + }, + { + "epoch": 0.6950778472477627, + "grad_norm": 0.6622239103959503, + "learning_rate": 4.492829183898221e-06, + "loss": 0.5316, + "step": 22679 + }, + { + "epoch": 0.6951084957705038, + "grad_norm": 1.5725640552258053, + "learning_rate": 4.492000665996094e-06, + "loss": 0.6055, + "step": 22680 + }, + { + "epoch": 0.6951391442932451, + "grad_norm": 0.6820159880484722, + "learning_rate": 4.491172202365699e-06, + "loss": 0.5536, + "step": 22681 + }, + { + "epoch": 0.6951697928159862, + "grad_norm": 1.5376925947832216, + "learning_rate": 4.49034379301519e-06, + "loss": 0.6551, + "step": 22682 + }, + { + "epoch": 0.6952004413387275, + "grad_norm": 1.6549205991575344, + "learning_rate": 4.4895154379527324e-06, + "loss": 0.6764, + "step": 22683 + }, + { + "epoch": 0.6952310898614686, + "grad_norm": 1.92929216077745, + "learning_rate": 4.488687137186494e-06, + "loss": 0.7075, + "step": 22684 + }, + { + "epoch": 0.6952617383842099, + "grad_norm": 1.6621734435560378, + "learning_rate": 4.487858890724627e-06, + "loss": 0.759, + "step": 22685 + }, + { + "epoch": 0.695292386906951, + "grad_norm": 0.6542980094221723, + "learning_rate": 4.487030698575297e-06, + "loss": 0.5541, + "step": 22686 + }, + { + "epoch": 0.6953230354296923, + "grad_norm": 1.6204114566133567, + "learning_rate": 4.4862025607466675e-06, + "loss": 0.6746, + "step": 22687 + }, + { + "epoch": 0.6953536839524335, + "grad_norm": 1.535241434912887, + "learning_rate": 4.485374477246891e-06, + "loss": 0.6572, + "step": 22688 + }, + { + "epoch": 0.6953843324751747, + "grad_norm": 1.461689782451156, + "learning_rate": 4.484546448084135e-06, + "loss": 0.6401, + "step": 22689 + }, + { + "epoch": 0.6954149809979159, + "grad_norm": 1.7495536521765203, + "learning_rate": 4.483718473266551e-06, + "loss": 0.6175, + "step": 22690 + }, + { + "epoch": 0.6954456295206571, + "grad_norm": 1.4755037363078378, + "learning_rate": 4.482890552802299e-06, + "loss": 0.6693, + "step": 22691 + }, + { + "epoch": 0.6954762780433983, + "grad_norm": 1.489726923579985, + "learning_rate": 4.482062686699542e-06, + "loss": 0.6389, + "step": 22692 + }, + { + "epoch": 0.6955069265661395, + "grad_norm": 1.6342622536254647, + "learning_rate": 4.4812348749664295e-06, + "loss": 0.7048, + "step": 22693 + }, + { + "epoch": 0.6955375750888807, + "grad_norm": 1.8654872161927194, + "learning_rate": 4.480407117611122e-06, + "loss": 0.6604, + "step": 22694 + }, + { + "epoch": 0.695568223611622, + "grad_norm": 1.6423374350260391, + "learning_rate": 4.4795794146417794e-06, + "loss": 0.7121, + "step": 22695 + }, + { + "epoch": 0.6955988721343631, + "grad_norm": 1.430402924004398, + "learning_rate": 4.478751766066549e-06, + "loss": 0.578, + "step": 22696 + }, + { + "epoch": 0.6956295206571044, + "grad_norm": 1.41542887020735, + "learning_rate": 4.47792417189359e-06, + "loss": 0.6791, + "step": 22697 + }, + { + "epoch": 0.6956601691798455, + "grad_norm": 1.7264295218142771, + "learning_rate": 4.477096632131062e-06, + "loss": 0.679, + "step": 22698 + }, + { + "epoch": 0.6956908177025868, + "grad_norm": 1.5137939604352955, + "learning_rate": 4.476269146787109e-06, + "loss": 0.5797, + "step": 22699 + }, + { + "epoch": 0.6957214662253279, + "grad_norm": 1.4837754374792127, + "learning_rate": 4.475441715869893e-06, + "loss": 0.7343, + "step": 22700 + }, + { + "epoch": 0.6957521147480692, + "grad_norm": 1.726559647139586, + "learning_rate": 4.47461433938756e-06, + "loss": 0.6618, + "step": 22701 + }, + { + "epoch": 0.6957827632708103, + "grad_norm": 1.59057847711905, + "learning_rate": 4.473787017348265e-06, + "loss": 0.7024, + "step": 22702 + }, + { + "epoch": 0.6958134117935516, + "grad_norm": 1.648188402101706, + "learning_rate": 4.472959749760165e-06, + "loss": 0.6818, + "step": 22703 + }, + { + "epoch": 0.6958440603162928, + "grad_norm": 1.6177969432008097, + "learning_rate": 4.472132536631403e-06, + "loss": 0.6375, + "step": 22704 + }, + { + "epoch": 0.6958747088390339, + "grad_norm": 1.4932107351044506, + "learning_rate": 4.471305377970133e-06, + "loss": 0.6606, + "step": 22705 + }, + { + "epoch": 0.6959053573617752, + "grad_norm": 1.6156920175562248, + "learning_rate": 4.47047827378451e-06, + "loss": 0.5304, + "step": 22706 + }, + { + "epoch": 0.6959360058845163, + "grad_norm": 1.5052885041635273, + "learning_rate": 4.469651224082676e-06, + "loss": 0.6398, + "step": 22707 + }, + { + "epoch": 0.6959666544072576, + "grad_norm": 1.773691896765844, + "learning_rate": 4.4688242288727824e-06, + "loss": 0.7447, + "step": 22708 + }, + { + "epoch": 0.6959973029299987, + "grad_norm": 1.8933917203500705, + "learning_rate": 4.467997288162983e-06, + "loss": 0.6381, + "step": 22709 + }, + { + "epoch": 0.69602795145274, + "grad_norm": 1.8355596524155042, + "learning_rate": 4.467170401961418e-06, + "loss": 0.6696, + "step": 22710 + }, + { + "epoch": 0.6960585999754811, + "grad_norm": 0.6499886871720392, + "learning_rate": 4.466343570276242e-06, + "loss": 0.5524, + "step": 22711 + }, + { + "epoch": 0.6960892484982224, + "grad_norm": 1.4790735429691815, + "learning_rate": 4.465516793115593e-06, + "loss": 0.658, + "step": 22712 + }, + { + "epoch": 0.6961198970209636, + "grad_norm": 1.4333471772727655, + "learning_rate": 4.464690070487628e-06, + "loss": 0.6278, + "step": 22713 + }, + { + "epoch": 0.6961505455437048, + "grad_norm": 1.6906016241918327, + "learning_rate": 4.4638634024004905e-06, + "loss": 0.6549, + "step": 22714 + }, + { + "epoch": 0.696181194066446, + "grad_norm": 1.5549335363570265, + "learning_rate": 4.463036788862318e-06, + "loss": 0.5999, + "step": 22715 + }, + { + "epoch": 0.6962118425891872, + "grad_norm": 1.527403001452757, + "learning_rate": 4.462210229881261e-06, + "loss": 0.554, + "step": 22716 + }, + { + "epoch": 0.6962424911119284, + "grad_norm": 0.6575848129997443, + "learning_rate": 4.461383725465467e-06, + "loss": 0.5408, + "step": 22717 + }, + { + "epoch": 0.6962731396346696, + "grad_norm": 1.6203132870237014, + "learning_rate": 4.4605572756230734e-06, + "loss": 0.7629, + "step": 22718 + }, + { + "epoch": 0.6963037881574108, + "grad_norm": 1.5078497792503158, + "learning_rate": 4.459730880362225e-06, + "loss": 0.6702, + "step": 22719 + }, + { + "epoch": 0.696334436680152, + "grad_norm": 1.3314012336525887, + "learning_rate": 4.4589045396910665e-06, + "loss": 0.5714, + "step": 22720 + }, + { + "epoch": 0.6963650852028932, + "grad_norm": 1.6444016023292911, + "learning_rate": 4.458078253617744e-06, + "loss": 0.7029, + "step": 22721 + }, + { + "epoch": 0.6963957337256345, + "grad_norm": 1.5997882511139576, + "learning_rate": 4.4572520221503936e-06, + "loss": 0.6466, + "step": 22722 + }, + { + "epoch": 0.6964263822483756, + "grad_norm": 2.2205761408970472, + "learning_rate": 4.456425845297153e-06, + "loss": 0.677, + "step": 22723 + }, + { + "epoch": 0.6964570307711169, + "grad_norm": 1.4909747128985376, + "learning_rate": 4.455599723066168e-06, + "loss": 0.5972, + "step": 22724 + }, + { + "epoch": 0.696487679293858, + "grad_norm": 1.6444115136822668, + "learning_rate": 4.454773655465579e-06, + "loss": 0.5537, + "step": 22725 + }, + { + "epoch": 0.6965183278165993, + "grad_norm": 0.6918045189985969, + "learning_rate": 4.4539476425035235e-06, + "loss": 0.5537, + "step": 22726 + }, + { + "epoch": 0.6965489763393404, + "grad_norm": 1.5730072383003981, + "learning_rate": 4.453121684188139e-06, + "loss": 0.5769, + "step": 22727 + }, + { + "epoch": 0.6965796248620817, + "grad_norm": 1.563103300187083, + "learning_rate": 4.4522957805275695e-06, + "loss": 0.663, + "step": 22728 + }, + { + "epoch": 0.6966102733848228, + "grad_norm": 1.726817714995464, + "learning_rate": 4.451469931529946e-06, + "loss": 0.7227, + "step": 22729 + }, + { + "epoch": 0.6966409219075641, + "grad_norm": 1.6265621349712542, + "learning_rate": 4.450644137203411e-06, + "loss": 0.6354, + "step": 22730 + }, + { + "epoch": 0.6966715704303053, + "grad_norm": 1.747488317408471, + "learning_rate": 4.449818397556094e-06, + "loss": 0.6282, + "step": 22731 + }, + { + "epoch": 0.6967022189530465, + "grad_norm": 1.7371667830100215, + "learning_rate": 4.4489927125961426e-06, + "loss": 0.7395, + "step": 22732 + }, + { + "epoch": 0.6967328674757877, + "grad_norm": 1.614682926451537, + "learning_rate": 4.448167082331687e-06, + "loss": 0.6562, + "step": 22733 + }, + { + "epoch": 0.6967635159985289, + "grad_norm": 1.398327093790356, + "learning_rate": 4.447341506770857e-06, + "loss": 0.6075, + "step": 22734 + }, + { + "epoch": 0.6967941645212701, + "grad_norm": 0.667241276239831, + "learning_rate": 4.4465159859217925e-06, + "loss": 0.5225, + "step": 22735 + }, + { + "epoch": 0.6968248130440112, + "grad_norm": 1.5312179626496667, + "learning_rate": 4.44569051979263e-06, + "loss": 0.6731, + "step": 22736 + }, + { + "epoch": 0.6968554615667525, + "grad_norm": 1.4351400612921914, + "learning_rate": 4.4448651083914966e-06, + "loss": 0.6359, + "step": 22737 + }, + { + "epoch": 0.6968861100894936, + "grad_norm": 1.503186339487562, + "learning_rate": 4.444039751726529e-06, + "loss": 0.7061, + "step": 22738 + }, + { + "epoch": 0.6969167586122349, + "grad_norm": 1.803819332645779, + "learning_rate": 4.443214449805858e-06, + "loss": 0.7148, + "step": 22739 + }, + { + "epoch": 0.696947407134976, + "grad_norm": 1.7477463449581632, + "learning_rate": 4.442389202637622e-06, + "loss": 0.67, + "step": 22740 + }, + { + "epoch": 0.6969780556577173, + "grad_norm": 1.8027100841934696, + "learning_rate": 4.441564010229947e-06, + "loss": 0.6658, + "step": 22741 + }, + { + "epoch": 0.6970087041804585, + "grad_norm": 1.6520949538677698, + "learning_rate": 4.440738872590956e-06, + "loss": 0.6969, + "step": 22742 + }, + { + "epoch": 0.6970393527031997, + "grad_norm": 1.580597289454352, + "learning_rate": 4.439913789728794e-06, + "loss": 0.6564, + "step": 22743 + }, + { + "epoch": 0.6970700012259409, + "grad_norm": 1.7604594366380197, + "learning_rate": 4.439088761651586e-06, + "loss": 0.6088, + "step": 22744 + }, + { + "epoch": 0.6971006497486821, + "grad_norm": 1.5753518524756918, + "learning_rate": 4.438263788367454e-06, + "loss": 0.6086, + "step": 22745 + }, + { + "epoch": 0.6971312982714233, + "grad_norm": 0.6588507666675979, + "learning_rate": 4.437438869884533e-06, + "loss": 0.5273, + "step": 22746 + }, + { + "epoch": 0.6971619467941645, + "grad_norm": 1.507095818442311, + "learning_rate": 4.4366140062109495e-06, + "loss": 0.6536, + "step": 22747 + }, + { + "epoch": 0.6971925953169057, + "grad_norm": 0.6682072932479846, + "learning_rate": 4.435789197354835e-06, + "loss": 0.5428, + "step": 22748 + }, + { + "epoch": 0.697223243839647, + "grad_norm": 1.698394727211246, + "learning_rate": 4.43496444332431e-06, + "loss": 0.6754, + "step": 22749 + }, + { + "epoch": 0.6972538923623881, + "grad_norm": 0.6934899977021094, + "learning_rate": 4.434139744127504e-06, + "loss": 0.5579, + "step": 22750 + }, + { + "epoch": 0.6972845408851294, + "grad_norm": 1.8545761737266138, + "learning_rate": 4.433315099772547e-06, + "loss": 0.6634, + "step": 22751 + }, + { + "epoch": 0.6973151894078705, + "grad_norm": 1.4774618261252863, + "learning_rate": 4.432490510267561e-06, + "loss": 0.6956, + "step": 22752 + }, + { + "epoch": 0.6973458379306118, + "grad_norm": 1.5700993831324603, + "learning_rate": 4.431665975620662e-06, + "loss": 0.5781, + "step": 22753 + }, + { + "epoch": 0.6973764864533529, + "grad_norm": 1.7956406931392928, + "learning_rate": 4.430841495839992e-06, + "loss": 0.7087, + "step": 22754 + }, + { + "epoch": 0.6974071349760942, + "grad_norm": 1.6788195726275394, + "learning_rate": 4.4300170709336635e-06, + "loss": 0.6957, + "step": 22755 + }, + { + "epoch": 0.6974377834988353, + "grad_norm": 1.7578669356987802, + "learning_rate": 4.429192700909799e-06, + "loss": 0.6555, + "step": 22756 + }, + { + "epoch": 0.6974684320215766, + "grad_norm": 1.7541484686180333, + "learning_rate": 4.428368385776525e-06, + "loss": 0.6088, + "step": 22757 + }, + { + "epoch": 0.6974990805443178, + "grad_norm": 1.4931359535147917, + "learning_rate": 4.4275441255419624e-06, + "loss": 0.6326, + "step": 22758 + }, + { + "epoch": 0.697529729067059, + "grad_norm": 1.4458881067185456, + "learning_rate": 4.426719920214236e-06, + "loss": 0.6407, + "step": 22759 + }, + { + "epoch": 0.6975603775898002, + "grad_norm": 1.5400133062275188, + "learning_rate": 4.425895769801462e-06, + "loss": 0.5611, + "step": 22760 + }, + { + "epoch": 0.6975910261125414, + "grad_norm": 1.8254140683097166, + "learning_rate": 4.425071674311763e-06, + "loss": 0.7026, + "step": 22761 + }, + { + "epoch": 0.6976216746352826, + "grad_norm": 1.538216984006336, + "learning_rate": 4.424247633753262e-06, + "loss": 0.6077, + "step": 22762 + }, + { + "epoch": 0.6976523231580238, + "grad_norm": 1.7091402554072634, + "learning_rate": 4.423423648134076e-06, + "loss": 0.672, + "step": 22763 + }, + { + "epoch": 0.697682971680765, + "grad_norm": 0.6854692070855241, + "learning_rate": 4.422599717462317e-06, + "loss": 0.5482, + "step": 22764 + }, + { + "epoch": 0.6977136202035062, + "grad_norm": 0.6377816284042287, + "learning_rate": 4.421775841746116e-06, + "loss": 0.5079, + "step": 22765 + }, + { + "epoch": 0.6977442687262474, + "grad_norm": 0.6765388890036407, + "learning_rate": 4.420952020993583e-06, + "loss": 0.5469, + "step": 22766 + }, + { + "epoch": 0.6977749172489885, + "grad_norm": 1.7499289333145243, + "learning_rate": 4.42012825521284e-06, + "loss": 0.6301, + "step": 22767 + }, + { + "epoch": 0.6978055657717298, + "grad_norm": 1.5043354295895093, + "learning_rate": 4.419304544411997e-06, + "loss": 0.5857, + "step": 22768 + }, + { + "epoch": 0.697836214294471, + "grad_norm": 0.6427148748644524, + "learning_rate": 4.4184808885991744e-06, + "loss": 0.5321, + "step": 22769 + }, + { + "epoch": 0.6978668628172122, + "grad_norm": 1.6599626980451438, + "learning_rate": 4.417657287782492e-06, + "loss": 0.6144, + "step": 22770 + }, + { + "epoch": 0.6978975113399534, + "grad_norm": 1.7712781525120413, + "learning_rate": 4.416833741970056e-06, + "loss": 0.624, + "step": 22771 + }, + { + "epoch": 0.6979281598626946, + "grad_norm": 1.591259662549407, + "learning_rate": 4.4160102511699866e-06, + "loss": 0.665, + "step": 22772 + }, + { + "epoch": 0.6979588083854358, + "grad_norm": 1.7643330735760878, + "learning_rate": 4.4151868153904e-06, + "loss": 0.5912, + "step": 22773 + }, + { + "epoch": 0.697989456908177, + "grad_norm": 1.6752731275648904, + "learning_rate": 4.414363434639403e-06, + "loss": 0.6823, + "step": 22774 + }, + { + "epoch": 0.6980201054309182, + "grad_norm": 1.733548081473547, + "learning_rate": 4.413540108925115e-06, + "loss": 0.6821, + "step": 22775 + }, + { + "epoch": 0.6980507539536595, + "grad_norm": 1.5851572513782939, + "learning_rate": 4.412716838255643e-06, + "loss": 0.7185, + "step": 22776 + }, + { + "epoch": 0.6980814024764006, + "grad_norm": 1.6817202008003163, + "learning_rate": 4.411893622639102e-06, + "loss": 0.6806, + "step": 22777 + }, + { + "epoch": 0.6981120509991419, + "grad_norm": 1.787639973425934, + "learning_rate": 4.411070462083606e-06, + "loss": 0.515, + "step": 22778 + }, + { + "epoch": 0.698142699521883, + "grad_norm": 1.648160077780403, + "learning_rate": 4.410247356597259e-06, + "loss": 0.648, + "step": 22779 + }, + { + "epoch": 0.6981733480446243, + "grad_norm": 1.9650502300563972, + "learning_rate": 4.409424306188175e-06, + "loss": 0.7409, + "step": 22780 + }, + { + "epoch": 0.6982039965673654, + "grad_norm": 0.6798409887196951, + "learning_rate": 4.408601310864468e-06, + "loss": 0.5441, + "step": 22781 + }, + { + "epoch": 0.6982346450901067, + "grad_norm": 1.6632103713842188, + "learning_rate": 4.407778370634243e-06, + "loss": 0.5684, + "step": 22782 + }, + { + "epoch": 0.6982652936128478, + "grad_norm": 0.6635226661749424, + "learning_rate": 4.4069554855055996e-06, + "loss": 0.5483, + "step": 22783 + }, + { + "epoch": 0.6982959421355891, + "grad_norm": 1.6646608730048915, + "learning_rate": 4.406132655486663e-06, + "loss": 0.7228, + "step": 22784 + }, + { + "epoch": 0.6983265906583302, + "grad_norm": 1.5310826054813322, + "learning_rate": 4.405309880585529e-06, + "loss": 0.6011, + "step": 22785 + }, + { + "epoch": 0.6983572391810715, + "grad_norm": 1.7881946421620174, + "learning_rate": 4.404487160810312e-06, + "loss": 0.6839, + "step": 22786 + }, + { + "epoch": 0.6983878877038127, + "grad_norm": 1.49398618451173, + "learning_rate": 4.403664496169111e-06, + "loss": 0.5636, + "step": 22787 + }, + { + "epoch": 0.6984185362265539, + "grad_norm": 1.4519400155892228, + "learning_rate": 4.402841886670036e-06, + "loss": 0.6517, + "step": 22788 + }, + { + "epoch": 0.6984491847492951, + "grad_norm": 1.7754889900306385, + "learning_rate": 4.402019332321195e-06, + "loss": 0.678, + "step": 22789 + }, + { + "epoch": 0.6984798332720363, + "grad_norm": 0.6607649679100758, + "learning_rate": 4.401196833130686e-06, + "loss": 0.5201, + "step": 22790 + }, + { + "epoch": 0.6985104817947775, + "grad_norm": 1.3876600553715635, + "learning_rate": 4.400374389106617e-06, + "loss": 0.5968, + "step": 22791 + }, + { + "epoch": 0.6985411303175187, + "grad_norm": 0.6490115339711956, + "learning_rate": 4.399552000257097e-06, + "loss": 0.5383, + "step": 22792 + }, + { + "epoch": 0.6985717788402599, + "grad_norm": 1.7596987612659816, + "learning_rate": 4.39872966659022e-06, + "loss": 0.6523, + "step": 22793 + }, + { + "epoch": 0.6986024273630012, + "grad_norm": 1.5642677850573412, + "learning_rate": 4.397907388114092e-06, + "loss": 0.6989, + "step": 22794 + }, + { + "epoch": 0.6986330758857423, + "grad_norm": 1.588656677318717, + "learning_rate": 4.397085164836819e-06, + "loss": 0.5753, + "step": 22795 + }, + { + "epoch": 0.6986637244084836, + "grad_norm": 1.5938962304860427, + "learning_rate": 4.396262996766497e-06, + "loss": 0.764, + "step": 22796 + }, + { + "epoch": 0.6986943729312247, + "grad_norm": 2.8949023568600287, + "learning_rate": 4.395440883911233e-06, + "loss": 0.5172, + "step": 22797 + }, + { + "epoch": 0.6987250214539659, + "grad_norm": 1.6971121665764848, + "learning_rate": 4.39461882627912e-06, + "loss": 0.7174, + "step": 22798 + }, + { + "epoch": 0.6987556699767071, + "grad_norm": 1.4947504007692156, + "learning_rate": 4.3937968238782616e-06, + "loss": 0.718, + "step": 22799 + }, + { + "epoch": 0.6987863184994483, + "grad_norm": 1.8776276916243908, + "learning_rate": 4.392974876716761e-06, + "loss": 0.6359, + "step": 22800 + }, + { + "epoch": 0.6988169670221895, + "grad_norm": 1.455594078711846, + "learning_rate": 4.392152984802711e-06, + "loss": 0.689, + "step": 22801 + }, + { + "epoch": 0.6988476155449307, + "grad_norm": 1.6884526485383877, + "learning_rate": 4.391331148144211e-06, + "loss": 0.6972, + "step": 22802 + }, + { + "epoch": 0.698878264067672, + "grad_norm": 1.7371617970091142, + "learning_rate": 4.390509366749365e-06, + "loss": 0.7432, + "step": 22803 + }, + { + "epoch": 0.6989089125904131, + "grad_norm": 1.5281923308798286, + "learning_rate": 4.389687640626261e-06, + "loss": 0.5998, + "step": 22804 + }, + { + "epoch": 0.6989395611131544, + "grad_norm": 1.6856128564214303, + "learning_rate": 4.388865969783002e-06, + "loss": 0.7056, + "step": 22805 + }, + { + "epoch": 0.6989702096358955, + "grad_norm": 1.601517054759162, + "learning_rate": 4.388044354227684e-06, + "loss": 0.7174, + "step": 22806 + }, + { + "epoch": 0.6990008581586368, + "grad_norm": 1.6799508147377031, + "learning_rate": 4.387222793968398e-06, + "loss": 0.6268, + "step": 22807 + }, + { + "epoch": 0.6990315066813779, + "grad_norm": 0.638906602763898, + "learning_rate": 4.386401289013244e-06, + "loss": 0.5476, + "step": 22808 + }, + { + "epoch": 0.6990621552041192, + "grad_norm": 1.6223942346870164, + "learning_rate": 4.385579839370313e-06, + "loss": 0.5621, + "step": 22809 + }, + { + "epoch": 0.6990928037268603, + "grad_norm": 1.5669019263448265, + "learning_rate": 4.3847584450477e-06, + "loss": 0.6126, + "step": 22810 + }, + { + "epoch": 0.6991234522496016, + "grad_norm": 1.6371370455282237, + "learning_rate": 4.3839371060535005e-06, + "loss": 0.6677, + "step": 22811 + }, + { + "epoch": 0.6991541007723427, + "grad_norm": 1.640372222110606, + "learning_rate": 4.383115822395804e-06, + "loss": 0.7444, + "step": 22812 + }, + { + "epoch": 0.699184749295084, + "grad_norm": 1.3339408765421958, + "learning_rate": 4.3822945940827035e-06, + "loss": 0.6004, + "step": 22813 + }, + { + "epoch": 0.6992153978178252, + "grad_norm": 1.4906217062823546, + "learning_rate": 4.381473421122295e-06, + "loss": 0.6338, + "step": 22814 + }, + { + "epoch": 0.6992460463405664, + "grad_norm": 1.9122483465404136, + "learning_rate": 4.380652303522665e-06, + "loss": 0.6648, + "step": 22815 + }, + { + "epoch": 0.6992766948633076, + "grad_norm": 1.7101076541330613, + "learning_rate": 4.379831241291903e-06, + "loss": 0.7125, + "step": 22816 + }, + { + "epoch": 0.6993073433860488, + "grad_norm": 1.6355193143268558, + "learning_rate": 4.379010234438107e-06, + "loss": 0.6176, + "step": 22817 + }, + { + "epoch": 0.69933799190879, + "grad_norm": 1.589882568085901, + "learning_rate": 4.378189282969357e-06, + "loss": 0.7328, + "step": 22818 + }, + { + "epoch": 0.6993686404315312, + "grad_norm": 1.6010392149220911, + "learning_rate": 4.37736838689375e-06, + "loss": 0.6228, + "step": 22819 + }, + { + "epoch": 0.6993992889542724, + "grad_norm": 1.6387022464916192, + "learning_rate": 4.376547546219368e-06, + "loss": 0.6777, + "step": 22820 + }, + { + "epoch": 0.6994299374770137, + "grad_norm": 1.5641318180689776, + "learning_rate": 4.375726760954301e-06, + "loss": 0.6416, + "step": 22821 + }, + { + "epoch": 0.6994605859997548, + "grad_norm": 1.5303267146420343, + "learning_rate": 4.37490603110664e-06, + "loss": 0.6629, + "step": 22822 + }, + { + "epoch": 0.6994912345224961, + "grad_norm": 1.446113546979864, + "learning_rate": 4.374085356684468e-06, + "loss": 0.6373, + "step": 22823 + }, + { + "epoch": 0.6995218830452372, + "grad_norm": 1.6987512434813337, + "learning_rate": 4.37326473769587e-06, + "loss": 0.7244, + "step": 22824 + }, + { + "epoch": 0.6995525315679785, + "grad_norm": 1.4933785702421039, + "learning_rate": 4.37244417414894e-06, + "loss": 0.6336, + "step": 22825 + }, + { + "epoch": 0.6995831800907196, + "grad_norm": 1.5581386276594695, + "learning_rate": 4.371623666051752e-06, + "loss": 0.7171, + "step": 22826 + }, + { + "epoch": 0.6996138286134609, + "grad_norm": 1.6493364634074497, + "learning_rate": 4.370803213412401e-06, + "loss": 0.6103, + "step": 22827 + }, + { + "epoch": 0.699644477136202, + "grad_norm": 1.6994335272836218, + "learning_rate": 4.369982816238962e-06, + "loss": 0.7135, + "step": 22828 + }, + { + "epoch": 0.6996751256589432, + "grad_norm": 1.5263397930440137, + "learning_rate": 4.369162474539522e-06, + "loss": 0.6345, + "step": 22829 + }, + { + "epoch": 0.6997057741816844, + "grad_norm": 1.663578806334842, + "learning_rate": 4.36834218832217e-06, + "loss": 0.6667, + "step": 22830 + }, + { + "epoch": 0.6997364227044256, + "grad_norm": 1.385955792948479, + "learning_rate": 4.367521957594979e-06, + "loss": 0.5476, + "step": 22831 + }, + { + "epoch": 0.6997670712271669, + "grad_norm": 0.6609583446981749, + "learning_rate": 4.366701782366035e-06, + "loss": 0.5261, + "step": 22832 + }, + { + "epoch": 0.699797719749908, + "grad_norm": 1.597223907525147, + "learning_rate": 4.365881662643424e-06, + "loss": 0.6967, + "step": 22833 + }, + { + "epoch": 0.6998283682726493, + "grad_norm": 1.7133602451906031, + "learning_rate": 4.365061598435219e-06, + "loss": 0.6641, + "step": 22834 + }, + { + "epoch": 0.6998590167953904, + "grad_norm": 0.6670225971528454, + "learning_rate": 4.364241589749503e-06, + "loss": 0.5269, + "step": 22835 + }, + { + "epoch": 0.6998896653181317, + "grad_norm": 1.9816801956312275, + "learning_rate": 4.36342163659436e-06, + "loss": 0.6989, + "step": 22836 + }, + { + "epoch": 0.6999203138408728, + "grad_norm": 1.7638444253692567, + "learning_rate": 4.362601738977863e-06, + "loss": 0.7014, + "step": 22837 + }, + { + "epoch": 0.6999509623636141, + "grad_norm": 1.5968399570313483, + "learning_rate": 4.361781896908097e-06, + "loss": 0.7043, + "step": 22838 + }, + { + "epoch": 0.6999816108863552, + "grad_norm": 1.7634031673492565, + "learning_rate": 4.36096211039313e-06, + "loss": 0.6164, + "step": 22839 + }, + { + "epoch": 0.7000122594090965, + "grad_norm": 1.5590648298656378, + "learning_rate": 4.360142379441052e-06, + "loss": 0.6761, + "step": 22840 + }, + { + "epoch": 0.7000429079318377, + "grad_norm": 1.5998177857031444, + "learning_rate": 4.359322704059935e-06, + "loss": 0.726, + "step": 22841 + }, + { + "epoch": 0.7000735564545789, + "grad_norm": 1.6905599987051185, + "learning_rate": 4.35850308425785e-06, + "loss": 0.702, + "step": 22842 + }, + { + "epoch": 0.7001042049773201, + "grad_norm": 1.600168749832562, + "learning_rate": 4.3576835200428795e-06, + "loss": 0.6926, + "step": 22843 + }, + { + "epoch": 0.7001348535000613, + "grad_norm": 0.63887130980602, + "learning_rate": 4.356864011423099e-06, + "loss": 0.5347, + "step": 22844 + }, + { + "epoch": 0.7001655020228025, + "grad_norm": 1.4751993977911122, + "learning_rate": 4.356044558406577e-06, + "loss": 0.5777, + "step": 22845 + }, + { + "epoch": 0.7001961505455437, + "grad_norm": 1.9326483954237654, + "learning_rate": 4.355225161001393e-06, + "loss": 0.705, + "step": 22846 + }, + { + "epoch": 0.7002267990682849, + "grad_norm": 1.669122820087519, + "learning_rate": 4.35440581921562e-06, + "loss": 0.6878, + "step": 22847 + }, + { + "epoch": 0.7002574475910261, + "grad_norm": 1.460315974291011, + "learning_rate": 4.353586533057334e-06, + "loss": 0.5833, + "step": 22848 + }, + { + "epoch": 0.7002880961137673, + "grad_norm": 1.7363483602811465, + "learning_rate": 4.352767302534605e-06, + "loss": 0.7472, + "step": 22849 + }, + { + "epoch": 0.7003187446365086, + "grad_norm": 1.593581220617369, + "learning_rate": 4.351948127655497e-06, + "loss": 0.6666, + "step": 22850 + }, + { + "epoch": 0.7003493931592497, + "grad_norm": 0.6988630778972866, + "learning_rate": 4.351129008428098e-06, + "loss": 0.5498, + "step": 22851 + }, + { + "epoch": 0.700380041681991, + "grad_norm": 1.571131848139639, + "learning_rate": 4.350309944860468e-06, + "loss": 0.6354, + "step": 22852 + }, + { + "epoch": 0.7004106902047321, + "grad_norm": 1.5974282444341663, + "learning_rate": 4.3494909369606774e-06, + "loss": 0.6896, + "step": 22853 + }, + { + "epoch": 0.7004413387274734, + "grad_norm": 1.6170277281176926, + "learning_rate": 4.348671984736798e-06, + "loss": 0.6652, + "step": 22854 + }, + { + "epoch": 0.7004719872502145, + "grad_norm": 1.33311841421067, + "learning_rate": 4.3478530881969025e-06, + "loss": 0.5585, + "step": 22855 + }, + { + "epoch": 0.7005026357729558, + "grad_norm": 1.7233496726783797, + "learning_rate": 4.347034247349055e-06, + "loss": 0.5929, + "step": 22856 + }, + { + "epoch": 0.700533284295697, + "grad_norm": 1.817964494790664, + "learning_rate": 4.346215462201323e-06, + "loss": 0.6845, + "step": 22857 + }, + { + "epoch": 0.7005639328184382, + "grad_norm": 1.5006498467479332, + "learning_rate": 4.345396732761778e-06, + "loss": 0.6616, + "step": 22858 + }, + { + "epoch": 0.7005945813411794, + "grad_norm": 1.559975593241484, + "learning_rate": 4.344578059038489e-06, + "loss": 0.5844, + "step": 22859 + }, + { + "epoch": 0.7006252298639205, + "grad_norm": 1.4995224153311688, + "learning_rate": 4.34375944103952e-06, + "loss": 0.7639, + "step": 22860 + }, + { + "epoch": 0.7006558783866618, + "grad_norm": 0.6604993486023586, + "learning_rate": 4.3429408787729275e-06, + "loss": 0.5163, + "step": 22861 + }, + { + "epoch": 0.7006865269094029, + "grad_norm": 1.5926293143944852, + "learning_rate": 4.3421223722467955e-06, + "loss": 0.6716, + "step": 22862 + }, + { + "epoch": 0.7007171754321442, + "grad_norm": 1.6136455984472784, + "learning_rate": 4.341303921469178e-06, + "loss": 0.6097, + "step": 22863 + }, + { + "epoch": 0.7007478239548853, + "grad_norm": 1.49839545264153, + "learning_rate": 4.340485526448137e-06, + "loss": 0.6719, + "step": 22864 + }, + { + "epoch": 0.7007784724776266, + "grad_norm": 1.682257074271093, + "learning_rate": 4.339667187191741e-06, + "loss": 0.7295, + "step": 22865 + }, + { + "epoch": 0.7008091210003677, + "grad_norm": 1.5508685098331731, + "learning_rate": 4.338848903708052e-06, + "loss": 0.5993, + "step": 22866 + }, + { + "epoch": 0.700839769523109, + "grad_norm": 1.8490907715611236, + "learning_rate": 4.338030676005137e-06, + "loss": 0.7778, + "step": 22867 + }, + { + "epoch": 0.7008704180458502, + "grad_norm": 1.6706124515025311, + "learning_rate": 4.33721250409105e-06, + "loss": 0.6671, + "step": 22868 + }, + { + "epoch": 0.7009010665685914, + "grad_norm": 1.4886299320934804, + "learning_rate": 4.336394387973859e-06, + "loss": 0.5685, + "step": 22869 + }, + { + "epoch": 0.7009317150913326, + "grad_norm": 1.7112898297400645, + "learning_rate": 4.335576327661625e-06, + "loss": 0.5935, + "step": 22870 + }, + { + "epoch": 0.7009623636140738, + "grad_norm": 1.5726554252742266, + "learning_rate": 4.334758323162408e-06, + "loss": 0.6324, + "step": 22871 + }, + { + "epoch": 0.700993012136815, + "grad_norm": 1.5522787688962318, + "learning_rate": 4.3339403744842625e-06, + "loss": 0.6151, + "step": 22872 + }, + { + "epoch": 0.7010236606595562, + "grad_norm": 1.4673833769325477, + "learning_rate": 4.333122481635252e-06, + "loss": 0.6171, + "step": 22873 + }, + { + "epoch": 0.7010543091822974, + "grad_norm": 0.6916917764397832, + "learning_rate": 4.332304644623435e-06, + "loss": 0.543, + "step": 22874 + }, + { + "epoch": 0.7010849577050386, + "grad_norm": 1.674957901206853, + "learning_rate": 4.3314868634568754e-06, + "loss": 0.7016, + "step": 22875 + }, + { + "epoch": 0.7011156062277798, + "grad_norm": 1.5227779548224287, + "learning_rate": 4.330669138143622e-06, + "loss": 0.6722, + "step": 22876 + }, + { + "epoch": 0.7011462547505211, + "grad_norm": 1.5659892524406018, + "learning_rate": 4.329851468691736e-06, + "loss": 0.6269, + "step": 22877 + }, + { + "epoch": 0.7011769032732622, + "grad_norm": 1.6872767762018932, + "learning_rate": 4.329033855109278e-06, + "loss": 0.6981, + "step": 22878 + }, + { + "epoch": 0.7012075517960035, + "grad_norm": 0.6660778050673822, + "learning_rate": 4.3282162974043e-06, + "loss": 0.5545, + "step": 22879 + }, + { + "epoch": 0.7012382003187446, + "grad_norm": 1.4493720371715832, + "learning_rate": 4.327398795584852e-06, + "loss": 0.5458, + "step": 22880 + }, + { + "epoch": 0.7012688488414859, + "grad_norm": 1.4667134733146483, + "learning_rate": 4.326581349659001e-06, + "loss": 0.6622, + "step": 22881 + }, + { + "epoch": 0.701299497364227, + "grad_norm": 1.6100954557566618, + "learning_rate": 4.3257639596347965e-06, + "loss": 0.7225, + "step": 22882 + }, + { + "epoch": 0.7013301458869683, + "grad_norm": 1.3306640453799325, + "learning_rate": 4.324946625520287e-06, + "loss": 0.6536, + "step": 22883 + }, + { + "epoch": 0.7013607944097094, + "grad_norm": 1.8030196846917417, + "learning_rate": 4.3241293473235315e-06, + "loss": 0.7101, + "step": 22884 + }, + { + "epoch": 0.7013914429324507, + "grad_norm": 1.5022328083192498, + "learning_rate": 4.323312125052581e-06, + "loss": 0.6826, + "step": 22885 + }, + { + "epoch": 0.7014220914551919, + "grad_norm": 1.6151510783664749, + "learning_rate": 4.3224949587154915e-06, + "loss": 0.8138, + "step": 22886 + }, + { + "epoch": 0.7014527399779331, + "grad_norm": 0.6311970910307793, + "learning_rate": 4.32167784832031e-06, + "loss": 0.5219, + "step": 22887 + }, + { + "epoch": 0.7014833885006743, + "grad_norm": 0.6916226206936535, + "learning_rate": 4.320860793875088e-06, + "loss": 0.5228, + "step": 22888 + }, + { + "epoch": 0.7015140370234155, + "grad_norm": 1.4539266681432117, + "learning_rate": 4.3200437953878825e-06, + "loss": 0.4981, + "step": 22889 + }, + { + "epoch": 0.7015446855461567, + "grad_norm": 1.8759793450483202, + "learning_rate": 4.319226852866738e-06, + "loss": 0.6451, + "step": 22890 + }, + { + "epoch": 0.7015753340688978, + "grad_norm": 1.6089269922103813, + "learning_rate": 4.318409966319697e-06, + "loss": 0.6514, + "step": 22891 + }, + { + "epoch": 0.7016059825916391, + "grad_norm": 1.594155736371741, + "learning_rate": 4.317593135754825e-06, + "loss": 0.6009, + "step": 22892 + }, + { + "epoch": 0.7016366311143802, + "grad_norm": 0.6579694263503505, + "learning_rate": 4.316776361180157e-06, + "loss": 0.5493, + "step": 22893 + }, + { + "epoch": 0.7016672796371215, + "grad_norm": 0.8933501454391314, + "learning_rate": 4.31595964260375e-06, + "loss": 0.5497, + "step": 22894 + }, + { + "epoch": 0.7016979281598626, + "grad_norm": 1.4318334156722765, + "learning_rate": 4.315142980033643e-06, + "loss": 0.5262, + "step": 22895 + }, + { + "epoch": 0.7017285766826039, + "grad_norm": 0.6496468506331948, + "learning_rate": 4.314326373477886e-06, + "loss": 0.5475, + "step": 22896 + }, + { + "epoch": 0.7017592252053451, + "grad_norm": 1.4753140475173288, + "learning_rate": 4.31350982294453e-06, + "loss": 0.5208, + "step": 22897 + }, + { + "epoch": 0.7017898737280863, + "grad_norm": 1.7485746515959355, + "learning_rate": 4.312693328441614e-06, + "loss": 0.7862, + "step": 22898 + }, + { + "epoch": 0.7018205222508275, + "grad_norm": 1.629372691692815, + "learning_rate": 4.311876889977186e-06, + "loss": 0.6762, + "step": 22899 + }, + { + "epoch": 0.7018511707735687, + "grad_norm": 1.8865706502717394, + "learning_rate": 4.3110605075592926e-06, + "loss": 0.6658, + "step": 22900 + }, + { + "epoch": 0.7018818192963099, + "grad_norm": 1.7360808301171275, + "learning_rate": 4.310244181195973e-06, + "loss": 0.6469, + "step": 22901 + }, + { + "epoch": 0.7019124678190511, + "grad_norm": 1.6903232459527524, + "learning_rate": 4.309427910895272e-06, + "loss": 0.6833, + "step": 22902 + }, + { + "epoch": 0.7019431163417923, + "grad_norm": 1.604151287742116, + "learning_rate": 4.308611696665238e-06, + "loss": 0.6649, + "step": 22903 + }, + { + "epoch": 0.7019737648645336, + "grad_norm": 1.6790719947687351, + "learning_rate": 4.307795538513906e-06, + "loss": 0.57, + "step": 22904 + }, + { + "epoch": 0.7020044133872747, + "grad_norm": 1.590319947066966, + "learning_rate": 4.306979436449325e-06, + "loss": 0.6768, + "step": 22905 + }, + { + "epoch": 0.702035061910016, + "grad_norm": 1.6476021152249125, + "learning_rate": 4.306163390479527e-06, + "loss": 0.6462, + "step": 22906 + }, + { + "epoch": 0.7020657104327571, + "grad_norm": 1.7731307076425404, + "learning_rate": 4.30534740061256e-06, + "loss": 0.7036, + "step": 22907 + }, + { + "epoch": 0.7020963589554984, + "grad_norm": 0.6754061303869225, + "learning_rate": 4.304531466856464e-06, + "loss": 0.5416, + "step": 22908 + }, + { + "epoch": 0.7021270074782395, + "grad_norm": 1.7695583905264167, + "learning_rate": 4.303715589219274e-06, + "loss": 0.7017, + "step": 22909 + }, + { + "epoch": 0.7021576560009808, + "grad_norm": 1.8226528769764683, + "learning_rate": 4.302899767709031e-06, + "loss": 0.7325, + "step": 22910 + }, + { + "epoch": 0.7021883045237219, + "grad_norm": 1.6028043366691849, + "learning_rate": 4.3020840023337785e-06, + "loss": 0.7033, + "step": 22911 + }, + { + "epoch": 0.7022189530464632, + "grad_norm": 1.732429367983148, + "learning_rate": 4.3012682931015456e-06, + "loss": 0.6603, + "step": 22912 + }, + { + "epoch": 0.7022496015692044, + "grad_norm": 1.6398411411438605, + "learning_rate": 4.300452640020376e-06, + "loss": 0.6158, + "step": 22913 + }, + { + "epoch": 0.7022802500919456, + "grad_norm": 1.6664172989663337, + "learning_rate": 4.299637043098307e-06, + "loss": 0.5091, + "step": 22914 + }, + { + "epoch": 0.7023108986146868, + "grad_norm": 1.6511831588457087, + "learning_rate": 4.29882150234337e-06, + "loss": 0.6406, + "step": 22915 + }, + { + "epoch": 0.702341547137428, + "grad_norm": 1.9676876677952062, + "learning_rate": 4.2980060177636064e-06, + "loss": 0.6539, + "step": 22916 + }, + { + "epoch": 0.7023721956601692, + "grad_norm": 1.6692725708197191, + "learning_rate": 4.297190589367045e-06, + "loss": 0.6611, + "step": 22917 + }, + { + "epoch": 0.7024028441829104, + "grad_norm": 1.5295044880298103, + "learning_rate": 4.296375217161724e-06, + "loss": 0.6453, + "step": 22918 + }, + { + "epoch": 0.7024334927056516, + "grad_norm": 1.7628913536373731, + "learning_rate": 4.295559901155681e-06, + "loss": 0.5836, + "step": 22919 + }, + { + "epoch": 0.7024641412283928, + "grad_norm": 0.670046348407048, + "learning_rate": 4.294744641356942e-06, + "loss": 0.5294, + "step": 22920 + }, + { + "epoch": 0.702494789751134, + "grad_norm": 1.6230295538466393, + "learning_rate": 4.293929437773544e-06, + "loss": 0.6157, + "step": 22921 + }, + { + "epoch": 0.7025254382738751, + "grad_norm": 1.6686250419157374, + "learning_rate": 4.293114290413523e-06, + "loss": 0.6425, + "step": 22922 + }, + { + "epoch": 0.7025560867966164, + "grad_norm": 1.6422603039368824, + "learning_rate": 4.292299199284903e-06, + "loss": 0.6221, + "step": 22923 + }, + { + "epoch": 0.7025867353193576, + "grad_norm": 1.5968281683205376, + "learning_rate": 4.291484164395724e-06, + "loss": 0.6385, + "step": 22924 + }, + { + "epoch": 0.7026173838420988, + "grad_norm": 1.5773663515938923, + "learning_rate": 4.290669185754007e-06, + "loss": 0.594, + "step": 22925 + }, + { + "epoch": 0.70264803236484, + "grad_norm": 1.728472218608017, + "learning_rate": 4.289854263367788e-06, + "loss": 0.7556, + "step": 22926 + }, + { + "epoch": 0.7026786808875812, + "grad_norm": 1.4346900974470442, + "learning_rate": 4.2890393972451e-06, + "loss": 0.563, + "step": 22927 + }, + { + "epoch": 0.7027093294103224, + "grad_norm": 1.4246895694166262, + "learning_rate": 4.288224587393963e-06, + "loss": 0.5989, + "step": 22928 + }, + { + "epoch": 0.7027399779330636, + "grad_norm": 0.7207456990356866, + "learning_rate": 4.2874098338224125e-06, + "loss": 0.5367, + "step": 22929 + }, + { + "epoch": 0.7027706264558048, + "grad_norm": 1.6154032054725869, + "learning_rate": 4.286595136538477e-06, + "loss": 0.7302, + "step": 22930 + }, + { + "epoch": 0.702801274978546, + "grad_norm": 0.6570743190277686, + "learning_rate": 4.285780495550178e-06, + "loss": 0.5142, + "step": 22931 + }, + { + "epoch": 0.7028319235012872, + "grad_norm": 1.459521935151343, + "learning_rate": 4.284965910865546e-06, + "loss": 0.5939, + "step": 22932 + }, + { + "epoch": 0.7028625720240285, + "grad_norm": 1.7400911867220819, + "learning_rate": 4.28415138249261e-06, + "loss": 0.7393, + "step": 22933 + }, + { + "epoch": 0.7028932205467696, + "grad_norm": 1.5732526100905169, + "learning_rate": 4.2833369104393894e-06, + "loss": 0.6175, + "step": 22934 + }, + { + "epoch": 0.7029238690695109, + "grad_norm": 1.688421066243506, + "learning_rate": 4.282522494713918e-06, + "loss": 0.6835, + "step": 22935 + }, + { + "epoch": 0.702954517592252, + "grad_norm": 1.583790841496585, + "learning_rate": 4.281708135324211e-06, + "loss": 0.6782, + "step": 22936 + }, + { + "epoch": 0.7029851661149933, + "grad_norm": 1.7435510441956112, + "learning_rate": 4.280893832278296e-06, + "loss": 0.6485, + "step": 22937 + }, + { + "epoch": 0.7030158146377344, + "grad_norm": 0.6531739046822449, + "learning_rate": 4.280079585584202e-06, + "loss": 0.5173, + "step": 22938 + }, + { + "epoch": 0.7030464631604757, + "grad_norm": 1.7036675278735884, + "learning_rate": 4.279265395249943e-06, + "loss": 0.582, + "step": 22939 + }, + { + "epoch": 0.7030771116832168, + "grad_norm": 1.7399026722497648, + "learning_rate": 4.278451261283546e-06, + "loss": 0.6326, + "step": 22940 + }, + { + "epoch": 0.7031077602059581, + "grad_norm": 1.3569065386696617, + "learning_rate": 4.277637183693037e-06, + "loss": 0.5754, + "step": 22941 + }, + { + "epoch": 0.7031384087286993, + "grad_norm": 1.802784160075084, + "learning_rate": 4.2768231624864275e-06, + "loss": 0.7424, + "step": 22942 + }, + { + "epoch": 0.7031690572514405, + "grad_norm": 1.7028766618556266, + "learning_rate": 4.276009197671744e-06, + "loss": 0.6593, + "step": 22943 + }, + { + "epoch": 0.7031997057741817, + "grad_norm": 0.7122174200369064, + "learning_rate": 4.275195289257011e-06, + "loss": 0.5461, + "step": 22944 + }, + { + "epoch": 0.7032303542969229, + "grad_norm": 1.6655060104994932, + "learning_rate": 4.27438143725024e-06, + "loss": 0.7428, + "step": 22945 + }, + { + "epoch": 0.7032610028196641, + "grad_norm": 1.5429202725908515, + "learning_rate": 4.273567641659457e-06, + "loss": 0.6223, + "step": 22946 + }, + { + "epoch": 0.7032916513424053, + "grad_norm": 1.579825329589653, + "learning_rate": 4.2727539024926715e-06, + "loss": 0.6642, + "step": 22947 + }, + { + "epoch": 0.7033222998651465, + "grad_norm": 0.6756994817844753, + "learning_rate": 4.2719402197579115e-06, + "loss": 0.5321, + "step": 22948 + }, + { + "epoch": 0.7033529483878878, + "grad_norm": 1.501313715508641, + "learning_rate": 4.271126593463193e-06, + "loss": 0.6576, + "step": 22949 + }, + { + "epoch": 0.7033835969106289, + "grad_norm": 1.7003295910047769, + "learning_rate": 4.270313023616525e-06, + "loss": 0.6645, + "step": 22950 + }, + { + "epoch": 0.7034142454333702, + "grad_norm": 0.685472962203506, + "learning_rate": 4.269499510225929e-06, + "loss": 0.5645, + "step": 22951 + }, + { + "epoch": 0.7034448939561113, + "grad_norm": 1.6193862347747712, + "learning_rate": 4.268686053299423e-06, + "loss": 0.6595, + "step": 22952 + }, + { + "epoch": 0.7034755424788525, + "grad_norm": 1.746575806065043, + "learning_rate": 4.267872652845017e-06, + "loss": 0.7104, + "step": 22953 + }, + { + "epoch": 0.7035061910015937, + "grad_norm": 1.6931259372904608, + "learning_rate": 4.267059308870728e-06, + "loss": 0.7221, + "step": 22954 + }, + { + "epoch": 0.7035368395243349, + "grad_norm": 1.5632174357302902, + "learning_rate": 4.2662460213845715e-06, + "loss": 0.6582, + "step": 22955 + }, + { + "epoch": 0.7035674880470761, + "grad_norm": 1.8227424387780655, + "learning_rate": 4.265432790394563e-06, + "loss": 0.736, + "step": 22956 + }, + { + "epoch": 0.7035981365698173, + "grad_norm": 1.5866963809611632, + "learning_rate": 4.264619615908712e-06, + "loss": 0.725, + "step": 22957 + }, + { + "epoch": 0.7036287850925586, + "grad_norm": 1.7864741015753496, + "learning_rate": 4.263806497935024e-06, + "loss": 0.6952, + "step": 22958 + }, + { + "epoch": 0.7036594336152997, + "grad_norm": 1.5940415529963428, + "learning_rate": 4.262993436481526e-06, + "loss": 0.6504, + "step": 22959 + }, + { + "epoch": 0.703690082138041, + "grad_norm": 1.6780297021005928, + "learning_rate": 4.262180431556222e-06, + "loss": 0.7496, + "step": 22960 + }, + { + "epoch": 0.7037207306607821, + "grad_norm": 1.722422468350872, + "learning_rate": 4.261367483167118e-06, + "loss": 0.6873, + "step": 22961 + }, + { + "epoch": 0.7037513791835234, + "grad_norm": 1.659346820092034, + "learning_rate": 4.260554591322229e-06, + "loss": 0.6431, + "step": 22962 + }, + { + "epoch": 0.7037820277062645, + "grad_norm": 1.5329446070325916, + "learning_rate": 4.259741756029568e-06, + "loss": 0.6021, + "step": 22963 + }, + { + "epoch": 0.7038126762290058, + "grad_norm": 1.537784701937858, + "learning_rate": 4.258928977297135e-06, + "loss": 0.625, + "step": 22964 + }, + { + "epoch": 0.7038433247517469, + "grad_norm": 1.465177078395192, + "learning_rate": 4.258116255132946e-06, + "loss": 0.6595, + "step": 22965 + }, + { + "epoch": 0.7038739732744882, + "grad_norm": 1.7261050015276378, + "learning_rate": 4.257303589545006e-06, + "loss": 0.7443, + "step": 22966 + }, + { + "epoch": 0.7039046217972293, + "grad_norm": 1.3919391452430623, + "learning_rate": 4.256490980541325e-06, + "loss": 0.6034, + "step": 22967 + }, + { + "epoch": 0.7039352703199706, + "grad_norm": 1.4049020814900044, + "learning_rate": 4.255678428129909e-06, + "loss": 0.6155, + "step": 22968 + }, + { + "epoch": 0.7039659188427118, + "grad_norm": 1.5241645909745651, + "learning_rate": 4.254865932318759e-06, + "loss": 0.6297, + "step": 22969 + }, + { + "epoch": 0.703996567365453, + "grad_norm": 1.456552936399337, + "learning_rate": 4.254053493115886e-06, + "loss": 0.6822, + "step": 22970 + }, + { + "epoch": 0.7040272158881942, + "grad_norm": 1.6502876002147147, + "learning_rate": 4.253241110529297e-06, + "loss": 0.6328, + "step": 22971 + }, + { + "epoch": 0.7040578644109354, + "grad_norm": 1.6226107466984019, + "learning_rate": 4.252428784566991e-06, + "loss": 0.6454, + "step": 22972 + }, + { + "epoch": 0.7040885129336766, + "grad_norm": 1.782007280603148, + "learning_rate": 4.2516165152369735e-06, + "loss": 0.7544, + "step": 22973 + }, + { + "epoch": 0.7041191614564178, + "grad_norm": 1.3775179892390388, + "learning_rate": 4.25080430254725e-06, + "loss": 0.6028, + "step": 22974 + }, + { + "epoch": 0.704149809979159, + "grad_norm": 1.64786511793477, + "learning_rate": 4.249992146505826e-06, + "loss": 0.7732, + "step": 22975 + }, + { + "epoch": 0.7041804585019003, + "grad_norm": 1.5899861452021078, + "learning_rate": 4.249180047120701e-06, + "loss": 0.6051, + "step": 22976 + }, + { + "epoch": 0.7042111070246414, + "grad_norm": 1.661224226672963, + "learning_rate": 4.248368004399868e-06, + "loss": 0.7675, + "step": 22977 + }, + { + "epoch": 0.7042417555473827, + "grad_norm": 1.460906599252647, + "learning_rate": 4.247556018351345e-06, + "loss": 0.6395, + "step": 22978 + }, + { + "epoch": 0.7042724040701238, + "grad_norm": 2.266048434343548, + "learning_rate": 4.246744088983124e-06, + "loss": 0.7232, + "step": 22979 + }, + { + "epoch": 0.7043030525928651, + "grad_norm": 1.5612971335596382, + "learning_rate": 4.245932216303203e-06, + "loss": 0.6848, + "step": 22980 + }, + { + "epoch": 0.7043337011156062, + "grad_norm": 1.6167072053298384, + "learning_rate": 4.2451204003195835e-06, + "loss": 0.632, + "step": 22981 + }, + { + "epoch": 0.7043643496383475, + "grad_norm": 1.772542163103707, + "learning_rate": 4.244308641040268e-06, + "loss": 0.7389, + "step": 22982 + }, + { + "epoch": 0.7043949981610886, + "grad_norm": 1.6654132629239602, + "learning_rate": 4.243496938473249e-06, + "loss": 0.6476, + "step": 22983 + }, + { + "epoch": 0.7044256466838298, + "grad_norm": 1.5727612471018952, + "learning_rate": 4.242685292626528e-06, + "loss": 0.6657, + "step": 22984 + }, + { + "epoch": 0.704456295206571, + "grad_norm": 1.6043152238301266, + "learning_rate": 4.241873703508101e-06, + "loss": 0.7084, + "step": 22985 + }, + { + "epoch": 0.7044869437293122, + "grad_norm": 0.6841017630877004, + "learning_rate": 4.24106217112597e-06, + "loss": 0.535, + "step": 22986 + }, + { + "epoch": 0.7045175922520535, + "grad_norm": 1.5954500917987064, + "learning_rate": 4.240250695488126e-06, + "loss": 0.5694, + "step": 22987 + }, + { + "epoch": 0.7045482407747946, + "grad_norm": 0.6798229228573557, + "learning_rate": 4.239439276602559e-06, + "loss": 0.5545, + "step": 22988 + }, + { + "epoch": 0.7045788892975359, + "grad_norm": 1.512009927616403, + "learning_rate": 4.238627914477278e-06, + "loss": 0.6085, + "step": 22989 + }, + { + "epoch": 0.704609537820277, + "grad_norm": 1.8840341793343665, + "learning_rate": 4.237816609120271e-06, + "loss": 0.6449, + "step": 22990 + }, + { + "epoch": 0.7046401863430183, + "grad_norm": 1.5579373806238095, + "learning_rate": 4.237005360539526e-06, + "loss": 0.6097, + "step": 22991 + }, + { + "epoch": 0.7046708348657594, + "grad_norm": 1.7199124950408817, + "learning_rate": 4.236194168743043e-06, + "loss": 0.5302, + "step": 22992 + }, + { + "epoch": 0.7047014833885007, + "grad_norm": 1.6602310184568567, + "learning_rate": 4.235383033738813e-06, + "loss": 0.6361, + "step": 22993 + }, + { + "epoch": 0.7047321319112418, + "grad_norm": 1.5561786140298683, + "learning_rate": 4.234571955534833e-06, + "loss": 0.6324, + "step": 22994 + }, + { + "epoch": 0.7047627804339831, + "grad_norm": 1.441354030656879, + "learning_rate": 4.233760934139086e-06, + "loss": 0.5906, + "step": 22995 + }, + { + "epoch": 0.7047934289567243, + "grad_norm": 1.8411890640557393, + "learning_rate": 4.232949969559569e-06, + "loss": 0.658, + "step": 22996 + }, + { + "epoch": 0.7048240774794655, + "grad_norm": 0.6845509571159931, + "learning_rate": 4.2321390618042745e-06, + "loss": 0.5507, + "step": 22997 + }, + { + "epoch": 0.7048547260022067, + "grad_norm": 1.7003018359606437, + "learning_rate": 4.2313282108811905e-06, + "loss": 0.6538, + "step": 22998 + }, + { + "epoch": 0.7048853745249479, + "grad_norm": 1.5338729750423712, + "learning_rate": 4.230517416798297e-06, + "loss": 0.6327, + "step": 22999 + }, + { + "epoch": 0.7049160230476891, + "grad_norm": 1.514383754671911, + "learning_rate": 4.2297066795636e-06, + "loss": 0.5864, + "step": 23000 + }, + { + "epoch": 0.7049466715704303, + "grad_norm": 1.6139735551792744, + "learning_rate": 4.228895999185076e-06, + "loss": 0.5884, + "step": 23001 + }, + { + "epoch": 0.7049773200931715, + "grad_norm": 1.5256046665846994, + "learning_rate": 4.228085375670718e-06, + "loss": 0.6533, + "step": 23002 + }, + { + "epoch": 0.7050079686159128, + "grad_norm": 1.7107394784221857, + "learning_rate": 4.22727480902851e-06, + "loss": 0.6113, + "step": 23003 + }, + { + "epoch": 0.7050386171386539, + "grad_norm": 1.5232078304092207, + "learning_rate": 4.22646429926644e-06, + "loss": 0.6695, + "step": 23004 + }, + { + "epoch": 0.7050692656613952, + "grad_norm": 1.7091926197373442, + "learning_rate": 4.225653846392497e-06, + "loss": 0.5831, + "step": 23005 + }, + { + "epoch": 0.7050999141841363, + "grad_norm": 1.441843535829697, + "learning_rate": 4.22484345041466e-06, + "loss": 0.6157, + "step": 23006 + }, + { + "epoch": 0.7051305627068776, + "grad_norm": 1.7281500069620532, + "learning_rate": 4.224033111340921e-06, + "loss": 0.6269, + "step": 23007 + }, + { + "epoch": 0.7051612112296187, + "grad_norm": 1.546407439866078, + "learning_rate": 4.223222829179263e-06, + "loss": 0.6869, + "step": 23008 + }, + { + "epoch": 0.70519185975236, + "grad_norm": 1.5498299623880183, + "learning_rate": 4.2224126039376685e-06, + "loss": 0.5778, + "step": 23009 + }, + { + "epoch": 0.7052225082751011, + "grad_norm": 1.5241172471332702, + "learning_rate": 4.221602435624115e-06, + "loss": 0.7274, + "step": 23010 + }, + { + "epoch": 0.7052531567978424, + "grad_norm": 1.758783825546647, + "learning_rate": 4.2207923242465975e-06, + "loss": 0.5933, + "step": 23011 + }, + { + "epoch": 0.7052838053205835, + "grad_norm": 0.67402882071573, + "learning_rate": 4.2199822698130875e-06, + "loss": 0.5746, + "step": 23012 + }, + { + "epoch": 0.7053144538433248, + "grad_norm": 1.7596446126603775, + "learning_rate": 4.2191722723315765e-06, + "loss": 0.6741, + "step": 23013 + }, + { + "epoch": 0.705345102366066, + "grad_norm": 0.690306080083462, + "learning_rate": 4.218362331810035e-06, + "loss": 0.5331, + "step": 23014 + }, + { + "epoch": 0.7053757508888071, + "grad_norm": 1.4577493121483431, + "learning_rate": 4.217552448256449e-06, + "loss": 0.6635, + "step": 23015 + }, + { + "epoch": 0.7054063994115484, + "grad_norm": 0.6767468177433776, + "learning_rate": 4.216742621678803e-06, + "loss": 0.5448, + "step": 23016 + }, + { + "epoch": 0.7054370479342895, + "grad_norm": 1.9092640532747471, + "learning_rate": 4.215932852085067e-06, + "loss": 0.6817, + "step": 23017 + }, + { + "epoch": 0.7054676964570308, + "grad_norm": 1.6679741844157872, + "learning_rate": 4.2151231394832245e-06, + "loss": 0.5503, + "step": 23018 + }, + { + "epoch": 0.7054983449797719, + "grad_norm": 1.464481965502814, + "learning_rate": 4.2143134838812585e-06, + "loss": 0.6362, + "step": 23019 + }, + { + "epoch": 0.7055289935025132, + "grad_norm": 1.6213188111527088, + "learning_rate": 4.2135038852871365e-06, + "loss": 0.748, + "step": 23020 + }, + { + "epoch": 0.7055596420252543, + "grad_norm": 0.6657795111293742, + "learning_rate": 4.212694343708846e-06, + "loss": 0.5494, + "step": 23021 + }, + { + "epoch": 0.7055902905479956, + "grad_norm": 0.6482565350553801, + "learning_rate": 4.211884859154356e-06, + "loss": 0.524, + "step": 23022 + }, + { + "epoch": 0.7056209390707368, + "grad_norm": 1.6602187635685488, + "learning_rate": 4.211075431631645e-06, + "loss": 0.5898, + "step": 23023 + }, + { + "epoch": 0.705651587593478, + "grad_norm": 0.654079516702696, + "learning_rate": 4.210266061148692e-06, + "loss": 0.5264, + "step": 23024 + }, + { + "epoch": 0.7056822361162192, + "grad_norm": 1.830189183466918, + "learning_rate": 4.209456747713465e-06, + "loss": 0.7468, + "step": 23025 + }, + { + "epoch": 0.7057128846389604, + "grad_norm": 1.7128572756570408, + "learning_rate": 4.208647491333944e-06, + "loss": 0.6169, + "step": 23026 + }, + { + "epoch": 0.7057435331617016, + "grad_norm": 1.7151138202332776, + "learning_rate": 4.207838292018103e-06, + "loss": 0.6924, + "step": 23027 + }, + { + "epoch": 0.7057741816844428, + "grad_norm": 1.59741462589548, + "learning_rate": 4.207029149773911e-06, + "loss": 0.6553, + "step": 23028 + }, + { + "epoch": 0.705804830207184, + "grad_norm": 1.6695682343968237, + "learning_rate": 4.206220064609341e-06, + "loss": 0.6229, + "step": 23029 + }, + { + "epoch": 0.7058354787299252, + "grad_norm": 1.5057779998946448, + "learning_rate": 4.205411036532372e-06, + "loss": 0.6919, + "step": 23030 + }, + { + "epoch": 0.7058661272526664, + "grad_norm": 1.496375086077642, + "learning_rate": 4.204602065550967e-06, + "loss": 0.6508, + "step": 23031 + }, + { + "epoch": 0.7058967757754077, + "grad_norm": 1.4917353248212906, + "learning_rate": 4.203793151673104e-06, + "loss": 0.7059, + "step": 23032 + }, + { + "epoch": 0.7059274242981488, + "grad_norm": 1.6882203297567315, + "learning_rate": 4.2029842949067465e-06, + "loss": 0.6746, + "step": 23033 + }, + { + "epoch": 0.7059580728208901, + "grad_norm": 1.348626942780525, + "learning_rate": 4.202175495259868e-06, + "loss": 0.5687, + "step": 23034 + }, + { + "epoch": 0.7059887213436312, + "grad_norm": 1.6285119072228704, + "learning_rate": 4.201366752740441e-06, + "loss": 0.6291, + "step": 23035 + }, + { + "epoch": 0.7060193698663725, + "grad_norm": 0.6602519600376697, + "learning_rate": 4.200558067356429e-06, + "loss": 0.5302, + "step": 23036 + }, + { + "epoch": 0.7060500183891136, + "grad_norm": 0.6634373972331943, + "learning_rate": 4.199749439115801e-06, + "loss": 0.5383, + "step": 23037 + }, + { + "epoch": 0.7060806669118549, + "grad_norm": 0.6659463075532261, + "learning_rate": 4.19894086802653e-06, + "loss": 0.5435, + "step": 23038 + }, + { + "epoch": 0.706111315434596, + "grad_norm": 1.9366900514434486, + "learning_rate": 4.198132354096574e-06, + "loss": 0.8291, + "step": 23039 + }, + { + "epoch": 0.7061419639573373, + "grad_norm": 1.7034644271465147, + "learning_rate": 4.197323897333906e-06, + "loss": 0.6619, + "step": 23040 + }, + { + "epoch": 0.7061726124800785, + "grad_norm": 1.6459547851416976, + "learning_rate": 4.196515497746493e-06, + "loss": 0.5917, + "step": 23041 + }, + { + "epoch": 0.7062032610028197, + "grad_norm": 1.6211345775142185, + "learning_rate": 4.195707155342294e-06, + "loss": 0.7052, + "step": 23042 + }, + { + "epoch": 0.7062339095255609, + "grad_norm": 1.7622388613803195, + "learning_rate": 4.1948988701292816e-06, + "loss": 0.6721, + "step": 23043 + }, + { + "epoch": 0.7062645580483021, + "grad_norm": 1.5231876928403063, + "learning_rate": 4.1940906421154116e-06, + "loss": 0.6341, + "step": 23044 + }, + { + "epoch": 0.7062952065710433, + "grad_norm": 0.6409889422506354, + "learning_rate": 4.193282471308653e-06, + "loss": 0.5223, + "step": 23045 + }, + { + "epoch": 0.7063258550937844, + "grad_norm": 1.6568907390717509, + "learning_rate": 4.19247435771697e-06, + "loss": 0.682, + "step": 23046 + }, + { + "epoch": 0.7063565036165257, + "grad_norm": 1.5426904708063724, + "learning_rate": 4.191666301348322e-06, + "loss": 0.6845, + "step": 23047 + }, + { + "epoch": 0.7063871521392668, + "grad_norm": 1.547211020075081, + "learning_rate": 4.1908583022106695e-06, + "loss": 0.653, + "step": 23048 + }, + { + "epoch": 0.7064178006620081, + "grad_norm": 1.6381394057434486, + "learning_rate": 4.190050360311981e-06, + "loss": 0.6157, + "step": 23049 + }, + { + "epoch": 0.7064484491847492, + "grad_norm": 1.6329274687183373, + "learning_rate": 4.18924247566021e-06, + "loss": 0.7421, + "step": 23050 + }, + { + "epoch": 0.7064790977074905, + "grad_norm": 1.6208846651230755, + "learning_rate": 4.188434648263319e-06, + "loss": 0.6833, + "step": 23051 + }, + { + "epoch": 0.7065097462302317, + "grad_norm": 1.7344093669357656, + "learning_rate": 4.1876268781292714e-06, + "loss": 0.6445, + "step": 23052 + }, + { + "epoch": 0.7065403947529729, + "grad_norm": 1.6542930664188702, + "learning_rate": 4.18681916526602e-06, + "loss": 0.7095, + "step": 23053 + }, + { + "epoch": 0.7065710432757141, + "grad_norm": 0.672023508676128, + "learning_rate": 4.1860115096815316e-06, + "loss": 0.5479, + "step": 23054 + }, + { + "epoch": 0.7066016917984553, + "grad_norm": 1.7143574412141305, + "learning_rate": 4.185203911383755e-06, + "loss": 0.7093, + "step": 23055 + }, + { + "epoch": 0.7066323403211965, + "grad_norm": 1.602505660759351, + "learning_rate": 4.184396370380651e-06, + "loss": 0.6302, + "step": 23056 + }, + { + "epoch": 0.7066629888439377, + "grad_norm": 0.6573580151657163, + "learning_rate": 4.1835888866801825e-06, + "loss": 0.5489, + "step": 23057 + }, + { + "epoch": 0.7066936373666789, + "grad_norm": 1.7000159718521828, + "learning_rate": 4.182781460290297e-06, + "loss": 0.6379, + "step": 23058 + }, + { + "epoch": 0.7067242858894202, + "grad_norm": 1.900711410923301, + "learning_rate": 4.181974091218953e-06, + "loss": 0.7013, + "step": 23059 + }, + { + "epoch": 0.7067549344121613, + "grad_norm": 1.7038460720079829, + "learning_rate": 4.181166779474112e-06, + "loss": 0.7139, + "step": 23060 + }, + { + "epoch": 0.7067855829349026, + "grad_norm": 0.6851846356907274, + "learning_rate": 4.18035952506372e-06, + "loss": 0.5364, + "step": 23061 + }, + { + "epoch": 0.7068162314576437, + "grad_norm": 1.6141186750703715, + "learning_rate": 4.179552327995734e-06, + "loss": 0.6301, + "step": 23062 + }, + { + "epoch": 0.706846879980385, + "grad_norm": 1.7558479639637832, + "learning_rate": 4.178745188278112e-06, + "loss": 0.6991, + "step": 23063 + }, + { + "epoch": 0.7068775285031261, + "grad_norm": 1.6485638946295775, + "learning_rate": 4.1779381059187986e-06, + "loss": 0.6984, + "step": 23064 + }, + { + "epoch": 0.7069081770258674, + "grad_norm": 1.5681680363707204, + "learning_rate": 4.177131080925755e-06, + "loss": 0.5946, + "step": 23065 + }, + { + "epoch": 0.7069388255486085, + "grad_norm": 1.870837203457495, + "learning_rate": 4.176324113306924e-06, + "loss": 0.8489, + "step": 23066 + }, + { + "epoch": 0.7069694740713498, + "grad_norm": 0.6744361715082079, + "learning_rate": 4.175517203070263e-06, + "loss": 0.5461, + "step": 23067 + }, + { + "epoch": 0.707000122594091, + "grad_norm": 1.599977087182212, + "learning_rate": 4.174710350223725e-06, + "loss": 0.6122, + "step": 23068 + }, + { + "epoch": 0.7070307711168322, + "grad_norm": 0.6451173592526478, + "learning_rate": 4.173903554775252e-06, + "loss": 0.5407, + "step": 23069 + }, + { + "epoch": 0.7070614196395734, + "grad_norm": 1.8787519922124096, + "learning_rate": 4.173096816732798e-06, + "loss": 0.6273, + "step": 23070 + }, + { + "epoch": 0.7070920681623146, + "grad_norm": 1.5559942507118245, + "learning_rate": 4.172290136104315e-06, + "loss": 0.5413, + "step": 23071 + }, + { + "epoch": 0.7071227166850558, + "grad_norm": 1.6919783317531574, + "learning_rate": 4.171483512897746e-06, + "loss": 0.6919, + "step": 23072 + }, + { + "epoch": 0.707153365207797, + "grad_norm": 1.7031500915337987, + "learning_rate": 4.170676947121045e-06, + "loss": 0.6923, + "step": 23073 + }, + { + "epoch": 0.7071840137305382, + "grad_norm": 1.5257874192647323, + "learning_rate": 4.169870438782148e-06, + "loss": 0.5778, + "step": 23074 + }, + { + "epoch": 0.7072146622532794, + "grad_norm": 0.6692921677393845, + "learning_rate": 4.169063987889015e-06, + "loss": 0.5286, + "step": 23075 + }, + { + "epoch": 0.7072453107760206, + "grad_norm": 1.596235459286198, + "learning_rate": 4.168257594449587e-06, + "loss": 0.5965, + "step": 23076 + }, + { + "epoch": 0.7072759592987617, + "grad_norm": 1.4889016501862062, + "learning_rate": 4.167451258471806e-06, + "loss": 0.6612, + "step": 23077 + }, + { + "epoch": 0.707306607821503, + "grad_norm": 1.6374478715611778, + "learning_rate": 4.166644979963621e-06, + "loss": 0.6467, + "step": 23078 + }, + { + "epoch": 0.7073372563442442, + "grad_norm": 1.6039058424642514, + "learning_rate": 4.165838758932978e-06, + "loss": 0.7129, + "step": 23079 + }, + { + "epoch": 0.7073679048669854, + "grad_norm": 1.4227392162897998, + "learning_rate": 4.165032595387815e-06, + "loss": 0.6276, + "step": 23080 + }, + { + "epoch": 0.7073985533897266, + "grad_norm": 1.4814560745094365, + "learning_rate": 4.164226489336079e-06, + "loss": 0.5944, + "step": 23081 + }, + { + "epoch": 0.7074292019124678, + "grad_norm": 1.6191615274563063, + "learning_rate": 4.163420440785712e-06, + "loss": 0.7446, + "step": 23082 + }, + { + "epoch": 0.707459850435209, + "grad_norm": 1.7091375066468788, + "learning_rate": 4.1626144497446605e-06, + "loss": 0.6705, + "step": 23083 + }, + { + "epoch": 0.7074904989579502, + "grad_norm": 1.8238992780558574, + "learning_rate": 4.1618085162208635e-06, + "loss": 0.662, + "step": 23084 + }, + { + "epoch": 0.7075211474806914, + "grad_norm": 0.6346646813391285, + "learning_rate": 4.161002640222253e-06, + "loss": 0.521, + "step": 23085 + }, + { + "epoch": 0.7075517960034327, + "grad_norm": 1.6514301453267386, + "learning_rate": 4.160196821756785e-06, + "loss": 0.6346, + "step": 23086 + }, + { + "epoch": 0.7075824445261738, + "grad_norm": 1.672309782276578, + "learning_rate": 4.159391060832391e-06, + "loss": 0.668, + "step": 23087 + }, + { + "epoch": 0.7076130930489151, + "grad_norm": 1.6903925485877875, + "learning_rate": 4.158585357457008e-06, + "loss": 0.5464, + "step": 23088 + }, + { + "epoch": 0.7076437415716562, + "grad_norm": 0.6911445537161874, + "learning_rate": 4.157779711638577e-06, + "loss": 0.5441, + "step": 23089 + }, + { + "epoch": 0.7076743900943975, + "grad_norm": 1.5760332068643106, + "learning_rate": 4.156974123385042e-06, + "loss": 0.727, + "step": 23090 + }, + { + "epoch": 0.7077050386171386, + "grad_norm": 1.7129697591489061, + "learning_rate": 4.156168592704333e-06, + "loss": 0.7041, + "step": 23091 + }, + { + "epoch": 0.7077356871398799, + "grad_norm": 1.616635270487931, + "learning_rate": 4.15536311960439e-06, + "loss": 0.7103, + "step": 23092 + }, + { + "epoch": 0.707766335662621, + "grad_norm": 1.6771071829444018, + "learning_rate": 4.154557704093148e-06, + "loss": 0.7024, + "step": 23093 + }, + { + "epoch": 0.7077969841853623, + "grad_norm": 1.6102074076918858, + "learning_rate": 4.15375234617855e-06, + "loss": 0.6255, + "step": 23094 + }, + { + "epoch": 0.7078276327081034, + "grad_norm": 1.5916031555542325, + "learning_rate": 4.152947045868525e-06, + "loss": 0.7004, + "step": 23095 + }, + { + "epoch": 0.7078582812308447, + "grad_norm": 1.5417230048577697, + "learning_rate": 4.152141803171001e-06, + "loss": 0.6759, + "step": 23096 + }, + { + "epoch": 0.7078889297535859, + "grad_norm": 1.6760122615871176, + "learning_rate": 4.151336618093928e-06, + "loss": 0.6215, + "step": 23097 + }, + { + "epoch": 0.7079195782763271, + "grad_norm": 1.4694052079334448, + "learning_rate": 4.1505314906452324e-06, + "loss": 0.7431, + "step": 23098 + }, + { + "epoch": 0.7079502267990683, + "grad_norm": 0.6995217826347896, + "learning_rate": 4.1497264208328426e-06, + "loss": 0.5624, + "step": 23099 + }, + { + "epoch": 0.7079808753218095, + "grad_norm": 1.702578082948326, + "learning_rate": 4.1489214086646955e-06, + "loss": 0.6351, + "step": 23100 + }, + { + "epoch": 0.7080115238445507, + "grad_norm": 1.5239932512567467, + "learning_rate": 4.148116454148722e-06, + "loss": 0.7054, + "step": 23101 + }, + { + "epoch": 0.7080421723672919, + "grad_norm": 1.471890404922721, + "learning_rate": 4.147311557292858e-06, + "loss": 0.513, + "step": 23102 + }, + { + "epoch": 0.7080728208900331, + "grad_norm": 1.7003673723060502, + "learning_rate": 4.146506718105028e-06, + "loss": 0.566, + "step": 23103 + }, + { + "epoch": 0.7081034694127744, + "grad_norm": 1.4177358508787246, + "learning_rate": 4.145701936593164e-06, + "loss": 0.6358, + "step": 23104 + }, + { + "epoch": 0.7081341179355155, + "grad_norm": 0.6948125609070313, + "learning_rate": 4.144897212765201e-06, + "loss": 0.5478, + "step": 23105 + }, + { + "epoch": 0.7081647664582568, + "grad_norm": 1.6711958711189463, + "learning_rate": 4.144092546629064e-06, + "loss": 0.7385, + "step": 23106 + }, + { + "epoch": 0.7081954149809979, + "grad_norm": 1.638628303905172, + "learning_rate": 4.143287938192677e-06, + "loss": 0.6273, + "step": 23107 + }, + { + "epoch": 0.7082260635037391, + "grad_norm": 1.6480664510457659, + "learning_rate": 4.142483387463972e-06, + "loss": 0.6281, + "step": 23108 + }, + { + "epoch": 0.7082567120264803, + "grad_norm": 1.8501102089344739, + "learning_rate": 4.141678894450879e-06, + "loss": 0.5498, + "step": 23109 + }, + { + "epoch": 0.7082873605492215, + "grad_norm": 1.673699154009327, + "learning_rate": 4.1408744591613244e-06, + "loss": 0.6619, + "step": 23110 + }, + { + "epoch": 0.7083180090719627, + "grad_norm": 0.6668644622072734, + "learning_rate": 4.14007008160323e-06, + "loss": 0.5585, + "step": 23111 + }, + { + "epoch": 0.7083486575947039, + "grad_norm": 1.5510586211252555, + "learning_rate": 4.1392657617845246e-06, + "loss": 0.6417, + "step": 23112 + }, + { + "epoch": 0.7083793061174452, + "grad_norm": 1.6395343134326594, + "learning_rate": 4.138461499713137e-06, + "loss": 0.671, + "step": 23113 + }, + { + "epoch": 0.7084099546401863, + "grad_norm": 0.6514845562245234, + "learning_rate": 4.137657295396984e-06, + "loss": 0.5206, + "step": 23114 + }, + { + "epoch": 0.7084406031629276, + "grad_norm": 1.534411173749718, + "learning_rate": 4.136853148843993e-06, + "loss": 0.6492, + "step": 23115 + }, + { + "epoch": 0.7084712516856687, + "grad_norm": 0.6854420757028686, + "learning_rate": 4.136049060062093e-06, + "loss": 0.5327, + "step": 23116 + }, + { + "epoch": 0.70850190020841, + "grad_norm": 0.6785062968319899, + "learning_rate": 4.1352450290592e-06, + "loss": 0.554, + "step": 23117 + }, + { + "epoch": 0.7085325487311511, + "grad_norm": 1.6673742167972752, + "learning_rate": 4.134441055843237e-06, + "loss": 0.6323, + "step": 23118 + }, + { + "epoch": 0.7085631972538924, + "grad_norm": 0.6559024100323361, + "learning_rate": 4.133637140422127e-06, + "loss": 0.5341, + "step": 23119 + }, + { + "epoch": 0.7085938457766335, + "grad_norm": 1.7600685479375844, + "learning_rate": 4.132833282803788e-06, + "loss": 0.7587, + "step": 23120 + }, + { + "epoch": 0.7086244942993748, + "grad_norm": 1.6296350686429526, + "learning_rate": 4.13202948299615e-06, + "loss": 0.7338, + "step": 23121 + }, + { + "epoch": 0.708655142822116, + "grad_norm": 1.769102044069149, + "learning_rate": 4.131225741007124e-06, + "loss": 0.6131, + "step": 23122 + }, + { + "epoch": 0.7086857913448572, + "grad_norm": 1.9435760605148629, + "learning_rate": 4.130422056844631e-06, + "loss": 0.6245, + "step": 23123 + }, + { + "epoch": 0.7087164398675984, + "grad_norm": 0.6701887005741443, + "learning_rate": 4.129618430516596e-06, + "loss": 0.5331, + "step": 23124 + }, + { + "epoch": 0.7087470883903396, + "grad_norm": 1.5017339430317131, + "learning_rate": 4.128814862030931e-06, + "loss": 0.5987, + "step": 23125 + }, + { + "epoch": 0.7087777369130808, + "grad_norm": 0.6598179823731196, + "learning_rate": 4.128011351395549e-06, + "loss": 0.5242, + "step": 23126 + }, + { + "epoch": 0.708808385435822, + "grad_norm": 1.7041312810283422, + "learning_rate": 4.12720789861838e-06, + "loss": 0.6767, + "step": 23127 + }, + { + "epoch": 0.7088390339585632, + "grad_norm": 0.625868911694995, + "learning_rate": 4.126404503707332e-06, + "loss": 0.4884, + "step": 23128 + }, + { + "epoch": 0.7088696824813044, + "grad_norm": 1.693004512364738, + "learning_rate": 4.125601166670327e-06, + "loss": 0.6166, + "step": 23129 + }, + { + "epoch": 0.7089003310040456, + "grad_norm": 1.588632458829587, + "learning_rate": 4.124797887515272e-06, + "loss": 0.7084, + "step": 23130 + }, + { + "epoch": 0.7089309795267869, + "grad_norm": 1.7906196170097683, + "learning_rate": 4.123994666250086e-06, + "loss": 0.718, + "step": 23131 + }, + { + "epoch": 0.708961628049528, + "grad_norm": 2.001377576388039, + "learning_rate": 4.123191502882689e-06, + "loss": 0.7559, + "step": 23132 + }, + { + "epoch": 0.7089922765722693, + "grad_norm": 0.6841438985049884, + "learning_rate": 4.122388397420985e-06, + "loss": 0.5315, + "step": 23133 + }, + { + "epoch": 0.7090229250950104, + "grad_norm": 1.6737566066467617, + "learning_rate": 4.1215853498728935e-06, + "loss": 0.7382, + "step": 23134 + }, + { + "epoch": 0.7090535736177517, + "grad_norm": 1.4401626202800553, + "learning_rate": 4.120782360246328e-06, + "loss": 0.667, + "step": 23135 + }, + { + "epoch": 0.7090842221404928, + "grad_norm": 1.7615110395501443, + "learning_rate": 4.119979428549199e-06, + "loss": 0.595, + "step": 23136 + }, + { + "epoch": 0.7091148706632341, + "grad_norm": 1.6371397731816728, + "learning_rate": 4.119176554789409e-06, + "loss": 0.6106, + "step": 23137 + }, + { + "epoch": 0.7091455191859752, + "grad_norm": 0.6552531507372196, + "learning_rate": 4.1183737389748854e-06, + "loss": 0.5277, + "step": 23138 + }, + { + "epoch": 0.7091761677087164, + "grad_norm": 1.5208284239088876, + "learning_rate": 4.117570981113526e-06, + "loss": 0.6338, + "step": 23139 + }, + { + "epoch": 0.7092068162314576, + "grad_norm": 0.6696734875258702, + "learning_rate": 4.116768281213248e-06, + "loss": 0.5324, + "step": 23140 + }, + { + "epoch": 0.7092374647541988, + "grad_norm": 1.5309836672402157, + "learning_rate": 4.115965639281955e-06, + "loss": 0.6922, + "step": 23141 + }, + { + "epoch": 0.7092681132769401, + "grad_norm": 1.7315115194186639, + "learning_rate": 4.1151630553275565e-06, + "loss": 0.6068, + "step": 23142 + }, + { + "epoch": 0.7092987617996812, + "grad_norm": 1.677126686933142, + "learning_rate": 4.1143605293579665e-06, + "loss": 0.7341, + "step": 23143 + }, + { + "epoch": 0.7093294103224225, + "grad_norm": 1.5720890208920344, + "learning_rate": 4.113558061381085e-06, + "loss": 0.6198, + "step": 23144 + }, + { + "epoch": 0.7093600588451636, + "grad_norm": 1.4445927526498052, + "learning_rate": 4.112755651404822e-06, + "loss": 0.562, + "step": 23145 + }, + { + "epoch": 0.7093907073679049, + "grad_norm": 1.6247494655863204, + "learning_rate": 4.111953299437087e-06, + "loss": 0.5557, + "step": 23146 + }, + { + "epoch": 0.709421355890646, + "grad_norm": 1.4753648202070409, + "learning_rate": 4.111151005485778e-06, + "loss": 0.6831, + "step": 23147 + }, + { + "epoch": 0.7094520044133873, + "grad_norm": 1.6329292653980907, + "learning_rate": 4.110348769558806e-06, + "loss": 0.659, + "step": 23148 + }, + { + "epoch": 0.7094826529361284, + "grad_norm": 1.6624896833349723, + "learning_rate": 4.109546591664078e-06, + "loss": 0.7314, + "step": 23149 + }, + { + "epoch": 0.7095133014588697, + "grad_norm": 1.8227359865226196, + "learning_rate": 4.108744471809492e-06, + "loss": 0.69, + "step": 23150 + }, + { + "epoch": 0.7095439499816109, + "grad_norm": 1.6105822321517755, + "learning_rate": 4.1079424100029566e-06, + "loss": 0.7459, + "step": 23151 + }, + { + "epoch": 0.7095745985043521, + "grad_norm": 1.6527849756400572, + "learning_rate": 4.107140406252369e-06, + "loss": 0.647, + "step": 23152 + }, + { + "epoch": 0.7096052470270933, + "grad_norm": 1.6473521652688654, + "learning_rate": 4.106338460565634e-06, + "loss": 0.6556, + "step": 23153 + }, + { + "epoch": 0.7096358955498345, + "grad_norm": 0.6706126341952559, + "learning_rate": 4.105536572950658e-06, + "loss": 0.5355, + "step": 23154 + }, + { + "epoch": 0.7096665440725757, + "grad_norm": 1.5313545646700437, + "learning_rate": 4.104734743415335e-06, + "loss": 0.6643, + "step": 23155 + }, + { + "epoch": 0.7096971925953169, + "grad_norm": 1.517255593734103, + "learning_rate": 4.103932971967569e-06, + "loss": 0.5983, + "step": 23156 + }, + { + "epoch": 0.7097278411180581, + "grad_norm": 0.6638229987117047, + "learning_rate": 4.103131258615263e-06, + "loss": 0.5211, + "step": 23157 + }, + { + "epoch": 0.7097584896407994, + "grad_norm": 1.5022644436260426, + "learning_rate": 4.102329603366311e-06, + "loss": 0.6233, + "step": 23158 + }, + { + "epoch": 0.7097891381635405, + "grad_norm": 1.3608147784560574, + "learning_rate": 4.1015280062286165e-06, + "loss": 0.4817, + "step": 23159 + }, + { + "epoch": 0.7098197866862818, + "grad_norm": 1.5796455915349665, + "learning_rate": 4.1007264672100734e-06, + "loss": 0.6079, + "step": 23160 + }, + { + "epoch": 0.7098504352090229, + "grad_norm": 1.8030490703970925, + "learning_rate": 4.099924986318581e-06, + "loss": 0.7215, + "step": 23161 + }, + { + "epoch": 0.7098810837317642, + "grad_norm": 1.8075339988616097, + "learning_rate": 4.099123563562042e-06, + "loss": 0.6279, + "step": 23162 + }, + { + "epoch": 0.7099117322545053, + "grad_norm": 1.6159360167034864, + "learning_rate": 4.098322198948344e-06, + "loss": 0.669, + "step": 23163 + }, + { + "epoch": 0.7099423807772466, + "grad_norm": 1.4513156863657366, + "learning_rate": 4.097520892485387e-06, + "loss": 0.5675, + "step": 23164 + }, + { + "epoch": 0.7099730292999877, + "grad_norm": 1.7376525878825475, + "learning_rate": 4.096719644181071e-06, + "loss": 0.6429, + "step": 23165 + }, + { + "epoch": 0.710003677822729, + "grad_norm": 0.6990587851542545, + "learning_rate": 4.095918454043283e-06, + "loss": 0.5544, + "step": 23166 + }, + { + "epoch": 0.7100343263454701, + "grad_norm": 1.7667346566869067, + "learning_rate": 4.09511732207992e-06, + "loss": 0.768, + "step": 23167 + }, + { + "epoch": 0.7100649748682114, + "grad_norm": 1.6218273928543676, + "learning_rate": 4.094316248298882e-06, + "loss": 0.6853, + "step": 23168 + }, + { + "epoch": 0.7100956233909526, + "grad_norm": 2.1270453661173825, + "learning_rate": 4.093515232708053e-06, + "loss": 0.7947, + "step": 23169 + }, + { + "epoch": 0.7101262719136937, + "grad_norm": 1.6280283056033342, + "learning_rate": 4.0927142753153334e-06, + "loss": 0.6858, + "step": 23170 + }, + { + "epoch": 0.710156920436435, + "grad_norm": 1.715853396466766, + "learning_rate": 4.0919133761286075e-06, + "loss": 0.6964, + "step": 23171 + }, + { + "epoch": 0.7101875689591761, + "grad_norm": 1.5964079986248054, + "learning_rate": 4.091112535155771e-06, + "loss": 0.6795, + "step": 23172 + }, + { + "epoch": 0.7102182174819174, + "grad_norm": 1.4862266361921976, + "learning_rate": 4.090311752404719e-06, + "loss": 0.6056, + "step": 23173 + }, + { + "epoch": 0.7102488660046585, + "grad_norm": 1.6668403416713395, + "learning_rate": 4.0895110278833315e-06, + "loss": 0.6524, + "step": 23174 + }, + { + "epoch": 0.7102795145273998, + "grad_norm": 1.682788813339699, + "learning_rate": 4.088710361599506e-06, + "loss": 0.5484, + "step": 23175 + }, + { + "epoch": 0.7103101630501409, + "grad_norm": 1.4681045485270119, + "learning_rate": 4.0879097535611335e-06, + "loss": 0.6608, + "step": 23176 + }, + { + "epoch": 0.7103408115728822, + "grad_norm": 1.7739109302828102, + "learning_rate": 4.087109203776094e-06, + "loss": 0.6607, + "step": 23177 + }, + { + "epoch": 0.7103714600956234, + "grad_norm": 1.8592703897484706, + "learning_rate": 4.0863087122522816e-06, + "loss": 0.7641, + "step": 23178 + }, + { + "epoch": 0.7104021086183646, + "grad_norm": 1.7072727497298703, + "learning_rate": 4.085508278997585e-06, + "loss": 0.6834, + "step": 23179 + }, + { + "epoch": 0.7104327571411058, + "grad_norm": 1.6209207884442185, + "learning_rate": 4.084707904019886e-06, + "loss": 0.5685, + "step": 23180 + }, + { + "epoch": 0.710463405663847, + "grad_norm": 1.4909874965155292, + "learning_rate": 4.083907587327076e-06, + "loss": 0.6375, + "step": 23181 + }, + { + "epoch": 0.7104940541865882, + "grad_norm": 0.6791079126835473, + "learning_rate": 4.083107328927032e-06, + "loss": 0.5388, + "step": 23182 + }, + { + "epoch": 0.7105247027093294, + "grad_norm": 1.6764956266129802, + "learning_rate": 4.082307128827653e-06, + "loss": 0.6749, + "step": 23183 + }, + { + "epoch": 0.7105553512320706, + "grad_norm": 1.6027292581947712, + "learning_rate": 4.081506987036815e-06, + "loss": 0.7412, + "step": 23184 + }, + { + "epoch": 0.7105859997548118, + "grad_norm": 1.499414312663558, + "learning_rate": 4.080706903562399e-06, + "loss": 0.5947, + "step": 23185 + }, + { + "epoch": 0.710616648277553, + "grad_norm": 1.6413204721735057, + "learning_rate": 4.079906878412293e-06, + "loss": 0.633, + "step": 23186 + }, + { + "epoch": 0.7106472968002943, + "grad_norm": 1.6329546744915096, + "learning_rate": 4.079106911594384e-06, + "loss": 0.718, + "step": 23187 + }, + { + "epoch": 0.7106779453230354, + "grad_norm": 1.5227421279713178, + "learning_rate": 4.078307003116544e-06, + "loss": 0.6017, + "step": 23188 + }, + { + "epoch": 0.7107085938457767, + "grad_norm": 1.430059506090496, + "learning_rate": 4.077507152986661e-06, + "loss": 0.6136, + "step": 23189 + }, + { + "epoch": 0.7107392423685178, + "grad_norm": 1.6079981230467628, + "learning_rate": 4.07670736121262e-06, + "loss": 0.709, + "step": 23190 + }, + { + "epoch": 0.7107698908912591, + "grad_norm": 1.7497229667835796, + "learning_rate": 4.075907627802291e-06, + "loss": 0.7071, + "step": 23191 + }, + { + "epoch": 0.7108005394140002, + "grad_norm": 1.6384194041167408, + "learning_rate": 4.075107952763565e-06, + "loss": 0.7595, + "step": 23192 + }, + { + "epoch": 0.7108311879367415, + "grad_norm": 1.6977419875694566, + "learning_rate": 4.0743083361043086e-06, + "loss": 0.6409, + "step": 23193 + }, + { + "epoch": 0.7108618364594826, + "grad_norm": 1.669363412404883, + "learning_rate": 4.0735087778324166e-06, + "loss": 0.6615, + "step": 23194 + }, + { + "epoch": 0.7108924849822239, + "grad_norm": 1.634306387446793, + "learning_rate": 4.072709277955758e-06, + "loss": 0.625, + "step": 23195 + }, + { + "epoch": 0.710923133504965, + "grad_norm": 0.628673800012635, + "learning_rate": 4.071909836482209e-06, + "loss": 0.5086, + "step": 23196 + }, + { + "epoch": 0.7109537820277063, + "grad_norm": 1.7012761555794174, + "learning_rate": 4.071110453419648e-06, + "loss": 0.7279, + "step": 23197 + }, + { + "epoch": 0.7109844305504475, + "grad_norm": 1.6231813377916127, + "learning_rate": 4.070311128775955e-06, + "loss": 0.6875, + "step": 23198 + }, + { + "epoch": 0.7110150790731887, + "grad_norm": 1.6217091169350188, + "learning_rate": 4.0695118625590026e-06, + "loss": 0.5567, + "step": 23199 + }, + { + "epoch": 0.7110457275959299, + "grad_norm": 1.6813092605980167, + "learning_rate": 4.068712654776666e-06, + "loss": 0.6528, + "step": 23200 + }, + { + "epoch": 0.711076376118671, + "grad_norm": 0.6469544537573139, + "learning_rate": 4.0679135054368215e-06, + "loss": 0.5179, + "step": 23201 + }, + { + "epoch": 0.7111070246414123, + "grad_norm": 0.6818024151147924, + "learning_rate": 4.067114414547346e-06, + "loss": 0.5094, + "step": 23202 + }, + { + "epoch": 0.7111376731641534, + "grad_norm": 1.766069798816308, + "learning_rate": 4.066315382116111e-06, + "loss": 0.574, + "step": 23203 + }, + { + "epoch": 0.7111683216868947, + "grad_norm": 1.7325793855439806, + "learning_rate": 4.065516408150983e-06, + "loss": 0.6809, + "step": 23204 + }, + { + "epoch": 0.7111989702096359, + "grad_norm": 0.6566460153050562, + "learning_rate": 4.0647174926598435e-06, + "loss": 0.5437, + "step": 23205 + }, + { + "epoch": 0.7112296187323771, + "grad_norm": 0.6493449252609813, + "learning_rate": 4.063918635650562e-06, + "loss": 0.5564, + "step": 23206 + }, + { + "epoch": 0.7112602672551183, + "grad_norm": 1.6297484930671766, + "learning_rate": 4.063119837131008e-06, + "loss": 0.6747, + "step": 23207 + }, + { + "epoch": 0.7112909157778595, + "grad_norm": 1.451817358204021, + "learning_rate": 4.062321097109051e-06, + "loss": 0.7128, + "step": 23208 + }, + { + "epoch": 0.7113215643006007, + "grad_norm": 1.7122618768205267, + "learning_rate": 4.0615224155925644e-06, + "loss": 0.7177, + "step": 23209 + }, + { + "epoch": 0.7113522128233419, + "grad_norm": 1.5575478442091093, + "learning_rate": 4.06072379258942e-06, + "loss": 0.5852, + "step": 23210 + }, + { + "epoch": 0.7113828613460831, + "grad_norm": 1.7013430648399643, + "learning_rate": 4.059925228107484e-06, + "loss": 0.6486, + "step": 23211 + }, + { + "epoch": 0.7114135098688243, + "grad_norm": 0.6695974696671525, + "learning_rate": 4.059126722154618e-06, + "loss": 0.5296, + "step": 23212 + }, + { + "epoch": 0.7114441583915655, + "grad_norm": 1.706877184075491, + "learning_rate": 4.058328274738703e-06, + "loss": 0.6138, + "step": 23213 + }, + { + "epoch": 0.7114748069143068, + "grad_norm": 1.5508553065357151, + "learning_rate": 4.057529885867599e-06, + "loss": 0.6392, + "step": 23214 + }, + { + "epoch": 0.7115054554370479, + "grad_norm": 1.6484253291701134, + "learning_rate": 4.056731555549171e-06, + "loss": 0.6366, + "step": 23215 + }, + { + "epoch": 0.7115361039597892, + "grad_norm": 0.6535866152980047, + "learning_rate": 4.055933283791288e-06, + "loss": 0.5482, + "step": 23216 + }, + { + "epoch": 0.7115667524825303, + "grad_norm": 1.417507172412513, + "learning_rate": 4.055135070601818e-06, + "loss": 0.5835, + "step": 23217 + }, + { + "epoch": 0.7115974010052716, + "grad_norm": 2.098867506288439, + "learning_rate": 4.054336915988619e-06, + "loss": 0.6176, + "step": 23218 + }, + { + "epoch": 0.7116280495280127, + "grad_norm": 1.659812585016732, + "learning_rate": 4.05353881995956e-06, + "loss": 0.7377, + "step": 23219 + }, + { + "epoch": 0.711658698050754, + "grad_norm": 0.6594892043539164, + "learning_rate": 4.052740782522506e-06, + "loss": 0.518, + "step": 23220 + }, + { + "epoch": 0.7116893465734951, + "grad_norm": 1.5775858707097359, + "learning_rate": 4.051942803685321e-06, + "loss": 0.5707, + "step": 23221 + }, + { + "epoch": 0.7117199950962364, + "grad_norm": 1.6907526654284544, + "learning_rate": 4.051144883455865e-06, + "loss": 0.6118, + "step": 23222 + }, + { + "epoch": 0.7117506436189776, + "grad_norm": 1.6784900616004441, + "learning_rate": 4.050347021841995e-06, + "loss": 0.6737, + "step": 23223 + }, + { + "epoch": 0.7117812921417188, + "grad_norm": 1.5277098765170014, + "learning_rate": 4.049549218851584e-06, + "loss": 0.6752, + "step": 23224 + }, + { + "epoch": 0.71181194066446, + "grad_norm": 1.6032165863333931, + "learning_rate": 4.048751474492487e-06, + "loss": 0.6227, + "step": 23225 + }, + { + "epoch": 0.7118425891872012, + "grad_norm": 1.5759496556573906, + "learning_rate": 4.0479537887725615e-06, + "loss": 0.6247, + "step": 23226 + }, + { + "epoch": 0.7118732377099424, + "grad_norm": 1.6126481368839936, + "learning_rate": 4.047156161699669e-06, + "loss": 0.7219, + "step": 23227 + }, + { + "epoch": 0.7119038862326836, + "grad_norm": 1.5584910134371797, + "learning_rate": 4.0463585932816714e-06, + "loss": 0.655, + "step": 23228 + }, + { + "epoch": 0.7119345347554248, + "grad_norm": 1.8476521211670456, + "learning_rate": 4.0455610835264295e-06, + "loss": 0.6269, + "step": 23229 + }, + { + "epoch": 0.711965183278166, + "grad_norm": 2.0545376085413505, + "learning_rate": 4.044763632441793e-06, + "loss": 0.7928, + "step": 23230 + }, + { + "epoch": 0.7119958318009072, + "grad_norm": 1.6579220383793227, + "learning_rate": 4.043966240035624e-06, + "loss": 0.5601, + "step": 23231 + }, + { + "epoch": 0.7120264803236483, + "grad_norm": 1.5318225418174731, + "learning_rate": 4.043168906315784e-06, + "loss": 0.5998, + "step": 23232 + }, + { + "epoch": 0.7120571288463896, + "grad_norm": 1.795343208539919, + "learning_rate": 4.0423716312901255e-06, + "loss": 0.6673, + "step": 23233 + }, + { + "epoch": 0.7120877773691308, + "grad_norm": 1.9024831799893225, + "learning_rate": 4.041574414966495e-06, + "loss": 0.7664, + "step": 23234 + }, + { + "epoch": 0.712118425891872, + "grad_norm": 1.5680745567261505, + "learning_rate": 4.040777257352764e-06, + "loss": 0.737, + "step": 23235 + }, + { + "epoch": 0.7121490744146132, + "grad_norm": 1.6469321633232223, + "learning_rate": 4.039980158456776e-06, + "loss": 0.7033, + "step": 23236 + }, + { + "epoch": 0.7121797229373544, + "grad_norm": 1.5300553731968525, + "learning_rate": 4.039183118286391e-06, + "loss": 0.6326, + "step": 23237 + }, + { + "epoch": 0.7122103714600956, + "grad_norm": 1.8212024456441107, + "learning_rate": 4.038386136849458e-06, + "loss": 0.7147, + "step": 23238 + }, + { + "epoch": 0.7122410199828368, + "grad_norm": 1.6305565550688217, + "learning_rate": 4.037589214153831e-06, + "loss": 0.6505, + "step": 23239 + }, + { + "epoch": 0.712271668505578, + "grad_norm": 1.5267846851627744, + "learning_rate": 4.036792350207367e-06, + "loss": 0.7563, + "step": 23240 + }, + { + "epoch": 0.7123023170283193, + "grad_norm": 1.746447747804624, + "learning_rate": 4.03599554501791e-06, + "loss": 0.6932, + "step": 23241 + }, + { + "epoch": 0.7123329655510604, + "grad_norm": 1.5033548537769506, + "learning_rate": 4.0351987985933136e-06, + "loss": 0.6768, + "step": 23242 + }, + { + "epoch": 0.7123636140738017, + "grad_norm": 1.6651799649967496, + "learning_rate": 4.034402110941434e-06, + "loss": 0.5477, + "step": 23243 + }, + { + "epoch": 0.7123942625965428, + "grad_norm": 0.6505532065890096, + "learning_rate": 4.033605482070117e-06, + "loss": 0.5474, + "step": 23244 + }, + { + "epoch": 0.7124249111192841, + "grad_norm": 1.7462468716321096, + "learning_rate": 4.032808911987205e-06, + "loss": 0.7521, + "step": 23245 + }, + { + "epoch": 0.7124555596420252, + "grad_norm": 1.6285852796284217, + "learning_rate": 4.03201240070056e-06, + "loss": 0.6787, + "step": 23246 + }, + { + "epoch": 0.7124862081647665, + "grad_norm": 1.698733415602117, + "learning_rate": 4.0312159482180215e-06, + "loss": 0.7612, + "step": 23247 + }, + { + "epoch": 0.7125168566875076, + "grad_norm": 1.8802396070850775, + "learning_rate": 4.030419554547441e-06, + "loss": 0.7726, + "step": 23248 + }, + { + "epoch": 0.7125475052102489, + "grad_norm": 1.5829177310161255, + "learning_rate": 4.0296232196966626e-06, + "loss": 0.6467, + "step": 23249 + }, + { + "epoch": 0.71257815373299, + "grad_norm": 1.7793266463370094, + "learning_rate": 4.028826943673533e-06, + "loss": 0.7843, + "step": 23250 + }, + { + "epoch": 0.7126088022557313, + "grad_norm": 1.5403506530729265, + "learning_rate": 4.028030726485902e-06, + "loss": 0.6101, + "step": 23251 + }, + { + "epoch": 0.7126394507784725, + "grad_norm": 1.857412680580375, + "learning_rate": 4.0272345681416106e-06, + "loss": 0.699, + "step": 23252 + }, + { + "epoch": 0.7126700993012137, + "grad_norm": 1.6931205128604092, + "learning_rate": 4.026438468648504e-06, + "loss": 0.6619, + "step": 23253 + }, + { + "epoch": 0.7127007478239549, + "grad_norm": 1.6318934280322106, + "learning_rate": 4.025642428014431e-06, + "loss": 0.7072, + "step": 23254 + }, + { + "epoch": 0.7127313963466961, + "grad_norm": 1.7033594825573803, + "learning_rate": 4.024846446247228e-06, + "loss": 0.5552, + "step": 23255 + }, + { + "epoch": 0.7127620448694373, + "grad_norm": 0.6852533693617968, + "learning_rate": 4.024050523354747e-06, + "loss": 0.5232, + "step": 23256 + }, + { + "epoch": 0.7127926933921785, + "grad_norm": 1.786553863542299, + "learning_rate": 4.0232546593448195e-06, + "loss": 0.6509, + "step": 23257 + }, + { + "epoch": 0.7128233419149197, + "grad_norm": 1.781949726179714, + "learning_rate": 4.022458854225294e-06, + "loss": 0.7658, + "step": 23258 + }, + { + "epoch": 0.712853990437661, + "grad_norm": 1.4216290734526191, + "learning_rate": 4.0216631080040145e-06, + "loss": 0.6054, + "step": 23259 + }, + { + "epoch": 0.7128846389604021, + "grad_norm": 0.6796386870246807, + "learning_rate": 4.020867420688815e-06, + "loss": 0.5454, + "step": 23260 + }, + { + "epoch": 0.7129152874831434, + "grad_norm": 1.731043797508513, + "learning_rate": 4.020071792287538e-06, + "loss": 0.7021, + "step": 23261 + }, + { + "epoch": 0.7129459360058845, + "grad_norm": 1.6827011968354608, + "learning_rate": 4.019276222808027e-06, + "loss": 0.6777, + "step": 23262 + }, + { + "epoch": 0.7129765845286257, + "grad_norm": 1.5764611386517742, + "learning_rate": 4.018480712258114e-06, + "loss": 0.59, + "step": 23263 + }, + { + "epoch": 0.7130072330513669, + "grad_norm": 1.4738162794711855, + "learning_rate": 4.0176852606456415e-06, + "loss": 0.6353, + "step": 23264 + }, + { + "epoch": 0.7130378815741081, + "grad_norm": 1.6035725403717862, + "learning_rate": 4.0168898679784495e-06, + "loss": 0.6906, + "step": 23265 + }, + { + "epoch": 0.7130685300968493, + "grad_norm": 1.758288184811549, + "learning_rate": 4.016094534264369e-06, + "loss": 0.6696, + "step": 23266 + }, + { + "epoch": 0.7130991786195905, + "grad_norm": 1.686352314044357, + "learning_rate": 4.015299259511245e-06, + "loss": 0.6495, + "step": 23267 + }, + { + "epoch": 0.7131298271423318, + "grad_norm": 1.7043671083780494, + "learning_rate": 4.014504043726905e-06, + "loss": 0.5829, + "step": 23268 + }, + { + "epoch": 0.7131604756650729, + "grad_norm": 1.6078379231691808, + "learning_rate": 4.013708886919188e-06, + "loss": 0.6518, + "step": 23269 + }, + { + "epoch": 0.7131911241878142, + "grad_norm": 1.5952126647463727, + "learning_rate": 4.012913789095932e-06, + "loss": 0.67, + "step": 23270 + }, + { + "epoch": 0.7132217727105553, + "grad_norm": 1.728501413543024, + "learning_rate": 4.0121187502649635e-06, + "loss": 0.6107, + "step": 23271 + }, + { + "epoch": 0.7132524212332966, + "grad_norm": 0.6537315110329636, + "learning_rate": 4.011323770434123e-06, + "loss": 0.517, + "step": 23272 + }, + { + "epoch": 0.7132830697560377, + "grad_norm": 0.6687344235929701, + "learning_rate": 4.0105288496112434e-06, + "loss": 0.5237, + "step": 23273 + }, + { + "epoch": 0.713313718278779, + "grad_norm": 0.6534778685807101, + "learning_rate": 4.009733987804153e-06, + "loss": 0.5389, + "step": 23274 + }, + { + "epoch": 0.7133443668015201, + "grad_norm": 1.6762779087291408, + "learning_rate": 4.008939185020687e-06, + "loss": 0.5882, + "step": 23275 + }, + { + "epoch": 0.7133750153242614, + "grad_norm": 1.487656419216837, + "learning_rate": 4.008144441268678e-06, + "loss": 0.633, + "step": 23276 + }, + { + "epoch": 0.7134056638470025, + "grad_norm": 1.84887809953487, + "learning_rate": 4.007349756555953e-06, + "loss": 0.619, + "step": 23277 + }, + { + "epoch": 0.7134363123697438, + "grad_norm": 1.807196842815115, + "learning_rate": 4.006555130890347e-06, + "loss": 0.6324, + "step": 23278 + }, + { + "epoch": 0.713466960892485, + "grad_norm": 1.6122301847571658, + "learning_rate": 4.005760564279683e-06, + "loss": 0.6278, + "step": 23279 + }, + { + "epoch": 0.7134976094152262, + "grad_norm": 1.7819214941361428, + "learning_rate": 4.0049660567317936e-06, + "loss": 0.6941, + "step": 23280 + }, + { + "epoch": 0.7135282579379674, + "grad_norm": 1.5828158354149546, + "learning_rate": 4.004171608254512e-06, + "loss": 0.7651, + "step": 23281 + }, + { + "epoch": 0.7135589064607086, + "grad_norm": 1.564212001119544, + "learning_rate": 4.003377218855657e-06, + "loss": 0.5648, + "step": 23282 + }, + { + "epoch": 0.7135895549834498, + "grad_norm": 1.7345638925524267, + "learning_rate": 4.002582888543062e-06, + "loss": 0.7071, + "step": 23283 + }, + { + "epoch": 0.713620203506191, + "grad_norm": 1.6708908569743184, + "learning_rate": 4.001788617324554e-06, + "loss": 0.5533, + "step": 23284 + }, + { + "epoch": 0.7136508520289322, + "grad_norm": 1.4875229251284616, + "learning_rate": 4.000994405207956e-06, + "loss": 0.6976, + "step": 23285 + }, + { + "epoch": 0.7136815005516735, + "grad_norm": 1.6247200591843818, + "learning_rate": 4.000200252201094e-06, + "loss": 0.6488, + "step": 23286 + }, + { + "epoch": 0.7137121490744146, + "grad_norm": 1.9433930129479662, + "learning_rate": 3.999406158311797e-06, + "loss": 0.6404, + "step": 23287 + }, + { + "epoch": 0.7137427975971559, + "grad_norm": 1.7302085243443204, + "learning_rate": 3.998612123547884e-06, + "loss": 0.6307, + "step": 23288 + }, + { + "epoch": 0.713773446119897, + "grad_norm": 1.5902626142704175, + "learning_rate": 3.997818147917184e-06, + "loss": 0.5522, + "step": 23289 + }, + { + "epoch": 0.7138040946426383, + "grad_norm": 0.6714753509245044, + "learning_rate": 3.997024231427511e-06, + "loss": 0.5385, + "step": 23290 + }, + { + "epoch": 0.7138347431653794, + "grad_norm": 1.496405659811965, + "learning_rate": 3.9962303740867e-06, + "loss": 0.6929, + "step": 23291 + }, + { + "epoch": 0.7138653916881207, + "grad_norm": 1.6896378957523215, + "learning_rate": 3.99543657590257e-06, + "loss": 0.6714, + "step": 23292 + }, + { + "epoch": 0.7138960402108618, + "grad_norm": 1.7705721101580456, + "learning_rate": 3.994642836882933e-06, + "loss": 0.6318, + "step": 23293 + }, + { + "epoch": 0.713926688733603, + "grad_norm": 1.631271359971976, + "learning_rate": 3.993849157035619e-06, + "loss": 0.559, + "step": 23294 + }, + { + "epoch": 0.7139573372563442, + "grad_norm": 1.4290130362245737, + "learning_rate": 3.993055536368449e-06, + "loss": 0.6251, + "step": 23295 + }, + { + "epoch": 0.7139879857790854, + "grad_norm": 1.499859621399687, + "learning_rate": 3.992261974889236e-06, + "loss": 0.6825, + "step": 23296 + }, + { + "epoch": 0.7140186343018267, + "grad_norm": 0.6717854202282498, + "learning_rate": 3.991468472605802e-06, + "loss": 0.5563, + "step": 23297 + }, + { + "epoch": 0.7140492828245678, + "grad_norm": 1.7038868694467952, + "learning_rate": 3.990675029525971e-06, + "loss": 0.6401, + "step": 23298 + }, + { + "epoch": 0.7140799313473091, + "grad_norm": 1.60013887988103, + "learning_rate": 3.989881645657552e-06, + "loss": 0.6282, + "step": 23299 + }, + { + "epoch": 0.7141105798700502, + "grad_norm": 1.7525310817365378, + "learning_rate": 3.989088321008372e-06, + "loss": 0.697, + "step": 23300 + }, + { + "epoch": 0.7141412283927915, + "grad_norm": 0.6859293695853356, + "learning_rate": 3.988295055586237e-06, + "loss": 0.538, + "step": 23301 + }, + { + "epoch": 0.7141718769155326, + "grad_norm": 0.6862897382360178, + "learning_rate": 3.987501849398972e-06, + "loss": 0.5406, + "step": 23302 + }, + { + "epoch": 0.7142025254382739, + "grad_norm": 1.4784886270171878, + "learning_rate": 3.986708702454391e-06, + "loss": 0.5843, + "step": 23303 + }, + { + "epoch": 0.714233173961015, + "grad_norm": 1.5936853331364218, + "learning_rate": 3.985915614760304e-06, + "loss": 0.5568, + "step": 23304 + }, + { + "epoch": 0.7142638224837563, + "grad_norm": 1.574034908010274, + "learning_rate": 3.98512258632453e-06, + "loss": 0.6575, + "step": 23305 + }, + { + "epoch": 0.7142944710064975, + "grad_norm": 0.6308001645187606, + "learning_rate": 3.984329617154886e-06, + "loss": 0.4967, + "step": 23306 + }, + { + "epoch": 0.7143251195292387, + "grad_norm": 1.9653617291381331, + "learning_rate": 3.983536707259177e-06, + "loss": 0.7154, + "step": 23307 + }, + { + "epoch": 0.7143557680519799, + "grad_norm": 1.6622799487187425, + "learning_rate": 3.982743856645225e-06, + "loss": 0.705, + "step": 23308 + }, + { + "epoch": 0.7143864165747211, + "grad_norm": 1.8801328309850793, + "learning_rate": 3.981951065320829e-06, + "loss": 0.789, + "step": 23309 + }, + { + "epoch": 0.7144170650974623, + "grad_norm": 1.6887047354354146, + "learning_rate": 3.981158333293817e-06, + "loss": 0.615, + "step": 23310 + }, + { + "epoch": 0.7144477136202035, + "grad_norm": 1.7830891193151521, + "learning_rate": 3.980365660571991e-06, + "loss": 0.6915, + "step": 23311 + }, + { + "epoch": 0.7144783621429447, + "grad_norm": 0.6411565127927282, + "learning_rate": 3.979573047163159e-06, + "loss": 0.5234, + "step": 23312 + }, + { + "epoch": 0.714509010665686, + "grad_norm": 1.7371437933567022, + "learning_rate": 3.978780493075135e-06, + "loss": 0.6362, + "step": 23313 + }, + { + "epoch": 0.7145396591884271, + "grad_norm": 0.6625275908296946, + "learning_rate": 3.9779879983157296e-06, + "loss": 0.523, + "step": 23314 + }, + { + "epoch": 0.7145703077111684, + "grad_norm": 1.7401439782380022, + "learning_rate": 3.977195562892747e-06, + "loss": 0.8566, + "step": 23315 + }, + { + "epoch": 0.7146009562339095, + "grad_norm": 1.820269315576997, + "learning_rate": 3.976403186813997e-06, + "loss": 0.6733, + "step": 23316 + }, + { + "epoch": 0.7146316047566508, + "grad_norm": 0.6461601537854299, + "learning_rate": 3.9756108700872905e-06, + "loss": 0.5324, + "step": 23317 + }, + { + "epoch": 0.7146622532793919, + "grad_norm": 1.6592391165620648, + "learning_rate": 3.974818612720429e-06, + "loss": 0.5728, + "step": 23318 + }, + { + "epoch": 0.7146929018021332, + "grad_norm": 1.472820384760725, + "learning_rate": 3.974026414721225e-06, + "loss": 0.6552, + "step": 23319 + }, + { + "epoch": 0.7147235503248743, + "grad_norm": 1.6855166510440305, + "learning_rate": 3.973234276097473e-06, + "loss": 0.7033, + "step": 23320 + }, + { + "epoch": 0.7147541988476156, + "grad_norm": 1.7601667845078495, + "learning_rate": 3.972442196856993e-06, + "loss": 0.6943, + "step": 23321 + }, + { + "epoch": 0.7147848473703567, + "grad_norm": 1.7149913900646505, + "learning_rate": 3.971650177007581e-06, + "loss": 0.7176, + "step": 23322 + }, + { + "epoch": 0.714815495893098, + "grad_norm": 1.668237991197202, + "learning_rate": 3.97085821655704e-06, + "loss": 0.6693, + "step": 23323 + }, + { + "epoch": 0.7148461444158392, + "grad_norm": 1.7329513256602191, + "learning_rate": 3.970066315513174e-06, + "loss": 0.6214, + "step": 23324 + }, + { + "epoch": 0.7148767929385803, + "grad_norm": 0.6560727530328065, + "learning_rate": 3.969274473883793e-06, + "loss": 0.5348, + "step": 23325 + }, + { + "epoch": 0.7149074414613216, + "grad_norm": 0.6578109353509457, + "learning_rate": 3.96848269167669e-06, + "loss": 0.5228, + "step": 23326 + }, + { + "epoch": 0.7149380899840627, + "grad_norm": 1.6791665861707428, + "learning_rate": 3.967690968899669e-06, + "loss": 0.6157, + "step": 23327 + }, + { + "epoch": 0.714968738506804, + "grad_norm": 0.6572773850770329, + "learning_rate": 3.966899305560533e-06, + "loss": 0.5235, + "step": 23328 + }, + { + "epoch": 0.7149993870295451, + "grad_norm": 1.8672386998983856, + "learning_rate": 3.966107701667085e-06, + "loss": 0.6404, + "step": 23329 + }, + { + "epoch": 0.7150300355522864, + "grad_norm": 1.4914556045918321, + "learning_rate": 3.965316157227122e-06, + "loss": 0.6496, + "step": 23330 + }, + { + "epoch": 0.7150606840750275, + "grad_norm": 0.6583961713103442, + "learning_rate": 3.964524672248435e-06, + "loss": 0.5382, + "step": 23331 + }, + { + "epoch": 0.7150913325977688, + "grad_norm": 1.526259338267031, + "learning_rate": 3.963733246738839e-06, + "loss": 0.6334, + "step": 23332 + }, + { + "epoch": 0.71512198112051, + "grad_norm": 1.7305061974841183, + "learning_rate": 3.962941880706124e-06, + "loss": 0.6836, + "step": 23333 + }, + { + "epoch": 0.7151526296432512, + "grad_norm": 1.6263976576549262, + "learning_rate": 3.962150574158082e-06, + "loss": 0.6708, + "step": 23334 + }, + { + "epoch": 0.7151832781659924, + "grad_norm": 1.643276090290119, + "learning_rate": 3.961359327102517e-06, + "loss": 0.7386, + "step": 23335 + }, + { + "epoch": 0.7152139266887336, + "grad_norm": 1.5786911494050115, + "learning_rate": 3.960568139547222e-06, + "loss": 0.6941, + "step": 23336 + }, + { + "epoch": 0.7152445752114748, + "grad_norm": 1.4926946230593174, + "learning_rate": 3.959777011499999e-06, + "loss": 0.6635, + "step": 23337 + }, + { + "epoch": 0.715275223734216, + "grad_norm": 0.6535746735692423, + "learning_rate": 3.958985942968635e-06, + "loss": 0.5138, + "step": 23338 + }, + { + "epoch": 0.7153058722569572, + "grad_norm": 1.576192409471088, + "learning_rate": 3.958194933960927e-06, + "loss": 0.6986, + "step": 23339 + }, + { + "epoch": 0.7153365207796984, + "grad_norm": 1.6164562962089568, + "learning_rate": 3.957403984484675e-06, + "loss": 0.7076, + "step": 23340 + }, + { + "epoch": 0.7153671693024396, + "grad_norm": 1.5889916569118288, + "learning_rate": 3.956613094547668e-06, + "loss": 0.6137, + "step": 23341 + }, + { + "epoch": 0.7153978178251809, + "grad_norm": 1.4583172774737703, + "learning_rate": 3.95582226415769e-06, + "loss": 0.6289, + "step": 23342 + }, + { + "epoch": 0.715428466347922, + "grad_norm": 5.954640140533, + "learning_rate": 3.95503149332255e-06, + "loss": 0.5813, + "step": 23343 + }, + { + "epoch": 0.7154591148706633, + "grad_norm": 1.6050594427831446, + "learning_rate": 3.954240782050031e-06, + "loss": 0.7593, + "step": 23344 + }, + { + "epoch": 0.7154897633934044, + "grad_norm": 1.7394913503619953, + "learning_rate": 3.95345013034792e-06, + "loss": 0.6243, + "step": 23345 + }, + { + "epoch": 0.7155204119161457, + "grad_norm": 0.6490042213329913, + "learning_rate": 3.952659538224013e-06, + "loss": 0.5365, + "step": 23346 + }, + { + "epoch": 0.7155510604388868, + "grad_norm": 1.5366649687238296, + "learning_rate": 3.951869005686098e-06, + "loss": 0.637, + "step": 23347 + }, + { + "epoch": 0.7155817089616281, + "grad_norm": 1.767100045227204, + "learning_rate": 3.9510785327419685e-06, + "loss": 0.7453, + "step": 23348 + }, + { + "epoch": 0.7156123574843692, + "grad_norm": 1.8253935181648013, + "learning_rate": 3.950288119399408e-06, + "loss": 0.7536, + "step": 23349 + }, + { + "epoch": 0.7156430060071105, + "grad_norm": 0.6614700318900781, + "learning_rate": 3.9494977656662044e-06, + "loss": 0.5418, + "step": 23350 + }, + { + "epoch": 0.7156736545298517, + "grad_norm": 1.5913053341297547, + "learning_rate": 3.948707471550153e-06, + "loss": 0.6255, + "step": 23351 + }, + { + "epoch": 0.7157043030525929, + "grad_norm": 1.7562097207733118, + "learning_rate": 3.9479172370590345e-06, + "loss": 0.7257, + "step": 23352 + }, + { + "epoch": 0.7157349515753341, + "grad_norm": 1.8499929387311604, + "learning_rate": 3.947127062200632e-06, + "loss": 0.6744, + "step": 23353 + }, + { + "epoch": 0.7157656000980753, + "grad_norm": 1.7346304674346218, + "learning_rate": 3.946336946982735e-06, + "loss": 0.7402, + "step": 23354 + }, + { + "epoch": 0.7157962486208165, + "grad_norm": 1.4633223419113537, + "learning_rate": 3.94554689141313e-06, + "loss": 0.7131, + "step": 23355 + }, + { + "epoch": 0.7158268971435576, + "grad_norm": 0.6434167224219605, + "learning_rate": 3.944756895499603e-06, + "loss": 0.5067, + "step": 23356 + }, + { + "epoch": 0.7158575456662989, + "grad_norm": 1.7915720081375373, + "learning_rate": 3.943966959249933e-06, + "loss": 0.6588, + "step": 23357 + }, + { + "epoch": 0.71588819418904, + "grad_norm": 0.6879305067382299, + "learning_rate": 3.943177082671905e-06, + "loss": 0.5473, + "step": 23358 + }, + { + "epoch": 0.7159188427117813, + "grad_norm": 2.0908894911320552, + "learning_rate": 3.942387265773308e-06, + "loss": 0.7564, + "step": 23359 + }, + { + "epoch": 0.7159494912345225, + "grad_norm": 1.6340209281704998, + "learning_rate": 3.941597508561917e-06, + "loss": 0.65, + "step": 23360 + }, + { + "epoch": 0.7159801397572637, + "grad_norm": 1.5152292959701972, + "learning_rate": 3.94080781104551e-06, + "loss": 0.5935, + "step": 23361 + }, + { + "epoch": 0.7160107882800049, + "grad_norm": 0.6846682834239449, + "learning_rate": 3.940018173231882e-06, + "loss": 0.5701, + "step": 23362 + }, + { + "epoch": 0.7160414368027461, + "grad_norm": 1.8630546565875017, + "learning_rate": 3.9392285951288015e-06, + "loss": 0.7006, + "step": 23363 + }, + { + "epoch": 0.7160720853254873, + "grad_norm": 1.859145155695303, + "learning_rate": 3.938439076744055e-06, + "loss": 0.6137, + "step": 23364 + }, + { + "epoch": 0.7161027338482285, + "grad_norm": 1.5310143947024175, + "learning_rate": 3.937649618085416e-06, + "loss": 0.5899, + "step": 23365 + }, + { + "epoch": 0.7161333823709697, + "grad_norm": 1.5878797150018815, + "learning_rate": 3.936860219160666e-06, + "loss": 0.7134, + "step": 23366 + }, + { + "epoch": 0.716164030893711, + "grad_norm": 1.5303351311646833, + "learning_rate": 3.936070879977588e-06, + "loss": 0.6602, + "step": 23367 + }, + { + "epoch": 0.7161946794164521, + "grad_norm": 1.734355186807409, + "learning_rate": 3.935281600543951e-06, + "loss": 0.6927, + "step": 23368 + }, + { + "epoch": 0.7162253279391934, + "grad_norm": 1.598843428209045, + "learning_rate": 3.934492380867536e-06, + "loss": 0.6862, + "step": 23369 + }, + { + "epoch": 0.7162559764619345, + "grad_norm": 1.7604865308425632, + "learning_rate": 3.933703220956124e-06, + "loss": 0.7334, + "step": 23370 + }, + { + "epoch": 0.7162866249846758, + "grad_norm": 1.7336169154931094, + "learning_rate": 3.9329141208174855e-06, + "loss": 0.5915, + "step": 23371 + }, + { + "epoch": 0.7163172735074169, + "grad_norm": 1.5598623972106553, + "learning_rate": 3.9321250804593895e-06, + "loss": 0.6055, + "step": 23372 + }, + { + "epoch": 0.7163479220301582, + "grad_norm": 1.6741764845265665, + "learning_rate": 3.931336099889624e-06, + "loss": 0.6538, + "step": 23373 + }, + { + "epoch": 0.7163785705528993, + "grad_norm": 0.6483040394713653, + "learning_rate": 3.930547179115955e-06, + "loss": 0.5192, + "step": 23374 + }, + { + "epoch": 0.7164092190756406, + "grad_norm": 1.6913544308212694, + "learning_rate": 3.92975831814616e-06, + "loss": 0.6711, + "step": 23375 + }, + { + "epoch": 0.7164398675983817, + "grad_norm": 1.7277555837621248, + "learning_rate": 3.928969516988006e-06, + "loss": 0.6552, + "step": 23376 + }, + { + "epoch": 0.716470516121123, + "grad_norm": 1.7008876714364307, + "learning_rate": 3.928180775649269e-06, + "loss": 0.7191, + "step": 23377 + }, + { + "epoch": 0.7165011646438642, + "grad_norm": 1.7461703384982672, + "learning_rate": 3.927392094137723e-06, + "loss": 0.6196, + "step": 23378 + }, + { + "epoch": 0.7165318131666054, + "grad_norm": 1.5143818542359229, + "learning_rate": 3.926603472461134e-06, + "loss": 0.6963, + "step": 23379 + }, + { + "epoch": 0.7165624616893466, + "grad_norm": 1.4526783638614795, + "learning_rate": 3.9258149106272735e-06, + "loss": 0.6359, + "step": 23380 + }, + { + "epoch": 0.7165931102120878, + "grad_norm": 1.7690693302751435, + "learning_rate": 3.925026408643917e-06, + "loss": 0.625, + "step": 23381 + }, + { + "epoch": 0.716623758734829, + "grad_norm": 1.5696905897013405, + "learning_rate": 3.924237966518826e-06, + "loss": 0.6498, + "step": 23382 + }, + { + "epoch": 0.7166544072575702, + "grad_norm": 1.5784168354956878, + "learning_rate": 3.923449584259773e-06, + "loss": 0.5802, + "step": 23383 + }, + { + "epoch": 0.7166850557803114, + "grad_norm": 0.6791112941764815, + "learning_rate": 3.92266126187453e-06, + "loss": 0.5536, + "step": 23384 + }, + { + "epoch": 0.7167157043030526, + "grad_norm": 1.6791353105204627, + "learning_rate": 3.921872999370857e-06, + "loss": 0.6594, + "step": 23385 + }, + { + "epoch": 0.7167463528257938, + "grad_norm": 1.4936876917679855, + "learning_rate": 3.9210847967565266e-06, + "loss": 0.6327, + "step": 23386 + }, + { + "epoch": 0.716777001348535, + "grad_norm": 0.6620025216715141, + "learning_rate": 3.920296654039302e-06, + "loss": 0.5191, + "step": 23387 + }, + { + "epoch": 0.7168076498712762, + "grad_norm": 1.6094197874787026, + "learning_rate": 3.9195085712269474e-06, + "loss": 0.6502, + "step": 23388 + }, + { + "epoch": 0.7168382983940174, + "grad_norm": 1.5822041924900416, + "learning_rate": 3.918720548327236e-06, + "loss": 0.5325, + "step": 23389 + }, + { + "epoch": 0.7168689469167586, + "grad_norm": 1.5605252325617758, + "learning_rate": 3.917932585347923e-06, + "loss": 0.6495, + "step": 23390 + }, + { + "epoch": 0.7168995954394998, + "grad_norm": 1.572406526694425, + "learning_rate": 3.917144682296776e-06, + "loss": 0.5659, + "step": 23391 + }, + { + "epoch": 0.716930243962241, + "grad_norm": 1.7365803467233845, + "learning_rate": 3.916356839181563e-06, + "loss": 0.6611, + "step": 23392 + }, + { + "epoch": 0.7169608924849822, + "grad_norm": 1.8269071496259692, + "learning_rate": 3.915569056010039e-06, + "loss": 0.6669, + "step": 23393 + }, + { + "epoch": 0.7169915410077234, + "grad_norm": 0.6958424268136508, + "learning_rate": 3.914781332789969e-06, + "loss": 0.5344, + "step": 23394 + }, + { + "epoch": 0.7170221895304646, + "grad_norm": 1.4886531209713068, + "learning_rate": 3.913993669529119e-06, + "loss": 0.6805, + "step": 23395 + }, + { + "epoch": 0.7170528380532059, + "grad_norm": 1.5400502386164416, + "learning_rate": 3.913206066235245e-06, + "loss": 0.5972, + "step": 23396 + }, + { + "epoch": 0.717083486575947, + "grad_norm": 0.7215300119583771, + "learning_rate": 3.91241852291611e-06, + "loss": 0.5443, + "step": 23397 + }, + { + "epoch": 0.7171141350986883, + "grad_norm": 1.5517160994081043, + "learning_rate": 3.911631039579471e-06, + "loss": 0.6423, + "step": 23398 + }, + { + "epoch": 0.7171447836214294, + "grad_norm": 1.763692899269798, + "learning_rate": 3.910843616233089e-06, + "loss": 0.718, + "step": 23399 + }, + { + "epoch": 0.7171754321441707, + "grad_norm": 1.6862408728109233, + "learning_rate": 3.910056252884725e-06, + "loss": 0.688, + "step": 23400 + }, + { + "epoch": 0.7172060806669118, + "grad_norm": 1.625439732805005, + "learning_rate": 3.909268949542133e-06, + "loss": 0.6477, + "step": 23401 + }, + { + "epoch": 0.7172367291896531, + "grad_norm": 1.7089770949495833, + "learning_rate": 3.908481706213072e-06, + "loss": 0.6018, + "step": 23402 + }, + { + "epoch": 0.7172673777123942, + "grad_norm": 1.8160478508610618, + "learning_rate": 3.907694522905302e-06, + "loss": 0.6707, + "step": 23403 + }, + { + "epoch": 0.7172980262351355, + "grad_norm": 1.8116886461845174, + "learning_rate": 3.906907399626574e-06, + "loss": 0.7552, + "step": 23404 + }, + { + "epoch": 0.7173286747578766, + "grad_norm": 1.7415677303268216, + "learning_rate": 3.90612033638465e-06, + "loss": 0.5895, + "step": 23405 + }, + { + "epoch": 0.7173593232806179, + "grad_norm": 1.608017780825379, + "learning_rate": 3.9053333331872775e-06, + "loss": 0.692, + "step": 23406 + }, + { + "epoch": 0.7173899718033591, + "grad_norm": 1.515643736756332, + "learning_rate": 3.904546390042216e-06, + "loss": 0.6649, + "step": 23407 + }, + { + "epoch": 0.7174206203261003, + "grad_norm": 0.6440662395711491, + "learning_rate": 3.90375950695722e-06, + "loss": 0.5192, + "step": 23408 + }, + { + "epoch": 0.7174512688488415, + "grad_norm": 1.776606794380402, + "learning_rate": 3.9029726839400396e-06, + "loss": 0.6193, + "step": 23409 + }, + { + "epoch": 0.7174819173715827, + "grad_norm": 1.5107214675900869, + "learning_rate": 3.902185920998429e-06, + "loss": 0.5846, + "step": 23410 + }, + { + "epoch": 0.7175125658943239, + "grad_norm": 1.567064445098181, + "learning_rate": 3.901399218140144e-06, + "loss": 0.6805, + "step": 23411 + }, + { + "epoch": 0.7175432144170651, + "grad_norm": 1.5328124536391698, + "learning_rate": 3.90061257537293e-06, + "loss": 0.6473, + "step": 23412 + }, + { + "epoch": 0.7175738629398063, + "grad_norm": 1.4921476111886534, + "learning_rate": 3.89982599270454e-06, + "loss": 0.6556, + "step": 23413 + }, + { + "epoch": 0.7176045114625476, + "grad_norm": 1.671437027911705, + "learning_rate": 3.899039470142729e-06, + "loss": 0.6591, + "step": 23414 + }, + { + "epoch": 0.7176351599852887, + "grad_norm": 1.7415713680231673, + "learning_rate": 3.8982530076952395e-06, + "loss": 0.7157, + "step": 23415 + }, + { + "epoch": 0.71766580850803, + "grad_norm": 1.5518126928172977, + "learning_rate": 3.897466605369828e-06, + "loss": 0.6167, + "step": 23416 + }, + { + "epoch": 0.7176964570307711, + "grad_norm": 0.6755960746684598, + "learning_rate": 3.8966802631742325e-06, + "loss": 0.5441, + "step": 23417 + }, + { + "epoch": 0.7177271055535123, + "grad_norm": 1.5449022850084333, + "learning_rate": 3.895893981116214e-06, + "loss": 0.5131, + "step": 23418 + }, + { + "epoch": 0.7177577540762535, + "grad_norm": 1.4065041492332981, + "learning_rate": 3.895107759203516e-06, + "loss": 0.6355, + "step": 23419 + }, + { + "epoch": 0.7177884025989947, + "grad_norm": 1.4416422568632414, + "learning_rate": 3.894321597443879e-06, + "loss": 0.5813, + "step": 23420 + }, + { + "epoch": 0.7178190511217359, + "grad_norm": 0.69884563814382, + "learning_rate": 3.893535495845052e-06, + "loss": 0.5409, + "step": 23421 + }, + { + "epoch": 0.7178496996444771, + "grad_norm": 1.6762031581900891, + "learning_rate": 3.892749454414787e-06, + "loss": 0.7502, + "step": 23422 + }, + { + "epoch": 0.7178803481672184, + "grad_norm": 1.6016010552082007, + "learning_rate": 3.89196347316082e-06, + "loss": 0.6906, + "step": 23423 + }, + { + "epoch": 0.7179109966899595, + "grad_norm": 1.769531858306547, + "learning_rate": 3.891177552090901e-06, + "loss": 0.6379, + "step": 23424 + }, + { + "epoch": 0.7179416452127008, + "grad_norm": 1.4815437520274857, + "learning_rate": 3.890391691212775e-06, + "loss": 0.6138, + "step": 23425 + }, + { + "epoch": 0.7179722937354419, + "grad_norm": 1.6076818678660825, + "learning_rate": 3.8896058905341805e-06, + "loss": 0.6162, + "step": 23426 + }, + { + "epoch": 0.7180029422581832, + "grad_norm": 1.564458014083452, + "learning_rate": 3.8888201500628655e-06, + "loss": 0.5963, + "step": 23427 + }, + { + "epoch": 0.7180335907809243, + "grad_norm": 0.6565408062368862, + "learning_rate": 3.888034469806561e-06, + "loss": 0.5226, + "step": 23428 + }, + { + "epoch": 0.7180642393036656, + "grad_norm": 1.9159640741937887, + "learning_rate": 3.887248849773025e-06, + "loss": 0.7144, + "step": 23429 + }, + { + "epoch": 0.7180948878264067, + "grad_norm": 1.7395471542536944, + "learning_rate": 3.886463289969989e-06, + "loss": 0.6455, + "step": 23430 + }, + { + "epoch": 0.718125536349148, + "grad_norm": 1.595198858291383, + "learning_rate": 3.885677790405193e-06, + "loss": 0.6956, + "step": 23431 + }, + { + "epoch": 0.7181561848718891, + "grad_norm": 0.6489148265519362, + "learning_rate": 3.884892351086376e-06, + "loss": 0.535, + "step": 23432 + }, + { + "epoch": 0.7181868333946304, + "grad_norm": 1.5622001365977751, + "learning_rate": 3.8841069720212835e-06, + "loss": 0.6986, + "step": 23433 + }, + { + "epoch": 0.7182174819173716, + "grad_norm": 1.6058726205452534, + "learning_rate": 3.883321653217646e-06, + "loss": 0.611, + "step": 23434 + }, + { + "epoch": 0.7182481304401128, + "grad_norm": 1.3926171773246356, + "learning_rate": 3.882536394683206e-06, + "loss": 0.5501, + "step": 23435 + }, + { + "epoch": 0.718278778962854, + "grad_norm": 1.4505739482966924, + "learning_rate": 3.8817511964256995e-06, + "loss": 0.5468, + "step": 23436 + }, + { + "epoch": 0.7183094274855952, + "grad_norm": 1.6974409186273385, + "learning_rate": 3.880966058452867e-06, + "loss": 0.6665, + "step": 23437 + }, + { + "epoch": 0.7183400760083364, + "grad_norm": 1.5187390051970233, + "learning_rate": 3.880180980772443e-06, + "loss": 0.5527, + "step": 23438 + }, + { + "epoch": 0.7183707245310776, + "grad_norm": 1.4542182342552483, + "learning_rate": 3.879395963392154e-06, + "loss": 0.6117, + "step": 23439 + }, + { + "epoch": 0.7184013730538188, + "grad_norm": 1.6115104084661847, + "learning_rate": 3.878611006319749e-06, + "loss": 0.6416, + "step": 23440 + }, + { + "epoch": 0.71843202157656, + "grad_norm": 0.6801066469390741, + "learning_rate": 3.877826109562957e-06, + "loss": 0.5488, + "step": 23441 + }, + { + "epoch": 0.7184626700993012, + "grad_norm": 1.4643301967108124, + "learning_rate": 3.877041273129506e-06, + "loss": 0.5574, + "step": 23442 + }, + { + "epoch": 0.7184933186220425, + "grad_norm": 1.5228622143066561, + "learning_rate": 3.876256497027135e-06, + "loss": 0.6115, + "step": 23443 + }, + { + "epoch": 0.7185239671447836, + "grad_norm": 1.7506440275326156, + "learning_rate": 3.875471781263576e-06, + "loss": 0.6427, + "step": 23444 + }, + { + "epoch": 0.7185546156675249, + "grad_norm": 1.620017012812322, + "learning_rate": 3.874687125846562e-06, + "loss": 0.6188, + "step": 23445 + }, + { + "epoch": 0.718585264190266, + "grad_norm": 1.5689890764827104, + "learning_rate": 3.873902530783822e-06, + "loss": 0.6643, + "step": 23446 + }, + { + "epoch": 0.7186159127130073, + "grad_norm": 1.7889649172347315, + "learning_rate": 3.873117996083085e-06, + "loss": 0.7063, + "step": 23447 + }, + { + "epoch": 0.7186465612357484, + "grad_norm": 1.8958985730194724, + "learning_rate": 3.87233352175209e-06, + "loss": 0.6472, + "step": 23448 + }, + { + "epoch": 0.7186772097584896, + "grad_norm": 0.672581035682863, + "learning_rate": 3.87154910779856e-06, + "loss": 0.5611, + "step": 23449 + }, + { + "epoch": 0.7187078582812308, + "grad_norm": 1.6381095112059865, + "learning_rate": 3.87076475423022e-06, + "loss": 0.6801, + "step": 23450 + }, + { + "epoch": 0.718738506803972, + "grad_norm": 1.4681977135755453, + "learning_rate": 3.869980461054804e-06, + "loss": 0.5774, + "step": 23451 + }, + { + "epoch": 0.7187691553267133, + "grad_norm": 0.6336339138505284, + "learning_rate": 3.869196228280043e-06, + "loss": 0.5091, + "step": 23452 + }, + { + "epoch": 0.7187998038494544, + "grad_norm": 0.6502348740104887, + "learning_rate": 3.868412055913656e-06, + "loss": 0.5498, + "step": 23453 + }, + { + "epoch": 0.7188304523721957, + "grad_norm": 1.5285644289103724, + "learning_rate": 3.867627943963373e-06, + "loss": 0.6925, + "step": 23454 + }, + { + "epoch": 0.7188611008949368, + "grad_norm": 1.6309561522885119, + "learning_rate": 3.866843892436922e-06, + "loss": 0.7573, + "step": 23455 + }, + { + "epoch": 0.7188917494176781, + "grad_norm": 1.5355440635227438, + "learning_rate": 3.866059901342032e-06, + "loss": 0.7838, + "step": 23456 + }, + { + "epoch": 0.7189223979404192, + "grad_norm": 1.6560624163612647, + "learning_rate": 3.865275970686422e-06, + "loss": 0.6759, + "step": 23457 + }, + { + "epoch": 0.7189530464631605, + "grad_norm": 1.500124803507911, + "learning_rate": 3.86449210047781e-06, + "loss": 0.6175, + "step": 23458 + }, + { + "epoch": 0.7189836949859016, + "grad_norm": 1.5137685302137778, + "learning_rate": 3.863708290723935e-06, + "loss": 0.7175, + "step": 23459 + }, + { + "epoch": 0.7190143435086429, + "grad_norm": 1.631927892892067, + "learning_rate": 3.862924541432511e-06, + "loss": 0.5882, + "step": 23460 + }, + { + "epoch": 0.7190449920313841, + "grad_norm": 1.8420942015131456, + "learning_rate": 3.862140852611259e-06, + "loss": 0.5599, + "step": 23461 + }, + { + "epoch": 0.7190756405541253, + "grad_norm": 1.6348423259742506, + "learning_rate": 3.8613572242679045e-06, + "loss": 0.665, + "step": 23462 + }, + { + "epoch": 0.7191062890768665, + "grad_norm": 1.6151345171394966, + "learning_rate": 3.860573656410167e-06, + "loss": 0.6867, + "step": 23463 + }, + { + "epoch": 0.7191369375996077, + "grad_norm": 1.5468041008862918, + "learning_rate": 3.8597901490457716e-06, + "loss": 0.6524, + "step": 23464 + }, + { + "epoch": 0.7191675861223489, + "grad_norm": 1.5576649429360019, + "learning_rate": 3.859006702182432e-06, + "loss": 0.4908, + "step": 23465 + }, + { + "epoch": 0.7191982346450901, + "grad_norm": 1.6033393023314835, + "learning_rate": 3.858223315827869e-06, + "loss": 0.6183, + "step": 23466 + }, + { + "epoch": 0.7192288831678313, + "grad_norm": 1.78138173781622, + "learning_rate": 3.857439989989809e-06, + "loss": 0.7062, + "step": 23467 + }, + { + "epoch": 0.7192595316905726, + "grad_norm": 1.550830034969113, + "learning_rate": 3.856656724675962e-06, + "loss": 0.6256, + "step": 23468 + }, + { + "epoch": 0.7192901802133137, + "grad_norm": 1.480110437861246, + "learning_rate": 3.855873519894043e-06, + "loss": 0.6641, + "step": 23469 + }, + { + "epoch": 0.719320828736055, + "grad_norm": 1.7396119010605084, + "learning_rate": 3.855090375651781e-06, + "loss": 0.683, + "step": 23470 + }, + { + "epoch": 0.7193514772587961, + "grad_norm": 0.6647188825297313, + "learning_rate": 3.854307291956881e-06, + "loss": 0.5452, + "step": 23471 + }, + { + "epoch": 0.7193821257815374, + "grad_norm": 1.892344669894396, + "learning_rate": 3.853524268817068e-06, + "loss": 0.6842, + "step": 23472 + }, + { + "epoch": 0.7194127743042785, + "grad_norm": 1.5728674746966147, + "learning_rate": 3.85274130624005e-06, + "loss": 0.6617, + "step": 23473 + }, + { + "epoch": 0.7194434228270198, + "grad_norm": 1.5985190578552984, + "learning_rate": 3.851958404233545e-06, + "loss": 0.7069, + "step": 23474 + }, + { + "epoch": 0.7194740713497609, + "grad_norm": 1.634297553224433, + "learning_rate": 3.85117556280527e-06, + "loss": 0.8014, + "step": 23475 + }, + { + "epoch": 0.7195047198725022, + "grad_norm": 0.6720539282136874, + "learning_rate": 3.85039278196293e-06, + "loss": 0.548, + "step": 23476 + }, + { + "epoch": 0.7195353683952433, + "grad_norm": 1.554578163309357, + "learning_rate": 3.849610061714245e-06, + "loss": 0.6244, + "step": 23477 + }, + { + "epoch": 0.7195660169179846, + "grad_norm": 1.7171750002270916, + "learning_rate": 3.84882740206693e-06, + "loss": 0.6385, + "step": 23478 + }, + { + "epoch": 0.7195966654407258, + "grad_norm": 1.4319233535560527, + "learning_rate": 3.848044803028691e-06, + "loss": 0.5684, + "step": 23479 + }, + { + "epoch": 0.7196273139634669, + "grad_norm": 1.5030301863420081, + "learning_rate": 3.8472622646072344e-06, + "loss": 0.6261, + "step": 23480 + }, + { + "epoch": 0.7196579624862082, + "grad_norm": 1.5748179204488737, + "learning_rate": 3.846479786810284e-06, + "loss": 0.6983, + "step": 23481 + }, + { + "epoch": 0.7196886110089493, + "grad_norm": 1.6000629200359686, + "learning_rate": 3.8456973696455394e-06, + "loss": 0.5845, + "step": 23482 + }, + { + "epoch": 0.7197192595316906, + "grad_norm": 0.642791538551977, + "learning_rate": 3.844915013120716e-06, + "loss": 0.5118, + "step": 23483 + }, + { + "epoch": 0.7197499080544317, + "grad_norm": 1.7197584099257934, + "learning_rate": 3.844132717243517e-06, + "loss": 0.6968, + "step": 23484 + }, + { + "epoch": 0.719780556577173, + "grad_norm": 1.9099565643845462, + "learning_rate": 3.843350482021653e-06, + "loss": 0.7114, + "step": 23485 + }, + { + "epoch": 0.7198112050999141, + "grad_norm": 1.779363449889473, + "learning_rate": 3.842568307462835e-06, + "loss": 0.607, + "step": 23486 + }, + { + "epoch": 0.7198418536226554, + "grad_norm": 1.5586313229533086, + "learning_rate": 3.841786193574765e-06, + "loss": 0.654, + "step": 23487 + }, + { + "epoch": 0.7198725021453966, + "grad_norm": 1.5476721986792163, + "learning_rate": 3.84100414036515e-06, + "loss": 0.6142, + "step": 23488 + }, + { + "epoch": 0.7199031506681378, + "grad_norm": 0.6652063212237621, + "learning_rate": 3.8402221478417e-06, + "loss": 0.542, + "step": 23489 + }, + { + "epoch": 0.719933799190879, + "grad_norm": 1.6315627424934358, + "learning_rate": 3.8394402160121145e-06, + "loss": 0.6184, + "step": 23490 + }, + { + "epoch": 0.7199644477136202, + "grad_norm": 1.904008493829751, + "learning_rate": 3.838658344884101e-06, + "loss": 0.6747, + "step": 23491 + }, + { + "epoch": 0.7199950962363614, + "grad_norm": 1.691471994163407, + "learning_rate": 3.837876534465367e-06, + "loss": 0.7361, + "step": 23492 + }, + { + "epoch": 0.7200257447591026, + "grad_norm": 1.5279157835693054, + "learning_rate": 3.837094784763608e-06, + "loss": 0.7111, + "step": 23493 + }, + { + "epoch": 0.7200563932818438, + "grad_norm": 1.642095874738827, + "learning_rate": 3.836313095786535e-06, + "loss": 0.7223, + "step": 23494 + }, + { + "epoch": 0.720087041804585, + "grad_norm": 1.5596568249889031, + "learning_rate": 3.835531467541842e-06, + "loss": 0.7134, + "step": 23495 + }, + { + "epoch": 0.7201176903273262, + "grad_norm": 1.2350043241202473, + "learning_rate": 3.834749900037235e-06, + "loss": 0.5254, + "step": 23496 + }, + { + "epoch": 0.7201483388500675, + "grad_norm": 1.800073768737865, + "learning_rate": 3.833968393280417e-06, + "loss": 0.6359, + "step": 23497 + }, + { + "epoch": 0.7201789873728086, + "grad_norm": 1.609877650973949, + "learning_rate": 3.833186947279084e-06, + "loss": 0.7487, + "step": 23498 + }, + { + "epoch": 0.7202096358955499, + "grad_norm": 0.6543217932566724, + "learning_rate": 3.832405562040938e-06, + "loss": 0.5475, + "step": 23499 + }, + { + "epoch": 0.720240284418291, + "grad_norm": 0.7320680799939357, + "learning_rate": 3.8316242375736815e-06, + "loss": 0.5638, + "step": 23500 + }, + { + "epoch": 0.7202709329410323, + "grad_norm": 0.6952235907196367, + "learning_rate": 3.830842973885005e-06, + "loss": 0.5502, + "step": 23501 + }, + { + "epoch": 0.7203015814637734, + "grad_norm": 1.5150199598679714, + "learning_rate": 3.830061770982616e-06, + "loss": 0.7072, + "step": 23502 + }, + { + "epoch": 0.7203322299865147, + "grad_norm": 1.6052272699072847, + "learning_rate": 3.829280628874203e-06, + "loss": 0.6719, + "step": 23503 + }, + { + "epoch": 0.7203628785092558, + "grad_norm": 1.582579044275908, + "learning_rate": 3.8284995475674655e-06, + "loss": 0.7164, + "step": 23504 + }, + { + "epoch": 0.7203935270319971, + "grad_norm": 1.6588537318484158, + "learning_rate": 3.827718527070107e-06, + "loss": 0.6674, + "step": 23505 + }, + { + "epoch": 0.7204241755547383, + "grad_norm": 0.8815261859185551, + "learning_rate": 3.826937567389812e-06, + "loss": 0.5854, + "step": 23506 + }, + { + "epoch": 0.7204548240774795, + "grad_norm": 1.8232836428726868, + "learning_rate": 3.826156668534281e-06, + "loss": 0.601, + "step": 23507 + }, + { + "epoch": 0.7204854726002207, + "grad_norm": 1.5927198364771558, + "learning_rate": 3.825375830511211e-06, + "loss": 0.6247, + "step": 23508 + }, + { + "epoch": 0.7205161211229619, + "grad_norm": 1.63018008826826, + "learning_rate": 3.824595053328289e-06, + "loss": 0.7667, + "step": 23509 + }, + { + "epoch": 0.7205467696457031, + "grad_norm": 1.4528327669725607, + "learning_rate": 3.823814336993213e-06, + "loss": 0.5316, + "step": 23510 + }, + { + "epoch": 0.7205774181684442, + "grad_norm": 0.6291115285098607, + "learning_rate": 3.823033681513678e-06, + "loss": 0.5279, + "step": 23511 + }, + { + "epoch": 0.7206080666911855, + "grad_norm": 1.7090542691226822, + "learning_rate": 3.82225308689737e-06, + "loss": 0.5521, + "step": 23512 + }, + { + "epoch": 0.7206387152139266, + "grad_norm": 1.6632113519413683, + "learning_rate": 3.821472553151984e-06, + "loss": 0.6181, + "step": 23513 + }, + { + "epoch": 0.7206693637366679, + "grad_norm": 1.8160572463751252, + "learning_rate": 3.820692080285208e-06, + "loss": 0.6043, + "step": 23514 + }, + { + "epoch": 0.720700012259409, + "grad_norm": 1.6490138084380954, + "learning_rate": 3.819911668304733e-06, + "loss": 0.73, + "step": 23515 + }, + { + "epoch": 0.7207306607821503, + "grad_norm": 0.6539038112346749, + "learning_rate": 3.8191313172182545e-06, + "loss": 0.5142, + "step": 23516 + }, + { + "epoch": 0.7207613093048915, + "grad_norm": 0.6833944844496687, + "learning_rate": 3.818351027033452e-06, + "loss": 0.5282, + "step": 23517 + }, + { + "epoch": 0.7207919578276327, + "grad_norm": 1.631388399200601, + "learning_rate": 3.817570797758018e-06, + "loss": 0.7001, + "step": 23518 + }, + { + "epoch": 0.7208226063503739, + "grad_norm": 0.633386573907981, + "learning_rate": 3.816790629399645e-06, + "loss": 0.514, + "step": 23519 + }, + { + "epoch": 0.7208532548731151, + "grad_norm": 1.7254278141871657, + "learning_rate": 3.816010521966013e-06, + "loss": 0.6978, + "step": 23520 + }, + { + "epoch": 0.7208839033958563, + "grad_norm": 1.6945469959904491, + "learning_rate": 3.81523047546481e-06, + "loss": 0.6985, + "step": 23521 + }, + { + "epoch": 0.7209145519185975, + "grad_norm": 1.5868795867090622, + "learning_rate": 3.8144504899037295e-06, + "loss": 0.6528, + "step": 23522 + }, + { + "epoch": 0.7209452004413387, + "grad_norm": 1.6957980346873374, + "learning_rate": 3.813670565290445e-06, + "loss": 0.6036, + "step": 23523 + }, + { + "epoch": 0.72097584896408, + "grad_norm": 1.7212021334041117, + "learning_rate": 3.8128907016326523e-06, + "loss": 0.706, + "step": 23524 + }, + { + "epoch": 0.7210064974868211, + "grad_norm": 1.4651537562732437, + "learning_rate": 3.812110898938026e-06, + "loss": 0.5855, + "step": 23525 + }, + { + "epoch": 0.7210371460095624, + "grad_norm": 1.686389155802489, + "learning_rate": 3.8113311572142554e-06, + "loss": 0.6805, + "step": 23526 + }, + { + "epoch": 0.7210677945323035, + "grad_norm": 0.6830357541820649, + "learning_rate": 3.8105514764690256e-06, + "loss": 0.552, + "step": 23527 + }, + { + "epoch": 0.7210984430550448, + "grad_norm": 1.77016581402958, + "learning_rate": 3.8097718567100117e-06, + "loss": 0.6284, + "step": 23528 + }, + { + "epoch": 0.7211290915777859, + "grad_norm": 1.5990280103358567, + "learning_rate": 3.808992297944899e-06, + "loss": 0.5566, + "step": 23529 + }, + { + "epoch": 0.7211597401005272, + "grad_norm": 1.5690766828528797, + "learning_rate": 3.8082128001813735e-06, + "loss": 0.6694, + "step": 23530 + }, + { + "epoch": 0.7211903886232683, + "grad_norm": 1.4598968332845337, + "learning_rate": 3.8074333634271076e-06, + "loss": 0.63, + "step": 23531 + }, + { + "epoch": 0.7212210371460096, + "grad_norm": 0.6471256350033282, + "learning_rate": 3.8066539876897855e-06, + "loss": 0.5291, + "step": 23532 + }, + { + "epoch": 0.7212516856687508, + "grad_norm": 1.666542811775366, + "learning_rate": 3.80587467297709e-06, + "loss": 0.7161, + "step": 23533 + }, + { + "epoch": 0.721282334191492, + "grad_norm": 0.6584639818045503, + "learning_rate": 3.8050954192966926e-06, + "loss": 0.5178, + "step": 23534 + }, + { + "epoch": 0.7213129827142332, + "grad_norm": 1.5970822013551123, + "learning_rate": 3.8043162266562794e-06, + "loss": 0.7986, + "step": 23535 + }, + { + "epoch": 0.7213436312369744, + "grad_norm": 1.8305650970023986, + "learning_rate": 3.8035370950635153e-06, + "loss": 0.7694, + "step": 23536 + }, + { + "epoch": 0.7213742797597156, + "grad_norm": 1.5869318968188004, + "learning_rate": 3.802758024526093e-06, + "loss": 0.7753, + "step": 23537 + }, + { + "epoch": 0.7214049282824568, + "grad_norm": 0.671359410618703, + "learning_rate": 3.801979015051682e-06, + "loss": 0.5085, + "step": 23538 + }, + { + "epoch": 0.721435576805198, + "grad_norm": 1.4949356232561182, + "learning_rate": 3.8012000666479533e-06, + "loss": 0.5974, + "step": 23539 + }, + { + "epoch": 0.7214662253279392, + "grad_norm": 1.7098834829207945, + "learning_rate": 3.8004211793225865e-06, + "loss": 0.6051, + "step": 23540 + }, + { + "epoch": 0.7214968738506804, + "grad_norm": 1.5193820037056625, + "learning_rate": 3.7996423530832606e-06, + "loss": 0.5835, + "step": 23541 + }, + { + "epoch": 0.7215275223734215, + "grad_norm": 1.6665549813319767, + "learning_rate": 3.79886358793764e-06, + "loss": 0.6769, + "step": 23542 + }, + { + "epoch": 0.7215581708961628, + "grad_norm": 1.8405058540447035, + "learning_rate": 3.7980848838934038e-06, + "loss": 0.6071, + "step": 23543 + }, + { + "epoch": 0.721588819418904, + "grad_norm": 1.4315809340792973, + "learning_rate": 3.797306240958225e-06, + "loss": 0.5193, + "step": 23544 + }, + { + "epoch": 0.7216194679416452, + "grad_norm": 0.6628187894815679, + "learning_rate": 3.796527659139777e-06, + "loss": 0.5576, + "step": 23545 + }, + { + "epoch": 0.7216501164643864, + "grad_norm": 1.8701678056055708, + "learning_rate": 3.79574913844573e-06, + "loss": 0.6348, + "step": 23546 + }, + { + "epoch": 0.7216807649871276, + "grad_norm": 1.6086409597016305, + "learning_rate": 3.7949706788837504e-06, + "loss": 0.6781, + "step": 23547 + }, + { + "epoch": 0.7217114135098688, + "grad_norm": 1.6092769360845787, + "learning_rate": 3.794192280461512e-06, + "loss": 0.628, + "step": 23548 + }, + { + "epoch": 0.72174206203261, + "grad_norm": 1.600691387541776, + "learning_rate": 3.793413943186689e-06, + "loss": 0.5995, + "step": 23549 + }, + { + "epoch": 0.7217727105553512, + "grad_norm": 1.6405297708846922, + "learning_rate": 3.7926356670669417e-06, + "loss": 0.682, + "step": 23550 + }, + { + "epoch": 0.7218033590780925, + "grad_norm": 0.6486215554114217, + "learning_rate": 3.7918574521099448e-06, + "loss": 0.5003, + "step": 23551 + }, + { + "epoch": 0.7218340076008336, + "grad_norm": 1.718529029471955, + "learning_rate": 3.791079298323368e-06, + "loss": 0.6628, + "step": 23552 + }, + { + "epoch": 0.7218646561235749, + "grad_norm": 1.4396290063145083, + "learning_rate": 3.7903012057148712e-06, + "loss": 0.5365, + "step": 23553 + }, + { + "epoch": 0.721895304646316, + "grad_norm": 1.534857227037561, + "learning_rate": 3.78952317429213e-06, + "loss": 0.6827, + "step": 23554 + }, + { + "epoch": 0.7219259531690573, + "grad_norm": 1.525351393909228, + "learning_rate": 3.788745204062798e-06, + "loss": 0.6257, + "step": 23555 + }, + { + "epoch": 0.7219566016917984, + "grad_norm": 1.8279861821852204, + "learning_rate": 3.787967295034557e-06, + "loss": 0.6992, + "step": 23556 + }, + { + "epoch": 0.7219872502145397, + "grad_norm": 1.8248255622796066, + "learning_rate": 3.787189447215063e-06, + "loss": 0.7614, + "step": 23557 + }, + { + "epoch": 0.7220178987372808, + "grad_norm": 1.5906570225894672, + "learning_rate": 3.7864116606119773e-06, + "loss": 0.6349, + "step": 23558 + }, + { + "epoch": 0.7220485472600221, + "grad_norm": 0.6493619075192899, + "learning_rate": 3.7856339352329673e-06, + "loss": 0.5322, + "step": 23559 + }, + { + "epoch": 0.7220791957827633, + "grad_norm": 0.6539821073341023, + "learning_rate": 3.7848562710856997e-06, + "loss": 0.5149, + "step": 23560 + }, + { + "epoch": 0.7221098443055045, + "grad_norm": 1.8380351186608639, + "learning_rate": 3.7840786681778295e-06, + "loss": 0.591, + "step": 23561 + }, + { + "epoch": 0.7221404928282457, + "grad_norm": 1.5399953888660252, + "learning_rate": 3.7833011265170237e-06, + "loss": 0.697, + "step": 23562 + }, + { + "epoch": 0.7221711413509869, + "grad_norm": 0.6766792627680618, + "learning_rate": 3.7825236461109416e-06, + "loss": 0.5041, + "step": 23563 + }, + { + "epoch": 0.7222017898737281, + "grad_norm": 1.7103523358556414, + "learning_rate": 3.781746226967249e-06, + "loss": 0.7038, + "step": 23564 + }, + { + "epoch": 0.7222324383964693, + "grad_norm": 1.487961881957499, + "learning_rate": 3.780968869093601e-06, + "loss": 0.5684, + "step": 23565 + }, + { + "epoch": 0.7222630869192105, + "grad_norm": 1.7586905518291251, + "learning_rate": 3.7801915724976524e-06, + "loss": 0.6588, + "step": 23566 + }, + { + "epoch": 0.7222937354419517, + "grad_norm": 1.5960947306256126, + "learning_rate": 3.7794143371870727e-06, + "loss": 0.6683, + "step": 23567 + }, + { + "epoch": 0.7223243839646929, + "grad_norm": 1.7954443784311975, + "learning_rate": 3.7786371631695162e-06, + "loss": 0.703, + "step": 23568 + }, + { + "epoch": 0.7223550324874342, + "grad_norm": 1.7419920886406255, + "learning_rate": 3.777860050452636e-06, + "loss": 0.7281, + "step": 23569 + }, + { + "epoch": 0.7223856810101753, + "grad_norm": 1.8870734416292572, + "learning_rate": 3.777082999044093e-06, + "loss": 0.7087, + "step": 23570 + }, + { + "epoch": 0.7224163295329166, + "grad_norm": 1.501772623123797, + "learning_rate": 3.7763060089515436e-06, + "loss": 0.6443, + "step": 23571 + }, + { + "epoch": 0.7224469780556577, + "grad_norm": 1.5980660427008595, + "learning_rate": 3.7755290801826463e-06, + "loss": 0.6064, + "step": 23572 + }, + { + "epoch": 0.7224776265783989, + "grad_norm": 1.5290353102752048, + "learning_rate": 3.77475221274505e-06, + "loss": 0.621, + "step": 23573 + }, + { + "epoch": 0.7225082751011401, + "grad_norm": 0.6960132741516938, + "learning_rate": 3.773975406646413e-06, + "loss": 0.5333, + "step": 23574 + }, + { + "epoch": 0.7225389236238813, + "grad_norm": 1.4351125883072875, + "learning_rate": 3.773198661894393e-06, + "loss": 0.6641, + "step": 23575 + }, + { + "epoch": 0.7225695721466225, + "grad_norm": 1.978579836801263, + "learning_rate": 3.77242197849664e-06, + "loss": 0.6414, + "step": 23576 + }, + { + "epoch": 0.7226002206693637, + "grad_norm": 0.6623563384804984, + "learning_rate": 3.7716453564607993e-06, + "loss": 0.5084, + "step": 23577 + }, + { + "epoch": 0.722630869192105, + "grad_norm": 1.7335581387505075, + "learning_rate": 3.770868795794538e-06, + "loss": 0.7273, + "step": 23578 + }, + { + "epoch": 0.7226615177148461, + "grad_norm": 1.6740840020210679, + "learning_rate": 3.7700922965054997e-06, + "loss": 0.6735, + "step": 23579 + }, + { + "epoch": 0.7226921662375874, + "grad_norm": 1.8117572237283137, + "learning_rate": 3.769315858601332e-06, + "loss": 0.7139, + "step": 23580 + }, + { + "epoch": 0.7227228147603285, + "grad_norm": 1.6193772595063163, + "learning_rate": 3.7685394820896913e-06, + "loss": 0.6236, + "step": 23581 + }, + { + "epoch": 0.7227534632830698, + "grad_norm": 1.679403024611467, + "learning_rate": 3.7677631669782233e-06, + "loss": 0.6341, + "step": 23582 + }, + { + "epoch": 0.7227841118058109, + "grad_norm": 1.8593199154143603, + "learning_rate": 3.766986913274584e-06, + "loss": 0.6148, + "step": 23583 + }, + { + "epoch": 0.7228147603285522, + "grad_norm": 1.781005721473806, + "learning_rate": 3.766210720986414e-06, + "loss": 0.6237, + "step": 23584 + }, + { + "epoch": 0.7228454088512933, + "grad_norm": 1.6217220345456436, + "learning_rate": 3.765434590121364e-06, + "loss": 0.6729, + "step": 23585 + }, + { + "epoch": 0.7228760573740346, + "grad_norm": 1.5041409328122541, + "learning_rate": 3.764658520687087e-06, + "loss": 0.6666, + "step": 23586 + }, + { + "epoch": 0.7229067058967757, + "grad_norm": 1.7207563280362452, + "learning_rate": 3.7638825126912235e-06, + "loss": 0.6617, + "step": 23587 + }, + { + "epoch": 0.722937354419517, + "grad_norm": 1.429140164419655, + "learning_rate": 3.763106566141416e-06, + "loss": 0.5396, + "step": 23588 + }, + { + "epoch": 0.7229680029422582, + "grad_norm": 1.5289728406895686, + "learning_rate": 3.76233068104532e-06, + "loss": 0.676, + "step": 23589 + }, + { + "epoch": 0.7229986514649994, + "grad_norm": 1.9514183936522054, + "learning_rate": 3.761554857410573e-06, + "loss": 0.6595, + "step": 23590 + }, + { + "epoch": 0.7230292999877406, + "grad_norm": 1.7326375424163125, + "learning_rate": 3.7607790952448265e-06, + "loss": 0.6677, + "step": 23591 + }, + { + "epoch": 0.7230599485104818, + "grad_norm": 1.6683609823760215, + "learning_rate": 3.7600033945557157e-06, + "loss": 0.5927, + "step": 23592 + }, + { + "epoch": 0.723090597033223, + "grad_norm": 1.80661404104499, + "learning_rate": 3.7592277553508884e-06, + "loss": 0.604, + "step": 23593 + }, + { + "epoch": 0.7231212455559642, + "grad_norm": 1.8638376563457073, + "learning_rate": 3.75845217763799e-06, + "loss": 0.6108, + "step": 23594 + }, + { + "epoch": 0.7231518940787054, + "grad_norm": 1.4869705999057476, + "learning_rate": 3.757676661424656e-06, + "loss": 0.5634, + "step": 23595 + }, + { + "epoch": 0.7231825426014467, + "grad_norm": 1.78602517071632, + "learning_rate": 3.7569012067185316e-06, + "loss": 0.5922, + "step": 23596 + }, + { + "epoch": 0.7232131911241878, + "grad_norm": 1.5855889996050896, + "learning_rate": 3.7561258135272592e-06, + "loss": 0.5481, + "step": 23597 + }, + { + "epoch": 0.7232438396469291, + "grad_norm": 1.3976183386424021, + "learning_rate": 3.755350481858474e-06, + "loss": 0.6424, + "step": 23598 + }, + { + "epoch": 0.7232744881696702, + "grad_norm": 1.5563665325991776, + "learning_rate": 3.754575211719822e-06, + "loss": 0.6282, + "step": 23599 + }, + { + "epoch": 0.7233051366924115, + "grad_norm": 1.6240418585135228, + "learning_rate": 3.753800003118935e-06, + "loss": 0.5454, + "step": 23600 + }, + { + "epoch": 0.7233357852151526, + "grad_norm": 1.7159293257147805, + "learning_rate": 3.753024856063454e-06, + "loss": 0.652, + "step": 23601 + }, + { + "epoch": 0.7233664337378939, + "grad_norm": 1.674322728802387, + "learning_rate": 3.7522497705610206e-06, + "loss": 0.6847, + "step": 23602 + }, + { + "epoch": 0.723397082260635, + "grad_norm": 1.5230238997858825, + "learning_rate": 3.7514747466192667e-06, + "loss": 0.6142, + "step": 23603 + }, + { + "epoch": 0.7234277307833762, + "grad_norm": 1.670398982403069, + "learning_rate": 3.7506997842458293e-06, + "loss": 0.6954, + "step": 23604 + }, + { + "epoch": 0.7234583793061174, + "grad_norm": 1.5178629854193098, + "learning_rate": 3.7499248834483502e-06, + "loss": 0.6937, + "step": 23605 + }, + { + "epoch": 0.7234890278288586, + "grad_norm": 1.5596037225101542, + "learning_rate": 3.749150044234461e-06, + "loss": 0.5888, + "step": 23606 + }, + { + "epoch": 0.7235196763515999, + "grad_norm": 1.7496964957367853, + "learning_rate": 3.748375266611788e-06, + "loss": 0.7336, + "step": 23607 + }, + { + "epoch": 0.723550324874341, + "grad_norm": 1.6613156239129718, + "learning_rate": 3.7476005505879798e-06, + "loss": 0.6624, + "step": 23608 + }, + { + "epoch": 0.7235809733970823, + "grad_norm": 1.6965092764054879, + "learning_rate": 3.7468258961706604e-06, + "loss": 0.6178, + "step": 23609 + }, + { + "epoch": 0.7236116219198234, + "grad_norm": 0.6641706099802639, + "learning_rate": 3.7460513033674684e-06, + "loss": 0.5482, + "step": 23610 + }, + { + "epoch": 0.7236422704425647, + "grad_norm": 0.6732610625439911, + "learning_rate": 3.7452767721860296e-06, + "loss": 0.5548, + "step": 23611 + }, + { + "epoch": 0.7236729189653058, + "grad_norm": 1.7065193102831653, + "learning_rate": 3.7445023026339787e-06, + "loss": 0.6385, + "step": 23612 + }, + { + "epoch": 0.7237035674880471, + "grad_norm": 1.5693173167996228, + "learning_rate": 3.7437278947189514e-06, + "loss": 0.6109, + "step": 23613 + }, + { + "epoch": 0.7237342160107882, + "grad_norm": 1.6989392302385011, + "learning_rate": 3.74295354844857e-06, + "loss": 0.7052, + "step": 23614 + }, + { + "epoch": 0.7237648645335295, + "grad_norm": 1.8025572834605859, + "learning_rate": 3.7421792638304677e-06, + "loss": 0.7496, + "step": 23615 + }, + { + "epoch": 0.7237955130562707, + "grad_norm": 1.6530529177941933, + "learning_rate": 3.741405040872279e-06, + "loss": 0.6524, + "step": 23616 + }, + { + "epoch": 0.7238261615790119, + "grad_norm": 1.6485223978002879, + "learning_rate": 3.7406308795816238e-06, + "loss": 0.5313, + "step": 23617 + }, + { + "epoch": 0.7238568101017531, + "grad_norm": 1.3985491821267317, + "learning_rate": 3.7398567799661334e-06, + "loss": 0.64, + "step": 23618 + }, + { + "epoch": 0.7238874586244943, + "grad_norm": 1.7878980534792783, + "learning_rate": 3.73908274203344e-06, + "loss": 0.6611, + "step": 23619 + }, + { + "epoch": 0.7239181071472355, + "grad_norm": 1.7055719062003152, + "learning_rate": 3.738308765791162e-06, + "loss": 0.738, + "step": 23620 + }, + { + "epoch": 0.7239487556699767, + "grad_norm": 1.705658408760791, + "learning_rate": 3.7375348512469344e-06, + "loss": 0.6381, + "step": 23621 + }, + { + "epoch": 0.7239794041927179, + "grad_norm": 1.7364009352922813, + "learning_rate": 3.736760998408374e-06, + "loss": 0.7052, + "step": 23622 + }, + { + "epoch": 0.7240100527154592, + "grad_norm": 1.600843897867622, + "learning_rate": 3.7359872072831104e-06, + "loss": 0.6435, + "step": 23623 + }, + { + "epoch": 0.7240407012382003, + "grad_norm": 1.632511326571733, + "learning_rate": 3.7352134778787708e-06, + "loss": 0.6481, + "step": 23624 + }, + { + "epoch": 0.7240713497609416, + "grad_norm": 0.6564604378157073, + "learning_rate": 3.7344398102029724e-06, + "loss": 0.5414, + "step": 23625 + }, + { + "epoch": 0.7241019982836827, + "grad_norm": 1.615850073548586, + "learning_rate": 3.733666204263342e-06, + "loss": 0.6415, + "step": 23626 + }, + { + "epoch": 0.724132646806424, + "grad_norm": 1.70038179981977, + "learning_rate": 3.7328926600675042e-06, + "loss": 0.723, + "step": 23627 + }, + { + "epoch": 0.7241632953291651, + "grad_norm": 1.644179911204126, + "learning_rate": 3.732119177623076e-06, + "loss": 0.6267, + "step": 23628 + }, + { + "epoch": 0.7241939438519064, + "grad_norm": 1.5630118869929117, + "learning_rate": 3.731345756937681e-06, + "loss": 0.592, + "step": 23629 + }, + { + "epoch": 0.7242245923746475, + "grad_norm": 1.471810556851846, + "learning_rate": 3.7305723980189434e-06, + "loss": 0.7289, + "step": 23630 + }, + { + "epoch": 0.7242552408973888, + "grad_norm": 1.6110946370709225, + "learning_rate": 3.729799100874477e-06, + "loss": 0.6623, + "step": 23631 + }, + { + "epoch": 0.72428588942013, + "grad_norm": 1.911038456400576, + "learning_rate": 3.7290258655119072e-06, + "loss": 0.717, + "step": 23632 + }, + { + "epoch": 0.7243165379428712, + "grad_norm": 1.4960829230351866, + "learning_rate": 3.7282526919388475e-06, + "loss": 0.6305, + "step": 23633 + }, + { + "epoch": 0.7243471864656124, + "grad_norm": 1.6765657428792973, + "learning_rate": 3.7274795801629182e-06, + "loss": 0.6353, + "step": 23634 + }, + { + "epoch": 0.7243778349883535, + "grad_norm": 1.7535678788024263, + "learning_rate": 3.7267065301917403e-06, + "loss": 0.6692, + "step": 23635 + }, + { + "epoch": 0.7244084835110948, + "grad_norm": 1.6110531543899513, + "learning_rate": 3.7259335420329255e-06, + "loss": 0.693, + "step": 23636 + }, + { + "epoch": 0.7244391320338359, + "grad_norm": 2.1665246758503445, + "learning_rate": 3.7251606156940934e-06, + "loss": 0.6505, + "step": 23637 + }, + { + "epoch": 0.7244697805565772, + "grad_norm": 1.486154957809421, + "learning_rate": 3.7243877511828617e-06, + "loss": 0.6012, + "step": 23638 + }, + { + "epoch": 0.7245004290793183, + "grad_norm": 1.63892274294727, + "learning_rate": 3.7236149485068398e-06, + "loss": 0.6524, + "step": 23639 + }, + { + "epoch": 0.7245310776020596, + "grad_norm": 1.6961998280111759, + "learning_rate": 3.722842207673646e-06, + "loss": 0.6467, + "step": 23640 + }, + { + "epoch": 0.7245617261248007, + "grad_norm": 0.6646046356513478, + "learning_rate": 3.722069528690897e-06, + "loss": 0.5242, + "step": 23641 + }, + { + "epoch": 0.724592374647542, + "grad_norm": 1.7175405036459976, + "learning_rate": 3.7212969115662e-06, + "loss": 0.7032, + "step": 23642 + }, + { + "epoch": 0.7246230231702832, + "grad_norm": 1.5159907705023552, + "learning_rate": 3.720524356307175e-06, + "loss": 0.5513, + "step": 23643 + }, + { + "epoch": 0.7246536716930244, + "grad_norm": 1.5778775683912267, + "learning_rate": 3.7197518629214258e-06, + "loss": 0.5885, + "step": 23644 + }, + { + "epoch": 0.7246843202157656, + "grad_norm": 1.716373083249037, + "learning_rate": 3.718979431416568e-06, + "loss": 0.7254, + "step": 23645 + }, + { + "epoch": 0.7247149687385068, + "grad_norm": 1.5915244830932724, + "learning_rate": 3.7182070618002174e-06, + "loss": 0.5772, + "step": 23646 + }, + { + "epoch": 0.724745617261248, + "grad_norm": 1.7079953447512461, + "learning_rate": 3.717434754079977e-06, + "loss": 0.6496, + "step": 23647 + }, + { + "epoch": 0.7247762657839892, + "grad_norm": 1.7259442655567645, + "learning_rate": 3.7166625082634576e-06, + "loss": 0.7097, + "step": 23648 + }, + { + "epoch": 0.7248069143067304, + "grad_norm": 0.6669262109561558, + "learning_rate": 3.7158903243582754e-06, + "loss": 0.5539, + "step": 23649 + }, + { + "epoch": 0.7248375628294716, + "grad_norm": 0.6674952008318179, + "learning_rate": 3.715118202372029e-06, + "loss": 0.527, + "step": 23650 + }, + { + "epoch": 0.7248682113522128, + "grad_norm": 1.6315665140633075, + "learning_rate": 3.714346142312335e-06, + "loss": 0.6983, + "step": 23651 + }, + { + "epoch": 0.7248988598749541, + "grad_norm": 0.6616265200997312, + "learning_rate": 3.7135741441867933e-06, + "loss": 0.5171, + "step": 23652 + }, + { + "epoch": 0.7249295083976952, + "grad_norm": 1.5954578215774329, + "learning_rate": 3.712802208003015e-06, + "loss": 0.6468, + "step": 23653 + }, + { + "epoch": 0.7249601569204365, + "grad_norm": 1.5087229560073978, + "learning_rate": 3.712030333768607e-06, + "loss": 0.5876, + "step": 23654 + }, + { + "epoch": 0.7249908054431776, + "grad_norm": 0.6799417046307512, + "learning_rate": 3.71125852149117e-06, + "loss": 0.5329, + "step": 23655 + }, + { + "epoch": 0.7250214539659189, + "grad_norm": 1.9766597029703903, + "learning_rate": 3.710486771178312e-06, + "loss": 0.7778, + "step": 23656 + }, + { + "epoch": 0.72505210248866, + "grad_norm": 1.7656793369221708, + "learning_rate": 3.7097150828376403e-06, + "loss": 0.6892, + "step": 23657 + }, + { + "epoch": 0.7250827510114013, + "grad_norm": 1.5995073234862833, + "learning_rate": 3.708943456476751e-06, + "loss": 0.6618, + "step": 23658 + }, + { + "epoch": 0.7251133995341424, + "grad_norm": 0.6718520019900537, + "learning_rate": 3.708171892103253e-06, + "loss": 0.5404, + "step": 23659 + }, + { + "epoch": 0.7251440480568837, + "grad_norm": 1.7127309199197647, + "learning_rate": 3.70740038972475e-06, + "loss": 0.6255, + "step": 23660 + }, + { + "epoch": 0.7251746965796249, + "grad_norm": 1.4819188067373021, + "learning_rate": 3.7066289493488383e-06, + "loss": 0.6139, + "step": 23661 + }, + { + "epoch": 0.7252053451023661, + "grad_norm": 0.656247538221746, + "learning_rate": 3.7058575709831245e-06, + "loss": 0.5312, + "step": 23662 + }, + { + "epoch": 0.7252359936251073, + "grad_norm": 1.8669552050995397, + "learning_rate": 3.7050862546351995e-06, + "loss": 0.64, + "step": 23663 + }, + { + "epoch": 0.7252666421478485, + "grad_norm": 1.5557291571215337, + "learning_rate": 3.704315000312677e-06, + "loss": 0.7152, + "step": 23664 + }, + { + "epoch": 0.7252972906705897, + "grad_norm": 1.748256775599163, + "learning_rate": 3.70354380802315e-06, + "loss": 0.6395, + "step": 23665 + }, + { + "epoch": 0.7253279391933308, + "grad_norm": 1.6249973632352304, + "learning_rate": 3.7027726777742133e-06, + "loss": 0.7075, + "step": 23666 + }, + { + "epoch": 0.7253585877160721, + "grad_norm": 1.7249131517406986, + "learning_rate": 3.702001609573469e-06, + "loss": 0.6774, + "step": 23667 + }, + { + "epoch": 0.7253892362388132, + "grad_norm": 1.5282922935291743, + "learning_rate": 3.7012306034285173e-06, + "loss": 0.6092, + "step": 23668 + }, + { + "epoch": 0.7254198847615545, + "grad_norm": 0.6626557263844992, + "learning_rate": 3.700459659346949e-06, + "loss": 0.5249, + "step": 23669 + }, + { + "epoch": 0.7254505332842957, + "grad_norm": 1.454523224234163, + "learning_rate": 3.6996887773363633e-06, + "loss": 0.5897, + "step": 23670 + }, + { + "epoch": 0.7254811818070369, + "grad_norm": 1.751384431382426, + "learning_rate": 3.6989179574043554e-06, + "loss": 0.7076, + "step": 23671 + }, + { + "epoch": 0.7255118303297781, + "grad_norm": 1.508774641136245, + "learning_rate": 3.698147199558525e-06, + "loss": 0.6762, + "step": 23672 + }, + { + "epoch": 0.7255424788525193, + "grad_norm": 1.372718509593933, + "learning_rate": 3.6973765038064634e-06, + "loss": 0.6847, + "step": 23673 + }, + { + "epoch": 0.7255731273752605, + "grad_norm": 1.6554841839771992, + "learning_rate": 3.696605870155756e-06, + "loss": 0.6693, + "step": 23674 + }, + { + "epoch": 0.7256037758980017, + "grad_norm": 1.6111902338956856, + "learning_rate": 3.695835298614011e-06, + "loss": 0.5859, + "step": 23675 + }, + { + "epoch": 0.7256344244207429, + "grad_norm": 0.6670003871791759, + "learning_rate": 3.6950647891888134e-06, + "loss": 0.5183, + "step": 23676 + }, + { + "epoch": 0.7256650729434841, + "grad_norm": 1.8035531425429077, + "learning_rate": 3.694294341887752e-06, + "loss": 0.6261, + "step": 23677 + }, + { + "epoch": 0.7256957214662253, + "grad_norm": 1.6135233522095636, + "learning_rate": 3.6935239567184224e-06, + "loss": 0.6216, + "step": 23678 + }, + { + "epoch": 0.7257263699889666, + "grad_norm": 1.6452642496997585, + "learning_rate": 3.6927536336884183e-06, + "loss": 0.6308, + "step": 23679 + }, + { + "epoch": 0.7257570185117077, + "grad_norm": 1.6451159099315726, + "learning_rate": 3.6919833728053223e-06, + "loss": 0.6269, + "step": 23680 + }, + { + "epoch": 0.725787667034449, + "grad_norm": 1.6530742546120514, + "learning_rate": 3.6912131740767285e-06, + "loss": 0.6625, + "step": 23681 + }, + { + "epoch": 0.7258183155571901, + "grad_norm": 1.6059950656584279, + "learning_rate": 3.6904430375102264e-06, + "loss": 0.6222, + "step": 23682 + }, + { + "epoch": 0.7258489640799314, + "grad_norm": 1.5579069420895482, + "learning_rate": 3.6896729631134053e-06, + "loss": 0.721, + "step": 23683 + }, + { + "epoch": 0.7258796126026725, + "grad_norm": 1.6115338352440831, + "learning_rate": 3.688902950893852e-06, + "loss": 0.6244, + "step": 23684 + }, + { + "epoch": 0.7259102611254138, + "grad_norm": 1.6431683598855724, + "learning_rate": 3.6881330008591487e-06, + "loss": 0.6712, + "step": 23685 + }, + { + "epoch": 0.7259409096481549, + "grad_norm": 1.5969053834595004, + "learning_rate": 3.6873631130168864e-06, + "loss": 0.6386, + "step": 23686 + }, + { + "epoch": 0.7259715581708962, + "grad_norm": 1.6806344089673786, + "learning_rate": 3.6865932873746536e-06, + "loss": 0.6426, + "step": 23687 + }, + { + "epoch": 0.7260022066936374, + "grad_norm": 1.6082218129151316, + "learning_rate": 3.6858235239400298e-06, + "loss": 0.6571, + "step": 23688 + }, + { + "epoch": 0.7260328552163786, + "grad_norm": 1.574269490959761, + "learning_rate": 3.685053822720601e-06, + "loss": 0.6104, + "step": 23689 + }, + { + "epoch": 0.7260635037391198, + "grad_norm": 1.7979923166211365, + "learning_rate": 3.684284183723954e-06, + "loss": 0.5889, + "step": 23690 + }, + { + "epoch": 0.726094152261861, + "grad_norm": 1.5474148684321463, + "learning_rate": 3.6835146069576735e-06, + "loss": 0.6914, + "step": 23691 + }, + { + "epoch": 0.7261248007846022, + "grad_norm": 1.818480922275163, + "learning_rate": 3.682745092429336e-06, + "loss": 0.6324, + "step": 23692 + }, + { + "epoch": 0.7261554493073434, + "grad_norm": 1.4920969599203222, + "learning_rate": 3.681975640146529e-06, + "loss": 0.6776, + "step": 23693 + }, + { + "epoch": 0.7261860978300846, + "grad_norm": 0.6791764645623161, + "learning_rate": 3.6812062501168342e-06, + "loss": 0.5152, + "step": 23694 + }, + { + "epoch": 0.7262167463528258, + "grad_norm": 1.9939011421423574, + "learning_rate": 3.680436922347832e-06, + "loss": 0.6231, + "step": 23695 + }, + { + "epoch": 0.726247394875567, + "grad_norm": 1.9052486381604896, + "learning_rate": 3.679667656847098e-06, + "loss": 0.6999, + "step": 23696 + }, + { + "epoch": 0.7262780433983081, + "grad_norm": 1.717579680603964, + "learning_rate": 3.6788984536222163e-06, + "loss": 0.6165, + "step": 23697 + }, + { + "epoch": 0.7263086919210494, + "grad_norm": 1.4593442288299887, + "learning_rate": 3.6781293126807638e-06, + "loss": 0.5446, + "step": 23698 + }, + { + "epoch": 0.7263393404437906, + "grad_norm": 1.7745944422659976, + "learning_rate": 3.677360234030326e-06, + "loss": 0.6423, + "step": 23699 + }, + { + "epoch": 0.7263699889665318, + "grad_norm": 1.831126822702906, + "learning_rate": 3.676591217678471e-06, + "loss": 0.6886, + "step": 23700 + }, + { + "epoch": 0.726400637489273, + "grad_norm": 1.5173577294174845, + "learning_rate": 3.675822263632781e-06, + "loss": 0.5648, + "step": 23701 + }, + { + "epoch": 0.7264312860120142, + "grad_norm": 0.686925727881389, + "learning_rate": 3.6750533719008353e-06, + "loss": 0.5537, + "step": 23702 + }, + { + "epoch": 0.7264619345347554, + "grad_norm": 1.745251598966618, + "learning_rate": 3.6742845424902074e-06, + "loss": 0.6627, + "step": 23703 + }, + { + "epoch": 0.7264925830574966, + "grad_norm": 1.6383404531568408, + "learning_rate": 3.673515775408466e-06, + "loss": 0.6666, + "step": 23704 + }, + { + "epoch": 0.7265232315802378, + "grad_norm": 1.7250113082943663, + "learning_rate": 3.6727470706631983e-06, + "loss": 0.6511, + "step": 23705 + }, + { + "epoch": 0.726553880102979, + "grad_norm": 1.7096488209890102, + "learning_rate": 3.671978428261974e-06, + "loss": 0.5764, + "step": 23706 + }, + { + "epoch": 0.7265845286257202, + "grad_norm": 1.4536085623209116, + "learning_rate": 3.6712098482123603e-06, + "loss": 0.559, + "step": 23707 + }, + { + "epoch": 0.7266151771484615, + "grad_norm": 1.5197460074233653, + "learning_rate": 3.6704413305219365e-06, + "loss": 0.6201, + "step": 23708 + }, + { + "epoch": 0.7266458256712026, + "grad_norm": 1.701943406389599, + "learning_rate": 3.6696728751982736e-06, + "loss": 0.6219, + "step": 23709 + }, + { + "epoch": 0.7266764741939439, + "grad_norm": 1.7303241279286088, + "learning_rate": 3.668904482248946e-06, + "loss": 0.658, + "step": 23710 + }, + { + "epoch": 0.726707122716685, + "grad_norm": 0.6746660579472841, + "learning_rate": 3.6681361516815194e-06, + "loss": 0.5157, + "step": 23711 + }, + { + "epoch": 0.7267377712394263, + "grad_norm": 1.516847101746777, + "learning_rate": 3.6673678835035673e-06, + "loss": 0.6225, + "step": 23712 + }, + { + "epoch": 0.7267684197621674, + "grad_norm": 1.6113743436885253, + "learning_rate": 3.666599677722664e-06, + "loss": 0.6879, + "step": 23713 + }, + { + "epoch": 0.7267990682849087, + "grad_norm": 1.8364965853484605, + "learning_rate": 3.6658315343463746e-06, + "loss": 0.7258, + "step": 23714 + }, + { + "epoch": 0.7268297168076499, + "grad_norm": 1.6645447197222014, + "learning_rate": 3.6650634533822594e-06, + "loss": 0.654, + "step": 23715 + }, + { + "epoch": 0.7268603653303911, + "grad_norm": 0.6512730050328136, + "learning_rate": 3.6642954348379036e-06, + "loss": 0.5174, + "step": 23716 + }, + { + "epoch": 0.7268910138531323, + "grad_norm": 1.8635227408419979, + "learning_rate": 3.6635274787208607e-06, + "loss": 0.6091, + "step": 23717 + }, + { + "epoch": 0.7269216623758735, + "grad_norm": 1.5533606289819428, + "learning_rate": 3.662759585038708e-06, + "loss": 0.6741, + "step": 23718 + }, + { + "epoch": 0.7269523108986147, + "grad_norm": 1.5210930043682178, + "learning_rate": 3.6619917537990014e-06, + "loss": 0.6469, + "step": 23719 + }, + { + "epoch": 0.7269829594213559, + "grad_norm": 1.785788022549337, + "learning_rate": 3.661223985009312e-06, + "loss": 0.7423, + "step": 23720 + }, + { + "epoch": 0.7270136079440971, + "grad_norm": 1.4863927011638485, + "learning_rate": 3.660456278677209e-06, + "loss": 0.7759, + "step": 23721 + }, + { + "epoch": 0.7270442564668383, + "grad_norm": 1.8484259664269949, + "learning_rate": 3.659688634810248e-06, + "loss": 0.7254, + "step": 23722 + }, + { + "epoch": 0.7270749049895795, + "grad_norm": 1.4860125222736325, + "learning_rate": 3.658921053415998e-06, + "loss": 0.5954, + "step": 23723 + }, + { + "epoch": 0.7271055535123208, + "grad_norm": 1.8702944048391856, + "learning_rate": 3.6581535345020235e-06, + "loss": 0.6776, + "step": 23724 + }, + { + "epoch": 0.7271362020350619, + "grad_norm": 0.6666827748408213, + "learning_rate": 3.657386078075883e-06, + "loss": 0.5249, + "step": 23725 + }, + { + "epoch": 0.7271668505578032, + "grad_norm": 1.5725052998503122, + "learning_rate": 3.656618684145139e-06, + "loss": 0.6332, + "step": 23726 + }, + { + "epoch": 0.7271974990805443, + "grad_norm": 1.5223196893186601, + "learning_rate": 3.655851352717358e-06, + "loss": 0.785, + "step": 23727 + }, + { + "epoch": 0.7272281476032855, + "grad_norm": 1.5244673272689384, + "learning_rate": 3.6550840838000933e-06, + "loss": 0.6349, + "step": 23728 + }, + { + "epoch": 0.7272587961260267, + "grad_norm": 1.672122477488953, + "learning_rate": 3.6543168774009117e-06, + "loss": 0.6954, + "step": 23729 + }, + { + "epoch": 0.7272894446487679, + "grad_norm": 1.5729333957990697, + "learning_rate": 3.6535497335273662e-06, + "loss": 0.5565, + "step": 23730 + }, + { + "epoch": 0.7273200931715091, + "grad_norm": 1.7496739361663, + "learning_rate": 3.6527826521870204e-06, + "loss": 0.6898, + "step": 23731 + }, + { + "epoch": 0.7273507416942503, + "grad_norm": 1.7438433556187583, + "learning_rate": 3.6520156333874322e-06, + "loss": 0.6951, + "step": 23732 + }, + { + "epoch": 0.7273813902169916, + "grad_norm": 1.780363216186756, + "learning_rate": 3.6512486771361565e-06, + "loss": 0.6212, + "step": 23733 + }, + { + "epoch": 0.7274120387397327, + "grad_norm": 1.6491567194917147, + "learning_rate": 3.650481783440751e-06, + "loss": 0.7382, + "step": 23734 + }, + { + "epoch": 0.727442687262474, + "grad_norm": 1.558897151562971, + "learning_rate": 3.649714952308777e-06, + "loss": 0.569, + "step": 23735 + }, + { + "epoch": 0.7274733357852151, + "grad_norm": 1.707772611116773, + "learning_rate": 3.6489481837477834e-06, + "loss": 0.6298, + "step": 23736 + }, + { + "epoch": 0.7275039843079564, + "grad_norm": 1.6578390501933118, + "learning_rate": 3.6481814777653312e-06, + "loss": 0.6479, + "step": 23737 + }, + { + "epoch": 0.7275346328306975, + "grad_norm": 1.6942701056016463, + "learning_rate": 3.6474148343689686e-06, + "loss": 0.6767, + "step": 23738 + }, + { + "epoch": 0.7275652813534388, + "grad_norm": 0.6833638365730831, + "learning_rate": 3.646648253566253e-06, + "loss": 0.5414, + "step": 23739 + }, + { + "epoch": 0.7275959298761799, + "grad_norm": 1.6096002337202224, + "learning_rate": 3.6458817353647413e-06, + "loss": 0.6097, + "step": 23740 + }, + { + "epoch": 0.7276265783989212, + "grad_norm": 0.6523833746743362, + "learning_rate": 3.645115279771979e-06, + "loss": 0.5229, + "step": 23741 + }, + { + "epoch": 0.7276572269216623, + "grad_norm": 1.6301380779178287, + "learning_rate": 3.6443488867955224e-06, + "loss": 0.6215, + "step": 23742 + }, + { + "epoch": 0.7276878754444036, + "grad_norm": 1.9544532116677493, + "learning_rate": 3.643582556442925e-06, + "loss": 0.705, + "step": 23743 + }, + { + "epoch": 0.7277185239671448, + "grad_norm": 1.720663075468177, + "learning_rate": 3.642816288721732e-06, + "loss": 0.6107, + "step": 23744 + }, + { + "epoch": 0.727749172489886, + "grad_norm": 1.4348802404460208, + "learning_rate": 3.642050083639497e-06, + "loss": 0.5591, + "step": 23745 + }, + { + "epoch": 0.7277798210126272, + "grad_norm": 1.5960747037843663, + "learning_rate": 3.6412839412037714e-06, + "loss": 0.7053, + "step": 23746 + }, + { + "epoch": 0.7278104695353684, + "grad_norm": 1.4827396776910826, + "learning_rate": 3.6405178614221002e-06, + "loss": 0.6054, + "step": 23747 + }, + { + "epoch": 0.7278411180581096, + "grad_norm": 1.2944973823705703, + "learning_rate": 3.6397518443020364e-06, + "loss": 0.5782, + "step": 23748 + }, + { + "epoch": 0.7278717665808508, + "grad_norm": 1.6117390143619408, + "learning_rate": 3.638985889851121e-06, + "loss": 0.6452, + "step": 23749 + }, + { + "epoch": 0.727902415103592, + "grad_norm": 1.4826257851977425, + "learning_rate": 3.638219998076906e-06, + "loss": 0.7305, + "step": 23750 + }, + { + "epoch": 0.7279330636263333, + "grad_norm": 1.6886760600470245, + "learning_rate": 3.6374541689869404e-06, + "loss": 0.6401, + "step": 23751 + }, + { + "epoch": 0.7279637121490744, + "grad_norm": 0.6672400254710343, + "learning_rate": 3.636688402588764e-06, + "loss": 0.5232, + "step": 23752 + }, + { + "epoch": 0.7279943606718157, + "grad_norm": 1.4205973393570825, + "learning_rate": 3.635922698889923e-06, + "loss": 0.6014, + "step": 23753 + }, + { + "epoch": 0.7280250091945568, + "grad_norm": 1.4692779747795135, + "learning_rate": 3.6351570578979688e-06, + "loss": 0.6333, + "step": 23754 + }, + { + "epoch": 0.7280556577172981, + "grad_norm": 0.6551029757385911, + "learning_rate": 3.6343914796204372e-06, + "loss": 0.5245, + "step": 23755 + }, + { + "epoch": 0.7280863062400392, + "grad_norm": 1.6473245974783168, + "learning_rate": 3.633625964064875e-06, + "loss": 0.6783, + "step": 23756 + }, + { + "epoch": 0.7281169547627805, + "grad_norm": 1.329223392220828, + "learning_rate": 3.632860511238828e-06, + "loss": 0.5951, + "step": 23757 + }, + { + "epoch": 0.7281476032855216, + "grad_norm": 0.6579152275191896, + "learning_rate": 3.6320951211498333e-06, + "loss": 0.5346, + "step": 23758 + }, + { + "epoch": 0.7281782518082628, + "grad_norm": 1.4962205806288236, + "learning_rate": 3.631329793805437e-06, + "loss": 0.5867, + "step": 23759 + }, + { + "epoch": 0.728208900331004, + "grad_norm": 1.7481305616796283, + "learning_rate": 3.630564529213174e-06, + "loss": 0.7001, + "step": 23760 + }, + { + "epoch": 0.7282395488537452, + "grad_norm": 1.7900972678557436, + "learning_rate": 3.62979932738059e-06, + "loss": 0.7693, + "step": 23761 + }, + { + "epoch": 0.7282701973764865, + "grad_norm": 0.6820209139428902, + "learning_rate": 3.629034188315225e-06, + "loss": 0.5346, + "step": 23762 + }, + { + "epoch": 0.7283008458992276, + "grad_norm": 1.7554288132202938, + "learning_rate": 3.628269112024613e-06, + "loss": 0.675, + "step": 23763 + }, + { + "epoch": 0.7283314944219689, + "grad_norm": 1.6288356826169943, + "learning_rate": 3.6275040985162956e-06, + "loss": 0.7707, + "step": 23764 + }, + { + "epoch": 0.72836214294471, + "grad_norm": 1.5892779489414932, + "learning_rate": 3.6267391477978154e-06, + "loss": 0.7616, + "step": 23765 + }, + { + "epoch": 0.7283927914674513, + "grad_norm": 1.7281399952394554, + "learning_rate": 3.6259742598767e-06, + "loss": 0.7143, + "step": 23766 + }, + { + "epoch": 0.7284234399901924, + "grad_norm": 1.5420815862328547, + "learning_rate": 3.6252094347604926e-06, + "loss": 0.6318, + "step": 23767 + }, + { + "epoch": 0.7284540885129337, + "grad_norm": 1.5935386101958577, + "learning_rate": 3.6244446724567306e-06, + "loss": 0.6618, + "step": 23768 + }, + { + "epoch": 0.7284847370356748, + "grad_norm": 1.6660051469960864, + "learning_rate": 3.623679972972942e-06, + "loss": 0.5559, + "step": 23769 + }, + { + "epoch": 0.7285153855584161, + "grad_norm": 1.4762091946546991, + "learning_rate": 3.6229153363166703e-06, + "loss": 0.6458, + "step": 23770 + }, + { + "epoch": 0.7285460340811573, + "grad_norm": 1.6314355056304495, + "learning_rate": 3.622150762495439e-06, + "loss": 0.6477, + "step": 23771 + }, + { + "epoch": 0.7285766826038985, + "grad_norm": 1.5116524443031603, + "learning_rate": 3.621386251516795e-06, + "loss": 0.6839, + "step": 23772 + }, + { + "epoch": 0.7286073311266397, + "grad_norm": 0.6862632302944242, + "learning_rate": 3.6206218033882635e-06, + "loss": 0.5244, + "step": 23773 + }, + { + "epoch": 0.7286379796493809, + "grad_norm": 1.4635634455802016, + "learning_rate": 3.6198574181173752e-06, + "loss": 0.5917, + "step": 23774 + }, + { + "epoch": 0.7286686281721221, + "grad_norm": 1.3863476589833437, + "learning_rate": 3.6190930957116634e-06, + "loss": 0.6393, + "step": 23775 + }, + { + "epoch": 0.7286992766948633, + "grad_norm": 0.6634506743643228, + "learning_rate": 3.6183288361786627e-06, + "loss": 0.5394, + "step": 23776 + }, + { + "epoch": 0.7287299252176045, + "grad_norm": 1.5923487210793084, + "learning_rate": 3.617564639525899e-06, + "loss": 0.6024, + "step": 23777 + }, + { + "epoch": 0.7287605737403458, + "grad_norm": 1.5771551533873684, + "learning_rate": 3.6168005057609035e-06, + "loss": 0.5717, + "step": 23778 + }, + { + "epoch": 0.7287912222630869, + "grad_norm": 1.488487063890923, + "learning_rate": 3.616036434891205e-06, + "loss": 0.6098, + "step": 23779 + }, + { + "epoch": 0.7288218707858282, + "grad_norm": 1.594470846188784, + "learning_rate": 3.6152724269243366e-06, + "loss": 0.5638, + "step": 23780 + }, + { + "epoch": 0.7288525193085693, + "grad_norm": 1.668304735595981, + "learning_rate": 3.6145084818678234e-06, + "loss": 0.6523, + "step": 23781 + }, + { + "epoch": 0.7288831678313106, + "grad_norm": 1.6187974859391985, + "learning_rate": 3.6137445997291877e-06, + "loss": 0.6388, + "step": 23782 + }, + { + "epoch": 0.7289138163540517, + "grad_norm": 1.7887874019523895, + "learning_rate": 3.61298078051596e-06, + "loss": 0.7612, + "step": 23783 + }, + { + "epoch": 0.728944464876793, + "grad_norm": 1.5452721242944878, + "learning_rate": 3.6122170242356715e-06, + "loss": 0.6194, + "step": 23784 + }, + { + "epoch": 0.7289751133995341, + "grad_norm": 1.5113698270165385, + "learning_rate": 3.611453330895839e-06, + "loss": 0.6435, + "step": 23785 + }, + { + "epoch": 0.7290057619222754, + "grad_norm": 1.6639724376815708, + "learning_rate": 3.610689700503991e-06, + "loss": 0.6817, + "step": 23786 + }, + { + "epoch": 0.7290364104450165, + "grad_norm": 0.6789378295082197, + "learning_rate": 3.609926133067656e-06, + "loss": 0.5522, + "step": 23787 + }, + { + "epoch": 0.7290670589677578, + "grad_norm": 1.650870077836778, + "learning_rate": 3.6091626285943504e-06, + "loss": 0.6648, + "step": 23788 + }, + { + "epoch": 0.729097707490499, + "grad_norm": 1.652324507547153, + "learning_rate": 3.6083991870916047e-06, + "loss": 0.6298, + "step": 23789 + }, + { + "epoch": 0.7291283560132401, + "grad_norm": 1.9461969869828053, + "learning_rate": 3.6076358085669296e-06, + "loss": 0.6758, + "step": 23790 + }, + { + "epoch": 0.7291590045359814, + "grad_norm": 1.8096089271737807, + "learning_rate": 3.606872493027861e-06, + "loss": 0.6902, + "step": 23791 + }, + { + "epoch": 0.7291896530587225, + "grad_norm": 1.7821402889613498, + "learning_rate": 3.606109240481914e-06, + "loss": 0.5803, + "step": 23792 + }, + { + "epoch": 0.7292203015814638, + "grad_norm": 1.6232343929265844, + "learning_rate": 3.6053460509366046e-06, + "loss": 0.6328, + "step": 23793 + }, + { + "epoch": 0.7292509501042049, + "grad_norm": 1.8608211378599944, + "learning_rate": 3.604582924399458e-06, + "loss": 0.686, + "step": 23794 + }, + { + "epoch": 0.7292815986269462, + "grad_norm": 1.5994382586155893, + "learning_rate": 3.603819860877994e-06, + "loss": 0.6027, + "step": 23795 + }, + { + "epoch": 0.7293122471496873, + "grad_norm": 1.6196325761118702, + "learning_rate": 3.6030568603797266e-06, + "loss": 0.7069, + "step": 23796 + }, + { + "epoch": 0.7293428956724286, + "grad_norm": 1.4764676500003897, + "learning_rate": 3.6022939229121765e-06, + "loss": 0.6391, + "step": 23797 + }, + { + "epoch": 0.7293735441951698, + "grad_norm": 1.7104091399213202, + "learning_rate": 3.6015310484828627e-06, + "loss": 0.6113, + "step": 23798 + }, + { + "epoch": 0.729404192717911, + "grad_norm": 1.8305999709696807, + "learning_rate": 3.6007682370993025e-06, + "loss": 0.6301, + "step": 23799 + }, + { + "epoch": 0.7294348412406522, + "grad_norm": 1.5940015188594463, + "learning_rate": 3.6000054887690105e-06, + "loss": 0.6902, + "step": 23800 + }, + { + "epoch": 0.7294654897633934, + "grad_norm": 1.874236244709882, + "learning_rate": 3.5992428034994955e-06, + "loss": 0.716, + "step": 23801 + }, + { + "epoch": 0.7294961382861346, + "grad_norm": 1.8866293177652358, + "learning_rate": 3.598480181298285e-06, + "loss": 0.6597, + "step": 23802 + }, + { + "epoch": 0.7295267868088758, + "grad_norm": 1.7220061425323656, + "learning_rate": 3.597717622172887e-06, + "loss": 0.6677, + "step": 23803 + }, + { + "epoch": 0.729557435331617, + "grad_norm": 1.5634090127976397, + "learning_rate": 3.5969551261308133e-06, + "loss": 0.6342, + "step": 23804 + }, + { + "epoch": 0.7295880838543582, + "grad_norm": 1.6069688458007143, + "learning_rate": 3.596192693179578e-06, + "loss": 0.7428, + "step": 23805 + }, + { + "epoch": 0.7296187323770994, + "grad_norm": 1.5100548152898738, + "learning_rate": 3.595430323326695e-06, + "loss": 0.7113, + "step": 23806 + }, + { + "epoch": 0.7296493808998407, + "grad_norm": 1.6757505212634027, + "learning_rate": 3.594668016579679e-06, + "loss": 0.6835, + "step": 23807 + }, + { + "epoch": 0.7296800294225818, + "grad_norm": 1.6350164680022334, + "learning_rate": 3.5939057729460335e-06, + "loss": 0.6396, + "step": 23808 + }, + { + "epoch": 0.7297106779453231, + "grad_norm": 1.731986556508516, + "learning_rate": 3.593143592433275e-06, + "loss": 0.6715, + "step": 23809 + }, + { + "epoch": 0.7297413264680642, + "grad_norm": 1.5565953544210593, + "learning_rate": 3.592381475048915e-06, + "loss": 0.665, + "step": 23810 + }, + { + "epoch": 0.7297719749908055, + "grad_norm": 1.5178396502991551, + "learning_rate": 3.5916194208004595e-06, + "loss": 0.6683, + "step": 23811 + }, + { + "epoch": 0.7298026235135466, + "grad_norm": 1.5638427745589578, + "learning_rate": 3.59085742969541e-06, + "loss": 0.687, + "step": 23812 + }, + { + "epoch": 0.7298332720362879, + "grad_norm": 1.5353180069089434, + "learning_rate": 3.5900955017412896e-06, + "loss": 0.5938, + "step": 23813 + }, + { + "epoch": 0.729863920559029, + "grad_norm": 1.5822895519235474, + "learning_rate": 3.589333636945599e-06, + "loss": 0.6273, + "step": 23814 + }, + { + "epoch": 0.7298945690817703, + "grad_norm": 1.5131030169625308, + "learning_rate": 3.5885718353158406e-06, + "loss": 0.6287, + "step": 23815 + }, + { + "epoch": 0.7299252176045115, + "grad_norm": 1.6733569018334098, + "learning_rate": 3.5878100968595233e-06, + "loss": 0.6609, + "step": 23816 + }, + { + "epoch": 0.7299558661272527, + "grad_norm": 1.6742109029497168, + "learning_rate": 3.587048421584155e-06, + "loss": 0.5638, + "step": 23817 + }, + { + "epoch": 0.7299865146499939, + "grad_norm": 1.6005031693595964, + "learning_rate": 3.5862868094972416e-06, + "loss": 0.7432, + "step": 23818 + }, + { + "epoch": 0.7300171631727351, + "grad_norm": 1.5802475210232123, + "learning_rate": 3.585525260606283e-06, + "loss": 0.6636, + "step": 23819 + }, + { + "epoch": 0.7300478116954763, + "grad_norm": 1.6574083713468486, + "learning_rate": 3.5847637749187847e-06, + "loss": 0.5781, + "step": 23820 + }, + { + "epoch": 0.7300784602182174, + "grad_norm": 1.6410181866578344, + "learning_rate": 3.584002352442254e-06, + "loss": 0.5954, + "step": 23821 + }, + { + "epoch": 0.7301091087409587, + "grad_norm": 1.7717171983927862, + "learning_rate": 3.5832409931841892e-06, + "loss": 0.6139, + "step": 23822 + }, + { + "epoch": 0.7301397572636998, + "grad_norm": 1.7880241842571696, + "learning_rate": 3.582479697152086e-06, + "loss": 0.7118, + "step": 23823 + }, + { + "epoch": 0.7301704057864411, + "grad_norm": 1.7310225361111098, + "learning_rate": 3.5817184643534597e-06, + "loss": 0.6254, + "step": 23824 + }, + { + "epoch": 0.7302010543091823, + "grad_norm": 1.2591653397739688, + "learning_rate": 3.5809572947957993e-06, + "loss": 0.5429, + "step": 23825 + }, + { + "epoch": 0.7302317028319235, + "grad_norm": 1.5353339340433538, + "learning_rate": 3.5801961884866134e-06, + "loss": 0.6026, + "step": 23826 + }, + { + "epoch": 0.7302623513546647, + "grad_norm": 1.7760267892988497, + "learning_rate": 3.579435145433393e-06, + "loss": 0.6405, + "step": 23827 + }, + { + "epoch": 0.7302929998774059, + "grad_norm": 1.655770634187448, + "learning_rate": 3.5786741656436408e-06, + "loss": 0.6861, + "step": 23828 + }, + { + "epoch": 0.7303236484001471, + "grad_norm": 0.6704163410758687, + "learning_rate": 3.577913249124859e-06, + "loss": 0.5185, + "step": 23829 + }, + { + "epoch": 0.7303542969228883, + "grad_norm": 1.76872771990645, + "learning_rate": 3.577152395884538e-06, + "loss": 0.6159, + "step": 23830 + }, + { + "epoch": 0.7303849454456295, + "grad_norm": 0.7169889254412577, + "learning_rate": 3.576391605930176e-06, + "loss": 0.5327, + "step": 23831 + }, + { + "epoch": 0.7304155939683707, + "grad_norm": 0.6762443074860266, + "learning_rate": 3.575630879269276e-06, + "loss": 0.5486, + "step": 23832 + }, + { + "epoch": 0.7304462424911119, + "grad_norm": 1.7015304960685598, + "learning_rate": 3.5748702159093283e-06, + "loss": 0.6942, + "step": 23833 + }, + { + "epoch": 0.7304768910138532, + "grad_norm": 1.5819659962964467, + "learning_rate": 3.5741096158578246e-06, + "loss": 0.7002, + "step": 23834 + }, + { + "epoch": 0.7305075395365943, + "grad_norm": 1.8224912677582705, + "learning_rate": 3.5733490791222637e-06, + "loss": 0.6822, + "step": 23835 + }, + { + "epoch": 0.7305381880593356, + "grad_norm": 1.65804222357668, + "learning_rate": 3.572588605710139e-06, + "loss": 0.7534, + "step": 23836 + }, + { + "epoch": 0.7305688365820767, + "grad_norm": 1.623610775618752, + "learning_rate": 3.571828195628946e-06, + "loss": 0.5896, + "step": 23837 + }, + { + "epoch": 0.730599485104818, + "grad_norm": 1.657590499308058, + "learning_rate": 3.5710678488861704e-06, + "loss": 0.7055, + "step": 23838 + }, + { + "epoch": 0.7306301336275591, + "grad_norm": 1.5857523340316242, + "learning_rate": 3.5703075654893095e-06, + "loss": 0.6915, + "step": 23839 + }, + { + "epoch": 0.7306607821503004, + "grad_norm": 1.5412987301078667, + "learning_rate": 3.5695473454458553e-06, + "loss": 0.6823, + "step": 23840 + }, + { + "epoch": 0.7306914306730415, + "grad_norm": 1.417466592706807, + "learning_rate": 3.5687871887632975e-06, + "loss": 0.6251, + "step": 23841 + }, + { + "epoch": 0.7307220791957828, + "grad_norm": 1.5108141040108773, + "learning_rate": 3.568027095449118e-06, + "loss": 0.5781, + "step": 23842 + }, + { + "epoch": 0.730752727718524, + "grad_norm": 0.6771419556829447, + "learning_rate": 3.5672670655108197e-06, + "loss": 0.5373, + "step": 23843 + }, + { + "epoch": 0.7307833762412652, + "grad_norm": 1.801787968887492, + "learning_rate": 3.5665070989558815e-06, + "loss": 0.6067, + "step": 23844 + }, + { + "epoch": 0.7308140247640064, + "grad_norm": 1.8545098577120571, + "learning_rate": 3.565747195791799e-06, + "loss": 0.6045, + "step": 23845 + }, + { + "epoch": 0.7308446732867476, + "grad_norm": 1.43749926968064, + "learning_rate": 3.564987356026052e-06, + "loss": 0.5961, + "step": 23846 + }, + { + "epoch": 0.7308753218094888, + "grad_norm": 1.5576655110254645, + "learning_rate": 3.5642275796661307e-06, + "loss": 0.6251, + "step": 23847 + }, + { + "epoch": 0.73090597033223, + "grad_norm": 0.6530469723829747, + "learning_rate": 3.5634678667195244e-06, + "loss": 0.5224, + "step": 23848 + }, + { + "epoch": 0.7309366188549712, + "grad_norm": 0.6912790434292815, + "learning_rate": 3.5627082171937146e-06, + "loss": 0.5509, + "step": 23849 + }, + { + "epoch": 0.7309672673777124, + "grad_norm": 1.5469754977958312, + "learning_rate": 3.5619486310961857e-06, + "loss": 0.69, + "step": 23850 + }, + { + "epoch": 0.7309979159004536, + "grad_norm": 1.6850402640294773, + "learning_rate": 3.5611891084344286e-06, + "loss": 0.7286, + "step": 23851 + }, + { + "epoch": 0.7310285644231947, + "grad_norm": 1.5397951119276927, + "learning_rate": 3.5604296492159194e-06, + "loss": 0.6324, + "step": 23852 + }, + { + "epoch": 0.731059212945936, + "grad_norm": 1.5150759360568087, + "learning_rate": 3.5596702534481443e-06, + "loss": 0.5619, + "step": 23853 + }, + { + "epoch": 0.7310898614686772, + "grad_norm": 1.7491064585428433, + "learning_rate": 3.55891092113859e-06, + "loss": 0.6668, + "step": 23854 + }, + { + "epoch": 0.7311205099914184, + "grad_norm": 1.532419267210903, + "learning_rate": 3.5581516522947302e-06, + "loss": 0.7023, + "step": 23855 + }, + { + "epoch": 0.7311511585141596, + "grad_norm": 1.8421130358638946, + "learning_rate": 3.557392446924054e-06, + "loss": 0.6877, + "step": 23856 + }, + { + "epoch": 0.7311818070369008, + "grad_norm": 1.6189068883406004, + "learning_rate": 3.556633305034035e-06, + "loss": 0.5908, + "step": 23857 + }, + { + "epoch": 0.731212455559642, + "grad_norm": 1.587068199601538, + "learning_rate": 3.555874226632157e-06, + "loss": 0.5648, + "step": 23858 + }, + { + "epoch": 0.7312431040823832, + "grad_norm": 0.6589214972175956, + "learning_rate": 3.5551152117259024e-06, + "loss": 0.5324, + "step": 23859 + }, + { + "epoch": 0.7312737526051244, + "grad_norm": 1.6615050710637302, + "learning_rate": 3.5543562603227432e-06, + "loss": 0.6784, + "step": 23860 + }, + { + "epoch": 0.7313044011278657, + "grad_norm": 1.89263129727293, + "learning_rate": 3.553597372430161e-06, + "loss": 0.6455, + "step": 23861 + }, + { + "epoch": 0.7313350496506068, + "grad_norm": 1.7400314580868985, + "learning_rate": 3.552838548055636e-06, + "loss": 0.6465, + "step": 23862 + }, + { + "epoch": 0.7313656981733481, + "grad_norm": 0.7186022971602481, + "learning_rate": 3.552079787206639e-06, + "loss": 0.544, + "step": 23863 + }, + { + "epoch": 0.7313963466960892, + "grad_norm": 1.5219641050991375, + "learning_rate": 3.5513210898906504e-06, + "loss": 0.7216, + "step": 23864 + }, + { + "epoch": 0.7314269952188305, + "grad_norm": 1.6431610430305448, + "learning_rate": 3.5505624561151475e-06, + "loss": 0.6111, + "step": 23865 + }, + { + "epoch": 0.7314576437415716, + "grad_norm": 1.7441781970449357, + "learning_rate": 3.5498038858876006e-06, + "loss": 0.5999, + "step": 23866 + }, + { + "epoch": 0.7314882922643129, + "grad_norm": 1.9386141661935086, + "learning_rate": 3.5490453792154888e-06, + "loss": 0.6727, + "step": 23867 + }, + { + "epoch": 0.731518940787054, + "grad_norm": 0.652782070680289, + "learning_rate": 3.548286936106281e-06, + "loss": 0.5447, + "step": 23868 + }, + { + "epoch": 0.7315495893097953, + "grad_norm": 1.7941053941141207, + "learning_rate": 3.547528556567452e-06, + "loss": 0.6346, + "step": 23869 + }, + { + "epoch": 0.7315802378325365, + "grad_norm": 1.6710811203234912, + "learning_rate": 3.5467702406064787e-06, + "loss": 0.6537, + "step": 23870 + }, + { + "epoch": 0.7316108863552777, + "grad_norm": 1.7624202308808326, + "learning_rate": 3.5460119882308265e-06, + "loss": 0.7068, + "step": 23871 + }, + { + "epoch": 0.7316415348780189, + "grad_norm": 1.678207388554551, + "learning_rate": 3.5452537994479686e-06, + "loss": 0.7135, + "step": 23872 + }, + { + "epoch": 0.7316721834007601, + "grad_norm": 1.752020224420697, + "learning_rate": 3.5444956742653804e-06, + "loss": 0.6307, + "step": 23873 + }, + { + "epoch": 0.7317028319235013, + "grad_norm": 1.7387025213340204, + "learning_rate": 3.5437376126905242e-06, + "loss": 0.7057, + "step": 23874 + }, + { + "epoch": 0.7317334804462425, + "grad_norm": 1.722761973128951, + "learning_rate": 3.5429796147308736e-06, + "loss": 0.6126, + "step": 23875 + }, + { + "epoch": 0.7317641289689837, + "grad_norm": 1.5509303651871673, + "learning_rate": 3.5422216803939004e-06, + "loss": 0.7426, + "step": 23876 + }, + { + "epoch": 0.731794777491725, + "grad_norm": 0.6417301827878467, + "learning_rate": 3.541463809687066e-06, + "loss": 0.5412, + "step": 23877 + }, + { + "epoch": 0.7318254260144661, + "grad_norm": 1.9273742525041906, + "learning_rate": 3.5407060026178443e-06, + "loss": 0.6996, + "step": 23878 + }, + { + "epoch": 0.7318560745372074, + "grad_norm": 1.5688913191637612, + "learning_rate": 3.5399482591936953e-06, + "loss": 0.5596, + "step": 23879 + }, + { + "epoch": 0.7318867230599485, + "grad_norm": 1.667517693142955, + "learning_rate": 3.5391905794220894e-06, + "loss": 0.6195, + "step": 23880 + }, + { + "epoch": 0.7319173715826898, + "grad_norm": 1.7269087643899188, + "learning_rate": 3.5384329633104953e-06, + "loss": 0.6723, + "step": 23881 + }, + { + "epoch": 0.7319480201054309, + "grad_norm": 1.4448383600368393, + "learning_rate": 3.5376754108663715e-06, + "loss": 0.5649, + "step": 23882 + }, + { + "epoch": 0.7319786686281721, + "grad_norm": 1.6391696476219826, + "learning_rate": 3.536917922097184e-06, + "loss": 0.6064, + "step": 23883 + }, + { + "epoch": 0.7320093171509133, + "grad_norm": 1.8058162217080949, + "learning_rate": 3.5361604970104023e-06, + "loss": 0.7929, + "step": 23884 + }, + { + "epoch": 0.7320399656736545, + "grad_norm": 0.6631787866186456, + "learning_rate": 3.535403135613481e-06, + "loss": 0.5502, + "step": 23885 + }, + { + "epoch": 0.7320706141963957, + "grad_norm": 1.5840037147376824, + "learning_rate": 3.5346458379138903e-06, + "loss": 0.6672, + "step": 23886 + }, + { + "epoch": 0.7321012627191369, + "grad_norm": 1.7512013494015246, + "learning_rate": 3.533888603919086e-06, + "loss": 0.6459, + "step": 23887 + }, + { + "epoch": 0.7321319112418782, + "grad_norm": 1.6504272370001403, + "learning_rate": 3.533131433636531e-06, + "loss": 0.6135, + "step": 23888 + }, + { + "epoch": 0.7321625597646193, + "grad_norm": 1.5091801216314031, + "learning_rate": 3.532374327073689e-06, + "loss": 0.6519, + "step": 23889 + }, + { + "epoch": 0.7321932082873606, + "grad_norm": 1.6209435564679748, + "learning_rate": 3.5316172842380148e-06, + "loss": 0.6307, + "step": 23890 + }, + { + "epoch": 0.7322238568101017, + "grad_norm": 1.6627048757869756, + "learning_rate": 3.5308603051369706e-06, + "loss": 0.6802, + "step": 23891 + }, + { + "epoch": 0.732254505332843, + "grad_norm": 1.6163618724066215, + "learning_rate": 3.530103389778019e-06, + "loss": 0.5833, + "step": 23892 + }, + { + "epoch": 0.7322851538555841, + "grad_norm": 1.7526994818657886, + "learning_rate": 3.52934653816861e-06, + "loss": 0.7016, + "step": 23893 + }, + { + "epoch": 0.7323158023783254, + "grad_norm": 0.6662031183720798, + "learning_rate": 3.5285897503162057e-06, + "loss": 0.54, + "step": 23894 + }, + { + "epoch": 0.7323464509010665, + "grad_norm": 1.5261824387469938, + "learning_rate": 3.5278330262282657e-06, + "loss": 0.7215, + "step": 23895 + }, + { + "epoch": 0.7323770994238078, + "grad_norm": 1.6714847860624165, + "learning_rate": 3.5270763659122386e-06, + "loss": 0.7055, + "step": 23896 + }, + { + "epoch": 0.732407747946549, + "grad_norm": 1.858991416923301, + "learning_rate": 3.526319769375588e-06, + "loss": 0.6223, + "step": 23897 + }, + { + "epoch": 0.7324383964692902, + "grad_norm": 0.6385180084460906, + "learning_rate": 3.5255632366257585e-06, + "loss": 0.5281, + "step": 23898 + }, + { + "epoch": 0.7324690449920314, + "grad_norm": 1.5993562138406026, + "learning_rate": 3.524806767670218e-06, + "loss": 0.5941, + "step": 23899 + }, + { + "epoch": 0.7324996935147726, + "grad_norm": 0.6325726719069938, + "learning_rate": 3.5240503625164135e-06, + "loss": 0.5491, + "step": 23900 + }, + { + "epoch": 0.7325303420375138, + "grad_norm": 1.6806149897384814, + "learning_rate": 3.5232940211717935e-06, + "loss": 0.7509, + "step": 23901 + }, + { + "epoch": 0.732560990560255, + "grad_norm": 1.5915156852384913, + "learning_rate": 3.5225377436438145e-06, + "loss": 0.6658, + "step": 23902 + }, + { + "epoch": 0.7325916390829962, + "grad_norm": 1.6481786763451691, + "learning_rate": 3.5217815299399327e-06, + "loss": 0.7381, + "step": 23903 + }, + { + "epoch": 0.7326222876057374, + "grad_norm": 0.6621292105902914, + "learning_rate": 3.5210253800675907e-06, + "loss": 0.5239, + "step": 23904 + }, + { + "epoch": 0.7326529361284786, + "grad_norm": 1.6163337548832135, + "learning_rate": 3.520269294034244e-06, + "loss": 0.6832, + "step": 23905 + }, + { + "epoch": 0.7326835846512199, + "grad_norm": 1.4400070999134278, + "learning_rate": 3.5195132718473424e-06, + "loss": 0.7049, + "step": 23906 + }, + { + "epoch": 0.732714233173961, + "grad_norm": 1.727914607304951, + "learning_rate": 3.518757313514337e-06, + "loss": 0.6141, + "step": 23907 + }, + { + "epoch": 0.7327448816967023, + "grad_norm": 0.6581559295281192, + "learning_rate": 3.5180014190426737e-06, + "loss": 0.5098, + "step": 23908 + }, + { + "epoch": 0.7327755302194434, + "grad_norm": 0.6421136720116748, + "learning_rate": 3.517245588439795e-06, + "loss": 0.5351, + "step": 23909 + }, + { + "epoch": 0.7328061787421847, + "grad_norm": 0.6810148826534175, + "learning_rate": 3.5164898217131615e-06, + "loss": 0.5795, + "step": 23910 + }, + { + "epoch": 0.7328368272649258, + "grad_norm": 1.689768987817744, + "learning_rate": 3.515734118870212e-06, + "loss": 0.6549, + "step": 23911 + }, + { + "epoch": 0.7328674757876671, + "grad_norm": 1.4702779698378, + "learning_rate": 3.5149784799183893e-06, + "loss": 0.6457, + "step": 23912 + }, + { + "epoch": 0.7328981243104082, + "grad_norm": 1.684921239600372, + "learning_rate": 3.514222904865143e-06, + "loss": 0.6029, + "step": 23913 + }, + { + "epoch": 0.7329287728331494, + "grad_norm": 1.8675235445643725, + "learning_rate": 3.513467393717922e-06, + "loss": 0.6924, + "step": 23914 + }, + { + "epoch": 0.7329594213558907, + "grad_norm": 1.6148760953998846, + "learning_rate": 3.512711946484163e-06, + "loss": 0.6494, + "step": 23915 + }, + { + "epoch": 0.7329900698786318, + "grad_norm": 1.4819347836498082, + "learning_rate": 3.5119565631713125e-06, + "loss": 0.5874, + "step": 23916 + }, + { + "epoch": 0.7330207184013731, + "grad_norm": 0.6919037033028106, + "learning_rate": 3.5112012437868147e-06, + "loss": 0.5231, + "step": 23917 + }, + { + "epoch": 0.7330513669241142, + "grad_norm": 1.7254765415700701, + "learning_rate": 3.5104459883381146e-06, + "loss": 0.7559, + "step": 23918 + }, + { + "epoch": 0.7330820154468555, + "grad_norm": 0.6413548747878136, + "learning_rate": 3.50969079683265e-06, + "loss": 0.5222, + "step": 23919 + }, + { + "epoch": 0.7331126639695966, + "grad_norm": 1.6112169825675873, + "learning_rate": 3.5089356692778565e-06, + "loss": 0.6129, + "step": 23920 + }, + { + "epoch": 0.7331433124923379, + "grad_norm": 0.6720337211159718, + "learning_rate": 3.5081806056811873e-06, + "loss": 0.56, + "step": 23921 + }, + { + "epoch": 0.733173961015079, + "grad_norm": 1.5783501987006936, + "learning_rate": 3.5074256060500745e-06, + "loss": 0.6049, + "step": 23922 + }, + { + "epoch": 0.7332046095378203, + "grad_norm": 1.5312270271000532, + "learning_rate": 3.5066706703919564e-06, + "loss": 0.6826, + "step": 23923 + }, + { + "epoch": 0.7332352580605614, + "grad_norm": 1.6629246123285142, + "learning_rate": 3.5059157987142733e-06, + "loss": 0.7036, + "step": 23924 + }, + { + "epoch": 0.7332659065833027, + "grad_norm": 1.5271238879751297, + "learning_rate": 3.505160991024463e-06, + "loss": 0.6256, + "step": 23925 + }, + { + "epoch": 0.7332965551060439, + "grad_norm": 1.6725875737433527, + "learning_rate": 3.5044062473299665e-06, + "loss": 0.7044, + "step": 23926 + }, + { + "epoch": 0.7333272036287851, + "grad_norm": 1.6505670669428303, + "learning_rate": 3.5036515676382145e-06, + "loss": 0.6771, + "step": 23927 + }, + { + "epoch": 0.7333578521515263, + "grad_norm": 1.5372751122235868, + "learning_rate": 3.5028969519566445e-06, + "loss": 0.6193, + "step": 23928 + }, + { + "epoch": 0.7333885006742675, + "grad_norm": 0.6853716949605068, + "learning_rate": 3.5021424002926986e-06, + "loss": 0.5465, + "step": 23929 + }, + { + "epoch": 0.7334191491970087, + "grad_norm": 0.6467215990362887, + "learning_rate": 3.5013879126538042e-06, + "loss": 0.5331, + "step": 23930 + }, + { + "epoch": 0.7334497977197499, + "grad_norm": 1.538256498871522, + "learning_rate": 3.5006334890473947e-06, + "loss": 0.6058, + "step": 23931 + }, + { + "epoch": 0.7334804462424911, + "grad_norm": 0.6653536658232664, + "learning_rate": 3.4998791294809065e-06, + "loss": 0.5455, + "step": 23932 + }, + { + "epoch": 0.7335110947652324, + "grad_norm": 1.5129986603910746, + "learning_rate": 3.4991248339617723e-06, + "loss": 0.6936, + "step": 23933 + }, + { + "epoch": 0.7335417432879735, + "grad_norm": 1.6916120841023794, + "learning_rate": 3.4983706024974283e-06, + "loss": 0.6156, + "step": 23934 + }, + { + "epoch": 0.7335723918107148, + "grad_norm": 1.5785345704000944, + "learning_rate": 3.497616435095299e-06, + "loss": 0.6531, + "step": 23935 + }, + { + "epoch": 0.7336030403334559, + "grad_norm": 0.6909454657023524, + "learning_rate": 3.496862331762818e-06, + "loss": 0.5685, + "step": 23936 + }, + { + "epoch": 0.7336336888561972, + "grad_norm": 0.645518722662574, + "learning_rate": 3.4961082925074196e-06, + "loss": 0.5413, + "step": 23937 + }, + { + "epoch": 0.7336643373789383, + "grad_norm": 1.499573282596641, + "learning_rate": 3.495354317336531e-06, + "loss": 0.5621, + "step": 23938 + }, + { + "epoch": 0.7336949859016796, + "grad_norm": 0.6678816501244876, + "learning_rate": 3.4946004062575734e-06, + "loss": 0.547, + "step": 23939 + }, + { + "epoch": 0.7337256344244207, + "grad_norm": 1.6893040046308134, + "learning_rate": 3.493846559277989e-06, + "loss": 0.5865, + "step": 23940 + }, + { + "epoch": 0.733756282947162, + "grad_norm": 1.6222190575635802, + "learning_rate": 3.493092776405199e-06, + "loss": 0.6607, + "step": 23941 + }, + { + "epoch": 0.7337869314699031, + "grad_norm": 0.6399672594581468, + "learning_rate": 3.4923390576466276e-06, + "loss": 0.5148, + "step": 23942 + }, + { + "epoch": 0.7338175799926444, + "grad_norm": 0.645889662438682, + "learning_rate": 3.491585403009705e-06, + "loss": 0.5287, + "step": 23943 + }, + { + "epoch": 0.7338482285153856, + "grad_norm": 1.700695991058834, + "learning_rate": 3.490831812501857e-06, + "loss": 0.6132, + "step": 23944 + }, + { + "epoch": 0.7338788770381267, + "grad_norm": 1.746908669781458, + "learning_rate": 3.4900782861305105e-06, + "loss": 0.7231, + "step": 23945 + }, + { + "epoch": 0.733909525560868, + "grad_norm": 1.7901033471581478, + "learning_rate": 3.4893248239030863e-06, + "loss": 0.741, + "step": 23946 + }, + { + "epoch": 0.7339401740836091, + "grad_norm": 1.5409342383775506, + "learning_rate": 3.48857142582701e-06, + "loss": 0.5332, + "step": 23947 + }, + { + "epoch": 0.7339708226063504, + "grad_norm": 1.5244027989432702, + "learning_rate": 3.4878180919097083e-06, + "loss": 0.5732, + "step": 23948 + }, + { + "epoch": 0.7340014711290915, + "grad_norm": 1.7413396983784024, + "learning_rate": 3.487064822158601e-06, + "loss": 0.6955, + "step": 23949 + }, + { + "epoch": 0.7340321196518328, + "grad_norm": 1.6764886797424874, + "learning_rate": 3.486311616581105e-06, + "loss": 0.6818, + "step": 23950 + }, + { + "epoch": 0.7340627681745739, + "grad_norm": 1.6584283655882723, + "learning_rate": 3.4855584751846527e-06, + "loss": 0.7015, + "step": 23951 + }, + { + "epoch": 0.7340934166973152, + "grad_norm": 1.9947855555479337, + "learning_rate": 3.484805397976657e-06, + "loss": 0.6828, + "step": 23952 + }, + { + "epoch": 0.7341240652200564, + "grad_norm": 0.650302985212376, + "learning_rate": 3.4840523849645434e-06, + "loss": 0.5308, + "step": 23953 + }, + { + "epoch": 0.7341547137427976, + "grad_norm": 1.9061554813784727, + "learning_rate": 3.483299436155726e-06, + "loss": 0.6987, + "step": 23954 + }, + { + "epoch": 0.7341853622655388, + "grad_norm": 1.7366612541669606, + "learning_rate": 3.482546551557626e-06, + "loss": 0.6174, + "step": 23955 + }, + { + "epoch": 0.73421601078828, + "grad_norm": 1.5439894264259897, + "learning_rate": 3.481793731177666e-06, + "loss": 0.579, + "step": 23956 + }, + { + "epoch": 0.7342466593110212, + "grad_norm": 0.7026240376915536, + "learning_rate": 3.4810409750232577e-06, + "loss": 0.5679, + "step": 23957 + }, + { + "epoch": 0.7342773078337624, + "grad_norm": 0.6518236102374234, + "learning_rate": 3.480288283101819e-06, + "loss": 0.5299, + "step": 23958 + }, + { + "epoch": 0.7343079563565036, + "grad_norm": 1.5182311073940475, + "learning_rate": 3.4795356554207727e-06, + "loss": 0.7185, + "step": 23959 + }, + { + "epoch": 0.7343386048792448, + "grad_norm": 1.5943286588534877, + "learning_rate": 3.4787830919875263e-06, + "loss": 0.5801, + "step": 23960 + }, + { + "epoch": 0.734369253401986, + "grad_norm": 1.3719575340359667, + "learning_rate": 3.4780305928094984e-06, + "loss": 0.5239, + "step": 23961 + }, + { + "epoch": 0.7343999019247273, + "grad_norm": 1.6501170498943436, + "learning_rate": 3.4772781578941072e-06, + "loss": 0.6633, + "step": 23962 + }, + { + "epoch": 0.7344305504474684, + "grad_norm": 1.6704928888737536, + "learning_rate": 3.47652578724876e-06, + "loss": 0.6464, + "step": 23963 + }, + { + "epoch": 0.7344611989702097, + "grad_norm": 1.850705104074881, + "learning_rate": 3.4757734808808763e-06, + "loss": 0.6626, + "step": 23964 + }, + { + "epoch": 0.7344918474929508, + "grad_norm": 1.8368029561582515, + "learning_rate": 3.475021238797862e-06, + "loss": 0.6959, + "step": 23965 + }, + { + "epoch": 0.7345224960156921, + "grad_norm": 1.451902226969731, + "learning_rate": 3.474269061007134e-06, + "loss": 0.635, + "step": 23966 + }, + { + "epoch": 0.7345531445384332, + "grad_norm": 1.705581396079629, + "learning_rate": 3.4735169475161057e-06, + "loss": 0.6914, + "step": 23967 + }, + { + "epoch": 0.7345837930611745, + "grad_norm": 1.649214595071089, + "learning_rate": 3.4727648983321804e-06, + "loss": 0.6586, + "step": 23968 + }, + { + "epoch": 0.7346144415839156, + "grad_norm": 1.6084218400747252, + "learning_rate": 3.472012913462773e-06, + "loss": 0.6637, + "step": 23969 + }, + { + "epoch": 0.7346450901066569, + "grad_norm": 1.590076801907884, + "learning_rate": 3.4712609929152975e-06, + "loss": 0.5733, + "step": 23970 + }, + { + "epoch": 0.7346757386293981, + "grad_norm": 1.6690481606315681, + "learning_rate": 3.470509136697153e-06, + "loss": 0.6253, + "step": 23971 + }, + { + "epoch": 0.7347063871521393, + "grad_norm": 1.758281868476492, + "learning_rate": 3.469757344815753e-06, + "loss": 0.6995, + "step": 23972 + }, + { + "epoch": 0.7347370356748805, + "grad_norm": 1.7749540118652047, + "learning_rate": 3.469005617278508e-06, + "loss": 0.6453, + "step": 23973 + }, + { + "epoch": 0.7347676841976217, + "grad_norm": 1.6203874952528992, + "learning_rate": 3.4682539540928182e-06, + "loss": 0.7314, + "step": 23974 + }, + { + "epoch": 0.7347983327203629, + "grad_norm": 0.6732439893932765, + "learning_rate": 3.467502355266098e-06, + "loss": 0.5301, + "step": 23975 + }, + { + "epoch": 0.734828981243104, + "grad_norm": 0.6648950791897863, + "learning_rate": 3.4667508208057442e-06, + "loss": 0.5144, + "step": 23976 + }, + { + "epoch": 0.7348596297658453, + "grad_norm": 1.7755792048716077, + "learning_rate": 3.465999350719166e-06, + "loss": 0.6178, + "step": 23977 + }, + { + "epoch": 0.7348902782885864, + "grad_norm": 1.6995820424522288, + "learning_rate": 3.465247945013771e-06, + "loss": 0.6944, + "step": 23978 + }, + { + "epoch": 0.7349209268113277, + "grad_norm": 0.6675204936865281, + "learning_rate": 3.4644966036969574e-06, + "loss": 0.5184, + "step": 23979 + }, + { + "epoch": 0.7349515753340689, + "grad_norm": 1.5599144016121818, + "learning_rate": 3.463745326776131e-06, + "loss": 0.6696, + "step": 23980 + }, + { + "epoch": 0.7349822238568101, + "grad_norm": 1.7042472450694937, + "learning_rate": 3.4629941142586976e-06, + "loss": 0.7029, + "step": 23981 + }, + { + "epoch": 0.7350128723795513, + "grad_norm": 0.6951176167077756, + "learning_rate": 3.4622429661520516e-06, + "loss": 0.5285, + "step": 23982 + }, + { + "epoch": 0.7350435209022925, + "grad_norm": 1.47257278401279, + "learning_rate": 3.4614918824636025e-06, + "loss": 0.6123, + "step": 23983 + }, + { + "epoch": 0.7350741694250337, + "grad_norm": 1.49774667163277, + "learning_rate": 3.4607408632007433e-06, + "loss": 0.7211, + "step": 23984 + }, + { + "epoch": 0.7351048179477749, + "grad_norm": 1.918570703392557, + "learning_rate": 3.4599899083708765e-06, + "loss": 0.8041, + "step": 23985 + }, + { + "epoch": 0.7351354664705161, + "grad_norm": 1.4725810353404398, + "learning_rate": 3.4592390179814073e-06, + "loss": 0.5618, + "step": 23986 + }, + { + "epoch": 0.7351661149932573, + "grad_norm": 0.662037565287511, + "learning_rate": 3.4584881920397262e-06, + "loss": 0.5163, + "step": 23987 + }, + { + "epoch": 0.7351967635159985, + "grad_norm": 1.5864565124122263, + "learning_rate": 3.457737430553234e-06, + "loss": 0.7482, + "step": 23988 + }, + { + "epoch": 0.7352274120387398, + "grad_norm": 1.7028172410458837, + "learning_rate": 3.456986733529332e-06, + "loss": 0.6062, + "step": 23989 + }, + { + "epoch": 0.7352580605614809, + "grad_norm": 1.892638690826855, + "learning_rate": 3.4562361009754107e-06, + "loss": 0.6815, + "step": 23990 + }, + { + "epoch": 0.7352887090842222, + "grad_norm": 1.6238720007855405, + "learning_rate": 3.4554855328988703e-06, + "loss": 0.6756, + "step": 23991 + }, + { + "epoch": 0.7353193576069633, + "grad_norm": 0.6793944580950158, + "learning_rate": 3.454735029307107e-06, + "loss": 0.5466, + "step": 23992 + }, + { + "epoch": 0.7353500061297046, + "grad_norm": 1.500315024537947, + "learning_rate": 3.453984590207512e-06, + "loss": 0.5351, + "step": 23993 + }, + { + "epoch": 0.7353806546524457, + "grad_norm": 0.6709519971614167, + "learning_rate": 3.4532342156074848e-06, + "loss": 0.5419, + "step": 23994 + }, + { + "epoch": 0.735411303175187, + "grad_norm": 1.549978645640155, + "learning_rate": 3.4524839055144124e-06, + "loss": 0.5936, + "step": 23995 + }, + { + "epoch": 0.7354419516979281, + "grad_norm": 0.6421272032546491, + "learning_rate": 3.451733659935692e-06, + "loss": 0.5347, + "step": 23996 + }, + { + "epoch": 0.7354726002206694, + "grad_norm": 0.6637241969312864, + "learning_rate": 3.4509834788787176e-06, + "loss": 0.5323, + "step": 23997 + }, + { + "epoch": 0.7355032487434106, + "grad_norm": 1.5906729547056468, + "learning_rate": 3.4502333623508767e-06, + "loss": 0.7582, + "step": 23998 + }, + { + "epoch": 0.7355338972661518, + "grad_norm": 1.652452372158989, + "learning_rate": 3.4494833103595604e-06, + "loss": 0.6455, + "step": 23999 + }, + { + "epoch": 0.735564545788893, + "grad_norm": 1.840199803836958, + "learning_rate": 3.4487333229121656e-06, + "loss": 0.7532, + "step": 24000 + }, + { + "epoch": 0.7355951943116342, + "grad_norm": 1.6222717294066664, + "learning_rate": 3.447983400016074e-06, + "loss": 0.5664, + "step": 24001 + }, + { + "epoch": 0.7356258428343754, + "grad_norm": 1.8952580315969074, + "learning_rate": 3.4472335416786786e-06, + "loss": 0.6847, + "step": 24002 + }, + { + "epoch": 0.7356564913571166, + "grad_norm": 0.6595628535085877, + "learning_rate": 3.446483747907371e-06, + "loss": 0.5016, + "step": 24003 + }, + { + "epoch": 0.7356871398798578, + "grad_norm": 0.6936452288452745, + "learning_rate": 3.4457340187095322e-06, + "loss": 0.5114, + "step": 24004 + }, + { + "epoch": 0.735717788402599, + "grad_norm": 1.7597135795844776, + "learning_rate": 3.4449843540925564e-06, + "loss": 0.6476, + "step": 24005 + }, + { + "epoch": 0.7357484369253402, + "grad_norm": 1.4567994313862689, + "learning_rate": 3.444234754063821e-06, + "loss": 0.553, + "step": 24006 + }, + { + "epoch": 0.7357790854480813, + "grad_norm": 0.7061758881285806, + "learning_rate": 3.4434852186307246e-06, + "loss": 0.507, + "step": 24007 + }, + { + "epoch": 0.7358097339708226, + "grad_norm": 1.4291006401734734, + "learning_rate": 3.442735747800645e-06, + "loss": 0.5699, + "step": 24008 + }, + { + "epoch": 0.7358403824935638, + "grad_norm": 1.6834135567624795, + "learning_rate": 3.4419863415809652e-06, + "loss": 0.6103, + "step": 24009 + }, + { + "epoch": 0.735871031016305, + "grad_norm": 1.8655259719930393, + "learning_rate": 3.441236999979071e-06, + "loss": 0.6397, + "step": 24010 + }, + { + "epoch": 0.7359016795390462, + "grad_norm": 1.605375344970292, + "learning_rate": 3.4404877230023513e-06, + "loss": 0.6377, + "step": 24011 + }, + { + "epoch": 0.7359323280617874, + "grad_norm": 1.6774278876633595, + "learning_rate": 3.4397385106581806e-06, + "loss": 0.63, + "step": 24012 + }, + { + "epoch": 0.7359629765845286, + "grad_norm": 1.7739962258563429, + "learning_rate": 3.438989362953944e-06, + "loss": 0.7081, + "step": 24013 + }, + { + "epoch": 0.7359936251072698, + "grad_norm": 1.820184370937101, + "learning_rate": 3.4382402798970283e-06, + "loss": 0.5711, + "step": 24014 + }, + { + "epoch": 0.736024273630011, + "grad_norm": 1.6069833025616032, + "learning_rate": 3.4374912614948062e-06, + "loss": 0.7281, + "step": 24015 + }, + { + "epoch": 0.7360549221527523, + "grad_norm": 2.0099329484235833, + "learning_rate": 3.4367423077546656e-06, + "loss": 0.6801, + "step": 24016 + }, + { + "epoch": 0.7360855706754934, + "grad_norm": 1.61701842009343, + "learning_rate": 3.435993418683975e-06, + "loss": 0.6142, + "step": 24017 + }, + { + "epoch": 0.7361162191982347, + "grad_norm": 1.8612810755918354, + "learning_rate": 3.435244594290128e-06, + "loss": 0.6534, + "step": 24018 + }, + { + "epoch": 0.7361468677209758, + "grad_norm": 1.5982594123976992, + "learning_rate": 3.434495834580495e-06, + "loss": 0.5997, + "step": 24019 + }, + { + "epoch": 0.7361775162437171, + "grad_norm": 1.619327138836765, + "learning_rate": 3.433747139562451e-06, + "loss": 0.6883, + "step": 24020 + }, + { + "epoch": 0.7362081647664582, + "grad_norm": 1.4419802343103625, + "learning_rate": 3.432998509243377e-06, + "loss": 0.656, + "step": 24021 + }, + { + "epoch": 0.7362388132891995, + "grad_norm": 1.9343225810095297, + "learning_rate": 3.4322499436306532e-06, + "loss": 0.6297, + "step": 24022 + }, + { + "epoch": 0.7362694618119406, + "grad_norm": 1.815328922296306, + "learning_rate": 3.4315014427316463e-06, + "loss": 0.5719, + "step": 24023 + }, + { + "epoch": 0.7363001103346819, + "grad_norm": 1.5520089245175221, + "learning_rate": 3.4307530065537366e-06, + "loss": 0.6509, + "step": 24024 + }, + { + "epoch": 0.736330758857423, + "grad_norm": 1.7334715898172566, + "learning_rate": 3.430004635104299e-06, + "loss": 0.6787, + "step": 24025 + }, + { + "epoch": 0.7363614073801643, + "grad_norm": 0.6559742387715346, + "learning_rate": 3.42925632839071e-06, + "loss": 0.539, + "step": 24026 + }, + { + "epoch": 0.7363920559029055, + "grad_norm": 1.5727682717075793, + "learning_rate": 3.428508086420339e-06, + "loss": 0.6367, + "step": 24027 + }, + { + "epoch": 0.7364227044256467, + "grad_norm": 1.7363745234191008, + "learning_rate": 3.427759909200555e-06, + "loss": 0.6361, + "step": 24028 + }, + { + "epoch": 0.7364533529483879, + "grad_norm": 1.584188811389467, + "learning_rate": 3.4270117967387364e-06, + "loss": 0.5012, + "step": 24029 + }, + { + "epoch": 0.7364840014711291, + "grad_norm": 1.4960405095018723, + "learning_rate": 3.4262637490422545e-06, + "loss": 0.5947, + "step": 24030 + }, + { + "epoch": 0.7365146499938703, + "grad_norm": 1.611657275760595, + "learning_rate": 3.425515766118475e-06, + "loss": 0.6649, + "step": 24031 + }, + { + "epoch": 0.7365452985166115, + "grad_norm": 1.7723904715926482, + "learning_rate": 3.424767847974769e-06, + "loss": 0.7133, + "step": 24032 + }, + { + "epoch": 0.7365759470393527, + "grad_norm": 1.6005184416378886, + "learning_rate": 3.4240199946185103e-06, + "loss": 0.6184, + "step": 24033 + }, + { + "epoch": 0.736606595562094, + "grad_norm": 1.6553216412094638, + "learning_rate": 3.423272206057067e-06, + "loss": 0.651, + "step": 24034 + }, + { + "epoch": 0.7366372440848351, + "grad_norm": 0.663323010317272, + "learning_rate": 3.4225244822978053e-06, + "loss": 0.5179, + "step": 24035 + }, + { + "epoch": 0.7366678926075764, + "grad_norm": 1.7744273385459723, + "learning_rate": 3.4217768233480864e-06, + "loss": 0.6815, + "step": 24036 + }, + { + "epoch": 0.7366985411303175, + "grad_norm": 1.7475275577356564, + "learning_rate": 3.4210292292152903e-06, + "loss": 0.5683, + "step": 24037 + }, + { + "epoch": 0.7367291896530587, + "grad_norm": 1.6514401414306592, + "learning_rate": 3.4202816999067766e-06, + "loss": 0.5573, + "step": 24038 + }, + { + "epoch": 0.7367598381757999, + "grad_norm": 1.927001252233124, + "learning_rate": 3.4195342354299076e-06, + "loss": 0.6407, + "step": 24039 + }, + { + "epoch": 0.7367904866985411, + "grad_norm": 1.710542989354492, + "learning_rate": 3.4187868357920516e-06, + "loss": 0.6697, + "step": 24040 + }, + { + "epoch": 0.7368211352212823, + "grad_norm": 1.5969705397522558, + "learning_rate": 3.4180395010005753e-06, + "loss": 0.6339, + "step": 24041 + }, + { + "epoch": 0.7368517837440235, + "grad_norm": 1.6869067762805676, + "learning_rate": 3.4172922310628377e-06, + "loss": 0.639, + "step": 24042 + }, + { + "epoch": 0.7368824322667648, + "grad_norm": 1.6775359837644537, + "learning_rate": 3.416545025986203e-06, + "loss": 0.6828, + "step": 24043 + }, + { + "epoch": 0.7369130807895059, + "grad_norm": 2.0322064849654513, + "learning_rate": 3.415797885778035e-06, + "loss": 0.6083, + "step": 24044 + }, + { + "epoch": 0.7369437293122472, + "grad_norm": 1.5706405218760762, + "learning_rate": 3.415050810445698e-06, + "loss": 0.5913, + "step": 24045 + }, + { + "epoch": 0.7369743778349883, + "grad_norm": 1.8008475109680013, + "learning_rate": 3.414303799996551e-06, + "loss": 0.6721, + "step": 24046 + }, + { + "epoch": 0.7370050263577296, + "grad_norm": 1.807890906083444, + "learning_rate": 3.413556854437946e-06, + "loss": 0.6546, + "step": 24047 + }, + { + "epoch": 0.7370356748804707, + "grad_norm": 1.537987973087923, + "learning_rate": 3.412809973777258e-06, + "loss": 0.6709, + "step": 24048 + }, + { + "epoch": 0.737066323403212, + "grad_norm": 1.7167499456531095, + "learning_rate": 3.412063158021839e-06, + "loss": 0.5551, + "step": 24049 + }, + { + "epoch": 0.7370969719259531, + "grad_norm": 1.647725474971911, + "learning_rate": 3.4113164071790426e-06, + "loss": 0.6109, + "step": 24050 + }, + { + "epoch": 0.7371276204486944, + "grad_norm": 1.5641214647155277, + "learning_rate": 3.4105697212562327e-06, + "loss": 0.6299, + "step": 24051 + }, + { + "epoch": 0.7371582689714355, + "grad_norm": 1.898151775302546, + "learning_rate": 3.4098231002607653e-06, + "loss": 0.7717, + "step": 24052 + }, + { + "epoch": 0.7371889174941768, + "grad_norm": 1.6162315122330628, + "learning_rate": 3.4090765441999994e-06, + "loss": 0.5978, + "step": 24053 + }, + { + "epoch": 0.737219566016918, + "grad_norm": 1.8026444450980499, + "learning_rate": 3.4083300530812856e-06, + "loss": 0.6532, + "step": 24054 + }, + { + "epoch": 0.7372502145396592, + "grad_norm": 1.75299838318998, + "learning_rate": 3.4075836269119833e-06, + "loss": 0.5766, + "step": 24055 + }, + { + "epoch": 0.7372808630624004, + "grad_norm": 0.6678772702989887, + "learning_rate": 3.4068372656994486e-06, + "loss": 0.5215, + "step": 24056 + }, + { + "epoch": 0.7373115115851416, + "grad_norm": 1.6767596606051485, + "learning_rate": 3.4060909694510337e-06, + "loss": 0.4933, + "step": 24057 + }, + { + "epoch": 0.7373421601078828, + "grad_norm": 0.6628146123254096, + "learning_rate": 3.4053447381740844e-06, + "loss": 0.5128, + "step": 24058 + }, + { + "epoch": 0.737372808630624, + "grad_norm": 1.4762113173036813, + "learning_rate": 3.404598571875969e-06, + "loss": 0.5008, + "step": 24059 + }, + { + "epoch": 0.7374034571533652, + "grad_norm": 0.6469516308650501, + "learning_rate": 3.4038524705640264e-06, + "loss": 0.5158, + "step": 24060 + }, + { + "epoch": 0.7374341056761065, + "grad_norm": 1.7456091578638409, + "learning_rate": 3.4031064342456166e-06, + "loss": 0.7745, + "step": 24061 + }, + { + "epoch": 0.7374647541988476, + "grad_norm": 1.488534610680256, + "learning_rate": 3.4023604629280836e-06, + "loss": 0.6051, + "step": 24062 + }, + { + "epoch": 0.7374954027215889, + "grad_norm": 0.6682790868405537, + "learning_rate": 3.4016145566187817e-06, + "loss": 0.4881, + "step": 24063 + }, + { + "epoch": 0.73752605124433, + "grad_norm": 1.798413413492155, + "learning_rate": 3.400868715325063e-06, + "loss": 0.6645, + "step": 24064 + }, + { + "epoch": 0.7375566997670713, + "grad_norm": 1.4487017011673233, + "learning_rate": 3.40012293905427e-06, + "loss": 0.5883, + "step": 24065 + }, + { + "epoch": 0.7375873482898124, + "grad_norm": 1.8026862091598503, + "learning_rate": 3.399377227813754e-06, + "loss": 0.6982, + "step": 24066 + }, + { + "epoch": 0.7376179968125537, + "grad_norm": 1.709485480281757, + "learning_rate": 3.398631581610867e-06, + "loss": 0.5742, + "step": 24067 + }, + { + "epoch": 0.7376486453352948, + "grad_norm": 0.6599393113871126, + "learning_rate": 3.3978860004529514e-06, + "loss": 0.5489, + "step": 24068 + }, + { + "epoch": 0.737679293858036, + "grad_norm": 1.5318674538033636, + "learning_rate": 3.397140484347348e-06, + "loss": 0.6071, + "step": 24069 + }, + { + "epoch": 0.7377099423807773, + "grad_norm": 1.7973090032909091, + "learning_rate": 3.3963950333014153e-06, + "loss": 0.5649, + "step": 24070 + }, + { + "epoch": 0.7377405909035184, + "grad_norm": 1.6616877106589907, + "learning_rate": 3.3956496473224887e-06, + "loss": 0.7008, + "step": 24071 + }, + { + "epoch": 0.7377712394262597, + "grad_norm": 0.6506515302758121, + "learning_rate": 3.3949043264179194e-06, + "loss": 0.5109, + "step": 24072 + }, + { + "epoch": 0.7378018879490008, + "grad_norm": 1.633953986512526, + "learning_rate": 3.394159070595043e-06, + "loss": 0.7, + "step": 24073 + }, + { + "epoch": 0.7378325364717421, + "grad_norm": 1.7983508240016537, + "learning_rate": 3.3934138798612094e-06, + "loss": 0.7263, + "step": 24074 + }, + { + "epoch": 0.7378631849944832, + "grad_norm": 0.6488515943100219, + "learning_rate": 3.392668754223761e-06, + "loss": 0.5424, + "step": 24075 + }, + { + "epoch": 0.7378938335172245, + "grad_norm": 1.5330355496927464, + "learning_rate": 3.3919236936900358e-06, + "loss": 0.61, + "step": 24076 + }, + { + "epoch": 0.7379244820399656, + "grad_norm": 1.7621749256298094, + "learning_rate": 3.391178698267377e-06, + "loss": 0.6491, + "step": 24077 + }, + { + "epoch": 0.7379551305627069, + "grad_norm": 1.606126411497074, + "learning_rate": 3.390433767963128e-06, + "loss": 0.699, + "step": 24078 + }, + { + "epoch": 0.737985779085448, + "grad_norm": 1.5452353497765285, + "learning_rate": 3.3896889027846237e-06, + "loss": 0.6484, + "step": 24079 + }, + { + "epoch": 0.7380164276081893, + "grad_norm": 1.7247198586470478, + "learning_rate": 3.388944102739209e-06, + "loss": 0.5691, + "step": 24080 + }, + { + "epoch": 0.7380470761309305, + "grad_norm": 0.6575941081063591, + "learning_rate": 3.388199367834216e-06, + "loss": 0.4987, + "step": 24081 + }, + { + "epoch": 0.7380777246536717, + "grad_norm": 1.5945787355442071, + "learning_rate": 3.387454698076987e-06, + "loss": 0.652, + "step": 24082 + }, + { + "epoch": 0.7381083731764129, + "grad_norm": 1.762681907269611, + "learning_rate": 3.386710093474862e-06, + "loss": 0.7347, + "step": 24083 + }, + { + "epoch": 0.7381390216991541, + "grad_norm": 1.5865860723614744, + "learning_rate": 3.3859655540351697e-06, + "loss": 0.5697, + "step": 24084 + }, + { + "epoch": 0.7381696702218953, + "grad_norm": 1.6348472989881173, + "learning_rate": 3.385221079765253e-06, + "loss": 0.6328, + "step": 24085 + }, + { + "epoch": 0.7382003187446365, + "grad_norm": 1.7349895568255853, + "learning_rate": 3.3844766706724474e-06, + "loss": 0.6496, + "step": 24086 + }, + { + "epoch": 0.7382309672673777, + "grad_norm": 1.7214851815284893, + "learning_rate": 3.383732326764083e-06, + "loss": 0.6433, + "step": 24087 + }, + { + "epoch": 0.738261615790119, + "grad_norm": 1.7035738438385357, + "learning_rate": 3.3829880480474973e-06, + "loss": 0.589, + "step": 24088 + }, + { + "epoch": 0.7382922643128601, + "grad_norm": 1.678307857833222, + "learning_rate": 3.3822438345300266e-06, + "loss": 0.6421, + "step": 24089 + }, + { + "epoch": 0.7383229128356014, + "grad_norm": 1.7129391188980772, + "learning_rate": 3.381499686218996e-06, + "loss": 0.6656, + "step": 24090 + }, + { + "epoch": 0.7383535613583425, + "grad_norm": 1.6611800120126579, + "learning_rate": 3.380755603121748e-06, + "loss": 0.7048, + "step": 24091 + }, + { + "epoch": 0.7383842098810838, + "grad_norm": 1.413539274758759, + "learning_rate": 3.380011585245604e-06, + "loss": 0.6562, + "step": 24092 + }, + { + "epoch": 0.7384148584038249, + "grad_norm": 1.7225560776771796, + "learning_rate": 3.379267632597899e-06, + "loss": 0.6064, + "step": 24093 + }, + { + "epoch": 0.7384455069265662, + "grad_norm": 1.4379592324315766, + "learning_rate": 3.3785237451859686e-06, + "loss": 0.7343, + "step": 24094 + }, + { + "epoch": 0.7384761554493073, + "grad_norm": 1.607180782660448, + "learning_rate": 3.3777799230171336e-06, + "loss": 0.6016, + "step": 24095 + }, + { + "epoch": 0.7385068039720486, + "grad_norm": 0.6617864022606031, + "learning_rate": 3.377036166098728e-06, + "loss": 0.5827, + "step": 24096 + }, + { + "epoch": 0.7385374524947897, + "grad_norm": 1.5052407921881874, + "learning_rate": 3.376292474438083e-06, + "loss": 0.6292, + "step": 24097 + }, + { + "epoch": 0.738568101017531, + "grad_norm": 1.7840278201270252, + "learning_rate": 3.3755488480425192e-06, + "loss": 0.6301, + "step": 24098 + }, + { + "epoch": 0.7385987495402722, + "grad_norm": 2.0022421695104686, + "learning_rate": 3.374805286919368e-06, + "loss": 0.7071, + "step": 24099 + }, + { + "epoch": 0.7386293980630133, + "grad_norm": 1.5177704722533163, + "learning_rate": 3.374061791075959e-06, + "loss": 0.6694, + "step": 24100 + }, + { + "epoch": 0.7386600465857546, + "grad_norm": 1.6057677319422112, + "learning_rate": 3.3733183605196107e-06, + "loss": 0.7351, + "step": 24101 + }, + { + "epoch": 0.7386906951084957, + "grad_norm": 1.7144629890934908, + "learning_rate": 3.372574995257655e-06, + "loss": 0.6472, + "step": 24102 + }, + { + "epoch": 0.738721343631237, + "grad_norm": 1.6974724041887606, + "learning_rate": 3.3718316952974106e-06, + "loss": 0.6854, + "step": 24103 + }, + { + "epoch": 0.7387519921539781, + "grad_norm": 1.4499661811226245, + "learning_rate": 3.3710884606462047e-06, + "loss": 0.715, + "step": 24104 + }, + { + "epoch": 0.7387826406767194, + "grad_norm": 1.441134028729883, + "learning_rate": 3.370345291311363e-06, + "loss": 0.6313, + "step": 24105 + }, + { + "epoch": 0.7388132891994605, + "grad_norm": 0.6306479696060515, + "learning_rate": 3.3696021873002028e-06, + "loss": 0.5269, + "step": 24106 + }, + { + "epoch": 0.7388439377222018, + "grad_norm": 1.5649024903584008, + "learning_rate": 3.3688591486200485e-06, + "loss": 0.6835, + "step": 24107 + }, + { + "epoch": 0.738874586244943, + "grad_norm": 1.5970629990935272, + "learning_rate": 3.3681161752782252e-06, + "loss": 0.6508, + "step": 24108 + }, + { + "epoch": 0.7389052347676842, + "grad_norm": 1.6412032298580301, + "learning_rate": 3.3673732672820457e-06, + "loss": 0.6028, + "step": 24109 + }, + { + "epoch": 0.7389358832904254, + "grad_norm": 1.6025102998619085, + "learning_rate": 3.366630424638836e-06, + "loss": 0.6601, + "step": 24110 + }, + { + "epoch": 0.7389665318131666, + "grad_norm": 1.505276328462111, + "learning_rate": 3.3658876473559165e-06, + "loss": 0.6029, + "step": 24111 + }, + { + "epoch": 0.7389971803359078, + "grad_norm": 1.6608362168427633, + "learning_rate": 3.3651449354405997e-06, + "loss": 0.5467, + "step": 24112 + }, + { + "epoch": 0.739027828858649, + "grad_norm": 1.7489064327470265, + "learning_rate": 3.3644022889002115e-06, + "loss": 0.6771, + "step": 24113 + }, + { + "epoch": 0.7390584773813902, + "grad_norm": 1.7507314322915857, + "learning_rate": 3.3636597077420573e-06, + "loss": 0.6617, + "step": 24114 + }, + { + "epoch": 0.7390891259041314, + "grad_norm": 1.4637392817890071, + "learning_rate": 3.3629171919734706e-06, + "loss": 0.583, + "step": 24115 + }, + { + "epoch": 0.7391197744268726, + "grad_norm": 1.58596439452409, + "learning_rate": 3.362174741601758e-06, + "loss": 0.7002, + "step": 24116 + }, + { + "epoch": 0.7391504229496139, + "grad_norm": 1.5746999133709336, + "learning_rate": 3.361432356634232e-06, + "loss": 0.6771, + "step": 24117 + }, + { + "epoch": 0.739181071472355, + "grad_norm": 1.5733912999597626, + "learning_rate": 3.3606900370782125e-06, + "loss": 0.594, + "step": 24118 + }, + { + "epoch": 0.7392117199950963, + "grad_norm": 1.5122606818944002, + "learning_rate": 3.3599477829410156e-06, + "loss": 0.5592, + "step": 24119 + }, + { + "epoch": 0.7392423685178374, + "grad_norm": 1.7080457081663003, + "learning_rate": 3.3592055942299497e-06, + "loss": 0.738, + "step": 24120 + }, + { + "epoch": 0.7392730170405787, + "grad_norm": 1.497198085886663, + "learning_rate": 3.358463470952329e-06, + "loss": 0.599, + "step": 24121 + }, + { + "epoch": 0.7393036655633198, + "grad_norm": 1.5429546710873097, + "learning_rate": 3.357721413115471e-06, + "loss": 0.635, + "step": 24122 + }, + { + "epoch": 0.7393343140860611, + "grad_norm": 1.6207220556049542, + "learning_rate": 3.3569794207266803e-06, + "loss": 0.7653, + "step": 24123 + }, + { + "epoch": 0.7393649626088022, + "grad_norm": 1.4156393055331788, + "learning_rate": 3.356237493793274e-06, + "loss": 0.5499, + "step": 24124 + }, + { + "epoch": 0.7393956111315435, + "grad_norm": 1.4850527137925955, + "learning_rate": 3.3554956323225562e-06, + "loss": 0.6915, + "step": 24125 + }, + { + "epoch": 0.7394262596542847, + "grad_norm": 1.4756672981574792, + "learning_rate": 3.3547538363218402e-06, + "loss": 0.5026, + "step": 24126 + }, + { + "epoch": 0.7394569081770259, + "grad_norm": 0.6841094821396839, + "learning_rate": 3.354012105798439e-06, + "loss": 0.5397, + "step": 24127 + }, + { + "epoch": 0.7394875566997671, + "grad_norm": 1.6272916275016371, + "learning_rate": 3.353270440759652e-06, + "loss": 0.5907, + "step": 24128 + }, + { + "epoch": 0.7395182052225083, + "grad_norm": 1.564841726963651, + "learning_rate": 3.3525288412127933e-06, + "loss": 0.6744, + "step": 24129 + }, + { + "epoch": 0.7395488537452495, + "grad_norm": 1.4279221696579578, + "learning_rate": 3.3517873071651706e-06, + "loss": 0.5693, + "step": 24130 + }, + { + "epoch": 0.7395795022679906, + "grad_norm": 0.6807980257708849, + "learning_rate": 3.3510458386240873e-06, + "loss": 0.5593, + "step": 24131 + }, + { + "epoch": 0.7396101507907319, + "grad_norm": 0.6696112826470448, + "learning_rate": 3.350304435596853e-06, + "loss": 0.5206, + "step": 24132 + }, + { + "epoch": 0.739640799313473, + "grad_norm": 1.8547213505848652, + "learning_rate": 3.3495630980907632e-06, + "loss": 0.6871, + "step": 24133 + }, + { + "epoch": 0.7396714478362143, + "grad_norm": 1.701467380150355, + "learning_rate": 3.3488218261131367e-06, + "loss": 0.718, + "step": 24134 + }, + { + "epoch": 0.7397020963589555, + "grad_norm": 0.6764101295558611, + "learning_rate": 3.3480806196712714e-06, + "loss": 0.515, + "step": 24135 + }, + { + "epoch": 0.7397327448816967, + "grad_norm": 1.766887987382133, + "learning_rate": 3.3473394787724656e-06, + "loss": 0.6445, + "step": 24136 + }, + { + "epoch": 0.7397633934044379, + "grad_norm": 1.6939538190622152, + "learning_rate": 3.346598403424026e-06, + "loss": 0.6328, + "step": 24137 + }, + { + "epoch": 0.7397940419271791, + "grad_norm": 1.8928658421112037, + "learning_rate": 3.3458573936332583e-06, + "loss": 0.6429, + "step": 24138 + }, + { + "epoch": 0.7398246904499203, + "grad_norm": 1.5498967574419031, + "learning_rate": 3.345116449407456e-06, + "loss": 0.6832, + "step": 24139 + }, + { + "epoch": 0.7398553389726615, + "grad_norm": 1.8956281319381667, + "learning_rate": 3.3443755707539248e-06, + "loss": 0.7354, + "step": 24140 + }, + { + "epoch": 0.7398859874954027, + "grad_norm": 1.5188136604204086, + "learning_rate": 3.3436347576799644e-06, + "loss": 0.5512, + "step": 24141 + }, + { + "epoch": 0.739916636018144, + "grad_norm": 1.9202045668961274, + "learning_rate": 3.3428940101928773e-06, + "loss": 0.7017, + "step": 24142 + }, + { + "epoch": 0.7399472845408851, + "grad_norm": 1.5557969554539903, + "learning_rate": 3.3421533282999586e-06, + "loss": 0.6282, + "step": 24143 + }, + { + "epoch": 0.7399779330636264, + "grad_norm": 1.7186712473402814, + "learning_rate": 3.3414127120084994e-06, + "loss": 0.6054, + "step": 24144 + }, + { + "epoch": 0.7400085815863675, + "grad_norm": 0.681478882505057, + "learning_rate": 3.340672161325811e-06, + "loss": 0.5309, + "step": 24145 + }, + { + "epoch": 0.7400392301091088, + "grad_norm": 1.646462053064782, + "learning_rate": 3.3399316762591837e-06, + "loss": 0.6041, + "step": 24146 + }, + { + "epoch": 0.7400698786318499, + "grad_norm": 1.8838632725083198, + "learning_rate": 3.3391912568159115e-06, + "loss": 0.6829, + "step": 24147 + }, + { + "epoch": 0.7401005271545912, + "grad_norm": 0.6893895451287166, + "learning_rate": 3.338450903003291e-06, + "loss": 0.5385, + "step": 24148 + }, + { + "epoch": 0.7401311756773323, + "grad_norm": 1.6248634949650698, + "learning_rate": 3.337710614828622e-06, + "loss": 0.569, + "step": 24149 + }, + { + "epoch": 0.7401618242000736, + "grad_norm": 1.6050486495491683, + "learning_rate": 3.3369703922991912e-06, + "loss": 0.6386, + "step": 24150 + }, + { + "epoch": 0.7401924727228147, + "grad_norm": 1.693114348144413, + "learning_rate": 3.3362302354222963e-06, + "loss": 0.6067, + "step": 24151 + }, + { + "epoch": 0.740223121245556, + "grad_norm": 1.944145086428143, + "learning_rate": 3.33549014420523e-06, + "loss": 0.6421, + "step": 24152 + }, + { + "epoch": 0.7402537697682972, + "grad_norm": 1.7285331550889032, + "learning_rate": 3.3347501186552866e-06, + "loss": 0.6608, + "step": 24153 + }, + { + "epoch": 0.7402844182910384, + "grad_norm": 1.6142014159435025, + "learning_rate": 3.334010158779757e-06, + "loss": 0.6539, + "step": 24154 + }, + { + "epoch": 0.7403150668137796, + "grad_norm": 1.7174528380149623, + "learning_rate": 3.3332702645859237e-06, + "loss": 0.6166, + "step": 24155 + }, + { + "epoch": 0.7403457153365208, + "grad_norm": 1.7167683720909508, + "learning_rate": 3.3325304360810917e-06, + "loss": 0.5938, + "step": 24156 + }, + { + "epoch": 0.740376363859262, + "grad_norm": 1.6816978169817522, + "learning_rate": 3.3317906732725425e-06, + "loss": 0.7168, + "step": 24157 + }, + { + "epoch": 0.7404070123820032, + "grad_norm": 1.5408155206099299, + "learning_rate": 3.3310509761675624e-06, + "loss": 0.6743, + "step": 24158 + }, + { + "epoch": 0.7404376609047444, + "grad_norm": 1.544154615724104, + "learning_rate": 3.3303113447734438e-06, + "loss": 0.6215, + "step": 24159 + }, + { + "epoch": 0.7404683094274856, + "grad_norm": 1.8294430834538224, + "learning_rate": 3.3295717790974737e-06, + "loss": 0.6066, + "step": 24160 + }, + { + "epoch": 0.7404989579502268, + "grad_norm": 1.6751450739257143, + "learning_rate": 3.328832279146943e-06, + "loss": 0.721, + "step": 24161 + }, + { + "epoch": 0.740529606472968, + "grad_norm": 0.638651986207528, + "learning_rate": 3.3280928449291314e-06, + "loss": 0.5283, + "step": 24162 + }, + { + "epoch": 0.7405602549957092, + "grad_norm": 1.602200084221282, + "learning_rate": 3.3273534764513283e-06, + "loss": 0.7108, + "step": 24163 + }, + { + "epoch": 0.7405909035184504, + "grad_norm": 1.731994843955099, + "learning_rate": 3.3266141737208213e-06, + "loss": 0.6396, + "step": 24164 + }, + { + "epoch": 0.7406215520411916, + "grad_norm": 0.7015044399565799, + "learning_rate": 3.325874936744893e-06, + "loss": 0.5357, + "step": 24165 + }, + { + "epoch": 0.7406522005639328, + "grad_norm": 0.6534453553580162, + "learning_rate": 3.3251357655308205e-06, + "loss": 0.512, + "step": 24166 + }, + { + "epoch": 0.740682849086674, + "grad_norm": 1.5352272124789261, + "learning_rate": 3.324396660085899e-06, + "loss": 0.6011, + "step": 24167 + }, + { + "epoch": 0.7407134976094152, + "grad_norm": 1.625778372873857, + "learning_rate": 3.323657620417405e-06, + "loss": 0.6297, + "step": 24168 + }, + { + "epoch": 0.7407441461321564, + "grad_norm": 1.512857432815584, + "learning_rate": 3.3229186465326178e-06, + "loss": 0.5813, + "step": 24169 + }, + { + "epoch": 0.7407747946548976, + "grad_norm": 1.4468215657767427, + "learning_rate": 3.322179738438821e-06, + "loss": 0.5577, + "step": 24170 + }, + { + "epoch": 0.7408054431776389, + "grad_norm": 1.625538929166821, + "learning_rate": 3.3214408961432966e-06, + "loss": 0.6837, + "step": 24171 + }, + { + "epoch": 0.74083609170038, + "grad_norm": 1.691750387799884, + "learning_rate": 3.3207021196533262e-06, + "loss": 0.6963, + "step": 24172 + }, + { + "epoch": 0.7408667402231213, + "grad_norm": 1.830647616045526, + "learning_rate": 3.3199634089761845e-06, + "loss": 0.7567, + "step": 24173 + }, + { + "epoch": 0.7408973887458624, + "grad_norm": 1.490574553567452, + "learning_rate": 3.319224764119152e-06, + "loss": 0.6171, + "step": 24174 + }, + { + "epoch": 0.7409280372686037, + "grad_norm": 1.7724509544429219, + "learning_rate": 3.3184861850895113e-06, + "loss": 0.728, + "step": 24175 + }, + { + "epoch": 0.7409586857913448, + "grad_norm": 1.7622070970243346, + "learning_rate": 3.3177476718945344e-06, + "loss": 0.7776, + "step": 24176 + }, + { + "epoch": 0.7409893343140861, + "grad_norm": 1.8265078489322581, + "learning_rate": 3.3170092245414978e-06, + "loss": 0.737, + "step": 24177 + }, + { + "epoch": 0.7410199828368272, + "grad_norm": 0.7325691202881417, + "learning_rate": 3.316270843037678e-06, + "loss": 0.5483, + "step": 24178 + }, + { + "epoch": 0.7410506313595685, + "grad_norm": 1.6969957864325134, + "learning_rate": 3.3155325273903515e-06, + "loss": 0.7449, + "step": 24179 + }, + { + "epoch": 0.7410812798823097, + "grad_norm": 1.6435585931092047, + "learning_rate": 3.3147942776067978e-06, + "loss": 0.7731, + "step": 24180 + }, + { + "epoch": 0.7411119284050509, + "grad_norm": 0.6948405024880826, + "learning_rate": 3.3140560936942824e-06, + "loss": 0.5284, + "step": 24181 + }, + { + "epoch": 0.7411425769277921, + "grad_norm": 1.5301282590631688, + "learning_rate": 3.3133179756600832e-06, + "loss": 0.633, + "step": 24182 + }, + { + "epoch": 0.7411732254505333, + "grad_norm": 1.6531566633639818, + "learning_rate": 3.3125799235114753e-06, + "loss": 0.764, + "step": 24183 + }, + { + "epoch": 0.7412038739732745, + "grad_norm": 1.8966567265243897, + "learning_rate": 3.3118419372557296e-06, + "loss": 0.6857, + "step": 24184 + }, + { + "epoch": 0.7412345224960157, + "grad_norm": 1.75480309415358, + "learning_rate": 3.31110401690011e-06, + "loss": 0.5414, + "step": 24185 + }, + { + "epoch": 0.7412651710187569, + "grad_norm": 0.6642750356072961, + "learning_rate": 3.310366162451899e-06, + "loss": 0.5335, + "step": 24186 + }, + { + "epoch": 0.7412958195414981, + "grad_norm": 1.5776344834035718, + "learning_rate": 3.30962837391836e-06, + "loss": 0.6076, + "step": 24187 + }, + { + "epoch": 0.7413264680642393, + "grad_norm": 1.6761106270541712, + "learning_rate": 3.3088906513067677e-06, + "loss": 0.7369, + "step": 24188 + }, + { + "epoch": 0.7413571165869806, + "grad_norm": 1.5627712024748905, + "learning_rate": 3.308152994624384e-06, + "loss": 0.6443, + "step": 24189 + }, + { + "epoch": 0.7413877651097217, + "grad_norm": 1.6382591352868299, + "learning_rate": 3.307415403878481e-06, + "loss": 0.5672, + "step": 24190 + }, + { + "epoch": 0.741418413632463, + "grad_norm": 1.6527987322631184, + "learning_rate": 3.30667787907633e-06, + "loss": 0.6979, + "step": 24191 + }, + { + "epoch": 0.7414490621552041, + "grad_norm": 0.640034337668121, + "learning_rate": 3.305940420225191e-06, + "loss": 0.5337, + "step": 24192 + }, + { + "epoch": 0.7414797106779453, + "grad_norm": 1.6154000124904402, + "learning_rate": 3.305203027332333e-06, + "loss": 0.6995, + "step": 24193 + }, + { + "epoch": 0.7415103592006865, + "grad_norm": 1.6587864943840922, + "learning_rate": 3.3044657004050274e-06, + "loss": 0.7262, + "step": 24194 + }, + { + "epoch": 0.7415410077234277, + "grad_norm": 1.6053183597039056, + "learning_rate": 3.303728439450533e-06, + "loss": 0.687, + "step": 24195 + }, + { + "epoch": 0.7415716562461689, + "grad_norm": 0.6649561767802543, + "learning_rate": 3.3029912444761104e-06, + "loss": 0.5362, + "step": 24196 + }, + { + "epoch": 0.7416023047689101, + "grad_norm": 1.4954855916264445, + "learning_rate": 3.3022541154890343e-06, + "loss": 0.6127, + "step": 24197 + }, + { + "epoch": 0.7416329532916514, + "grad_norm": 1.6912528476417894, + "learning_rate": 3.301517052496558e-06, + "loss": 0.5921, + "step": 24198 + }, + { + "epoch": 0.7416636018143925, + "grad_norm": 1.6145343580326406, + "learning_rate": 3.3007800555059522e-06, + "loss": 0.5527, + "step": 24199 + }, + { + "epoch": 0.7416942503371338, + "grad_norm": 1.7736779309588206, + "learning_rate": 3.3000431245244713e-06, + "loss": 0.6657, + "step": 24200 + }, + { + "epoch": 0.7417248988598749, + "grad_norm": 1.8383684646751923, + "learning_rate": 3.2993062595593804e-06, + "loss": 0.6745, + "step": 24201 + }, + { + "epoch": 0.7417555473826162, + "grad_norm": 0.6988499719784063, + "learning_rate": 3.298569460617942e-06, + "loss": 0.5206, + "step": 24202 + }, + { + "epoch": 0.7417861959053573, + "grad_norm": 1.6514092262955196, + "learning_rate": 3.2978327277074095e-06, + "loss": 0.6899, + "step": 24203 + }, + { + "epoch": 0.7418168444280986, + "grad_norm": 1.5741264347421187, + "learning_rate": 3.2970960608350464e-06, + "loss": 0.5648, + "step": 24204 + }, + { + "epoch": 0.7418474929508397, + "grad_norm": 1.6714541336499567, + "learning_rate": 3.296359460008114e-06, + "loss": 0.6842, + "step": 24205 + }, + { + "epoch": 0.741878141473581, + "grad_norm": 0.6737441120901563, + "learning_rate": 3.2956229252338644e-06, + "loss": 0.539, + "step": 24206 + }, + { + "epoch": 0.7419087899963221, + "grad_norm": 0.6834531992768386, + "learning_rate": 3.294886456519557e-06, + "loss": 0.5303, + "step": 24207 + }, + { + "epoch": 0.7419394385190634, + "grad_norm": 1.6778434588277422, + "learning_rate": 3.2941500538724525e-06, + "loss": 0.7171, + "step": 24208 + }, + { + "epoch": 0.7419700870418046, + "grad_norm": 1.4961701530552347, + "learning_rate": 3.2934137172997995e-06, + "loss": 0.5474, + "step": 24209 + }, + { + "epoch": 0.7420007355645458, + "grad_norm": 1.70155599625654, + "learning_rate": 3.2926774468088605e-06, + "loss": 0.6503, + "step": 24210 + }, + { + "epoch": 0.742031384087287, + "grad_norm": 1.6565700811385107, + "learning_rate": 3.2919412424068842e-06, + "loss": 0.6534, + "step": 24211 + }, + { + "epoch": 0.7420620326100282, + "grad_norm": 1.6403905796379616, + "learning_rate": 3.2912051041011262e-06, + "loss": 0.6074, + "step": 24212 + }, + { + "epoch": 0.7420926811327694, + "grad_norm": 1.9115467618617614, + "learning_rate": 3.2904690318988443e-06, + "loss": 0.7065, + "step": 24213 + }, + { + "epoch": 0.7421233296555106, + "grad_norm": 0.6740772679424261, + "learning_rate": 3.2897330258072845e-06, + "loss": 0.5152, + "step": 24214 + }, + { + "epoch": 0.7421539781782518, + "grad_norm": 1.6108512524225111, + "learning_rate": 3.288997085833703e-06, + "loss": 0.6837, + "step": 24215 + }, + { + "epoch": 0.742184626700993, + "grad_norm": 0.6730108816589228, + "learning_rate": 3.288261211985353e-06, + "loss": 0.5648, + "step": 24216 + }, + { + "epoch": 0.7422152752237342, + "grad_norm": 1.4134209468946544, + "learning_rate": 3.287525404269478e-06, + "loss": 0.6781, + "step": 24217 + }, + { + "epoch": 0.7422459237464755, + "grad_norm": 0.6617142817843829, + "learning_rate": 3.286789662693335e-06, + "loss": 0.5319, + "step": 24218 + }, + { + "epoch": 0.7422765722692166, + "grad_norm": 1.562214460511393, + "learning_rate": 3.286053987264173e-06, + "loss": 0.6337, + "step": 24219 + }, + { + "epoch": 0.7423072207919579, + "grad_norm": 1.6600219341980247, + "learning_rate": 3.285318377989235e-06, + "loss": 0.6989, + "step": 24220 + }, + { + "epoch": 0.742337869314699, + "grad_norm": 1.7977476510072226, + "learning_rate": 3.284582834875778e-06, + "loss": 0.7732, + "step": 24221 + }, + { + "epoch": 0.7423685178374403, + "grad_norm": 1.6483717601092505, + "learning_rate": 3.2838473579310404e-06, + "loss": 0.6386, + "step": 24222 + }, + { + "epoch": 0.7423991663601814, + "grad_norm": 1.695956350138435, + "learning_rate": 3.283111947162273e-06, + "loss": 0.6705, + "step": 24223 + }, + { + "epoch": 0.7424298148829226, + "grad_norm": 1.7616000467900135, + "learning_rate": 3.282376602576726e-06, + "loss": 0.6928, + "step": 24224 + }, + { + "epoch": 0.7424604634056639, + "grad_norm": 1.6729132998774077, + "learning_rate": 3.281641324181637e-06, + "loss": 0.5467, + "step": 24225 + }, + { + "epoch": 0.742491111928405, + "grad_norm": 1.4929490114272428, + "learning_rate": 3.2809061119842557e-06, + "loss": 0.6079, + "step": 24226 + }, + { + "epoch": 0.7425217604511463, + "grad_norm": 1.7867267879777045, + "learning_rate": 3.280170965991828e-06, + "loss": 0.6803, + "step": 24227 + }, + { + "epoch": 0.7425524089738874, + "grad_norm": 1.4346125704165695, + "learning_rate": 3.279435886211593e-06, + "loss": 0.6917, + "step": 24228 + }, + { + "epoch": 0.7425830574966287, + "grad_norm": 1.707981705614432, + "learning_rate": 3.2787008726507973e-06, + "loss": 0.6346, + "step": 24229 + }, + { + "epoch": 0.7426137060193698, + "grad_norm": 1.862909248694529, + "learning_rate": 3.277965925316681e-06, + "loss": 0.6406, + "step": 24230 + }, + { + "epoch": 0.7426443545421111, + "grad_norm": 1.6345552835791581, + "learning_rate": 3.2772310442164844e-06, + "loss": 0.6862, + "step": 24231 + }, + { + "epoch": 0.7426750030648522, + "grad_norm": 1.5366712295329872, + "learning_rate": 3.2764962293574543e-06, + "loss": 0.7435, + "step": 24232 + }, + { + "epoch": 0.7427056515875935, + "grad_norm": 1.9637397555025968, + "learning_rate": 3.275761480746823e-06, + "loss": 0.7678, + "step": 24233 + }, + { + "epoch": 0.7427363001103346, + "grad_norm": 1.6871930056203919, + "learning_rate": 3.275026798391835e-06, + "loss": 0.6754, + "step": 24234 + }, + { + "epoch": 0.7427669486330759, + "grad_norm": 1.6421164938554305, + "learning_rate": 3.274292182299732e-06, + "loss": 0.6276, + "step": 24235 + }, + { + "epoch": 0.7427975971558171, + "grad_norm": 1.5453245864845906, + "learning_rate": 3.2735576324777464e-06, + "loss": 0.5884, + "step": 24236 + }, + { + "epoch": 0.7428282456785583, + "grad_norm": 1.5190075466386441, + "learning_rate": 3.2728231489331177e-06, + "loss": 0.6787, + "step": 24237 + }, + { + "epoch": 0.7428588942012995, + "grad_norm": 1.6664867592190198, + "learning_rate": 3.2720887316730865e-06, + "loss": 0.6573, + "step": 24238 + }, + { + "epoch": 0.7428895427240407, + "grad_norm": 0.6458115779386113, + "learning_rate": 3.2713543807048833e-06, + "loss": 0.5198, + "step": 24239 + }, + { + "epoch": 0.7429201912467819, + "grad_norm": 0.6573212496292368, + "learning_rate": 3.2706200960357513e-06, + "loss": 0.5238, + "step": 24240 + }, + { + "epoch": 0.7429508397695231, + "grad_norm": 1.5122663319364122, + "learning_rate": 3.2698858776729136e-06, + "loss": 0.6498, + "step": 24241 + }, + { + "epoch": 0.7429814882922643, + "grad_norm": 1.6517065855301143, + "learning_rate": 3.269151725623619e-06, + "loss": 0.6562, + "step": 24242 + }, + { + "epoch": 0.7430121368150056, + "grad_norm": 1.5309870394057172, + "learning_rate": 3.268417639895095e-06, + "loss": 0.7054, + "step": 24243 + }, + { + "epoch": 0.7430427853377467, + "grad_norm": 1.2636515139759459, + "learning_rate": 3.2676836204945715e-06, + "loss": 0.5721, + "step": 24244 + }, + { + "epoch": 0.743073433860488, + "grad_norm": 1.556179968007569, + "learning_rate": 3.2669496674292834e-06, + "loss": 0.6267, + "step": 24245 + }, + { + "epoch": 0.7431040823832291, + "grad_norm": 1.7086571632283765, + "learning_rate": 3.2662157807064664e-06, + "loss": 0.6862, + "step": 24246 + }, + { + "epoch": 0.7431347309059704, + "grad_norm": 0.676794801725502, + "learning_rate": 3.2654819603333444e-06, + "loss": 0.5155, + "step": 24247 + }, + { + "epoch": 0.7431653794287115, + "grad_norm": 1.5398987174894425, + "learning_rate": 3.264748206317152e-06, + "loss": 0.6392, + "step": 24248 + }, + { + "epoch": 0.7431960279514528, + "grad_norm": 1.792271784476526, + "learning_rate": 3.2640145186651216e-06, + "loss": 0.6531, + "step": 24249 + }, + { + "epoch": 0.7432266764741939, + "grad_norm": 1.6085506768811475, + "learning_rate": 3.263280897384478e-06, + "loss": 0.6669, + "step": 24250 + }, + { + "epoch": 0.7432573249969352, + "grad_norm": 1.720387974629025, + "learning_rate": 3.2625473424824527e-06, + "loss": 0.6791, + "step": 24251 + }, + { + "epoch": 0.7432879735196763, + "grad_norm": 0.6499170480505684, + "learning_rate": 3.261813853966267e-06, + "loss": 0.5119, + "step": 24252 + }, + { + "epoch": 0.7433186220424176, + "grad_norm": 1.6524179748035026, + "learning_rate": 3.2610804318431597e-06, + "loss": 0.6852, + "step": 24253 + }, + { + "epoch": 0.7433492705651588, + "grad_norm": 1.5133810608385618, + "learning_rate": 3.2603470761203503e-06, + "loss": 0.7076, + "step": 24254 + }, + { + "epoch": 0.7433799190878999, + "grad_norm": 1.4162074550327335, + "learning_rate": 3.2596137868050637e-06, + "loss": 0.5406, + "step": 24255 + }, + { + "epoch": 0.7434105676106412, + "grad_norm": 1.608199362722906, + "learning_rate": 3.2588805639045263e-06, + "loss": 0.6367, + "step": 24256 + }, + { + "epoch": 0.7434412161333823, + "grad_norm": 1.7797703852869293, + "learning_rate": 3.258147407425967e-06, + "loss": 0.6832, + "step": 24257 + }, + { + "epoch": 0.7434718646561236, + "grad_norm": 1.621894264452516, + "learning_rate": 3.257414317376603e-06, + "loss": 0.6321, + "step": 24258 + }, + { + "epoch": 0.7435025131788647, + "grad_norm": 1.5777148290700533, + "learning_rate": 3.2566812937636607e-06, + "loss": 0.539, + "step": 24259 + }, + { + "epoch": 0.743533161701606, + "grad_norm": 1.8220737045637085, + "learning_rate": 3.255948336594362e-06, + "loss": 0.6009, + "step": 24260 + }, + { + "epoch": 0.7435638102243471, + "grad_norm": 2.119753702274322, + "learning_rate": 3.2552154458759343e-06, + "loss": 0.5997, + "step": 24261 + }, + { + "epoch": 0.7435944587470884, + "grad_norm": 1.687254677742609, + "learning_rate": 3.2544826216155946e-06, + "loss": 0.6738, + "step": 24262 + }, + { + "epoch": 0.7436251072698296, + "grad_norm": 1.7956297728709731, + "learning_rate": 3.253749863820557e-06, + "loss": 0.7602, + "step": 24263 + }, + { + "epoch": 0.7436557557925708, + "grad_norm": 0.6730338826875653, + "learning_rate": 3.253017172498054e-06, + "loss": 0.5553, + "step": 24264 + }, + { + "epoch": 0.743686404315312, + "grad_norm": 1.6425206471881333, + "learning_rate": 3.2522845476553e-06, + "loss": 0.7248, + "step": 24265 + }, + { + "epoch": 0.7437170528380532, + "grad_norm": 1.4399264289171463, + "learning_rate": 3.2515519892995085e-06, + "loss": 0.5077, + "step": 24266 + }, + { + "epoch": 0.7437477013607944, + "grad_norm": 1.403667748324028, + "learning_rate": 3.2508194974379027e-06, + "loss": 0.6728, + "step": 24267 + }, + { + "epoch": 0.7437783498835356, + "grad_norm": 0.6908745089844501, + "learning_rate": 3.250087072077699e-06, + "loss": 0.535, + "step": 24268 + }, + { + "epoch": 0.7438089984062768, + "grad_norm": 1.4478542807567862, + "learning_rate": 3.249354713226118e-06, + "loss": 0.5558, + "step": 24269 + }, + { + "epoch": 0.743839646929018, + "grad_norm": 1.5322316355498429, + "learning_rate": 3.248622420890368e-06, + "loss": 0.5665, + "step": 24270 + }, + { + "epoch": 0.7438702954517592, + "grad_norm": 1.7705056473501077, + "learning_rate": 3.24789019507767e-06, + "loss": 0.7426, + "step": 24271 + }, + { + "epoch": 0.7439009439745005, + "grad_norm": 1.7196432874160015, + "learning_rate": 3.24715803579524e-06, + "loss": 0.587, + "step": 24272 + }, + { + "epoch": 0.7439315924972416, + "grad_norm": 1.8449391358711378, + "learning_rate": 3.2464259430502898e-06, + "loss": 0.5905, + "step": 24273 + }, + { + "epoch": 0.7439622410199829, + "grad_norm": 1.5178845775619607, + "learning_rate": 3.2456939168500302e-06, + "loss": 0.6331, + "step": 24274 + }, + { + "epoch": 0.743992889542724, + "grad_norm": 1.6757547230517926, + "learning_rate": 3.2449619572016754e-06, + "loss": 0.6212, + "step": 24275 + }, + { + "epoch": 0.7440235380654653, + "grad_norm": 1.6578754902829929, + "learning_rate": 3.2442300641124424e-06, + "loss": 0.6043, + "step": 24276 + }, + { + "epoch": 0.7440541865882064, + "grad_norm": 1.5615633655600227, + "learning_rate": 3.2434982375895364e-06, + "loss": 0.6185, + "step": 24277 + }, + { + "epoch": 0.7440848351109477, + "grad_norm": 1.6809931834235399, + "learning_rate": 3.24276647764017e-06, + "loss": 0.6702, + "step": 24278 + }, + { + "epoch": 0.7441154836336888, + "grad_norm": 1.495089176101978, + "learning_rate": 3.242034784271555e-06, + "loss": 0.5596, + "step": 24279 + }, + { + "epoch": 0.7441461321564301, + "grad_norm": 1.5356472492165203, + "learning_rate": 3.2413031574909036e-06, + "loss": 0.6612, + "step": 24280 + }, + { + "epoch": 0.7441767806791713, + "grad_norm": 1.5425966471691162, + "learning_rate": 3.240571597305422e-06, + "loss": 0.5509, + "step": 24281 + }, + { + "epoch": 0.7442074292019125, + "grad_norm": 1.5998417590602325, + "learning_rate": 3.2398401037223105e-06, + "loss": 0.6202, + "step": 24282 + }, + { + "epoch": 0.7442380777246537, + "grad_norm": 1.6737827270980095, + "learning_rate": 3.2391086767487913e-06, + "loss": 0.6014, + "step": 24283 + }, + { + "epoch": 0.7442687262473949, + "grad_norm": 1.8061170248393221, + "learning_rate": 3.238377316392064e-06, + "loss": 0.6186, + "step": 24284 + }, + { + "epoch": 0.7442993747701361, + "grad_norm": 1.4514624217783043, + "learning_rate": 3.237646022659332e-06, + "loss": 0.5292, + "step": 24285 + }, + { + "epoch": 0.7443300232928772, + "grad_norm": 1.7581938625015572, + "learning_rate": 3.236914795557803e-06, + "loss": 0.6352, + "step": 24286 + }, + { + "epoch": 0.7443606718156185, + "grad_norm": 0.6608503977439795, + "learning_rate": 3.236183635094684e-06, + "loss": 0.4948, + "step": 24287 + }, + { + "epoch": 0.7443913203383596, + "grad_norm": 1.6313563121200818, + "learning_rate": 3.235452541277181e-06, + "loss": 0.7395, + "step": 24288 + }, + { + "epoch": 0.7444219688611009, + "grad_norm": 1.6624292631378275, + "learning_rate": 3.234721514112492e-06, + "loss": 0.5189, + "step": 24289 + }, + { + "epoch": 0.744452617383842, + "grad_norm": 1.7064170936840888, + "learning_rate": 3.233990553607822e-06, + "loss": 0.631, + "step": 24290 + }, + { + "epoch": 0.7444832659065833, + "grad_norm": 1.543134541526878, + "learning_rate": 3.233259659770378e-06, + "loss": 0.5909, + "step": 24291 + }, + { + "epoch": 0.7445139144293245, + "grad_norm": 1.8381525209102443, + "learning_rate": 3.232528832607359e-06, + "loss": 0.7245, + "step": 24292 + }, + { + "epoch": 0.7445445629520657, + "grad_norm": 0.6668571126075108, + "learning_rate": 3.2317980721259567e-06, + "loss": 0.52, + "step": 24293 + }, + { + "epoch": 0.7445752114748069, + "grad_norm": 1.7784739363644693, + "learning_rate": 3.231067378333387e-06, + "loss": 0.628, + "step": 24294 + }, + { + "epoch": 0.7446058599975481, + "grad_norm": 1.7276024819434757, + "learning_rate": 3.2303367512368387e-06, + "loss": 0.7496, + "step": 24295 + }, + { + "epoch": 0.7446365085202893, + "grad_norm": 0.640691318267209, + "learning_rate": 3.2296061908435184e-06, + "loss": 0.5177, + "step": 24296 + }, + { + "epoch": 0.7446671570430305, + "grad_norm": 0.6721870833664437, + "learning_rate": 3.2288756971606173e-06, + "loss": 0.5227, + "step": 24297 + }, + { + "epoch": 0.7446978055657717, + "grad_norm": 1.50157841059151, + "learning_rate": 3.2281452701953366e-06, + "loss": 0.6275, + "step": 24298 + }, + { + "epoch": 0.744728454088513, + "grad_norm": 1.778744833080796, + "learning_rate": 3.227414909954876e-06, + "loss": 0.6861, + "step": 24299 + }, + { + "epoch": 0.7447591026112541, + "grad_norm": 1.7009824015397783, + "learning_rate": 3.2266846164464262e-06, + "loss": 0.738, + "step": 24300 + }, + { + "epoch": 0.7447897511339954, + "grad_norm": 0.6532100802357695, + "learning_rate": 3.225954389677185e-06, + "loss": 0.5235, + "step": 24301 + }, + { + "epoch": 0.7448203996567365, + "grad_norm": 1.8068291955812301, + "learning_rate": 3.225224229654352e-06, + "loss": 0.6589, + "step": 24302 + }, + { + "epoch": 0.7448510481794778, + "grad_norm": 1.7521675107945673, + "learning_rate": 3.224494136385119e-06, + "loss": 0.7066, + "step": 24303 + }, + { + "epoch": 0.7448816967022189, + "grad_norm": 1.5842274990998118, + "learning_rate": 3.2237641098766716e-06, + "loss": 0.698, + "step": 24304 + }, + { + "epoch": 0.7449123452249602, + "grad_norm": 1.6122385994381154, + "learning_rate": 3.223034150136216e-06, + "loss": 0.6429, + "step": 24305 + }, + { + "epoch": 0.7449429937477013, + "grad_norm": 1.890300987695384, + "learning_rate": 3.2223042571709373e-06, + "loss": 0.6568, + "step": 24306 + }, + { + "epoch": 0.7449736422704426, + "grad_norm": 0.6475859736076532, + "learning_rate": 3.2215744309880305e-06, + "loss": 0.5491, + "step": 24307 + }, + { + "epoch": 0.7450042907931838, + "grad_norm": 1.6036428457198089, + "learning_rate": 3.220844671594683e-06, + "loss": 0.7268, + "step": 24308 + }, + { + "epoch": 0.745034939315925, + "grad_norm": 1.8099330404850988, + "learning_rate": 3.2201149789980867e-06, + "loss": 0.6672, + "step": 24309 + }, + { + "epoch": 0.7450655878386662, + "grad_norm": 1.6870534378524351, + "learning_rate": 3.2193853532054366e-06, + "loss": 0.6326, + "step": 24310 + }, + { + "epoch": 0.7450962363614074, + "grad_norm": 1.4714431865788662, + "learning_rate": 3.2186557942239137e-06, + "loss": 0.5525, + "step": 24311 + }, + { + "epoch": 0.7451268848841486, + "grad_norm": 1.5511675703755012, + "learning_rate": 3.21792630206071e-06, + "loss": 0.593, + "step": 24312 + }, + { + "epoch": 0.7451575334068898, + "grad_norm": 1.7352874720503149, + "learning_rate": 3.2171968767230167e-06, + "loss": 0.6434, + "step": 24313 + }, + { + "epoch": 0.745188181929631, + "grad_norm": 1.7987806994514421, + "learning_rate": 3.216467518218016e-06, + "loss": 0.6911, + "step": 24314 + }, + { + "epoch": 0.7452188304523722, + "grad_norm": 1.446146812856877, + "learning_rate": 3.2157382265528956e-06, + "loss": 0.6269, + "step": 24315 + }, + { + "epoch": 0.7452494789751134, + "grad_norm": 1.584674514163911, + "learning_rate": 3.2150090017348456e-06, + "loss": 0.7325, + "step": 24316 + }, + { + "epoch": 0.7452801274978545, + "grad_norm": 1.677050739938103, + "learning_rate": 3.2142798437710445e-06, + "loss": 0.5813, + "step": 24317 + }, + { + "epoch": 0.7453107760205958, + "grad_norm": 1.7588015328473234, + "learning_rate": 3.213550752668684e-06, + "loss": 0.7697, + "step": 24318 + }, + { + "epoch": 0.745341424543337, + "grad_norm": 1.5666899148507607, + "learning_rate": 3.2128217284349404e-06, + "loss": 0.6258, + "step": 24319 + }, + { + "epoch": 0.7453720730660782, + "grad_norm": 0.8151794061547546, + "learning_rate": 3.212092771077001e-06, + "loss": 0.5294, + "step": 24320 + }, + { + "epoch": 0.7454027215888194, + "grad_norm": 1.5395303651814436, + "learning_rate": 3.211363880602053e-06, + "loss": 0.6696, + "step": 24321 + }, + { + "epoch": 0.7454333701115606, + "grad_norm": 1.6903067072073443, + "learning_rate": 3.2106350570172683e-06, + "loss": 0.6646, + "step": 24322 + }, + { + "epoch": 0.7454640186343018, + "grad_norm": 1.6426917142920507, + "learning_rate": 3.2099063003298346e-06, + "loss": 0.672, + "step": 24323 + }, + { + "epoch": 0.745494667157043, + "grad_norm": 1.8115183592093922, + "learning_rate": 3.209177610546935e-06, + "loss": 0.6799, + "step": 24324 + }, + { + "epoch": 0.7455253156797842, + "grad_norm": 1.651428914014384, + "learning_rate": 3.2084489876757417e-06, + "loss": 0.7143, + "step": 24325 + }, + { + "epoch": 0.7455559642025255, + "grad_norm": 1.8022610340994094, + "learning_rate": 3.2077204317234423e-06, + "loss": 0.7033, + "step": 24326 + }, + { + "epoch": 0.7455866127252666, + "grad_norm": 1.8405186676477001, + "learning_rate": 3.2069919426972087e-06, + "loss": 0.7436, + "step": 24327 + }, + { + "epoch": 0.7456172612480079, + "grad_norm": 1.7556800806322492, + "learning_rate": 3.2062635206042203e-06, + "loss": 0.6152, + "step": 24328 + }, + { + "epoch": 0.745647909770749, + "grad_norm": 1.7433865494575338, + "learning_rate": 3.2055351654516588e-06, + "loss": 0.6407, + "step": 24329 + }, + { + "epoch": 0.7456785582934903, + "grad_norm": 1.8443134492294648, + "learning_rate": 3.2048068772466955e-06, + "loss": 0.7255, + "step": 24330 + }, + { + "epoch": 0.7457092068162314, + "grad_norm": 1.6368684654463366, + "learning_rate": 3.2040786559965077e-06, + "loss": 0.6274, + "step": 24331 + }, + { + "epoch": 0.7457398553389727, + "grad_norm": 1.5162725434714388, + "learning_rate": 3.2033505017082754e-06, + "loss": 0.6759, + "step": 24332 + }, + { + "epoch": 0.7457705038617138, + "grad_norm": 1.557223150953066, + "learning_rate": 3.202622414389167e-06, + "loss": 0.6997, + "step": 24333 + }, + { + "epoch": 0.7458011523844551, + "grad_norm": 1.8568802240934668, + "learning_rate": 3.2018943940463585e-06, + "loss": 0.5944, + "step": 24334 + }, + { + "epoch": 0.7458318009071963, + "grad_norm": 1.8530166193575213, + "learning_rate": 3.201166440687027e-06, + "loss": 0.7048, + "step": 24335 + }, + { + "epoch": 0.7458624494299375, + "grad_norm": 1.8779234862920526, + "learning_rate": 3.2004385543183393e-06, + "loss": 0.7041, + "step": 24336 + }, + { + "epoch": 0.7458930979526787, + "grad_norm": 0.6507460801119472, + "learning_rate": 3.199710734947473e-06, + "loss": 0.533, + "step": 24337 + }, + { + "epoch": 0.7459237464754199, + "grad_norm": 1.6877206506362896, + "learning_rate": 3.1989829825815945e-06, + "loss": 0.6645, + "step": 24338 + }, + { + "epoch": 0.7459543949981611, + "grad_norm": 1.5850831688580238, + "learning_rate": 3.198255297227876e-06, + "loss": 0.5609, + "step": 24339 + }, + { + "epoch": 0.7459850435209023, + "grad_norm": 1.7204979242578498, + "learning_rate": 3.1975276788934907e-06, + "loss": 0.6917, + "step": 24340 + }, + { + "epoch": 0.7460156920436435, + "grad_norm": 1.3479007901967406, + "learning_rate": 3.196800127585604e-06, + "loss": 0.5803, + "step": 24341 + }, + { + "epoch": 0.7460463405663847, + "grad_norm": 1.626311239105003, + "learning_rate": 3.196072643311385e-06, + "loss": 0.6766, + "step": 24342 + }, + { + "epoch": 0.7460769890891259, + "grad_norm": 1.5307936329535103, + "learning_rate": 3.195345226078007e-06, + "loss": 0.5875, + "step": 24343 + }, + { + "epoch": 0.7461076376118672, + "grad_norm": 1.5860122941599124, + "learning_rate": 3.1946178758926304e-06, + "loss": 0.6682, + "step": 24344 + }, + { + "epoch": 0.7461382861346083, + "grad_norm": 1.6005729299601612, + "learning_rate": 3.193890592762425e-06, + "loss": 0.6433, + "step": 24345 + }, + { + "epoch": 0.7461689346573496, + "grad_norm": 1.6973403427919784, + "learning_rate": 3.1931633766945614e-06, + "loss": 0.6485, + "step": 24346 + }, + { + "epoch": 0.7461995831800907, + "grad_norm": 1.5538974737753783, + "learning_rate": 3.1924362276961962e-06, + "loss": 0.6463, + "step": 24347 + }, + { + "epoch": 0.7462302317028319, + "grad_norm": 1.6299718151746223, + "learning_rate": 3.191709145774502e-06, + "loss": 0.5973, + "step": 24348 + }, + { + "epoch": 0.7462608802255731, + "grad_norm": 1.7270063856869382, + "learning_rate": 3.190982130936636e-06, + "loss": 0.6361, + "step": 24349 + }, + { + "epoch": 0.7462915287483143, + "grad_norm": 1.564665779780471, + "learning_rate": 3.1902551831897654e-06, + "loss": 0.5824, + "step": 24350 + }, + { + "epoch": 0.7463221772710555, + "grad_norm": 1.6209296309191379, + "learning_rate": 3.1895283025410564e-06, + "loss": 0.6781, + "step": 24351 + }, + { + "epoch": 0.7463528257937967, + "grad_norm": 0.6795191769557907, + "learning_rate": 3.1888014889976635e-06, + "loss": 0.5326, + "step": 24352 + }, + { + "epoch": 0.746383474316538, + "grad_norm": 1.771312665841103, + "learning_rate": 3.1880747425667536e-06, + "loss": 0.7077, + "step": 24353 + }, + { + "epoch": 0.7464141228392791, + "grad_norm": 1.6603499944342461, + "learning_rate": 3.1873480632554888e-06, + "loss": 0.7211, + "step": 24354 + }, + { + "epoch": 0.7464447713620204, + "grad_norm": 1.5628697600183978, + "learning_rate": 3.1866214510710226e-06, + "loss": 0.5927, + "step": 24355 + }, + { + "epoch": 0.7464754198847615, + "grad_norm": 1.6350834310734959, + "learning_rate": 3.185894906020519e-06, + "loss": 0.7283, + "step": 24356 + }, + { + "epoch": 0.7465060684075028, + "grad_norm": 1.4807279107689446, + "learning_rate": 3.1851684281111407e-06, + "loss": 0.5719, + "step": 24357 + }, + { + "epoch": 0.7465367169302439, + "grad_norm": 1.6217600869112052, + "learning_rate": 3.1844420173500366e-06, + "loss": 0.6222, + "step": 24358 + }, + { + "epoch": 0.7465673654529852, + "grad_norm": 1.657923271505433, + "learning_rate": 3.183715673744373e-06, + "loss": 0.5497, + "step": 24359 + }, + { + "epoch": 0.7465980139757263, + "grad_norm": 1.6758834584222808, + "learning_rate": 3.1829893973013005e-06, + "loss": 0.749, + "step": 24360 + }, + { + "epoch": 0.7466286624984676, + "grad_norm": 1.61379239745047, + "learning_rate": 3.1822631880279765e-06, + "loss": 0.6347, + "step": 24361 + }, + { + "epoch": 0.7466593110212087, + "grad_norm": 1.576827827718298, + "learning_rate": 3.181537045931562e-06, + "loss": 0.6359, + "step": 24362 + }, + { + "epoch": 0.74668995954395, + "grad_norm": 1.691718860815717, + "learning_rate": 3.1808109710192037e-06, + "loss": 0.6875, + "step": 24363 + }, + { + "epoch": 0.7467206080666912, + "grad_norm": 1.6933350941849092, + "learning_rate": 3.180084963298059e-06, + "loss": 0.7724, + "step": 24364 + }, + { + "epoch": 0.7467512565894324, + "grad_norm": 1.451603780499244, + "learning_rate": 3.179359022775287e-06, + "loss": 0.5836, + "step": 24365 + }, + { + "epoch": 0.7467819051121736, + "grad_norm": 1.6298117268254715, + "learning_rate": 3.178633149458031e-06, + "loss": 0.5371, + "step": 24366 + }, + { + "epoch": 0.7468125536349148, + "grad_norm": 1.6636696480502409, + "learning_rate": 3.1779073433534492e-06, + "loss": 0.7303, + "step": 24367 + }, + { + "epoch": 0.746843202157656, + "grad_norm": 1.4862252417138082, + "learning_rate": 3.177181604468691e-06, + "loss": 0.6015, + "step": 24368 + }, + { + "epoch": 0.7468738506803972, + "grad_norm": 1.619596961782188, + "learning_rate": 3.1764559328109115e-06, + "loss": 0.6143, + "step": 24369 + }, + { + "epoch": 0.7469044992031384, + "grad_norm": 1.493575610764635, + "learning_rate": 3.1757303283872565e-06, + "loss": 0.7028, + "step": 24370 + }, + { + "epoch": 0.7469351477258797, + "grad_norm": 1.6790619827050772, + "learning_rate": 3.1750047912048755e-06, + "loss": 0.5605, + "step": 24371 + }, + { + "epoch": 0.7469657962486208, + "grad_norm": 1.6908191968112032, + "learning_rate": 3.174279321270917e-06, + "loss": 0.5886, + "step": 24372 + }, + { + "epoch": 0.7469964447713621, + "grad_norm": 0.6793005958213912, + "learning_rate": 3.1735539185925346e-06, + "loss": 0.535, + "step": 24373 + }, + { + "epoch": 0.7470270932941032, + "grad_norm": 0.664118474682999, + "learning_rate": 3.172828583176868e-06, + "loss": 0.5292, + "step": 24374 + }, + { + "epoch": 0.7470577418168445, + "grad_norm": 1.6291481249941488, + "learning_rate": 3.17210331503107e-06, + "loss": 0.6891, + "step": 24375 + }, + { + "epoch": 0.7470883903395856, + "grad_norm": 1.7465609805752091, + "learning_rate": 3.1713781141622867e-06, + "loss": 0.7073, + "step": 24376 + }, + { + "epoch": 0.7471190388623269, + "grad_norm": 1.6611469718245173, + "learning_rate": 3.1706529805776588e-06, + "loss": 0.6346, + "step": 24377 + }, + { + "epoch": 0.747149687385068, + "grad_norm": 1.4302924993559638, + "learning_rate": 3.1699279142843385e-06, + "loss": 0.6697, + "step": 24378 + }, + { + "epoch": 0.7471803359078092, + "grad_norm": 1.4509763087752745, + "learning_rate": 3.169202915289459e-06, + "loss": 0.6144, + "step": 24379 + }, + { + "epoch": 0.7472109844305505, + "grad_norm": 0.6722985981531815, + "learning_rate": 3.1684779836001776e-06, + "loss": 0.5376, + "step": 24380 + }, + { + "epoch": 0.7472416329532916, + "grad_norm": 1.7612427628424807, + "learning_rate": 3.16775311922363e-06, + "loss": 0.6334, + "step": 24381 + }, + { + "epoch": 0.7472722814760329, + "grad_norm": 0.6593844854451327, + "learning_rate": 3.167028322166956e-06, + "loss": 0.51, + "step": 24382 + }, + { + "epoch": 0.747302929998774, + "grad_norm": 1.8253981335956706, + "learning_rate": 3.166303592437301e-06, + "loss": 0.6833, + "step": 24383 + }, + { + "epoch": 0.7473335785215153, + "grad_norm": 1.5802346191571566, + "learning_rate": 3.165578930041808e-06, + "loss": 0.6945, + "step": 24384 + }, + { + "epoch": 0.7473642270442564, + "grad_norm": 1.525398781240899, + "learning_rate": 3.1648543349876113e-06, + "loss": 0.5929, + "step": 24385 + }, + { + "epoch": 0.7473948755669977, + "grad_norm": 1.9033655275821946, + "learning_rate": 3.1641298072818537e-06, + "loss": 0.7169, + "step": 24386 + }, + { + "epoch": 0.7474255240897388, + "grad_norm": 1.8495191496158878, + "learning_rate": 3.1634053469316738e-06, + "loss": 0.6086, + "step": 24387 + }, + { + "epoch": 0.7474561726124801, + "grad_norm": 1.4916409456092867, + "learning_rate": 3.162680953944214e-06, + "loss": 0.6857, + "step": 24388 + }, + { + "epoch": 0.7474868211352212, + "grad_norm": 1.5395525835860697, + "learning_rate": 3.161956628326608e-06, + "loss": 0.7137, + "step": 24389 + }, + { + "epoch": 0.7475174696579625, + "grad_norm": 1.7486033839158006, + "learning_rate": 3.161232370085986e-06, + "loss": 0.6613, + "step": 24390 + }, + { + "epoch": 0.7475481181807037, + "grad_norm": 1.6763234218417402, + "learning_rate": 3.160508179229498e-06, + "loss": 0.5763, + "step": 24391 + }, + { + "epoch": 0.7475787667034449, + "grad_norm": 1.4242202037648677, + "learning_rate": 3.1597840557642724e-06, + "loss": 0.6656, + "step": 24392 + }, + { + "epoch": 0.7476094152261861, + "grad_norm": 1.782040743789141, + "learning_rate": 3.1590599996974425e-06, + "loss": 0.6978, + "step": 24393 + }, + { + "epoch": 0.7476400637489273, + "grad_norm": 0.6637414744189336, + "learning_rate": 3.158336011036144e-06, + "loss": 0.5327, + "step": 24394 + }, + { + "epoch": 0.7476707122716685, + "grad_norm": 1.7916786556485544, + "learning_rate": 3.157612089787511e-06, + "loss": 0.7122, + "step": 24395 + }, + { + "epoch": 0.7477013607944097, + "grad_norm": 1.7663355480573106, + "learning_rate": 3.1568882359586804e-06, + "loss": 0.6702, + "step": 24396 + }, + { + "epoch": 0.7477320093171509, + "grad_norm": 1.7177511257845004, + "learning_rate": 3.1561644495567777e-06, + "loss": 0.7416, + "step": 24397 + }, + { + "epoch": 0.7477626578398922, + "grad_norm": 1.9356357035057585, + "learning_rate": 3.155440730588938e-06, + "loss": 0.6768, + "step": 24398 + }, + { + "epoch": 0.7477933063626333, + "grad_norm": 1.5012995221379994, + "learning_rate": 3.154717079062295e-06, + "loss": 0.5939, + "step": 24399 + }, + { + "epoch": 0.7478239548853746, + "grad_norm": 0.6780331234327264, + "learning_rate": 3.153993494983976e-06, + "loss": 0.5385, + "step": 24400 + }, + { + "epoch": 0.7478546034081157, + "grad_norm": 1.4504809187053607, + "learning_rate": 3.1532699783611042e-06, + "loss": 0.5859, + "step": 24401 + }, + { + "epoch": 0.747885251930857, + "grad_norm": 1.5296774952808685, + "learning_rate": 3.1525465292008218e-06, + "loss": 0.5998, + "step": 24402 + }, + { + "epoch": 0.7479159004535981, + "grad_norm": 1.596810553656698, + "learning_rate": 3.15182314751025e-06, + "loss": 0.5526, + "step": 24403 + }, + { + "epoch": 0.7479465489763394, + "grad_norm": 0.6422692267586465, + "learning_rate": 3.1510998332965135e-06, + "loss": 0.5251, + "step": 24404 + }, + { + "epoch": 0.7479771974990805, + "grad_norm": 1.4089872241923727, + "learning_rate": 3.1503765865667433e-06, + "loss": 0.7071, + "step": 24405 + }, + { + "epoch": 0.7480078460218218, + "grad_norm": 1.4867552478957742, + "learning_rate": 3.149653407328066e-06, + "loss": 0.6106, + "step": 24406 + }, + { + "epoch": 0.748038494544563, + "grad_norm": 1.604598164871453, + "learning_rate": 3.148930295587608e-06, + "loss": 0.5897, + "step": 24407 + }, + { + "epoch": 0.7480691430673042, + "grad_norm": 0.6982011976686123, + "learning_rate": 3.148207251352491e-06, + "loss": 0.5458, + "step": 24408 + }, + { + "epoch": 0.7480997915900454, + "grad_norm": 1.74050153673249, + "learning_rate": 3.1474842746298396e-06, + "loss": 0.691, + "step": 24409 + }, + { + "epoch": 0.7481304401127865, + "grad_norm": 1.6757509385249607, + "learning_rate": 3.1467613654267827e-06, + "loss": 0.721, + "step": 24410 + }, + { + "epoch": 0.7481610886355278, + "grad_norm": 1.9356143746272354, + "learning_rate": 3.14603852375044e-06, + "loss": 0.6907, + "step": 24411 + }, + { + "epoch": 0.7481917371582689, + "grad_norm": 0.6936806008499922, + "learning_rate": 3.14531574960793e-06, + "loss": 0.5663, + "step": 24412 + }, + { + "epoch": 0.7482223856810102, + "grad_norm": 0.6624500482728346, + "learning_rate": 3.1445930430063775e-06, + "loss": 0.5543, + "step": 24413 + }, + { + "epoch": 0.7482530342037513, + "grad_norm": 1.5895185585972866, + "learning_rate": 3.143870403952903e-06, + "loss": 0.643, + "step": 24414 + }, + { + "epoch": 0.7482836827264926, + "grad_norm": 1.5915727355348719, + "learning_rate": 3.1431478324546325e-06, + "loss": 0.6043, + "step": 24415 + }, + { + "epoch": 0.7483143312492337, + "grad_norm": 1.5362731838903048, + "learning_rate": 3.142425328518677e-06, + "loss": 0.6421, + "step": 24416 + }, + { + "epoch": 0.748344979771975, + "grad_norm": 1.7455994689272119, + "learning_rate": 3.1417028921521587e-06, + "loss": 0.6839, + "step": 24417 + }, + { + "epoch": 0.7483756282947162, + "grad_norm": 1.6556666903482729, + "learning_rate": 3.1409805233622004e-06, + "loss": 0.6575, + "step": 24418 + }, + { + "epoch": 0.7484062768174574, + "grad_norm": 1.9862640402989522, + "learning_rate": 3.140258222155913e-06, + "loss": 0.6783, + "step": 24419 + }, + { + "epoch": 0.7484369253401986, + "grad_norm": 0.6789167088847139, + "learning_rate": 3.1395359885404154e-06, + "loss": 0.5453, + "step": 24420 + }, + { + "epoch": 0.7484675738629398, + "grad_norm": 1.645273364448033, + "learning_rate": 3.1388138225228293e-06, + "loss": 0.6908, + "step": 24421 + }, + { + "epoch": 0.748498222385681, + "grad_norm": 1.6561606942575942, + "learning_rate": 3.1380917241102626e-06, + "loss": 0.608, + "step": 24422 + }, + { + "epoch": 0.7485288709084222, + "grad_norm": 1.529939108365875, + "learning_rate": 3.1373696933098365e-06, + "loss": 0.6349, + "step": 24423 + }, + { + "epoch": 0.7485595194311634, + "grad_norm": 0.6637452571011405, + "learning_rate": 3.1366477301286604e-06, + "loss": 0.5312, + "step": 24424 + }, + { + "epoch": 0.7485901679539047, + "grad_norm": 1.6547094240461335, + "learning_rate": 3.13592583457385e-06, + "loss": 0.6109, + "step": 24425 + }, + { + "epoch": 0.7486208164766458, + "grad_norm": 1.5892363735866335, + "learning_rate": 3.135204006652521e-06, + "loss": 0.635, + "step": 24426 + }, + { + "epoch": 0.7486514649993871, + "grad_norm": 1.6662676172438775, + "learning_rate": 3.1344822463717805e-06, + "loss": 0.6681, + "step": 24427 + }, + { + "epoch": 0.7486821135221282, + "grad_norm": 1.5283710671254518, + "learning_rate": 3.133760553738744e-06, + "loss": 0.7311, + "step": 24428 + }, + { + "epoch": 0.7487127620448695, + "grad_norm": 1.6844954637640324, + "learning_rate": 3.1330389287605234e-06, + "loss": 0.7098, + "step": 24429 + }, + { + "epoch": 0.7487434105676106, + "grad_norm": 1.6725060711230078, + "learning_rate": 3.1323173714442278e-06, + "loss": 0.5953, + "step": 24430 + }, + { + "epoch": 0.7487740590903519, + "grad_norm": 1.6851411223181636, + "learning_rate": 3.1315958817969606e-06, + "loss": 0.6751, + "step": 24431 + }, + { + "epoch": 0.748804707613093, + "grad_norm": 1.8612009819071043, + "learning_rate": 3.1308744598258425e-06, + "loss": 0.6488, + "step": 24432 + }, + { + "epoch": 0.7488353561358343, + "grad_norm": 1.5795168140619198, + "learning_rate": 3.130153105537972e-06, + "loss": 0.6418, + "step": 24433 + }, + { + "epoch": 0.7488660046585754, + "grad_norm": 1.740414863135035, + "learning_rate": 3.129431818940465e-06, + "loss": 0.6904, + "step": 24434 + }, + { + "epoch": 0.7488966531813167, + "grad_norm": 1.4798311095426804, + "learning_rate": 3.12871060004042e-06, + "loss": 0.586, + "step": 24435 + }, + { + "epoch": 0.7489273017040579, + "grad_norm": 1.6793155115159906, + "learning_rate": 3.127989448844948e-06, + "loss": 0.6434, + "step": 24436 + }, + { + "epoch": 0.7489579502267991, + "grad_norm": 1.6368857368504375, + "learning_rate": 3.1272683653611567e-06, + "loss": 0.698, + "step": 24437 + }, + { + "epoch": 0.7489885987495403, + "grad_norm": 1.6444651583666339, + "learning_rate": 3.126547349596146e-06, + "loss": 0.6496, + "step": 24438 + }, + { + "epoch": 0.7490192472722815, + "grad_norm": 1.622403194620231, + "learning_rate": 3.125826401557024e-06, + "loss": 0.6022, + "step": 24439 + }, + { + "epoch": 0.7490498957950227, + "grad_norm": 1.6196237363882893, + "learning_rate": 3.1251055212508954e-06, + "loss": 0.6516, + "step": 24440 + }, + { + "epoch": 0.7490805443177638, + "grad_norm": 1.626980082476504, + "learning_rate": 3.1243847086848576e-06, + "loss": 0.6122, + "step": 24441 + }, + { + "epoch": 0.7491111928405051, + "grad_norm": 1.7612560082785875, + "learning_rate": 3.123663963866017e-06, + "loss": 0.6291, + "step": 24442 + }, + { + "epoch": 0.7491418413632462, + "grad_norm": 1.7937663949660796, + "learning_rate": 3.1229432868014786e-06, + "loss": 0.655, + "step": 24443 + }, + { + "epoch": 0.7491724898859875, + "grad_norm": 2.054567036392004, + "learning_rate": 3.1222226774983357e-06, + "loss": 0.7029, + "step": 24444 + }, + { + "epoch": 0.7492031384087287, + "grad_norm": 1.4639444965562531, + "learning_rate": 3.1215021359636955e-06, + "loss": 0.5652, + "step": 24445 + }, + { + "epoch": 0.7492337869314699, + "grad_norm": 1.6342893380959658, + "learning_rate": 3.1207816622046516e-06, + "loss": 0.6272, + "step": 24446 + }, + { + "epoch": 0.7492644354542111, + "grad_norm": 2.1923505532169925, + "learning_rate": 3.120061256228306e-06, + "loss": 0.6963, + "step": 24447 + }, + { + "epoch": 0.7492950839769523, + "grad_norm": 1.6101012540673296, + "learning_rate": 3.119340918041761e-06, + "loss": 0.6715, + "step": 24448 + }, + { + "epoch": 0.7493257324996935, + "grad_norm": 1.6603698924509143, + "learning_rate": 3.1186206476521062e-06, + "loss": 0.7266, + "step": 24449 + }, + { + "epoch": 0.7493563810224347, + "grad_norm": 1.648482040181769, + "learning_rate": 3.117900445066444e-06, + "loss": 0.7595, + "step": 24450 + }, + { + "epoch": 0.7493870295451759, + "grad_norm": 1.8663335511913537, + "learning_rate": 3.117180310291872e-06, + "loss": 0.6445, + "step": 24451 + }, + { + "epoch": 0.7494176780679171, + "grad_norm": 0.6666159805403592, + "learning_rate": 3.1164602433354797e-06, + "loss": 0.5163, + "step": 24452 + }, + { + "epoch": 0.7494483265906583, + "grad_norm": 1.5814267714925845, + "learning_rate": 3.115740244204367e-06, + "loss": 0.5939, + "step": 24453 + }, + { + "epoch": 0.7494789751133996, + "grad_norm": 1.7370957508760378, + "learning_rate": 3.11502031290563e-06, + "loss": 0.5703, + "step": 24454 + }, + { + "epoch": 0.7495096236361407, + "grad_norm": 1.7594714499069897, + "learning_rate": 3.1143004494463557e-06, + "loss": 0.5195, + "step": 24455 + }, + { + "epoch": 0.749540272158882, + "grad_norm": 2.25322250131856, + "learning_rate": 3.1135806538336445e-06, + "loss": 0.6833, + "step": 24456 + }, + { + "epoch": 0.7495709206816231, + "grad_norm": 1.7856927264500737, + "learning_rate": 3.1128609260745823e-06, + "loss": 0.7616, + "step": 24457 + }, + { + "epoch": 0.7496015692043644, + "grad_norm": 1.6936112417342728, + "learning_rate": 3.1121412661762627e-06, + "loss": 0.6487, + "step": 24458 + }, + { + "epoch": 0.7496322177271055, + "grad_norm": 1.4764066276246721, + "learning_rate": 3.1114216741457816e-06, + "loss": 0.6603, + "step": 24459 + }, + { + "epoch": 0.7496628662498468, + "grad_norm": 1.7333060402038514, + "learning_rate": 3.110702149990221e-06, + "loss": 0.6542, + "step": 24460 + }, + { + "epoch": 0.7496935147725879, + "grad_norm": 1.4052884729497745, + "learning_rate": 3.1099826937166755e-06, + "loss": 0.6489, + "step": 24461 + }, + { + "epoch": 0.7497241632953292, + "grad_norm": 1.8667804092678109, + "learning_rate": 3.109263305332236e-06, + "loss": 0.6991, + "step": 24462 + }, + { + "epoch": 0.7497548118180704, + "grad_norm": 1.8941977262713376, + "learning_rate": 3.1085439848439856e-06, + "loss": 0.7365, + "step": 24463 + }, + { + "epoch": 0.7497854603408116, + "grad_norm": 2.005787773881389, + "learning_rate": 3.107824732259018e-06, + "loss": 0.6711, + "step": 24464 + }, + { + "epoch": 0.7498161088635528, + "grad_norm": 1.633344305226745, + "learning_rate": 3.1071055475844135e-06, + "loss": 0.6491, + "step": 24465 + }, + { + "epoch": 0.749846757386294, + "grad_norm": 1.6887503141929803, + "learning_rate": 3.1063864308272616e-06, + "loss": 0.7032, + "step": 24466 + }, + { + "epoch": 0.7498774059090352, + "grad_norm": 1.4129087169852839, + "learning_rate": 3.1056673819946516e-06, + "loss": 0.6416, + "step": 24467 + }, + { + "epoch": 0.7499080544317764, + "grad_norm": 1.8666453886987096, + "learning_rate": 3.1049484010936605e-06, + "loss": 0.8176, + "step": 24468 + }, + { + "epoch": 0.7499387029545176, + "grad_norm": 1.6087279476771077, + "learning_rate": 3.1042294881313775e-06, + "loss": 0.6604, + "step": 24469 + }, + { + "epoch": 0.7499693514772588, + "grad_norm": 1.732107948531488, + "learning_rate": 3.1035106431148888e-06, + "loss": 0.6354, + "step": 24470 + }, + { + "epoch": 0.75, + "grad_norm": 1.4429891432492792, + "learning_rate": 3.1027918660512714e-06, + "loss": 0.6194, + "step": 24471 + }, + { + "epoch": 0.7500306485227412, + "grad_norm": 1.9064923241053708, + "learning_rate": 3.1020731569476104e-06, + "loss": 0.6561, + "step": 24472 + }, + { + "epoch": 0.7500612970454824, + "grad_norm": 1.7951436211158476, + "learning_rate": 3.1013545158109904e-06, + "loss": 0.6974, + "step": 24473 + }, + { + "epoch": 0.7500919455682236, + "grad_norm": 0.6729346125151651, + "learning_rate": 3.1006359426484865e-06, + "loss": 0.52, + "step": 24474 + }, + { + "epoch": 0.7501225940909648, + "grad_norm": 1.7712662430012163, + "learning_rate": 3.099917437467186e-06, + "loss": 0.6469, + "step": 24475 + }, + { + "epoch": 0.750153242613706, + "grad_norm": 1.5925304200681476, + "learning_rate": 3.0991990002741567e-06, + "loss": 0.6106, + "step": 24476 + }, + { + "epoch": 0.7501838911364472, + "grad_norm": 1.7858683329551344, + "learning_rate": 3.0984806310764924e-06, + "loss": 0.6449, + "step": 24477 + }, + { + "epoch": 0.7502145396591884, + "grad_norm": 1.6355977167277271, + "learning_rate": 3.0977623298812644e-06, + "loss": 0.7392, + "step": 24478 + }, + { + "epoch": 0.7502451881819296, + "grad_norm": 1.58574932148445, + "learning_rate": 3.0970440966955462e-06, + "loss": 0.7245, + "step": 24479 + }, + { + "epoch": 0.7502758367046708, + "grad_norm": 1.6735908205985386, + "learning_rate": 3.096325931526419e-06, + "loss": 0.7696, + "step": 24480 + }, + { + "epoch": 0.7503064852274121, + "grad_norm": 1.7712879901090626, + "learning_rate": 3.0956078343809627e-06, + "loss": 0.6287, + "step": 24481 + }, + { + "epoch": 0.7503371337501532, + "grad_norm": 1.7831418376545467, + "learning_rate": 3.0948898052662445e-06, + "loss": 0.7006, + "step": 24482 + }, + { + "epoch": 0.7503677822728945, + "grad_norm": 1.5833675214315415, + "learning_rate": 3.0941718441893444e-06, + "loss": 0.5596, + "step": 24483 + }, + { + "epoch": 0.7503984307956356, + "grad_norm": 0.6636267558601229, + "learning_rate": 3.093453951157339e-06, + "loss": 0.5282, + "step": 24484 + }, + { + "epoch": 0.7504290793183769, + "grad_norm": 0.717318433187053, + "learning_rate": 3.092736126177297e-06, + "loss": 0.5675, + "step": 24485 + }, + { + "epoch": 0.750459727841118, + "grad_norm": 1.5944039516351025, + "learning_rate": 3.0920183692562955e-06, + "loss": 0.7164, + "step": 24486 + }, + { + "epoch": 0.7504903763638593, + "grad_norm": 1.5000497495951104, + "learning_rate": 3.0913006804013978e-06, + "loss": 0.6439, + "step": 24487 + }, + { + "epoch": 0.7505210248866004, + "grad_norm": 1.5626750025229041, + "learning_rate": 3.090583059619688e-06, + "loss": 0.5665, + "step": 24488 + }, + { + "epoch": 0.7505516734093417, + "grad_norm": 1.7212576724781417, + "learning_rate": 3.0898655069182327e-06, + "loss": 0.5277, + "step": 24489 + }, + { + "epoch": 0.7505823219320829, + "grad_norm": 0.7486898154145759, + "learning_rate": 3.0891480223040972e-06, + "loss": 0.5355, + "step": 24490 + }, + { + "epoch": 0.7506129704548241, + "grad_norm": 1.8087217788408916, + "learning_rate": 3.0884306057843537e-06, + "loss": 0.6858, + "step": 24491 + }, + { + "epoch": 0.7506436189775653, + "grad_norm": 0.6385722586564645, + "learning_rate": 3.087713257366075e-06, + "loss": 0.5098, + "step": 24492 + }, + { + "epoch": 0.7506742675003065, + "grad_norm": 1.6181815668814579, + "learning_rate": 3.086995977056323e-06, + "loss": 0.7075, + "step": 24493 + }, + { + "epoch": 0.7507049160230477, + "grad_norm": 1.8273676813245192, + "learning_rate": 3.0862787648621695e-06, + "loss": 0.6191, + "step": 24494 + }, + { + "epoch": 0.7507355645457889, + "grad_norm": 1.7151613878512588, + "learning_rate": 3.0855616207906793e-06, + "loss": 0.56, + "step": 24495 + }, + { + "epoch": 0.7507662130685301, + "grad_norm": 0.6544369094037259, + "learning_rate": 3.084844544848923e-06, + "loss": 0.5344, + "step": 24496 + }, + { + "epoch": 0.7507968615912713, + "grad_norm": 0.6683471687678778, + "learning_rate": 3.084127537043963e-06, + "loss": 0.5397, + "step": 24497 + }, + { + "epoch": 0.7508275101140125, + "grad_norm": 1.4908327437541513, + "learning_rate": 3.0834105973828566e-06, + "loss": 0.6606, + "step": 24498 + }, + { + "epoch": 0.7508581586367538, + "grad_norm": 1.5235026768848061, + "learning_rate": 3.082693725872682e-06, + "loss": 0.7388, + "step": 24499 + }, + { + "epoch": 0.7508888071594949, + "grad_norm": 1.6318576898320944, + "learning_rate": 3.0819769225204965e-06, + "loss": 0.6857, + "step": 24500 + }, + { + "epoch": 0.7509194556822362, + "grad_norm": 1.5866227686036916, + "learning_rate": 3.0812601873333593e-06, + "loss": 0.5969, + "step": 24501 + }, + { + "epoch": 0.7509501042049773, + "grad_norm": 1.5761681804595278, + "learning_rate": 3.080543520318335e-06, + "loss": 0.5982, + "step": 24502 + }, + { + "epoch": 0.7509807527277185, + "grad_norm": 1.6739229897277714, + "learning_rate": 3.0798269214824893e-06, + "loss": 0.6477, + "step": 24503 + }, + { + "epoch": 0.7510114012504597, + "grad_norm": 1.917238764713813, + "learning_rate": 3.0791103908328766e-06, + "loss": 0.7229, + "step": 24504 + }, + { + "epoch": 0.7510420497732009, + "grad_norm": 1.7010651959072036, + "learning_rate": 3.0783939283765595e-06, + "loss": 0.5876, + "step": 24505 + }, + { + "epoch": 0.7510726982959421, + "grad_norm": 0.7057454662842236, + "learning_rate": 3.077677534120599e-06, + "loss": 0.5445, + "step": 24506 + }, + { + "epoch": 0.7511033468186833, + "grad_norm": 1.8257790563898064, + "learning_rate": 3.0769612080720544e-06, + "loss": 0.7795, + "step": 24507 + }, + { + "epoch": 0.7511339953414246, + "grad_norm": 1.49003380748834, + "learning_rate": 3.076244950237984e-06, + "loss": 0.5836, + "step": 24508 + }, + { + "epoch": 0.7511646438641657, + "grad_norm": 1.7779375005498674, + "learning_rate": 3.075528760625439e-06, + "loss": 0.647, + "step": 24509 + }, + { + "epoch": 0.751195292386907, + "grad_norm": 1.6956824007582498, + "learning_rate": 3.0748126392414823e-06, + "loss": 0.6503, + "step": 24510 + }, + { + "epoch": 0.7512259409096481, + "grad_norm": 1.6015572710818187, + "learning_rate": 3.0740965860931715e-06, + "loss": 0.6615, + "step": 24511 + }, + { + "epoch": 0.7512565894323894, + "grad_norm": 1.7942208038619099, + "learning_rate": 3.0733806011875555e-06, + "loss": 0.6509, + "step": 24512 + }, + { + "epoch": 0.7512872379551305, + "grad_norm": 1.6523569570867211, + "learning_rate": 3.072664684531692e-06, + "loss": 0.6205, + "step": 24513 + }, + { + "epoch": 0.7513178864778718, + "grad_norm": 1.90756341184552, + "learning_rate": 3.0719488361326368e-06, + "loss": 0.7313, + "step": 24514 + }, + { + "epoch": 0.7513485350006129, + "grad_norm": 1.6314604628477947, + "learning_rate": 3.0712330559974445e-06, + "loss": 0.6328, + "step": 24515 + }, + { + "epoch": 0.7513791835233542, + "grad_norm": 1.6423476963258083, + "learning_rate": 3.070517344133167e-06, + "loss": 0.6112, + "step": 24516 + }, + { + "epoch": 0.7514098320460953, + "grad_norm": 1.623191916419964, + "learning_rate": 3.0698017005468483e-06, + "loss": 0.6884, + "step": 24517 + }, + { + "epoch": 0.7514404805688366, + "grad_norm": 0.6520856476507506, + "learning_rate": 3.0690861252455527e-06, + "loss": 0.5299, + "step": 24518 + }, + { + "epoch": 0.7514711290915778, + "grad_norm": 1.6023000582525961, + "learning_rate": 3.068370618236325e-06, + "loss": 0.6071, + "step": 24519 + }, + { + "epoch": 0.751501777614319, + "grad_norm": 1.637537320862198, + "learning_rate": 3.067655179526212e-06, + "loss": 0.7191, + "step": 24520 + }, + { + "epoch": 0.7515324261370602, + "grad_norm": 0.7091184951970919, + "learning_rate": 3.066939809122266e-06, + "loss": 0.5196, + "step": 24521 + }, + { + "epoch": 0.7515630746598014, + "grad_norm": 0.6838019832206298, + "learning_rate": 3.0662245070315355e-06, + "loss": 0.5349, + "step": 24522 + }, + { + "epoch": 0.7515937231825426, + "grad_norm": 1.6928517766344215, + "learning_rate": 3.0655092732610735e-06, + "loss": 0.6392, + "step": 24523 + }, + { + "epoch": 0.7516243717052838, + "grad_norm": 1.5800739892611235, + "learning_rate": 3.064794107817919e-06, + "loss": 0.6134, + "step": 24524 + }, + { + "epoch": 0.751655020228025, + "grad_norm": 0.6611494489331866, + "learning_rate": 3.0640790107091223e-06, + "loss": 0.533, + "step": 24525 + }, + { + "epoch": 0.7516856687507663, + "grad_norm": 1.732968974747023, + "learning_rate": 3.0633639819417336e-06, + "loss": 0.6245, + "step": 24526 + }, + { + "epoch": 0.7517163172735074, + "grad_norm": 1.4980056017405983, + "learning_rate": 3.0626490215227934e-06, + "loss": 0.7495, + "step": 24527 + }, + { + "epoch": 0.7517469657962487, + "grad_norm": 1.753086067097267, + "learning_rate": 3.061934129459342e-06, + "loss": 0.6752, + "step": 24528 + }, + { + "epoch": 0.7517776143189898, + "grad_norm": 0.66222177850576, + "learning_rate": 3.061219305758435e-06, + "loss": 0.528, + "step": 24529 + }, + { + "epoch": 0.7518082628417311, + "grad_norm": 1.6483378018116155, + "learning_rate": 3.060504550427109e-06, + "loss": 0.5464, + "step": 24530 + }, + { + "epoch": 0.7518389113644722, + "grad_norm": 0.6846776282719913, + "learning_rate": 3.0597898634724043e-06, + "loss": 0.5145, + "step": 24531 + }, + { + "epoch": 0.7518695598872135, + "grad_norm": 1.6395669202714651, + "learning_rate": 3.059075244901366e-06, + "loss": 0.5439, + "step": 24532 + }, + { + "epoch": 0.7519002084099546, + "grad_norm": 1.610644826759509, + "learning_rate": 3.0583606947210353e-06, + "loss": 0.6749, + "step": 24533 + }, + { + "epoch": 0.7519308569326958, + "grad_norm": 1.8833286303268482, + "learning_rate": 3.0576462129384554e-06, + "loss": 0.6483, + "step": 24534 + }, + { + "epoch": 0.751961505455437, + "grad_norm": 1.7268005450366792, + "learning_rate": 3.056931799560662e-06, + "loss": 0.6427, + "step": 24535 + }, + { + "epoch": 0.7519921539781782, + "grad_norm": 1.807175135194997, + "learning_rate": 3.0562174545946943e-06, + "loss": 0.7269, + "step": 24536 + }, + { + "epoch": 0.7520228025009195, + "grad_norm": 1.6069264552580946, + "learning_rate": 3.055503178047596e-06, + "loss": 0.6504, + "step": 24537 + }, + { + "epoch": 0.7520534510236606, + "grad_norm": 1.9680115579632682, + "learning_rate": 3.054788969926402e-06, + "loss": 0.6587, + "step": 24538 + }, + { + "epoch": 0.7520840995464019, + "grad_norm": 1.592568316878811, + "learning_rate": 3.054074830238144e-06, + "loss": 0.6413, + "step": 24539 + }, + { + "epoch": 0.752114748069143, + "grad_norm": 1.6016388523812397, + "learning_rate": 3.0533607589898686e-06, + "loss": 0.6984, + "step": 24540 + }, + { + "epoch": 0.7521453965918843, + "grad_norm": 1.8362101975974037, + "learning_rate": 3.0526467561886054e-06, + "loss": 0.6313, + "step": 24541 + }, + { + "epoch": 0.7521760451146254, + "grad_norm": 1.7694860174532219, + "learning_rate": 3.0519328218413933e-06, + "loss": 0.69, + "step": 24542 + }, + { + "epoch": 0.7522066936373667, + "grad_norm": 1.7972615948477773, + "learning_rate": 3.0512189559552617e-06, + "loss": 0.5492, + "step": 24543 + }, + { + "epoch": 0.7522373421601078, + "grad_norm": 1.5366130697965954, + "learning_rate": 3.050505158537248e-06, + "loss": 0.572, + "step": 24544 + }, + { + "epoch": 0.7522679906828491, + "grad_norm": 1.608381786538457, + "learning_rate": 3.0497914295943877e-06, + "loss": 0.7408, + "step": 24545 + }, + { + "epoch": 0.7522986392055903, + "grad_norm": 0.6983590232793949, + "learning_rate": 3.0490777691337083e-06, + "loss": 0.5291, + "step": 24546 + }, + { + "epoch": 0.7523292877283315, + "grad_norm": 1.766294065630452, + "learning_rate": 3.048364177162244e-06, + "loss": 0.6723, + "step": 24547 + }, + { + "epoch": 0.7523599362510727, + "grad_norm": 1.4804855290609376, + "learning_rate": 3.0476506536870285e-06, + "loss": 0.6248, + "step": 24548 + }, + { + "epoch": 0.7523905847738139, + "grad_norm": 1.7843782490479339, + "learning_rate": 3.0469371987150877e-06, + "loss": 0.7116, + "step": 24549 + }, + { + "epoch": 0.7524212332965551, + "grad_norm": 1.5647796366418192, + "learning_rate": 3.0462238122534536e-06, + "loss": 0.6659, + "step": 24550 + }, + { + "epoch": 0.7524518818192963, + "grad_norm": 1.6310034151724269, + "learning_rate": 3.0455104943091586e-06, + "loss": 0.655, + "step": 24551 + }, + { + "epoch": 0.7524825303420375, + "grad_norm": 1.5686690549179205, + "learning_rate": 3.0447972448892247e-06, + "loss": 0.6081, + "step": 24552 + }, + { + "epoch": 0.7525131788647788, + "grad_norm": 1.6110191600043131, + "learning_rate": 3.0440840640006865e-06, + "loss": 0.6221, + "step": 24553 + }, + { + "epoch": 0.7525438273875199, + "grad_norm": 1.8967914930222831, + "learning_rate": 3.043370951650564e-06, + "loss": 0.7468, + "step": 24554 + }, + { + "epoch": 0.7525744759102612, + "grad_norm": 1.8359655708883496, + "learning_rate": 3.0426579078458886e-06, + "loss": 0.6747, + "step": 24555 + }, + { + "epoch": 0.7526051244330023, + "grad_norm": 1.58001400001336, + "learning_rate": 3.0419449325936866e-06, + "loss": 0.6239, + "step": 24556 + }, + { + "epoch": 0.7526357729557436, + "grad_norm": 1.7915177515996776, + "learning_rate": 3.04123202590098e-06, + "loss": 0.711, + "step": 24557 + }, + { + "epoch": 0.7526664214784847, + "grad_norm": 1.6797937013609527, + "learning_rate": 3.040519187774793e-06, + "loss": 0.6253, + "step": 24558 + }, + { + "epoch": 0.752697070001226, + "grad_norm": 0.6393440358374485, + "learning_rate": 3.0398064182221554e-06, + "loss": 0.5075, + "step": 24559 + }, + { + "epoch": 0.7527277185239671, + "grad_norm": 0.7011976030378184, + "learning_rate": 3.0390937172500825e-06, + "loss": 0.5529, + "step": 24560 + }, + { + "epoch": 0.7527583670467084, + "grad_norm": 1.555571386087484, + "learning_rate": 3.0383810848656037e-06, + "loss": 0.6622, + "step": 24561 + }, + { + "epoch": 0.7527890155694495, + "grad_norm": 0.6809129944329033, + "learning_rate": 3.037668521075734e-06, + "loss": 0.5379, + "step": 24562 + }, + { + "epoch": 0.7528196640921908, + "grad_norm": 1.7141039022001647, + "learning_rate": 3.0369560258874973e-06, + "loss": 0.7013, + "step": 24563 + }, + { + "epoch": 0.752850312614932, + "grad_norm": 0.666024509675446, + "learning_rate": 3.0362435993079178e-06, + "loss": 0.5092, + "step": 24564 + }, + { + "epoch": 0.7528809611376731, + "grad_norm": 1.726761299105222, + "learning_rate": 3.0355312413440086e-06, + "loss": 0.6116, + "step": 24565 + }, + { + "epoch": 0.7529116096604144, + "grad_norm": 1.55582463517085, + "learning_rate": 3.0348189520027925e-06, + "loss": 0.602, + "step": 24566 + }, + { + "epoch": 0.7529422581831555, + "grad_norm": 0.6768159968235796, + "learning_rate": 3.03410673129129e-06, + "loss": 0.5189, + "step": 24567 + }, + { + "epoch": 0.7529729067058968, + "grad_norm": 1.7239833245589187, + "learning_rate": 3.033394579216513e-06, + "loss": 0.7034, + "step": 24568 + }, + { + "epoch": 0.7530035552286379, + "grad_norm": 1.6839882455664936, + "learning_rate": 3.0326824957854815e-06, + "loss": 0.4681, + "step": 24569 + }, + { + "epoch": 0.7530342037513792, + "grad_norm": 1.5853249936956129, + "learning_rate": 3.0319704810052164e-06, + "loss": 0.6768, + "step": 24570 + }, + { + "epoch": 0.7530648522741203, + "grad_norm": 1.6070966566950535, + "learning_rate": 3.031258534882725e-06, + "loss": 0.6259, + "step": 24571 + }, + { + "epoch": 0.7530955007968616, + "grad_norm": 1.8356628183837045, + "learning_rate": 3.03054665742503e-06, + "loss": 0.6606, + "step": 24572 + }, + { + "epoch": 0.7531261493196028, + "grad_norm": 1.9369587836288613, + "learning_rate": 3.0298348486391384e-06, + "loss": 0.6845, + "step": 24573 + }, + { + "epoch": 0.753156797842344, + "grad_norm": 1.7492407335958993, + "learning_rate": 3.029123108532067e-06, + "loss": 0.6811, + "step": 24574 + }, + { + "epoch": 0.7531874463650852, + "grad_norm": 1.8602692690324127, + "learning_rate": 3.028411437110833e-06, + "loss": 0.6889, + "step": 24575 + }, + { + "epoch": 0.7532180948878264, + "grad_norm": 1.5714610633311317, + "learning_rate": 3.0276998343824416e-06, + "loss": 0.6031, + "step": 24576 + }, + { + "epoch": 0.7532487434105676, + "grad_norm": 1.864259820537621, + "learning_rate": 3.0269883003539068e-06, + "loss": 0.6418, + "step": 24577 + }, + { + "epoch": 0.7532793919333088, + "grad_norm": 1.6002918237104777, + "learning_rate": 3.0262768350322445e-06, + "loss": 0.6688, + "step": 24578 + }, + { + "epoch": 0.75331004045605, + "grad_norm": 1.8333271021040896, + "learning_rate": 3.025565438424458e-06, + "loss": 0.7153, + "step": 24579 + }, + { + "epoch": 0.7533406889787913, + "grad_norm": 1.8711419464271943, + "learning_rate": 3.0248541105375595e-06, + "loss": 0.6962, + "step": 24580 + }, + { + "epoch": 0.7533713375015324, + "grad_norm": 1.4853351033352526, + "learning_rate": 3.0241428513785607e-06, + "loss": 0.6021, + "step": 24581 + }, + { + "epoch": 0.7534019860242737, + "grad_norm": 1.5700246553716057, + "learning_rate": 3.0234316609544645e-06, + "loss": 0.6153, + "step": 24582 + }, + { + "epoch": 0.7534326345470148, + "grad_norm": 1.5038718908047835, + "learning_rate": 3.0227205392722838e-06, + "loss": 0.6426, + "step": 24583 + }, + { + "epoch": 0.7534632830697561, + "grad_norm": 1.5828218877584224, + "learning_rate": 3.02200948633902e-06, + "loss": 0.6169, + "step": 24584 + }, + { + "epoch": 0.7534939315924972, + "grad_norm": 1.6868800876704715, + "learning_rate": 3.0212985021616825e-06, + "loss": 0.6341, + "step": 24585 + }, + { + "epoch": 0.7535245801152385, + "grad_norm": 1.7257097857346029, + "learning_rate": 3.0205875867472787e-06, + "loss": 0.6252, + "step": 24586 + }, + { + "epoch": 0.7535552286379796, + "grad_norm": 1.7447039177197086, + "learning_rate": 3.0198767401028095e-06, + "loss": 0.6, + "step": 24587 + }, + { + "epoch": 0.7535858771607209, + "grad_norm": 1.77427892874093, + "learning_rate": 3.0191659622352797e-06, + "loss": 0.6388, + "step": 24588 + }, + { + "epoch": 0.753616525683462, + "grad_norm": 1.6405543659993758, + "learning_rate": 3.0184552531516966e-06, + "loss": 0.6392, + "step": 24589 + }, + { + "epoch": 0.7536471742062033, + "grad_norm": 1.585750740744802, + "learning_rate": 3.0177446128590582e-06, + "loss": 0.6575, + "step": 24590 + }, + { + "epoch": 0.7536778227289445, + "grad_norm": 1.785382399806194, + "learning_rate": 3.017034041364367e-06, + "loss": 0.6799, + "step": 24591 + }, + { + "epoch": 0.7537084712516857, + "grad_norm": 1.582177561190538, + "learning_rate": 3.0163235386746303e-06, + "loss": 0.5782, + "step": 24592 + }, + { + "epoch": 0.7537391197744269, + "grad_norm": 1.909645321972486, + "learning_rate": 3.0156131047968417e-06, + "loss": 0.7351, + "step": 24593 + }, + { + "epoch": 0.7537697682971681, + "grad_norm": 1.6185317722293964, + "learning_rate": 3.014902739738007e-06, + "loss": 0.6182, + "step": 24594 + }, + { + "epoch": 0.7538004168199093, + "grad_norm": 0.6774069008677374, + "learning_rate": 3.0141924435051163e-06, + "loss": 0.5289, + "step": 24595 + }, + { + "epoch": 0.7538310653426504, + "grad_norm": 1.4339156172953325, + "learning_rate": 3.0134822161051815e-06, + "loss": 0.6241, + "step": 24596 + }, + { + "epoch": 0.7538617138653917, + "grad_norm": 0.6655880015154684, + "learning_rate": 3.0127720575451935e-06, + "loss": 0.5274, + "step": 24597 + }, + { + "epoch": 0.7538923623881328, + "grad_norm": 1.4383435246641574, + "learning_rate": 3.0120619678321473e-06, + "loss": 0.5346, + "step": 24598 + }, + { + "epoch": 0.7539230109108741, + "grad_norm": 1.6686708130363033, + "learning_rate": 3.0113519469730435e-06, + "loss": 0.5083, + "step": 24599 + }, + { + "epoch": 0.7539536594336153, + "grad_norm": 1.7114798435312102, + "learning_rate": 3.0106419949748787e-06, + "loss": 0.6888, + "step": 24600 + }, + { + "epoch": 0.7539843079563565, + "grad_norm": 1.77103100590262, + "learning_rate": 3.0099321118446446e-06, + "loss": 0.716, + "step": 24601 + }, + { + "epoch": 0.7540149564790977, + "grad_norm": 1.6814115694506881, + "learning_rate": 3.009222297589337e-06, + "loss": 0.7298, + "step": 24602 + }, + { + "epoch": 0.7540456050018389, + "grad_norm": 0.6502311056741953, + "learning_rate": 3.008512552215951e-06, + "loss": 0.5268, + "step": 24603 + }, + { + "epoch": 0.7540762535245801, + "grad_norm": 1.7919434688117502, + "learning_rate": 3.0078028757314826e-06, + "loss": 0.677, + "step": 24604 + }, + { + "epoch": 0.7541069020473213, + "grad_norm": 1.6932981791533441, + "learning_rate": 3.007093268142922e-06, + "loss": 0.631, + "step": 24605 + }, + { + "epoch": 0.7541375505700625, + "grad_norm": 1.7235397907280416, + "learning_rate": 3.0063837294572575e-06, + "loss": 0.6076, + "step": 24606 + }, + { + "epoch": 0.7541681990928037, + "grad_norm": 1.7700723431677443, + "learning_rate": 3.0056742596814835e-06, + "loss": 0.7062, + "step": 24607 + }, + { + "epoch": 0.7541988476155449, + "grad_norm": 1.6568338345441627, + "learning_rate": 3.0049648588225933e-06, + "loss": 0.6314, + "step": 24608 + }, + { + "epoch": 0.7542294961382862, + "grad_norm": 1.5172478758247738, + "learning_rate": 3.0042555268875715e-06, + "loss": 0.6576, + "step": 24609 + }, + { + "epoch": 0.7542601446610273, + "grad_norm": 1.6814714960724968, + "learning_rate": 3.00354626388341e-06, + "loss": 0.69, + "step": 24610 + }, + { + "epoch": 0.7542907931837686, + "grad_norm": 1.6491359118962325, + "learning_rate": 3.0028370698170996e-06, + "loss": 0.7166, + "step": 24611 + }, + { + "epoch": 0.7543214417065097, + "grad_norm": 1.6403437843923416, + "learning_rate": 3.002127944695623e-06, + "loss": 0.6468, + "step": 24612 + }, + { + "epoch": 0.754352090229251, + "grad_norm": 1.726512192777365, + "learning_rate": 3.001418888525974e-06, + "loss": 0.723, + "step": 24613 + }, + { + "epoch": 0.7543827387519921, + "grad_norm": 1.5022978556813251, + "learning_rate": 3.0007099013151286e-06, + "loss": 0.6155, + "step": 24614 + }, + { + "epoch": 0.7544133872747334, + "grad_norm": 1.6135180731102614, + "learning_rate": 3.000000983070086e-06, + "loss": 0.7355, + "step": 24615 + }, + { + "epoch": 0.7544440357974745, + "grad_norm": 1.6213305989362832, + "learning_rate": 2.999292133797824e-06, + "loss": 0.5816, + "step": 24616 + }, + { + "epoch": 0.7544746843202158, + "grad_norm": 1.7605565971284869, + "learning_rate": 2.9985833535053255e-06, + "loss": 0.6889, + "step": 24617 + }, + { + "epoch": 0.754505332842957, + "grad_norm": 1.6090661475922625, + "learning_rate": 2.9978746421995765e-06, + "loss": 0.6346, + "step": 24618 + }, + { + "epoch": 0.7545359813656982, + "grad_norm": 1.5619153781777801, + "learning_rate": 2.9971659998875625e-06, + "loss": 0.6122, + "step": 24619 + }, + { + "epoch": 0.7545666298884394, + "grad_norm": 1.666778369333626, + "learning_rate": 2.9964574265762615e-06, + "loss": 0.665, + "step": 24620 + }, + { + "epoch": 0.7545972784111806, + "grad_norm": 1.5404740050842038, + "learning_rate": 2.9957489222726567e-06, + "loss": 0.6707, + "step": 24621 + }, + { + "epoch": 0.7546279269339218, + "grad_norm": 1.6338817530324383, + "learning_rate": 2.9950404869837303e-06, + "loss": 0.7387, + "step": 24622 + }, + { + "epoch": 0.754658575456663, + "grad_norm": 1.652408001392591, + "learning_rate": 2.9943321207164657e-06, + "loss": 0.6328, + "step": 24623 + }, + { + "epoch": 0.7546892239794042, + "grad_norm": 1.5985372541355425, + "learning_rate": 2.9936238234778394e-06, + "loss": 0.6036, + "step": 24624 + }, + { + "epoch": 0.7547198725021455, + "grad_norm": 1.5789423753282616, + "learning_rate": 2.9929155952748225e-06, + "loss": 0.5865, + "step": 24625 + }, + { + "epoch": 0.7547505210248866, + "grad_norm": 1.59939245783832, + "learning_rate": 2.992207436114408e-06, + "loss": 0.6457, + "step": 24626 + }, + { + "epoch": 0.7547811695476278, + "grad_norm": 1.7677600444524688, + "learning_rate": 2.991499346003567e-06, + "loss": 0.6857, + "step": 24627 + }, + { + "epoch": 0.754811818070369, + "grad_norm": 0.6773351339515816, + "learning_rate": 2.9907913249492737e-06, + "loss": 0.5068, + "step": 24628 + }, + { + "epoch": 0.7548424665931102, + "grad_norm": 1.6242191371611605, + "learning_rate": 2.9900833729585057e-06, + "loss": 0.6219, + "step": 24629 + }, + { + "epoch": 0.7548731151158514, + "grad_norm": 1.731763970827972, + "learning_rate": 2.98937549003824e-06, + "loss": 0.802, + "step": 24630 + }, + { + "epoch": 0.7549037636385926, + "grad_norm": 1.8400205928751312, + "learning_rate": 2.988667676195455e-06, + "loss": 0.6742, + "step": 24631 + }, + { + "epoch": 0.7549344121613338, + "grad_norm": 1.5229842063311796, + "learning_rate": 2.9879599314371177e-06, + "loss": 0.5573, + "step": 24632 + }, + { + "epoch": 0.754965060684075, + "grad_norm": 0.6882376483719618, + "learning_rate": 2.9872522557702057e-06, + "loss": 0.5141, + "step": 24633 + }, + { + "epoch": 0.7549957092068162, + "grad_norm": 1.6163408739864684, + "learning_rate": 2.9865446492016936e-06, + "loss": 0.6642, + "step": 24634 + }, + { + "epoch": 0.7550263577295574, + "grad_norm": 0.6577246513003921, + "learning_rate": 2.9858371117385533e-06, + "loss": 0.533, + "step": 24635 + }, + { + "epoch": 0.7550570062522987, + "grad_norm": 1.7622437440919394, + "learning_rate": 2.9851296433877464e-06, + "loss": 0.6596, + "step": 24636 + }, + { + "epoch": 0.7550876547750398, + "grad_norm": 1.8112166147267674, + "learning_rate": 2.9844222441562598e-06, + "loss": 0.5346, + "step": 24637 + }, + { + "epoch": 0.7551183032977811, + "grad_norm": 1.5549041158860113, + "learning_rate": 2.9837149140510545e-06, + "loss": 0.6349, + "step": 24638 + }, + { + "epoch": 0.7551489518205222, + "grad_norm": 1.8414101099224853, + "learning_rate": 2.9830076530790995e-06, + "loss": 0.6669, + "step": 24639 + }, + { + "epoch": 0.7551796003432635, + "grad_norm": 1.7752933702423008, + "learning_rate": 2.982300461247365e-06, + "loss": 0.6426, + "step": 24640 + }, + { + "epoch": 0.7552102488660046, + "grad_norm": 1.6076795784664966, + "learning_rate": 2.9815933385628192e-06, + "loss": 0.7514, + "step": 24641 + }, + { + "epoch": 0.7552408973887459, + "grad_norm": 1.8035457121134202, + "learning_rate": 2.9808862850324327e-06, + "loss": 0.6819, + "step": 24642 + }, + { + "epoch": 0.755271545911487, + "grad_norm": 1.9724631530525325, + "learning_rate": 2.9801793006631672e-06, + "loss": 0.6754, + "step": 24643 + }, + { + "epoch": 0.7553021944342283, + "grad_norm": 1.687930131542777, + "learning_rate": 2.979472385461991e-06, + "loss": 0.7056, + "step": 24644 + }, + { + "epoch": 0.7553328429569695, + "grad_norm": 1.6156294263919329, + "learning_rate": 2.978765539435874e-06, + "loss": 0.7083, + "step": 24645 + }, + { + "epoch": 0.7553634914797107, + "grad_norm": 1.7331282995449082, + "learning_rate": 2.9780587625917745e-06, + "loss": 0.6885, + "step": 24646 + }, + { + "epoch": 0.7553941400024519, + "grad_norm": 1.587644131325501, + "learning_rate": 2.977352054936654e-06, + "loss": 0.6561, + "step": 24647 + }, + { + "epoch": 0.7554247885251931, + "grad_norm": 1.8127269396466459, + "learning_rate": 2.976645416477486e-06, + "loss": 0.6341, + "step": 24648 + }, + { + "epoch": 0.7554554370479343, + "grad_norm": 1.7264004851907542, + "learning_rate": 2.975938847221225e-06, + "loss": 0.6831, + "step": 24649 + }, + { + "epoch": 0.7554860855706755, + "grad_norm": 1.6875913895286887, + "learning_rate": 2.975232347174838e-06, + "loss": 0.5966, + "step": 24650 + }, + { + "epoch": 0.7555167340934167, + "grad_norm": 0.6930194871218002, + "learning_rate": 2.974525916345281e-06, + "loss": 0.5552, + "step": 24651 + }, + { + "epoch": 0.755547382616158, + "grad_norm": 1.5886140371318993, + "learning_rate": 2.973819554739519e-06, + "loss": 0.6328, + "step": 24652 + }, + { + "epoch": 0.7555780311388991, + "grad_norm": 1.5760083256636408, + "learning_rate": 2.973113262364513e-06, + "loss": 0.6588, + "step": 24653 + }, + { + "epoch": 0.7556086796616404, + "grad_norm": 1.5938605981479312, + "learning_rate": 2.9724070392272165e-06, + "loss": 0.6508, + "step": 24654 + }, + { + "epoch": 0.7556393281843815, + "grad_norm": 1.681937965743917, + "learning_rate": 2.971700885334592e-06, + "loss": 0.7465, + "step": 24655 + }, + { + "epoch": 0.7556699767071228, + "grad_norm": 1.6289858437949432, + "learning_rate": 2.9709948006935995e-06, + "loss": 0.6335, + "step": 24656 + }, + { + "epoch": 0.7557006252298639, + "grad_norm": 1.5136385553726233, + "learning_rate": 2.9702887853111906e-06, + "loss": 0.6362, + "step": 24657 + }, + { + "epoch": 0.7557312737526051, + "grad_norm": 1.3875764266774695, + "learning_rate": 2.969582839194328e-06, + "loss": 0.57, + "step": 24658 + }, + { + "epoch": 0.7557619222753463, + "grad_norm": 1.7912374845237347, + "learning_rate": 2.9688769623499624e-06, + "loss": 0.6584, + "step": 24659 + }, + { + "epoch": 0.7557925707980875, + "grad_norm": 1.4499812905215936, + "learning_rate": 2.96817115478505e-06, + "loss": 0.6935, + "step": 24660 + }, + { + "epoch": 0.7558232193208287, + "grad_norm": 1.507464313016373, + "learning_rate": 2.967465416506551e-06, + "loss": 0.598, + "step": 24661 + }, + { + "epoch": 0.7558538678435699, + "grad_norm": 1.564123949624506, + "learning_rate": 2.9667597475214092e-06, + "loss": 0.6568, + "step": 24662 + }, + { + "epoch": 0.7558845163663112, + "grad_norm": 0.6524984920897421, + "learning_rate": 2.9660541478365844e-06, + "loss": 0.5252, + "step": 24663 + }, + { + "epoch": 0.7559151648890523, + "grad_norm": 1.4058276247627768, + "learning_rate": 2.965348617459032e-06, + "loss": 0.5918, + "step": 24664 + }, + { + "epoch": 0.7559458134117936, + "grad_norm": 1.3819769107284663, + "learning_rate": 2.964643156395698e-06, + "loss": 0.6216, + "step": 24665 + }, + { + "epoch": 0.7559764619345347, + "grad_norm": 1.6845440502121123, + "learning_rate": 2.9639377646535294e-06, + "loss": 0.5858, + "step": 24666 + }, + { + "epoch": 0.756007110457276, + "grad_norm": 1.574731920114349, + "learning_rate": 2.9632324422394876e-06, + "loss": 0.6481, + "step": 24667 + }, + { + "epoch": 0.7560377589800171, + "grad_norm": 0.6482659968948081, + "learning_rate": 2.9625271891605147e-06, + "loss": 0.5215, + "step": 24668 + }, + { + "epoch": 0.7560684075027584, + "grad_norm": 0.6763968898330789, + "learning_rate": 2.961822005423566e-06, + "loss": 0.5415, + "step": 24669 + }, + { + "epoch": 0.7560990560254995, + "grad_norm": 1.766791889578018, + "learning_rate": 2.9611168910355816e-06, + "loss": 0.6258, + "step": 24670 + }, + { + "epoch": 0.7561297045482408, + "grad_norm": 0.6652914227286535, + "learning_rate": 2.9604118460035135e-06, + "loss": 0.5206, + "step": 24671 + }, + { + "epoch": 0.756160353070982, + "grad_norm": 0.6697836190488979, + "learning_rate": 2.959706870334311e-06, + "loss": 0.5404, + "step": 24672 + }, + { + "epoch": 0.7561910015937232, + "grad_norm": 1.6095524030291197, + "learning_rate": 2.9590019640349167e-06, + "loss": 0.7143, + "step": 24673 + }, + { + "epoch": 0.7562216501164644, + "grad_norm": 1.755958186606886, + "learning_rate": 2.9582971271122763e-06, + "loss": 0.6813, + "step": 24674 + }, + { + "epoch": 0.7562522986392056, + "grad_norm": 0.6620998479495056, + "learning_rate": 2.957592359573339e-06, + "loss": 0.5228, + "step": 24675 + }, + { + "epoch": 0.7562829471619468, + "grad_norm": 1.6986386621388143, + "learning_rate": 2.956887661425044e-06, + "loss": 0.6451, + "step": 24676 + }, + { + "epoch": 0.756313595684688, + "grad_norm": 1.5512398797630758, + "learning_rate": 2.956183032674337e-06, + "loss": 0.6211, + "step": 24677 + }, + { + "epoch": 0.7563442442074292, + "grad_norm": 1.599604253348455, + "learning_rate": 2.9554784733281627e-06, + "loss": 0.5905, + "step": 24678 + }, + { + "epoch": 0.7563748927301704, + "grad_norm": 1.602242757288758, + "learning_rate": 2.9547739833934585e-06, + "loss": 0.6561, + "step": 24679 + }, + { + "epoch": 0.7564055412529116, + "grad_norm": 1.6468651228215074, + "learning_rate": 2.954069562877173e-06, + "loss": 0.6337, + "step": 24680 + }, + { + "epoch": 0.7564361897756529, + "grad_norm": 1.4873259883648287, + "learning_rate": 2.953365211786239e-06, + "loss": 0.6882, + "step": 24681 + }, + { + "epoch": 0.756466838298394, + "grad_norm": 1.5276077214563588, + "learning_rate": 2.9526609301276e-06, + "loss": 0.7029, + "step": 24682 + }, + { + "epoch": 0.7564974868211353, + "grad_norm": 1.650618112763276, + "learning_rate": 2.9519567179082e-06, + "loss": 0.6434, + "step": 24683 + }, + { + "epoch": 0.7565281353438764, + "grad_norm": 1.6187555644708342, + "learning_rate": 2.951252575134971e-06, + "loss": 0.5589, + "step": 24684 + }, + { + "epoch": 0.7565587838666177, + "grad_norm": 1.55944332691538, + "learning_rate": 2.950548501814853e-06, + "loss": 0.6117, + "step": 24685 + }, + { + "epoch": 0.7565894323893588, + "grad_norm": 0.6914157354933044, + "learning_rate": 2.949844497954788e-06, + "loss": 0.5539, + "step": 24686 + }, + { + "epoch": 0.7566200809121001, + "grad_norm": 1.5243243361818901, + "learning_rate": 2.9491405635617054e-06, + "loss": 0.6152, + "step": 24687 + }, + { + "epoch": 0.7566507294348412, + "grad_norm": 1.5321641710291567, + "learning_rate": 2.948436698642545e-06, + "loss": 0.7371, + "step": 24688 + }, + { + "epoch": 0.7566813779575824, + "grad_norm": 1.7576835520546752, + "learning_rate": 2.9477329032042457e-06, + "loss": 0.7292, + "step": 24689 + }, + { + "epoch": 0.7567120264803237, + "grad_norm": 1.849287731452297, + "learning_rate": 2.947029177253734e-06, + "loss": 0.8283, + "step": 24690 + }, + { + "epoch": 0.7567426750030648, + "grad_norm": 0.6723568146199803, + "learning_rate": 2.9463255207979524e-06, + "loss": 0.532, + "step": 24691 + }, + { + "epoch": 0.7567733235258061, + "grad_norm": 1.7359078779456925, + "learning_rate": 2.9456219338438274e-06, + "loss": 0.6883, + "step": 24692 + }, + { + "epoch": 0.7568039720485472, + "grad_norm": 1.6500273418616642, + "learning_rate": 2.9449184163982946e-06, + "loss": 0.6635, + "step": 24693 + }, + { + "epoch": 0.7568346205712885, + "grad_norm": 0.6820052118597493, + "learning_rate": 2.9442149684682887e-06, + "loss": 0.5244, + "step": 24694 + }, + { + "epoch": 0.7568652690940296, + "grad_norm": 1.5466945713727582, + "learning_rate": 2.9435115900607337e-06, + "loss": 0.6929, + "step": 24695 + }, + { + "epoch": 0.7568959176167709, + "grad_norm": 1.540496454995149, + "learning_rate": 2.9428082811825655e-06, + "loss": 0.664, + "step": 24696 + }, + { + "epoch": 0.756926566139512, + "grad_norm": 1.9144392498612361, + "learning_rate": 2.942105041840716e-06, + "loss": 0.6534, + "step": 24697 + }, + { + "epoch": 0.7569572146622533, + "grad_norm": 1.652205661131341, + "learning_rate": 2.941401872042109e-06, + "loss": 0.6467, + "step": 24698 + }, + { + "epoch": 0.7569878631849944, + "grad_norm": 1.640936674284511, + "learning_rate": 2.940698771793674e-06, + "loss": 0.6588, + "step": 24699 + }, + { + "epoch": 0.7570185117077357, + "grad_norm": 1.5822285769328643, + "learning_rate": 2.939995741102344e-06, + "loss": 0.6604, + "step": 24700 + }, + { + "epoch": 0.7570491602304769, + "grad_norm": 1.7424081800233127, + "learning_rate": 2.9392927799750392e-06, + "loss": 0.733, + "step": 24701 + }, + { + "epoch": 0.7570798087532181, + "grad_norm": 1.5956990474112798, + "learning_rate": 2.9385898884186925e-06, + "loss": 0.5711, + "step": 24702 + }, + { + "epoch": 0.7571104572759593, + "grad_norm": 1.7370733576467081, + "learning_rate": 2.9378870664402227e-06, + "loss": 0.627, + "step": 24703 + }, + { + "epoch": 0.7571411057987005, + "grad_norm": 1.5314337158280218, + "learning_rate": 2.9371843140465594e-06, + "loss": 0.6104, + "step": 24704 + }, + { + "epoch": 0.7571717543214417, + "grad_norm": 1.8611093628252198, + "learning_rate": 2.9364816312446288e-06, + "loss": 0.7028, + "step": 24705 + }, + { + "epoch": 0.7572024028441829, + "grad_norm": 1.695384547943354, + "learning_rate": 2.935779018041348e-06, + "loss": 0.6129, + "step": 24706 + }, + { + "epoch": 0.7572330513669241, + "grad_norm": 1.8896134429650329, + "learning_rate": 2.9350764744436454e-06, + "loss": 0.6454, + "step": 24707 + }, + { + "epoch": 0.7572636998896654, + "grad_norm": 1.527754655506608, + "learning_rate": 2.9343740004584442e-06, + "loss": 0.7682, + "step": 24708 + }, + { + "epoch": 0.7572943484124065, + "grad_norm": 1.5527422940602924, + "learning_rate": 2.933671596092661e-06, + "loss": 0.62, + "step": 24709 + }, + { + "epoch": 0.7573249969351478, + "grad_norm": 1.9442249864914245, + "learning_rate": 2.9329692613532224e-06, + "loss": 0.7467, + "step": 24710 + }, + { + "epoch": 0.7573556454578889, + "grad_norm": 0.6286380123798799, + "learning_rate": 2.932266996247043e-06, + "loss": 0.5044, + "step": 24711 + }, + { + "epoch": 0.7573862939806302, + "grad_norm": 1.4250597755209335, + "learning_rate": 2.931564800781045e-06, + "loss": 0.5445, + "step": 24712 + }, + { + "epoch": 0.7574169425033713, + "grad_norm": 1.7042840746970553, + "learning_rate": 2.9308626749621503e-06, + "loss": 0.7032, + "step": 24713 + }, + { + "epoch": 0.7574475910261126, + "grad_norm": 1.8421785994602098, + "learning_rate": 2.930160618797272e-06, + "loss": 0.6694, + "step": 24714 + }, + { + "epoch": 0.7574782395488537, + "grad_norm": 1.839413845051987, + "learning_rate": 2.9294586322933304e-06, + "loss": 0.664, + "step": 24715 + }, + { + "epoch": 0.757508888071595, + "grad_norm": 1.5862671704367768, + "learning_rate": 2.928756715457245e-06, + "loss": 0.6587, + "step": 24716 + }, + { + "epoch": 0.7575395365943361, + "grad_norm": 1.6987673623742372, + "learning_rate": 2.9280548682959242e-06, + "loss": 0.6559, + "step": 24717 + }, + { + "epoch": 0.7575701851170774, + "grad_norm": 1.7617252372226087, + "learning_rate": 2.9273530908162895e-06, + "loss": 0.6543, + "step": 24718 + }, + { + "epoch": 0.7576008336398186, + "grad_norm": 1.5966738337323694, + "learning_rate": 2.9266513830252575e-06, + "loss": 0.6422, + "step": 24719 + }, + { + "epoch": 0.7576314821625597, + "grad_norm": 1.764735722165414, + "learning_rate": 2.925949744929736e-06, + "loss": 0.7024, + "step": 24720 + }, + { + "epoch": 0.757662130685301, + "grad_norm": 1.6116971961355266, + "learning_rate": 2.9252481765366447e-06, + "loss": 0.598, + "step": 24721 + }, + { + "epoch": 0.7576927792080421, + "grad_norm": 1.7661436047008385, + "learning_rate": 2.9245466778528876e-06, + "loss": 0.7026, + "step": 24722 + }, + { + "epoch": 0.7577234277307834, + "grad_norm": 1.4999999292919814, + "learning_rate": 2.9238452488853875e-06, + "loss": 0.6411, + "step": 24723 + }, + { + "epoch": 0.7577540762535245, + "grad_norm": 1.6481420054085454, + "learning_rate": 2.9231438896410514e-06, + "loss": 0.6603, + "step": 24724 + }, + { + "epoch": 0.7577847247762658, + "grad_norm": 1.7211873032635463, + "learning_rate": 2.9224426001267857e-06, + "loss": 0.6584, + "step": 24725 + }, + { + "epoch": 0.757815373299007, + "grad_norm": 1.9426694170415957, + "learning_rate": 2.9217413803495054e-06, + "loss": 0.7563, + "step": 24726 + }, + { + "epoch": 0.7578460218217482, + "grad_norm": 1.4551529818258886, + "learning_rate": 2.92104023031612e-06, + "loss": 0.71, + "step": 24727 + }, + { + "epoch": 0.7578766703444894, + "grad_norm": 0.6632129054557244, + "learning_rate": 2.9203391500335333e-06, + "loss": 0.503, + "step": 24728 + }, + { + "epoch": 0.7579073188672306, + "grad_norm": 1.684048555472227, + "learning_rate": 2.919638139508657e-06, + "loss": 0.6656, + "step": 24729 + }, + { + "epoch": 0.7579379673899718, + "grad_norm": 1.6456645877850642, + "learning_rate": 2.9189371987483974e-06, + "loss": 0.7314, + "step": 24730 + }, + { + "epoch": 0.757968615912713, + "grad_norm": 1.6632832847991579, + "learning_rate": 2.9182363277596636e-06, + "loss": 0.6737, + "step": 24731 + }, + { + "epoch": 0.7579992644354542, + "grad_norm": 1.5926520847217185, + "learning_rate": 2.917535526549361e-06, + "loss": 0.5953, + "step": 24732 + }, + { + "epoch": 0.7580299129581954, + "grad_norm": 1.5341737126260395, + "learning_rate": 2.916834795124386e-06, + "loss": 0.7927, + "step": 24733 + }, + { + "epoch": 0.7580605614809366, + "grad_norm": 1.7298968896551585, + "learning_rate": 2.916134133491656e-06, + "loss": 0.6582, + "step": 24734 + }, + { + "epoch": 0.7580912100036779, + "grad_norm": 1.7686558830009051, + "learning_rate": 2.915433541658068e-06, + "loss": 0.6799, + "step": 24735 + }, + { + "epoch": 0.758121858526419, + "grad_norm": 2.124073528585819, + "learning_rate": 2.9147330196305244e-06, + "loss": 0.6853, + "step": 24736 + }, + { + "epoch": 0.7581525070491603, + "grad_norm": 1.533741162498436, + "learning_rate": 2.914032567415929e-06, + "loss": 0.6907, + "step": 24737 + }, + { + "epoch": 0.7581831555719014, + "grad_norm": 0.6822401156317597, + "learning_rate": 2.913332185021186e-06, + "loss": 0.5459, + "step": 24738 + }, + { + "epoch": 0.7582138040946427, + "grad_norm": 0.6615241810776789, + "learning_rate": 2.912631872453192e-06, + "loss": 0.519, + "step": 24739 + }, + { + "epoch": 0.7582444526173838, + "grad_norm": 0.658589420174802, + "learning_rate": 2.911931629718849e-06, + "loss": 0.5165, + "step": 24740 + }, + { + "epoch": 0.7582751011401251, + "grad_norm": 1.653012484968295, + "learning_rate": 2.9112314568250566e-06, + "loss": 0.6072, + "step": 24741 + }, + { + "epoch": 0.7583057496628662, + "grad_norm": 1.5836302057964518, + "learning_rate": 2.9105313537787185e-06, + "loss": 0.5813, + "step": 24742 + }, + { + "epoch": 0.7583363981856075, + "grad_norm": 0.6898589506574383, + "learning_rate": 2.909831320586729e-06, + "loss": 0.5234, + "step": 24743 + }, + { + "epoch": 0.7583670467083486, + "grad_norm": 1.6224000795094373, + "learning_rate": 2.909131357255979e-06, + "loss": 0.6967, + "step": 24744 + }, + { + "epoch": 0.7583976952310899, + "grad_norm": 1.6484536058580326, + "learning_rate": 2.9084314637933788e-06, + "loss": 0.6415, + "step": 24745 + }, + { + "epoch": 0.7584283437538311, + "grad_norm": 1.6260764607558176, + "learning_rate": 2.9077316402058164e-06, + "loss": 0.7096, + "step": 24746 + }, + { + "epoch": 0.7584589922765723, + "grad_norm": 0.6742707504499023, + "learning_rate": 2.9070318865001866e-06, + "loss": 0.5158, + "step": 24747 + }, + { + "epoch": 0.7584896407993135, + "grad_norm": 1.7037400934418843, + "learning_rate": 2.9063322026833863e-06, + "loss": 0.5897, + "step": 24748 + }, + { + "epoch": 0.7585202893220547, + "grad_norm": 1.6120524684742161, + "learning_rate": 2.9056325887623104e-06, + "loss": 0.6089, + "step": 24749 + }, + { + "epoch": 0.7585509378447959, + "grad_norm": 1.5721544801707394, + "learning_rate": 2.904933044743854e-06, + "loss": 0.6834, + "step": 24750 + }, + { + "epoch": 0.758581586367537, + "grad_norm": 1.735631319669675, + "learning_rate": 2.9042335706349046e-06, + "loss": 0.6541, + "step": 24751 + }, + { + "epoch": 0.7586122348902783, + "grad_norm": 1.640521447061559, + "learning_rate": 2.9035341664423577e-06, + "loss": 0.6027, + "step": 24752 + }, + { + "epoch": 0.7586428834130194, + "grad_norm": 1.5694624793005814, + "learning_rate": 2.9028348321731082e-06, + "loss": 0.6182, + "step": 24753 + }, + { + "epoch": 0.7586735319357607, + "grad_norm": 1.767307486234506, + "learning_rate": 2.9021355678340425e-06, + "loss": 0.6213, + "step": 24754 + }, + { + "epoch": 0.7587041804585019, + "grad_norm": 1.695220401049439, + "learning_rate": 2.9014363734320474e-06, + "loss": 0.672, + "step": 24755 + }, + { + "epoch": 0.7587348289812431, + "grad_norm": 1.9765032179894366, + "learning_rate": 2.900737248974016e-06, + "loss": 0.6684, + "step": 24756 + }, + { + "epoch": 0.7587654775039843, + "grad_norm": 0.7003202880717142, + "learning_rate": 2.9000381944668376e-06, + "loss": 0.5472, + "step": 24757 + }, + { + "epoch": 0.7587961260267255, + "grad_norm": 1.7844462812087674, + "learning_rate": 2.899339209917402e-06, + "loss": 0.5983, + "step": 24758 + }, + { + "epoch": 0.7588267745494667, + "grad_norm": 1.6769697313897336, + "learning_rate": 2.898640295332591e-06, + "loss": 0.5662, + "step": 24759 + }, + { + "epoch": 0.7588574230722079, + "grad_norm": 1.7205552607297323, + "learning_rate": 2.8979414507192936e-06, + "loss": 0.7046, + "step": 24760 + }, + { + "epoch": 0.7588880715949491, + "grad_norm": 1.5459901854569364, + "learning_rate": 2.8972426760844007e-06, + "loss": 0.7133, + "step": 24761 + }, + { + "epoch": 0.7589187201176903, + "grad_norm": 1.728011029663363, + "learning_rate": 2.8965439714347923e-06, + "loss": 0.7126, + "step": 24762 + }, + { + "epoch": 0.7589493686404315, + "grad_norm": 0.6763465739867592, + "learning_rate": 2.8958453367773463e-06, + "loss": 0.497, + "step": 24763 + }, + { + "epoch": 0.7589800171631728, + "grad_norm": 1.9036163382649123, + "learning_rate": 2.895146772118962e-06, + "loss": 0.727, + "step": 24764 + }, + { + "epoch": 0.7590106656859139, + "grad_norm": 1.4888302466629484, + "learning_rate": 2.894448277466513e-06, + "loss": 0.6375, + "step": 24765 + }, + { + "epoch": 0.7590413142086552, + "grad_norm": 1.7892328039058845, + "learning_rate": 2.8937498528268803e-06, + "loss": 0.6081, + "step": 24766 + }, + { + "epoch": 0.7590719627313963, + "grad_norm": 1.4579862466544695, + "learning_rate": 2.893051498206949e-06, + "loss": 0.4956, + "step": 24767 + }, + { + "epoch": 0.7591026112541376, + "grad_norm": 1.6186555135455665, + "learning_rate": 2.8923532136136e-06, + "loss": 0.6595, + "step": 24768 + }, + { + "epoch": 0.7591332597768787, + "grad_norm": 1.6386289158381089, + "learning_rate": 2.8916549990537156e-06, + "loss": 0.6896, + "step": 24769 + }, + { + "epoch": 0.75916390829962, + "grad_norm": 1.6009978752368015, + "learning_rate": 2.8909568545341702e-06, + "loss": 0.6793, + "step": 24770 + }, + { + "epoch": 0.7591945568223611, + "grad_norm": 1.5616354607199707, + "learning_rate": 2.8902587800618466e-06, + "loss": 0.6153, + "step": 24771 + }, + { + "epoch": 0.7592252053451024, + "grad_norm": 1.5771409025038416, + "learning_rate": 2.889560775643625e-06, + "loss": 0.6455, + "step": 24772 + }, + { + "epoch": 0.7592558538678436, + "grad_norm": 1.7653700415985487, + "learning_rate": 2.8888628412863808e-06, + "loss": 0.6677, + "step": 24773 + }, + { + "epoch": 0.7592865023905848, + "grad_norm": 0.6657170122395593, + "learning_rate": 2.8881649769969833e-06, + "loss": 0.5178, + "step": 24774 + }, + { + "epoch": 0.759317150913326, + "grad_norm": 1.7142078203745057, + "learning_rate": 2.887467182782323e-06, + "loss": 0.6877, + "step": 24775 + }, + { + "epoch": 0.7593477994360672, + "grad_norm": 1.7609216613971173, + "learning_rate": 2.8867694586492643e-06, + "loss": 0.634, + "step": 24776 + }, + { + "epoch": 0.7593784479588084, + "grad_norm": 1.8013164901625303, + "learning_rate": 2.88607180460469e-06, + "loss": 0.7202, + "step": 24777 + }, + { + "epoch": 0.7594090964815496, + "grad_norm": 1.6219224205105365, + "learning_rate": 2.8853742206554667e-06, + "loss": 0.6879, + "step": 24778 + }, + { + "epoch": 0.7594397450042908, + "grad_norm": 1.5701477487855982, + "learning_rate": 2.8846767068084723e-06, + "loss": 0.6131, + "step": 24779 + }, + { + "epoch": 0.759470393527032, + "grad_norm": 1.6817328775961211, + "learning_rate": 2.883979263070582e-06, + "loss": 0.6339, + "step": 24780 + }, + { + "epoch": 0.7595010420497732, + "grad_norm": 0.6432360355148771, + "learning_rate": 2.8832818894486613e-06, + "loss": 0.5123, + "step": 24781 + }, + { + "epoch": 0.7595316905725144, + "grad_norm": 1.6747129675739918, + "learning_rate": 2.882584585949585e-06, + "loss": 0.6105, + "step": 24782 + }, + { + "epoch": 0.7595623390952556, + "grad_norm": 1.3549421603992924, + "learning_rate": 2.881887352580227e-06, + "loss": 0.545, + "step": 24783 + }, + { + "epoch": 0.7595929876179968, + "grad_norm": 1.398911437632573, + "learning_rate": 2.8811901893474516e-06, + "loss": 0.5742, + "step": 24784 + }, + { + "epoch": 0.759623636140738, + "grad_norm": 1.7125663018660844, + "learning_rate": 2.880493096258129e-06, + "loss": 0.674, + "step": 24785 + }, + { + "epoch": 0.7596542846634792, + "grad_norm": 1.7701689760030237, + "learning_rate": 2.8797960733191345e-06, + "loss": 0.7342, + "step": 24786 + }, + { + "epoch": 0.7596849331862204, + "grad_norm": 1.8770927723670081, + "learning_rate": 2.8790991205373262e-06, + "loss": 0.6657, + "step": 24787 + }, + { + "epoch": 0.7597155817089616, + "grad_norm": 1.5598134062520277, + "learning_rate": 2.8784022379195807e-06, + "loss": 0.6941, + "step": 24788 + }, + { + "epoch": 0.7597462302317028, + "grad_norm": 0.6578271295368436, + "learning_rate": 2.8777054254727567e-06, + "loss": 0.5032, + "step": 24789 + }, + { + "epoch": 0.759776878754444, + "grad_norm": 1.6176348518413561, + "learning_rate": 2.8770086832037237e-06, + "loss": 0.7301, + "step": 24790 + }, + { + "epoch": 0.7598075272771853, + "grad_norm": 1.520205896665711, + "learning_rate": 2.87631201111935e-06, + "loss": 0.6292, + "step": 24791 + }, + { + "epoch": 0.7598381757999264, + "grad_norm": 0.6523284162979548, + "learning_rate": 2.875615409226492e-06, + "loss": 0.534, + "step": 24792 + }, + { + "epoch": 0.7598688243226677, + "grad_norm": 0.6799699401926659, + "learning_rate": 2.8749188775320192e-06, + "loss": 0.5166, + "step": 24793 + }, + { + "epoch": 0.7598994728454088, + "grad_norm": 1.6210170411304992, + "learning_rate": 2.874222416042798e-06, + "loss": 0.6905, + "step": 24794 + }, + { + "epoch": 0.7599301213681501, + "grad_norm": 1.368757634018759, + "learning_rate": 2.8735260247656814e-06, + "loss": 0.6282, + "step": 24795 + }, + { + "epoch": 0.7599607698908912, + "grad_norm": 0.714262851383957, + "learning_rate": 2.872829703707537e-06, + "loss": 0.5389, + "step": 24796 + }, + { + "epoch": 0.7599914184136325, + "grad_norm": 0.6824681763439475, + "learning_rate": 2.8721334528752288e-06, + "loss": 0.535, + "step": 24797 + }, + { + "epoch": 0.7600220669363736, + "grad_norm": 1.648142902351031, + "learning_rate": 2.87143727227561e-06, + "loss": 0.6674, + "step": 24798 + }, + { + "epoch": 0.7600527154591149, + "grad_norm": 0.6736482168508126, + "learning_rate": 2.870741161915547e-06, + "loss": 0.5117, + "step": 24799 + }, + { + "epoch": 0.760083363981856, + "grad_norm": 1.496796831472933, + "learning_rate": 2.870045121801892e-06, + "loss": 0.5564, + "step": 24800 + }, + { + "epoch": 0.7601140125045973, + "grad_norm": 0.639503988161161, + "learning_rate": 2.8693491519415062e-06, + "loss": 0.5199, + "step": 24801 + }, + { + "epoch": 0.7601446610273385, + "grad_norm": 1.5670630637367937, + "learning_rate": 2.8686532523412514e-06, + "loss": 0.5937, + "step": 24802 + }, + { + "epoch": 0.7601753095500797, + "grad_norm": 1.7188091706180477, + "learning_rate": 2.8679574230079775e-06, + "loss": 0.6447, + "step": 24803 + }, + { + "epoch": 0.7602059580728209, + "grad_norm": 0.6473032575589703, + "learning_rate": 2.867261663948544e-06, + "loss": 0.5253, + "step": 24804 + }, + { + "epoch": 0.7602366065955621, + "grad_norm": 1.55988305597467, + "learning_rate": 2.8665659751698095e-06, + "loss": 0.5636, + "step": 24805 + }, + { + "epoch": 0.7602672551183033, + "grad_norm": 1.8102825187504783, + "learning_rate": 2.865870356678623e-06, + "loss": 0.6545, + "step": 24806 + }, + { + "epoch": 0.7602979036410445, + "grad_norm": 1.5313301635752399, + "learning_rate": 2.865174808481843e-06, + "loss": 0.5847, + "step": 24807 + }, + { + "epoch": 0.7603285521637857, + "grad_norm": 1.865339645839085, + "learning_rate": 2.8644793305863184e-06, + "loss": 0.7082, + "step": 24808 + }, + { + "epoch": 0.760359200686527, + "grad_norm": 0.6914391523076312, + "learning_rate": 2.863783922998905e-06, + "loss": 0.5218, + "step": 24809 + }, + { + "epoch": 0.7603898492092681, + "grad_norm": 0.6611017027861055, + "learning_rate": 2.863088585726458e-06, + "loss": 0.5073, + "step": 24810 + }, + { + "epoch": 0.7604204977320094, + "grad_norm": 1.572546356075576, + "learning_rate": 2.862393318775821e-06, + "loss": 0.5921, + "step": 24811 + }, + { + "epoch": 0.7604511462547505, + "grad_norm": 0.699401801913209, + "learning_rate": 2.86169812215385e-06, + "loss": 0.5214, + "step": 24812 + }, + { + "epoch": 0.7604817947774917, + "grad_norm": 1.7316205046344575, + "learning_rate": 2.8610029958673966e-06, + "loss": 0.6565, + "step": 24813 + }, + { + "epoch": 0.7605124433002329, + "grad_norm": 0.6618971848644982, + "learning_rate": 2.8603079399233045e-06, + "loss": 0.5351, + "step": 24814 + }, + { + "epoch": 0.7605430918229741, + "grad_norm": 1.6200548987024506, + "learning_rate": 2.8596129543284246e-06, + "loss": 0.6463, + "step": 24815 + }, + { + "epoch": 0.7605737403457153, + "grad_norm": 1.9274367520026383, + "learning_rate": 2.8589180390896078e-06, + "loss": 0.6793, + "step": 24816 + }, + { + "epoch": 0.7606043888684565, + "grad_norm": 0.6875430391412758, + "learning_rate": 2.8582231942136963e-06, + "loss": 0.551, + "step": 24817 + }, + { + "epoch": 0.7606350373911978, + "grad_norm": 1.605495272918078, + "learning_rate": 2.8575284197075415e-06, + "loss": 0.6997, + "step": 24818 + }, + { + "epoch": 0.7606656859139389, + "grad_norm": 1.569413323763876, + "learning_rate": 2.8568337155779847e-06, + "loss": 0.6638, + "step": 24819 + }, + { + "epoch": 0.7606963344366802, + "grad_norm": 2.031888842740036, + "learning_rate": 2.8561390818318713e-06, + "loss": 0.6809, + "step": 24820 + }, + { + "epoch": 0.7607269829594213, + "grad_norm": 1.88564263936467, + "learning_rate": 2.8554445184760517e-06, + "loss": 0.7463, + "step": 24821 + }, + { + "epoch": 0.7607576314821626, + "grad_norm": 0.6607327253698714, + "learning_rate": 2.8547500255173623e-06, + "loss": 0.5011, + "step": 24822 + }, + { + "epoch": 0.7607882800049037, + "grad_norm": 1.6845026737321591, + "learning_rate": 2.8540556029626487e-06, + "loss": 0.6053, + "step": 24823 + }, + { + "epoch": 0.760818928527645, + "grad_norm": 1.7651472720918095, + "learning_rate": 2.853361250818756e-06, + "loss": 0.6783, + "step": 24824 + }, + { + "epoch": 0.7608495770503861, + "grad_norm": 1.7685523029494707, + "learning_rate": 2.852666969092521e-06, + "loss": 0.6701, + "step": 24825 + }, + { + "epoch": 0.7608802255731274, + "grad_norm": 1.5561939172381147, + "learning_rate": 2.8519727577907876e-06, + "loss": 0.647, + "step": 24826 + }, + { + "epoch": 0.7609108740958686, + "grad_norm": 1.637975143254, + "learning_rate": 2.8512786169203975e-06, + "loss": 0.603, + "step": 24827 + }, + { + "epoch": 0.7609415226186098, + "grad_norm": 1.6005169079501023, + "learning_rate": 2.8505845464881852e-06, + "loss": 0.6702, + "step": 24828 + }, + { + "epoch": 0.760972171141351, + "grad_norm": 0.663374006319824, + "learning_rate": 2.8498905465009974e-06, + "loss": 0.5253, + "step": 24829 + }, + { + "epoch": 0.7610028196640922, + "grad_norm": 1.6667040721922641, + "learning_rate": 2.8491966169656593e-06, + "loss": 0.6624, + "step": 24830 + }, + { + "epoch": 0.7610334681868334, + "grad_norm": 0.6467126836645932, + "learning_rate": 2.8485027578890234e-06, + "loss": 0.5335, + "step": 24831 + }, + { + "epoch": 0.7610641167095746, + "grad_norm": 0.662351006469573, + "learning_rate": 2.84780896927792e-06, + "loss": 0.5014, + "step": 24832 + }, + { + "epoch": 0.7610947652323158, + "grad_norm": 1.5475464450532193, + "learning_rate": 2.847115251139182e-06, + "loss": 0.6151, + "step": 24833 + }, + { + "epoch": 0.761125413755057, + "grad_norm": 0.6376733215240672, + "learning_rate": 2.8464216034796465e-06, + "loss": 0.4869, + "step": 24834 + }, + { + "epoch": 0.7611560622777982, + "grad_norm": 1.734636273955565, + "learning_rate": 2.845728026306153e-06, + "loss": 0.5715, + "step": 24835 + }, + { + "epoch": 0.7611867108005395, + "grad_norm": 1.6190123211814027, + "learning_rate": 2.845034519625529e-06, + "loss": 0.7001, + "step": 24836 + }, + { + "epoch": 0.7612173593232806, + "grad_norm": 1.6241580584689337, + "learning_rate": 2.8443410834446094e-06, + "loss": 0.7169, + "step": 24837 + }, + { + "epoch": 0.7612480078460219, + "grad_norm": 1.439408973856243, + "learning_rate": 2.8436477177702295e-06, + "loss": 0.6006, + "step": 24838 + }, + { + "epoch": 0.761278656368763, + "grad_norm": 1.6839700597786562, + "learning_rate": 2.842954422609222e-06, + "loss": 0.6195, + "step": 24839 + }, + { + "epoch": 0.7613093048915043, + "grad_norm": 1.747930713040194, + "learning_rate": 2.8422611979684158e-06, + "loss": 0.6654, + "step": 24840 + }, + { + "epoch": 0.7613399534142454, + "grad_norm": 1.7117308260961346, + "learning_rate": 2.8415680438546345e-06, + "loss": 0.6581, + "step": 24841 + }, + { + "epoch": 0.7613706019369867, + "grad_norm": 1.718017022996928, + "learning_rate": 2.840874960274722e-06, + "loss": 0.4349, + "step": 24842 + }, + { + "epoch": 0.7614012504597278, + "grad_norm": 1.429082223161652, + "learning_rate": 2.8401819472354995e-06, + "loss": 0.6165, + "step": 24843 + }, + { + "epoch": 0.761431898982469, + "grad_norm": 1.5065873242031684, + "learning_rate": 2.839489004743794e-06, + "loss": 0.6875, + "step": 24844 + }, + { + "epoch": 0.7614625475052103, + "grad_norm": 1.8575010798584632, + "learning_rate": 2.8387961328064353e-06, + "loss": 0.6218, + "step": 24845 + }, + { + "epoch": 0.7614931960279514, + "grad_norm": 1.5095521795483307, + "learning_rate": 2.8381033314302532e-06, + "loss": 0.6377, + "step": 24846 + }, + { + "epoch": 0.7615238445506927, + "grad_norm": 1.692862979918361, + "learning_rate": 2.8374106006220682e-06, + "loss": 0.6488, + "step": 24847 + }, + { + "epoch": 0.7615544930734338, + "grad_norm": 1.6700503288487447, + "learning_rate": 2.836717940388709e-06, + "loss": 0.671, + "step": 24848 + }, + { + "epoch": 0.7615851415961751, + "grad_norm": 1.5976110481693835, + "learning_rate": 2.8360253507370016e-06, + "loss": 0.6408, + "step": 24849 + }, + { + "epoch": 0.7616157901189162, + "grad_norm": 1.6472946784094336, + "learning_rate": 2.8353328316737715e-06, + "loss": 0.6757, + "step": 24850 + }, + { + "epoch": 0.7616464386416575, + "grad_norm": 1.9594567315567952, + "learning_rate": 2.8346403832058413e-06, + "loss": 0.6436, + "step": 24851 + }, + { + "epoch": 0.7616770871643986, + "grad_norm": 1.7310368395363493, + "learning_rate": 2.833948005340029e-06, + "loss": 0.6409, + "step": 24852 + }, + { + "epoch": 0.7617077356871399, + "grad_norm": 1.5332591848543349, + "learning_rate": 2.8332556980831605e-06, + "loss": 0.6628, + "step": 24853 + }, + { + "epoch": 0.761738384209881, + "grad_norm": 1.5393948792253156, + "learning_rate": 2.8325634614420607e-06, + "loss": 0.5902, + "step": 24854 + }, + { + "epoch": 0.7617690327326223, + "grad_norm": 2.025310614557162, + "learning_rate": 2.831871295423543e-06, + "loss": 0.6909, + "step": 24855 + }, + { + "epoch": 0.7617996812553635, + "grad_norm": 1.699559380505285, + "learning_rate": 2.8311792000344328e-06, + "loss": 0.7573, + "step": 24856 + }, + { + "epoch": 0.7618303297781047, + "grad_norm": 1.496997417203785, + "learning_rate": 2.8304871752815466e-06, + "loss": 0.5828, + "step": 24857 + }, + { + "epoch": 0.7618609783008459, + "grad_norm": 1.7109371094928636, + "learning_rate": 2.8297952211717096e-06, + "loss": 0.6228, + "step": 24858 + }, + { + "epoch": 0.7618916268235871, + "grad_norm": 1.729076679590674, + "learning_rate": 2.8291033377117337e-06, + "loss": 0.5716, + "step": 24859 + }, + { + "epoch": 0.7619222753463283, + "grad_norm": 1.5523803926470074, + "learning_rate": 2.828411524908431e-06, + "loss": 0.7257, + "step": 24860 + }, + { + "epoch": 0.7619529238690695, + "grad_norm": 1.620577005995154, + "learning_rate": 2.8277197827686308e-06, + "loss": 0.6175, + "step": 24861 + }, + { + "epoch": 0.7619835723918107, + "grad_norm": 1.873113791568274, + "learning_rate": 2.827028111299144e-06, + "loss": 0.6908, + "step": 24862 + }, + { + "epoch": 0.762014220914552, + "grad_norm": 1.691367770305016, + "learning_rate": 2.82633651050678e-06, + "loss": 0.6774, + "step": 24863 + }, + { + "epoch": 0.7620448694372931, + "grad_norm": 1.6436878027454414, + "learning_rate": 2.8256449803983577e-06, + "loss": 0.7499, + "step": 24864 + }, + { + "epoch": 0.7620755179600344, + "grad_norm": 1.5064716084567893, + "learning_rate": 2.8249535209806943e-06, + "loss": 0.6636, + "step": 24865 + }, + { + "epoch": 0.7621061664827755, + "grad_norm": 1.6654987726237804, + "learning_rate": 2.824262132260598e-06, + "loss": 0.6565, + "step": 24866 + }, + { + "epoch": 0.7621368150055168, + "grad_norm": 1.65289991282501, + "learning_rate": 2.8235708142448816e-06, + "loss": 0.6444, + "step": 24867 + }, + { + "epoch": 0.7621674635282579, + "grad_norm": 1.658867630660637, + "learning_rate": 2.8228795669403577e-06, + "loss": 0.6711, + "step": 24868 + }, + { + "epoch": 0.7621981120509992, + "grad_norm": 1.547026925417686, + "learning_rate": 2.8221883903538415e-06, + "loss": 0.6281, + "step": 24869 + }, + { + "epoch": 0.7622287605737403, + "grad_norm": 1.6806610258459083, + "learning_rate": 2.8214972844921396e-06, + "loss": 0.7275, + "step": 24870 + }, + { + "epoch": 0.7622594090964816, + "grad_norm": 1.6291457886048624, + "learning_rate": 2.8208062493620556e-06, + "loss": 0.6783, + "step": 24871 + }, + { + "epoch": 0.7622900576192227, + "grad_norm": 1.6812988218250986, + "learning_rate": 2.8201152849704104e-06, + "loss": 0.6524, + "step": 24872 + }, + { + "epoch": 0.762320706141964, + "grad_norm": 1.9487525743749925, + "learning_rate": 2.819424391324007e-06, + "loss": 0.6951, + "step": 24873 + }, + { + "epoch": 0.7623513546647052, + "grad_norm": 1.4809532208611376, + "learning_rate": 2.818733568429649e-06, + "loss": 0.6605, + "step": 24874 + }, + { + "epoch": 0.7623820031874463, + "grad_norm": 0.6876885023686667, + "learning_rate": 2.818042816294145e-06, + "loss": 0.5254, + "step": 24875 + }, + { + "epoch": 0.7624126517101876, + "grad_norm": 1.4966155962546803, + "learning_rate": 2.817352134924305e-06, + "loss": 0.6542, + "step": 24876 + }, + { + "epoch": 0.7624433002329287, + "grad_norm": 1.5897850330805643, + "learning_rate": 2.8166615243269346e-06, + "loss": 0.61, + "step": 24877 + }, + { + "epoch": 0.76247394875567, + "grad_norm": 1.8502838683444325, + "learning_rate": 2.815970984508832e-06, + "loss": 0.6235, + "step": 24878 + }, + { + "epoch": 0.7625045972784111, + "grad_norm": 1.8182628319503644, + "learning_rate": 2.815280515476806e-06, + "loss": 0.7794, + "step": 24879 + }, + { + "epoch": 0.7625352458011524, + "grad_norm": 1.8602998943800173, + "learning_rate": 2.814590117237663e-06, + "loss": 0.6144, + "step": 24880 + }, + { + "epoch": 0.7625658943238935, + "grad_norm": 1.7341376393132577, + "learning_rate": 2.8138997897982013e-06, + "loss": 0.6346, + "step": 24881 + }, + { + "epoch": 0.7625965428466348, + "grad_norm": 1.387206324403819, + "learning_rate": 2.8132095331652178e-06, + "loss": 0.5809, + "step": 24882 + }, + { + "epoch": 0.762627191369376, + "grad_norm": 1.5878020796112051, + "learning_rate": 2.812519347345526e-06, + "loss": 0.6764, + "step": 24883 + }, + { + "epoch": 0.7626578398921172, + "grad_norm": 1.6627117033667178, + "learning_rate": 2.811829232345916e-06, + "loss": 0.5959, + "step": 24884 + }, + { + "epoch": 0.7626884884148584, + "grad_norm": 1.7828280517478903, + "learning_rate": 2.8111391881731966e-06, + "loss": 0.7113, + "step": 24885 + }, + { + "epoch": 0.7627191369375996, + "grad_norm": 1.7458984675727802, + "learning_rate": 2.810449214834158e-06, + "loss": 0.644, + "step": 24886 + }, + { + "epoch": 0.7627497854603408, + "grad_norm": 1.6756402639836678, + "learning_rate": 2.8097593123356025e-06, + "loss": 0.6814, + "step": 24887 + }, + { + "epoch": 0.762780433983082, + "grad_norm": 1.8133893125485256, + "learning_rate": 2.8090694806843312e-06, + "loss": 0.6732, + "step": 24888 + }, + { + "epoch": 0.7628110825058232, + "grad_norm": 1.7794801622199754, + "learning_rate": 2.8083797198871356e-06, + "loss": 0.6926, + "step": 24889 + }, + { + "epoch": 0.7628417310285645, + "grad_norm": 1.7180678154731108, + "learning_rate": 2.8076900299508148e-06, + "loss": 0.7164, + "step": 24890 + }, + { + "epoch": 0.7628723795513056, + "grad_norm": 0.6669776332012523, + "learning_rate": 2.807000410882167e-06, + "loss": 0.4917, + "step": 24891 + }, + { + "epoch": 0.7629030280740469, + "grad_norm": 1.819204574760747, + "learning_rate": 2.8063108626879842e-06, + "loss": 0.7272, + "step": 24892 + }, + { + "epoch": 0.762933676596788, + "grad_norm": 1.5575214540506992, + "learning_rate": 2.805621385375056e-06, + "loss": 0.6142, + "step": 24893 + }, + { + "epoch": 0.7629643251195293, + "grad_norm": 1.611837930268907, + "learning_rate": 2.804931978950186e-06, + "loss": 0.5787, + "step": 24894 + }, + { + "epoch": 0.7629949736422704, + "grad_norm": 1.6906736486672405, + "learning_rate": 2.804242643420161e-06, + "loss": 0.6075, + "step": 24895 + }, + { + "epoch": 0.7630256221650117, + "grad_norm": 1.5707242349311232, + "learning_rate": 2.8035533787917757e-06, + "loss": 0.6979, + "step": 24896 + }, + { + "epoch": 0.7630562706877528, + "grad_norm": 1.8603687709617394, + "learning_rate": 2.8028641850718173e-06, + "loss": 0.7261, + "step": 24897 + }, + { + "epoch": 0.7630869192104941, + "grad_norm": 1.795219808432168, + "learning_rate": 2.8021750622670796e-06, + "loss": 0.6262, + "step": 24898 + }, + { + "epoch": 0.7631175677332352, + "grad_norm": 0.6548622799501431, + "learning_rate": 2.8014860103843565e-06, + "loss": 0.5237, + "step": 24899 + }, + { + "epoch": 0.7631482162559765, + "grad_norm": 0.6651542099357814, + "learning_rate": 2.80079702943043e-06, + "loss": 0.532, + "step": 24900 + }, + { + "epoch": 0.7631788647787177, + "grad_norm": 0.6944536599559719, + "learning_rate": 2.8001081194120925e-06, + "loss": 0.5593, + "step": 24901 + }, + { + "epoch": 0.7632095133014589, + "grad_norm": 1.6249982521108035, + "learning_rate": 2.7994192803361343e-06, + "loss": 0.7026, + "step": 24902 + }, + { + "epoch": 0.7632401618242001, + "grad_norm": 1.7059972555113159, + "learning_rate": 2.798730512209338e-06, + "loss": 0.6003, + "step": 24903 + }, + { + "epoch": 0.7632708103469413, + "grad_norm": 1.4752906847874232, + "learning_rate": 2.7980418150384946e-06, + "loss": 0.5433, + "step": 24904 + }, + { + "epoch": 0.7633014588696825, + "grad_norm": 1.8983774673945493, + "learning_rate": 2.7973531888303863e-06, + "loss": 0.7258, + "step": 24905 + }, + { + "epoch": 0.7633321073924236, + "grad_norm": 1.6239857916696878, + "learning_rate": 2.7966646335917992e-06, + "loss": 0.7097, + "step": 24906 + }, + { + "epoch": 0.7633627559151649, + "grad_norm": 1.4830484694128319, + "learning_rate": 2.7959761493295214e-06, + "loss": 0.6812, + "step": 24907 + }, + { + "epoch": 0.763393404437906, + "grad_norm": 0.6778089287166685, + "learning_rate": 2.7952877360503304e-06, + "loss": 0.5436, + "step": 24908 + }, + { + "epoch": 0.7634240529606473, + "grad_norm": 1.666999852553824, + "learning_rate": 2.794599393761014e-06, + "loss": 0.5418, + "step": 24909 + }, + { + "epoch": 0.7634547014833885, + "grad_norm": 0.6453418877483726, + "learning_rate": 2.7939111224683545e-06, + "loss": 0.5184, + "step": 24910 + }, + { + "epoch": 0.7634853500061297, + "grad_norm": 0.6492444297198331, + "learning_rate": 2.7932229221791307e-06, + "loss": 0.5271, + "step": 24911 + }, + { + "epoch": 0.7635159985288709, + "grad_norm": 1.5375809856712275, + "learning_rate": 2.7925347929001258e-06, + "loss": 0.6395, + "step": 24912 + }, + { + "epoch": 0.7635466470516121, + "grad_norm": 1.7239712459809167, + "learning_rate": 2.7918467346381216e-06, + "loss": 0.7512, + "step": 24913 + }, + { + "epoch": 0.7635772955743533, + "grad_norm": 1.5875139472072748, + "learning_rate": 2.7911587473998936e-06, + "loss": 0.6007, + "step": 24914 + }, + { + "epoch": 0.7636079440970945, + "grad_norm": 1.5967634696953568, + "learning_rate": 2.7904708311922256e-06, + "loss": 0.6629, + "step": 24915 + }, + { + "epoch": 0.7636385926198357, + "grad_norm": 1.6746629675346278, + "learning_rate": 2.7897829860218906e-06, + "loss": 0.6117, + "step": 24916 + }, + { + "epoch": 0.763669241142577, + "grad_norm": 1.6878609520178693, + "learning_rate": 2.7890952118956672e-06, + "loss": 0.7676, + "step": 24917 + }, + { + "epoch": 0.7636998896653181, + "grad_norm": 1.532715937574136, + "learning_rate": 2.788407508820338e-06, + "loss": 0.5621, + "step": 24918 + }, + { + "epoch": 0.7637305381880594, + "grad_norm": 1.5287234225124506, + "learning_rate": 2.7877198768026714e-06, + "loss": 0.5706, + "step": 24919 + }, + { + "epoch": 0.7637611867108005, + "grad_norm": 1.5720438571276496, + "learning_rate": 2.787032315849446e-06, + "loss": 0.671, + "step": 24920 + }, + { + "epoch": 0.7637918352335418, + "grad_norm": 1.782339813196017, + "learning_rate": 2.786344825967441e-06, + "loss": 0.6835, + "step": 24921 + }, + { + "epoch": 0.7638224837562829, + "grad_norm": 1.6712905252881904, + "learning_rate": 2.785657407163421e-06, + "loss": 0.622, + "step": 24922 + }, + { + "epoch": 0.7638531322790242, + "grad_norm": 1.5414753088697126, + "learning_rate": 2.7849700594441662e-06, + "loss": 0.5927, + "step": 24923 + }, + { + "epoch": 0.7638837808017653, + "grad_norm": 0.6539826040255812, + "learning_rate": 2.7842827828164497e-06, + "loss": 0.5069, + "step": 24924 + }, + { + "epoch": 0.7639144293245066, + "grad_norm": 1.6332557782398192, + "learning_rate": 2.783595577287038e-06, + "loss": 0.6078, + "step": 24925 + }, + { + "epoch": 0.7639450778472477, + "grad_norm": 1.9119264164858443, + "learning_rate": 2.7829084428627094e-06, + "loss": 0.813, + "step": 24926 + }, + { + "epoch": 0.763975726369989, + "grad_norm": 1.6081210671790127, + "learning_rate": 2.782221379550226e-06, + "loss": 0.5099, + "step": 24927 + }, + { + "epoch": 0.7640063748927302, + "grad_norm": 1.5238623633161466, + "learning_rate": 2.7815343873563627e-06, + "loss": 0.5367, + "step": 24928 + }, + { + "epoch": 0.7640370234154714, + "grad_norm": 0.6443231971336595, + "learning_rate": 2.7808474662878914e-06, + "loss": 0.5019, + "step": 24929 + }, + { + "epoch": 0.7640676719382126, + "grad_norm": 1.602471482463365, + "learning_rate": 2.780160616351575e-06, + "loss": 0.6393, + "step": 24930 + }, + { + "epoch": 0.7640983204609538, + "grad_norm": 1.6215022177309832, + "learning_rate": 2.779473837554182e-06, + "loss": 0.7133, + "step": 24931 + }, + { + "epoch": 0.764128968983695, + "grad_norm": 0.6570849908594792, + "learning_rate": 2.7787871299024837e-06, + "loss": 0.4991, + "step": 24932 + }, + { + "epoch": 0.7641596175064362, + "grad_norm": 1.5128793203495874, + "learning_rate": 2.7781004934032407e-06, + "loss": 0.5696, + "step": 24933 + }, + { + "epoch": 0.7641902660291774, + "grad_norm": 1.660118659731747, + "learning_rate": 2.777413928063221e-06, + "loss": 0.6644, + "step": 24934 + }, + { + "epoch": 0.7642209145519187, + "grad_norm": 1.7936094054531504, + "learning_rate": 2.7767274338891935e-06, + "loss": 0.6156, + "step": 24935 + }, + { + "epoch": 0.7642515630746598, + "grad_norm": 1.6210969514700324, + "learning_rate": 2.7760410108879155e-06, + "loss": 0.641, + "step": 24936 + }, + { + "epoch": 0.764282211597401, + "grad_norm": 1.54023555458636, + "learning_rate": 2.7753546590661563e-06, + "loss": 0.6231, + "step": 24937 + }, + { + "epoch": 0.7643128601201422, + "grad_norm": 1.9716666148919497, + "learning_rate": 2.7746683784306707e-06, + "loss": 0.6567, + "step": 24938 + }, + { + "epoch": 0.7643435086428834, + "grad_norm": 1.8236489052881502, + "learning_rate": 2.773982168988232e-06, + "loss": 0.6977, + "step": 24939 + }, + { + "epoch": 0.7643741571656246, + "grad_norm": 1.6985830290058117, + "learning_rate": 2.7732960307455957e-06, + "loss": 0.6421, + "step": 24940 + }, + { + "epoch": 0.7644048056883658, + "grad_norm": 1.8181692264810734, + "learning_rate": 2.7726099637095207e-06, + "loss": 0.7547, + "step": 24941 + }, + { + "epoch": 0.764435454211107, + "grad_norm": 1.6520719397136516, + "learning_rate": 2.771923967886767e-06, + "loss": 0.6175, + "step": 24942 + }, + { + "epoch": 0.7644661027338482, + "grad_norm": 1.47230637914356, + "learning_rate": 2.7712380432841002e-06, + "loss": 0.6142, + "step": 24943 + }, + { + "epoch": 0.7644967512565894, + "grad_norm": 1.8089288380596102, + "learning_rate": 2.770552189908272e-06, + "loss": 0.664, + "step": 24944 + }, + { + "epoch": 0.7645273997793306, + "grad_norm": 1.7830127752029807, + "learning_rate": 2.769866407766042e-06, + "loss": 0.6629, + "step": 24945 + }, + { + "epoch": 0.7645580483020719, + "grad_norm": 1.4686080522613494, + "learning_rate": 2.769180696864171e-06, + "loss": 0.6745, + "step": 24946 + }, + { + "epoch": 0.764588696824813, + "grad_norm": 1.6532247817771948, + "learning_rate": 2.7684950572094094e-06, + "loss": 0.5977, + "step": 24947 + }, + { + "epoch": 0.7646193453475543, + "grad_norm": 1.7135089070394247, + "learning_rate": 2.7678094888085206e-06, + "loss": 0.6059, + "step": 24948 + }, + { + "epoch": 0.7646499938702954, + "grad_norm": 1.5964290283985065, + "learning_rate": 2.7671239916682514e-06, + "loss": 0.5858, + "step": 24949 + }, + { + "epoch": 0.7646806423930367, + "grad_norm": 1.5600959939171073, + "learning_rate": 2.7664385657953596e-06, + "loss": 0.7326, + "step": 24950 + }, + { + "epoch": 0.7647112909157778, + "grad_norm": 1.7618845214129149, + "learning_rate": 2.7657532111966036e-06, + "loss": 0.6716, + "step": 24951 + }, + { + "epoch": 0.7647419394385191, + "grad_norm": 1.859799610558714, + "learning_rate": 2.7650679278787283e-06, + "loss": 0.6985, + "step": 24952 + }, + { + "epoch": 0.7647725879612602, + "grad_norm": 1.7395899602280562, + "learning_rate": 2.7643827158484905e-06, + "loss": 0.6972, + "step": 24953 + }, + { + "epoch": 0.7648032364840015, + "grad_norm": 1.693292839783312, + "learning_rate": 2.7636975751126436e-06, + "loss": 0.659, + "step": 24954 + }, + { + "epoch": 0.7648338850067427, + "grad_norm": 1.6558562515084436, + "learning_rate": 2.7630125056779334e-06, + "loss": 0.7008, + "step": 24955 + }, + { + "epoch": 0.7648645335294839, + "grad_norm": 1.5483246005644271, + "learning_rate": 2.762327507551116e-06, + "loss": 0.5666, + "step": 24956 + }, + { + "epoch": 0.7648951820522251, + "grad_norm": 1.7163811796880344, + "learning_rate": 2.76164258073893e-06, + "loss": 0.68, + "step": 24957 + }, + { + "epoch": 0.7649258305749663, + "grad_norm": 1.6091527307908933, + "learning_rate": 2.760957725248139e-06, + "loss": 0.7218, + "step": 24958 + }, + { + "epoch": 0.7649564790977075, + "grad_norm": 1.6296922312174886, + "learning_rate": 2.7602729410854835e-06, + "loss": 0.6743, + "step": 24959 + }, + { + "epoch": 0.7649871276204487, + "grad_norm": 1.4842811852525373, + "learning_rate": 2.759588228257708e-06, + "loss": 0.5783, + "step": 24960 + }, + { + "epoch": 0.7650177761431899, + "grad_norm": 1.8605100620074542, + "learning_rate": 2.758903586771562e-06, + "loss": 0.6222, + "step": 24961 + }, + { + "epoch": 0.7650484246659311, + "grad_norm": 1.5641044782941642, + "learning_rate": 2.758219016633794e-06, + "loss": 0.5271, + "step": 24962 + }, + { + "epoch": 0.7650790731886723, + "grad_norm": 1.4736373784105272, + "learning_rate": 2.757534517851145e-06, + "loss": 0.6449, + "step": 24963 + }, + { + "epoch": 0.7651097217114136, + "grad_norm": 1.70526632903175, + "learning_rate": 2.7568500904303607e-06, + "loss": 0.6437, + "step": 24964 + }, + { + "epoch": 0.7651403702341547, + "grad_norm": 1.897769235664668, + "learning_rate": 2.7561657343781866e-06, + "loss": 0.6657, + "step": 24965 + }, + { + "epoch": 0.765171018756896, + "grad_norm": 1.6006750447635858, + "learning_rate": 2.755481449701367e-06, + "loss": 0.6182, + "step": 24966 + }, + { + "epoch": 0.7652016672796371, + "grad_norm": 1.4684779492174687, + "learning_rate": 2.754797236406643e-06, + "loss": 0.6732, + "step": 24967 + }, + { + "epoch": 0.7652323158023783, + "grad_norm": 1.838397627174193, + "learning_rate": 2.75411309450075e-06, + "loss": 0.6657, + "step": 24968 + }, + { + "epoch": 0.7652629643251195, + "grad_norm": 1.5350938573582096, + "learning_rate": 2.753429023990439e-06, + "loss": 0.647, + "step": 24969 + }, + { + "epoch": 0.7652936128478607, + "grad_norm": 1.5003017881513399, + "learning_rate": 2.752745024882447e-06, + "loss": 0.6238, + "step": 24970 + }, + { + "epoch": 0.7653242613706019, + "grad_norm": 0.7157387001265636, + "learning_rate": 2.7520610971835106e-06, + "loss": 0.5197, + "step": 24971 + }, + { + "epoch": 0.7653549098933431, + "grad_norm": 1.8104929326669927, + "learning_rate": 2.751377240900369e-06, + "loss": 0.7199, + "step": 24972 + }, + { + "epoch": 0.7653855584160844, + "grad_norm": 1.5959889228709687, + "learning_rate": 2.750693456039766e-06, + "loss": 0.5865, + "step": 24973 + }, + { + "epoch": 0.7654162069388255, + "grad_norm": 1.563017900437345, + "learning_rate": 2.7500097426084317e-06, + "loss": 0.6681, + "step": 24974 + }, + { + "epoch": 0.7654468554615668, + "grad_norm": 1.6781562726819104, + "learning_rate": 2.7493261006131065e-06, + "loss": 0.6383, + "step": 24975 + }, + { + "epoch": 0.7654775039843079, + "grad_norm": 1.480025908454522, + "learning_rate": 2.7486425300605257e-06, + "loss": 0.5206, + "step": 24976 + }, + { + "epoch": 0.7655081525070492, + "grad_norm": 1.739400460658128, + "learning_rate": 2.74795903095743e-06, + "loss": 0.6255, + "step": 24977 + }, + { + "epoch": 0.7655388010297903, + "grad_norm": 1.8189417935305336, + "learning_rate": 2.7472756033105486e-06, + "loss": 0.6168, + "step": 24978 + }, + { + "epoch": 0.7655694495525316, + "grad_norm": 1.681529145368073, + "learning_rate": 2.7465922471266094e-06, + "loss": 0.5808, + "step": 24979 + }, + { + "epoch": 0.7656000980752727, + "grad_norm": 1.7252805274465794, + "learning_rate": 2.74590896241236e-06, + "loss": 0.58, + "step": 24980 + }, + { + "epoch": 0.765630746598014, + "grad_norm": 1.652309733604823, + "learning_rate": 2.745225749174525e-06, + "loss": 0.6352, + "step": 24981 + }, + { + "epoch": 0.7656613951207552, + "grad_norm": 2.001367164918085, + "learning_rate": 2.7445426074198347e-06, + "loss": 0.6967, + "step": 24982 + }, + { + "epoch": 0.7656920436434964, + "grad_norm": 1.6644133127282572, + "learning_rate": 2.7438595371550216e-06, + "loss": 0.6104, + "step": 24983 + }, + { + "epoch": 0.7657226921662376, + "grad_norm": 1.4507912686565543, + "learning_rate": 2.743176538386817e-06, + "loss": 0.6577, + "step": 24984 + }, + { + "epoch": 0.7657533406889788, + "grad_norm": 1.641003863180422, + "learning_rate": 2.7424936111219548e-06, + "loss": 0.7174, + "step": 24985 + }, + { + "epoch": 0.76578398921172, + "grad_norm": 1.5495298348935835, + "learning_rate": 2.7418107553671556e-06, + "loss": 0.6114, + "step": 24986 + }, + { + "epoch": 0.7658146377344612, + "grad_norm": 0.6739523550178442, + "learning_rate": 2.741127971129153e-06, + "loss": 0.5262, + "step": 24987 + }, + { + "epoch": 0.7658452862572024, + "grad_norm": 1.6869233752911066, + "learning_rate": 2.740445258414677e-06, + "loss": 0.6823, + "step": 24988 + }, + { + "epoch": 0.7658759347799436, + "grad_norm": 1.6598256940844318, + "learning_rate": 2.7397626172304504e-06, + "loss": 0.609, + "step": 24989 + }, + { + "epoch": 0.7659065833026848, + "grad_norm": 1.788260075779694, + "learning_rate": 2.7390800475831948e-06, + "loss": 0.7486, + "step": 24990 + }, + { + "epoch": 0.7659372318254261, + "grad_norm": 1.82243283801284, + "learning_rate": 2.738397549479648e-06, + "loss": 0.6851, + "step": 24991 + }, + { + "epoch": 0.7659678803481672, + "grad_norm": 1.586013674012022, + "learning_rate": 2.7377151229265233e-06, + "loss": 0.6523, + "step": 24992 + }, + { + "epoch": 0.7659985288709085, + "grad_norm": 1.7937937000453692, + "learning_rate": 2.737032767930554e-06, + "loss": 0.6518, + "step": 24993 + }, + { + "epoch": 0.7660291773936496, + "grad_norm": 0.657371212789346, + "learning_rate": 2.7363504844984557e-06, + "loss": 0.5219, + "step": 24994 + }, + { + "epoch": 0.7660598259163909, + "grad_norm": 1.7195880511925987, + "learning_rate": 2.7356682726369544e-06, + "loss": 0.6582, + "step": 24995 + }, + { + "epoch": 0.766090474439132, + "grad_norm": 1.5563931031319582, + "learning_rate": 2.734986132352776e-06, + "loss": 0.6352, + "step": 24996 + }, + { + "epoch": 0.7661211229618733, + "grad_norm": 1.75984322745801, + "learning_rate": 2.734304063652634e-06, + "loss": 0.6046, + "step": 24997 + }, + { + "epoch": 0.7661517714846144, + "grad_norm": 1.6191341763809475, + "learning_rate": 2.7336220665432545e-06, + "loss": 0.6068, + "step": 24998 + }, + { + "epoch": 0.7661824200073556, + "grad_norm": 1.6105870129412674, + "learning_rate": 2.7329401410313584e-06, + "loss": 0.6414, + "step": 24999 + }, + { + "epoch": 0.7662130685300969, + "grad_norm": 1.6715853246601255, + "learning_rate": 2.7322582871236614e-06, + "loss": 0.6054, + "step": 25000 + }, + { + "epoch": 0.766243717052838, + "grad_norm": 0.6364595694818944, + "learning_rate": 2.7315765048268817e-06, + "loss": 0.5049, + "step": 25001 + }, + { + "epoch": 0.7662743655755793, + "grad_norm": 1.7866183933826745, + "learning_rate": 2.730894794147737e-06, + "loss": 0.685, + "step": 25002 + }, + { + "epoch": 0.7663050140983204, + "grad_norm": 1.462193412416914, + "learning_rate": 2.7302131550929467e-06, + "loss": 0.6371, + "step": 25003 + }, + { + "epoch": 0.7663356626210617, + "grad_norm": 0.6553999390274475, + "learning_rate": 2.7295315876692287e-06, + "loss": 0.5131, + "step": 25004 + }, + { + "epoch": 0.7663663111438028, + "grad_norm": 1.7836167310040323, + "learning_rate": 2.728850091883293e-06, + "loss": 0.6683, + "step": 25005 + }, + { + "epoch": 0.7663969596665441, + "grad_norm": 1.745831227396596, + "learning_rate": 2.7281686677418585e-06, + "loss": 0.6033, + "step": 25006 + }, + { + "epoch": 0.7664276081892852, + "grad_norm": 1.7878125941024892, + "learning_rate": 2.727487315251641e-06, + "loss": 0.6976, + "step": 25007 + }, + { + "epoch": 0.7664582567120265, + "grad_norm": 1.5679609076278023, + "learning_rate": 2.7268060344193524e-06, + "loss": 0.5963, + "step": 25008 + }, + { + "epoch": 0.7664889052347676, + "grad_norm": 1.7607921433317164, + "learning_rate": 2.726124825251698e-06, + "loss": 0.6567, + "step": 25009 + }, + { + "epoch": 0.7665195537575089, + "grad_norm": 1.6334944530808522, + "learning_rate": 2.7254436877554034e-06, + "loss": 0.5688, + "step": 25010 + }, + { + "epoch": 0.7665502022802501, + "grad_norm": 1.6583400745940464, + "learning_rate": 2.7247626219371704e-06, + "loss": 0.6092, + "step": 25011 + }, + { + "epoch": 0.7665808508029913, + "grad_norm": 0.6609564236309997, + "learning_rate": 2.724081627803715e-06, + "loss": 0.5347, + "step": 25012 + }, + { + "epoch": 0.7666114993257325, + "grad_norm": 1.5692392902124783, + "learning_rate": 2.7234007053617427e-06, + "loss": 0.6409, + "step": 25013 + }, + { + "epoch": 0.7666421478484737, + "grad_norm": 1.8604202461732156, + "learning_rate": 2.7227198546179656e-06, + "loss": 0.717, + "step": 25014 + }, + { + "epoch": 0.7666727963712149, + "grad_norm": 1.618865842069174, + "learning_rate": 2.722039075579094e-06, + "loss": 0.5928, + "step": 25015 + }, + { + "epoch": 0.7667034448939561, + "grad_norm": 1.5308653883702603, + "learning_rate": 2.7213583682518306e-06, + "loss": 0.6293, + "step": 25016 + }, + { + "epoch": 0.7667340934166973, + "grad_norm": 0.6926329964938367, + "learning_rate": 2.720677732642886e-06, + "loss": 0.5435, + "step": 25017 + }, + { + "epoch": 0.7667647419394386, + "grad_norm": 1.7360654866888248, + "learning_rate": 2.719997168758968e-06, + "loss": 0.7776, + "step": 25018 + }, + { + "epoch": 0.7667953904621797, + "grad_norm": 1.6853120165035775, + "learning_rate": 2.7193166766067812e-06, + "loss": 0.6911, + "step": 25019 + }, + { + "epoch": 0.766826038984921, + "grad_norm": 1.620333066712009, + "learning_rate": 2.7186362561930247e-06, + "loss": 0.7123, + "step": 25020 + }, + { + "epoch": 0.7668566875076621, + "grad_norm": 1.8704021881724047, + "learning_rate": 2.7179559075244132e-06, + "loss": 0.7037, + "step": 25021 + }, + { + "epoch": 0.7668873360304034, + "grad_norm": 1.579936792343944, + "learning_rate": 2.717275630607643e-06, + "loss": 0.6373, + "step": 25022 + }, + { + "epoch": 0.7669179845531445, + "grad_norm": 1.5899394943663807, + "learning_rate": 2.716595425449422e-06, + "loss": 0.6082, + "step": 25023 + }, + { + "epoch": 0.7669486330758858, + "grad_norm": 0.6799099920532035, + "learning_rate": 2.715915292056447e-06, + "loss": 0.52, + "step": 25024 + }, + { + "epoch": 0.7669792815986269, + "grad_norm": 1.3616213727389672, + "learning_rate": 2.7152352304354223e-06, + "loss": 0.5645, + "step": 25025 + }, + { + "epoch": 0.7670099301213682, + "grad_norm": 1.5523958997084584, + "learning_rate": 2.714555240593052e-06, + "loss": 0.6447, + "step": 25026 + }, + { + "epoch": 0.7670405786441093, + "grad_norm": 1.708253682314911, + "learning_rate": 2.713875322536029e-06, + "loss": 0.6162, + "step": 25027 + }, + { + "epoch": 0.7670712271668506, + "grad_norm": 1.7813990705668343, + "learning_rate": 2.7131954762710576e-06, + "loss": 0.671, + "step": 25028 + }, + { + "epoch": 0.7671018756895918, + "grad_norm": 0.6637068697955526, + "learning_rate": 2.712515701804839e-06, + "loss": 0.5046, + "step": 25029 + }, + { + "epoch": 0.7671325242123329, + "grad_norm": 2.004675077008431, + "learning_rate": 2.7118359991440636e-06, + "loss": 0.6749, + "step": 25030 + }, + { + "epoch": 0.7671631727350742, + "grad_norm": 1.6907787847809583, + "learning_rate": 2.7111563682954333e-06, + "loss": 0.6701, + "step": 25031 + }, + { + "epoch": 0.7671938212578153, + "grad_norm": 1.787079971583713, + "learning_rate": 2.7104768092656475e-06, + "loss": 0.646, + "step": 25032 + }, + { + "epoch": 0.7672244697805566, + "grad_norm": 1.6329472547514126, + "learning_rate": 2.709797322061396e-06, + "loss": 0.6398, + "step": 25033 + }, + { + "epoch": 0.7672551183032977, + "grad_norm": 1.5967607481832533, + "learning_rate": 2.7091179066893793e-06, + "loss": 0.7036, + "step": 25034 + }, + { + "epoch": 0.767285766826039, + "grad_norm": 1.594668402894269, + "learning_rate": 2.708438563156286e-06, + "loss": 0.6003, + "step": 25035 + }, + { + "epoch": 0.7673164153487801, + "grad_norm": 1.7930489028317569, + "learning_rate": 2.7077592914688132e-06, + "loss": 0.6279, + "step": 25036 + }, + { + "epoch": 0.7673470638715214, + "grad_norm": 1.5536053774713328, + "learning_rate": 2.7070800916336583e-06, + "loss": 0.6911, + "step": 25037 + }, + { + "epoch": 0.7673777123942626, + "grad_norm": 1.4541741729376505, + "learning_rate": 2.706400963657505e-06, + "loss": 0.65, + "step": 25038 + }, + { + "epoch": 0.7674083609170038, + "grad_norm": 1.5470998544185957, + "learning_rate": 2.7057219075470488e-06, + "loss": 0.6496, + "step": 25039 + }, + { + "epoch": 0.767439009439745, + "grad_norm": 1.8298859604975126, + "learning_rate": 2.705042923308985e-06, + "loss": 0.7273, + "step": 25040 + }, + { + "epoch": 0.7674696579624862, + "grad_norm": 1.7628129643353017, + "learning_rate": 2.7043640109499957e-06, + "loss": 0.6889, + "step": 25041 + }, + { + "epoch": 0.7675003064852274, + "grad_norm": 0.6726423639024923, + "learning_rate": 2.703685170476775e-06, + "loss": 0.5335, + "step": 25042 + }, + { + "epoch": 0.7675309550079686, + "grad_norm": 1.6664234753791176, + "learning_rate": 2.7030064018960144e-06, + "loss": 0.6169, + "step": 25043 + }, + { + "epoch": 0.7675616035307098, + "grad_norm": 1.63141039592218, + "learning_rate": 2.702327705214396e-06, + "loss": 0.6611, + "step": 25044 + }, + { + "epoch": 0.767592252053451, + "grad_norm": 1.757521492824501, + "learning_rate": 2.701649080438613e-06, + "loss": 0.6873, + "step": 25045 + }, + { + "epoch": 0.7676229005761922, + "grad_norm": 1.6391793725849972, + "learning_rate": 2.700970527575345e-06, + "loss": 0.581, + "step": 25046 + }, + { + "epoch": 0.7676535490989335, + "grad_norm": 1.8443396265094143, + "learning_rate": 2.700292046631282e-06, + "loss": 0.6395, + "step": 25047 + }, + { + "epoch": 0.7676841976216746, + "grad_norm": 1.604574606606986, + "learning_rate": 2.699613637613113e-06, + "loss": 0.6715, + "step": 25048 + }, + { + "epoch": 0.7677148461444159, + "grad_norm": 1.6746461338871228, + "learning_rate": 2.6989353005275154e-06, + "loss": 0.6669, + "step": 25049 + }, + { + "epoch": 0.767745494667157, + "grad_norm": 1.6147063955858871, + "learning_rate": 2.6982570353811764e-06, + "loss": 0.6813, + "step": 25050 + }, + { + "epoch": 0.7677761431898983, + "grad_norm": 0.6517784900814818, + "learning_rate": 2.6975788421807813e-06, + "loss": 0.5182, + "step": 25051 + }, + { + "epoch": 0.7678067917126394, + "grad_norm": 1.9342572525396644, + "learning_rate": 2.6969007209330077e-06, + "loss": 0.6477, + "step": 25052 + }, + { + "epoch": 0.7678374402353807, + "grad_norm": 0.6511274896765678, + "learning_rate": 2.6962226716445437e-06, + "loss": 0.4953, + "step": 25053 + }, + { + "epoch": 0.7678680887581218, + "grad_norm": 0.6395609715236781, + "learning_rate": 2.695544694322063e-06, + "loss": 0.5229, + "step": 25054 + }, + { + "epoch": 0.7678987372808631, + "grad_norm": 1.7209169109256108, + "learning_rate": 2.694866788972249e-06, + "loss": 0.7346, + "step": 25055 + }, + { + "epoch": 0.7679293858036043, + "grad_norm": 1.7939988853136983, + "learning_rate": 2.694188955601784e-06, + "loss": 0.7087, + "step": 25056 + }, + { + "epoch": 0.7679600343263455, + "grad_norm": 1.560243602141352, + "learning_rate": 2.693511194217343e-06, + "loss": 0.6047, + "step": 25057 + }, + { + "epoch": 0.7679906828490867, + "grad_norm": 1.6758969840673592, + "learning_rate": 2.692833504825605e-06, + "loss": 0.6394, + "step": 25058 + }, + { + "epoch": 0.7680213313718279, + "grad_norm": 1.8257113656878163, + "learning_rate": 2.692155887433251e-06, + "loss": 0.7156, + "step": 25059 + }, + { + "epoch": 0.7680519798945691, + "grad_norm": 1.725597567797289, + "learning_rate": 2.6914783420469514e-06, + "loss": 0.6752, + "step": 25060 + }, + { + "epoch": 0.7680826284173102, + "grad_norm": 1.6921201952722713, + "learning_rate": 2.6908008686733864e-06, + "loss": 0.6636, + "step": 25061 + }, + { + "epoch": 0.7681132769400515, + "grad_norm": 1.678357557856749, + "learning_rate": 2.690123467319233e-06, + "loss": 0.691, + "step": 25062 + }, + { + "epoch": 0.7681439254627926, + "grad_norm": 1.6464030030396726, + "learning_rate": 2.6894461379911607e-06, + "loss": 0.6789, + "step": 25063 + }, + { + "epoch": 0.7681745739855339, + "grad_norm": 2.054738402713889, + "learning_rate": 2.6887688806958488e-06, + "loss": 0.6976, + "step": 25064 + }, + { + "epoch": 0.768205222508275, + "grad_norm": 1.5461303588631132, + "learning_rate": 2.6880916954399627e-06, + "loss": 0.6854, + "step": 25065 + }, + { + "epoch": 0.7682358710310163, + "grad_norm": 1.6238829156307562, + "learning_rate": 2.6874145822301855e-06, + "loss": 0.5913, + "step": 25066 + }, + { + "epoch": 0.7682665195537575, + "grad_norm": 1.467997709492943, + "learning_rate": 2.6867375410731834e-06, + "loss": 0.5508, + "step": 25067 + }, + { + "epoch": 0.7682971680764987, + "grad_norm": 1.5961462169877323, + "learning_rate": 2.6860605719756238e-06, + "loss": 0.7208, + "step": 25068 + }, + { + "epoch": 0.7683278165992399, + "grad_norm": 1.7745606077702187, + "learning_rate": 2.685383674944181e-06, + "loss": 0.6666, + "step": 25069 + }, + { + "epoch": 0.7683584651219811, + "grad_norm": 1.8405116050616148, + "learning_rate": 2.684706849985528e-06, + "loss": 0.6764, + "step": 25070 + }, + { + "epoch": 0.7683891136447223, + "grad_norm": 1.7121393629102173, + "learning_rate": 2.684030097106326e-06, + "loss": 0.6035, + "step": 25071 + }, + { + "epoch": 0.7684197621674635, + "grad_norm": 1.7874881202639543, + "learning_rate": 2.683353416313249e-06, + "loss": 0.6893, + "step": 25072 + }, + { + "epoch": 0.7684504106902047, + "grad_norm": 1.591977405828768, + "learning_rate": 2.682676807612965e-06, + "loss": 0.6354, + "step": 25073 + }, + { + "epoch": 0.768481059212946, + "grad_norm": 0.65963595267423, + "learning_rate": 2.682000271012135e-06, + "loss": 0.5158, + "step": 25074 + }, + { + "epoch": 0.7685117077356871, + "grad_norm": 1.5448685260327772, + "learning_rate": 2.681323806517432e-06, + "loss": 0.6386, + "step": 25075 + }, + { + "epoch": 0.7685423562584284, + "grad_norm": 1.5657285438579196, + "learning_rate": 2.680647414135512e-06, + "loss": 0.6483, + "step": 25076 + }, + { + "epoch": 0.7685730047811695, + "grad_norm": 1.7100150384694384, + "learning_rate": 2.6799710938730528e-06, + "loss": 0.6541, + "step": 25077 + }, + { + "epoch": 0.7686036533039108, + "grad_norm": 1.6486308101931733, + "learning_rate": 2.679294845736711e-06, + "loss": 0.6162, + "step": 25078 + }, + { + "epoch": 0.7686343018266519, + "grad_norm": 0.6933272258209832, + "learning_rate": 2.6786186697331463e-06, + "loss": 0.5152, + "step": 25079 + }, + { + "epoch": 0.7686649503493932, + "grad_norm": 1.6805896900902855, + "learning_rate": 2.677942565869026e-06, + "loss": 0.6554, + "step": 25080 + }, + { + "epoch": 0.7686955988721343, + "grad_norm": 1.8702934711003134, + "learning_rate": 2.677266534151013e-06, + "loss": 0.6644, + "step": 25081 + }, + { + "epoch": 0.7687262473948756, + "grad_norm": 1.748475037235264, + "learning_rate": 2.6765905745857646e-06, + "loss": 0.7198, + "step": 25082 + }, + { + "epoch": 0.7687568959176168, + "grad_norm": 1.5369544188284943, + "learning_rate": 2.6759146871799425e-06, + "loss": 0.6269, + "step": 25083 + }, + { + "epoch": 0.768787544440358, + "grad_norm": 1.6072505894490203, + "learning_rate": 2.675238871940207e-06, + "loss": 0.6057, + "step": 25084 + }, + { + "epoch": 0.7688181929630992, + "grad_norm": 0.9465072805471408, + "learning_rate": 2.67456312887322e-06, + "loss": 0.4945, + "step": 25085 + }, + { + "epoch": 0.7688488414858404, + "grad_norm": 1.4005807689008636, + "learning_rate": 2.673887457985637e-06, + "loss": 0.6932, + "step": 25086 + }, + { + "epoch": 0.7688794900085816, + "grad_norm": 0.68035918607428, + "learning_rate": 2.673211859284112e-06, + "loss": 0.5089, + "step": 25087 + }, + { + "epoch": 0.7689101385313228, + "grad_norm": 1.563529705450103, + "learning_rate": 2.6725363327753053e-06, + "loss": 0.5791, + "step": 25088 + }, + { + "epoch": 0.768940787054064, + "grad_norm": 1.8404295670566628, + "learning_rate": 2.671860878465875e-06, + "loss": 0.7177, + "step": 25089 + }, + { + "epoch": 0.7689714355768053, + "grad_norm": 1.4948488632543693, + "learning_rate": 2.6711854963624728e-06, + "loss": 0.5901, + "step": 25090 + }, + { + "epoch": 0.7690020840995464, + "grad_norm": 1.6312206045878765, + "learning_rate": 2.670510186471754e-06, + "loss": 0.5905, + "step": 25091 + }, + { + "epoch": 0.7690327326222876, + "grad_norm": 1.647972839946551, + "learning_rate": 2.669834948800375e-06, + "loss": 0.7777, + "step": 25092 + }, + { + "epoch": 0.7690633811450288, + "grad_norm": 0.6595067713903032, + "learning_rate": 2.66915978335499e-06, + "loss": 0.506, + "step": 25093 + }, + { + "epoch": 0.76909402966777, + "grad_norm": 1.750286380942677, + "learning_rate": 2.668484690142249e-06, + "loss": 0.627, + "step": 25094 + }, + { + "epoch": 0.7691246781905112, + "grad_norm": 0.714601665675928, + "learning_rate": 2.6678096691687983e-06, + "loss": 0.5549, + "step": 25095 + }, + { + "epoch": 0.7691553267132524, + "grad_norm": 1.4123635092143856, + "learning_rate": 2.667134720441301e-06, + "loss": 0.6266, + "step": 25096 + }, + { + "epoch": 0.7691859752359936, + "grad_norm": 1.6415133691731463, + "learning_rate": 2.6664598439664023e-06, + "loss": 0.59, + "step": 25097 + }, + { + "epoch": 0.7692166237587348, + "grad_norm": 1.608286218341482, + "learning_rate": 2.6657850397507477e-06, + "loss": 0.7038, + "step": 25098 + }, + { + "epoch": 0.769247272281476, + "grad_norm": 1.6978855618918116, + "learning_rate": 2.66511030780099e-06, + "loss": 0.6031, + "step": 25099 + }, + { + "epoch": 0.7692779208042172, + "grad_norm": 1.5811127942891308, + "learning_rate": 2.6644356481237786e-06, + "loss": 0.5758, + "step": 25100 + }, + { + "epoch": 0.7693085693269585, + "grad_norm": 1.6159537261829657, + "learning_rate": 2.663761060725758e-06, + "loss": 0.6166, + "step": 25101 + }, + { + "epoch": 0.7693392178496996, + "grad_norm": 0.696950488323829, + "learning_rate": 2.663086545613577e-06, + "loss": 0.5606, + "step": 25102 + }, + { + "epoch": 0.7693698663724409, + "grad_norm": 1.5843049137691039, + "learning_rate": 2.6624121027938797e-06, + "loss": 0.6172, + "step": 25103 + }, + { + "epoch": 0.769400514895182, + "grad_norm": 1.4483945197713233, + "learning_rate": 2.6617377322733184e-06, + "loss": 0.5539, + "step": 25104 + }, + { + "epoch": 0.7694311634179233, + "grad_norm": 1.5268952624786394, + "learning_rate": 2.6610634340585316e-06, + "loss": 0.672, + "step": 25105 + }, + { + "epoch": 0.7694618119406644, + "grad_norm": 1.4686518102023502, + "learning_rate": 2.6603892081561588e-06, + "loss": 0.5627, + "step": 25106 + }, + { + "epoch": 0.7694924604634057, + "grad_norm": 1.9667022415443034, + "learning_rate": 2.6597150545728555e-06, + "loss": 0.7199, + "step": 25107 + }, + { + "epoch": 0.7695231089861468, + "grad_norm": 0.6387689303839353, + "learning_rate": 2.6590409733152578e-06, + "loss": 0.5452, + "step": 25108 + }, + { + "epoch": 0.7695537575088881, + "grad_norm": 1.7459933486336994, + "learning_rate": 2.6583669643900035e-06, + "loss": 0.6227, + "step": 25109 + }, + { + "epoch": 0.7695844060316293, + "grad_norm": 1.5427962663649062, + "learning_rate": 2.657693027803739e-06, + "loss": 0.7129, + "step": 25110 + }, + { + "epoch": 0.7696150545543705, + "grad_norm": 0.6512223981241394, + "learning_rate": 2.6570191635631036e-06, + "loss": 0.5098, + "step": 25111 + }, + { + "epoch": 0.7696457030771117, + "grad_norm": 1.6693354900512798, + "learning_rate": 2.65634537167474e-06, + "loss": 0.6205, + "step": 25112 + }, + { + "epoch": 0.7696763515998529, + "grad_norm": 1.5898444794397164, + "learning_rate": 2.6556716521452817e-06, + "loss": 0.7085, + "step": 25113 + }, + { + "epoch": 0.7697070001225941, + "grad_norm": 1.7216151387989558, + "learning_rate": 2.6549980049813694e-06, + "loss": 0.6564, + "step": 25114 + }, + { + "epoch": 0.7697376486453353, + "grad_norm": 1.6422378355693765, + "learning_rate": 2.6543244301896444e-06, + "loss": 0.6366, + "step": 25115 + }, + { + "epoch": 0.7697682971680765, + "grad_norm": 0.7182295423292376, + "learning_rate": 2.65365092777674e-06, + "loss": 0.5452, + "step": 25116 + }, + { + "epoch": 0.7697989456908177, + "grad_norm": 1.5684556700054606, + "learning_rate": 2.652977497749286e-06, + "loss": 0.6524, + "step": 25117 + }, + { + "epoch": 0.7698295942135589, + "grad_norm": 0.6849446068213355, + "learning_rate": 2.6523041401139316e-06, + "loss": 0.5099, + "step": 25118 + }, + { + "epoch": 0.7698602427363002, + "grad_norm": 1.5855779116738875, + "learning_rate": 2.6516308548773005e-06, + "loss": 0.5346, + "step": 25119 + }, + { + "epoch": 0.7698908912590413, + "grad_norm": 0.6731354227339938, + "learning_rate": 2.650957642046035e-06, + "loss": 0.5385, + "step": 25120 + }, + { + "epoch": 0.7699215397817826, + "grad_norm": 1.5729839232679306, + "learning_rate": 2.650284501626761e-06, + "loss": 0.6848, + "step": 25121 + }, + { + "epoch": 0.7699521883045237, + "grad_norm": 1.4047719210110803, + "learning_rate": 2.6496114336261135e-06, + "loss": 0.5491, + "step": 25122 + }, + { + "epoch": 0.7699828368272649, + "grad_norm": 0.6817981868572558, + "learning_rate": 2.648938438050729e-06, + "loss": 0.5288, + "step": 25123 + }, + { + "epoch": 0.7700134853500061, + "grad_norm": 1.5706535341681978, + "learning_rate": 2.6482655149072313e-06, + "loss": 0.5894, + "step": 25124 + }, + { + "epoch": 0.7700441338727473, + "grad_norm": 1.707464370736348, + "learning_rate": 2.6475926642022545e-06, + "loss": 0.6308, + "step": 25125 + }, + { + "epoch": 0.7700747823954885, + "grad_norm": 1.7294762589745443, + "learning_rate": 2.6469198859424318e-06, + "loss": 0.5921, + "step": 25126 + }, + { + "epoch": 0.7701054309182297, + "grad_norm": 1.68482900784973, + "learning_rate": 2.646247180134388e-06, + "loss": 0.6792, + "step": 25127 + }, + { + "epoch": 0.770136079440971, + "grad_norm": 1.5988699742243635, + "learning_rate": 2.645574546784747e-06, + "loss": 0.6841, + "step": 25128 + }, + { + "epoch": 0.7701667279637121, + "grad_norm": 1.5730081021570017, + "learning_rate": 2.644901985900148e-06, + "loss": 0.5903, + "step": 25129 + }, + { + "epoch": 0.7701973764864534, + "grad_norm": 1.881981138465677, + "learning_rate": 2.644229497487207e-06, + "loss": 0.7403, + "step": 25130 + }, + { + "epoch": 0.7702280250091945, + "grad_norm": 1.717818414097053, + "learning_rate": 2.6435570815525603e-06, + "loss": 0.6331, + "step": 25131 + }, + { + "epoch": 0.7702586735319358, + "grad_norm": 1.689697272870392, + "learning_rate": 2.6428847381028235e-06, + "loss": 0.6076, + "step": 25132 + }, + { + "epoch": 0.7702893220546769, + "grad_norm": 1.5314701208828605, + "learning_rate": 2.6422124671446257e-06, + "loss": 0.6364, + "step": 25133 + }, + { + "epoch": 0.7703199705774182, + "grad_norm": 1.564859998494089, + "learning_rate": 2.6415402686845936e-06, + "loss": 0.6767, + "step": 25134 + }, + { + "epoch": 0.7703506191001593, + "grad_norm": 1.9036433815998177, + "learning_rate": 2.640868142729346e-06, + "loss": 0.6503, + "step": 25135 + }, + { + "epoch": 0.7703812676229006, + "grad_norm": 1.720558772200777, + "learning_rate": 2.640196089285507e-06, + "loss": 0.6738, + "step": 25136 + }, + { + "epoch": 0.7704119161456418, + "grad_norm": 1.3484616330900994, + "learning_rate": 2.6395241083597024e-06, + "loss": 0.4977, + "step": 25137 + }, + { + "epoch": 0.770442564668383, + "grad_norm": 1.599879456945573, + "learning_rate": 2.638852199958546e-06, + "loss": 0.6529, + "step": 25138 + }, + { + "epoch": 0.7704732131911242, + "grad_norm": 1.844813537747114, + "learning_rate": 2.638180364088666e-06, + "loss": 0.6397, + "step": 25139 + }, + { + "epoch": 0.7705038617138654, + "grad_norm": 1.6188222310545317, + "learning_rate": 2.6375086007566766e-06, + "loss": 0.6062, + "step": 25140 + }, + { + "epoch": 0.7705345102366066, + "grad_norm": 1.7817601538017729, + "learning_rate": 2.636836909969197e-06, + "loss": 0.6699, + "step": 25141 + }, + { + "epoch": 0.7705651587593478, + "grad_norm": 1.6775622385529336, + "learning_rate": 2.6361652917328506e-06, + "loss": 0.6599, + "step": 25142 + }, + { + "epoch": 0.770595807282089, + "grad_norm": 1.739290493023613, + "learning_rate": 2.6354937460542495e-06, + "loss": 0.6501, + "step": 25143 + }, + { + "epoch": 0.7706264558048302, + "grad_norm": 1.7925701436073478, + "learning_rate": 2.634822272940012e-06, + "loss": 0.5948, + "step": 25144 + }, + { + "epoch": 0.7706571043275714, + "grad_norm": 1.68149985881153, + "learning_rate": 2.634150872396758e-06, + "loss": 0.645, + "step": 25145 + }, + { + "epoch": 0.7706877528503127, + "grad_norm": 1.5350599468320676, + "learning_rate": 2.633479544431098e-06, + "loss": 0.6086, + "step": 25146 + }, + { + "epoch": 0.7707184013730538, + "grad_norm": 1.3546060424915103, + "learning_rate": 2.6328082890496487e-06, + "loss": 0.5006, + "step": 25147 + }, + { + "epoch": 0.7707490498957951, + "grad_norm": 0.6480550864089277, + "learning_rate": 2.6321371062590264e-06, + "loss": 0.5192, + "step": 25148 + }, + { + "epoch": 0.7707796984185362, + "grad_norm": 1.9201622185946499, + "learning_rate": 2.6314659960658407e-06, + "loss": 0.6987, + "step": 25149 + }, + { + "epoch": 0.7708103469412775, + "grad_norm": 1.5740153721254935, + "learning_rate": 2.630794958476708e-06, + "loss": 0.5854, + "step": 25150 + }, + { + "epoch": 0.7708409954640186, + "grad_norm": 1.8171129373515476, + "learning_rate": 2.6301239934982347e-06, + "loss": 0.6689, + "step": 25151 + }, + { + "epoch": 0.7708716439867599, + "grad_norm": 0.6748854932646854, + "learning_rate": 2.629453101137036e-06, + "loss": 0.5527, + "step": 25152 + }, + { + "epoch": 0.770902292509501, + "grad_norm": 0.671417222970454, + "learning_rate": 2.6287822813997243e-06, + "loss": 0.5164, + "step": 25153 + }, + { + "epoch": 0.7709329410322422, + "grad_norm": 0.693067619873647, + "learning_rate": 2.6281115342929044e-06, + "loss": 0.5371, + "step": 25154 + }, + { + "epoch": 0.7709635895549835, + "grad_norm": 1.7444422936618835, + "learning_rate": 2.627440859823187e-06, + "loss": 0.6255, + "step": 25155 + }, + { + "epoch": 0.7709942380777246, + "grad_norm": 1.6409534077109036, + "learning_rate": 2.6267702579971843e-06, + "loss": 0.6601, + "step": 25156 + }, + { + "epoch": 0.7710248866004659, + "grad_norm": 0.6771370146736172, + "learning_rate": 2.6260997288214983e-06, + "loss": 0.5295, + "step": 25157 + }, + { + "epoch": 0.771055535123207, + "grad_norm": 1.5831001259082378, + "learning_rate": 2.6254292723027374e-06, + "loss": 0.7291, + "step": 25158 + }, + { + "epoch": 0.7710861836459483, + "grad_norm": 1.962293913272931, + "learning_rate": 2.6247588884475127e-06, + "loss": 0.6331, + "step": 25159 + }, + { + "epoch": 0.7711168321686894, + "grad_norm": 1.5444356797145324, + "learning_rate": 2.6240885772624226e-06, + "loss": 0.6304, + "step": 25160 + }, + { + "epoch": 0.7711474806914307, + "grad_norm": 1.5600763413537877, + "learning_rate": 2.623418338754078e-06, + "loss": 0.645, + "step": 25161 + }, + { + "epoch": 0.7711781292141718, + "grad_norm": 1.4629755581333126, + "learning_rate": 2.622748172929076e-06, + "loss": 0.6146, + "step": 25162 + }, + { + "epoch": 0.7712087777369131, + "grad_norm": 0.663451446026902, + "learning_rate": 2.622078079794025e-06, + "loss": 0.5198, + "step": 25163 + }, + { + "epoch": 0.7712394262596542, + "grad_norm": 1.7117877635524708, + "learning_rate": 2.621408059355529e-06, + "loss": 0.674, + "step": 25164 + }, + { + "epoch": 0.7712700747823955, + "grad_norm": 1.6898996807190858, + "learning_rate": 2.6207381116201836e-06, + "loss": 0.6765, + "step": 25165 + }, + { + "epoch": 0.7713007233051367, + "grad_norm": 1.593752277939377, + "learning_rate": 2.620068236594594e-06, + "loss": 0.6176, + "step": 25166 + }, + { + "epoch": 0.7713313718278779, + "grad_norm": 1.4797671228414877, + "learning_rate": 2.619398434285364e-06, + "loss": 0.6619, + "step": 25167 + }, + { + "epoch": 0.7713620203506191, + "grad_norm": 1.5676665396312277, + "learning_rate": 2.6187287046990863e-06, + "loss": 0.5182, + "step": 25168 + }, + { + "epoch": 0.7713926688733603, + "grad_norm": 1.9081833123900815, + "learning_rate": 2.618059047842363e-06, + "loss": 0.6513, + "step": 25169 + }, + { + "epoch": 0.7714233173961015, + "grad_norm": 1.7730233418851153, + "learning_rate": 2.6173894637217954e-06, + "loss": 0.6129, + "step": 25170 + }, + { + "epoch": 0.7714539659188427, + "grad_norm": 1.6188918695079089, + "learning_rate": 2.6167199523439757e-06, + "loss": 0.71, + "step": 25171 + }, + { + "epoch": 0.7714846144415839, + "grad_norm": 1.826853008679677, + "learning_rate": 2.6160505137155067e-06, + "loss": 0.7083, + "step": 25172 + }, + { + "epoch": 0.7715152629643252, + "grad_norm": 0.6684393708288567, + "learning_rate": 2.6153811478429747e-06, + "loss": 0.528, + "step": 25173 + }, + { + "epoch": 0.7715459114870663, + "grad_norm": 1.5771661289674344, + "learning_rate": 2.6147118547329873e-06, + "loss": 0.697, + "step": 25174 + }, + { + "epoch": 0.7715765600098076, + "grad_norm": 1.8060838791312241, + "learning_rate": 2.6140426343921345e-06, + "loss": 0.7219, + "step": 25175 + }, + { + "epoch": 0.7716072085325487, + "grad_norm": 0.6885697328389648, + "learning_rate": 2.6133734868270065e-06, + "loss": 0.5106, + "step": 25176 + }, + { + "epoch": 0.77163785705529, + "grad_norm": 0.6797100120890116, + "learning_rate": 2.612704412044199e-06, + "loss": 0.553, + "step": 25177 + }, + { + "epoch": 0.7716685055780311, + "grad_norm": 1.4224907911284324, + "learning_rate": 2.6120354100503075e-06, + "loss": 0.6123, + "step": 25178 + }, + { + "epoch": 0.7716991541007724, + "grad_norm": 1.586001323978827, + "learning_rate": 2.611366480851919e-06, + "loss": 0.6558, + "step": 25179 + }, + { + "epoch": 0.7717298026235135, + "grad_norm": 1.4143086158010267, + "learning_rate": 2.610697624455627e-06, + "loss": 0.6613, + "step": 25180 + }, + { + "epoch": 0.7717604511462548, + "grad_norm": 1.7027850711763608, + "learning_rate": 2.6100288408680254e-06, + "loss": 0.6359, + "step": 25181 + }, + { + "epoch": 0.771791099668996, + "grad_norm": 1.7195154374182176, + "learning_rate": 2.6093601300956973e-06, + "loss": 0.6379, + "step": 25182 + }, + { + "epoch": 0.7718217481917372, + "grad_norm": 1.662720646609982, + "learning_rate": 2.608691492145238e-06, + "loss": 0.6236, + "step": 25183 + }, + { + "epoch": 0.7718523967144784, + "grad_norm": 1.7698101893255993, + "learning_rate": 2.6080229270232283e-06, + "loss": 0.6766, + "step": 25184 + }, + { + "epoch": 0.7718830452372195, + "grad_norm": 0.6380398451408991, + "learning_rate": 2.6073544347362613e-06, + "loss": 0.516, + "step": 25185 + }, + { + "epoch": 0.7719136937599608, + "grad_norm": 1.5714912668453556, + "learning_rate": 2.6066860152909246e-06, + "loss": 0.6079, + "step": 25186 + }, + { + "epoch": 0.7719443422827019, + "grad_norm": 1.7276801539240316, + "learning_rate": 2.6060176686938e-06, + "loss": 0.705, + "step": 25187 + }, + { + "epoch": 0.7719749908054432, + "grad_norm": 1.6022711100723777, + "learning_rate": 2.605349394951475e-06, + "loss": 0.7066, + "step": 25188 + }, + { + "epoch": 0.7720056393281843, + "grad_norm": 0.6680904042340332, + "learning_rate": 2.6046811940705375e-06, + "loss": 0.5458, + "step": 25189 + }, + { + "epoch": 0.7720362878509256, + "grad_norm": 1.7127769359710607, + "learning_rate": 2.6040130660575645e-06, + "loss": 0.6904, + "step": 25190 + }, + { + "epoch": 0.7720669363736667, + "grad_norm": 0.6497792756752994, + "learning_rate": 2.6033450109191474e-06, + "loss": 0.5172, + "step": 25191 + }, + { + "epoch": 0.772097584896408, + "grad_norm": 1.754541552629107, + "learning_rate": 2.6026770286618573e-06, + "loss": 0.6247, + "step": 25192 + }, + { + "epoch": 0.7721282334191492, + "grad_norm": 0.6789712769030608, + "learning_rate": 2.6020091192922903e-06, + "loss": 0.5319, + "step": 25193 + }, + { + "epoch": 0.7721588819418904, + "grad_norm": 0.6892608587921086, + "learning_rate": 2.601341282817019e-06, + "loss": 0.515, + "step": 25194 + }, + { + "epoch": 0.7721895304646316, + "grad_norm": 0.6826194857316011, + "learning_rate": 2.6006735192426225e-06, + "loss": 0.5139, + "step": 25195 + }, + { + "epoch": 0.7722201789873728, + "grad_norm": 0.6614374944505589, + "learning_rate": 2.6000058285756835e-06, + "loss": 0.5349, + "step": 25196 + }, + { + "epoch": 0.772250827510114, + "grad_norm": 1.7526417091409896, + "learning_rate": 2.5993382108227826e-06, + "loss": 0.8073, + "step": 25197 + }, + { + "epoch": 0.7722814760328552, + "grad_norm": 1.8110456570757136, + "learning_rate": 2.5986706659904936e-06, + "loss": 0.6525, + "step": 25198 + }, + { + "epoch": 0.7723121245555964, + "grad_norm": 1.3961389781848776, + "learning_rate": 2.598003194085397e-06, + "loss": 0.5621, + "step": 25199 + }, + { + "epoch": 0.7723427730783377, + "grad_norm": 1.5361824864316869, + "learning_rate": 2.59733579511407e-06, + "loss": 0.6633, + "step": 25200 + }, + { + "epoch": 0.7723734216010788, + "grad_norm": 1.827179079903136, + "learning_rate": 2.596668469083086e-06, + "loss": 0.6037, + "step": 25201 + }, + { + "epoch": 0.7724040701238201, + "grad_norm": 1.750764447336766, + "learning_rate": 2.5960012159990233e-06, + "loss": 0.7687, + "step": 25202 + }, + { + "epoch": 0.7724347186465612, + "grad_norm": 1.7464246838611461, + "learning_rate": 2.5953340358684496e-06, + "loss": 0.6697, + "step": 25203 + }, + { + "epoch": 0.7724653671693025, + "grad_norm": 1.758409520034155, + "learning_rate": 2.5946669286979507e-06, + "loss": 0.6225, + "step": 25204 + }, + { + "epoch": 0.7724960156920436, + "grad_norm": 1.7568492448675983, + "learning_rate": 2.5939998944940937e-06, + "loss": 0.7118, + "step": 25205 + }, + { + "epoch": 0.7725266642147849, + "grad_norm": 1.9114912590393347, + "learning_rate": 2.5933329332634473e-06, + "loss": 0.6589, + "step": 25206 + }, + { + "epoch": 0.772557312737526, + "grad_norm": 0.6706948667674388, + "learning_rate": 2.592666045012585e-06, + "loss": 0.5241, + "step": 25207 + }, + { + "epoch": 0.7725879612602673, + "grad_norm": 1.526719078605571, + "learning_rate": 2.5919992297480847e-06, + "loss": 0.5694, + "step": 25208 + }, + { + "epoch": 0.7726186097830084, + "grad_norm": 1.5019876400615977, + "learning_rate": 2.5913324874765067e-06, + "loss": 0.6282, + "step": 25209 + }, + { + "epoch": 0.7726492583057497, + "grad_norm": 1.6263280368940423, + "learning_rate": 2.5906658182044262e-06, + "loss": 0.7025, + "step": 25210 + }, + { + "epoch": 0.7726799068284909, + "grad_norm": 1.6247524097974113, + "learning_rate": 2.5899992219384107e-06, + "loss": 0.6707, + "step": 25211 + }, + { + "epoch": 0.7727105553512321, + "grad_norm": 0.6742193141992893, + "learning_rate": 2.589332698685032e-06, + "loss": 0.5468, + "step": 25212 + }, + { + "epoch": 0.7727412038739733, + "grad_norm": 1.5740979414255813, + "learning_rate": 2.588666248450854e-06, + "loss": 0.6868, + "step": 25213 + }, + { + "epoch": 0.7727718523967145, + "grad_norm": 1.6311356728384663, + "learning_rate": 2.5879998712424383e-06, + "loss": 0.7019, + "step": 25214 + }, + { + "epoch": 0.7728025009194557, + "grad_norm": 1.6952237001827244, + "learning_rate": 2.5873335670663626e-06, + "loss": 0.6452, + "step": 25215 + }, + { + "epoch": 0.7728331494421968, + "grad_norm": 1.596361208151069, + "learning_rate": 2.586667335929185e-06, + "loss": 0.6055, + "step": 25216 + }, + { + "epoch": 0.7728637979649381, + "grad_norm": 1.6584005025632504, + "learning_rate": 2.5860011778374685e-06, + "loss": 0.5452, + "step": 25217 + }, + { + "epoch": 0.7728944464876792, + "grad_norm": 1.7177874758420848, + "learning_rate": 2.5853350927977795e-06, + "loss": 0.6279, + "step": 25218 + }, + { + "epoch": 0.7729250950104205, + "grad_norm": 0.6676488064061683, + "learning_rate": 2.5846690808166796e-06, + "loss": 0.5117, + "step": 25219 + }, + { + "epoch": 0.7729557435331617, + "grad_norm": 1.7409929544250222, + "learning_rate": 2.5840031419007374e-06, + "loss": 0.6726, + "step": 25220 + }, + { + "epoch": 0.7729863920559029, + "grad_norm": 0.6511679134868809, + "learning_rate": 2.5833372760565056e-06, + "loss": 0.5108, + "step": 25221 + }, + { + "epoch": 0.7730170405786441, + "grad_norm": 1.4138540181477923, + "learning_rate": 2.58267148329055e-06, + "loss": 0.6355, + "step": 25222 + }, + { + "epoch": 0.7730476891013853, + "grad_norm": 1.662637393646837, + "learning_rate": 2.582005763609432e-06, + "loss": 0.6009, + "step": 25223 + }, + { + "epoch": 0.7730783376241265, + "grad_norm": 1.5737070575682284, + "learning_rate": 2.5813401170197095e-06, + "loss": 0.5617, + "step": 25224 + }, + { + "epoch": 0.7731089861468677, + "grad_norm": 1.6134666009667455, + "learning_rate": 2.5806745435279355e-06, + "loss": 0.6448, + "step": 25225 + }, + { + "epoch": 0.7731396346696089, + "grad_norm": 1.6110636578477402, + "learning_rate": 2.5800090431406788e-06, + "loss": 0.6025, + "step": 25226 + }, + { + "epoch": 0.7731702831923501, + "grad_norm": 1.7717544159787801, + "learning_rate": 2.5793436158644924e-06, + "loss": 0.7526, + "step": 25227 + }, + { + "epoch": 0.7732009317150913, + "grad_norm": 1.5523024253400757, + "learning_rate": 2.578678261705928e-06, + "loss": 0.6718, + "step": 25228 + }, + { + "epoch": 0.7732315802378326, + "grad_norm": 1.6148017696708563, + "learning_rate": 2.5780129806715457e-06, + "loss": 0.7079, + "step": 25229 + }, + { + "epoch": 0.7732622287605737, + "grad_norm": 1.864314077165091, + "learning_rate": 2.577347772767902e-06, + "loss": 0.6716, + "step": 25230 + }, + { + "epoch": 0.773292877283315, + "grad_norm": 1.6368974479644962, + "learning_rate": 2.5766826380015507e-06, + "loss": 0.7188, + "step": 25231 + }, + { + "epoch": 0.7733235258060561, + "grad_norm": 1.8120977940669287, + "learning_rate": 2.576017576379043e-06, + "loss": 0.7538, + "step": 25232 + }, + { + "epoch": 0.7733541743287974, + "grad_norm": 1.647353100286319, + "learning_rate": 2.575352587906933e-06, + "loss": 0.6986, + "step": 25233 + }, + { + "epoch": 0.7733848228515385, + "grad_norm": 1.9281938111820254, + "learning_rate": 2.574687672591777e-06, + "loss": 0.6636, + "step": 25234 + }, + { + "epoch": 0.7734154713742798, + "grad_norm": 1.905767264879505, + "learning_rate": 2.5740228304401237e-06, + "loss": 0.7116, + "step": 25235 + }, + { + "epoch": 0.773446119897021, + "grad_norm": 1.5943889754286176, + "learning_rate": 2.5733580614585197e-06, + "loss": 0.6637, + "step": 25236 + }, + { + "epoch": 0.7734767684197622, + "grad_norm": 1.742435640771592, + "learning_rate": 2.5726933656535193e-06, + "loss": 0.636, + "step": 25237 + }, + { + "epoch": 0.7735074169425034, + "grad_norm": 0.681063629749853, + "learning_rate": 2.5720287430316717e-06, + "loss": 0.5347, + "step": 25238 + }, + { + "epoch": 0.7735380654652446, + "grad_norm": 1.6144556001938744, + "learning_rate": 2.5713641935995283e-06, + "loss": 0.6609, + "step": 25239 + }, + { + "epoch": 0.7735687139879858, + "grad_norm": 1.6998351352234191, + "learning_rate": 2.5706997173636308e-06, + "loss": 0.8311, + "step": 25240 + }, + { + "epoch": 0.773599362510727, + "grad_norm": 1.5260922391657927, + "learning_rate": 2.57003531433053e-06, + "loss": 0.6408, + "step": 25241 + }, + { + "epoch": 0.7736300110334682, + "grad_norm": 1.7993925787124083, + "learning_rate": 2.569370984506775e-06, + "loss": 0.6082, + "step": 25242 + }, + { + "epoch": 0.7736606595562094, + "grad_norm": 1.7004445806557908, + "learning_rate": 2.56870672789891e-06, + "loss": 0.5836, + "step": 25243 + }, + { + "epoch": 0.7736913080789506, + "grad_norm": 1.768621968944887, + "learning_rate": 2.5680425445134718e-06, + "loss": 0.701, + "step": 25244 + }, + { + "epoch": 0.7737219566016919, + "grad_norm": 1.5800737822563982, + "learning_rate": 2.5673784343570186e-06, + "loss": 0.6428, + "step": 25245 + }, + { + "epoch": 0.773752605124433, + "grad_norm": 1.7602180441758917, + "learning_rate": 2.5667143974360843e-06, + "loss": 0.6538, + "step": 25246 + }, + { + "epoch": 0.7737832536471742, + "grad_norm": 1.8071212292675976, + "learning_rate": 2.5660504337572178e-06, + "loss": 0.7596, + "step": 25247 + }, + { + "epoch": 0.7738139021699154, + "grad_norm": 1.6406442708528002, + "learning_rate": 2.565386543326955e-06, + "loss": 0.6066, + "step": 25248 + }, + { + "epoch": 0.7738445506926566, + "grad_norm": 1.67969583225448, + "learning_rate": 2.5647227261518415e-06, + "loss": 0.6734, + "step": 25249 + }, + { + "epoch": 0.7738751992153978, + "grad_norm": 1.6652190713511823, + "learning_rate": 2.5640589822384197e-06, + "loss": 0.6329, + "step": 25250 + }, + { + "epoch": 0.773905847738139, + "grad_norm": 1.7585588272392436, + "learning_rate": 2.5633953115932254e-06, + "loss": 0.6075, + "step": 25251 + }, + { + "epoch": 0.7739364962608802, + "grad_norm": 1.5746448037117606, + "learning_rate": 2.5627317142227994e-06, + "loss": 0.6037, + "step": 25252 + }, + { + "epoch": 0.7739671447836214, + "grad_norm": 0.6793090431516652, + "learning_rate": 2.562068190133683e-06, + "loss": 0.5445, + "step": 25253 + }, + { + "epoch": 0.7739977933063626, + "grad_norm": 1.4915627480611167, + "learning_rate": 2.5614047393324127e-06, + "loss": 0.6023, + "step": 25254 + }, + { + "epoch": 0.7740284418291038, + "grad_norm": 1.7609375015989555, + "learning_rate": 2.560741361825518e-06, + "loss": 0.6856, + "step": 25255 + }, + { + "epoch": 0.7740590903518451, + "grad_norm": 1.521568593176724, + "learning_rate": 2.5600780576195485e-06, + "loss": 0.6692, + "step": 25256 + }, + { + "epoch": 0.7740897388745862, + "grad_norm": 0.6521510716444332, + "learning_rate": 2.5594148267210307e-06, + "loss": 0.4867, + "step": 25257 + }, + { + "epoch": 0.7741203873973275, + "grad_norm": 1.8792971556271432, + "learning_rate": 2.5587516691365043e-06, + "loss": 0.6162, + "step": 25258 + }, + { + "epoch": 0.7741510359200686, + "grad_norm": 1.7304954785086266, + "learning_rate": 2.5580885848725e-06, + "loss": 0.5619, + "step": 25259 + }, + { + "epoch": 0.7741816844428099, + "grad_norm": 1.6824031332201672, + "learning_rate": 2.5574255739355523e-06, + "loss": 0.6676, + "step": 25260 + }, + { + "epoch": 0.774212332965551, + "grad_norm": 1.635191175684177, + "learning_rate": 2.5567626363321972e-06, + "loss": 0.5356, + "step": 25261 + }, + { + "epoch": 0.7742429814882923, + "grad_norm": 1.6959347339338284, + "learning_rate": 2.556099772068963e-06, + "loss": 0.6324, + "step": 25262 + }, + { + "epoch": 0.7742736300110334, + "grad_norm": 1.4771487460545378, + "learning_rate": 2.5554369811523803e-06, + "loss": 0.6036, + "step": 25263 + }, + { + "epoch": 0.7743042785337747, + "grad_norm": 1.7539476289821156, + "learning_rate": 2.554774263588986e-06, + "loss": 0.6578, + "step": 25264 + }, + { + "epoch": 0.7743349270565159, + "grad_norm": 1.908337951292504, + "learning_rate": 2.5541116193853023e-06, + "loss": 0.5982, + "step": 25265 + }, + { + "epoch": 0.7743655755792571, + "grad_norm": 1.6621172947580438, + "learning_rate": 2.5534490485478626e-06, + "loss": 0.6169, + "step": 25266 + }, + { + "epoch": 0.7743962241019983, + "grad_norm": 1.6223038934905494, + "learning_rate": 2.5527865510831972e-06, + "loss": 0.5732, + "step": 25267 + }, + { + "epoch": 0.7744268726247395, + "grad_norm": 1.4791801300875878, + "learning_rate": 2.5521241269978283e-06, + "loss": 0.7063, + "step": 25268 + }, + { + "epoch": 0.7744575211474807, + "grad_norm": 1.735911873488686, + "learning_rate": 2.5514617762982897e-06, + "loss": 0.6723, + "step": 25269 + }, + { + "epoch": 0.7744881696702219, + "grad_norm": 1.5633695997886254, + "learning_rate": 2.5507994989911e-06, + "loss": 0.61, + "step": 25270 + }, + { + "epoch": 0.7745188181929631, + "grad_norm": 1.571489807573166, + "learning_rate": 2.5501372950827897e-06, + "loss": 0.7159, + "step": 25271 + }, + { + "epoch": 0.7745494667157043, + "grad_norm": 1.794529455076981, + "learning_rate": 2.5494751645798843e-06, + "loss": 0.6342, + "step": 25272 + }, + { + "epoch": 0.7745801152384455, + "grad_norm": 1.523614167771446, + "learning_rate": 2.5488131074889043e-06, + "loss": 0.5893, + "step": 25273 + }, + { + "epoch": 0.7746107637611868, + "grad_norm": 1.8016920893792874, + "learning_rate": 2.5481511238163757e-06, + "loss": 0.7141, + "step": 25274 + }, + { + "epoch": 0.7746414122839279, + "grad_norm": 1.8916395924877227, + "learning_rate": 2.547489213568823e-06, + "loss": 0.6956, + "step": 25275 + }, + { + "epoch": 0.7746720608066692, + "grad_norm": 1.5644986738283007, + "learning_rate": 2.5468273767527642e-06, + "loss": 0.5614, + "step": 25276 + }, + { + "epoch": 0.7747027093294103, + "grad_norm": 1.5851150300101537, + "learning_rate": 2.5461656133747206e-06, + "loss": 0.6855, + "step": 25277 + }, + { + "epoch": 0.7747333578521515, + "grad_norm": 0.6558104881094207, + "learning_rate": 2.545503923441218e-06, + "loss": 0.487, + "step": 25278 + }, + { + "epoch": 0.7747640063748927, + "grad_norm": 1.816378567554724, + "learning_rate": 2.5448423069587703e-06, + "loss": 0.6732, + "step": 25279 + }, + { + "epoch": 0.7747946548976339, + "grad_norm": 1.7499739198351996, + "learning_rate": 2.544180763933901e-06, + "loss": 0.6889, + "step": 25280 + }, + { + "epoch": 0.7748253034203751, + "grad_norm": 1.6447492852520587, + "learning_rate": 2.5435192943731237e-06, + "loss": 0.6275, + "step": 25281 + }, + { + "epoch": 0.7748559519431163, + "grad_norm": 1.7957035731032707, + "learning_rate": 2.542857898282958e-06, + "loss": 0.6958, + "step": 25282 + }, + { + "epoch": 0.7748866004658576, + "grad_norm": 1.9468827703478095, + "learning_rate": 2.5421965756699242e-06, + "loss": 0.656, + "step": 25283 + }, + { + "epoch": 0.7749172489885987, + "grad_norm": 1.7229734275632687, + "learning_rate": 2.541535326540533e-06, + "loss": 0.581, + "step": 25284 + }, + { + "epoch": 0.77494789751134, + "grad_norm": 1.8426457833068002, + "learning_rate": 2.5408741509013033e-06, + "loss": 0.6552, + "step": 25285 + }, + { + "epoch": 0.7749785460340811, + "grad_norm": 1.6813930663577752, + "learning_rate": 2.540213048758752e-06, + "loss": 0.6711, + "step": 25286 + }, + { + "epoch": 0.7750091945568224, + "grad_norm": 1.6687461278868527, + "learning_rate": 2.5395520201193857e-06, + "loss": 0.5843, + "step": 25287 + }, + { + "epoch": 0.7750398430795635, + "grad_norm": 1.5873988646375166, + "learning_rate": 2.538891064989727e-06, + "loss": 0.6734, + "step": 25288 + }, + { + "epoch": 0.7750704916023048, + "grad_norm": 1.7427451727354408, + "learning_rate": 2.53823018337628e-06, + "loss": 0.6907, + "step": 25289 + }, + { + "epoch": 0.7751011401250459, + "grad_norm": 1.5476958080675414, + "learning_rate": 2.5375693752855603e-06, + "loss": 0.5732, + "step": 25290 + }, + { + "epoch": 0.7751317886477872, + "grad_norm": 1.4239964510717638, + "learning_rate": 2.5369086407240804e-06, + "loss": 0.4997, + "step": 25291 + }, + { + "epoch": 0.7751624371705284, + "grad_norm": 1.9141419139173848, + "learning_rate": 2.5362479796983486e-06, + "loss": 0.7776, + "step": 25292 + }, + { + "epoch": 0.7751930856932696, + "grad_norm": 1.6627167768028275, + "learning_rate": 2.535587392214873e-06, + "loss": 0.7138, + "step": 25293 + }, + { + "epoch": 0.7752237342160108, + "grad_norm": 1.6894753940823546, + "learning_rate": 2.5349268782801697e-06, + "loss": 0.6266, + "step": 25294 + }, + { + "epoch": 0.775254382738752, + "grad_norm": 1.675109874945071, + "learning_rate": 2.5342664379007375e-06, + "loss": 0.5843, + "step": 25295 + }, + { + "epoch": 0.7752850312614932, + "grad_norm": 1.6578246969078059, + "learning_rate": 2.533606071083089e-06, + "loss": 0.6732, + "step": 25296 + }, + { + "epoch": 0.7753156797842344, + "grad_norm": 1.5693665200272946, + "learning_rate": 2.532945777833732e-06, + "loss": 0.5915, + "step": 25297 + }, + { + "epoch": 0.7753463283069756, + "grad_norm": 0.6648052423106099, + "learning_rate": 2.5322855581591687e-06, + "loss": 0.5478, + "step": 25298 + }, + { + "epoch": 0.7753769768297168, + "grad_norm": 1.7300030623002352, + "learning_rate": 2.53162541206591e-06, + "loss": 0.5791, + "step": 25299 + }, + { + "epoch": 0.775407625352458, + "grad_norm": 0.6517566466929261, + "learning_rate": 2.5309653395604505e-06, + "loss": 0.5125, + "step": 25300 + }, + { + "epoch": 0.7754382738751993, + "grad_norm": 1.696649193583042, + "learning_rate": 2.5303053406493063e-06, + "loss": 0.6094, + "step": 25301 + }, + { + "epoch": 0.7754689223979404, + "grad_norm": 1.6977361479282918, + "learning_rate": 2.529645415338975e-06, + "loss": 0.4848, + "step": 25302 + }, + { + "epoch": 0.7754995709206817, + "grad_norm": 1.6282782394223934, + "learning_rate": 2.528985563635955e-06, + "loss": 0.6249, + "step": 25303 + }, + { + "epoch": 0.7755302194434228, + "grad_norm": 0.6865052380320504, + "learning_rate": 2.5283257855467537e-06, + "loss": 0.5294, + "step": 25304 + }, + { + "epoch": 0.7755608679661641, + "grad_norm": 1.4473392579116098, + "learning_rate": 2.5276660810778708e-06, + "loss": 0.6054, + "step": 25305 + }, + { + "epoch": 0.7755915164889052, + "grad_norm": 0.6490207623976514, + "learning_rate": 2.527006450235805e-06, + "loss": 0.5099, + "step": 25306 + }, + { + "epoch": 0.7756221650116465, + "grad_norm": 0.647785216335446, + "learning_rate": 2.526346893027055e-06, + "loss": 0.4852, + "step": 25307 + }, + { + "epoch": 0.7756528135343876, + "grad_norm": 1.6225295959409638, + "learning_rate": 2.525687409458125e-06, + "loss": 0.6092, + "step": 25308 + }, + { + "epoch": 0.7756834620571288, + "grad_norm": 1.5722624442312954, + "learning_rate": 2.5250279995355065e-06, + "loss": 0.5852, + "step": 25309 + }, + { + "epoch": 0.77571411057987, + "grad_norm": 1.8458503549820306, + "learning_rate": 2.5243686632657027e-06, + "loss": 0.6771, + "step": 25310 + }, + { + "epoch": 0.7757447591026112, + "grad_norm": 1.6871079359716936, + "learning_rate": 2.523709400655201e-06, + "loss": 0.5372, + "step": 25311 + }, + { + "epoch": 0.7757754076253525, + "grad_norm": 0.6948801627615172, + "learning_rate": 2.5230502117105094e-06, + "loss": 0.5451, + "step": 25312 + }, + { + "epoch": 0.7758060561480936, + "grad_norm": 1.6716843072573055, + "learning_rate": 2.5223910964381173e-06, + "loss": 0.6509, + "step": 25313 + }, + { + "epoch": 0.7758367046708349, + "grad_norm": 1.7110524786919856, + "learning_rate": 2.5217320548445155e-06, + "loss": 0.703, + "step": 25314 + }, + { + "epoch": 0.775867353193576, + "grad_norm": 1.5840309857635666, + "learning_rate": 2.521073086936202e-06, + "loss": 0.7337, + "step": 25315 + }, + { + "epoch": 0.7758980017163173, + "grad_norm": 1.7810449860805848, + "learning_rate": 2.5204141927196712e-06, + "loss": 0.6514, + "step": 25316 + }, + { + "epoch": 0.7759286502390584, + "grad_norm": 0.6525015547447424, + "learning_rate": 2.51975537220141e-06, + "loss": 0.5155, + "step": 25317 + }, + { + "epoch": 0.7759592987617997, + "grad_norm": 0.677164336490316, + "learning_rate": 2.5190966253879145e-06, + "loss": 0.5477, + "step": 25318 + }, + { + "epoch": 0.7759899472845408, + "grad_norm": 1.486639073695604, + "learning_rate": 2.518437952285673e-06, + "loss": 0.5594, + "step": 25319 + }, + { + "epoch": 0.7760205958072821, + "grad_norm": 1.607799034271857, + "learning_rate": 2.5177793529011786e-06, + "loss": 0.7534, + "step": 25320 + }, + { + "epoch": 0.7760512443300233, + "grad_norm": 0.679856901873704, + "learning_rate": 2.5171208272409197e-06, + "loss": 0.5435, + "step": 25321 + }, + { + "epoch": 0.7760818928527645, + "grad_norm": 1.7028677094017233, + "learning_rate": 2.516462375311378e-06, + "loss": 0.6193, + "step": 25322 + }, + { + "epoch": 0.7761125413755057, + "grad_norm": 1.6503286998286635, + "learning_rate": 2.5158039971190527e-06, + "loss": 0.6374, + "step": 25323 + }, + { + "epoch": 0.7761431898982469, + "grad_norm": 1.8450677461441527, + "learning_rate": 2.5151456926704253e-06, + "loss": 0.6877, + "step": 25324 + }, + { + "epoch": 0.7761738384209881, + "grad_norm": 1.7306836746311869, + "learning_rate": 2.5144874619719804e-06, + "loss": 0.6104, + "step": 25325 + }, + { + "epoch": 0.7762044869437293, + "grad_norm": 1.6763295215843208, + "learning_rate": 2.5138293050302055e-06, + "loss": 0.5842, + "step": 25326 + }, + { + "epoch": 0.7762351354664705, + "grad_norm": 1.7819540183541867, + "learning_rate": 2.5131712218515858e-06, + "loss": 0.6168, + "step": 25327 + }, + { + "epoch": 0.7762657839892118, + "grad_norm": 1.6455060683442853, + "learning_rate": 2.5125132124426088e-06, + "loss": 0.6336, + "step": 25328 + }, + { + "epoch": 0.7762964325119529, + "grad_norm": 1.715993106023308, + "learning_rate": 2.5118552768097516e-06, + "loss": 0.7444, + "step": 25329 + }, + { + "epoch": 0.7763270810346942, + "grad_norm": 1.5514417579251039, + "learning_rate": 2.5111974149594998e-06, + "loss": 0.6828, + "step": 25330 + }, + { + "epoch": 0.7763577295574353, + "grad_norm": 1.7310246527331867, + "learning_rate": 2.5105396268983393e-06, + "loss": 0.7554, + "step": 25331 + }, + { + "epoch": 0.7763883780801766, + "grad_norm": 1.5929374382022568, + "learning_rate": 2.5098819126327488e-06, + "loss": 0.7077, + "step": 25332 + }, + { + "epoch": 0.7764190266029177, + "grad_norm": 1.8652542745072842, + "learning_rate": 2.509224272169205e-06, + "loss": 0.7593, + "step": 25333 + }, + { + "epoch": 0.776449675125659, + "grad_norm": 1.4577178394980208, + "learning_rate": 2.5085667055141903e-06, + "loss": 0.6487, + "step": 25334 + }, + { + "epoch": 0.7764803236484001, + "grad_norm": 1.8795768381025046, + "learning_rate": 2.507909212674189e-06, + "loss": 0.6939, + "step": 25335 + }, + { + "epoch": 0.7765109721711414, + "grad_norm": 1.4932159793829156, + "learning_rate": 2.5072517936556705e-06, + "loss": 0.5352, + "step": 25336 + }, + { + "epoch": 0.7765416206938826, + "grad_norm": 1.6133445898890142, + "learning_rate": 2.5065944484651185e-06, + "loss": 0.5862, + "step": 25337 + }, + { + "epoch": 0.7765722692166238, + "grad_norm": 0.6705591503714069, + "learning_rate": 2.505937177109008e-06, + "loss": 0.5296, + "step": 25338 + }, + { + "epoch": 0.776602917739365, + "grad_norm": 1.712987263046058, + "learning_rate": 2.5052799795938187e-06, + "loss": 0.5715, + "step": 25339 + }, + { + "epoch": 0.7766335662621061, + "grad_norm": 1.6313050948355978, + "learning_rate": 2.5046228559260244e-06, + "loss": 0.6459, + "step": 25340 + }, + { + "epoch": 0.7766642147848474, + "grad_norm": 1.7353442451239358, + "learning_rate": 2.503965806112092e-06, + "loss": 0.6803, + "step": 25341 + }, + { + "epoch": 0.7766948633075885, + "grad_norm": 1.6436196012750264, + "learning_rate": 2.5033088301585085e-06, + "loss": 0.6988, + "step": 25342 + }, + { + "epoch": 0.7767255118303298, + "grad_norm": 1.5490012500497143, + "learning_rate": 2.502651928071741e-06, + "loss": 0.6357, + "step": 25343 + }, + { + "epoch": 0.7767561603530709, + "grad_norm": 0.6566029827584627, + "learning_rate": 2.50199509985826e-06, + "loss": 0.4901, + "step": 25344 + }, + { + "epoch": 0.7767868088758122, + "grad_norm": 1.9247364929488873, + "learning_rate": 2.5013383455245397e-06, + "loss": 0.6752, + "step": 25345 + }, + { + "epoch": 0.7768174573985533, + "grad_norm": 1.6434348472186142, + "learning_rate": 2.5006816650770503e-06, + "loss": 0.5972, + "step": 25346 + }, + { + "epoch": 0.7768481059212946, + "grad_norm": 0.6526030031077088, + "learning_rate": 2.5000250585222672e-06, + "loss": 0.5196, + "step": 25347 + }, + { + "epoch": 0.7768787544440358, + "grad_norm": 1.594839603951086, + "learning_rate": 2.4993685258666534e-06, + "loss": 0.5989, + "step": 25348 + }, + { + "epoch": 0.776909402966777, + "grad_norm": 1.516302961319116, + "learning_rate": 2.4987120671166798e-06, + "loss": 0.5734, + "step": 25349 + }, + { + "epoch": 0.7769400514895182, + "grad_norm": 1.4878982162652383, + "learning_rate": 2.4980556822788193e-06, + "loss": 0.6659, + "step": 25350 + }, + { + "epoch": 0.7769707000122594, + "grad_norm": 1.5752876902618242, + "learning_rate": 2.4973993713595345e-06, + "loss": 0.6068, + "step": 25351 + }, + { + "epoch": 0.7770013485350006, + "grad_norm": 1.7851308006938826, + "learning_rate": 2.496743134365288e-06, + "loss": 0.6734, + "step": 25352 + }, + { + "epoch": 0.7770319970577418, + "grad_norm": 1.7040303352495303, + "learning_rate": 2.496086971302557e-06, + "loss": 0.5697, + "step": 25353 + }, + { + "epoch": 0.777062645580483, + "grad_norm": 1.7990126687600805, + "learning_rate": 2.4954308821777984e-06, + "loss": 0.6711, + "step": 25354 + }, + { + "epoch": 0.7770932941032243, + "grad_norm": 1.7272316190353416, + "learning_rate": 2.4947748669974824e-06, + "loss": 0.7349, + "step": 25355 + }, + { + "epoch": 0.7771239426259654, + "grad_norm": 1.7423632696277231, + "learning_rate": 2.4941189257680665e-06, + "loss": 0.6474, + "step": 25356 + }, + { + "epoch": 0.7771545911487067, + "grad_norm": 1.7365731298281015, + "learning_rate": 2.4934630584960186e-06, + "loss": 0.698, + "step": 25357 + }, + { + "epoch": 0.7771852396714478, + "grad_norm": 1.6307074687096286, + "learning_rate": 2.492807265187801e-06, + "loss": 0.6088, + "step": 25358 + }, + { + "epoch": 0.7772158881941891, + "grad_norm": 1.6100555095564961, + "learning_rate": 2.4921515458498726e-06, + "loss": 0.6298, + "step": 25359 + }, + { + "epoch": 0.7772465367169302, + "grad_norm": 1.7022485048863942, + "learning_rate": 2.491495900488695e-06, + "loss": 0.5994, + "step": 25360 + }, + { + "epoch": 0.7772771852396715, + "grad_norm": 0.6495798203724956, + "learning_rate": 2.490840329110733e-06, + "loss": 0.5382, + "step": 25361 + }, + { + "epoch": 0.7773078337624126, + "grad_norm": 1.740329869517033, + "learning_rate": 2.490184831722442e-06, + "loss": 0.6816, + "step": 25362 + }, + { + "epoch": 0.7773384822851539, + "grad_norm": 1.947016146015014, + "learning_rate": 2.4895294083302755e-06, + "loss": 0.7623, + "step": 25363 + }, + { + "epoch": 0.777369130807895, + "grad_norm": 1.9230900055371132, + "learning_rate": 2.4888740589407035e-06, + "loss": 0.7401, + "step": 25364 + }, + { + "epoch": 0.7773997793306363, + "grad_norm": 0.6781002461827107, + "learning_rate": 2.4882187835601744e-06, + "loss": 0.5261, + "step": 25365 + }, + { + "epoch": 0.7774304278533775, + "grad_norm": 1.6960898100815278, + "learning_rate": 2.4875635821951504e-06, + "loss": 0.5875, + "step": 25366 + }, + { + "epoch": 0.7774610763761187, + "grad_norm": 1.7771696213327042, + "learning_rate": 2.4869084548520815e-06, + "loss": 0.6659, + "step": 25367 + }, + { + "epoch": 0.7774917248988599, + "grad_norm": 1.78274095467029, + "learning_rate": 2.4862534015374264e-06, + "loss": 0.6682, + "step": 25368 + }, + { + "epoch": 0.7775223734216011, + "grad_norm": 1.5618830621499158, + "learning_rate": 2.485598422257641e-06, + "loss": 0.6971, + "step": 25369 + }, + { + "epoch": 0.7775530219443423, + "grad_norm": 1.6126067121926173, + "learning_rate": 2.484943517019175e-06, + "loss": 0.5987, + "step": 25370 + }, + { + "epoch": 0.7775836704670834, + "grad_norm": 1.7209804585080801, + "learning_rate": 2.484288685828483e-06, + "loss": 0.5988, + "step": 25371 + }, + { + "epoch": 0.7776143189898247, + "grad_norm": 1.5494936323850526, + "learning_rate": 2.4836339286920196e-06, + "loss": 0.5667, + "step": 25372 + }, + { + "epoch": 0.7776449675125658, + "grad_norm": 1.808371016905946, + "learning_rate": 2.4829792456162328e-06, + "loss": 0.7152, + "step": 25373 + }, + { + "epoch": 0.7776756160353071, + "grad_norm": 1.6329753011883272, + "learning_rate": 2.4823246366075737e-06, + "loss": 0.7223, + "step": 25374 + }, + { + "epoch": 0.7777062645580483, + "grad_norm": 1.4758816339971639, + "learning_rate": 2.4816701016724977e-06, + "loss": 0.6246, + "step": 25375 + }, + { + "epoch": 0.7777369130807895, + "grad_norm": 1.5385205969704205, + "learning_rate": 2.4810156408174457e-06, + "loss": 0.6679, + "step": 25376 + }, + { + "epoch": 0.7777675616035307, + "grad_norm": 1.555354309847288, + "learning_rate": 2.4803612540488732e-06, + "loss": 0.6483, + "step": 25377 + }, + { + "epoch": 0.7777982101262719, + "grad_norm": 1.4760854942446695, + "learning_rate": 2.4797069413732233e-06, + "loss": 0.5618, + "step": 25378 + }, + { + "epoch": 0.7778288586490131, + "grad_norm": 1.5696491090507576, + "learning_rate": 2.4790527027969448e-06, + "loss": 0.5963, + "step": 25379 + }, + { + "epoch": 0.7778595071717543, + "grad_norm": 1.8582147499482624, + "learning_rate": 2.478398538326486e-06, + "loss": 0.6989, + "step": 25380 + }, + { + "epoch": 0.7778901556944955, + "grad_norm": 1.6960356338581888, + "learning_rate": 2.47774444796829e-06, + "loss": 0.6098, + "step": 25381 + }, + { + "epoch": 0.7779208042172367, + "grad_norm": 1.480315797541692, + "learning_rate": 2.4770904317288012e-06, + "loss": 0.6353, + "step": 25382 + }, + { + "epoch": 0.7779514527399779, + "grad_norm": 1.673488085654771, + "learning_rate": 2.476436489614469e-06, + "loss": 0.6892, + "step": 25383 + }, + { + "epoch": 0.7779821012627192, + "grad_norm": 1.8752464088544936, + "learning_rate": 2.4757826216317295e-06, + "loss": 0.7031, + "step": 25384 + }, + { + "epoch": 0.7780127497854603, + "grad_norm": 1.5341909337318878, + "learning_rate": 2.475128827787031e-06, + "loss": 0.7301, + "step": 25385 + }, + { + "epoch": 0.7780433983082016, + "grad_norm": 1.6078397572440226, + "learning_rate": 2.4744751080868125e-06, + "loss": 0.6105, + "step": 25386 + }, + { + "epoch": 0.7780740468309427, + "grad_norm": 1.6903758462423282, + "learning_rate": 2.4738214625375145e-06, + "loss": 0.5709, + "step": 25387 + }, + { + "epoch": 0.778104695353684, + "grad_norm": 1.6967655192935711, + "learning_rate": 2.4731678911455838e-06, + "loss": 0.6228, + "step": 25388 + }, + { + "epoch": 0.7781353438764251, + "grad_norm": 0.6644801934022821, + "learning_rate": 2.472514393917451e-06, + "loss": 0.5411, + "step": 25389 + }, + { + "epoch": 0.7781659923991664, + "grad_norm": 1.6506345537196738, + "learning_rate": 2.471860970859562e-06, + "loss": 0.6192, + "step": 25390 + }, + { + "epoch": 0.7781966409219075, + "grad_norm": 0.7044435057858467, + "learning_rate": 2.471207621978354e-06, + "loss": 0.5264, + "step": 25391 + }, + { + "epoch": 0.7782272894446488, + "grad_norm": 1.7108045089303454, + "learning_rate": 2.470554347280262e-06, + "loss": 0.8004, + "step": 25392 + }, + { + "epoch": 0.77825793796739, + "grad_norm": 1.6621902779010578, + "learning_rate": 2.4699011467717237e-06, + "loss": 0.5734, + "step": 25393 + }, + { + "epoch": 0.7782885864901312, + "grad_norm": 1.595752054568799, + "learning_rate": 2.4692480204591797e-06, + "loss": 0.5702, + "step": 25394 + }, + { + "epoch": 0.7783192350128724, + "grad_norm": 1.621493541986823, + "learning_rate": 2.4685949683490584e-06, + "loss": 0.6542, + "step": 25395 + }, + { + "epoch": 0.7783498835356136, + "grad_norm": 1.6756329381736692, + "learning_rate": 2.4679419904478007e-06, + "loss": 0.6461, + "step": 25396 + }, + { + "epoch": 0.7783805320583548, + "grad_norm": 1.7636317278865505, + "learning_rate": 2.467289086761835e-06, + "loss": 0.5845, + "step": 25397 + }, + { + "epoch": 0.778411180581096, + "grad_norm": 0.6632016120398682, + "learning_rate": 2.4666362572975965e-06, + "loss": 0.5263, + "step": 25398 + }, + { + "epoch": 0.7784418291038372, + "grad_norm": 1.6706435541819358, + "learning_rate": 2.4659835020615232e-06, + "loss": 0.6752, + "step": 25399 + }, + { + "epoch": 0.7784724776265785, + "grad_norm": 0.6427176131747115, + "learning_rate": 2.465330821060038e-06, + "loss": 0.5182, + "step": 25400 + }, + { + "epoch": 0.7785031261493196, + "grad_norm": 1.762058421066308, + "learning_rate": 2.4646782142995763e-06, + "loss": 0.6356, + "step": 25401 + }, + { + "epoch": 0.7785337746720608, + "grad_norm": 1.6097546167377366, + "learning_rate": 2.4640256817865704e-06, + "loss": 0.4982, + "step": 25402 + }, + { + "epoch": 0.778564423194802, + "grad_norm": 1.8341399483535312, + "learning_rate": 2.4633732235274453e-06, + "loss": 0.6562, + "step": 25403 + }, + { + "epoch": 0.7785950717175432, + "grad_norm": 1.5029532768310223, + "learning_rate": 2.4627208395286316e-06, + "loss": 0.635, + "step": 25404 + }, + { + "epoch": 0.7786257202402844, + "grad_norm": 1.710508003042808, + "learning_rate": 2.462068529796562e-06, + "loss": 0.7076, + "step": 25405 + }, + { + "epoch": 0.7786563687630256, + "grad_norm": 1.6473883335655168, + "learning_rate": 2.4614162943376564e-06, + "loss": 0.6523, + "step": 25406 + }, + { + "epoch": 0.7786870172857668, + "grad_norm": 0.6507419105767244, + "learning_rate": 2.4607641331583478e-06, + "loss": 0.543, + "step": 25407 + }, + { + "epoch": 0.778717665808508, + "grad_norm": 1.6968937322750446, + "learning_rate": 2.460112046265055e-06, + "loss": 0.6355, + "step": 25408 + }, + { + "epoch": 0.7787483143312492, + "grad_norm": 1.6105454759782922, + "learning_rate": 2.4594600336642095e-06, + "loss": 0.6242, + "step": 25409 + }, + { + "epoch": 0.7787789628539904, + "grad_norm": 0.7024123505651536, + "learning_rate": 2.4588080953622352e-06, + "loss": 0.5351, + "step": 25410 + }, + { + "epoch": 0.7788096113767317, + "grad_norm": 1.7623235672203585, + "learning_rate": 2.4581562313655516e-06, + "loss": 0.643, + "step": 25411 + }, + { + "epoch": 0.7788402598994728, + "grad_norm": 1.7168688918112847, + "learning_rate": 2.457504441680584e-06, + "loss": 0.7285, + "step": 25412 + }, + { + "epoch": 0.7788709084222141, + "grad_norm": 1.6591718918772342, + "learning_rate": 2.4568527263137588e-06, + "loss": 0.5114, + "step": 25413 + }, + { + "epoch": 0.7789015569449552, + "grad_norm": 1.6935331795086856, + "learning_rate": 2.45620108527149e-06, + "loss": 0.5943, + "step": 25414 + }, + { + "epoch": 0.7789322054676965, + "grad_norm": 2.048068293766446, + "learning_rate": 2.455549518560202e-06, + "loss": 0.7583, + "step": 25415 + }, + { + "epoch": 0.7789628539904376, + "grad_norm": 1.7841488494339461, + "learning_rate": 2.4548980261863187e-06, + "loss": 0.7225, + "step": 25416 + }, + { + "epoch": 0.7789935025131789, + "grad_norm": 1.6830845587318906, + "learning_rate": 2.454246608156252e-06, + "loss": 0.5634, + "step": 25417 + }, + { + "epoch": 0.77902415103592, + "grad_norm": 1.6411210572015824, + "learning_rate": 2.453595264476427e-06, + "loss": 0.6095, + "step": 25418 + }, + { + "epoch": 0.7790547995586613, + "grad_norm": 1.6142321926785805, + "learning_rate": 2.452943995153253e-06, + "loss": 0.672, + "step": 25419 + }, + { + "epoch": 0.7790854480814025, + "grad_norm": 1.7953785343747752, + "learning_rate": 2.452292800193159e-06, + "loss": 0.6689, + "step": 25420 + }, + { + "epoch": 0.7791160966041437, + "grad_norm": 1.6189000748004743, + "learning_rate": 2.4516416796025543e-06, + "loss": 0.6819, + "step": 25421 + }, + { + "epoch": 0.7791467451268849, + "grad_norm": 1.6903239600258362, + "learning_rate": 2.450990633387853e-06, + "loss": 0.6311, + "step": 25422 + }, + { + "epoch": 0.7791773936496261, + "grad_norm": 1.6686350075215421, + "learning_rate": 2.450339661555473e-06, + "loss": 0.7317, + "step": 25423 + }, + { + "epoch": 0.7792080421723673, + "grad_norm": 1.5390134114536054, + "learning_rate": 2.4496887641118307e-06, + "loss": 0.6442, + "step": 25424 + }, + { + "epoch": 0.7792386906951085, + "grad_norm": 1.8190335087803458, + "learning_rate": 2.4490379410633336e-06, + "loss": 0.6059, + "step": 25425 + }, + { + "epoch": 0.7792693392178497, + "grad_norm": 0.6847231270043465, + "learning_rate": 2.4483871924163983e-06, + "loss": 0.5448, + "step": 25426 + }, + { + "epoch": 0.779299987740591, + "grad_norm": 1.7205559098660703, + "learning_rate": 2.4477365181774348e-06, + "loss": 0.6765, + "step": 25427 + }, + { + "epoch": 0.7793306362633321, + "grad_norm": 1.502285336978385, + "learning_rate": 2.4470859183528606e-06, + "loss": 0.5778, + "step": 25428 + }, + { + "epoch": 0.7793612847860734, + "grad_norm": 1.7249900740198756, + "learning_rate": 2.44643539294908e-06, + "loss": 0.6448, + "step": 25429 + }, + { + "epoch": 0.7793919333088145, + "grad_norm": 1.8618451853824722, + "learning_rate": 2.4457849419725012e-06, + "loss": 0.6425, + "step": 25430 + }, + { + "epoch": 0.7794225818315558, + "grad_norm": 1.5592742577462495, + "learning_rate": 2.4451345654295368e-06, + "loss": 0.6726, + "step": 25431 + }, + { + "epoch": 0.7794532303542969, + "grad_norm": 1.5267811843400136, + "learning_rate": 2.4444842633265963e-06, + "loss": 0.6643, + "step": 25432 + }, + { + "epoch": 0.7794838788770381, + "grad_norm": 1.80223501284614, + "learning_rate": 2.443834035670084e-06, + "loss": 0.6655, + "step": 25433 + }, + { + "epoch": 0.7795145273997793, + "grad_norm": 1.821706871031925, + "learning_rate": 2.4431838824664076e-06, + "loss": 0.6017, + "step": 25434 + }, + { + "epoch": 0.7795451759225205, + "grad_norm": 1.647384038527654, + "learning_rate": 2.442533803721977e-06, + "loss": 0.6948, + "step": 25435 + }, + { + "epoch": 0.7795758244452617, + "grad_norm": 1.7045076763270968, + "learning_rate": 2.441883799443191e-06, + "loss": 0.6323, + "step": 25436 + }, + { + "epoch": 0.7796064729680029, + "grad_norm": 1.6527754420082466, + "learning_rate": 2.4412338696364614e-06, + "loss": 0.5955, + "step": 25437 + }, + { + "epoch": 0.7796371214907442, + "grad_norm": 0.6652825424192425, + "learning_rate": 2.4405840143081826e-06, + "loss": 0.5345, + "step": 25438 + }, + { + "epoch": 0.7796677700134853, + "grad_norm": 1.7071805536550644, + "learning_rate": 2.4399342334647692e-06, + "loss": 0.6654, + "step": 25439 + }, + { + "epoch": 0.7796984185362266, + "grad_norm": 1.525449260581906, + "learning_rate": 2.4392845271126185e-06, + "loss": 0.5752, + "step": 25440 + }, + { + "epoch": 0.7797290670589677, + "grad_norm": 1.9557140161076954, + "learning_rate": 2.4386348952581285e-06, + "loss": 0.7254, + "step": 25441 + }, + { + "epoch": 0.779759715581709, + "grad_norm": 1.5766723061320804, + "learning_rate": 2.4379853379077032e-06, + "loss": 0.5594, + "step": 25442 + }, + { + "epoch": 0.7797903641044501, + "grad_norm": 1.771116457502718, + "learning_rate": 2.4373358550677475e-06, + "loss": 0.6763, + "step": 25443 + }, + { + "epoch": 0.7798210126271914, + "grad_norm": 1.697429279252008, + "learning_rate": 2.4366864467446526e-06, + "loss": 0.6554, + "step": 25444 + }, + { + "epoch": 0.7798516611499325, + "grad_norm": 1.6668802371494196, + "learning_rate": 2.436037112944821e-06, + "loss": 0.7334, + "step": 25445 + }, + { + "epoch": 0.7798823096726738, + "grad_norm": 1.6041578370219611, + "learning_rate": 2.435387853674651e-06, + "loss": 0.6535, + "step": 25446 + }, + { + "epoch": 0.779912958195415, + "grad_norm": 1.756378486112554, + "learning_rate": 2.434738668940544e-06, + "loss": 0.6443, + "step": 25447 + }, + { + "epoch": 0.7799436067181562, + "grad_norm": 0.6638807379029786, + "learning_rate": 2.434089558748892e-06, + "loss": 0.5426, + "step": 25448 + }, + { + "epoch": 0.7799742552408974, + "grad_norm": 1.5703122650791954, + "learning_rate": 2.4334405231060854e-06, + "loss": 0.6069, + "step": 25449 + }, + { + "epoch": 0.7800049037636386, + "grad_norm": 1.6319697671003677, + "learning_rate": 2.4327915620185317e-06, + "loss": 0.5744, + "step": 25450 + }, + { + "epoch": 0.7800355522863798, + "grad_norm": 1.7595806848594093, + "learning_rate": 2.432142675492618e-06, + "loss": 0.5387, + "step": 25451 + }, + { + "epoch": 0.780066200809121, + "grad_norm": 1.7492235069065083, + "learning_rate": 2.4314938635347364e-06, + "loss": 0.6772, + "step": 25452 + }, + { + "epoch": 0.7800968493318622, + "grad_norm": 1.610833922293547, + "learning_rate": 2.4308451261512823e-06, + "loss": 0.569, + "step": 25453 + }, + { + "epoch": 0.7801274978546034, + "grad_norm": 1.7355929588979424, + "learning_rate": 2.4301964633486473e-06, + "loss": 0.6706, + "step": 25454 + }, + { + "epoch": 0.7801581463773446, + "grad_norm": 1.6129874131777115, + "learning_rate": 2.4295478751332268e-06, + "loss": 0.6571, + "step": 25455 + }, + { + "epoch": 0.7801887949000859, + "grad_norm": 1.5504873673619994, + "learning_rate": 2.4288993615114053e-06, + "loss": 0.6198, + "step": 25456 + }, + { + "epoch": 0.780219443422827, + "grad_norm": 1.483079081244339, + "learning_rate": 2.4282509224895755e-06, + "loss": 0.5472, + "step": 25457 + }, + { + "epoch": 0.7802500919455683, + "grad_norm": 1.5730805158871422, + "learning_rate": 2.427602558074129e-06, + "loss": 0.6296, + "step": 25458 + }, + { + "epoch": 0.7802807404683094, + "grad_norm": 0.6671723847231783, + "learning_rate": 2.4269542682714532e-06, + "loss": 0.5267, + "step": 25459 + }, + { + "epoch": 0.7803113889910507, + "grad_norm": 1.8214546924922372, + "learning_rate": 2.4263060530879277e-06, + "loss": 0.536, + "step": 25460 + }, + { + "epoch": 0.7803420375137918, + "grad_norm": 1.646684561748201, + "learning_rate": 2.425657912529953e-06, + "loss": 0.729, + "step": 25461 + }, + { + "epoch": 0.7803726860365331, + "grad_norm": 1.7828048715904907, + "learning_rate": 2.4250098466039087e-06, + "loss": 0.5816, + "step": 25462 + }, + { + "epoch": 0.7804033345592742, + "grad_norm": 1.5593271416836987, + "learning_rate": 2.4243618553161773e-06, + "loss": 0.6875, + "step": 25463 + }, + { + "epoch": 0.7804339830820154, + "grad_norm": 1.7518306185040926, + "learning_rate": 2.4237139386731465e-06, + "loss": 0.6506, + "step": 25464 + }, + { + "epoch": 0.7804646316047567, + "grad_norm": 1.4263111140913591, + "learning_rate": 2.4230660966812012e-06, + "loss": 0.54, + "step": 25465 + }, + { + "epoch": 0.7804952801274978, + "grad_norm": 1.6906472385455569, + "learning_rate": 2.422418329346727e-06, + "loss": 0.6489, + "step": 25466 + }, + { + "epoch": 0.7805259286502391, + "grad_norm": 0.6577901540809472, + "learning_rate": 2.4217706366761017e-06, + "loss": 0.5347, + "step": 25467 + }, + { + "epoch": 0.7805565771729802, + "grad_norm": 1.5294911180233566, + "learning_rate": 2.4211230186757085e-06, + "loss": 0.5659, + "step": 25468 + }, + { + "epoch": 0.7805872256957215, + "grad_norm": 1.6960411464071525, + "learning_rate": 2.420475475351932e-06, + "loss": 0.6358, + "step": 25469 + }, + { + "epoch": 0.7806178742184626, + "grad_norm": 2.000943845855656, + "learning_rate": 2.41982800671115e-06, + "loss": 0.684, + "step": 25470 + }, + { + "epoch": 0.7806485227412039, + "grad_norm": 1.6532452294184585, + "learning_rate": 2.4191806127597373e-06, + "loss": 0.584, + "step": 25471 + }, + { + "epoch": 0.780679171263945, + "grad_norm": 1.8970806136692522, + "learning_rate": 2.418533293504083e-06, + "loss": 0.6701, + "step": 25472 + }, + { + "epoch": 0.7807098197866863, + "grad_norm": 1.56691888555747, + "learning_rate": 2.4178860489505564e-06, + "loss": 0.5696, + "step": 25473 + }, + { + "epoch": 0.7807404683094274, + "grad_norm": 1.6101577424533406, + "learning_rate": 2.4172388791055424e-06, + "loss": 0.6347, + "step": 25474 + }, + { + "epoch": 0.7807711168321687, + "grad_norm": 0.6522490381826531, + "learning_rate": 2.4165917839754103e-06, + "loss": 0.4901, + "step": 25475 + }, + { + "epoch": 0.7808017653549099, + "grad_norm": 1.5870691713573015, + "learning_rate": 2.41594476356654e-06, + "loss": 0.6201, + "step": 25476 + }, + { + "epoch": 0.7808324138776511, + "grad_norm": 1.763098062458711, + "learning_rate": 2.415297817885309e-06, + "loss": 0.6358, + "step": 25477 + }, + { + "epoch": 0.7808630624003923, + "grad_norm": 1.6394942842269948, + "learning_rate": 2.4146509469380865e-06, + "loss": 0.6913, + "step": 25478 + }, + { + "epoch": 0.7808937109231335, + "grad_norm": 1.5422782602881624, + "learning_rate": 2.4140041507312496e-06, + "loss": 0.6243, + "step": 25479 + }, + { + "epoch": 0.7809243594458747, + "grad_norm": 1.8325211857314185, + "learning_rate": 2.4133574292711726e-06, + "loss": 0.7152, + "step": 25480 + }, + { + "epoch": 0.7809550079686159, + "grad_norm": 1.8520826442984701, + "learning_rate": 2.4127107825642236e-06, + "loss": 0.605, + "step": 25481 + }, + { + "epoch": 0.7809856564913571, + "grad_norm": 1.6107084245211845, + "learning_rate": 2.412064210616779e-06, + "loss": 0.6024, + "step": 25482 + }, + { + "epoch": 0.7810163050140984, + "grad_norm": 1.558376881555201, + "learning_rate": 2.4114177134352048e-06, + "loss": 0.5631, + "step": 25483 + }, + { + "epoch": 0.7810469535368395, + "grad_norm": 1.7262662480666011, + "learning_rate": 2.410771291025873e-06, + "loss": 0.7536, + "step": 25484 + }, + { + "epoch": 0.7810776020595808, + "grad_norm": 0.6954575319735649, + "learning_rate": 2.410124943395157e-06, + "loss": 0.529, + "step": 25485 + }, + { + "epoch": 0.7811082505823219, + "grad_norm": 1.7756538229029142, + "learning_rate": 2.409478670549419e-06, + "loss": 0.6782, + "step": 25486 + }, + { + "epoch": 0.7811388991050632, + "grad_norm": 1.8423791237993268, + "learning_rate": 2.4088324724950295e-06, + "loss": 0.5968, + "step": 25487 + }, + { + "epoch": 0.7811695476278043, + "grad_norm": 1.6390216490279776, + "learning_rate": 2.4081863492383585e-06, + "loss": 0.5869, + "step": 25488 + }, + { + "epoch": 0.7812001961505456, + "grad_norm": 1.5938031231278096, + "learning_rate": 2.40754030078577e-06, + "loss": 0.6488, + "step": 25489 + }, + { + "epoch": 0.7812308446732867, + "grad_norm": 1.5189905394862764, + "learning_rate": 2.4068943271436242e-06, + "loss": 0.5885, + "step": 25490 + }, + { + "epoch": 0.781261493196028, + "grad_norm": 1.4720057158405564, + "learning_rate": 2.406248428318296e-06, + "loss": 0.5532, + "step": 25491 + }, + { + "epoch": 0.7812921417187692, + "grad_norm": 1.608102380532941, + "learning_rate": 2.405602604316144e-06, + "loss": 0.6689, + "step": 25492 + }, + { + "epoch": 0.7813227902415104, + "grad_norm": 1.514350144259021, + "learning_rate": 2.404956855143534e-06, + "loss": 0.579, + "step": 25493 + }, + { + "epoch": 0.7813534387642516, + "grad_norm": 1.6040018543848613, + "learning_rate": 2.4043111808068255e-06, + "loss": 0.6709, + "step": 25494 + }, + { + "epoch": 0.7813840872869927, + "grad_norm": 0.6783583053469395, + "learning_rate": 2.4036655813123823e-06, + "loss": 0.5358, + "step": 25495 + }, + { + "epoch": 0.781414735809734, + "grad_norm": 1.7342953599216073, + "learning_rate": 2.4030200566665675e-06, + "loss": 0.741, + "step": 25496 + }, + { + "epoch": 0.7814453843324751, + "grad_norm": 1.5458890143160546, + "learning_rate": 2.402374606875738e-06, + "loss": 0.6482, + "step": 25497 + }, + { + "epoch": 0.7814760328552164, + "grad_norm": 1.7465613880646058, + "learning_rate": 2.401729231946255e-06, + "loss": 0.614, + "step": 25498 + }, + { + "epoch": 0.7815066813779575, + "grad_norm": 1.8453077784763168, + "learning_rate": 2.4010839318844803e-06, + "loss": 0.7186, + "step": 25499 + }, + { + "epoch": 0.7815373299006988, + "grad_norm": 1.613165607758133, + "learning_rate": 2.4004387066967684e-06, + "loss": 0.5861, + "step": 25500 + }, + { + "epoch": 0.78156797842344, + "grad_norm": 1.7284896980930595, + "learning_rate": 2.399793556389477e-06, + "loss": 0.6933, + "step": 25501 + }, + { + "epoch": 0.7815986269461812, + "grad_norm": 1.8191645528112577, + "learning_rate": 2.399148480968968e-06, + "loss": 0.5993, + "step": 25502 + }, + { + "epoch": 0.7816292754689224, + "grad_norm": 1.729620733228697, + "learning_rate": 2.398503480441591e-06, + "loss": 0.6922, + "step": 25503 + }, + { + "epoch": 0.7816599239916636, + "grad_norm": 0.6649241342406985, + "learning_rate": 2.3978585548137066e-06, + "loss": 0.5061, + "step": 25504 + }, + { + "epoch": 0.7816905725144048, + "grad_norm": 1.4070439580992549, + "learning_rate": 2.3972137040916645e-06, + "loss": 0.6479, + "step": 25505 + }, + { + "epoch": 0.781721221037146, + "grad_norm": 1.6457634006175028, + "learning_rate": 2.3965689282818206e-06, + "loss": 0.6342, + "step": 25506 + }, + { + "epoch": 0.7817518695598872, + "grad_norm": 1.522577665512399, + "learning_rate": 2.3959242273905314e-06, + "loss": 0.5924, + "step": 25507 + }, + { + "epoch": 0.7817825180826284, + "grad_norm": 1.8213798649665405, + "learning_rate": 2.395279601424143e-06, + "loss": 0.6277, + "step": 25508 + }, + { + "epoch": 0.7818131666053696, + "grad_norm": 0.6576006487771849, + "learning_rate": 2.3946350503890115e-06, + "loss": 0.5432, + "step": 25509 + }, + { + "epoch": 0.7818438151281109, + "grad_norm": 1.68962593230842, + "learning_rate": 2.3939905742914884e-06, + "loss": 0.6163, + "step": 25510 + }, + { + "epoch": 0.781874463650852, + "grad_norm": 1.863833794226272, + "learning_rate": 2.3933461731379204e-06, + "loss": 0.6553, + "step": 25511 + }, + { + "epoch": 0.7819051121735933, + "grad_norm": 1.7164972739861502, + "learning_rate": 2.3927018469346586e-06, + "loss": 0.7522, + "step": 25512 + }, + { + "epoch": 0.7819357606963344, + "grad_norm": 1.4313678527812053, + "learning_rate": 2.392057595688054e-06, + "loss": 0.6729, + "step": 25513 + }, + { + "epoch": 0.7819664092190757, + "grad_norm": 1.6448067515550686, + "learning_rate": 2.3914134194044504e-06, + "loss": 0.6896, + "step": 25514 + }, + { + "epoch": 0.7819970577418168, + "grad_norm": 1.6406005502115573, + "learning_rate": 2.3907693180902005e-06, + "loss": 0.6204, + "step": 25515 + }, + { + "epoch": 0.7820277062645581, + "grad_norm": 1.7502379355614404, + "learning_rate": 2.3901252917516436e-06, + "loss": 0.7168, + "step": 25516 + }, + { + "epoch": 0.7820583547872992, + "grad_norm": 1.818502049741224, + "learning_rate": 2.38948134039513e-06, + "loss": 0.7031, + "step": 25517 + }, + { + "epoch": 0.7820890033100405, + "grad_norm": 1.7528212451787288, + "learning_rate": 2.3888374640270062e-06, + "loss": 0.627, + "step": 25518 + }, + { + "epoch": 0.7821196518327816, + "grad_norm": 1.7375963697919814, + "learning_rate": 2.3881936626536116e-06, + "loss": 0.6478, + "step": 25519 + }, + { + "epoch": 0.7821503003555229, + "grad_norm": 1.7149490026932828, + "learning_rate": 2.3875499362812928e-06, + "loss": 0.6887, + "step": 25520 + }, + { + "epoch": 0.7821809488782641, + "grad_norm": 1.7950830282097618, + "learning_rate": 2.3869062849163947e-06, + "loss": 0.6401, + "step": 25521 + }, + { + "epoch": 0.7822115974010053, + "grad_norm": 1.464996727452963, + "learning_rate": 2.3862627085652536e-06, + "loss": 0.659, + "step": 25522 + }, + { + "epoch": 0.7822422459237465, + "grad_norm": 1.6492444482449675, + "learning_rate": 2.3856192072342143e-06, + "loss": 0.5998, + "step": 25523 + }, + { + "epoch": 0.7822728944464877, + "grad_norm": 1.751418228725259, + "learning_rate": 2.38497578092962e-06, + "loss": 0.6654, + "step": 25524 + }, + { + "epoch": 0.7823035429692289, + "grad_norm": 1.5721292275411745, + "learning_rate": 2.3843324296578054e-06, + "loss": 0.5783, + "step": 25525 + }, + { + "epoch": 0.78233419149197, + "grad_norm": 1.8010318597844321, + "learning_rate": 2.383689153425115e-06, + "loss": 0.6598, + "step": 25526 + }, + { + "epoch": 0.7823648400147113, + "grad_norm": 1.6937582834306741, + "learning_rate": 2.38304595223788e-06, + "loss": 0.6911, + "step": 25527 + }, + { + "epoch": 0.7823954885374524, + "grad_norm": 0.6451612199452791, + "learning_rate": 2.3824028261024433e-06, + "loss": 0.5196, + "step": 25528 + }, + { + "epoch": 0.7824261370601937, + "grad_norm": 1.7680058589020966, + "learning_rate": 2.381759775025143e-06, + "loss": 0.5664, + "step": 25529 + }, + { + "epoch": 0.7824567855829349, + "grad_norm": 1.903387046052559, + "learning_rate": 2.3811167990123095e-06, + "loss": 0.7578, + "step": 25530 + }, + { + "epoch": 0.7824874341056761, + "grad_norm": 1.9590814296181034, + "learning_rate": 2.3804738980702824e-06, + "loss": 0.686, + "step": 25531 + }, + { + "epoch": 0.7825180826284173, + "grad_norm": 1.8325015020679711, + "learning_rate": 2.3798310722053984e-06, + "loss": 0.624, + "step": 25532 + }, + { + "epoch": 0.7825487311511585, + "grad_norm": 2.0028361339129517, + "learning_rate": 2.379188321423985e-06, + "loss": 0.729, + "step": 25533 + }, + { + "epoch": 0.7825793796738997, + "grad_norm": 1.9160749310375755, + "learning_rate": 2.378545645732382e-06, + "loss": 0.6346, + "step": 25534 + }, + { + "epoch": 0.7826100281966409, + "grad_norm": 1.4367252282015106, + "learning_rate": 2.3779030451369166e-06, + "loss": 0.6694, + "step": 25535 + }, + { + "epoch": 0.7826406767193821, + "grad_norm": 1.88736702732565, + "learning_rate": 2.3772605196439214e-06, + "loss": 0.6516, + "step": 25536 + }, + { + "epoch": 0.7826713252421234, + "grad_norm": 1.6406060199480312, + "learning_rate": 2.376618069259733e-06, + "loss": 0.7232, + "step": 25537 + }, + { + "epoch": 0.7827019737648645, + "grad_norm": 1.7957503093793385, + "learning_rate": 2.3759756939906732e-06, + "loss": 0.7018, + "step": 25538 + }, + { + "epoch": 0.7827326222876058, + "grad_norm": 1.5465777053006176, + "learning_rate": 2.3753333938430767e-06, + "loss": 0.63, + "step": 25539 + }, + { + "epoch": 0.7827632708103469, + "grad_norm": 1.8135597494297067, + "learning_rate": 2.3746911688232733e-06, + "loss": 0.6071, + "step": 25540 + }, + { + "epoch": 0.7827939193330882, + "grad_norm": 1.6474434867294603, + "learning_rate": 2.374049018937585e-06, + "loss": 0.6362, + "step": 25541 + }, + { + "epoch": 0.7828245678558293, + "grad_norm": 1.6917164831789049, + "learning_rate": 2.3734069441923445e-06, + "loss": 0.6994, + "step": 25542 + }, + { + "epoch": 0.7828552163785706, + "grad_norm": 1.738200913977062, + "learning_rate": 2.3727649445938792e-06, + "loss": 0.6571, + "step": 25543 + }, + { + "epoch": 0.7828858649013117, + "grad_norm": 1.7137722039173142, + "learning_rate": 2.3721230201485092e-06, + "loss": 0.6361, + "step": 25544 + }, + { + "epoch": 0.782916513424053, + "grad_norm": 0.6669320932065058, + "learning_rate": 2.3714811708625664e-06, + "loss": 0.5134, + "step": 25545 + }, + { + "epoch": 0.7829471619467941, + "grad_norm": 1.816073314158929, + "learning_rate": 2.3708393967423647e-06, + "loss": 0.5769, + "step": 25546 + }, + { + "epoch": 0.7829778104695354, + "grad_norm": 1.5971596460976458, + "learning_rate": 2.370197697794241e-06, + "loss": 0.6297, + "step": 25547 + }, + { + "epoch": 0.7830084589922766, + "grad_norm": 1.6302427355586313, + "learning_rate": 2.3695560740245104e-06, + "loss": 0.5709, + "step": 25548 + }, + { + "epoch": 0.7830391075150178, + "grad_norm": 1.941905201416824, + "learning_rate": 2.368914525439494e-06, + "loss": 0.6906, + "step": 25549 + }, + { + "epoch": 0.783069756037759, + "grad_norm": 1.7943918560210121, + "learning_rate": 2.3682730520455157e-06, + "loss": 0.6631, + "step": 25550 + }, + { + "epoch": 0.7831004045605002, + "grad_norm": 1.827606185435315, + "learning_rate": 2.3676316538488976e-06, + "loss": 0.6383, + "step": 25551 + }, + { + "epoch": 0.7831310530832414, + "grad_norm": 1.5887666554556477, + "learning_rate": 2.366990330855955e-06, + "loss": 0.7208, + "step": 25552 + }, + { + "epoch": 0.7831617016059826, + "grad_norm": 1.625662003277826, + "learning_rate": 2.366349083073011e-06, + "loss": 0.5189, + "step": 25553 + }, + { + "epoch": 0.7831923501287238, + "grad_norm": 1.6927866170266865, + "learning_rate": 2.3657079105063806e-06, + "loss": 0.6629, + "step": 25554 + }, + { + "epoch": 0.783222998651465, + "grad_norm": 1.6508059755331892, + "learning_rate": 2.365066813162388e-06, + "loss": 0.6229, + "step": 25555 + }, + { + "epoch": 0.7832536471742062, + "grad_norm": 0.6802616713081369, + "learning_rate": 2.3644257910473443e-06, + "loss": 0.5275, + "step": 25556 + }, + { + "epoch": 0.7832842956969474, + "grad_norm": 1.693218144697417, + "learning_rate": 2.3637848441675624e-06, + "loss": 0.6824, + "step": 25557 + }, + { + "epoch": 0.7833149442196886, + "grad_norm": 1.6406197330952086, + "learning_rate": 2.363143972529367e-06, + "loss": 0.6337, + "step": 25558 + }, + { + "epoch": 0.7833455927424298, + "grad_norm": 1.89757452850348, + "learning_rate": 2.3625031761390683e-06, + "loss": 0.7011, + "step": 25559 + }, + { + "epoch": 0.783376241265171, + "grad_norm": 1.8250196409707766, + "learning_rate": 2.361862455002978e-06, + "loss": 0.7106, + "step": 25560 + }, + { + "epoch": 0.7834068897879122, + "grad_norm": 1.9651129289467963, + "learning_rate": 2.3612218091274096e-06, + "loss": 0.6373, + "step": 25561 + }, + { + "epoch": 0.7834375383106534, + "grad_norm": 1.7857448042883903, + "learning_rate": 2.360581238518681e-06, + "loss": 0.6587, + "step": 25562 + }, + { + "epoch": 0.7834681868333946, + "grad_norm": 1.4294822002888297, + "learning_rate": 2.359940743183097e-06, + "loss": 0.6659, + "step": 25563 + }, + { + "epoch": 0.7834988353561358, + "grad_norm": 1.6447820934386803, + "learning_rate": 2.359300323126972e-06, + "loss": 0.6833, + "step": 25564 + }, + { + "epoch": 0.783529483878877, + "grad_norm": 1.5632078254235795, + "learning_rate": 2.3586599783566155e-06, + "loss": 0.6286, + "step": 25565 + }, + { + "epoch": 0.7835601324016183, + "grad_norm": 1.5843958892659968, + "learning_rate": 2.3580197088783397e-06, + "loss": 0.5999, + "step": 25566 + }, + { + "epoch": 0.7835907809243594, + "grad_norm": 1.817339280291542, + "learning_rate": 2.3573795146984525e-06, + "loss": 0.7405, + "step": 25567 + }, + { + "epoch": 0.7836214294471007, + "grad_norm": 1.7074121577576145, + "learning_rate": 2.356739395823253e-06, + "loss": 0.6047, + "step": 25568 + }, + { + "epoch": 0.7836520779698418, + "grad_norm": 1.5695741254556423, + "learning_rate": 2.3560993522590624e-06, + "loss": 0.6593, + "step": 25569 + }, + { + "epoch": 0.7836827264925831, + "grad_norm": 1.9026163918376264, + "learning_rate": 2.355459384012181e-06, + "loss": 0.6811, + "step": 25570 + }, + { + "epoch": 0.7837133750153242, + "grad_norm": 1.9843918192443464, + "learning_rate": 2.354819491088911e-06, + "loss": 0.7068, + "step": 25571 + }, + { + "epoch": 0.7837440235380655, + "grad_norm": 0.6691171735411442, + "learning_rate": 2.35417967349556e-06, + "loss": 0.5103, + "step": 25572 + }, + { + "epoch": 0.7837746720608066, + "grad_norm": 1.6474265598496043, + "learning_rate": 2.3535399312384344e-06, + "loss": 0.6896, + "step": 25573 + }, + { + "epoch": 0.7838053205835479, + "grad_norm": 1.7392626655356334, + "learning_rate": 2.3529002643238373e-06, + "loss": 0.631, + "step": 25574 + }, + { + "epoch": 0.783835969106289, + "grad_norm": 1.972080341283694, + "learning_rate": 2.3522606727580686e-06, + "loss": 0.6806, + "step": 25575 + }, + { + "epoch": 0.7838666176290303, + "grad_norm": 1.9551398980925314, + "learning_rate": 2.3516211565474333e-06, + "loss": 0.6017, + "step": 25576 + }, + { + "epoch": 0.7838972661517715, + "grad_norm": 0.6809735873922907, + "learning_rate": 2.350981715698233e-06, + "loss": 0.5215, + "step": 25577 + }, + { + "epoch": 0.7839279146745127, + "grad_norm": 1.5895242966605887, + "learning_rate": 2.350342350216768e-06, + "loss": 0.5828, + "step": 25578 + }, + { + "epoch": 0.7839585631972539, + "grad_norm": 1.5480536162789678, + "learning_rate": 2.349703060109333e-06, + "loss": 0.6376, + "step": 25579 + }, + { + "epoch": 0.7839892117199951, + "grad_norm": 1.6020634477005562, + "learning_rate": 2.349063845382232e-06, + "loss": 0.643, + "step": 25580 + }, + { + "epoch": 0.7840198602427363, + "grad_norm": 1.6593831473153866, + "learning_rate": 2.348424706041762e-06, + "loss": 0.8032, + "step": 25581 + }, + { + "epoch": 0.7840505087654775, + "grad_norm": 1.5902492945234004, + "learning_rate": 2.347785642094225e-06, + "loss": 0.5903, + "step": 25582 + }, + { + "epoch": 0.7840811572882187, + "grad_norm": 0.6707599361259924, + "learning_rate": 2.34714665354591e-06, + "loss": 0.4986, + "step": 25583 + }, + { + "epoch": 0.78411180581096, + "grad_norm": 1.7540761947096408, + "learning_rate": 2.346507740403118e-06, + "loss": 0.7402, + "step": 25584 + }, + { + "epoch": 0.7841424543337011, + "grad_norm": 1.6048557786225097, + "learning_rate": 2.345868902672146e-06, + "loss": 0.6318, + "step": 25585 + }, + { + "epoch": 0.7841731028564424, + "grad_norm": 1.6681625556150097, + "learning_rate": 2.345230140359286e-06, + "loss": 0.6349, + "step": 25586 + }, + { + "epoch": 0.7842037513791835, + "grad_norm": 1.7136517697735594, + "learning_rate": 2.344591453470826e-06, + "loss": 0.6966, + "step": 25587 + }, + { + "epoch": 0.7842343999019247, + "grad_norm": 0.6579472947152798, + "learning_rate": 2.3439528420130707e-06, + "loss": 0.5483, + "step": 25588 + }, + { + "epoch": 0.7842650484246659, + "grad_norm": 1.365629570037727, + "learning_rate": 2.343314305992307e-06, + "loss": 0.6702, + "step": 25589 + }, + { + "epoch": 0.7842956969474071, + "grad_norm": 1.5704832708010488, + "learning_rate": 2.3426758454148246e-06, + "loss": 0.634, + "step": 25590 + }, + { + "epoch": 0.7843263454701483, + "grad_norm": 1.8105940411481316, + "learning_rate": 2.3420374602869156e-06, + "loss": 0.7002, + "step": 25591 + }, + { + "epoch": 0.7843569939928895, + "grad_norm": 1.9243848225774596, + "learning_rate": 2.3413991506148704e-06, + "loss": 0.7035, + "step": 25592 + }, + { + "epoch": 0.7843876425156308, + "grad_norm": 1.7086296192919992, + "learning_rate": 2.3407609164049827e-06, + "loss": 0.6752, + "step": 25593 + }, + { + "epoch": 0.7844182910383719, + "grad_norm": 1.6877811027696719, + "learning_rate": 2.340122757663533e-06, + "loss": 0.6339, + "step": 25594 + }, + { + "epoch": 0.7844489395611132, + "grad_norm": 0.6734686976803408, + "learning_rate": 2.3394846743968158e-06, + "loss": 0.5475, + "step": 25595 + }, + { + "epoch": 0.7844795880838543, + "grad_norm": 1.6058090122377742, + "learning_rate": 2.338846666611118e-06, + "loss": 0.5931, + "step": 25596 + }, + { + "epoch": 0.7845102366065956, + "grad_norm": 1.7636925717199161, + "learning_rate": 2.3382087343127238e-06, + "loss": 0.6493, + "step": 25597 + }, + { + "epoch": 0.7845408851293367, + "grad_norm": 1.6968386896808176, + "learning_rate": 2.337570877507913e-06, + "loss": 0.645, + "step": 25598 + }, + { + "epoch": 0.784571533652078, + "grad_norm": 1.478322909721053, + "learning_rate": 2.3369330962029845e-06, + "loss": 0.6949, + "step": 25599 + }, + { + "epoch": 0.7846021821748191, + "grad_norm": 1.584094950564774, + "learning_rate": 2.336295390404211e-06, + "loss": 0.621, + "step": 25600 + }, + { + "epoch": 0.7846328306975604, + "grad_norm": 1.7988671779183891, + "learning_rate": 2.335657760117882e-06, + "loss": 0.6582, + "step": 25601 + }, + { + "epoch": 0.7846634792203016, + "grad_norm": 1.6879700845584749, + "learning_rate": 2.3350202053502757e-06, + "loss": 0.6797, + "step": 25602 + }, + { + "epoch": 0.7846941277430428, + "grad_norm": 1.5565149034539956, + "learning_rate": 2.334382726107677e-06, + "loss": 0.6234, + "step": 25603 + }, + { + "epoch": 0.784724776265784, + "grad_norm": 1.6337635208897305, + "learning_rate": 2.333745322396369e-06, + "loss": 0.7438, + "step": 25604 + }, + { + "epoch": 0.7847554247885252, + "grad_norm": 1.61683873365323, + "learning_rate": 2.3331079942226275e-06, + "loss": 0.584, + "step": 25605 + }, + { + "epoch": 0.7847860733112664, + "grad_norm": 1.6965347410361682, + "learning_rate": 2.332470741592734e-06, + "loss": 0.6537, + "step": 25606 + }, + { + "epoch": 0.7848167218340076, + "grad_norm": 1.8830183796443278, + "learning_rate": 2.331833564512972e-06, + "loss": 0.7381, + "step": 25607 + }, + { + "epoch": 0.7848473703567488, + "grad_norm": 1.3798153825906039, + "learning_rate": 2.331196462989612e-06, + "loss": 0.4773, + "step": 25608 + }, + { + "epoch": 0.78487801887949, + "grad_norm": 0.6583753989927815, + "learning_rate": 2.3305594370289354e-06, + "loss": 0.503, + "step": 25609 + }, + { + "epoch": 0.7849086674022312, + "grad_norm": 1.7158891596948902, + "learning_rate": 2.3299224866372216e-06, + "loss": 0.617, + "step": 25610 + }, + { + "epoch": 0.7849393159249725, + "grad_norm": 1.6771962134036482, + "learning_rate": 2.3292856118207418e-06, + "loss": 0.7066, + "step": 25611 + }, + { + "epoch": 0.7849699644477136, + "grad_norm": 1.749683892310093, + "learning_rate": 2.3286488125857763e-06, + "loss": 0.6885, + "step": 25612 + }, + { + "epoch": 0.7850006129704549, + "grad_norm": 1.6076666709921277, + "learning_rate": 2.3280120889385936e-06, + "loss": 0.5715, + "step": 25613 + }, + { + "epoch": 0.785031261493196, + "grad_norm": 1.6172376020232326, + "learning_rate": 2.327375440885472e-06, + "loss": 0.5912, + "step": 25614 + }, + { + "epoch": 0.7850619100159373, + "grad_norm": 1.7103253528792144, + "learning_rate": 2.3267388684326852e-06, + "loss": 0.6316, + "step": 25615 + }, + { + "epoch": 0.7850925585386784, + "grad_norm": 0.6602344964535167, + "learning_rate": 2.3261023715865007e-06, + "loss": 0.494, + "step": 25616 + }, + { + "epoch": 0.7851232070614197, + "grad_norm": 0.6580101529681004, + "learning_rate": 2.3254659503531928e-06, + "loss": 0.514, + "step": 25617 + }, + { + "epoch": 0.7851538555841608, + "grad_norm": 1.5910583901394044, + "learning_rate": 2.324829604739035e-06, + "loss": 0.5788, + "step": 25618 + }, + { + "epoch": 0.785184504106902, + "grad_norm": 1.5841938906081718, + "learning_rate": 2.324193334750293e-06, + "loss": 0.6416, + "step": 25619 + }, + { + "epoch": 0.7852151526296433, + "grad_norm": 1.8239735888374278, + "learning_rate": 2.3235571403932376e-06, + "loss": 0.7458, + "step": 25620 + }, + { + "epoch": 0.7852458011523844, + "grad_norm": 1.6833396992308656, + "learning_rate": 2.3229210216741405e-06, + "loss": 0.5935, + "step": 25621 + }, + { + "epoch": 0.7852764496751257, + "grad_norm": 1.7117812081698132, + "learning_rate": 2.3222849785992653e-06, + "loss": 0.664, + "step": 25622 + }, + { + "epoch": 0.7853070981978668, + "grad_norm": 1.5639919529308326, + "learning_rate": 2.3216490111748813e-06, + "loss": 0.6039, + "step": 25623 + }, + { + "epoch": 0.7853377467206081, + "grad_norm": 1.7174757010240962, + "learning_rate": 2.3210131194072527e-06, + "loss": 0.7407, + "step": 25624 + }, + { + "epoch": 0.7853683952433492, + "grad_norm": 0.6644865813791536, + "learning_rate": 2.3203773033026468e-06, + "loss": 0.5154, + "step": 25625 + }, + { + "epoch": 0.7853990437660905, + "grad_norm": 1.516302424742265, + "learning_rate": 2.31974156286733e-06, + "loss": 0.5254, + "step": 25626 + }, + { + "epoch": 0.7854296922888316, + "grad_norm": 1.578480862063782, + "learning_rate": 2.319105898107563e-06, + "loss": 0.5996, + "step": 25627 + }, + { + "epoch": 0.7854603408115729, + "grad_norm": 1.6410214704184534, + "learning_rate": 2.3184703090296103e-06, + "loss": 0.6418, + "step": 25628 + }, + { + "epoch": 0.785490989334314, + "grad_norm": 1.9393842695713248, + "learning_rate": 2.3178347956397375e-06, + "loss": 0.6715, + "step": 25629 + }, + { + "epoch": 0.7855216378570553, + "grad_norm": 1.660955710159324, + "learning_rate": 2.317199357944201e-06, + "loss": 0.7491, + "step": 25630 + }, + { + "epoch": 0.7855522863797965, + "grad_norm": 1.6928255653229771, + "learning_rate": 2.3165639959492693e-06, + "loss": 0.6082, + "step": 25631 + }, + { + "epoch": 0.7855829349025377, + "grad_norm": 1.7271774019231414, + "learning_rate": 2.315928709661194e-06, + "loss": 0.6672, + "step": 25632 + }, + { + "epoch": 0.7856135834252789, + "grad_norm": 1.7062764346450472, + "learning_rate": 2.3152934990862397e-06, + "loss": 0.6994, + "step": 25633 + }, + { + "epoch": 0.7856442319480201, + "grad_norm": 0.6937726692381454, + "learning_rate": 2.3146583642306676e-06, + "loss": 0.5315, + "step": 25634 + }, + { + "epoch": 0.7856748804707613, + "grad_norm": 1.7505507426492701, + "learning_rate": 2.31402330510073e-06, + "loss": 0.6041, + "step": 25635 + }, + { + "epoch": 0.7857055289935025, + "grad_norm": 1.778811520436564, + "learning_rate": 2.3133883217026876e-06, + "loss": 0.6611, + "step": 25636 + }, + { + "epoch": 0.7857361775162437, + "grad_norm": 1.6055308095413865, + "learning_rate": 2.3127534140428e-06, + "loss": 0.6534, + "step": 25637 + }, + { + "epoch": 0.785766826038985, + "grad_norm": 1.7653699840952104, + "learning_rate": 2.3121185821273164e-06, + "loss": 0.6121, + "step": 25638 + }, + { + "epoch": 0.7857974745617261, + "grad_norm": 1.7795277233785303, + "learning_rate": 2.311483825962496e-06, + "loss": 0.7502, + "step": 25639 + }, + { + "epoch": 0.7858281230844674, + "grad_norm": 1.7227773983886758, + "learning_rate": 2.3108491455545955e-06, + "loss": 0.5924, + "step": 25640 + }, + { + "epoch": 0.7858587716072085, + "grad_norm": 1.6093752719312067, + "learning_rate": 2.3102145409098618e-06, + "loss": 0.6184, + "step": 25641 + }, + { + "epoch": 0.7858894201299498, + "grad_norm": 1.62545045466391, + "learning_rate": 2.3095800120345558e-06, + "loss": 0.6558, + "step": 25642 + }, + { + "epoch": 0.7859200686526909, + "grad_norm": 1.7553088429100216, + "learning_rate": 2.308945558934922e-06, + "loss": 0.663, + "step": 25643 + }, + { + "epoch": 0.7859507171754322, + "grad_norm": 1.6139167728892958, + "learning_rate": 2.3083111816172153e-06, + "loss": 0.6013, + "step": 25644 + }, + { + "epoch": 0.7859813656981733, + "grad_norm": 1.6477496420559627, + "learning_rate": 2.3076768800876903e-06, + "loss": 0.6854, + "step": 25645 + }, + { + "epoch": 0.7860120142209146, + "grad_norm": 2.0052848906508864, + "learning_rate": 2.30704265435259e-06, + "loss": 0.6511, + "step": 25646 + }, + { + "epoch": 0.7860426627436558, + "grad_norm": 1.5907438329962182, + "learning_rate": 2.306408504418166e-06, + "loss": 0.6582, + "step": 25647 + }, + { + "epoch": 0.786073311266397, + "grad_norm": 1.7351904377191922, + "learning_rate": 2.3057744302906714e-06, + "loss": 0.7018, + "step": 25648 + }, + { + "epoch": 0.7861039597891382, + "grad_norm": 1.740941068055053, + "learning_rate": 2.305140431976347e-06, + "loss": 0.7006, + "step": 25649 + }, + { + "epoch": 0.7861346083118793, + "grad_norm": 2.0144114284347214, + "learning_rate": 2.3045065094814424e-06, + "loss": 0.7311, + "step": 25650 + }, + { + "epoch": 0.7861652568346206, + "grad_norm": 1.6888825976118849, + "learning_rate": 2.3038726628122066e-06, + "loss": 0.683, + "step": 25651 + }, + { + "epoch": 0.7861959053573617, + "grad_norm": 1.8586665423351896, + "learning_rate": 2.3032388919748807e-06, + "loss": 0.6698, + "step": 25652 + }, + { + "epoch": 0.786226553880103, + "grad_norm": 1.6073195534090023, + "learning_rate": 2.3026051969757133e-06, + "loss": 0.584, + "step": 25653 + }, + { + "epoch": 0.7862572024028441, + "grad_norm": 1.93245971810908, + "learning_rate": 2.301971577820941e-06, + "loss": 0.7688, + "step": 25654 + }, + { + "epoch": 0.7862878509255854, + "grad_norm": 1.685048754987103, + "learning_rate": 2.301338034516818e-06, + "loss": 0.6418, + "step": 25655 + }, + { + "epoch": 0.7863184994483265, + "grad_norm": 1.7743236711313886, + "learning_rate": 2.3007045670695816e-06, + "loss": 0.5833, + "step": 25656 + }, + { + "epoch": 0.7863491479710678, + "grad_norm": 1.6682292996618595, + "learning_rate": 2.3000711754854697e-06, + "loss": 0.636, + "step": 25657 + }, + { + "epoch": 0.786379796493809, + "grad_norm": 1.662082537363577, + "learning_rate": 2.299437859770727e-06, + "loss": 0.6785, + "step": 25658 + }, + { + "epoch": 0.7864104450165502, + "grad_norm": 1.6705645172232504, + "learning_rate": 2.298804619931595e-06, + "loss": 0.713, + "step": 25659 + }, + { + "epoch": 0.7864410935392914, + "grad_norm": 1.7288797881413238, + "learning_rate": 2.2981714559743096e-06, + "loss": 0.5925, + "step": 25660 + }, + { + "epoch": 0.7864717420620326, + "grad_norm": 0.6504056447382034, + "learning_rate": 2.2975383679051123e-06, + "loss": 0.5125, + "step": 25661 + }, + { + "epoch": 0.7865023905847738, + "grad_norm": 1.6350218707973976, + "learning_rate": 2.296905355730239e-06, + "loss": 0.6535, + "step": 25662 + }, + { + "epoch": 0.786533039107515, + "grad_norm": 1.6090440559401722, + "learning_rate": 2.2962724194559307e-06, + "loss": 0.6215, + "step": 25663 + }, + { + "epoch": 0.7865636876302562, + "grad_norm": 1.4888257968491214, + "learning_rate": 2.295639559088422e-06, + "loss": 0.628, + "step": 25664 + }, + { + "epoch": 0.7865943361529975, + "grad_norm": 1.3920851315906337, + "learning_rate": 2.2950067746339456e-06, + "loss": 0.535, + "step": 25665 + }, + { + "epoch": 0.7866249846757386, + "grad_norm": 1.899861081404155, + "learning_rate": 2.294374066098738e-06, + "loss": 0.7623, + "step": 25666 + }, + { + "epoch": 0.7866556331984799, + "grad_norm": 1.6791420827059942, + "learning_rate": 2.2937414334890374e-06, + "loss": 0.7133, + "step": 25667 + }, + { + "epoch": 0.786686281721221, + "grad_norm": 0.6510448356545456, + "learning_rate": 2.293108876811071e-06, + "loss": 0.5164, + "step": 25668 + }, + { + "epoch": 0.7867169302439623, + "grad_norm": 1.7254485652864955, + "learning_rate": 2.2924763960710762e-06, + "loss": 0.6075, + "step": 25669 + }, + { + "epoch": 0.7867475787667034, + "grad_norm": 1.8932278105299971, + "learning_rate": 2.2918439912752843e-06, + "loss": 0.6943, + "step": 25670 + }, + { + "epoch": 0.7867782272894447, + "grad_norm": 1.6008247395397472, + "learning_rate": 2.291211662429924e-06, + "loss": 0.645, + "step": 25671 + }, + { + "epoch": 0.7868088758121858, + "grad_norm": 1.5986353312655746, + "learning_rate": 2.290579409541228e-06, + "loss": 0.5908, + "step": 25672 + }, + { + "epoch": 0.7868395243349271, + "grad_norm": 2.1884871871159706, + "learning_rate": 2.289947232615425e-06, + "loss": 0.7726, + "step": 25673 + }, + { + "epoch": 0.7868701728576682, + "grad_norm": 1.6239873003251133, + "learning_rate": 2.289315131658748e-06, + "loss": 0.6535, + "step": 25674 + }, + { + "epoch": 0.7869008213804095, + "grad_norm": 1.8534170678649962, + "learning_rate": 2.2886831066774207e-06, + "loss": 0.7007, + "step": 25675 + }, + { + "epoch": 0.7869314699031507, + "grad_norm": 1.9446788394560852, + "learning_rate": 2.28805115767767e-06, + "loss": 0.6923, + "step": 25676 + }, + { + "epoch": 0.7869621184258919, + "grad_norm": 1.5845532743600832, + "learning_rate": 2.287419284665724e-06, + "loss": 0.6278, + "step": 25677 + }, + { + "epoch": 0.7869927669486331, + "grad_norm": 1.506649176133862, + "learning_rate": 2.2867874876478124e-06, + "loss": 0.5846, + "step": 25678 + }, + { + "epoch": 0.7870234154713743, + "grad_norm": 1.7112643349728407, + "learning_rate": 2.2861557666301536e-06, + "loss": 0.6583, + "step": 25679 + }, + { + "epoch": 0.7870540639941155, + "grad_norm": 1.8043622076620813, + "learning_rate": 2.2855241216189762e-06, + "loss": 0.6916, + "step": 25680 + }, + { + "epoch": 0.7870847125168566, + "grad_norm": 1.512598292660124, + "learning_rate": 2.2848925526205033e-06, + "loss": 0.7243, + "step": 25681 + }, + { + "epoch": 0.7871153610395979, + "grad_norm": 1.5867525085112508, + "learning_rate": 2.284261059640961e-06, + "loss": 0.5976, + "step": 25682 + }, + { + "epoch": 0.787146009562339, + "grad_norm": 1.8200303816319021, + "learning_rate": 2.283629642686569e-06, + "loss": 0.6006, + "step": 25683 + }, + { + "epoch": 0.7871766580850803, + "grad_norm": 1.561133278827563, + "learning_rate": 2.2829983017635427e-06, + "loss": 0.685, + "step": 25684 + }, + { + "epoch": 0.7872073066078215, + "grad_norm": 1.7085684120858848, + "learning_rate": 2.2823670368781138e-06, + "loss": 0.6542, + "step": 25685 + }, + { + "epoch": 0.7872379551305627, + "grad_norm": 1.6634612412975014, + "learning_rate": 2.281735848036497e-06, + "loss": 0.5725, + "step": 25686 + }, + { + "epoch": 0.7872686036533039, + "grad_norm": 1.735617177170926, + "learning_rate": 2.2811047352449102e-06, + "loss": 0.5837, + "step": 25687 + }, + { + "epoch": 0.7872992521760451, + "grad_norm": 1.8673802279653113, + "learning_rate": 2.280473698509572e-06, + "loss": 0.7438, + "step": 25688 + }, + { + "epoch": 0.7873299006987863, + "grad_norm": 1.597349642192999, + "learning_rate": 2.2798427378367018e-06, + "loss": 0.5827, + "step": 25689 + }, + { + "epoch": 0.7873605492215275, + "grad_norm": 1.826395397974437, + "learning_rate": 2.2792118532325193e-06, + "loss": 0.7758, + "step": 25690 + }, + { + "epoch": 0.7873911977442687, + "grad_norm": 0.6858019088903782, + "learning_rate": 2.278581044703235e-06, + "loss": 0.5422, + "step": 25691 + }, + { + "epoch": 0.78742184626701, + "grad_norm": 1.5490395332186855, + "learning_rate": 2.2779503122550672e-06, + "loss": 0.6152, + "step": 25692 + }, + { + "epoch": 0.7874524947897511, + "grad_norm": 1.855294366534737, + "learning_rate": 2.2773196558942347e-06, + "loss": 0.7402, + "step": 25693 + }, + { + "epoch": 0.7874831433124924, + "grad_norm": 1.8475870872112194, + "learning_rate": 2.2766890756269467e-06, + "loss": 0.6604, + "step": 25694 + }, + { + "epoch": 0.7875137918352335, + "grad_norm": 1.5777461076201271, + "learning_rate": 2.2760585714594106e-06, + "loss": 0.6749, + "step": 25695 + }, + { + "epoch": 0.7875444403579748, + "grad_norm": 0.6610530086854328, + "learning_rate": 2.275428143397853e-06, + "loss": 0.5303, + "step": 25696 + }, + { + "epoch": 0.7875750888807159, + "grad_norm": 1.6942691610303806, + "learning_rate": 2.2747977914484776e-06, + "loss": 0.698, + "step": 25697 + }, + { + "epoch": 0.7876057374034572, + "grad_norm": 1.6450291264425816, + "learning_rate": 2.2741675156174936e-06, + "loss": 0.5469, + "step": 25698 + }, + { + "epoch": 0.7876363859261983, + "grad_norm": 1.753954600640852, + "learning_rate": 2.273537315911113e-06, + "loss": 0.6627, + "step": 25699 + }, + { + "epoch": 0.7876670344489396, + "grad_norm": 1.7644426704185432, + "learning_rate": 2.272907192335547e-06, + "loss": 0.5132, + "step": 25700 + }, + { + "epoch": 0.7876976829716807, + "grad_norm": 1.7168754251844072, + "learning_rate": 2.272277144897006e-06, + "loss": 0.6886, + "step": 25701 + }, + { + "epoch": 0.787728331494422, + "grad_norm": 1.6794331721298863, + "learning_rate": 2.271647173601693e-06, + "loss": 0.6747, + "step": 25702 + }, + { + "epoch": 0.7877589800171632, + "grad_norm": 1.8140256956930372, + "learning_rate": 2.271017278455817e-06, + "loss": 0.637, + "step": 25703 + }, + { + "epoch": 0.7877896285399044, + "grad_norm": 1.6758971593952159, + "learning_rate": 2.2703874594655884e-06, + "loss": 0.6126, + "step": 25704 + }, + { + "epoch": 0.7878202770626456, + "grad_norm": 1.758095040600881, + "learning_rate": 2.269757716637211e-06, + "loss": 0.6952, + "step": 25705 + }, + { + "epoch": 0.7878509255853868, + "grad_norm": 1.7417976874923633, + "learning_rate": 2.269128049976882e-06, + "loss": 0.6358, + "step": 25706 + }, + { + "epoch": 0.787881574108128, + "grad_norm": 1.709145973870067, + "learning_rate": 2.268498459490818e-06, + "loss": 0.6888, + "step": 25707 + }, + { + "epoch": 0.7879122226308692, + "grad_norm": 1.810654130050506, + "learning_rate": 2.267868945185214e-06, + "loss": 0.6572, + "step": 25708 + }, + { + "epoch": 0.7879428711536104, + "grad_norm": 1.8299139694463875, + "learning_rate": 2.267239507066279e-06, + "loss": 0.6793, + "step": 25709 + }, + { + "epoch": 0.7879735196763517, + "grad_norm": 1.4633627588803948, + "learning_rate": 2.266610145140208e-06, + "loss": 0.673, + "step": 25710 + }, + { + "epoch": 0.7880041681990928, + "grad_norm": 1.683703392795663, + "learning_rate": 2.265980859413206e-06, + "loss": 0.5704, + "step": 25711 + }, + { + "epoch": 0.788034816721834, + "grad_norm": 1.6746514935063657, + "learning_rate": 2.2653516498914774e-06, + "loss": 0.6825, + "step": 25712 + }, + { + "epoch": 0.7880654652445752, + "grad_norm": 1.827519635621323, + "learning_rate": 2.2647225165812137e-06, + "loss": 0.6039, + "step": 25713 + }, + { + "epoch": 0.7880961137673164, + "grad_norm": 1.441331752952928, + "learning_rate": 2.2640934594886187e-06, + "loss": 0.5258, + "step": 25714 + }, + { + "epoch": 0.7881267622900576, + "grad_norm": 1.8147589528106574, + "learning_rate": 2.2634644786198936e-06, + "loss": 0.623, + "step": 25715 + }, + { + "epoch": 0.7881574108127988, + "grad_norm": 1.560470344200939, + "learning_rate": 2.2628355739812325e-06, + "loss": 0.7068, + "step": 25716 + }, + { + "epoch": 0.78818805933554, + "grad_norm": 1.5969266530903072, + "learning_rate": 2.2622067455788288e-06, + "loss": 0.6384, + "step": 25717 + }, + { + "epoch": 0.7882187078582812, + "grad_norm": 1.6325100825554375, + "learning_rate": 2.261577993418882e-06, + "loss": 0.6419, + "step": 25718 + }, + { + "epoch": 0.7882493563810224, + "grad_norm": 1.6362232866799966, + "learning_rate": 2.260949317507587e-06, + "loss": 0.6401, + "step": 25719 + }, + { + "epoch": 0.7882800049037636, + "grad_norm": 1.5362389693020864, + "learning_rate": 2.260320717851141e-06, + "loss": 0.5849, + "step": 25720 + }, + { + "epoch": 0.7883106534265049, + "grad_norm": 1.5054392982527196, + "learning_rate": 2.2596921944557325e-06, + "loss": 0.5818, + "step": 25721 + }, + { + "epoch": 0.788341301949246, + "grad_norm": 1.6425909516269168, + "learning_rate": 2.259063747327558e-06, + "loss": 0.6123, + "step": 25722 + }, + { + "epoch": 0.7883719504719873, + "grad_norm": 0.7041208167402445, + "learning_rate": 2.258435376472812e-06, + "loss": 0.5294, + "step": 25723 + }, + { + "epoch": 0.7884025989947284, + "grad_norm": 1.7230123354506404, + "learning_rate": 2.2578070818976783e-06, + "loss": 0.4887, + "step": 25724 + }, + { + "epoch": 0.7884332475174697, + "grad_norm": 0.6705301645058316, + "learning_rate": 2.2571788636083537e-06, + "loss": 0.5525, + "step": 25725 + }, + { + "epoch": 0.7884638960402108, + "grad_norm": 1.6199809985358513, + "learning_rate": 2.2565507216110305e-06, + "loss": 0.648, + "step": 25726 + }, + { + "epoch": 0.7884945445629521, + "grad_norm": 1.5816745485405654, + "learning_rate": 2.25592265591189e-06, + "loss": 0.5999, + "step": 25727 + }, + { + "epoch": 0.7885251930856932, + "grad_norm": 1.625168455639574, + "learning_rate": 2.255294666517128e-06, + "loss": 0.521, + "step": 25728 + }, + { + "epoch": 0.7885558416084345, + "grad_norm": 1.7370153818741996, + "learning_rate": 2.2546667534329268e-06, + "loss": 0.619, + "step": 25729 + }, + { + "epoch": 0.7885864901311757, + "grad_norm": 1.8032751054058616, + "learning_rate": 2.254038916665476e-06, + "loss": 0.6554, + "step": 25730 + }, + { + "epoch": 0.7886171386539169, + "grad_norm": 1.6366252189564772, + "learning_rate": 2.253411156220964e-06, + "loss": 0.7348, + "step": 25731 + }, + { + "epoch": 0.7886477871766581, + "grad_norm": 1.8785200588056457, + "learning_rate": 2.252783472105572e-06, + "loss": 0.6605, + "step": 25732 + }, + { + "epoch": 0.7886784356993993, + "grad_norm": 1.7718374511547048, + "learning_rate": 2.2521558643254857e-06, + "loss": 0.6585, + "step": 25733 + }, + { + "epoch": 0.7887090842221405, + "grad_norm": 1.8862186555747849, + "learning_rate": 2.2515283328868933e-06, + "loss": 0.7776, + "step": 25734 + }, + { + "epoch": 0.7887397327448817, + "grad_norm": 1.7384247405810787, + "learning_rate": 2.2509008777959717e-06, + "loss": 0.7223, + "step": 25735 + }, + { + "epoch": 0.7887703812676229, + "grad_norm": 1.8093490364837364, + "learning_rate": 2.250273499058906e-06, + "loss": 0.6614, + "step": 25736 + }, + { + "epoch": 0.7888010297903641, + "grad_norm": 1.7173243132780185, + "learning_rate": 2.2496461966818817e-06, + "loss": 0.6096, + "step": 25737 + }, + { + "epoch": 0.7888316783131053, + "grad_norm": 1.6911486057760863, + "learning_rate": 2.249018970671074e-06, + "loss": 0.6425, + "step": 25738 + }, + { + "epoch": 0.7888623268358466, + "grad_norm": 1.9262164017467456, + "learning_rate": 2.248391821032668e-06, + "loss": 0.6097, + "step": 25739 + }, + { + "epoch": 0.7888929753585877, + "grad_norm": 1.5190828674245394, + "learning_rate": 2.247764747772838e-06, + "loss": 0.6452, + "step": 25740 + }, + { + "epoch": 0.788923623881329, + "grad_norm": 0.684605740151761, + "learning_rate": 2.2471377508977655e-06, + "loss": 0.5291, + "step": 25741 + }, + { + "epoch": 0.7889542724040701, + "grad_norm": 1.9491143529516197, + "learning_rate": 2.246510830413631e-06, + "loss": 0.6276, + "step": 25742 + }, + { + "epoch": 0.7889849209268113, + "grad_norm": 0.6670816082896809, + "learning_rate": 2.245883986326606e-06, + "loss": 0.5108, + "step": 25743 + }, + { + "epoch": 0.7890155694495525, + "grad_norm": 1.668293131454183, + "learning_rate": 2.24525721864287e-06, + "loss": 0.6933, + "step": 25744 + }, + { + "epoch": 0.7890462179722937, + "grad_norm": 0.6892753508724124, + "learning_rate": 2.2446305273686033e-06, + "loss": 0.5566, + "step": 25745 + }, + { + "epoch": 0.789076866495035, + "grad_norm": 1.683546867272479, + "learning_rate": 2.2440039125099713e-06, + "loss": 0.6326, + "step": 25746 + }, + { + "epoch": 0.7891075150177761, + "grad_norm": 1.948192027422851, + "learning_rate": 2.2433773740731547e-06, + "loss": 0.7034, + "step": 25747 + }, + { + "epoch": 0.7891381635405174, + "grad_norm": 1.5134051264708066, + "learning_rate": 2.2427509120643277e-06, + "loss": 0.6451, + "step": 25748 + }, + { + "epoch": 0.7891688120632585, + "grad_norm": 1.784012346100233, + "learning_rate": 2.2421245264896574e-06, + "loss": 0.5704, + "step": 25749 + }, + { + "epoch": 0.7891994605859998, + "grad_norm": 1.8178081143269273, + "learning_rate": 2.2414982173553225e-06, + "loss": 0.5585, + "step": 25750 + }, + { + "epoch": 0.7892301091087409, + "grad_norm": 1.7407974191842335, + "learning_rate": 2.2408719846674874e-06, + "loss": 0.6261, + "step": 25751 + }, + { + "epoch": 0.7892607576314822, + "grad_norm": 0.6633418197111166, + "learning_rate": 2.240245828432327e-06, + "loss": 0.5303, + "step": 25752 + }, + { + "epoch": 0.7892914061542233, + "grad_norm": 1.8031195616457978, + "learning_rate": 2.239619748656011e-06, + "loss": 0.6315, + "step": 25753 + }, + { + "epoch": 0.7893220546769646, + "grad_norm": 1.7473196382466643, + "learning_rate": 2.2389937453447066e-06, + "loss": 0.6685, + "step": 25754 + }, + { + "epoch": 0.7893527031997057, + "grad_norm": 1.8258453903079959, + "learning_rate": 2.238367818504581e-06, + "loss": 0.6952, + "step": 25755 + }, + { + "epoch": 0.789383351722447, + "grad_norm": 1.565391845932454, + "learning_rate": 2.2377419681418056e-06, + "loss": 0.5035, + "step": 25756 + }, + { + "epoch": 0.7894140002451882, + "grad_norm": 1.7244345837538868, + "learning_rate": 2.237116194262543e-06, + "loss": 0.6248, + "step": 25757 + }, + { + "epoch": 0.7894446487679294, + "grad_norm": 1.7409579901764123, + "learning_rate": 2.2364904968729606e-06, + "loss": 0.6848, + "step": 25758 + }, + { + "epoch": 0.7894752972906706, + "grad_norm": 1.6038973970456116, + "learning_rate": 2.235864875979226e-06, + "loss": 0.6792, + "step": 25759 + }, + { + "epoch": 0.7895059458134118, + "grad_norm": 1.7058153043763693, + "learning_rate": 2.235239331587499e-06, + "loss": 0.6816, + "step": 25760 + }, + { + "epoch": 0.789536594336153, + "grad_norm": 1.8446108483801096, + "learning_rate": 2.234613863703948e-06, + "loss": 0.6406, + "step": 25761 + }, + { + "epoch": 0.7895672428588942, + "grad_norm": 1.7731438732488727, + "learning_rate": 2.2339884723347303e-06, + "loss": 0.648, + "step": 25762 + }, + { + "epoch": 0.7895978913816354, + "grad_norm": 1.7522784548522796, + "learning_rate": 2.2333631574860124e-06, + "loss": 0.6225, + "step": 25763 + }, + { + "epoch": 0.7896285399043766, + "grad_norm": 1.4335265723553103, + "learning_rate": 2.2327379191639566e-06, + "loss": 0.6269, + "step": 25764 + }, + { + "epoch": 0.7896591884271178, + "grad_norm": 0.6743226397308856, + "learning_rate": 2.2321127573747183e-06, + "loss": 0.5357, + "step": 25765 + }, + { + "epoch": 0.7896898369498591, + "grad_norm": 1.5607657567477962, + "learning_rate": 2.2314876721244604e-06, + "loss": 0.5681, + "step": 25766 + }, + { + "epoch": 0.7897204854726002, + "grad_norm": 1.6670369984305915, + "learning_rate": 2.230862663419345e-06, + "loss": 0.6397, + "step": 25767 + }, + { + "epoch": 0.7897511339953415, + "grad_norm": 1.8115276903282538, + "learning_rate": 2.2302377312655254e-06, + "loss": 0.71, + "step": 25768 + }, + { + "epoch": 0.7897817825180826, + "grad_norm": 1.7950716353963898, + "learning_rate": 2.229612875669165e-06, + "loss": 0.6857, + "step": 25769 + }, + { + "epoch": 0.7898124310408239, + "grad_norm": 0.6836714082738364, + "learning_rate": 2.228988096636413e-06, + "loss": 0.5366, + "step": 25770 + }, + { + "epoch": 0.789843079563565, + "grad_norm": 1.7254926288705832, + "learning_rate": 2.2283633941734297e-06, + "loss": 0.6316, + "step": 25771 + }, + { + "epoch": 0.7898737280863063, + "grad_norm": 1.8805000983867663, + "learning_rate": 2.227738768286373e-06, + "loss": 0.7009, + "step": 25772 + }, + { + "epoch": 0.7899043766090474, + "grad_norm": 1.7101933178833748, + "learning_rate": 2.2271142189813922e-06, + "loss": 0.7087, + "step": 25773 + }, + { + "epoch": 0.7899350251317886, + "grad_norm": 1.5501290319772223, + "learning_rate": 2.226489746264644e-06, + "loss": 0.641, + "step": 25774 + }, + { + "epoch": 0.7899656736545299, + "grad_norm": 2.3995890046904864, + "learning_rate": 2.2258653501422834e-06, + "loss": 0.6833, + "step": 25775 + }, + { + "epoch": 0.789996322177271, + "grad_norm": 1.6288706959203767, + "learning_rate": 2.2252410306204587e-06, + "loss": 0.6003, + "step": 25776 + }, + { + "epoch": 0.7900269707000123, + "grad_norm": 1.910095481068033, + "learning_rate": 2.2246167877053225e-06, + "loss": 0.6743, + "step": 25777 + }, + { + "epoch": 0.7900576192227534, + "grad_norm": 1.781319053588227, + "learning_rate": 2.2239926214030306e-06, + "loss": 0.6584, + "step": 25778 + }, + { + "epoch": 0.7900882677454947, + "grad_norm": 0.6497358709815493, + "learning_rate": 2.2233685317197252e-06, + "loss": 0.4887, + "step": 25779 + }, + { + "epoch": 0.7901189162682358, + "grad_norm": 1.5907834154540896, + "learning_rate": 2.2227445186615626e-06, + "loss": 0.645, + "step": 25780 + }, + { + "epoch": 0.7901495647909771, + "grad_norm": 0.6752400859658176, + "learning_rate": 2.2221205822346825e-06, + "loss": 0.5071, + "step": 25781 + }, + { + "epoch": 0.7901802133137182, + "grad_norm": 1.7682185320740555, + "learning_rate": 2.2214967224452433e-06, + "loss": 0.6341, + "step": 25782 + }, + { + "epoch": 0.7902108618364595, + "grad_norm": 1.6463717277471013, + "learning_rate": 2.2208729392993876e-06, + "loss": 0.5938, + "step": 25783 + }, + { + "epoch": 0.7902415103592006, + "grad_norm": 1.8618972797041367, + "learning_rate": 2.220249232803259e-06, + "loss": 0.7067, + "step": 25784 + }, + { + "epoch": 0.7902721588819419, + "grad_norm": 1.5527842370256906, + "learning_rate": 2.219625602963004e-06, + "loss": 0.5223, + "step": 25785 + }, + { + "epoch": 0.7903028074046831, + "grad_norm": 1.6140562156686755, + "learning_rate": 2.2190020497847718e-06, + "loss": 0.6559, + "step": 25786 + }, + { + "epoch": 0.7903334559274243, + "grad_norm": 1.8110462586851985, + "learning_rate": 2.218378573274701e-06, + "loss": 0.6374, + "step": 25787 + }, + { + "epoch": 0.7903641044501655, + "grad_norm": 1.5624397641780359, + "learning_rate": 2.217755173438937e-06, + "loss": 0.6346, + "step": 25788 + }, + { + "epoch": 0.7903947529729067, + "grad_norm": 1.6691558679231453, + "learning_rate": 2.2171318502836227e-06, + "loss": 0.7039, + "step": 25789 + }, + { + "epoch": 0.7904254014956479, + "grad_norm": 2.211927477605894, + "learning_rate": 2.216508603814902e-06, + "loss": 0.7627, + "step": 25790 + }, + { + "epoch": 0.7904560500183891, + "grad_norm": 2.255317711116328, + "learning_rate": 2.2158854340389137e-06, + "loss": 0.5862, + "step": 25791 + }, + { + "epoch": 0.7904866985411303, + "grad_norm": 1.7687645628558033, + "learning_rate": 2.2152623409617915e-06, + "loss": 0.7038, + "step": 25792 + }, + { + "epoch": 0.7905173470638716, + "grad_norm": 1.6842818121184921, + "learning_rate": 2.214639324589688e-06, + "loss": 0.6506, + "step": 25793 + }, + { + "epoch": 0.7905479955866127, + "grad_norm": 1.6120643262915773, + "learning_rate": 2.2140163849287344e-06, + "loss": 0.612, + "step": 25794 + }, + { + "epoch": 0.790578644109354, + "grad_norm": 1.5851298630669588, + "learning_rate": 2.2133935219850667e-06, + "loss": 0.7056, + "step": 25795 + }, + { + "epoch": 0.7906092926320951, + "grad_norm": 1.574949619213783, + "learning_rate": 2.2127707357648255e-06, + "loss": 0.6118, + "step": 25796 + }, + { + "epoch": 0.7906399411548364, + "grad_norm": 1.6871236418298472, + "learning_rate": 2.212148026274149e-06, + "loss": 0.7054, + "step": 25797 + }, + { + "epoch": 0.7906705896775775, + "grad_norm": 1.7996371565403948, + "learning_rate": 2.211525393519168e-06, + "loss": 0.6889, + "step": 25798 + }, + { + "epoch": 0.7907012382003188, + "grad_norm": 1.6065759515529825, + "learning_rate": 2.2109028375060203e-06, + "loss": 0.6704, + "step": 25799 + }, + { + "epoch": 0.7907318867230599, + "grad_norm": 1.8472799304857501, + "learning_rate": 2.2102803582408394e-06, + "loss": 0.7083, + "step": 25800 + }, + { + "epoch": 0.7907625352458012, + "grad_norm": 1.8528041195393319, + "learning_rate": 2.2096579557297628e-06, + "loss": 0.6253, + "step": 25801 + }, + { + "epoch": 0.7907931837685424, + "grad_norm": 1.704376998388713, + "learning_rate": 2.2090356299789184e-06, + "loss": 0.7252, + "step": 25802 + }, + { + "epoch": 0.7908238322912836, + "grad_norm": 1.6718250739506906, + "learning_rate": 2.208413380994434e-06, + "loss": 0.5847, + "step": 25803 + }, + { + "epoch": 0.7908544808140248, + "grad_norm": 1.7177144418090737, + "learning_rate": 2.2077912087824528e-06, + "loss": 0.6247, + "step": 25804 + }, + { + "epoch": 0.7908851293367659, + "grad_norm": 0.6656924188846821, + "learning_rate": 2.207169113349098e-06, + "loss": 0.5252, + "step": 25805 + }, + { + "epoch": 0.7909157778595072, + "grad_norm": 1.698144417605188, + "learning_rate": 2.206547094700496e-06, + "loss": 0.5886, + "step": 25806 + }, + { + "epoch": 0.7909464263822483, + "grad_norm": 1.815943837003451, + "learning_rate": 2.2059251528427805e-06, + "loss": 0.662, + "step": 25807 + }, + { + "epoch": 0.7909770749049896, + "grad_norm": 0.6684098476774893, + "learning_rate": 2.205303287782079e-06, + "loss": 0.5249, + "step": 25808 + }, + { + "epoch": 0.7910077234277307, + "grad_norm": 1.5184766466519701, + "learning_rate": 2.2046814995245202e-06, + "loss": 0.6172, + "step": 25809 + }, + { + "epoch": 0.791038371950472, + "grad_norm": 1.762922180362538, + "learning_rate": 2.204059788076227e-06, + "loss": 0.6043, + "step": 25810 + }, + { + "epoch": 0.7910690204732131, + "grad_norm": 1.9349464145877928, + "learning_rate": 2.203438153443328e-06, + "loss": 0.6632, + "step": 25811 + }, + { + "epoch": 0.7910996689959544, + "grad_norm": 1.5918464215399892, + "learning_rate": 2.20281659563195e-06, + "loss": 0.6136, + "step": 25812 + }, + { + "epoch": 0.7911303175186956, + "grad_norm": 0.6937766598409576, + "learning_rate": 2.2021951146482145e-06, + "loss": 0.5369, + "step": 25813 + }, + { + "epoch": 0.7911609660414368, + "grad_norm": 1.3585668786204472, + "learning_rate": 2.2015737104982438e-06, + "loss": 0.5532, + "step": 25814 + }, + { + "epoch": 0.791191614564178, + "grad_norm": 1.5641712531437577, + "learning_rate": 2.200952383188162e-06, + "loss": 0.554, + "step": 25815 + }, + { + "epoch": 0.7912222630869192, + "grad_norm": 1.5732604882205155, + "learning_rate": 2.2003311327240927e-06, + "loss": 0.6726, + "step": 25816 + }, + { + "epoch": 0.7912529116096604, + "grad_norm": 1.7071044017693437, + "learning_rate": 2.1997099591121583e-06, + "loss": 0.7028, + "step": 25817 + }, + { + "epoch": 0.7912835601324016, + "grad_norm": 1.5029680948150221, + "learning_rate": 2.199088862358475e-06, + "loss": 0.6418, + "step": 25818 + }, + { + "epoch": 0.7913142086551428, + "grad_norm": 1.8761551846856026, + "learning_rate": 2.1984678424691654e-06, + "loss": 0.5836, + "step": 25819 + }, + { + "epoch": 0.791344857177884, + "grad_norm": 1.6554508895301085, + "learning_rate": 2.19784689945035e-06, + "loss": 0.6465, + "step": 25820 + }, + { + "epoch": 0.7913755057006252, + "grad_norm": 0.7124769824083206, + "learning_rate": 2.197226033308146e-06, + "loss": 0.5237, + "step": 25821 + }, + { + "epoch": 0.7914061542233665, + "grad_norm": 1.670348367766903, + "learning_rate": 2.1966052440486653e-06, + "loss": 0.599, + "step": 25822 + }, + { + "epoch": 0.7914368027461076, + "grad_norm": 1.693482331738215, + "learning_rate": 2.195984531678034e-06, + "loss": 0.628, + "step": 25823 + }, + { + "epoch": 0.7914674512688489, + "grad_norm": 1.8273332215908518, + "learning_rate": 2.1953638962023647e-06, + "loss": 0.6813, + "step": 25824 + }, + { + "epoch": 0.79149809979159, + "grad_norm": 0.6559962564799978, + "learning_rate": 2.1947433376277695e-06, + "loss": 0.5114, + "step": 25825 + }, + { + "epoch": 0.7915287483143313, + "grad_norm": 1.8835082204839104, + "learning_rate": 2.1941228559603646e-06, + "loss": 0.6427, + "step": 25826 + }, + { + "epoch": 0.7915593968370724, + "grad_norm": 1.607511182743327, + "learning_rate": 2.1935024512062643e-06, + "loss": 0.6586, + "step": 25827 + }, + { + "epoch": 0.7915900453598137, + "grad_norm": 1.7167548014429028, + "learning_rate": 2.1928821233715847e-06, + "loss": 0.6376, + "step": 25828 + }, + { + "epoch": 0.7916206938825548, + "grad_norm": 1.6628589369706817, + "learning_rate": 2.1922618724624325e-06, + "loss": 0.697, + "step": 25829 + }, + { + "epoch": 0.7916513424052961, + "grad_norm": 1.4293827845453189, + "learning_rate": 2.191641698484921e-06, + "loss": 0.5816, + "step": 25830 + }, + { + "epoch": 0.7916819909280373, + "grad_norm": 1.8142589967943619, + "learning_rate": 2.1910216014451655e-06, + "loss": 0.6534, + "step": 25831 + }, + { + "epoch": 0.7917126394507785, + "grad_norm": 1.6465992388631556, + "learning_rate": 2.190401581349272e-06, + "loss": 0.7073, + "step": 25832 + }, + { + "epoch": 0.7917432879735197, + "grad_norm": 1.6125311670410498, + "learning_rate": 2.1897816382033433e-06, + "loss": 0.6676, + "step": 25833 + }, + { + "epoch": 0.7917739364962609, + "grad_norm": 1.7485101977271065, + "learning_rate": 2.1891617720135004e-06, + "loss": 0.6345, + "step": 25834 + }, + { + "epoch": 0.7918045850190021, + "grad_norm": 1.748029649608274, + "learning_rate": 2.188541982785843e-06, + "loss": 0.618, + "step": 25835 + }, + { + "epoch": 0.7918352335417432, + "grad_norm": 1.759554247150461, + "learning_rate": 2.1879222705264826e-06, + "loss": 0.6777, + "step": 25836 + }, + { + "epoch": 0.7918658820644845, + "grad_norm": 1.8812972253878848, + "learning_rate": 2.1873026352415206e-06, + "loss": 0.6339, + "step": 25837 + }, + { + "epoch": 0.7918965305872256, + "grad_norm": 1.7668594371133965, + "learning_rate": 2.186683076937064e-06, + "loss": 0.7344, + "step": 25838 + }, + { + "epoch": 0.7919271791099669, + "grad_norm": 1.5415866703783134, + "learning_rate": 2.186063595619221e-06, + "loss": 0.66, + "step": 25839 + }, + { + "epoch": 0.7919578276327081, + "grad_norm": 1.460772014266458, + "learning_rate": 2.185444191294089e-06, + "loss": 0.6249, + "step": 25840 + }, + { + "epoch": 0.7919884761554493, + "grad_norm": 0.6776405107634003, + "learning_rate": 2.184824863967776e-06, + "loss": 0.5165, + "step": 25841 + }, + { + "epoch": 0.7920191246781905, + "grad_norm": 1.7985650640947313, + "learning_rate": 2.184205613646386e-06, + "loss": 0.625, + "step": 25842 + }, + { + "epoch": 0.7920497732009317, + "grad_norm": 1.5776194737445346, + "learning_rate": 2.183586440336015e-06, + "loss": 0.6995, + "step": 25843 + }, + { + "epoch": 0.7920804217236729, + "grad_norm": 1.9792930317296962, + "learning_rate": 2.182967344042767e-06, + "loss": 0.719, + "step": 25844 + }, + { + "epoch": 0.7921110702464141, + "grad_norm": 1.7262386469638524, + "learning_rate": 2.182348324772744e-06, + "loss": 0.6161, + "step": 25845 + }, + { + "epoch": 0.7921417187691553, + "grad_norm": 1.6911346971594092, + "learning_rate": 2.1817293825320407e-06, + "loss": 0.6934, + "step": 25846 + }, + { + "epoch": 0.7921723672918966, + "grad_norm": 1.7109845819041694, + "learning_rate": 2.181110517326761e-06, + "loss": 0.5959, + "step": 25847 + }, + { + "epoch": 0.7922030158146377, + "grad_norm": 1.753757629801344, + "learning_rate": 2.1804917291629968e-06, + "loss": 0.7273, + "step": 25848 + }, + { + "epoch": 0.792233664337379, + "grad_norm": 1.5322171236427196, + "learning_rate": 2.179873018046849e-06, + "loss": 0.6768, + "step": 25849 + }, + { + "epoch": 0.7922643128601201, + "grad_norm": 1.6614135471356153, + "learning_rate": 2.1792543839844148e-06, + "loss": 0.5837, + "step": 25850 + }, + { + "epoch": 0.7922949613828614, + "grad_norm": 1.6528864577208133, + "learning_rate": 2.1786358269817865e-06, + "loss": 0.6414, + "step": 25851 + }, + { + "epoch": 0.7923256099056025, + "grad_norm": 1.624591326061899, + "learning_rate": 2.1780173470450593e-06, + "loss": 0.6452, + "step": 25852 + }, + { + "epoch": 0.7923562584283438, + "grad_norm": 1.7909107969465041, + "learning_rate": 2.177398944180332e-06, + "loss": 0.5726, + "step": 25853 + }, + { + "epoch": 0.7923869069510849, + "grad_norm": 1.7997378987564212, + "learning_rate": 2.1767806183936923e-06, + "loss": 0.7314, + "step": 25854 + }, + { + "epoch": 0.7924175554738262, + "grad_norm": 1.872914202979911, + "learning_rate": 2.1761623696912337e-06, + "loss": 0.6052, + "step": 25855 + }, + { + "epoch": 0.7924482039965673, + "grad_norm": 2.0773847502519036, + "learning_rate": 2.1755441980790525e-06, + "loss": 0.7186, + "step": 25856 + }, + { + "epoch": 0.7924788525193086, + "grad_norm": 1.6549666558309801, + "learning_rate": 2.174926103563234e-06, + "loss": 0.6679, + "step": 25857 + }, + { + "epoch": 0.7925095010420498, + "grad_norm": 1.78812419403257, + "learning_rate": 2.174308086149873e-06, + "loss": 0.5922, + "step": 25858 + }, + { + "epoch": 0.792540149564791, + "grad_norm": 2.0307564332883357, + "learning_rate": 2.1736901458450545e-06, + "loss": 0.7648, + "step": 25859 + }, + { + "epoch": 0.7925707980875322, + "grad_norm": 1.721736948544978, + "learning_rate": 2.173072282654868e-06, + "loss": 0.646, + "step": 25860 + }, + { + "epoch": 0.7926014466102734, + "grad_norm": 1.9324069123057979, + "learning_rate": 2.1724544965854066e-06, + "loss": 0.7103, + "step": 25861 + }, + { + "epoch": 0.7926320951330146, + "grad_norm": 1.5730284619906207, + "learning_rate": 2.1718367876427517e-06, + "loss": 0.5542, + "step": 25862 + }, + { + "epoch": 0.7926627436557558, + "grad_norm": 1.6922614190970378, + "learning_rate": 2.171219155832991e-06, + "loss": 0.5487, + "step": 25863 + }, + { + "epoch": 0.792693392178497, + "grad_norm": 1.7249069830065638, + "learning_rate": 2.1706016011622134e-06, + "loss": 0.6089, + "step": 25864 + }, + { + "epoch": 0.7927240407012383, + "grad_norm": 1.655697844204627, + "learning_rate": 2.169984123636499e-06, + "loss": 0.6141, + "step": 25865 + }, + { + "epoch": 0.7927546892239794, + "grad_norm": 1.463558782073219, + "learning_rate": 2.1693667232619373e-06, + "loss": 0.5581, + "step": 25866 + }, + { + "epoch": 0.7927853377467206, + "grad_norm": 1.6718963733194374, + "learning_rate": 2.168749400044606e-06, + "loss": 0.6832, + "step": 25867 + }, + { + "epoch": 0.7928159862694618, + "grad_norm": 1.5780256960463017, + "learning_rate": 2.168132153990592e-06, + "loss": 0.6275, + "step": 25868 + }, + { + "epoch": 0.792846634792203, + "grad_norm": 1.7317802767565778, + "learning_rate": 2.1675149851059774e-06, + "loss": 0.5885, + "step": 25869 + }, + { + "epoch": 0.7928772833149442, + "grad_norm": 1.7181594168228675, + "learning_rate": 2.1668978933968386e-06, + "loss": 0.6296, + "step": 25870 + }, + { + "epoch": 0.7929079318376854, + "grad_norm": 1.7272734049908443, + "learning_rate": 2.166280878869259e-06, + "loss": 0.6812, + "step": 25871 + }, + { + "epoch": 0.7929385803604266, + "grad_norm": 1.8931700305464478, + "learning_rate": 2.1656639415293213e-06, + "loss": 0.6523, + "step": 25872 + }, + { + "epoch": 0.7929692288831678, + "grad_norm": 2.113688039328918, + "learning_rate": 2.165047081383098e-06, + "loss": 0.7854, + "step": 25873 + }, + { + "epoch": 0.792999877405909, + "grad_norm": 0.6659025531316843, + "learning_rate": 2.1644302984366717e-06, + "loss": 0.5346, + "step": 25874 + }, + { + "epoch": 0.7930305259286502, + "grad_norm": 1.547004426647133, + "learning_rate": 2.16381359269612e-06, + "loss": 0.6962, + "step": 25875 + }, + { + "epoch": 0.7930611744513915, + "grad_norm": 0.681908360618903, + "learning_rate": 2.1631969641675155e-06, + "loss": 0.5262, + "step": 25876 + }, + { + "epoch": 0.7930918229741326, + "grad_norm": 1.433422639300872, + "learning_rate": 2.1625804128569394e-06, + "loss": 0.5393, + "step": 25877 + }, + { + "epoch": 0.7931224714968739, + "grad_norm": 1.6374968164003851, + "learning_rate": 2.1619639387704617e-06, + "loss": 0.6594, + "step": 25878 + }, + { + "epoch": 0.793153120019615, + "grad_norm": 0.678515311447597, + "learning_rate": 2.1613475419141573e-06, + "loss": 0.5183, + "step": 25879 + }, + { + "epoch": 0.7931837685423563, + "grad_norm": 1.8584488330651892, + "learning_rate": 2.1607312222941045e-06, + "loss": 0.7588, + "step": 25880 + }, + { + "epoch": 0.7932144170650974, + "grad_norm": 0.6828500963161478, + "learning_rate": 2.16011497991637e-06, + "loss": 0.5289, + "step": 25881 + }, + { + "epoch": 0.7932450655878387, + "grad_norm": 1.69425050580842, + "learning_rate": 2.1594988147870287e-06, + "loss": 0.5797, + "step": 25882 + }, + { + "epoch": 0.7932757141105798, + "grad_norm": 1.7802289362366195, + "learning_rate": 2.158882726912155e-06, + "loss": 0.6506, + "step": 25883 + }, + { + "epoch": 0.7933063626333211, + "grad_norm": 1.7367943803371173, + "learning_rate": 2.1582667162978122e-06, + "loss": 0.6326, + "step": 25884 + }, + { + "epoch": 0.7933370111560623, + "grad_norm": 1.7339967623477404, + "learning_rate": 2.1576507829500746e-06, + "loss": 0.5907, + "step": 25885 + }, + { + "epoch": 0.7933676596788035, + "grad_norm": 1.4278958157964914, + "learning_rate": 2.1570349268750135e-06, + "loss": 0.5559, + "step": 25886 + }, + { + "epoch": 0.7933983082015447, + "grad_norm": 1.8859922762711068, + "learning_rate": 2.15641914807869e-06, + "loss": 0.65, + "step": 25887 + }, + { + "epoch": 0.7934289567242859, + "grad_norm": 1.7651666427166879, + "learning_rate": 2.1558034465671785e-06, + "loss": 0.5962, + "step": 25888 + }, + { + "epoch": 0.7934596052470271, + "grad_norm": 1.6954405373134855, + "learning_rate": 2.1551878223465383e-06, + "loss": 0.6849, + "step": 25889 + }, + { + "epoch": 0.7934902537697683, + "grad_norm": 1.6735477799915968, + "learning_rate": 2.154572275422844e-06, + "loss": 0.6519, + "step": 25890 + }, + { + "epoch": 0.7935209022925095, + "grad_norm": 1.7677402172348384, + "learning_rate": 2.1539568058021567e-06, + "loss": 0.6773, + "step": 25891 + }, + { + "epoch": 0.7935515508152508, + "grad_norm": 1.5035008691203937, + "learning_rate": 2.1533414134905384e-06, + "loss": 0.5055, + "step": 25892 + }, + { + "epoch": 0.7935821993379919, + "grad_norm": 1.7364974602339573, + "learning_rate": 2.1527260984940533e-06, + "loss": 0.6166, + "step": 25893 + }, + { + "epoch": 0.7936128478607332, + "grad_norm": 1.6868521695757774, + "learning_rate": 2.15211086081877e-06, + "loss": 0.6505, + "step": 25894 + }, + { + "epoch": 0.7936434963834743, + "grad_norm": 1.5076600921390126, + "learning_rate": 2.1514957004707425e-06, + "loss": 0.5359, + "step": 25895 + }, + { + "epoch": 0.7936741449062156, + "grad_norm": 1.6280433509947176, + "learning_rate": 2.150880617456037e-06, + "loss": 0.6283, + "step": 25896 + }, + { + "epoch": 0.7937047934289567, + "grad_norm": 1.7286843689760993, + "learning_rate": 2.150265611780715e-06, + "loss": 0.6051, + "step": 25897 + }, + { + "epoch": 0.7937354419516979, + "grad_norm": 1.7461764090491207, + "learning_rate": 2.1496506834508323e-06, + "loss": 0.6076, + "step": 25898 + }, + { + "epoch": 0.7937660904744391, + "grad_norm": 1.7816905225923745, + "learning_rate": 2.1490358324724526e-06, + "loss": 0.6193, + "step": 25899 + }, + { + "epoch": 0.7937967389971803, + "grad_norm": 1.575711388290399, + "learning_rate": 2.148421058851625e-06, + "loss": 0.6128, + "step": 25900 + }, + { + "epoch": 0.7938273875199215, + "grad_norm": 1.8252562811223803, + "learning_rate": 2.14780636259442e-06, + "loss": 0.6975, + "step": 25901 + }, + { + "epoch": 0.7938580360426627, + "grad_norm": 1.6073231981715435, + "learning_rate": 2.147191743706889e-06, + "loss": 0.593, + "step": 25902 + }, + { + "epoch": 0.793888684565404, + "grad_norm": 1.6366667162246535, + "learning_rate": 2.1465772021950827e-06, + "loss": 0.6497, + "step": 25903 + }, + { + "epoch": 0.7939193330881451, + "grad_norm": 1.7244859405042583, + "learning_rate": 2.1459627380650615e-06, + "loss": 0.7196, + "step": 25904 + }, + { + "epoch": 0.7939499816108864, + "grad_norm": 1.8830379543759945, + "learning_rate": 2.145348351322881e-06, + "loss": 0.7883, + "step": 25905 + }, + { + "epoch": 0.7939806301336275, + "grad_norm": 1.7813861130812216, + "learning_rate": 2.144734041974591e-06, + "loss": 0.6294, + "step": 25906 + }, + { + "epoch": 0.7940112786563688, + "grad_norm": 0.6472523639938351, + "learning_rate": 2.144119810026245e-06, + "loss": 0.5079, + "step": 25907 + }, + { + "epoch": 0.7940419271791099, + "grad_norm": 1.6846154811737408, + "learning_rate": 2.1435056554838982e-06, + "loss": 0.7114, + "step": 25908 + }, + { + "epoch": 0.7940725757018512, + "grad_norm": 1.5236414058388739, + "learning_rate": 2.142891578353602e-06, + "loss": 0.6232, + "step": 25909 + }, + { + "epoch": 0.7941032242245923, + "grad_norm": 1.439706004412147, + "learning_rate": 2.142277578641405e-06, + "loss": 0.5716, + "step": 25910 + }, + { + "epoch": 0.7941338727473336, + "grad_norm": 1.9010635060745524, + "learning_rate": 2.141663656353357e-06, + "loss": 0.7212, + "step": 25911 + }, + { + "epoch": 0.7941645212700748, + "grad_norm": 0.6742781024928481, + "learning_rate": 2.141049811495505e-06, + "loss": 0.5261, + "step": 25912 + }, + { + "epoch": 0.794195169792816, + "grad_norm": 1.7340100324452392, + "learning_rate": 2.140436044073904e-06, + "loss": 0.6934, + "step": 25913 + }, + { + "epoch": 0.7942258183155572, + "grad_norm": 1.8941025211928983, + "learning_rate": 2.139822354094595e-06, + "loss": 0.7907, + "step": 25914 + }, + { + "epoch": 0.7942564668382984, + "grad_norm": 1.711541784039244, + "learning_rate": 2.1392087415636264e-06, + "loss": 0.7344, + "step": 25915 + }, + { + "epoch": 0.7942871153610396, + "grad_norm": 1.5905573908174997, + "learning_rate": 2.1385952064870464e-06, + "loss": 0.6509, + "step": 25916 + }, + { + "epoch": 0.7943177638837808, + "grad_norm": 0.653157946033587, + "learning_rate": 2.137981748870902e-06, + "loss": 0.5162, + "step": 25917 + }, + { + "epoch": 0.794348412406522, + "grad_norm": 0.6695732100779616, + "learning_rate": 2.1373683687212343e-06, + "loss": 0.5384, + "step": 25918 + }, + { + "epoch": 0.7943790609292632, + "grad_norm": 1.799131548689024, + "learning_rate": 2.136755066044083e-06, + "loss": 0.6413, + "step": 25919 + }, + { + "epoch": 0.7944097094520044, + "grad_norm": 1.6441908576916884, + "learning_rate": 2.1361418408455014e-06, + "loss": 0.6927, + "step": 25920 + }, + { + "epoch": 0.7944403579747457, + "grad_norm": 1.6425544929531297, + "learning_rate": 2.1355286931315255e-06, + "loss": 0.6319, + "step": 25921 + }, + { + "epoch": 0.7944710064974868, + "grad_norm": 0.6712539574072631, + "learning_rate": 2.1349156229081957e-06, + "loss": 0.5242, + "step": 25922 + }, + { + "epoch": 0.7945016550202281, + "grad_norm": 1.5157633811808628, + "learning_rate": 2.1343026301815552e-06, + "loss": 0.5918, + "step": 25923 + }, + { + "epoch": 0.7945323035429692, + "grad_norm": 1.57020861734155, + "learning_rate": 2.133689714957644e-06, + "loss": 0.6101, + "step": 25924 + }, + { + "epoch": 0.7945629520657105, + "grad_norm": 1.786651333346657, + "learning_rate": 2.1330768772425003e-06, + "loss": 0.6734, + "step": 25925 + }, + { + "epoch": 0.7945936005884516, + "grad_norm": 1.7486035644261586, + "learning_rate": 2.132464117042161e-06, + "loss": 0.6633, + "step": 25926 + }, + { + "epoch": 0.7946242491111929, + "grad_norm": 2.0012304031890213, + "learning_rate": 2.1318514343626674e-06, + "loss": 0.7212, + "step": 25927 + }, + { + "epoch": 0.794654897633934, + "grad_norm": 1.5786264595396395, + "learning_rate": 2.1312388292100563e-06, + "loss": 0.5551, + "step": 25928 + }, + { + "epoch": 0.7946855461566752, + "grad_norm": 1.5180106819434078, + "learning_rate": 2.130626301590363e-06, + "loss": 0.5455, + "step": 25929 + }, + { + "epoch": 0.7947161946794165, + "grad_norm": 1.6388093891985134, + "learning_rate": 2.130013851509617e-06, + "loss": 0.7086, + "step": 25930 + }, + { + "epoch": 0.7947468432021576, + "grad_norm": 1.4605751851131963, + "learning_rate": 2.1294014789738625e-06, + "loss": 0.5871, + "step": 25931 + }, + { + "epoch": 0.7947774917248989, + "grad_norm": 1.4786946240047614, + "learning_rate": 2.1287891839891304e-06, + "loss": 0.5574, + "step": 25932 + }, + { + "epoch": 0.79480814024764, + "grad_norm": 1.598558856264121, + "learning_rate": 2.128176966561448e-06, + "loss": 0.6268, + "step": 25933 + }, + { + "epoch": 0.7948387887703813, + "grad_norm": 1.6661780091943315, + "learning_rate": 2.127564826696854e-06, + "loss": 0.5928, + "step": 25934 + }, + { + "epoch": 0.7948694372931224, + "grad_norm": 0.6763915035323895, + "learning_rate": 2.1269527644013766e-06, + "loss": 0.5404, + "step": 25935 + }, + { + "epoch": 0.7949000858158637, + "grad_norm": 1.7126535147152486, + "learning_rate": 2.126340779681051e-06, + "loss": 0.651, + "step": 25936 + }, + { + "epoch": 0.7949307343386048, + "grad_norm": 1.6531400387171495, + "learning_rate": 2.1257288725419024e-06, + "loss": 0.5729, + "step": 25937 + }, + { + "epoch": 0.7949613828613461, + "grad_norm": 1.6269273113346667, + "learning_rate": 2.1251170429899604e-06, + "loss": 0.688, + "step": 25938 + }, + { + "epoch": 0.7949920313840872, + "grad_norm": 1.795666040250951, + "learning_rate": 2.1245052910312593e-06, + "loss": 0.66, + "step": 25939 + }, + { + "epoch": 0.7950226799068285, + "grad_norm": 1.6375662356401655, + "learning_rate": 2.1238936166718215e-06, + "loss": 0.6116, + "step": 25940 + }, + { + "epoch": 0.7950533284295697, + "grad_norm": 0.6691828751407048, + "learning_rate": 2.1232820199176697e-06, + "loss": 0.5275, + "step": 25941 + }, + { + "epoch": 0.7950839769523109, + "grad_norm": 0.6612672940635113, + "learning_rate": 2.1226705007748418e-06, + "loss": 0.5211, + "step": 25942 + }, + { + "epoch": 0.7951146254750521, + "grad_norm": 0.6865511691610422, + "learning_rate": 2.122059059249354e-06, + "loss": 0.5203, + "step": 25943 + }, + { + "epoch": 0.7951452739977933, + "grad_norm": 1.4155570263694126, + "learning_rate": 2.121447695347236e-06, + "loss": 0.6477, + "step": 25944 + }, + { + "epoch": 0.7951759225205345, + "grad_norm": 1.5522406597548823, + "learning_rate": 2.120836409074507e-06, + "loss": 0.5524, + "step": 25945 + }, + { + "epoch": 0.7952065710432757, + "grad_norm": 1.6074230932092939, + "learning_rate": 2.120225200437194e-06, + "loss": 0.6514, + "step": 25946 + }, + { + "epoch": 0.7952372195660169, + "grad_norm": 1.7371582024438876, + "learning_rate": 2.119614069441319e-06, + "loss": 0.5968, + "step": 25947 + }, + { + "epoch": 0.7952678680887582, + "grad_norm": 0.6662853449683498, + "learning_rate": 2.119003016092902e-06, + "loss": 0.5388, + "step": 25948 + }, + { + "epoch": 0.7952985166114993, + "grad_norm": 0.6770673041275459, + "learning_rate": 2.1183920403979643e-06, + "loss": 0.5076, + "step": 25949 + }, + { + "epoch": 0.7953291651342406, + "grad_norm": 1.7798027700499233, + "learning_rate": 2.1177811423625283e-06, + "loss": 0.6575, + "step": 25950 + }, + { + "epoch": 0.7953598136569817, + "grad_norm": 0.6810455449422599, + "learning_rate": 2.117170321992612e-06, + "loss": 0.5459, + "step": 25951 + }, + { + "epoch": 0.795390462179723, + "grad_norm": 1.7589829498685416, + "learning_rate": 2.1165595792942285e-06, + "loss": 0.6853, + "step": 25952 + }, + { + "epoch": 0.7954211107024641, + "grad_norm": 1.5796956628894598, + "learning_rate": 2.1159489142734046e-06, + "loss": 0.6201, + "step": 25953 + }, + { + "epoch": 0.7954517592252054, + "grad_norm": 1.7363736759048112, + "learning_rate": 2.1153383269361516e-06, + "loss": 0.6968, + "step": 25954 + }, + { + "epoch": 0.7954824077479465, + "grad_norm": 1.8892879536911502, + "learning_rate": 2.11472781728849e-06, + "loss": 0.7057, + "step": 25955 + }, + { + "epoch": 0.7955130562706878, + "grad_norm": 1.6029346950871721, + "learning_rate": 2.1141173853364306e-06, + "loss": 0.5992, + "step": 25956 + }, + { + "epoch": 0.795543704793429, + "grad_norm": 0.6611603644330292, + "learning_rate": 2.1135070310859895e-06, + "loss": 0.5147, + "step": 25957 + }, + { + "epoch": 0.7955743533161702, + "grad_norm": 1.5083481594835897, + "learning_rate": 2.1128967545431844e-06, + "loss": 0.5792, + "step": 25958 + }, + { + "epoch": 0.7956050018389114, + "grad_norm": 1.7410406129030682, + "learning_rate": 2.1122865557140226e-06, + "loss": 0.6617, + "step": 25959 + }, + { + "epoch": 0.7956356503616525, + "grad_norm": 1.5827307091464795, + "learning_rate": 2.1116764346045193e-06, + "loss": 0.6716, + "step": 25960 + }, + { + "epoch": 0.7956662988843938, + "grad_norm": 1.6700955687542691, + "learning_rate": 2.1110663912206895e-06, + "loss": 0.7707, + "step": 25961 + }, + { + "epoch": 0.7956969474071349, + "grad_norm": 1.7404659150030866, + "learning_rate": 2.110456425568539e-06, + "loss": 0.7177, + "step": 25962 + }, + { + "epoch": 0.7957275959298762, + "grad_norm": 1.6314852647553622, + "learning_rate": 2.109846537654082e-06, + "loss": 0.5132, + "step": 25963 + }, + { + "epoch": 0.7957582444526173, + "grad_norm": 1.5105931657074918, + "learning_rate": 2.1092367274833225e-06, + "loss": 0.625, + "step": 25964 + }, + { + "epoch": 0.7957888929753586, + "grad_norm": 0.6665969387236709, + "learning_rate": 2.108626995062274e-06, + "loss": 0.5187, + "step": 25965 + }, + { + "epoch": 0.7958195414980997, + "grad_norm": 1.7277843784683085, + "learning_rate": 2.108017340396944e-06, + "loss": 0.6118, + "step": 25966 + }, + { + "epoch": 0.795850190020841, + "grad_norm": 1.6194132746055687, + "learning_rate": 2.1074077634933364e-06, + "loss": 0.61, + "step": 25967 + }, + { + "epoch": 0.7958808385435822, + "grad_norm": 1.6515558053242563, + "learning_rate": 2.10679826435746e-06, + "loss": 0.6987, + "step": 25968 + }, + { + "epoch": 0.7959114870663234, + "grad_norm": 0.6571716513931969, + "learning_rate": 2.1061888429953215e-06, + "loss": 0.5318, + "step": 25969 + }, + { + "epoch": 0.7959421355890646, + "grad_norm": 1.660201229068974, + "learning_rate": 2.105579499412922e-06, + "loss": 0.6896, + "step": 25970 + }, + { + "epoch": 0.7959727841118058, + "grad_norm": 1.5225462479794432, + "learning_rate": 2.1049702336162682e-06, + "loss": 0.6241, + "step": 25971 + }, + { + "epoch": 0.796003432634547, + "grad_norm": 1.6286733396672768, + "learning_rate": 2.104361045611364e-06, + "loss": 0.704, + "step": 25972 + }, + { + "epoch": 0.7960340811572882, + "grad_norm": 1.6314117219670212, + "learning_rate": 2.103751935404209e-06, + "loss": 0.6882, + "step": 25973 + }, + { + "epoch": 0.7960647296800294, + "grad_norm": 1.9718177906025833, + "learning_rate": 2.1031429030008086e-06, + "loss": 0.7691, + "step": 25974 + }, + { + "epoch": 0.7960953782027707, + "grad_norm": 1.5419741529427236, + "learning_rate": 2.1025339484071595e-06, + "loss": 0.5614, + "step": 25975 + }, + { + "epoch": 0.7961260267255118, + "grad_norm": 1.6898605707166536, + "learning_rate": 2.101925071629264e-06, + "loss": 0.7147, + "step": 25976 + }, + { + "epoch": 0.7961566752482531, + "grad_norm": 1.5773865938585696, + "learning_rate": 2.101316272673123e-06, + "loss": 0.7317, + "step": 25977 + }, + { + "epoch": 0.7961873237709942, + "grad_norm": 1.945272062466861, + "learning_rate": 2.100707551544733e-06, + "loss": 0.7191, + "step": 25978 + }, + { + "epoch": 0.7962179722937355, + "grad_norm": 1.574905590504979, + "learning_rate": 2.100098908250091e-06, + "loss": 0.5792, + "step": 25979 + }, + { + "epoch": 0.7962486208164766, + "grad_norm": 1.8649883227063888, + "learning_rate": 2.0994903427951995e-06, + "loss": 0.6245, + "step": 25980 + }, + { + "epoch": 0.7962792693392179, + "grad_norm": 1.776161654232585, + "learning_rate": 2.098881855186048e-06, + "loss": 0.5866, + "step": 25981 + }, + { + "epoch": 0.796309917861959, + "grad_norm": 1.6769914146026719, + "learning_rate": 2.0982734454286347e-06, + "loss": 0.7455, + "step": 25982 + }, + { + "epoch": 0.7963405663847003, + "grad_norm": 0.6738528402462202, + "learning_rate": 2.0976651135289583e-06, + "loss": 0.5642, + "step": 25983 + }, + { + "epoch": 0.7963712149074414, + "grad_norm": 1.6811866286275807, + "learning_rate": 2.0970568594930063e-06, + "loss": 0.696, + "step": 25984 + }, + { + "epoch": 0.7964018634301827, + "grad_norm": 1.8471138807821672, + "learning_rate": 2.096448683326778e-06, + "loss": 0.6049, + "step": 25985 + }, + { + "epoch": 0.7964325119529239, + "grad_norm": 1.6308634093958965, + "learning_rate": 2.0958405850362607e-06, + "loss": 0.6596, + "step": 25986 + }, + { + "epoch": 0.7964631604756651, + "grad_norm": 1.473298929184122, + "learning_rate": 2.0952325646274475e-06, + "loss": 0.5845, + "step": 25987 + }, + { + "epoch": 0.7964938089984063, + "grad_norm": 1.3948254749001296, + "learning_rate": 2.094624622106334e-06, + "loss": 0.5971, + "step": 25988 + }, + { + "epoch": 0.7965244575211475, + "grad_norm": 1.86067760103744, + "learning_rate": 2.094016757478904e-06, + "loss": 0.7547, + "step": 25989 + }, + { + "epoch": 0.7965551060438887, + "grad_norm": 0.6491855429579151, + "learning_rate": 2.0934089707511483e-06, + "loss": 0.5377, + "step": 25990 + }, + { + "epoch": 0.7965857545666298, + "grad_norm": 0.6650329991567956, + "learning_rate": 2.0928012619290617e-06, + "loss": 0.5265, + "step": 25991 + }, + { + "epoch": 0.7966164030893711, + "grad_norm": 1.6287597097131237, + "learning_rate": 2.092193631018624e-06, + "loss": 0.6887, + "step": 25992 + }, + { + "epoch": 0.7966470516121122, + "grad_norm": 1.9874213170219415, + "learning_rate": 2.0915860780258257e-06, + "loss": 0.7719, + "step": 25993 + }, + { + "epoch": 0.7966777001348535, + "grad_norm": 0.6973033752234934, + "learning_rate": 2.090978602956656e-06, + "loss": 0.5598, + "step": 25994 + }, + { + "epoch": 0.7967083486575947, + "grad_norm": 0.6567773040676048, + "learning_rate": 2.0903712058170945e-06, + "loss": 0.5187, + "step": 25995 + }, + { + "epoch": 0.7967389971803359, + "grad_norm": 1.5748040970198018, + "learning_rate": 2.089763886613132e-06, + "loss": 0.642, + "step": 25996 + }, + { + "epoch": 0.7967696457030771, + "grad_norm": 1.7013969541463685, + "learning_rate": 2.089156645350745e-06, + "loss": 0.5978, + "step": 25997 + }, + { + "epoch": 0.7968002942258183, + "grad_norm": 1.3827412118963456, + "learning_rate": 2.0885494820359266e-06, + "loss": 0.5651, + "step": 25998 + }, + { + "epoch": 0.7968309427485595, + "grad_norm": 1.7103801938639454, + "learning_rate": 2.087942396674655e-06, + "loss": 0.7223, + "step": 25999 + }, + { + "epoch": 0.7968615912713007, + "grad_norm": 1.7434069456183128, + "learning_rate": 2.0873353892729088e-06, + "loss": 0.6516, + "step": 26000 + }, + { + "epoch": 0.7968922397940419, + "grad_norm": 2.0911552116903143, + "learning_rate": 2.086728459836671e-06, + "loss": 0.7133, + "step": 26001 + }, + { + "epoch": 0.7969228883167832, + "grad_norm": 1.7502112944331807, + "learning_rate": 2.086121608371925e-06, + "loss": 0.6239, + "step": 26002 + }, + { + "epoch": 0.7969535368395243, + "grad_norm": 1.6556196895624646, + "learning_rate": 2.085514834884644e-06, + "loss": 0.642, + "step": 26003 + }, + { + "epoch": 0.7969841853622656, + "grad_norm": 1.4777452008376846, + "learning_rate": 2.084908139380812e-06, + "loss": 0.6969, + "step": 26004 + }, + { + "epoch": 0.7970148338850067, + "grad_norm": 1.640230602799788, + "learning_rate": 2.0843015218664076e-06, + "loss": 0.6148, + "step": 26005 + }, + { + "epoch": 0.797045482407748, + "grad_norm": 1.6869311455060245, + "learning_rate": 2.083694982347403e-06, + "loss": 0.6625, + "step": 26006 + }, + { + "epoch": 0.7970761309304891, + "grad_norm": 1.5740003697334501, + "learning_rate": 2.083088520829779e-06, + "loss": 0.5698, + "step": 26007 + }, + { + "epoch": 0.7971067794532304, + "grad_norm": 1.650299952025477, + "learning_rate": 2.0824821373195083e-06, + "loss": 0.6498, + "step": 26008 + }, + { + "epoch": 0.7971374279759715, + "grad_norm": 1.5945163886538474, + "learning_rate": 2.081875831822565e-06, + "loss": 0.7163, + "step": 26009 + }, + { + "epoch": 0.7971680764987128, + "grad_norm": 1.5241779638170305, + "learning_rate": 2.081269604344929e-06, + "loss": 0.6544, + "step": 26010 + }, + { + "epoch": 0.797198725021454, + "grad_norm": 1.6067943932726945, + "learning_rate": 2.0806634548925665e-06, + "loss": 0.5884, + "step": 26011 + }, + { + "epoch": 0.7972293735441952, + "grad_norm": 1.616687181567119, + "learning_rate": 2.0800573834714533e-06, + "loss": 0.6671, + "step": 26012 + }, + { + "epoch": 0.7972600220669364, + "grad_norm": 1.6401590061744848, + "learning_rate": 2.0794513900875644e-06, + "loss": 0.5718, + "step": 26013 + }, + { + "epoch": 0.7972906705896776, + "grad_norm": 1.7809653794781728, + "learning_rate": 2.0788454747468644e-06, + "loss": 0.6695, + "step": 26014 + }, + { + "epoch": 0.7973213191124188, + "grad_norm": 1.4937328380534232, + "learning_rate": 2.0782396374553293e-06, + "loss": 0.5553, + "step": 26015 + }, + { + "epoch": 0.79735196763516, + "grad_norm": 1.8690337631656166, + "learning_rate": 2.07763387821892e-06, + "loss": 0.66, + "step": 26016 + }, + { + "epoch": 0.7973826161579012, + "grad_norm": 1.7967576943619314, + "learning_rate": 2.077028197043617e-06, + "loss": 0.7167, + "step": 26017 + }, + { + "epoch": 0.7974132646806424, + "grad_norm": 1.9776578174420907, + "learning_rate": 2.076422593935382e-06, + "loss": 0.6861, + "step": 26018 + }, + { + "epoch": 0.7974439132033836, + "grad_norm": 0.7277997302818257, + "learning_rate": 2.075817068900181e-06, + "loss": 0.5329, + "step": 26019 + }, + { + "epoch": 0.7974745617261249, + "grad_norm": 1.511005581307663, + "learning_rate": 2.075211621943981e-06, + "loss": 0.7365, + "step": 26020 + }, + { + "epoch": 0.797505210248866, + "grad_norm": 1.4734337308116157, + "learning_rate": 2.074606253072752e-06, + "loss": 0.6092, + "step": 26021 + }, + { + "epoch": 0.7975358587716072, + "grad_norm": 1.7103817911139252, + "learning_rate": 2.0740009622924515e-06, + "loss": 0.6514, + "step": 26022 + }, + { + "epoch": 0.7975665072943484, + "grad_norm": 1.6053960162983798, + "learning_rate": 2.0733957496090472e-06, + "loss": 0.6527, + "step": 26023 + }, + { + "epoch": 0.7975971558170896, + "grad_norm": 1.8698631594539625, + "learning_rate": 2.0727906150285037e-06, + "loss": 0.6314, + "step": 26024 + }, + { + "epoch": 0.7976278043398308, + "grad_norm": 1.7623857632670579, + "learning_rate": 2.072185558556785e-06, + "loss": 0.7477, + "step": 26025 + }, + { + "epoch": 0.797658452862572, + "grad_norm": 1.7798542094800716, + "learning_rate": 2.071580580199851e-06, + "loss": 0.703, + "step": 26026 + }, + { + "epoch": 0.7976891013853132, + "grad_norm": 1.5931465445189612, + "learning_rate": 2.070975679963656e-06, + "loss": 0.6686, + "step": 26027 + }, + { + "epoch": 0.7977197499080544, + "grad_norm": 1.5742164513705077, + "learning_rate": 2.0703708578541715e-06, + "loss": 0.6277, + "step": 26028 + }, + { + "epoch": 0.7977503984307956, + "grad_norm": 1.4415249343866314, + "learning_rate": 2.0697661138773528e-06, + "loss": 0.5978, + "step": 26029 + }, + { + "epoch": 0.7977810469535368, + "grad_norm": 1.8079624976616493, + "learning_rate": 2.069161448039154e-06, + "loss": 0.7059, + "step": 26030 + }, + { + "epoch": 0.7978116954762781, + "grad_norm": 1.514925941041682, + "learning_rate": 2.0685568603455375e-06, + "loss": 0.5763, + "step": 26031 + }, + { + "epoch": 0.7978423439990192, + "grad_norm": 0.6603337322528235, + "learning_rate": 2.0679523508024613e-06, + "loss": 0.5025, + "step": 26032 + }, + { + "epoch": 0.7978729925217605, + "grad_norm": 1.6856541755283718, + "learning_rate": 2.0673479194158775e-06, + "loss": 0.7551, + "step": 26033 + }, + { + "epoch": 0.7979036410445016, + "grad_norm": 1.688121348141109, + "learning_rate": 2.0667435661917457e-06, + "loss": 0.5595, + "step": 26034 + }, + { + "epoch": 0.7979342895672429, + "grad_norm": 2.010354509883101, + "learning_rate": 2.0661392911360177e-06, + "loss": 0.7161, + "step": 26035 + }, + { + "epoch": 0.797964938089984, + "grad_norm": 1.505816079657819, + "learning_rate": 2.0655350942546524e-06, + "loss": 0.604, + "step": 26036 + }, + { + "epoch": 0.7979955866127253, + "grad_norm": 1.6506505140010943, + "learning_rate": 2.0649309755536006e-06, + "loss": 0.5613, + "step": 26037 + }, + { + "epoch": 0.7980262351354664, + "grad_norm": 1.6895604544509069, + "learning_rate": 2.0643269350388084e-06, + "loss": 0.6315, + "step": 26038 + }, + { + "epoch": 0.7980568836582077, + "grad_norm": 1.7318850717266938, + "learning_rate": 2.0637229727162377e-06, + "loss": 0.6864, + "step": 26039 + }, + { + "epoch": 0.7980875321809489, + "grad_norm": 1.6727747866149163, + "learning_rate": 2.0631190885918363e-06, + "loss": 0.5307, + "step": 26040 + }, + { + "epoch": 0.7981181807036901, + "grad_norm": 1.9217297793274672, + "learning_rate": 2.062515282671551e-06, + "loss": 0.6321, + "step": 26041 + }, + { + "epoch": 0.7981488292264313, + "grad_norm": 1.429047199643503, + "learning_rate": 2.0619115549613323e-06, + "loss": 0.6198, + "step": 26042 + }, + { + "epoch": 0.7981794777491725, + "grad_norm": 1.6937953163139958, + "learning_rate": 2.0613079054671305e-06, + "loss": 0.6758, + "step": 26043 + }, + { + "epoch": 0.7982101262719137, + "grad_norm": 1.7723723705097947, + "learning_rate": 2.0607043341948962e-06, + "loss": 0.5714, + "step": 26044 + }, + { + "epoch": 0.7982407747946549, + "grad_norm": 0.6714795989830113, + "learning_rate": 2.0601008411505707e-06, + "loss": 0.5455, + "step": 26045 + }, + { + "epoch": 0.7982714233173961, + "grad_norm": 1.717482664336068, + "learning_rate": 2.0594974263401025e-06, + "loss": 0.7049, + "step": 26046 + }, + { + "epoch": 0.7983020718401374, + "grad_norm": 0.6670851553300606, + "learning_rate": 2.058894089769441e-06, + "loss": 0.531, + "step": 26047 + }, + { + "epoch": 0.7983327203628785, + "grad_norm": 1.6719666637950208, + "learning_rate": 2.058290831444528e-06, + "loss": 0.6286, + "step": 26048 + }, + { + "epoch": 0.7983633688856198, + "grad_norm": 1.722761926320592, + "learning_rate": 2.057687651371302e-06, + "loss": 0.6895, + "step": 26049 + }, + { + "epoch": 0.7983940174083609, + "grad_norm": 1.8236346474661589, + "learning_rate": 2.0570845495557166e-06, + "loss": 0.6326, + "step": 26050 + }, + { + "epoch": 0.7984246659311022, + "grad_norm": 1.4430895465075892, + "learning_rate": 2.05648152600371e-06, + "loss": 0.6108, + "step": 26051 + }, + { + "epoch": 0.7984553144538433, + "grad_norm": 1.6541326885127945, + "learning_rate": 2.05587858072122e-06, + "loss": 0.668, + "step": 26052 + }, + { + "epoch": 0.7984859629765845, + "grad_norm": 1.7759280352409994, + "learning_rate": 2.055275713714191e-06, + "loss": 0.7487, + "step": 26053 + }, + { + "epoch": 0.7985166114993257, + "grad_norm": 1.7609378843035275, + "learning_rate": 2.0546729249885633e-06, + "loss": 0.6254, + "step": 26054 + }, + { + "epoch": 0.7985472600220669, + "grad_norm": 2.156746680063711, + "learning_rate": 2.054070214550279e-06, + "loss": 0.6498, + "step": 26055 + }, + { + "epoch": 0.7985779085448081, + "grad_norm": 1.8040485264603043, + "learning_rate": 2.053467582405272e-06, + "loss": 0.5661, + "step": 26056 + }, + { + "epoch": 0.7986085570675493, + "grad_norm": 1.9508107508712136, + "learning_rate": 2.052865028559481e-06, + "loss": 0.6947, + "step": 26057 + }, + { + "epoch": 0.7986392055902906, + "grad_norm": 1.7181515622412622, + "learning_rate": 2.052262553018848e-06, + "loss": 0.5787, + "step": 26058 + }, + { + "epoch": 0.7986698541130317, + "grad_norm": 1.5414583962204758, + "learning_rate": 2.0516601557893044e-06, + "loss": 0.6833, + "step": 26059 + }, + { + "epoch": 0.798700502635773, + "grad_norm": 1.6962955539644595, + "learning_rate": 2.0510578368767842e-06, + "loss": 0.5908, + "step": 26060 + }, + { + "epoch": 0.7987311511585141, + "grad_norm": 1.739785064013238, + "learning_rate": 2.0504555962872263e-06, + "loss": 0.6576, + "step": 26061 + }, + { + "epoch": 0.7987617996812554, + "grad_norm": 1.6220336214021542, + "learning_rate": 2.049853434026562e-06, + "loss": 0.6744, + "step": 26062 + }, + { + "epoch": 0.7987924482039965, + "grad_norm": 1.809357384184415, + "learning_rate": 2.0492513501007295e-06, + "loss": 0.6136, + "step": 26063 + }, + { + "epoch": 0.7988230967267378, + "grad_norm": 0.6541687137332041, + "learning_rate": 2.048649344515654e-06, + "loss": 0.5018, + "step": 26064 + }, + { + "epoch": 0.7988537452494789, + "grad_norm": 1.6841507035142038, + "learning_rate": 2.0480474172772725e-06, + "loss": 0.6465, + "step": 26065 + }, + { + "epoch": 0.7988843937722202, + "grad_norm": 1.7218154838175808, + "learning_rate": 2.047445568391516e-06, + "loss": 0.6279, + "step": 26066 + }, + { + "epoch": 0.7989150422949614, + "grad_norm": 1.9378442679583414, + "learning_rate": 2.046843797864313e-06, + "loss": 0.6196, + "step": 26067 + }, + { + "epoch": 0.7989456908177026, + "grad_norm": 0.6616769257264368, + "learning_rate": 2.046242105701588e-06, + "loss": 0.5211, + "step": 26068 + }, + { + "epoch": 0.7989763393404438, + "grad_norm": 1.478953877465209, + "learning_rate": 2.0456404919092797e-06, + "loss": 0.6191, + "step": 26069 + }, + { + "epoch": 0.799006987863185, + "grad_norm": 1.7086201645365007, + "learning_rate": 2.045038956493309e-06, + "loss": 0.6929, + "step": 26070 + }, + { + "epoch": 0.7990376363859262, + "grad_norm": 1.765543660628219, + "learning_rate": 2.0444374994596073e-06, + "loss": 0.6691, + "step": 26071 + }, + { + "epoch": 0.7990682849086674, + "grad_norm": 1.8397232884497108, + "learning_rate": 2.0438361208140943e-06, + "loss": 0.6856, + "step": 26072 + }, + { + "epoch": 0.7990989334314086, + "grad_norm": 1.6294827052713678, + "learning_rate": 2.043234820562701e-06, + "loss": 0.5332, + "step": 26073 + }, + { + "epoch": 0.7991295819541498, + "grad_norm": 1.8038592967163538, + "learning_rate": 2.0426335987113534e-06, + "loss": 0.7226, + "step": 26074 + }, + { + "epoch": 0.799160230476891, + "grad_norm": 1.7333550177261063, + "learning_rate": 2.04203245526597e-06, + "loss": 0.6303, + "step": 26075 + }, + { + "epoch": 0.7991908789996323, + "grad_norm": 1.870008691911234, + "learning_rate": 2.041431390232477e-06, + "loss": 0.6662, + "step": 26076 + }, + { + "epoch": 0.7992215275223734, + "grad_norm": 1.6319230048400017, + "learning_rate": 2.040830403616799e-06, + "loss": 0.6864, + "step": 26077 + }, + { + "epoch": 0.7992521760451147, + "grad_norm": 1.7953420924441408, + "learning_rate": 2.040229495424857e-06, + "loss": 0.6156, + "step": 26078 + }, + { + "epoch": 0.7992828245678558, + "grad_norm": 1.6475053075275101, + "learning_rate": 2.039628665662563e-06, + "loss": 0.5757, + "step": 26079 + }, + { + "epoch": 0.7993134730905971, + "grad_norm": 0.670758668086698, + "learning_rate": 2.0390279143358517e-06, + "loss": 0.5158, + "step": 26080 + }, + { + "epoch": 0.7993441216133382, + "grad_norm": 1.5590115803165296, + "learning_rate": 2.038427241450631e-06, + "loss": 0.6723, + "step": 26081 + }, + { + "epoch": 0.7993747701360795, + "grad_norm": 1.7729171422629157, + "learning_rate": 2.037826647012827e-06, + "loss": 0.5976, + "step": 26082 + }, + { + "epoch": 0.7994054186588206, + "grad_norm": 1.823323398465121, + "learning_rate": 2.0372261310283525e-06, + "loss": 0.6414, + "step": 26083 + }, + { + "epoch": 0.7994360671815618, + "grad_norm": 1.7966712458115264, + "learning_rate": 2.036625693503125e-06, + "loss": 0.6773, + "step": 26084 + }, + { + "epoch": 0.799466715704303, + "grad_norm": 1.7038908406569726, + "learning_rate": 2.036025334443066e-06, + "loss": 0.6633, + "step": 26085 + }, + { + "epoch": 0.7994973642270442, + "grad_norm": 1.8634831565517245, + "learning_rate": 2.035425053854083e-06, + "loss": 0.5974, + "step": 26086 + }, + { + "epoch": 0.7995280127497855, + "grad_norm": 1.640941696011056, + "learning_rate": 2.0348248517420953e-06, + "loss": 0.6249, + "step": 26087 + }, + { + "epoch": 0.7995586612725266, + "grad_norm": 1.4886807382559162, + "learning_rate": 2.034224728113019e-06, + "loss": 0.6187, + "step": 26088 + }, + { + "epoch": 0.7995893097952679, + "grad_norm": 1.661959577827896, + "learning_rate": 2.0336246829727626e-06, + "loss": 0.692, + "step": 26089 + }, + { + "epoch": 0.799619958318009, + "grad_norm": 1.5599663688281846, + "learning_rate": 2.03302471632724e-06, + "loss": 0.6721, + "step": 26090 + }, + { + "epoch": 0.7996506068407503, + "grad_norm": 1.7252448437112493, + "learning_rate": 2.0324248281823654e-06, + "loss": 0.661, + "step": 26091 + }, + { + "epoch": 0.7996812553634914, + "grad_norm": 0.6979646641124494, + "learning_rate": 2.031825018544046e-06, + "loss": 0.5241, + "step": 26092 + }, + { + "epoch": 0.7997119038862327, + "grad_norm": 1.5878207008672307, + "learning_rate": 2.0312252874181946e-06, + "loss": 0.7536, + "step": 26093 + }, + { + "epoch": 0.7997425524089739, + "grad_norm": 1.7958630448884623, + "learning_rate": 2.030625634810718e-06, + "loss": 0.6361, + "step": 26094 + }, + { + "epoch": 0.7997732009317151, + "grad_norm": 1.6567053694201535, + "learning_rate": 2.0300260607275256e-06, + "loss": 0.6476, + "step": 26095 + }, + { + "epoch": 0.7998038494544563, + "grad_norm": 1.6543754349761641, + "learning_rate": 2.0294265651745283e-06, + "loss": 0.6812, + "step": 26096 + }, + { + "epoch": 0.7998344979771975, + "grad_norm": 1.7286121713204359, + "learning_rate": 2.0288271481576284e-06, + "loss": 0.6391, + "step": 26097 + }, + { + "epoch": 0.7998651464999387, + "grad_norm": 1.6881745025377082, + "learning_rate": 2.028227809682732e-06, + "loss": 0.6369, + "step": 26098 + }, + { + "epoch": 0.7998957950226799, + "grad_norm": 1.852396942243154, + "learning_rate": 2.027628549755751e-06, + "loss": 0.7319, + "step": 26099 + }, + { + "epoch": 0.7999264435454211, + "grad_norm": 1.8889846472460432, + "learning_rate": 2.0270293683825837e-06, + "loss": 0.624, + "step": 26100 + }, + { + "epoch": 0.7999570920681623, + "grad_norm": 1.739426481468107, + "learning_rate": 2.0264302655691348e-06, + "loss": 0.6426, + "step": 26101 + }, + { + "epoch": 0.7999877405909035, + "grad_norm": 1.7894816788867556, + "learning_rate": 2.025831241321312e-06, + "loss": 0.5731, + "step": 26102 + }, + { + "epoch": 0.8000183891136448, + "grad_norm": 1.5403710325879063, + "learning_rate": 2.025232295645011e-06, + "loss": 0.5327, + "step": 26103 + }, + { + "epoch": 0.8000490376363859, + "grad_norm": 0.691094680166247, + "learning_rate": 2.02463342854614e-06, + "loss": 0.526, + "step": 26104 + }, + { + "epoch": 0.8000796861591272, + "grad_norm": 1.947929520408361, + "learning_rate": 2.0240346400305935e-06, + "loss": 0.6404, + "step": 26105 + }, + { + "epoch": 0.8001103346818683, + "grad_norm": 1.82049523200783, + "learning_rate": 2.023435930104274e-06, + "loss": 0.6673, + "step": 26106 + }, + { + "epoch": 0.8001409832046096, + "grad_norm": 1.5607497453592358, + "learning_rate": 2.022837298773084e-06, + "loss": 0.6324, + "step": 26107 + }, + { + "epoch": 0.8001716317273507, + "grad_norm": 0.6826518618148095, + "learning_rate": 2.0222387460429162e-06, + "loss": 0.5428, + "step": 26108 + }, + { + "epoch": 0.800202280250092, + "grad_norm": 1.5406224115948834, + "learning_rate": 2.0216402719196714e-06, + "loss": 0.5582, + "step": 26109 + }, + { + "epoch": 0.8002329287728331, + "grad_norm": 1.9587758591954139, + "learning_rate": 2.0210418764092487e-06, + "loss": 0.6398, + "step": 26110 + }, + { + "epoch": 0.8002635772955744, + "grad_norm": 1.7036727675800518, + "learning_rate": 2.02044355951754e-06, + "loss": 0.6018, + "step": 26111 + }, + { + "epoch": 0.8002942258183156, + "grad_norm": 1.4606399257406204, + "learning_rate": 2.0198453212504453e-06, + "loss": 0.5381, + "step": 26112 + }, + { + "epoch": 0.8003248743410568, + "grad_norm": 1.6157855594041586, + "learning_rate": 2.019247161613853e-06, + "loss": 0.6584, + "step": 26113 + }, + { + "epoch": 0.800355522863798, + "grad_norm": 1.595796672691742, + "learning_rate": 2.0186490806136616e-06, + "loss": 0.6705, + "step": 26114 + }, + { + "epoch": 0.8003861713865391, + "grad_norm": 1.5321153530970304, + "learning_rate": 2.0180510782557637e-06, + "loss": 0.6333, + "step": 26115 + }, + { + "epoch": 0.8004168199092804, + "grad_norm": 1.573449335831091, + "learning_rate": 2.01745315454605e-06, + "loss": 0.5479, + "step": 26116 + }, + { + "epoch": 0.8004474684320215, + "grad_norm": 1.6144423712719815, + "learning_rate": 2.016855309490412e-06, + "loss": 0.6249, + "step": 26117 + }, + { + "epoch": 0.8004781169547628, + "grad_norm": 1.705031740817334, + "learning_rate": 2.016257543094744e-06, + "loss": 0.6706, + "step": 26118 + }, + { + "epoch": 0.8005087654775039, + "grad_norm": 1.9243579929832597, + "learning_rate": 2.01565985536493e-06, + "loss": 0.6707, + "step": 26119 + }, + { + "epoch": 0.8005394140002452, + "grad_norm": 1.7083193890727775, + "learning_rate": 2.0150622463068627e-06, + "loss": 0.57, + "step": 26120 + }, + { + "epoch": 0.8005700625229863, + "grad_norm": 1.6321116580398891, + "learning_rate": 2.014464715926433e-06, + "loss": 0.7087, + "step": 26121 + }, + { + "epoch": 0.8006007110457276, + "grad_norm": 1.670659138108408, + "learning_rate": 2.0138672642295232e-06, + "loss": 0.6653, + "step": 26122 + }, + { + "epoch": 0.8006313595684688, + "grad_norm": 1.675266823974336, + "learning_rate": 2.013269891222024e-06, + "loss": 0.6594, + "step": 26123 + }, + { + "epoch": 0.80066200809121, + "grad_norm": 1.7177144027540943, + "learning_rate": 2.012672596909816e-06, + "loss": 0.648, + "step": 26124 + }, + { + "epoch": 0.8006926566139512, + "grad_norm": 0.6628697592076849, + "learning_rate": 2.0120753812987935e-06, + "loss": 0.4996, + "step": 26125 + }, + { + "epoch": 0.8007233051366924, + "grad_norm": 1.6541941880154525, + "learning_rate": 2.0114782443948355e-06, + "loss": 0.5983, + "step": 26126 + }, + { + "epoch": 0.8007539536594336, + "grad_norm": 1.524892989735101, + "learning_rate": 2.0108811862038247e-06, + "loss": 0.5632, + "step": 26127 + }, + { + "epoch": 0.8007846021821748, + "grad_norm": 0.6674446145192602, + "learning_rate": 2.010284206731645e-06, + "loss": 0.4993, + "step": 26128 + }, + { + "epoch": 0.800815250704916, + "grad_norm": 1.8749432044585728, + "learning_rate": 2.0096873059841816e-06, + "loss": 0.6697, + "step": 26129 + }, + { + "epoch": 0.8008458992276573, + "grad_norm": 1.480455057717806, + "learning_rate": 2.009090483967312e-06, + "loss": 0.709, + "step": 26130 + }, + { + "epoch": 0.8008765477503984, + "grad_norm": 1.7173657058633878, + "learning_rate": 2.0084937406869175e-06, + "loss": 0.6489, + "step": 26131 + }, + { + "epoch": 0.8009071962731397, + "grad_norm": 1.6602934244742653, + "learning_rate": 2.0078970761488816e-06, + "loss": 0.6938, + "step": 26132 + }, + { + "epoch": 0.8009378447958808, + "grad_norm": 1.7481285500615014, + "learning_rate": 2.0073004903590786e-06, + "loss": 0.6275, + "step": 26133 + }, + { + "epoch": 0.8009684933186221, + "grad_norm": 1.6783293246044237, + "learning_rate": 2.0067039833233916e-06, + "loss": 0.671, + "step": 26134 + }, + { + "epoch": 0.8009991418413632, + "grad_norm": 1.6153786243810169, + "learning_rate": 2.0061075550476894e-06, + "loss": 0.6643, + "step": 26135 + }, + { + "epoch": 0.8010297903641045, + "grad_norm": 1.5503646909708835, + "learning_rate": 2.00551120553786e-06, + "loss": 0.6429, + "step": 26136 + }, + { + "epoch": 0.8010604388868456, + "grad_norm": 1.5515709828611233, + "learning_rate": 2.0049149347997743e-06, + "loss": 0.631, + "step": 26137 + }, + { + "epoch": 0.8010910874095869, + "grad_norm": 1.6909740756793374, + "learning_rate": 2.004318742839305e-06, + "loss": 0.6872, + "step": 26138 + }, + { + "epoch": 0.801121735932328, + "grad_norm": 1.542696856407757, + "learning_rate": 2.003722629662329e-06, + "loss": 0.5769, + "step": 26139 + }, + { + "epoch": 0.8011523844550693, + "grad_norm": 1.7635565202535142, + "learning_rate": 2.0031265952747224e-06, + "loss": 0.715, + "step": 26140 + }, + { + "epoch": 0.8011830329778105, + "grad_norm": 0.6869434884153637, + "learning_rate": 2.002530639682353e-06, + "loss": 0.5066, + "step": 26141 + }, + { + "epoch": 0.8012136815005517, + "grad_norm": 1.7135433887787475, + "learning_rate": 2.0019347628910955e-06, + "loss": 0.585, + "step": 26142 + }, + { + "epoch": 0.8012443300232929, + "grad_norm": 1.849281285487007, + "learning_rate": 2.0013389649068217e-06, + "loss": 0.6777, + "step": 26143 + }, + { + "epoch": 0.8012749785460341, + "grad_norm": 0.6696097435685971, + "learning_rate": 2.0007432457354036e-06, + "loss": 0.5246, + "step": 26144 + }, + { + "epoch": 0.8013056270687753, + "grad_norm": 1.5903017528010008, + "learning_rate": 2.0001476053827085e-06, + "loss": 0.6745, + "step": 26145 + }, + { + "epoch": 0.8013362755915164, + "grad_norm": 1.6364672293616567, + "learning_rate": 1.9995520438546013e-06, + "loss": 0.6345, + "step": 26146 + }, + { + "epoch": 0.8013669241142577, + "grad_norm": 1.6863787824361895, + "learning_rate": 1.9989565611569596e-06, + "loss": 0.6124, + "step": 26147 + }, + { + "epoch": 0.8013975726369988, + "grad_norm": 0.7072477342144851, + "learning_rate": 1.998361157295646e-06, + "loss": 0.5552, + "step": 26148 + }, + { + "epoch": 0.8014282211597401, + "grad_norm": 1.5851682993191902, + "learning_rate": 1.997765832276526e-06, + "loss": 0.6841, + "step": 26149 + }, + { + "epoch": 0.8014588696824813, + "grad_norm": 1.6070394355893036, + "learning_rate": 1.9971705861054657e-06, + "loss": 0.6814, + "step": 26150 + }, + { + "epoch": 0.8014895182052225, + "grad_norm": 1.5549116448708338, + "learning_rate": 1.996575418788331e-06, + "loss": 0.6242, + "step": 26151 + }, + { + "epoch": 0.8015201667279637, + "grad_norm": 0.6671344248469886, + "learning_rate": 1.9959803303309888e-06, + "loss": 0.5176, + "step": 26152 + }, + { + "epoch": 0.8015508152507049, + "grad_norm": 1.7116842013242248, + "learning_rate": 1.995385320739298e-06, + "loss": 0.6956, + "step": 26153 + }, + { + "epoch": 0.8015814637734461, + "grad_norm": 1.6443950599140236, + "learning_rate": 1.9947903900191248e-06, + "loss": 0.6745, + "step": 26154 + }, + { + "epoch": 0.8016121122961873, + "grad_norm": 0.6791114053481491, + "learning_rate": 1.994195538176331e-06, + "loss": 0.5268, + "step": 26155 + }, + { + "epoch": 0.8016427608189285, + "grad_norm": 1.923418550150469, + "learning_rate": 1.9936007652167777e-06, + "loss": 0.5093, + "step": 26156 + }, + { + "epoch": 0.8016734093416698, + "grad_norm": 1.5241098626515743, + "learning_rate": 1.9930060711463227e-06, + "loss": 0.619, + "step": 26157 + }, + { + "epoch": 0.8017040578644109, + "grad_norm": 1.6853748013837788, + "learning_rate": 1.9924114559708263e-06, + "loss": 0.6615, + "step": 26158 + }, + { + "epoch": 0.8017347063871522, + "grad_norm": 1.9218774541415167, + "learning_rate": 1.9918169196961524e-06, + "loss": 0.7246, + "step": 26159 + }, + { + "epoch": 0.8017653549098933, + "grad_norm": 1.5887846568640822, + "learning_rate": 1.9912224623281516e-06, + "loss": 0.6361, + "step": 26160 + }, + { + "epoch": 0.8017960034326346, + "grad_norm": 1.68443878238556, + "learning_rate": 1.9906280838726866e-06, + "loss": 0.6798, + "step": 26161 + }, + { + "epoch": 0.8018266519553757, + "grad_norm": 1.5392718251731108, + "learning_rate": 1.990033784335611e-06, + "loss": 0.618, + "step": 26162 + }, + { + "epoch": 0.801857300478117, + "grad_norm": 0.6396184009180079, + "learning_rate": 1.9894395637227847e-06, + "loss": 0.5353, + "step": 26163 + }, + { + "epoch": 0.8018879490008581, + "grad_norm": 1.540384453145219, + "learning_rate": 1.988845422040061e-06, + "loss": 0.5777, + "step": 26164 + }, + { + "epoch": 0.8019185975235994, + "grad_norm": 1.671129009231801, + "learning_rate": 1.9882513592932864e-06, + "loss": 0.6437, + "step": 26165 + }, + { + "epoch": 0.8019492460463405, + "grad_norm": 1.6847243267986938, + "learning_rate": 1.9876573754883277e-06, + "loss": 0.6561, + "step": 26166 + }, + { + "epoch": 0.8019798945690818, + "grad_norm": 1.511123803115238, + "learning_rate": 1.98706347063103e-06, + "loss": 0.5758, + "step": 26167 + }, + { + "epoch": 0.802010543091823, + "grad_norm": 0.6623847779831078, + "learning_rate": 1.9864696447272434e-06, + "loss": 0.5433, + "step": 26168 + }, + { + "epoch": 0.8020411916145642, + "grad_norm": 0.6633918187825704, + "learning_rate": 1.985875897782822e-06, + "loss": 0.5208, + "step": 26169 + }, + { + "epoch": 0.8020718401373054, + "grad_norm": 1.8379213116114799, + "learning_rate": 1.985282229803616e-06, + "loss": 0.7386, + "step": 26170 + }, + { + "epoch": 0.8021024886600466, + "grad_norm": 1.6725581902831133, + "learning_rate": 1.984688640795478e-06, + "loss": 0.6404, + "step": 26171 + }, + { + "epoch": 0.8021331371827878, + "grad_norm": 1.756775721585567, + "learning_rate": 1.9840951307642496e-06, + "loss": 0.6251, + "step": 26172 + }, + { + "epoch": 0.802163785705529, + "grad_norm": 1.7542693956176305, + "learning_rate": 1.983501699715784e-06, + "loss": 0.6731, + "step": 26173 + }, + { + "epoch": 0.8021944342282702, + "grad_norm": 1.5905142174777727, + "learning_rate": 1.9829083476559296e-06, + "loss": 0.5616, + "step": 26174 + }, + { + "epoch": 0.8022250827510115, + "grad_norm": 1.6773851471515098, + "learning_rate": 1.9823150745905305e-06, + "loss": 0.7094, + "step": 26175 + }, + { + "epoch": 0.8022557312737526, + "grad_norm": 1.6658731058401388, + "learning_rate": 1.981721880525427e-06, + "loss": 0.6445, + "step": 26176 + }, + { + "epoch": 0.8022863797964938, + "grad_norm": 1.7895455175706056, + "learning_rate": 1.9811287654664746e-06, + "loss": 0.606, + "step": 26177 + }, + { + "epoch": 0.802317028319235, + "grad_norm": 1.6296881541423087, + "learning_rate": 1.9805357294195094e-06, + "loss": 0.7552, + "step": 26178 + }, + { + "epoch": 0.8023476768419762, + "grad_norm": 1.6485754121313267, + "learning_rate": 1.979942772390381e-06, + "loss": 0.6677, + "step": 26179 + }, + { + "epoch": 0.8023783253647174, + "grad_norm": 1.5451658182213042, + "learning_rate": 1.9793498943849254e-06, + "loss": 0.601, + "step": 26180 + }, + { + "epoch": 0.8024089738874586, + "grad_norm": 1.6588831103231672, + "learning_rate": 1.9787570954089872e-06, + "loss": 0.6067, + "step": 26181 + }, + { + "epoch": 0.8024396224101998, + "grad_norm": 1.7390425310691497, + "learning_rate": 1.97816437546841e-06, + "loss": 0.6668, + "step": 26182 + }, + { + "epoch": 0.802470270932941, + "grad_norm": 1.7333803095017164, + "learning_rate": 1.977571734569029e-06, + "loss": 0.6079, + "step": 26183 + }, + { + "epoch": 0.8025009194556822, + "grad_norm": 1.626965120173678, + "learning_rate": 1.9769791727166874e-06, + "loss": 0.4888, + "step": 26184 + }, + { + "epoch": 0.8025315679784234, + "grad_norm": 1.8428419105001483, + "learning_rate": 1.9763866899172246e-06, + "loss": 0.6522, + "step": 26185 + }, + { + "epoch": 0.8025622165011647, + "grad_norm": 1.6221579193730895, + "learning_rate": 1.9757942861764776e-06, + "loss": 0.5823, + "step": 26186 + }, + { + "epoch": 0.8025928650239058, + "grad_norm": 1.637082207121886, + "learning_rate": 1.975201961500276e-06, + "loss": 0.6928, + "step": 26187 + }, + { + "epoch": 0.8026235135466471, + "grad_norm": 1.5380215178024652, + "learning_rate": 1.974609715894469e-06, + "loss": 0.6148, + "step": 26188 + }, + { + "epoch": 0.8026541620693882, + "grad_norm": 1.4692027699212424, + "learning_rate": 1.974017549364883e-06, + "loss": 0.5685, + "step": 26189 + }, + { + "epoch": 0.8026848105921295, + "grad_norm": 1.7507902418375836, + "learning_rate": 1.973425461917358e-06, + "loss": 0.6753, + "step": 26190 + }, + { + "epoch": 0.8027154591148706, + "grad_norm": 1.659070516533829, + "learning_rate": 1.972833453557723e-06, + "loss": 0.6029, + "step": 26191 + }, + { + "epoch": 0.8027461076376119, + "grad_norm": 1.675570945607688, + "learning_rate": 1.9722415242918137e-06, + "loss": 0.6251, + "step": 26192 + }, + { + "epoch": 0.802776756160353, + "grad_norm": 1.6376693569377228, + "learning_rate": 1.9716496741254654e-06, + "loss": 0.6854, + "step": 26193 + }, + { + "epoch": 0.8028074046830943, + "grad_norm": 1.6595387495507576, + "learning_rate": 1.971057903064505e-06, + "loss": 0.6797, + "step": 26194 + }, + { + "epoch": 0.8028380532058355, + "grad_norm": 1.678583011741427, + "learning_rate": 1.9704662111147644e-06, + "loss": 0.6524, + "step": 26195 + }, + { + "epoch": 0.8028687017285767, + "grad_norm": 1.6772368565398235, + "learning_rate": 1.9698745982820776e-06, + "loss": 0.6437, + "step": 26196 + }, + { + "epoch": 0.8028993502513179, + "grad_norm": 1.5888732797405638, + "learning_rate": 1.969283064572268e-06, + "loss": 0.6019, + "step": 26197 + }, + { + "epoch": 0.8029299987740591, + "grad_norm": 1.7941881918465987, + "learning_rate": 1.9686916099911677e-06, + "loss": 0.7637, + "step": 26198 + }, + { + "epoch": 0.8029606472968003, + "grad_norm": 2.0628976630901907, + "learning_rate": 1.9681002345446067e-06, + "loss": 0.6827, + "step": 26199 + }, + { + "epoch": 0.8029912958195415, + "grad_norm": 1.6122908925496453, + "learning_rate": 1.967508938238406e-06, + "loss": 0.665, + "step": 26200 + }, + { + "epoch": 0.8030219443422827, + "grad_norm": 1.6294354091127796, + "learning_rate": 1.9669177210783975e-06, + "loss": 0.6309, + "step": 26201 + }, + { + "epoch": 0.803052592865024, + "grad_norm": 1.690532091020504, + "learning_rate": 1.9663265830704025e-06, + "loss": 0.6204, + "step": 26202 + }, + { + "epoch": 0.8030832413877651, + "grad_norm": 1.8377637065900874, + "learning_rate": 1.9657355242202457e-06, + "loss": 0.6645, + "step": 26203 + }, + { + "epoch": 0.8031138899105064, + "grad_norm": 1.701708068715522, + "learning_rate": 1.965144544533756e-06, + "loss": 0.6602, + "step": 26204 + }, + { + "epoch": 0.8031445384332475, + "grad_norm": 1.7777941907698405, + "learning_rate": 1.9645536440167503e-06, + "loss": 0.6757, + "step": 26205 + }, + { + "epoch": 0.8031751869559888, + "grad_norm": 1.6872815521025906, + "learning_rate": 1.963962822675053e-06, + "loss": 0.5887, + "step": 26206 + }, + { + "epoch": 0.8032058354787299, + "grad_norm": 1.7181587672305587, + "learning_rate": 1.9633720805144883e-06, + "loss": 0.6247, + "step": 26207 + }, + { + "epoch": 0.8032364840014711, + "grad_norm": 1.5907402674414894, + "learning_rate": 1.9627814175408732e-06, + "loss": 0.5976, + "step": 26208 + }, + { + "epoch": 0.8032671325242123, + "grad_norm": 1.5861304783545984, + "learning_rate": 1.9621908337600314e-06, + "loss": 0.5558, + "step": 26209 + }, + { + "epoch": 0.8032977810469535, + "grad_norm": 1.5899517777532608, + "learning_rate": 1.9616003291777776e-06, + "loss": 0.7115, + "step": 26210 + }, + { + "epoch": 0.8033284295696947, + "grad_norm": 1.582953475418643, + "learning_rate": 1.961009903799932e-06, + "loss": 0.5496, + "step": 26211 + }, + { + "epoch": 0.8033590780924359, + "grad_norm": 1.7335396566882588, + "learning_rate": 1.9604195576323148e-06, + "loss": 0.7281, + "step": 26212 + }, + { + "epoch": 0.8033897266151772, + "grad_norm": 1.60999231220567, + "learning_rate": 1.9598292906807392e-06, + "loss": 0.5248, + "step": 26213 + }, + { + "epoch": 0.8034203751379183, + "grad_norm": 1.5257735892540836, + "learning_rate": 1.9592391029510215e-06, + "loss": 0.648, + "step": 26214 + }, + { + "epoch": 0.8034510236606596, + "grad_norm": 1.5384477571727109, + "learning_rate": 1.958648994448982e-06, + "loss": 0.5968, + "step": 26215 + }, + { + "epoch": 0.8034816721834007, + "grad_norm": 1.455067491006254, + "learning_rate": 1.9580589651804282e-06, + "loss": 0.6332, + "step": 26216 + }, + { + "epoch": 0.803512320706142, + "grad_norm": 1.7853862012977273, + "learning_rate": 1.957469015151178e-06, + "loss": 0.6688, + "step": 26217 + }, + { + "epoch": 0.8035429692288831, + "grad_norm": 1.6942275489282907, + "learning_rate": 1.9568791443670444e-06, + "loss": 0.6818, + "step": 26218 + }, + { + "epoch": 0.8035736177516244, + "grad_norm": 1.5604846932449707, + "learning_rate": 1.9562893528338367e-06, + "loss": 0.681, + "step": 26219 + }, + { + "epoch": 0.8036042662743655, + "grad_norm": 1.7014583124766263, + "learning_rate": 1.9556996405573715e-06, + "loss": 0.6464, + "step": 26220 + }, + { + "epoch": 0.8036349147971068, + "grad_norm": 0.6814449954882663, + "learning_rate": 1.9551100075434526e-06, + "loss": 0.5116, + "step": 26221 + }, + { + "epoch": 0.803665563319848, + "grad_norm": 1.8447464112160028, + "learning_rate": 1.9545204537978924e-06, + "loss": 0.6494, + "step": 26222 + }, + { + "epoch": 0.8036962118425892, + "grad_norm": 1.3474747680529588, + "learning_rate": 1.953930979326505e-06, + "loss": 0.4987, + "step": 26223 + }, + { + "epoch": 0.8037268603653304, + "grad_norm": 1.566773933997325, + "learning_rate": 1.95334158413509e-06, + "loss": 0.6463, + "step": 26224 + }, + { + "epoch": 0.8037575088880716, + "grad_norm": 1.7999478232536394, + "learning_rate": 1.9527522682294598e-06, + "loss": 0.6506, + "step": 26225 + }, + { + "epoch": 0.8037881574108128, + "grad_norm": 1.4591370333629825, + "learning_rate": 1.952163031615424e-06, + "loss": 0.6804, + "step": 26226 + }, + { + "epoch": 0.803818805933554, + "grad_norm": 1.7692557966172076, + "learning_rate": 1.951573874298781e-06, + "loss": 0.6598, + "step": 26227 + }, + { + "epoch": 0.8038494544562952, + "grad_norm": 1.702776499182661, + "learning_rate": 1.950984796285341e-06, + "loss": 0.6064, + "step": 26228 + }, + { + "epoch": 0.8038801029790364, + "grad_norm": 1.5756060088326427, + "learning_rate": 1.9503957975809095e-06, + "loss": 0.5638, + "step": 26229 + }, + { + "epoch": 0.8039107515017776, + "grad_norm": 1.641700811748304, + "learning_rate": 1.9498068781912847e-06, + "loss": 0.6164, + "step": 26230 + }, + { + "epoch": 0.8039414000245189, + "grad_norm": 1.7378100377358372, + "learning_rate": 1.949218038122276e-06, + "loss": 0.6961, + "step": 26231 + }, + { + "epoch": 0.80397204854726, + "grad_norm": 1.8064591187759982, + "learning_rate": 1.948629277379678e-06, + "loss": 0.6543, + "step": 26232 + }, + { + "epoch": 0.8040026970700013, + "grad_norm": 1.8505523295460087, + "learning_rate": 1.948040595969296e-06, + "loss": 0.6818, + "step": 26233 + }, + { + "epoch": 0.8040333455927424, + "grad_norm": 1.6136663401409541, + "learning_rate": 1.947451993896934e-06, + "loss": 0.727, + "step": 26234 + }, + { + "epoch": 0.8040639941154837, + "grad_norm": 1.8411986986550823, + "learning_rate": 1.9468634711683843e-06, + "loss": 0.7378, + "step": 26235 + }, + { + "epoch": 0.8040946426382248, + "grad_norm": 1.877150758152449, + "learning_rate": 1.946275027789449e-06, + "loss": 0.7023, + "step": 26236 + }, + { + "epoch": 0.8041252911609661, + "grad_norm": 1.5760496455724238, + "learning_rate": 1.94568666376593e-06, + "loss": 0.5746, + "step": 26237 + }, + { + "epoch": 0.8041559396837072, + "grad_norm": 1.671606115159493, + "learning_rate": 1.9450983791036184e-06, + "loss": 0.6406, + "step": 26238 + }, + { + "epoch": 0.8041865882064484, + "grad_norm": 1.639527917296251, + "learning_rate": 1.9445101738083127e-06, + "loss": 0.6322, + "step": 26239 + }, + { + "epoch": 0.8042172367291897, + "grad_norm": 1.752003737586929, + "learning_rate": 1.9439220478858124e-06, + "loss": 0.7693, + "step": 26240 + }, + { + "epoch": 0.8042478852519308, + "grad_norm": 1.8286810877328652, + "learning_rate": 1.9433340013419066e-06, + "loss": 0.6077, + "step": 26241 + }, + { + "epoch": 0.8042785337746721, + "grad_norm": 1.5544065790743309, + "learning_rate": 1.9427460341823945e-06, + "loss": 0.5547, + "step": 26242 + }, + { + "epoch": 0.8043091822974132, + "grad_norm": 1.6981107469934864, + "learning_rate": 1.942158146413062e-06, + "loss": 0.6062, + "step": 26243 + }, + { + "epoch": 0.8043398308201545, + "grad_norm": 1.6001679710719177, + "learning_rate": 1.941570338039713e-06, + "loss": 0.6182, + "step": 26244 + }, + { + "epoch": 0.8043704793428956, + "grad_norm": 1.5620631809305874, + "learning_rate": 1.940982609068133e-06, + "loss": 0.5532, + "step": 26245 + }, + { + "epoch": 0.8044011278656369, + "grad_norm": 0.7050402071246366, + "learning_rate": 1.9403949595041105e-06, + "loss": 0.5338, + "step": 26246 + }, + { + "epoch": 0.804431776388378, + "grad_norm": 1.819878167189048, + "learning_rate": 1.93980738935344e-06, + "loss": 0.6431, + "step": 26247 + }, + { + "epoch": 0.8044624249111193, + "grad_norm": 0.7140693175123849, + "learning_rate": 1.93921989862191e-06, + "loss": 0.5426, + "step": 26248 + }, + { + "epoch": 0.8044930734338605, + "grad_norm": 1.985794268814434, + "learning_rate": 1.9386324873153073e-06, + "loss": 0.7221, + "step": 26249 + }, + { + "epoch": 0.8045237219566017, + "grad_norm": 1.7042664695419065, + "learning_rate": 1.9380451554394207e-06, + "loss": 0.7027, + "step": 26250 + }, + { + "epoch": 0.8045543704793429, + "grad_norm": 1.6852721274462432, + "learning_rate": 1.9374579030000385e-06, + "loss": 0.6927, + "step": 26251 + }, + { + "epoch": 0.8045850190020841, + "grad_norm": 1.6244593873627484, + "learning_rate": 1.9368707300029497e-06, + "loss": 0.6476, + "step": 26252 + }, + { + "epoch": 0.8046156675248253, + "grad_norm": 1.7496440424028379, + "learning_rate": 1.9362836364539363e-06, + "loss": 0.6259, + "step": 26253 + }, + { + "epoch": 0.8046463160475665, + "grad_norm": 1.5639807050567487, + "learning_rate": 1.935696622358779e-06, + "loss": 0.5639, + "step": 26254 + }, + { + "epoch": 0.8046769645703077, + "grad_norm": 1.663150083347978, + "learning_rate": 1.935109687723268e-06, + "loss": 0.6335, + "step": 26255 + }, + { + "epoch": 0.804707613093049, + "grad_norm": 1.5732212874875224, + "learning_rate": 1.934522832553187e-06, + "loss": 0.6107, + "step": 26256 + }, + { + "epoch": 0.8047382616157901, + "grad_norm": 1.6514197963662773, + "learning_rate": 1.933936056854314e-06, + "loss": 0.6508, + "step": 26257 + }, + { + "epoch": 0.8047689101385314, + "grad_norm": 1.669489020742062, + "learning_rate": 1.9333493606324326e-06, + "loss": 0.6308, + "step": 26258 + }, + { + "epoch": 0.8047995586612725, + "grad_norm": 0.6886222860129451, + "learning_rate": 1.9327627438933263e-06, + "loss": 0.5491, + "step": 26259 + }, + { + "epoch": 0.8048302071840138, + "grad_norm": 0.6582346026058346, + "learning_rate": 1.9321762066427695e-06, + "loss": 0.5197, + "step": 26260 + }, + { + "epoch": 0.8048608557067549, + "grad_norm": 1.5161194352709906, + "learning_rate": 1.9315897488865487e-06, + "loss": 0.5599, + "step": 26261 + }, + { + "epoch": 0.8048915042294962, + "grad_norm": 1.6112133218742897, + "learning_rate": 1.931003370630432e-06, + "loss": 0.595, + "step": 26262 + }, + { + "epoch": 0.8049221527522373, + "grad_norm": 1.5709831891163204, + "learning_rate": 1.9304170718802095e-06, + "loss": 0.5557, + "step": 26263 + }, + { + "epoch": 0.8049528012749786, + "grad_norm": 0.6668719921516629, + "learning_rate": 1.929830852641652e-06, + "loss": 0.5225, + "step": 26264 + }, + { + "epoch": 0.8049834497977197, + "grad_norm": 1.7162291612820864, + "learning_rate": 1.929244712920534e-06, + "loss": 0.6425, + "step": 26265 + }, + { + "epoch": 0.805014098320461, + "grad_norm": 0.6792033898310654, + "learning_rate": 1.9286586527226324e-06, + "loss": 0.5012, + "step": 26266 + }, + { + "epoch": 0.8050447468432022, + "grad_norm": 2.012618087833143, + "learning_rate": 1.9280726720537245e-06, + "loss": 0.7015, + "step": 26267 + }, + { + "epoch": 0.8050753953659434, + "grad_norm": 1.7805724711186965, + "learning_rate": 1.92748677091958e-06, + "loss": 0.6208, + "step": 26268 + }, + { + "epoch": 0.8051060438886846, + "grad_norm": 1.6651963770536482, + "learning_rate": 1.9269009493259727e-06, + "loss": 0.6383, + "step": 26269 + }, + { + "epoch": 0.8051366924114257, + "grad_norm": 1.6142375717197652, + "learning_rate": 1.926315207278677e-06, + "loss": 0.5016, + "step": 26270 + }, + { + "epoch": 0.805167340934167, + "grad_norm": 1.611573220398728, + "learning_rate": 1.9257295447834657e-06, + "loss": 0.5645, + "step": 26271 + }, + { + "epoch": 0.8051979894569081, + "grad_norm": 1.538531410485471, + "learning_rate": 1.9251439618461064e-06, + "loss": 0.6047, + "step": 26272 + }, + { + "epoch": 0.8052286379796494, + "grad_norm": 1.6854345516082045, + "learning_rate": 1.9245584584723653e-06, + "loss": 0.5644, + "step": 26273 + }, + { + "epoch": 0.8052592865023905, + "grad_norm": 1.678358320652359, + "learning_rate": 1.923973034668021e-06, + "loss": 0.6972, + "step": 26274 + }, + { + "epoch": 0.8052899350251318, + "grad_norm": 1.4891310219208256, + "learning_rate": 1.923387690438836e-06, + "loss": 0.6918, + "step": 26275 + }, + { + "epoch": 0.805320583547873, + "grad_norm": 1.3679515357297325, + "learning_rate": 1.9228024257905776e-06, + "loss": 0.5278, + "step": 26276 + }, + { + "epoch": 0.8053512320706142, + "grad_norm": 1.5282881484247854, + "learning_rate": 1.922217240729012e-06, + "loss": 0.6541, + "step": 26277 + }, + { + "epoch": 0.8053818805933554, + "grad_norm": 1.611867254685024, + "learning_rate": 1.9216321352599067e-06, + "loss": 0.5921, + "step": 26278 + }, + { + "epoch": 0.8054125291160966, + "grad_norm": 1.677205033293735, + "learning_rate": 1.9210471093890304e-06, + "loss": 0.5721, + "step": 26279 + }, + { + "epoch": 0.8054431776388378, + "grad_norm": 1.659722671689919, + "learning_rate": 1.920462163122141e-06, + "loss": 0.5369, + "step": 26280 + }, + { + "epoch": 0.805473826161579, + "grad_norm": 1.8928735621546686, + "learning_rate": 1.919877296465005e-06, + "loss": 0.5666, + "step": 26281 + }, + { + "epoch": 0.8055044746843202, + "grad_norm": 1.5681026563323464, + "learning_rate": 1.9192925094233884e-06, + "loss": 0.6055, + "step": 26282 + }, + { + "epoch": 0.8055351232070614, + "grad_norm": 1.8408777578983437, + "learning_rate": 1.918707802003049e-06, + "loss": 0.6507, + "step": 26283 + }, + { + "epoch": 0.8055657717298026, + "grad_norm": 1.793113673590417, + "learning_rate": 1.918123174209746e-06, + "loss": 0.5818, + "step": 26284 + }, + { + "epoch": 0.8055964202525439, + "grad_norm": 1.579950785319253, + "learning_rate": 1.917538626049247e-06, + "loss": 0.6703, + "step": 26285 + }, + { + "epoch": 0.805627068775285, + "grad_norm": 1.625058535241454, + "learning_rate": 1.9169541575273086e-06, + "loss": 0.6509, + "step": 26286 + }, + { + "epoch": 0.8056577172980263, + "grad_norm": 1.5453438028428452, + "learning_rate": 1.916369768649686e-06, + "loss": 0.5774, + "step": 26287 + }, + { + "epoch": 0.8056883658207674, + "grad_norm": 1.7662688341970842, + "learning_rate": 1.9157854594221403e-06, + "loss": 0.7257, + "step": 26288 + }, + { + "epoch": 0.8057190143435087, + "grad_norm": 1.633084660544282, + "learning_rate": 1.9152012298504296e-06, + "loss": 0.5667, + "step": 26289 + }, + { + "epoch": 0.8057496628662498, + "grad_norm": 0.6602970509200378, + "learning_rate": 1.9146170799403117e-06, + "loss": 0.5185, + "step": 26290 + }, + { + "epoch": 0.8057803113889911, + "grad_norm": 1.683715724566319, + "learning_rate": 1.914033009697538e-06, + "loss": 0.5241, + "step": 26291 + }, + { + "epoch": 0.8058109599117322, + "grad_norm": 1.7276675034153584, + "learning_rate": 1.9134490191278666e-06, + "loss": 0.6274, + "step": 26292 + }, + { + "epoch": 0.8058416084344735, + "grad_norm": 1.6352011003864568, + "learning_rate": 1.912865108237053e-06, + "loss": 0.7344, + "step": 26293 + }, + { + "epoch": 0.8058722569572146, + "grad_norm": 1.7007635510489896, + "learning_rate": 1.9122812770308486e-06, + "loss": 0.6436, + "step": 26294 + }, + { + "epoch": 0.8059029054799559, + "grad_norm": 1.5220331922211061, + "learning_rate": 1.9116975255150003e-06, + "loss": 0.6526, + "step": 26295 + }, + { + "epoch": 0.8059335540026971, + "grad_norm": 1.7003326845400821, + "learning_rate": 1.911113853695272e-06, + "loss": 0.6495, + "step": 26296 + }, + { + "epoch": 0.8059642025254383, + "grad_norm": 0.6696969417028538, + "learning_rate": 1.9105302615774056e-06, + "loss": 0.5122, + "step": 26297 + }, + { + "epoch": 0.8059948510481795, + "grad_norm": 1.6177131485934706, + "learning_rate": 1.9099467491671575e-06, + "loss": 0.6221, + "step": 26298 + }, + { + "epoch": 0.8060254995709207, + "grad_norm": 1.8021946004747909, + "learning_rate": 1.909363316470271e-06, + "loss": 0.6413, + "step": 26299 + }, + { + "epoch": 0.8060561480936619, + "grad_norm": 1.618239617741311, + "learning_rate": 1.9087799634924977e-06, + "loss": 0.635, + "step": 26300 + }, + { + "epoch": 0.806086796616403, + "grad_norm": 1.7916696922641684, + "learning_rate": 1.9081966902395878e-06, + "loss": 0.6505, + "step": 26301 + }, + { + "epoch": 0.8061174451391443, + "grad_norm": 1.8651912015459209, + "learning_rate": 1.9076134967172844e-06, + "loss": 0.6607, + "step": 26302 + }, + { + "epoch": 0.8061480936618854, + "grad_norm": 0.6870044656229396, + "learning_rate": 1.9070303829313352e-06, + "loss": 0.5331, + "step": 26303 + }, + { + "epoch": 0.8061787421846267, + "grad_norm": 1.8195486412881634, + "learning_rate": 1.906447348887489e-06, + "loss": 0.7246, + "step": 26304 + }, + { + "epoch": 0.8062093907073679, + "grad_norm": 1.8733173439567825, + "learning_rate": 1.9058643945914857e-06, + "loss": 0.652, + "step": 26305 + }, + { + "epoch": 0.8062400392301091, + "grad_norm": 1.6439551111617512, + "learning_rate": 1.9052815200490738e-06, + "loss": 0.6584, + "step": 26306 + }, + { + "epoch": 0.8062706877528503, + "grad_norm": 1.7807775702684907, + "learning_rate": 1.9046987252659922e-06, + "loss": 0.6467, + "step": 26307 + }, + { + "epoch": 0.8063013362755915, + "grad_norm": 1.994330853487921, + "learning_rate": 1.904116010247985e-06, + "loss": 0.693, + "step": 26308 + }, + { + "epoch": 0.8063319847983327, + "grad_norm": 1.5313648038998127, + "learning_rate": 1.9035333750007957e-06, + "loss": 0.5835, + "step": 26309 + }, + { + "epoch": 0.8063626333210739, + "grad_norm": 1.7358519198196785, + "learning_rate": 1.9029508195301626e-06, + "loss": 0.6938, + "step": 26310 + }, + { + "epoch": 0.8063932818438151, + "grad_norm": 1.7958664380894505, + "learning_rate": 1.902368343841826e-06, + "loss": 0.5928, + "step": 26311 + }, + { + "epoch": 0.8064239303665564, + "grad_norm": 0.6606420364703068, + "learning_rate": 1.9017859479415278e-06, + "loss": 0.5208, + "step": 26312 + }, + { + "epoch": 0.8064545788892975, + "grad_norm": 1.8758414708604652, + "learning_rate": 1.9012036318350058e-06, + "loss": 0.6132, + "step": 26313 + }, + { + "epoch": 0.8064852274120388, + "grad_norm": 1.6337225637582744, + "learning_rate": 1.9006213955279917e-06, + "loss": 0.6161, + "step": 26314 + }, + { + "epoch": 0.8065158759347799, + "grad_norm": 1.4816097463413376, + "learning_rate": 1.9000392390262313e-06, + "loss": 0.6083, + "step": 26315 + }, + { + "epoch": 0.8065465244575212, + "grad_norm": 1.7986474981913276, + "learning_rate": 1.8994571623354551e-06, + "loss": 0.6418, + "step": 26316 + }, + { + "epoch": 0.8065771729802623, + "grad_norm": 1.8271174277481745, + "learning_rate": 1.8988751654614023e-06, + "loss": 0.6738, + "step": 26317 + }, + { + "epoch": 0.8066078215030036, + "grad_norm": 1.692394302179927, + "learning_rate": 1.8982932484098028e-06, + "loss": 0.6036, + "step": 26318 + }, + { + "epoch": 0.8066384700257447, + "grad_norm": 1.6227050062849235, + "learning_rate": 1.8977114111863926e-06, + "loss": 0.6919, + "step": 26319 + }, + { + "epoch": 0.806669118548486, + "grad_norm": 1.7183181943213637, + "learning_rate": 1.8971296537969076e-06, + "loss": 0.6322, + "step": 26320 + }, + { + "epoch": 0.8066997670712271, + "grad_norm": 1.5752700780415094, + "learning_rate": 1.896547976247075e-06, + "loss": 0.6101, + "step": 26321 + }, + { + "epoch": 0.8067304155939684, + "grad_norm": 1.8405494431885014, + "learning_rate": 1.8959663785426285e-06, + "loss": 0.5662, + "step": 26322 + }, + { + "epoch": 0.8067610641167096, + "grad_norm": 1.598763552743261, + "learning_rate": 1.895384860689301e-06, + "loss": 0.6186, + "step": 26323 + }, + { + "epoch": 0.8067917126394508, + "grad_norm": 1.5740757175421682, + "learning_rate": 1.894803422692818e-06, + "loss": 0.6002, + "step": 26324 + }, + { + "epoch": 0.806822361162192, + "grad_norm": 1.748440022100364, + "learning_rate": 1.8942220645589105e-06, + "loss": 0.5764, + "step": 26325 + }, + { + "epoch": 0.8068530096849332, + "grad_norm": 1.9034688850110282, + "learning_rate": 1.8936407862933092e-06, + "loss": 0.7264, + "step": 26326 + }, + { + "epoch": 0.8068836582076744, + "grad_norm": 1.6128280364761953, + "learning_rate": 1.8930595879017377e-06, + "loss": 0.6337, + "step": 26327 + }, + { + "epoch": 0.8069143067304156, + "grad_norm": 1.7291223229035784, + "learning_rate": 1.8924784693899257e-06, + "loss": 0.6733, + "step": 26328 + }, + { + "epoch": 0.8069449552531568, + "grad_norm": 1.7916251579792306, + "learning_rate": 1.8918974307635962e-06, + "loss": 0.567, + "step": 26329 + }, + { + "epoch": 0.806975603775898, + "grad_norm": 1.7696724076246102, + "learning_rate": 1.891316472028475e-06, + "loss": 0.635, + "step": 26330 + }, + { + "epoch": 0.8070062522986392, + "grad_norm": 2.3345086905777497, + "learning_rate": 1.8907355931902904e-06, + "loss": 0.5585, + "step": 26331 + }, + { + "epoch": 0.8070369008213804, + "grad_norm": 1.745684492742105, + "learning_rate": 1.8901547942547594e-06, + "loss": 0.6045, + "step": 26332 + }, + { + "epoch": 0.8070675493441216, + "grad_norm": 0.6615910243060847, + "learning_rate": 1.8895740752276094e-06, + "loss": 0.5056, + "step": 26333 + }, + { + "epoch": 0.8070981978668628, + "grad_norm": 1.7623495687666075, + "learning_rate": 1.8889934361145635e-06, + "loss": 0.663, + "step": 26334 + }, + { + "epoch": 0.807128846389604, + "grad_norm": 1.719068678603171, + "learning_rate": 1.8884128769213373e-06, + "loss": 0.6068, + "step": 26335 + }, + { + "epoch": 0.8071594949123452, + "grad_norm": 1.5275889696035203, + "learning_rate": 1.887832397653655e-06, + "loss": 0.5804, + "step": 26336 + }, + { + "epoch": 0.8071901434350864, + "grad_norm": 0.667980043019102, + "learning_rate": 1.8872519983172376e-06, + "loss": 0.5057, + "step": 26337 + }, + { + "epoch": 0.8072207919578276, + "grad_norm": 1.7343059737770752, + "learning_rate": 1.8866716789178007e-06, + "loss": 0.7303, + "step": 26338 + }, + { + "epoch": 0.8072514404805688, + "grad_norm": 1.6501463196486168, + "learning_rate": 1.8860914394610652e-06, + "loss": 0.6939, + "step": 26339 + }, + { + "epoch": 0.80728208900331, + "grad_norm": 1.6700171671624473, + "learning_rate": 1.8855112799527443e-06, + "loss": 0.652, + "step": 26340 + }, + { + "epoch": 0.8073127375260513, + "grad_norm": 1.613764874273654, + "learning_rate": 1.8849312003985576e-06, + "loss": 0.6213, + "step": 26341 + }, + { + "epoch": 0.8073433860487924, + "grad_norm": 1.5627209905472381, + "learning_rate": 1.884351200804222e-06, + "loss": 0.6852, + "step": 26342 + }, + { + "epoch": 0.8073740345715337, + "grad_norm": 1.6209527784076012, + "learning_rate": 1.8837712811754482e-06, + "loss": 0.6574, + "step": 26343 + }, + { + "epoch": 0.8074046830942748, + "grad_norm": 1.5159740248684015, + "learning_rate": 1.883191441517953e-06, + "loss": 0.5612, + "step": 26344 + }, + { + "epoch": 0.8074353316170161, + "grad_norm": 1.7941731236936929, + "learning_rate": 1.8826116818374508e-06, + "loss": 0.5469, + "step": 26345 + }, + { + "epoch": 0.8074659801397572, + "grad_norm": 0.6484075289114385, + "learning_rate": 1.882032002139651e-06, + "loss": 0.5234, + "step": 26346 + }, + { + "epoch": 0.8074966286624985, + "grad_norm": 1.810005159007228, + "learning_rate": 1.881452402430266e-06, + "loss": 0.7308, + "step": 26347 + }, + { + "epoch": 0.8075272771852396, + "grad_norm": 0.6594938586133748, + "learning_rate": 1.8808728827150114e-06, + "loss": 0.5216, + "step": 26348 + }, + { + "epoch": 0.8075579257079809, + "grad_norm": 1.7314272397278605, + "learning_rate": 1.8802934429995912e-06, + "loss": 0.6259, + "step": 26349 + }, + { + "epoch": 0.8075885742307221, + "grad_norm": 1.6960148416815146, + "learning_rate": 1.8797140832897186e-06, + "loss": 0.5812, + "step": 26350 + }, + { + "epoch": 0.8076192227534633, + "grad_norm": 1.6651612487379281, + "learning_rate": 1.8791348035910984e-06, + "loss": 0.5414, + "step": 26351 + }, + { + "epoch": 0.8076498712762045, + "grad_norm": 1.8406540237510547, + "learning_rate": 1.878555603909441e-06, + "loss": 0.6874, + "step": 26352 + }, + { + "epoch": 0.8076805197989457, + "grad_norm": 2.047671925942426, + "learning_rate": 1.8779764842504567e-06, + "loss": 0.7205, + "step": 26353 + }, + { + "epoch": 0.8077111683216869, + "grad_norm": 1.5403152359831496, + "learning_rate": 1.877397444619845e-06, + "loss": 0.6266, + "step": 26354 + }, + { + "epoch": 0.8077418168444281, + "grad_norm": 0.6507980078336768, + "learning_rate": 1.876818485023314e-06, + "loss": 0.4982, + "step": 26355 + }, + { + "epoch": 0.8077724653671693, + "grad_norm": 1.7280130205440278, + "learning_rate": 1.8762396054665721e-06, + "loss": 0.6553, + "step": 26356 + }, + { + "epoch": 0.8078031138899106, + "grad_norm": 1.734136567689314, + "learning_rate": 1.8756608059553171e-06, + "loss": 0.7436, + "step": 26357 + }, + { + "epoch": 0.8078337624126517, + "grad_norm": 1.766159776639998, + "learning_rate": 1.875082086495258e-06, + "loss": 0.6445, + "step": 26358 + }, + { + "epoch": 0.807864410935393, + "grad_norm": 1.8979828990218734, + "learning_rate": 1.8745034470920874e-06, + "loss": 0.7457, + "step": 26359 + }, + { + "epoch": 0.8078950594581341, + "grad_norm": 0.6547359324704786, + "learning_rate": 1.8739248877515193e-06, + "loss": 0.5464, + "step": 26360 + }, + { + "epoch": 0.8079257079808754, + "grad_norm": 1.6610060317334159, + "learning_rate": 1.8733464084792486e-06, + "loss": 0.5556, + "step": 26361 + }, + { + "epoch": 0.8079563565036165, + "grad_norm": 1.7447452199060438, + "learning_rate": 1.872768009280973e-06, + "loss": 0.6756, + "step": 26362 + }, + { + "epoch": 0.8079870050263577, + "grad_norm": 1.8560988125727134, + "learning_rate": 1.8721896901623927e-06, + "loss": 0.6251, + "step": 26363 + }, + { + "epoch": 0.8080176535490989, + "grad_norm": 1.671839643100121, + "learning_rate": 1.8716114511292093e-06, + "loss": 0.5913, + "step": 26364 + }, + { + "epoch": 0.8080483020718401, + "grad_norm": 1.760015494406605, + "learning_rate": 1.8710332921871166e-06, + "loss": 0.7112, + "step": 26365 + }, + { + "epoch": 0.8080789505945813, + "grad_norm": 1.5900106563988627, + "learning_rate": 1.8704552133418119e-06, + "loss": 0.5743, + "step": 26366 + }, + { + "epoch": 0.8081095991173225, + "grad_norm": 1.7232503980383915, + "learning_rate": 1.8698772145989952e-06, + "loss": 0.7299, + "step": 26367 + }, + { + "epoch": 0.8081402476400638, + "grad_norm": 1.6121677664377776, + "learning_rate": 1.8692992959643552e-06, + "loss": 0.5661, + "step": 26368 + }, + { + "epoch": 0.8081708961628049, + "grad_norm": 1.5992839567965107, + "learning_rate": 1.8687214574435918e-06, + "loss": 0.5823, + "step": 26369 + }, + { + "epoch": 0.8082015446855462, + "grad_norm": 1.5289764811443904, + "learning_rate": 1.868143699042393e-06, + "loss": 0.5573, + "step": 26370 + }, + { + "epoch": 0.8082321932082873, + "grad_norm": 1.597462794906416, + "learning_rate": 1.8675660207664582e-06, + "loss": 0.6435, + "step": 26371 + }, + { + "epoch": 0.8082628417310286, + "grad_norm": 1.7242454430292626, + "learning_rate": 1.8669884226214774e-06, + "loss": 0.6029, + "step": 26372 + }, + { + "epoch": 0.8082934902537697, + "grad_norm": 2.023308522614134, + "learning_rate": 1.8664109046131373e-06, + "loss": 0.671, + "step": 26373 + }, + { + "epoch": 0.808324138776511, + "grad_norm": 1.7163333726502827, + "learning_rate": 1.8658334667471322e-06, + "loss": 0.6744, + "step": 26374 + }, + { + "epoch": 0.8083547872992521, + "grad_norm": 1.712371410276842, + "learning_rate": 1.8652561090291533e-06, + "loss": 0.6031, + "step": 26375 + }, + { + "epoch": 0.8083854358219934, + "grad_norm": 1.82513100965361, + "learning_rate": 1.8646788314648844e-06, + "loss": 0.6808, + "step": 26376 + }, + { + "epoch": 0.8084160843447346, + "grad_norm": 1.6016893714193188, + "learning_rate": 1.864101634060017e-06, + "loss": 0.6007, + "step": 26377 + }, + { + "epoch": 0.8084467328674758, + "grad_norm": 1.8558523313662334, + "learning_rate": 1.8635245168202388e-06, + "loss": 0.6613, + "step": 26378 + }, + { + "epoch": 0.808477381390217, + "grad_norm": 1.6005839733223053, + "learning_rate": 1.862947479751236e-06, + "loss": 0.6456, + "step": 26379 + }, + { + "epoch": 0.8085080299129582, + "grad_norm": 1.8454379541967036, + "learning_rate": 1.8623705228586953e-06, + "loss": 0.7162, + "step": 26380 + }, + { + "epoch": 0.8085386784356994, + "grad_norm": 1.6580432845052118, + "learning_rate": 1.8617936461482934e-06, + "loss": 0.5697, + "step": 26381 + }, + { + "epoch": 0.8085693269584406, + "grad_norm": 1.5808450193713495, + "learning_rate": 1.8612168496257277e-06, + "loss": 0.6214, + "step": 26382 + }, + { + "epoch": 0.8085999754811818, + "grad_norm": 1.6244916652686239, + "learning_rate": 1.8606401332966729e-06, + "loss": 0.7247, + "step": 26383 + }, + { + "epoch": 0.808630624003923, + "grad_norm": 0.68936606627924, + "learning_rate": 1.860063497166812e-06, + "loss": 0.5152, + "step": 26384 + }, + { + "epoch": 0.8086612725266642, + "grad_norm": 1.8764909690483342, + "learning_rate": 1.8594869412418282e-06, + "loss": 0.6855, + "step": 26385 + }, + { + "epoch": 0.8086919210494055, + "grad_norm": 1.884878526982512, + "learning_rate": 1.858910465527405e-06, + "loss": 0.6352, + "step": 26386 + }, + { + "epoch": 0.8087225695721466, + "grad_norm": 0.6695882852039624, + "learning_rate": 1.8583340700292173e-06, + "loss": 0.5582, + "step": 26387 + }, + { + "epoch": 0.8087532180948879, + "grad_norm": 1.5481319930304045, + "learning_rate": 1.8577577547529467e-06, + "loss": 0.5575, + "step": 26388 + }, + { + "epoch": 0.808783866617629, + "grad_norm": 1.6174683712181328, + "learning_rate": 1.8571815197042719e-06, + "loss": 0.6048, + "step": 26389 + }, + { + "epoch": 0.8088145151403703, + "grad_norm": 1.830200985352567, + "learning_rate": 1.8566053648888748e-06, + "loss": 0.6818, + "step": 26390 + }, + { + "epoch": 0.8088451636631114, + "grad_norm": 1.7760690593160495, + "learning_rate": 1.8560292903124277e-06, + "loss": 0.5734, + "step": 26391 + }, + { + "epoch": 0.8088758121858527, + "grad_norm": 1.573769021191761, + "learning_rate": 1.855453295980606e-06, + "loss": 0.6912, + "step": 26392 + }, + { + "epoch": 0.8089064607085938, + "grad_norm": 0.6508518540518322, + "learning_rate": 1.8548773818990861e-06, + "loss": 0.5191, + "step": 26393 + }, + { + "epoch": 0.808937109231335, + "grad_norm": 1.7105028290614075, + "learning_rate": 1.854301548073546e-06, + "loss": 0.5352, + "step": 26394 + }, + { + "epoch": 0.8089677577540763, + "grad_norm": 0.6800499981309375, + "learning_rate": 1.8537257945096543e-06, + "loss": 0.5313, + "step": 26395 + }, + { + "epoch": 0.8089984062768174, + "grad_norm": 1.6839975937932268, + "learning_rate": 1.8531501212130876e-06, + "loss": 0.7515, + "step": 26396 + }, + { + "epoch": 0.8090290547995587, + "grad_norm": 1.8151532138706028, + "learning_rate": 1.8525745281895158e-06, + "loss": 0.658, + "step": 26397 + }, + { + "epoch": 0.8090597033222998, + "grad_norm": 1.6618502998170892, + "learning_rate": 1.8519990154446154e-06, + "loss": 0.7087, + "step": 26398 + }, + { + "epoch": 0.8090903518450411, + "grad_norm": 1.8942984303884856, + "learning_rate": 1.8514235829840498e-06, + "loss": 0.7022, + "step": 26399 + }, + { + "epoch": 0.8091210003677822, + "grad_norm": 1.5229054430941575, + "learning_rate": 1.8508482308134934e-06, + "loss": 0.5669, + "step": 26400 + }, + { + "epoch": 0.8091516488905235, + "grad_norm": 1.6577152702474631, + "learning_rate": 1.850272958938617e-06, + "loss": 0.5644, + "step": 26401 + }, + { + "epoch": 0.8091822974132646, + "grad_norm": 1.6466604345880684, + "learning_rate": 1.8496977673650861e-06, + "loss": 0.6397, + "step": 26402 + }, + { + "epoch": 0.8092129459360059, + "grad_norm": 1.808243123004495, + "learning_rate": 1.8491226560985665e-06, + "loss": 0.7085, + "step": 26403 + }, + { + "epoch": 0.809243594458747, + "grad_norm": 1.5771481046987263, + "learning_rate": 1.8485476251447266e-06, + "loss": 0.6031, + "step": 26404 + }, + { + "epoch": 0.8092742429814883, + "grad_norm": 1.672712041134532, + "learning_rate": 1.8479726745092319e-06, + "loss": 0.6413, + "step": 26405 + }, + { + "epoch": 0.8093048915042295, + "grad_norm": 1.64837347645544, + "learning_rate": 1.8473978041977514e-06, + "loss": 0.5986, + "step": 26406 + }, + { + "epoch": 0.8093355400269707, + "grad_norm": 1.987427757710475, + "learning_rate": 1.8468230142159427e-06, + "loss": 0.7531, + "step": 26407 + }, + { + "epoch": 0.8093661885497119, + "grad_norm": 1.7545954672800785, + "learning_rate": 1.8462483045694745e-06, + "loss": 0.6673, + "step": 26408 + }, + { + "epoch": 0.8093968370724531, + "grad_norm": 1.9402232662052965, + "learning_rate": 1.8456736752640092e-06, + "loss": 0.6594, + "step": 26409 + }, + { + "epoch": 0.8094274855951943, + "grad_norm": 1.8802854268379425, + "learning_rate": 1.8450991263052088e-06, + "loss": 0.6202, + "step": 26410 + }, + { + "epoch": 0.8094581341179355, + "grad_norm": 1.386710022697791, + "learning_rate": 1.8445246576987275e-06, + "loss": 0.572, + "step": 26411 + }, + { + "epoch": 0.8094887826406767, + "grad_norm": 1.7934447168503533, + "learning_rate": 1.8439502694502365e-06, + "loss": 0.711, + "step": 26412 + }, + { + "epoch": 0.809519431163418, + "grad_norm": 1.7772469101746113, + "learning_rate": 1.8433759615653902e-06, + "loss": 0.6586, + "step": 26413 + }, + { + "epoch": 0.8095500796861591, + "grad_norm": 1.7151206428001078, + "learning_rate": 1.842801734049845e-06, + "loss": 0.7044, + "step": 26414 + }, + { + "epoch": 0.8095807282089004, + "grad_norm": 1.6592367254044587, + "learning_rate": 1.8422275869092609e-06, + "loss": 0.6573, + "step": 26415 + }, + { + "epoch": 0.8096113767316415, + "grad_norm": 1.6001229470386256, + "learning_rate": 1.8416535201492957e-06, + "loss": 0.6971, + "step": 26416 + }, + { + "epoch": 0.8096420252543828, + "grad_norm": 1.554709030061633, + "learning_rate": 1.8410795337756092e-06, + "loss": 0.6588, + "step": 26417 + }, + { + "epoch": 0.8096726737771239, + "grad_norm": 0.6692995819055564, + "learning_rate": 1.8405056277938505e-06, + "loss": 0.5363, + "step": 26418 + }, + { + "epoch": 0.8097033222998652, + "grad_norm": 1.4179645478852128, + "learning_rate": 1.8399318022096778e-06, + "loss": 0.5062, + "step": 26419 + }, + { + "epoch": 0.8097339708226063, + "grad_norm": 0.6744131778117965, + "learning_rate": 1.8393580570287472e-06, + "loss": 0.5283, + "step": 26420 + }, + { + "epoch": 0.8097646193453476, + "grad_norm": 1.8374771490936743, + "learning_rate": 1.8387843922567105e-06, + "loss": 0.6021, + "step": 26421 + }, + { + "epoch": 0.8097952678680888, + "grad_norm": 1.824315122862943, + "learning_rate": 1.8382108078992133e-06, + "loss": 0.6852, + "step": 26422 + }, + { + "epoch": 0.80982591639083, + "grad_norm": 1.659709999763416, + "learning_rate": 1.8376373039619189e-06, + "loss": 0.6924, + "step": 26423 + }, + { + "epoch": 0.8098565649135712, + "grad_norm": 1.6219848629705105, + "learning_rate": 1.8370638804504693e-06, + "loss": 0.5544, + "step": 26424 + }, + { + "epoch": 0.8098872134363123, + "grad_norm": 1.897404105667859, + "learning_rate": 1.836490537370521e-06, + "loss": 0.6989, + "step": 26425 + }, + { + "epoch": 0.8099178619590536, + "grad_norm": 1.9135116344234058, + "learning_rate": 1.8359172747277176e-06, + "loss": 0.6742, + "step": 26426 + }, + { + "epoch": 0.8099485104817947, + "grad_norm": 1.6270376585241626, + "learning_rate": 1.8353440925277099e-06, + "loss": 0.633, + "step": 26427 + }, + { + "epoch": 0.809979159004536, + "grad_norm": 1.4938064019211401, + "learning_rate": 1.834770990776149e-06, + "loss": 0.5958, + "step": 26428 + }, + { + "epoch": 0.8100098075272771, + "grad_norm": 1.7865105016919263, + "learning_rate": 1.834197969478675e-06, + "loss": 0.6728, + "step": 26429 + }, + { + "epoch": 0.8100404560500184, + "grad_norm": 1.7916019012623907, + "learning_rate": 1.8336250286409385e-06, + "loss": 0.6221, + "step": 26430 + }, + { + "epoch": 0.8100711045727595, + "grad_norm": 1.8340511505677566, + "learning_rate": 1.8330521682685865e-06, + "loss": 0.663, + "step": 26431 + }, + { + "epoch": 0.8101017530955008, + "grad_norm": 0.6839529597234313, + "learning_rate": 1.8324793883672587e-06, + "loss": 0.5552, + "step": 26432 + }, + { + "epoch": 0.810132401618242, + "grad_norm": 1.9822396550465102, + "learning_rate": 1.8319066889426006e-06, + "loss": 0.7136, + "step": 26433 + }, + { + "epoch": 0.8101630501409832, + "grad_norm": 1.9522150137493395, + "learning_rate": 1.831334070000259e-06, + "loss": 0.6875, + "step": 26434 + }, + { + "epoch": 0.8101936986637244, + "grad_norm": 1.776926835769378, + "learning_rate": 1.8307615315458704e-06, + "loss": 0.7129, + "step": 26435 + }, + { + "epoch": 0.8102243471864656, + "grad_norm": 1.6554168843647001, + "learning_rate": 1.8301890735850814e-06, + "loss": 0.6568, + "step": 26436 + }, + { + "epoch": 0.8102549957092068, + "grad_norm": 1.5805924040996682, + "learning_rate": 1.8296166961235262e-06, + "loss": 0.6195, + "step": 26437 + }, + { + "epoch": 0.810285644231948, + "grad_norm": 0.6740627035941149, + "learning_rate": 1.8290443991668494e-06, + "loss": 0.518, + "step": 26438 + }, + { + "epoch": 0.8103162927546892, + "grad_norm": 1.661146336848068, + "learning_rate": 1.8284721827206898e-06, + "loss": 0.6021, + "step": 26439 + }, + { + "epoch": 0.8103469412774305, + "grad_norm": 1.9137032513714325, + "learning_rate": 1.8279000467906837e-06, + "loss": 0.7125, + "step": 26440 + }, + { + "epoch": 0.8103775898001716, + "grad_norm": 1.652102746474709, + "learning_rate": 1.8273279913824683e-06, + "loss": 0.6302, + "step": 26441 + }, + { + "epoch": 0.8104082383229129, + "grad_norm": 1.8476496402450742, + "learning_rate": 1.826756016501684e-06, + "loss": 0.6496, + "step": 26442 + }, + { + "epoch": 0.810438886845654, + "grad_norm": 1.6606175561210277, + "learning_rate": 1.8261841221539611e-06, + "loss": 0.6626, + "step": 26443 + }, + { + "epoch": 0.8104695353683953, + "grad_norm": 1.7188770356948893, + "learning_rate": 1.8256123083449407e-06, + "loss": 0.6384, + "step": 26444 + }, + { + "epoch": 0.8105001838911364, + "grad_norm": 0.6501585696250497, + "learning_rate": 1.8250405750802502e-06, + "loss": 0.5275, + "step": 26445 + }, + { + "epoch": 0.8105308324138777, + "grad_norm": 1.7958873388404313, + "learning_rate": 1.8244689223655277e-06, + "loss": 0.6952, + "step": 26446 + }, + { + "epoch": 0.8105614809366188, + "grad_norm": 0.673372107866181, + "learning_rate": 1.8238973502064062e-06, + "loss": 0.5185, + "step": 26447 + }, + { + "epoch": 0.8105921294593601, + "grad_norm": 0.6515030343223107, + "learning_rate": 1.8233258586085133e-06, + "loss": 0.5216, + "step": 26448 + }, + { + "epoch": 0.8106227779821013, + "grad_norm": 0.6465675980947779, + "learning_rate": 1.822754447577484e-06, + "loss": 0.5302, + "step": 26449 + }, + { + "epoch": 0.8106534265048425, + "grad_norm": 1.782793498997544, + "learning_rate": 1.8221831171189496e-06, + "loss": 0.6743, + "step": 26450 + }, + { + "epoch": 0.8106840750275837, + "grad_norm": 1.9198913803946203, + "learning_rate": 1.821611867238534e-06, + "loss": 0.6154, + "step": 26451 + }, + { + "epoch": 0.8107147235503249, + "grad_norm": 0.6454770603521738, + "learning_rate": 1.8210406979418705e-06, + "loss": 0.4906, + "step": 26452 + }, + { + "epoch": 0.8107453720730661, + "grad_norm": 1.8404634899295351, + "learning_rate": 1.8204696092345874e-06, + "loss": 0.5841, + "step": 26453 + }, + { + "epoch": 0.8107760205958073, + "grad_norm": 1.6430310887443835, + "learning_rate": 1.8198986011223074e-06, + "loss": 0.6366, + "step": 26454 + }, + { + "epoch": 0.8108066691185485, + "grad_norm": 1.5831618251580437, + "learning_rate": 1.8193276736106625e-06, + "loss": 0.6699, + "step": 26455 + }, + { + "epoch": 0.8108373176412896, + "grad_norm": 1.6384651122691953, + "learning_rate": 1.8187568267052713e-06, + "loss": 0.5671, + "step": 26456 + }, + { + "epoch": 0.8108679661640309, + "grad_norm": 1.7079172971805345, + "learning_rate": 1.818186060411764e-06, + "loss": 0.6633, + "step": 26457 + }, + { + "epoch": 0.810898614686772, + "grad_norm": 1.6972023781702874, + "learning_rate": 1.817615374735765e-06, + "loss": 0.6538, + "step": 26458 + }, + { + "epoch": 0.8109292632095133, + "grad_norm": 1.6943797061992842, + "learning_rate": 1.817044769682892e-06, + "loss": 0.5989, + "step": 26459 + }, + { + "epoch": 0.8109599117322545, + "grad_norm": 1.7561502219582468, + "learning_rate": 1.8164742452587713e-06, + "loss": 0.6822, + "step": 26460 + }, + { + "epoch": 0.8109905602549957, + "grad_norm": 1.6896530365385187, + "learning_rate": 1.8159038014690256e-06, + "loss": 0.6397, + "step": 26461 + }, + { + "epoch": 0.8110212087777369, + "grad_norm": 1.413488908840296, + "learning_rate": 1.815333438319271e-06, + "loss": 0.6535, + "step": 26462 + }, + { + "epoch": 0.8110518573004781, + "grad_norm": 1.9084457418577458, + "learning_rate": 1.8147631558151314e-06, + "loss": 0.6895, + "step": 26463 + }, + { + "epoch": 0.8110825058232193, + "grad_norm": 1.4843311634196474, + "learning_rate": 1.8141929539622261e-06, + "loss": 0.6004, + "step": 26464 + }, + { + "epoch": 0.8111131543459605, + "grad_norm": 1.7059534614687963, + "learning_rate": 1.8136228327661709e-06, + "loss": 0.7331, + "step": 26465 + }, + { + "epoch": 0.8111438028687017, + "grad_norm": 1.5716098253081447, + "learning_rate": 1.8130527922325858e-06, + "loss": 0.5581, + "step": 26466 + }, + { + "epoch": 0.811174451391443, + "grad_norm": 1.7074143249772973, + "learning_rate": 1.812482832367084e-06, + "loss": 0.6844, + "step": 26467 + }, + { + "epoch": 0.8112050999141841, + "grad_norm": 1.7661878439642678, + "learning_rate": 1.8119129531752834e-06, + "loss": 0.6473, + "step": 26468 + }, + { + "epoch": 0.8112357484369254, + "grad_norm": 1.697899177164613, + "learning_rate": 1.8113431546628024e-06, + "loss": 0.6279, + "step": 26469 + }, + { + "epoch": 0.8112663969596665, + "grad_norm": 1.7726473295948977, + "learning_rate": 1.8107734368352504e-06, + "loss": 0.6172, + "step": 26470 + }, + { + "epoch": 0.8112970454824078, + "grad_norm": 0.6533211325260508, + "learning_rate": 1.8102037996982425e-06, + "loss": 0.5425, + "step": 26471 + }, + { + "epoch": 0.8113276940051489, + "grad_norm": 1.6171306953650981, + "learning_rate": 1.8096342432573943e-06, + "loss": 0.6451, + "step": 26472 + }, + { + "epoch": 0.8113583425278902, + "grad_norm": 1.7258121821587618, + "learning_rate": 1.8090647675183138e-06, + "loss": 0.6785, + "step": 26473 + }, + { + "epoch": 0.8113889910506313, + "grad_norm": 1.6511464076441429, + "learning_rate": 1.8084953724866129e-06, + "loss": 0.6929, + "step": 26474 + }, + { + "epoch": 0.8114196395733726, + "grad_norm": 2.0316484034388496, + "learning_rate": 1.8079260581679058e-06, + "loss": 0.7121, + "step": 26475 + }, + { + "epoch": 0.8114502880961137, + "grad_norm": 1.6007891735880357, + "learning_rate": 1.8073568245677974e-06, + "loss": 0.6562, + "step": 26476 + }, + { + "epoch": 0.811480936618855, + "grad_norm": 1.8687736632292569, + "learning_rate": 1.8067876716919008e-06, + "loss": 0.6799, + "step": 26477 + }, + { + "epoch": 0.8115115851415962, + "grad_norm": 1.5150172005323523, + "learning_rate": 1.806218599545816e-06, + "loss": 0.5756, + "step": 26478 + }, + { + "epoch": 0.8115422336643374, + "grad_norm": 1.8510463939038608, + "learning_rate": 1.8056496081351605e-06, + "loss": 0.6063, + "step": 26479 + }, + { + "epoch": 0.8115728821870786, + "grad_norm": 1.8033029699435992, + "learning_rate": 1.8050806974655366e-06, + "loss": 0.5999, + "step": 26480 + }, + { + "epoch": 0.8116035307098198, + "grad_norm": 1.7522500908388836, + "learning_rate": 1.8045118675425466e-06, + "loss": 0.7143, + "step": 26481 + }, + { + "epoch": 0.811634179232561, + "grad_norm": 2.031159430147478, + "learning_rate": 1.803943118371798e-06, + "loss": 0.6806, + "step": 26482 + }, + { + "epoch": 0.8116648277553022, + "grad_norm": 1.6955311434617362, + "learning_rate": 1.803374449958898e-06, + "loss": 0.66, + "step": 26483 + }, + { + "epoch": 0.8116954762780434, + "grad_norm": 1.6760867491531286, + "learning_rate": 1.8028058623094446e-06, + "loss": 0.6139, + "step": 26484 + }, + { + "epoch": 0.8117261248007847, + "grad_norm": 1.903944222749739, + "learning_rate": 1.8022373554290418e-06, + "loss": 0.6336, + "step": 26485 + }, + { + "epoch": 0.8117567733235258, + "grad_norm": 1.9256230107829295, + "learning_rate": 1.8016689293232914e-06, + "loss": 0.7408, + "step": 26486 + }, + { + "epoch": 0.811787421846267, + "grad_norm": 1.7153503519604896, + "learning_rate": 1.8011005839977969e-06, + "loss": 0.7292, + "step": 26487 + }, + { + "epoch": 0.8118180703690082, + "grad_norm": 1.7310046175967089, + "learning_rate": 1.800532319458157e-06, + "loss": 0.559, + "step": 26488 + }, + { + "epoch": 0.8118487188917494, + "grad_norm": 0.6916281235456041, + "learning_rate": 1.7999641357099673e-06, + "loss": 0.542, + "step": 26489 + }, + { + "epoch": 0.8118793674144906, + "grad_norm": 1.390489151056949, + "learning_rate": 1.799396032758829e-06, + "loss": 0.5239, + "step": 26490 + }, + { + "epoch": 0.8119100159372318, + "grad_norm": 1.6779470270540597, + "learning_rate": 1.798828010610343e-06, + "loss": 0.6148, + "step": 26491 + }, + { + "epoch": 0.811940664459973, + "grad_norm": 0.6754903079681563, + "learning_rate": 1.798260069270099e-06, + "loss": 0.5367, + "step": 26492 + }, + { + "epoch": 0.8119713129827142, + "grad_norm": 1.7636774651040708, + "learning_rate": 1.7976922087436977e-06, + "loss": 0.6571, + "step": 26493 + }, + { + "epoch": 0.8120019615054554, + "grad_norm": 1.7376570488688396, + "learning_rate": 1.7971244290367374e-06, + "loss": 0.6033, + "step": 26494 + }, + { + "epoch": 0.8120326100281966, + "grad_norm": 1.921650859875069, + "learning_rate": 1.7965567301548048e-06, + "loss": 0.6712, + "step": 26495 + }, + { + "epoch": 0.8120632585509379, + "grad_norm": 1.624149021684839, + "learning_rate": 1.7959891121035012e-06, + "loss": 0.655, + "step": 26496 + }, + { + "epoch": 0.812093907073679, + "grad_norm": 1.7669390787759596, + "learning_rate": 1.7954215748884096e-06, + "loss": 0.5852, + "step": 26497 + }, + { + "epoch": 0.8121245555964203, + "grad_norm": 0.6550615738848223, + "learning_rate": 1.7948541185151347e-06, + "loss": 0.5074, + "step": 26498 + }, + { + "epoch": 0.8121552041191614, + "grad_norm": 1.5390874866598165, + "learning_rate": 1.794286742989262e-06, + "loss": 0.5524, + "step": 26499 + }, + { + "epoch": 0.8121858526419027, + "grad_norm": 1.5101825137277582, + "learning_rate": 1.7937194483163777e-06, + "loss": 0.6113, + "step": 26500 + }, + { + "epoch": 0.8122165011646438, + "grad_norm": 1.7842772105471956, + "learning_rate": 1.7931522345020758e-06, + "loss": 0.6776, + "step": 26501 + }, + { + "epoch": 0.8122471496873851, + "grad_norm": 1.7229311527635263, + "learning_rate": 1.792585101551948e-06, + "loss": 0.5761, + "step": 26502 + }, + { + "epoch": 0.8122777982101262, + "grad_norm": 0.6991407668416832, + "learning_rate": 1.7920180494715755e-06, + "loss": 0.51, + "step": 26503 + }, + { + "epoch": 0.8123084467328675, + "grad_norm": 1.7735646239268825, + "learning_rate": 1.7914510782665495e-06, + "loss": 0.7362, + "step": 26504 + }, + { + "epoch": 0.8123390952556087, + "grad_norm": 1.8037730191267207, + "learning_rate": 1.7908841879424565e-06, + "loss": 0.6865, + "step": 26505 + }, + { + "epoch": 0.8123697437783499, + "grad_norm": 1.4490605289247769, + "learning_rate": 1.7903173785048843e-06, + "loss": 0.5476, + "step": 26506 + }, + { + "epoch": 0.8124003923010911, + "grad_norm": 1.7554886385592618, + "learning_rate": 1.7897506499594165e-06, + "loss": 0.7616, + "step": 26507 + }, + { + "epoch": 0.8124310408238323, + "grad_norm": 1.5742656994365372, + "learning_rate": 1.7891840023116304e-06, + "loss": 0.6322, + "step": 26508 + }, + { + "epoch": 0.8124616893465735, + "grad_norm": 1.6379085670655247, + "learning_rate": 1.7886174355671205e-06, + "loss": 0.5819, + "step": 26509 + }, + { + "epoch": 0.8124923378693147, + "grad_norm": 2.0000305442566684, + "learning_rate": 1.7880509497314635e-06, + "loss": 0.6602, + "step": 26510 + }, + { + "epoch": 0.8125229863920559, + "grad_norm": 0.6378440981232428, + "learning_rate": 1.7874845448102386e-06, + "loss": 0.5017, + "step": 26511 + }, + { + "epoch": 0.8125536349147972, + "grad_norm": 1.7191912132643, + "learning_rate": 1.7869182208090308e-06, + "loss": 0.7284, + "step": 26512 + }, + { + "epoch": 0.8125842834375383, + "grad_norm": 1.8553687483794832, + "learning_rate": 1.7863519777334193e-06, + "loss": 0.6468, + "step": 26513 + }, + { + "epoch": 0.8126149319602796, + "grad_norm": 0.6713213062541884, + "learning_rate": 1.785785815588985e-06, + "loss": 0.5259, + "step": 26514 + }, + { + "epoch": 0.8126455804830207, + "grad_norm": 1.636740855717816, + "learning_rate": 1.7852197343813028e-06, + "loss": 0.5549, + "step": 26515 + }, + { + "epoch": 0.812676229005762, + "grad_norm": 1.8485074695400865, + "learning_rate": 1.784653734115952e-06, + "loss": 0.714, + "step": 26516 + }, + { + "epoch": 0.8127068775285031, + "grad_norm": 1.7112998881023542, + "learning_rate": 1.784087814798513e-06, + "loss": 0.645, + "step": 26517 + }, + { + "epoch": 0.8127375260512444, + "grad_norm": 0.6526540481454861, + "learning_rate": 1.783521976434558e-06, + "loss": 0.5222, + "step": 26518 + }, + { + "epoch": 0.8127681745739855, + "grad_norm": 1.5524231178910382, + "learning_rate": 1.7829562190296589e-06, + "loss": 0.6473, + "step": 26519 + }, + { + "epoch": 0.8127988230967267, + "grad_norm": 1.5727885746331998, + "learning_rate": 1.7823905425893995e-06, + "loss": 0.616, + "step": 26520 + }, + { + "epoch": 0.812829471619468, + "grad_norm": 1.6232038477830397, + "learning_rate": 1.7818249471193482e-06, + "loss": 0.6042, + "step": 26521 + }, + { + "epoch": 0.8128601201422091, + "grad_norm": 1.5539081867545925, + "learning_rate": 1.7812594326250764e-06, + "loss": 0.6048, + "step": 26522 + }, + { + "epoch": 0.8128907686649504, + "grad_norm": 1.5959576788488365, + "learning_rate": 1.7806939991121585e-06, + "loss": 0.614, + "step": 26523 + }, + { + "epoch": 0.8129214171876915, + "grad_norm": 1.8608079788599026, + "learning_rate": 1.7801286465861655e-06, + "loss": 0.7007, + "step": 26524 + }, + { + "epoch": 0.8129520657104328, + "grad_norm": 1.791064838182032, + "learning_rate": 1.7795633750526697e-06, + "loss": 0.61, + "step": 26525 + }, + { + "epoch": 0.8129827142331739, + "grad_norm": 1.6041764880405973, + "learning_rate": 1.7789981845172377e-06, + "loss": 0.6847, + "step": 26526 + }, + { + "epoch": 0.8130133627559152, + "grad_norm": 1.479359062077673, + "learning_rate": 1.7784330749854395e-06, + "loss": 0.6009, + "step": 26527 + }, + { + "epoch": 0.8130440112786563, + "grad_norm": 1.6610799018419706, + "learning_rate": 1.7778680464628473e-06, + "loss": 0.5851, + "step": 26528 + }, + { + "epoch": 0.8130746598013976, + "grad_norm": 0.6791377251072266, + "learning_rate": 1.7773030989550245e-06, + "loss": 0.5165, + "step": 26529 + }, + { + "epoch": 0.8131053083241387, + "grad_norm": 1.804740743493383, + "learning_rate": 1.776738232467532e-06, + "loss": 0.6273, + "step": 26530 + }, + { + "epoch": 0.81313595684688, + "grad_norm": 1.73833136741307, + "learning_rate": 1.7761734470059478e-06, + "loss": 0.6277, + "step": 26531 + }, + { + "epoch": 0.8131666053696212, + "grad_norm": 0.7099035711861595, + "learning_rate": 1.7756087425758284e-06, + "loss": 0.5327, + "step": 26532 + }, + { + "epoch": 0.8131972538923624, + "grad_norm": 1.5289869019360642, + "learning_rate": 1.7750441191827427e-06, + "loss": 0.6636, + "step": 26533 + }, + { + "epoch": 0.8132279024151036, + "grad_norm": 1.5798520191754224, + "learning_rate": 1.7744795768322488e-06, + "loss": 0.6236, + "step": 26534 + }, + { + "epoch": 0.8132585509378448, + "grad_norm": 1.7204992651228261, + "learning_rate": 1.7739151155299129e-06, + "loss": 0.6432, + "step": 26535 + }, + { + "epoch": 0.813289199460586, + "grad_norm": 1.6242866975931194, + "learning_rate": 1.7733507352812973e-06, + "loss": 0.5162, + "step": 26536 + }, + { + "epoch": 0.8133198479833272, + "grad_norm": 1.6666998138530935, + "learning_rate": 1.77278643609196e-06, + "loss": 0.6993, + "step": 26537 + }, + { + "epoch": 0.8133504965060684, + "grad_norm": 1.5901331797826574, + "learning_rate": 1.772222217967463e-06, + "loss": 0.5407, + "step": 26538 + }, + { + "epoch": 0.8133811450288096, + "grad_norm": 1.6109709894923074, + "learning_rate": 1.7716580809133689e-06, + "loss": 0.5848, + "step": 26539 + }, + { + "epoch": 0.8134117935515508, + "grad_norm": 1.8336129884490382, + "learning_rate": 1.7710940249352305e-06, + "loss": 0.5597, + "step": 26540 + }, + { + "epoch": 0.8134424420742921, + "grad_norm": 1.6538078872955504, + "learning_rate": 1.770530050038609e-06, + "loss": 0.5706, + "step": 26541 + }, + { + "epoch": 0.8134730905970332, + "grad_norm": 1.6808317451592656, + "learning_rate": 1.7699661562290594e-06, + "loss": 0.6314, + "step": 26542 + }, + { + "epoch": 0.8135037391197745, + "grad_norm": 1.7337581190585052, + "learning_rate": 1.7694023435121389e-06, + "loss": 0.6778, + "step": 26543 + }, + { + "epoch": 0.8135343876425156, + "grad_norm": 1.7280438312861885, + "learning_rate": 1.7688386118934053e-06, + "loss": 0.5371, + "step": 26544 + }, + { + "epoch": 0.8135650361652569, + "grad_norm": 1.7933606885735698, + "learning_rate": 1.7682749613784077e-06, + "loss": 0.6846, + "step": 26545 + }, + { + "epoch": 0.813595684687998, + "grad_norm": 0.6896899775622722, + "learning_rate": 1.767711391972704e-06, + "loss": 0.5474, + "step": 26546 + }, + { + "epoch": 0.8136263332107393, + "grad_norm": 1.7380126424021578, + "learning_rate": 1.7671479036818484e-06, + "loss": 0.6751, + "step": 26547 + }, + { + "epoch": 0.8136569817334804, + "grad_norm": 0.6502261887949866, + "learning_rate": 1.7665844965113922e-06, + "loss": 0.5047, + "step": 26548 + }, + { + "epoch": 0.8136876302562217, + "grad_norm": 0.676007735716706, + "learning_rate": 1.7660211704668785e-06, + "loss": 0.5328, + "step": 26549 + }, + { + "epoch": 0.8137182787789629, + "grad_norm": 1.677025528975057, + "learning_rate": 1.7654579255538717e-06, + "loss": 0.6701, + "step": 26550 + }, + { + "epoch": 0.813748927301704, + "grad_norm": 1.6795014641428168, + "learning_rate": 1.764894761777911e-06, + "loss": 0.6848, + "step": 26551 + }, + { + "epoch": 0.8137795758244453, + "grad_norm": 1.913639443827136, + "learning_rate": 1.764331679144552e-06, + "loss": 0.6793, + "step": 26552 + }, + { + "epoch": 0.8138102243471864, + "grad_norm": 1.662113444119127, + "learning_rate": 1.7637686776593389e-06, + "loss": 0.6626, + "step": 26553 + }, + { + "epoch": 0.8138408728699277, + "grad_norm": 1.6501000143431241, + "learning_rate": 1.7632057573278195e-06, + "loss": 0.6911, + "step": 26554 + }, + { + "epoch": 0.8138715213926688, + "grad_norm": 1.5792459568219563, + "learning_rate": 1.7626429181555427e-06, + "loss": 0.7219, + "step": 26555 + }, + { + "epoch": 0.8139021699154101, + "grad_norm": 1.937047438112256, + "learning_rate": 1.762080160148052e-06, + "loss": 0.7116, + "step": 26556 + }, + { + "epoch": 0.8139328184381512, + "grad_norm": 1.6033662129768547, + "learning_rate": 1.7615174833108928e-06, + "loss": 0.6453, + "step": 26557 + }, + { + "epoch": 0.8139634669608925, + "grad_norm": 2.0785458316937695, + "learning_rate": 1.760954887649612e-06, + "loss": 0.7434, + "step": 26558 + }, + { + "epoch": 0.8139941154836337, + "grad_norm": 1.9599299156160772, + "learning_rate": 1.760392373169748e-06, + "loss": 0.6756, + "step": 26559 + }, + { + "epoch": 0.8140247640063749, + "grad_norm": 1.7463053150770897, + "learning_rate": 1.759829939876846e-06, + "loss": 0.6881, + "step": 26560 + }, + { + "epoch": 0.8140554125291161, + "grad_norm": 1.796590289167428, + "learning_rate": 1.7592675877764508e-06, + "loss": 0.6282, + "step": 26561 + }, + { + "epoch": 0.8140860610518573, + "grad_norm": 1.6120886001858188, + "learning_rate": 1.7587053168740986e-06, + "loss": 0.6177, + "step": 26562 + }, + { + "epoch": 0.8141167095745985, + "grad_norm": 1.6718235221271256, + "learning_rate": 1.7581431271753335e-06, + "loss": 0.7053, + "step": 26563 + }, + { + "epoch": 0.8141473580973397, + "grad_norm": 0.6826375603688901, + "learning_rate": 1.75758101868569e-06, + "loss": 0.511, + "step": 26564 + }, + { + "epoch": 0.8141780066200809, + "grad_norm": 1.7099087378217745, + "learning_rate": 1.7570189914107104e-06, + "loss": 0.5841, + "step": 26565 + }, + { + "epoch": 0.8142086551428221, + "grad_norm": 1.6099008970435897, + "learning_rate": 1.7564570453559338e-06, + "loss": 0.6996, + "step": 26566 + }, + { + "epoch": 0.8142393036655633, + "grad_norm": 1.6918671351458618, + "learning_rate": 1.7558951805268931e-06, + "loss": 0.7047, + "step": 26567 + }, + { + "epoch": 0.8142699521883046, + "grad_norm": 0.7120870359851281, + "learning_rate": 1.7553333969291265e-06, + "loss": 0.5279, + "step": 26568 + }, + { + "epoch": 0.8143006007110457, + "grad_norm": 1.7040016986789797, + "learning_rate": 1.7547716945681714e-06, + "loss": 0.6505, + "step": 26569 + }, + { + "epoch": 0.814331249233787, + "grad_norm": 1.707277601603113, + "learning_rate": 1.7542100734495582e-06, + "loss": 0.6508, + "step": 26570 + }, + { + "epoch": 0.8143618977565281, + "grad_norm": 1.8818904246300543, + "learning_rate": 1.7536485335788223e-06, + "loss": 0.6096, + "step": 26571 + }, + { + "epoch": 0.8143925462792694, + "grad_norm": 1.9804469696272404, + "learning_rate": 1.7530870749615002e-06, + "loss": 0.7083, + "step": 26572 + }, + { + "epoch": 0.8144231948020105, + "grad_norm": 0.6795479405046493, + "learning_rate": 1.7525256976031191e-06, + "loss": 0.579, + "step": 26573 + }, + { + "epoch": 0.8144538433247518, + "grad_norm": 1.6924813116997726, + "learning_rate": 1.7519644015092153e-06, + "loss": 0.5973, + "step": 26574 + }, + { + "epoch": 0.8144844918474929, + "grad_norm": 1.6561386669738565, + "learning_rate": 1.7514031866853132e-06, + "loss": 0.6282, + "step": 26575 + }, + { + "epoch": 0.8145151403702342, + "grad_norm": 0.6619882818278547, + "learning_rate": 1.7508420531369464e-06, + "loss": 0.4974, + "step": 26576 + }, + { + "epoch": 0.8145457888929754, + "grad_norm": 1.8471761197072147, + "learning_rate": 1.7502810008696459e-06, + "loss": 0.6772, + "step": 26577 + }, + { + "epoch": 0.8145764374157166, + "grad_norm": 1.7408712969337075, + "learning_rate": 1.749720029888935e-06, + "loss": 0.5466, + "step": 26578 + }, + { + "epoch": 0.8146070859384578, + "grad_norm": 1.7362189797538121, + "learning_rate": 1.7491591402003438e-06, + "loss": 0.6396, + "step": 26579 + }, + { + "epoch": 0.814637734461199, + "grad_norm": 1.7980936706684045, + "learning_rate": 1.7485983318094012e-06, + "loss": 0.5841, + "step": 26580 + }, + { + "epoch": 0.8146683829839402, + "grad_norm": 1.6466167573024644, + "learning_rate": 1.7480376047216275e-06, + "loss": 0.6476, + "step": 26581 + }, + { + "epoch": 0.8146990315066813, + "grad_norm": 0.6920521426313626, + "learning_rate": 1.747476958942551e-06, + "loss": 0.5127, + "step": 26582 + }, + { + "epoch": 0.8147296800294226, + "grad_norm": 0.6926465753723301, + "learning_rate": 1.746916394477698e-06, + "loss": 0.5041, + "step": 26583 + }, + { + "epoch": 0.8147603285521637, + "grad_norm": 1.5930943196282843, + "learning_rate": 1.7463559113325868e-06, + "loss": 0.7243, + "step": 26584 + }, + { + "epoch": 0.814790977074905, + "grad_norm": 1.6292726707114091, + "learning_rate": 1.7457955095127455e-06, + "loss": 0.5727, + "step": 26585 + }, + { + "epoch": 0.8148216255976461, + "grad_norm": 0.6799355066625293, + "learning_rate": 1.7452351890236897e-06, + "loss": 0.52, + "step": 26586 + }, + { + "epoch": 0.8148522741203874, + "grad_norm": 1.6030681499221988, + "learning_rate": 1.7446749498709437e-06, + "loss": 0.5975, + "step": 26587 + }, + { + "epoch": 0.8148829226431286, + "grad_norm": 1.4646541903870494, + "learning_rate": 1.744114792060031e-06, + "loss": 0.5268, + "step": 26588 + }, + { + "epoch": 0.8149135711658698, + "grad_norm": 1.7199880034178996, + "learning_rate": 1.743554715596465e-06, + "loss": 0.6793, + "step": 26589 + }, + { + "epoch": 0.814944219688611, + "grad_norm": 1.6480511086965333, + "learning_rate": 1.7429947204857655e-06, + "loss": 0.6478, + "step": 26590 + }, + { + "epoch": 0.8149748682113522, + "grad_norm": 1.6664222800521267, + "learning_rate": 1.7424348067334563e-06, + "loss": 0.6552, + "step": 26591 + }, + { + "epoch": 0.8150055167340934, + "grad_norm": 0.6673778864446055, + "learning_rate": 1.741874974345046e-06, + "loss": 0.4921, + "step": 26592 + }, + { + "epoch": 0.8150361652568346, + "grad_norm": 1.4036188452237024, + "learning_rate": 1.7413152233260567e-06, + "loss": 0.5656, + "step": 26593 + }, + { + "epoch": 0.8150668137795758, + "grad_norm": 1.675856720862738, + "learning_rate": 1.7407555536819997e-06, + "loss": 0.5879, + "step": 26594 + }, + { + "epoch": 0.815097462302317, + "grad_norm": 1.6008661881555195, + "learning_rate": 1.7401959654183908e-06, + "loss": 0.6807, + "step": 26595 + }, + { + "epoch": 0.8151281108250582, + "grad_norm": 1.5654455904917632, + "learning_rate": 1.7396364585407477e-06, + "loss": 0.5585, + "step": 26596 + }, + { + "epoch": 0.8151587593477995, + "grad_norm": 1.585284818697116, + "learning_rate": 1.7390770330545769e-06, + "loss": 0.6552, + "step": 26597 + }, + { + "epoch": 0.8151894078705406, + "grad_norm": 1.837590518933135, + "learning_rate": 1.7385176889653943e-06, + "loss": 0.7393, + "step": 26598 + }, + { + "epoch": 0.8152200563932819, + "grad_norm": 0.6827438289840654, + "learning_rate": 1.7379584262787131e-06, + "loss": 0.5227, + "step": 26599 + }, + { + "epoch": 0.815250704916023, + "grad_norm": 1.8456092704537612, + "learning_rate": 1.7373992450000387e-06, + "loss": 0.6246, + "step": 26600 + }, + { + "epoch": 0.8152813534387643, + "grad_norm": 1.6462419492849503, + "learning_rate": 1.7368401451348837e-06, + "loss": 0.695, + "step": 26601 + }, + { + "epoch": 0.8153120019615054, + "grad_norm": 1.597287026225543, + "learning_rate": 1.736281126688759e-06, + "loss": 0.5571, + "step": 26602 + }, + { + "epoch": 0.8153426504842467, + "grad_norm": 1.7243337473629754, + "learning_rate": 1.7357221896671694e-06, + "loss": 0.7257, + "step": 26603 + }, + { + "epoch": 0.8153732990069879, + "grad_norm": 1.7512611384768961, + "learning_rate": 1.7351633340756247e-06, + "loss": 0.6671, + "step": 26604 + }, + { + "epoch": 0.8154039475297291, + "grad_norm": 1.8200010641002577, + "learning_rate": 1.734604559919626e-06, + "loss": 0.6554, + "step": 26605 + }, + { + "epoch": 0.8154345960524703, + "grad_norm": 1.737282629986744, + "learning_rate": 1.734045867204689e-06, + "loss": 0.6367, + "step": 26606 + }, + { + "epoch": 0.8154652445752115, + "grad_norm": 1.5742186614563765, + "learning_rate": 1.7334872559363126e-06, + "loss": 0.6627, + "step": 26607 + }, + { + "epoch": 0.8154958930979527, + "grad_norm": 2.0337293251704986, + "learning_rate": 1.7329287261199979e-06, + "loss": 0.7806, + "step": 26608 + }, + { + "epoch": 0.8155265416206939, + "grad_norm": 0.6293802927916575, + "learning_rate": 1.7323702777612529e-06, + "loss": 0.4907, + "step": 26609 + }, + { + "epoch": 0.8155571901434351, + "grad_norm": 1.602454372498578, + "learning_rate": 1.7318119108655807e-06, + "loss": 0.6588, + "step": 26610 + }, + { + "epoch": 0.8155878386661763, + "grad_norm": 1.8465188469999654, + "learning_rate": 1.7312536254384794e-06, + "loss": 0.727, + "step": 26611 + }, + { + "epoch": 0.8156184871889175, + "grad_norm": 1.7806590244342084, + "learning_rate": 1.730695421485451e-06, + "loss": 0.6423, + "step": 26612 + }, + { + "epoch": 0.8156491357116586, + "grad_norm": 1.984656429315511, + "learning_rate": 1.7301372990119968e-06, + "loss": 0.7777, + "step": 26613 + }, + { + "epoch": 0.8156797842343999, + "grad_norm": 1.817196835218645, + "learning_rate": 1.729579258023618e-06, + "loss": 0.6035, + "step": 26614 + }, + { + "epoch": 0.8157104327571411, + "grad_norm": 1.5958994633240606, + "learning_rate": 1.7290212985258114e-06, + "loss": 0.5688, + "step": 26615 + }, + { + "epoch": 0.8157410812798823, + "grad_norm": 1.7011982101676553, + "learning_rate": 1.7284634205240692e-06, + "loss": 0.6389, + "step": 26616 + }, + { + "epoch": 0.8157717298026235, + "grad_norm": 1.552001232641861, + "learning_rate": 1.7279056240238978e-06, + "loss": 0.5826, + "step": 26617 + }, + { + "epoch": 0.8158023783253647, + "grad_norm": 1.8186732172725244, + "learning_rate": 1.7273479090307888e-06, + "loss": 0.6064, + "step": 26618 + }, + { + "epoch": 0.8158330268481059, + "grad_norm": 0.6710381389606737, + "learning_rate": 1.7267902755502353e-06, + "loss": 0.5389, + "step": 26619 + }, + { + "epoch": 0.8158636753708471, + "grad_norm": 1.662238062051277, + "learning_rate": 1.726232723587733e-06, + "loss": 0.5829, + "step": 26620 + }, + { + "epoch": 0.8158943238935883, + "grad_norm": 1.5919034590276087, + "learning_rate": 1.7256752531487796e-06, + "loss": 0.5925, + "step": 26621 + }, + { + "epoch": 0.8159249724163296, + "grad_norm": 1.7623341482243953, + "learning_rate": 1.7251178642388633e-06, + "loss": 0.621, + "step": 26622 + }, + { + "epoch": 0.8159556209390707, + "grad_norm": 1.600491457953816, + "learning_rate": 1.724560556863477e-06, + "loss": 0.6395, + "step": 26623 + }, + { + "epoch": 0.815986269461812, + "grad_norm": 1.8480455793455117, + "learning_rate": 1.7240033310281135e-06, + "loss": 0.6797, + "step": 26624 + }, + { + "epoch": 0.8160169179845531, + "grad_norm": 0.9740124863049923, + "learning_rate": 1.7234461867382658e-06, + "loss": 0.503, + "step": 26625 + }, + { + "epoch": 0.8160475665072944, + "grad_norm": 1.6617496209585458, + "learning_rate": 1.7228891239994193e-06, + "loss": 0.7204, + "step": 26626 + }, + { + "epoch": 0.8160782150300355, + "grad_norm": 1.670580925073925, + "learning_rate": 1.7223321428170591e-06, + "loss": 0.6153, + "step": 26627 + }, + { + "epoch": 0.8161088635527768, + "grad_norm": 1.748722864599218, + "learning_rate": 1.7217752431966839e-06, + "loss": 0.7302, + "step": 26628 + }, + { + "epoch": 0.8161395120755179, + "grad_norm": 1.833716948409012, + "learning_rate": 1.7212184251437747e-06, + "loss": 0.6259, + "step": 26629 + }, + { + "epoch": 0.8161701605982592, + "grad_norm": 1.66749140976549, + "learning_rate": 1.7206616886638162e-06, + "loss": 0.7449, + "step": 26630 + }, + { + "epoch": 0.8162008091210003, + "grad_norm": 1.7276193394934647, + "learning_rate": 1.720105033762297e-06, + "loss": 0.5914, + "step": 26631 + }, + { + "epoch": 0.8162314576437416, + "grad_norm": 0.6519995068091972, + "learning_rate": 1.719548460444701e-06, + "loss": 0.4885, + "step": 26632 + }, + { + "epoch": 0.8162621061664828, + "grad_norm": 1.6694892189838242, + "learning_rate": 1.7189919687165145e-06, + "loss": 0.6044, + "step": 26633 + }, + { + "epoch": 0.816292754689224, + "grad_norm": 1.8971281704763951, + "learning_rate": 1.7184355585832169e-06, + "loss": 0.6575, + "step": 26634 + }, + { + "epoch": 0.8163234032119652, + "grad_norm": 1.5548138404259302, + "learning_rate": 1.7178792300502934e-06, + "loss": 0.5915, + "step": 26635 + }, + { + "epoch": 0.8163540517347064, + "grad_norm": 1.6593419148406077, + "learning_rate": 1.7173229831232262e-06, + "loss": 0.7343, + "step": 26636 + }, + { + "epoch": 0.8163847002574476, + "grad_norm": 1.5172579187351753, + "learning_rate": 1.7167668178074958e-06, + "loss": 0.6373, + "step": 26637 + }, + { + "epoch": 0.8164153487801888, + "grad_norm": 1.5909039777615461, + "learning_rate": 1.7162107341085788e-06, + "loss": 0.5254, + "step": 26638 + }, + { + "epoch": 0.81644599730293, + "grad_norm": 1.77160766722775, + "learning_rate": 1.715654732031956e-06, + "loss": 0.6808, + "step": 26639 + }, + { + "epoch": 0.8164766458256713, + "grad_norm": 1.6912129978948338, + "learning_rate": 1.715098811583108e-06, + "loss": 0.6149, + "step": 26640 + }, + { + "epoch": 0.8165072943484124, + "grad_norm": 1.8660431464722038, + "learning_rate": 1.7145429727675134e-06, + "loss": 0.5855, + "step": 26641 + }, + { + "epoch": 0.8165379428711537, + "grad_norm": 1.7521722386073788, + "learning_rate": 1.7139872155906434e-06, + "loss": 0.6134, + "step": 26642 + }, + { + "epoch": 0.8165685913938948, + "grad_norm": 1.9298159374851525, + "learning_rate": 1.7134315400579782e-06, + "loss": 0.7311, + "step": 26643 + }, + { + "epoch": 0.816599239916636, + "grad_norm": 1.7015409098406085, + "learning_rate": 1.7128759461749944e-06, + "loss": 0.7059, + "step": 26644 + }, + { + "epoch": 0.8166298884393772, + "grad_norm": 1.686204691148446, + "learning_rate": 1.7123204339471643e-06, + "loss": 0.6742, + "step": 26645 + }, + { + "epoch": 0.8166605369621184, + "grad_norm": 0.6696787959531634, + "learning_rate": 1.711765003379957e-06, + "loss": 0.5224, + "step": 26646 + }, + { + "epoch": 0.8166911854848596, + "grad_norm": 1.6732993298737784, + "learning_rate": 1.7112096544788547e-06, + "loss": 0.643, + "step": 26647 + }, + { + "epoch": 0.8167218340076008, + "grad_norm": 1.5912936648365954, + "learning_rate": 1.7106543872493242e-06, + "loss": 0.6577, + "step": 26648 + }, + { + "epoch": 0.816752482530342, + "grad_norm": 1.5956086132204246, + "learning_rate": 1.7100992016968342e-06, + "loss": 0.5725, + "step": 26649 + }, + { + "epoch": 0.8167831310530832, + "grad_norm": 1.4933456990669598, + "learning_rate": 1.7095440978268573e-06, + "loss": 0.4892, + "step": 26650 + }, + { + "epoch": 0.8168137795758245, + "grad_norm": 1.8151152387560612, + "learning_rate": 1.7089890756448645e-06, + "loss": 0.7012, + "step": 26651 + }, + { + "epoch": 0.8168444280985656, + "grad_norm": 1.7409961211854212, + "learning_rate": 1.7084341351563261e-06, + "loss": 0.5731, + "step": 26652 + }, + { + "epoch": 0.8168750766213069, + "grad_norm": 0.6492130542345564, + "learning_rate": 1.7078792763667051e-06, + "loss": 0.5206, + "step": 26653 + }, + { + "epoch": 0.816905725144048, + "grad_norm": 0.6790546992228319, + "learning_rate": 1.7073244992814707e-06, + "loss": 0.5383, + "step": 26654 + }, + { + "epoch": 0.8169363736667893, + "grad_norm": 0.6745242646958677, + "learning_rate": 1.7067698039060931e-06, + "loss": 0.5384, + "step": 26655 + }, + { + "epoch": 0.8169670221895304, + "grad_norm": 1.678025769447618, + "learning_rate": 1.7062151902460344e-06, + "loss": 0.7136, + "step": 26656 + }, + { + "epoch": 0.8169976707122717, + "grad_norm": 1.5566738311660333, + "learning_rate": 1.7056606583067547e-06, + "loss": 0.6544, + "step": 26657 + }, + { + "epoch": 0.8170283192350128, + "grad_norm": 0.6624184387115882, + "learning_rate": 1.7051062080937264e-06, + "loss": 0.4841, + "step": 26658 + }, + { + "epoch": 0.8170589677577541, + "grad_norm": 1.7478136576745378, + "learning_rate": 1.7045518396124072e-06, + "loss": 0.7183, + "step": 26659 + }, + { + "epoch": 0.8170896162804953, + "grad_norm": 1.5407180055863363, + "learning_rate": 1.703997552868264e-06, + "loss": 0.6554, + "step": 26660 + }, + { + "epoch": 0.8171202648032365, + "grad_norm": 1.6569374897432732, + "learning_rate": 1.7034433478667534e-06, + "loss": 0.6072, + "step": 26661 + }, + { + "epoch": 0.8171509133259777, + "grad_norm": 0.6629523951113346, + "learning_rate": 1.7028892246133377e-06, + "loss": 0.5309, + "step": 26662 + }, + { + "epoch": 0.8171815618487189, + "grad_norm": 1.7679712906196852, + "learning_rate": 1.7023351831134804e-06, + "loss": 0.654, + "step": 26663 + }, + { + "epoch": 0.8172122103714601, + "grad_norm": 1.4896730636518933, + "learning_rate": 1.7017812233726339e-06, + "loss": 0.5652, + "step": 26664 + }, + { + "epoch": 0.8172428588942013, + "grad_norm": 1.7866067390263949, + "learning_rate": 1.7012273453962614e-06, + "loss": 0.6439, + "step": 26665 + }, + { + "epoch": 0.8172735074169425, + "grad_norm": 2.073407696959595, + "learning_rate": 1.7006735491898207e-06, + "loss": 0.5617, + "step": 26666 + }, + { + "epoch": 0.8173041559396838, + "grad_norm": 1.4858671436792594, + "learning_rate": 1.7001198347587655e-06, + "loss": 0.5796, + "step": 26667 + }, + { + "epoch": 0.8173348044624249, + "grad_norm": 1.703116942821589, + "learning_rate": 1.6995662021085524e-06, + "loss": 0.6798, + "step": 26668 + }, + { + "epoch": 0.8173654529851662, + "grad_norm": 1.682959147261958, + "learning_rate": 1.6990126512446403e-06, + "loss": 0.6269, + "step": 26669 + }, + { + "epoch": 0.8173961015079073, + "grad_norm": 1.5470223898460225, + "learning_rate": 1.6984591821724772e-06, + "loss": 0.6484, + "step": 26670 + }, + { + "epoch": 0.8174267500306486, + "grad_norm": 1.6024263308962374, + "learning_rate": 1.697905794897523e-06, + "loss": 0.5844, + "step": 26671 + }, + { + "epoch": 0.8174573985533897, + "grad_norm": 1.7329983500274637, + "learning_rate": 1.6973524894252247e-06, + "loss": 0.7066, + "step": 26672 + }, + { + "epoch": 0.817488047076131, + "grad_norm": 1.5615227749018286, + "learning_rate": 1.6967992657610366e-06, + "loss": 0.6231, + "step": 26673 + }, + { + "epoch": 0.8175186955988721, + "grad_norm": 0.6703242668826795, + "learning_rate": 1.6962461239104123e-06, + "loss": 0.5527, + "step": 26674 + }, + { + "epoch": 0.8175493441216133, + "grad_norm": 1.8649402597323577, + "learning_rate": 1.6956930638787972e-06, + "loss": 0.5785, + "step": 26675 + }, + { + "epoch": 0.8175799926443545, + "grad_norm": 1.609367921188314, + "learning_rate": 1.6951400856716426e-06, + "loss": 0.5923, + "step": 26676 + }, + { + "epoch": 0.8176106411670957, + "grad_norm": 1.8382711528156215, + "learning_rate": 1.6945871892944e-06, + "loss": 0.6935, + "step": 26677 + }, + { + "epoch": 0.817641289689837, + "grad_norm": 1.9040992306379438, + "learning_rate": 1.6940343747525123e-06, + "loss": 0.7065, + "step": 26678 + }, + { + "epoch": 0.8176719382125781, + "grad_norm": 1.5826080213843488, + "learning_rate": 1.69348164205143e-06, + "loss": 0.5766, + "step": 26679 + }, + { + "epoch": 0.8177025867353194, + "grad_norm": 1.7719306597314464, + "learning_rate": 1.6929289911966007e-06, + "loss": 0.6884, + "step": 26680 + }, + { + "epoch": 0.8177332352580605, + "grad_norm": 1.7849166165517105, + "learning_rate": 1.6923764221934646e-06, + "loss": 0.5666, + "step": 26681 + }, + { + "epoch": 0.8177638837808018, + "grad_norm": 1.6745577283588549, + "learning_rate": 1.6918239350474708e-06, + "loss": 0.572, + "step": 26682 + }, + { + "epoch": 0.8177945323035429, + "grad_norm": 1.6459200571190098, + "learning_rate": 1.6912715297640603e-06, + "loss": 0.6162, + "step": 26683 + }, + { + "epoch": 0.8178251808262842, + "grad_norm": 1.9699356058752369, + "learning_rate": 1.6907192063486777e-06, + "loss": 0.6126, + "step": 26684 + }, + { + "epoch": 0.8178558293490253, + "grad_norm": 1.7870357184195986, + "learning_rate": 1.6901669648067664e-06, + "loss": 0.7229, + "step": 26685 + }, + { + "epoch": 0.8178864778717666, + "grad_norm": 1.5920862584660087, + "learning_rate": 1.6896148051437632e-06, + "loss": 0.6862, + "step": 26686 + }, + { + "epoch": 0.8179171263945078, + "grad_norm": 0.6777856741576771, + "learning_rate": 1.6890627273651128e-06, + "loss": 0.5032, + "step": 26687 + }, + { + "epoch": 0.817947774917249, + "grad_norm": 0.7068131346765077, + "learning_rate": 1.688510731476255e-06, + "loss": 0.5263, + "step": 26688 + }, + { + "epoch": 0.8179784234399902, + "grad_norm": 1.6800009349024125, + "learning_rate": 1.6879588174826266e-06, + "loss": 0.5943, + "step": 26689 + }, + { + "epoch": 0.8180090719627314, + "grad_norm": 1.7645477072518447, + "learning_rate": 1.687406985389668e-06, + "loss": 0.5834, + "step": 26690 + }, + { + "epoch": 0.8180397204854726, + "grad_norm": 1.9537509385218528, + "learning_rate": 1.6868552352028134e-06, + "loss": 0.752, + "step": 26691 + }, + { + "epoch": 0.8180703690082138, + "grad_norm": 1.6820325990050977, + "learning_rate": 1.6863035669275007e-06, + "loss": 0.6388, + "step": 26692 + }, + { + "epoch": 0.818101017530955, + "grad_norm": 1.7445368969452224, + "learning_rate": 1.6857519805691692e-06, + "loss": 0.6713, + "step": 26693 + }, + { + "epoch": 0.8181316660536962, + "grad_norm": 1.7401293968904599, + "learning_rate": 1.6852004761332474e-06, + "loss": 0.6025, + "step": 26694 + }, + { + "epoch": 0.8181623145764374, + "grad_norm": 1.9361305222841036, + "learning_rate": 1.6846490536251725e-06, + "loss": 0.7267, + "step": 26695 + }, + { + "epoch": 0.8181929630991787, + "grad_norm": 1.6125702357623775, + "learning_rate": 1.6840977130503821e-06, + "loss": 0.6744, + "step": 26696 + }, + { + "epoch": 0.8182236116219198, + "grad_norm": 1.4767935035676083, + "learning_rate": 1.683546454414301e-06, + "loss": 0.5826, + "step": 26697 + }, + { + "epoch": 0.8182542601446611, + "grad_norm": 1.7205692013646419, + "learning_rate": 1.6829952777223647e-06, + "loss": 0.7219, + "step": 26698 + }, + { + "epoch": 0.8182849086674022, + "grad_norm": 1.4851632827715253, + "learning_rate": 1.6824441829800065e-06, + "loss": 0.6557, + "step": 26699 + }, + { + "epoch": 0.8183155571901435, + "grad_norm": 1.6835297861102754, + "learning_rate": 1.681893170192651e-06, + "loss": 0.6447, + "step": 26700 + }, + { + "epoch": 0.8183462057128846, + "grad_norm": 1.682735538825629, + "learning_rate": 1.6813422393657341e-06, + "loss": 0.6328, + "step": 26701 + }, + { + "epoch": 0.8183768542356259, + "grad_norm": 1.6486120808548936, + "learning_rate": 1.6807913905046768e-06, + "loss": 0.591, + "step": 26702 + }, + { + "epoch": 0.818407502758367, + "grad_norm": 1.6093098049827101, + "learning_rate": 1.6802406236149115e-06, + "loss": 0.6438, + "step": 26703 + }, + { + "epoch": 0.8184381512811083, + "grad_norm": 1.864993139516703, + "learning_rate": 1.679689938701865e-06, + "loss": 0.6675, + "step": 26704 + }, + { + "epoch": 0.8184687998038495, + "grad_norm": 1.7142042424348745, + "learning_rate": 1.6791393357709618e-06, + "loss": 0.5112, + "step": 26705 + }, + { + "epoch": 0.8184994483265906, + "grad_norm": 1.5291310171031411, + "learning_rate": 1.6785888148276263e-06, + "loss": 0.6042, + "step": 26706 + }, + { + "epoch": 0.8185300968493319, + "grad_norm": 1.978157072627003, + "learning_rate": 1.6780383758772877e-06, + "loss": 0.6083, + "step": 26707 + }, + { + "epoch": 0.818560745372073, + "grad_norm": 2.119816836351463, + "learning_rate": 1.677488018925363e-06, + "loss": 0.6769, + "step": 26708 + }, + { + "epoch": 0.8185913938948143, + "grad_norm": 1.4519282213936302, + "learning_rate": 1.6769377439772782e-06, + "loss": 0.6309, + "step": 26709 + }, + { + "epoch": 0.8186220424175554, + "grad_norm": 1.704418200976234, + "learning_rate": 1.6763875510384587e-06, + "loss": 0.5876, + "step": 26710 + }, + { + "epoch": 0.8186526909402967, + "grad_norm": 1.727089689189109, + "learning_rate": 1.6758374401143196e-06, + "loss": 0.6076, + "step": 26711 + }, + { + "epoch": 0.8186833394630378, + "grad_norm": 1.6146664861734659, + "learning_rate": 1.6752874112102857e-06, + "loss": 0.5494, + "step": 26712 + }, + { + "epoch": 0.8187139879857791, + "grad_norm": 0.6643602711581501, + "learning_rate": 1.6747374643317705e-06, + "loss": 0.4993, + "step": 26713 + }, + { + "epoch": 0.8187446365085203, + "grad_norm": 1.8624779295261873, + "learning_rate": 1.6741875994842028e-06, + "loss": 0.6837, + "step": 26714 + }, + { + "epoch": 0.8187752850312615, + "grad_norm": 0.6589492024230726, + "learning_rate": 1.6736378166729938e-06, + "loss": 0.5021, + "step": 26715 + }, + { + "epoch": 0.8188059335540027, + "grad_norm": 1.812188693316912, + "learning_rate": 1.6730881159035606e-06, + "loss": 0.731, + "step": 26716 + }, + { + "epoch": 0.8188365820767439, + "grad_norm": 0.6641905516901849, + "learning_rate": 1.6725384971813198e-06, + "loss": 0.5462, + "step": 26717 + }, + { + "epoch": 0.8188672305994851, + "grad_norm": 1.610624618351187, + "learning_rate": 1.67198896051169e-06, + "loss": 0.612, + "step": 26718 + }, + { + "epoch": 0.8188978791222263, + "grad_norm": 0.6972520983431424, + "learning_rate": 1.671439505900082e-06, + "loss": 0.521, + "step": 26719 + }, + { + "epoch": 0.8189285276449675, + "grad_norm": 1.8291906900982278, + "learning_rate": 1.6708901333519111e-06, + "loss": 0.6687, + "step": 26720 + }, + { + "epoch": 0.8189591761677087, + "grad_norm": 1.5884638984574202, + "learning_rate": 1.670340842872591e-06, + "loss": 0.6824, + "step": 26721 + }, + { + "epoch": 0.8189898246904499, + "grad_norm": 1.8064750705995667, + "learning_rate": 1.6697916344675368e-06, + "loss": 0.6924, + "step": 26722 + }, + { + "epoch": 0.8190204732131912, + "grad_norm": 0.6370189916700769, + "learning_rate": 1.669242508142156e-06, + "loss": 0.5296, + "step": 26723 + }, + { + "epoch": 0.8190511217359323, + "grad_norm": 1.6944332068559917, + "learning_rate": 1.668693463901856e-06, + "loss": 0.5862, + "step": 26724 + }, + { + "epoch": 0.8190817702586736, + "grad_norm": 0.680843879992619, + "learning_rate": 1.668144501752056e-06, + "loss": 0.5239, + "step": 26725 + }, + { + "epoch": 0.8191124187814147, + "grad_norm": 1.6229854233299643, + "learning_rate": 1.6675956216981593e-06, + "loss": 0.6253, + "step": 26726 + }, + { + "epoch": 0.819143067304156, + "grad_norm": 1.7626418266784976, + "learning_rate": 1.6670468237455728e-06, + "loss": 0.5937, + "step": 26727 + }, + { + "epoch": 0.8191737158268971, + "grad_norm": 0.653512975192971, + "learning_rate": 1.6664981078997066e-06, + "loss": 0.5217, + "step": 26728 + }, + { + "epoch": 0.8192043643496384, + "grad_norm": 1.5757923740028743, + "learning_rate": 1.6659494741659688e-06, + "loss": 0.6235, + "step": 26729 + }, + { + "epoch": 0.8192350128723795, + "grad_norm": 1.5570160350407019, + "learning_rate": 1.6654009225497603e-06, + "loss": 0.6111, + "step": 26730 + }, + { + "epoch": 0.8192656613951208, + "grad_norm": 1.5248795706418805, + "learning_rate": 1.6648524530564892e-06, + "loss": 0.6086, + "step": 26731 + }, + { + "epoch": 0.819296309917862, + "grad_norm": 1.5798246435899623, + "learning_rate": 1.66430406569156e-06, + "loss": 0.6374, + "step": 26732 + }, + { + "epoch": 0.8193269584406032, + "grad_norm": 1.6208475536203095, + "learning_rate": 1.6637557604603782e-06, + "loss": 0.6452, + "step": 26733 + }, + { + "epoch": 0.8193576069633444, + "grad_norm": 1.8701242900668253, + "learning_rate": 1.6632075373683432e-06, + "loss": 0.5703, + "step": 26734 + }, + { + "epoch": 0.8193882554860856, + "grad_norm": 1.7775114968014003, + "learning_rate": 1.6626593964208547e-06, + "loss": 0.6635, + "step": 26735 + }, + { + "epoch": 0.8194189040088268, + "grad_norm": 0.6564850334694502, + "learning_rate": 1.6621113376233166e-06, + "loss": 0.5031, + "step": 26736 + }, + { + "epoch": 0.8194495525315679, + "grad_norm": 0.6819153913917266, + "learning_rate": 1.6615633609811322e-06, + "loss": 0.5386, + "step": 26737 + }, + { + "epoch": 0.8194802010543092, + "grad_norm": 1.816671762560344, + "learning_rate": 1.6610154664996936e-06, + "loss": 0.5938, + "step": 26738 + }, + { + "epoch": 0.8195108495770503, + "grad_norm": 1.5654034721781185, + "learning_rate": 1.6604676541844044e-06, + "loss": 0.65, + "step": 26739 + }, + { + "epoch": 0.8195414980997916, + "grad_norm": 1.6620639052867645, + "learning_rate": 1.6599199240406606e-06, + "loss": 0.6256, + "step": 26740 + }, + { + "epoch": 0.8195721466225327, + "grad_norm": 1.498718975808323, + "learning_rate": 1.6593722760738617e-06, + "loss": 0.7123, + "step": 26741 + }, + { + "epoch": 0.819602795145274, + "grad_norm": 1.5451377873720755, + "learning_rate": 1.6588247102894027e-06, + "loss": 0.614, + "step": 26742 + }, + { + "epoch": 0.8196334436680152, + "grad_norm": 1.751525745068671, + "learning_rate": 1.6582772266926727e-06, + "loss": 0.6097, + "step": 26743 + }, + { + "epoch": 0.8196640921907564, + "grad_norm": 1.7326957294906609, + "learning_rate": 1.6577298252890762e-06, + "loss": 0.5987, + "step": 26744 + }, + { + "epoch": 0.8196947407134976, + "grad_norm": 1.6672808648903823, + "learning_rate": 1.657182506084003e-06, + "loss": 0.6784, + "step": 26745 + }, + { + "epoch": 0.8197253892362388, + "grad_norm": 1.584442512891522, + "learning_rate": 1.6566352690828425e-06, + "loss": 0.659, + "step": 26746 + }, + { + "epoch": 0.81975603775898, + "grad_norm": 0.6785725119890448, + "learning_rate": 1.656088114290989e-06, + "loss": 0.5216, + "step": 26747 + }, + { + "epoch": 0.8197866862817212, + "grad_norm": 1.6701176943659184, + "learning_rate": 1.6555410417138361e-06, + "loss": 0.5458, + "step": 26748 + }, + { + "epoch": 0.8198173348044624, + "grad_norm": 0.6494095145181563, + "learning_rate": 1.6549940513567709e-06, + "loss": 0.4939, + "step": 26749 + }, + { + "epoch": 0.8198479833272037, + "grad_norm": 1.574704878024234, + "learning_rate": 1.6544471432251841e-06, + "loss": 0.6182, + "step": 26750 + }, + { + "epoch": 0.8198786318499448, + "grad_norm": 1.7125505034653534, + "learning_rate": 1.653900317324465e-06, + "loss": 0.6516, + "step": 26751 + }, + { + "epoch": 0.8199092803726861, + "grad_norm": 1.4809185808073047, + "learning_rate": 1.6533535736600038e-06, + "loss": 0.6264, + "step": 26752 + }, + { + "epoch": 0.8199399288954272, + "grad_norm": 2.0476954432110777, + "learning_rate": 1.6528069122371849e-06, + "loss": 0.6743, + "step": 26753 + }, + { + "epoch": 0.8199705774181685, + "grad_norm": 1.8031322781580181, + "learning_rate": 1.6522603330613917e-06, + "loss": 0.6684, + "step": 26754 + }, + { + "epoch": 0.8200012259409096, + "grad_norm": 1.8882850168338068, + "learning_rate": 1.651713836138017e-06, + "loss": 0.7161, + "step": 26755 + }, + { + "epoch": 0.8200318744636509, + "grad_norm": 1.5710480467366432, + "learning_rate": 1.6511674214724426e-06, + "loss": 0.652, + "step": 26756 + }, + { + "epoch": 0.820062522986392, + "grad_norm": 1.8774375117018132, + "learning_rate": 1.650621089070049e-06, + "loss": 0.7009, + "step": 26757 + }, + { + "epoch": 0.8200931715091333, + "grad_norm": 1.7828691363476143, + "learning_rate": 1.650074838936222e-06, + "loss": 0.6713, + "step": 26758 + }, + { + "epoch": 0.8201238200318745, + "grad_norm": 1.6117534312847888, + "learning_rate": 1.6495286710763437e-06, + "loss": 0.5854, + "step": 26759 + }, + { + "epoch": 0.8201544685546157, + "grad_norm": 1.6452737993466788, + "learning_rate": 1.6489825854957985e-06, + "loss": 0.6339, + "step": 26760 + }, + { + "epoch": 0.8201851170773569, + "grad_norm": 1.573259941125196, + "learning_rate": 1.6484365821999626e-06, + "loss": 0.606, + "step": 26761 + }, + { + "epoch": 0.8202157656000981, + "grad_norm": 1.8512331147691286, + "learning_rate": 1.6478906611942181e-06, + "loss": 0.6142, + "step": 26762 + }, + { + "epoch": 0.8202464141228393, + "grad_norm": 1.4866633127614146, + "learning_rate": 1.6473448224839462e-06, + "loss": 0.7352, + "step": 26763 + }, + { + "epoch": 0.8202770626455805, + "grad_norm": 1.696312250859224, + "learning_rate": 1.6467990660745226e-06, + "loss": 0.6407, + "step": 26764 + }, + { + "epoch": 0.8203077111683217, + "grad_norm": 1.5478629259665662, + "learning_rate": 1.6462533919713198e-06, + "loss": 0.6312, + "step": 26765 + }, + { + "epoch": 0.820338359691063, + "grad_norm": 1.7510011668065535, + "learning_rate": 1.6457078001797255e-06, + "loss": 0.6668, + "step": 26766 + }, + { + "epoch": 0.8203690082138041, + "grad_norm": 1.9430067966284648, + "learning_rate": 1.6451622907051068e-06, + "loss": 0.6782, + "step": 26767 + }, + { + "epoch": 0.8203996567365452, + "grad_norm": 1.6522724763570447, + "learning_rate": 1.6446168635528438e-06, + "loss": 0.5905, + "step": 26768 + }, + { + "epoch": 0.8204303052592865, + "grad_norm": 1.7121400474456279, + "learning_rate": 1.6440715187283063e-06, + "loss": 0.6968, + "step": 26769 + }, + { + "epoch": 0.8204609537820277, + "grad_norm": 1.562451025494179, + "learning_rate": 1.6435262562368704e-06, + "loss": 0.7175, + "step": 26770 + }, + { + "epoch": 0.8204916023047689, + "grad_norm": 1.5408243084217885, + "learning_rate": 1.6429810760839115e-06, + "loss": 0.6187, + "step": 26771 + }, + { + "epoch": 0.8205222508275101, + "grad_norm": 1.785503549354846, + "learning_rate": 1.6424359782747957e-06, + "loss": 0.6649, + "step": 26772 + }, + { + "epoch": 0.8205528993502513, + "grad_norm": 1.7416275643331465, + "learning_rate": 1.641890962814896e-06, + "loss": 0.5881, + "step": 26773 + }, + { + "epoch": 0.8205835478729925, + "grad_norm": 1.7673804754147007, + "learning_rate": 1.6413460297095852e-06, + "loss": 0.6693, + "step": 26774 + }, + { + "epoch": 0.8206141963957337, + "grad_norm": 1.690812149567057, + "learning_rate": 1.6408011789642308e-06, + "loss": 0.5431, + "step": 26775 + }, + { + "epoch": 0.8206448449184749, + "grad_norm": 1.9920702462173001, + "learning_rate": 1.6402564105841968e-06, + "loss": 0.739, + "step": 26776 + }, + { + "epoch": 0.8206754934412162, + "grad_norm": 0.6967612040586998, + "learning_rate": 1.6397117245748606e-06, + "loss": 0.5498, + "step": 26777 + }, + { + "epoch": 0.8207061419639573, + "grad_norm": 1.6474181788850628, + "learning_rate": 1.6391671209415805e-06, + "loss": 0.6545, + "step": 26778 + }, + { + "epoch": 0.8207367904866986, + "grad_norm": 1.5941935014780677, + "learning_rate": 1.6386225996897288e-06, + "loss": 0.6556, + "step": 26779 + }, + { + "epoch": 0.8207674390094397, + "grad_norm": 1.578520295410758, + "learning_rate": 1.6380781608246654e-06, + "loss": 0.623, + "step": 26780 + }, + { + "epoch": 0.820798087532181, + "grad_norm": 2.0704903973343947, + "learning_rate": 1.6375338043517575e-06, + "loss": 0.6372, + "step": 26781 + }, + { + "epoch": 0.8208287360549221, + "grad_norm": 1.8114897521370357, + "learning_rate": 1.6369895302763706e-06, + "loss": 0.5066, + "step": 26782 + }, + { + "epoch": 0.8208593845776634, + "grad_norm": 1.623708867750081, + "learning_rate": 1.6364453386038636e-06, + "loss": 0.6438, + "step": 26783 + }, + { + "epoch": 0.8208900331004045, + "grad_norm": 1.7415381595675028, + "learning_rate": 1.6359012293396015e-06, + "loss": 0.7082, + "step": 26784 + }, + { + "epoch": 0.8209206816231458, + "grad_norm": 1.7225466503558375, + "learning_rate": 1.6353572024889453e-06, + "loss": 0.614, + "step": 26785 + }, + { + "epoch": 0.820951330145887, + "grad_norm": 1.7121383986628893, + "learning_rate": 1.634813258057254e-06, + "loss": 0.5985, + "step": 26786 + }, + { + "epoch": 0.8209819786686282, + "grad_norm": 1.9454054582445006, + "learning_rate": 1.634269396049889e-06, + "loss": 0.6395, + "step": 26787 + }, + { + "epoch": 0.8210126271913694, + "grad_norm": 1.7738119549185507, + "learning_rate": 1.633725616472207e-06, + "loss": 0.6292, + "step": 26788 + }, + { + "epoch": 0.8210432757141106, + "grad_norm": 1.6173207938820435, + "learning_rate": 1.6331819193295662e-06, + "loss": 0.6421, + "step": 26789 + }, + { + "epoch": 0.8210739242368518, + "grad_norm": 1.7555920302537744, + "learning_rate": 1.6326383046273275e-06, + "loss": 0.5815, + "step": 26790 + }, + { + "epoch": 0.821104572759593, + "grad_norm": 1.8023469603945559, + "learning_rate": 1.6320947723708413e-06, + "loss": 0.5597, + "step": 26791 + }, + { + "epoch": 0.8211352212823342, + "grad_norm": 1.7001971696905431, + "learning_rate": 1.6315513225654667e-06, + "loss": 0.6102, + "step": 26792 + }, + { + "epoch": 0.8211658698050754, + "grad_norm": 1.6309436069685779, + "learning_rate": 1.6310079552165614e-06, + "loss": 0.6431, + "step": 26793 + }, + { + "epoch": 0.8211965183278166, + "grad_norm": 1.7534289992998948, + "learning_rate": 1.6304646703294724e-06, + "loss": 0.5944, + "step": 26794 + }, + { + "epoch": 0.8212271668505579, + "grad_norm": 1.6745718822568345, + "learning_rate": 1.6299214679095576e-06, + "loss": 0.485, + "step": 26795 + }, + { + "epoch": 0.821257815373299, + "grad_norm": 1.6920747303661248, + "learning_rate": 1.6293783479621694e-06, + "loss": 0.6064, + "step": 26796 + }, + { + "epoch": 0.8212884638960403, + "grad_norm": 1.834490863570255, + "learning_rate": 1.628835310492657e-06, + "loss": 0.6775, + "step": 26797 + }, + { + "epoch": 0.8213191124187814, + "grad_norm": 1.646078157317512, + "learning_rate": 1.6282923555063735e-06, + "loss": 0.586, + "step": 26798 + }, + { + "epoch": 0.8213497609415226, + "grad_norm": 1.6754501948390146, + "learning_rate": 1.6277494830086649e-06, + "loss": 0.6008, + "step": 26799 + }, + { + "epoch": 0.8213804094642638, + "grad_norm": 1.7425142512638483, + "learning_rate": 1.6272066930048835e-06, + "loss": 0.5785, + "step": 26800 + }, + { + "epoch": 0.821411057987005, + "grad_norm": 1.664163026624291, + "learning_rate": 1.6266639855003785e-06, + "loss": 0.5456, + "step": 26801 + }, + { + "epoch": 0.8214417065097462, + "grad_norm": 1.8198469196621943, + "learning_rate": 1.6261213605004933e-06, + "loss": 0.581, + "step": 26802 + }, + { + "epoch": 0.8214723550324874, + "grad_norm": 1.5729375161081525, + "learning_rate": 1.6255788180105769e-06, + "loss": 0.5546, + "step": 26803 + }, + { + "epoch": 0.8215030035552287, + "grad_norm": 1.5308107302028637, + "learning_rate": 1.6250363580359784e-06, + "loss": 0.6167, + "step": 26804 + }, + { + "epoch": 0.8215336520779698, + "grad_norm": 1.615293199900421, + "learning_rate": 1.624493980582036e-06, + "loss": 0.5761, + "step": 26805 + }, + { + "epoch": 0.8215643006007111, + "grad_norm": 1.6737708553049648, + "learning_rate": 1.6239516856540981e-06, + "loss": 0.5915, + "step": 26806 + }, + { + "epoch": 0.8215949491234522, + "grad_norm": 1.5500413523053145, + "learning_rate": 1.623409473257509e-06, + "loss": 0.577, + "step": 26807 + }, + { + "epoch": 0.8216255976461935, + "grad_norm": 1.7862458999358968, + "learning_rate": 1.6228673433976082e-06, + "loss": 0.6461, + "step": 26808 + }, + { + "epoch": 0.8216562461689346, + "grad_norm": 1.5628156658336707, + "learning_rate": 1.622325296079741e-06, + "loss": 0.5442, + "step": 26809 + }, + { + "epoch": 0.8216868946916759, + "grad_norm": 0.6563294994348949, + "learning_rate": 1.6217833313092435e-06, + "loss": 0.5109, + "step": 26810 + }, + { + "epoch": 0.821717543214417, + "grad_norm": 1.615590382350074, + "learning_rate": 1.6212414490914585e-06, + "loss": 0.6431, + "step": 26811 + }, + { + "epoch": 0.8217481917371583, + "grad_norm": 0.6708457073063666, + "learning_rate": 1.6206996494317273e-06, + "loss": 0.5156, + "step": 26812 + }, + { + "epoch": 0.8217788402598994, + "grad_norm": 1.8191970957750943, + "learning_rate": 1.6201579323353844e-06, + "loss": 0.5774, + "step": 26813 + }, + { + "epoch": 0.8218094887826407, + "grad_norm": 1.8709033020894958, + "learning_rate": 1.619616297807769e-06, + "loss": 0.7007, + "step": 26814 + }, + { + "epoch": 0.8218401373053819, + "grad_norm": 1.8539356552408455, + "learning_rate": 1.6190747458542222e-06, + "loss": 0.6925, + "step": 26815 + }, + { + "epoch": 0.8218707858281231, + "grad_norm": 1.8251679630670796, + "learning_rate": 1.618533276480072e-06, + "loss": 0.6281, + "step": 26816 + }, + { + "epoch": 0.8219014343508643, + "grad_norm": 1.7617259471289448, + "learning_rate": 1.61799188969066e-06, + "loss": 0.6504, + "step": 26817 + }, + { + "epoch": 0.8219320828736055, + "grad_norm": 1.6337209654609266, + "learning_rate": 1.617450585491319e-06, + "loss": 0.6797, + "step": 26818 + }, + { + "epoch": 0.8219627313963467, + "grad_norm": 1.9296362941593506, + "learning_rate": 1.6169093638873813e-06, + "loss": 0.5992, + "step": 26819 + }, + { + "epoch": 0.8219933799190879, + "grad_norm": 1.5994301006442544, + "learning_rate": 1.6163682248841817e-06, + "loss": 0.6509, + "step": 26820 + }, + { + "epoch": 0.8220240284418291, + "grad_norm": 1.6025685562139815, + "learning_rate": 1.6158271684870464e-06, + "loss": 0.6175, + "step": 26821 + }, + { + "epoch": 0.8220546769645704, + "grad_norm": 1.7531471247112385, + "learning_rate": 1.6152861947013165e-06, + "loss": 0.6138, + "step": 26822 + }, + { + "epoch": 0.8220853254873115, + "grad_norm": 1.7918469377294497, + "learning_rate": 1.6147453035323169e-06, + "loss": 0.6339, + "step": 26823 + }, + { + "epoch": 0.8221159740100528, + "grad_norm": 1.7315969537244298, + "learning_rate": 1.6142044949853752e-06, + "loss": 0.6849, + "step": 26824 + }, + { + "epoch": 0.8221466225327939, + "grad_norm": 0.6537371561970459, + "learning_rate": 1.613663769065822e-06, + "loss": 0.5245, + "step": 26825 + }, + { + "epoch": 0.8221772710555352, + "grad_norm": 1.6806387104425824, + "learning_rate": 1.613123125778987e-06, + "loss": 0.6812, + "step": 26826 + }, + { + "epoch": 0.8222079195782763, + "grad_norm": 1.7402775305075056, + "learning_rate": 1.612582565130194e-06, + "loss": 0.6833, + "step": 26827 + }, + { + "epoch": 0.8222385681010176, + "grad_norm": 1.544925468601538, + "learning_rate": 1.612042087124771e-06, + "loss": 0.7016, + "step": 26828 + }, + { + "epoch": 0.8222692166237587, + "grad_norm": 1.839793324191391, + "learning_rate": 1.611501691768046e-06, + "loss": 0.6527, + "step": 26829 + }, + { + "epoch": 0.8222998651464999, + "grad_norm": 0.6762155349773374, + "learning_rate": 1.610961379065339e-06, + "loss": 0.5049, + "step": 26830 + }, + { + "epoch": 0.8223305136692411, + "grad_norm": 1.7272178994549068, + "learning_rate": 1.6104211490219778e-06, + "loss": 0.6986, + "step": 26831 + }, + { + "epoch": 0.8223611621919823, + "grad_norm": 1.7641016845373931, + "learning_rate": 1.609881001643281e-06, + "loss": 0.6707, + "step": 26832 + }, + { + "epoch": 0.8223918107147236, + "grad_norm": 1.6211886287601707, + "learning_rate": 1.6093409369345736e-06, + "loss": 0.6578, + "step": 26833 + }, + { + "epoch": 0.8224224592374647, + "grad_norm": 1.6315099566516629, + "learning_rate": 1.6088009549011796e-06, + "loss": 0.5816, + "step": 26834 + }, + { + "epoch": 0.822453107760206, + "grad_norm": 1.6295994327819892, + "learning_rate": 1.6082610555484146e-06, + "loss": 0.5633, + "step": 26835 + }, + { + "epoch": 0.8224837562829471, + "grad_norm": 1.5267018185645758, + "learning_rate": 1.6077212388816e-06, + "loss": 0.6172, + "step": 26836 + }, + { + "epoch": 0.8225144048056884, + "grad_norm": 0.6680937874229477, + "learning_rate": 1.6071815049060579e-06, + "loss": 0.5199, + "step": 26837 + }, + { + "epoch": 0.8225450533284295, + "grad_norm": 1.7507765250317577, + "learning_rate": 1.6066418536271012e-06, + "loss": 0.5941, + "step": 26838 + }, + { + "epoch": 0.8225757018511708, + "grad_norm": 1.6832047224691122, + "learning_rate": 1.606102285050052e-06, + "loss": 0.6609, + "step": 26839 + }, + { + "epoch": 0.8226063503739119, + "grad_norm": 0.6716802354529439, + "learning_rate": 1.6055627991802202e-06, + "loss": 0.5082, + "step": 26840 + }, + { + "epoch": 0.8226369988966532, + "grad_norm": 1.7585077127928785, + "learning_rate": 1.6050233960229311e-06, + "loss": 0.6727, + "step": 26841 + }, + { + "epoch": 0.8226676474193944, + "grad_norm": 1.6242155008502743, + "learning_rate": 1.6044840755834935e-06, + "loss": 0.6231, + "step": 26842 + }, + { + "epoch": 0.8226982959421356, + "grad_norm": 1.7449222896144665, + "learning_rate": 1.6039448378672206e-06, + "loss": 0.5859, + "step": 26843 + }, + { + "epoch": 0.8227289444648768, + "grad_norm": 1.6218575801187851, + "learning_rate": 1.6034056828794276e-06, + "loss": 0.6116, + "step": 26844 + }, + { + "epoch": 0.822759592987618, + "grad_norm": 0.6792459265814225, + "learning_rate": 1.6028666106254287e-06, + "loss": 0.5191, + "step": 26845 + }, + { + "epoch": 0.8227902415103592, + "grad_norm": 1.4680614882126393, + "learning_rate": 1.602327621110531e-06, + "loss": 0.5451, + "step": 26846 + }, + { + "epoch": 0.8228208900331004, + "grad_norm": 1.5934885477408762, + "learning_rate": 1.601788714340049e-06, + "loss": 0.6501, + "step": 26847 + }, + { + "epoch": 0.8228515385558416, + "grad_norm": 1.723537033907352, + "learning_rate": 1.6012498903192907e-06, + "loss": 0.6422, + "step": 26848 + }, + { + "epoch": 0.8228821870785828, + "grad_norm": 1.7026408071170436, + "learning_rate": 1.6007111490535688e-06, + "loss": 0.6259, + "step": 26849 + }, + { + "epoch": 0.822912835601324, + "grad_norm": 1.5699424713577692, + "learning_rate": 1.6001724905481886e-06, + "loss": 0.6149, + "step": 26850 + }, + { + "epoch": 0.8229434841240653, + "grad_norm": 1.7161088109158977, + "learning_rate": 1.5996339148084539e-06, + "loss": 0.5326, + "step": 26851 + }, + { + "epoch": 0.8229741326468064, + "grad_norm": 1.6654691692574923, + "learning_rate": 1.5990954218396793e-06, + "loss": 0.6479, + "step": 26852 + }, + { + "epoch": 0.8230047811695477, + "grad_norm": 1.8682154220489176, + "learning_rate": 1.5985570116471682e-06, + "loss": 0.6317, + "step": 26853 + }, + { + "epoch": 0.8230354296922888, + "grad_norm": 1.6472034779779208, + "learning_rate": 1.5980186842362212e-06, + "loss": 0.7077, + "step": 26854 + }, + { + "epoch": 0.8230660782150301, + "grad_norm": 1.6376232327154168, + "learning_rate": 1.5974804396121467e-06, + "loss": 0.6063, + "step": 26855 + }, + { + "epoch": 0.8230967267377712, + "grad_norm": 1.7041996951632372, + "learning_rate": 1.5969422777802491e-06, + "loss": 0.6542, + "step": 26856 + }, + { + "epoch": 0.8231273752605125, + "grad_norm": 1.7451246008923071, + "learning_rate": 1.5964041987458268e-06, + "loss": 0.6207, + "step": 26857 + }, + { + "epoch": 0.8231580237832536, + "grad_norm": 1.7230595411382277, + "learning_rate": 1.5958662025141846e-06, + "loss": 0.6438, + "step": 26858 + }, + { + "epoch": 0.8231886723059949, + "grad_norm": 1.641762375164065, + "learning_rate": 1.595328289090622e-06, + "loss": 0.581, + "step": 26859 + }, + { + "epoch": 0.8232193208287361, + "grad_norm": 1.8408927025855237, + "learning_rate": 1.594790458480443e-06, + "loss": 0.6681, + "step": 26860 + }, + { + "epoch": 0.8232499693514772, + "grad_norm": 1.7494816497530152, + "learning_rate": 1.594252710688945e-06, + "loss": 0.606, + "step": 26861 + }, + { + "epoch": 0.8232806178742185, + "grad_norm": 1.9746365342382237, + "learning_rate": 1.59371504572142e-06, + "loss": 0.6136, + "step": 26862 + }, + { + "epoch": 0.8233112663969596, + "grad_norm": 1.670226340480328, + "learning_rate": 1.5931774635831764e-06, + "loss": 0.5719, + "step": 26863 + }, + { + "epoch": 0.8233419149197009, + "grad_norm": 1.6469366118140443, + "learning_rate": 1.5926399642795066e-06, + "loss": 0.5764, + "step": 26864 + }, + { + "epoch": 0.823372563442442, + "grad_norm": 1.9129242533903088, + "learning_rate": 1.5921025478157037e-06, + "loss": 0.6593, + "step": 26865 + }, + { + "epoch": 0.8234032119651833, + "grad_norm": 1.5633199600080956, + "learning_rate": 1.5915652141970662e-06, + "loss": 0.6589, + "step": 26866 + }, + { + "epoch": 0.8234338604879244, + "grad_norm": 1.590467890705001, + "learning_rate": 1.5910279634288873e-06, + "loss": 0.5221, + "step": 26867 + }, + { + "epoch": 0.8234645090106657, + "grad_norm": 1.6267961899733754, + "learning_rate": 1.590490795516465e-06, + "loss": 0.6456, + "step": 26868 + }, + { + "epoch": 0.8234951575334069, + "grad_norm": 1.8207047652310973, + "learning_rate": 1.5899537104650853e-06, + "loss": 0.7533, + "step": 26869 + }, + { + "epoch": 0.8235258060561481, + "grad_norm": 1.8089804143666857, + "learning_rate": 1.5894167082800427e-06, + "loss": 0.7152, + "step": 26870 + }, + { + "epoch": 0.8235564545788893, + "grad_norm": 1.9110570068713268, + "learning_rate": 1.588879788966633e-06, + "loss": 0.6629, + "step": 26871 + }, + { + "epoch": 0.8235871031016305, + "grad_norm": 1.5949408553934474, + "learning_rate": 1.5883429525301419e-06, + "loss": 0.6015, + "step": 26872 + }, + { + "epoch": 0.8236177516243717, + "grad_norm": 1.6844087776499737, + "learning_rate": 1.5878061989758553e-06, + "loss": 0.6632, + "step": 26873 + }, + { + "epoch": 0.8236484001471129, + "grad_norm": 1.7864932719455662, + "learning_rate": 1.5872695283090711e-06, + "loss": 0.5817, + "step": 26874 + }, + { + "epoch": 0.8236790486698541, + "grad_norm": 0.6756034539282302, + "learning_rate": 1.5867329405350708e-06, + "loss": 0.5157, + "step": 26875 + }, + { + "epoch": 0.8237096971925953, + "grad_norm": 1.6234827404256522, + "learning_rate": 1.5861964356591465e-06, + "loss": 0.7022, + "step": 26876 + }, + { + "epoch": 0.8237403457153365, + "grad_norm": 1.4812677421605618, + "learning_rate": 1.5856600136865774e-06, + "loss": 0.4879, + "step": 26877 + }, + { + "epoch": 0.8237709942380778, + "grad_norm": 1.553122191573587, + "learning_rate": 1.585123674622655e-06, + "loss": 0.5131, + "step": 26878 + }, + { + "epoch": 0.8238016427608189, + "grad_norm": 1.7769038410584117, + "learning_rate": 1.584587418472663e-06, + "loss": 0.6639, + "step": 26879 + }, + { + "epoch": 0.8238322912835602, + "grad_norm": 1.6127412941025157, + "learning_rate": 1.5840512452418822e-06, + "loss": 0.6688, + "step": 26880 + }, + { + "epoch": 0.8238629398063013, + "grad_norm": 0.6762642183212367, + "learning_rate": 1.5835151549355988e-06, + "loss": 0.5273, + "step": 26881 + }, + { + "epoch": 0.8238935883290426, + "grad_norm": 1.714677554118019, + "learning_rate": 1.582979147559095e-06, + "loss": 0.6347, + "step": 26882 + }, + { + "epoch": 0.8239242368517837, + "grad_norm": 1.5273509159012182, + "learning_rate": 1.5824432231176523e-06, + "loss": 0.5499, + "step": 26883 + }, + { + "epoch": 0.823954885374525, + "grad_norm": 1.6378861195295968, + "learning_rate": 1.5819073816165475e-06, + "loss": 0.683, + "step": 26884 + }, + { + "epoch": 0.8239855338972661, + "grad_norm": 1.8443470472082784, + "learning_rate": 1.5813716230610631e-06, + "loss": 0.6338, + "step": 26885 + }, + { + "epoch": 0.8240161824200074, + "grad_norm": 1.6770497836881395, + "learning_rate": 1.5808359474564784e-06, + "loss": 0.5628, + "step": 26886 + }, + { + "epoch": 0.8240468309427486, + "grad_norm": 1.6554301841700692, + "learning_rate": 1.5803003548080732e-06, + "loss": 0.576, + "step": 26887 + }, + { + "epoch": 0.8240774794654898, + "grad_norm": 1.8673101018548597, + "learning_rate": 1.57976484512112e-06, + "loss": 0.6366, + "step": 26888 + }, + { + "epoch": 0.824108127988231, + "grad_norm": 0.6898052035700827, + "learning_rate": 1.5792294184008995e-06, + "loss": 0.4997, + "step": 26889 + }, + { + "epoch": 0.8241387765109722, + "grad_norm": 1.885851081202531, + "learning_rate": 1.5786940746526869e-06, + "loss": 0.6775, + "step": 26890 + }, + { + "epoch": 0.8241694250337134, + "grad_norm": 1.8222926471127476, + "learning_rate": 1.5781588138817572e-06, + "loss": 0.6283, + "step": 26891 + }, + { + "epoch": 0.8242000735564545, + "grad_norm": 1.5774069082709328, + "learning_rate": 1.5776236360933794e-06, + "loss": 0.6174, + "step": 26892 + }, + { + "epoch": 0.8242307220791958, + "grad_norm": 1.5877819955006256, + "learning_rate": 1.577088541292835e-06, + "loss": 0.7373, + "step": 26893 + }, + { + "epoch": 0.8242613706019369, + "grad_norm": 1.6416589414857974, + "learning_rate": 1.5765535294853894e-06, + "loss": 0.6595, + "step": 26894 + }, + { + "epoch": 0.8242920191246782, + "grad_norm": 1.7853810706486137, + "learning_rate": 1.57601860067632e-06, + "loss": 0.6543, + "step": 26895 + }, + { + "epoch": 0.8243226676474193, + "grad_norm": 1.728338330989248, + "learning_rate": 1.5754837548708923e-06, + "loss": 0.5591, + "step": 26896 + }, + { + "epoch": 0.8243533161701606, + "grad_norm": 1.7618401385830107, + "learning_rate": 1.5749489920743788e-06, + "loss": 0.7104, + "step": 26897 + }, + { + "epoch": 0.8243839646929018, + "grad_norm": 0.6554519426101661, + "learning_rate": 1.5744143122920508e-06, + "loss": 0.511, + "step": 26898 + }, + { + "epoch": 0.824414613215643, + "grad_norm": 1.7081707151771228, + "learning_rate": 1.5738797155291719e-06, + "loss": 0.5492, + "step": 26899 + }, + { + "epoch": 0.8244452617383842, + "grad_norm": 1.5378730102154738, + "learning_rate": 1.5733452017910123e-06, + "loss": 0.62, + "step": 26900 + }, + { + "epoch": 0.8244759102611254, + "grad_norm": 2.131290941571745, + "learning_rate": 1.57281077108284e-06, + "loss": 0.5302, + "step": 26901 + }, + { + "epoch": 0.8245065587838666, + "grad_norm": 1.746219379769956, + "learning_rate": 1.5722764234099198e-06, + "loss": 0.7013, + "step": 26902 + }, + { + "epoch": 0.8245372073066078, + "grad_norm": 0.6664364432827452, + "learning_rate": 1.5717421587775116e-06, + "loss": 0.5166, + "step": 26903 + }, + { + "epoch": 0.824567855829349, + "grad_norm": 1.6791399425088571, + "learning_rate": 1.5712079771908894e-06, + "loss": 0.6062, + "step": 26904 + }, + { + "epoch": 0.8245985043520903, + "grad_norm": 1.6026162796861654, + "learning_rate": 1.57067387865531e-06, + "loss": 0.6109, + "step": 26905 + }, + { + "epoch": 0.8246291528748314, + "grad_norm": 1.8771476402165312, + "learning_rate": 1.570139863176039e-06, + "loss": 0.6789, + "step": 26906 + }, + { + "epoch": 0.8246598013975727, + "grad_norm": 1.7215273897660064, + "learning_rate": 1.5696059307583345e-06, + "loss": 0.5774, + "step": 26907 + }, + { + "epoch": 0.8246904499203138, + "grad_norm": 0.6630390624210933, + "learning_rate": 1.56907208140746e-06, + "loss": 0.5097, + "step": 26908 + }, + { + "epoch": 0.8247210984430551, + "grad_norm": 0.6372945183470993, + "learning_rate": 1.568538315128677e-06, + "loss": 0.507, + "step": 26909 + }, + { + "epoch": 0.8247517469657962, + "grad_norm": 0.6922840666282184, + "learning_rate": 1.5680046319272413e-06, + "loss": 0.5446, + "step": 26910 + }, + { + "epoch": 0.8247823954885375, + "grad_norm": 1.6959194129611426, + "learning_rate": 1.567471031808414e-06, + "loss": 0.6758, + "step": 26911 + }, + { + "epoch": 0.8248130440112786, + "grad_norm": 1.9428212347820104, + "learning_rate": 1.5669375147774546e-06, + "loss": 0.6705, + "step": 26912 + }, + { + "epoch": 0.8248436925340199, + "grad_norm": 1.751480916998748, + "learning_rate": 1.5664040808396141e-06, + "loss": 0.6102, + "step": 26913 + }, + { + "epoch": 0.824874341056761, + "grad_norm": 1.9360099463243374, + "learning_rate": 1.565870730000153e-06, + "loss": 0.5589, + "step": 26914 + }, + { + "epoch": 0.8249049895795023, + "grad_norm": 1.6511420596233035, + "learning_rate": 1.565337462264327e-06, + "loss": 0.641, + "step": 26915 + }, + { + "epoch": 0.8249356381022435, + "grad_norm": 1.6814306556390224, + "learning_rate": 1.5648042776373872e-06, + "loss": 0.5881, + "step": 26916 + }, + { + "epoch": 0.8249662866249847, + "grad_norm": 0.6574763610918115, + "learning_rate": 1.564271176124592e-06, + "loss": 0.5184, + "step": 26917 + }, + { + "epoch": 0.8249969351477259, + "grad_norm": 1.8467076153334252, + "learning_rate": 1.5637381577311883e-06, + "loss": 0.7272, + "step": 26918 + }, + { + "epoch": 0.8250275836704671, + "grad_norm": 1.5945637437705031, + "learning_rate": 1.5632052224624317e-06, + "loss": 0.6039, + "step": 26919 + }, + { + "epoch": 0.8250582321932083, + "grad_norm": 0.6377421333088287, + "learning_rate": 1.5626723703235747e-06, + "loss": 0.5153, + "step": 26920 + }, + { + "epoch": 0.8250888807159495, + "grad_norm": 1.6960753448501396, + "learning_rate": 1.5621396013198632e-06, + "loss": 0.628, + "step": 26921 + }, + { + "epoch": 0.8251195292386907, + "grad_norm": 1.6404106088455759, + "learning_rate": 1.5616069154565482e-06, + "loss": 0.6445, + "step": 26922 + }, + { + "epoch": 0.8251501777614318, + "grad_norm": 1.538004724932008, + "learning_rate": 1.5610743127388827e-06, + "loss": 0.5565, + "step": 26923 + }, + { + "epoch": 0.8251808262841731, + "grad_norm": 0.654165093720301, + "learning_rate": 1.560541793172109e-06, + "loss": 0.5119, + "step": 26924 + }, + { + "epoch": 0.8252114748069143, + "grad_norm": 0.643753144486626, + "learning_rate": 1.560009356761476e-06, + "loss": 0.5225, + "step": 26925 + }, + { + "epoch": 0.8252421233296555, + "grad_norm": 1.9240056131130767, + "learning_rate": 1.559477003512232e-06, + "loss": 0.6763, + "step": 26926 + }, + { + "epoch": 0.8252727718523967, + "grad_norm": 1.7662180576855766, + "learning_rate": 1.5589447334296193e-06, + "loss": 0.6648, + "step": 26927 + }, + { + "epoch": 0.8253034203751379, + "grad_norm": 1.9315585939020214, + "learning_rate": 1.558412546518886e-06, + "loss": 0.6284, + "step": 26928 + }, + { + "epoch": 0.8253340688978791, + "grad_norm": 1.7477270909075477, + "learning_rate": 1.5578804427852713e-06, + "loss": 0.5919, + "step": 26929 + }, + { + "epoch": 0.8253647174206203, + "grad_norm": 1.6711650106890432, + "learning_rate": 1.5573484222340208e-06, + "loss": 0.6188, + "step": 26930 + }, + { + "epoch": 0.8253953659433615, + "grad_norm": 1.784087688604928, + "learning_rate": 1.5568164848703782e-06, + "loss": 0.6093, + "step": 26931 + }, + { + "epoch": 0.8254260144661028, + "grad_norm": 1.657033034771872, + "learning_rate": 1.5562846306995816e-06, + "loss": 0.5534, + "step": 26932 + }, + { + "epoch": 0.8254566629888439, + "grad_norm": 1.7526684724615713, + "learning_rate": 1.5557528597268722e-06, + "loss": 0.5737, + "step": 26933 + }, + { + "epoch": 0.8254873115115852, + "grad_norm": 1.8265428360040752, + "learning_rate": 1.5552211719574928e-06, + "loss": 0.6468, + "step": 26934 + }, + { + "epoch": 0.8255179600343263, + "grad_norm": 1.6821159783681554, + "learning_rate": 1.5546895673966777e-06, + "loss": 0.6026, + "step": 26935 + }, + { + "epoch": 0.8255486085570676, + "grad_norm": 1.5218635831135239, + "learning_rate": 1.5541580460496697e-06, + "loss": 0.558, + "step": 26936 + }, + { + "epoch": 0.8255792570798087, + "grad_norm": 1.5554487333928984, + "learning_rate": 1.5536266079217011e-06, + "loss": 0.65, + "step": 26937 + }, + { + "epoch": 0.82560990560255, + "grad_norm": 1.8001444421823016, + "learning_rate": 1.5530952530180099e-06, + "loss": 0.6492, + "step": 26938 + }, + { + "epoch": 0.8256405541252911, + "grad_norm": 1.8533089345064424, + "learning_rate": 1.5525639813438353e-06, + "loss": 0.6901, + "step": 26939 + }, + { + "epoch": 0.8256712026480324, + "grad_norm": 1.7608048942564927, + "learning_rate": 1.5520327929044066e-06, + "loss": 0.6098, + "step": 26940 + }, + { + "epoch": 0.8257018511707735, + "grad_norm": 1.5012903811080267, + "learning_rate": 1.5515016877049605e-06, + "loss": 0.4919, + "step": 26941 + }, + { + "epoch": 0.8257324996935148, + "grad_norm": 1.6293392831126796, + "learning_rate": 1.5509706657507328e-06, + "loss": 0.7237, + "step": 26942 + }, + { + "epoch": 0.825763148216256, + "grad_norm": 1.5959517042543965, + "learning_rate": 1.5504397270469496e-06, + "loss": 0.6023, + "step": 26943 + }, + { + "epoch": 0.8257937967389972, + "grad_norm": 1.6184081849648961, + "learning_rate": 1.5499088715988464e-06, + "loss": 0.705, + "step": 26944 + }, + { + "epoch": 0.8258244452617384, + "grad_norm": 0.6651162062025181, + "learning_rate": 1.5493780994116546e-06, + "loss": 0.5166, + "step": 26945 + }, + { + "epoch": 0.8258550937844796, + "grad_norm": 1.595776783328022, + "learning_rate": 1.5488474104906014e-06, + "loss": 0.612, + "step": 26946 + }, + { + "epoch": 0.8258857423072208, + "grad_norm": 1.6921010463347206, + "learning_rate": 1.548316804840919e-06, + "loss": 0.6804, + "step": 26947 + }, + { + "epoch": 0.825916390829962, + "grad_norm": 1.4587429444754831, + "learning_rate": 1.547786282467828e-06, + "loss": 0.5277, + "step": 26948 + }, + { + "epoch": 0.8259470393527032, + "grad_norm": 1.6051314112504866, + "learning_rate": 1.5472558433765671e-06, + "loss": 0.5784, + "step": 26949 + }, + { + "epoch": 0.8259776878754445, + "grad_norm": 1.5354724516611755, + "learning_rate": 1.5467254875723569e-06, + "loss": 0.5491, + "step": 26950 + }, + { + "epoch": 0.8260083363981856, + "grad_norm": 1.72976451081876, + "learning_rate": 1.5461952150604197e-06, + "loss": 0.5827, + "step": 26951 + }, + { + "epoch": 0.8260389849209269, + "grad_norm": 1.5743042861732628, + "learning_rate": 1.545665025845986e-06, + "loss": 0.5708, + "step": 26952 + }, + { + "epoch": 0.826069633443668, + "grad_norm": 1.4523953539354966, + "learning_rate": 1.545134919934279e-06, + "loss": 0.5551, + "step": 26953 + }, + { + "epoch": 0.8261002819664092, + "grad_norm": 1.6697771189728938, + "learning_rate": 1.5446048973305195e-06, + "loss": 0.599, + "step": 26954 + }, + { + "epoch": 0.8261309304891504, + "grad_norm": 1.7281404445003972, + "learning_rate": 1.5440749580399306e-06, + "loss": 0.6607, + "step": 26955 + }, + { + "epoch": 0.8261615790118916, + "grad_norm": 1.8777856386483942, + "learning_rate": 1.5435451020677373e-06, + "loss": 0.6643, + "step": 26956 + }, + { + "epoch": 0.8261922275346328, + "grad_norm": 1.7466863447798961, + "learning_rate": 1.5430153294191552e-06, + "loss": 0.6913, + "step": 26957 + }, + { + "epoch": 0.826222876057374, + "grad_norm": 1.6523329629500758, + "learning_rate": 1.5424856400994093e-06, + "loss": 0.6789, + "step": 26958 + }, + { + "epoch": 0.8262535245801153, + "grad_norm": 1.6625732305759233, + "learning_rate": 1.5419560341137118e-06, + "loss": 0.7192, + "step": 26959 + }, + { + "epoch": 0.8262841731028564, + "grad_norm": 1.7802917267200908, + "learning_rate": 1.54142651146729e-06, + "loss": 0.633, + "step": 26960 + }, + { + "epoch": 0.8263148216255977, + "grad_norm": 1.6484857295384703, + "learning_rate": 1.540897072165357e-06, + "loss": 0.6785, + "step": 26961 + }, + { + "epoch": 0.8263454701483388, + "grad_norm": 0.6743250093668266, + "learning_rate": 1.540367716213127e-06, + "loss": 0.5233, + "step": 26962 + }, + { + "epoch": 0.8263761186710801, + "grad_norm": 0.6669347485344517, + "learning_rate": 1.5398384436158186e-06, + "loss": 0.4972, + "step": 26963 + }, + { + "epoch": 0.8264067671938212, + "grad_norm": 1.7770560699910054, + "learning_rate": 1.5393092543786503e-06, + "loss": 0.6015, + "step": 26964 + }, + { + "epoch": 0.8264374157165625, + "grad_norm": 1.6316292381845712, + "learning_rate": 1.5387801485068287e-06, + "loss": 0.7039, + "step": 26965 + }, + { + "epoch": 0.8264680642393036, + "grad_norm": 1.8022723726289878, + "learning_rate": 1.538251126005571e-06, + "loss": 0.6147, + "step": 26966 + }, + { + "epoch": 0.8264987127620449, + "grad_norm": 1.6662545751069773, + "learning_rate": 1.5377221868800907e-06, + "loss": 0.5378, + "step": 26967 + }, + { + "epoch": 0.826529361284786, + "grad_norm": 1.7315455710000818, + "learning_rate": 1.5371933311356012e-06, + "loss": 0.5977, + "step": 26968 + }, + { + "epoch": 0.8265600098075273, + "grad_norm": 1.6698315327701136, + "learning_rate": 1.536664558777311e-06, + "loss": 0.5942, + "step": 26969 + }, + { + "epoch": 0.8265906583302685, + "grad_norm": 1.6816067860858357, + "learning_rate": 1.5361358698104257e-06, + "loss": 0.6945, + "step": 26970 + }, + { + "epoch": 0.8266213068530097, + "grad_norm": 1.7024882339250942, + "learning_rate": 1.5356072642401642e-06, + "loss": 0.7072, + "step": 26971 + }, + { + "epoch": 0.8266519553757509, + "grad_norm": 1.7411279283531749, + "learning_rate": 1.5350787420717294e-06, + "loss": 0.6969, + "step": 26972 + }, + { + "epoch": 0.8266826038984921, + "grad_norm": 1.7862362499745115, + "learning_rate": 1.5345503033103282e-06, + "loss": 0.7498, + "step": 26973 + }, + { + "epoch": 0.8267132524212333, + "grad_norm": 1.5124452402250605, + "learning_rate": 1.5340219479611685e-06, + "loss": 0.5705, + "step": 26974 + }, + { + "epoch": 0.8267439009439745, + "grad_norm": 1.7941626808033382, + "learning_rate": 1.5334936760294561e-06, + "loss": 0.6384, + "step": 26975 + }, + { + "epoch": 0.8267745494667157, + "grad_norm": 0.6428172506045025, + "learning_rate": 1.5329654875203993e-06, + "loss": 0.5098, + "step": 26976 + }, + { + "epoch": 0.826805197989457, + "grad_norm": 0.6965234658501017, + "learning_rate": 1.532437382439198e-06, + "loss": 0.5281, + "step": 26977 + }, + { + "epoch": 0.8268358465121981, + "grad_norm": 1.7055528684515657, + "learning_rate": 1.5319093607910574e-06, + "loss": 0.6831, + "step": 26978 + }, + { + "epoch": 0.8268664950349394, + "grad_norm": 1.6374087582589485, + "learning_rate": 1.531381422581183e-06, + "loss": 0.5472, + "step": 26979 + }, + { + "epoch": 0.8268971435576805, + "grad_norm": 1.7554335781828916, + "learning_rate": 1.530853567814774e-06, + "loss": 0.6897, + "step": 26980 + }, + { + "epoch": 0.8269277920804218, + "grad_norm": 0.6739305935944312, + "learning_rate": 1.5303257964970298e-06, + "loss": 0.5317, + "step": 26981 + }, + { + "epoch": 0.8269584406031629, + "grad_norm": 1.6141277058168877, + "learning_rate": 1.5297981086331515e-06, + "loss": 0.5783, + "step": 26982 + }, + { + "epoch": 0.8269890891259042, + "grad_norm": 0.6525239727403737, + "learning_rate": 1.529270504228343e-06, + "loss": 0.5033, + "step": 26983 + }, + { + "epoch": 0.8270197376486453, + "grad_norm": 0.6820231506028525, + "learning_rate": 1.5287429832877964e-06, + "loss": 0.5018, + "step": 26984 + }, + { + "epoch": 0.8270503861713865, + "grad_norm": 1.6798408134883567, + "learning_rate": 1.5282155458167136e-06, + "loss": 0.6823, + "step": 26985 + }, + { + "epoch": 0.8270810346941277, + "grad_norm": 0.6442416905218848, + "learning_rate": 1.5276881918202903e-06, + "loss": 0.495, + "step": 26986 + }, + { + "epoch": 0.8271116832168689, + "grad_norm": 1.917001103955956, + "learning_rate": 1.5271609213037252e-06, + "loss": 0.768, + "step": 26987 + }, + { + "epoch": 0.8271423317396102, + "grad_norm": 1.785250961776223, + "learning_rate": 1.5266337342722115e-06, + "loss": 0.654, + "step": 26988 + }, + { + "epoch": 0.8271729802623513, + "grad_norm": 1.8199756226870716, + "learning_rate": 1.526106630730939e-06, + "loss": 0.6937, + "step": 26989 + }, + { + "epoch": 0.8272036287850926, + "grad_norm": 0.6825986816723297, + "learning_rate": 1.5255796106851105e-06, + "loss": 0.5169, + "step": 26990 + }, + { + "epoch": 0.8272342773078337, + "grad_norm": 1.6210401833838002, + "learning_rate": 1.525052674139914e-06, + "loss": 0.5981, + "step": 26991 + }, + { + "epoch": 0.827264925830575, + "grad_norm": 1.775109014579834, + "learning_rate": 1.5245258211005408e-06, + "loss": 0.5635, + "step": 26992 + }, + { + "epoch": 0.8272955743533161, + "grad_norm": 2.1765189873432385, + "learning_rate": 1.5239990515721826e-06, + "loss": 0.6603, + "step": 26993 + }, + { + "epoch": 0.8273262228760574, + "grad_norm": 1.7714307141677856, + "learning_rate": 1.5234723655600304e-06, + "loss": 0.639, + "step": 26994 + }, + { + "epoch": 0.8273568713987985, + "grad_norm": 1.6563127742398103, + "learning_rate": 1.5229457630692756e-06, + "loss": 0.6121, + "step": 26995 + }, + { + "epoch": 0.8273875199215398, + "grad_norm": 1.6991667705950082, + "learning_rate": 1.5224192441051034e-06, + "loss": 0.6536, + "step": 26996 + }, + { + "epoch": 0.827418168444281, + "grad_norm": 1.912617994316302, + "learning_rate": 1.5218928086727025e-06, + "loss": 0.697, + "step": 26997 + }, + { + "epoch": 0.8274488169670222, + "grad_norm": 1.5151925719646395, + "learning_rate": 1.5213664567772646e-06, + "loss": 0.6135, + "step": 26998 + }, + { + "epoch": 0.8274794654897634, + "grad_norm": 1.4218241816760582, + "learning_rate": 1.5208401884239722e-06, + "loss": 0.5357, + "step": 26999 + }, + { + "epoch": 0.8275101140125046, + "grad_norm": 1.9611659164234143, + "learning_rate": 1.5203140036180054e-06, + "loss": 0.6816, + "step": 27000 + }, + { + "epoch": 0.8275407625352458, + "grad_norm": 1.5197759855312685, + "learning_rate": 1.51978790236456e-06, + "loss": 0.6794, + "step": 27001 + }, + { + "epoch": 0.827571411057987, + "grad_norm": 1.6681648301000285, + "learning_rate": 1.519261884668811e-06, + "loss": 0.548, + "step": 27002 + }, + { + "epoch": 0.8276020595807282, + "grad_norm": 1.7403803853244177, + "learning_rate": 1.5187359505359467e-06, + "loss": 0.6526, + "step": 27003 + }, + { + "epoch": 0.8276327081034694, + "grad_norm": 1.875546040981543, + "learning_rate": 1.5182100999711457e-06, + "loss": 0.6397, + "step": 27004 + }, + { + "epoch": 0.8276633566262106, + "grad_norm": 1.6621833259294552, + "learning_rate": 1.5176843329795898e-06, + "loss": 0.5932, + "step": 27005 + }, + { + "epoch": 0.8276940051489519, + "grad_norm": 1.6746635805483385, + "learning_rate": 1.5171586495664635e-06, + "loss": 0.5866, + "step": 27006 + }, + { + "epoch": 0.827724653671693, + "grad_norm": 1.8107405808292292, + "learning_rate": 1.5166330497369408e-06, + "loss": 0.6009, + "step": 27007 + }, + { + "epoch": 0.8277553021944343, + "grad_norm": 1.5548685711515355, + "learning_rate": 1.5161075334962039e-06, + "loss": 0.6454, + "step": 27008 + }, + { + "epoch": 0.8277859507171754, + "grad_norm": 1.7649163611479213, + "learning_rate": 1.515582100849432e-06, + "loss": 0.6374, + "step": 27009 + }, + { + "epoch": 0.8278165992399167, + "grad_norm": 1.8358441458368813, + "learning_rate": 1.5150567518018e-06, + "loss": 0.6462, + "step": 27010 + }, + { + "epoch": 0.8278472477626578, + "grad_norm": 1.6938496086715518, + "learning_rate": 1.5145314863584804e-06, + "loss": 0.5969, + "step": 27011 + }, + { + "epoch": 0.8278778962853991, + "grad_norm": 1.4961123150811688, + "learning_rate": 1.5140063045246577e-06, + "loss": 0.6077, + "step": 27012 + }, + { + "epoch": 0.8279085448081402, + "grad_norm": 1.819251119000024, + "learning_rate": 1.5134812063055004e-06, + "loss": 0.7331, + "step": 27013 + }, + { + "epoch": 0.8279391933308815, + "grad_norm": 1.864920577019268, + "learning_rate": 1.5129561917061864e-06, + "loss": 0.6751, + "step": 27014 + }, + { + "epoch": 0.8279698418536227, + "grad_norm": 0.6729491679321814, + "learning_rate": 1.5124312607318837e-06, + "loss": 0.5239, + "step": 27015 + }, + { + "epoch": 0.8280004903763638, + "grad_norm": 1.7718859942399985, + "learning_rate": 1.511906413387768e-06, + "loss": 0.6585, + "step": 27016 + }, + { + "epoch": 0.8280311388991051, + "grad_norm": 1.6026108817326439, + "learning_rate": 1.5113816496790124e-06, + "loss": 0.6541, + "step": 27017 + }, + { + "epoch": 0.8280617874218462, + "grad_norm": 0.7024773415396371, + "learning_rate": 1.5108569696107822e-06, + "loss": 0.5452, + "step": 27018 + }, + { + "epoch": 0.8280924359445875, + "grad_norm": 1.7097263267082194, + "learning_rate": 1.5103323731882514e-06, + "loss": 0.5346, + "step": 27019 + }, + { + "epoch": 0.8281230844673286, + "grad_norm": 1.8054180356130254, + "learning_rate": 1.50980786041659e-06, + "loss": 0.6537, + "step": 27020 + }, + { + "epoch": 0.8281537329900699, + "grad_norm": 1.7547818292953077, + "learning_rate": 1.5092834313009608e-06, + "loss": 0.7374, + "step": 27021 + }, + { + "epoch": 0.828184381512811, + "grad_norm": 1.899456905008356, + "learning_rate": 1.5087590858465372e-06, + "loss": 0.6572, + "step": 27022 + }, + { + "epoch": 0.8282150300355523, + "grad_norm": 1.9001211901759285, + "learning_rate": 1.508234824058481e-06, + "loss": 0.6778, + "step": 27023 + }, + { + "epoch": 0.8282456785582935, + "grad_norm": 2.0766337394332166, + "learning_rate": 1.5077106459419599e-06, + "loss": 0.578, + "step": 27024 + }, + { + "epoch": 0.8282763270810347, + "grad_norm": 1.4977564651145945, + "learning_rate": 1.507186551502141e-06, + "loss": 0.6154, + "step": 27025 + }, + { + "epoch": 0.8283069756037759, + "grad_norm": 1.518154885570785, + "learning_rate": 1.5066625407441826e-06, + "loss": 0.6217, + "step": 27026 + }, + { + "epoch": 0.8283376241265171, + "grad_norm": 1.6587250164399172, + "learning_rate": 1.5061386136732526e-06, + "loss": 0.5913, + "step": 27027 + }, + { + "epoch": 0.8283682726492583, + "grad_norm": 1.920259138385895, + "learning_rate": 1.5056147702945134e-06, + "loss": 0.7147, + "step": 27028 + }, + { + "epoch": 0.8283989211719995, + "grad_norm": 1.7198078110380521, + "learning_rate": 1.5050910106131233e-06, + "loss": 0.688, + "step": 27029 + }, + { + "epoch": 0.8284295696947407, + "grad_norm": 1.7699223106203639, + "learning_rate": 1.5045673346342448e-06, + "loss": 0.6478, + "step": 27030 + }, + { + "epoch": 0.828460218217482, + "grad_norm": 1.7169264276443519, + "learning_rate": 1.5040437423630404e-06, + "loss": 0.7288, + "step": 27031 + }, + { + "epoch": 0.8284908667402231, + "grad_norm": 1.5824748086670968, + "learning_rate": 1.503520233804665e-06, + "loss": 0.6988, + "step": 27032 + }, + { + "epoch": 0.8285215152629644, + "grad_norm": 1.554449092682245, + "learning_rate": 1.502996808964281e-06, + "loss": 0.591, + "step": 27033 + }, + { + "epoch": 0.8285521637857055, + "grad_norm": 1.6888808264750406, + "learning_rate": 1.502473467847041e-06, + "loss": 0.6315, + "step": 27034 + }, + { + "epoch": 0.8285828123084468, + "grad_norm": 0.7024156179534743, + "learning_rate": 1.501950210458103e-06, + "loss": 0.5444, + "step": 27035 + }, + { + "epoch": 0.8286134608311879, + "grad_norm": 2.2172330323488767, + "learning_rate": 1.5014270368026274e-06, + "loss": 0.5752, + "step": 27036 + }, + { + "epoch": 0.8286441093539292, + "grad_norm": 1.6604467314997005, + "learning_rate": 1.5009039468857633e-06, + "loss": 0.5965, + "step": 27037 + }, + { + "epoch": 0.8286747578766703, + "grad_norm": 1.6351671452675658, + "learning_rate": 1.5003809407126668e-06, + "loss": 0.5554, + "step": 27038 + }, + { + "epoch": 0.8287054063994116, + "grad_norm": 1.7159857923514317, + "learning_rate": 1.4998580182884937e-06, + "loss": 0.7253, + "step": 27039 + }, + { + "epoch": 0.8287360549221527, + "grad_norm": 1.818492583881738, + "learning_rate": 1.499335179618393e-06, + "loss": 0.7125, + "step": 27040 + }, + { + "epoch": 0.828766703444894, + "grad_norm": 1.5312374947012901, + "learning_rate": 1.4988124247075176e-06, + "loss": 0.6173, + "step": 27041 + }, + { + "epoch": 0.8287973519676352, + "grad_norm": 1.951942772118029, + "learning_rate": 1.4982897535610197e-06, + "loss": 0.6445, + "step": 27042 + }, + { + "epoch": 0.8288280004903764, + "grad_norm": 1.6955821747421838, + "learning_rate": 1.4977671661840465e-06, + "loss": 0.6782, + "step": 27043 + }, + { + "epoch": 0.8288586490131176, + "grad_norm": 1.6258718875088987, + "learning_rate": 1.4972446625817516e-06, + "loss": 0.6269, + "step": 27044 + }, + { + "epoch": 0.8288892975358588, + "grad_norm": 1.728558021857777, + "learning_rate": 1.4967222427592776e-06, + "loss": 0.6187, + "step": 27045 + }, + { + "epoch": 0.8289199460586, + "grad_norm": 1.646444653202246, + "learning_rate": 1.4961999067217748e-06, + "loss": 0.6963, + "step": 27046 + }, + { + "epoch": 0.8289505945813411, + "grad_norm": 1.7135335317817892, + "learning_rate": 1.4956776544743935e-06, + "loss": 0.6311, + "step": 27047 + }, + { + "epoch": 0.8289812431040824, + "grad_norm": 1.9107760919823265, + "learning_rate": 1.4951554860222727e-06, + "loss": 0.7015, + "step": 27048 + }, + { + "epoch": 0.8290118916268235, + "grad_norm": 1.9967667866406102, + "learning_rate": 1.494633401370561e-06, + "loss": 0.7263, + "step": 27049 + }, + { + "epoch": 0.8290425401495648, + "grad_norm": 0.6605142118836044, + "learning_rate": 1.4941114005244062e-06, + "loss": 0.4974, + "step": 27050 + }, + { + "epoch": 0.829073188672306, + "grad_norm": 1.8617454015541315, + "learning_rate": 1.493589483488944e-06, + "loss": 0.6374, + "step": 27051 + }, + { + "epoch": 0.8291038371950472, + "grad_norm": 1.7102580064839032, + "learning_rate": 1.4930676502693231e-06, + "loss": 0.6528, + "step": 27052 + }, + { + "epoch": 0.8291344857177884, + "grad_norm": 1.6145185185653672, + "learning_rate": 1.4925459008706844e-06, + "loss": 0.6183, + "step": 27053 + }, + { + "epoch": 0.8291651342405296, + "grad_norm": 1.7050706921918861, + "learning_rate": 1.4920242352981651e-06, + "loss": 0.6904, + "step": 27054 + }, + { + "epoch": 0.8291957827632708, + "grad_norm": 1.6913576749066375, + "learning_rate": 1.49150265355691e-06, + "loss": 0.5291, + "step": 27055 + }, + { + "epoch": 0.829226431286012, + "grad_norm": 1.777059231884981, + "learning_rate": 1.4909811556520527e-06, + "loss": 0.6098, + "step": 27056 + }, + { + "epoch": 0.8292570798087532, + "grad_norm": 1.6852313622556936, + "learning_rate": 1.4904597415887389e-06, + "loss": 0.5569, + "step": 27057 + }, + { + "epoch": 0.8292877283314944, + "grad_norm": 1.8449547540901692, + "learning_rate": 1.4899384113721027e-06, + "loss": 0.8168, + "step": 27058 + }, + { + "epoch": 0.8293183768542356, + "grad_norm": 1.7122003221417297, + "learning_rate": 1.4894171650072785e-06, + "loss": 0.6761, + "step": 27059 + }, + { + "epoch": 0.8293490253769769, + "grad_norm": 1.6100168345212416, + "learning_rate": 1.4888960024994049e-06, + "loss": 0.5633, + "step": 27060 + }, + { + "epoch": 0.829379673899718, + "grad_norm": 1.6244854237270199, + "learning_rate": 1.4883749238536182e-06, + "loss": 0.6072, + "step": 27061 + }, + { + "epoch": 0.8294103224224593, + "grad_norm": 0.6512056616211748, + "learning_rate": 1.4878539290750493e-06, + "loss": 0.5082, + "step": 27062 + }, + { + "epoch": 0.8294409709452004, + "grad_norm": 1.7469281737947602, + "learning_rate": 1.4873330181688338e-06, + "loss": 0.6772, + "step": 27063 + }, + { + "epoch": 0.8294716194679417, + "grad_norm": 1.894575854040231, + "learning_rate": 1.4868121911401068e-06, + "loss": 0.7558, + "step": 27064 + }, + { + "epoch": 0.8295022679906828, + "grad_norm": 1.6667084651914443, + "learning_rate": 1.4862914479939939e-06, + "loss": 0.6231, + "step": 27065 + }, + { + "epoch": 0.8295329165134241, + "grad_norm": 0.651809852662755, + "learning_rate": 1.4857707887356332e-06, + "loss": 0.5201, + "step": 27066 + }, + { + "epoch": 0.8295635650361652, + "grad_norm": 1.6537787510896362, + "learning_rate": 1.4852502133701484e-06, + "loss": 0.6139, + "step": 27067 + }, + { + "epoch": 0.8295942135589065, + "grad_norm": 1.8341833610739229, + "learning_rate": 1.4847297219026712e-06, + "loss": 0.6589, + "step": 27068 + }, + { + "epoch": 0.8296248620816477, + "grad_norm": 1.661170042922288, + "learning_rate": 1.484209314338334e-06, + "loss": 0.5794, + "step": 27069 + }, + { + "epoch": 0.8296555106043889, + "grad_norm": 1.8005484806008776, + "learning_rate": 1.4836889906822594e-06, + "loss": 0.7505, + "step": 27070 + }, + { + "epoch": 0.8296861591271301, + "grad_norm": 1.597005336760516, + "learning_rate": 1.4831687509395753e-06, + "loss": 0.6327, + "step": 27071 + }, + { + "epoch": 0.8297168076498713, + "grad_norm": 1.5572962618840507, + "learning_rate": 1.4826485951154112e-06, + "loss": 0.6752, + "step": 27072 + }, + { + "epoch": 0.8297474561726125, + "grad_norm": 0.6696572923975173, + "learning_rate": 1.4821285232148874e-06, + "loss": 0.5245, + "step": 27073 + }, + { + "epoch": 0.8297781046953537, + "grad_norm": 1.593832926203933, + "learning_rate": 1.481608535243133e-06, + "loss": 0.6046, + "step": 27074 + }, + { + "epoch": 0.8298087532180949, + "grad_norm": 1.707130771362055, + "learning_rate": 1.4810886312052654e-06, + "loss": 0.6051, + "step": 27075 + }, + { + "epoch": 0.8298394017408361, + "grad_norm": 0.6516335153934784, + "learning_rate": 1.4805688111064143e-06, + "loss": 0.5142, + "step": 27076 + }, + { + "epoch": 0.8298700502635773, + "grad_norm": 1.823272335065505, + "learning_rate": 1.4800490749516993e-06, + "loss": 0.6835, + "step": 27077 + }, + { + "epoch": 0.8299006987863184, + "grad_norm": 0.6651841420137754, + "learning_rate": 1.4795294227462388e-06, + "loss": 0.5331, + "step": 27078 + }, + { + "epoch": 0.8299313473090597, + "grad_norm": 1.87957743631646, + "learning_rate": 1.4790098544951538e-06, + "loss": 0.6264, + "step": 27079 + }, + { + "epoch": 0.8299619958318009, + "grad_norm": 1.5072876539025852, + "learning_rate": 1.478490370203568e-06, + "loss": 0.5999, + "step": 27080 + }, + { + "epoch": 0.8299926443545421, + "grad_norm": 1.7496062656424622, + "learning_rate": 1.4779709698765943e-06, + "loss": 0.5999, + "step": 27081 + }, + { + "epoch": 0.8300232928772833, + "grad_norm": 1.6746816690051147, + "learning_rate": 1.477451653519354e-06, + "loss": 0.6249, + "step": 27082 + }, + { + "epoch": 0.8300539414000245, + "grad_norm": 1.9710892041755625, + "learning_rate": 1.476932421136964e-06, + "loss": 0.5904, + "step": 27083 + }, + { + "epoch": 0.8300845899227657, + "grad_norm": 1.7702717692371321, + "learning_rate": 1.4764132727345381e-06, + "loss": 0.6038, + "step": 27084 + }, + { + "epoch": 0.8301152384455069, + "grad_norm": 1.525722427864218, + "learning_rate": 1.4758942083171957e-06, + "loss": 0.6612, + "step": 27085 + }, + { + "epoch": 0.8301458869682481, + "grad_norm": 1.8998881135993548, + "learning_rate": 1.4753752278900435e-06, + "loss": 0.6765, + "step": 27086 + }, + { + "epoch": 0.8301765354909894, + "grad_norm": 1.5448513486135123, + "learning_rate": 1.4748563314582043e-06, + "loss": 0.5806, + "step": 27087 + }, + { + "epoch": 0.8302071840137305, + "grad_norm": 1.7359328613041256, + "learning_rate": 1.4743375190267883e-06, + "loss": 0.6313, + "step": 27088 + }, + { + "epoch": 0.8302378325364718, + "grad_norm": 1.6131598988721485, + "learning_rate": 1.4738187906009027e-06, + "loss": 0.6647, + "step": 27089 + }, + { + "epoch": 0.8302684810592129, + "grad_norm": 1.7373280577304424, + "learning_rate": 1.4733001461856623e-06, + "loss": 0.6297, + "step": 27090 + }, + { + "epoch": 0.8302991295819542, + "grad_norm": 0.6878437549773863, + "learning_rate": 1.4727815857861805e-06, + "loss": 0.5369, + "step": 27091 + }, + { + "epoch": 0.8303297781046953, + "grad_norm": 1.6128821353662746, + "learning_rate": 1.47226310940756e-06, + "loss": 0.6182, + "step": 27092 + }, + { + "epoch": 0.8303604266274366, + "grad_norm": 1.8479366632962784, + "learning_rate": 1.4717447170549137e-06, + "loss": 0.6896, + "step": 27093 + }, + { + "epoch": 0.8303910751501777, + "grad_norm": 1.7866920172520062, + "learning_rate": 1.4712264087333483e-06, + "loss": 0.6307, + "step": 27094 + }, + { + "epoch": 0.830421723672919, + "grad_norm": 1.899762468323444, + "learning_rate": 1.470708184447973e-06, + "loss": 0.6342, + "step": 27095 + }, + { + "epoch": 0.8304523721956601, + "grad_norm": 1.5222055765118319, + "learning_rate": 1.4701900442038942e-06, + "loss": 0.4447, + "step": 27096 + }, + { + "epoch": 0.8304830207184014, + "grad_norm": 1.7781611223995497, + "learning_rate": 1.4696719880062093e-06, + "loss": 0.5993, + "step": 27097 + }, + { + "epoch": 0.8305136692411426, + "grad_norm": 1.9954305933182297, + "learning_rate": 1.4691540158600336e-06, + "loss": 0.72, + "step": 27098 + }, + { + "epoch": 0.8305443177638838, + "grad_norm": 1.6889189526028463, + "learning_rate": 1.4686361277704663e-06, + "loss": 0.6102, + "step": 27099 + }, + { + "epoch": 0.830574966286625, + "grad_norm": 1.8555610543741015, + "learning_rate": 1.4681183237426078e-06, + "loss": 0.7468, + "step": 27100 + }, + { + "epoch": 0.8306056148093662, + "grad_norm": 1.9661280372161047, + "learning_rate": 1.4676006037815616e-06, + "loss": 0.6498, + "step": 27101 + }, + { + "epoch": 0.8306362633321074, + "grad_norm": 2.0991220259544145, + "learning_rate": 1.4670829678924314e-06, + "loss": 0.6496, + "step": 27102 + }, + { + "epoch": 0.8306669118548486, + "grad_norm": 1.6906912013079025, + "learning_rate": 1.4665654160803167e-06, + "loss": 0.7004, + "step": 27103 + }, + { + "epoch": 0.8306975603775898, + "grad_norm": 1.740496698755604, + "learning_rate": 1.4660479483503154e-06, + "loss": 0.6416, + "step": 27104 + }, + { + "epoch": 0.830728208900331, + "grad_norm": 1.9927286236640163, + "learning_rate": 1.4655305647075257e-06, + "loss": 0.5861, + "step": 27105 + }, + { + "epoch": 0.8307588574230722, + "grad_norm": 1.602974705713784, + "learning_rate": 1.4650132651570504e-06, + "loss": 0.6046, + "step": 27106 + }, + { + "epoch": 0.8307895059458135, + "grad_norm": 1.6166062925225837, + "learning_rate": 1.464496049703983e-06, + "loss": 0.5952, + "step": 27107 + }, + { + "epoch": 0.8308201544685546, + "grad_norm": 1.629667178448705, + "learning_rate": 1.4639789183534148e-06, + "loss": 0.7041, + "step": 27108 + }, + { + "epoch": 0.8308508029912958, + "grad_norm": 1.72412407989646, + "learning_rate": 1.4634618711104509e-06, + "loss": 0.6179, + "step": 27109 + }, + { + "epoch": 0.830881451514037, + "grad_norm": 1.8896467710662175, + "learning_rate": 1.4629449079801827e-06, + "loss": 0.6679, + "step": 27110 + }, + { + "epoch": 0.8309121000367782, + "grad_norm": 1.6118411538576578, + "learning_rate": 1.4624280289676985e-06, + "loss": 0.6246, + "step": 27111 + }, + { + "epoch": 0.8309427485595194, + "grad_norm": 1.6818446773957858, + "learning_rate": 1.461911234078096e-06, + "loss": 0.6045, + "step": 27112 + }, + { + "epoch": 0.8309733970822606, + "grad_norm": 1.635369704857757, + "learning_rate": 1.4613945233164672e-06, + "loss": 0.6125, + "step": 27113 + }, + { + "epoch": 0.8310040456050019, + "grad_norm": 1.691696097010359, + "learning_rate": 1.4608778966879057e-06, + "loss": 0.6673, + "step": 27114 + }, + { + "epoch": 0.831034694127743, + "grad_norm": 1.8597469343294175, + "learning_rate": 1.460361354197496e-06, + "loss": 0.667, + "step": 27115 + }, + { + "epoch": 0.8310653426504843, + "grad_norm": 1.7823192623562016, + "learning_rate": 1.4598448958503297e-06, + "loss": 0.6806, + "step": 27116 + }, + { + "epoch": 0.8310959911732254, + "grad_norm": 1.6584450556788923, + "learning_rate": 1.4593285216515006e-06, + "loss": 0.6802, + "step": 27117 + }, + { + "epoch": 0.8311266396959667, + "grad_norm": 1.819921400483981, + "learning_rate": 1.4588122316060926e-06, + "loss": 0.6545, + "step": 27118 + }, + { + "epoch": 0.8311572882187078, + "grad_norm": 1.8634671260196811, + "learning_rate": 1.4582960257191902e-06, + "loss": 0.6505, + "step": 27119 + }, + { + "epoch": 0.8311879367414491, + "grad_norm": 1.7390593461458579, + "learning_rate": 1.4577799039958828e-06, + "loss": 0.6138, + "step": 27120 + }, + { + "epoch": 0.8312185852641902, + "grad_norm": 1.9642181062153314, + "learning_rate": 1.4572638664412553e-06, + "loss": 0.6079, + "step": 27121 + }, + { + "epoch": 0.8312492337869315, + "grad_norm": 1.6849996237323988, + "learning_rate": 1.4567479130603956e-06, + "loss": 0.6182, + "step": 27122 + }, + { + "epoch": 0.8312798823096726, + "grad_norm": 1.8530382695717722, + "learning_rate": 1.4562320438583821e-06, + "loss": 0.6781, + "step": 27123 + }, + { + "epoch": 0.8313105308324139, + "grad_norm": 0.6709563137796809, + "learning_rate": 1.4557162588403007e-06, + "loss": 0.515, + "step": 27124 + }, + { + "epoch": 0.8313411793551551, + "grad_norm": 1.6777795043737842, + "learning_rate": 1.455200558011235e-06, + "loss": 0.6388, + "step": 27125 + }, + { + "epoch": 0.8313718278778963, + "grad_norm": 1.8032924071784695, + "learning_rate": 1.4546849413762642e-06, + "loss": 0.7335, + "step": 27126 + }, + { + "epoch": 0.8314024764006375, + "grad_norm": 1.5331281088319462, + "learning_rate": 1.4541694089404645e-06, + "loss": 0.6789, + "step": 27127 + }, + { + "epoch": 0.8314331249233787, + "grad_norm": 1.7124011015422356, + "learning_rate": 1.453653960708925e-06, + "loss": 0.6394, + "step": 27128 + }, + { + "epoch": 0.8314637734461199, + "grad_norm": 1.8704027927900633, + "learning_rate": 1.4531385966867173e-06, + "loss": 0.6187, + "step": 27129 + }, + { + "epoch": 0.8314944219688611, + "grad_norm": 1.6633865284175542, + "learning_rate": 1.452623316878924e-06, + "loss": 0.6011, + "step": 27130 + }, + { + "epoch": 0.8315250704916023, + "grad_norm": 1.625749017818253, + "learning_rate": 1.4521081212906184e-06, + "loss": 0.6408, + "step": 27131 + }, + { + "epoch": 0.8315557190143436, + "grad_norm": 1.654141554647032, + "learning_rate": 1.4515930099268782e-06, + "loss": 0.5395, + "step": 27132 + }, + { + "epoch": 0.8315863675370847, + "grad_norm": 1.5925077096128024, + "learning_rate": 1.4510779827927813e-06, + "loss": 0.6725, + "step": 27133 + }, + { + "epoch": 0.831617016059826, + "grad_norm": 1.63434491021891, + "learning_rate": 1.450563039893399e-06, + "loss": 0.5479, + "step": 27134 + }, + { + "epoch": 0.8316476645825671, + "grad_norm": 1.761181823323792, + "learning_rate": 1.4500481812338053e-06, + "loss": 0.6764, + "step": 27135 + }, + { + "epoch": 0.8316783131053084, + "grad_norm": 0.6638099797865975, + "learning_rate": 1.449533406819077e-06, + "loss": 0.5276, + "step": 27136 + }, + { + "epoch": 0.8317089616280495, + "grad_norm": 1.7047333291136288, + "learning_rate": 1.4490187166542846e-06, + "loss": 0.5462, + "step": 27137 + }, + { + "epoch": 0.8317396101507908, + "grad_norm": 1.687202755973569, + "learning_rate": 1.4485041107444931e-06, + "loss": 0.71, + "step": 27138 + }, + { + "epoch": 0.8317702586735319, + "grad_norm": 1.5015550838443295, + "learning_rate": 1.4479895890947838e-06, + "loss": 0.6003, + "step": 27139 + }, + { + "epoch": 0.8318009071962731, + "grad_norm": 1.4690271582996657, + "learning_rate": 1.4474751517102192e-06, + "loss": 0.6205, + "step": 27140 + }, + { + "epoch": 0.8318315557190143, + "grad_norm": 0.6741051867058289, + "learning_rate": 1.4469607985958711e-06, + "loss": 0.5377, + "step": 27141 + }, + { + "epoch": 0.8318622042417555, + "grad_norm": 1.640156264547204, + "learning_rate": 1.4464465297568052e-06, + "loss": 0.7058, + "step": 27142 + }, + { + "epoch": 0.8318928527644968, + "grad_norm": 1.5364281338820043, + "learning_rate": 1.445932345198091e-06, + "loss": 0.5807, + "step": 27143 + }, + { + "epoch": 0.8319235012872379, + "grad_norm": 1.763450540429855, + "learning_rate": 1.4454182449247955e-06, + "loss": 0.5957, + "step": 27144 + }, + { + "epoch": 0.8319541498099792, + "grad_norm": 1.7374924483556167, + "learning_rate": 1.44490422894198e-06, + "loss": 0.7115, + "step": 27145 + }, + { + "epoch": 0.8319847983327203, + "grad_norm": 1.7114477976087015, + "learning_rate": 1.4443902972547131e-06, + "loss": 0.6706, + "step": 27146 + }, + { + "epoch": 0.8320154468554616, + "grad_norm": 1.8579290700497593, + "learning_rate": 1.4438764498680591e-06, + "loss": 0.611, + "step": 27147 + }, + { + "epoch": 0.8320460953782027, + "grad_norm": 1.8723426963333383, + "learning_rate": 1.4433626867870776e-06, + "loss": 0.6814, + "step": 27148 + }, + { + "epoch": 0.832076743900944, + "grad_norm": 1.512849165409728, + "learning_rate": 1.4428490080168334e-06, + "loss": 0.6471, + "step": 27149 + }, + { + "epoch": 0.8321073924236851, + "grad_norm": 0.6673422732908649, + "learning_rate": 1.442335413562389e-06, + "loss": 0.5408, + "step": 27150 + }, + { + "epoch": 0.8321380409464264, + "grad_norm": 0.6744975097603801, + "learning_rate": 1.4418219034288016e-06, + "loss": 0.5152, + "step": 27151 + }, + { + "epoch": 0.8321686894691676, + "grad_norm": 1.8395844483817456, + "learning_rate": 1.441308477621135e-06, + "loss": 0.6183, + "step": 27152 + }, + { + "epoch": 0.8321993379919088, + "grad_norm": 1.4921820219950142, + "learning_rate": 1.4407951361444428e-06, + "loss": 0.5478, + "step": 27153 + }, + { + "epoch": 0.83222998651465, + "grad_norm": 0.6814101277493714, + "learning_rate": 1.4402818790037865e-06, + "loss": 0.5149, + "step": 27154 + }, + { + "epoch": 0.8322606350373912, + "grad_norm": 1.4409466723657594, + "learning_rate": 1.4397687062042253e-06, + "loss": 0.5037, + "step": 27155 + }, + { + "epoch": 0.8322912835601324, + "grad_norm": 1.6446603389765673, + "learning_rate": 1.439255617750811e-06, + "loss": 0.6682, + "step": 27156 + }, + { + "epoch": 0.8323219320828736, + "grad_norm": 1.8135699798666367, + "learning_rate": 1.4387426136486015e-06, + "loss": 0.7599, + "step": 27157 + }, + { + "epoch": 0.8323525806056148, + "grad_norm": 1.6783186449374727, + "learning_rate": 1.438229693902653e-06, + "loss": 0.594, + "step": 27158 + }, + { + "epoch": 0.832383229128356, + "grad_norm": 1.6756550804272625, + "learning_rate": 1.4377168585180167e-06, + "loss": 0.5563, + "step": 27159 + }, + { + "epoch": 0.8324138776510972, + "grad_norm": 1.603712718268639, + "learning_rate": 1.4372041074997466e-06, + "loss": 0.589, + "step": 27160 + }, + { + "epoch": 0.8324445261738385, + "grad_norm": 1.3939764238365204, + "learning_rate": 1.4366914408528976e-06, + "loss": 0.6119, + "step": 27161 + }, + { + "epoch": 0.8324751746965796, + "grad_norm": 1.6245709266794972, + "learning_rate": 1.4361788585825165e-06, + "loss": 0.6204, + "step": 27162 + }, + { + "epoch": 0.8325058232193209, + "grad_norm": 1.808537323224315, + "learning_rate": 1.4356663606936584e-06, + "loss": 0.6776, + "step": 27163 + }, + { + "epoch": 0.832536471742062, + "grad_norm": 1.8188138652222028, + "learning_rate": 1.4351539471913688e-06, + "loss": 0.7398, + "step": 27164 + }, + { + "epoch": 0.8325671202648033, + "grad_norm": 1.3873227498079481, + "learning_rate": 1.4346416180806987e-06, + "loss": 0.6151, + "step": 27165 + }, + { + "epoch": 0.8325977687875444, + "grad_norm": 1.6937856865884253, + "learning_rate": 1.4341293733666982e-06, + "loss": 0.5696, + "step": 27166 + }, + { + "epoch": 0.8326284173102857, + "grad_norm": 1.4186969789725925, + "learning_rate": 1.4336172130544113e-06, + "loss": 0.714, + "step": 27167 + }, + { + "epoch": 0.8326590658330268, + "grad_norm": 1.4890687821669142, + "learning_rate": 1.4331051371488857e-06, + "loss": 0.5746, + "step": 27168 + }, + { + "epoch": 0.8326897143557681, + "grad_norm": 1.9820654841746213, + "learning_rate": 1.4325931456551688e-06, + "loss": 0.688, + "step": 27169 + }, + { + "epoch": 0.8327203628785093, + "grad_norm": 1.7735141164638253, + "learning_rate": 1.432081238578301e-06, + "loss": 0.6913, + "step": 27170 + }, + { + "epoch": 0.8327510114012504, + "grad_norm": 1.5246310849681113, + "learning_rate": 1.4315694159233317e-06, + "loss": 0.6312, + "step": 27171 + }, + { + "epoch": 0.8327816599239917, + "grad_norm": 1.5923852990535643, + "learning_rate": 1.4310576776953e-06, + "loss": 0.6043, + "step": 27172 + }, + { + "epoch": 0.8328123084467328, + "grad_norm": 1.745870167378445, + "learning_rate": 1.4305460238992486e-06, + "loss": 0.6678, + "step": 27173 + }, + { + "epoch": 0.8328429569694741, + "grad_norm": 1.6915427242139516, + "learning_rate": 1.4300344545402223e-06, + "loss": 0.572, + "step": 27174 + }, + { + "epoch": 0.8328736054922152, + "grad_norm": 1.673233115768395, + "learning_rate": 1.429522969623256e-06, + "loss": 0.5714, + "step": 27175 + }, + { + "epoch": 0.8329042540149565, + "grad_norm": 1.7349594535437547, + "learning_rate": 1.4290115691533934e-06, + "loss": 0.7017, + "step": 27176 + }, + { + "epoch": 0.8329349025376976, + "grad_norm": 1.7079852397882593, + "learning_rate": 1.4285002531356751e-06, + "loss": 0.7126, + "step": 27177 + }, + { + "epoch": 0.8329655510604389, + "grad_norm": 0.6503726676319641, + "learning_rate": 1.4279890215751345e-06, + "loss": 0.5034, + "step": 27178 + }, + { + "epoch": 0.83299619958318, + "grad_norm": 1.8526940550239184, + "learning_rate": 1.4274778744768125e-06, + "loss": 0.5901, + "step": 27179 + }, + { + "epoch": 0.8330268481059213, + "grad_norm": 1.706277958768434, + "learning_rate": 1.4269668118457457e-06, + "loss": 0.6353, + "step": 27180 + }, + { + "epoch": 0.8330574966286625, + "grad_norm": 1.8513047340463415, + "learning_rate": 1.426455833686966e-06, + "loss": 0.6256, + "step": 27181 + }, + { + "epoch": 0.8330881451514037, + "grad_norm": 1.6116506736320146, + "learning_rate": 1.4259449400055124e-06, + "loss": 0.6343, + "step": 27182 + }, + { + "epoch": 0.8331187936741449, + "grad_norm": 1.85048156793936, + "learning_rate": 1.4254341308064136e-06, + "loss": 0.4931, + "step": 27183 + }, + { + "epoch": 0.8331494421968861, + "grad_norm": 1.6024147380578528, + "learning_rate": 1.4249234060947105e-06, + "loss": 0.6031, + "step": 27184 + }, + { + "epoch": 0.8331800907196273, + "grad_norm": 0.6835723159529762, + "learning_rate": 1.4244127658754303e-06, + "loss": 0.526, + "step": 27185 + }, + { + "epoch": 0.8332107392423685, + "grad_norm": 1.8802627700317514, + "learning_rate": 1.4239022101536037e-06, + "loss": 0.6484, + "step": 27186 + }, + { + "epoch": 0.8332413877651097, + "grad_norm": 1.6277773228644061, + "learning_rate": 1.4233917389342633e-06, + "loss": 0.6278, + "step": 27187 + }, + { + "epoch": 0.833272036287851, + "grad_norm": 1.8759889328434012, + "learning_rate": 1.4228813522224394e-06, + "loss": 0.7235, + "step": 27188 + }, + { + "epoch": 0.8333026848105921, + "grad_norm": 1.6285732594911515, + "learning_rate": 1.422371050023159e-06, + "loss": 0.5107, + "step": 27189 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.6505858680004047, + "learning_rate": 1.4218608323414507e-06, + "loss": 0.5236, + "step": 27190 + }, + { + "epoch": 0.8333639818560745, + "grad_norm": 0.6639997777705882, + "learning_rate": 1.4213506991823455e-06, + "loss": 0.514, + "step": 27191 + }, + { + "epoch": 0.8333946303788158, + "grad_norm": 1.8303117133203683, + "learning_rate": 1.4208406505508644e-06, + "loss": 0.6386, + "step": 27192 + }, + { + "epoch": 0.8334252789015569, + "grad_norm": 1.646178402007925, + "learning_rate": 1.4203306864520373e-06, + "loss": 0.6415, + "step": 27193 + }, + { + "epoch": 0.8334559274242982, + "grad_norm": 1.6911342395667228, + "learning_rate": 1.4198208068908826e-06, + "loss": 0.5904, + "step": 27194 + }, + { + "epoch": 0.8334865759470393, + "grad_norm": 1.6330879645429148, + "learning_rate": 1.4193110118724329e-06, + "loss": 0.7216, + "step": 27195 + }, + { + "epoch": 0.8335172244697806, + "grad_norm": 1.716968918533389, + "learning_rate": 1.4188013014017077e-06, + "loss": 0.6312, + "step": 27196 + }, + { + "epoch": 0.8335478729925218, + "grad_norm": 1.627164022575241, + "learning_rate": 1.4182916754837272e-06, + "loss": 0.6801, + "step": 27197 + }, + { + "epoch": 0.833578521515263, + "grad_norm": 1.5183345258435115, + "learning_rate": 1.4177821341235143e-06, + "loss": 0.5906, + "step": 27198 + }, + { + "epoch": 0.8336091700380042, + "grad_norm": 1.7167486463463992, + "learning_rate": 1.4172726773260914e-06, + "loss": 0.641, + "step": 27199 + }, + { + "epoch": 0.8336398185607454, + "grad_norm": 1.5727710811604267, + "learning_rate": 1.4167633050964746e-06, + "loss": 0.5443, + "step": 27200 + }, + { + "epoch": 0.8336704670834866, + "grad_norm": 1.8202152810737628, + "learning_rate": 1.4162540174396855e-06, + "loss": 0.6914, + "step": 27201 + }, + { + "epoch": 0.8337011156062277, + "grad_norm": 1.5317684239497147, + "learning_rate": 1.4157448143607422e-06, + "loss": 0.5359, + "step": 27202 + }, + { + "epoch": 0.833731764128969, + "grad_norm": 1.7505542162541021, + "learning_rate": 1.415235695864664e-06, + "loss": 0.6009, + "step": 27203 + }, + { + "epoch": 0.8337624126517101, + "grad_norm": 1.8218929380502438, + "learning_rate": 1.4147266619564637e-06, + "loss": 0.6274, + "step": 27204 + }, + { + "epoch": 0.8337930611744514, + "grad_norm": 1.6753504346347787, + "learning_rate": 1.4142177126411548e-06, + "loss": 0.6063, + "step": 27205 + }, + { + "epoch": 0.8338237096971925, + "grad_norm": 0.656550171894375, + "learning_rate": 1.4137088479237605e-06, + "loss": 0.5094, + "step": 27206 + }, + { + "epoch": 0.8338543582199338, + "grad_norm": 1.762812387415467, + "learning_rate": 1.4132000678092883e-06, + "loss": 0.6622, + "step": 27207 + }, + { + "epoch": 0.833885006742675, + "grad_norm": 1.610453135629528, + "learning_rate": 1.4126913723027513e-06, + "loss": 0.704, + "step": 27208 + }, + { + "epoch": 0.8339156552654162, + "grad_norm": 1.672831045704461, + "learning_rate": 1.4121827614091631e-06, + "loss": 0.5687, + "step": 27209 + }, + { + "epoch": 0.8339463037881574, + "grad_norm": 1.4596050560659959, + "learning_rate": 1.4116742351335366e-06, + "loss": 0.5655, + "step": 27210 + }, + { + "epoch": 0.8339769523108986, + "grad_norm": 1.6659383764461533, + "learning_rate": 1.411165793480883e-06, + "loss": 0.6777, + "step": 27211 + }, + { + "epoch": 0.8340076008336398, + "grad_norm": 1.79096888708849, + "learning_rate": 1.4106574364562076e-06, + "loss": 0.6425, + "step": 27212 + }, + { + "epoch": 0.834038249356381, + "grad_norm": 1.6815745080030091, + "learning_rate": 1.4101491640645226e-06, + "loss": 0.6113, + "step": 27213 + }, + { + "epoch": 0.8340688978791222, + "grad_norm": 1.7676631647120482, + "learning_rate": 1.4096409763108376e-06, + "loss": 0.484, + "step": 27214 + }, + { + "epoch": 0.8340995464018635, + "grad_norm": 0.6545868253796304, + "learning_rate": 1.4091328732001574e-06, + "loss": 0.5283, + "step": 27215 + }, + { + "epoch": 0.8341301949246046, + "grad_norm": 1.775513124807778, + "learning_rate": 1.4086248547374882e-06, + "loss": 0.6891, + "step": 27216 + }, + { + "epoch": 0.8341608434473459, + "grad_norm": 0.6416460125509325, + "learning_rate": 1.4081169209278355e-06, + "loss": 0.4855, + "step": 27217 + }, + { + "epoch": 0.834191491970087, + "grad_norm": 0.6778898080547212, + "learning_rate": 1.4076090717762081e-06, + "loss": 0.512, + "step": 27218 + }, + { + "epoch": 0.8342221404928283, + "grad_norm": 2.031355655027828, + "learning_rate": 1.407101307287604e-06, + "loss": 0.7435, + "step": 27219 + }, + { + "epoch": 0.8342527890155694, + "grad_norm": 1.7237956076513274, + "learning_rate": 1.40659362746703e-06, + "loss": 0.5941, + "step": 27220 + }, + { + "epoch": 0.8342834375383107, + "grad_norm": 1.5417573822815545, + "learning_rate": 1.406086032319487e-06, + "loss": 0.6028, + "step": 27221 + }, + { + "epoch": 0.8343140860610518, + "grad_norm": 1.8891687595901288, + "learning_rate": 1.4055785218499807e-06, + "loss": 0.7738, + "step": 27222 + }, + { + "epoch": 0.8343447345837931, + "grad_norm": 1.6837969663242849, + "learning_rate": 1.4050710960635072e-06, + "loss": 0.6271, + "step": 27223 + }, + { + "epoch": 0.8343753831065343, + "grad_norm": 1.8342169632658416, + "learning_rate": 1.4045637549650626e-06, + "loss": 0.7141, + "step": 27224 + }, + { + "epoch": 0.8344060316292755, + "grad_norm": 1.701888723318656, + "learning_rate": 1.404056498559655e-06, + "loss": 0.6392, + "step": 27225 + }, + { + "epoch": 0.8344366801520167, + "grad_norm": 1.8119050534149173, + "learning_rate": 1.4035493268522782e-06, + "loss": 0.5915, + "step": 27226 + }, + { + "epoch": 0.8344673286747579, + "grad_norm": 1.5637831388532166, + "learning_rate": 1.4030422398479282e-06, + "loss": 0.5174, + "step": 27227 + }, + { + "epoch": 0.8344979771974991, + "grad_norm": 1.6263409992791655, + "learning_rate": 1.4025352375516011e-06, + "loss": 0.6161, + "step": 27228 + }, + { + "epoch": 0.8345286257202403, + "grad_norm": 1.6793015006605772, + "learning_rate": 1.4020283199682948e-06, + "loss": 0.7263, + "step": 27229 + }, + { + "epoch": 0.8345592742429815, + "grad_norm": 1.7284720766214186, + "learning_rate": 1.401521487103006e-06, + "loss": 0.6831, + "step": 27230 + }, + { + "epoch": 0.8345899227657227, + "grad_norm": 1.8245434355332653, + "learning_rate": 1.401014738960723e-06, + "loss": 0.6638, + "step": 27231 + }, + { + "epoch": 0.8346205712884639, + "grad_norm": 1.783456170022654, + "learning_rate": 1.400508075546443e-06, + "loss": 0.6921, + "step": 27232 + }, + { + "epoch": 0.834651219811205, + "grad_norm": 1.6519060766992741, + "learning_rate": 1.4000014968651576e-06, + "loss": 0.6056, + "step": 27233 + }, + { + "epoch": 0.8346818683339463, + "grad_norm": 1.7382344181607914, + "learning_rate": 1.399495002921859e-06, + "loss": 0.6321, + "step": 27234 + }, + { + "epoch": 0.8347125168566875, + "grad_norm": 1.7836625361269223, + "learning_rate": 1.398988593721533e-06, + "loss": 0.778, + "step": 27235 + }, + { + "epoch": 0.8347431653794287, + "grad_norm": 1.5961723696425971, + "learning_rate": 1.3984822692691769e-06, + "loss": 0.5457, + "step": 27236 + }, + { + "epoch": 0.8347738139021699, + "grad_norm": 1.7439138230826787, + "learning_rate": 1.397976029569773e-06, + "loss": 0.6401, + "step": 27237 + }, + { + "epoch": 0.8348044624249111, + "grad_norm": 0.6355702428550818, + "learning_rate": 1.3974698746283144e-06, + "loss": 0.5126, + "step": 27238 + }, + { + "epoch": 0.8348351109476523, + "grad_norm": 1.7127762866886698, + "learning_rate": 1.3969638044497846e-06, + "loss": 0.6346, + "step": 27239 + }, + { + "epoch": 0.8348657594703935, + "grad_norm": 1.9942849357436874, + "learning_rate": 1.396457819039171e-06, + "loss": 0.7521, + "step": 27240 + }, + { + "epoch": 0.8348964079931347, + "grad_norm": 1.6166370479320638, + "learning_rate": 1.3959519184014624e-06, + "loss": 0.6324, + "step": 27241 + }, + { + "epoch": 0.834927056515876, + "grad_norm": 1.6176580704125616, + "learning_rate": 1.3954461025416388e-06, + "loss": 0.6559, + "step": 27242 + }, + { + "epoch": 0.8349577050386171, + "grad_norm": 0.6822405958186402, + "learning_rate": 1.3949403714646859e-06, + "loss": 0.5278, + "step": 27243 + }, + { + "epoch": 0.8349883535613584, + "grad_norm": 1.7056267154505655, + "learning_rate": 1.3944347251755897e-06, + "loss": 0.7483, + "step": 27244 + }, + { + "epoch": 0.8350190020840995, + "grad_norm": 1.6068898936086162, + "learning_rate": 1.3939291636793307e-06, + "loss": 0.5971, + "step": 27245 + }, + { + "epoch": 0.8350496506068408, + "grad_norm": 0.6367173346356783, + "learning_rate": 1.3934236869808847e-06, + "loss": 0.4863, + "step": 27246 + }, + { + "epoch": 0.8350802991295819, + "grad_norm": 1.8370209257047936, + "learning_rate": 1.3929182950852416e-06, + "loss": 0.5848, + "step": 27247 + }, + { + "epoch": 0.8351109476523232, + "grad_norm": 1.6469337775854551, + "learning_rate": 1.3924129879973737e-06, + "loss": 0.6652, + "step": 27248 + }, + { + "epoch": 0.8351415961750643, + "grad_norm": 1.4652219594130096, + "learning_rate": 1.3919077657222657e-06, + "loss": 0.5526, + "step": 27249 + }, + { + "epoch": 0.8351722446978056, + "grad_norm": 1.446884350500985, + "learning_rate": 1.391402628264892e-06, + "loss": 0.546, + "step": 27250 + }, + { + "epoch": 0.8352028932205467, + "grad_norm": 1.5467083820003131, + "learning_rate": 1.390897575630229e-06, + "loss": 0.5917, + "step": 27251 + }, + { + "epoch": 0.835233541743288, + "grad_norm": 0.6883358179874199, + "learning_rate": 1.3903926078232576e-06, + "loss": 0.515, + "step": 27252 + }, + { + "epoch": 0.8352641902660292, + "grad_norm": 1.6278929564331912, + "learning_rate": 1.3898877248489495e-06, + "loss": 0.6153, + "step": 27253 + }, + { + "epoch": 0.8352948387887704, + "grad_norm": 1.9093303678620055, + "learning_rate": 1.3893829267122794e-06, + "loss": 0.6734, + "step": 27254 + }, + { + "epoch": 0.8353254873115116, + "grad_norm": 3.786525060490711, + "learning_rate": 1.3888782134182255e-06, + "loss": 0.5563, + "step": 27255 + }, + { + "epoch": 0.8353561358342528, + "grad_norm": 1.6654788243794927, + "learning_rate": 1.388373584971755e-06, + "loss": 0.5438, + "step": 27256 + }, + { + "epoch": 0.835386784356994, + "grad_norm": 1.6914312538011365, + "learning_rate": 1.387869041377844e-06, + "loss": 0.5137, + "step": 27257 + }, + { + "epoch": 0.8354174328797352, + "grad_norm": 0.6443175579853396, + "learning_rate": 1.3873645826414639e-06, + "loss": 0.4948, + "step": 27258 + }, + { + "epoch": 0.8354480814024764, + "grad_norm": 1.6873194343259035, + "learning_rate": 1.386860208767582e-06, + "loss": 0.5701, + "step": 27259 + }, + { + "epoch": 0.8354787299252177, + "grad_norm": 0.7130880835504634, + "learning_rate": 1.386355919761173e-06, + "loss": 0.523, + "step": 27260 + }, + { + "epoch": 0.8355093784479588, + "grad_norm": 1.7363661064214382, + "learning_rate": 1.385851715627201e-06, + "loss": 0.6485, + "step": 27261 + }, + { + "epoch": 0.8355400269707001, + "grad_norm": 0.665786528864853, + "learning_rate": 1.3853475963706353e-06, + "loss": 0.5582, + "step": 27262 + }, + { + "epoch": 0.8355706754934412, + "grad_norm": 1.682383170579885, + "learning_rate": 1.3848435619964462e-06, + "loss": 0.637, + "step": 27263 + }, + { + "epoch": 0.8356013240161824, + "grad_norm": 1.6613659461313741, + "learning_rate": 1.3843396125095966e-06, + "loss": 0.7116, + "step": 27264 + }, + { + "epoch": 0.8356319725389236, + "grad_norm": 1.7592066958730552, + "learning_rate": 1.3838357479150522e-06, + "loss": 0.5899, + "step": 27265 + }, + { + "epoch": 0.8356626210616648, + "grad_norm": 1.9589180597794025, + "learning_rate": 1.3833319682177816e-06, + "loss": 0.6537, + "step": 27266 + }, + { + "epoch": 0.835693269584406, + "grad_norm": 1.82451981759141, + "learning_rate": 1.3828282734227428e-06, + "loss": 0.6514, + "step": 27267 + }, + { + "epoch": 0.8357239181071472, + "grad_norm": 1.8089896623430242, + "learning_rate": 1.3823246635349041e-06, + "loss": 0.6535, + "step": 27268 + }, + { + "epoch": 0.8357545666298885, + "grad_norm": 0.6661324931892427, + "learning_rate": 1.381821138559224e-06, + "loss": 0.5561, + "step": 27269 + }, + { + "epoch": 0.8357852151526296, + "grad_norm": 1.757850574977744, + "learning_rate": 1.381317698500665e-06, + "loss": 0.5523, + "step": 27270 + }, + { + "epoch": 0.8358158636753709, + "grad_norm": 0.6705347951270866, + "learning_rate": 1.3808143433641897e-06, + "loss": 0.5058, + "step": 27271 + }, + { + "epoch": 0.835846512198112, + "grad_norm": 1.960414640128892, + "learning_rate": 1.3803110731547531e-06, + "loss": 0.6288, + "step": 27272 + }, + { + "epoch": 0.8358771607208533, + "grad_norm": 0.6408065391829136, + "learning_rate": 1.379807887877318e-06, + "loss": 0.5046, + "step": 27273 + }, + { + "epoch": 0.8359078092435944, + "grad_norm": 1.6532609613981324, + "learning_rate": 1.3793047875368437e-06, + "loss": 0.5967, + "step": 27274 + }, + { + "epoch": 0.8359384577663357, + "grad_norm": 1.7613389049536579, + "learning_rate": 1.378801772138283e-06, + "loss": 0.7569, + "step": 27275 + }, + { + "epoch": 0.8359691062890768, + "grad_norm": 1.9373586933453728, + "learning_rate": 1.3782988416865928e-06, + "loss": 0.7763, + "step": 27276 + }, + { + "epoch": 0.8359997548118181, + "grad_norm": 1.6385274837990451, + "learning_rate": 1.3777959961867338e-06, + "loss": 0.5627, + "step": 27277 + }, + { + "epoch": 0.8360304033345592, + "grad_norm": 1.6640749481643375, + "learning_rate": 1.377293235643654e-06, + "loss": 0.7114, + "step": 27278 + }, + { + "epoch": 0.8360610518573005, + "grad_norm": 1.7057823964315744, + "learning_rate": 1.3767905600623121e-06, + "loss": 0.717, + "step": 27279 + }, + { + "epoch": 0.8360917003800417, + "grad_norm": 0.672261799796328, + "learning_rate": 1.3762879694476583e-06, + "loss": 0.5178, + "step": 27280 + }, + { + "epoch": 0.8361223489027829, + "grad_norm": 1.6274383314574383, + "learning_rate": 1.3757854638046442e-06, + "loss": 0.6602, + "step": 27281 + }, + { + "epoch": 0.8361529974255241, + "grad_norm": 1.500410192997953, + "learning_rate": 1.3752830431382248e-06, + "loss": 0.6418, + "step": 27282 + }, + { + "epoch": 0.8361836459482653, + "grad_norm": 1.9609647431084554, + "learning_rate": 1.374780707453347e-06, + "loss": 0.6693, + "step": 27283 + }, + { + "epoch": 0.8362142944710065, + "grad_norm": 0.6715321613471484, + "learning_rate": 1.3742784567549616e-06, + "loss": 0.5072, + "step": 27284 + }, + { + "epoch": 0.8362449429937477, + "grad_norm": 1.6325572998024473, + "learning_rate": 1.373776291048019e-06, + "loss": 0.5642, + "step": 27285 + }, + { + "epoch": 0.8362755915164889, + "grad_norm": 1.9156022373910564, + "learning_rate": 1.3732742103374642e-06, + "loss": 0.6585, + "step": 27286 + }, + { + "epoch": 0.8363062400392302, + "grad_norm": 1.7805098968353068, + "learning_rate": 1.3727722146282452e-06, + "loss": 0.6362, + "step": 27287 + }, + { + "epoch": 0.8363368885619713, + "grad_norm": 1.7890337924691648, + "learning_rate": 1.3722703039253116e-06, + "loss": 0.6162, + "step": 27288 + }, + { + "epoch": 0.8363675370847126, + "grad_norm": 1.6976783631870405, + "learning_rate": 1.3717684782336038e-06, + "loss": 0.5926, + "step": 27289 + }, + { + "epoch": 0.8363981856074537, + "grad_norm": 1.4732919244495535, + "learning_rate": 1.3712667375580713e-06, + "loss": 0.6808, + "step": 27290 + }, + { + "epoch": 0.836428834130195, + "grad_norm": 1.6018771737971738, + "learning_rate": 1.3707650819036532e-06, + "loss": 0.6203, + "step": 27291 + }, + { + "epoch": 0.8364594826529361, + "grad_norm": 0.6667729284618145, + "learning_rate": 1.3702635112752939e-06, + "loss": 0.5257, + "step": 27292 + }, + { + "epoch": 0.8364901311756774, + "grad_norm": 1.8934202063390646, + "learning_rate": 1.3697620256779398e-06, + "loss": 0.5574, + "step": 27293 + }, + { + "epoch": 0.8365207796984185, + "grad_norm": 1.766884957847458, + "learning_rate": 1.3692606251165252e-06, + "loss": 0.7005, + "step": 27294 + }, + { + "epoch": 0.8365514282211597, + "grad_norm": 1.9569280188887124, + "learning_rate": 1.368759309595994e-06, + "loss": 0.7334, + "step": 27295 + }, + { + "epoch": 0.836582076743901, + "grad_norm": 1.9547551671240337, + "learning_rate": 1.3682580791212885e-06, + "loss": 0.6844, + "step": 27296 + }, + { + "epoch": 0.8366127252666421, + "grad_norm": 1.7201217904176476, + "learning_rate": 1.3677569336973417e-06, + "loss": 0.7024, + "step": 27297 + }, + { + "epoch": 0.8366433737893834, + "grad_norm": 0.6563613228441205, + "learning_rate": 1.3672558733290953e-06, + "loss": 0.5185, + "step": 27298 + }, + { + "epoch": 0.8366740223121245, + "grad_norm": 1.5345428256204505, + "learning_rate": 1.3667548980214874e-06, + "loss": 0.6212, + "step": 27299 + }, + { + "epoch": 0.8367046708348658, + "grad_norm": 1.6402536393232823, + "learning_rate": 1.3662540077794506e-06, + "loss": 0.6293, + "step": 27300 + }, + { + "epoch": 0.8367353193576069, + "grad_norm": 1.809761524734633, + "learning_rate": 1.3657532026079234e-06, + "loss": 0.739, + "step": 27301 + }, + { + "epoch": 0.8367659678803482, + "grad_norm": 1.6715203119950142, + "learning_rate": 1.3652524825118352e-06, + "loss": 0.6424, + "step": 27302 + }, + { + "epoch": 0.8367966164030893, + "grad_norm": 0.6944088200973072, + "learning_rate": 1.3647518474961285e-06, + "loss": 0.5526, + "step": 27303 + }, + { + "epoch": 0.8368272649258306, + "grad_norm": 1.5954438306709986, + "learning_rate": 1.3642512975657308e-06, + "loss": 0.5939, + "step": 27304 + }, + { + "epoch": 0.8368579134485717, + "grad_norm": 1.805202155218967, + "learning_rate": 1.3637508327255721e-06, + "loss": 0.6387, + "step": 27305 + }, + { + "epoch": 0.836888561971313, + "grad_norm": 1.6991920155818285, + "learning_rate": 1.3632504529805867e-06, + "loss": 0.5984, + "step": 27306 + }, + { + "epoch": 0.8369192104940542, + "grad_norm": 1.637466421652112, + "learning_rate": 1.3627501583357062e-06, + "loss": 0.6828, + "step": 27307 + }, + { + "epoch": 0.8369498590167954, + "grad_norm": 1.820223977588771, + "learning_rate": 1.3622499487958563e-06, + "loss": 0.601, + "step": 27308 + }, + { + "epoch": 0.8369805075395366, + "grad_norm": 1.5099157575105993, + "learning_rate": 1.3617498243659677e-06, + "loss": 0.6022, + "step": 27309 + }, + { + "epoch": 0.8370111560622778, + "grad_norm": 1.4828314783830505, + "learning_rate": 1.3612497850509688e-06, + "loss": 0.6286, + "step": 27310 + }, + { + "epoch": 0.837041804585019, + "grad_norm": 0.6853464871597152, + "learning_rate": 1.3607498308557875e-06, + "loss": 0.5138, + "step": 27311 + }, + { + "epoch": 0.8370724531077602, + "grad_norm": 0.6516787172686009, + "learning_rate": 1.3602499617853482e-06, + "loss": 0.482, + "step": 27312 + }, + { + "epoch": 0.8371031016305014, + "grad_norm": 1.4404669517648734, + "learning_rate": 1.3597501778445754e-06, + "loss": 0.5979, + "step": 27313 + }, + { + "epoch": 0.8371337501532427, + "grad_norm": 1.6413887902726663, + "learning_rate": 1.3592504790383942e-06, + "loss": 0.6566, + "step": 27314 + }, + { + "epoch": 0.8371643986759838, + "grad_norm": 0.6345774815947686, + "learning_rate": 1.3587508653717318e-06, + "loss": 0.5011, + "step": 27315 + }, + { + "epoch": 0.8371950471987251, + "grad_norm": 0.6935103791935813, + "learning_rate": 1.3582513368495042e-06, + "loss": 0.528, + "step": 27316 + }, + { + "epoch": 0.8372256957214662, + "grad_norm": 1.7442341659599794, + "learning_rate": 1.3577518934766388e-06, + "loss": 0.6562, + "step": 27317 + }, + { + "epoch": 0.8372563442442075, + "grad_norm": 1.7624310998447008, + "learning_rate": 1.3572525352580568e-06, + "loss": 0.6889, + "step": 27318 + }, + { + "epoch": 0.8372869927669486, + "grad_norm": 1.6993493471864163, + "learning_rate": 1.3567532621986755e-06, + "loss": 0.6831, + "step": 27319 + }, + { + "epoch": 0.8373176412896899, + "grad_norm": 1.7702361030848777, + "learning_rate": 1.3562540743034168e-06, + "loss": 0.7089, + "step": 27320 + }, + { + "epoch": 0.837348289812431, + "grad_norm": 1.7183375634568554, + "learning_rate": 1.3557549715771945e-06, + "loss": 0.6942, + "step": 27321 + }, + { + "epoch": 0.8373789383351723, + "grad_norm": 1.6958914420023325, + "learning_rate": 1.3552559540249354e-06, + "loss": 0.5985, + "step": 27322 + }, + { + "epoch": 0.8374095868579134, + "grad_norm": 1.872754724132142, + "learning_rate": 1.3547570216515504e-06, + "loss": 0.6323, + "step": 27323 + }, + { + "epoch": 0.8374402353806547, + "grad_norm": 0.6578076770292028, + "learning_rate": 1.3542581744619542e-06, + "loss": 0.5234, + "step": 27324 + }, + { + "epoch": 0.8374708839033959, + "grad_norm": 0.6889152860140182, + "learning_rate": 1.3537594124610642e-06, + "loss": 0.54, + "step": 27325 + }, + { + "epoch": 0.837501532426137, + "grad_norm": 1.5270263273885445, + "learning_rate": 1.3532607356537974e-06, + "loss": 0.7034, + "step": 27326 + }, + { + "epoch": 0.8375321809488783, + "grad_norm": 1.7816722652286758, + "learning_rate": 1.3527621440450623e-06, + "loss": 0.5518, + "step": 27327 + }, + { + "epoch": 0.8375628294716194, + "grad_norm": 0.6327429164575671, + "learning_rate": 1.3522636376397747e-06, + "loss": 0.4913, + "step": 27328 + }, + { + "epoch": 0.8375934779943607, + "grad_norm": 0.6689910594876954, + "learning_rate": 1.3517652164428463e-06, + "loss": 0.5359, + "step": 27329 + }, + { + "epoch": 0.8376241265171018, + "grad_norm": 1.7702491943905778, + "learning_rate": 1.35126688045919e-06, + "loss": 0.6604, + "step": 27330 + }, + { + "epoch": 0.8376547750398431, + "grad_norm": 1.9156416080740413, + "learning_rate": 1.350768629693714e-06, + "loss": 0.7423, + "step": 27331 + }, + { + "epoch": 0.8376854235625842, + "grad_norm": 0.6717514436401668, + "learning_rate": 1.350270464151323e-06, + "loss": 0.51, + "step": 27332 + }, + { + "epoch": 0.8377160720853255, + "grad_norm": 1.7993921686603647, + "learning_rate": 1.3497723838369347e-06, + "loss": 0.6683, + "step": 27333 + }, + { + "epoch": 0.8377467206080667, + "grad_norm": 1.568433080818535, + "learning_rate": 1.3492743887554526e-06, + "loss": 0.5713, + "step": 27334 + }, + { + "epoch": 0.8377773691308079, + "grad_norm": 1.5641446420249887, + "learning_rate": 1.3487764789117807e-06, + "loss": 0.6096, + "step": 27335 + }, + { + "epoch": 0.8378080176535491, + "grad_norm": 1.5046079430030517, + "learning_rate": 1.3482786543108284e-06, + "loss": 0.5407, + "step": 27336 + }, + { + "epoch": 0.8378386661762903, + "grad_norm": 0.6583435025206456, + "learning_rate": 1.3477809149574994e-06, + "loss": 0.5086, + "step": 27337 + }, + { + "epoch": 0.8378693146990315, + "grad_norm": 1.5998701291278663, + "learning_rate": 1.347283260856702e-06, + "loss": 0.6337, + "step": 27338 + }, + { + "epoch": 0.8378999632217727, + "grad_norm": 1.6670432266288198, + "learning_rate": 1.3467856920133337e-06, + "loss": 0.6677, + "step": 27339 + }, + { + "epoch": 0.8379306117445139, + "grad_norm": 1.7772838447354977, + "learning_rate": 1.3462882084322993e-06, + "loss": 0.6512, + "step": 27340 + }, + { + "epoch": 0.8379612602672551, + "grad_norm": 1.6246613341517828, + "learning_rate": 1.3457908101185046e-06, + "loss": 0.7301, + "step": 27341 + }, + { + "epoch": 0.8379919087899963, + "grad_norm": 0.6528908078681371, + "learning_rate": 1.3452934970768471e-06, + "loss": 0.4947, + "step": 27342 + }, + { + "epoch": 0.8380225573127376, + "grad_norm": 1.5717768972367525, + "learning_rate": 1.344796269312223e-06, + "loss": 0.6131, + "step": 27343 + }, + { + "epoch": 0.8380532058354787, + "grad_norm": 1.7590013803190416, + "learning_rate": 1.3442991268295392e-06, + "loss": 0.6458, + "step": 27344 + }, + { + "epoch": 0.83808385435822, + "grad_norm": 1.5606337754828365, + "learning_rate": 1.3438020696336918e-06, + "loss": 0.6407, + "step": 27345 + }, + { + "epoch": 0.8381145028809611, + "grad_norm": 1.7689230264709146, + "learning_rate": 1.3433050977295748e-06, + "loss": 0.6878, + "step": 27346 + }, + { + "epoch": 0.8381451514037024, + "grad_norm": 1.6910325632603593, + "learning_rate": 1.3428082111220874e-06, + "loss": 0.7339, + "step": 27347 + }, + { + "epoch": 0.8381757999264435, + "grad_norm": 1.8355816865271652, + "learning_rate": 1.342311409816126e-06, + "loss": 0.7148, + "step": 27348 + }, + { + "epoch": 0.8382064484491848, + "grad_norm": 1.6783653509132994, + "learning_rate": 1.3418146938165877e-06, + "loss": 0.5495, + "step": 27349 + }, + { + "epoch": 0.8382370969719259, + "grad_norm": 1.6621099667978094, + "learning_rate": 1.3413180631283619e-06, + "loss": 0.6002, + "step": 27350 + }, + { + "epoch": 0.8382677454946672, + "grad_norm": 1.746482189257291, + "learning_rate": 1.3408215177563445e-06, + "loss": 0.6415, + "step": 27351 + }, + { + "epoch": 0.8382983940174084, + "grad_norm": 1.6736850767213887, + "learning_rate": 1.3403250577054305e-06, + "loss": 0.6895, + "step": 27352 + }, + { + "epoch": 0.8383290425401496, + "grad_norm": 1.6344948784569757, + "learning_rate": 1.3398286829805096e-06, + "loss": 0.6207, + "step": 27353 + }, + { + "epoch": 0.8383596910628908, + "grad_norm": 1.5359524249220007, + "learning_rate": 1.3393323935864688e-06, + "loss": 0.6592, + "step": 27354 + }, + { + "epoch": 0.838390339585632, + "grad_norm": 1.8060936311625986, + "learning_rate": 1.3388361895282054e-06, + "loss": 0.6682, + "step": 27355 + }, + { + "epoch": 0.8384209881083732, + "grad_norm": 1.6813842684639206, + "learning_rate": 1.3383400708106032e-06, + "loss": 0.6513, + "step": 27356 + }, + { + "epoch": 0.8384516366311143, + "grad_norm": 1.7077682370893845, + "learning_rate": 1.3378440374385548e-06, + "loss": 0.6215, + "step": 27357 + }, + { + "epoch": 0.8384822851538556, + "grad_norm": 1.538499241677186, + "learning_rate": 1.3373480894169422e-06, + "loss": 0.5095, + "step": 27358 + }, + { + "epoch": 0.8385129336765967, + "grad_norm": 1.6516892964066867, + "learning_rate": 1.3368522267506567e-06, + "loss": 0.6321, + "step": 27359 + }, + { + "epoch": 0.838543582199338, + "grad_norm": 1.7641851182828214, + "learning_rate": 1.3363564494445846e-06, + "loss": 0.6506, + "step": 27360 + }, + { + "epoch": 0.8385742307220792, + "grad_norm": 1.7184648755534977, + "learning_rate": 1.3358607575036064e-06, + "loss": 0.6291, + "step": 27361 + }, + { + "epoch": 0.8386048792448204, + "grad_norm": 1.7921606369960705, + "learning_rate": 1.3353651509326093e-06, + "loss": 0.6275, + "step": 27362 + }, + { + "epoch": 0.8386355277675616, + "grad_norm": 0.6750295640582351, + "learning_rate": 1.3348696297364782e-06, + "loss": 0.5197, + "step": 27363 + }, + { + "epoch": 0.8386661762903028, + "grad_norm": 1.6703850485451193, + "learning_rate": 1.3343741939200916e-06, + "loss": 0.7635, + "step": 27364 + }, + { + "epoch": 0.838696824813044, + "grad_norm": 0.6646062457931216, + "learning_rate": 1.3338788434883353e-06, + "loss": 0.5222, + "step": 27365 + }, + { + "epoch": 0.8387274733357852, + "grad_norm": 1.9958698142483744, + "learning_rate": 1.3333835784460869e-06, + "loss": 0.63, + "step": 27366 + }, + { + "epoch": 0.8387581218585264, + "grad_norm": 1.7747735685734911, + "learning_rate": 1.3328883987982267e-06, + "loss": 0.8154, + "step": 27367 + }, + { + "epoch": 0.8387887703812676, + "grad_norm": 1.9998697422693956, + "learning_rate": 1.3323933045496374e-06, + "loss": 0.5724, + "step": 27368 + }, + { + "epoch": 0.8388194189040088, + "grad_norm": 1.8944286887751265, + "learning_rate": 1.3318982957051917e-06, + "loss": 0.6795, + "step": 27369 + }, + { + "epoch": 0.8388500674267501, + "grad_norm": 1.6183927700983463, + "learning_rate": 1.3314033722697705e-06, + "loss": 0.6851, + "step": 27370 + }, + { + "epoch": 0.8388807159494912, + "grad_norm": 1.5456389131657955, + "learning_rate": 1.3309085342482508e-06, + "loss": 0.5925, + "step": 27371 + }, + { + "epoch": 0.8389113644722325, + "grad_norm": 0.6580230536551128, + "learning_rate": 1.3304137816455087e-06, + "loss": 0.5127, + "step": 27372 + }, + { + "epoch": 0.8389420129949736, + "grad_norm": 1.7666689426326272, + "learning_rate": 1.3299191144664137e-06, + "loss": 0.6762, + "step": 27373 + }, + { + "epoch": 0.8389726615177149, + "grad_norm": 1.5136017397655726, + "learning_rate": 1.3294245327158472e-06, + "loss": 0.6092, + "step": 27374 + }, + { + "epoch": 0.839003310040456, + "grad_norm": 1.6913414599262027, + "learning_rate": 1.3289300363986779e-06, + "loss": 0.6058, + "step": 27375 + }, + { + "epoch": 0.8390339585631973, + "grad_norm": 1.6679571912699662, + "learning_rate": 1.3284356255197816e-06, + "loss": 0.7362, + "step": 27376 + }, + { + "epoch": 0.8390646070859384, + "grad_norm": 1.770612517714952, + "learning_rate": 1.3279413000840247e-06, + "loss": 0.6296, + "step": 27377 + }, + { + "epoch": 0.8390952556086797, + "grad_norm": 1.9678586816797348, + "learning_rate": 1.3274470600962818e-06, + "loss": 0.6264, + "step": 27378 + }, + { + "epoch": 0.8391259041314209, + "grad_norm": 1.7654133555431502, + "learning_rate": 1.326952905561424e-06, + "loss": 0.5459, + "step": 27379 + }, + { + "epoch": 0.8391565526541621, + "grad_norm": 0.6776577419470472, + "learning_rate": 1.3264588364843168e-06, + "loss": 0.5415, + "step": 27380 + }, + { + "epoch": 0.8391872011769033, + "grad_norm": 1.5891890948746974, + "learning_rate": 1.3259648528698288e-06, + "loss": 0.6259, + "step": 27381 + }, + { + "epoch": 0.8392178496996445, + "grad_norm": 1.9062996469974745, + "learning_rate": 1.3254709547228318e-06, + "loss": 0.6429, + "step": 27382 + }, + { + "epoch": 0.8392484982223857, + "grad_norm": 1.615049944282795, + "learning_rate": 1.3249771420481861e-06, + "loss": 0.6956, + "step": 27383 + }, + { + "epoch": 0.8392791467451269, + "grad_norm": 1.5718372805053578, + "learning_rate": 1.32448341485076e-06, + "loss": 0.6023, + "step": 27384 + }, + { + "epoch": 0.8393097952678681, + "grad_norm": 1.6403947448879375, + "learning_rate": 1.3239897731354213e-06, + "loss": 0.613, + "step": 27385 + }, + { + "epoch": 0.8393404437906093, + "grad_norm": 1.8811244876199515, + "learning_rate": 1.3234962169070287e-06, + "loss": 0.6203, + "step": 27386 + }, + { + "epoch": 0.8393710923133505, + "grad_norm": 1.7726830957455089, + "learning_rate": 1.3230027461704498e-06, + "loss": 0.6465, + "step": 27387 + }, + { + "epoch": 0.8394017408360916, + "grad_norm": 0.6482985587476995, + "learning_rate": 1.3225093609305429e-06, + "loss": 0.5054, + "step": 27388 + }, + { + "epoch": 0.8394323893588329, + "grad_norm": 0.6583871661335251, + "learning_rate": 1.3220160611921718e-06, + "loss": 0.529, + "step": 27389 + }, + { + "epoch": 0.8394630378815741, + "grad_norm": 2.075338630880378, + "learning_rate": 1.3215228469601993e-06, + "loss": 0.6323, + "step": 27390 + }, + { + "epoch": 0.8394936864043153, + "grad_norm": 1.5663031230308693, + "learning_rate": 1.3210297182394792e-06, + "loss": 0.6125, + "step": 27391 + }, + { + "epoch": 0.8395243349270565, + "grad_norm": 0.6691504483638717, + "learning_rate": 1.3205366750348747e-06, + "loss": 0.5462, + "step": 27392 + }, + { + "epoch": 0.8395549834497977, + "grad_norm": 1.7297199091715705, + "learning_rate": 1.3200437173512459e-06, + "loss": 0.6572, + "step": 27393 + }, + { + "epoch": 0.8395856319725389, + "grad_norm": 1.7286569486620933, + "learning_rate": 1.3195508451934447e-06, + "loss": 0.8217, + "step": 27394 + }, + { + "epoch": 0.8396162804952801, + "grad_norm": 0.6671632726360014, + "learning_rate": 1.3190580585663293e-06, + "loss": 0.5241, + "step": 27395 + }, + { + "epoch": 0.8396469290180213, + "grad_norm": 1.6765791990420973, + "learning_rate": 1.3185653574747581e-06, + "loss": 0.6394, + "step": 27396 + }, + { + "epoch": 0.8396775775407626, + "grad_norm": 1.927271593148135, + "learning_rate": 1.3180727419235827e-06, + "loss": 0.5934, + "step": 27397 + }, + { + "epoch": 0.8397082260635037, + "grad_norm": 1.606607133568383, + "learning_rate": 1.3175802119176596e-06, + "loss": 0.5865, + "step": 27398 + }, + { + "epoch": 0.839738874586245, + "grad_norm": 1.5438076791586355, + "learning_rate": 1.3170877674618376e-06, + "loss": 0.632, + "step": 27399 + }, + { + "epoch": 0.8397695231089861, + "grad_norm": 0.6631205311636733, + "learning_rate": 1.3165954085609712e-06, + "loss": 0.5253, + "step": 27400 + }, + { + "epoch": 0.8398001716317274, + "grad_norm": 1.8155207805326774, + "learning_rate": 1.316103135219915e-06, + "loss": 0.6201, + "step": 27401 + }, + { + "epoch": 0.8398308201544685, + "grad_norm": 0.6830999308441364, + "learning_rate": 1.315610947443513e-06, + "loss": 0.4965, + "step": 27402 + }, + { + "epoch": 0.8398614686772098, + "grad_norm": 1.6226566752107643, + "learning_rate": 1.3151188452366193e-06, + "loss": 0.5798, + "step": 27403 + }, + { + "epoch": 0.8398921171999509, + "grad_norm": 1.5873770698220706, + "learning_rate": 1.3146268286040842e-06, + "loss": 0.6275, + "step": 27404 + }, + { + "epoch": 0.8399227657226922, + "grad_norm": 1.750746444558961, + "learning_rate": 1.3141348975507507e-06, + "loss": 0.6386, + "step": 27405 + }, + { + "epoch": 0.8399534142454333, + "grad_norm": 0.6818804310927198, + "learning_rate": 1.313643052081468e-06, + "loss": 0.5178, + "step": 27406 + }, + { + "epoch": 0.8399840627681746, + "grad_norm": 1.827959299005708, + "learning_rate": 1.3131512922010857e-06, + "loss": 0.6783, + "step": 27407 + }, + { + "epoch": 0.8400147112909158, + "grad_norm": 1.8511670147477677, + "learning_rate": 1.3126596179144435e-06, + "loss": 0.6781, + "step": 27408 + }, + { + "epoch": 0.840045359813657, + "grad_norm": 1.6517657930344054, + "learning_rate": 1.3121680292263917e-06, + "loss": 0.6114, + "step": 27409 + }, + { + "epoch": 0.8400760083363982, + "grad_norm": 1.5845949410430895, + "learning_rate": 1.3116765261417686e-06, + "loss": 0.5527, + "step": 27410 + }, + { + "epoch": 0.8401066568591394, + "grad_norm": 1.3595906829266118, + "learning_rate": 1.3111851086654194e-06, + "loss": 0.5572, + "step": 27411 + }, + { + "epoch": 0.8401373053818806, + "grad_norm": 1.6239993809174285, + "learning_rate": 1.3106937768021898e-06, + "loss": 0.5972, + "step": 27412 + }, + { + "epoch": 0.8401679539046218, + "grad_norm": 1.705248184983839, + "learning_rate": 1.3102025305569145e-06, + "loss": 0.53, + "step": 27413 + }, + { + "epoch": 0.840198602427363, + "grad_norm": 1.849492662159135, + "learning_rate": 1.3097113699344366e-06, + "loss": 0.6449, + "step": 27414 + }, + { + "epoch": 0.8402292509501043, + "grad_norm": 1.6304696532799199, + "learning_rate": 1.3092202949395993e-06, + "loss": 0.6136, + "step": 27415 + }, + { + "epoch": 0.8402598994728454, + "grad_norm": 0.6712501071252938, + "learning_rate": 1.3087293055772353e-06, + "loss": 0.5218, + "step": 27416 + }, + { + "epoch": 0.8402905479955867, + "grad_norm": 1.7858214717570773, + "learning_rate": 1.3082384018521877e-06, + "loss": 0.703, + "step": 27417 + }, + { + "epoch": 0.8403211965183278, + "grad_norm": 1.9756263564232917, + "learning_rate": 1.3077475837692888e-06, + "loss": 0.5982, + "step": 27418 + }, + { + "epoch": 0.840351845041069, + "grad_norm": 1.5878872091992298, + "learning_rate": 1.3072568513333761e-06, + "loss": 0.7235, + "step": 27419 + }, + { + "epoch": 0.8403824935638102, + "grad_norm": 1.5785496951110736, + "learning_rate": 1.306766204549289e-06, + "loss": 0.6099, + "step": 27420 + }, + { + "epoch": 0.8404131420865514, + "grad_norm": 1.9233361958925816, + "learning_rate": 1.306275643421856e-06, + "loss": 0.598, + "step": 27421 + }, + { + "epoch": 0.8404437906092926, + "grad_norm": 1.882333820761442, + "learning_rate": 1.305785167955914e-06, + "loss": 0.8152, + "step": 27422 + }, + { + "epoch": 0.8404744391320338, + "grad_norm": 1.619646100038247, + "learning_rate": 1.3052947781562974e-06, + "loss": 0.6431, + "step": 27423 + }, + { + "epoch": 0.840505087654775, + "grad_norm": 1.7960521387570385, + "learning_rate": 1.3048044740278332e-06, + "loss": 0.7668, + "step": 27424 + }, + { + "epoch": 0.8405357361775162, + "grad_norm": 1.7934274327356372, + "learning_rate": 1.3043142555753563e-06, + "loss": 0.6704, + "step": 27425 + }, + { + "epoch": 0.8405663847002575, + "grad_norm": 1.7677048050573836, + "learning_rate": 1.3038241228036974e-06, + "loss": 0.6159, + "step": 27426 + }, + { + "epoch": 0.8405970332229986, + "grad_norm": 1.7244112566336527, + "learning_rate": 1.3033340757176827e-06, + "loss": 0.5818, + "step": 27427 + }, + { + "epoch": 0.8406276817457399, + "grad_norm": 1.5777476083458417, + "learning_rate": 1.3028441143221438e-06, + "loss": 0.6607, + "step": 27428 + }, + { + "epoch": 0.840658330268481, + "grad_norm": 1.7603429266701984, + "learning_rate": 1.3023542386219035e-06, + "loss": 0.6564, + "step": 27429 + }, + { + "epoch": 0.8406889787912223, + "grad_norm": 0.7122150991809072, + "learning_rate": 1.3018644486217956e-06, + "loss": 0.531, + "step": 27430 + }, + { + "epoch": 0.8407196273139634, + "grad_norm": 1.7238873988669248, + "learning_rate": 1.3013747443266445e-06, + "loss": 0.6604, + "step": 27431 + }, + { + "epoch": 0.8407502758367047, + "grad_norm": 1.9174407988720863, + "learning_rate": 1.3008851257412703e-06, + "loss": 0.6714, + "step": 27432 + }, + { + "epoch": 0.8407809243594458, + "grad_norm": 1.944438254648073, + "learning_rate": 1.3003955928705004e-06, + "loss": 0.6792, + "step": 27433 + }, + { + "epoch": 0.8408115728821871, + "grad_norm": 1.5283960857938874, + "learning_rate": 1.2999061457191619e-06, + "loss": 0.5961, + "step": 27434 + }, + { + "epoch": 0.8408422214049283, + "grad_norm": 1.7828445532467139, + "learning_rate": 1.2994167842920713e-06, + "loss": 0.5795, + "step": 27435 + }, + { + "epoch": 0.8408728699276695, + "grad_norm": 1.5832879303421412, + "learning_rate": 1.2989275085940534e-06, + "loss": 0.603, + "step": 27436 + }, + { + "epoch": 0.8409035184504107, + "grad_norm": 1.7472916036480652, + "learning_rate": 1.2984383186299287e-06, + "loss": 0.6767, + "step": 27437 + }, + { + "epoch": 0.8409341669731519, + "grad_norm": 1.879457687043342, + "learning_rate": 1.2979492144045202e-06, + "loss": 0.669, + "step": 27438 + }, + { + "epoch": 0.8409648154958931, + "grad_norm": 1.6482145669694561, + "learning_rate": 1.2974601959226452e-06, + "loss": 0.6467, + "step": 27439 + }, + { + "epoch": 0.8409954640186343, + "grad_norm": 1.8202272362235605, + "learning_rate": 1.2969712631891163e-06, + "loss": 0.6642, + "step": 27440 + }, + { + "epoch": 0.8410261125413755, + "grad_norm": 1.961172827524619, + "learning_rate": 1.2964824162087607e-06, + "loss": 0.6096, + "step": 27441 + }, + { + "epoch": 0.8410567610641168, + "grad_norm": 1.5455645260712523, + "learning_rate": 1.2959936549863904e-06, + "loss": 0.5813, + "step": 27442 + }, + { + "epoch": 0.8410874095868579, + "grad_norm": 1.8738289491109543, + "learning_rate": 1.2955049795268205e-06, + "loss": 0.5931, + "step": 27443 + }, + { + "epoch": 0.8411180581095992, + "grad_norm": 1.7064998373514548, + "learning_rate": 1.2950163898348667e-06, + "loss": 0.6317, + "step": 27444 + }, + { + "epoch": 0.8411487066323403, + "grad_norm": 1.4942419297396519, + "learning_rate": 1.2945278859153465e-06, + "loss": 0.6057, + "step": 27445 + }, + { + "epoch": 0.8411793551550816, + "grad_norm": 1.861052169947374, + "learning_rate": 1.2940394677730672e-06, + "loss": 0.7743, + "step": 27446 + }, + { + "epoch": 0.8412100036778227, + "grad_norm": 1.4719428493302749, + "learning_rate": 1.293551135412845e-06, + "loss": 0.5884, + "step": 27447 + }, + { + "epoch": 0.841240652200564, + "grad_norm": 1.539151901033202, + "learning_rate": 1.2930628888394914e-06, + "loss": 0.6228, + "step": 27448 + }, + { + "epoch": 0.8412713007233051, + "grad_norm": 1.8325251586432976, + "learning_rate": 1.2925747280578182e-06, + "loss": 0.688, + "step": 27449 + }, + { + "epoch": 0.8413019492460463, + "grad_norm": 1.635043425319831, + "learning_rate": 1.292086653072635e-06, + "loss": 0.6789, + "step": 27450 + }, + { + "epoch": 0.8413325977687875, + "grad_norm": 1.7248431637709012, + "learning_rate": 1.2915986638887446e-06, + "loss": 0.6073, + "step": 27451 + }, + { + "epoch": 0.8413632462915287, + "grad_norm": 1.675320928661529, + "learning_rate": 1.2911107605109664e-06, + "loss": 0.6513, + "step": 27452 + }, + { + "epoch": 0.84139389481427, + "grad_norm": 1.965663414506112, + "learning_rate": 1.2906229429441008e-06, + "loss": 0.6413, + "step": 27453 + }, + { + "epoch": 0.8414245433370111, + "grad_norm": 2.0628119109602436, + "learning_rate": 1.2901352111929544e-06, + "loss": 0.6853, + "step": 27454 + }, + { + "epoch": 0.8414551918597524, + "grad_norm": 1.7993467334153381, + "learning_rate": 1.2896475652623341e-06, + "loss": 0.5105, + "step": 27455 + }, + { + "epoch": 0.8414858403824935, + "grad_norm": 0.7065057662907418, + "learning_rate": 1.2891600051570452e-06, + "loss": 0.5386, + "step": 27456 + }, + { + "epoch": 0.8415164889052348, + "grad_norm": 0.6735404874008849, + "learning_rate": 1.2886725308818938e-06, + "loss": 0.519, + "step": 27457 + }, + { + "epoch": 0.8415471374279759, + "grad_norm": 0.6794186259012028, + "learning_rate": 1.288185142441678e-06, + "loss": 0.5365, + "step": 27458 + }, + { + "epoch": 0.8415777859507172, + "grad_norm": 1.8438812221356309, + "learning_rate": 1.2876978398412033e-06, + "loss": 0.5809, + "step": 27459 + }, + { + "epoch": 0.8416084344734583, + "grad_norm": 1.705293671133277, + "learning_rate": 1.2872106230852732e-06, + "loss": 0.663, + "step": 27460 + }, + { + "epoch": 0.8416390829961996, + "grad_norm": 1.6853171242307574, + "learning_rate": 1.2867234921786865e-06, + "loss": 0.6076, + "step": 27461 + }, + { + "epoch": 0.8416697315189408, + "grad_norm": 1.562855016027012, + "learning_rate": 1.2862364471262401e-06, + "loss": 0.542, + "step": 27462 + }, + { + "epoch": 0.841700380041682, + "grad_norm": 1.795369583968869, + "learning_rate": 1.2857494879327348e-06, + "loss": 0.6409, + "step": 27463 + }, + { + "epoch": 0.8417310285644232, + "grad_norm": 1.6945922169357033, + "learning_rate": 1.28526261460297e-06, + "loss": 0.6556, + "step": 27464 + }, + { + "epoch": 0.8417616770871644, + "grad_norm": 1.8251050412462175, + "learning_rate": 1.2847758271417455e-06, + "loss": 0.7667, + "step": 27465 + }, + { + "epoch": 0.8417923256099056, + "grad_norm": 1.7872712604189382, + "learning_rate": 1.2842891255538515e-06, + "loss": 0.6906, + "step": 27466 + }, + { + "epoch": 0.8418229741326468, + "grad_norm": 1.7497332972073, + "learning_rate": 1.2838025098440865e-06, + "loss": 0.7164, + "step": 27467 + }, + { + "epoch": 0.841853622655388, + "grad_norm": 1.6443545693398327, + "learning_rate": 1.283315980017248e-06, + "loss": 0.6119, + "step": 27468 + }, + { + "epoch": 0.8418842711781293, + "grad_norm": 1.686419197924965, + "learning_rate": 1.2828295360781274e-06, + "loss": 0.5837, + "step": 27469 + }, + { + "epoch": 0.8419149197008704, + "grad_norm": 1.9363413522110087, + "learning_rate": 1.2823431780315144e-06, + "loss": 0.6632, + "step": 27470 + }, + { + "epoch": 0.8419455682236117, + "grad_norm": 0.6729944051513044, + "learning_rate": 1.2818569058822073e-06, + "loss": 0.5078, + "step": 27471 + }, + { + "epoch": 0.8419762167463528, + "grad_norm": 1.8616275721897348, + "learning_rate": 1.2813707196349955e-06, + "loss": 0.7565, + "step": 27472 + }, + { + "epoch": 0.8420068652690941, + "grad_norm": 1.7309029011912904, + "learning_rate": 1.2808846192946668e-06, + "loss": 0.6736, + "step": 27473 + }, + { + "epoch": 0.8420375137918352, + "grad_norm": 1.6727206708398024, + "learning_rate": 1.2803986048660123e-06, + "loss": 0.5683, + "step": 27474 + }, + { + "epoch": 0.8420681623145765, + "grad_norm": 1.7983775321094464, + "learning_rate": 1.2799126763538216e-06, + "loss": 0.6381, + "step": 27475 + }, + { + "epoch": 0.8420988108373176, + "grad_norm": 1.6017472852531454, + "learning_rate": 1.2794268337628845e-06, + "loss": 0.6468, + "step": 27476 + }, + { + "epoch": 0.8421294593600589, + "grad_norm": 0.6603037688389208, + "learning_rate": 1.278941077097985e-06, + "loss": 0.5129, + "step": 27477 + }, + { + "epoch": 0.8421601078828, + "grad_norm": 1.815829165773731, + "learning_rate": 1.2784554063639088e-06, + "loss": 0.7588, + "step": 27478 + }, + { + "epoch": 0.8421907564055413, + "grad_norm": 1.529475244550045, + "learning_rate": 1.2779698215654457e-06, + "loss": 0.6189, + "step": 27479 + }, + { + "epoch": 0.8422214049282825, + "grad_norm": 1.7994512464274486, + "learning_rate": 1.2774843227073775e-06, + "loss": 0.6332, + "step": 27480 + }, + { + "epoch": 0.8422520534510236, + "grad_norm": 1.5770107192632759, + "learning_rate": 1.2769989097944847e-06, + "loss": 0.5618, + "step": 27481 + }, + { + "epoch": 0.8422827019737649, + "grad_norm": 0.6916154880080272, + "learning_rate": 1.2765135828315567e-06, + "loss": 0.5045, + "step": 27482 + }, + { + "epoch": 0.842313350496506, + "grad_norm": 0.6699004217707156, + "learning_rate": 1.27602834182337e-06, + "loss": 0.5107, + "step": 27483 + }, + { + "epoch": 0.8423439990192473, + "grad_norm": 2.0572348492305594, + "learning_rate": 1.2755431867747114e-06, + "loss": 0.5976, + "step": 27484 + }, + { + "epoch": 0.8423746475419884, + "grad_norm": 1.8140679407934779, + "learning_rate": 1.2750581176903554e-06, + "loss": 0.6518, + "step": 27485 + }, + { + "epoch": 0.8424052960647297, + "grad_norm": 1.8367663954012983, + "learning_rate": 1.2745731345750833e-06, + "loss": 0.5917, + "step": 27486 + }, + { + "epoch": 0.8424359445874708, + "grad_norm": 0.7014900389544899, + "learning_rate": 1.2740882374336783e-06, + "loss": 0.5352, + "step": 27487 + }, + { + "epoch": 0.8424665931102121, + "grad_norm": 1.5834564770243829, + "learning_rate": 1.2736034262709117e-06, + "loss": 0.6377, + "step": 27488 + }, + { + "epoch": 0.8424972416329533, + "grad_norm": 1.8958560105961024, + "learning_rate": 1.2731187010915625e-06, + "loss": 0.699, + "step": 27489 + }, + { + "epoch": 0.8425278901556945, + "grad_norm": 1.8230455842381454, + "learning_rate": 1.2726340619004107e-06, + "loss": 0.6029, + "step": 27490 + }, + { + "epoch": 0.8425585386784357, + "grad_norm": 1.7460991215763266, + "learning_rate": 1.2721495087022262e-06, + "loss": 0.7018, + "step": 27491 + }, + { + "epoch": 0.8425891872011769, + "grad_norm": 1.70851032986693, + "learning_rate": 1.271665041501786e-06, + "loss": 0.5983, + "step": 27492 + }, + { + "epoch": 0.8426198357239181, + "grad_norm": 1.6527791369285794, + "learning_rate": 1.2711806603038645e-06, + "loss": 0.6243, + "step": 27493 + }, + { + "epoch": 0.8426504842466593, + "grad_norm": 1.6873468081695229, + "learning_rate": 1.270696365113232e-06, + "loss": 0.658, + "step": 27494 + }, + { + "epoch": 0.8426811327694005, + "grad_norm": 1.5992484476517896, + "learning_rate": 1.2702121559346637e-06, + "loss": 0.6154, + "step": 27495 + }, + { + "epoch": 0.8427117812921417, + "grad_norm": 1.7290141767521072, + "learning_rate": 1.2697280327729266e-06, + "loss": 0.6594, + "step": 27496 + }, + { + "epoch": 0.8427424298148829, + "grad_norm": 0.6632107043343376, + "learning_rate": 1.2692439956327928e-06, + "loss": 0.5223, + "step": 27497 + }, + { + "epoch": 0.8427730783376242, + "grad_norm": 1.7948847745766678, + "learning_rate": 1.268760044519034e-06, + "loss": 0.6261, + "step": 27498 + }, + { + "epoch": 0.8428037268603653, + "grad_norm": 1.899436715410688, + "learning_rate": 1.268276179436414e-06, + "loss": 0.7241, + "step": 27499 + }, + { + "epoch": 0.8428343753831066, + "grad_norm": 1.9106274169918054, + "learning_rate": 1.2677924003897024e-06, + "loss": 0.5781, + "step": 27500 + }, + { + "epoch": 0.8428650239058477, + "grad_norm": 1.5062594465616281, + "learning_rate": 1.2673087073836698e-06, + "loss": 0.6206, + "step": 27501 + }, + { + "epoch": 0.842895672428589, + "grad_norm": 1.5141601673496778, + "learning_rate": 1.266825100423077e-06, + "loss": 0.6426, + "step": 27502 + }, + { + "epoch": 0.8429263209513301, + "grad_norm": 1.7183983252971624, + "learning_rate": 1.2663415795126898e-06, + "loss": 0.5988, + "step": 27503 + }, + { + "epoch": 0.8429569694740714, + "grad_norm": 0.6739552010148938, + "learning_rate": 1.265858144657276e-06, + "loss": 0.5118, + "step": 27504 + }, + { + "epoch": 0.8429876179968125, + "grad_norm": 1.6381561168284386, + "learning_rate": 1.2653747958615946e-06, + "loss": 0.6024, + "step": 27505 + }, + { + "epoch": 0.8430182665195538, + "grad_norm": 1.7476567230304074, + "learning_rate": 1.2648915331304124e-06, + "loss": 0.4904, + "step": 27506 + }, + { + "epoch": 0.843048915042295, + "grad_norm": 1.829028104563017, + "learning_rate": 1.2644083564684873e-06, + "loss": 0.7056, + "step": 27507 + }, + { + "epoch": 0.8430795635650362, + "grad_norm": 0.6641105069663767, + "learning_rate": 1.2639252658805811e-06, + "loss": 0.5224, + "step": 27508 + }, + { + "epoch": 0.8431102120877774, + "grad_norm": 1.6496890609859336, + "learning_rate": 1.263442261371457e-06, + "loss": 0.6399, + "step": 27509 + }, + { + "epoch": 0.8431408606105186, + "grad_norm": 1.907256767422812, + "learning_rate": 1.2629593429458687e-06, + "loss": 0.726, + "step": 27510 + }, + { + "epoch": 0.8431715091332598, + "grad_norm": 1.7296125571298424, + "learning_rate": 1.2624765106085778e-06, + "loss": 0.661, + "step": 27511 + }, + { + "epoch": 0.8432021576560009, + "grad_norm": 1.707906258526059, + "learning_rate": 1.2619937643643442e-06, + "loss": 0.6007, + "step": 27512 + }, + { + "epoch": 0.8432328061787422, + "grad_norm": 0.6913551039439743, + "learning_rate": 1.2615111042179195e-06, + "loss": 0.5419, + "step": 27513 + }, + { + "epoch": 0.8432634547014833, + "grad_norm": 1.7797708290598426, + "learning_rate": 1.2610285301740632e-06, + "loss": 0.7083, + "step": 27514 + }, + { + "epoch": 0.8432941032242246, + "grad_norm": 1.6937333765273646, + "learning_rate": 1.260546042237527e-06, + "loss": 0.5765, + "step": 27515 + }, + { + "epoch": 0.8433247517469658, + "grad_norm": 1.5461313980598648, + "learning_rate": 1.2600636404130673e-06, + "loss": 0.4854, + "step": 27516 + }, + { + "epoch": 0.843355400269707, + "grad_norm": 1.630518360167449, + "learning_rate": 1.2595813247054378e-06, + "loss": 0.7016, + "step": 27517 + }, + { + "epoch": 0.8433860487924482, + "grad_norm": 1.7452906332202078, + "learning_rate": 1.2590990951193882e-06, + "loss": 0.7135, + "step": 27518 + }, + { + "epoch": 0.8434166973151894, + "grad_norm": 1.8634330149532452, + "learning_rate": 1.2586169516596713e-06, + "loss": 0.6296, + "step": 27519 + }, + { + "epoch": 0.8434473458379306, + "grad_norm": 1.9104484899201815, + "learning_rate": 1.2581348943310412e-06, + "loss": 0.6161, + "step": 27520 + }, + { + "epoch": 0.8434779943606718, + "grad_norm": 1.7561962024745477, + "learning_rate": 1.2576529231382418e-06, + "loss": 0.6016, + "step": 27521 + }, + { + "epoch": 0.843508642883413, + "grad_norm": 1.6394609416003432, + "learning_rate": 1.2571710380860257e-06, + "loss": 0.5272, + "step": 27522 + }, + { + "epoch": 0.8435392914061542, + "grad_norm": 1.6539458102976214, + "learning_rate": 1.256689239179142e-06, + "loss": 0.5774, + "step": 27523 + }, + { + "epoch": 0.8435699399288954, + "grad_norm": 1.7543268546892947, + "learning_rate": 1.256207526422334e-06, + "loss": 0.5965, + "step": 27524 + }, + { + "epoch": 0.8436005884516367, + "grad_norm": 1.5790796184764473, + "learning_rate": 1.2557258998203526e-06, + "loss": 0.6197, + "step": 27525 + }, + { + "epoch": 0.8436312369743778, + "grad_norm": 1.3660799128337138, + "learning_rate": 1.2552443593779384e-06, + "loss": 0.5403, + "step": 27526 + }, + { + "epoch": 0.8436618854971191, + "grad_norm": 1.6357283154244961, + "learning_rate": 1.25476290509984e-06, + "loss": 0.7155, + "step": 27527 + }, + { + "epoch": 0.8436925340198602, + "grad_norm": 1.701633112473793, + "learning_rate": 1.2542815369908023e-06, + "loss": 0.6373, + "step": 27528 + }, + { + "epoch": 0.8437231825426015, + "grad_norm": 0.6574167249850016, + "learning_rate": 1.2538002550555638e-06, + "loss": 0.521, + "step": 27529 + }, + { + "epoch": 0.8437538310653426, + "grad_norm": 1.7216365309286503, + "learning_rate": 1.2533190592988698e-06, + "loss": 0.6233, + "step": 27530 + }, + { + "epoch": 0.8437844795880839, + "grad_norm": 0.6912342812737767, + "learning_rate": 1.2528379497254628e-06, + "loss": 0.5407, + "step": 27531 + }, + { + "epoch": 0.843815128110825, + "grad_norm": 1.5637605630036933, + "learning_rate": 1.2523569263400792e-06, + "loss": 0.6371, + "step": 27532 + }, + { + "epoch": 0.8438457766335663, + "grad_norm": 1.6021923038693655, + "learning_rate": 1.251875989147462e-06, + "loss": 0.506, + "step": 27533 + }, + { + "epoch": 0.8438764251563075, + "grad_norm": 1.8303378247109943, + "learning_rate": 1.2513951381523492e-06, + "loss": 0.6391, + "step": 27534 + }, + { + "epoch": 0.8439070736790487, + "grad_norm": 0.6689779700415707, + "learning_rate": 1.2509143733594775e-06, + "loss": 0.5528, + "step": 27535 + }, + { + "epoch": 0.8439377222017899, + "grad_norm": 1.752889516085018, + "learning_rate": 1.2504336947735873e-06, + "loss": 0.6186, + "step": 27536 + }, + { + "epoch": 0.8439683707245311, + "grad_norm": 1.780552647967852, + "learning_rate": 1.2499531023994082e-06, + "loss": 0.7002, + "step": 27537 + }, + { + "epoch": 0.8439990192472723, + "grad_norm": 1.8860582789879352, + "learning_rate": 1.2494725962416843e-06, + "loss": 0.6297, + "step": 27538 + }, + { + "epoch": 0.8440296677700135, + "grad_norm": 1.588134110073739, + "learning_rate": 1.248992176305146e-06, + "loss": 0.5521, + "step": 27539 + }, + { + "epoch": 0.8440603162927547, + "grad_norm": 0.6510352050223738, + "learning_rate": 1.2485118425945241e-06, + "loss": 0.5271, + "step": 27540 + }, + { + "epoch": 0.844090964815496, + "grad_norm": 1.7463059664141145, + "learning_rate": 1.248031595114555e-06, + "loss": 0.6457, + "step": 27541 + }, + { + "epoch": 0.8441216133382371, + "grad_norm": 1.9592291346010968, + "learning_rate": 1.2475514338699713e-06, + "loss": 0.6539, + "step": 27542 + }, + { + "epoch": 0.8441522618609782, + "grad_norm": 0.6621711285065044, + "learning_rate": 1.2470713588655014e-06, + "loss": 0.5024, + "step": 27543 + }, + { + "epoch": 0.8441829103837195, + "grad_norm": 1.757533715022156, + "learning_rate": 1.2465913701058762e-06, + "loss": 0.6847, + "step": 27544 + }, + { + "epoch": 0.8442135589064607, + "grad_norm": 0.6894443540128973, + "learning_rate": 1.2461114675958252e-06, + "loss": 0.496, + "step": 27545 + }, + { + "epoch": 0.8442442074292019, + "grad_norm": 1.4826290479095805, + "learning_rate": 1.2456316513400813e-06, + "loss": 0.5958, + "step": 27546 + }, + { + "epoch": 0.8442748559519431, + "grad_norm": 1.6441894946788118, + "learning_rate": 1.2451519213433682e-06, + "loss": 0.6288, + "step": 27547 + }, + { + "epoch": 0.8443055044746843, + "grad_norm": 1.5934276996565035, + "learning_rate": 1.2446722776104082e-06, + "loss": 0.5596, + "step": 27548 + }, + { + "epoch": 0.8443361529974255, + "grad_norm": 1.5545531093139897, + "learning_rate": 1.244192720145938e-06, + "loss": 0.591, + "step": 27549 + }, + { + "epoch": 0.8443668015201667, + "grad_norm": 1.6018815206739838, + "learning_rate": 1.2437132489546767e-06, + "loss": 0.6122, + "step": 27550 + }, + { + "epoch": 0.8443974500429079, + "grad_norm": 0.6483083470801031, + "learning_rate": 1.2432338640413467e-06, + "loss": 0.5064, + "step": 27551 + }, + { + "epoch": 0.8444280985656492, + "grad_norm": 1.9612899040504537, + "learning_rate": 1.2427545654106731e-06, + "loss": 0.6325, + "step": 27552 + }, + { + "epoch": 0.8444587470883903, + "grad_norm": 1.6637142802301306, + "learning_rate": 1.2422753530673825e-06, + "loss": 0.6779, + "step": 27553 + }, + { + "epoch": 0.8444893956111316, + "grad_norm": 1.8141386841734801, + "learning_rate": 1.241796227016192e-06, + "loss": 0.5926, + "step": 27554 + }, + { + "epoch": 0.8445200441338727, + "grad_norm": 1.7184565098078752, + "learning_rate": 1.2413171872618235e-06, + "loss": 0.6171, + "step": 27555 + }, + { + "epoch": 0.844550692656614, + "grad_norm": 2.045951014049444, + "learning_rate": 1.2408382338089975e-06, + "loss": 0.7329, + "step": 27556 + }, + { + "epoch": 0.8445813411793551, + "grad_norm": 0.6593477887690755, + "learning_rate": 1.2403593666624359e-06, + "loss": 0.5066, + "step": 27557 + }, + { + "epoch": 0.8446119897020964, + "grad_norm": 1.8504151327858467, + "learning_rate": 1.2398805858268547e-06, + "loss": 0.5997, + "step": 27558 + }, + { + "epoch": 0.8446426382248375, + "grad_norm": 1.6212648999899768, + "learning_rate": 1.2394018913069693e-06, + "loss": 0.6005, + "step": 27559 + }, + { + "epoch": 0.8446732867475788, + "grad_norm": 1.8628344720417298, + "learning_rate": 1.2389232831074993e-06, + "loss": 0.6582, + "step": 27560 + }, + { + "epoch": 0.84470393527032, + "grad_norm": 1.8552800220111505, + "learning_rate": 1.2384447612331618e-06, + "loss": 0.5692, + "step": 27561 + }, + { + "epoch": 0.8447345837930612, + "grad_norm": 1.7574777360997582, + "learning_rate": 1.2379663256886666e-06, + "loss": 0.6158, + "step": 27562 + }, + { + "epoch": 0.8447652323158024, + "grad_norm": 1.6037967324309703, + "learning_rate": 1.2374879764787318e-06, + "loss": 0.6025, + "step": 27563 + }, + { + "epoch": 0.8447958808385436, + "grad_norm": 1.5217939667959073, + "learning_rate": 1.2370097136080694e-06, + "loss": 0.6321, + "step": 27564 + }, + { + "epoch": 0.8448265293612848, + "grad_norm": 1.8436649152580746, + "learning_rate": 1.2365315370813957e-06, + "loss": 0.67, + "step": 27565 + }, + { + "epoch": 0.844857177884026, + "grad_norm": 1.8004784972878976, + "learning_rate": 1.236053446903418e-06, + "loss": 0.7556, + "step": 27566 + }, + { + "epoch": 0.8448878264067672, + "grad_norm": 1.7690694458068357, + "learning_rate": 1.2355754430788436e-06, + "loss": 0.6215, + "step": 27567 + }, + { + "epoch": 0.8449184749295084, + "grad_norm": 2.021676161911649, + "learning_rate": 1.235097525612392e-06, + "loss": 0.6857, + "step": 27568 + }, + { + "epoch": 0.8449491234522496, + "grad_norm": 1.7578811700623203, + "learning_rate": 1.2346196945087662e-06, + "loss": 0.5885, + "step": 27569 + }, + { + "epoch": 0.8449797719749909, + "grad_norm": 1.5511101883405223, + "learning_rate": 1.2341419497726736e-06, + "loss": 0.617, + "step": 27570 + }, + { + "epoch": 0.845010420497732, + "grad_norm": 1.970312215745497, + "learning_rate": 1.2336642914088237e-06, + "loss": 0.6031, + "step": 27571 + }, + { + "epoch": 0.8450410690204733, + "grad_norm": 1.734251729445184, + "learning_rate": 1.2331867194219216e-06, + "loss": 0.605, + "step": 27572 + }, + { + "epoch": 0.8450717175432144, + "grad_norm": 1.8402109970199667, + "learning_rate": 1.2327092338166768e-06, + "loss": 0.5857, + "step": 27573 + }, + { + "epoch": 0.8451023660659556, + "grad_norm": 1.7367864436917437, + "learning_rate": 1.2322318345977879e-06, + "loss": 0.5701, + "step": 27574 + }, + { + "epoch": 0.8451330145886968, + "grad_norm": 1.7479099054326168, + "learning_rate": 1.2317545217699634e-06, + "loss": 0.5862, + "step": 27575 + }, + { + "epoch": 0.845163663111438, + "grad_norm": 1.5308627148566876, + "learning_rate": 1.2312772953379059e-06, + "loss": 0.6112, + "step": 27576 + }, + { + "epoch": 0.8451943116341792, + "grad_norm": 0.6756891978805593, + "learning_rate": 1.2308001553063176e-06, + "loss": 0.4899, + "step": 27577 + }, + { + "epoch": 0.8452249601569204, + "grad_norm": 1.7542379499999692, + "learning_rate": 1.2303231016798944e-06, + "loss": 0.7151, + "step": 27578 + }, + { + "epoch": 0.8452556086796617, + "grad_norm": 1.7066970610115966, + "learning_rate": 1.2298461344633462e-06, + "loss": 0.7075, + "step": 27579 + }, + { + "epoch": 0.8452862572024028, + "grad_norm": 1.9062270451293815, + "learning_rate": 1.2293692536613678e-06, + "loss": 0.6666, + "step": 27580 + }, + { + "epoch": 0.8453169057251441, + "grad_norm": 1.7470863297267696, + "learning_rate": 1.2288924592786555e-06, + "loss": 0.6092, + "step": 27581 + }, + { + "epoch": 0.8453475542478852, + "grad_norm": 1.7031306471362173, + "learning_rate": 1.2284157513199102e-06, + "loss": 0.6668, + "step": 27582 + }, + { + "epoch": 0.8453782027706265, + "grad_norm": 1.8043824904180077, + "learning_rate": 1.227939129789829e-06, + "loss": 0.6659, + "step": 27583 + }, + { + "epoch": 0.8454088512933676, + "grad_norm": 0.6300333747703322, + "learning_rate": 1.2274625946931107e-06, + "loss": 0.4959, + "step": 27584 + }, + { + "epoch": 0.8454394998161089, + "grad_norm": 1.9919110271447549, + "learning_rate": 1.2269861460344446e-06, + "loss": 0.7018, + "step": 27585 + }, + { + "epoch": 0.84547014833885, + "grad_norm": 1.7207057525818104, + "learning_rate": 1.2265097838185303e-06, + "loss": 0.7475, + "step": 27586 + }, + { + "epoch": 0.8455007968615913, + "grad_norm": 0.6503878845193626, + "learning_rate": 1.2260335080500607e-06, + "loss": 0.514, + "step": 27587 + }, + { + "epoch": 0.8455314453843324, + "grad_norm": 1.6930610475782426, + "learning_rate": 1.2255573187337289e-06, + "loss": 0.5268, + "step": 27588 + }, + { + "epoch": 0.8455620939070737, + "grad_norm": 1.749977156245245, + "learning_rate": 1.2250812158742209e-06, + "loss": 0.6476, + "step": 27589 + }, + { + "epoch": 0.8455927424298149, + "grad_norm": 1.6627034367782187, + "learning_rate": 1.2246051994762364e-06, + "loss": 0.5834, + "step": 27590 + }, + { + "epoch": 0.8456233909525561, + "grad_norm": 1.5568537585662245, + "learning_rate": 1.2241292695444607e-06, + "loss": 0.6504, + "step": 27591 + }, + { + "epoch": 0.8456540394752973, + "grad_norm": 1.6894507338106544, + "learning_rate": 1.2236534260835876e-06, + "loss": 0.6191, + "step": 27592 + }, + { + "epoch": 0.8456846879980385, + "grad_norm": 1.8719984086750074, + "learning_rate": 1.2231776690982999e-06, + "loss": 0.6418, + "step": 27593 + }, + { + "epoch": 0.8457153365207797, + "grad_norm": 0.6863633519158275, + "learning_rate": 1.2227019985932886e-06, + "loss": 0.5552, + "step": 27594 + }, + { + "epoch": 0.8457459850435209, + "grad_norm": 1.6437132039867575, + "learning_rate": 1.222226414573242e-06, + "loss": 0.5654, + "step": 27595 + }, + { + "epoch": 0.8457766335662621, + "grad_norm": 1.6414283436852033, + "learning_rate": 1.2217509170428421e-06, + "loss": 0.6394, + "step": 27596 + }, + { + "epoch": 0.8458072820890034, + "grad_norm": 1.7821279316805811, + "learning_rate": 1.221275506006777e-06, + "loss": 0.6057, + "step": 27597 + }, + { + "epoch": 0.8458379306117445, + "grad_norm": 1.7762706192169495, + "learning_rate": 1.2208001814697324e-06, + "loss": 0.6774, + "step": 27598 + }, + { + "epoch": 0.8458685791344858, + "grad_norm": 1.8081211855930022, + "learning_rate": 1.2203249434363907e-06, + "loss": 0.6735, + "step": 27599 + }, + { + "epoch": 0.8458992276572269, + "grad_norm": 1.7361744685171987, + "learning_rate": 1.2198497919114282e-06, + "loss": 0.5932, + "step": 27600 + }, + { + "epoch": 0.8459298761799682, + "grad_norm": 1.6440982932076857, + "learning_rate": 1.219374726899537e-06, + "loss": 0.6561, + "step": 27601 + }, + { + "epoch": 0.8459605247027093, + "grad_norm": 0.6812844575976619, + "learning_rate": 1.218899748405391e-06, + "loss": 0.5499, + "step": 27602 + }, + { + "epoch": 0.8459911732254506, + "grad_norm": 1.6837665727453728, + "learning_rate": 1.2184248564336754e-06, + "loss": 0.5624, + "step": 27603 + }, + { + "epoch": 0.8460218217481917, + "grad_norm": 1.758726552386625, + "learning_rate": 1.217950050989063e-06, + "loss": 0.7043, + "step": 27604 + }, + { + "epoch": 0.8460524702709329, + "grad_norm": 1.697501417321809, + "learning_rate": 1.2174753320762366e-06, + "loss": 0.6604, + "step": 27605 + }, + { + "epoch": 0.8460831187936741, + "grad_norm": 1.615574448450535, + "learning_rate": 1.2170006996998752e-06, + "loss": 0.5335, + "step": 27606 + }, + { + "epoch": 0.8461137673164153, + "grad_norm": 1.7853140577994564, + "learning_rate": 1.21652615386465e-06, + "loss": 0.6305, + "step": 27607 + }, + { + "epoch": 0.8461444158391566, + "grad_norm": 0.6785322263323705, + "learning_rate": 1.216051694575241e-06, + "loss": 0.5053, + "step": 27608 + }, + { + "epoch": 0.8461750643618977, + "grad_norm": 1.732085318960486, + "learning_rate": 1.2155773218363242e-06, + "loss": 0.7464, + "step": 27609 + }, + { + "epoch": 0.846205712884639, + "grad_norm": 1.7119520684470309, + "learning_rate": 1.2151030356525683e-06, + "loss": 0.6497, + "step": 27610 + }, + { + "epoch": 0.8462363614073801, + "grad_norm": 1.486615163496953, + "learning_rate": 1.214628836028653e-06, + "loss": 0.608, + "step": 27611 + }, + { + "epoch": 0.8462670099301214, + "grad_norm": 1.6578584111369985, + "learning_rate": 1.2141547229692452e-06, + "loss": 0.5899, + "step": 27612 + }, + { + "epoch": 0.8462976584528625, + "grad_norm": 1.794310116692523, + "learning_rate": 1.2136806964790193e-06, + "loss": 0.6476, + "step": 27613 + }, + { + "epoch": 0.8463283069756038, + "grad_norm": 1.5862878802279246, + "learning_rate": 1.2132067565626482e-06, + "loss": 0.678, + "step": 27614 + }, + { + "epoch": 0.846358955498345, + "grad_norm": 1.7614150823042583, + "learning_rate": 1.2127329032247959e-06, + "loss": 0.6434, + "step": 27615 + }, + { + "epoch": 0.8463896040210862, + "grad_norm": 1.608076648912691, + "learning_rate": 1.2122591364701353e-06, + "loss": 0.5739, + "step": 27616 + }, + { + "epoch": 0.8464202525438274, + "grad_norm": 1.6639816853451292, + "learning_rate": 1.211785456303335e-06, + "loss": 0.6364, + "step": 27617 + }, + { + "epoch": 0.8464509010665686, + "grad_norm": 1.6686145340372505, + "learning_rate": 1.2113118627290599e-06, + "loss": 0.6114, + "step": 27618 + }, + { + "epoch": 0.8464815495893098, + "grad_norm": 1.6592642127678119, + "learning_rate": 1.2108383557519777e-06, + "loss": 0.6476, + "step": 27619 + }, + { + "epoch": 0.846512198112051, + "grad_norm": 1.8311424570280248, + "learning_rate": 1.2103649353767554e-06, + "loss": 0.6163, + "step": 27620 + }, + { + "epoch": 0.8465428466347922, + "grad_norm": 1.8146658582469726, + "learning_rate": 1.2098916016080553e-06, + "loss": 0.7262, + "step": 27621 + }, + { + "epoch": 0.8465734951575334, + "grad_norm": 1.7660758642754082, + "learning_rate": 1.2094183544505433e-06, + "loss": 0.5882, + "step": 27622 + }, + { + "epoch": 0.8466041436802746, + "grad_norm": 1.9693649689056067, + "learning_rate": 1.2089451939088802e-06, + "loss": 0.6636, + "step": 27623 + }, + { + "epoch": 0.8466347922030159, + "grad_norm": 0.6678245982600763, + "learning_rate": 1.2084721199877293e-06, + "loss": 0.5017, + "step": 27624 + }, + { + "epoch": 0.846665440725757, + "grad_norm": 0.7286785869827943, + "learning_rate": 1.207999132691753e-06, + "loss": 0.5286, + "step": 27625 + }, + { + "epoch": 0.8466960892484983, + "grad_norm": 1.4961984855254096, + "learning_rate": 1.2075262320256098e-06, + "loss": 0.6574, + "step": 27626 + }, + { + "epoch": 0.8467267377712394, + "grad_norm": 1.7470921468894478, + "learning_rate": 1.2070534179939597e-06, + "loss": 0.5448, + "step": 27627 + }, + { + "epoch": 0.8467573862939807, + "grad_norm": 1.9512174709524976, + "learning_rate": 1.2065806906014644e-06, + "loss": 0.7764, + "step": 27628 + }, + { + "epoch": 0.8467880348167218, + "grad_norm": 0.6662697016294417, + "learning_rate": 1.2061080498527778e-06, + "loss": 0.5033, + "step": 27629 + }, + { + "epoch": 0.8468186833394631, + "grad_norm": 1.7617216789627488, + "learning_rate": 1.2056354957525585e-06, + "loss": 0.6231, + "step": 27630 + }, + { + "epoch": 0.8468493318622042, + "grad_norm": 1.8071053745008179, + "learning_rate": 1.2051630283054638e-06, + "loss": 0.5226, + "step": 27631 + }, + { + "epoch": 0.8468799803849455, + "grad_norm": 1.6145466790314154, + "learning_rate": 1.2046906475161469e-06, + "loss": 0.5451, + "step": 27632 + }, + { + "epoch": 0.8469106289076866, + "grad_norm": 1.8851190953451507, + "learning_rate": 1.2042183533892659e-06, + "loss": 0.6285, + "step": 27633 + }, + { + "epoch": 0.8469412774304279, + "grad_norm": 1.9073670340303543, + "learning_rate": 1.2037461459294685e-06, + "loss": 0.7027, + "step": 27634 + }, + { + "epoch": 0.8469719259531691, + "grad_norm": 1.7084904685559836, + "learning_rate": 1.203274025141412e-06, + "loss": 0.6118, + "step": 27635 + }, + { + "epoch": 0.8470025744759102, + "grad_norm": 1.7547288943229007, + "learning_rate": 1.2028019910297496e-06, + "loss": 0.6773, + "step": 27636 + }, + { + "epoch": 0.8470332229986515, + "grad_norm": 1.653351273735711, + "learning_rate": 1.2023300435991269e-06, + "loss": 0.6782, + "step": 27637 + }, + { + "epoch": 0.8470638715213926, + "grad_norm": 0.6882074867801758, + "learning_rate": 1.2018581828541986e-06, + "loss": 0.5279, + "step": 27638 + }, + { + "epoch": 0.8470945200441339, + "grad_norm": 1.511355089474226, + "learning_rate": 1.201386408799614e-06, + "loss": 0.532, + "step": 27639 + }, + { + "epoch": 0.847125168566875, + "grad_norm": 1.7337132114126288, + "learning_rate": 1.2009147214400175e-06, + "loss": 0.6982, + "step": 27640 + }, + { + "epoch": 0.8471558170896163, + "grad_norm": 1.7276087202393577, + "learning_rate": 1.2004431207800605e-06, + "loss": 0.6815, + "step": 27641 + }, + { + "epoch": 0.8471864656123574, + "grad_norm": 1.6439228928239265, + "learning_rate": 1.1999716068243916e-06, + "loss": 0.6863, + "step": 27642 + }, + { + "epoch": 0.8472171141350987, + "grad_norm": 0.6336257764492341, + "learning_rate": 1.1995001795776507e-06, + "loss": 0.4976, + "step": 27643 + }, + { + "epoch": 0.8472477626578399, + "grad_norm": 1.696444023264911, + "learning_rate": 1.1990288390444893e-06, + "loss": 0.5278, + "step": 27644 + }, + { + "epoch": 0.8472784111805811, + "grad_norm": 1.7804700054419724, + "learning_rate": 1.1985575852295462e-06, + "loss": 0.6473, + "step": 27645 + }, + { + "epoch": 0.8473090597033223, + "grad_norm": 1.5778932974261657, + "learning_rate": 1.1980864181374663e-06, + "loss": 0.6951, + "step": 27646 + }, + { + "epoch": 0.8473397082260635, + "grad_norm": 1.675442173744138, + "learning_rate": 1.197615337772896e-06, + "loss": 0.5495, + "step": 27647 + }, + { + "epoch": 0.8473703567488047, + "grad_norm": 1.5720361898094362, + "learning_rate": 1.1971443441404718e-06, + "loss": 0.5931, + "step": 27648 + }, + { + "epoch": 0.8474010052715459, + "grad_norm": 0.731030566206928, + "learning_rate": 1.1966734372448364e-06, + "loss": 0.5614, + "step": 27649 + }, + { + "epoch": 0.8474316537942871, + "grad_norm": 1.5743653189908915, + "learning_rate": 1.1962026170906316e-06, + "loss": 0.5723, + "step": 27650 + }, + { + "epoch": 0.8474623023170283, + "grad_norm": 0.6395490667829228, + "learning_rate": 1.1957318836824938e-06, + "loss": 0.525, + "step": 27651 + }, + { + "epoch": 0.8474929508397695, + "grad_norm": 1.58580330089632, + "learning_rate": 1.1952612370250628e-06, + "loss": 0.6011, + "step": 27652 + }, + { + "epoch": 0.8475235993625108, + "grad_norm": 1.6200677857608723, + "learning_rate": 1.1947906771229766e-06, + "loss": 0.598, + "step": 27653 + }, + { + "epoch": 0.8475542478852519, + "grad_norm": 2.008877385079311, + "learning_rate": 1.1943202039808689e-06, + "loss": 0.7111, + "step": 27654 + }, + { + "epoch": 0.8475848964079932, + "grad_norm": 1.4715982054378776, + "learning_rate": 1.1938498176033798e-06, + "loss": 0.6211, + "step": 27655 + }, + { + "epoch": 0.8476155449307343, + "grad_norm": 1.6936863655802028, + "learning_rate": 1.193379517995139e-06, + "loss": 0.626, + "step": 27656 + }, + { + "epoch": 0.8476461934534756, + "grad_norm": 0.6390306175240514, + "learning_rate": 1.1929093051607832e-06, + "loss": 0.5287, + "step": 27657 + }, + { + "epoch": 0.8476768419762167, + "grad_norm": 1.754581215328057, + "learning_rate": 1.1924391791049484e-06, + "loss": 0.6465, + "step": 27658 + }, + { + "epoch": 0.847707490498958, + "grad_norm": 1.7278064185745559, + "learning_rate": 1.1919691398322609e-06, + "loss": 0.6832, + "step": 27659 + }, + { + "epoch": 0.8477381390216991, + "grad_norm": 0.6599184683707549, + "learning_rate": 1.1914991873473547e-06, + "loss": 0.5059, + "step": 27660 + }, + { + "epoch": 0.8477687875444404, + "grad_norm": 1.7390545950183545, + "learning_rate": 1.1910293216548641e-06, + "loss": 0.6675, + "step": 27661 + }, + { + "epoch": 0.8477994360671816, + "grad_norm": 1.7702046255184651, + "learning_rate": 1.190559542759413e-06, + "loss": 0.6073, + "step": 27662 + }, + { + "epoch": 0.8478300845899228, + "grad_norm": 0.6318392461013886, + "learning_rate": 1.1900898506656344e-06, + "loss": 0.5002, + "step": 27663 + }, + { + "epoch": 0.847860733112664, + "grad_norm": 0.6759141383611244, + "learning_rate": 1.1896202453781503e-06, + "loss": 0.5117, + "step": 27664 + }, + { + "epoch": 0.8478913816354052, + "grad_norm": 1.9062328397232586, + "learning_rate": 1.1891507269015967e-06, + "loss": 0.6481, + "step": 27665 + }, + { + "epoch": 0.8479220301581464, + "grad_norm": 1.4701174341249126, + "learning_rate": 1.1886812952405946e-06, + "loss": 0.5788, + "step": 27666 + }, + { + "epoch": 0.8479526786808875, + "grad_norm": 0.6858677649248149, + "learning_rate": 1.1882119503997691e-06, + "loss": 0.5275, + "step": 27667 + }, + { + "epoch": 0.8479833272036288, + "grad_norm": 1.5015332592979584, + "learning_rate": 1.1877426923837455e-06, + "loss": 0.6153, + "step": 27668 + }, + { + "epoch": 0.8480139757263699, + "grad_norm": 1.891669800124615, + "learning_rate": 1.1872735211971497e-06, + "loss": 0.6841, + "step": 27669 + }, + { + "epoch": 0.8480446242491112, + "grad_norm": 1.6615773963351572, + "learning_rate": 1.1868044368446007e-06, + "loss": 0.629, + "step": 27670 + }, + { + "epoch": 0.8480752727718524, + "grad_norm": 0.6742056138930386, + "learning_rate": 1.1863354393307224e-06, + "loss": 0.5642, + "step": 27671 + }, + { + "epoch": 0.8481059212945936, + "grad_norm": 0.6626344700288846, + "learning_rate": 1.1858665286601367e-06, + "loss": 0.5206, + "step": 27672 + }, + { + "epoch": 0.8481365698173348, + "grad_norm": 1.5928874384217642, + "learning_rate": 1.1853977048374653e-06, + "loss": 0.6585, + "step": 27673 + }, + { + "epoch": 0.848167218340076, + "grad_norm": 1.766155406235346, + "learning_rate": 1.184928967867325e-06, + "loss": 0.6242, + "step": 27674 + }, + { + "epoch": 0.8481978668628172, + "grad_norm": 1.7443562335491163, + "learning_rate": 1.1844603177543313e-06, + "loss": 0.6173, + "step": 27675 + }, + { + "epoch": 0.8482285153855584, + "grad_norm": 2.0496132605000756, + "learning_rate": 1.1839917545031098e-06, + "loss": 0.7465, + "step": 27676 + }, + { + "epoch": 0.8482591639082996, + "grad_norm": 1.7339735621061023, + "learning_rate": 1.1835232781182726e-06, + "loss": 0.6542, + "step": 27677 + }, + { + "epoch": 0.8482898124310408, + "grad_norm": 1.5270753542615225, + "learning_rate": 1.1830548886044357e-06, + "loss": 0.6236, + "step": 27678 + }, + { + "epoch": 0.848320460953782, + "grad_norm": 1.556381201814171, + "learning_rate": 1.1825865859662133e-06, + "loss": 0.6455, + "step": 27679 + }, + { + "epoch": 0.8483511094765233, + "grad_norm": 1.8714547083714184, + "learning_rate": 1.182118370208224e-06, + "loss": 0.6411, + "step": 27680 + }, + { + "epoch": 0.8483817579992644, + "grad_norm": 1.985407231528369, + "learning_rate": 1.1816502413350772e-06, + "loss": 0.6181, + "step": 27681 + }, + { + "epoch": 0.8484124065220057, + "grad_norm": 1.8481958787851371, + "learning_rate": 1.181182199351386e-06, + "loss": 0.7017, + "step": 27682 + }, + { + "epoch": 0.8484430550447468, + "grad_norm": 1.6087946547600682, + "learning_rate": 1.1807142442617626e-06, + "loss": 0.6812, + "step": 27683 + }, + { + "epoch": 0.8484737035674881, + "grad_norm": 1.760705963575772, + "learning_rate": 1.1802463760708215e-06, + "loss": 0.6465, + "step": 27684 + }, + { + "epoch": 0.8485043520902292, + "grad_norm": 1.4354170816008336, + "learning_rate": 1.1797785947831685e-06, + "loss": 0.64, + "step": 27685 + }, + { + "epoch": 0.8485350006129705, + "grad_norm": 1.6569060693513777, + "learning_rate": 1.1793109004034098e-06, + "loss": 0.6395, + "step": 27686 + }, + { + "epoch": 0.8485656491357116, + "grad_norm": 1.7272796363240512, + "learning_rate": 1.1788432929361626e-06, + "loss": 0.638, + "step": 27687 + }, + { + "epoch": 0.8485962976584529, + "grad_norm": 1.766790232352592, + "learning_rate": 1.1783757723860279e-06, + "loss": 0.5975, + "step": 27688 + }, + { + "epoch": 0.848626946181194, + "grad_norm": 0.6631829012860427, + "learning_rate": 1.1779083387576129e-06, + "loss": 0.5168, + "step": 27689 + }, + { + "epoch": 0.8486575947039353, + "grad_norm": 1.9423704724791437, + "learning_rate": 1.177440992055524e-06, + "loss": 0.5687, + "step": 27690 + }, + { + "epoch": 0.8486882432266765, + "grad_norm": 1.7894836257311528, + "learning_rate": 1.1769737322843667e-06, + "loss": 0.6525, + "step": 27691 + }, + { + "epoch": 0.8487188917494177, + "grad_norm": 1.7793038544792934, + "learning_rate": 1.1765065594487469e-06, + "loss": 0.5748, + "step": 27692 + }, + { + "epoch": 0.8487495402721589, + "grad_norm": 1.6889688469774207, + "learning_rate": 1.176039473553262e-06, + "loss": 0.6518, + "step": 27693 + }, + { + "epoch": 0.8487801887949001, + "grad_norm": 1.7621128575691851, + "learning_rate": 1.1755724746025199e-06, + "loss": 0.4991, + "step": 27694 + }, + { + "epoch": 0.8488108373176413, + "grad_norm": 1.600769684019307, + "learning_rate": 1.175105562601121e-06, + "loss": 0.653, + "step": 27695 + }, + { + "epoch": 0.8488414858403825, + "grad_norm": 1.8870016657082422, + "learning_rate": 1.174638737553665e-06, + "loss": 0.634, + "step": 27696 + }, + { + "epoch": 0.8488721343631237, + "grad_norm": 1.763049910423824, + "learning_rate": 1.1741719994647493e-06, + "loss": 0.6092, + "step": 27697 + }, + { + "epoch": 0.8489027828858648, + "grad_norm": 1.5707124658555038, + "learning_rate": 1.173705348338975e-06, + "loss": 0.7033, + "step": 27698 + }, + { + "epoch": 0.8489334314086061, + "grad_norm": 2.0663121182009383, + "learning_rate": 1.1732387841809412e-06, + "loss": 0.7167, + "step": 27699 + }, + { + "epoch": 0.8489640799313473, + "grad_norm": 1.8031208498778561, + "learning_rate": 1.1727723069952456e-06, + "loss": 0.7224, + "step": 27700 + }, + { + "epoch": 0.8489947284540885, + "grad_norm": 1.768242858913607, + "learning_rate": 1.1723059167864803e-06, + "loss": 0.6402, + "step": 27701 + }, + { + "epoch": 0.8490253769768297, + "grad_norm": 1.8750725612702748, + "learning_rate": 1.1718396135592435e-06, + "loss": 0.7953, + "step": 27702 + }, + { + "epoch": 0.8490560254995709, + "grad_norm": 1.9130166906879007, + "learning_rate": 1.171373397318133e-06, + "loss": 0.6804, + "step": 27703 + }, + { + "epoch": 0.8490866740223121, + "grad_norm": 0.6717485164276144, + "learning_rate": 1.170907268067737e-06, + "loss": 0.5143, + "step": 27704 + }, + { + "epoch": 0.8491173225450533, + "grad_norm": 1.7135222718161733, + "learning_rate": 1.1704412258126495e-06, + "loss": 0.6594, + "step": 27705 + }, + { + "epoch": 0.8491479710677945, + "grad_norm": 1.7905451684487257, + "learning_rate": 1.1699752705574674e-06, + "loss": 0.6953, + "step": 27706 + }, + { + "epoch": 0.8491786195905358, + "grad_norm": 1.4497572229746731, + "learning_rate": 1.1695094023067765e-06, + "loss": 0.5434, + "step": 27707 + }, + { + "epoch": 0.8492092681132769, + "grad_norm": 1.9064910641912467, + "learning_rate": 1.1690436210651679e-06, + "loss": 0.6839, + "step": 27708 + }, + { + "epoch": 0.8492399166360182, + "grad_norm": 1.7131334364893196, + "learning_rate": 1.1685779268372322e-06, + "loss": 0.7088, + "step": 27709 + }, + { + "epoch": 0.8492705651587593, + "grad_norm": 0.6746181157248611, + "learning_rate": 1.1681123196275567e-06, + "loss": 0.5204, + "step": 27710 + }, + { + "epoch": 0.8493012136815006, + "grad_norm": 1.6461837767142646, + "learning_rate": 1.1676467994407336e-06, + "loss": 0.6271, + "step": 27711 + }, + { + "epoch": 0.8493318622042417, + "grad_norm": 2.039842595167242, + "learning_rate": 1.1671813662813437e-06, + "loss": 0.6726, + "step": 27712 + }, + { + "epoch": 0.849362510726983, + "grad_norm": 1.7918386083001787, + "learning_rate": 1.1667160201539763e-06, + "loss": 0.6611, + "step": 27713 + }, + { + "epoch": 0.8493931592497241, + "grad_norm": 0.6765933161397568, + "learning_rate": 1.1662507610632168e-06, + "loss": 0.532, + "step": 27714 + }, + { + "epoch": 0.8494238077724654, + "grad_norm": 0.6526853620533786, + "learning_rate": 1.1657855890136504e-06, + "loss": 0.5014, + "step": 27715 + }, + { + "epoch": 0.8494544562952066, + "grad_norm": 1.5563676465232836, + "learning_rate": 1.1653205040098537e-06, + "loss": 0.586, + "step": 27716 + }, + { + "epoch": 0.8494851048179478, + "grad_norm": 1.9616655470927207, + "learning_rate": 1.1648555060564192e-06, + "loss": 0.6844, + "step": 27717 + }, + { + "epoch": 0.849515753340689, + "grad_norm": 1.5189658275511646, + "learning_rate": 1.1643905951579216e-06, + "loss": 0.5347, + "step": 27718 + }, + { + "epoch": 0.8495464018634302, + "grad_norm": 1.8977603937720613, + "learning_rate": 1.1639257713189466e-06, + "loss": 0.5827, + "step": 27719 + }, + { + "epoch": 0.8495770503861714, + "grad_norm": 1.6638259117193093, + "learning_rate": 1.1634610345440688e-06, + "loss": 0.5442, + "step": 27720 + }, + { + "epoch": 0.8496076989089126, + "grad_norm": 0.671587824348321, + "learning_rate": 1.1629963848378712e-06, + "loss": 0.5108, + "step": 27721 + }, + { + "epoch": 0.8496383474316538, + "grad_norm": 1.7376187440993844, + "learning_rate": 1.1625318222049332e-06, + "loss": 0.6798, + "step": 27722 + }, + { + "epoch": 0.849668995954395, + "grad_norm": 1.5893385525909614, + "learning_rate": 1.1620673466498278e-06, + "loss": 0.5165, + "step": 27723 + }, + { + "epoch": 0.8496996444771362, + "grad_norm": 0.6843637946837291, + "learning_rate": 1.1616029581771349e-06, + "loss": 0.5441, + "step": 27724 + }, + { + "epoch": 0.8497302929998775, + "grad_norm": 1.768207429860978, + "learning_rate": 1.1611386567914308e-06, + "loss": 0.6009, + "step": 27725 + }, + { + "epoch": 0.8497609415226186, + "grad_norm": 1.6937138852829452, + "learning_rate": 1.1606744424972871e-06, + "loss": 0.6253, + "step": 27726 + }, + { + "epoch": 0.8497915900453599, + "grad_norm": 1.6866916344830745, + "learning_rate": 1.1602103152992804e-06, + "loss": 0.6239, + "step": 27727 + }, + { + "epoch": 0.849822238568101, + "grad_norm": 1.8000535121959071, + "learning_rate": 1.159746275201985e-06, + "loss": 0.6323, + "step": 27728 + }, + { + "epoch": 0.8498528870908422, + "grad_norm": 1.880338301979242, + "learning_rate": 1.1592823222099692e-06, + "loss": 0.6136, + "step": 27729 + }, + { + "epoch": 0.8498835356135834, + "grad_norm": 1.8436780664397585, + "learning_rate": 1.1588184563278082e-06, + "loss": 0.6403, + "step": 27730 + }, + { + "epoch": 0.8499141841363246, + "grad_norm": 1.6748990087895819, + "learning_rate": 1.1583546775600696e-06, + "loss": 0.6604, + "step": 27731 + }, + { + "epoch": 0.8499448326590658, + "grad_norm": 1.546153051678757, + "learning_rate": 1.157890985911324e-06, + "loss": 0.6262, + "step": 27732 + }, + { + "epoch": 0.849975481181807, + "grad_norm": 1.6761169577405155, + "learning_rate": 1.1574273813861437e-06, + "loss": 0.6509, + "step": 27733 + }, + { + "epoch": 0.8500061297045483, + "grad_norm": 0.678050272271988, + "learning_rate": 1.1569638639890912e-06, + "loss": 0.4986, + "step": 27734 + }, + { + "epoch": 0.8500367782272894, + "grad_norm": 1.65820495252001, + "learning_rate": 1.1565004337247375e-06, + "loss": 0.5723, + "step": 27735 + }, + { + "epoch": 0.8500674267500307, + "grad_norm": 1.5149741745563716, + "learning_rate": 1.1560370905976481e-06, + "loss": 0.6535, + "step": 27736 + }, + { + "epoch": 0.8500980752727718, + "grad_norm": 1.6412972001889075, + "learning_rate": 1.1555738346123878e-06, + "loss": 0.6895, + "step": 27737 + }, + { + "epoch": 0.8501287237955131, + "grad_norm": 1.583042952756462, + "learning_rate": 1.15511066577352e-06, + "loss": 0.6085, + "step": 27738 + }, + { + "epoch": 0.8501593723182542, + "grad_norm": 1.4893954169057448, + "learning_rate": 1.154647584085613e-06, + "loss": 0.6783, + "step": 27739 + }, + { + "epoch": 0.8501900208409955, + "grad_norm": 1.674753052191216, + "learning_rate": 1.1541845895532233e-06, + "loss": 0.6258, + "step": 27740 + }, + { + "epoch": 0.8502206693637366, + "grad_norm": 1.948702007502756, + "learning_rate": 1.1537216821809194e-06, + "loss": 0.5858, + "step": 27741 + }, + { + "epoch": 0.8502513178864779, + "grad_norm": 1.6440821224011637, + "learning_rate": 1.1532588619732565e-06, + "loss": 0.6616, + "step": 27742 + }, + { + "epoch": 0.850281966409219, + "grad_norm": 1.6293310082369772, + "learning_rate": 1.1527961289347978e-06, + "loss": 0.5928, + "step": 27743 + }, + { + "epoch": 0.8503126149319603, + "grad_norm": 1.9780423503853526, + "learning_rate": 1.1523334830701038e-06, + "loss": 0.6298, + "step": 27744 + }, + { + "epoch": 0.8503432634547015, + "grad_norm": 2.068928270719642, + "learning_rate": 1.15187092438373e-06, + "loss": 0.7139, + "step": 27745 + }, + { + "epoch": 0.8503739119774427, + "grad_norm": 0.6716163609342617, + "learning_rate": 1.1514084528802371e-06, + "loss": 0.5407, + "step": 27746 + }, + { + "epoch": 0.8504045605001839, + "grad_norm": 1.7078045887115103, + "learning_rate": 1.1509460685641816e-06, + "loss": 0.5641, + "step": 27747 + }, + { + "epoch": 0.8504352090229251, + "grad_norm": 1.8672413160027337, + "learning_rate": 1.1504837714401163e-06, + "loss": 0.6241, + "step": 27748 + }, + { + "epoch": 0.8504658575456663, + "grad_norm": 1.6628283868866285, + "learning_rate": 1.150021561512602e-06, + "loss": 0.7134, + "step": 27749 + }, + { + "epoch": 0.8504965060684075, + "grad_norm": 1.7684396886805875, + "learning_rate": 1.1495594387861863e-06, + "loss": 0.725, + "step": 27750 + }, + { + "epoch": 0.8505271545911487, + "grad_norm": 1.8681933595743452, + "learning_rate": 1.1490974032654268e-06, + "loss": 0.6539, + "step": 27751 + }, + { + "epoch": 0.85055780311389, + "grad_norm": 1.6541145518626144, + "learning_rate": 1.148635454954876e-06, + "loss": 0.6112, + "step": 27752 + }, + { + "epoch": 0.8505884516366311, + "grad_norm": 1.761419033844996, + "learning_rate": 1.1481735938590844e-06, + "loss": 0.6508, + "step": 27753 + }, + { + "epoch": 0.8506191001593724, + "grad_norm": 1.9499241584159472, + "learning_rate": 1.147711819982602e-06, + "loss": 0.6284, + "step": 27754 + }, + { + "epoch": 0.8506497486821135, + "grad_norm": 1.7774632214706858, + "learning_rate": 1.1472501333299823e-06, + "loss": 0.7337, + "step": 27755 + }, + { + "epoch": 0.8506803972048548, + "grad_norm": 1.7365673341477883, + "learning_rate": 1.1467885339057704e-06, + "loss": 0.6111, + "step": 27756 + }, + { + "epoch": 0.8507110457275959, + "grad_norm": 1.8579021560832425, + "learning_rate": 1.1463270217145161e-06, + "loss": 0.6809, + "step": 27757 + }, + { + "epoch": 0.8507416942503372, + "grad_norm": 0.6901042008191633, + "learning_rate": 1.145865596760769e-06, + "loss": 0.4954, + "step": 27758 + }, + { + "epoch": 0.8507723427730783, + "grad_norm": 1.5296495162793025, + "learning_rate": 1.1454042590490711e-06, + "loss": 0.5706, + "step": 27759 + }, + { + "epoch": 0.8508029912958195, + "grad_norm": 1.7157091694874442, + "learning_rate": 1.144943008583973e-06, + "loss": 0.6304, + "step": 27760 + }, + { + "epoch": 0.8508336398185607, + "grad_norm": 1.6300499607781802, + "learning_rate": 1.1444818453700157e-06, + "loss": 0.5357, + "step": 27761 + }, + { + "epoch": 0.8508642883413019, + "grad_norm": 1.5652607309911084, + "learning_rate": 1.1440207694117434e-06, + "loss": 0.6426, + "step": 27762 + }, + { + "epoch": 0.8508949368640432, + "grad_norm": 1.7425619101966774, + "learning_rate": 1.1435597807137033e-06, + "loss": 0.6115, + "step": 27763 + }, + { + "epoch": 0.8509255853867843, + "grad_norm": 1.8724745031389496, + "learning_rate": 1.143098879280432e-06, + "loss": 0.7139, + "step": 27764 + }, + { + "epoch": 0.8509562339095256, + "grad_norm": 1.9343887036182421, + "learning_rate": 1.1426380651164749e-06, + "loss": 0.7119, + "step": 27765 + }, + { + "epoch": 0.8509868824322667, + "grad_norm": 1.6848786881409672, + "learning_rate": 1.1421773382263713e-06, + "loss": 0.6496, + "step": 27766 + }, + { + "epoch": 0.851017530955008, + "grad_norm": 1.7023714671995338, + "learning_rate": 1.1417166986146599e-06, + "loss": 0.7125, + "step": 27767 + }, + { + "epoch": 0.8510481794777491, + "grad_norm": 1.5984269271110394, + "learning_rate": 1.1412561462858808e-06, + "loss": 0.5986, + "step": 27768 + }, + { + "epoch": 0.8510788280004904, + "grad_norm": 1.5431232890144022, + "learning_rate": 1.140795681244572e-06, + "loss": 0.585, + "step": 27769 + }, + { + "epoch": 0.8511094765232315, + "grad_norm": 1.7193509957201605, + "learning_rate": 1.140335303495269e-06, + "loss": 0.5777, + "step": 27770 + }, + { + "epoch": 0.8511401250459728, + "grad_norm": 1.6061514809047495, + "learning_rate": 1.1398750130425107e-06, + "loss": 0.6221, + "step": 27771 + }, + { + "epoch": 0.851170773568714, + "grad_norm": 1.7131235826654385, + "learning_rate": 1.1394148098908276e-06, + "loss": 0.65, + "step": 27772 + }, + { + "epoch": 0.8512014220914552, + "grad_norm": 1.5791575352969667, + "learning_rate": 1.1389546940447615e-06, + "loss": 0.6476, + "step": 27773 + }, + { + "epoch": 0.8512320706141964, + "grad_norm": 1.8840782283160187, + "learning_rate": 1.1384946655088413e-06, + "loss": 0.6539, + "step": 27774 + }, + { + "epoch": 0.8512627191369376, + "grad_norm": 1.7023895447672277, + "learning_rate": 1.138034724287599e-06, + "loss": 0.6222, + "step": 27775 + }, + { + "epoch": 0.8512933676596788, + "grad_norm": 1.566896482741472, + "learning_rate": 1.137574870385567e-06, + "loss": 0.6263, + "step": 27776 + }, + { + "epoch": 0.85132401618242, + "grad_norm": 0.678533330087478, + "learning_rate": 1.1371151038072803e-06, + "loss": 0.5138, + "step": 27777 + }, + { + "epoch": 0.8513546647051612, + "grad_norm": 1.659169017219585, + "learning_rate": 1.1366554245572635e-06, + "loss": 0.6457, + "step": 27778 + }, + { + "epoch": 0.8513853132279025, + "grad_norm": 1.5215479573100616, + "learning_rate": 1.1361958326400492e-06, + "loss": 0.6128, + "step": 27779 + }, + { + "epoch": 0.8514159617506436, + "grad_norm": 1.8226384104381046, + "learning_rate": 1.1357363280601673e-06, + "loss": 0.7161, + "step": 27780 + }, + { + "epoch": 0.8514466102733849, + "grad_norm": 1.7471446471028675, + "learning_rate": 1.1352769108221406e-06, + "loss": 0.6334, + "step": 27781 + }, + { + "epoch": 0.851477258796126, + "grad_norm": 0.6585694766294987, + "learning_rate": 1.1348175809305019e-06, + "loss": 0.5364, + "step": 27782 + }, + { + "epoch": 0.8515079073188673, + "grad_norm": 1.8130487368165422, + "learning_rate": 1.1343583383897683e-06, + "loss": 0.6845, + "step": 27783 + }, + { + "epoch": 0.8515385558416084, + "grad_norm": 1.61869760446302, + "learning_rate": 1.1338991832044754e-06, + "loss": 0.6413, + "step": 27784 + }, + { + "epoch": 0.8515692043643497, + "grad_norm": 1.743397383895823, + "learning_rate": 1.1334401153791419e-06, + "loss": 0.6173, + "step": 27785 + }, + { + "epoch": 0.8515998528870908, + "grad_norm": 0.6716225629430875, + "learning_rate": 1.1329811349182895e-06, + "loss": 0.5422, + "step": 27786 + }, + { + "epoch": 0.8516305014098321, + "grad_norm": 1.7258720821308613, + "learning_rate": 1.1325222418264438e-06, + "loss": 0.6576, + "step": 27787 + }, + { + "epoch": 0.8516611499325732, + "grad_norm": 1.67517804633014, + "learning_rate": 1.1320634361081261e-06, + "loss": 0.6397, + "step": 27788 + }, + { + "epoch": 0.8516917984553145, + "grad_norm": 2.1122161597736717, + "learning_rate": 1.1316047177678546e-06, + "loss": 0.7719, + "step": 27789 + }, + { + "epoch": 0.8517224469780557, + "grad_norm": 1.8466968401607504, + "learning_rate": 1.131146086810151e-06, + "loss": 0.673, + "step": 27790 + }, + { + "epoch": 0.8517530955007968, + "grad_norm": 1.800637851315199, + "learning_rate": 1.1306875432395338e-06, + "loss": 0.6693, + "step": 27791 + }, + { + "epoch": 0.8517837440235381, + "grad_norm": 1.6428085534162533, + "learning_rate": 1.1302290870605236e-06, + "loss": 0.6114, + "step": 27792 + }, + { + "epoch": 0.8518143925462792, + "grad_norm": 1.8450444244726272, + "learning_rate": 1.1297707182776363e-06, + "loss": 0.6979, + "step": 27793 + }, + { + "epoch": 0.8518450410690205, + "grad_norm": 1.6560832246801298, + "learning_rate": 1.1293124368953855e-06, + "loss": 0.6863, + "step": 27794 + }, + { + "epoch": 0.8518756895917616, + "grad_norm": 1.6169949960857541, + "learning_rate": 1.128854242918289e-06, + "loss": 0.6135, + "step": 27795 + }, + { + "epoch": 0.8519063381145029, + "grad_norm": 0.6633235308172846, + "learning_rate": 1.1283961363508633e-06, + "loss": 0.533, + "step": 27796 + }, + { + "epoch": 0.851936986637244, + "grad_norm": 1.8504723247711221, + "learning_rate": 1.1279381171976178e-06, + "loss": 0.64, + "step": 27797 + }, + { + "epoch": 0.8519676351599853, + "grad_norm": 0.6980321260605924, + "learning_rate": 1.1274801854630678e-06, + "loss": 0.5112, + "step": 27798 + }, + { + "epoch": 0.8519982836827265, + "grad_norm": 1.7545909574530392, + "learning_rate": 1.1270223411517267e-06, + "loss": 0.7025, + "step": 27799 + }, + { + "epoch": 0.8520289322054677, + "grad_norm": 1.8564764173349064, + "learning_rate": 1.126564584268106e-06, + "loss": 0.7285, + "step": 27800 + }, + { + "epoch": 0.8520595807282089, + "grad_norm": 1.793991180948156, + "learning_rate": 1.126106914816716e-06, + "loss": 0.6421, + "step": 27801 + }, + { + "epoch": 0.8520902292509501, + "grad_norm": 1.8898955251288267, + "learning_rate": 1.12564933280206e-06, + "loss": 0.7294, + "step": 27802 + }, + { + "epoch": 0.8521208777736913, + "grad_norm": 1.8987295008332263, + "learning_rate": 1.1251918382286554e-06, + "loss": 0.5717, + "step": 27803 + }, + { + "epoch": 0.8521515262964325, + "grad_norm": 1.6392527455049732, + "learning_rate": 1.1247344311010077e-06, + "loss": 0.5738, + "step": 27804 + }, + { + "epoch": 0.8521821748191737, + "grad_norm": 1.7086566443427404, + "learning_rate": 1.1242771114236194e-06, + "loss": 0.6172, + "step": 27805 + }, + { + "epoch": 0.852212823341915, + "grad_norm": 1.7155241332474727, + "learning_rate": 1.1238198792009992e-06, + "loss": 0.6605, + "step": 27806 + }, + { + "epoch": 0.8522434718646561, + "grad_norm": 1.67881561074021, + "learning_rate": 1.1233627344376562e-06, + "loss": 0.7157, + "step": 27807 + }, + { + "epoch": 0.8522741203873974, + "grad_norm": 1.5840674356578017, + "learning_rate": 1.1229056771380886e-06, + "loss": 0.6029, + "step": 27808 + }, + { + "epoch": 0.8523047689101385, + "grad_norm": 1.7903234747876478, + "learning_rate": 1.1224487073068023e-06, + "loss": 0.6586, + "step": 27809 + }, + { + "epoch": 0.8523354174328798, + "grad_norm": 1.8542827687663064, + "learning_rate": 1.1219918249483008e-06, + "loss": 0.6497, + "step": 27810 + }, + { + "epoch": 0.8523660659556209, + "grad_norm": 2.0532209859078767, + "learning_rate": 1.121535030067088e-06, + "loss": 0.6815, + "step": 27811 + }, + { + "epoch": 0.8523967144783622, + "grad_norm": 1.78934105508356, + "learning_rate": 1.1210783226676613e-06, + "loss": 0.6117, + "step": 27812 + }, + { + "epoch": 0.8524273630011033, + "grad_norm": 1.9231502633008088, + "learning_rate": 1.1206217027545173e-06, + "loss": 0.6596, + "step": 27813 + }, + { + "epoch": 0.8524580115238446, + "grad_norm": 1.7723495345261542, + "learning_rate": 1.1201651703321648e-06, + "loss": 0.6747, + "step": 27814 + }, + { + "epoch": 0.8524886600465857, + "grad_norm": 0.6708683786561888, + "learning_rate": 1.1197087254050965e-06, + "loss": 0.4928, + "step": 27815 + }, + { + "epoch": 0.852519308569327, + "grad_norm": 1.7743156345916702, + "learning_rate": 1.1192523679778078e-06, + "loss": 0.6326, + "step": 27816 + }, + { + "epoch": 0.8525499570920682, + "grad_norm": 2.0536900379794956, + "learning_rate": 1.1187960980547973e-06, + "loss": 0.7189, + "step": 27817 + }, + { + "epoch": 0.8525806056148094, + "grad_norm": 1.7727765294347857, + "learning_rate": 1.1183399156405617e-06, + "loss": 0.6802, + "step": 27818 + }, + { + "epoch": 0.8526112541375506, + "grad_norm": 1.9043801324910044, + "learning_rate": 1.1178838207395981e-06, + "loss": 0.6955, + "step": 27819 + }, + { + "epoch": 0.8526419026602918, + "grad_norm": 1.6652996268441715, + "learning_rate": 1.1174278133563953e-06, + "loss": 0.6159, + "step": 27820 + }, + { + "epoch": 0.852672551183033, + "grad_norm": 1.5788047639473293, + "learning_rate": 1.1169718934954487e-06, + "loss": 0.5779, + "step": 27821 + }, + { + "epoch": 0.8527031997057741, + "grad_norm": 1.7332939596132473, + "learning_rate": 1.1165160611612524e-06, + "loss": 0.6356, + "step": 27822 + }, + { + "epoch": 0.8527338482285154, + "grad_norm": 1.6960522947700147, + "learning_rate": 1.1160603163582973e-06, + "loss": 0.6299, + "step": 27823 + }, + { + "epoch": 0.8527644967512565, + "grad_norm": 1.7072866334710521, + "learning_rate": 1.1156046590910686e-06, + "loss": 0.5712, + "step": 27824 + }, + { + "epoch": 0.8527951452739978, + "grad_norm": 1.7484399092215361, + "learning_rate": 1.115149089364065e-06, + "loss": 0.6648, + "step": 27825 + }, + { + "epoch": 0.852825793796739, + "grad_norm": 1.8922396658393534, + "learning_rate": 1.114693607181767e-06, + "loss": 0.6541, + "step": 27826 + }, + { + "epoch": 0.8528564423194802, + "grad_norm": 1.9878140187152296, + "learning_rate": 1.1142382125486694e-06, + "loss": 0.6293, + "step": 27827 + }, + { + "epoch": 0.8528870908422214, + "grad_norm": 1.7311045144885207, + "learning_rate": 1.1137829054692539e-06, + "loss": 0.5978, + "step": 27828 + }, + { + "epoch": 0.8529177393649626, + "grad_norm": 1.6679602214801132, + "learning_rate": 1.1133276859480102e-06, + "loss": 0.5876, + "step": 27829 + }, + { + "epoch": 0.8529483878877038, + "grad_norm": 1.7936211837711107, + "learning_rate": 1.1128725539894237e-06, + "loss": 0.6045, + "step": 27830 + }, + { + "epoch": 0.852979036410445, + "grad_norm": 1.9252673429073235, + "learning_rate": 1.112417509597975e-06, + "loss": 0.6126, + "step": 27831 + }, + { + "epoch": 0.8530096849331862, + "grad_norm": 1.796918147208208, + "learning_rate": 1.1119625527781518e-06, + "loss": 0.5636, + "step": 27832 + }, + { + "epoch": 0.8530403334559274, + "grad_norm": 1.6546516948337457, + "learning_rate": 1.1115076835344374e-06, + "loss": 0.6299, + "step": 27833 + }, + { + "epoch": 0.8530709819786686, + "grad_norm": 1.5873563881699049, + "learning_rate": 1.1110529018713112e-06, + "loss": 0.6925, + "step": 27834 + }, + { + "epoch": 0.8531016305014099, + "grad_norm": 1.5405440659208702, + "learning_rate": 1.110598207793252e-06, + "loss": 0.6348, + "step": 27835 + }, + { + "epoch": 0.853132279024151, + "grad_norm": 1.6277041704057693, + "learning_rate": 1.1101436013047473e-06, + "loss": 0.5553, + "step": 27836 + }, + { + "epoch": 0.8531629275468923, + "grad_norm": 1.8975850856302574, + "learning_rate": 1.109689082410269e-06, + "loss": 0.6786, + "step": 27837 + }, + { + "epoch": 0.8531935760696334, + "grad_norm": 1.7517874486096678, + "learning_rate": 1.1092346511143015e-06, + "loss": 0.6327, + "step": 27838 + }, + { + "epoch": 0.8532242245923747, + "grad_norm": 1.6954354546234862, + "learning_rate": 1.1087803074213187e-06, + "loss": 0.6314, + "step": 27839 + }, + { + "epoch": 0.8532548731151158, + "grad_norm": 1.7956054252743516, + "learning_rate": 1.1083260513357973e-06, + "loss": 0.5689, + "step": 27840 + }, + { + "epoch": 0.8532855216378571, + "grad_norm": 1.6368406403806148, + "learning_rate": 1.107871882862217e-06, + "loss": 0.5851, + "step": 27841 + }, + { + "epoch": 0.8533161701605982, + "grad_norm": 0.6792603920749046, + "learning_rate": 1.1074178020050474e-06, + "loss": 0.531, + "step": 27842 + }, + { + "epoch": 0.8533468186833395, + "grad_norm": 1.7942475629209789, + "learning_rate": 1.1069638087687662e-06, + "loss": 0.6775, + "step": 27843 + }, + { + "epoch": 0.8533774672060807, + "grad_norm": 0.6619516503694676, + "learning_rate": 1.1065099031578475e-06, + "loss": 0.5318, + "step": 27844 + }, + { + "epoch": 0.8534081157288219, + "grad_norm": 1.7113389272864508, + "learning_rate": 1.106056085176761e-06, + "loss": 0.6174, + "step": 27845 + }, + { + "epoch": 0.8534387642515631, + "grad_norm": 0.6902715594908104, + "learning_rate": 1.105602354829981e-06, + "loss": 0.5283, + "step": 27846 + }, + { + "epoch": 0.8534694127743043, + "grad_norm": 1.7715476710848483, + "learning_rate": 1.105148712121974e-06, + "loss": 0.6774, + "step": 27847 + }, + { + "epoch": 0.8535000612970455, + "grad_norm": 1.7227122100107761, + "learning_rate": 1.104695157057214e-06, + "loss": 0.6021, + "step": 27848 + }, + { + "epoch": 0.8535307098197867, + "grad_norm": 1.557171663532462, + "learning_rate": 1.1042416896401698e-06, + "loss": 0.5657, + "step": 27849 + }, + { + "epoch": 0.8535613583425279, + "grad_norm": 1.8678390897493462, + "learning_rate": 1.1037883098753054e-06, + "loss": 0.6512, + "step": 27850 + }, + { + "epoch": 0.8535920068652691, + "grad_norm": 1.7531154194006024, + "learning_rate": 1.103335017767092e-06, + "loss": 0.5876, + "step": 27851 + }, + { + "epoch": 0.8536226553880103, + "grad_norm": 2.02757712945669, + "learning_rate": 1.102881813319997e-06, + "loss": 0.7059, + "step": 27852 + }, + { + "epoch": 0.8536533039107514, + "grad_norm": 1.8518755601058932, + "learning_rate": 1.1024286965384823e-06, + "loss": 0.7278, + "step": 27853 + }, + { + "epoch": 0.8536839524334927, + "grad_norm": 1.8264871248252852, + "learning_rate": 1.1019756674270132e-06, + "loss": 0.605, + "step": 27854 + }, + { + "epoch": 0.8537146009562339, + "grad_norm": 1.6395668775153933, + "learning_rate": 1.1015227259900573e-06, + "loss": 0.6001, + "step": 27855 + }, + { + "epoch": 0.8537452494789751, + "grad_norm": 1.5123529940231526, + "learning_rate": 1.1010698722320723e-06, + "loss": 0.5929, + "step": 27856 + }, + { + "epoch": 0.8537758980017163, + "grad_norm": 0.6605834147368859, + "learning_rate": 1.1006171061575256e-06, + "loss": 0.5138, + "step": 27857 + }, + { + "epoch": 0.8538065465244575, + "grad_norm": 1.6896744184389327, + "learning_rate": 1.1001644277708723e-06, + "loss": 0.6383, + "step": 27858 + }, + { + "epoch": 0.8538371950471987, + "grad_norm": 1.7159104758596997, + "learning_rate": 1.099711837076577e-06, + "loss": 0.6596, + "step": 27859 + }, + { + "epoch": 0.8538678435699399, + "grad_norm": 1.6171210068915498, + "learning_rate": 1.0992593340791003e-06, + "loss": 0.5588, + "step": 27860 + }, + { + "epoch": 0.8538984920926811, + "grad_norm": 1.7307017121283426, + "learning_rate": 1.0988069187828953e-06, + "loss": 0.664, + "step": 27861 + }, + { + "epoch": 0.8539291406154224, + "grad_norm": 1.857243148906211, + "learning_rate": 1.0983545911924244e-06, + "loss": 0.6371, + "step": 27862 + }, + { + "epoch": 0.8539597891381635, + "grad_norm": 1.778646709040151, + "learning_rate": 1.0979023513121457e-06, + "loss": 0.7153, + "step": 27863 + }, + { + "epoch": 0.8539904376609048, + "grad_norm": 0.6812901448442374, + "learning_rate": 1.0974501991465091e-06, + "loss": 0.5319, + "step": 27864 + }, + { + "epoch": 0.8540210861836459, + "grad_norm": 0.6762793937342177, + "learning_rate": 1.0969981346999747e-06, + "loss": 0.528, + "step": 27865 + }, + { + "epoch": 0.8540517347063872, + "grad_norm": 1.6806557322417104, + "learning_rate": 1.0965461579769975e-06, + "loss": 0.6638, + "step": 27866 + }, + { + "epoch": 0.8540823832291283, + "grad_norm": 1.897041946372697, + "learning_rate": 1.0960942689820275e-06, + "loss": 0.7506, + "step": 27867 + }, + { + "epoch": 0.8541130317518696, + "grad_norm": 1.5741251311923368, + "learning_rate": 1.09564246771952e-06, + "loss": 0.7065, + "step": 27868 + }, + { + "epoch": 0.8541436802746107, + "grad_norm": 0.634110504208063, + "learning_rate": 1.0951907541939244e-06, + "loss": 0.5001, + "step": 27869 + }, + { + "epoch": 0.854174328797352, + "grad_norm": 1.6338377465831266, + "learning_rate": 1.094739128409692e-06, + "loss": 0.6144, + "step": 27870 + }, + { + "epoch": 0.8542049773200932, + "grad_norm": 1.9372418380626197, + "learning_rate": 1.0942875903712756e-06, + "loss": 0.5947, + "step": 27871 + }, + { + "epoch": 0.8542356258428344, + "grad_norm": 0.6718223008297841, + "learning_rate": 1.093836140083121e-06, + "loss": 0.5346, + "step": 27872 + }, + { + "epoch": 0.8542662743655756, + "grad_norm": 1.6876855774208226, + "learning_rate": 1.0933847775496765e-06, + "loss": 0.5714, + "step": 27873 + }, + { + "epoch": 0.8542969228883168, + "grad_norm": 1.5553437173400222, + "learning_rate": 1.092933502775393e-06, + "loss": 0.6092, + "step": 27874 + }, + { + "epoch": 0.854327571411058, + "grad_norm": 1.695469181136745, + "learning_rate": 1.0924823157647124e-06, + "loss": 0.632, + "step": 27875 + }, + { + "epoch": 0.8543582199337992, + "grad_norm": 1.8113263327742044, + "learning_rate": 1.0920312165220836e-06, + "loss": 0.729, + "step": 27876 + }, + { + "epoch": 0.8543888684565404, + "grad_norm": 1.7990148187124833, + "learning_rate": 1.0915802050519519e-06, + "loss": 0.6389, + "step": 27877 + }, + { + "epoch": 0.8544195169792816, + "grad_norm": 1.6113255260492052, + "learning_rate": 1.091129281358757e-06, + "loss": 0.718, + "step": 27878 + }, + { + "epoch": 0.8544501655020228, + "grad_norm": 1.561295417901993, + "learning_rate": 1.0906784454469478e-06, + "loss": 0.6231, + "step": 27879 + }, + { + "epoch": 0.8544808140247641, + "grad_norm": 1.6432190949306822, + "learning_rate": 1.0902276973209592e-06, + "loss": 0.645, + "step": 27880 + }, + { + "epoch": 0.8545114625475052, + "grad_norm": 1.6356200881753469, + "learning_rate": 1.08977703698524e-06, + "loss": 0.6376, + "step": 27881 + }, + { + "epoch": 0.8545421110702465, + "grad_norm": 1.836139487460372, + "learning_rate": 1.089326464444228e-06, + "loss": 0.8464, + "step": 27882 + }, + { + "epoch": 0.8545727595929876, + "grad_norm": 1.7741839788396025, + "learning_rate": 1.0888759797023606e-06, + "loss": 0.7539, + "step": 27883 + }, + { + "epoch": 0.8546034081157288, + "grad_norm": 1.5909177994253565, + "learning_rate": 1.0884255827640778e-06, + "loss": 0.6538, + "step": 27884 + }, + { + "epoch": 0.85463405663847, + "grad_norm": 1.6360240499467857, + "learning_rate": 1.0879752736338201e-06, + "loss": 0.5644, + "step": 27885 + }, + { + "epoch": 0.8546647051612112, + "grad_norm": 1.4819814937793072, + "learning_rate": 1.0875250523160197e-06, + "loss": 0.5039, + "step": 27886 + }, + { + "epoch": 0.8546953536839524, + "grad_norm": 0.6758270358645366, + "learning_rate": 1.0870749188151153e-06, + "loss": 0.4995, + "step": 27887 + }, + { + "epoch": 0.8547260022066936, + "grad_norm": 0.6790127710073903, + "learning_rate": 1.0866248731355443e-06, + "loss": 0.5132, + "step": 27888 + }, + { + "epoch": 0.8547566507294349, + "grad_norm": 1.6607975291031674, + "learning_rate": 1.086174915281738e-06, + "loss": 0.542, + "step": 27889 + }, + { + "epoch": 0.854787299252176, + "grad_norm": 1.5546064902149037, + "learning_rate": 1.0857250452581326e-06, + "loss": 0.6204, + "step": 27890 + }, + { + "epoch": 0.8548179477749173, + "grad_norm": 1.8953638183360446, + "learning_rate": 1.0852752630691566e-06, + "loss": 0.6448, + "step": 27891 + }, + { + "epoch": 0.8548485962976584, + "grad_norm": 1.8786278671191183, + "learning_rate": 1.0848255687192444e-06, + "loss": 0.6322, + "step": 27892 + }, + { + "epoch": 0.8548792448203997, + "grad_norm": 1.5829759469980398, + "learning_rate": 1.084375962212829e-06, + "loss": 0.6301, + "step": 27893 + }, + { + "epoch": 0.8549098933431408, + "grad_norm": 1.818654676217284, + "learning_rate": 1.0839264435543363e-06, + "loss": 0.7016, + "step": 27894 + }, + { + "epoch": 0.8549405418658821, + "grad_norm": 1.9657768140435465, + "learning_rate": 1.0834770127481975e-06, + "loss": 0.6413, + "step": 27895 + }, + { + "epoch": 0.8549711903886232, + "grad_norm": 1.7899440934442263, + "learning_rate": 1.083027669798844e-06, + "loss": 0.6178, + "step": 27896 + }, + { + "epoch": 0.8550018389113645, + "grad_norm": 1.8443678266771748, + "learning_rate": 1.0825784147106978e-06, + "loss": 0.5549, + "step": 27897 + }, + { + "epoch": 0.8550324874341056, + "grad_norm": 1.7302326865107713, + "learning_rate": 1.0821292474881894e-06, + "loss": 0.5737, + "step": 27898 + }, + { + "epoch": 0.8550631359568469, + "grad_norm": 1.6707703301861334, + "learning_rate": 1.0816801681357402e-06, + "loss": 0.6159, + "step": 27899 + }, + { + "epoch": 0.8550937844795881, + "grad_norm": 1.6587805716354895, + "learning_rate": 1.081231176657782e-06, + "loss": 0.5902, + "step": 27900 + }, + { + "epoch": 0.8551244330023293, + "grad_norm": 2.023523583263772, + "learning_rate": 1.0807822730587348e-06, + "loss": 0.6881, + "step": 27901 + }, + { + "epoch": 0.8551550815250705, + "grad_norm": 1.6383864290704004, + "learning_rate": 1.080333457343019e-06, + "loss": 0.6083, + "step": 27902 + }, + { + "epoch": 0.8551857300478117, + "grad_norm": 1.7057503231194562, + "learning_rate": 1.0798847295150617e-06, + "loss": 0.5893, + "step": 27903 + }, + { + "epoch": 0.8552163785705529, + "grad_norm": 2.0102999689775762, + "learning_rate": 1.0794360895792832e-06, + "loss": 0.693, + "step": 27904 + }, + { + "epoch": 0.8552470270932941, + "grad_norm": 1.6028463911575404, + "learning_rate": 1.0789875375401016e-06, + "loss": 0.6123, + "step": 27905 + }, + { + "epoch": 0.8552776756160353, + "grad_norm": 1.4843550660739653, + "learning_rate": 1.0785390734019386e-06, + "loss": 0.5025, + "step": 27906 + }, + { + "epoch": 0.8553083241387766, + "grad_norm": 1.7229766038819971, + "learning_rate": 1.078090697169213e-06, + "loss": 0.6708, + "step": 27907 + }, + { + "epoch": 0.8553389726615177, + "grad_norm": 1.7963385799085134, + "learning_rate": 1.0776424088463432e-06, + "loss": 0.6624, + "step": 27908 + }, + { + "epoch": 0.855369621184259, + "grad_norm": 1.7857586239408765, + "learning_rate": 1.0771942084377473e-06, + "loss": 0.561, + "step": 27909 + }, + { + "epoch": 0.8554002697070001, + "grad_norm": 1.8858207888302763, + "learning_rate": 1.0767460959478348e-06, + "loss": 0.6304, + "step": 27910 + }, + { + "epoch": 0.8554309182297414, + "grad_norm": 1.7155109692403994, + "learning_rate": 1.076298071381031e-06, + "loss": 0.594, + "step": 27911 + }, + { + "epoch": 0.8554615667524825, + "grad_norm": 0.6695952119069769, + "learning_rate": 1.075850134741745e-06, + "loss": 0.5087, + "step": 27912 + }, + { + "epoch": 0.8554922152752238, + "grad_norm": 1.6809125849774411, + "learning_rate": 1.0754022860343882e-06, + "loss": 0.6416, + "step": 27913 + }, + { + "epoch": 0.8555228637979649, + "grad_norm": 1.7797717224469434, + "learning_rate": 1.0749545252633775e-06, + "loss": 0.7329, + "step": 27914 + }, + { + "epoch": 0.8555535123207061, + "grad_norm": 0.6820782877913836, + "learning_rate": 1.0745068524331247e-06, + "loss": 0.5239, + "step": 27915 + }, + { + "epoch": 0.8555841608434473, + "grad_norm": 1.7764740996577766, + "learning_rate": 1.0740592675480377e-06, + "loss": 0.6508, + "step": 27916 + }, + { + "epoch": 0.8556148093661885, + "grad_norm": 1.9894102783832301, + "learning_rate": 1.0736117706125282e-06, + "loss": 0.6221, + "step": 27917 + }, + { + "epoch": 0.8556454578889298, + "grad_norm": 1.8173049648321282, + "learning_rate": 1.073164361631006e-06, + "loss": 0.6042, + "step": 27918 + }, + { + "epoch": 0.8556761064116709, + "grad_norm": 1.81722065174181, + "learning_rate": 1.0727170406078824e-06, + "loss": 0.724, + "step": 27919 + }, + { + "epoch": 0.8557067549344122, + "grad_norm": 1.917006720928186, + "learning_rate": 1.0722698075475602e-06, + "loss": 0.6459, + "step": 27920 + }, + { + "epoch": 0.8557374034571533, + "grad_norm": 0.654651660518425, + "learning_rate": 1.0718226624544447e-06, + "loss": 0.5004, + "step": 27921 + }, + { + "epoch": 0.8557680519798946, + "grad_norm": 1.654107131396366, + "learning_rate": 1.0713756053329493e-06, + "loss": 0.6914, + "step": 27922 + }, + { + "epoch": 0.8557987005026357, + "grad_norm": 1.6475979996377441, + "learning_rate": 1.0709286361874737e-06, + "loss": 0.666, + "step": 27923 + }, + { + "epoch": 0.855829349025377, + "grad_norm": 1.8134650909832408, + "learning_rate": 1.0704817550224222e-06, + "loss": 0.7405, + "step": 27924 + }, + { + "epoch": 0.8558599975481181, + "grad_norm": 0.6876896956612779, + "learning_rate": 1.0700349618421979e-06, + "loss": 0.5325, + "step": 27925 + }, + { + "epoch": 0.8558906460708594, + "grad_norm": 1.8792770944174122, + "learning_rate": 1.0695882566512028e-06, + "loss": 0.5859, + "step": 27926 + }, + { + "epoch": 0.8559212945936006, + "grad_norm": 1.7726825865468685, + "learning_rate": 1.0691416394538434e-06, + "loss": 0.6765, + "step": 27927 + }, + { + "epoch": 0.8559519431163418, + "grad_norm": 1.7278704083719703, + "learning_rate": 1.068695110254513e-06, + "loss": 0.6658, + "step": 27928 + }, + { + "epoch": 0.855982591639083, + "grad_norm": 1.8442711340527784, + "learning_rate": 1.0682486690576154e-06, + "loss": 0.7012, + "step": 27929 + }, + { + "epoch": 0.8560132401618242, + "grad_norm": 1.6884568725363143, + "learning_rate": 1.0678023158675521e-06, + "loss": 0.5778, + "step": 27930 + }, + { + "epoch": 0.8560438886845654, + "grad_norm": 1.9284648962458055, + "learning_rate": 1.0673560506887159e-06, + "loss": 0.6653, + "step": 27931 + }, + { + "epoch": 0.8560745372073066, + "grad_norm": 2.0795159554551064, + "learning_rate": 1.0669098735255035e-06, + "loss": 0.5639, + "step": 27932 + }, + { + "epoch": 0.8561051857300478, + "grad_norm": 1.6831974418728681, + "learning_rate": 1.0664637843823178e-06, + "loss": 0.5921, + "step": 27933 + }, + { + "epoch": 0.856135834252789, + "grad_norm": 0.6545328828461748, + "learning_rate": 1.06601778326355e-06, + "loss": 0.5138, + "step": 27934 + }, + { + "epoch": 0.8561664827755302, + "grad_norm": 1.412655748335713, + "learning_rate": 1.0655718701735918e-06, + "loss": 0.5609, + "step": 27935 + }, + { + "epoch": 0.8561971312982715, + "grad_norm": 1.8411186822261303, + "learning_rate": 1.0651260451168411e-06, + "loss": 0.6983, + "step": 27936 + }, + { + "epoch": 0.8562277798210126, + "grad_norm": 1.7507126379863103, + "learning_rate": 1.0646803080976886e-06, + "loss": 0.6563, + "step": 27937 + }, + { + "epoch": 0.8562584283437539, + "grad_norm": 1.8891911849217493, + "learning_rate": 1.06423465912053e-06, + "loss": 0.6914, + "step": 27938 + }, + { + "epoch": 0.856289076866495, + "grad_norm": 1.5902136255895425, + "learning_rate": 1.0637890981897514e-06, + "loss": 0.6221, + "step": 27939 + }, + { + "epoch": 0.8563197253892363, + "grad_norm": 0.6997819105149092, + "learning_rate": 1.0633436253097451e-06, + "loss": 0.5414, + "step": 27940 + }, + { + "epoch": 0.8563503739119774, + "grad_norm": 1.6808343747193548, + "learning_rate": 1.062898240484903e-06, + "loss": 0.6188, + "step": 27941 + }, + { + "epoch": 0.8563810224347187, + "grad_norm": 0.6491487607949741, + "learning_rate": 1.0624529437196107e-06, + "loss": 0.4906, + "step": 27942 + }, + { + "epoch": 0.8564116709574598, + "grad_norm": 1.7979552514628965, + "learning_rate": 1.0620077350182546e-06, + "loss": 0.7263, + "step": 27943 + }, + { + "epoch": 0.8564423194802011, + "grad_norm": 1.7957695391097048, + "learning_rate": 1.0615626143852232e-06, + "loss": 0.5753, + "step": 27944 + }, + { + "epoch": 0.8564729680029423, + "grad_norm": 1.7901434865554904, + "learning_rate": 1.061117581824902e-06, + "loss": 0.7477, + "step": 27945 + }, + { + "epoch": 0.8565036165256834, + "grad_norm": 1.7069784851737368, + "learning_rate": 1.0606726373416787e-06, + "loss": 0.704, + "step": 27946 + }, + { + "epoch": 0.8565342650484247, + "grad_norm": 1.6196979574961994, + "learning_rate": 1.060227780939933e-06, + "loss": 0.7058, + "step": 27947 + }, + { + "epoch": 0.8565649135711658, + "grad_norm": 1.6640705551309665, + "learning_rate": 1.0597830126240505e-06, + "loss": 0.6408, + "step": 27948 + }, + { + "epoch": 0.8565955620939071, + "grad_norm": 1.8089109276383544, + "learning_rate": 1.0593383323984162e-06, + "loss": 0.6859, + "step": 27949 + }, + { + "epoch": 0.8566262106166482, + "grad_norm": 1.868072291124295, + "learning_rate": 1.0588937402674071e-06, + "loss": 0.7005, + "step": 27950 + }, + { + "epoch": 0.8566568591393895, + "grad_norm": 1.7465048101145866, + "learning_rate": 1.0584492362354027e-06, + "loss": 0.5497, + "step": 27951 + }, + { + "epoch": 0.8566875076621306, + "grad_norm": 1.7165588914965044, + "learning_rate": 1.0580048203067904e-06, + "loss": 0.6211, + "step": 27952 + }, + { + "epoch": 0.8567181561848719, + "grad_norm": 1.5496976700826413, + "learning_rate": 1.0575604924859416e-06, + "loss": 0.56, + "step": 27953 + }, + { + "epoch": 0.856748804707613, + "grad_norm": 1.657095464874734, + "learning_rate": 1.0571162527772394e-06, + "loss": 0.6303, + "step": 27954 + }, + { + "epoch": 0.8567794532303543, + "grad_norm": 0.6511274685141352, + "learning_rate": 1.0566721011850567e-06, + "loss": 0.5061, + "step": 27955 + }, + { + "epoch": 0.8568101017530955, + "grad_norm": 1.6721358440922978, + "learning_rate": 1.0562280377137723e-06, + "loss": 0.6516, + "step": 27956 + }, + { + "epoch": 0.8568407502758367, + "grad_norm": 0.6825228576961269, + "learning_rate": 1.055784062367764e-06, + "loss": 0.513, + "step": 27957 + }, + { + "epoch": 0.8568713987985779, + "grad_norm": 1.633043472796882, + "learning_rate": 1.0553401751514004e-06, + "loss": 0.6232, + "step": 27958 + }, + { + "epoch": 0.8569020473213191, + "grad_norm": 1.7546026479737677, + "learning_rate": 1.0548963760690601e-06, + "loss": 0.6433, + "step": 27959 + }, + { + "epoch": 0.8569326958440603, + "grad_norm": 2.0680400387469806, + "learning_rate": 1.0544526651251152e-06, + "loss": 0.6532, + "step": 27960 + }, + { + "epoch": 0.8569633443668015, + "grad_norm": 1.8845398410184, + "learning_rate": 1.054009042323938e-06, + "loss": 0.6833, + "step": 27961 + }, + { + "epoch": 0.8569939928895427, + "grad_norm": 1.8760791226922953, + "learning_rate": 1.0535655076698947e-06, + "loss": 0.6153, + "step": 27962 + }, + { + "epoch": 0.857024641412284, + "grad_norm": 1.7345707448994507, + "learning_rate": 1.053122061167362e-06, + "loss": 0.7185, + "step": 27963 + }, + { + "epoch": 0.8570552899350251, + "grad_norm": 1.62595814638849, + "learning_rate": 1.0526787028207065e-06, + "loss": 0.6699, + "step": 27964 + }, + { + "epoch": 0.8570859384577664, + "grad_norm": 0.6697812798556957, + "learning_rate": 1.0522354326342988e-06, + "loss": 0.5232, + "step": 27965 + }, + { + "epoch": 0.8571165869805075, + "grad_norm": 1.8685399774075475, + "learning_rate": 1.0517922506125023e-06, + "loss": 0.6002, + "step": 27966 + }, + { + "epoch": 0.8571472355032488, + "grad_norm": 1.8439696082422483, + "learning_rate": 1.0513491567596856e-06, + "loss": 0.6355, + "step": 27967 + }, + { + "epoch": 0.8571778840259899, + "grad_norm": 1.7205986015026715, + "learning_rate": 1.0509061510802188e-06, + "loss": 0.7053, + "step": 27968 + }, + { + "epoch": 0.8572085325487312, + "grad_norm": 1.753056080167251, + "learning_rate": 1.0504632335784603e-06, + "loss": 0.6613, + "step": 27969 + }, + { + "epoch": 0.8572391810714723, + "grad_norm": 1.7009321826074602, + "learning_rate": 1.050020404258778e-06, + "loss": 0.5853, + "step": 27970 + }, + { + "epoch": 0.8572698295942136, + "grad_norm": 1.8233172619367795, + "learning_rate": 1.049577663125536e-06, + "loss": 0.6128, + "step": 27971 + }, + { + "epoch": 0.8573004781169548, + "grad_norm": 0.6737530933175566, + "learning_rate": 1.0491350101830934e-06, + "loss": 0.515, + "step": 27972 + }, + { + "epoch": 0.857331126639696, + "grad_norm": 1.7613315410426795, + "learning_rate": 1.0486924454358137e-06, + "loss": 0.6522, + "step": 27973 + }, + { + "epoch": 0.8573617751624372, + "grad_norm": 1.4980938504668575, + "learning_rate": 1.0482499688880598e-06, + "loss": 0.5552, + "step": 27974 + }, + { + "epoch": 0.8573924236851784, + "grad_norm": 1.7693972550480426, + "learning_rate": 1.0478075805441879e-06, + "loss": 0.7034, + "step": 27975 + }, + { + "epoch": 0.8574230722079196, + "grad_norm": 1.7232796812771827, + "learning_rate": 1.04736528040856e-06, + "loss": 0.6184, + "step": 27976 + }, + { + "epoch": 0.8574537207306607, + "grad_norm": 1.7127587809255285, + "learning_rate": 1.0469230684855302e-06, + "loss": 0.5759, + "step": 27977 + }, + { + "epoch": 0.857484369253402, + "grad_norm": 1.7119100368409057, + "learning_rate": 1.0464809447794587e-06, + "loss": 0.7646, + "step": 27978 + }, + { + "epoch": 0.8575150177761431, + "grad_norm": 1.6533086427448158, + "learning_rate": 1.0460389092947031e-06, + "loss": 0.6774, + "step": 27979 + }, + { + "epoch": 0.8575456662988844, + "grad_norm": 1.863009813449869, + "learning_rate": 1.0455969620356154e-06, + "loss": 0.623, + "step": 27980 + }, + { + "epoch": 0.8575763148216256, + "grad_norm": 1.7669087364570293, + "learning_rate": 1.045155103006552e-06, + "loss": 0.5882, + "step": 27981 + }, + { + "epoch": 0.8576069633443668, + "grad_norm": 1.6061878673267846, + "learning_rate": 1.0447133322118675e-06, + "loss": 0.6185, + "step": 27982 + }, + { + "epoch": 0.857637611867108, + "grad_norm": 1.7464021325401264, + "learning_rate": 1.0442716496559136e-06, + "loss": 0.6809, + "step": 27983 + }, + { + "epoch": 0.8576682603898492, + "grad_norm": 1.5385219014345333, + "learning_rate": 1.0438300553430413e-06, + "loss": 0.6802, + "step": 27984 + }, + { + "epoch": 0.8576989089125904, + "grad_norm": 1.590406333789719, + "learning_rate": 1.0433885492776052e-06, + "loss": 0.7364, + "step": 27985 + }, + { + "epoch": 0.8577295574353316, + "grad_norm": 1.6550277908203308, + "learning_rate": 1.0429471314639517e-06, + "loss": 0.5975, + "step": 27986 + }, + { + "epoch": 0.8577602059580728, + "grad_norm": 0.6760655187725039, + "learning_rate": 1.0425058019064328e-06, + "loss": 0.5202, + "step": 27987 + }, + { + "epoch": 0.857790854480814, + "grad_norm": 1.846224386319827, + "learning_rate": 1.0420645606093948e-06, + "loss": 0.6266, + "step": 27988 + }, + { + "epoch": 0.8578215030035552, + "grad_norm": 1.8103063999047144, + "learning_rate": 1.0416234075771869e-06, + "loss": 0.6478, + "step": 27989 + }, + { + "epoch": 0.8578521515262965, + "grad_norm": 1.8098982466189786, + "learning_rate": 1.0411823428141577e-06, + "loss": 0.682, + "step": 27990 + }, + { + "epoch": 0.8578828000490376, + "grad_norm": 1.8177547741474342, + "learning_rate": 1.040741366324649e-06, + "loss": 0.5718, + "step": 27991 + }, + { + "epoch": 0.8579134485717789, + "grad_norm": 1.824434895121659, + "learning_rate": 1.0403004781130078e-06, + "loss": 0.6201, + "step": 27992 + }, + { + "epoch": 0.85794409709452, + "grad_norm": 0.7000643552396754, + "learning_rate": 1.03985967818358e-06, + "loss": 0.5148, + "step": 27993 + }, + { + "epoch": 0.8579747456172613, + "grad_norm": 0.6632792948660627, + "learning_rate": 1.0394189665407062e-06, + "loss": 0.5127, + "step": 27994 + }, + { + "epoch": 0.8580053941400024, + "grad_norm": 0.6436283354967564, + "learning_rate": 1.0389783431887313e-06, + "loss": 0.5139, + "step": 27995 + }, + { + "epoch": 0.8580360426627437, + "grad_norm": 1.695442816866467, + "learning_rate": 1.038537808131994e-06, + "loss": 0.6299, + "step": 27996 + }, + { + "epoch": 0.8580666911854848, + "grad_norm": 1.5843325758196474, + "learning_rate": 1.0380973613748368e-06, + "loss": 0.5756, + "step": 27997 + }, + { + "epoch": 0.8580973397082261, + "grad_norm": 1.6820563823994188, + "learning_rate": 1.0376570029216003e-06, + "loss": 0.584, + "step": 27998 + }, + { + "epoch": 0.8581279882309673, + "grad_norm": 1.6510635227276627, + "learning_rate": 1.0372167327766213e-06, + "loss": 0.6845, + "step": 27999 + }, + { + "epoch": 0.8581586367537085, + "grad_norm": 1.8412119481688025, + "learning_rate": 1.0367765509442395e-06, + "loss": 0.6461, + "step": 28000 + }, + { + "epoch": 0.8581892852764497, + "grad_norm": 1.8099522809579474, + "learning_rate": 1.0363364574287926e-06, + "loss": 0.7251, + "step": 28001 + }, + { + "epoch": 0.8582199337991909, + "grad_norm": 0.6754431709436522, + "learning_rate": 1.035896452234615e-06, + "loss": 0.5294, + "step": 28002 + }, + { + "epoch": 0.8582505823219321, + "grad_norm": 0.6524619229171646, + "learning_rate": 1.0354565353660428e-06, + "loss": 0.5143, + "step": 28003 + }, + { + "epoch": 0.8582812308446733, + "grad_norm": 0.6484377526190463, + "learning_rate": 1.035016706827413e-06, + "loss": 0.5184, + "step": 28004 + }, + { + "epoch": 0.8583118793674145, + "grad_norm": 1.829524211280801, + "learning_rate": 1.0345769666230553e-06, + "loss": 0.6514, + "step": 28005 + }, + { + "epoch": 0.8583425278901557, + "grad_norm": 1.5697242461220955, + "learning_rate": 1.0341373147573063e-06, + "loss": 0.6905, + "step": 28006 + }, + { + "epoch": 0.8583731764128969, + "grad_norm": 0.6500454922846545, + "learning_rate": 1.0336977512344925e-06, + "loss": 0.5147, + "step": 28007 + }, + { + "epoch": 0.858403824935638, + "grad_norm": 1.5834740391925846, + "learning_rate": 1.0332582760589539e-06, + "loss": 0.6131, + "step": 28008 + }, + { + "epoch": 0.8584344734583793, + "grad_norm": 1.5680728438174973, + "learning_rate": 1.0328188892350145e-06, + "loss": 0.6565, + "step": 28009 + }, + { + "epoch": 0.8584651219811205, + "grad_norm": 2.0219257889665525, + "learning_rate": 1.032379590767003e-06, + "loss": 0.6599, + "step": 28010 + }, + { + "epoch": 0.8584957705038617, + "grad_norm": 0.6627292257735126, + "learning_rate": 1.031940380659251e-06, + "loss": 0.4986, + "step": 28011 + }, + { + "epoch": 0.8585264190266029, + "grad_norm": 1.5281409985456642, + "learning_rate": 1.0315012589160855e-06, + "loss": 0.6587, + "step": 28012 + }, + { + "epoch": 0.8585570675493441, + "grad_norm": 1.7221376183603796, + "learning_rate": 1.0310622255418311e-06, + "loss": 0.6558, + "step": 28013 + }, + { + "epoch": 0.8585877160720853, + "grad_norm": 1.575506636001436, + "learning_rate": 1.0306232805408167e-06, + "loss": 0.6828, + "step": 28014 + }, + { + "epoch": 0.8586183645948265, + "grad_norm": 1.736691289132642, + "learning_rate": 1.0301844239173664e-06, + "loss": 0.682, + "step": 28015 + }, + { + "epoch": 0.8586490131175677, + "grad_norm": 1.6149785591624435, + "learning_rate": 1.0297456556758035e-06, + "loss": 0.6526, + "step": 28016 + }, + { + "epoch": 0.858679661640309, + "grad_norm": 0.6704938901886814, + "learning_rate": 1.0293069758204533e-06, + "loss": 0.5376, + "step": 28017 + }, + { + "epoch": 0.8587103101630501, + "grad_norm": 1.742149939594457, + "learning_rate": 1.0288683843556324e-06, + "loss": 0.631, + "step": 28018 + }, + { + "epoch": 0.8587409586857914, + "grad_norm": 1.8366721945182316, + "learning_rate": 1.0284298812856708e-06, + "loss": 0.6273, + "step": 28019 + }, + { + "epoch": 0.8587716072085325, + "grad_norm": 1.6364042317909522, + "learning_rate": 1.0279914666148848e-06, + "loss": 0.7181, + "step": 28020 + }, + { + "epoch": 0.8588022557312738, + "grad_norm": 1.8008752819199276, + "learning_rate": 1.0275531403475924e-06, + "loss": 0.5576, + "step": 28021 + }, + { + "epoch": 0.8588329042540149, + "grad_norm": 1.5126606584816518, + "learning_rate": 1.0271149024881143e-06, + "loss": 0.5928, + "step": 28022 + }, + { + "epoch": 0.8588635527767562, + "grad_norm": 1.6675657755014242, + "learning_rate": 1.0266767530407718e-06, + "loss": 0.6243, + "step": 28023 + }, + { + "epoch": 0.8588942012994973, + "grad_norm": 1.71966484890626, + "learning_rate": 1.0262386920098766e-06, + "loss": 0.6508, + "step": 28024 + }, + { + "epoch": 0.8589248498222386, + "grad_norm": 1.426235250969349, + "learning_rate": 1.0258007193997476e-06, + "loss": 0.4597, + "step": 28025 + }, + { + "epoch": 0.8589554983449798, + "grad_norm": 1.4333909748415854, + "learning_rate": 1.0253628352147016e-06, + "loss": 0.6028, + "step": 28026 + }, + { + "epoch": 0.858986146867721, + "grad_norm": 1.8549847934159205, + "learning_rate": 1.0249250394590527e-06, + "loss": 0.6696, + "step": 28027 + }, + { + "epoch": 0.8590167953904622, + "grad_norm": 1.7122374533295994, + "learning_rate": 1.024487332137113e-06, + "loss": 0.6172, + "step": 28028 + }, + { + "epoch": 0.8590474439132034, + "grad_norm": 0.6704641991451603, + "learning_rate": 1.0240497132531935e-06, + "loss": 0.49, + "step": 28029 + }, + { + "epoch": 0.8590780924359446, + "grad_norm": 2.1144116124734302, + "learning_rate": 1.023612182811612e-06, + "loss": 0.6563, + "step": 28030 + }, + { + "epoch": 0.8591087409586858, + "grad_norm": 1.775654874479279, + "learning_rate": 1.023174740816676e-06, + "loss": 0.6035, + "step": 28031 + }, + { + "epoch": 0.859139389481427, + "grad_norm": 1.6066483781513485, + "learning_rate": 1.0227373872726954e-06, + "loss": 0.5406, + "step": 28032 + }, + { + "epoch": 0.8591700380041682, + "grad_norm": 1.702500094789987, + "learning_rate": 1.022300122183979e-06, + "loss": 0.6192, + "step": 28033 + }, + { + "epoch": 0.8592006865269094, + "grad_norm": 0.6453848554332888, + "learning_rate": 1.0218629455548367e-06, + "loss": 0.504, + "step": 28034 + }, + { + "epoch": 0.8592313350496507, + "grad_norm": 0.6908739450574445, + "learning_rate": 1.0214258573895786e-06, + "loss": 0.5281, + "step": 28035 + }, + { + "epoch": 0.8592619835723918, + "grad_norm": 1.6909778641946467, + "learning_rate": 1.0209888576925064e-06, + "loss": 0.6393, + "step": 28036 + }, + { + "epoch": 0.8592926320951331, + "grad_norm": 1.7127493303161436, + "learning_rate": 1.020551946467928e-06, + "loss": 0.6638, + "step": 28037 + }, + { + "epoch": 0.8593232806178742, + "grad_norm": 1.7435962169010761, + "learning_rate": 1.0201151237201511e-06, + "loss": 0.6559, + "step": 28038 + }, + { + "epoch": 0.8593539291406154, + "grad_norm": 1.7500585654933956, + "learning_rate": 1.019678389453478e-06, + "loss": 0.5469, + "step": 28039 + }, + { + "epoch": 0.8593845776633566, + "grad_norm": 1.6468413638848431, + "learning_rate": 1.019241743672209e-06, + "loss": 0.623, + "step": 28040 + }, + { + "epoch": 0.8594152261860978, + "grad_norm": 1.6201730950232158, + "learning_rate": 1.0188051863806493e-06, + "loss": 0.6197, + "step": 28041 + }, + { + "epoch": 0.859445874708839, + "grad_norm": 1.6159355836774283, + "learning_rate": 1.0183687175831015e-06, + "loss": 0.581, + "step": 28042 + }, + { + "epoch": 0.8594765232315802, + "grad_norm": 1.5634587453618347, + "learning_rate": 1.0179323372838635e-06, + "loss": 0.5751, + "step": 28043 + }, + { + "epoch": 0.8595071717543215, + "grad_norm": 0.6880618828571203, + "learning_rate": 1.0174960454872351e-06, + "loss": 0.5538, + "step": 28044 + }, + { + "epoch": 0.8595378202770626, + "grad_norm": 1.6258033101488765, + "learning_rate": 1.0170598421975175e-06, + "loss": 0.65, + "step": 28045 + }, + { + "epoch": 0.8595684687998039, + "grad_norm": 1.6430669297874065, + "learning_rate": 1.0166237274190093e-06, + "loss": 0.6328, + "step": 28046 + }, + { + "epoch": 0.859599117322545, + "grad_norm": 0.6750766117347089, + "learning_rate": 1.0161877011560062e-06, + "loss": 0.5291, + "step": 28047 + }, + { + "epoch": 0.8596297658452863, + "grad_norm": 1.851452747543138, + "learning_rate": 1.0157517634128e-06, + "loss": 0.7263, + "step": 28048 + }, + { + "epoch": 0.8596604143680274, + "grad_norm": 1.6010803472407111, + "learning_rate": 1.0153159141936942e-06, + "loss": 0.672, + "step": 28049 + }, + { + "epoch": 0.8596910628907687, + "grad_norm": 1.7929130741196302, + "learning_rate": 1.0148801535029795e-06, + "loss": 0.6804, + "step": 28050 + }, + { + "epoch": 0.8597217114135098, + "grad_norm": 1.5979906921660358, + "learning_rate": 1.0144444813449483e-06, + "loss": 0.5707, + "step": 28051 + }, + { + "epoch": 0.8597523599362511, + "grad_norm": 1.7104131919691945, + "learning_rate": 1.0140088977238938e-06, + "loss": 0.5931, + "step": 28052 + }, + { + "epoch": 0.8597830084589922, + "grad_norm": 1.9730328530655847, + "learning_rate": 1.0135734026441101e-06, + "loss": 0.7123, + "step": 28053 + }, + { + "epoch": 0.8598136569817335, + "grad_norm": 1.957799187551128, + "learning_rate": 1.0131379961098876e-06, + "loss": 0.7504, + "step": 28054 + }, + { + "epoch": 0.8598443055044747, + "grad_norm": 1.753185951939159, + "learning_rate": 1.0127026781255144e-06, + "loss": 0.5539, + "step": 28055 + }, + { + "epoch": 0.8598749540272159, + "grad_norm": 1.7421873148436067, + "learning_rate": 1.012267448695281e-06, + "loss": 0.5967, + "step": 28056 + }, + { + "epoch": 0.8599056025499571, + "grad_norm": 1.716732754103509, + "learning_rate": 1.0118323078234782e-06, + "loss": 0.6507, + "step": 28057 + }, + { + "epoch": 0.8599362510726983, + "grad_norm": 1.5696546493930543, + "learning_rate": 1.0113972555143913e-06, + "loss": 0.5884, + "step": 28058 + }, + { + "epoch": 0.8599668995954395, + "grad_norm": 1.632519283500297, + "learning_rate": 1.010962291772304e-06, + "loss": 0.6518, + "step": 28059 + }, + { + "epoch": 0.8599975481181807, + "grad_norm": 1.612837498230178, + "learning_rate": 1.0105274166015078e-06, + "loss": 0.6176, + "step": 28060 + }, + { + "epoch": 0.8600281966409219, + "grad_norm": 1.798880302572338, + "learning_rate": 1.0100926300062829e-06, + "loss": 0.672, + "step": 28061 + }, + { + "epoch": 0.8600588451636632, + "grad_norm": 1.807496234098568, + "learning_rate": 1.0096579319909182e-06, + "loss": 0.5902, + "step": 28062 + }, + { + "epoch": 0.8600894936864043, + "grad_norm": 1.666183431507789, + "learning_rate": 1.0092233225596926e-06, + "loss": 0.5581, + "step": 28063 + }, + { + "epoch": 0.8601201422091456, + "grad_norm": 1.8446161592423527, + "learning_rate": 1.00878880171689e-06, + "loss": 0.6648, + "step": 28064 + }, + { + "epoch": 0.8601507907318867, + "grad_norm": 1.7322208084384851, + "learning_rate": 1.008354369466793e-06, + "loss": 0.7255, + "step": 28065 + }, + { + "epoch": 0.860181439254628, + "grad_norm": 1.9051354241087386, + "learning_rate": 1.007920025813679e-06, + "loss": 0.5629, + "step": 28066 + }, + { + "epoch": 0.8602120877773691, + "grad_norm": 1.7457721774349633, + "learning_rate": 1.0074857707618303e-06, + "loss": 0.562, + "step": 28067 + }, + { + "epoch": 0.8602427363001104, + "grad_norm": 1.7269361672097325, + "learning_rate": 1.0070516043155266e-06, + "loss": 0.6373, + "step": 28068 + }, + { + "epoch": 0.8602733848228515, + "grad_norm": 1.6903830238839903, + "learning_rate": 1.0066175264790446e-06, + "loss": 0.588, + "step": 28069 + }, + { + "epoch": 0.8603040333455927, + "grad_norm": 1.7315777991467964, + "learning_rate": 1.0061835372566574e-06, + "loss": 0.6741, + "step": 28070 + }, + { + "epoch": 0.860334681868334, + "grad_norm": 1.9546467582101288, + "learning_rate": 1.0057496366526486e-06, + "loss": 0.737, + "step": 28071 + }, + { + "epoch": 0.8603653303910751, + "grad_norm": 1.7778221540612198, + "learning_rate": 1.005315824671288e-06, + "loss": 0.683, + "step": 28072 + }, + { + "epoch": 0.8603959789138164, + "grad_norm": 1.7045115104911372, + "learning_rate": 1.0048821013168541e-06, + "loss": 0.5497, + "step": 28073 + }, + { + "epoch": 0.8604266274365575, + "grad_norm": 1.623509234069388, + "learning_rate": 1.0044484665936171e-06, + "loss": 0.6087, + "step": 28074 + }, + { + "epoch": 0.8604572759592988, + "grad_norm": 1.4672609827006955, + "learning_rate": 1.0040149205058501e-06, + "loss": 0.6156, + "step": 28075 + }, + { + "epoch": 0.8604879244820399, + "grad_norm": 1.6632027638243223, + "learning_rate": 1.0035814630578278e-06, + "loss": 0.6055, + "step": 28076 + }, + { + "epoch": 0.8605185730047812, + "grad_norm": 1.478253756182009, + "learning_rate": 1.0031480942538174e-06, + "loss": 0.5847, + "step": 28077 + }, + { + "epoch": 0.8605492215275223, + "grad_norm": 1.7560827094249647, + "learning_rate": 1.0027148140980903e-06, + "loss": 0.6517, + "step": 28078 + }, + { + "epoch": 0.8605798700502636, + "grad_norm": 1.7004441961705536, + "learning_rate": 1.0022816225949184e-06, + "loss": 0.7183, + "step": 28079 + }, + { + "epoch": 0.8606105185730047, + "grad_norm": 1.5612000647891058, + "learning_rate": 1.001848519748566e-06, + "loss": 0.6014, + "step": 28080 + }, + { + "epoch": 0.860641167095746, + "grad_norm": 1.7177003983831967, + "learning_rate": 1.0014155055633024e-06, + "loss": 0.6314, + "step": 28081 + }, + { + "epoch": 0.8606718156184872, + "grad_norm": 1.7770189855401952, + "learning_rate": 1.000982580043397e-06, + "loss": 0.705, + "step": 28082 + }, + { + "epoch": 0.8607024641412284, + "grad_norm": 1.721720720163391, + "learning_rate": 1.0005497431931099e-06, + "loss": 0.6974, + "step": 28083 + }, + { + "epoch": 0.8607331126639696, + "grad_norm": 1.9173786053114799, + "learning_rate": 1.0001169950167112e-06, + "loss": 0.7536, + "step": 28084 + }, + { + "epoch": 0.8607637611867108, + "grad_norm": 1.5330189979459736, + "learning_rate": 9.996843355184593e-07, + "loss": 0.6228, + "step": 28085 + }, + { + "epoch": 0.860794409709452, + "grad_norm": 1.9389431103474188, + "learning_rate": 9.992517647026213e-07, + "loss": 0.677, + "step": 28086 + }, + { + "epoch": 0.8608250582321932, + "grad_norm": 1.967329533369886, + "learning_rate": 9.988192825734611e-07, + "loss": 0.6513, + "step": 28087 + }, + { + "epoch": 0.8608557067549344, + "grad_norm": 1.6541020544074028, + "learning_rate": 9.983868891352343e-07, + "loss": 0.73, + "step": 28088 + }, + { + "epoch": 0.8608863552776757, + "grad_norm": 1.5951065711236865, + "learning_rate": 9.979545843922057e-07, + "loss": 0.6051, + "step": 28089 + }, + { + "epoch": 0.8609170038004168, + "grad_norm": 1.477734270492089, + "learning_rate": 9.975223683486356e-07, + "loss": 0.681, + "step": 28090 + }, + { + "epoch": 0.8609476523231581, + "grad_norm": 1.7099228573302938, + "learning_rate": 9.97090241008779e-07, + "loss": 0.5721, + "step": 28091 + }, + { + "epoch": 0.8609783008458992, + "grad_norm": 1.4494844722377591, + "learning_rate": 9.966582023768978e-07, + "loss": 0.6144, + "step": 28092 + }, + { + "epoch": 0.8610089493686405, + "grad_norm": 1.8941754323834656, + "learning_rate": 9.962262524572451e-07, + "loss": 0.7179, + "step": 28093 + }, + { + "epoch": 0.8610395978913816, + "grad_norm": 2.0159299867942364, + "learning_rate": 9.957943912540778e-07, + "loss": 0.6628, + "step": 28094 + }, + { + "epoch": 0.8610702464141229, + "grad_norm": 2.046931066293268, + "learning_rate": 9.953626187716559e-07, + "loss": 0.6805, + "step": 28095 + }, + { + "epoch": 0.861100894936864, + "grad_norm": 1.5637324111652025, + "learning_rate": 9.949309350142266e-07, + "loss": 0.4835, + "step": 28096 + }, + { + "epoch": 0.8611315434596053, + "grad_norm": 1.67427243451387, + "learning_rate": 9.94499339986047e-07, + "loss": 0.625, + "step": 28097 + }, + { + "epoch": 0.8611621919823464, + "grad_norm": 1.650816570701026, + "learning_rate": 9.940678336913723e-07, + "loss": 0.5774, + "step": 28098 + }, + { + "epoch": 0.8611928405050877, + "grad_norm": 0.7026167705727304, + "learning_rate": 9.936364161344492e-07, + "loss": 0.5278, + "step": 28099 + }, + { + "epoch": 0.8612234890278289, + "grad_norm": 0.678554643342103, + "learning_rate": 9.93205087319531e-07, + "loss": 0.5248, + "step": 28100 + }, + { + "epoch": 0.86125413755057, + "grad_norm": 1.991292307869919, + "learning_rate": 9.927738472508687e-07, + "loss": 0.6881, + "step": 28101 + }, + { + "epoch": 0.8612847860733113, + "grad_norm": 1.6078256821914687, + "learning_rate": 9.923426959327099e-07, + "loss": 0.6471, + "step": 28102 + }, + { + "epoch": 0.8613154345960524, + "grad_norm": 2.1819929502632256, + "learning_rate": 9.919116333693035e-07, + "loss": 0.6789, + "step": 28103 + }, + { + "epoch": 0.8613460831187937, + "grad_norm": 1.6266521927982156, + "learning_rate": 9.914806595648952e-07, + "loss": 0.572, + "step": 28104 + }, + { + "epoch": 0.8613767316415348, + "grad_norm": 1.6094189669379007, + "learning_rate": 9.910497745237334e-07, + "loss": 0.586, + "step": 28105 + }, + { + "epoch": 0.8614073801642761, + "grad_norm": 1.6346752770969502, + "learning_rate": 9.906189782500652e-07, + "loss": 0.5993, + "step": 28106 + }, + { + "epoch": 0.8614380286870172, + "grad_norm": 0.6921024528574038, + "learning_rate": 9.901882707481303e-07, + "loss": 0.544, + "step": 28107 + }, + { + "epoch": 0.8614686772097585, + "grad_norm": 1.9099847792015368, + "learning_rate": 9.897576520221763e-07, + "loss": 0.6164, + "step": 28108 + }, + { + "epoch": 0.8614993257324997, + "grad_norm": 0.6692035168425016, + "learning_rate": 9.893271220764478e-07, + "loss": 0.5144, + "step": 28109 + }, + { + "epoch": 0.8615299742552409, + "grad_norm": 0.6580691277195173, + "learning_rate": 9.888966809151822e-07, + "loss": 0.5256, + "step": 28110 + }, + { + "epoch": 0.8615606227779821, + "grad_norm": 1.7764356828833974, + "learning_rate": 9.884663285426233e-07, + "loss": 0.6853, + "step": 28111 + }, + { + "epoch": 0.8615912713007233, + "grad_norm": 1.6448548966809315, + "learning_rate": 9.880360649630138e-07, + "loss": 0.5991, + "step": 28112 + }, + { + "epoch": 0.8616219198234645, + "grad_norm": 1.6383373447702594, + "learning_rate": 9.876058901805885e-07, + "loss": 0.7268, + "step": 28113 + }, + { + "epoch": 0.8616525683462057, + "grad_norm": 0.6795848835617592, + "learning_rate": 9.871758041995906e-07, + "loss": 0.5171, + "step": 28114 + }, + { + "epoch": 0.8616832168689469, + "grad_norm": 1.9875175825105726, + "learning_rate": 9.867458070242531e-07, + "loss": 0.6462, + "step": 28115 + }, + { + "epoch": 0.8617138653916881, + "grad_norm": 0.6696147223584955, + "learning_rate": 9.86315898658815e-07, + "loss": 0.5104, + "step": 28116 + }, + { + "epoch": 0.8617445139144293, + "grad_norm": 1.6394736754225505, + "learning_rate": 9.858860791075153e-07, + "loss": 0.6119, + "step": 28117 + }, + { + "epoch": 0.8617751624371706, + "grad_norm": 1.704717710937973, + "learning_rate": 9.854563483745838e-07, + "loss": 0.6928, + "step": 28118 + }, + { + "epoch": 0.8618058109599117, + "grad_norm": 1.643201446087864, + "learning_rate": 9.85026706464257e-07, + "loss": 0.6041, + "step": 28119 + }, + { + "epoch": 0.861836459482653, + "grad_norm": 1.7702309179603022, + "learning_rate": 9.845971533807718e-07, + "loss": 0.5947, + "step": 28120 + }, + { + "epoch": 0.8618671080053941, + "grad_norm": 1.7368526040205883, + "learning_rate": 9.841676891283548e-07, + "loss": 0.6508, + "step": 28121 + }, + { + "epoch": 0.8618977565281354, + "grad_norm": 1.7446478723001455, + "learning_rate": 9.83738313711241e-07, + "loss": 0.6969, + "step": 28122 + }, + { + "epoch": 0.8619284050508765, + "grad_norm": 1.6724943118845843, + "learning_rate": 9.83309027133662e-07, + "loss": 0.6121, + "step": 28123 + }, + { + "epoch": 0.8619590535736178, + "grad_norm": 2.0379252718909906, + "learning_rate": 9.828798293998444e-07, + "loss": 0.7105, + "step": 28124 + }, + { + "epoch": 0.861989702096359, + "grad_norm": 1.6929186199568322, + "learning_rate": 9.82450720514021e-07, + "loss": 0.6362, + "step": 28125 + }, + { + "epoch": 0.8620203506191002, + "grad_norm": 1.7863536746153998, + "learning_rate": 9.820217004804134e-07, + "loss": 0.6438, + "step": 28126 + }, + { + "epoch": 0.8620509991418414, + "grad_norm": 1.7375849442606701, + "learning_rate": 9.815927693032579e-07, + "loss": 0.7192, + "step": 28127 + }, + { + "epoch": 0.8620816476645826, + "grad_norm": 1.7105298552146475, + "learning_rate": 9.811639269867756e-07, + "loss": 0.5694, + "step": 28128 + }, + { + "epoch": 0.8621122961873238, + "grad_norm": 1.7702314808932944, + "learning_rate": 9.807351735351912e-07, + "loss": 0.6451, + "step": 28129 + }, + { + "epoch": 0.862142944710065, + "grad_norm": 1.8990186459251979, + "learning_rate": 9.803065089527309e-07, + "loss": 0.6285, + "step": 28130 + }, + { + "epoch": 0.8621735932328062, + "grad_norm": 1.554986896806274, + "learning_rate": 9.798779332436203e-07, + "loss": 0.5636, + "step": 28131 + }, + { + "epoch": 0.8622042417555473, + "grad_norm": 1.5569068004860083, + "learning_rate": 9.794494464120785e-07, + "loss": 0.6465, + "step": 28132 + }, + { + "epoch": 0.8622348902782886, + "grad_norm": 1.7133105628147778, + "learning_rate": 9.790210484623286e-07, + "loss": 0.605, + "step": 28133 + }, + { + "epoch": 0.8622655388010297, + "grad_norm": 1.6112875988543454, + "learning_rate": 9.785927393985928e-07, + "loss": 0.6833, + "step": 28134 + }, + { + "epoch": 0.862296187323771, + "grad_norm": 1.9041139716475024, + "learning_rate": 9.781645192250932e-07, + "loss": 0.6406, + "step": 28135 + }, + { + "epoch": 0.8623268358465122, + "grad_norm": 2.153610042801965, + "learning_rate": 9.777363879460466e-07, + "loss": 0.6759, + "step": 28136 + }, + { + "epoch": 0.8623574843692534, + "grad_norm": 2.0878116882631814, + "learning_rate": 9.773083455656696e-07, + "loss": 0.5709, + "step": 28137 + }, + { + "epoch": 0.8623881328919946, + "grad_norm": 1.6329890620589196, + "learning_rate": 9.76880392088183e-07, + "loss": 0.6415, + "step": 28138 + }, + { + "epoch": 0.8624187814147358, + "grad_norm": 1.813951552142964, + "learning_rate": 9.764525275178039e-07, + "loss": 0.6882, + "step": 28139 + }, + { + "epoch": 0.862449429937477, + "grad_norm": 1.7293615465914756, + "learning_rate": 9.76024751858745e-07, + "loss": 0.6221, + "step": 28140 + }, + { + "epoch": 0.8624800784602182, + "grad_norm": 0.6810664926050881, + "learning_rate": 9.755970651152224e-07, + "loss": 0.5391, + "step": 28141 + }, + { + "epoch": 0.8625107269829594, + "grad_norm": 1.794674708005084, + "learning_rate": 9.751694672914535e-07, + "loss": 0.6063, + "step": 28142 + }, + { + "epoch": 0.8625413755057006, + "grad_norm": 1.7641450626890132, + "learning_rate": 9.747419583916474e-07, + "loss": 0.6046, + "step": 28143 + }, + { + "epoch": 0.8625720240284418, + "grad_norm": 1.7966752090181068, + "learning_rate": 9.743145384200192e-07, + "loss": 0.6223, + "step": 28144 + }, + { + "epoch": 0.8626026725511831, + "grad_norm": 1.8767452036152104, + "learning_rate": 9.73887207380776e-07, + "loss": 0.6021, + "step": 28145 + }, + { + "epoch": 0.8626333210739242, + "grad_norm": 1.683788694440033, + "learning_rate": 9.734599652781351e-07, + "loss": 0.6221, + "step": 28146 + }, + { + "epoch": 0.8626639695966655, + "grad_norm": 1.7787894712960084, + "learning_rate": 9.730328121163023e-07, + "loss": 0.6476, + "step": 28147 + }, + { + "epoch": 0.8626946181194066, + "grad_norm": 0.6505282808074841, + "learning_rate": 9.726057478994855e-07, + "loss": 0.4948, + "step": 28148 + }, + { + "epoch": 0.8627252666421479, + "grad_norm": 1.7097590590252434, + "learning_rate": 9.721787726318943e-07, + "loss": 0.6664, + "step": 28149 + }, + { + "epoch": 0.862755915164889, + "grad_norm": 1.898235248678247, + "learning_rate": 9.717518863177366e-07, + "loss": 0.6475, + "step": 28150 + }, + { + "epoch": 0.8627865636876303, + "grad_norm": 1.786359853981554, + "learning_rate": 9.713250889612158e-07, + "loss": 0.6691, + "step": 28151 + }, + { + "epoch": 0.8628172122103714, + "grad_norm": 0.6864275188675839, + "learning_rate": 9.708983805665394e-07, + "loss": 0.5581, + "step": 28152 + }, + { + "epoch": 0.8628478607331127, + "grad_norm": 1.8176799405219135, + "learning_rate": 9.704717611379112e-07, + "loss": 0.7314, + "step": 28153 + }, + { + "epoch": 0.8628785092558539, + "grad_norm": 1.572535069323199, + "learning_rate": 9.700452306795373e-07, + "loss": 0.6237, + "step": 28154 + }, + { + "epoch": 0.8629091577785951, + "grad_norm": 0.6702730821309179, + "learning_rate": 9.696187891956177e-07, + "loss": 0.4947, + "step": 28155 + }, + { + "epoch": 0.8629398063013363, + "grad_norm": 0.6606522382304911, + "learning_rate": 9.691924366903505e-07, + "loss": 0.5293, + "step": 28156 + }, + { + "epoch": 0.8629704548240775, + "grad_norm": 1.6970519722623685, + "learning_rate": 9.687661731679454e-07, + "loss": 0.6292, + "step": 28157 + }, + { + "epoch": 0.8630011033468187, + "grad_norm": 1.848468641689797, + "learning_rate": 9.68339998632597e-07, + "loss": 0.6004, + "step": 28158 + }, + { + "epoch": 0.8630317518695599, + "grad_norm": 1.4710092255219063, + "learning_rate": 9.67913913088505e-07, + "loss": 0.6136, + "step": 28159 + }, + { + "epoch": 0.8630624003923011, + "grad_norm": 2.1481413727372733, + "learning_rate": 9.674879165398665e-07, + "loss": 0.5845, + "step": 28160 + }, + { + "epoch": 0.8630930489150423, + "grad_norm": 1.5908052425925594, + "learning_rate": 9.670620089908823e-07, + "loss": 0.7317, + "step": 28161 + }, + { + "epoch": 0.8631236974377835, + "grad_norm": 2.3846307957069155, + "learning_rate": 9.666361904457477e-07, + "loss": 0.6474, + "step": 28162 + }, + { + "epoch": 0.8631543459605246, + "grad_norm": 1.7956674355453215, + "learning_rate": 9.662104609086576e-07, + "loss": 0.6421, + "step": 28163 + }, + { + "epoch": 0.8631849944832659, + "grad_norm": 1.8293596681972932, + "learning_rate": 9.65784820383806e-07, + "loss": 0.5944, + "step": 28164 + }, + { + "epoch": 0.8632156430060071, + "grad_norm": 1.8840927534350278, + "learning_rate": 9.65359268875391e-07, + "loss": 0.7122, + "step": 28165 + }, + { + "epoch": 0.8632462915287483, + "grad_norm": 0.6621735519495554, + "learning_rate": 9.649338063876013e-07, + "loss": 0.4999, + "step": 28166 + }, + { + "epoch": 0.8632769400514895, + "grad_norm": 0.6472115896044202, + "learning_rate": 9.645084329246279e-07, + "loss": 0.5159, + "step": 28167 + }, + { + "epoch": 0.8633075885742307, + "grad_norm": 1.6877215813581392, + "learning_rate": 9.640831484906687e-07, + "loss": 0.6419, + "step": 28168 + }, + { + "epoch": 0.8633382370969719, + "grad_norm": 1.4497584876534635, + "learning_rate": 9.636579530899092e-07, + "loss": 0.5205, + "step": 28169 + }, + { + "epoch": 0.8633688856197131, + "grad_norm": 1.7174982240482293, + "learning_rate": 9.632328467265384e-07, + "loss": 0.6579, + "step": 28170 + }, + { + "epoch": 0.8633995341424543, + "grad_norm": 0.6268215674409414, + "learning_rate": 9.628078294047471e-07, + "loss": 0.501, + "step": 28171 + }, + { + "epoch": 0.8634301826651956, + "grad_norm": 0.6663711011553225, + "learning_rate": 9.623829011287223e-07, + "loss": 0.5427, + "step": 28172 + }, + { + "epoch": 0.8634608311879367, + "grad_norm": 1.5550687006580195, + "learning_rate": 9.619580619026526e-07, + "loss": 0.5618, + "step": 28173 + }, + { + "epoch": 0.863491479710678, + "grad_norm": 1.8572526976353143, + "learning_rate": 9.615333117307201e-07, + "loss": 0.705, + "step": 28174 + }, + { + "epoch": 0.8635221282334191, + "grad_norm": 1.8659745391169291, + "learning_rate": 9.611086506171141e-07, + "loss": 0.6486, + "step": 28175 + }, + { + "epoch": 0.8635527767561604, + "grad_norm": 1.8077537886846176, + "learning_rate": 9.606840785660177e-07, + "loss": 0.641, + "step": 28176 + }, + { + "epoch": 0.8635834252789015, + "grad_norm": 1.8254992564577623, + "learning_rate": 9.602595955816152e-07, + "loss": 0.6098, + "step": 28177 + }, + { + "epoch": 0.8636140738016428, + "grad_norm": 1.7831628222204738, + "learning_rate": 9.598352016680835e-07, + "loss": 0.6729, + "step": 28178 + }, + { + "epoch": 0.8636447223243839, + "grad_norm": 1.5484229877727393, + "learning_rate": 9.594108968296122e-07, + "loss": 0.6121, + "step": 28179 + }, + { + "epoch": 0.8636753708471252, + "grad_norm": 1.6300907303070238, + "learning_rate": 9.589866810703763e-07, + "loss": 0.613, + "step": 28180 + }, + { + "epoch": 0.8637060193698664, + "grad_norm": 1.7857274176899198, + "learning_rate": 9.585625543945597e-07, + "loss": 0.686, + "step": 28181 + }, + { + "epoch": 0.8637366678926076, + "grad_norm": 1.72531746191709, + "learning_rate": 9.581385168063385e-07, + "loss": 0.683, + "step": 28182 + }, + { + "epoch": 0.8637673164153488, + "grad_norm": 1.6623086511063936, + "learning_rate": 9.577145683098922e-07, + "loss": 0.6737, + "step": 28183 + }, + { + "epoch": 0.86379796493809, + "grad_norm": 1.6132304651884801, + "learning_rate": 9.572907089093986e-07, + "loss": 0.6951, + "step": 28184 + }, + { + "epoch": 0.8638286134608312, + "grad_norm": 1.7310676333119825, + "learning_rate": 9.568669386090313e-07, + "loss": 0.6423, + "step": 28185 + }, + { + "epoch": 0.8638592619835724, + "grad_norm": 0.6927346890525721, + "learning_rate": 9.56443257412969e-07, + "loss": 0.5297, + "step": 28186 + }, + { + "epoch": 0.8638899105063136, + "grad_norm": 2.0143018091557376, + "learning_rate": 9.560196653253861e-07, + "loss": 0.7295, + "step": 28187 + }, + { + "epoch": 0.8639205590290548, + "grad_norm": 1.6545190071688731, + "learning_rate": 9.55596162350454e-07, + "loss": 0.5714, + "step": 28188 + }, + { + "epoch": 0.863951207551796, + "grad_norm": 1.68827030786523, + "learning_rate": 9.55172748492349e-07, + "loss": 0.6528, + "step": 28189 + }, + { + "epoch": 0.8639818560745373, + "grad_norm": 1.7299955633037967, + "learning_rate": 9.547494237552391e-07, + "loss": 0.6167, + "step": 28190 + }, + { + "epoch": 0.8640125045972784, + "grad_norm": 1.5583104508378902, + "learning_rate": 9.543261881432975e-07, + "loss": 0.6316, + "step": 28191 + }, + { + "epoch": 0.8640431531200197, + "grad_norm": 1.7352911901551713, + "learning_rate": 9.539030416606965e-07, + "loss": 0.6518, + "step": 28192 + }, + { + "epoch": 0.8640738016427608, + "grad_norm": 1.8096883847247272, + "learning_rate": 9.534799843116005e-07, + "loss": 0.6495, + "step": 28193 + }, + { + "epoch": 0.864104450165502, + "grad_norm": 1.7714613125826755, + "learning_rate": 9.530570161001817e-07, + "loss": 0.6684, + "step": 28194 + }, + { + "epoch": 0.8641350986882432, + "grad_norm": 1.585915812288795, + "learning_rate": 9.526341370306092e-07, + "loss": 0.6347, + "step": 28195 + }, + { + "epoch": 0.8641657472109844, + "grad_norm": 1.8492352074280625, + "learning_rate": 9.522113471070471e-07, + "loss": 0.5574, + "step": 28196 + }, + { + "epoch": 0.8641963957337256, + "grad_norm": 1.8579441699994754, + "learning_rate": 9.517886463336568e-07, + "loss": 0.766, + "step": 28197 + }, + { + "epoch": 0.8642270442564668, + "grad_norm": 1.6623396801997339, + "learning_rate": 9.513660347146125e-07, + "loss": 0.6107, + "step": 28198 + }, + { + "epoch": 0.864257692779208, + "grad_norm": 1.6663552706715592, + "learning_rate": 9.509435122540722e-07, + "loss": 0.5957, + "step": 28199 + }, + { + "epoch": 0.8642883413019492, + "grad_norm": 1.8337087347233985, + "learning_rate": 9.505210789562025e-07, + "loss": 0.6591, + "step": 28200 + }, + { + "epoch": 0.8643189898246905, + "grad_norm": 0.6607248097402827, + "learning_rate": 9.500987348251622e-07, + "loss": 0.5174, + "step": 28201 + }, + { + "epoch": 0.8643496383474316, + "grad_norm": 1.866343087415986, + "learning_rate": 9.496764798651148e-07, + "loss": 0.5767, + "step": 28202 + }, + { + "epoch": 0.8643802868701729, + "grad_norm": 1.5412348636149114, + "learning_rate": 9.492543140802224e-07, + "loss": 0.6374, + "step": 28203 + }, + { + "epoch": 0.864410935392914, + "grad_norm": 1.92785299417265, + "learning_rate": 9.488322374746406e-07, + "loss": 0.6691, + "step": 28204 + }, + { + "epoch": 0.8644415839156553, + "grad_norm": 1.7537500907817487, + "learning_rate": 9.484102500525316e-07, + "loss": 0.6324, + "step": 28205 + }, + { + "epoch": 0.8644722324383964, + "grad_norm": 0.6636675184055808, + "learning_rate": 9.479883518180533e-07, + "loss": 0.5, + "step": 28206 + }, + { + "epoch": 0.8645028809611377, + "grad_norm": 1.780515337254273, + "learning_rate": 9.47566542775361e-07, + "loss": 0.6176, + "step": 28207 + }, + { + "epoch": 0.8645335294838788, + "grad_norm": 1.5143293471745614, + "learning_rate": 9.471448229286107e-07, + "loss": 0.482, + "step": 28208 + }, + { + "epoch": 0.8645641780066201, + "grad_norm": 0.6635574511761927, + "learning_rate": 9.467231922819609e-07, + "loss": 0.5236, + "step": 28209 + }, + { + "epoch": 0.8645948265293613, + "grad_norm": 1.7503320431378473, + "learning_rate": 9.463016508395617e-07, + "loss": 0.5982, + "step": 28210 + }, + { + "epoch": 0.8646254750521025, + "grad_norm": 1.8398906583508639, + "learning_rate": 9.45880198605571e-07, + "loss": 0.5599, + "step": 28211 + }, + { + "epoch": 0.8646561235748437, + "grad_norm": 1.7630634889476777, + "learning_rate": 9.454588355841377e-07, + "loss": 0.6425, + "step": 28212 + }, + { + "epoch": 0.8646867720975849, + "grad_norm": 1.740300467368216, + "learning_rate": 9.45037561779415e-07, + "loss": 0.672, + "step": 28213 + }, + { + "epoch": 0.8647174206203261, + "grad_norm": 1.8419389249314733, + "learning_rate": 9.446163771955552e-07, + "loss": 0.6887, + "step": 28214 + }, + { + "epoch": 0.8647480691430673, + "grad_norm": 1.552599035680065, + "learning_rate": 9.441952818367062e-07, + "loss": 0.6126, + "step": 28215 + }, + { + "epoch": 0.8647787176658085, + "grad_norm": 1.7065744119986785, + "learning_rate": 9.437742757070178e-07, + "loss": 0.6858, + "step": 28216 + }, + { + "epoch": 0.8648093661885498, + "grad_norm": 0.6859742174183676, + "learning_rate": 9.433533588106402e-07, + "loss": 0.5326, + "step": 28217 + }, + { + "epoch": 0.8648400147112909, + "grad_norm": 1.8030197389156692, + "learning_rate": 9.429325311517179e-07, + "loss": 0.7036, + "step": 28218 + }, + { + "epoch": 0.8648706632340322, + "grad_norm": 1.7767105126621066, + "learning_rate": 9.425117927343985e-07, + "loss": 0.7432, + "step": 28219 + }, + { + "epoch": 0.8649013117567733, + "grad_norm": 1.8536973721576746, + "learning_rate": 9.420911435628299e-07, + "loss": 0.6626, + "step": 28220 + }, + { + "epoch": 0.8649319602795146, + "grad_norm": 1.7139658145058685, + "learning_rate": 9.416705836411522e-07, + "loss": 0.5856, + "step": 28221 + }, + { + "epoch": 0.8649626088022557, + "grad_norm": 1.8796855746723005, + "learning_rate": 9.412501129735152e-07, + "loss": 0.6816, + "step": 28222 + }, + { + "epoch": 0.864993257324997, + "grad_norm": 1.9718110863243243, + "learning_rate": 9.408297315640558e-07, + "loss": 0.6523, + "step": 28223 + }, + { + "epoch": 0.8650239058477381, + "grad_norm": 1.7037193107723612, + "learning_rate": 9.404094394169183e-07, + "loss": 0.5939, + "step": 28224 + }, + { + "epoch": 0.8650545543704793, + "grad_norm": 1.5062329896142088, + "learning_rate": 9.399892365362473e-07, + "loss": 0.6362, + "step": 28225 + }, + { + "epoch": 0.8650852028932206, + "grad_norm": 1.6189900364995897, + "learning_rate": 9.395691229261783e-07, + "loss": 0.6333, + "step": 28226 + }, + { + "epoch": 0.8651158514159617, + "grad_norm": 1.5333126028862838, + "learning_rate": 9.391490985908536e-07, + "loss": 0.5927, + "step": 28227 + }, + { + "epoch": 0.865146499938703, + "grad_norm": 1.6791582457274623, + "learning_rate": 9.387291635344121e-07, + "loss": 0.5777, + "step": 28228 + }, + { + "epoch": 0.8651771484614441, + "grad_norm": 1.7476455401257347, + "learning_rate": 9.383093177609892e-07, + "loss": 0.7157, + "step": 28229 + }, + { + "epoch": 0.8652077969841854, + "grad_norm": 1.8027951833242104, + "learning_rate": 9.378895612747229e-07, + "loss": 0.6659, + "step": 28230 + }, + { + "epoch": 0.8652384455069265, + "grad_norm": 1.543399514871212, + "learning_rate": 9.374698940797511e-07, + "loss": 0.5709, + "step": 28231 + }, + { + "epoch": 0.8652690940296678, + "grad_norm": 1.7839619594747527, + "learning_rate": 9.370503161802058e-07, + "loss": 0.5585, + "step": 28232 + }, + { + "epoch": 0.8652997425524089, + "grad_norm": 1.606806989064549, + "learning_rate": 9.36630827580225e-07, + "loss": 0.6597, + "step": 28233 + }, + { + "epoch": 0.8653303910751502, + "grad_norm": 1.6568505676088963, + "learning_rate": 9.362114282839363e-07, + "loss": 0.6425, + "step": 28234 + }, + { + "epoch": 0.8653610395978913, + "grad_norm": 1.7759222842702127, + "learning_rate": 9.357921182954765e-07, + "loss": 0.6525, + "step": 28235 + }, + { + "epoch": 0.8653916881206326, + "grad_norm": 1.784352906062938, + "learning_rate": 9.35372897618978e-07, + "loss": 0.6407, + "step": 28236 + }, + { + "epoch": 0.8654223366433738, + "grad_norm": 1.7344458411131598, + "learning_rate": 9.349537662585672e-07, + "loss": 0.6708, + "step": 28237 + }, + { + "epoch": 0.865452985166115, + "grad_norm": 1.5934227294560654, + "learning_rate": 9.345347242183766e-07, + "loss": 0.6354, + "step": 28238 + }, + { + "epoch": 0.8654836336888562, + "grad_norm": 1.7264631137101587, + "learning_rate": 9.341157715025362e-07, + "loss": 0.7236, + "step": 28239 + }, + { + "epoch": 0.8655142822115974, + "grad_norm": 1.7443740800326017, + "learning_rate": 9.336969081151715e-07, + "loss": 0.6936, + "step": 28240 + }, + { + "epoch": 0.8655449307343386, + "grad_norm": 1.892819990027967, + "learning_rate": 9.332781340604124e-07, + "loss": 0.577, + "step": 28241 + }, + { + "epoch": 0.8655755792570798, + "grad_norm": 1.6749478012187797, + "learning_rate": 9.328594493423804e-07, + "loss": 0.6074, + "step": 28242 + }, + { + "epoch": 0.865606227779821, + "grad_norm": 1.8701949434674032, + "learning_rate": 9.324408539652074e-07, + "loss": 0.7106, + "step": 28243 + }, + { + "epoch": 0.8656368763025623, + "grad_norm": 0.7044830319213893, + "learning_rate": 9.320223479330148e-07, + "loss": 0.5242, + "step": 28244 + }, + { + "epoch": 0.8656675248253034, + "grad_norm": 1.6204960236653205, + "learning_rate": 9.316039312499248e-07, + "loss": 0.63, + "step": 28245 + }, + { + "epoch": 0.8656981733480447, + "grad_norm": 1.9059276819829714, + "learning_rate": 9.311856039200617e-07, + "loss": 0.6313, + "step": 28246 + }, + { + "epoch": 0.8657288218707858, + "grad_norm": 1.8612237901846447, + "learning_rate": 9.307673659475481e-07, + "loss": 0.6446, + "step": 28247 + }, + { + "epoch": 0.8657594703935271, + "grad_norm": 1.5419355885431876, + "learning_rate": 9.303492173365025e-07, + "loss": 0.6131, + "step": 28248 + }, + { + "epoch": 0.8657901189162682, + "grad_norm": 1.660058939534501, + "learning_rate": 9.299311580910464e-07, + "loss": 0.7145, + "step": 28249 + }, + { + "epoch": 0.8658207674390095, + "grad_norm": 1.7938203423063017, + "learning_rate": 9.295131882153019e-07, + "loss": 0.6424, + "step": 28250 + }, + { + "epoch": 0.8658514159617506, + "grad_norm": 1.6225992680692387, + "learning_rate": 9.290953077133824e-07, + "loss": 0.613, + "step": 28251 + }, + { + "epoch": 0.8658820644844919, + "grad_norm": 0.6773924688619752, + "learning_rate": 9.286775165894102e-07, + "loss": 0.5469, + "step": 28252 + }, + { + "epoch": 0.865912713007233, + "grad_norm": 1.8224500795845724, + "learning_rate": 9.282598148474953e-07, + "loss": 0.6444, + "step": 28253 + }, + { + "epoch": 0.8659433615299743, + "grad_norm": 1.6556264996155752, + "learning_rate": 9.278422024917611e-07, + "loss": 0.5881, + "step": 28254 + }, + { + "epoch": 0.8659740100527155, + "grad_norm": 1.5605746130617473, + "learning_rate": 9.274246795263187e-07, + "loss": 0.6972, + "step": 28255 + }, + { + "epoch": 0.8660046585754566, + "grad_norm": 1.7095214575153626, + "learning_rate": 9.270072459552804e-07, + "loss": 0.6101, + "step": 28256 + }, + { + "epoch": 0.8660353070981979, + "grad_norm": 1.761491415030424, + "learning_rate": 9.265899017827617e-07, + "loss": 0.6311, + "step": 28257 + }, + { + "epoch": 0.866065955620939, + "grad_norm": 1.8355664265736402, + "learning_rate": 9.261726470128751e-07, + "loss": 0.699, + "step": 28258 + }, + { + "epoch": 0.8660966041436803, + "grad_norm": 1.879182525315117, + "learning_rate": 9.257554816497305e-07, + "loss": 0.5964, + "step": 28259 + }, + { + "epoch": 0.8661272526664214, + "grad_norm": 1.4908875723581907, + "learning_rate": 9.25338405697438e-07, + "loss": 0.5827, + "step": 28260 + }, + { + "epoch": 0.8661579011891627, + "grad_norm": 1.7261008578519654, + "learning_rate": 9.249214191601086e-07, + "loss": 0.7091, + "step": 28261 + }, + { + "epoch": 0.8661885497119038, + "grad_norm": 1.5907382212179595, + "learning_rate": 9.245045220418514e-07, + "loss": 0.6037, + "step": 28262 + }, + { + "epoch": 0.8662191982346451, + "grad_norm": 0.6569314686783119, + "learning_rate": 9.240877143467741e-07, + "loss": 0.5274, + "step": 28263 + }, + { + "epoch": 0.8662498467573863, + "grad_norm": 1.75870387988221, + "learning_rate": 9.236709960789781e-07, + "loss": 0.5516, + "step": 28264 + }, + { + "epoch": 0.8662804952801275, + "grad_norm": 1.69496869050591, + "learning_rate": 9.232543672425787e-07, + "loss": 0.6294, + "step": 28265 + }, + { + "epoch": 0.8663111438028687, + "grad_norm": 1.880599964197845, + "learning_rate": 9.228378278416761e-07, + "loss": 0.7345, + "step": 28266 + }, + { + "epoch": 0.8663417923256099, + "grad_norm": 1.7803670599911563, + "learning_rate": 9.224213778803726e-07, + "loss": 0.6573, + "step": 28267 + }, + { + "epoch": 0.8663724408483511, + "grad_norm": 1.8593409787672897, + "learning_rate": 9.220050173627748e-07, + "loss": 0.5987, + "step": 28268 + }, + { + "epoch": 0.8664030893710923, + "grad_norm": 1.5515691585927749, + "learning_rate": 9.215887462929851e-07, + "loss": 0.5512, + "step": 28269 + }, + { + "epoch": 0.8664337378938335, + "grad_norm": 1.7879178628144878, + "learning_rate": 9.211725646751024e-07, + "loss": 0.645, + "step": 28270 + }, + { + "epoch": 0.8664643864165747, + "grad_norm": 1.66670938613085, + "learning_rate": 9.207564725132301e-07, + "loss": 0.5794, + "step": 28271 + }, + { + "epoch": 0.8664950349393159, + "grad_norm": 1.6100632091897604, + "learning_rate": 9.20340469811467e-07, + "loss": 0.707, + "step": 28272 + }, + { + "epoch": 0.8665256834620572, + "grad_norm": 1.9501529013072603, + "learning_rate": 9.199245565739146e-07, + "loss": 0.7143, + "step": 28273 + }, + { + "epoch": 0.8665563319847983, + "grad_norm": 1.8146616511392009, + "learning_rate": 9.195087328046681e-07, + "loss": 0.5831, + "step": 28274 + }, + { + "epoch": 0.8665869805075396, + "grad_norm": 1.5795146346068483, + "learning_rate": 9.190929985078223e-07, + "loss": 0.6091, + "step": 28275 + }, + { + "epoch": 0.8666176290302807, + "grad_norm": 1.5866965227514407, + "learning_rate": 9.186773536874804e-07, + "loss": 0.5603, + "step": 28276 + }, + { + "epoch": 0.866648277553022, + "grad_norm": 0.6760295345091536, + "learning_rate": 9.182617983477338e-07, + "loss": 0.5292, + "step": 28277 + }, + { + "epoch": 0.8666789260757631, + "grad_norm": 1.6720039569250154, + "learning_rate": 9.178463324926746e-07, + "loss": 0.6572, + "step": 28278 + }, + { + "epoch": 0.8667095745985044, + "grad_norm": 1.9341960399787275, + "learning_rate": 9.174309561264006e-07, + "loss": 0.681, + "step": 28279 + }, + { + "epoch": 0.8667402231212455, + "grad_norm": 1.6268944567000245, + "learning_rate": 9.17015669253003e-07, + "loss": 0.6752, + "step": 28280 + }, + { + "epoch": 0.8667708716439868, + "grad_norm": 1.6312197832332942, + "learning_rate": 9.166004718765753e-07, + "loss": 0.5357, + "step": 28281 + }, + { + "epoch": 0.866801520166728, + "grad_norm": 1.655179567956467, + "learning_rate": 9.161853640012053e-07, + "loss": 0.6191, + "step": 28282 + }, + { + "epoch": 0.8668321686894692, + "grad_norm": 2.0093327459235724, + "learning_rate": 9.157703456309864e-07, + "loss": 0.7183, + "step": 28283 + }, + { + "epoch": 0.8668628172122104, + "grad_norm": 1.7158918735825512, + "learning_rate": 9.153554167700074e-07, + "loss": 0.7529, + "step": 28284 + }, + { + "epoch": 0.8668934657349516, + "grad_norm": 1.803334494215991, + "learning_rate": 9.149405774223563e-07, + "loss": 0.6629, + "step": 28285 + }, + { + "epoch": 0.8669241142576928, + "grad_norm": 0.6850229276924158, + "learning_rate": 9.145258275921176e-07, + "loss": 0.5491, + "step": 28286 + }, + { + "epoch": 0.8669547627804339, + "grad_norm": 1.6765157128033807, + "learning_rate": 9.141111672833814e-07, + "loss": 0.5666, + "step": 28287 + }, + { + "epoch": 0.8669854113031752, + "grad_norm": 1.7632067095853943, + "learning_rate": 9.13696596500232e-07, + "loss": 0.695, + "step": 28288 + }, + { + "epoch": 0.8670160598259163, + "grad_norm": 1.7483630335498856, + "learning_rate": 9.132821152467564e-07, + "loss": 0.6124, + "step": 28289 + }, + { + "epoch": 0.8670467083486576, + "grad_norm": 1.786122073620791, + "learning_rate": 9.128677235270355e-07, + "loss": 0.6693, + "step": 28290 + }, + { + "epoch": 0.8670773568713988, + "grad_norm": 1.7868141296165683, + "learning_rate": 9.124534213451552e-07, + "loss": 0.7366, + "step": 28291 + }, + { + "epoch": 0.86710800539414, + "grad_norm": 0.6512756626364357, + "learning_rate": 9.120392087051966e-07, + "loss": 0.5265, + "step": 28292 + }, + { + "epoch": 0.8671386539168812, + "grad_norm": 1.7334476200470967, + "learning_rate": 9.116250856112419e-07, + "loss": 0.7619, + "step": 28293 + }, + { + "epoch": 0.8671693024396224, + "grad_norm": 1.7644679818472242, + "learning_rate": 9.112110520673667e-07, + "loss": 0.672, + "step": 28294 + }, + { + "epoch": 0.8671999509623636, + "grad_norm": 1.8333080533517563, + "learning_rate": 9.107971080776579e-07, + "loss": 0.6061, + "step": 28295 + }, + { + "epoch": 0.8672305994851048, + "grad_norm": 1.737519677731782, + "learning_rate": 9.10383253646191e-07, + "loss": 0.7671, + "step": 28296 + }, + { + "epoch": 0.867261248007846, + "grad_norm": 1.7234486645134455, + "learning_rate": 9.099694887770416e-07, + "loss": 0.7292, + "step": 28297 + }, + { + "epoch": 0.8672918965305872, + "grad_norm": 1.6505328728895683, + "learning_rate": 9.095558134742887e-07, + "loss": 0.6994, + "step": 28298 + }, + { + "epoch": 0.8673225450533284, + "grad_norm": 1.7528225721586415, + "learning_rate": 9.091422277420092e-07, + "loss": 0.5587, + "step": 28299 + }, + { + "epoch": 0.8673531935760697, + "grad_norm": 0.6651501463022883, + "learning_rate": 9.087287315842774e-07, + "loss": 0.5092, + "step": 28300 + }, + { + "epoch": 0.8673838420988108, + "grad_norm": 1.8969485027844812, + "learning_rate": 9.083153250051669e-07, + "loss": 0.6931, + "step": 28301 + }, + { + "epoch": 0.8674144906215521, + "grad_norm": 1.6428791327618446, + "learning_rate": 9.07902008008752e-07, + "loss": 0.5787, + "step": 28302 + }, + { + "epoch": 0.8674451391442932, + "grad_norm": 1.956891940200214, + "learning_rate": 9.074887805991061e-07, + "loss": 0.7918, + "step": 28303 + }, + { + "epoch": 0.8674757876670345, + "grad_norm": 0.6493656173299999, + "learning_rate": 9.070756427802996e-07, + "loss": 0.4956, + "step": 28304 + }, + { + "epoch": 0.8675064361897756, + "grad_norm": 1.885124928835196, + "learning_rate": 9.06662594556399e-07, + "loss": 0.6257, + "step": 28305 + }, + { + "epoch": 0.8675370847125169, + "grad_norm": 0.6380315833824772, + "learning_rate": 9.062496359314831e-07, + "loss": 0.4992, + "step": 28306 + }, + { + "epoch": 0.867567733235258, + "grad_norm": 0.6594054775759415, + "learning_rate": 9.058367669096146e-07, + "loss": 0.5319, + "step": 28307 + }, + { + "epoch": 0.8675983817579993, + "grad_norm": 1.706303489875732, + "learning_rate": 9.054239874948645e-07, + "loss": 0.571, + "step": 28308 + }, + { + "epoch": 0.8676290302807405, + "grad_norm": 1.5446887938760163, + "learning_rate": 9.050112976912973e-07, + "loss": 0.6021, + "step": 28309 + }, + { + "epoch": 0.8676596788034817, + "grad_norm": 1.7265678024020994, + "learning_rate": 9.045986975029808e-07, + "loss": 0.6352, + "step": 28310 + }, + { + "epoch": 0.8676903273262229, + "grad_norm": 1.6916965618668613, + "learning_rate": 9.041861869339819e-07, + "loss": 0.6262, + "step": 28311 + }, + { + "epoch": 0.8677209758489641, + "grad_norm": 1.905357560842428, + "learning_rate": 9.037737659883628e-07, + "loss": 0.6388, + "step": 28312 + }, + { + "epoch": 0.8677516243717053, + "grad_norm": 1.5143463709038862, + "learning_rate": 9.033614346701868e-07, + "loss": 0.6492, + "step": 28313 + }, + { + "epoch": 0.8677822728944465, + "grad_norm": 1.4196631159345643, + "learning_rate": 9.02949192983521e-07, + "loss": 0.6423, + "step": 28314 + }, + { + "epoch": 0.8678129214171877, + "grad_norm": 1.7330566541296786, + "learning_rate": 9.02537040932423e-07, + "loss": 0.7126, + "step": 28315 + }, + { + "epoch": 0.867843569939929, + "grad_norm": 1.6217546278224482, + "learning_rate": 9.02124978520954e-07, + "loss": 0.6714, + "step": 28316 + }, + { + "epoch": 0.8678742184626701, + "grad_norm": 1.8513567127080752, + "learning_rate": 9.017130057531775e-07, + "loss": 0.6785, + "step": 28317 + }, + { + "epoch": 0.8679048669854112, + "grad_norm": 1.7416841434840467, + "learning_rate": 9.013011226331492e-07, + "loss": 0.6825, + "step": 28318 + }, + { + "epoch": 0.8679355155081525, + "grad_norm": 1.7964604003853544, + "learning_rate": 9.008893291649313e-07, + "loss": 0.7384, + "step": 28319 + }, + { + "epoch": 0.8679661640308937, + "grad_norm": 1.5281597219906164, + "learning_rate": 9.00477625352576e-07, + "loss": 0.6485, + "step": 28320 + }, + { + "epoch": 0.8679968125536349, + "grad_norm": 1.65604007756773, + "learning_rate": 9.000660112001436e-07, + "loss": 0.4753, + "step": 28321 + }, + { + "epoch": 0.8680274610763761, + "grad_norm": 1.9388401691780899, + "learning_rate": 8.996544867116907e-07, + "loss": 0.7219, + "step": 28322 + }, + { + "epoch": 0.8680581095991173, + "grad_norm": 1.8175566068452151, + "learning_rate": 8.992430518912687e-07, + "loss": 0.6095, + "step": 28323 + }, + { + "epoch": 0.8680887581218585, + "grad_norm": 1.7429550309940747, + "learning_rate": 8.98831706742933e-07, + "loss": 0.6596, + "step": 28324 + }, + { + "epoch": 0.8681194066445997, + "grad_norm": 1.9780182253673104, + "learning_rate": 8.984204512707395e-07, + "loss": 0.5777, + "step": 28325 + }, + { + "epoch": 0.8681500551673409, + "grad_norm": 1.8581307391494755, + "learning_rate": 8.980092854787359e-07, + "loss": 0.7066, + "step": 28326 + }, + { + "epoch": 0.8681807036900822, + "grad_norm": 1.889287939732904, + "learning_rate": 8.975982093709756e-07, + "loss": 0.6341, + "step": 28327 + }, + { + "epoch": 0.8682113522128233, + "grad_norm": 1.5824604528904276, + "learning_rate": 8.971872229515111e-07, + "loss": 0.4911, + "step": 28328 + }, + { + "epoch": 0.8682420007355646, + "grad_norm": 1.7580481993529244, + "learning_rate": 8.967763262243889e-07, + "loss": 0.673, + "step": 28329 + }, + { + "epoch": 0.8682726492583057, + "grad_norm": 0.6643035199879779, + "learning_rate": 8.963655191936593e-07, + "loss": 0.5203, + "step": 28330 + }, + { + "epoch": 0.868303297781047, + "grad_norm": 1.822203377428934, + "learning_rate": 8.959548018633679e-07, + "loss": 0.604, + "step": 28331 + }, + { + "epoch": 0.8683339463037881, + "grad_norm": 0.644899889727522, + "learning_rate": 8.955441742375637e-07, + "loss": 0.5185, + "step": 28332 + }, + { + "epoch": 0.8683645948265294, + "grad_norm": 1.595942539122263, + "learning_rate": 8.951336363202944e-07, + "loss": 0.6576, + "step": 28333 + }, + { + "epoch": 0.8683952433492705, + "grad_norm": 0.6570761586419581, + "learning_rate": 8.947231881156004e-07, + "loss": 0.5027, + "step": 28334 + }, + { + "epoch": 0.8684258918720118, + "grad_norm": 1.763048398092572, + "learning_rate": 8.943128296275283e-07, + "loss": 0.6261, + "step": 28335 + }, + { + "epoch": 0.868456540394753, + "grad_norm": 1.55958986971173, + "learning_rate": 8.939025608601237e-07, + "loss": 0.5725, + "step": 28336 + }, + { + "epoch": 0.8684871889174942, + "grad_norm": 1.7807281219872244, + "learning_rate": 8.934923818174258e-07, + "loss": 0.7273, + "step": 28337 + }, + { + "epoch": 0.8685178374402354, + "grad_norm": 1.6706728986373347, + "learning_rate": 8.930822925034788e-07, + "loss": 0.6201, + "step": 28338 + }, + { + "epoch": 0.8685484859629766, + "grad_norm": 1.9369513443041635, + "learning_rate": 8.926722929223209e-07, + "loss": 0.6782, + "step": 28339 + }, + { + "epoch": 0.8685791344857178, + "grad_norm": 0.6593048251796195, + "learning_rate": 8.92262383077992e-07, + "loss": 0.4947, + "step": 28340 + }, + { + "epoch": 0.868609783008459, + "grad_norm": 1.565239237517272, + "learning_rate": 8.918525629745344e-07, + "loss": 0.6303, + "step": 28341 + }, + { + "epoch": 0.8686404315312002, + "grad_norm": 0.6454780275792618, + "learning_rate": 8.91442832615983e-07, + "loss": 0.5108, + "step": 28342 + }, + { + "epoch": 0.8686710800539414, + "grad_norm": 1.6014398557856875, + "learning_rate": 8.910331920063752e-07, + "loss": 0.6214, + "step": 28343 + }, + { + "epoch": 0.8687017285766826, + "grad_norm": 1.7570637572821826, + "learning_rate": 8.906236411497493e-07, + "loss": 0.6769, + "step": 28344 + }, + { + "epoch": 0.8687323770994239, + "grad_norm": 1.8323226346133055, + "learning_rate": 8.902141800501385e-07, + "loss": 0.6234, + "step": 28345 + }, + { + "epoch": 0.868763025622165, + "grad_norm": 1.7863957069844358, + "learning_rate": 8.898048087115774e-07, + "loss": 0.7007, + "step": 28346 + }, + { + "epoch": 0.8687936741449063, + "grad_norm": 1.6549931394101947, + "learning_rate": 8.893955271381028e-07, + "loss": 0.6004, + "step": 28347 + }, + { + "epoch": 0.8688243226676474, + "grad_norm": 1.7305031493278649, + "learning_rate": 8.889863353337435e-07, + "loss": 0.6053, + "step": 28348 + }, + { + "epoch": 0.8688549711903886, + "grad_norm": 1.8009792331670371, + "learning_rate": 8.885772333025344e-07, + "loss": 0.5783, + "step": 28349 + }, + { + "epoch": 0.8688856197131298, + "grad_norm": 1.7758946740600599, + "learning_rate": 8.881682210485032e-07, + "loss": 0.631, + "step": 28350 + }, + { + "epoch": 0.868916268235871, + "grad_norm": 1.5941631262754543, + "learning_rate": 8.877592985756822e-07, + "loss": 0.6454, + "step": 28351 + }, + { + "epoch": 0.8689469167586122, + "grad_norm": 1.8181832073630004, + "learning_rate": 8.873504658881016e-07, + "loss": 0.6833, + "step": 28352 + }, + { + "epoch": 0.8689775652813534, + "grad_norm": 0.663234135751278, + "learning_rate": 8.86941722989787e-07, + "loss": 0.5206, + "step": 28353 + }, + { + "epoch": 0.8690082138040947, + "grad_norm": 1.7202168010560683, + "learning_rate": 8.865330698847674e-07, + "loss": 0.6242, + "step": 28354 + }, + { + "epoch": 0.8690388623268358, + "grad_norm": 1.8150979460121501, + "learning_rate": 8.861245065770708e-07, + "loss": 0.7076, + "step": 28355 + }, + { + "epoch": 0.8690695108495771, + "grad_norm": 1.6637238332003896, + "learning_rate": 8.857160330707193e-07, + "loss": 0.6845, + "step": 28356 + }, + { + "epoch": 0.8691001593723182, + "grad_norm": 1.6239907968080867, + "learning_rate": 8.853076493697399e-07, + "loss": 0.7465, + "step": 28357 + }, + { + "epoch": 0.8691308078950595, + "grad_norm": 1.698875529884894, + "learning_rate": 8.848993554781582e-07, + "loss": 0.6957, + "step": 28358 + }, + { + "epoch": 0.8691614564178006, + "grad_norm": 1.8407162335099891, + "learning_rate": 8.844911513999943e-07, + "loss": 0.5827, + "step": 28359 + }, + { + "epoch": 0.8691921049405419, + "grad_norm": 1.6077614391018396, + "learning_rate": 8.840830371392717e-07, + "loss": 0.6366, + "step": 28360 + }, + { + "epoch": 0.869222753463283, + "grad_norm": 1.716468867132107, + "learning_rate": 8.836750127000082e-07, + "loss": 0.6626, + "step": 28361 + }, + { + "epoch": 0.8692534019860243, + "grad_norm": 1.5492566463386028, + "learning_rate": 8.832670780862317e-07, + "loss": 0.6368, + "step": 28362 + }, + { + "epoch": 0.8692840505087654, + "grad_norm": 1.7059691567914714, + "learning_rate": 8.82859233301957e-07, + "loss": 0.6025, + "step": 28363 + }, + { + "epoch": 0.8693146990315067, + "grad_norm": 1.5694867661615266, + "learning_rate": 8.824514783512006e-07, + "loss": 0.6057, + "step": 28364 + }, + { + "epoch": 0.8693453475542479, + "grad_norm": 2.002110876002572, + "learning_rate": 8.820438132379838e-07, + "loss": 0.658, + "step": 28365 + }, + { + "epoch": 0.8693759960769891, + "grad_norm": 1.5915090809560266, + "learning_rate": 8.816362379663224e-07, + "loss": 0.6202, + "step": 28366 + }, + { + "epoch": 0.8694066445997303, + "grad_norm": 1.7247233958866675, + "learning_rate": 8.812287525402319e-07, + "loss": 0.6714, + "step": 28367 + }, + { + "epoch": 0.8694372931224715, + "grad_norm": 1.7290164279465714, + "learning_rate": 8.808213569637269e-07, + "loss": 0.6965, + "step": 28368 + }, + { + "epoch": 0.8694679416452127, + "grad_norm": 1.8027616060605043, + "learning_rate": 8.804140512408222e-07, + "loss": 0.5855, + "step": 28369 + }, + { + "epoch": 0.8694985901679539, + "grad_norm": 0.6566967690337169, + "learning_rate": 8.800068353755331e-07, + "loss": 0.5255, + "step": 28370 + }, + { + "epoch": 0.8695292386906951, + "grad_norm": 1.7027090745526214, + "learning_rate": 8.7959970937187e-07, + "loss": 0.6115, + "step": 28371 + }, + { + "epoch": 0.8695598872134364, + "grad_norm": 2.123962782150575, + "learning_rate": 8.791926732338429e-07, + "loss": 0.651, + "step": 28372 + }, + { + "epoch": 0.8695905357361775, + "grad_norm": 0.720307610684846, + "learning_rate": 8.787857269654643e-07, + "loss": 0.5478, + "step": 28373 + }, + { + "epoch": 0.8696211842589188, + "grad_norm": 2.0310906720976494, + "learning_rate": 8.783788705707452e-07, + "loss": 0.7177, + "step": 28374 + }, + { + "epoch": 0.8696518327816599, + "grad_norm": 0.6427571666313192, + "learning_rate": 8.779721040536914e-07, + "loss": 0.4703, + "step": 28375 + }, + { + "epoch": 0.8696824813044012, + "grad_norm": 1.891090082256074, + "learning_rate": 8.775654274183121e-07, + "loss": 0.6716, + "step": 28376 + }, + { + "epoch": 0.8697131298271423, + "grad_norm": 1.7657275762691735, + "learning_rate": 8.771588406686171e-07, + "loss": 0.5398, + "step": 28377 + }, + { + "epoch": 0.8697437783498836, + "grad_norm": 1.6577053766823064, + "learning_rate": 8.767523438086079e-07, + "loss": 0.5852, + "step": 28378 + }, + { + "epoch": 0.8697744268726247, + "grad_norm": 1.6721883034002067, + "learning_rate": 8.763459368422933e-07, + "loss": 0.6714, + "step": 28379 + }, + { + "epoch": 0.8698050753953659, + "grad_norm": 0.6330433304552032, + "learning_rate": 8.759396197736736e-07, + "loss": 0.5204, + "step": 28380 + }, + { + "epoch": 0.8698357239181072, + "grad_norm": 1.6828312742693718, + "learning_rate": 8.7553339260676e-07, + "loss": 0.6854, + "step": 28381 + }, + { + "epoch": 0.8698663724408483, + "grad_norm": 1.6186910902094884, + "learning_rate": 8.751272553455492e-07, + "loss": 0.5981, + "step": 28382 + }, + { + "epoch": 0.8698970209635896, + "grad_norm": 1.9470435177733418, + "learning_rate": 8.747212079940426e-07, + "loss": 0.663, + "step": 28383 + }, + { + "epoch": 0.8699276694863307, + "grad_norm": 1.5628960095789974, + "learning_rate": 8.743152505562425e-07, + "loss": 0.6381, + "step": 28384 + }, + { + "epoch": 0.869958318009072, + "grad_norm": 1.6135912471730658, + "learning_rate": 8.739093830361511e-07, + "loss": 0.6552, + "step": 28385 + }, + { + "epoch": 0.8699889665318131, + "grad_norm": 1.6928424356924279, + "learning_rate": 8.735036054377643e-07, + "loss": 0.6514, + "step": 28386 + }, + { + "epoch": 0.8700196150545544, + "grad_norm": 1.9322741968552182, + "learning_rate": 8.730979177650812e-07, + "loss": 0.56, + "step": 28387 + }, + { + "epoch": 0.8700502635772955, + "grad_norm": 1.665392167046506, + "learning_rate": 8.726923200221005e-07, + "loss": 0.6994, + "step": 28388 + }, + { + "epoch": 0.8700809121000368, + "grad_norm": 0.6705030681175017, + "learning_rate": 8.722868122128181e-07, + "loss": 0.5182, + "step": 28389 + }, + { + "epoch": 0.870111560622778, + "grad_norm": 1.7007149500677177, + "learning_rate": 8.718813943412297e-07, + "loss": 0.5971, + "step": 28390 + }, + { + "epoch": 0.8701422091455192, + "grad_norm": 1.5772742454906326, + "learning_rate": 8.714760664113253e-07, + "loss": 0.6012, + "step": 28391 + }, + { + "epoch": 0.8701728576682604, + "grad_norm": 1.6102832171394046, + "learning_rate": 8.710708284271074e-07, + "loss": 0.6279, + "step": 28392 + }, + { + "epoch": 0.8702035061910016, + "grad_norm": 1.7848377931703865, + "learning_rate": 8.70665680392564e-07, + "loss": 0.631, + "step": 28393 + }, + { + "epoch": 0.8702341547137428, + "grad_norm": 1.5069637029428653, + "learning_rate": 8.70260622311686e-07, + "loss": 0.5852, + "step": 28394 + }, + { + "epoch": 0.870264803236484, + "grad_norm": 1.6458186630864629, + "learning_rate": 8.69855654188465e-07, + "loss": 0.5493, + "step": 28395 + }, + { + "epoch": 0.8702954517592252, + "grad_norm": 1.7515638910535498, + "learning_rate": 8.694507760268934e-07, + "loss": 0.6524, + "step": 28396 + }, + { + "epoch": 0.8703261002819664, + "grad_norm": 1.8570344517955888, + "learning_rate": 8.690459878309609e-07, + "loss": 0.6227, + "step": 28397 + }, + { + "epoch": 0.8703567488047076, + "grad_norm": 1.7100503788208916, + "learning_rate": 8.686412896046526e-07, + "loss": 0.6489, + "step": 28398 + }, + { + "epoch": 0.8703873973274489, + "grad_norm": 1.8350822325944127, + "learning_rate": 8.682366813519583e-07, + "loss": 0.672, + "step": 28399 + }, + { + "epoch": 0.87041804585019, + "grad_norm": 1.8516868612579804, + "learning_rate": 8.67832163076866e-07, + "loss": 0.5902, + "step": 28400 + }, + { + "epoch": 0.8704486943729313, + "grad_norm": 1.7250602190724234, + "learning_rate": 8.674277347833593e-07, + "loss": 0.7201, + "step": 28401 + }, + { + "epoch": 0.8704793428956724, + "grad_norm": 1.793744379795597, + "learning_rate": 8.670233964754216e-07, + "loss": 0.7018, + "step": 28402 + }, + { + "epoch": 0.8705099914184137, + "grad_norm": 1.8879335988594357, + "learning_rate": 8.666191481570418e-07, + "loss": 0.6622, + "step": 28403 + }, + { + "epoch": 0.8705406399411548, + "grad_norm": 1.5645427241325762, + "learning_rate": 8.662149898322004e-07, + "loss": 0.6233, + "step": 28404 + }, + { + "epoch": 0.8705712884638961, + "grad_norm": 1.849775628322291, + "learning_rate": 8.658109215048782e-07, + "loss": 0.6274, + "step": 28405 + }, + { + "epoch": 0.8706019369866372, + "grad_norm": 1.5926176731141708, + "learning_rate": 8.654069431790579e-07, + "loss": 0.6906, + "step": 28406 + }, + { + "epoch": 0.8706325855093785, + "grad_norm": 1.928041884505564, + "learning_rate": 8.650030548587196e-07, + "loss": 0.691, + "step": 28407 + }, + { + "epoch": 0.8706632340321196, + "grad_norm": 1.680637078584197, + "learning_rate": 8.645992565478467e-07, + "loss": 0.6173, + "step": 28408 + }, + { + "epoch": 0.8706938825548609, + "grad_norm": 1.6696685382725998, + "learning_rate": 8.641955482504116e-07, + "loss": 0.6572, + "step": 28409 + }, + { + "epoch": 0.8707245310776021, + "grad_norm": 1.798992886228329, + "learning_rate": 8.637919299703956e-07, + "loss": 0.7054, + "step": 28410 + }, + { + "epoch": 0.8707551796003432, + "grad_norm": 1.7897668298331302, + "learning_rate": 8.633884017117777e-07, + "loss": 0.5974, + "step": 28411 + }, + { + "epoch": 0.8707858281230845, + "grad_norm": 1.8282531831279287, + "learning_rate": 8.629849634785315e-07, + "loss": 0.7159, + "step": 28412 + }, + { + "epoch": 0.8708164766458256, + "grad_norm": 1.3261903073735013, + "learning_rate": 8.62581615274628e-07, + "loss": 0.6296, + "step": 28413 + }, + { + "epoch": 0.8708471251685669, + "grad_norm": 1.6339152357186206, + "learning_rate": 8.621783571040499e-07, + "loss": 0.6896, + "step": 28414 + }, + { + "epoch": 0.870877773691308, + "grad_norm": 1.7991657773270824, + "learning_rate": 8.617751889707648e-07, + "loss": 0.5383, + "step": 28415 + }, + { + "epoch": 0.8709084222140493, + "grad_norm": 1.5841439000559818, + "learning_rate": 8.613721108787487e-07, + "loss": 0.6173, + "step": 28416 + }, + { + "epoch": 0.8709390707367904, + "grad_norm": 1.8536576386668604, + "learning_rate": 8.609691228319684e-07, + "loss": 0.6767, + "step": 28417 + }, + { + "epoch": 0.8709697192595317, + "grad_norm": 0.6683723543934271, + "learning_rate": 8.605662248343993e-07, + "loss": 0.484, + "step": 28418 + }, + { + "epoch": 0.8710003677822729, + "grad_norm": 1.7591975227980263, + "learning_rate": 8.601634168900109e-07, + "loss": 0.6811, + "step": 28419 + }, + { + "epoch": 0.8710310163050141, + "grad_norm": 1.9233225223752652, + "learning_rate": 8.597606990027685e-07, + "loss": 0.736, + "step": 28420 + }, + { + "epoch": 0.8710616648277553, + "grad_norm": 0.6662198467417991, + "learning_rate": 8.593580711766425e-07, + "loss": 0.5415, + "step": 28421 + }, + { + "epoch": 0.8710923133504965, + "grad_norm": 1.74187059277486, + "learning_rate": 8.58955533415603e-07, + "loss": 0.6202, + "step": 28422 + }, + { + "epoch": 0.8711229618732377, + "grad_norm": 1.7428988678404758, + "learning_rate": 8.585530857236102e-07, + "loss": 0.6498, + "step": 28423 + }, + { + "epoch": 0.8711536103959789, + "grad_norm": 1.698349838659086, + "learning_rate": 8.581507281046353e-07, + "loss": 0.6439, + "step": 28424 + }, + { + "epoch": 0.8711842589187201, + "grad_norm": 1.9489065904464216, + "learning_rate": 8.577484605626384e-07, + "loss": 0.6827, + "step": 28425 + }, + { + "epoch": 0.8712149074414614, + "grad_norm": 1.6099898734597111, + "learning_rate": 8.573462831015855e-07, + "loss": 0.696, + "step": 28426 + }, + { + "epoch": 0.8712455559642025, + "grad_norm": 1.5556482661295559, + "learning_rate": 8.56944195725441e-07, + "loss": 0.6, + "step": 28427 + }, + { + "epoch": 0.8712762044869438, + "grad_norm": 1.543709575993329, + "learning_rate": 8.565421984381628e-07, + "loss": 0.6675, + "step": 28428 + }, + { + "epoch": 0.8713068530096849, + "grad_norm": 1.7434121246734775, + "learning_rate": 8.561402912437134e-07, + "loss": 0.6624, + "step": 28429 + }, + { + "epoch": 0.8713375015324262, + "grad_norm": 1.8568854263808074, + "learning_rate": 8.557384741460551e-07, + "loss": 0.6841, + "step": 28430 + }, + { + "epoch": 0.8713681500551673, + "grad_norm": 1.6285621912122836, + "learning_rate": 8.553367471491447e-07, + "loss": 0.6986, + "step": 28431 + }, + { + "epoch": 0.8713987985779086, + "grad_norm": 0.6640332214560071, + "learning_rate": 8.549351102569381e-07, + "loss": 0.5091, + "step": 28432 + }, + { + "epoch": 0.8714294471006497, + "grad_norm": 1.4925443501982347, + "learning_rate": 8.545335634733987e-07, + "loss": 0.5927, + "step": 28433 + }, + { + "epoch": 0.871460095623391, + "grad_norm": 1.767920593592658, + "learning_rate": 8.541321068024788e-07, + "loss": 0.6093, + "step": 28434 + }, + { + "epoch": 0.8714907441461321, + "grad_norm": 1.6991399660450786, + "learning_rate": 8.537307402481377e-07, + "loss": 0.6401, + "step": 28435 + }, + { + "epoch": 0.8715213926688734, + "grad_norm": 1.6873091921956418, + "learning_rate": 8.533294638143253e-07, + "loss": 0.6283, + "step": 28436 + }, + { + "epoch": 0.8715520411916146, + "grad_norm": 0.6587883576759002, + "learning_rate": 8.529282775049985e-07, + "loss": 0.5434, + "step": 28437 + }, + { + "epoch": 0.8715826897143558, + "grad_norm": 1.5718032476236254, + "learning_rate": 8.525271813241109e-07, + "loss": 0.6035, + "step": 28438 + }, + { + "epoch": 0.871613338237097, + "grad_norm": 1.6181408963146264, + "learning_rate": 8.521261752756115e-07, + "loss": 0.7012, + "step": 28439 + }, + { + "epoch": 0.8716439867598382, + "grad_norm": 1.644199265345681, + "learning_rate": 8.51725259363454e-07, + "loss": 0.6333, + "step": 28440 + }, + { + "epoch": 0.8716746352825794, + "grad_norm": 1.5412596347508387, + "learning_rate": 8.513244335915905e-07, + "loss": 0.5717, + "step": 28441 + }, + { + "epoch": 0.8717052838053205, + "grad_norm": 1.7614018196109578, + "learning_rate": 8.509236979639657e-07, + "loss": 0.6277, + "step": 28442 + }, + { + "epoch": 0.8717359323280618, + "grad_norm": 0.6786684639730058, + "learning_rate": 8.505230524845299e-07, + "loss": 0.5229, + "step": 28443 + }, + { + "epoch": 0.8717665808508029, + "grad_norm": 0.6856524581226409, + "learning_rate": 8.501224971572342e-07, + "loss": 0.5173, + "step": 28444 + }, + { + "epoch": 0.8717972293735442, + "grad_norm": 1.8071191242690985, + "learning_rate": 8.497220319860211e-07, + "loss": 0.61, + "step": 28445 + }, + { + "epoch": 0.8718278778962854, + "grad_norm": 0.6632782606769825, + "learning_rate": 8.493216569748386e-07, + "loss": 0.5155, + "step": 28446 + }, + { + "epoch": 0.8718585264190266, + "grad_norm": 1.7945441449053843, + "learning_rate": 8.489213721276301e-07, + "loss": 0.6068, + "step": 28447 + }, + { + "epoch": 0.8718891749417678, + "grad_norm": 1.5469707284294636, + "learning_rate": 8.485211774483415e-07, + "loss": 0.6109, + "step": 28448 + }, + { + "epoch": 0.871919823464509, + "grad_norm": 1.803282051086953, + "learning_rate": 8.481210729409161e-07, + "loss": 0.764, + "step": 28449 + }, + { + "epoch": 0.8719504719872502, + "grad_norm": 1.635049209748324, + "learning_rate": 8.477210586092932e-07, + "loss": 0.6707, + "step": 28450 + }, + { + "epoch": 0.8719811205099914, + "grad_norm": 1.997574040469502, + "learning_rate": 8.473211344574173e-07, + "loss": 0.6805, + "step": 28451 + }, + { + "epoch": 0.8720117690327326, + "grad_norm": 1.724285193983374, + "learning_rate": 8.469213004892296e-07, + "loss": 0.6282, + "step": 28452 + }, + { + "epoch": 0.8720424175554738, + "grad_norm": 1.6855647416054154, + "learning_rate": 8.46521556708666e-07, + "loss": 0.679, + "step": 28453 + }, + { + "epoch": 0.872073066078215, + "grad_norm": 1.7448979094448762, + "learning_rate": 8.461219031196677e-07, + "loss": 0.666, + "step": 28454 + }, + { + "epoch": 0.8721037146009563, + "grad_norm": 0.6868873807452711, + "learning_rate": 8.457223397261749e-07, + "loss": 0.5234, + "step": 28455 + }, + { + "epoch": 0.8721343631236974, + "grad_norm": 1.621635998733434, + "learning_rate": 8.453228665321189e-07, + "loss": 0.6971, + "step": 28456 + }, + { + "epoch": 0.8721650116464387, + "grad_norm": 0.6744626123385462, + "learning_rate": 8.449234835414422e-07, + "loss": 0.523, + "step": 28457 + }, + { + "epoch": 0.8721956601691798, + "grad_norm": 2.0423698084461255, + "learning_rate": 8.445241907580748e-07, + "loss": 0.662, + "step": 28458 + }, + { + "epoch": 0.8722263086919211, + "grad_norm": 1.6628484745615422, + "learning_rate": 8.441249881859525e-07, + "loss": 0.5326, + "step": 28459 + }, + { + "epoch": 0.8722569572146622, + "grad_norm": 1.6989448080794323, + "learning_rate": 8.437258758290112e-07, + "loss": 0.6092, + "step": 28460 + }, + { + "epoch": 0.8722876057374035, + "grad_norm": 1.7507201663494698, + "learning_rate": 8.433268536911799e-07, + "loss": 0.6767, + "step": 28461 + }, + { + "epoch": 0.8723182542601446, + "grad_norm": 1.641011663637294, + "learning_rate": 8.42927921776393e-07, + "loss": 0.5534, + "step": 28462 + }, + { + "epoch": 0.8723489027828859, + "grad_norm": 1.5743579346123195, + "learning_rate": 8.42529080088581e-07, + "loss": 0.6832, + "step": 28463 + }, + { + "epoch": 0.872379551305627, + "grad_norm": 1.7460766969793047, + "learning_rate": 8.421303286316706e-07, + "loss": 0.7065, + "step": 28464 + }, + { + "epoch": 0.8724101998283683, + "grad_norm": 1.671769986966837, + "learning_rate": 8.417316674095943e-07, + "loss": 0.6814, + "step": 28465 + }, + { + "epoch": 0.8724408483511095, + "grad_norm": 1.6614159516804043, + "learning_rate": 8.4133309642628e-07, + "loss": 0.698, + "step": 28466 + }, + { + "epoch": 0.8724714968738507, + "grad_norm": 1.6608432747681563, + "learning_rate": 8.409346156856534e-07, + "loss": 0.6338, + "step": 28467 + }, + { + "epoch": 0.8725021453965919, + "grad_norm": 1.762730128131546, + "learning_rate": 8.405362251916426e-07, + "loss": 0.7069, + "step": 28468 + }, + { + "epoch": 0.8725327939193331, + "grad_norm": 1.665313716236233, + "learning_rate": 8.4013792494817e-07, + "loss": 0.6521, + "step": 28469 + }, + { + "epoch": 0.8725634424420743, + "grad_norm": 1.4819176088353425, + "learning_rate": 8.397397149591624e-07, + "loss": 0.5768, + "step": 28470 + }, + { + "epoch": 0.8725940909648155, + "grad_norm": 1.5857825268979702, + "learning_rate": 8.393415952285444e-07, + "loss": 0.6711, + "step": 28471 + }, + { + "epoch": 0.8726247394875567, + "grad_norm": 1.7092606811956599, + "learning_rate": 8.389435657602363e-07, + "loss": 0.6413, + "step": 28472 + }, + { + "epoch": 0.8726553880102978, + "grad_norm": 1.7407020190993443, + "learning_rate": 8.385456265581615e-07, + "loss": 0.6255, + "step": 28473 + }, + { + "epoch": 0.8726860365330391, + "grad_norm": 1.8245947761046313, + "learning_rate": 8.381477776262415e-07, + "loss": 0.6338, + "step": 28474 + }, + { + "epoch": 0.8727166850557803, + "grad_norm": 1.7686956123623558, + "learning_rate": 8.377500189683951e-07, + "loss": 0.7067, + "step": 28475 + }, + { + "epoch": 0.8727473335785215, + "grad_norm": 1.794273627935512, + "learning_rate": 8.373523505885428e-07, + "loss": 0.6394, + "step": 28476 + }, + { + "epoch": 0.8727779821012627, + "grad_norm": 1.5468644967731635, + "learning_rate": 8.369547724906001e-07, + "loss": 0.6381, + "step": 28477 + }, + { + "epoch": 0.8728086306240039, + "grad_norm": 1.5524294666733027, + "learning_rate": 8.365572846784875e-07, + "loss": 0.5937, + "step": 28478 + }, + { + "epoch": 0.8728392791467451, + "grad_norm": 1.6758953595103185, + "learning_rate": 8.361598871561216e-07, + "loss": 0.6501, + "step": 28479 + }, + { + "epoch": 0.8728699276694863, + "grad_norm": 1.6731229572773811, + "learning_rate": 8.357625799274161e-07, + "loss": 0.5987, + "step": 28480 + }, + { + "epoch": 0.8729005761922275, + "grad_norm": 1.9665498815996567, + "learning_rate": 8.353653629962855e-07, + "loss": 0.6492, + "step": 28481 + }, + { + "epoch": 0.8729312247149688, + "grad_norm": 2.0168584203698834, + "learning_rate": 8.349682363666478e-07, + "loss": 0.693, + "step": 28482 + }, + { + "epoch": 0.8729618732377099, + "grad_norm": 1.5856017882489093, + "learning_rate": 8.3457120004241e-07, + "loss": 0.6057, + "step": 28483 + }, + { + "epoch": 0.8729925217604512, + "grad_norm": 1.909176379112174, + "learning_rate": 8.341742540274878e-07, + "loss": 0.6655, + "step": 28484 + }, + { + "epoch": 0.8730231702831923, + "grad_norm": 0.6650963313917422, + "learning_rate": 8.337773983257936e-07, + "loss": 0.5158, + "step": 28485 + }, + { + "epoch": 0.8730538188059336, + "grad_norm": 1.7798484977089344, + "learning_rate": 8.333806329412342e-07, + "loss": 0.5671, + "step": 28486 + }, + { + "epoch": 0.8730844673286747, + "grad_norm": 1.741213070747687, + "learning_rate": 8.329839578777232e-07, + "loss": 0.7122, + "step": 28487 + }, + { + "epoch": 0.873115115851416, + "grad_norm": 1.5840008233450265, + "learning_rate": 8.32587373139162e-07, + "loss": 0.5732, + "step": 28488 + }, + { + "epoch": 0.8731457643741571, + "grad_norm": 1.7977846169259508, + "learning_rate": 8.321908787294674e-07, + "loss": 0.617, + "step": 28489 + }, + { + "epoch": 0.8731764128968984, + "grad_norm": 1.4364671634480206, + "learning_rate": 8.317944746525419e-07, + "loss": 0.6148, + "step": 28490 + }, + { + "epoch": 0.8732070614196396, + "grad_norm": 0.6518124203287572, + "learning_rate": 8.31398160912289e-07, + "loss": 0.5056, + "step": 28491 + }, + { + "epoch": 0.8732377099423808, + "grad_norm": 1.798551632565635, + "learning_rate": 8.310019375126166e-07, + "loss": 0.6071, + "step": 28492 + }, + { + "epoch": 0.873268358465122, + "grad_norm": 1.4988660081390237, + "learning_rate": 8.306058044574295e-07, + "loss": 0.5552, + "step": 28493 + }, + { + "epoch": 0.8732990069878632, + "grad_norm": 1.57968529888963, + "learning_rate": 8.302097617506266e-07, + "loss": 0.6299, + "step": 28494 + }, + { + "epoch": 0.8733296555106044, + "grad_norm": 1.6829999000773577, + "learning_rate": 8.298138093961139e-07, + "loss": 0.5998, + "step": 28495 + }, + { + "epoch": 0.8733603040333456, + "grad_norm": 1.6587045191894916, + "learning_rate": 8.294179473977925e-07, + "loss": 0.5097, + "step": 28496 + }, + { + "epoch": 0.8733909525560868, + "grad_norm": 1.6207889389750598, + "learning_rate": 8.29022175759564e-07, + "loss": 0.6384, + "step": 28497 + }, + { + "epoch": 0.873421601078828, + "grad_norm": 1.7786277414285026, + "learning_rate": 8.286264944853261e-07, + "loss": 0.6991, + "step": 28498 + }, + { + "epoch": 0.8734522496015692, + "grad_norm": 1.6090227872469307, + "learning_rate": 8.282309035789748e-07, + "loss": 0.5811, + "step": 28499 + }, + { + "epoch": 0.8734828981243105, + "grad_norm": 1.73727829754806, + "learning_rate": 8.278354030444146e-07, + "loss": 0.5819, + "step": 28500 + }, + { + "epoch": 0.8735135466470516, + "grad_norm": 1.664366459089382, + "learning_rate": 8.274399928855392e-07, + "loss": 0.6483, + "step": 28501 + }, + { + "epoch": 0.8735441951697929, + "grad_norm": 1.8833576865425607, + "learning_rate": 8.27044673106242e-07, + "loss": 0.6174, + "step": 28502 + }, + { + "epoch": 0.873574843692534, + "grad_norm": 1.7155901401249272, + "learning_rate": 8.266494437104211e-07, + "loss": 0.6491, + "step": 28503 + }, + { + "epoch": 0.8736054922152752, + "grad_norm": 0.6773921404253544, + "learning_rate": 8.262543047019722e-07, + "loss": 0.5099, + "step": 28504 + }, + { + "epoch": 0.8736361407380164, + "grad_norm": 1.5552091676958277, + "learning_rate": 8.258592560847856e-07, + "loss": 0.5646, + "step": 28505 + }, + { + "epoch": 0.8736667892607576, + "grad_norm": 1.810863551561745, + "learning_rate": 8.254642978627536e-07, + "loss": 0.6097, + "step": 28506 + }, + { + "epoch": 0.8736974377834988, + "grad_norm": 1.8191316893782632, + "learning_rate": 8.250694300397699e-07, + "loss": 0.6444, + "step": 28507 + }, + { + "epoch": 0.87372808630624, + "grad_norm": 1.5000818743265107, + "learning_rate": 8.246746526197269e-07, + "loss": 0.5421, + "step": 28508 + }, + { + "epoch": 0.8737587348289813, + "grad_norm": 1.7387673215955008, + "learning_rate": 8.242799656065114e-07, + "loss": 0.7641, + "step": 28509 + }, + { + "epoch": 0.8737893833517224, + "grad_norm": 1.9043712904678538, + "learning_rate": 8.238853690040105e-07, + "loss": 0.6854, + "step": 28510 + }, + { + "epoch": 0.8738200318744637, + "grad_norm": 1.7467155861553447, + "learning_rate": 8.234908628161175e-07, + "loss": 0.5998, + "step": 28511 + }, + { + "epoch": 0.8738506803972048, + "grad_norm": 1.786795816933144, + "learning_rate": 8.230964470467173e-07, + "loss": 0.6912, + "step": 28512 + }, + { + "epoch": 0.8738813289199461, + "grad_norm": 1.7298089309361449, + "learning_rate": 8.227021216996945e-07, + "loss": 0.6667, + "step": 28513 + }, + { + "epoch": 0.8739119774426872, + "grad_norm": 1.7107548785758913, + "learning_rate": 8.223078867789358e-07, + "loss": 0.6383, + "step": 28514 + }, + { + "epoch": 0.8739426259654285, + "grad_norm": 1.8459989429588983, + "learning_rate": 8.21913742288325e-07, + "loss": 0.5588, + "step": 28515 + }, + { + "epoch": 0.8739732744881696, + "grad_norm": 1.8456523731107835, + "learning_rate": 8.215196882317477e-07, + "loss": 0.6689, + "step": 28516 + }, + { + "epoch": 0.8740039230109109, + "grad_norm": 2.218371996806719, + "learning_rate": 8.211257246130843e-07, + "loss": 0.6011, + "step": 28517 + }, + { + "epoch": 0.874034571533652, + "grad_norm": 1.7926901507357638, + "learning_rate": 8.207318514362183e-07, + "loss": 0.6184, + "step": 28518 + }, + { + "epoch": 0.8740652200563933, + "grad_norm": 1.777999587731999, + "learning_rate": 8.203380687050311e-07, + "loss": 0.6195, + "step": 28519 + }, + { + "epoch": 0.8740958685791345, + "grad_norm": 1.989698139002563, + "learning_rate": 8.199443764234016e-07, + "loss": 0.6286, + "step": 28520 + }, + { + "epoch": 0.8741265171018757, + "grad_norm": 1.6258857548589312, + "learning_rate": 8.195507745952069e-07, + "loss": 0.6436, + "step": 28521 + }, + { + "epoch": 0.8741571656246169, + "grad_norm": 1.6748380102647173, + "learning_rate": 8.191572632243283e-07, + "loss": 0.478, + "step": 28522 + }, + { + "epoch": 0.8741878141473581, + "grad_norm": 1.6271151296616297, + "learning_rate": 8.187638423146415e-07, + "loss": 0.5256, + "step": 28523 + }, + { + "epoch": 0.8742184626700993, + "grad_norm": 1.5861480128339234, + "learning_rate": 8.183705118700258e-07, + "loss": 0.5311, + "step": 28524 + }, + { + "epoch": 0.8742491111928405, + "grad_norm": 1.4090222297384791, + "learning_rate": 8.179772718943524e-07, + "loss": 0.5335, + "step": 28525 + }, + { + "epoch": 0.8742797597155817, + "grad_norm": 0.6836192430092928, + "learning_rate": 8.175841223914982e-07, + "loss": 0.5023, + "step": 28526 + }, + { + "epoch": 0.874310408238323, + "grad_norm": 1.552649562677294, + "learning_rate": 8.17191063365339e-07, + "loss": 0.5693, + "step": 28527 + }, + { + "epoch": 0.8743410567610641, + "grad_norm": 1.6446481481187134, + "learning_rate": 8.167980948197462e-07, + "loss": 0.6444, + "step": 28528 + }, + { + "epoch": 0.8743717052838054, + "grad_norm": 0.6720620593417265, + "learning_rate": 8.164052167585879e-07, + "loss": 0.5337, + "step": 28529 + }, + { + "epoch": 0.8744023538065465, + "grad_norm": 1.4280418521243115, + "learning_rate": 8.160124291857418e-07, + "loss": 0.6376, + "step": 28530 + }, + { + "epoch": 0.8744330023292878, + "grad_norm": 1.5703128700832578, + "learning_rate": 8.156197321050752e-07, + "loss": 0.5676, + "step": 28531 + }, + { + "epoch": 0.8744636508520289, + "grad_norm": 1.7527610129051445, + "learning_rate": 8.152271255204547e-07, + "loss": 0.6862, + "step": 28532 + }, + { + "epoch": 0.8744942993747702, + "grad_norm": 1.6233670861169762, + "learning_rate": 8.148346094357529e-07, + "loss": 0.6343, + "step": 28533 + }, + { + "epoch": 0.8745249478975113, + "grad_norm": 2.0129424283892137, + "learning_rate": 8.144421838548344e-07, + "loss": 0.757, + "step": 28534 + }, + { + "epoch": 0.8745555964202525, + "grad_norm": 0.6993818908299521, + "learning_rate": 8.140498487815707e-07, + "loss": 0.5249, + "step": 28535 + }, + { + "epoch": 0.8745862449429938, + "grad_norm": 0.6999209088569747, + "learning_rate": 8.136576042198208e-07, + "loss": 0.5303, + "step": 28536 + }, + { + "epoch": 0.8746168934657349, + "grad_norm": 1.7090721984794568, + "learning_rate": 8.132654501734539e-07, + "loss": 0.6426, + "step": 28537 + }, + { + "epoch": 0.8746475419884762, + "grad_norm": 0.6519416036698067, + "learning_rate": 8.128733866463345e-07, + "loss": 0.5233, + "step": 28538 + }, + { + "epoch": 0.8746781905112173, + "grad_norm": 0.6644787880587523, + "learning_rate": 8.124814136423242e-07, + "loss": 0.4979, + "step": 28539 + }, + { + "epoch": 0.8747088390339586, + "grad_norm": 1.7437818762678583, + "learning_rate": 8.120895311652821e-07, + "loss": 0.7047, + "step": 28540 + }, + { + "epoch": 0.8747394875566997, + "grad_norm": 1.632758847416899, + "learning_rate": 8.116977392190761e-07, + "loss": 0.5407, + "step": 28541 + }, + { + "epoch": 0.874770136079441, + "grad_norm": 1.5798207135440894, + "learning_rate": 8.113060378075611e-07, + "loss": 0.582, + "step": 28542 + }, + { + "epoch": 0.8748007846021821, + "grad_norm": 1.8203931662943171, + "learning_rate": 8.109144269346003e-07, + "loss": 0.6345, + "step": 28543 + }, + { + "epoch": 0.8748314331249234, + "grad_norm": 1.6288466303683016, + "learning_rate": 8.105229066040499e-07, + "loss": 0.5626, + "step": 28544 + }, + { + "epoch": 0.8748620816476645, + "grad_norm": 1.727741079279614, + "learning_rate": 8.101314768197677e-07, + "loss": 0.5835, + "step": 28545 + }, + { + "epoch": 0.8748927301704058, + "grad_norm": 1.690014874072341, + "learning_rate": 8.097401375856129e-07, + "loss": 0.5301, + "step": 28546 + }, + { + "epoch": 0.874923378693147, + "grad_norm": 2.0660739425418178, + "learning_rate": 8.093488889054391e-07, + "loss": 0.8067, + "step": 28547 + }, + { + "epoch": 0.8749540272158882, + "grad_norm": 0.6570265176234625, + "learning_rate": 8.089577307831021e-07, + "loss": 0.508, + "step": 28548 + }, + { + "epoch": 0.8749846757386294, + "grad_norm": 1.9229615405613603, + "learning_rate": 8.085666632224576e-07, + "loss": 0.6583, + "step": 28549 + }, + { + "epoch": 0.8750153242613706, + "grad_norm": 1.8726783597795529, + "learning_rate": 8.08175686227356e-07, + "loss": 0.6953, + "step": 28550 + }, + { + "epoch": 0.8750459727841118, + "grad_norm": 1.8333669942559028, + "learning_rate": 8.077847998016508e-07, + "loss": 0.6699, + "step": 28551 + }, + { + "epoch": 0.875076621306853, + "grad_norm": 1.7172577197997225, + "learning_rate": 8.073940039491957e-07, + "loss": 0.5763, + "step": 28552 + }, + { + "epoch": 0.8751072698295942, + "grad_norm": 1.697789413177875, + "learning_rate": 8.070032986738385e-07, + "loss": 0.6688, + "step": 28553 + }, + { + "epoch": 0.8751379183523355, + "grad_norm": 1.6324831419350003, + "learning_rate": 8.066126839794309e-07, + "loss": 0.5608, + "step": 28554 + }, + { + "epoch": 0.8751685668750766, + "grad_norm": 1.7819673911105016, + "learning_rate": 8.062221598698194e-07, + "loss": 0.64, + "step": 28555 + }, + { + "epoch": 0.8751992153978179, + "grad_norm": 1.7010818088116133, + "learning_rate": 8.058317263488524e-07, + "loss": 0.622, + "step": 28556 + }, + { + "epoch": 0.875229863920559, + "grad_norm": 1.4676417287858148, + "learning_rate": 8.054413834203811e-07, + "loss": 0.6188, + "step": 28557 + }, + { + "epoch": 0.8752605124433003, + "grad_norm": 1.9003641381039535, + "learning_rate": 8.050511310882458e-07, + "loss": 0.6431, + "step": 28558 + }, + { + "epoch": 0.8752911609660414, + "grad_norm": 1.8231624813579947, + "learning_rate": 8.046609693562945e-07, + "loss": 0.6281, + "step": 28559 + }, + { + "epoch": 0.8753218094887827, + "grad_norm": 1.618868821857156, + "learning_rate": 8.042708982283731e-07, + "loss": 0.5732, + "step": 28560 + }, + { + "epoch": 0.8753524580115238, + "grad_norm": 1.848061560053572, + "learning_rate": 8.038809177083207e-07, + "loss": 0.6464, + "step": 28561 + }, + { + "epoch": 0.8753831065342651, + "grad_norm": 0.6820381167740591, + "learning_rate": 8.034910277999842e-07, + "loss": 0.5157, + "step": 28562 + }, + { + "epoch": 0.8754137550570062, + "grad_norm": 0.6841628517275854, + "learning_rate": 8.031012285072037e-07, + "loss": 0.5152, + "step": 28563 + }, + { + "epoch": 0.8754444035797475, + "grad_norm": 1.6676146144126753, + "learning_rate": 8.027115198338198e-07, + "loss": 0.581, + "step": 28564 + }, + { + "epoch": 0.8754750521024887, + "grad_norm": 0.6722541246352497, + "learning_rate": 8.023219017836737e-07, + "loss": 0.5259, + "step": 28565 + }, + { + "epoch": 0.8755057006252298, + "grad_norm": 1.7029485745629769, + "learning_rate": 8.019323743606011e-07, + "loss": 0.6627, + "step": 28566 + }, + { + "epoch": 0.8755363491479711, + "grad_norm": 1.5963591000764068, + "learning_rate": 8.015429375684425e-07, + "loss": 0.5904, + "step": 28567 + }, + { + "epoch": 0.8755669976707122, + "grad_norm": 1.6516583932962758, + "learning_rate": 8.011535914110358e-07, + "loss": 0.5892, + "step": 28568 + }, + { + "epoch": 0.8755976461934535, + "grad_norm": 1.7610739549948105, + "learning_rate": 8.007643358922157e-07, + "loss": 0.6789, + "step": 28569 + }, + { + "epoch": 0.8756282947161946, + "grad_norm": 1.6077475698830574, + "learning_rate": 8.00375171015818e-07, + "loss": 0.5757, + "step": 28570 + }, + { + "epoch": 0.8756589432389359, + "grad_norm": 1.9937176132787304, + "learning_rate": 7.999860967856798e-07, + "loss": 0.6097, + "step": 28571 + }, + { + "epoch": 0.875689591761677, + "grad_norm": 0.6476658443347042, + "learning_rate": 7.995971132056301e-07, + "loss": 0.5044, + "step": 28572 + }, + { + "epoch": 0.8757202402844183, + "grad_norm": 1.657402652376921, + "learning_rate": 7.992082202795059e-07, + "loss": 0.7064, + "step": 28573 + }, + { + "epoch": 0.8757508888071595, + "grad_norm": 1.84821289319052, + "learning_rate": 7.988194180111353e-07, + "loss": 0.5742, + "step": 28574 + }, + { + "epoch": 0.8757815373299007, + "grad_norm": 0.6863742967990301, + "learning_rate": 7.984307064043517e-07, + "loss": 0.5407, + "step": 28575 + }, + { + "epoch": 0.8758121858526419, + "grad_norm": 0.6833453768135068, + "learning_rate": 7.980420854629866e-07, + "loss": 0.5313, + "step": 28576 + }, + { + "epoch": 0.8758428343753831, + "grad_norm": 0.6551939119726283, + "learning_rate": 7.976535551908649e-07, + "loss": 0.5034, + "step": 28577 + }, + { + "epoch": 0.8758734828981243, + "grad_norm": 1.8790491699492262, + "learning_rate": 7.972651155918176e-07, + "loss": 0.6501, + "step": 28578 + }, + { + "epoch": 0.8759041314208655, + "grad_norm": 2.012491296830817, + "learning_rate": 7.968767666696731e-07, + "loss": 0.6647, + "step": 28579 + }, + { + "epoch": 0.8759347799436067, + "grad_norm": 1.74055746540769, + "learning_rate": 7.964885084282547e-07, + "loss": 0.608, + "step": 28580 + }, + { + "epoch": 0.875965428466348, + "grad_norm": 1.8203656281348672, + "learning_rate": 7.961003408713908e-07, + "loss": 0.5979, + "step": 28581 + }, + { + "epoch": 0.8759960769890891, + "grad_norm": 1.6459275536721751, + "learning_rate": 7.957122640029058e-07, + "loss": 0.6506, + "step": 28582 + }, + { + "epoch": 0.8760267255118304, + "grad_norm": 1.5805574001466032, + "learning_rate": 7.953242778266223e-07, + "loss": 0.6486, + "step": 28583 + }, + { + "epoch": 0.8760573740345715, + "grad_norm": 1.489562817653635, + "learning_rate": 7.94936382346364e-07, + "loss": 0.7569, + "step": 28584 + }, + { + "epoch": 0.8760880225573128, + "grad_norm": 1.6101616805563181, + "learning_rate": 7.945485775659523e-07, + "loss": 0.5486, + "step": 28585 + }, + { + "epoch": 0.8761186710800539, + "grad_norm": 1.746796304583028, + "learning_rate": 7.941608634892084e-07, + "loss": 0.6311, + "step": 28586 + }, + { + "epoch": 0.8761493196027952, + "grad_norm": 1.795963544690308, + "learning_rate": 7.937732401199549e-07, + "loss": 0.6922, + "step": 28587 + }, + { + "epoch": 0.8761799681255363, + "grad_norm": 1.5509599665583929, + "learning_rate": 7.933857074620066e-07, + "loss": 0.5772, + "step": 28588 + }, + { + "epoch": 0.8762106166482776, + "grad_norm": 1.803480452114286, + "learning_rate": 7.929982655191859e-07, + "loss": 0.5986, + "step": 28589 + }, + { + "epoch": 0.8762412651710187, + "grad_norm": 1.8257010001849314, + "learning_rate": 7.926109142953098e-07, + "loss": 0.7834, + "step": 28590 + }, + { + "epoch": 0.87627191369376, + "grad_norm": 1.570515859088159, + "learning_rate": 7.922236537941919e-07, + "loss": 0.5895, + "step": 28591 + }, + { + "epoch": 0.8763025622165012, + "grad_norm": 1.882314325804774, + "learning_rate": 7.918364840196512e-07, + "loss": 0.7358, + "step": 28592 + }, + { + "epoch": 0.8763332107392424, + "grad_norm": 0.65789893470002, + "learning_rate": 7.914494049755028e-07, + "loss": 0.5236, + "step": 28593 + }, + { + "epoch": 0.8763638592619836, + "grad_norm": 1.6924736072423605, + "learning_rate": 7.91062416665559e-07, + "loss": 0.6479, + "step": 28594 + }, + { + "epoch": 0.8763945077847248, + "grad_norm": 1.9221512322442345, + "learning_rate": 7.906755190936333e-07, + "loss": 0.7332, + "step": 28595 + }, + { + "epoch": 0.876425156307466, + "grad_norm": 1.5912331760920613, + "learning_rate": 7.902887122635361e-07, + "loss": 0.5576, + "step": 28596 + }, + { + "epoch": 0.8764558048302071, + "grad_norm": 1.6725530130510047, + "learning_rate": 7.899019961790833e-07, + "loss": 0.6224, + "step": 28597 + }, + { + "epoch": 0.8764864533529484, + "grad_norm": 1.83980224298346, + "learning_rate": 7.895153708440828e-07, + "loss": 0.7773, + "step": 28598 + }, + { + "epoch": 0.8765171018756895, + "grad_norm": 1.8158192576064904, + "learning_rate": 7.891288362623418e-07, + "loss": 0.6913, + "step": 28599 + }, + { + "epoch": 0.8765477503984308, + "grad_norm": 1.7612983226960792, + "learning_rate": 7.887423924376725e-07, + "loss": 0.7577, + "step": 28600 + }, + { + "epoch": 0.876578398921172, + "grad_norm": 1.9346686764644765, + "learning_rate": 7.883560393738809e-07, + "loss": 0.6777, + "step": 28601 + }, + { + "epoch": 0.8766090474439132, + "grad_norm": 1.7078705840681452, + "learning_rate": 7.87969777074774e-07, + "loss": 0.6414, + "step": 28602 + }, + { + "epoch": 0.8766396959666544, + "grad_norm": 1.6770278170986375, + "learning_rate": 7.875836055441577e-07, + "loss": 0.6497, + "step": 28603 + }, + { + "epoch": 0.8766703444893956, + "grad_norm": 1.5768088740746582, + "learning_rate": 7.871975247858366e-07, + "loss": 0.6202, + "step": 28604 + }, + { + "epoch": 0.8767009930121368, + "grad_norm": 1.4881312458919347, + "learning_rate": 7.868115348036176e-07, + "loss": 0.6357, + "step": 28605 + }, + { + "epoch": 0.876731641534878, + "grad_norm": 1.5933170600623916, + "learning_rate": 7.864256356013011e-07, + "loss": 0.5263, + "step": 28606 + }, + { + "epoch": 0.8767622900576192, + "grad_norm": 1.7062974372223327, + "learning_rate": 7.860398271826875e-07, + "loss": 0.6292, + "step": 28607 + }, + { + "epoch": 0.8767929385803604, + "grad_norm": 1.6042834500032932, + "learning_rate": 7.856541095515846e-07, + "loss": 0.5522, + "step": 28608 + }, + { + "epoch": 0.8768235871031016, + "grad_norm": 1.6681600195186908, + "learning_rate": 7.852684827117896e-07, + "loss": 0.607, + "step": 28609 + }, + { + "epoch": 0.8768542356258429, + "grad_norm": 0.7048724913174492, + "learning_rate": 7.848829466670993e-07, + "loss": 0.5345, + "step": 28610 + }, + { + "epoch": 0.876884884148584, + "grad_norm": 0.6850923857309995, + "learning_rate": 7.844975014213153e-07, + "loss": 0.5327, + "step": 28611 + }, + { + "epoch": 0.8769155326713253, + "grad_norm": 1.681844480754018, + "learning_rate": 7.841121469782376e-07, + "loss": 0.64, + "step": 28612 + }, + { + "epoch": 0.8769461811940664, + "grad_norm": 1.6103532737361395, + "learning_rate": 7.837268833416589e-07, + "loss": 0.6212, + "step": 28613 + }, + { + "epoch": 0.8769768297168077, + "grad_norm": 1.7178317941438934, + "learning_rate": 7.833417105153773e-07, + "loss": 0.6152, + "step": 28614 + }, + { + "epoch": 0.8770074782395488, + "grad_norm": 1.5755683063751627, + "learning_rate": 7.829566285031875e-07, + "loss": 0.6314, + "step": 28615 + }, + { + "epoch": 0.8770381267622901, + "grad_norm": 1.6668596201384647, + "learning_rate": 7.825716373088865e-07, + "loss": 0.6735, + "step": 28616 + }, + { + "epoch": 0.8770687752850312, + "grad_norm": 1.5440907115821758, + "learning_rate": 7.821867369362657e-07, + "loss": 0.621, + "step": 28617 + }, + { + "epoch": 0.8770994238077725, + "grad_norm": 1.628538035662553, + "learning_rate": 7.818019273891153e-07, + "loss": 0.7028, + "step": 28618 + }, + { + "epoch": 0.8771300723305137, + "grad_norm": 0.6414620237694564, + "learning_rate": 7.81417208671229e-07, + "loss": 0.4935, + "step": 28619 + }, + { + "epoch": 0.8771607208532549, + "grad_norm": 1.608282692899645, + "learning_rate": 7.810325807864006e-07, + "loss": 0.5422, + "step": 28620 + }, + { + "epoch": 0.8771913693759961, + "grad_norm": 1.6746267889731299, + "learning_rate": 7.806480437384135e-07, + "loss": 0.5714, + "step": 28621 + }, + { + "epoch": 0.8772220178987373, + "grad_norm": 1.96864063347339, + "learning_rate": 7.802635975310613e-07, + "loss": 0.6534, + "step": 28622 + }, + { + "epoch": 0.8772526664214785, + "grad_norm": 1.5892897780637418, + "learning_rate": 7.7987924216813e-07, + "loss": 0.568, + "step": 28623 + }, + { + "epoch": 0.8772833149442197, + "grad_norm": 0.6689478242944449, + "learning_rate": 7.7949497765341e-07, + "loss": 0.5152, + "step": 28624 + }, + { + "epoch": 0.8773139634669609, + "grad_norm": 1.6176449347923818, + "learning_rate": 7.791108039906848e-07, + "loss": 0.608, + "step": 28625 + }, + { + "epoch": 0.8773446119897022, + "grad_norm": 1.7707886571484532, + "learning_rate": 7.787267211837368e-07, + "loss": 0.6905, + "step": 28626 + }, + { + "epoch": 0.8773752605124433, + "grad_norm": 1.71099955241231, + "learning_rate": 7.783427292363577e-07, + "loss": 0.6679, + "step": 28627 + }, + { + "epoch": 0.8774059090351845, + "grad_norm": 0.7270101217916791, + "learning_rate": 7.779588281523264e-07, + "loss": 0.5228, + "step": 28628 + }, + { + "epoch": 0.8774365575579257, + "grad_norm": 1.7927369932311206, + "learning_rate": 7.775750179354246e-07, + "loss": 0.7739, + "step": 28629 + }, + { + "epoch": 0.8774672060806669, + "grad_norm": 1.593018487271881, + "learning_rate": 7.771912985894359e-07, + "loss": 0.562, + "step": 28630 + }, + { + "epoch": 0.8774978546034081, + "grad_norm": 1.8426289622704435, + "learning_rate": 7.768076701181437e-07, + "loss": 0.7452, + "step": 28631 + }, + { + "epoch": 0.8775285031261493, + "grad_norm": 1.6174350117947158, + "learning_rate": 7.76424132525323e-07, + "loss": 0.6444, + "step": 28632 + }, + { + "epoch": 0.8775591516488905, + "grad_norm": 1.7300075615750354, + "learning_rate": 7.760406858147551e-07, + "loss": 0.6871, + "step": 28633 + }, + { + "epoch": 0.8775898001716317, + "grad_norm": 1.5075232247381827, + "learning_rate": 7.756573299902181e-07, + "loss": 0.6112, + "step": 28634 + }, + { + "epoch": 0.877620448694373, + "grad_norm": 1.9727280503704212, + "learning_rate": 7.752740650554924e-07, + "loss": 0.6044, + "step": 28635 + }, + { + "epoch": 0.8776510972171141, + "grad_norm": 1.7538889988267916, + "learning_rate": 7.748908910143504e-07, + "loss": 0.5799, + "step": 28636 + }, + { + "epoch": 0.8776817457398554, + "grad_norm": 1.7957648050985433, + "learning_rate": 7.745078078705659e-07, + "loss": 0.6171, + "step": 28637 + }, + { + "epoch": 0.8777123942625965, + "grad_norm": 1.739628323259614, + "learning_rate": 7.741248156279202e-07, + "loss": 0.5721, + "step": 28638 + }, + { + "epoch": 0.8777430427853378, + "grad_norm": 0.709239857458423, + "learning_rate": 7.737419142901825e-07, + "loss": 0.5195, + "step": 28639 + }, + { + "epoch": 0.8777736913080789, + "grad_norm": 1.5785711426149198, + "learning_rate": 7.733591038611244e-07, + "loss": 0.6608, + "step": 28640 + }, + { + "epoch": 0.8778043398308202, + "grad_norm": 1.8568309819782705, + "learning_rate": 7.729763843445204e-07, + "loss": 0.7068, + "step": 28641 + }, + { + "epoch": 0.8778349883535613, + "grad_norm": 1.583234426512178, + "learning_rate": 7.72593755744141e-07, + "loss": 0.6335, + "step": 28642 + }, + { + "epoch": 0.8778656368763026, + "grad_norm": 1.7938120047098032, + "learning_rate": 7.722112180637576e-07, + "loss": 0.6557, + "step": 28643 + }, + { + "epoch": 0.8778962853990437, + "grad_norm": 0.6703719237832694, + "learning_rate": 7.71828771307137e-07, + "loss": 0.5192, + "step": 28644 + }, + { + "epoch": 0.877926933921785, + "grad_norm": 1.7260763147847553, + "learning_rate": 7.714464154780487e-07, + "loss": 0.6458, + "step": 28645 + }, + { + "epoch": 0.8779575824445262, + "grad_norm": 1.7280764030742646, + "learning_rate": 7.710641505802608e-07, + "loss": 0.5468, + "step": 28646 + }, + { + "epoch": 0.8779882309672674, + "grad_norm": 1.5750148395314112, + "learning_rate": 7.7068197661754e-07, + "loss": 0.5625, + "step": 28647 + }, + { + "epoch": 0.8780188794900086, + "grad_norm": 1.6998123023283516, + "learning_rate": 7.702998935936479e-07, + "loss": 0.6228, + "step": 28648 + }, + { + "epoch": 0.8780495280127498, + "grad_norm": 1.8543061317063128, + "learning_rate": 7.699179015123548e-07, + "loss": 0.6053, + "step": 28649 + }, + { + "epoch": 0.878080176535491, + "grad_norm": 1.6149909053405376, + "learning_rate": 7.695360003774211e-07, + "loss": 0.6364, + "step": 28650 + }, + { + "epoch": 0.8781108250582322, + "grad_norm": 1.5508793505005691, + "learning_rate": 7.691541901926125e-07, + "loss": 0.5677, + "step": 28651 + }, + { + "epoch": 0.8781414735809734, + "grad_norm": 1.7980736216888393, + "learning_rate": 7.687724709616884e-07, + "loss": 0.7079, + "step": 28652 + }, + { + "epoch": 0.8781721221037146, + "grad_norm": 1.6263008099006908, + "learning_rate": 7.683908426884101e-07, + "loss": 0.6547, + "step": 28653 + }, + { + "epoch": 0.8782027706264558, + "grad_norm": 1.770490856359275, + "learning_rate": 7.680093053765414e-07, + "loss": 0.6234, + "step": 28654 + }, + { + "epoch": 0.8782334191491971, + "grad_norm": 1.6905059520657226, + "learning_rate": 7.67627859029837e-07, + "loss": 0.6082, + "step": 28655 + }, + { + "epoch": 0.8782640676719382, + "grad_norm": 1.7080266417644627, + "learning_rate": 7.672465036520571e-07, + "loss": 0.5811, + "step": 28656 + }, + { + "epoch": 0.8782947161946795, + "grad_norm": 0.6877894456499849, + "learning_rate": 7.668652392469622e-07, + "loss": 0.4998, + "step": 28657 + }, + { + "epoch": 0.8783253647174206, + "grad_norm": 0.6879738815606725, + "learning_rate": 7.664840658183059e-07, + "loss": 0.5439, + "step": 28658 + }, + { + "epoch": 0.8783560132401618, + "grad_norm": 1.8069122845250156, + "learning_rate": 7.661029833698419e-07, + "loss": 0.6707, + "step": 28659 + }, + { + "epoch": 0.878386661762903, + "grad_norm": 2.1403287674599416, + "learning_rate": 7.657219919053305e-07, + "loss": 0.6706, + "step": 28660 + }, + { + "epoch": 0.8784173102856442, + "grad_norm": 1.717491896376236, + "learning_rate": 7.65341091428522e-07, + "loss": 0.6438, + "step": 28661 + }, + { + "epoch": 0.8784479588083854, + "grad_norm": 1.7982040686763592, + "learning_rate": 7.649602819431712e-07, + "loss": 0.5885, + "step": 28662 + }, + { + "epoch": 0.8784786073311266, + "grad_norm": 0.6736252948845456, + "learning_rate": 7.645795634530284e-07, + "loss": 0.4967, + "step": 28663 + }, + { + "epoch": 0.8785092558538679, + "grad_norm": 1.6467887758804487, + "learning_rate": 7.641989359618462e-07, + "loss": 0.5987, + "step": 28664 + }, + { + "epoch": 0.878539904376609, + "grad_norm": 1.8699069516521407, + "learning_rate": 7.638183994733772e-07, + "loss": 0.6245, + "step": 28665 + }, + { + "epoch": 0.8785705528993503, + "grad_norm": 1.6211178050684223, + "learning_rate": 7.634379539913661e-07, + "loss": 0.5673, + "step": 28666 + }, + { + "epoch": 0.8786012014220914, + "grad_norm": 1.926323062585757, + "learning_rate": 7.630575995195644e-07, + "loss": 0.6674, + "step": 28667 + }, + { + "epoch": 0.8786318499448327, + "grad_norm": 1.6971183006779578, + "learning_rate": 7.626773360617212e-07, + "loss": 0.6236, + "step": 28668 + }, + { + "epoch": 0.8786624984675738, + "grad_norm": 0.7518302057355012, + "learning_rate": 7.622971636215804e-07, + "loss": 0.5072, + "step": 28669 + }, + { + "epoch": 0.8786931469903151, + "grad_norm": 0.6820441766822101, + "learning_rate": 7.61917082202891e-07, + "loss": 0.5187, + "step": 28670 + }, + { + "epoch": 0.8787237955130562, + "grad_norm": 1.9312465621809034, + "learning_rate": 7.615370918093934e-07, + "loss": 0.6997, + "step": 28671 + }, + { + "epoch": 0.8787544440357975, + "grad_norm": 1.7770385114651557, + "learning_rate": 7.611571924448358e-07, + "loss": 0.6772, + "step": 28672 + }, + { + "epoch": 0.8787850925585386, + "grad_norm": 1.5507769217401925, + "learning_rate": 7.607773841129618e-07, + "loss": 0.5812, + "step": 28673 + }, + { + "epoch": 0.8788157410812799, + "grad_norm": 1.7506215713765156, + "learning_rate": 7.603976668175095e-07, + "loss": 0.6296, + "step": 28674 + }, + { + "epoch": 0.8788463896040211, + "grad_norm": 1.8614181908927148, + "learning_rate": 7.600180405622238e-07, + "loss": 0.6048, + "step": 28675 + }, + { + "epoch": 0.8788770381267623, + "grad_norm": 1.8672824518576792, + "learning_rate": 7.59638505350847e-07, + "loss": 0.6341, + "step": 28676 + }, + { + "epoch": 0.8789076866495035, + "grad_norm": 0.6723812853184451, + "learning_rate": 7.592590611871131e-07, + "loss": 0.5208, + "step": 28677 + }, + { + "epoch": 0.8789383351722447, + "grad_norm": 0.694725322051993, + "learning_rate": 7.588797080747646e-07, + "loss": 0.5323, + "step": 28678 + }, + { + "epoch": 0.8789689836949859, + "grad_norm": 1.7390359326686646, + "learning_rate": 7.585004460175405e-07, + "loss": 0.5977, + "step": 28679 + }, + { + "epoch": 0.8789996322177271, + "grad_norm": 0.6580846912401298, + "learning_rate": 7.581212750191747e-07, + "loss": 0.5153, + "step": 28680 + }, + { + "epoch": 0.8790302807404683, + "grad_norm": 2.0025178829940278, + "learning_rate": 7.577421950834063e-07, + "loss": 0.6955, + "step": 28681 + }, + { + "epoch": 0.8790609292632096, + "grad_norm": 2.088607100391564, + "learning_rate": 7.573632062139658e-07, + "loss": 0.6595, + "step": 28682 + }, + { + "epoch": 0.8790915777859507, + "grad_norm": 0.6531844058314004, + "learning_rate": 7.569843084145923e-07, + "loss": 0.5182, + "step": 28683 + }, + { + "epoch": 0.879122226308692, + "grad_norm": 1.6784245522093764, + "learning_rate": 7.566055016890173e-07, + "loss": 0.63, + "step": 28684 + }, + { + "epoch": 0.8791528748314331, + "grad_norm": 2.1656874420926546, + "learning_rate": 7.562267860409733e-07, + "loss": 0.6793, + "step": 28685 + }, + { + "epoch": 0.8791835233541744, + "grad_norm": 1.6561686118960168, + "learning_rate": 7.558481614741908e-07, + "loss": 0.5096, + "step": 28686 + }, + { + "epoch": 0.8792141718769155, + "grad_norm": 1.718715056388469, + "learning_rate": 7.554696279924034e-07, + "loss": 0.6607, + "step": 28687 + }, + { + "epoch": 0.8792448203996568, + "grad_norm": 1.7037704440019514, + "learning_rate": 7.55091185599337e-07, + "loss": 0.6068, + "step": 28688 + }, + { + "epoch": 0.8792754689223979, + "grad_norm": 1.7159781667648522, + "learning_rate": 7.547128342987231e-07, + "loss": 0.6443, + "step": 28689 + }, + { + "epoch": 0.8793061174451391, + "grad_norm": 0.6942840199273207, + "learning_rate": 7.543345740942909e-07, + "loss": 0.518, + "step": 28690 + }, + { + "epoch": 0.8793367659678804, + "grad_norm": 1.7402726632091354, + "learning_rate": 7.539564049897641e-07, + "loss": 0.6733, + "step": 28691 + }, + { + "epoch": 0.8793674144906215, + "grad_norm": 1.6668230848336871, + "learning_rate": 7.535783269888719e-07, + "loss": 0.635, + "step": 28692 + }, + { + "epoch": 0.8793980630133628, + "grad_norm": 1.5470761824826018, + "learning_rate": 7.53200340095337e-07, + "loss": 0.5335, + "step": 28693 + }, + { + "epoch": 0.8794287115361039, + "grad_norm": 0.6861046008327067, + "learning_rate": 7.528224443128851e-07, + "loss": 0.5464, + "step": 28694 + }, + { + "epoch": 0.8794593600588452, + "grad_norm": 1.6512405682941695, + "learning_rate": 7.524446396452411e-07, + "loss": 0.6052, + "step": 28695 + }, + { + "epoch": 0.8794900085815863, + "grad_norm": 1.8506841558276477, + "learning_rate": 7.520669260961244e-07, + "loss": 0.6414, + "step": 28696 + }, + { + "epoch": 0.8795206571043276, + "grad_norm": 1.5676885998107861, + "learning_rate": 7.516893036692585e-07, + "loss": 0.7206, + "step": 28697 + }, + { + "epoch": 0.8795513056270687, + "grad_norm": 1.5825230035662385, + "learning_rate": 7.513117723683661e-07, + "loss": 0.616, + "step": 28698 + }, + { + "epoch": 0.87958195414981, + "grad_norm": 1.7023071492630724, + "learning_rate": 7.509343321971629e-07, + "loss": 0.5378, + "step": 28699 + }, + { + "epoch": 0.8796126026725511, + "grad_norm": 1.8541972395422208, + "learning_rate": 7.505569831593706e-07, + "loss": 0.6564, + "step": 28700 + }, + { + "epoch": 0.8796432511952924, + "grad_norm": 1.756052168595814, + "learning_rate": 7.501797252587084e-07, + "loss": 0.6043, + "step": 28701 + }, + { + "epoch": 0.8796738997180336, + "grad_norm": 1.6835369675505043, + "learning_rate": 7.49802558498891e-07, + "loss": 0.6668, + "step": 28702 + }, + { + "epoch": 0.8797045482407748, + "grad_norm": 1.752638267060999, + "learning_rate": 7.494254828836367e-07, + "loss": 0.6001, + "step": 28703 + }, + { + "epoch": 0.879735196763516, + "grad_norm": 0.6806168372671054, + "learning_rate": 7.490484984166568e-07, + "loss": 0.5231, + "step": 28704 + }, + { + "epoch": 0.8797658452862572, + "grad_norm": 1.5761170708103223, + "learning_rate": 7.486716051016718e-07, + "loss": 0.5463, + "step": 28705 + }, + { + "epoch": 0.8797964938089984, + "grad_norm": 1.6738382210979414, + "learning_rate": 7.482948029423931e-07, + "loss": 0.5715, + "step": 28706 + }, + { + "epoch": 0.8798271423317396, + "grad_norm": 1.7747554069282192, + "learning_rate": 7.479180919425322e-07, + "loss": 0.771, + "step": 28707 + }, + { + "epoch": 0.8798577908544808, + "grad_norm": 1.7504415958488633, + "learning_rate": 7.475414721058005e-07, + "loss": 0.6281, + "step": 28708 + }, + { + "epoch": 0.879888439377222, + "grad_norm": 0.6495984920485021, + "learning_rate": 7.471649434359119e-07, + "loss": 0.4891, + "step": 28709 + }, + { + "epoch": 0.8799190878999632, + "grad_norm": 1.632861245557445, + "learning_rate": 7.467885059365721e-07, + "loss": 0.5696, + "step": 28710 + }, + { + "epoch": 0.8799497364227045, + "grad_norm": 0.6855828052747719, + "learning_rate": 7.464121596114938e-07, + "loss": 0.5274, + "step": 28711 + }, + { + "epoch": 0.8799803849454456, + "grad_norm": 1.7907108257501694, + "learning_rate": 7.46035904464385e-07, + "loss": 0.7131, + "step": 28712 + }, + { + "epoch": 0.8800110334681869, + "grad_norm": 1.7299860174122446, + "learning_rate": 7.456597404989508e-07, + "loss": 0.6727, + "step": 28713 + }, + { + "epoch": 0.880041681990928, + "grad_norm": 1.71730389893396, + "learning_rate": 7.452836677189012e-07, + "loss": 0.6917, + "step": 28714 + }, + { + "epoch": 0.8800723305136693, + "grad_norm": 1.8610061850563915, + "learning_rate": 7.44907686127937e-07, + "loss": 0.683, + "step": 28715 + }, + { + "epoch": 0.8801029790364104, + "grad_norm": 1.5592577131287801, + "learning_rate": 7.44531795729766e-07, + "loss": 0.533, + "step": 28716 + }, + { + "epoch": 0.8801336275591517, + "grad_norm": 2.002900676929724, + "learning_rate": 7.441559965280921e-07, + "loss": 0.7543, + "step": 28717 + }, + { + "epoch": 0.8801642760818928, + "grad_norm": 1.8169137949249745, + "learning_rate": 7.437802885266165e-07, + "loss": 0.5719, + "step": 28718 + }, + { + "epoch": 0.8801949246046341, + "grad_norm": 1.510960118131776, + "learning_rate": 7.434046717290422e-07, + "loss": 0.6201, + "step": 28719 + }, + { + "epoch": 0.8802255731273753, + "grad_norm": 1.7796916417828217, + "learning_rate": 7.430291461390716e-07, + "loss": 0.7333, + "step": 28720 + }, + { + "epoch": 0.8802562216501164, + "grad_norm": 1.8991175024516804, + "learning_rate": 7.426537117604016e-07, + "loss": 0.6663, + "step": 28721 + }, + { + "epoch": 0.8802868701728577, + "grad_norm": 1.7193090539308808, + "learning_rate": 7.42278368596735e-07, + "loss": 0.6551, + "step": 28722 + }, + { + "epoch": 0.8803175186955988, + "grad_norm": 1.8936050501963417, + "learning_rate": 7.419031166517642e-07, + "loss": 0.6764, + "step": 28723 + }, + { + "epoch": 0.8803481672183401, + "grad_norm": 1.650915380536523, + "learning_rate": 7.415279559291944e-07, + "loss": 0.6325, + "step": 28724 + }, + { + "epoch": 0.8803788157410812, + "grad_norm": 1.8149048868304243, + "learning_rate": 7.411528864327188e-07, + "loss": 0.7424, + "step": 28725 + }, + { + "epoch": 0.8804094642638225, + "grad_norm": 1.7237330104979312, + "learning_rate": 7.407779081660316e-07, + "loss": 0.7423, + "step": 28726 + }, + { + "epoch": 0.8804401127865636, + "grad_norm": 1.7030410984221735, + "learning_rate": 7.404030211328284e-07, + "loss": 0.6363, + "step": 28727 + }, + { + "epoch": 0.8804707613093049, + "grad_norm": 0.6737419374346812, + "learning_rate": 7.40028225336804e-07, + "loss": 0.5288, + "step": 28728 + }, + { + "epoch": 0.8805014098320461, + "grad_norm": 1.7472572735675809, + "learning_rate": 7.396535207816502e-07, + "loss": 0.647, + "step": 28729 + }, + { + "epoch": 0.8805320583547873, + "grad_norm": 1.5870440339168868, + "learning_rate": 7.392789074710594e-07, + "loss": 0.6819, + "step": 28730 + }, + { + "epoch": 0.8805627068775285, + "grad_norm": 1.7159428257158293, + "learning_rate": 7.38904385408723e-07, + "loss": 0.5962, + "step": 28731 + }, + { + "epoch": 0.8805933554002697, + "grad_norm": 0.7222291627980197, + "learning_rate": 7.385299545983327e-07, + "loss": 0.5292, + "step": 28732 + }, + { + "epoch": 0.8806240039230109, + "grad_norm": 1.624091074093299, + "learning_rate": 7.381556150435775e-07, + "loss": 0.6511, + "step": 28733 + }, + { + "epoch": 0.8806546524457521, + "grad_norm": 1.7366238172247563, + "learning_rate": 7.377813667481404e-07, + "loss": 0.6559, + "step": 28734 + }, + { + "epoch": 0.8806853009684933, + "grad_norm": 1.7223210761933403, + "learning_rate": 7.37407209715717e-07, + "loss": 0.6028, + "step": 28735 + }, + { + "epoch": 0.8807159494912346, + "grad_norm": 1.6250081563261336, + "learning_rate": 7.370331439499901e-07, + "loss": 0.6083, + "step": 28736 + }, + { + "epoch": 0.8807465980139757, + "grad_norm": 1.6828144528903575, + "learning_rate": 7.366591694546432e-07, + "loss": 0.6342, + "step": 28737 + }, + { + "epoch": 0.880777246536717, + "grad_norm": 1.7455872837135453, + "learning_rate": 7.362852862333647e-07, + "loss": 0.6602, + "step": 28738 + }, + { + "epoch": 0.8808078950594581, + "grad_norm": 1.6845976733705583, + "learning_rate": 7.359114942898393e-07, + "loss": 0.6485, + "step": 28739 + }, + { + "epoch": 0.8808385435821994, + "grad_norm": 1.7278844584878785, + "learning_rate": 7.355377936277464e-07, + "loss": 0.6222, + "step": 28740 + }, + { + "epoch": 0.8808691921049405, + "grad_norm": 1.6466406806797096, + "learning_rate": 7.351641842507696e-07, + "loss": 0.6788, + "step": 28741 + }, + { + "epoch": 0.8808998406276818, + "grad_norm": 1.7257124879601924, + "learning_rate": 7.347906661625904e-07, + "loss": 0.6307, + "step": 28742 + }, + { + "epoch": 0.8809304891504229, + "grad_norm": 1.7182522651433063, + "learning_rate": 7.344172393668913e-07, + "loss": 0.6155, + "step": 28743 + }, + { + "epoch": 0.8809611376731642, + "grad_norm": 1.7592248759549811, + "learning_rate": 7.340439038673508e-07, + "loss": 0.6892, + "step": 28744 + }, + { + "epoch": 0.8809917861959053, + "grad_norm": 1.5291132557981317, + "learning_rate": 7.336706596676424e-07, + "loss": 0.5067, + "step": 28745 + }, + { + "epoch": 0.8810224347186466, + "grad_norm": 1.5607787371138178, + "learning_rate": 7.332975067714509e-07, + "loss": 0.6468, + "step": 28746 + }, + { + "epoch": 0.8810530832413878, + "grad_norm": 0.6564759360690735, + "learning_rate": 7.329244451824502e-07, + "loss": 0.4912, + "step": 28747 + }, + { + "epoch": 0.881083731764129, + "grad_norm": 1.5121984794258674, + "learning_rate": 7.32551474904315e-07, + "loss": 0.4962, + "step": 28748 + }, + { + "epoch": 0.8811143802868702, + "grad_norm": 1.7842151940391393, + "learning_rate": 7.321785959407202e-07, + "loss": 0.6891, + "step": 28749 + }, + { + "epoch": 0.8811450288096114, + "grad_norm": 2.1061643721860728, + "learning_rate": 7.318058082953417e-07, + "loss": 0.6887, + "step": 28750 + }, + { + "epoch": 0.8811756773323526, + "grad_norm": 1.6706203812191478, + "learning_rate": 7.314331119718543e-07, + "loss": 0.5992, + "step": 28751 + }, + { + "epoch": 0.8812063258550937, + "grad_norm": 1.6281745655612807, + "learning_rate": 7.310605069739251e-07, + "loss": 0.6083, + "step": 28752 + }, + { + "epoch": 0.881236974377835, + "grad_norm": 1.7096054020770204, + "learning_rate": 7.306879933052291e-07, + "loss": 0.5977, + "step": 28753 + }, + { + "epoch": 0.8812676229005761, + "grad_norm": 1.9333136616498157, + "learning_rate": 7.303155709694365e-07, + "loss": 0.7598, + "step": 28754 + }, + { + "epoch": 0.8812982714233174, + "grad_norm": 1.555868509641396, + "learning_rate": 7.299432399702167e-07, + "loss": 0.6267, + "step": 28755 + }, + { + "epoch": 0.8813289199460586, + "grad_norm": 0.6691917280499133, + "learning_rate": 7.295710003112355e-07, + "loss": 0.508, + "step": 28756 + }, + { + "epoch": 0.8813595684687998, + "grad_norm": 1.7349188066054129, + "learning_rate": 7.291988519961657e-07, + "loss": 0.6098, + "step": 28757 + }, + { + "epoch": 0.881390216991541, + "grad_norm": 1.8335311628079793, + "learning_rate": 7.288267950286709e-07, + "loss": 0.7331, + "step": 28758 + }, + { + "epoch": 0.8814208655142822, + "grad_norm": 0.6919751762373164, + "learning_rate": 7.284548294124183e-07, + "loss": 0.5208, + "step": 28759 + }, + { + "epoch": 0.8814515140370234, + "grad_norm": 1.9610040495895709, + "learning_rate": 7.280829551510716e-07, + "loss": 0.698, + "step": 28760 + }, + { + "epoch": 0.8814821625597646, + "grad_norm": 1.733198467500906, + "learning_rate": 7.277111722482954e-07, + "loss": 0.6534, + "step": 28761 + }, + { + "epoch": 0.8815128110825058, + "grad_norm": 1.861412150832413, + "learning_rate": 7.27339480707755e-07, + "loss": 0.7341, + "step": 28762 + }, + { + "epoch": 0.881543459605247, + "grad_norm": 1.8738826117462513, + "learning_rate": 7.269678805331104e-07, + "loss": 0.7354, + "step": 28763 + }, + { + "epoch": 0.8815741081279882, + "grad_norm": 1.6978611557416425, + "learning_rate": 7.265963717280234e-07, + "loss": 0.6942, + "step": 28764 + }, + { + "epoch": 0.8816047566507295, + "grad_norm": 1.6389645111694005, + "learning_rate": 7.262249542961563e-07, + "loss": 0.5649, + "step": 28765 + }, + { + "epoch": 0.8816354051734706, + "grad_norm": 1.673731932064329, + "learning_rate": 7.258536282411677e-07, + "loss": 0.6097, + "step": 28766 + }, + { + "epoch": 0.8816660536962119, + "grad_norm": 1.622583631538294, + "learning_rate": 7.254823935667155e-07, + "loss": 0.5602, + "step": 28767 + }, + { + "epoch": 0.881696702218953, + "grad_norm": 1.5407325038506106, + "learning_rate": 7.251112502764568e-07, + "loss": 0.6412, + "step": 28768 + }, + { + "epoch": 0.8817273507416943, + "grad_norm": 1.598865976544347, + "learning_rate": 7.24740198374051e-07, + "loss": 0.6364, + "step": 28769 + }, + { + "epoch": 0.8817579992644354, + "grad_norm": 1.5836792329520424, + "learning_rate": 7.243692378631551e-07, + "loss": 0.5938, + "step": 28770 + }, + { + "epoch": 0.8817886477871767, + "grad_norm": 1.8750886427485172, + "learning_rate": 7.239983687474194e-07, + "loss": 0.6867, + "step": 28771 + }, + { + "epoch": 0.8818192963099178, + "grad_norm": 1.703897206614191, + "learning_rate": 7.236275910305024e-07, + "loss": 0.6159, + "step": 28772 + }, + { + "epoch": 0.8818499448326591, + "grad_norm": 1.9710872111702364, + "learning_rate": 7.232569047160576e-07, + "loss": 0.5611, + "step": 28773 + }, + { + "epoch": 0.8818805933554003, + "grad_norm": 1.8102875145400001, + "learning_rate": 7.228863098077355e-07, + "loss": 0.6384, + "step": 28774 + }, + { + "epoch": 0.8819112418781415, + "grad_norm": 1.8355212537125285, + "learning_rate": 7.225158063091853e-07, + "loss": 0.575, + "step": 28775 + }, + { + "epoch": 0.8819418904008827, + "grad_norm": 2.0161583427823935, + "learning_rate": 7.221453942240642e-07, + "loss": 0.6102, + "step": 28776 + }, + { + "epoch": 0.8819725389236239, + "grad_norm": 1.6543947330028712, + "learning_rate": 7.217750735560158e-07, + "loss": 0.6557, + "step": 28777 + }, + { + "epoch": 0.8820031874463651, + "grad_norm": 0.6823708117364444, + "learning_rate": 7.21404844308694e-07, + "loss": 0.5139, + "step": 28778 + }, + { + "epoch": 0.8820338359691063, + "grad_norm": 1.7462609960293802, + "learning_rate": 7.210347064857425e-07, + "loss": 0.6006, + "step": 28779 + }, + { + "epoch": 0.8820644844918475, + "grad_norm": 0.6473129562798783, + "learning_rate": 7.206646600908107e-07, + "loss": 0.4799, + "step": 28780 + }, + { + "epoch": 0.8820951330145888, + "grad_norm": 1.5766765175184323, + "learning_rate": 7.202947051275456e-07, + "loss": 0.5129, + "step": 28781 + }, + { + "epoch": 0.8821257815373299, + "grad_norm": 1.7565681250853695, + "learning_rate": 7.199248415995886e-07, + "loss": 0.6658, + "step": 28782 + }, + { + "epoch": 0.882156430060071, + "grad_norm": 1.528348597907515, + "learning_rate": 7.195550695105868e-07, + "loss": 0.549, + "step": 28783 + }, + { + "epoch": 0.8821870785828123, + "grad_norm": 1.7494811517837514, + "learning_rate": 7.191853888641853e-07, + "loss": 0.7551, + "step": 28784 + }, + { + "epoch": 0.8822177271055535, + "grad_norm": 1.783913581940916, + "learning_rate": 7.188157996640255e-07, + "loss": 0.6196, + "step": 28785 + }, + { + "epoch": 0.8822483756282947, + "grad_norm": 1.8037145278776026, + "learning_rate": 7.184463019137444e-07, + "loss": 0.6866, + "step": 28786 + }, + { + "epoch": 0.8822790241510359, + "grad_norm": 1.6798071138324697, + "learning_rate": 7.180768956169893e-07, + "loss": 0.6149, + "step": 28787 + }, + { + "epoch": 0.8823096726737771, + "grad_norm": 1.7761555211834963, + "learning_rate": 7.17707580777397e-07, + "loss": 0.6203, + "step": 28788 + }, + { + "epoch": 0.8823403211965183, + "grad_norm": 1.7073749379664789, + "learning_rate": 7.173383573986081e-07, + "loss": 0.6649, + "step": 28789 + }, + { + "epoch": 0.8823709697192595, + "grad_norm": 1.682007013769545, + "learning_rate": 7.169692254842576e-07, + "loss": 0.6321, + "step": 28790 + }, + { + "epoch": 0.8824016182420007, + "grad_norm": 1.6744009857078637, + "learning_rate": 7.166001850379844e-07, + "loss": 0.6489, + "step": 28791 + }, + { + "epoch": 0.882432266764742, + "grad_norm": 1.5109430787785947, + "learning_rate": 7.162312360634261e-07, + "loss": 0.5778, + "step": 28792 + }, + { + "epoch": 0.8824629152874831, + "grad_norm": 1.6279460708246913, + "learning_rate": 7.158623785642161e-07, + "loss": 0.6834, + "step": 28793 + }, + { + "epoch": 0.8824935638102244, + "grad_norm": 1.5811028858905671, + "learning_rate": 7.154936125439882e-07, + "loss": 0.5647, + "step": 28794 + }, + { + "epoch": 0.8825242123329655, + "grad_norm": 1.698000083416949, + "learning_rate": 7.151249380063807e-07, + "loss": 0.6469, + "step": 28795 + }, + { + "epoch": 0.8825548608557068, + "grad_norm": 1.7146819746188287, + "learning_rate": 7.147563549550196e-07, + "loss": 0.6149, + "step": 28796 + }, + { + "epoch": 0.8825855093784479, + "grad_norm": 1.6349697438303856, + "learning_rate": 7.143878633935408e-07, + "loss": 0.5544, + "step": 28797 + }, + { + "epoch": 0.8826161579011892, + "grad_norm": 1.717679857650366, + "learning_rate": 7.140194633255759e-07, + "loss": 0.5133, + "step": 28798 + }, + { + "epoch": 0.8826468064239303, + "grad_norm": 1.6291088686769921, + "learning_rate": 7.136511547547509e-07, + "loss": 0.6195, + "step": 28799 + }, + { + "epoch": 0.8826774549466716, + "grad_norm": 2.1274234694326877, + "learning_rate": 7.132829376846984e-07, + "loss": 0.6829, + "step": 28800 + }, + { + "epoch": 0.8827081034694128, + "grad_norm": 1.6153235625348503, + "learning_rate": 7.129148121190444e-07, + "loss": 0.6939, + "step": 28801 + }, + { + "epoch": 0.882738751992154, + "grad_norm": 1.9058607530849563, + "learning_rate": 7.12546778061417e-07, + "loss": 0.6866, + "step": 28802 + }, + { + "epoch": 0.8827694005148952, + "grad_norm": 1.6245644217317903, + "learning_rate": 7.121788355154435e-07, + "loss": 0.6283, + "step": 28803 + }, + { + "epoch": 0.8828000490376364, + "grad_norm": 1.6618737981482834, + "learning_rate": 7.118109844847476e-07, + "loss": 0.5918, + "step": 28804 + }, + { + "epoch": 0.8828306975603776, + "grad_norm": 1.6991088024280465, + "learning_rate": 7.114432249729541e-07, + "loss": 0.6281, + "step": 28805 + }, + { + "epoch": 0.8828613460831188, + "grad_norm": 1.7157535583689498, + "learning_rate": 7.110755569836881e-07, + "loss": 0.5983, + "step": 28806 + }, + { + "epoch": 0.88289199460586, + "grad_norm": 0.6921308027333116, + "learning_rate": 7.107079805205707e-07, + "loss": 0.5294, + "step": 28807 + }, + { + "epoch": 0.8829226431286012, + "grad_norm": 1.8184018598518827, + "learning_rate": 7.10340495587224e-07, + "loss": 0.7097, + "step": 28808 + }, + { + "epoch": 0.8829532916513424, + "grad_norm": 1.7605494495981662, + "learning_rate": 7.099731021872702e-07, + "loss": 0.5653, + "step": 28809 + }, + { + "epoch": 0.8829839401740837, + "grad_norm": 1.6659906557825992, + "learning_rate": 7.096058003243278e-07, + "loss": 0.6299, + "step": 28810 + }, + { + "epoch": 0.8830145886968248, + "grad_norm": 1.5241097250110358, + "learning_rate": 7.092385900020171e-07, + "loss": 0.6832, + "step": 28811 + }, + { + "epoch": 0.8830452372195661, + "grad_norm": 1.7213249849541652, + "learning_rate": 7.088714712239553e-07, + "loss": 0.6161, + "step": 28812 + }, + { + "epoch": 0.8830758857423072, + "grad_norm": 1.7418348477713639, + "learning_rate": 7.085044439937594e-07, + "loss": 0.6323, + "step": 28813 + }, + { + "epoch": 0.8831065342650484, + "grad_norm": 1.8599043739308208, + "learning_rate": 7.081375083150477e-07, + "loss": 0.7277, + "step": 28814 + }, + { + "epoch": 0.8831371827877896, + "grad_norm": 0.6699892373316118, + "learning_rate": 7.077706641914339e-07, + "loss": 0.5278, + "step": 28815 + }, + { + "epoch": 0.8831678313105308, + "grad_norm": 1.614670599226331, + "learning_rate": 7.07403911626533e-07, + "loss": 0.7059, + "step": 28816 + }, + { + "epoch": 0.883198479833272, + "grad_norm": 0.659371193888276, + "learning_rate": 7.070372506239598e-07, + "loss": 0.5043, + "step": 28817 + }, + { + "epoch": 0.8832291283560132, + "grad_norm": 1.7520839965161659, + "learning_rate": 7.066706811873259e-07, + "loss": 0.6416, + "step": 28818 + }, + { + "epoch": 0.8832597768787545, + "grad_norm": 1.5670905097443255, + "learning_rate": 7.063042033202439e-07, + "loss": 0.5179, + "step": 28819 + }, + { + "epoch": 0.8832904254014956, + "grad_norm": 1.7910326228271387, + "learning_rate": 7.059378170263231e-07, + "loss": 0.6733, + "step": 28820 + }, + { + "epoch": 0.8833210739242369, + "grad_norm": 1.7021744757164778, + "learning_rate": 7.055715223091763e-07, + "loss": 0.643, + "step": 28821 + }, + { + "epoch": 0.883351722446978, + "grad_norm": 1.6748164057511428, + "learning_rate": 7.052053191724117e-07, + "loss": 0.648, + "step": 28822 + }, + { + "epoch": 0.8833823709697193, + "grad_norm": 1.7207943106007721, + "learning_rate": 7.048392076196364e-07, + "loss": 0.731, + "step": 28823 + }, + { + "epoch": 0.8834130194924604, + "grad_norm": 0.6631179021158911, + "learning_rate": 7.044731876544575e-07, + "loss": 0.4872, + "step": 28824 + }, + { + "epoch": 0.8834436680152017, + "grad_norm": 0.6745960980297402, + "learning_rate": 7.041072592804854e-07, + "loss": 0.5319, + "step": 28825 + }, + { + "epoch": 0.8834743165379428, + "grad_norm": 1.6903741369968572, + "learning_rate": 7.037414225013206e-07, + "loss": 0.6362, + "step": 28826 + }, + { + "epoch": 0.8835049650606841, + "grad_norm": 0.6704261354099073, + "learning_rate": 7.033756773205713e-07, + "loss": 0.5106, + "step": 28827 + }, + { + "epoch": 0.8835356135834253, + "grad_norm": 1.6945831081164364, + "learning_rate": 7.030100237418403e-07, + "loss": 0.6508, + "step": 28828 + }, + { + "epoch": 0.8835662621061665, + "grad_norm": 1.848142164481312, + "learning_rate": 7.0264446176873e-07, + "loss": 0.713, + "step": 28829 + }, + { + "epoch": 0.8835969106289077, + "grad_norm": 1.4846759187906173, + "learning_rate": 7.022789914048434e-07, + "loss": 0.6366, + "step": 28830 + }, + { + "epoch": 0.8836275591516489, + "grad_norm": 1.61794470098669, + "learning_rate": 7.019136126537773e-07, + "loss": 0.5896, + "step": 28831 + }, + { + "epoch": 0.8836582076743901, + "grad_norm": 1.6125623008624845, + "learning_rate": 7.015483255191391e-07, + "loss": 0.6415, + "step": 28832 + }, + { + "epoch": 0.8836888561971313, + "grad_norm": 1.7188796217562097, + "learning_rate": 7.011831300045247e-07, + "loss": 0.7332, + "step": 28833 + }, + { + "epoch": 0.8837195047198725, + "grad_norm": 1.7536956031018895, + "learning_rate": 7.0081802611353e-07, + "loss": 0.659, + "step": 28834 + }, + { + "epoch": 0.8837501532426137, + "grad_norm": 1.9388889295286633, + "learning_rate": 7.004530138497545e-07, + "loss": 0.7343, + "step": 28835 + }, + { + "epoch": 0.8837808017653549, + "grad_norm": 1.7420659183707832, + "learning_rate": 7.000880932167964e-07, + "loss": 0.6926, + "step": 28836 + }, + { + "epoch": 0.8838114502880962, + "grad_norm": 1.7089731369585481, + "learning_rate": 6.997232642182484e-07, + "loss": 0.6122, + "step": 28837 + }, + { + "epoch": 0.8838420988108373, + "grad_norm": 0.6661652018829328, + "learning_rate": 6.993585268577063e-07, + "loss": 0.5112, + "step": 28838 + }, + { + "epoch": 0.8838727473335786, + "grad_norm": 1.7378635352374041, + "learning_rate": 6.989938811387665e-07, + "loss": 0.6199, + "step": 28839 + }, + { + "epoch": 0.8839033958563197, + "grad_norm": 1.761732777676736, + "learning_rate": 6.98629327065018e-07, + "loss": 0.6299, + "step": 28840 + }, + { + "epoch": 0.883934044379061, + "grad_norm": 0.6555840839099096, + "learning_rate": 6.982648646400569e-07, + "loss": 0.5112, + "step": 28841 + }, + { + "epoch": 0.8839646929018021, + "grad_norm": 1.7687735584421507, + "learning_rate": 6.979004938674672e-07, + "loss": 0.5757, + "step": 28842 + }, + { + "epoch": 0.8839953414245434, + "grad_norm": 1.6942747836674685, + "learning_rate": 6.97536214750848e-07, + "loss": 0.7442, + "step": 28843 + }, + { + "epoch": 0.8840259899472845, + "grad_norm": 1.4694917028143066, + "learning_rate": 6.971720272937854e-07, + "loss": 0.6202, + "step": 28844 + }, + { + "epoch": 0.8840566384700257, + "grad_norm": 1.7642953135827018, + "learning_rate": 6.968079314998643e-07, + "loss": 0.5991, + "step": 28845 + }, + { + "epoch": 0.884087286992767, + "grad_norm": 0.6411562476377711, + "learning_rate": 6.964439273726753e-07, + "loss": 0.5073, + "step": 28846 + }, + { + "epoch": 0.8841179355155081, + "grad_norm": 1.9056673884165005, + "learning_rate": 6.960800149158064e-07, + "loss": 0.6247, + "step": 28847 + }, + { + "epoch": 0.8841485840382494, + "grad_norm": 1.4813744872871508, + "learning_rate": 6.957161941328405e-07, + "loss": 0.5823, + "step": 28848 + }, + { + "epoch": 0.8841792325609905, + "grad_norm": 0.6663078165361231, + "learning_rate": 6.953524650273624e-07, + "loss": 0.5156, + "step": 28849 + }, + { + "epoch": 0.8842098810837318, + "grad_norm": 1.862647579185466, + "learning_rate": 6.949888276029581e-07, + "loss": 0.6191, + "step": 28850 + }, + { + "epoch": 0.8842405296064729, + "grad_norm": 0.6712019613404903, + "learning_rate": 6.946252818632115e-07, + "loss": 0.5253, + "step": 28851 + }, + { + "epoch": 0.8842711781292142, + "grad_norm": 1.7074496226288345, + "learning_rate": 6.942618278117019e-07, + "loss": 0.6356, + "step": 28852 + }, + { + "epoch": 0.8843018266519553, + "grad_norm": 1.7933270924010853, + "learning_rate": 6.938984654520086e-07, + "loss": 0.7243, + "step": 28853 + }, + { + "epoch": 0.8843324751746966, + "grad_norm": 1.7643865705130533, + "learning_rate": 6.935351947877189e-07, + "loss": 0.6956, + "step": 28854 + }, + { + "epoch": 0.8843631236974377, + "grad_norm": 0.6856528928977327, + "learning_rate": 6.931720158224064e-07, + "loss": 0.5288, + "step": 28855 + }, + { + "epoch": 0.884393772220179, + "grad_norm": 1.6496260304741666, + "learning_rate": 6.928089285596518e-07, + "loss": 0.5644, + "step": 28856 + }, + { + "epoch": 0.8844244207429202, + "grad_norm": 1.8220156938652887, + "learning_rate": 6.924459330030309e-07, + "loss": 0.6494, + "step": 28857 + }, + { + "epoch": 0.8844550692656614, + "grad_norm": 1.6536254513928565, + "learning_rate": 6.92083029156121e-07, + "loss": 0.6188, + "step": 28858 + }, + { + "epoch": 0.8844857177884026, + "grad_norm": 1.856634811882663, + "learning_rate": 6.917202170225013e-07, + "loss": 0.7046, + "step": 28859 + }, + { + "epoch": 0.8845163663111438, + "grad_norm": 2.29762117608363, + "learning_rate": 6.913574966057423e-07, + "loss": 0.7016, + "step": 28860 + }, + { + "epoch": 0.884547014833885, + "grad_norm": 1.7865261274513795, + "learning_rate": 6.909948679094192e-07, + "loss": 0.6441, + "step": 28861 + }, + { + "epoch": 0.8845776633566262, + "grad_norm": 1.6472966636109057, + "learning_rate": 6.906323309371066e-07, + "loss": 0.5949, + "step": 28862 + }, + { + "epoch": 0.8846083118793674, + "grad_norm": 1.772177808520946, + "learning_rate": 6.902698856923762e-07, + "loss": 0.7385, + "step": 28863 + }, + { + "epoch": 0.8846389604021087, + "grad_norm": 1.7939879096697073, + "learning_rate": 6.899075321787974e-07, + "loss": 0.695, + "step": 28864 + }, + { + "epoch": 0.8846696089248498, + "grad_norm": 2.2437310445388006, + "learning_rate": 6.895452703999406e-07, + "loss": 0.6638, + "step": 28865 + }, + { + "epoch": 0.8847002574475911, + "grad_norm": 1.7615229973785547, + "learning_rate": 6.891831003593785e-07, + "loss": 0.6645, + "step": 28866 + }, + { + "epoch": 0.8847309059703322, + "grad_norm": 1.7348710247482213, + "learning_rate": 6.888210220606761e-07, + "loss": 0.6177, + "step": 28867 + }, + { + "epoch": 0.8847615544930735, + "grad_norm": 1.6274093098992348, + "learning_rate": 6.884590355074028e-07, + "loss": 0.6424, + "step": 28868 + }, + { + "epoch": 0.8847922030158146, + "grad_norm": 1.8380407621071426, + "learning_rate": 6.880971407031245e-07, + "loss": 0.7444, + "step": 28869 + }, + { + "epoch": 0.8848228515385559, + "grad_norm": 1.581748701173326, + "learning_rate": 6.877353376514107e-07, + "loss": 0.6323, + "step": 28870 + }, + { + "epoch": 0.884853500061297, + "grad_norm": 1.7074540868654429, + "learning_rate": 6.873736263558217e-07, + "loss": 0.6603, + "step": 28871 + }, + { + "epoch": 0.8848841485840383, + "grad_norm": 1.9285623116465909, + "learning_rate": 6.870120068199205e-07, + "loss": 0.686, + "step": 28872 + }, + { + "epoch": 0.8849147971067794, + "grad_norm": 1.9133931162004985, + "learning_rate": 6.866504790472762e-07, + "loss": 0.7123, + "step": 28873 + }, + { + "epoch": 0.8849454456295207, + "grad_norm": 1.7984582390870625, + "learning_rate": 6.862890430414471e-07, + "loss": 0.5777, + "step": 28874 + }, + { + "epoch": 0.8849760941522619, + "grad_norm": 0.6598078156520177, + "learning_rate": 6.859276988059937e-07, + "loss": 0.501, + "step": 28875 + }, + { + "epoch": 0.885006742675003, + "grad_norm": 1.6178509715835696, + "learning_rate": 6.855664463444778e-07, + "loss": 0.5162, + "step": 28876 + }, + { + "epoch": 0.8850373911977443, + "grad_norm": 0.6719915689371346, + "learning_rate": 6.852052856604585e-07, + "loss": 0.5083, + "step": 28877 + }, + { + "epoch": 0.8850680397204854, + "grad_norm": 1.9172500609946512, + "learning_rate": 6.848442167574975e-07, + "loss": 0.5974, + "step": 28878 + }, + { + "epoch": 0.8850986882432267, + "grad_norm": 1.7124569694576113, + "learning_rate": 6.844832396391476e-07, + "loss": 0.6482, + "step": 28879 + }, + { + "epoch": 0.8851293367659678, + "grad_norm": 1.5939168707734415, + "learning_rate": 6.84122354308967e-07, + "loss": 0.6621, + "step": 28880 + }, + { + "epoch": 0.8851599852887091, + "grad_norm": 1.7148335735216682, + "learning_rate": 6.83761560770515e-07, + "loss": 0.5895, + "step": 28881 + }, + { + "epoch": 0.8851906338114502, + "grad_norm": 1.6157791903195182, + "learning_rate": 6.834008590273445e-07, + "loss": 0.6566, + "step": 28882 + }, + { + "epoch": 0.8852212823341915, + "grad_norm": 0.6991559675666222, + "learning_rate": 6.830402490830046e-07, + "loss": 0.5266, + "step": 28883 + }, + { + "epoch": 0.8852519308569327, + "grad_norm": 0.6757359979809491, + "learning_rate": 6.826797309410571e-07, + "loss": 0.534, + "step": 28884 + }, + { + "epoch": 0.8852825793796739, + "grad_norm": 1.6253233721607088, + "learning_rate": 6.823193046050481e-07, + "loss": 0.5124, + "step": 28885 + }, + { + "epoch": 0.8853132279024151, + "grad_norm": 0.6668865719516617, + "learning_rate": 6.819589700785323e-07, + "loss": 0.4941, + "step": 28886 + }, + { + "epoch": 0.8853438764251563, + "grad_norm": 1.7884983046286733, + "learning_rate": 6.815987273650582e-07, + "loss": 0.642, + "step": 28887 + }, + { + "epoch": 0.8853745249478975, + "grad_norm": 1.7735326817693322, + "learning_rate": 6.81238576468175e-07, + "loss": 0.6656, + "step": 28888 + }, + { + "epoch": 0.8854051734706387, + "grad_norm": 1.7184795249168, + "learning_rate": 6.808785173914345e-07, + "loss": 0.7225, + "step": 28889 + }, + { + "epoch": 0.8854358219933799, + "grad_norm": 1.505919223243777, + "learning_rate": 6.805185501383815e-07, + "loss": 0.6297, + "step": 28890 + }, + { + "epoch": 0.8854664705161212, + "grad_norm": 1.8758515875497093, + "learning_rate": 6.801586747125633e-07, + "loss": 0.6892, + "step": 28891 + }, + { + "epoch": 0.8854971190388623, + "grad_norm": 0.6513146665542286, + "learning_rate": 6.797988911175268e-07, + "loss": 0.4914, + "step": 28892 + }, + { + "epoch": 0.8855277675616036, + "grad_norm": 1.6520765227324568, + "learning_rate": 6.794391993568184e-07, + "loss": 0.6724, + "step": 28893 + }, + { + "epoch": 0.8855584160843447, + "grad_norm": 1.8718392788969913, + "learning_rate": 6.79079599433976e-07, + "loss": 0.6148, + "step": 28894 + }, + { + "epoch": 0.885589064607086, + "grad_norm": 1.7437053761325279, + "learning_rate": 6.787200913525505e-07, + "loss": 0.5591, + "step": 28895 + }, + { + "epoch": 0.8856197131298271, + "grad_norm": 0.6649213411910184, + "learning_rate": 6.783606751160788e-07, + "loss": 0.5247, + "step": 28896 + }, + { + "epoch": 0.8856503616525684, + "grad_norm": 1.7331956434018079, + "learning_rate": 6.780013507281069e-07, + "loss": 0.5793, + "step": 28897 + }, + { + "epoch": 0.8856810101753095, + "grad_norm": 1.6112310754510382, + "learning_rate": 6.776421181921699e-07, + "loss": 0.6227, + "step": 28898 + }, + { + "epoch": 0.8857116586980508, + "grad_norm": 1.7052134644403807, + "learning_rate": 6.772829775118118e-07, + "loss": 0.653, + "step": 28899 + }, + { + "epoch": 0.885742307220792, + "grad_norm": 0.667582912750823, + "learning_rate": 6.769239286905704e-07, + "loss": 0.5171, + "step": 28900 + }, + { + "epoch": 0.8857729557435332, + "grad_norm": 0.7039565436504285, + "learning_rate": 6.765649717319823e-07, + "loss": 0.518, + "step": 28901 + }, + { + "epoch": 0.8858036042662744, + "grad_norm": 1.652191288038818, + "learning_rate": 6.762061066395842e-07, + "loss": 0.6555, + "step": 28902 + }, + { + "epoch": 0.8858342527890156, + "grad_norm": 1.483689069392023, + "learning_rate": 6.758473334169146e-07, + "loss": 0.6138, + "step": 28903 + }, + { + "epoch": 0.8858649013117568, + "grad_norm": 1.787007798711784, + "learning_rate": 6.754886520675064e-07, + "loss": 0.6342, + "step": 28904 + }, + { + "epoch": 0.885895549834498, + "grad_norm": 1.824320092025457, + "learning_rate": 6.751300625948932e-07, + "loss": 0.6244, + "step": 28905 + }, + { + "epoch": 0.8859261983572392, + "grad_norm": 0.6639903813271265, + "learning_rate": 6.747715650026109e-07, + "loss": 0.5146, + "step": 28906 + }, + { + "epoch": 0.8859568468799803, + "grad_norm": 1.782717947190779, + "learning_rate": 6.744131592941894e-07, + "loss": 0.5274, + "step": 28907 + }, + { + "epoch": 0.8859874954027216, + "grad_norm": 1.6701046889562066, + "learning_rate": 6.740548454731622e-07, + "loss": 0.5829, + "step": 28908 + }, + { + "epoch": 0.8860181439254627, + "grad_norm": 0.7245504068025135, + "learning_rate": 6.736966235430575e-07, + "loss": 0.5326, + "step": 28909 + }, + { + "epoch": 0.886048792448204, + "grad_norm": 1.7325543313128013, + "learning_rate": 6.733384935074061e-07, + "loss": 0.6067, + "step": 28910 + }, + { + "epoch": 0.8860794409709452, + "grad_norm": 1.577234596134339, + "learning_rate": 6.729804553697383e-07, + "loss": 0.5471, + "step": 28911 + }, + { + "epoch": 0.8861100894936864, + "grad_norm": 1.8843034279483681, + "learning_rate": 6.726225091335792e-07, + "loss": 0.6516, + "step": 28912 + }, + { + "epoch": 0.8861407380164276, + "grad_norm": 1.6412710139057356, + "learning_rate": 6.722646548024558e-07, + "loss": 0.5954, + "step": 28913 + }, + { + "epoch": 0.8861713865391688, + "grad_norm": 1.7048196325636475, + "learning_rate": 6.719068923798988e-07, + "loss": 0.6664, + "step": 28914 + }, + { + "epoch": 0.88620203506191, + "grad_norm": 2.3271924009670886, + "learning_rate": 6.715492218694275e-07, + "loss": 0.7904, + "step": 28915 + }, + { + "epoch": 0.8862326835846512, + "grad_norm": 1.8185744968468573, + "learning_rate": 6.711916432745691e-07, + "loss": 0.653, + "step": 28916 + }, + { + "epoch": 0.8862633321073924, + "grad_norm": 1.78653756721392, + "learning_rate": 6.708341565988463e-07, + "loss": 0.5832, + "step": 28917 + }, + { + "epoch": 0.8862939806301336, + "grad_norm": 1.8788155079532176, + "learning_rate": 6.704767618457808e-07, + "loss": 0.5976, + "step": 28918 + }, + { + "epoch": 0.8863246291528748, + "grad_norm": 1.9741131268346142, + "learning_rate": 6.701194590188964e-07, + "loss": 0.5926, + "step": 28919 + }, + { + "epoch": 0.8863552776756161, + "grad_norm": 1.5976865873138315, + "learning_rate": 6.697622481217104e-07, + "loss": 0.5849, + "step": 28920 + }, + { + "epoch": 0.8863859261983572, + "grad_norm": 1.7038285831906936, + "learning_rate": 6.694051291577452e-07, + "loss": 0.6611, + "step": 28921 + }, + { + "epoch": 0.8864165747210985, + "grad_norm": 1.6082979085238323, + "learning_rate": 6.690481021305184e-07, + "loss": 0.7247, + "step": 28922 + }, + { + "epoch": 0.8864472232438396, + "grad_norm": 1.5745896590848805, + "learning_rate": 6.686911670435481e-07, + "loss": 0.6406, + "step": 28923 + }, + { + "epoch": 0.8864778717665809, + "grad_norm": 1.8251573897500464, + "learning_rate": 6.683343239003504e-07, + "loss": 0.609, + "step": 28924 + }, + { + "epoch": 0.886508520289322, + "grad_norm": 1.8633674453896385, + "learning_rate": 6.679775727044446e-07, + "loss": 0.7072, + "step": 28925 + }, + { + "epoch": 0.8865391688120633, + "grad_norm": 1.7916437581491076, + "learning_rate": 6.676209134593414e-07, + "loss": 0.7047, + "step": 28926 + }, + { + "epoch": 0.8865698173348044, + "grad_norm": 1.6687952342232772, + "learning_rate": 6.67264346168558e-07, + "loss": 0.6572, + "step": 28927 + }, + { + "epoch": 0.8866004658575457, + "grad_norm": 1.662212441843934, + "learning_rate": 6.669078708356058e-07, + "loss": 0.6379, + "step": 28928 + }, + { + "epoch": 0.8866311143802869, + "grad_norm": 1.486846052496009, + "learning_rate": 6.665514874639989e-07, + "loss": 0.5121, + "step": 28929 + }, + { + "epoch": 0.8866617629030281, + "grad_norm": 0.6721330304249692, + "learning_rate": 6.661951960572499e-07, + "loss": 0.5113, + "step": 28930 + }, + { + "epoch": 0.8866924114257693, + "grad_norm": 1.7804518632467181, + "learning_rate": 6.65838996618865e-07, + "loss": 0.7132, + "step": 28931 + }, + { + "epoch": 0.8867230599485105, + "grad_norm": 1.791061459508585, + "learning_rate": 6.654828891523579e-07, + "loss": 0.6979, + "step": 28932 + }, + { + "epoch": 0.8867537084712517, + "grad_norm": 1.590079718804271, + "learning_rate": 6.651268736612371e-07, + "loss": 0.5979, + "step": 28933 + }, + { + "epoch": 0.8867843569939929, + "grad_norm": 1.829094342654451, + "learning_rate": 6.647709501490085e-07, + "loss": 0.6009, + "step": 28934 + }, + { + "epoch": 0.8868150055167341, + "grad_norm": 1.7814732894291354, + "learning_rate": 6.644151186191805e-07, + "loss": 0.5743, + "step": 28935 + }, + { + "epoch": 0.8868456540394754, + "grad_norm": 0.700682854858632, + "learning_rate": 6.640593790752603e-07, + "loss": 0.5581, + "step": 28936 + }, + { + "epoch": 0.8868763025622165, + "grad_norm": 0.695776653182606, + "learning_rate": 6.637037315207495e-07, + "loss": 0.533, + "step": 28937 + }, + { + "epoch": 0.8869069510849577, + "grad_norm": 1.4931180830468134, + "learning_rate": 6.633481759591564e-07, + "loss": 0.5527, + "step": 28938 + }, + { + "epoch": 0.8869375996076989, + "grad_norm": 1.5292861415121175, + "learning_rate": 6.629927123939805e-07, + "loss": 0.6846, + "step": 28939 + }, + { + "epoch": 0.8869682481304401, + "grad_norm": 1.6942917037902923, + "learning_rate": 6.626373408287279e-07, + "loss": 0.7451, + "step": 28940 + }, + { + "epoch": 0.8869988966531813, + "grad_norm": 1.8569328602266286, + "learning_rate": 6.622820612669001e-07, + "loss": 0.6804, + "step": 28941 + }, + { + "epoch": 0.8870295451759225, + "grad_norm": 1.8052478453228298, + "learning_rate": 6.619268737119943e-07, + "loss": 0.6342, + "step": 28942 + }, + { + "epoch": 0.8870601936986637, + "grad_norm": 1.802064471814656, + "learning_rate": 6.615717781675113e-07, + "loss": 0.6019, + "step": 28943 + }, + { + "epoch": 0.8870908422214049, + "grad_norm": 1.8409704795297024, + "learning_rate": 6.612167746369535e-07, + "loss": 0.6699, + "step": 28944 + }, + { + "epoch": 0.8871214907441461, + "grad_norm": 1.7340084045538275, + "learning_rate": 6.608618631238151e-07, + "loss": 0.7093, + "step": 28945 + }, + { + "epoch": 0.8871521392668873, + "grad_norm": 1.7133925274025463, + "learning_rate": 6.60507043631593e-07, + "loss": 0.5563, + "step": 28946 + }, + { + "epoch": 0.8871827877896286, + "grad_norm": 1.8673484292176092, + "learning_rate": 6.601523161637868e-07, + "loss": 0.6979, + "step": 28947 + }, + { + "epoch": 0.8872134363123697, + "grad_norm": 1.8502802544782806, + "learning_rate": 6.597976807238882e-07, + "loss": 0.6481, + "step": 28948 + }, + { + "epoch": 0.887244084835111, + "grad_norm": 1.929225021374438, + "learning_rate": 6.594431373153942e-07, + "loss": 0.6942, + "step": 28949 + }, + { + "epoch": 0.8872747333578521, + "grad_norm": 1.6046770276437885, + "learning_rate": 6.590886859417955e-07, + "loss": 0.6348, + "step": 28950 + }, + { + "epoch": 0.8873053818805934, + "grad_norm": 0.6846807823395925, + "learning_rate": 6.58734326606586e-07, + "loss": 0.509, + "step": 28951 + }, + { + "epoch": 0.8873360304033345, + "grad_norm": 1.7609362936293158, + "learning_rate": 6.583800593132583e-07, + "loss": 0.5992, + "step": 28952 + }, + { + "epoch": 0.8873666789260758, + "grad_norm": 1.8932152529280337, + "learning_rate": 6.580258840653009e-07, + "loss": 0.5775, + "step": 28953 + }, + { + "epoch": 0.8873973274488169, + "grad_norm": 1.63530857517409, + "learning_rate": 6.576718008662042e-07, + "loss": 0.5528, + "step": 28954 + }, + { + "epoch": 0.8874279759715582, + "grad_norm": 1.494592444361107, + "learning_rate": 6.573178097194599e-07, + "loss": 0.5275, + "step": 28955 + }, + { + "epoch": 0.8874586244942994, + "grad_norm": 1.9403857492031673, + "learning_rate": 6.569639106285519e-07, + "loss": 0.6575, + "step": 28956 + }, + { + "epoch": 0.8874892730170406, + "grad_norm": 0.6823747887202765, + "learning_rate": 6.566101035969685e-07, + "loss": 0.5336, + "step": 28957 + }, + { + "epoch": 0.8875199215397818, + "grad_norm": 1.8963123953958079, + "learning_rate": 6.56256388628197e-07, + "loss": 0.6432, + "step": 28958 + }, + { + "epoch": 0.887550570062523, + "grad_norm": 1.886638179858474, + "learning_rate": 6.559027657257222e-07, + "loss": 0.5979, + "step": 28959 + }, + { + "epoch": 0.8875812185852642, + "grad_norm": 1.7218157566895098, + "learning_rate": 6.555492348930303e-07, + "loss": 0.4796, + "step": 28960 + }, + { + "epoch": 0.8876118671080054, + "grad_norm": 1.573489782338519, + "learning_rate": 6.551957961335997e-07, + "loss": 0.4264, + "step": 28961 + }, + { + "epoch": 0.8876425156307466, + "grad_norm": 1.6971012027980066, + "learning_rate": 6.548424494509165e-07, + "loss": 0.5521, + "step": 28962 + }, + { + "epoch": 0.8876731641534878, + "grad_norm": 1.664329293496478, + "learning_rate": 6.544891948484622e-07, + "loss": 0.5211, + "step": 28963 + }, + { + "epoch": 0.887703812676229, + "grad_norm": 1.8265546404680637, + "learning_rate": 6.541360323297163e-07, + "loss": 0.6849, + "step": 28964 + }, + { + "epoch": 0.8877344611989703, + "grad_norm": 1.5192979408344989, + "learning_rate": 6.537829618981594e-07, + "loss": 0.6328, + "step": 28965 + }, + { + "epoch": 0.8877651097217114, + "grad_norm": 1.8720818934717185, + "learning_rate": 6.534299835572722e-07, + "loss": 0.7307, + "step": 28966 + }, + { + "epoch": 0.8877957582444527, + "grad_norm": 1.9244186208296366, + "learning_rate": 6.530770973105283e-07, + "loss": 0.6281, + "step": 28967 + }, + { + "epoch": 0.8878264067671938, + "grad_norm": 0.6840964780272367, + "learning_rate": 6.527243031614094e-07, + "loss": 0.525, + "step": 28968 + }, + { + "epoch": 0.887857055289935, + "grad_norm": 1.5662940736387054, + "learning_rate": 6.523716011133863e-07, + "loss": 0.5689, + "step": 28969 + }, + { + "epoch": 0.8878877038126762, + "grad_norm": 1.9809707943246149, + "learning_rate": 6.520189911699415e-07, + "loss": 0.6547, + "step": 28970 + }, + { + "epoch": 0.8879183523354174, + "grad_norm": 1.6064154710921916, + "learning_rate": 6.516664733345435e-07, + "loss": 0.5969, + "step": 28971 + }, + { + "epoch": 0.8879490008581586, + "grad_norm": 1.5935143547384452, + "learning_rate": 6.513140476106672e-07, + "loss": 0.5881, + "step": 28972 + }, + { + "epoch": 0.8879796493808998, + "grad_norm": 1.652093803788473, + "learning_rate": 6.509617140017855e-07, + "loss": 0.6692, + "step": 28973 + }, + { + "epoch": 0.888010297903641, + "grad_norm": 1.6388520244781413, + "learning_rate": 6.506094725113721e-07, + "loss": 0.5758, + "step": 28974 + }, + { + "epoch": 0.8880409464263822, + "grad_norm": 1.6063888089818281, + "learning_rate": 6.502573231428932e-07, + "loss": 0.5961, + "step": 28975 + }, + { + "epoch": 0.8880715949491235, + "grad_norm": 1.7698944037390296, + "learning_rate": 6.499052658998217e-07, + "loss": 0.5341, + "step": 28976 + }, + { + "epoch": 0.8881022434718646, + "grad_norm": 1.6323867253375999, + "learning_rate": 6.495533007856258e-07, + "loss": 0.756, + "step": 28977 + }, + { + "epoch": 0.8881328919946059, + "grad_norm": 1.8528528725663826, + "learning_rate": 6.49201427803775e-07, + "loss": 0.5702, + "step": 28978 + }, + { + "epoch": 0.888163540517347, + "grad_norm": 1.791315252838473, + "learning_rate": 6.488496469577354e-07, + "loss": 0.6142, + "step": 28979 + }, + { + "epoch": 0.8881941890400883, + "grad_norm": 1.5962005893199969, + "learning_rate": 6.484979582509698e-07, + "loss": 0.617, + "step": 28980 + }, + { + "epoch": 0.8882248375628294, + "grad_norm": 1.7870116537103184, + "learning_rate": 6.481463616869499e-07, + "loss": 0.6166, + "step": 28981 + }, + { + "epoch": 0.8882554860855707, + "grad_norm": 1.6516577858064507, + "learning_rate": 6.477948572691362e-07, + "loss": 0.662, + "step": 28982 + }, + { + "epoch": 0.8882861346083119, + "grad_norm": 1.7606487985390935, + "learning_rate": 6.474434450009903e-07, + "loss": 0.5685, + "step": 28983 + }, + { + "epoch": 0.8883167831310531, + "grad_norm": 2.0093485499908206, + "learning_rate": 6.470921248859785e-07, + "loss": 0.6641, + "step": 28984 + }, + { + "epoch": 0.8883474316537943, + "grad_norm": 1.8237482576833588, + "learning_rate": 6.467408969275602e-07, + "loss": 0.6689, + "step": 28985 + }, + { + "epoch": 0.8883780801765355, + "grad_norm": 2.207184888507649, + "learning_rate": 6.46389761129198e-07, + "loss": 0.7009, + "step": 28986 + }, + { + "epoch": 0.8884087286992767, + "grad_norm": 1.7242666802210231, + "learning_rate": 6.460387174943505e-07, + "loss": 0.6553, + "step": 28987 + }, + { + "epoch": 0.8884393772220179, + "grad_norm": 1.9574418020893096, + "learning_rate": 6.45687766026476e-07, + "loss": 0.6675, + "step": 28988 + }, + { + "epoch": 0.8884700257447591, + "grad_norm": 0.6867668244354601, + "learning_rate": 6.453369067290349e-07, + "loss": 0.5352, + "step": 28989 + }, + { + "epoch": 0.8885006742675003, + "grad_norm": 1.7156478895035692, + "learning_rate": 6.449861396054824e-07, + "loss": 0.4913, + "step": 28990 + }, + { + "epoch": 0.8885313227902415, + "grad_norm": 1.6593492049306282, + "learning_rate": 6.446354646592734e-07, + "loss": 0.6402, + "step": 28991 + }, + { + "epoch": 0.8885619713129828, + "grad_norm": 1.874434003510465, + "learning_rate": 6.442848818938663e-07, + "loss": 0.6645, + "step": 28992 + }, + { + "epoch": 0.8885926198357239, + "grad_norm": 1.8257739575055334, + "learning_rate": 6.439343913127149e-07, + "loss": 0.597, + "step": 28993 + }, + { + "epoch": 0.8886232683584652, + "grad_norm": 1.9111762549474374, + "learning_rate": 6.4358399291927e-07, + "loss": 0.7182, + "step": 28994 + }, + { + "epoch": 0.8886539168812063, + "grad_norm": 1.7663453573302297, + "learning_rate": 6.432336867169863e-07, + "loss": 0.5884, + "step": 28995 + }, + { + "epoch": 0.8886845654039476, + "grad_norm": 1.730510479325667, + "learning_rate": 6.428834727093147e-07, + "loss": 0.6773, + "step": 28996 + }, + { + "epoch": 0.8887152139266887, + "grad_norm": 1.718756038794306, + "learning_rate": 6.425333508997079e-07, + "loss": 0.5881, + "step": 28997 + }, + { + "epoch": 0.88874586244943, + "grad_norm": 1.7237020180642992, + "learning_rate": 6.421833212916128e-07, + "loss": 0.5905, + "step": 28998 + }, + { + "epoch": 0.8887765109721711, + "grad_norm": 1.6334233890048884, + "learning_rate": 6.418333838884805e-07, + "loss": 0.664, + "step": 28999 + }, + { + "epoch": 0.8888071594949123, + "grad_norm": 1.7430654235535794, + "learning_rate": 6.41483538693759e-07, + "loss": 0.6261, + "step": 29000 + }, + { + "epoch": 0.8888378080176536, + "grad_norm": 1.887084630730667, + "learning_rate": 6.411337857108946e-07, + "loss": 0.6772, + "step": 29001 + }, + { + "epoch": 0.8888684565403947, + "grad_norm": 1.782454640103262, + "learning_rate": 6.407841249433322e-07, + "loss": 0.5931, + "step": 29002 + }, + { + "epoch": 0.888899105063136, + "grad_norm": 1.6670258851331017, + "learning_rate": 6.40434556394518e-07, + "loss": 0.622, + "step": 29003 + }, + { + "epoch": 0.8889297535858771, + "grad_norm": 1.8580575978550171, + "learning_rate": 6.40085080067897e-07, + "loss": 0.6864, + "step": 29004 + }, + { + "epoch": 0.8889604021086184, + "grad_norm": 1.742681490241905, + "learning_rate": 6.397356959669144e-07, + "loss": 0.597, + "step": 29005 + }, + { + "epoch": 0.8889910506313595, + "grad_norm": 1.690159524105416, + "learning_rate": 6.393864040950093e-07, + "loss": 0.5698, + "step": 29006 + }, + { + "epoch": 0.8890216991541008, + "grad_norm": 0.6900199049059728, + "learning_rate": 6.390372044556259e-07, + "loss": 0.5107, + "step": 29007 + }, + { + "epoch": 0.8890523476768419, + "grad_norm": 1.5930134543468182, + "learning_rate": 6.386880970522047e-07, + "loss": 0.5731, + "step": 29008 + }, + { + "epoch": 0.8890829961995832, + "grad_norm": 1.684706160593904, + "learning_rate": 6.38339081888183e-07, + "loss": 0.6851, + "step": 29009 + }, + { + "epoch": 0.8891136447223243, + "grad_norm": 1.894331436706112, + "learning_rate": 6.379901589670023e-07, + "loss": 0.6744, + "step": 29010 + }, + { + "epoch": 0.8891442932450656, + "grad_norm": 1.8301693506709282, + "learning_rate": 6.376413282921013e-07, + "loss": 0.7027, + "step": 29011 + }, + { + "epoch": 0.8891749417678068, + "grad_norm": 1.9130337430802222, + "learning_rate": 6.372925898669136e-07, + "loss": 0.7007, + "step": 29012 + }, + { + "epoch": 0.889205590290548, + "grad_norm": 1.7598666717500422, + "learning_rate": 6.369439436948799e-07, + "loss": 0.6294, + "step": 29013 + }, + { + "epoch": 0.8892362388132892, + "grad_norm": 1.5814630623460872, + "learning_rate": 6.365953897794308e-07, + "loss": 0.7015, + "step": 29014 + }, + { + "epoch": 0.8892668873360304, + "grad_norm": 0.6664764031636028, + "learning_rate": 6.362469281240035e-07, + "loss": 0.5011, + "step": 29015 + }, + { + "epoch": 0.8892975358587716, + "grad_norm": 0.6768419890190348, + "learning_rate": 6.358985587320332e-07, + "loss": 0.5071, + "step": 29016 + }, + { + "epoch": 0.8893281843815128, + "grad_norm": 1.7587707850073684, + "learning_rate": 6.355502816069481e-07, + "loss": 0.6711, + "step": 29017 + }, + { + "epoch": 0.889358832904254, + "grad_norm": 1.7833732552623907, + "learning_rate": 6.352020967521821e-07, + "loss": 0.6175, + "step": 29018 + }, + { + "epoch": 0.8893894814269953, + "grad_norm": 1.70087080263323, + "learning_rate": 6.34854004171167e-07, + "loss": 0.6846, + "step": 29019 + }, + { + "epoch": 0.8894201299497364, + "grad_norm": 1.7068774289855937, + "learning_rate": 6.345060038673323e-07, + "loss": 0.6296, + "step": 29020 + }, + { + "epoch": 0.8894507784724777, + "grad_norm": 1.858546926584152, + "learning_rate": 6.341580958441029e-07, + "loss": 0.6484, + "step": 29021 + }, + { + "epoch": 0.8894814269952188, + "grad_norm": 1.889449597980431, + "learning_rate": 6.338102801049129e-07, + "loss": 0.5647, + "step": 29022 + }, + { + "epoch": 0.8895120755179601, + "grad_norm": 0.675956180118384, + "learning_rate": 6.33462556653186e-07, + "loss": 0.5514, + "step": 29023 + }, + { + "epoch": 0.8895427240407012, + "grad_norm": 1.704108045014808, + "learning_rate": 6.331149254923496e-07, + "loss": 0.685, + "step": 29024 + }, + { + "epoch": 0.8895733725634425, + "grad_norm": 2.0091794436107677, + "learning_rate": 6.327673866258277e-07, + "loss": 0.6803, + "step": 29025 + }, + { + "epoch": 0.8896040210861836, + "grad_norm": 1.6846553581070827, + "learning_rate": 6.324199400570452e-07, + "loss": 0.5837, + "step": 29026 + }, + { + "epoch": 0.8896346696089249, + "grad_norm": 0.687490562174479, + "learning_rate": 6.320725857894272e-07, + "loss": 0.5223, + "step": 29027 + }, + { + "epoch": 0.889665318131666, + "grad_norm": 1.7604577556212693, + "learning_rate": 6.317253238263932e-07, + "loss": 0.6274, + "step": 29028 + }, + { + "epoch": 0.8896959666544073, + "grad_norm": 1.9103845505675703, + "learning_rate": 6.31378154171367e-07, + "loss": 0.7069, + "step": 29029 + }, + { + "epoch": 0.8897266151771485, + "grad_norm": 1.8511756435401325, + "learning_rate": 6.310310768277705e-07, + "loss": 0.6796, + "step": 29030 + }, + { + "epoch": 0.8897572636998896, + "grad_norm": 1.8521943384544284, + "learning_rate": 6.306840917990198e-07, + "loss": 0.6298, + "step": 29031 + }, + { + "epoch": 0.8897879122226309, + "grad_norm": 1.7904556395814608, + "learning_rate": 6.303371990885365e-07, + "loss": 0.6516, + "step": 29032 + }, + { + "epoch": 0.889818560745372, + "grad_norm": 1.775173335196664, + "learning_rate": 6.299903986997391e-07, + "loss": 0.6611, + "step": 29033 + }, + { + "epoch": 0.8898492092681133, + "grad_norm": 1.5819723514955488, + "learning_rate": 6.296436906360426e-07, + "loss": 0.5226, + "step": 29034 + }, + { + "epoch": 0.8898798577908544, + "grad_norm": 1.7419197096087968, + "learning_rate": 6.292970749008665e-07, + "loss": 0.7029, + "step": 29035 + }, + { + "epoch": 0.8899105063135957, + "grad_norm": 1.8053994462530876, + "learning_rate": 6.289505514976213e-07, + "loss": 0.5924, + "step": 29036 + }, + { + "epoch": 0.8899411548363368, + "grad_norm": 1.5806318218137516, + "learning_rate": 6.286041204297244e-07, + "loss": 0.5187, + "step": 29037 + }, + { + "epoch": 0.8899718033590781, + "grad_norm": 1.7599193928026862, + "learning_rate": 6.282577817005908e-07, + "loss": 0.6529, + "step": 29038 + }, + { + "epoch": 0.8900024518818193, + "grad_norm": 1.4298535091978517, + "learning_rate": 6.27911535313629e-07, + "loss": 0.5792, + "step": 29039 + }, + { + "epoch": 0.8900331004045605, + "grad_norm": 1.7277834361253184, + "learning_rate": 6.275653812722526e-07, + "loss": 0.713, + "step": 29040 + }, + { + "epoch": 0.8900637489273017, + "grad_norm": 2.024143515682326, + "learning_rate": 6.272193195798748e-07, + "loss": 0.7221, + "step": 29041 + }, + { + "epoch": 0.8900943974500429, + "grad_norm": 1.6170131181392267, + "learning_rate": 6.268733502399016e-07, + "loss": 0.5445, + "step": 29042 + }, + { + "epoch": 0.8901250459727841, + "grad_norm": 1.8223425613516857, + "learning_rate": 6.265274732557436e-07, + "loss": 0.7005, + "step": 29043 + }, + { + "epoch": 0.8901556944955253, + "grad_norm": 1.8481775560576714, + "learning_rate": 6.261816886308091e-07, + "loss": 0.6722, + "step": 29044 + }, + { + "epoch": 0.8901863430182665, + "grad_norm": 1.8917333870605848, + "learning_rate": 6.258359963685046e-07, + "loss": 0.668, + "step": 29045 + }, + { + "epoch": 0.8902169915410078, + "grad_norm": 1.6860604019605683, + "learning_rate": 6.254903964722369e-07, + "loss": 0.5866, + "step": 29046 + }, + { + "epoch": 0.8902476400637489, + "grad_norm": 1.6543730303263084, + "learning_rate": 6.251448889454104e-07, + "loss": 0.5329, + "step": 29047 + }, + { + "epoch": 0.8902782885864902, + "grad_norm": 1.69866120343468, + "learning_rate": 6.247994737914298e-07, + "loss": 0.6041, + "step": 29048 + }, + { + "epoch": 0.8903089371092313, + "grad_norm": 1.6771073191502845, + "learning_rate": 6.244541510137004e-07, + "loss": 0.6257, + "step": 29049 + }, + { + "epoch": 0.8903395856319726, + "grad_norm": 1.6811836856653468, + "learning_rate": 6.241089206156203e-07, + "loss": 0.6045, + "step": 29050 + }, + { + "epoch": 0.8903702341547137, + "grad_norm": 0.6455791544565872, + "learning_rate": 6.237637826005949e-07, + "loss": 0.4936, + "step": 29051 + }, + { + "epoch": 0.890400882677455, + "grad_norm": 1.9388908609050948, + "learning_rate": 6.234187369720257e-07, + "loss": 0.6967, + "step": 29052 + }, + { + "epoch": 0.8904315312001961, + "grad_norm": 2.06439849021349, + "learning_rate": 6.230737837333089e-07, + "loss": 0.7134, + "step": 29053 + }, + { + "epoch": 0.8904621797229374, + "grad_norm": 1.5849359932507998, + "learning_rate": 6.227289228878475e-07, + "loss": 0.7139, + "step": 29054 + }, + { + "epoch": 0.8904928282456785, + "grad_norm": 1.706922013058739, + "learning_rate": 6.223841544390341e-07, + "loss": 0.6932, + "step": 29055 + }, + { + "epoch": 0.8905234767684198, + "grad_norm": 0.6651695045625058, + "learning_rate": 6.220394783902705e-07, + "loss": 0.5272, + "step": 29056 + }, + { + "epoch": 0.890554125291161, + "grad_norm": 1.717276198359794, + "learning_rate": 6.21694894744953e-07, + "loss": 0.6821, + "step": 29057 + }, + { + "epoch": 0.8905847738139022, + "grad_norm": 2.0786918063384463, + "learning_rate": 6.213504035064721e-07, + "loss": 0.6471, + "step": 29058 + }, + { + "epoch": 0.8906154223366434, + "grad_norm": 0.6712598255304262, + "learning_rate": 6.210060046782274e-07, + "loss": 0.5291, + "step": 29059 + }, + { + "epoch": 0.8906460708593846, + "grad_norm": 1.6750532112411702, + "learning_rate": 6.206616982636104e-07, + "loss": 0.5799, + "step": 29060 + }, + { + "epoch": 0.8906767193821258, + "grad_norm": 1.9535333464354332, + "learning_rate": 6.20317484266012e-07, + "loss": 0.6918, + "step": 29061 + }, + { + "epoch": 0.8907073679048669, + "grad_norm": 1.5707373849170694, + "learning_rate": 6.199733626888261e-07, + "loss": 0.5965, + "step": 29062 + }, + { + "epoch": 0.8907380164276082, + "grad_norm": 1.6623051679603873, + "learning_rate": 6.19629333535443e-07, + "loss": 0.6517, + "step": 29063 + }, + { + "epoch": 0.8907686649503493, + "grad_norm": 1.8655636337769996, + "learning_rate": 6.192853968092516e-07, + "loss": 0.7224, + "step": 29064 + }, + { + "epoch": 0.8907993134730906, + "grad_norm": 0.6674948020010651, + "learning_rate": 6.189415525136433e-07, + "loss": 0.5219, + "step": 29065 + }, + { + "epoch": 0.8908299619958318, + "grad_norm": 1.7851312964146404, + "learning_rate": 6.18597800652e-07, + "loss": 0.5423, + "step": 29066 + }, + { + "epoch": 0.890860610518573, + "grad_norm": 1.6877305713093647, + "learning_rate": 6.182541412277165e-07, + "loss": 0.6374, + "step": 29067 + }, + { + "epoch": 0.8908912590413142, + "grad_norm": 1.675646866721709, + "learning_rate": 6.179105742441749e-07, + "loss": 0.6773, + "step": 29068 + }, + { + "epoch": 0.8909219075640554, + "grad_norm": 1.6266013168627942, + "learning_rate": 6.1756709970476e-07, + "loss": 0.6309, + "step": 29069 + }, + { + "epoch": 0.8909525560867966, + "grad_norm": 1.4947576839380428, + "learning_rate": 6.172237176128571e-07, + "loss": 0.5948, + "step": 29070 + }, + { + "epoch": 0.8909832046095378, + "grad_norm": 0.7241361382150575, + "learning_rate": 6.168804279718498e-07, + "loss": 0.537, + "step": 29071 + }, + { + "epoch": 0.891013853132279, + "grad_norm": 1.5751077175587216, + "learning_rate": 6.165372307851202e-07, + "loss": 0.5928, + "step": 29072 + }, + { + "epoch": 0.8910445016550202, + "grad_norm": 1.8841466977773675, + "learning_rate": 6.1619412605605e-07, + "loss": 0.7724, + "step": 29073 + }, + { + "epoch": 0.8910751501777614, + "grad_norm": 1.6678679219780304, + "learning_rate": 6.158511137880219e-07, + "loss": 0.739, + "step": 29074 + }, + { + "epoch": 0.8911057987005027, + "grad_norm": 0.674504438291133, + "learning_rate": 6.155081939844109e-07, + "loss": 0.5107, + "step": 29075 + }, + { + "epoch": 0.8911364472232438, + "grad_norm": 0.67468154203834, + "learning_rate": 6.151653666486013e-07, + "loss": 0.5178, + "step": 29076 + }, + { + "epoch": 0.8911670957459851, + "grad_norm": 1.692080987517213, + "learning_rate": 6.148226317839656e-07, + "loss": 0.6263, + "step": 29077 + }, + { + "epoch": 0.8911977442687262, + "grad_norm": 1.6169190405822091, + "learning_rate": 6.144799893938869e-07, + "loss": 0.6355, + "step": 29078 + }, + { + "epoch": 0.8912283927914675, + "grad_norm": 1.670132923409954, + "learning_rate": 6.141374394817379e-07, + "loss": 0.7585, + "step": 29079 + }, + { + "epoch": 0.8912590413142086, + "grad_norm": 1.7368856472921133, + "learning_rate": 6.137949820508926e-07, + "loss": 0.7088, + "step": 29080 + }, + { + "epoch": 0.8912896898369499, + "grad_norm": 1.8145145297854046, + "learning_rate": 6.134526171047273e-07, + "loss": 0.6929, + "step": 29081 + }, + { + "epoch": 0.891320338359691, + "grad_norm": 1.6385532759395807, + "learning_rate": 6.131103446466158e-07, + "loss": 0.6843, + "step": 29082 + }, + { + "epoch": 0.8913509868824323, + "grad_norm": 1.8074555739128366, + "learning_rate": 6.127681646799288e-07, + "loss": 0.6672, + "step": 29083 + }, + { + "epoch": 0.8913816354051735, + "grad_norm": 1.697106822850248, + "learning_rate": 6.124260772080392e-07, + "loss": 0.6475, + "step": 29084 + }, + { + "epoch": 0.8914122839279147, + "grad_norm": 0.6611562364996341, + "learning_rate": 6.120840822343166e-07, + "loss": 0.5148, + "step": 29085 + }, + { + "epoch": 0.8914429324506559, + "grad_norm": 1.5891229146272887, + "learning_rate": 6.117421797621337e-07, + "loss": 0.5967, + "step": 29086 + }, + { + "epoch": 0.8914735809733971, + "grad_norm": 1.6197127492326935, + "learning_rate": 6.114003697948567e-07, + "loss": 0.6344, + "step": 29087 + }, + { + "epoch": 0.8915042294961383, + "grad_norm": 1.6731299251775937, + "learning_rate": 6.11058652335852e-07, + "loss": 0.6487, + "step": 29088 + }, + { + "epoch": 0.8915348780188795, + "grad_norm": 0.6667788419346992, + "learning_rate": 6.10717027388491e-07, + "loss": 0.5142, + "step": 29089 + }, + { + "epoch": 0.8915655265416207, + "grad_norm": 1.6164019404957874, + "learning_rate": 6.103754949561369e-07, + "loss": 0.653, + "step": 29090 + }, + { + "epoch": 0.891596175064362, + "grad_norm": 1.5566191032610006, + "learning_rate": 6.100340550421547e-07, + "loss": 0.5762, + "step": 29091 + }, + { + "epoch": 0.8916268235871031, + "grad_norm": 1.6765575421201417, + "learning_rate": 6.096927076499093e-07, + "loss": 0.6248, + "step": 29092 + }, + { + "epoch": 0.8916574721098443, + "grad_norm": 1.9859976715531347, + "learning_rate": 6.093514527827649e-07, + "loss": 0.6254, + "step": 29093 + }, + { + "epoch": 0.8916881206325855, + "grad_norm": 1.7250497102973925, + "learning_rate": 6.090102904440842e-07, + "loss": 0.5701, + "step": 29094 + }, + { + "epoch": 0.8917187691553267, + "grad_norm": 1.615183342317515, + "learning_rate": 6.086692206372258e-07, + "loss": 0.5625, + "step": 29095 + }, + { + "epoch": 0.8917494176780679, + "grad_norm": 1.7349053596182404, + "learning_rate": 6.083282433655535e-07, + "loss": 0.5491, + "step": 29096 + }, + { + "epoch": 0.8917800662008091, + "grad_norm": 1.6524292968281726, + "learning_rate": 6.07987358632427e-07, + "loss": 0.6085, + "step": 29097 + }, + { + "epoch": 0.8918107147235503, + "grad_norm": 1.7179964289136316, + "learning_rate": 6.076465664412046e-07, + "loss": 0.6072, + "step": 29098 + }, + { + "epoch": 0.8918413632462915, + "grad_norm": 1.8471198729251164, + "learning_rate": 6.073058667952414e-07, + "loss": 0.6247, + "step": 29099 + }, + { + "epoch": 0.8918720117690327, + "grad_norm": 1.8103263437994663, + "learning_rate": 6.06965259697897e-07, + "loss": 0.6563, + "step": 29100 + }, + { + "epoch": 0.8919026602917739, + "grad_norm": 1.642435159552505, + "learning_rate": 6.066247451525286e-07, + "loss": 0.5996, + "step": 29101 + }, + { + "epoch": 0.8919333088145152, + "grad_norm": 1.6529531978214465, + "learning_rate": 6.062843231624893e-07, + "loss": 0.5684, + "step": 29102 + }, + { + "epoch": 0.8919639573372563, + "grad_norm": 1.6599673487953859, + "learning_rate": 6.059439937311329e-07, + "loss": 0.7049, + "step": 29103 + }, + { + "epoch": 0.8919946058599976, + "grad_norm": 1.7349888734734453, + "learning_rate": 6.056037568618145e-07, + "loss": 0.7149, + "step": 29104 + }, + { + "epoch": 0.8920252543827387, + "grad_norm": 1.5908354018661053, + "learning_rate": 6.052636125578882e-07, + "loss": 0.6479, + "step": 29105 + }, + { + "epoch": 0.89205590290548, + "grad_norm": 0.6569712506202792, + "learning_rate": 6.049235608227022e-07, + "loss": 0.496, + "step": 29106 + }, + { + "epoch": 0.8920865514282211, + "grad_norm": 1.725687780300887, + "learning_rate": 6.045836016596052e-07, + "loss": 0.7396, + "step": 29107 + }, + { + "epoch": 0.8921171999509624, + "grad_norm": 1.851486680062614, + "learning_rate": 6.042437350719532e-07, + "loss": 0.7155, + "step": 29108 + }, + { + "epoch": 0.8921478484737035, + "grad_norm": 1.9766042416254221, + "learning_rate": 6.039039610630915e-07, + "loss": 0.6165, + "step": 29109 + }, + { + "epoch": 0.8921784969964448, + "grad_norm": 1.9304471290327827, + "learning_rate": 6.035642796363672e-07, + "loss": 0.7475, + "step": 29110 + }, + { + "epoch": 0.892209145519186, + "grad_norm": 1.6580758275609737, + "learning_rate": 6.032246907951278e-07, + "loss": 0.6484, + "step": 29111 + }, + { + "epoch": 0.8922397940419272, + "grad_norm": 1.764883755250148, + "learning_rate": 6.028851945427195e-07, + "loss": 0.7057, + "step": 29112 + }, + { + "epoch": 0.8922704425646684, + "grad_norm": 0.6628331148230517, + "learning_rate": 6.025457908824895e-07, + "loss": 0.5118, + "step": 29113 + }, + { + "epoch": 0.8923010910874096, + "grad_norm": 0.652731024338594, + "learning_rate": 6.022064798177785e-07, + "loss": 0.5355, + "step": 29114 + }, + { + "epoch": 0.8923317396101508, + "grad_norm": 1.9401681442267058, + "learning_rate": 6.018672613519327e-07, + "loss": 0.608, + "step": 29115 + }, + { + "epoch": 0.892362388132892, + "grad_norm": 1.9841036343704046, + "learning_rate": 6.01528135488294e-07, + "loss": 0.7728, + "step": 29116 + }, + { + "epoch": 0.8923930366556332, + "grad_norm": 1.8140008503893226, + "learning_rate": 6.01189102230203e-07, + "loss": 0.6634, + "step": 29117 + }, + { + "epoch": 0.8924236851783744, + "grad_norm": 1.9753273686538901, + "learning_rate": 6.008501615809981e-07, + "loss": 0.6325, + "step": 29118 + }, + { + "epoch": 0.8924543337011156, + "grad_norm": 1.5205477664094167, + "learning_rate": 6.005113135440243e-07, + "loss": 0.6805, + "step": 29119 + }, + { + "epoch": 0.8924849822238569, + "grad_norm": 1.5759142606240162, + "learning_rate": 6.001725581226159e-07, + "loss": 0.4809, + "step": 29120 + }, + { + "epoch": 0.892515630746598, + "grad_norm": 1.6139837835409616, + "learning_rate": 5.998338953201144e-07, + "loss": 0.6584, + "step": 29121 + }, + { + "epoch": 0.8925462792693393, + "grad_norm": 1.8165028519489073, + "learning_rate": 5.994953251398516e-07, + "loss": 0.6432, + "step": 29122 + }, + { + "epoch": 0.8925769277920804, + "grad_norm": 0.6344196066306915, + "learning_rate": 5.991568475851683e-07, + "loss": 0.4919, + "step": 29123 + }, + { + "epoch": 0.8926075763148216, + "grad_norm": 1.9509014626796117, + "learning_rate": 5.988184626593985e-07, + "loss": 0.549, + "step": 29124 + }, + { + "epoch": 0.8926382248375628, + "grad_norm": 1.8149066967179077, + "learning_rate": 5.98480170365875e-07, + "loss": 0.6004, + "step": 29125 + }, + { + "epoch": 0.892668873360304, + "grad_norm": 1.7564953059281918, + "learning_rate": 5.981419707079306e-07, + "loss": 0.5846, + "step": 29126 + }, + { + "epoch": 0.8926995218830452, + "grad_norm": 1.7417908986506918, + "learning_rate": 5.978038636889017e-07, + "loss": 0.535, + "step": 29127 + }, + { + "epoch": 0.8927301704057864, + "grad_norm": 1.623860602386489, + "learning_rate": 5.974658493121166e-07, + "loss": 0.5039, + "step": 29128 + }, + { + "epoch": 0.8927608189285277, + "grad_norm": 1.6673670878027402, + "learning_rate": 5.971279275809028e-07, + "loss": 0.5585, + "step": 29129 + }, + { + "epoch": 0.8927914674512688, + "grad_norm": 1.8060293147189466, + "learning_rate": 5.967900984985975e-07, + "loss": 0.7896, + "step": 29130 + }, + { + "epoch": 0.8928221159740101, + "grad_norm": 1.747767374984669, + "learning_rate": 5.964523620685225e-07, + "loss": 0.7027, + "step": 29131 + }, + { + "epoch": 0.8928527644967512, + "grad_norm": 1.5858709650043166, + "learning_rate": 5.961147182940108e-07, + "loss": 0.5918, + "step": 29132 + }, + { + "epoch": 0.8928834130194925, + "grad_norm": 1.9391879693339036, + "learning_rate": 5.95777167178384e-07, + "loss": 0.6813, + "step": 29133 + }, + { + "epoch": 0.8929140615422336, + "grad_norm": 1.8511477989460934, + "learning_rate": 5.954397087249719e-07, + "loss": 0.6884, + "step": 29134 + }, + { + "epoch": 0.8929447100649749, + "grad_norm": 1.7229149008931421, + "learning_rate": 5.951023429371006e-07, + "loss": 0.6412, + "step": 29135 + }, + { + "epoch": 0.892975358587716, + "grad_norm": 1.6351144828469844, + "learning_rate": 5.947650698180895e-07, + "loss": 0.5335, + "step": 29136 + }, + { + "epoch": 0.8930060071104573, + "grad_norm": 1.669581974547594, + "learning_rate": 5.944278893712663e-07, + "loss": 0.6158, + "step": 29137 + }, + { + "epoch": 0.8930366556331985, + "grad_norm": 1.5959875419470864, + "learning_rate": 5.940908015999514e-07, + "loss": 0.6608, + "step": 29138 + }, + { + "epoch": 0.8930673041559397, + "grad_norm": 1.7063356545721589, + "learning_rate": 5.937538065074655e-07, + "loss": 0.679, + "step": 29139 + }, + { + "epoch": 0.8930979526786809, + "grad_norm": 1.7817082251418066, + "learning_rate": 5.934169040971305e-07, + "loss": 0.6935, + "step": 29140 + }, + { + "epoch": 0.8931286012014221, + "grad_norm": 1.8113626517522359, + "learning_rate": 5.930800943722669e-07, + "loss": 0.5505, + "step": 29141 + }, + { + "epoch": 0.8931592497241633, + "grad_norm": 1.5531303604144475, + "learning_rate": 5.927433773361901e-07, + "loss": 0.6548, + "step": 29142 + }, + { + "epoch": 0.8931898982469045, + "grad_norm": 0.6777656867817123, + "learning_rate": 5.924067529922218e-07, + "loss": 0.5359, + "step": 29143 + }, + { + "epoch": 0.8932205467696457, + "grad_norm": 1.6582762499059374, + "learning_rate": 5.920702213436746e-07, + "loss": 0.6629, + "step": 29144 + }, + { + "epoch": 0.893251195292387, + "grad_norm": 0.6715729524533642, + "learning_rate": 5.917337823938674e-07, + "loss": 0.5081, + "step": 29145 + }, + { + "epoch": 0.8932818438151281, + "grad_norm": 1.7596474044121413, + "learning_rate": 5.913974361461161e-07, + "loss": 0.6263, + "step": 29146 + }, + { + "epoch": 0.8933124923378694, + "grad_norm": 1.5773050075406996, + "learning_rate": 5.910611826037305e-07, + "loss": 0.5155, + "step": 29147 + }, + { + "epoch": 0.8933431408606105, + "grad_norm": 1.9413448964286595, + "learning_rate": 5.907250217700277e-07, + "loss": 0.7026, + "step": 29148 + }, + { + "epoch": 0.8933737893833518, + "grad_norm": 0.701110797714981, + "learning_rate": 5.903889536483187e-07, + "loss": 0.5249, + "step": 29149 + }, + { + "epoch": 0.8934044379060929, + "grad_norm": 1.7593376704657484, + "learning_rate": 5.900529782419151e-07, + "loss": 0.6746, + "step": 29150 + }, + { + "epoch": 0.8934350864288342, + "grad_norm": 1.7031809887210598, + "learning_rate": 5.897170955541276e-07, + "loss": 0.6805, + "step": 29151 + }, + { + "epoch": 0.8934657349515753, + "grad_norm": 1.5370497699918748, + "learning_rate": 5.893813055882636e-07, + "loss": 0.6066, + "step": 29152 + }, + { + "epoch": 0.8934963834743166, + "grad_norm": 1.86640661513244, + "learning_rate": 5.890456083476348e-07, + "loss": 0.6828, + "step": 29153 + }, + { + "epoch": 0.8935270319970577, + "grad_norm": 1.6629747122698169, + "learning_rate": 5.887100038355475e-07, + "loss": 0.65, + "step": 29154 + }, + { + "epoch": 0.8935576805197989, + "grad_norm": 1.8606463276405598, + "learning_rate": 5.88374492055308e-07, + "loss": 0.6486, + "step": 29155 + }, + { + "epoch": 0.8935883290425402, + "grad_norm": 1.7436147643694924, + "learning_rate": 5.880390730102215e-07, + "loss": 0.6553, + "step": 29156 + }, + { + "epoch": 0.8936189775652813, + "grad_norm": 1.6746854028935234, + "learning_rate": 5.877037467035973e-07, + "loss": 0.5672, + "step": 29157 + }, + { + "epoch": 0.8936496260880226, + "grad_norm": 1.6636097815070228, + "learning_rate": 5.87368513138733e-07, + "loss": 0.632, + "step": 29158 + }, + { + "epoch": 0.8936802746107637, + "grad_norm": 1.6824631259837641, + "learning_rate": 5.87033372318937e-07, + "loss": 0.6216, + "step": 29159 + }, + { + "epoch": 0.893710923133505, + "grad_norm": 0.6580334053100472, + "learning_rate": 5.866983242475099e-07, + "loss": 0.5007, + "step": 29160 + }, + { + "epoch": 0.8937415716562461, + "grad_norm": 1.6923979915708762, + "learning_rate": 5.863633689277515e-07, + "loss": 0.6421, + "step": 29161 + }, + { + "epoch": 0.8937722201789874, + "grad_norm": 1.8183188172835296, + "learning_rate": 5.860285063629645e-07, + "loss": 0.6167, + "step": 29162 + }, + { + "epoch": 0.8938028687017285, + "grad_norm": 1.8033136869463608, + "learning_rate": 5.856937365564463e-07, + "loss": 0.6143, + "step": 29163 + }, + { + "epoch": 0.8938335172244698, + "grad_norm": 0.6880272170931598, + "learning_rate": 5.853590595114966e-07, + "loss": 0.5134, + "step": 29164 + }, + { + "epoch": 0.893864165747211, + "grad_norm": 2.04913176670942, + "learning_rate": 5.850244752314138e-07, + "loss": 0.693, + "step": 29165 + }, + { + "epoch": 0.8938948142699522, + "grad_norm": 1.7686292564717447, + "learning_rate": 5.846899837194919e-07, + "loss": 0.7021, + "step": 29166 + }, + { + "epoch": 0.8939254627926934, + "grad_norm": 1.8371823747089508, + "learning_rate": 5.843555849790295e-07, + "loss": 0.6038, + "step": 29167 + }, + { + "epoch": 0.8939561113154346, + "grad_norm": 1.7038388328917904, + "learning_rate": 5.840212790133226e-07, + "loss": 0.5121, + "step": 29168 + }, + { + "epoch": 0.8939867598381758, + "grad_norm": 0.6651979558882529, + "learning_rate": 5.83687065825661e-07, + "loss": 0.505, + "step": 29169 + }, + { + "epoch": 0.894017408360917, + "grad_norm": 1.6741493626752009, + "learning_rate": 5.833529454193398e-07, + "loss": 0.6132, + "step": 29170 + }, + { + "epoch": 0.8940480568836582, + "grad_norm": 1.7892860213024406, + "learning_rate": 5.83018917797653e-07, + "loss": 0.7132, + "step": 29171 + }, + { + "epoch": 0.8940787054063994, + "grad_norm": 1.774690761859729, + "learning_rate": 5.826849829638892e-07, + "loss": 0.5369, + "step": 29172 + }, + { + "epoch": 0.8941093539291406, + "grad_norm": 1.70538441187049, + "learning_rate": 5.823511409213412e-07, + "loss": 0.6022, + "step": 29173 + }, + { + "epoch": 0.8941400024518819, + "grad_norm": 1.6845152504019791, + "learning_rate": 5.820173916732951e-07, + "loss": 0.7211, + "step": 29174 + }, + { + "epoch": 0.894170650974623, + "grad_norm": 1.6029310262792191, + "learning_rate": 5.816837352230409e-07, + "loss": 0.6638, + "step": 29175 + }, + { + "epoch": 0.8942012994973643, + "grad_norm": 1.685137789503385, + "learning_rate": 5.81350171573869e-07, + "loss": 0.5488, + "step": 29176 + }, + { + "epoch": 0.8942319480201054, + "grad_norm": 1.482516976392023, + "learning_rate": 5.810167007290624e-07, + "loss": 0.5947, + "step": 29177 + }, + { + "epoch": 0.8942625965428467, + "grad_norm": 1.8947254588425362, + "learning_rate": 5.806833226919073e-07, + "loss": 0.6718, + "step": 29178 + }, + { + "epoch": 0.8942932450655878, + "grad_norm": 0.664990267094071, + "learning_rate": 5.803500374656912e-07, + "loss": 0.5115, + "step": 29179 + }, + { + "epoch": 0.8943238935883291, + "grad_norm": 1.7139220325663873, + "learning_rate": 5.800168450536948e-07, + "loss": 0.6316, + "step": 29180 + }, + { + "epoch": 0.8943545421110702, + "grad_norm": 1.757586572280292, + "learning_rate": 5.796837454592031e-07, + "loss": 0.613, + "step": 29181 + }, + { + "epoch": 0.8943851906338115, + "grad_norm": 1.9301061959931192, + "learning_rate": 5.79350738685499e-07, + "loss": 0.6212, + "step": 29182 + }, + { + "epoch": 0.8944158391565527, + "grad_norm": 1.566216906599471, + "learning_rate": 5.790178247358613e-07, + "loss": 0.6646, + "step": 29183 + }, + { + "epoch": 0.8944464876792939, + "grad_norm": 1.8227975040492794, + "learning_rate": 5.786850036135728e-07, + "loss": 0.6982, + "step": 29184 + }, + { + "epoch": 0.8944771362020351, + "grad_norm": 1.561069582723026, + "learning_rate": 5.783522753219084e-07, + "loss": 0.6447, + "step": 29185 + }, + { + "epoch": 0.8945077847247762, + "grad_norm": 1.9088442385020956, + "learning_rate": 5.780196398641524e-07, + "loss": 0.6343, + "step": 29186 + }, + { + "epoch": 0.8945384332475175, + "grad_norm": 1.568332786239051, + "learning_rate": 5.776870972435788e-07, + "loss": 0.6546, + "step": 29187 + }, + { + "epoch": 0.8945690817702586, + "grad_norm": 1.5558819844781722, + "learning_rate": 5.773546474634651e-07, + "loss": 0.5795, + "step": 29188 + }, + { + "epoch": 0.8945997302929999, + "grad_norm": 1.9676481205600422, + "learning_rate": 5.770222905270862e-07, + "loss": 0.618, + "step": 29189 + }, + { + "epoch": 0.894630378815741, + "grad_norm": 1.7395594828374135, + "learning_rate": 5.766900264377196e-07, + "loss": 0.5497, + "step": 29190 + }, + { + "epoch": 0.8946610273384823, + "grad_norm": 1.7674153904429932, + "learning_rate": 5.763578551986348e-07, + "loss": 0.5915, + "step": 29191 + }, + { + "epoch": 0.8946916758612234, + "grad_norm": 1.8492145809517375, + "learning_rate": 5.760257768131083e-07, + "loss": 0.6438, + "step": 29192 + }, + { + "epoch": 0.8947223243839647, + "grad_norm": 1.5934948800748043, + "learning_rate": 5.756937912844108e-07, + "loss": 0.5353, + "step": 29193 + }, + { + "epoch": 0.8947529729067059, + "grad_norm": 1.8108989521183432, + "learning_rate": 5.75361898615815e-07, + "loss": 0.6412, + "step": 29194 + }, + { + "epoch": 0.8947836214294471, + "grad_norm": 1.6995892897738107, + "learning_rate": 5.750300988105895e-07, + "loss": 0.6189, + "step": 29195 + }, + { + "epoch": 0.8948142699521883, + "grad_norm": 1.6622623904948362, + "learning_rate": 5.746983918720028e-07, + "loss": 0.602, + "step": 29196 + }, + { + "epoch": 0.8948449184749295, + "grad_norm": 1.832631116775526, + "learning_rate": 5.743667778033235e-07, + "loss": 0.6381, + "step": 29197 + }, + { + "epoch": 0.8948755669976707, + "grad_norm": 1.5990255516385679, + "learning_rate": 5.740352566078233e-07, + "loss": 0.645, + "step": 29198 + }, + { + "epoch": 0.8949062155204119, + "grad_norm": 1.6750852072572708, + "learning_rate": 5.737038282887619e-07, + "loss": 0.6496, + "step": 29199 + }, + { + "epoch": 0.8949368640431531, + "grad_norm": 1.8845090594414629, + "learning_rate": 5.7337249284941e-07, + "loss": 0.6641, + "step": 29200 + }, + { + "epoch": 0.8949675125658944, + "grad_norm": 1.854938556845497, + "learning_rate": 5.730412502930316e-07, + "loss": 0.6512, + "step": 29201 + }, + { + "epoch": 0.8949981610886355, + "grad_norm": 1.641402380648764, + "learning_rate": 5.727101006228886e-07, + "loss": 0.5662, + "step": 29202 + }, + { + "epoch": 0.8950288096113768, + "grad_norm": 1.9596711165241225, + "learning_rate": 5.723790438422472e-07, + "loss": 0.7528, + "step": 29203 + }, + { + "epoch": 0.8950594581341179, + "grad_norm": 1.4964317478629667, + "learning_rate": 5.720480799543626e-07, + "loss": 0.5126, + "step": 29204 + }, + { + "epoch": 0.8950901066568592, + "grad_norm": 1.8115370566945321, + "learning_rate": 5.717172089625045e-07, + "loss": 0.5882, + "step": 29205 + }, + { + "epoch": 0.8951207551796003, + "grad_norm": 1.747200847261554, + "learning_rate": 5.71386430869929e-07, + "loss": 0.5716, + "step": 29206 + }, + { + "epoch": 0.8951514037023416, + "grad_norm": 1.6940041136517368, + "learning_rate": 5.710557456798938e-07, + "loss": 0.5844, + "step": 29207 + }, + { + "epoch": 0.8951820522250827, + "grad_norm": 1.7941359615538692, + "learning_rate": 5.707251533956592e-07, + "loss": 0.7425, + "step": 29208 + }, + { + "epoch": 0.895212700747824, + "grad_norm": 2.0065350044299888, + "learning_rate": 5.703946540204841e-07, + "loss": 0.6907, + "step": 29209 + }, + { + "epoch": 0.8952433492705651, + "grad_norm": 1.6148700568151402, + "learning_rate": 5.700642475576202e-07, + "loss": 0.6612, + "step": 29210 + }, + { + "epoch": 0.8952739977933064, + "grad_norm": 1.7839228432365095, + "learning_rate": 5.697339340103269e-07, + "loss": 0.5667, + "step": 29211 + }, + { + "epoch": 0.8953046463160476, + "grad_norm": 1.980597534526554, + "learning_rate": 5.694037133818587e-07, + "loss": 0.5396, + "step": 29212 + }, + { + "epoch": 0.8953352948387888, + "grad_norm": 1.6761223831921956, + "learning_rate": 5.690735856754693e-07, + "loss": 0.669, + "step": 29213 + }, + { + "epoch": 0.89536594336153, + "grad_norm": 1.7819763896115726, + "learning_rate": 5.687435508944105e-07, + "loss": 0.5875, + "step": 29214 + }, + { + "epoch": 0.8953965918842712, + "grad_norm": 1.4962133722605002, + "learning_rate": 5.684136090419323e-07, + "loss": 0.654, + "step": 29215 + }, + { + "epoch": 0.8954272404070124, + "grad_norm": 0.7027777923469052, + "learning_rate": 5.680837601212907e-07, + "loss": 0.5139, + "step": 29216 + }, + { + "epoch": 0.8954578889297535, + "grad_norm": 1.6757316613580464, + "learning_rate": 5.677540041357332e-07, + "loss": 0.7085, + "step": 29217 + }, + { + "epoch": 0.8954885374524948, + "grad_norm": 1.601003331444184, + "learning_rate": 5.674243410885072e-07, + "loss": 0.5806, + "step": 29218 + }, + { + "epoch": 0.8955191859752359, + "grad_norm": 1.8166088946498882, + "learning_rate": 5.670947709828622e-07, + "loss": 0.6661, + "step": 29219 + }, + { + "epoch": 0.8955498344979772, + "grad_norm": 1.9354557337767972, + "learning_rate": 5.66765293822047e-07, + "loss": 0.6825, + "step": 29220 + }, + { + "epoch": 0.8955804830207184, + "grad_norm": 1.6283971921192109, + "learning_rate": 5.664359096093075e-07, + "loss": 0.6157, + "step": 29221 + }, + { + "epoch": 0.8956111315434596, + "grad_norm": 1.69791709329541, + "learning_rate": 5.66106618347887e-07, + "loss": 0.5878, + "step": 29222 + }, + { + "epoch": 0.8956417800662008, + "grad_norm": 1.9060187812856577, + "learning_rate": 5.657774200410326e-07, + "loss": 0.7293, + "step": 29223 + }, + { + "epoch": 0.895672428588942, + "grad_norm": 0.6842595523878423, + "learning_rate": 5.654483146919887e-07, + "loss": 0.5378, + "step": 29224 + }, + { + "epoch": 0.8957030771116832, + "grad_norm": 1.6937197495971978, + "learning_rate": 5.651193023039958e-07, + "loss": 0.7219, + "step": 29225 + }, + { + "epoch": 0.8957337256344244, + "grad_norm": 0.6968631829590033, + "learning_rate": 5.647903828802936e-07, + "loss": 0.5076, + "step": 29226 + }, + { + "epoch": 0.8957643741571656, + "grad_norm": 0.6909035962927554, + "learning_rate": 5.644615564241285e-07, + "loss": 0.5353, + "step": 29227 + }, + { + "epoch": 0.8957950226799068, + "grad_norm": 1.7502220764087462, + "learning_rate": 5.641328229387389e-07, + "loss": 0.5346, + "step": 29228 + }, + { + "epoch": 0.895825671202648, + "grad_norm": 1.746143471019387, + "learning_rate": 5.638041824273599e-07, + "loss": 0.6407, + "step": 29229 + }, + { + "epoch": 0.8958563197253893, + "grad_norm": 1.589410630710236, + "learning_rate": 5.634756348932335e-07, + "loss": 0.6007, + "step": 29230 + }, + { + "epoch": 0.8958869682481304, + "grad_norm": 1.953114457948361, + "learning_rate": 5.631471803395971e-07, + "loss": 0.585, + "step": 29231 + }, + { + "epoch": 0.8959176167708717, + "grad_norm": 1.4855812364275818, + "learning_rate": 5.628188187696859e-07, + "loss": 0.6172, + "step": 29232 + }, + { + "epoch": 0.8959482652936128, + "grad_norm": 1.5949330681481673, + "learning_rate": 5.62490550186735e-07, + "loss": 0.6325, + "step": 29233 + }, + { + "epoch": 0.8959789138163541, + "grad_norm": 1.7420088367658126, + "learning_rate": 5.621623745939786e-07, + "loss": 0.6267, + "step": 29234 + }, + { + "epoch": 0.8960095623390952, + "grad_norm": 1.5768598238453824, + "learning_rate": 5.618342919946528e-07, + "loss": 0.6348, + "step": 29235 + }, + { + "epoch": 0.8960402108618365, + "grad_norm": 0.6569596845858119, + "learning_rate": 5.615063023919897e-07, + "loss": 0.5147, + "step": 29236 + }, + { + "epoch": 0.8960708593845776, + "grad_norm": 1.6745964146292815, + "learning_rate": 5.611784057892156e-07, + "loss": 0.5678, + "step": 29237 + }, + { + "epoch": 0.8961015079073189, + "grad_norm": 1.5935917441004641, + "learning_rate": 5.608506021895698e-07, + "loss": 0.646, + "step": 29238 + }, + { + "epoch": 0.8961321564300601, + "grad_norm": 0.6903870929979744, + "learning_rate": 5.605228915962757e-07, + "loss": 0.532, + "step": 29239 + }, + { + "epoch": 0.8961628049528013, + "grad_norm": 1.8882451152337998, + "learning_rate": 5.60195274012566e-07, + "loss": 0.6972, + "step": 29240 + }, + { + "epoch": 0.8961934534755425, + "grad_norm": 0.6592879857004049, + "learning_rate": 5.598677494416672e-07, + "loss": 0.5301, + "step": 29241 + }, + { + "epoch": 0.8962241019982837, + "grad_norm": 1.727491520849677, + "learning_rate": 5.595403178868064e-07, + "loss": 0.6749, + "step": 29242 + }, + { + "epoch": 0.8962547505210249, + "grad_norm": 1.6689472353231471, + "learning_rate": 5.592129793512114e-07, + "loss": 0.586, + "step": 29243 + }, + { + "epoch": 0.8962853990437661, + "grad_norm": 1.6979279056015688, + "learning_rate": 5.588857338381049e-07, + "loss": 0.5969, + "step": 29244 + }, + { + "epoch": 0.8963160475665073, + "grad_norm": 1.820634138839332, + "learning_rate": 5.585585813507133e-07, + "loss": 0.6664, + "step": 29245 + }, + { + "epoch": 0.8963466960892486, + "grad_norm": 0.6780630419420532, + "learning_rate": 5.582315218922607e-07, + "loss": 0.4945, + "step": 29246 + }, + { + "epoch": 0.8963773446119897, + "grad_norm": 1.7149976724025284, + "learning_rate": 5.579045554659679e-07, + "loss": 0.6293, + "step": 29247 + }, + { + "epoch": 0.8964079931347309, + "grad_norm": 1.6366226782465094, + "learning_rate": 5.575776820750589e-07, + "loss": 0.6202, + "step": 29248 + }, + { + "epoch": 0.8964386416574721, + "grad_norm": 1.956591703067072, + "learning_rate": 5.572509017227512e-07, + "loss": 0.7138, + "step": 29249 + }, + { + "epoch": 0.8964692901802133, + "grad_norm": 1.6051273358813554, + "learning_rate": 5.569242144122655e-07, + "loss": 0.5842, + "step": 29250 + }, + { + "epoch": 0.8964999387029545, + "grad_norm": 1.701511107667265, + "learning_rate": 5.565976201468237e-07, + "loss": 0.5131, + "step": 29251 + }, + { + "epoch": 0.8965305872256957, + "grad_norm": 1.9702963931776003, + "learning_rate": 5.56271118929641e-07, + "loss": 0.7433, + "step": 29252 + }, + { + "epoch": 0.8965612357484369, + "grad_norm": 1.8192586096478292, + "learning_rate": 5.559447107639348e-07, + "loss": 0.684, + "step": 29253 + }, + { + "epoch": 0.8965918842711781, + "grad_norm": 1.6957228886932203, + "learning_rate": 5.556183956529226e-07, + "loss": 0.6586, + "step": 29254 + }, + { + "epoch": 0.8966225327939193, + "grad_norm": 1.8214681262186687, + "learning_rate": 5.552921735998196e-07, + "loss": 0.6096, + "step": 29255 + }, + { + "epoch": 0.8966531813166605, + "grad_norm": 1.8711634021853665, + "learning_rate": 5.549660446078364e-07, + "loss": 0.6577, + "step": 29256 + }, + { + "epoch": 0.8966838298394018, + "grad_norm": 1.8601822129594519, + "learning_rate": 5.546400086801917e-07, + "loss": 0.7656, + "step": 29257 + }, + { + "epoch": 0.8967144783621429, + "grad_norm": 1.742007368929908, + "learning_rate": 5.54314065820094e-07, + "loss": 0.6331, + "step": 29258 + }, + { + "epoch": 0.8967451268848842, + "grad_norm": 1.6451777208027278, + "learning_rate": 5.539882160307586e-07, + "loss": 0.5886, + "step": 29259 + }, + { + "epoch": 0.8967757754076253, + "grad_norm": 0.656873933431282, + "learning_rate": 5.536624593153928e-07, + "loss": 0.5146, + "step": 29260 + }, + { + "epoch": 0.8968064239303666, + "grad_norm": 1.7995266507786194, + "learning_rate": 5.533367956772085e-07, + "loss": 0.5516, + "step": 29261 + }, + { + "epoch": 0.8968370724531077, + "grad_norm": 1.953984652698689, + "learning_rate": 5.530112251194142e-07, + "loss": 0.6068, + "step": 29262 + }, + { + "epoch": 0.896867720975849, + "grad_norm": 0.6558567289640249, + "learning_rate": 5.526857476452163e-07, + "loss": 0.5094, + "step": 29263 + }, + { + "epoch": 0.8968983694985901, + "grad_norm": 1.5604167131385769, + "learning_rate": 5.523603632578223e-07, + "loss": 0.6215, + "step": 29264 + }, + { + "epoch": 0.8969290180213314, + "grad_norm": 1.477442407363663, + "learning_rate": 5.520350719604406e-07, + "loss": 0.6202, + "step": 29265 + }, + { + "epoch": 0.8969596665440726, + "grad_norm": 1.8395251993128372, + "learning_rate": 5.517098737562731e-07, + "loss": 0.673, + "step": 29266 + }, + { + "epoch": 0.8969903150668138, + "grad_norm": 1.641759222334589, + "learning_rate": 5.513847686485263e-07, + "loss": 0.6185, + "step": 29267 + }, + { + "epoch": 0.897020963589555, + "grad_norm": 1.6757404858753, + "learning_rate": 5.510597566404042e-07, + "loss": 0.6859, + "step": 29268 + }, + { + "epoch": 0.8970516121122962, + "grad_norm": 1.7400790247203346, + "learning_rate": 5.507348377351063e-07, + "loss": 0.6654, + "step": 29269 + }, + { + "epoch": 0.8970822606350374, + "grad_norm": 1.631695179729245, + "learning_rate": 5.50410011935838e-07, + "loss": 0.5398, + "step": 29270 + }, + { + "epoch": 0.8971129091577786, + "grad_norm": 1.7171941315444743, + "learning_rate": 5.500852792457956e-07, + "loss": 0.6528, + "step": 29271 + }, + { + "epoch": 0.8971435576805198, + "grad_norm": 1.7210725573650258, + "learning_rate": 5.497606396681798e-07, + "loss": 0.6536, + "step": 29272 + }, + { + "epoch": 0.897174206203261, + "grad_norm": 1.8532227901153688, + "learning_rate": 5.494360932061926e-07, + "loss": 0.6683, + "step": 29273 + }, + { + "epoch": 0.8972048547260022, + "grad_norm": 1.83062309167407, + "learning_rate": 5.491116398630292e-07, + "loss": 0.5547, + "step": 29274 + }, + { + "epoch": 0.8972355032487435, + "grad_norm": 0.6709085520074681, + "learning_rate": 5.487872796418859e-07, + "loss": 0.499, + "step": 29275 + }, + { + "epoch": 0.8972661517714846, + "grad_norm": 1.7278513124949415, + "learning_rate": 5.484630125459611e-07, + "loss": 0.6483, + "step": 29276 + }, + { + "epoch": 0.8972968002942259, + "grad_norm": 1.789614127887816, + "learning_rate": 5.48138838578447e-07, + "loss": 0.5771, + "step": 29277 + }, + { + "epoch": 0.897327448816967, + "grad_norm": 1.8686008166061425, + "learning_rate": 5.478147577425397e-07, + "loss": 0.6367, + "step": 29278 + }, + { + "epoch": 0.8973580973397082, + "grad_norm": 1.871576027825447, + "learning_rate": 5.474907700414334e-07, + "loss": 0.4727, + "step": 29279 + }, + { + "epoch": 0.8973887458624494, + "grad_norm": 1.6608447186902873, + "learning_rate": 5.471668754783177e-07, + "loss": 0.5884, + "step": 29280 + }, + { + "epoch": 0.8974193943851906, + "grad_norm": 1.8423407242813132, + "learning_rate": 5.468430740563857e-07, + "loss": 0.7483, + "step": 29281 + }, + { + "epoch": 0.8974500429079318, + "grad_norm": 1.596238510913496, + "learning_rate": 5.465193657788282e-07, + "loss": 0.5669, + "step": 29282 + }, + { + "epoch": 0.897480691430673, + "grad_norm": 1.765410866045787, + "learning_rate": 5.461957506488324e-07, + "loss": 0.6447, + "step": 29283 + }, + { + "epoch": 0.8975113399534143, + "grad_norm": 0.629921809485311, + "learning_rate": 5.458722286695905e-07, + "loss": 0.5116, + "step": 29284 + }, + { + "epoch": 0.8975419884761554, + "grad_norm": 1.6027319188787899, + "learning_rate": 5.455487998442877e-07, + "loss": 0.6191, + "step": 29285 + }, + { + "epoch": 0.8975726369988967, + "grad_norm": 1.7451160310359632, + "learning_rate": 5.452254641761112e-07, + "loss": 0.6437, + "step": 29286 + }, + { + "epoch": 0.8976032855216378, + "grad_norm": 1.511735478596321, + "learning_rate": 5.449022216682487e-07, + "loss": 0.7085, + "step": 29287 + }, + { + "epoch": 0.8976339340443791, + "grad_norm": 1.604295059005099, + "learning_rate": 5.445790723238831e-07, + "loss": 0.6787, + "step": 29288 + }, + { + "epoch": 0.8976645825671202, + "grad_norm": 2.3160872047759256, + "learning_rate": 5.442560161461984e-07, + "loss": 0.6975, + "step": 29289 + }, + { + "epoch": 0.8976952310898615, + "grad_norm": 1.9231739830695438, + "learning_rate": 5.439330531383802e-07, + "loss": 0.5443, + "step": 29290 + }, + { + "epoch": 0.8977258796126026, + "grad_norm": 1.735426070446385, + "learning_rate": 5.436101833036067e-07, + "loss": 0.7051, + "step": 29291 + }, + { + "epoch": 0.8977565281353439, + "grad_norm": 1.6390050313584632, + "learning_rate": 5.432874066450644e-07, + "loss": 0.5903, + "step": 29292 + }, + { + "epoch": 0.897787176658085, + "grad_norm": 1.5042813797283892, + "learning_rate": 5.429647231659285e-07, + "loss": 0.5154, + "step": 29293 + }, + { + "epoch": 0.8978178251808263, + "grad_norm": 1.7086558224958577, + "learning_rate": 5.426421328693821e-07, + "loss": 0.6442, + "step": 29294 + }, + { + "epoch": 0.8978484737035675, + "grad_norm": 1.7005842449074275, + "learning_rate": 5.423196357586024e-07, + "loss": 0.7601, + "step": 29295 + }, + { + "epoch": 0.8978791222263087, + "grad_norm": 1.7980743736966487, + "learning_rate": 5.419972318367672e-07, + "loss": 0.6964, + "step": 29296 + }, + { + "epoch": 0.8979097707490499, + "grad_norm": 1.531597462144348, + "learning_rate": 5.416749211070527e-07, + "loss": 0.6415, + "step": 29297 + }, + { + "epoch": 0.8979404192717911, + "grad_norm": 1.7736810012521653, + "learning_rate": 5.413527035726363e-07, + "loss": 0.6387, + "step": 29298 + }, + { + "epoch": 0.8979710677945323, + "grad_norm": 1.7727026243549708, + "learning_rate": 5.410305792366899e-07, + "loss": 0.6526, + "step": 29299 + }, + { + "epoch": 0.8980017163172735, + "grad_norm": 0.6654461938511603, + "learning_rate": 5.407085481023922e-07, + "loss": 0.5036, + "step": 29300 + }, + { + "epoch": 0.8980323648400147, + "grad_norm": 1.6045383676244795, + "learning_rate": 5.403866101729105e-07, + "loss": 0.5425, + "step": 29301 + }, + { + "epoch": 0.898063013362756, + "grad_norm": 1.7467115310675145, + "learning_rate": 5.400647654514212e-07, + "loss": 0.7226, + "step": 29302 + }, + { + "epoch": 0.8980936618854971, + "grad_norm": 1.838521440832332, + "learning_rate": 5.397430139410953e-07, + "loss": 0.6876, + "step": 29303 + }, + { + "epoch": 0.8981243104082384, + "grad_norm": 0.6493372630749848, + "learning_rate": 5.394213556451e-07, + "loss": 0.4776, + "step": 29304 + }, + { + "epoch": 0.8981549589309795, + "grad_norm": 1.869156080197986, + "learning_rate": 5.390997905666074e-07, + "loss": 0.5912, + "step": 29305 + }, + { + "epoch": 0.8981856074537208, + "grad_norm": 0.6839667291123793, + "learning_rate": 5.387783187087858e-07, + "loss": 0.5009, + "step": 29306 + }, + { + "epoch": 0.8982162559764619, + "grad_norm": 1.6696956598051667, + "learning_rate": 5.384569400748007e-07, + "loss": 0.595, + "step": 29307 + }, + { + "epoch": 0.8982469044992032, + "grad_norm": 1.4746077197277339, + "learning_rate": 5.381356546678207e-07, + "loss": 0.465, + "step": 29308 + }, + { + "epoch": 0.8982775530219443, + "grad_norm": 0.6927181734757517, + "learning_rate": 5.378144624910132e-07, + "loss": 0.5075, + "step": 29309 + }, + { + "epoch": 0.8983082015446855, + "grad_norm": 1.6680986709065782, + "learning_rate": 5.374933635475388e-07, + "loss": 0.6113, + "step": 29310 + }, + { + "epoch": 0.8983388500674268, + "grad_norm": 1.9692875611945901, + "learning_rate": 5.371723578405641e-07, + "loss": 0.639, + "step": 29311 + }, + { + "epoch": 0.8983694985901679, + "grad_norm": 1.699764621738216, + "learning_rate": 5.368514453732487e-07, + "loss": 0.5492, + "step": 29312 + }, + { + "epoch": 0.8984001471129092, + "grad_norm": 1.691185978931394, + "learning_rate": 5.365306261487613e-07, + "loss": 0.6066, + "step": 29313 + }, + { + "epoch": 0.8984307956356503, + "grad_norm": 1.5831170668664714, + "learning_rate": 5.362099001702581e-07, + "loss": 0.6592, + "step": 29314 + }, + { + "epoch": 0.8984614441583916, + "grad_norm": 0.7013515016608945, + "learning_rate": 5.358892674408988e-07, + "loss": 0.505, + "step": 29315 + }, + { + "epoch": 0.8984920926811327, + "grad_norm": 1.7967328200590267, + "learning_rate": 5.355687279638433e-07, + "loss": 0.6821, + "step": 29316 + }, + { + "epoch": 0.898522741203874, + "grad_norm": 1.7191604151928492, + "learning_rate": 5.352482817422533e-07, + "loss": 0.6569, + "step": 29317 + }, + { + "epoch": 0.8985533897266151, + "grad_norm": 1.539623949940457, + "learning_rate": 5.349279287792819e-07, + "loss": 0.5943, + "step": 29318 + }, + { + "epoch": 0.8985840382493564, + "grad_norm": 1.6051090477645786, + "learning_rate": 5.346076690780866e-07, + "loss": 0.6154, + "step": 29319 + }, + { + "epoch": 0.8986146867720975, + "grad_norm": 1.8779639459333024, + "learning_rate": 5.342875026418248e-07, + "loss": 0.5927, + "step": 29320 + }, + { + "epoch": 0.8986453352948388, + "grad_norm": 1.5844436209542263, + "learning_rate": 5.339674294736508e-07, + "loss": 0.64, + "step": 29321 + }, + { + "epoch": 0.89867598381758, + "grad_norm": 1.7889940691029165, + "learning_rate": 5.336474495767185e-07, + "loss": 0.6518, + "step": 29322 + }, + { + "epoch": 0.8987066323403212, + "grad_norm": 2.123663140358819, + "learning_rate": 5.333275629541768e-07, + "loss": 0.6409, + "step": 29323 + }, + { + "epoch": 0.8987372808630624, + "grad_norm": 1.7956074172376093, + "learning_rate": 5.330077696091829e-07, + "loss": 0.6932, + "step": 29324 + }, + { + "epoch": 0.8987679293858036, + "grad_norm": 0.6514704929583245, + "learning_rate": 5.326880695448866e-07, + "loss": 0.4882, + "step": 29325 + }, + { + "epoch": 0.8987985779085448, + "grad_norm": 1.6634050299112373, + "learning_rate": 5.323684627644354e-07, + "loss": 0.6158, + "step": 29326 + }, + { + "epoch": 0.898829226431286, + "grad_norm": 1.7252211222603395, + "learning_rate": 5.320489492709802e-07, + "loss": 0.7213, + "step": 29327 + }, + { + "epoch": 0.8988598749540272, + "grad_norm": 1.8996607481870866, + "learning_rate": 5.317295290676705e-07, + "loss": 0.6175, + "step": 29328 + }, + { + "epoch": 0.8988905234767685, + "grad_norm": 2.033086183704497, + "learning_rate": 5.314102021576506e-07, + "loss": 0.6441, + "step": 29329 + }, + { + "epoch": 0.8989211719995096, + "grad_norm": 1.5208833954764995, + "learning_rate": 5.310909685440691e-07, + "loss": 0.675, + "step": 29330 + }, + { + "epoch": 0.8989518205222509, + "grad_norm": 1.6793731023431584, + "learning_rate": 5.3077182823007e-07, + "loss": 0.6442, + "step": 29331 + }, + { + "epoch": 0.898982469044992, + "grad_norm": 0.6950737701768612, + "learning_rate": 5.30452781218801e-07, + "loss": 0.5261, + "step": 29332 + }, + { + "epoch": 0.8990131175677333, + "grad_norm": 1.9651930527046007, + "learning_rate": 5.301338275134038e-07, + "loss": 0.5957, + "step": 29333 + }, + { + "epoch": 0.8990437660904744, + "grad_norm": 0.624716984050067, + "learning_rate": 5.298149671170183e-07, + "loss": 0.4921, + "step": 29334 + }, + { + "epoch": 0.8990744146132157, + "grad_norm": 1.7321119844165855, + "learning_rate": 5.294962000327919e-07, + "loss": 0.5331, + "step": 29335 + }, + { + "epoch": 0.8991050631359568, + "grad_norm": 1.7052018570025527, + "learning_rate": 5.291775262638621e-07, + "loss": 0.6192, + "step": 29336 + }, + { + "epoch": 0.8991357116586981, + "grad_norm": 1.6391963093666582, + "learning_rate": 5.288589458133675e-07, + "loss": 0.5853, + "step": 29337 + }, + { + "epoch": 0.8991663601814393, + "grad_norm": 1.6349061077220932, + "learning_rate": 5.285404586844501e-07, + "loss": 0.6264, + "step": 29338 + }, + { + "epoch": 0.8991970087041805, + "grad_norm": 1.8463739193051578, + "learning_rate": 5.28222064880246e-07, + "loss": 0.5971, + "step": 29339 + }, + { + "epoch": 0.8992276572269217, + "grad_norm": 1.651812402002969, + "learning_rate": 5.279037644038953e-07, + "loss": 0.6081, + "step": 29340 + }, + { + "epoch": 0.8992583057496628, + "grad_norm": 1.5833775763649234, + "learning_rate": 5.275855572585309e-07, + "loss": 0.6478, + "step": 29341 + }, + { + "epoch": 0.8992889542724041, + "grad_norm": 1.6613835700291266, + "learning_rate": 5.272674434472891e-07, + "loss": 0.6799, + "step": 29342 + }, + { + "epoch": 0.8993196027951452, + "grad_norm": 1.8648433764413028, + "learning_rate": 5.269494229733075e-07, + "loss": 0.646, + "step": 29343 + }, + { + "epoch": 0.8993502513178865, + "grad_norm": 1.5648995497072316, + "learning_rate": 5.266314958397156e-07, + "loss": 0.5898, + "step": 29344 + }, + { + "epoch": 0.8993808998406276, + "grad_norm": 0.670874434614837, + "learning_rate": 5.263136620496468e-07, + "loss": 0.5186, + "step": 29345 + }, + { + "epoch": 0.8994115483633689, + "grad_norm": 1.8143207257364171, + "learning_rate": 5.259959216062338e-07, + "loss": 0.6867, + "step": 29346 + }, + { + "epoch": 0.89944219688611, + "grad_norm": 0.6506889379113756, + "learning_rate": 5.256782745126065e-07, + "loss": 0.4939, + "step": 29347 + }, + { + "epoch": 0.8994728454088513, + "grad_norm": 1.6723151948987347, + "learning_rate": 5.253607207718958e-07, + "loss": 0.637, + "step": 29348 + }, + { + "epoch": 0.8995034939315925, + "grad_norm": 1.6028830141662362, + "learning_rate": 5.250432603872302e-07, + "loss": 0.6458, + "step": 29349 + }, + { + "epoch": 0.8995341424543337, + "grad_norm": 0.6677182840527106, + "learning_rate": 5.247258933617372e-07, + "loss": 0.5397, + "step": 29350 + }, + { + "epoch": 0.8995647909770749, + "grad_norm": 1.6731884580790055, + "learning_rate": 5.244086196985454e-07, + "loss": 0.5529, + "step": 29351 + }, + { + "epoch": 0.8995954394998161, + "grad_norm": 2.0519503992845802, + "learning_rate": 5.240914394007802e-07, + "loss": 0.6161, + "step": 29352 + }, + { + "epoch": 0.8996260880225573, + "grad_norm": 1.6262566182029679, + "learning_rate": 5.237743524715632e-07, + "loss": 0.5888, + "step": 29353 + }, + { + "epoch": 0.8996567365452985, + "grad_norm": 1.7912216776341552, + "learning_rate": 5.234573589140257e-07, + "loss": 0.6217, + "step": 29354 + }, + { + "epoch": 0.8996873850680397, + "grad_norm": 0.6607846468320583, + "learning_rate": 5.231404587312872e-07, + "loss": 0.4922, + "step": 29355 + }, + { + "epoch": 0.899718033590781, + "grad_norm": 1.8411785840190562, + "learning_rate": 5.228236519264685e-07, + "loss": 0.6092, + "step": 29356 + }, + { + "epoch": 0.8997486821135221, + "grad_norm": 0.6514009709208028, + "learning_rate": 5.225069385026938e-07, + "loss": 0.5178, + "step": 29357 + }, + { + "epoch": 0.8997793306362634, + "grad_norm": 0.6760711320304574, + "learning_rate": 5.221903184630827e-07, + "loss": 0.5335, + "step": 29358 + }, + { + "epoch": 0.8998099791590045, + "grad_norm": 1.6687929270120936, + "learning_rate": 5.218737918107575e-07, + "loss": 0.6543, + "step": 29359 + }, + { + "epoch": 0.8998406276817458, + "grad_norm": 1.631084505638504, + "learning_rate": 5.215573585488331e-07, + "loss": 0.6237, + "step": 29360 + }, + { + "epoch": 0.8998712762044869, + "grad_norm": 1.706720636168603, + "learning_rate": 5.212410186804295e-07, + "loss": 0.7444, + "step": 29361 + }, + { + "epoch": 0.8999019247272282, + "grad_norm": 1.8014226084815281, + "learning_rate": 5.209247722086652e-07, + "loss": 0.7117, + "step": 29362 + }, + { + "epoch": 0.8999325732499693, + "grad_norm": 1.7664949750985641, + "learning_rate": 5.206086191366533e-07, + "loss": 0.7067, + "step": 29363 + }, + { + "epoch": 0.8999632217727106, + "grad_norm": 1.87788787630712, + "learning_rate": 5.202925594675079e-07, + "loss": 0.6797, + "step": 29364 + }, + { + "epoch": 0.8999938702954517, + "grad_norm": 0.6579075795717428, + "learning_rate": 5.199765932043477e-07, + "loss": 0.5251, + "step": 29365 + }, + { + "epoch": 0.900024518818193, + "grad_norm": 1.7382211256412143, + "learning_rate": 5.196607203502835e-07, + "loss": 0.6404, + "step": 29366 + }, + { + "epoch": 0.9000551673409342, + "grad_norm": 1.6657973303689626, + "learning_rate": 5.193449409084283e-07, + "loss": 0.5092, + "step": 29367 + }, + { + "epoch": 0.9000858158636754, + "grad_norm": 1.8266908030530378, + "learning_rate": 5.19029254881892e-07, + "loss": 0.6541, + "step": 29368 + }, + { + "epoch": 0.9001164643864166, + "grad_norm": 1.8426618437735371, + "learning_rate": 5.187136622737865e-07, + "loss": 0.6344, + "step": 29369 + }, + { + "epoch": 0.9001471129091578, + "grad_norm": 1.809122828114172, + "learning_rate": 5.183981630872215e-07, + "loss": 0.6674, + "step": 29370 + }, + { + "epoch": 0.900177761431899, + "grad_norm": 1.5270854019384765, + "learning_rate": 5.180827573253055e-07, + "loss": 0.492, + "step": 29371 + }, + { + "epoch": 0.9002084099546401, + "grad_norm": 1.7380343283302713, + "learning_rate": 5.177674449911451e-07, + "loss": 0.5841, + "step": 29372 + }, + { + "epoch": 0.9002390584773814, + "grad_norm": 1.8676325140130587, + "learning_rate": 5.174522260878501e-07, + "loss": 0.6748, + "step": 29373 + }, + { + "epoch": 0.9002697070001225, + "grad_norm": 1.8996089859722451, + "learning_rate": 5.171371006185222e-07, + "loss": 0.633, + "step": 29374 + }, + { + "epoch": 0.9003003555228638, + "grad_norm": 1.616426279545996, + "learning_rate": 5.168220685862701e-07, + "loss": 0.748, + "step": 29375 + }, + { + "epoch": 0.900331004045605, + "grad_norm": 1.739006650061765, + "learning_rate": 5.165071299941971e-07, + "loss": 0.6345, + "step": 29376 + }, + { + "epoch": 0.9003616525683462, + "grad_norm": 1.9337441389408403, + "learning_rate": 5.161922848454048e-07, + "loss": 0.6472, + "step": 29377 + }, + { + "epoch": 0.9003923010910874, + "grad_norm": 1.4543694534708502, + "learning_rate": 5.158775331429977e-07, + "loss": 0.5165, + "step": 29378 + }, + { + "epoch": 0.9004229496138286, + "grad_norm": 1.5794490382309196, + "learning_rate": 5.155628748900743e-07, + "loss": 0.6491, + "step": 29379 + }, + { + "epoch": 0.9004535981365698, + "grad_norm": 0.6809950013606083, + "learning_rate": 5.152483100897365e-07, + "loss": 0.543, + "step": 29380 + }, + { + "epoch": 0.900484246659311, + "grad_norm": 1.838954689956282, + "learning_rate": 5.149338387450853e-07, + "loss": 0.6402, + "step": 29381 + }, + { + "epoch": 0.9005148951820522, + "grad_norm": 1.5530872251119892, + "learning_rate": 5.14619460859217e-07, + "loss": 0.5731, + "step": 29382 + }, + { + "epoch": 0.9005455437047934, + "grad_norm": 1.5590599853389024, + "learning_rate": 5.143051764352292e-07, + "loss": 0.5953, + "step": 29383 + }, + { + "epoch": 0.9005761922275346, + "grad_norm": 1.5741646985699507, + "learning_rate": 5.139909854762215e-07, + "loss": 0.6442, + "step": 29384 + }, + { + "epoch": 0.9006068407502759, + "grad_norm": 1.9039047846489574, + "learning_rate": 5.13676887985286e-07, + "loss": 0.7018, + "step": 29385 + }, + { + "epoch": 0.900637489273017, + "grad_norm": 1.9072024589900758, + "learning_rate": 5.133628839655202e-07, + "loss": 0.7023, + "step": 29386 + }, + { + "epoch": 0.9006681377957583, + "grad_norm": 1.8280831037426168, + "learning_rate": 5.130489734200183e-07, + "loss": 0.6162, + "step": 29387 + }, + { + "epoch": 0.9006987863184994, + "grad_norm": 1.592674370997663, + "learning_rate": 5.127351563518701e-07, + "loss": 0.5428, + "step": 29388 + }, + { + "epoch": 0.9007294348412407, + "grad_norm": 1.7239695528234145, + "learning_rate": 5.124214327641719e-07, + "loss": 0.554, + "step": 29389 + }, + { + "epoch": 0.9007600833639818, + "grad_norm": 1.8990609866497545, + "learning_rate": 5.121078026600102e-07, + "loss": 0.6858, + "step": 29390 + }, + { + "epoch": 0.9007907318867231, + "grad_norm": 1.7412765001147639, + "learning_rate": 5.117942660424791e-07, + "loss": 0.6364, + "step": 29391 + }, + { + "epoch": 0.9008213804094642, + "grad_norm": 1.6018152563788228, + "learning_rate": 5.114808229146684e-07, + "loss": 0.5433, + "step": 29392 + }, + { + "epoch": 0.9008520289322055, + "grad_norm": 1.7615079919177303, + "learning_rate": 5.111674732796624e-07, + "loss": 0.6598, + "step": 29393 + }, + { + "epoch": 0.9008826774549467, + "grad_norm": 1.6720925171975984, + "learning_rate": 5.108542171405518e-07, + "loss": 0.6465, + "step": 29394 + }, + { + "epoch": 0.9009133259776879, + "grad_norm": 1.676551633962039, + "learning_rate": 5.105410545004241e-07, + "loss": 0.4648, + "step": 29395 + }, + { + "epoch": 0.9009439745004291, + "grad_norm": 1.8254744051240148, + "learning_rate": 5.102279853623615e-07, + "loss": 0.5969, + "step": 29396 + }, + { + "epoch": 0.9009746230231703, + "grad_norm": 1.6352254205087042, + "learning_rate": 5.099150097294525e-07, + "loss": 0.6342, + "step": 29397 + }, + { + "epoch": 0.9010052715459115, + "grad_norm": 1.5780610033319777, + "learning_rate": 5.096021276047769e-07, + "loss": 0.6823, + "step": 29398 + }, + { + "epoch": 0.9010359200686527, + "grad_norm": 1.6032382513566283, + "learning_rate": 5.0928933899142e-07, + "loss": 0.6546, + "step": 29399 + }, + { + "epoch": 0.9010665685913939, + "grad_norm": 0.6495466532337351, + "learning_rate": 5.089766438924648e-07, + "loss": 0.5225, + "step": 29400 + }, + { + "epoch": 0.9010972171141352, + "grad_norm": 1.7189694705372083, + "learning_rate": 5.086640423109901e-07, + "loss": 0.633, + "step": 29401 + }, + { + "epoch": 0.9011278656368763, + "grad_norm": 1.6332563583635555, + "learning_rate": 5.083515342500778e-07, + "loss": 0.6205, + "step": 29402 + }, + { + "epoch": 0.9011585141596175, + "grad_norm": 0.6992936161068098, + "learning_rate": 5.080391197128065e-07, + "loss": 0.496, + "step": 29403 + }, + { + "epoch": 0.9011891626823587, + "grad_norm": 1.7398462158485626, + "learning_rate": 5.077267987022539e-07, + "loss": 0.6095, + "step": 29404 + }, + { + "epoch": 0.9012198112050999, + "grad_norm": 1.6446648660983167, + "learning_rate": 5.074145712214972e-07, + "loss": 0.6267, + "step": 29405 + }, + { + "epoch": 0.9012504597278411, + "grad_norm": 1.5671725876703986, + "learning_rate": 5.071024372736144e-07, + "loss": 0.5913, + "step": 29406 + }, + { + "epoch": 0.9012811082505823, + "grad_norm": 1.631305074444597, + "learning_rate": 5.067903968616794e-07, + "loss": 0.6158, + "step": 29407 + }, + { + "epoch": 0.9013117567733235, + "grad_norm": 0.6738022021897732, + "learning_rate": 5.064784499887698e-07, + "loss": 0.5135, + "step": 29408 + }, + { + "epoch": 0.9013424052960647, + "grad_norm": 1.6490691494467222, + "learning_rate": 5.061665966579543e-07, + "loss": 0.6201, + "step": 29409 + }, + { + "epoch": 0.901373053818806, + "grad_norm": 1.5709501263912837, + "learning_rate": 5.058548368723093e-07, + "loss": 0.5682, + "step": 29410 + }, + { + "epoch": 0.9014037023415471, + "grad_norm": 0.6982588410765479, + "learning_rate": 5.055431706349068e-07, + "loss": 0.5198, + "step": 29411 + }, + { + "epoch": 0.9014343508642884, + "grad_norm": 1.6580424628410388, + "learning_rate": 5.052315979488154e-07, + "loss": 0.5448, + "step": 29412 + }, + { + "epoch": 0.9014649993870295, + "grad_norm": 1.6829537607093874, + "learning_rate": 5.049201188171061e-07, + "loss": 0.6296, + "step": 29413 + }, + { + "epoch": 0.9014956479097708, + "grad_norm": 1.7569187105601662, + "learning_rate": 5.046087332428496e-07, + "loss": 0.6566, + "step": 29414 + }, + { + "epoch": 0.9015262964325119, + "grad_norm": 1.718091137224346, + "learning_rate": 5.042974412291124e-07, + "loss": 0.7727, + "step": 29415 + }, + { + "epoch": 0.9015569449552532, + "grad_norm": 1.6505383959891409, + "learning_rate": 5.039862427789611e-07, + "loss": 0.5323, + "step": 29416 + }, + { + "epoch": 0.9015875934779943, + "grad_norm": 1.988152618433005, + "learning_rate": 5.036751378954652e-07, + "loss": 0.6086, + "step": 29417 + }, + { + "epoch": 0.9016182420007356, + "grad_norm": 1.756843301598485, + "learning_rate": 5.033641265816858e-07, + "loss": 0.6986, + "step": 29418 + }, + { + "epoch": 0.9016488905234767, + "grad_norm": 1.8500614311251466, + "learning_rate": 5.030532088406914e-07, + "loss": 0.5975, + "step": 29419 + }, + { + "epoch": 0.901679539046218, + "grad_norm": 1.9453120416211747, + "learning_rate": 5.027423846755397e-07, + "loss": 0.6349, + "step": 29420 + }, + { + "epoch": 0.9017101875689592, + "grad_norm": 1.7520541478324803, + "learning_rate": 5.024316540893015e-07, + "loss": 0.6759, + "step": 29421 + }, + { + "epoch": 0.9017408360917004, + "grad_norm": 1.7580992951713639, + "learning_rate": 5.021210170850332e-07, + "loss": 0.5927, + "step": 29422 + }, + { + "epoch": 0.9017714846144416, + "grad_norm": 0.659967352867608, + "learning_rate": 5.018104736657958e-07, + "loss": 0.4799, + "step": 29423 + }, + { + "epoch": 0.9018021331371828, + "grad_norm": 1.7156523337893828, + "learning_rate": 5.015000238346501e-07, + "loss": 0.6731, + "step": 29424 + }, + { + "epoch": 0.901832781659924, + "grad_norm": 1.5903578421900393, + "learning_rate": 5.011896675946559e-07, + "loss": 0.6706, + "step": 29425 + }, + { + "epoch": 0.9018634301826652, + "grad_norm": 1.7571066429102207, + "learning_rate": 5.008794049488697e-07, + "loss": 0.6174, + "step": 29426 + }, + { + "epoch": 0.9018940787054064, + "grad_norm": 1.7823234651628694, + "learning_rate": 5.005692359003489e-07, + "loss": 0.64, + "step": 29427 + }, + { + "epoch": 0.9019247272281476, + "grad_norm": 0.6672173295447257, + "learning_rate": 5.002591604521489e-07, + "loss": 0.5283, + "step": 29428 + }, + { + "epoch": 0.9019553757508888, + "grad_norm": 1.7236329426401573, + "learning_rate": 4.999491786073285e-07, + "loss": 0.6658, + "step": 29429 + }, + { + "epoch": 0.9019860242736301, + "grad_norm": 1.7317839027712523, + "learning_rate": 4.996392903689396e-07, + "loss": 0.5978, + "step": 29430 + }, + { + "epoch": 0.9020166727963712, + "grad_norm": 1.7606339004756613, + "learning_rate": 4.993294957400319e-07, + "loss": 0.6857, + "step": 29431 + }, + { + "epoch": 0.9020473213191125, + "grad_norm": 1.675405485305714, + "learning_rate": 4.990197947236653e-07, + "loss": 0.6392, + "step": 29432 + }, + { + "epoch": 0.9020779698418536, + "grad_norm": 0.6646311625628637, + "learning_rate": 4.987101873228873e-07, + "loss": 0.5199, + "step": 29433 + }, + { + "epoch": 0.9021086183645948, + "grad_norm": 1.7371145191369517, + "learning_rate": 4.984006735407465e-07, + "loss": 0.6693, + "step": 29434 + }, + { + "epoch": 0.902139266887336, + "grad_norm": 1.9085969387709454, + "learning_rate": 4.980912533802962e-07, + "loss": 0.5887, + "step": 29435 + }, + { + "epoch": 0.9021699154100772, + "grad_norm": 1.5417918567606785, + "learning_rate": 4.977819268445849e-07, + "loss": 0.5516, + "step": 29436 + }, + { + "epoch": 0.9022005639328184, + "grad_norm": 1.694517944917628, + "learning_rate": 4.974726939366581e-07, + "loss": 0.6678, + "step": 29437 + }, + { + "epoch": 0.9022312124555596, + "grad_norm": 1.668261279332021, + "learning_rate": 4.971635546595632e-07, + "loss": 0.6137, + "step": 29438 + }, + { + "epoch": 0.9022618609783009, + "grad_norm": 1.7150616538473145, + "learning_rate": 4.96854509016349e-07, + "loss": 0.5989, + "step": 29439 + }, + { + "epoch": 0.902292509501042, + "grad_norm": 1.6191610879409744, + "learning_rate": 4.965455570100585e-07, + "loss": 0.5667, + "step": 29440 + }, + { + "epoch": 0.9023231580237833, + "grad_norm": 1.7198621405234742, + "learning_rate": 4.962366986437372e-07, + "loss": 0.5429, + "step": 29441 + }, + { + "epoch": 0.9023538065465244, + "grad_norm": 0.6734734632218485, + "learning_rate": 4.959279339204259e-07, + "loss": 0.5219, + "step": 29442 + }, + { + "epoch": 0.9023844550692657, + "grad_norm": 1.684519292148657, + "learning_rate": 4.956192628431688e-07, + "loss": 0.5973, + "step": 29443 + }, + { + "epoch": 0.9024151035920068, + "grad_norm": 1.6795473863462516, + "learning_rate": 4.953106854150081e-07, + "loss": 0.5747, + "step": 29444 + }, + { + "epoch": 0.9024457521147481, + "grad_norm": 1.9873099697727226, + "learning_rate": 4.950022016389811e-07, + "loss": 0.7452, + "step": 29445 + }, + { + "epoch": 0.9024764006374892, + "grad_norm": 1.5426391036403784, + "learning_rate": 4.946938115181288e-07, + "loss": 0.5147, + "step": 29446 + }, + { + "epoch": 0.9025070491602305, + "grad_norm": 2.0290805831799736, + "learning_rate": 4.943855150554922e-07, + "loss": 0.7077, + "step": 29447 + }, + { + "epoch": 0.9025376976829717, + "grad_norm": 0.7126534619172613, + "learning_rate": 4.940773122541076e-07, + "loss": 0.5476, + "step": 29448 + }, + { + "epoch": 0.9025683462057129, + "grad_norm": 1.5049064321994723, + "learning_rate": 4.937692031170116e-07, + "loss": 0.6186, + "step": 29449 + }, + { + "epoch": 0.9025989947284541, + "grad_norm": 1.7830968628217918, + "learning_rate": 4.934611876472361e-07, + "loss": 0.6549, + "step": 29450 + }, + { + "epoch": 0.9026296432511953, + "grad_norm": 1.5599776877566245, + "learning_rate": 4.931532658478244e-07, + "loss": 0.6207, + "step": 29451 + }, + { + "epoch": 0.9026602917739365, + "grad_norm": 0.6779887350260376, + "learning_rate": 4.92845437721805e-07, + "loss": 0.5285, + "step": 29452 + }, + { + "epoch": 0.9026909402966777, + "grad_norm": 1.790062604598601, + "learning_rate": 4.925377032722112e-07, + "loss": 0.652, + "step": 29453 + }, + { + "epoch": 0.9027215888194189, + "grad_norm": 1.7883630385687734, + "learning_rate": 4.922300625020749e-07, + "loss": 0.5621, + "step": 29454 + }, + { + "epoch": 0.9027522373421601, + "grad_norm": 1.5277394392185295, + "learning_rate": 4.919225154144291e-07, + "loss": 0.5206, + "step": 29455 + }, + { + "epoch": 0.9027828858649013, + "grad_norm": 1.7087943906802043, + "learning_rate": 4.91615062012305e-07, + "loss": 0.6007, + "step": 29456 + }, + { + "epoch": 0.9028135343876426, + "grad_norm": 1.6083303268282965, + "learning_rate": 4.9130770229873e-07, + "loss": 0.6584, + "step": 29457 + }, + { + "epoch": 0.9028441829103837, + "grad_norm": 2.028874298820146, + "learning_rate": 4.910004362767317e-07, + "loss": 0.6611, + "step": 29458 + }, + { + "epoch": 0.902874831433125, + "grad_norm": 1.5231911001410585, + "learning_rate": 4.906932639493411e-07, + "loss": 0.6462, + "step": 29459 + }, + { + "epoch": 0.9029054799558661, + "grad_norm": 1.7424842556446722, + "learning_rate": 4.903861853195824e-07, + "loss": 0.6778, + "step": 29460 + }, + { + "epoch": 0.9029361284786074, + "grad_norm": 1.569435918745588, + "learning_rate": 4.900792003904798e-07, + "loss": 0.6456, + "step": 29461 + }, + { + "epoch": 0.9029667770013485, + "grad_norm": 1.5337085680141913, + "learning_rate": 4.897723091650619e-07, + "loss": 0.6191, + "step": 29462 + }, + { + "epoch": 0.9029974255240898, + "grad_norm": 1.6519363829966849, + "learning_rate": 4.894655116463509e-07, + "loss": 0.5299, + "step": 29463 + }, + { + "epoch": 0.9030280740468309, + "grad_norm": 1.751993133301974, + "learning_rate": 4.891588078373688e-07, + "loss": 0.6528, + "step": 29464 + }, + { + "epoch": 0.9030587225695721, + "grad_norm": 0.6890707654953285, + "learning_rate": 4.888521977411387e-07, + "loss": 0.5404, + "step": 29465 + }, + { + "epoch": 0.9030893710923134, + "grad_norm": 1.6832980640887818, + "learning_rate": 4.885456813606804e-07, + "loss": 0.6567, + "step": 29466 + }, + { + "epoch": 0.9031200196150545, + "grad_norm": 1.7708731471082784, + "learning_rate": 4.882392586990171e-07, + "loss": 0.6226, + "step": 29467 + }, + { + "epoch": 0.9031506681377958, + "grad_norm": 1.6406931273300496, + "learning_rate": 4.879329297591639e-07, + "loss": 0.636, + "step": 29468 + }, + { + "epoch": 0.9031813166605369, + "grad_norm": 0.6667776171677694, + "learning_rate": 4.876266945441422e-07, + "loss": 0.5043, + "step": 29469 + }, + { + "epoch": 0.9032119651832782, + "grad_norm": 1.7944239892946185, + "learning_rate": 4.873205530569703e-07, + "loss": 0.6676, + "step": 29470 + }, + { + "epoch": 0.9032426137060193, + "grad_norm": 1.7095772079565028, + "learning_rate": 4.870145053006614e-07, + "loss": 0.6235, + "step": 29471 + }, + { + "epoch": 0.9032732622287606, + "grad_norm": 1.7121696139584814, + "learning_rate": 4.86708551278231e-07, + "loss": 0.5898, + "step": 29472 + }, + { + "epoch": 0.9033039107515017, + "grad_norm": 1.7351734631347875, + "learning_rate": 4.864026909926978e-07, + "loss": 0.6101, + "step": 29473 + }, + { + "epoch": 0.903334559274243, + "grad_norm": 1.6547038240982699, + "learning_rate": 4.860969244470715e-07, + "loss": 0.6959, + "step": 29474 + }, + { + "epoch": 0.9033652077969841, + "grad_norm": 1.841110880153551, + "learning_rate": 4.857912516443686e-07, + "loss": 0.7441, + "step": 29475 + }, + { + "epoch": 0.9033958563197254, + "grad_norm": 1.80617964092186, + "learning_rate": 4.854856725875967e-07, + "loss": 0.626, + "step": 29476 + }, + { + "epoch": 0.9034265048424666, + "grad_norm": 1.7672080706744713, + "learning_rate": 4.851801872797679e-07, + "loss": 0.5512, + "step": 29477 + }, + { + "epoch": 0.9034571533652078, + "grad_norm": 0.6618314509422313, + "learning_rate": 4.848747957238964e-07, + "loss": 0.504, + "step": 29478 + }, + { + "epoch": 0.903487801887949, + "grad_norm": 1.7659073636020775, + "learning_rate": 4.845694979229853e-07, + "loss": 0.5855, + "step": 29479 + }, + { + "epoch": 0.9035184504106902, + "grad_norm": 1.620383258095045, + "learning_rate": 4.842642938800468e-07, + "loss": 0.6302, + "step": 29480 + }, + { + "epoch": 0.9035490989334314, + "grad_norm": 1.659681286848142, + "learning_rate": 4.839591835980872e-07, + "loss": 0.6778, + "step": 29481 + }, + { + "epoch": 0.9035797474561726, + "grad_norm": 1.967701616551913, + "learning_rate": 4.836541670801131e-07, + "loss": 0.6846, + "step": 29482 + }, + { + "epoch": 0.9036103959789138, + "grad_norm": 1.734470496125627, + "learning_rate": 4.833492443291265e-07, + "loss": 0.6975, + "step": 29483 + }, + { + "epoch": 0.903641044501655, + "grad_norm": 1.6153442157243458, + "learning_rate": 4.830444153481373e-07, + "loss": 0.7231, + "step": 29484 + }, + { + "epoch": 0.9036716930243962, + "grad_norm": 0.6412892435904323, + "learning_rate": 4.827396801401452e-07, + "loss": 0.5113, + "step": 29485 + }, + { + "epoch": 0.9037023415471375, + "grad_norm": 1.8178204150881885, + "learning_rate": 4.824350387081555e-07, + "loss": 0.6558, + "step": 29486 + }, + { + "epoch": 0.9037329900698786, + "grad_norm": 1.7591496087065983, + "learning_rate": 4.821304910551683e-07, + "loss": 0.7715, + "step": 29487 + }, + { + "epoch": 0.9037636385926199, + "grad_norm": 1.685600027526809, + "learning_rate": 4.818260371841832e-07, + "loss": 0.6144, + "step": 29488 + }, + { + "epoch": 0.903794287115361, + "grad_norm": 0.6575799655191579, + "learning_rate": 4.815216770982034e-07, + "loss": 0.5266, + "step": 29489 + }, + { + "epoch": 0.9038249356381023, + "grad_norm": 1.6865181538525407, + "learning_rate": 4.812174108002243e-07, + "loss": 0.5906, + "step": 29490 + }, + { + "epoch": 0.9038555841608434, + "grad_norm": 1.7980428303079097, + "learning_rate": 4.809132382932457e-07, + "loss": 0.6432, + "step": 29491 + }, + { + "epoch": 0.9038862326835847, + "grad_norm": 1.770017385746612, + "learning_rate": 4.806091595802653e-07, + "loss": 0.614, + "step": 29492 + }, + { + "epoch": 0.9039168812063259, + "grad_norm": 1.5659625429930808, + "learning_rate": 4.803051746642784e-07, + "loss": 0.5384, + "step": 29493 + }, + { + "epoch": 0.9039475297290671, + "grad_norm": 1.7343566041441485, + "learning_rate": 4.800012835482804e-07, + "loss": 0.5574, + "step": 29494 + }, + { + "epoch": 0.9039781782518083, + "grad_norm": 1.705270898407469, + "learning_rate": 4.796974862352654e-07, + "loss": 0.6803, + "step": 29495 + }, + { + "epoch": 0.9040088267745494, + "grad_norm": 1.5366587661083775, + "learning_rate": 4.793937827282258e-07, + "loss": 0.6489, + "step": 29496 + }, + { + "epoch": 0.9040394752972907, + "grad_norm": 1.6427953566275748, + "learning_rate": 4.790901730301567e-07, + "loss": 0.6815, + "step": 29497 + }, + { + "epoch": 0.9040701238200318, + "grad_norm": 1.8464389076629484, + "learning_rate": 4.787866571440481e-07, + "loss": 0.7278, + "step": 29498 + }, + { + "epoch": 0.9041007723427731, + "grad_norm": 1.9574835485236954, + "learning_rate": 4.784832350728896e-07, + "loss": 0.6771, + "step": 29499 + }, + { + "epoch": 0.9041314208655142, + "grad_norm": 1.7827671315826894, + "learning_rate": 4.781799068196736e-07, + "loss": 0.6375, + "step": 29500 + }, + { + "epoch": 0.9041620693882555, + "grad_norm": 1.9184732130541298, + "learning_rate": 4.778766723873851e-07, + "loss": 0.6467, + "step": 29501 + }, + { + "epoch": 0.9041927179109966, + "grad_norm": 1.8543362578531881, + "learning_rate": 4.775735317790154e-07, + "loss": 0.6388, + "step": 29502 + }, + { + "epoch": 0.9042233664337379, + "grad_norm": 1.6674385056070322, + "learning_rate": 4.772704849975506e-07, + "loss": 0.6191, + "step": 29503 + }, + { + "epoch": 0.9042540149564791, + "grad_norm": 1.793619355136449, + "learning_rate": 4.769675320459743e-07, + "loss": 0.6743, + "step": 29504 + }, + { + "epoch": 0.9042846634792203, + "grad_norm": 1.8048707012507637, + "learning_rate": 4.766646729272761e-07, + "loss": 0.7123, + "step": 29505 + }, + { + "epoch": 0.9043153120019615, + "grad_norm": 0.6854324435568357, + "learning_rate": 4.763619076444359e-07, + "loss": 0.4882, + "step": 29506 + }, + { + "epoch": 0.9043459605247027, + "grad_norm": 1.767502673296581, + "learning_rate": 4.7605923620043793e-07, + "loss": 0.6116, + "step": 29507 + }, + { + "epoch": 0.9043766090474439, + "grad_norm": 1.6185038994669831, + "learning_rate": 4.757566585982665e-07, + "loss": 0.5912, + "step": 29508 + }, + { + "epoch": 0.9044072575701851, + "grad_norm": 1.6985252128069614, + "learning_rate": 4.754541748409014e-07, + "loss": 0.6037, + "step": 29509 + }, + { + "epoch": 0.9044379060929263, + "grad_norm": 1.808661667455187, + "learning_rate": 4.7515178493132255e-07, + "loss": 0.7206, + "step": 29510 + }, + { + "epoch": 0.9044685546156676, + "grad_norm": 1.8629845852412101, + "learning_rate": 4.748494888725108e-07, + "loss": 0.7304, + "step": 29511 + }, + { + "epoch": 0.9044992031384087, + "grad_norm": 0.673881152124118, + "learning_rate": 4.745472866674439e-07, + "loss": 0.5168, + "step": 29512 + }, + { + "epoch": 0.90452985166115, + "grad_norm": 1.6951992618513707, + "learning_rate": 4.742451783190993e-07, + "loss": 0.5292, + "step": 29513 + }, + { + "epoch": 0.9045605001838911, + "grad_norm": 1.7531614345549809, + "learning_rate": 4.739431638304548e-07, + "loss": 0.6058, + "step": 29514 + }, + { + "epoch": 0.9045911487066324, + "grad_norm": 1.7993232314928667, + "learning_rate": 4.7364124320448567e-07, + "loss": 0.7829, + "step": 29515 + }, + { + "epoch": 0.9046217972293735, + "grad_norm": 1.7083352083528882, + "learning_rate": 4.733394164441674e-07, + "loss": 0.5743, + "step": 29516 + }, + { + "epoch": 0.9046524457521148, + "grad_norm": 1.7491654741626421, + "learning_rate": 4.730376835524719e-07, + "loss": 0.5782, + "step": 29517 + }, + { + "epoch": 0.9046830942748559, + "grad_norm": 1.6702157493419696, + "learning_rate": 4.7273604453237475e-07, + "loss": 0.5766, + "step": 29518 + }, + { + "epoch": 0.9047137427975972, + "grad_norm": 1.592194959551572, + "learning_rate": 4.7243449938684685e-07, + "loss": 0.6188, + "step": 29519 + }, + { + "epoch": 0.9047443913203383, + "grad_norm": 1.7095762702768336, + "learning_rate": 4.721330481188591e-07, + "loss": 0.5788, + "step": 29520 + }, + { + "epoch": 0.9047750398430796, + "grad_norm": 0.670512897611505, + "learning_rate": 4.7183169073138246e-07, + "loss": 0.524, + "step": 29521 + }, + { + "epoch": 0.9048056883658208, + "grad_norm": 0.6832489274093335, + "learning_rate": 4.7153042722738684e-07, + "loss": 0.5297, + "step": 29522 + }, + { + "epoch": 0.904836336888562, + "grad_norm": 1.7285275383124783, + "learning_rate": 4.712292576098387e-07, + "loss": 0.5533, + "step": 29523 + }, + { + "epoch": 0.9048669854113032, + "grad_norm": 0.6699345225617278, + "learning_rate": 4.7092818188170684e-07, + "loss": 0.5484, + "step": 29524 + }, + { + "epoch": 0.9048976339340444, + "grad_norm": 0.6565772346331964, + "learning_rate": 4.706272000459589e-07, + "loss": 0.4972, + "step": 29525 + }, + { + "epoch": 0.9049282824567856, + "grad_norm": 1.926764954436633, + "learning_rate": 4.703263121055579e-07, + "loss": 0.6452, + "step": 29526 + }, + { + "epoch": 0.9049589309795267, + "grad_norm": 1.6063099833488133, + "learning_rate": 4.7002551806347165e-07, + "loss": 0.5661, + "step": 29527 + }, + { + "epoch": 0.904989579502268, + "grad_norm": 1.8871998680711013, + "learning_rate": 4.697248179226599e-07, + "loss": 0.6488, + "step": 29528 + }, + { + "epoch": 0.9050202280250091, + "grad_norm": 0.6631092746237682, + "learning_rate": 4.694242116860903e-07, + "loss": 0.5163, + "step": 29529 + }, + { + "epoch": 0.9050508765477504, + "grad_norm": 1.6711008717376563, + "learning_rate": 4.6912369935672277e-07, + "loss": 0.5135, + "step": 29530 + }, + { + "epoch": 0.9050815250704916, + "grad_norm": 1.670154330669895, + "learning_rate": 4.6882328093751594e-07, + "loss": 0.6462, + "step": 29531 + }, + { + "epoch": 0.9051121735932328, + "grad_norm": 1.7840462305188258, + "learning_rate": 4.68522956431432e-07, + "loss": 0.6297, + "step": 29532 + }, + { + "epoch": 0.905142822115974, + "grad_norm": 0.7223380723207105, + "learning_rate": 4.682227258414318e-07, + "loss": 0.517, + "step": 29533 + }, + { + "epoch": 0.9051734706387152, + "grad_norm": 1.765729410869725, + "learning_rate": 4.679225891704708e-07, + "loss": 0.644, + "step": 29534 + }, + { + "epoch": 0.9052041191614564, + "grad_norm": 1.6593917062856463, + "learning_rate": 4.6762254642150675e-07, + "loss": 0.6685, + "step": 29535 + }, + { + "epoch": 0.9052347676841976, + "grad_norm": 1.9453912760390348, + "learning_rate": 4.673225975974993e-07, + "loss": 0.6393, + "step": 29536 + }, + { + "epoch": 0.9052654162069388, + "grad_norm": 1.7918482822135573, + "learning_rate": 4.6702274270139845e-07, + "loss": 0.6661, + "step": 29537 + }, + { + "epoch": 0.90529606472968, + "grad_norm": 1.5624456289132835, + "learning_rate": 4.6672298173616406e-07, + "loss": 0.627, + "step": 29538 + }, + { + "epoch": 0.9053267132524212, + "grad_norm": 1.7521659146355697, + "learning_rate": 4.664233147047459e-07, + "loss": 0.6054, + "step": 29539 + }, + { + "epoch": 0.9053573617751625, + "grad_norm": 1.674343949272896, + "learning_rate": 4.661237416100972e-07, + "loss": 0.7255, + "step": 29540 + }, + { + "epoch": 0.9053880102979036, + "grad_norm": 1.4867165108605078, + "learning_rate": 4.658242624551734e-07, + "loss": 0.565, + "step": 29541 + }, + { + "epoch": 0.9054186588206449, + "grad_norm": 1.8143113802855952, + "learning_rate": 4.6552487724291996e-07, + "loss": 0.7228, + "step": 29542 + }, + { + "epoch": 0.905449307343386, + "grad_norm": 0.6845358631421891, + "learning_rate": 4.6522558597629e-07, + "loss": 0.4881, + "step": 29543 + }, + { + "epoch": 0.9054799558661273, + "grad_norm": 1.6734447226177487, + "learning_rate": 4.649263886582334e-07, + "loss": 0.6304, + "step": 29544 + }, + { + "epoch": 0.9055106043888684, + "grad_norm": 0.684278031709858, + "learning_rate": 4.6462728529169443e-07, + "loss": 0.5376, + "step": 29545 + }, + { + "epoch": 0.9055412529116097, + "grad_norm": 1.552793940394048, + "learning_rate": 4.6432827587962415e-07, + "loss": 0.5211, + "step": 29546 + }, + { + "epoch": 0.9055719014343508, + "grad_norm": 1.6496392752417004, + "learning_rate": 4.640293604249657e-07, + "loss": 0.6918, + "step": 29547 + }, + { + "epoch": 0.9056025499570921, + "grad_norm": 1.5676831345014328, + "learning_rate": 4.637305389306679e-07, + "loss": 0.6893, + "step": 29548 + }, + { + "epoch": 0.9056331984798333, + "grad_norm": 0.6662196692039074, + "learning_rate": 4.6343181139967273e-07, + "loss": 0.5213, + "step": 29549 + }, + { + "epoch": 0.9056638470025745, + "grad_norm": 1.8740776332630247, + "learning_rate": 4.631331778349224e-07, + "loss": 0.5502, + "step": 29550 + }, + { + "epoch": 0.9056944955253157, + "grad_norm": 1.5675484023009363, + "learning_rate": 4.6283463823936115e-07, + "loss": 0.6945, + "step": 29551 + }, + { + "epoch": 0.9057251440480569, + "grad_norm": 1.641426661900141, + "learning_rate": 4.625361926159322e-07, + "loss": 0.6786, + "step": 29552 + }, + { + "epoch": 0.9057557925707981, + "grad_norm": 0.6653241845397482, + "learning_rate": 4.622378409675732e-07, + "loss": 0.5056, + "step": 29553 + }, + { + "epoch": 0.9057864410935393, + "grad_norm": 1.7851444160941923, + "learning_rate": 4.61939583297224e-07, + "loss": 0.5275, + "step": 29554 + }, + { + "epoch": 0.9058170896162805, + "grad_norm": 1.9253442697504737, + "learning_rate": 4.616414196078256e-07, + "loss": 0.5799, + "step": 29555 + }, + { + "epoch": 0.9058477381390218, + "grad_norm": 1.8719210881655552, + "learning_rate": 4.6134334990231566e-07, + "loss": 0.5623, + "step": 29556 + }, + { + "epoch": 0.9058783866617629, + "grad_norm": 1.7536620479604514, + "learning_rate": 4.610453741836307e-07, + "loss": 0.5957, + "step": 29557 + }, + { + "epoch": 0.905909035184504, + "grad_norm": 1.7330399515258652, + "learning_rate": 4.6074749245470285e-07, + "loss": 0.6719, + "step": 29558 + }, + { + "epoch": 0.9059396837072453, + "grad_norm": 2.1891473936309307, + "learning_rate": 4.6044970471847416e-07, + "loss": 0.6548, + "step": 29559 + }, + { + "epoch": 0.9059703322299865, + "grad_norm": 0.6627284490154394, + "learning_rate": 4.6015201097787454e-07, + "loss": 0.5146, + "step": 29560 + }, + { + "epoch": 0.9060009807527277, + "grad_norm": 1.694586983584897, + "learning_rate": 4.598544112358372e-07, + "loss": 0.6084, + "step": 29561 + }, + { + "epoch": 0.9060316292754689, + "grad_norm": 1.76862343823716, + "learning_rate": 4.595569054952953e-07, + "loss": 0.6742, + "step": 29562 + }, + { + "epoch": 0.9060622777982101, + "grad_norm": 1.6381042501287237, + "learning_rate": 4.59259493759181e-07, + "loss": 0.5805, + "step": 29563 + }, + { + "epoch": 0.9060929263209513, + "grad_norm": 1.8401808917384075, + "learning_rate": 4.5896217603042413e-07, + "loss": 0.6762, + "step": 29564 + }, + { + "epoch": 0.9061235748436925, + "grad_norm": 1.5113657200105357, + "learning_rate": 4.586649523119524e-07, + "loss": 0.6037, + "step": 29565 + }, + { + "epoch": 0.9061542233664337, + "grad_norm": 1.7693852919218425, + "learning_rate": 4.5836782260669675e-07, + "loss": 0.6402, + "step": 29566 + }, + { + "epoch": 0.906184871889175, + "grad_norm": 0.6560267947622694, + "learning_rate": 4.58070786917586e-07, + "loss": 0.5054, + "step": 29567 + }, + { + "epoch": 0.9062155204119161, + "grad_norm": 0.6525255949236487, + "learning_rate": 4.577738452475455e-07, + "loss": 0.4854, + "step": 29568 + }, + { + "epoch": 0.9062461689346574, + "grad_norm": 1.886582697808757, + "learning_rate": 4.5747699759949747e-07, + "loss": 0.712, + "step": 29569 + }, + { + "epoch": 0.9062768174573985, + "grad_norm": 1.6006136377364064, + "learning_rate": 4.571802439763728e-07, + "loss": 0.6768, + "step": 29570 + }, + { + "epoch": 0.9063074659801398, + "grad_norm": 1.6865814608839522, + "learning_rate": 4.568835843810926e-07, + "loss": 0.6598, + "step": 29571 + }, + { + "epoch": 0.9063381145028809, + "grad_norm": 1.8580741830801673, + "learning_rate": 4.5658701881657885e-07, + "loss": 0.5733, + "step": 29572 + }, + { + "epoch": 0.9063687630256222, + "grad_norm": 1.8909028847714544, + "learning_rate": 4.562905472857559e-07, + "loss": 0.7223, + "step": 29573 + }, + { + "epoch": 0.9063994115483633, + "grad_norm": 1.7358252095063003, + "learning_rate": 4.5599416979154374e-07, + "loss": 0.5544, + "step": 29574 + }, + { + "epoch": 0.9064300600711046, + "grad_norm": 1.8467928297235747, + "learning_rate": 4.556978863368633e-07, + "loss": 0.592, + "step": 29575 + }, + { + "epoch": 0.9064607085938458, + "grad_norm": 1.7196316192410575, + "learning_rate": 4.554016969246333e-07, + "loss": 0.599, + "step": 29576 + }, + { + "epoch": 0.906491357116587, + "grad_norm": 1.600287196933654, + "learning_rate": 4.551056015577726e-07, + "loss": 0.4897, + "step": 29577 + }, + { + "epoch": 0.9065220056393282, + "grad_norm": 1.823440844136681, + "learning_rate": 4.5480960023919883e-07, + "loss": 0.6574, + "step": 29578 + }, + { + "epoch": 0.9065526541620694, + "grad_norm": 1.7021372238265922, + "learning_rate": 4.5451369297182855e-07, + "loss": 0.6286, + "step": 29579 + }, + { + "epoch": 0.9065833026848106, + "grad_norm": 1.6276151540077402, + "learning_rate": 4.54217879758575e-07, + "loss": 0.6922, + "step": 29580 + }, + { + "epoch": 0.9066139512075518, + "grad_norm": 0.6535826799393624, + "learning_rate": 4.5392216060235804e-07, + "loss": 0.497, + "step": 29581 + }, + { + "epoch": 0.906644599730293, + "grad_norm": 1.5774109322048573, + "learning_rate": 4.5362653550608646e-07, + "loss": 0.5996, + "step": 29582 + }, + { + "epoch": 0.9066752482530342, + "grad_norm": 1.8541443541134135, + "learning_rate": 4.533310044726769e-07, + "loss": 0.6344, + "step": 29583 + }, + { + "epoch": 0.9067058967757754, + "grad_norm": 1.5376973049435039, + "learning_rate": 4.5303556750503794e-07, + "loss": 0.6187, + "step": 29584 + }, + { + "epoch": 0.9067365452985167, + "grad_norm": 1.905217070217248, + "learning_rate": 4.52740224606083e-07, + "loss": 0.7469, + "step": 29585 + }, + { + "epoch": 0.9067671938212578, + "grad_norm": 1.6462775554049756, + "learning_rate": 4.5244497577872195e-07, + "loss": 0.5873, + "step": 29586 + }, + { + "epoch": 0.9067978423439991, + "grad_norm": 2.0273390902320654, + "learning_rate": 4.5214982102586237e-07, + "loss": 0.5989, + "step": 29587 + }, + { + "epoch": 0.9068284908667402, + "grad_norm": 1.6459911236719051, + "learning_rate": 4.518547603504131e-07, + "loss": 0.5499, + "step": 29588 + }, + { + "epoch": 0.9068591393894814, + "grad_norm": 1.8739807349089617, + "learning_rate": 4.51559793755284e-07, + "loss": 0.5928, + "step": 29589 + }, + { + "epoch": 0.9068897879122226, + "grad_norm": 0.6679920121176502, + "learning_rate": 4.512649212433784e-07, + "loss": 0.5005, + "step": 29590 + }, + { + "epoch": 0.9069204364349638, + "grad_norm": 0.6617134492345412, + "learning_rate": 4.5097014281760163e-07, + "loss": 0.5269, + "step": 29591 + }, + { + "epoch": 0.906951084957705, + "grad_norm": 1.892957922009016, + "learning_rate": 4.506754584808592e-07, + "loss": 0.6128, + "step": 29592 + }, + { + "epoch": 0.9069817334804462, + "grad_norm": 1.7882464499127941, + "learning_rate": 4.5038086823605555e-07, + "loss": 0.5621, + "step": 29593 + }, + { + "epoch": 0.9070123820031875, + "grad_norm": 1.83898722420442, + "learning_rate": 4.5008637208609375e-07, + "loss": 0.6875, + "step": 29594 + }, + { + "epoch": 0.9070430305259286, + "grad_norm": 1.7580191546286499, + "learning_rate": 4.4979197003387264e-07, + "loss": 0.6522, + "step": 29595 + }, + { + "epoch": 0.9070736790486699, + "grad_norm": 1.7317888585971308, + "learning_rate": 4.4949766208229437e-07, + "loss": 0.7606, + "step": 29596 + }, + { + "epoch": 0.907104327571411, + "grad_norm": 1.920828490274357, + "learning_rate": 4.492034482342611e-07, + "loss": 0.5628, + "step": 29597 + }, + { + "epoch": 0.9071349760941523, + "grad_norm": 1.5261353424307273, + "learning_rate": 4.489093284926704e-07, + "loss": 0.5811, + "step": 29598 + }, + { + "epoch": 0.9071656246168934, + "grad_norm": 1.6999112729075343, + "learning_rate": 4.4861530286041565e-07, + "loss": 0.6337, + "step": 29599 + }, + { + "epoch": 0.9071962731396347, + "grad_norm": 1.5360340619152297, + "learning_rate": 4.483213713404022e-07, + "loss": 0.5628, + "step": 29600 + }, + { + "epoch": 0.9072269216623758, + "grad_norm": 1.8988763184093629, + "learning_rate": 4.4802753393552e-07, + "loss": 0.6857, + "step": 29601 + }, + { + "epoch": 0.9072575701851171, + "grad_norm": 0.6229714870187849, + "learning_rate": 4.4773379064866893e-07, + "loss": 0.4812, + "step": 29602 + }, + { + "epoch": 0.9072882187078583, + "grad_norm": 1.5519570133463922, + "learning_rate": 4.47440141482739e-07, + "loss": 0.612, + "step": 29603 + }, + { + "epoch": 0.9073188672305995, + "grad_norm": 1.7168355661130827, + "learning_rate": 4.4714658644062546e-07, + "loss": 0.7064, + "step": 29604 + }, + { + "epoch": 0.9073495157533407, + "grad_norm": 1.6608573738259094, + "learning_rate": 4.4685312552522175e-07, + "loss": 0.6037, + "step": 29605 + }, + { + "epoch": 0.9073801642760819, + "grad_norm": 1.857087869329377, + "learning_rate": 4.465597587394177e-07, + "loss": 0.6847, + "step": 29606 + }, + { + "epoch": 0.9074108127988231, + "grad_norm": 0.7005459194025038, + "learning_rate": 4.4626648608610434e-07, + "loss": 0.5459, + "step": 29607 + }, + { + "epoch": 0.9074414613215643, + "grad_norm": 1.7897591095201923, + "learning_rate": 4.459733075681727e-07, + "loss": 0.7488, + "step": 29608 + }, + { + "epoch": 0.9074721098443055, + "grad_norm": 1.7559679450103494, + "learning_rate": 4.456802231885093e-07, + "loss": 0.5878, + "step": 29609 + }, + { + "epoch": 0.9075027583670467, + "grad_norm": 1.85721640787971, + "learning_rate": 4.453872329500042e-07, + "loss": 0.6893, + "step": 29610 + }, + { + "epoch": 0.9075334068897879, + "grad_norm": 1.5436811255084555, + "learning_rate": 4.450943368555438e-07, + "loss": 0.6735, + "step": 29611 + }, + { + "epoch": 0.9075640554125292, + "grad_norm": 1.818657378393103, + "learning_rate": 4.448015349080126e-07, + "loss": 0.6341, + "step": 29612 + }, + { + "epoch": 0.9075947039352703, + "grad_norm": 1.5719044621339642, + "learning_rate": 4.445088271102982e-07, + "loss": 0.697, + "step": 29613 + }, + { + "epoch": 0.9076253524580116, + "grad_norm": 1.725829095926772, + "learning_rate": 4.442162134652817e-07, + "loss": 0.6191, + "step": 29614 + }, + { + "epoch": 0.9076560009807527, + "grad_norm": 0.6934303710171049, + "learning_rate": 4.4392369397584736e-07, + "loss": 0.557, + "step": 29615 + }, + { + "epoch": 0.907686649503494, + "grad_norm": 1.6875788216639938, + "learning_rate": 4.436312686448796e-07, + "loss": 0.6452, + "step": 29616 + }, + { + "epoch": 0.9077172980262351, + "grad_norm": 1.650635648551288, + "learning_rate": 4.433389374752572e-07, + "loss": 0.6255, + "step": 29617 + }, + { + "epoch": 0.9077479465489764, + "grad_norm": 1.8221589110892318, + "learning_rate": 4.430467004698602e-07, + "loss": 0.6487, + "step": 29618 + }, + { + "epoch": 0.9077785950717175, + "grad_norm": 1.9308094106626914, + "learning_rate": 4.427545576315717e-07, + "loss": 0.5989, + "step": 29619 + }, + { + "epoch": 0.9078092435944587, + "grad_norm": 1.705461835415919, + "learning_rate": 4.4246250896326614e-07, + "loss": 0.6016, + "step": 29620 + }, + { + "epoch": 0.9078398921172, + "grad_norm": 1.8273823220864078, + "learning_rate": 4.4217055446782344e-07, + "loss": 0.5894, + "step": 29621 + }, + { + "epoch": 0.9078705406399411, + "grad_norm": 0.7189870640696275, + "learning_rate": 4.4187869414812013e-07, + "loss": 0.5194, + "step": 29622 + }, + { + "epoch": 0.9079011891626824, + "grad_norm": 1.5899406033951717, + "learning_rate": 4.4158692800703064e-07, + "loss": 0.5605, + "step": 29623 + }, + { + "epoch": 0.9079318376854235, + "grad_norm": 1.9008051844640355, + "learning_rate": 4.4129525604743264e-07, + "loss": 0.6304, + "step": 29624 + }, + { + "epoch": 0.9079624862081648, + "grad_norm": 0.6727511056309272, + "learning_rate": 4.4100367827219604e-07, + "loss": 0.5143, + "step": 29625 + }, + { + "epoch": 0.9079931347309059, + "grad_norm": 1.6589758898892584, + "learning_rate": 4.4071219468419637e-07, + "loss": 0.6449, + "step": 29626 + }, + { + "epoch": 0.9080237832536472, + "grad_norm": 0.675202512973631, + "learning_rate": 4.404208052863068e-07, + "loss": 0.5078, + "step": 29627 + }, + { + "epoch": 0.9080544317763883, + "grad_norm": 1.6778634619525807, + "learning_rate": 4.4012951008139514e-07, + "loss": 0.5632, + "step": 29628 + }, + { + "epoch": 0.9080850802991296, + "grad_norm": 1.7610050815855853, + "learning_rate": 4.398383090723346e-07, + "loss": 0.6801, + "step": 29629 + }, + { + "epoch": 0.9081157288218707, + "grad_norm": 1.548815997104293, + "learning_rate": 4.3954720226199285e-07, + "loss": 0.6077, + "step": 29630 + }, + { + "epoch": 0.908146377344612, + "grad_norm": 1.5019589003732294, + "learning_rate": 4.392561896532388e-07, + "loss": 0.5822, + "step": 29631 + }, + { + "epoch": 0.9081770258673532, + "grad_norm": 1.7559889131641422, + "learning_rate": 4.38965271248939e-07, + "loss": 0.7176, + "step": 29632 + }, + { + "epoch": 0.9082076743900944, + "grad_norm": 1.752051170248239, + "learning_rate": 4.3867444705196217e-07, + "loss": 0.6647, + "step": 29633 + }, + { + "epoch": 0.9082383229128356, + "grad_norm": 1.8409269868322164, + "learning_rate": 4.383837170651706e-07, + "loss": 0.726, + "step": 29634 + }, + { + "epoch": 0.9082689714355768, + "grad_norm": 1.6083997765303593, + "learning_rate": 4.38093081291433e-07, + "loss": 0.587, + "step": 29635 + }, + { + "epoch": 0.908299619958318, + "grad_norm": 1.7362658702160778, + "learning_rate": 4.378025397336083e-07, + "loss": 0.6153, + "step": 29636 + }, + { + "epoch": 0.9083302684810592, + "grad_norm": 1.6447941538236324, + "learning_rate": 4.3751209239456306e-07, + "loss": 0.6417, + "step": 29637 + }, + { + "epoch": 0.9083609170038004, + "grad_norm": 1.7676369335150128, + "learning_rate": 4.372217392771583e-07, + "loss": 0.5765, + "step": 29638 + }, + { + "epoch": 0.9083915655265417, + "grad_norm": 0.6664747259295856, + "learning_rate": 4.369314803842539e-07, + "loss": 0.5088, + "step": 29639 + }, + { + "epoch": 0.9084222140492828, + "grad_norm": 1.5747330139451345, + "learning_rate": 4.366413157187099e-07, + "loss": 0.6155, + "step": 29640 + }, + { + "epoch": 0.9084528625720241, + "grad_norm": 1.8016502079570602, + "learning_rate": 4.3635124528338623e-07, + "loss": 0.6777, + "step": 29641 + }, + { + "epoch": 0.9084835110947652, + "grad_norm": 1.7951556782573732, + "learning_rate": 4.3606126908114057e-07, + "loss": 0.6786, + "step": 29642 + }, + { + "epoch": 0.9085141596175065, + "grad_norm": 1.6185995011252678, + "learning_rate": 4.3577138711483167e-07, + "loss": 0.598, + "step": 29643 + }, + { + "epoch": 0.9085448081402476, + "grad_norm": 1.6368902430934091, + "learning_rate": 4.354815993873129e-07, + "loss": 0.6185, + "step": 29644 + }, + { + "epoch": 0.9085754566629889, + "grad_norm": 1.585871938981323, + "learning_rate": 4.351919059014409e-07, + "loss": 0.5782, + "step": 29645 + }, + { + "epoch": 0.90860610518573, + "grad_norm": 1.6788555086478893, + "learning_rate": 4.3490230666007214e-07, + "loss": 0.6019, + "step": 29646 + }, + { + "epoch": 0.9086367537084713, + "grad_norm": 1.6963759489888273, + "learning_rate": 4.346128016660567e-07, + "loss": 0.6933, + "step": 29647 + }, + { + "epoch": 0.9086674022312125, + "grad_norm": 1.7362850409231516, + "learning_rate": 4.3432339092224884e-07, + "loss": 0.5709, + "step": 29648 + }, + { + "epoch": 0.9086980507539537, + "grad_norm": 1.7897887956295508, + "learning_rate": 4.340340744315008e-07, + "loss": 0.5315, + "step": 29649 + }, + { + "epoch": 0.9087286992766949, + "grad_norm": 0.6559707719056334, + "learning_rate": 4.337448521966614e-07, + "loss": 0.5024, + "step": 29650 + }, + { + "epoch": 0.908759347799436, + "grad_norm": 1.5413790929796551, + "learning_rate": 4.334557242205817e-07, + "loss": 0.5523, + "step": 29651 + }, + { + "epoch": 0.9087899963221773, + "grad_norm": 1.9435099244734944, + "learning_rate": 4.331666905061127e-07, + "loss": 0.6724, + "step": 29652 + }, + { + "epoch": 0.9088206448449184, + "grad_norm": 1.9035826956368311, + "learning_rate": 4.3287775105609776e-07, + "loss": 0.6249, + "step": 29653 + }, + { + "epoch": 0.9088512933676597, + "grad_norm": 1.5616443004622989, + "learning_rate": 4.325889058733879e-07, + "loss": 0.6776, + "step": 29654 + }, + { + "epoch": 0.9088819418904008, + "grad_norm": 1.6749807214613168, + "learning_rate": 4.323001549608241e-07, + "loss": 0.5903, + "step": 29655 + }, + { + "epoch": 0.9089125904131421, + "grad_norm": 1.7283845478606972, + "learning_rate": 4.320114983212587e-07, + "loss": 0.6914, + "step": 29656 + }, + { + "epoch": 0.9089432389358832, + "grad_norm": 0.6569158944951555, + "learning_rate": 4.317229359575315e-07, + "loss": 0.5231, + "step": 29657 + }, + { + "epoch": 0.9089738874586245, + "grad_norm": 1.7680269253487928, + "learning_rate": 4.3143446787248464e-07, + "loss": 0.6254, + "step": 29658 + }, + { + "epoch": 0.9090045359813657, + "grad_norm": 1.6349778021296106, + "learning_rate": 4.311460940689627e-07, + "loss": 0.5695, + "step": 29659 + }, + { + "epoch": 0.9090351845041069, + "grad_norm": 1.7401324615641067, + "learning_rate": 4.308578145498077e-07, + "loss": 0.7745, + "step": 29660 + }, + { + "epoch": 0.9090658330268481, + "grad_norm": 1.9512270338137339, + "learning_rate": 4.3056962931785737e-07, + "loss": 0.5457, + "step": 29661 + }, + { + "epoch": 0.9090964815495893, + "grad_norm": 0.6798100019602105, + "learning_rate": 4.3028153837595397e-07, + "loss": 0.4992, + "step": 29662 + }, + { + "epoch": 0.9091271300723305, + "grad_norm": 1.6192060896860803, + "learning_rate": 4.299935417269352e-07, + "loss": 0.6483, + "step": 29663 + }, + { + "epoch": 0.9091577785950717, + "grad_norm": 1.923661063610283, + "learning_rate": 4.2970563937363874e-07, + "loss": 0.7257, + "step": 29664 + }, + { + "epoch": 0.9091884271178129, + "grad_norm": 1.6010627627766019, + "learning_rate": 4.2941783131890124e-07, + "loss": 0.6338, + "step": 29665 + }, + { + "epoch": 0.9092190756405542, + "grad_norm": 1.5489178028729935, + "learning_rate": 4.291301175655571e-07, + "loss": 0.6294, + "step": 29666 + }, + { + "epoch": 0.9092497241632953, + "grad_norm": 1.7261843768725105, + "learning_rate": 4.288424981164441e-07, + "loss": 0.6966, + "step": 29667 + }, + { + "epoch": 0.9092803726860366, + "grad_norm": 1.9152877468272096, + "learning_rate": 4.285549729743954e-07, + "loss": 0.6855, + "step": 29668 + }, + { + "epoch": 0.9093110212087777, + "grad_norm": 1.7596612509180924, + "learning_rate": 4.282675421422422e-07, + "loss": 0.5936, + "step": 29669 + }, + { + "epoch": 0.909341669731519, + "grad_norm": 1.7090915232225532, + "learning_rate": 4.2798020562281883e-07, + "loss": 0.6189, + "step": 29670 + }, + { + "epoch": 0.9093723182542601, + "grad_norm": 1.5982498367735392, + "learning_rate": 4.276929634189564e-07, + "loss": 0.6133, + "step": 29671 + }, + { + "epoch": 0.9094029667770014, + "grad_norm": 1.6089363949264284, + "learning_rate": 4.274058155334826e-07, + "loss": 0.6451, + "step": 29672 + }, + { + "epoch": 0.9094336152997425, + "grad_norm": 1.7394624277719837, + "learning_rate": 4.2711876196922855e-07, + "loss": 0.6163, + "step": 29673 + }, + { + "epoch": 0.9094642638224838, + "grad_norm": 1.5889917649795149, + "learning_rate": 4.2683180272902304e-07, + "loss": 0.6492, + "step": 29674 + }, + { + "epoch": 0.909494912345225, + "grad_norm": 1.78161813262166, + "learning_rate": 4.2654493781569386e-07, + "loss": 0.606, + "step": 29675 + }, + { + "epoch": 0.9095255608679662, + "grad_norm": 1.410484711894156, + "learning_rate": 4.262581672320676e-07, + "loss": 0.594, + "step": 29676 + }, + { + "epoch": 0.9095562093907074, + "grad_norm": 1.5543131528711518, + "learning_rate": 4.259714909809676e-07, + "loss": 0.6741, + "step": 29677 + }, + { + "epoch": 0.9095868579134486, + "grad_norm": 1.767641456513206, + "learning_rate": 4.2568490906522043e-07, + "loss": 0.733, + "step": 29678 + }, + { + "epoch": 0.9096175064361898, + "grad_norm": 1.6499887350093958, + "learning_rate": 4.2539842148765055e-07, + "loss": 0.6958, + "step": 29679 + }, + { + "epoch": 0.909648154958931, + "grad_norm": 1.7604576115755963, + "learning_rate": 4.251120282510779e-07, + "loss": 0.6701, + "step": 29680 + }, + { + "epoch": 0.9096788034816722, + "grad_norm": 1.6813109740943017, + "learning_rate": 4.248257293583269e-07, + "loss": 0.6318, + "step": 29681 + }, + { + "epoch": 0.9097094520044133, + "grad_norm": 1.7335247406315353, + "learning_rate": 4.245395248122175e-07, + "loss": 0.5601, + "step": 29682 + }, + { + "epoch": 0.9097401005271546, + "grad_norm": 0.6514518444988374, + "learning_rate": 4.242534146155719e-07, + "loss": 0.513, + "step": 29683 + }, + { + "epoch": 0.9097707490498957, + "grad_norm": 1.7007443147601584, + "learning_rate": 4.2396739877120676e-07, + "loss": 0.7174, + "step": 29684 + }, + { + "epoch": 0.909801397572637, + "grad_norm": 1.6785004037300766, + "learning_rate": 4.2368147728193974e-07, + "loss": 0.6912, + "step": 29685 + }, + { + "epoch": 0.9098320460953782, + "grad_norm": 1.882760637979735, + "learning_rate": 4.233956501505909e-07, + "loss": 0.7103, + "step": 29686 + }, + { + "epoch": 0.9098626946181194, + "grad_norm": 1.6820691416910967, + "learning_rate": 4.2310991737997575e-07, + "loss": 0.6168, + "step": 29687 + }, + { + "epoch": 0.9098933431408606, + "grad_norm": 1.92900212137273, + "learning_rate": 4.228242789729076e-07, + "loss": 0.5849, + "step": 29688 + }, + { + "epoch": 0.9099239916636018, + "grad_norm": 1.6498041252892555, + "learning_rate": 4.225387349322019e-07, + "loss": 0.6614, + "step": 29689 + }, + { + "epoch": 0.909954640186343, + "grad_norm": 1.7726112682930792, + "learning_rate": 4.222532852606731e-07, + "loss": 0.6808, + "step": 29690 + }, + { + "epoch": 0.9099852887090842, + "grad_norm": 1.9076578356744465, + "learning_rate": 4.219679299611323e-07, + "loss": 0.6005, + "step": 29691 + }, + { + "epoch": 0.9100159372318254, + "grad_norm": 1.7984405662903216, + "learning_rate": 4.2168266903639287e-07, + "loss": 0.8072, + "step": 29692 + }, + { + "epoch": 0.9100465857545667, + "grad_norm": 1.6675528237008734, + "learning_rate": 4.213975024892647e-07, + "loss": 0.612, + "step": 29693 + }, + { + "epoch": 0.9100772342773078, + "grad_norm": 1.864563956801823, + "learning_rate": 4.211124303225589e-07, + "loss": 0.6048, + "step": 29694 + }, + { + "epoch": 0.9101078828000491, + "grad_norm": 1.6905161554472736, + "learning_rate": 4.2082745253908206e-07, + "loss": 0.7151, + "step": 29695 + }, + { + "epoch": 0.9101385313227902, + "grad_norm": 1.7084622594262564, + "learning_rate": 4.2054256914164205e-07, + "loss": 0.5953, + "step": 29696 + }, + { + "epoch": 0.9101691798455315, + "grad_norm": 1.9583069865820775, + "learning_rate": 4.2025778013304984e-07, + "loss": 0.6135, + "step": 29697 + }, + { + "epoch": 0.9101998283682726, + "grad_norm": 1.999956844313581, + "learning_rate": 4.199730855161077e-07, + "loss": 0.6428, + "step": 29698 + }, + { + "epoch": 0.9102304768910139, + "grad_norm": 1.575720431619096, + "learning_rate": 4.1968848529362114e-07, + "loss": 0.5841, + "step": 29699 + }, + { + "epoch": 0.910261125413755, + "grad_norm": 1.5880009673009352, + "learning_rate": 4.194039794683957e-07, + "loss": 0.6021, + "step": 29700 + }, + { + "epoch": 0.9102917739364963, + "grad_norm": 1.5161417993620119, + "learning_rate": 4.191195680432336e-07, + "loss": 0.6009, + "step": 29701 + }, + { + "epoch": 0.9103224224592374, + "grad_norm": 1.9482424987867575, + "learning_rate": 4.188352510209381e-07, + "loss": 0.5971, + "step": 29702 + }, + { + "epoch": 0.9103530709819787, + "grad_norm": 1.7237093165495188, + "learning_rate": 4.185510284043104e-07, + "loss": 0.7349, + "step": 29703 + }, + { + "epoch": 0.9103837195047199, + "grad_norm": 1.7138327333996664, + "learning_rate": 4.1826690019615036e-07, + "loss": 0.6176, + "step": 29704 + }, + { + "epoch": 0.9104143680274611, + "grad_norm": 1.7004529493903044, + "learning_rate": 4.179828663992602e-07, + "loss": 0.7005, + "step": 29705 + }, + { + "epoch": 0.9104450165502023, + "grad_norm": 1.6833883988837979, + "learning_rate": 4.176989270164356e-07, + "loss": 0.5395, + "step": 29706 + }, + { + "epoch": 0.9104756650729435, + "grad_norm": 1.790956264619784, + "learning_rate": 4.17415082050473e-07, + "loss": 0.6322, + "step": 29707 + }, + { + "epoch": 0.9105063135956847, + "grad_norm": 0.6593997004634895, + "learning_rate": 4.1713133150417364e-07, + "loss": 0.5091, + "step": 29708 + }, + { + "epoch": 0.9105369621184259, + "grad_norm": 1.7372688655214261, + "learning_rate": 4.168476753803308e-07, + "loss": 0.6147, + "step": 29709 + }, + { + "epoch": 0.9105676106411671, + "grad_norm": 1.7669478864125894, + "learning_rate": 4.1656411368174e-07, + "loss": 0.599, + "step": 29710 + }, + { + "epoch": 0.9105982591639084, + "grad_norm": 1.6436827836187522, + "learning_rate": 4.162806464111946e-07, + "loss": 0.6367, + "step": 29711 + }, + { + "epoch": 0.9106289076866495, + "grad_norm": 1.6780754866293297, + "learning_rate": 4.159972735714879e-07, + "loss": 0.6258, + "step": 29712 + }, + { + "epoch": 0.9106595562093907, + "grad_norm": 1.7150209930410734, + "learning_rate": 4.157139951654132e-07, + "loss": 0.6166, + "step": 29713 + }, + { + "epoch": 0.9106902047321319, + "grad_norm": 1.6891761422806555, + "learning_rate": 4.1543081119575946e-07, + "loss": 0.6261, + "step": 29714 + }, + { + "epoch": 0.9107208532548731, + "grad_norm": 1.953527843476185, + "learning_rate": 4.151477216653177e-07, + "loss": 0.6641, + "step": 29715 + }, + { + "epoch": 0.9107515017776143, + "grad_norm": 0.6840246685842853, + "learning_rate": 4.1486472657688014e-07, + "loss": 0.4969, + "step": 29716 + }, + { + "epoch": 0.9107821503003555, + "grad_norm": 1.6136436606280393, + "learning_rate": 4.1458182593323237e-07, + "loss": 0.5457, + "step": 29717 + }, + { + "epoch": 0.9108127988230967, + "grad_norm": 1.6499931929152367, + "learning_rate": 4.142990197371599e-07, + "loss": 0.641, + "step": 29718 + }, + { + "epoch": 0.9108434473458379, + "grad_norm": 0.6602782792623046, + "learning_rate": 4.1401630799145497e-07, + "loss": 0.539, + "step": 29719 + }, + { + "epoch": 0.9108740958685791, + "grad_norm": 1.903410449079789, + "learning_rate": 4.1373369069889756e-07, + "loss": 0.7095, + "step": 29720 + }, + { + "epoch": 0.9109047443913203, + "grad_norm": 1.5987985356551535, + "learning_rate": 4.1345116786227767e-07, + "loss": 0.6075, + "step": 29721 + }, + { + "epoch": 0.9109353929140616, + "grad_norm": 1.779739398946823, + "learning_rate": 4.1316873948437306e-07, + "loss": 0.6373, + "step": 29722 + }, + { + "epoch": 0.9109660414368027, + "grad_norm": 1.8510803203177217, + "learning_rate": 4.1288640556797156e-07, + "loss": 0.6, + "step": 29723 + }, + { + "epoch": 0.910996689959544, + "grad_norm": 1.5195931042168027, + "learning_rate": 4.126041661158531e-07, + "loss": 0.5667, + "step": 29724 + }, + { + "epoch": 0.9110273384822851, + "grad_norm": 1.6495008064224912, + "learning_rate": 4.123220211307988e-07, + "loss": 0.6335, + "step": 29725 + }, + { + "epoch": 0.9110579870050264, + "grad_norm": 1.8878699748208987, + "learning_rate": 4.120399706155875e-07, + "loss": 0.6519, + "step": 29726 + }, + { + "epoch": 0.9110886355277675, + "grad_norm": 1.6681370664594644, + "learning_rate": 4.1175801457300156e-07, + "loss": 0.505, + "step": 29727 + }, + { + "epoch": 0.9111192840505088, + "grad_norm": 0.701086430728706, + "learning_rate": 4.1147615300581647e-07, + "loss": 0.5237, + "step": 29728 + }, + { + "epoch": 0.9111499325732499, + "grad_norm": 1.629196055009242, + "learning_rate": 4.1119438591681103e-07, + "loss": 0.7142, + "step": 29729 + }, + { + "epoch": 0.9111805810959912, + "grad_norm": 1.6793318824486447, + "learning_rate": 4.109127133087587e-07, + "loss": 0.6639, + "step": 29730 + }, + { + "epoch": 0.9112112296187324, + "grad_norm": 2.021110467462155, + "learning_rate": 4.106311351844372e-07, + "loss": 0.6239, + "step": 29731 + }, + { + "epoch": 0.9112418781414736, + "grad_norm": 1.6015212977743065, + "learning_rate": 4.103496515466221e-07, + "loss": 0.7043, + "step": 29732 + }, + { + "epoch": 0.9112725266642148, + "grad_norm": 1.6025253349365431, + "learning_rate": 4.100682623980845e-07, + "loss": 0.6264, + "step": 29733 + }, + { + "epoch": 0.911303175186956, + "grad_norm": 1.685432284674359, + "learning_rate": 4.0978696774159775e-07, + "loss": 0.585, + "step": 29734 + }, + { + "epoch": 0.9113338237096972, + "grad_norm": 0.6645424089741508, + "learning_rate": 4.095057675799352e-07, + "loss": 0.5305, + "step": 29735 + }, + { + "epoch": 0.9113644722324384, + "grad_norm": 1.942449753170848, + "learning_rate": 4.092246619158646e-07, + "loss": 0.6439, + "step": 29736 + }, + { + "epoch": 0.9113951207551796, + "grad_norm": 0.6322404215585448, + "learning_rate": 4.089436507521571e-07, + "loss": 0.4864, + "step": 29737 + }, + { + "epoch": 0.9114257692779208, + "grad_norm": 1.6980360065859976, + "learning_rate": 4.086627340915839e-07, + "loss": 0.5643, + "step": 29738 + }, + { + "epoch": 0.911456417800662, + "grad_norm": 1.7261601032697194, + "learning_rate": 4.0838191193690924e-07, + "loss": 0.6263, + "step": 29739 + }, + { + "epoch": 0.9114870663234033, + "grad_norm": 1.8883183628109053, + "learning_rate": 4.0810118429090215e-07, + "loss": 0.6155, + "step": 29740 + }, + { + "epoch": 0.9115177148461444, + "grad_norm": 1.7795192105896294, + "learning_rate": 4.0782055115632824e-07, + "loss": 0.6481, + "step": 29741 + }, + { + "epoch": 0.9115483633688857, + "grad_norm": 1.5898805270687433, + "learning_rate": 4.075400125359519e-07, + "loss": 0.6193, + "step": 29742 + }, + { + "epoch": 0.9115790118916268, + "grad_norm": 1.7554811506171843, + "learning_rate": 4.072595684325398e-07, + "loss": 0.6647, + "step": 29743 + }, + { + "epoch": 0.911609660414368, + "grad_norm": 1.775645334545653, + "learning_rate": 4.0697921884885193e-07, + "loss": 0.5992, + "step": 29744 + }, + { + "epoch": 0.9116403089371092, + "grad_norm": 1.7269614112384377, + "learning_rate": 4.066989637876528e-07, + "loss": 0.6298, + "step": 29745 + }, + { + "epoch": 0.9116709574598504, + "grad_norm": 1.872062262966562, + "learning_rate": 4.064188032517047e-07, + "loss": 0.5826, + "step": 29746 + }, + { + "epoch": 0.9117016059825916, + "grad_norm": 1.8802460931229212, + "learning_rate": 4.061387372437642e-07, + "loss": 0.6082, + "step": 29747 + }, + { + "epoch": 0.9117322545053328, + "grad_norm": 1.8710528730339493, + "learning_rate": 4.0585876576659465e-07, + "loss": 0.6688, + "step": 29748 + }, + { + "epoch": 0.9117629030280741, + "grad_norm": 1.830941469101405, + "learning_rate": 4.0557888882295503e-07, + "loss": 0.5972, + "step": 29749 + }, + { + "epoch": 0.9117935515508152, + "grad_norm": 1.6489789425946746, + "learning_rate": 4.0529910641559867e-07, + "loss": 0.6042, + "step": 29750 + }, + { + "epoch": 0.9118242000735565, + "grad_norm": 1.7737578240270402, + "learning_rate": 4.0501941854728775e-07, + "loss": 0.5846, + "step": 29751 + }, + { + "epoch": 0.9118548485962976, + "grad_norm": 1.786074916670251, + "learning_rate": 4.047398252207735e-07, + "loss": 0.5684, + "step": 29752 + }, + { + "epoch": 0.9118854971190389, + "grad_norm": 1.6738029347227226, + "learning_rate": 4.044603264388136e-07, + "loss": 0.5603, + "step": 29753 + }, + { + "epoch": 0.91191614564178, + "grad_norm": 0.6862563782036553, + "learning_rate": 4.041809222041615e-07, + "loss": 0.5342, + "step": 29754 + }, + { + "epoch": 0.9119467941645213, + "grad_norm": 1.7251176804719122, + "learning_rate": 4.039016125195694e-07, + "loss": 0.5674, + "step": 29755 + }, + { + "epoch": 0.9119774426872624, + "grad_norm": 1.799632293103739, + "learning_rate": 4.0362239738778955e-07, + "loss": 0.6474, + "step": 29756 + }, + { + "epoch": 0.9120080912100037, + "grad_norm": 1.7406467000863826, + "learning_rate": 4.0334327681157523e-07, + "loss": 0.5874, + "step": 29757 + }, + { + "epoch": 0.9120387397327449, + "grad_norm": 1.7312922281971626, + "learning_rate": 4.030642507936733e-07, + "loss": 0.6644, + "step": 29758 + }, + { + "epoch": 0.9120693882554861, + "grad_norm": 1.7113747180583625, + "learning_rate": 4.0278531933683476e-07, + "loss": 0.6893, + "step": 29759 + }, + { + "epoch": 0.9121000367782273, + "grad_norm": 1.6168389317669363, + "learning_rate": 4.0250648244380966e-07, + "loss": 0.5964, + "step": 29760 + }, + { + "epoch": 0.9121306853009685, + "grad_norm": 1.8424334716248518, + "learning_rate": 4.0222774011734247e-07, + "loss": 0.5979, + "step": 29761 + }, + { + "epoch": 0.9121613338237097, + "grad_norm": 1.8398450706411444, + "learning_rate": 4.019490923601821e-07, + "loss": 0.628, + "step": 29762 + }, + { + "epoch": 0.9121919823464509, + "grad_norm": 1.690850672850196, + "learning_rate": 4.016705391750708e-07, + "loss": 0.7106, + "step": 29763 + }, + { + "epoch": 0.9122226308691921, + "grad_norm": 1.6049976549398695, + "learning_rate": 4.0139208056475863e-07, + "loss": 0.6178, + "step": 29764 + }, + { + "epoch": 0.9122532793919333, + "grad_norm": 1.6289053038846402, + "learning_rate": 4.011137165319856e-07, + "loss": 0.6269, + "step": 29765 + }, + { + "epoch": 0.9122839279146745, + "grad_norm": 1.945981688753157, + "learning_rate": 4.0083544707949397e-07, + "loss": 0.6018, + "step": 29766 + }, + { + "epoch": 0.9123145764374158, + "grad_norm": 0.7023352750384637, + "learning_rate": 4.0055727221002593e-07, + "loss": 0.512, + "step": 29767 + }, + { + "epoch": 0.9123452249601569, + "grad_norm": 1.7385146434799696, + "learning_rate": 4.0027919192632493e-07, + "loss": 0.5841, + "step": 29768 + }, + { + "epoch": 0.9123758734828982, + "grad_norm": 1.9199496236726104, + "learning_rate": 4.000012062311287e-07, + "loss": 0.5929, + "step": 29769 + }, + { + "epoch": 0.9124065220056393, + "grad_norm": 0.6816865978085518, + "learning_rate": 3.997233151271762e-07, + "loss": 0.5096, + "step": 29770 + }, + { + "epoch": 0.9124371705283806, + "grad_norm": 1.825883726142393, + "learning_rate": 3.994455186172075e-07, + "loss": 0.6035, + "step": 29771 + }, + { + "epoch": 0.9124678190511217, + "grad_norm": 1.4989940949571579, + "learning_rate": 3.9916781670395697e-07, + "loss": 0.6241, + "step": 29772 + }, + { + "epoch": 0.912498467573863, + "grad_norm": 1.6739074279300263, + "learning_rate": 3.988902093901648e-07, + "loss": 0.6029, + "step": 29773 + }, + { + "epoch": 0.9125291160966041, + "grad_norm": 1.6304761580030922, + "learning_rate": 3.9861269667856194e-07, + "loss": 0.5789, + "step": 29774 + }, + { + "epoch": 0.9125597646193453, + "grad_norm": 1.9292991985344072, + "learning_rate": 3.983352785718841e-07, + "loss": 0.7817, + "step": 29775 + }, + { + "epoch": 0.9125904131420866, + "grad_norm": 1.7146675858475817, + "learning_rate": 3.98057955072868e-07, + "loss": 0.7243, + "step": 29776 + }, + { + "epoch": 0.9126210616648277, + "grad_norm": 1.572572873390579, + "learning_rate": 3.9778072618424146e-07, + "loss": 0.5478, + "step": 29777 + }, + { + "epoch": 0.912651710187569, + "grad_norm": 1.6611151025910313, + "learning_rate": 3.975035919087389e-07, + "loss": 0.6041, + "step": 29778 + }, + { + "epoch": 0.9126823587103101, + "grad_norm": 1.687415888318399, + "learning_rate": 3.9722655224909037e-07, + "loss": 0.6017, + "step": 29779 + }, + { + "epoch": 0.9127130072330514, + "grad_norm": 0.6740855001233003, + "learning_rate": 3.969496072080259e-07, + "loss": 0.4962, + "step": 29780 + }, + { + "epoch": 0.9127436557557925, + "grad_norm": 0.709553477423777, + "learning_rate": 3.9667275678827444e-07, + "loss": 0.5372, + "step": 29781 + }, + { + "epoch": 0.9127743042785338, + "grad_norm": 1.8664731711896394, + "learning_rate": 3.963960009925616e-07, + "loss": 0.669, + "step": 29782 + }, + { + "epoch": 0.9128049528012749, + "grad_norm": 1.95198295451742, + "learning_rate": 3.9611933982361737e-07, + "loss": 0.6051, + "step": 29783 + }, + { + "epoch": 0.9128356013240162, + "grad_norm": 1.6937815534110217, + "learning_rate": 3.958427732841674e-07, + "loss": 0.5287, + "step": 29784 + }, + { + "epoch": 0.9128662498467573, + "grad_norm": 1.4894857888740884, + "learning_rate": 3.955663013769351e-07, + "loss": 0.6546, + "step": 29785 + }, + { + "epoch": 0.9128968983694986, + "grad_norm": 1.7873381756776405, + "learning_rate": 3.9528992410464486e-07, + "loss": 0.7622, + "step": 29786 + }, + { + "epoch": 0.9129275468922398, + "grad_norm": 0.6880775167606994, + "learning_rate": 3.950136414700212e-07, + "loss": 0.5168, + "step": 29787 + }, + { + "epoch": 0.912958195414981, + "grad_norm": 1.7181616594977263, + "learning_rate": 3.947374534757853e-07, + "loss": 0.5404, + "step": 29788 + }, + { + "epoch": 0.9129888439377222, + "grad_norm": 1.6424448100229334, + "learning_rate": 3.944613601246583e-07, + "loss": 0.7632, + "step": 29789 + }, + { + "epoch": 0.9130194924604634, + "grad_norm": 1.593143471935835, + "learning_rate": 3.9418536141936137e-07, + "loss": 0.628, + "step": 29790 + }, + { + "epoch": 0.9130501409832046, + "grad_norm": 0.6751867735391623, + "learning_rate": 3.9390945736261565e-07, + "loss": 0.5016, + "step": 29791 + }, + { + "epoch": 0.9130807895059458, + "grad_norm": 1.777291975191471, + "learning_rate": 3.9363364795713675e-07, + "loss": 0.6847, + "step": 29792 + }, + { + "epoch": 0.913111438028687, + "grad_norm": 1.8730276229439555, + "learning_rate": 3.9335793320564254e-07, + "loss": 0.6545, + "step": 29793 + }, + { + "epoch": 0.9131420865514283, + "grad_norm": 1.941930502960569, + "learning_rate": 3.930823131108519e-07, + "loss": 0.5987, + "step": 29794 + }, + { + "epoch": 0.9131727350741694, + "grad_norm": 1.639161320921472, + "learning_rate": 3.928067876754793e-07, + "loss": 0.6741, + "step": 29795 + }, + { + "epoch": 0.9132033835969107, + "grad_norm": 1.74602852661989, + "learning_rate": 3.925313569022382e-07, + "loss": 0.6116, + "step": 29796 + }, + { + "epoch": 0.9132340321196518, + "grad_norm": 1.9061657093338615, + "learning_rate": 3.9225602079384416e-07, + "loss": 0.6774, + "step": 29797 + }, + { + "epoch": 0.9132646806423931, + "grad_norm": 1.7442084449195416, + "learning_rate": 3.919807793530106e-07, + "loss": 0.6644, + "step": 29798 + }, + { + "epoch": 0.9132953291651342, + "grad_norm": 1.8134991874882105, + "learning_rate": 3.9170563258244753e-07, + "loss": 0.7496, + "step": 29799 + }, + { + "epoch": 0.9133259776878755, + "grad_norm": 1.7712055817918582, + "learning_rate": 3.914305804848684e-07, + "loss": 0.7336, + "step": 29800 + }, + { + "epoch": 0.9133566262106166, + "grad_norm": 1.8237797560843425, + "learning_rate": 3.9115562306298094e-07, + "loss": 0.7007, + "step": 29801 + }, + { + "epoch": 0.9133872747333579, + "grad_norm": 1.5170758105239737, + "learning_rate": 3.908807603194975e-07, + "loss": 0.6065, + "step": 29802 + }, + { + "epoch": 0.913417923256099, + "grad_norm": 1.6826275471464471, + "learning_rate": 3.906059922571248e-07, + "loss": 0.5698, + "step": 29803 + }, + { + "epoch": 0.9134485717788403, + "grad_norm": 1.8957649806043806, + "learning_rate": 3.9033131887856623e-07, + "loss": 0.6293, + "step": 29804 + }, + { + "epoch": 0.9134792203015815, + "grad_norm": 0.7261578027950842, + "learning_rate": 3.9005674018653515e-07, + "loss": 0.5204, + "step": 29805 + }, + { + "epoch": 0.9135098688243226, + "grad_norm": 0.6754626441715705, + "learning_rate": 3.897822561837339e-07, + "loss": 0.5144, + "step": 29806 + }, + { + "epoch": 0.9135405173470639, + "grad_norm": 1.9446090576044144, + "learning_rate": 3.895078668728658e-07, + "loss": 0.6001, + "step": 29807 + }, + { + "epoch": 0.913571165869805, + "grad_norm": 0.6594386021311582, + "learning_rate": 3.892335722566354e-07, + "loss": 0.5268, + "step": 29808 + }, + { + "epoch": 0.9136018143925463, + "grad_norm": 0.7012844288928892, + "learning_rate": 3.8895937233774603e-07, + "loss": 0.5307, + "step": 29809 + }, + { + "epoch": 0.9136324629152874, + "grad_norm": 1.8048587809503456, + "learning_rate": 3.886852671189001e-07, + "loss": 0.6323, + "step": 29810 + }, + { + "epoch": 0.9136631114380287, + "grad_norm": 2.0209713528221442, + "learning_rate": 3.884112566027953e-07, + "loss": 0.6396, + "step": 29811 + }, + { + "epoch": 0.9136937599607698, + "grad_norm": 1.9583506368579946, + "learning_rate": 3.8813734079213517e-07, + "loss": 0.7002, + "step": 29812 + }, + { + "epoch": 0.9137244084835111, + "grad_norm": 1.6277449310078163, + "learning_rate": 3.878635196896174e-07, + "loss": 0.6049, + "step": 29813 + }, + { + "epoch": 0.9137550570062523, + "grad_norm": 1.6557784060195908, + "learning_rate": 3.8758979329794e-07, + "loss": 0.5316, + "step": 29814 + }, + { + "epoch": 0.9137857055289935, + "grad_norm": 1.5504005438845363, + "learning_rate": 3.8731616161979735e-07, + "loss": 0.6063, + "step": 29815 + }, + { + "epoch": 0.9138163540517347, + "grad_norm": 1.6962962931262018, + "learning_rate": 3.8704262465788953e-07, + "loss": 0.625, + "step": 29816 + }, + { + "epoch": 0.9138470025744759, + "grad_norm": 0.716315246765002, + "learning_rate": 3.867691824149111e-07, + "loss": 0.5464, + "step": 29817 + }, + { + "epoch": 0.9138776510972171, + "grad_norm": 1.6026755975778848, + "learning_rate": 3.8649583489355544e-07, + "loss": 0.5365, + "step": 29818 + }, + { + "epoch": 0.9139082996199583, + "grad_norm": 1.7758463349368, + "learning_rate": 3.862225820965149e-07, + "loss": 0.7074, + "step": 29819 + }, + { + "epoch": 0.9139389481426995, + "grad_norm": 1.583143666758979, + "learning_rate": 3.859494240264827e-07, + "loss": 0.5877, + "step": 29820 + }, + { + "epoch": 0.9139695966654408, + "grad_norm": 1.5966264143432976, + "learning_rate": 3.8567636068615246e-07, + "loss": 0.629, + "step": 29821 + }, + { + "epoch": 0.9140002451881819, + "grad_norm": 0.6584913533549979, + "learning_rate": 3.8540339207821187e-07, + "loss": 0.5201, + "step": 29822 + }, + { + "epoch": 0.9140308937109232, + "grad_norm": 1.7442933669384517, + "learning_rate": 3.851305182053511e-07, + "loss": 0.5556, + "step": 29823 + }, + { + "epoch": 0.9140615422336643, + "grad_norm": 1.7819412793068214, + "learning_rate": 3.8485773907026125e-07, + "loss": 0.6219, + "step": 29824 + }, + { + "epoch": 0.9140921907564056, + "grad_norm": 0.6927596693252511, + "learning_rate": 3.8458505467562803e-07, + "loss": 0.4953, + "step": 29825 + }, + { + "epoch": 0.9141228392791467, + "grad_norm": 2.0415423966294846, + "learning_rate": 3.8431246502413697e-07, + "loss": 0.6798, + "step": 29826 + }, + { + "epoch": 0.914153487801888, + "grad_norm": 0.6459839839696688, + "learning_rate": 3.84039970118476e-07, + "loss": 0.513, + "step": 29827 + }, + { + "epoch": 0.9141841363246291, + "grad_norm": 1.5957527828293108, + "learning_rate": 3.837675699613297e-07, + "loss": 0.5886, + "step": 29828 + }, + { + "epoch": 0.9142147848473704, + "grad_norm": 1.4723489913907704, + "learning_rate": 3.8349526455538244e-07, + "loss": 0.6069, + "step": 29829 + }, + { + "epoch": 0.9142454333701115, + "grad_norm": 1.689893486564277, + "learning_rate": 3.832230539033155e-07, + "loss": 0.5906, + "step": 29830 + }, + { + "epoch": 0.9142760818928528, + "grad_norm": 1.621951873388437, + "learning_rate": 3.8295093800781334e-07, + "loss": 0.7177, + "step": 29831 + }, + { + "epoch": 0.914306730415594, + "grad_norm": 1.800134823407682, + "learning_rate": 3.826789168715561e-07, + "loss": 0.6692, + "step": 29832 + }, + { + "epoch": 0.9143373789383352, + "grad_norm": 1.7478595840146156, + "learning_rate": 3.8240699049722494e-07, + "loss": 0.6172, + "step": 29833 + }, + { + "epoch": 0.9143680274610764, + "grad_norm": 1.691286901985937, + "learning_rate": 3.8213515888749663e-07, + "loss": 0.6234, + "step": 29834 + }, + { + "epoch": 0.9143986759838176, + "grad_norm": 1.6675615665813117, + "learning_rate": 3.8186342204505345e-07, + "loss": 0.645, + "step": 29835 + }, + { + "epoch": 0.9144293245065588, + "grad_norm": 1.6664599455032918, + "learning_rate": 3.815917799725688e-07, + "loss": 0.5836, + "step": 29836 + }, + { + "epoch": 0.9144599730292999, + "grad_norm": 1.7408385214520146, + "learning_rate": 3.813202326727239e-07, + "loss": 0.626, + "step": 29837 + }, + { + "epoch": 0.9144906215520412, + "grad_norm": 1.9148113110716032, + "learning_rate": 3.810487801481899e-07, + "loss": 0.6183, + "step": 29838 + }, + { + "epoch": 0.9145212700747823, + "grad_norm": 2.016912972857587, + "learning_rate": 3.807774224016425e-07, + "loss": 0.5628, + "step": 29839 + }, + { + "epoch": 0.9145519185975236, + "grad_norm": 1.910127405086854, + "learning_rate": 3.8050615943575843e-07, + "loss": 0.6287, + "step": 29840 + }, + { + "epoch": 0.9145825671202648, + "grad_norm": 1.6994099267239653, + "learning_rate": 3.8023499125320775e-07, + "loss": 0.6753, + "step": 29841 + }, + { + "epoch": 0.914613215643006, + "grad_norm": 0.6886150126661743, + "learning_rate": 3.7996391785666275e-07, + "loss": 0.5207, + "step": 29842 + }, + { + "epoch": 0.9146438641657472, + "grad_norm": 0.6477000037423198, + "learning_rate": 3.796929392487958e-07, + "loss": 0.5206, + "step": 29843 + }, + { + "epoch": 0.9146745126884884, + "grad_norm": 1.5866271035371284, + "learning_rate": 3.794220554322747e-07, + "loss": 0.531, + "step": 29844 + }, + { + "epoch": 0.9147051612112296, + "grad_norm": 1.7204632865739544, + "learning_rate": 3.7915126640976854e-07, + "loss": 0.5761, + "step": 29845 + }, + { + "epoch": 0.9147358097339708, + "grad_norm": 1.8161137852375728, + "learning_rate": 3.7888057218394837e-07, + "loss": 0.5932, + "step": 29846 + }, + { + "epoch": 0.914766458256712, + "grad_norm": 0.6509400135539682, + "learning_rate": 3.786099727574788e-07, + "loss": 0.5227, + "step": 29847 + }, + { + "epoch": 0.9147971067794533, + "grad_norm": 0.6719506208361117, + "learning_rate": 3.783394681330277e-07, + "loss": 0.4864, + "step": 29848 + }, + { + "epoch": 0.9148277553021944, + "grad_norm": 1.72662130188737, + "learning_rate": 3.780690583132585e-07, + "loss": 0.6068, + "step": 29849 + }, + { + "epoch": 0.9148584038249357, + "grad_norm": 1.686950707903266, + "learning_rate": 3.777987433008368e-07, + "loss": 0.6415, + "step": 29850 + }, + { + "epoch": 0.9148890523476768, + "grad_norm": 1.5290038960332688, + "learning_rate": 3.7752852309842714e-07, + "loss": 0.5509, + "step": 29851 + }, + { + "epoch": 0.9149197008704181, + "grad_norm": 0.6837195533381285, + "learning_rate": 3.7725839770869075e-07, + "loss": 0.5306, + "step": 29852 + }, + { + "epoch": 0.9149503493931592, + "grad_norm": 1.809111199801938, + "learning_rate": 3.7698836713428775e-07, + "loss": 0.7072, + "step": 29853 + }, + { + "epoch": 0.9149809979159005, + "grad_norm": 0.653769070285079, + "learning_rate": 3.7671843137788265e-07, + "loss": 0.498, + "step": 29854 + }, + { + "epoch": 0.9150116464386416, + "grad_norm": 1.644552813482788, + "learning_rate": 3.764485904421322e-07, + "loss": 0.5707, + "step": 29855 + }, + { + "epoch": 0.9150422949613829, + "grad_norm": 0.643584319723175, + "learning_rate": 3.761788443296954e-07, + "loss": 0.5008, + "step": 29856 + }, + { + "epoch": 0.915072943484124, + "grad_norm": 1.691487771780517, + "learning_rate": 3.7590919304323237e-07, + "loss": 0.7152, + "step": 29857 + }, + { + "epoch": 0.9151035920068653, + "grad_norm": 1.8980471682484439, + "learning_rate": 3.756396365853976e-07, + "loss": 0.582, + "step": 29858 + }, + { + "epoch": 0.9151342405296065, + "grad_norm": 1.6220583983784689, + "learning_rate": 3.7537017495884786e-07, + "loss": 0.7069, + "step": 29859 + }, + { + "epoch": 0.9151648890523477, + "grad_norm": 1.7390300176664266, + "learning_rate": 3.7510080816623883e-07, + "loss": 0.5658, + "step": 29860 + }, + { + "epoch": 0.9151955375750889, + "grad_norm": 0.6347474250134054, + "learning_rate": 3.748315362102228e-07, + "loss": 0.5049, + "step": 29861 + }, + { + "epoch": 0.9152261860978301, + "grad_norm": 1.9694420445412806, + "learning_rate": 3.745623590934566e-07, + "loss": 0.6596, + "step": 29862 + }, + { + "epoch": 0.9152568346205713, + "grad_norm": 1.9214482728000493, + "learning_rate": 3.7429327681858807e-07, + "loss": 0.6976, + "step": 29863 + }, + { + "epoch": 0.9152874831433125, + "grad_norm": 1.7137282007983454, + "learning_rate": 3.7402428938827175e-07, + "loss": 0.6734, + "step": 29864 + }, + { + "epoch": 0.9153181316660537, + "grad_norm": 1.738282556156538, + "learning_rate": 3.737553968051577e-07, + "loss": 0.6901, + "step": 29865 + }, + { + "epoch": 0.915348780188795, + "grad_norm": 1.882216297375224, + "learning_rate": 3.7348659907189387e-07, + "loss": 0.7599, + "step": 29866 + }, + { + "epoch": 0.9153794287115361, + "grad_norm": 1.7784939678395861, + "learning_rate": 3.7321789619112927e-07, + "loss": 0.542, + "step": 29867 + }, + { + "epoch": 0.9154100772342773, + "grad_norm": 1.6514989655875099, + "learning_rate": 3.729492881655128e-07, + "loss": 0.6899, + "step": 29868 + }, + { + "epoch": 0.9154407257570185, + "grad_norm": 1.5332924973558952, + "learning_rate": 3.7268077499768906e-07, + "loss": 0.6563, + "step": 29869 + }, + { + "epoch": 0.9154713742797597, + "grad_norm": 1.7455402815306502, + "learning_rate": 3.7241235669030597e-07, + "loss": 0.6295, + "step": 29870 + }, + { + "epoch": 0.9155020228025009, + "grad_norm": 1.688180476513671, + "learning_rate": 3.721440332460069e-07, + "loss": 0.6065, + "step": 29871 + }, + { + "epoch": 0.9155326713252421, + "grad_norm": 0.6448350719610286, + "learning_rate": 3.718758046674353e-07, + "loss": 0.487, + "step": 29872 + }, + { + "epoch": 0.9155633198479833, + "grad_norm": 1.7819042184549363, + "learning_rate": 3.7160767095723585e-07, + "loss": 0.6099, + "step": 29873 + }, + { + "epoch": 0.9155939683707245, + "grad_norm": 1.7184947074083399, + "learning_rate": 3.713396321180496e-07, + "loss": 0.6279, + "step": 29874 + }, + { + "epoch": 0.9156246168934657, + "grad_norm": 1.7604074446757405, + "learning_rate": 3.710716881525167e-07, + "loss": 0.62, + "step": 29875 + }, + { + "epoch": 0.9156552654162069, + "grad_norm": 1.6412530572921926, + "learning_rate": 3.7080383906327957e-07, + "loss": 0.6827, + "step": 29876 + }, + { + "epoch": 0.9156859139389482, + "grad_norm": 2.070857843341579, + "learning_rate": 3.705360848529738e-07, + "loss": 0.714, + "step": 29877 + }, + { + "epoch": 0.9157165624616893, + "grad_norm": 1.6980866611420609, + "learning_rate": 3.702684255242417e-07, + "loss": 0.6102, + "step": 29878 + }, + { + "epoch": 0.9157472109844306, + "grad_norm": 0.6731375624227345, + "learning_rate": 3.700008610797179e-07, + "loss": 0.5022, + "step": 29879 + }, + { + "epoch": 0.9157778595071717, + "grad_norm": 1.778011794424277, + "learning_rate": 3.6973339152203915e-07, + "loss": 0.7009, + "step": 29880 + }, + { + "epoch": 0.915808508029913, + "grad_norm": 1.7161260935663492, + "learning_rate": 3.694660168538422e-07, + "loss": 0.6466, + "step": 29881 + }, + { + "epoch": 0.9158391565526541, + "grad_norm": 1.9534393570945545, + "learning_rate": 3.6919873707776056e-07, + "loss": 0.7117, + "step": 29882 + }, + { + "epoch": 0.9158698050753954, + "grad_norm": 1.8003880478491288, + "learning_rate": 3.689315521964265e-07, + "loss": 0.6248, + "step": 29883 + }, + { + "epoch": 0.9159004535981365, + "grad_norm": 1.629513665683529, + "learning_rate": 3.686644622124758e-07, + "loss": 0.5948, + "step": 29884 + }, + { + "epoch": 0.9159311021208778, + "grad_norm": 1.9076023268230964, + "learning_rate": 3.683974671285373e-07, + "loss": 0.7108, + "step": 29885 + }, + { + "epoch": 0.915961750643619, + "grad_norm": 1.5407138905820534, + "learning_rate": 3.6813056694724345e-07, + "loss": 0.4968, + "step": 29886 + }, + { + "epoch": 0.9159923991663602, + "grad_norm": 0.7132307633065402, + "learning_rate": 3.678637616712244e-07, + "loss": 0.5024, + "step": 29887 + }, + { + "epoch": 0.9160230476891014, + "grad_norm": 1.7470082568063388, + "learning_rate": 3.6759705130310685e-07, + "loss": 0.6796, + "step": 29888 + }, + { + "epoch": 0.9160536962118426, + "grad_norm": 1.7202734141722598, + "learning_rate": 3.67330435845521e-07, + "loss": 0.651, + "step": 29889 + }, + { + "epoch": 0.9160843447345838, + "grad_norm": 1.9387803487179733, + "learning_rate": 3.6706391530109133e-07, + "loss": 0.5071, + "step": 29890 + }, + { + "epoch": 0.916114993257325, + "grad_norm": 1.5855470560030622, + "learning_rate": 3.667974896724469e-07, + "loss": 0.681, + "step": 29891 + }, + { + "epoch": 0.9161456417800662, + "grad_norm": 1.7940618871690892, + "learning_rate": 3.6653115896221223e-07, + "loss": 0.5656, + "step": 29892 + }, + { + "epoch": 0.9161762903028075, + "grad_norm": 1.6859147118706996, + "learning_rate": 3.662649231730098e-07, + "loss": 0.6097, + "step": 29893 + }, + { + "epoch": 0.9162069388255486, + "grad_norm": 1.6818457080978952, + "learning_rate": 3.65998782307464e-07, + "loss": 0.7051, + "step": 29894 + }, + { + "epoch": 0.9162375873482899, + "grad_norm": 1.6372934965880854, + "learning_rate": 3.657327363681984e-07, + "loss": 0.5391, + "step": 29895 + }, + { + "epoch": 0.916268235871031, + "grad_norm": 1.8255043244884699, + "learning_rate": 3.6546678535783197e-07, + "loss": 0.6182, + "step": 29896 + }, + { + "epoch": 0.9162988843937723, + "grad_norm": 1.772803223046146, + "learning_rate": 3.6520092927898597e-07, + "loss": 0.6449, + "step": 29897 + }, + { + "epoch": 0.9163295329165134, + "grad_norm": 0.6800130025716657, + "learning_rate": 3.6493516813428165e-07, + "loss": 0.5337, + "step": 29898 + }, + { + "epoch": 0.9163601814392546, + "grad_norm": 0.6813274054998041, + "learning_rate": 3.6466950192633576e-07, + "loss": 0.534, + "step": 29899 + }, + { + "epoch": 0.9163908299619958, + "grad_norm": 1.795157507262024, + "learning_rate": 3.644039306577674e-07, + "loss": 0.7248, + "step": 29900 + }, + { + "epoch": 0.916421478484737, + "grad_norm": 1.6546888348219269, + "learning_rate": 3.6413845433118986e-07, + "loss": 0.5866, + "step": 29901 + }, + { + "epoch": 0.9164521270074782, + "grad_norm": 1.7256457634990816, + "learning_rate": 3.638730729492246e-07, + "loss": 0.6087, + "step": 29902 + }, + { + "epoch": 0.9164827755302194, + "grad_norm": 1.6403646274506685, + "learning_rate": 3.636077865144827e-07, + "loss": 0.5925, + "step": 29903 + }, + { + "epoch": 0.9165134240529607, + "grad_norm": 0.6744096699902729, + "learning_rate": 3.633425950295777e-07, + "loss": 0.525, + "step": 29904 + }, + { + "epoch": 0.9165440725757018, + "grad_norm": 1.8907715957792053, + "learning_rate": 3.6307749849712414e-07, + "loss": 0.6705, + "step": 29905 + }, + { + "epoch": 0.9165747210984431, + "grad_norm": 1.7802192674723987, + "learning_rate": 3.628124969197344e-07, + "loss": 0.6736, + "step": 29906 + }, + { + "epoch": 0.9166053696211842, + "grad_norm": 2.03709178373618, + "learning_rate": 3.625475903000186e-07, + "loss": 0.5026, + "step": 29907 + }, + { + "epoch": 0.9166360181439255, + "grad_norm": 1.8236895003671763, + "learning_rate": 3.6228277864058693e-07, + "loss": 0.6675, + "step": 29908 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.7088166284877138, + "learning_rate": 3.620180619440483e-07, + "loss": 0.523, + "step": 29909 + }, + { + "epoch": 0.9166973151894079, + "grad_norm": 1.73318841845094, + "learning_rate": 3.617534402130141e-07, + "loss": 0.6595, + "step": 29910 + }, + { + "epoch": 0.916727963712149, + "grad_norm": 1.6325735956340324, + "learning_rate": 3.6148891345008765e-07, + "loss": 0.5818, + "step": 29911 + }, + { + "epoch": 0.9167586122348903, + "grad_norm": 1.604806209088137, + "learning_rate": 3.6122448165787583e-07, + "loss": 0.6102, + "step": 29912 + }, + { + "epoch": 0.9167892607576315, + "grad_norm": 1.6355078133219687, + "learning_rate": 3.609601448389877e-07, + "loss": 0.677, + "step": 29913 + }, + { + "epoch": 0.9168199092803727, + "grad_norm": 1.7136722170108327, + "learning_rate": 3.606959029960255e-07, + "loss": 0.5668, + "step": 29914 + }, + { + "epoch": 0.9168505578031139, + "grad_norm": 0.6793047233022966, + "learning_rate": 3.604317561315918e-07, + "loss": 0.5116, + "step": 29915 + }, + { + "epoch": 0.9168812063258551, + "grad_norm": 0.6447426816863767, + "learning_rate": 3.601677042482898e-07, + "loss": 0.5044, + "step": 29916 + }, + { + "epoch": 0.9169118548485963, + "grad_norm": 1.8237399257492821, + "learning_rate": 3.599037473487221e-07, + "loss": 0.594, + "step": 29917 + }, + { + "epoch": 0.9169425033713375, + "grad_norm": 1.952529125166798, + "learning_rate": 3.59639885435491e-07, + "loss": 0.6683, + "step": 29918 + }, + { + "epoch": 0.9169731518940787, + "grad_norm": 1.7083023130779065, + "learning_rate": 3.5937611851119326e-07, + "loss": 0.5843, + "step": 29919 + }, + { + "epoch": 0.91700380041682, + "grad_norm": 1.7420585396457915, + "learning_rate": 3.5911244657842903e-07, + "loss": 0.6963, + "step": 29920 + }, + { + "epoch": 0.9170344489395611, + "grad_norm": 1.8759215588643279, + "learning_rate": 3.588488696397974e-07, + "loss": 0.5246, + "step": 29921 + }, + { + "epoch": 0.9170650974623024, + "grad_norm": 2.062142598906493, + "learning_rate": 3.585853876978951e-07, + "loss": 0.7183, + "step": 29922 + }, + { + "epoch": 0.9170957459850435, + "grad_norm": 1.863845455572442, + "learning_rate": 3.5832200075531675e-07, + "loss": 0.64, + "step": 29923 + }, + { + "epoch": 0.9171263945077848, + "grad_norm": 1.7488398769983868, + "learning_rate": 3.5805870881465923e-07, + "loss": 0.5906, + "step": 29924 + }, + { + "epoch": 0.9171570430305259, + "grad_norm": 1.8956297803292383, + "learning_rate": 3.577955118785159e-07, + "loss": 0.7146, + "step": 29925 + }, + { + "epoch": 0.9171876915532672, + "grad_norm": 1.8516591360754109, + "learning_rate": 3.5753240994948037e-07, + "loss": 0.6166, + "step": 29926 + }, + { + "epoch": 0.9172183400760083, + "grad_norm": 1.826757641298462, + "learning_rate": 3.572694030301449e-07, + "loss": 0.6929, + "step": 29927 + }, + { + "epoch": 0.9172489885987496, + "grad_norm": 1.6817789587144805, + "learning_rate": 3.570064911231019e-07, + "loss": 0.6774, + "step": 29928 + }, + { + "epoch": 0.9172796371214907, + "grad_norm": 1.4920134623941355, + "learning_rate": 3.5674367423094156e-07, + "loss": 0.5408, + "step": 29929 + }, + { + "epoch": 0.9173102856442319, + "grad_norm": 1.7376795788786312, + "learning_rate": 3.564809523562529e-07, + "loss": 0.5935, + "step": 29930 + }, + { + "epoch": 0.9173409341669732, + "grad_norm": 1.933714378186037, + "learning_rate": 3.562183255016227e-07, + "loss": 0.6932, + "step": 29931 + }, + { + "epoch": 0.9173715826897143, + "grad_norm": 1.7010438203520117, + "learning_rate": 3.559557936696434e-07, + "loss": 0.5682, + "step": 29932 + }, + { + "epoch": 0.9174022312124556, + "grad_norm": 0.7276227060582322, + "learning_rate": 3.5569335686289954e-07, + "loss": 0.5354, + "step": 29933 + }, + { + "epoch": 0.9174328797351967, + "grad_norm": 1.6901987600561015, + "learning_rate": 3.554310150839746e-07, + "loss": 0.6681, + "step": 29934 + }, + { + "epoch": 0.917463528257938, + "grad_norm": 1.6411383530886454, + "learning_rate": 3.5516876833545655e-07, + "loss": 0.6808, + "step": 29935 + }, + { + "epoch": 0.9174941767806791, + "grad_norm": 1.7443429639822077, + "learning_rate": 3.5490661661992774e-07, + "loss": 0.7642, + "step": 29936 + }, + { + "epoch": 0.9175248253034204, + "grad_norm": 0.658780648953719, + "learning_rate": 3.546445599399728e-07, + "loss": 0.531, + "step": 29937 + }, + { + "epoch": 0.9175554738261615, + "grad_norm": 1.718098679975198, + "learning_rate": 3.543825982981719e-07, + "loss": 0.6331, + "step": 29938 + }, + { + "epoch": 0.9175861223489028, + "grad_norm": 1.6219671566446603, + "learning_rate": 3.541207316971074e-07, + "loss": 0.5829, + "step": 29939 + }, + { + "epoch": 0.917616770871644, + "grad_norm": 1.669610398023911, + "learning_rate": 3.538589601393605e-07, + "loss": 0.661, + "step": 29940 + }, + { + "epoch": 0.9176474193943852, + "grad_norm": 1.5627838181713314, + "learning_rate": 3.535972836275092e-07, + "loss": 0.5894, + "step": 29941 + }, + { + "epoch": 0.9176780679171264, + "grad_norm": 2.0180364222039833, + "learning_rate": 3.5333570216412924e-07, + "loss": 0.6488, + "step": 29942 + }, + { + "epoch": 0.9177087164398676, + "grad_norm": 1.7483131076136287, + "learning_rate": 3.530742157518041e-07, + "loss": 0.6247, + "step": 29943 + }, + { + "epoch": 0.9177393649626088, + "grad_norm": 1.8716984319953704, + "learning_rate": 3.5281282439310505e-07, + "loss": 0.6153, + "step": 29944 + }, + { + "epoch": 0.91777001348535, + "grad_norm": 1.6331887381338253, + "learning_rate": 3.525515280906111e-07, + "loss": 0.6126, + "step": 29945 + }, + { + "epoch": 0.9178006620080912, + "grad_norm": 0.6609952961538199, + "learning_rate": 3.5229032684689356e-07, + "loss": 0.5103, + "step": 29946 + }, + { + "epoch": 0.9178313105308324, + "grad_norm": 1.7565257389148974, + "learning_rate": 3.5202922066452814e-07, + "loss": 0.5867, + "step": 29947 + }, + { + "epoch": 0.9178619590535736, + "grad_norm": 1.5714842017645212, + "learning_rate": 3.517682095460895e-07, + "loss": 0.577, + "step": 29948 + }, + { + "epoch": 0.9178926075763149, + "grad_norm": 1.7203912874848821, + "learning_rate": 3.515072934941455e-07, + "loss": 0.6818, + "step": 29949 + }, + { + "epoch": 0.917923256099056, + "grad_norm": 1.6924103486844015, + "learning_rate": 3.5124647251126854e-07, + "loss": 0.6578, + "step": 29950 + }, + { + "epoch": 0.9179539046217973, + "grad_norm": 1.4911960272276288, + "learning_rate": 3.5098574660002996e-07, + "loss": 0.5652, + "step": 29951 + }, + { + "epoch": 0.9179845531445384, + "grad_norm": 1.5989677807086122, + "learning_rate": 3.507251157629976e-07, + "loss": 0.676, + "step": 29952 + }, + { + "epoch": 0.9180152016672797, + "grad_norm": 1.7938695480904243, + "learning_rate": 3.504645800027373e-07, + "loss": 0.7512, + "step": 29953 + }, + { + "epoch": 0.9180458501900208, + "grad_norm": 1.6591574537325395, + "learning_rate": 3.502041393218214e-07, + "loss": 0.5171, + "step": 29954 + }, + { + "epoch": 0.9180764987127621, + "grad_norm": 1.7237277611577186, + "learning_rate": 3.499437937228112e-07, + "loss": 0.667, + "step": 29955 + }, + { + "epoch": 0.9181071472355032, + "grad_norm": 1.9102220905568719, + "learning_rate": 3.496835432082757e-07, + "loss": 0.6617, + "step": 29956 + }, + { + "epoch": 0.9181377957582445, + "grad_norm": 1.8114690048407427, + "learning_rate": 3.4942338778077625e-07, + "loss": 0.592, + "step": 29957 + }, + { + "epoch": 0.9181684442809857, + "grad_norm": 1.7977102224161245, + "learning_rate": 3.491633274428763e-07, + "loss": 0.6293, + "step": 29958 + }, + { + "epoch": 0.9181990928037269, + "grad_norm": 1.7461133436946077, + "learning_rate": 3.489033621971416e-07, + "loss": 0.5745, + "step": 29959 + }, + { + "epoch": 0.9182297413264681, + "grad_norm": 1.7567834384097396, + "learning_rate": 3.4864349204613015e-07, + "loss": 0.6951, + "step": 29960 + }, + { + "epoch": 0.9182603898492092, + "grad_norm": 1.929322519521447, + "learning_rate": 3.4838371699240316e-07, + "loss": 0.537, + "step": 29961 + }, + { + "epoch": 0.9182910383719505, + "grad_norm": 1.654256388420278, + "learning_rate": 3.4812403703852195e-07, + "loss": 0.648, + "step": 29962 + }, + { + "epoch": 0.9183216868946916, + "grad_norm": 1.6963283515486136, + "learning_rate": 3.4786445218704335e-07, + "loss": 0.6945, + "step": 29963 + }, + { + "epoch": 0.9183523354174329, + "grad_norm": 1.6743413157857905, + "learning_rate": 3.4760496244052645e-07, + "loss": 0.577, + "step": 29964 + }, + { + "epoch": 0.918382983940174, + "grad_norm": 0.6439080646826003, + "learning_rate": 3.4734556780152807e-07, + "loss": 0.5211, + "step": 29965 + }, + { + "epoch": 0.9184136324629153, + "grad_norm": 1.7601122436924737, + "learning_rate": 3.470862682726028e-07, + "loss": 0.5857, + "step": 29966 + }, + { + "epoch": 0.9184442809856564, + "grad_norm": 1.7001147092845605, + "learning_rate": 3.468270638563065e-07, + "loss": 0.6836, + "step": 29967 + }, + { + "epoch": 0.9184749295083977, + "grad_norm": 1.578922774533876, + "learning_rate": 3.4656795455519256e-07, + "loss": 0.6227, + "step": 29968 + }, + { + "epoch": 0.9185055780311389, + "grad_norm": 1.7075351029346335, + "learning_rate": 3.463089403718145e-07, + "loss": 0.5647, + "step": 29969 + }, + { + "epoch": 0.9185362265538801, + "grad_norm": 1.5452341140633208, + "learning_rate": 3.460500213087259e-07, + "loss": 0.5836, + "step": 29970 + }, + { + "epoch": 0.9185668750766213, + "grad_norm": 1.8235631787538358, + "learning_rate": 3.4579119736847466e-07, + "loss": 0.6805, + "step": 29971 + }, + { + "epoch": 0.9185975235993625, + "grad_norm": 1.8358703279784476, + "learning_rate": 3.455324685536132e-07, + "loss": 0.7078, + "step": 29972 + }, + { + "epoch": 0.9186281721221037, + "grad_norm": 1.6316457977820567, + "learning_rate": 3.4527383486669174e-07, + "loss": 0.6566, + "step": 29973 + }, + { + "epoch": 0.9186588206448449, + "grad_norm": 1.889834308129557, + "learning_rate": 3.450152963102571e-07, + "loss": 0.6203, + "step": 29974 + }, + { + "epoch": 0.9186894691675861, + "grad_norm": 1.7159257449827297, + "learning_rate": 3.4475685288685725e-07, + "loss": 0.5721, + "step": 29975 + }, + { + "epoch": 0.9187201176903274, + "grad_norm": 0.6502972445747902, + "learning_rate": 3.4449850459903676e-07, + "loss": 0.4827, + "step": 29976 + }, + { + "epoch": 0.9187507662130685, + "grad_norm": 1.8188651480737936, + "learning_rate": 3.442402514493448e-07, + "loss": 0.6549, + "step": 29977 + }, + { + "epoch": 0.9187814147358098, + "grad_norm": 1.651963146147538, + "learning_rate": 3.4398209344032373e-07, + "loss": 0.6928, + "step": 29978 + }, + { + "epoch": 0.9188120632585509, + "grad_norm": 1.7508482497304367, + "learning_rate": 3.437240305745171e-07, + "loss": 0.6746, + "step": 29979 + }, + { + "epoch": 0.9188427117812922, + "grad_norm": 1.6573279177077331, + "learning_rate": 3.4346606285446836e-07, + "loss": 0.6155, + "step": 29980 + }, + { + "epoch": 0.9188733603040333, + "grad_norm": 1.7234968842163352, + "learning_rate": 3.4320819028272e-07, + "loss": 0.6132, + "step": 29981 + }, + { + "epoch": 0.9189040088267746, + "grad_norm": 1.9179414156079069, + "learning_rate": 3.429504128618111e-07, + "loss": 0.6141, + "step": 29982 + }, + { + "epoch": 0.9189346573495157, + "grad_norm": 0.6949318867767843, + "learning_rate": 3.426927305942829e-07, + "loss": 0.5048, + "step": 29983 + }, + { + "epoch": 0.918965305872257, + "grad_norm": 1.7229517060701731, + "learning_rate": 3.424351434826756e-07, + "loss": 0.6768, + "step": 29984 + }, + { + "epoch": 0.9189959543949981, + "grad_norm": 0.6994466660832519, + "learning_rate": 3.421776515295239e-07, + "loss": 0.5399, + "step": 29985 + }, + { + "epoch": 0.9190266029177394, + "grad_norm": 1.7084385763206253, + "learning_rate": 3.41920254737369e-07, + "loss": 0.5223, + "step": 29986 + }, + { + "epoch": 0.9190572514404806, + "grad_norm": 1.6887879476943404, + "learning_rate": 3.4166295310874343e-07, + "loss": 0.686, + "step": 29987 + }, + { + "epoch": 0.9190878999632218, + "grad_norm": 1.864559153149219, + "learning_rate": 3.41405746646184e-07, + "loss": 0.6231, + "step": 29988 + }, + { + "epoch": 0.919118548485963, + "grad_norm": 1.761685991518244, + "learning_rate": 3.411486353522253e-07, + "loss": 0.6632, + "step": 29989 + }, + { + "epoch": 0.9191491970087042, + "grad_norm": 1.580694959473383, + "learning_rate": 3.4089161922939984e-07, + "loss": 0.5774, + "step": 29990 + }, + { + "epoch": 0.9191798455314454, + "grad_norm": 1.6891270899091995, + "learning_rate": 3.406346982802411e-07, + "loss": 0.7037, + "step": 29991 + }, + { + "epoch": 0.9192104940541865, + "grad_norm": 2.0635477407481697, + "learning_rate": 3.403778725072804e-07, + "loss": 0.698, + "step": 29992 + }, + { + "epoch": 0.9192411425769278, + "grad_norm": 1.7096231008751035, + "learning_rate": 3.401211419130479e-07, + "loss": 0.5638, + "step": 29993 + }, + { + "epoch": 0.919271791099669, + "grad_norm": 1.9408550472916795, + "learning_rate": 3.3986450650007275e-07, + "loss": 0.6799, + "step": 29994 + }, + { + "epoch": 0.9193024396224102, + "grad_norm": 1.8065700881802218, + "learning_rate": 3.396079662708851e-07, + "loss": 0.6306, + "step": 29995 + }, + { + "epoch": 0.9193330881451514, + "grad_norm": 1.7574604841333965, + "learning_rate": 3.3935152122801184e-07, + "loss": 0.625, + "step": 29996 + }, + { + "epoch": 0.9193637366678926, + "grad_norm": 1.871420255730792, + "learning_rate": 3.3909517137397983e-07, + "loss": 0.6084, + "step": 29997 + }, + { + "epoch": 0.9193943851906338, + "grad_norm": 1.5708337463448905, + "learning_rate": 3.388389167113137e-07, + "loss": 0.6786, + "step": 29998 + }, + { + "epoch": 0.919425033713375, + "grad_norm": 1.684276169192469, + "learning_rate": 3.385827572425404e-07, + "loss": 0.6489, + "step": 29999 + }, + { + "epoch": 0.9194556822361162, + "grad_norm": 1.9150718900808459, + "learning_rate": 3.383266929701845e-07, + "loss": 0.5811, + "step": 30000 + }, + { + "epoch": 0.9194863307588574, + "grad_norm": 1.7121230753914698, + "learning_rate": 3.380707238967662e-07, + "loss": 0.6451, + "step": 30001 + }, + { + "epoch": 0.9195169792815986, + "grad_norm": 1.989663570509669, + "learning_rate": 3.3781485002480906e-07, + "loss": 0.6538, + "step": 30002 + }, + { + "epoch": 0.9195476278043399, + "grad_norm": 1.5812550340559874, + "learning_rate": 3.3755907135683553e-07, + "loss": 0.6774, + "step": 30003 + }, + { + "epoch": 0.919578276327081, + "grad_norm": 1.7849088537758884, + "learning_rate": 3.373033878953635e-07, + "loss": 0.6072, + "step": 30004 + }, + { + "epoch": 0.9196089248498223, + "grad_norm": 1.841536909577516, + "learning_rate": 3.3704779964291445e-07, + "loss": 0.6178, + "step": 30005 + }, + { + "epoch": 0.9196395733725634, + "grad_norm": 1.790518920808597, + "learning_rate": 3.3679230660200626e-07, + "loss": 0.6251, + "step": 30006 + }, + { + "epoch": 0.9196702218953047, + "grad_norm": 1.7959142621685964, + "learning_rate": 3.3653690877515466e-07, + "loss": 0.572, + "step": 30007 + }, + { + "epoch": 0.9197008704180458, + "grad_norm": 1.6205626614848898, + "learning_rate": 3.362816061648777e-07, + "loss": 0.5502, + "step": 30008 + }, + { + "epoch": 0.9197315189407871, + "grad_norm": 1.793220206000785, + "learning_rate": 3.3602639877369004e-07, + "loss": 0.6435, + "step": 30009 + }, + { + "epoch": 0.9197621674635282, + "grad_norm": 1.954519314300471, + "learning_rate": 3.357712866041074e-07, + "loss": 0.6243, + "step": 30010 + }, + { + "epoch": 0.9197928159862695, + "grad_norm": 1.5679192392396424, + "learning_rate": 3.355162696586445e-07, + "loss": 0.464, + "step": 30011 + }, + { + "epoch": 0.9198234645090106, + "grad_norm": 0.6535708386276617, + "learning_rate": 3.3526134793981033e-07, + "loss": 0.4868, + "step": 30012 + }, + { + "epoch": 0.9198541130317519, + "grad_norm": 1.8560655098605465, + "learning_rate": 3.350065214501197e-07, + "loss": 0.5916, + "step": 30013 + }, + { + "epoch": 0.9198847615544931, + "grad_norm": 1.7499358561569907, + "learning_rate": 3.347517901920838e-07, + "loss": 0.6965, + "step": 30014 + }, + { + "epoch": 0.9199154100772343, + "grad_norm": 1.7378050084091685, + "learning_rate": 3.344971541682096e-07, + "loss": 0.7195, + "step": 30015 + }, + { + "epoch": 0.9199460585999755, + "grad_norm": 1.7003560224469783, + "learning_rate": 3.342426133810095e-07, + "loss": 0.5489, + "step": 30016 + }, + { + "epoch": 0.9199767071227167, + "grad_norm": 1.7686336271421121, + "learning_rate": 3.3398816783298814e-07, + "loss": 0.6408, + "step": 30017 + }, + { + "epoch": 0.9200073556454579, + "grad_norm": 1.6493264146891131, + "learning_rate": 3.337338175266569e-07, + "loss": 0.744, + "step": 30018 + }, + { + "epoch": 0.9200380041681991, + "grad_norm": 1.8162042820625, + "learning_rate": 3.3347956246451927e-07, + "loss": 0.6793, + "step": 30019 + }, + { + "epoch": 0.9200686526909403, + "grad_norm": 1.921161892441963, + "learning_rate": 3.3322540264908e-07, + "loss": 0.7181, + "step": 30020 + }, + { + "epoch": 0.9200993012136816, + "grad_norm": 1.7797309644457493, + "learning_rate": 3.3297133808284367e-07, + "loss": 0.5613, + "step": 30021 + }, + { + "epoch": 0.9201299497364227, + "grad_norm": 1.7074966464469257, + "learning_rate": 3.3271736876831496e-07, + "loss": 0.6609, + "step": 30022 + }, + { + "epoch": 0.9201605982591639, + "grad_norm": 1.7977762521743832, + "learning_rate": 3.324634947079952e-07, + "loss": 0.6291, + "step": 30023 + }, + { + "epoch": 0.9201912467819051, + "grad_norm": 1.6829080325557702, + "learning_rate": 3.3220971590438577e-07, + "loss": 0.5946, + "step": 30024 + }, + { + "epoch": 0.9202218953046463, + "grad_norm": 1.8526070160782449, + "learning_rate": 3.319560323599891e-07, + "loss": 0.5459, + "step": 30025 + }, + { + "epoch": 0.9202525438273875, + "grad_norm": 1.540628872628076, + "learning_rate": 3.317024440773009e-07, + "loss": 0.6255, + "step": 30026 + }, + { + "epoch": 0.9202831923501287, + "grad_norm": 1.826746564461188, + "learning_rate": 3.314489510588248e-07, + "loss": 0.6459, + "step": 30027 + }, + { + "epoch": 0.9203138408728699, + "grad_norm": 1.4600943050085697, + "learning_rate": 3.311955533070532e-07, + "loss": 0.5799, + "step": 30028 + }, + { + "epoch": 0.9203444893956111, + "grad_norm": 1.6454877752827057, + "learning_rate": 3.309422508244886e-07, + "loss": 0.6565, + "step": 30029 + }, + { + "epoch": 0.9203751379183523, + "grad_norm": 1.7340434837718888, + "learning_rate": 3.306890436136223e-07, + "loss": 0.4882, + "step": 30030 + }, + { + "epoch": 0.9204057864410935, + "grad_norm": 0.6550379534898683, + "learning_rate": 3.304359316769512e-07, + "loss": 0.5358, + "step": 30031 + }, + { + "epoch": 0.9204364349638348, + "grad_norm": 1.9846643778492832, + "learning_rate": 3.3018291501696887e-07, + "loss": 0.7325, + "step": 30032 + }, + { + "epoch": 0.9204670834865759, + "grad_norm": 1.8648494678919676, + "learning_rate": 3.299299936361688e-07, + "loss": 0.6202, + "step": 30033 + }, + { + "epoch": 0.9204977320093172, + "grad_norm": 1.8615737257805327, + "learning_rate": 3.2967716753704246e-07, + "loss": 0.6682, + "step": 30034 + }, + { + "epoch": 0.9205283805320583, + "grad_norm": 1.6443166388514, + "learning_rate": 3.2942443672208e-07, + "loss": 0.5883, + "step": 30035 + }, + { + "epoch": 0.9205590290547996, + "grad_norm": 1.5774616062776339, + "learning_rate": 3.291718011937739e-07, + "loss": 0.5731, + "step": 30036 + }, + { + "epoch": 0.9205896775775407, + "grad_norm": 1.4619069802239024, + "learning_rate": 3.2891926095461325e-07, + "loss": 0.5142, + "step": 30037 + }, + { + "epoch": 0.920620326100282, + "grad_norm": 1.6874152009172712, + "learning_rate": 3.2866681600708605e-07, + "loss": 0.6051, + "step": 30038 + }, + { + "epoch": 0.9206509746230231, + "grad_norm": 1.694042445356625, + "learning_rate": 3.284144663536759e-07, + "loss": 0.6959, + "step": 30039 + }, + { + "epoch": 0.9206816231457644, + "grad_norm": 1.8335752898255107, + "learning_rate": 3.2816221199687527e-07, + "loss": 0.6464, + "step": 30040 + }, + { + "epoch": 0.9207122716685056, + "grad_norm": 2.0231986352669926, + "learning_rate": 3.279100529391677e-07, + "loss": 0.6488, + "step": 30041 + }, + { + "epoch": 0.9207429201912468, + "grad_norm": 1.7829111034023728, + "learning_rate": 3.276579891830356e-07, + "loss": 0.6402, + "step": 30042 + }, + { + "epoch": 0.920773568713988, + "grad_norm": 1.8714946703731743, + "learning_rate": 3.274060207309637e-07, + "loss": 0.644, + "step": 30043 + }, + { + "epoch": 0.9208042172367292, + "grad_norm": 1.964575868135138, + "learning_rate": 3.2715414758543563e-07, + "loss": 0.5952, + "step": 30044 + }, + { + "epoch": 0.9208348657594704, + "grad_norm": 1.7510629082852402, + "learning_rate": 3.2690236974893373e-07, + "loss": 0.604, + "step": 30045 + }, + { + "epoch": 0.9208655142822116, + "grad_norm": 1.5954787014471157, + "learning_rate": 3.266506872239361e-07, + "loss": 0.68, + "step": 30046 + }, + { + "epoch": 0.9208961628049528, + "grad_norm": 1.5405334110389004, + "learning_rate": 3.2639910001292517e-07, + "loss": 0.5374, + "step": 30047 + }, + { + "epoch": 0.920926811327694, + "grad_norm": 0.6648007263598402, + "learning_rate": 3.26147608118379e-07, + "loss": 0.5243, + "step": 30048 + }, + { + "epoch": 0.9209574598504352, + "grad_norm": 1.8745917030976673, + "learning_rate": 3.2589621154277664e-07, + "loss": 0.6546, + "step": 30049 + }, + { + "epoch": 0.9209881083731765, + "grad_norm": 1.8926613979375195, + "learning_rate": 3.2564491028859166e-07, + "loss": 0.6304, + "step": 30050 + }, + { + "epoch": 0.9210187568959176, + "grad_norm": 0.6711029311860204, + "learning_rate": 3.2539370435830443e-07, + "loss": 0.528, + "step": 30051 + }, + { + "epoch": 0.9210494054186589, + "grad_norm": 1.751580738586225, + "learning_rate": 3.251425937543884e-07, + "loss": 0.5623, + "step": 30052 + }, + { + "epoch": 0.9210800539414, + "grad_norm": 1.5848550496398623, + "learning_rate": 3.248915784793172e-07, + "loss": 0.5851, + "step": 30053 + }, + { + "epoch": 0.9211107024641412, + "grad_norm": 1.56293716684575, + "learning_rate": 3.2464065853556435e-07, + "loss": 0.5876, + "step": 30054 + }, + { + "epoch": 0.9211413509868824, + "grad_norm": 1.6525807662064258, + "learning_rate": 3.2438983392560244e-07, + "loss": 0.5738, + "step": 30055 + }, + { + "epoch": 0.9211719995096236, + "grad_norm": 1.6891121710436603, + "learning_rate": 3.241391046519049e-07, + "loss": 0.5976, + "step": 30056 + }, + { + "epoch": 0.9212026480323648, + "grad_norm": 0.6703704900688885, + "learning_rate": 3.238884707169387e-07, + "loss": 0.5196, + "step": 30057 + }, + { + "epoch": 0.921233296555106, + "grad_norm": 1.7974831021961217, + "learning_rate": 3.236379321231753e-07, + "loss": 0.5523, + "step": 30058 + }, + { + "epoch": 0.9212639450778473, + "grad_norm": 0.6792139554245442, + "learning_rate": 3.233874888730848e-07, + "loss": 0.5168, + "step": 30059 + }, + { + "epoch": 0.9212945936005884, + "grad_norm": 1.6127902283345756, + "learning_rate": 3.231371409691331e-07, + "loss": 0.6791, + "step": 30060 + }, + { + "epoch": 0.9213252421233297, + "grad_norm": 1.8304354620037535, + "learning_rate": 3.228868884137848e-07, + "loss": 0.6526, + "step": 30061 + }, + { + "epoch": 0.9213558906460708, + "grad_norm": 1.6806531841522054, + "learning_rate": 3.2263673120950914e-07, + "loss": 0.5628, + "step": 30062 + }, + { + "epoch": 0.9213865391688121, + "grad_norm": 1.8662747937627362, + "learning_rate": 3.223866693587696e-07, + "loss": 0.687, + "step": 30063 + }, + { + "epoch": 0.9214171876915532, + "grad_norm": 1.588019460189109, + "learning_rate": 3.221367028640321e-07, + "loss": 0.6077, + "step": 30064 + }, + { + "epoch": 0.9214478362142945, + "grad_norm": 1.5657273635886593, + "learning_rate": 3.218868317277557e-07, + "loss": 0.627, + "step": 30065 + }, + { + "epoch": 0.9214784847370356, + "grad_norm": 0.6885603472959235, + "learning_rate": 3.2163705595240514e-07, + "loss": 0.4976, + "step": 30066 + }, + { + "epoch": 0.9215091332597769, + "grad_norm": 1.7133645802277973, + "learning_rate": 3.2138737554044175e-07, + "loss": 0.6066, + "step": 30067 + }, + { + "epoch": 0.921539781782518, + "grad_norm": 1.8203541567422497, + "learning_rate": 3.211377904943247e-07, + "loss": 0.6525, + "step": 30068 + }, + { + "epoch": 0.9215704303052593, + "grad_norm": 1.6471874547681777, + "learning_rate": 3.20888300816512e-07, + "loss": 0.6385, + "step": 30069 + }, + { + "epoch": 0.9216010788280005, + "grad_norm": 1.5737954063759507, + "learning_rate": 3.2063890650946506e-07, + "loss": 0.6449, + "step": 30070 + }, + { + "epoch": 0.9216317273507417, + "grad_norm": 1.5314166536085212, + "learning_rate": 3.2038960757563854e-07, + "loss": 0.5907, + "step": 30071 + }, + { + "epoch": 0.9216623758734829, + "grad_norm": 1.7396780762735173, + "learning_rate": 3.201404040174916e-07, + "loss": 0.6442, + "step": 30072 + }, + { + "epoch": 0.9216930243962241, + "grad_norm": 1.737183347048008, + "learning_rate": 3.198912958374767e-07, + "loss": 0.701, + "step": 30073 + }, + { + "epoch": 0.9217236729189653, + "grad_norm": 1.6614553595164874, + "learning_rate": 3.1964228303804855e-07, + "loss": 0.6381, + "step": 30074 + }, + { + "epoch": 0.9217543214417065, + "grad_norm": 1.7462039616059695, + "learning_rate": 3.1939336562166414e-07, + "loss": 0.6241, + "step": 30075 + }, + { + "epoch": 0.9217849699644477, + "grad_norm": 1.6040090526644335, + "learning_rate": 3.191445435907714e-07, + "loss": 0.7227, + "step": 30076 + }, + { + "epoch": 0.921815618487189, + "grad_norm": 1.6568508083320217, + "learning_rate": 3.188958169478251e-07, + "loss": 0.5977, + "step": 30077 + }, + { + "epoch": 0.9218462670099301, + "grad_norm": 1.6987472322770936, + "learning_rate": 3.1864718569527664e-07, + "loss": 0.6482, + "step": 30078 + }, + { + "epoch": 0.9218769155326714, + "grad_norm": 1.5342336822301748, + "learning_rate": 3.18398649835574e-07, + "loss": 0.5662, + "step": 30079 + }, + { + "epoch": 0.9219075640554125, + "grad_norm": 1.7542331276851248, + "learning_rate": 3.181502093711653e-07, + "loss": 0.6546, + "step": 30080 + }, + { + "epoch": 0.9219382125781538, + "grad_norm": 1.8644046609177496, + "learning_rate": 3.1790186430450177e-07, + "loss": 0.6302, + "step": 30081 + }, + { + "epoch": 0.9219688611008949, + "grad_norm": 1.6938682789838544, + "learning_rate": 3.176536146380271e-07, + "loss": 0.5797, + "step": 30082 + }, + { + "epoch": 0.9219995096236362, + "grad_norm": 2.009763502693411, + "learning_rate": 3.174054603741894e-07, + "loss": 0.6883, + "step": 30083 + }, + { + "epoch": 0.9220301581463773, + "grad_norm": 0.6955227200378705, + "learning_rate": 3.1715740151543216e-07, + "loss": 0.5092, + "step": 30084 + }, + { + "epoch": 0.9220608066691185, + "grad_norm": 0.673530922748998, + "learning_rate": 3.1690943806420126e-07, + "loss": 0.5029, + "step": 30085 + }, + { + "epoch": 0.9220914551918598, + "grad_norm": 0.6787172565574752, + "learning_rate": 3.166615700229392e-07, + "loss": 0.5037, + "step": 30086 + }, + { + "epoch": 0.9221221037146009, + "grad_norm": 1.8546369325116066, + "learning_rate": 3.164137973940873e-07, + "loss": 0.6204, + "step": 30087 + }, + { + "epoch": 0.9221527522373422, + "grad_norm": 1.777454515496857, + "learning_rate": 3.1616612018008917e-07, + "loss": 0.5999, + "step": 30088 + }, + { + "epoch": 0.9221834007600833, + "grad_norm": 1.6741234032810655, + "learning_rate": 3.1591853838338403e-07, + "loss": 0.7011, + "step": 30089 + }, + { + "epoch": 0.9222140492828246, + "grad_norm": 1.7182257479471952, + "learning_rate": 3.156710520064099e-07, + "loss": 0.6691, + "step": 30090 + }, + { + "epoch": 0.9222446978055657, + "grad_norm": 1.8260074923355512, + "learning_rate": 3.1542366105160706e-07, + "loss": 0.7147, + "step": 30091 + }, + { + "epoch": 0.922275346328307, + "grad_norm": 1.731171221789602, + "learning_rate": 3.151763655214146e-07, + "loss": 0.662, + "step": 30092 + }, + { + "epoch": 0.9223059948510481, + "grad_norm": 1.6294497482259729, + "learning_rate": 3.1492916541826515e-07, + "loss": 0.5875, + "step": 30093 + }, + { + "epoch": 0.9223366433737894, + "grad_norm": 1.9154565354493498, + "learning_rate": 3.1468206074459884e-07, + "loss": 0.7486, + "step": 30094 + }, + { + "epoch": 0.9223672918965306, + "grad_norm": 1.712921875452548, + "learning_rate": 3.1443505150284714e-07, + "loss": 0.6591, + "step": 30095 + }, + { + "epoch": 0.9223979404192718, + "grad_norm": 1.669350765849765, + "learning_rate": 3.1418813769544364e-07, + "loss": 0.5792, + "step": 30096 + }, + { + "epoch": 0.922428588942013, + "grad_norm": 1.7685331125784125, + "learning_rate": 3.139413193248253e-07, + "loss": 0.5748, + "step": 30097 + }, + { + "epoch": 0.9224592374647542, + "grad_norm": 1.8098833091834763, + "learning_rate": 3.1369459639342017e-07, + "loss": 0.5936, + "step": 30098 + }, + { + "epoch": 0.9224898859874954, + "grad_norm": 1.5293947852702843, + "learning_rate": 3.1344796890365957e-07, + "loss": 0.5392, + "step": 30099 + }, + { + "epoch": 0.9225205345102366, + "grad_norm": 0.6574845350254408, + "learning_rate": 3.1320143685797613e-07, + "loss": 0.5263, + "step": 30100 + }, + { + "epoch": 0.9225511830329778, + "grad_norm": 1.805962481697005, + "learning_rate": 3.1295500025879666e-07, + "loss": 0.5888, + "step": 30101 + }, + { + "epoch": 0.922581831555719, + "grad_norm": 1.439066492183763, + "learning_rate": 3.127086591085493e-07, + "loss": 0.5211, + "step": 30102 + }, + { + "epoch": 0.9226124800784602, + "grad_norm": 1.723623191183681, + "learning_rate": 3.124624134096643e-07, + "loss": 0.6682, + "step": 30103 + }, + { + "epoch": 0.9226431286012015, + "grad_norm": 1.7767039591257998, + "learning_rate": 3.122162631645631e-07, + "loss": 0.6953, + "step": 30104 + }, + { + "epoch": 0.9226737771239426, + "grad_norm": 0.6663430994950789, + "learning_rate": 3.119702083756759e-07, + "loss": 0.5145, + "step": 30105 + }, + { + "epoch": 0.9227044256466839, + "grad_norm": 1.683216553512322, + "learning_rate": 3.117242490454242e-07, + "loss": 0.6486, + "step": 30106 + }, + { + "epoch": 0.922735074169425, + "grad_norm": 1.9360155124420586, + "learning_rate": 3.114783851762326e-07, + "loss": 0.6837, + "step": 30107 + }, + { + "epoch": 0.9227657226921663, + "grad_norm": 1.6648074865733191, + "learning_rate": 3.112326167705237e-07, + "loss": 0.6904, + "step": 30108 + }, + { + "epoch": 0.9227963712149074, + "grad_norm": 1.9483378132036786, + "learning_rate": 3.109869438307178e-07, + "loss": 0.7759, + "step": 30109 + }, + { + "epoch": 0.9228270197376487, + "grad_norm": 1.8145983428646872, + "learning_rate": 3.107413663592362e-07, + "loss": 0.7257, + "step": 30110 + }, + { + "epoch": 0.9228576682603898, + "grad_norm": 1.6588695494652292, + "learning_rate": 3.1049588435850154e-07, + "loss": 0.6021, + "step": 30111 + }, + { + "epoch": 0.9228883167831311, + "grad_norm": 0.6766313947686017, + "learning_rate": 3.102504978309273e-07, + "loss": 0.5083, + "step": 30112 + }, + { + "epoch": 0.9229189653058723, + "grad_norm": 1.604250544083791, + "learning_rate": 3.10005206778935e-07, + "loss": 0.6676, + "step": 30113 + }, + { + "epoch": 0.9229496138286135, + "grad_norm": 1.6760959257898225, + "learning_rate": 3.097600112049426e-07, + "loss": 0.5838, + "step": 30114 + }, + { + "epoch": 0.9229802623513547, + "grad_norm": 1.7309044703999437, + "learning_rate": 3.0951491111136154e-07, + "loss": 0.5399, + "step": 30115 + }, + { + "epoch": 0.9230109108740958, + "grad_norm": 1.6872208094927517, + "learning_rate": 3.092699065006111e-07, + "loss": 0.6489, + "step": 30116 + }, + { + "epoch": 0.9230415593968371, + "grad_norm": 1.8609850600848814, + "learning_rate": 3.090249973751025e-07, + "loss": 0.6975, + "step": 30117 + }, + { + "epoch": 0.9230722079195782, + "grad_norm": 1.559639445587741, + "learning_rate": 3.087801837372506e-07, + "loss": 0.636, + "step": 30118 + }, + { + "epoch": 0.9231028564423195, + "grad_norm": 1.8244562438187972, + "learning_rate": 3.085354655894679e-07, + "loss": 0.5952, + "step": 30119 + }, + { + "epoch": 0.9231335049650606, + "grad_norm": 1.8208288492777407, + "learning_rate": 3.082908429341647e-07, + "loss": 0.6365, + "step": 30120 + }, + { + "epoch": 0.9231641534878019, + "grad_norm": 1.574861318980279, + "learning_rate": 3.0804631577375013e-07, + "loss": 0.6717, + "step": 30121 + }, + { + "epoch": 0.923194802010543, + "grad_norm": 1.9480971972961743, + "learning_rate": 3.0780188411063785e-07, + "loss": 0.6096, + "step": 30122 + }, + { + "epoch": 0.9232254505332843, + "grad_norm": 1.8271340942781895, + "learning_rate": 3.0755754794723036e-07, + "loss": 0.6547, + "step": 30123 + }, + { + "epoch": 0.9232560990560255, + "grad_norm": 1.9328897448322175, + "learning_rate": 3.073133072859402e-07, + "loss": 0.651, + "step": 30124 + }, + { + "epoch": 0.9232867475787667, + "grad_norm": 1.7322255175115164, + "learning_rate": 3.0706916212917103e-07, + "loss": 0.5832, + "step": 30125 + }, + { + "epoch": 0.9233173961015079, + "grad_norm": 1.8658675701563334, + "learning_rate": 3.0682511247932976e-07, + "loss": 0.662, + "step": 30126 + }, + { + "epoch": 0.9233480446242491, + "grad_norm": 1.811767630154733, + "learning_rate": 3.065811583388223e-07, + "loss": 0.6001, + "step": 30127 + }, + { + "epoch": 0.9233786931469903, + "grad_norm": 1.5079757052752067, + "learning_rate": 3.063372997100489e-07, + "loss": 0.666, + "step": 30128 + }, + { + "epoch": 0.9234093416697315, + "grad_norm": 2.020446036096644, + "learning_rate": 3.060935365954143e-07, + "loss": 0.6937, + "step": 30129 + }, + { + "epoch": 0.9234399901924727, + "grad_norm": 1.7926867378499436, + "learning_rate": 3.0584986899732105e-07, + "loss": 0.7024, + "step": 30130 + }, + { + "epoch": 0.923470638715214, + "grad_norm": 0.6660552816321464, + "learning_rate": 3.056062969181695e-07, + "loss": 0.519, + "step": 30131 + }, + { + "epoch": 0.9235012872379551, + "grad_norm": 1.6881349242105668, + "learning_rate": 3.0536282036035867e-07, + "loss": 0.6424, + "step": 30132 + }, + { + "epoch": 0.9235319357606964, + "grad_norm": 1.8957253188675387, + "learning_rate": 3.0511943932629017e-07, + "loss": 0.6602, + "step": 30133 + }, + { + "epoch": 0.9235625842834375, + "grad_norm": 0.6773805421427517, + "learning_rate": 3.0487615381835864e-07, + "loss": 0.5304, + "step": 30134 + }, + { + "epoch": 0.9235932328061788, + "grad_norm": 1.5328037144179247, + "learning_rate": 3.046329638389645e-07, + "loss": 0.5791, + "step": 30135 + }, + { + "epoch": 0.9236238813289199, + "grad_norm": 1.6016983322109242, + "learning_rate": 3.0438986939050007e-07, + "loss": 0.6911, + "step": 30136 + }, + { + "epoch": 0.9236545298516612, + "grad_norm": 0.6422621283752519, + "learning_rate": 3.0414687047536475e-07, + "loss": 0.5309, + "step": 30137 + }, + { + "epoch": 0.9236851783744023, + "grad_norm": 1.730004739831135, + "learning_rate": 3.0390396709595094e-07, + "loss": 0.6622, + "step": 30138 + }, + { + "epoch": 0.9237158268971436, + "grad_norm": 0.6785055090808063, + "learning_rate": 3.0366115925465233e-07, + "loss": 0.5124, + "step": 30139 + }, + { + "epoch": 0.9237464754198847, + "grad_norm": 1.8332421158951622, + "learning_rate": 3.0341844695386035e-07, + "loss": 0.571, + "step": 30140 + }, + { + "epoch": 0.923777123942626, + "grad_norm": 1.698710785131232, + "learning_rate": 3.0317583019596865e-07, + "loss": 0.6348, + "step": 30141 + }, + { + "epoch": 0.9238077724653672, + "grad_norm": 1.7303034541780757, + "learning_rate": 3.0293330898336524e-07, + "loss": 0.6184, + "step": 30142 + }, + { + "epoch": 0.9238384209881084, + "grad_norm": 1.8194290099921473, + "learning_rate": 3.0269088331844166e-07, + "loss": 0.54, + "step": 30143 + }, + { + "epoch": 0.9238690695108496, + "grad_norm": 1.7574493526015524, + "learning_rate": 3.024485532035859e-07, + "loss": 0.7101, + "step": 30144 + }, + { + "epoch": 0.9238997180335908, + "grad_norm": 0.6562988622969185, + "learning_rate": 3.022063186411861e-07, + "loss": 0.5005, + "step": 30145 + }, + { + "epoch": 0.923930366556332, + "grad_norm": 0.6422695627516327, + "learning_rate": 3.0196417963362925e-07, + "loss": 0.4772, + "step": 30146 + }, + { + "epoch": 0.9239610150790731, + "grad_norm": 0.6446641406209884, + "learning_rate": 3.0172213618329893e-07, + "loss": 0.4784, + "step": 30147 + }, + { + "epoch": 0.9239916636018144, + "grad_norm": 1.788555179979165, + "learning_rate": 3.014801882925833e-07, + "loss": 0.7119, + "step": 30148 + }, + { + "epoch": 0.9240223121245555, + "grad_norm": 0.675648900863608, + "learning_rate": 3.0123833596386485e-07, + "loss": 0.5157, + "step": 30149 + }, + { + "epoch": 0.9240529606472968, + "grad_norm": 1.8770231975550187, + "learning_rate": 3.0099657919952617e-07, + "loss": 0.6191, + "step": 30150 + }, + { + "epoch": 0.924083609170038, + "grad_norm": 1.6803702487573002, + "learning_rate": 3.007549180019487e-07, + "loss": 0.6495, + "step": 30151 + }, + { + "epoch": 0.9241142576927792, + "grad_norm": 1.5992366635184287, + "learning_rate": 3.005133523735171e-07, + "loss": 0.67, + "step": 30152 + }, + { + "epoch": 0.9241449062155204, + "grad_norm": 1.5513197455065284, + "learning_rate": 3.0027188231660734e-07, + "loss": 0.5564, + "step": 30153 + }, + { + "epoch": 0.9241755547382616, + "grad_norm": 1.7746465853309936, + "learning_rate": 3.0003050783359965e-07, + "loss": 0.6236, + "step": 30154 + }, + { + "epoch": 0.9242062032610028, + "grad_norm": 1.6936792831495313, + "learning_rate": 2.9978922892687445e-07, + "loss": 0.6402, + "step": 30155 + }, + { + "epoch": 0.924236851783744, + "grad_norm": 1.7868771964269856, + "learning_rate": 2.995480455988087e-07, + "loss": 0.685, + "step": 30156 + }, + { + "epoch": 0.9242675003064852, + "grad_norm": 0.6820421060481753, + "learning_rate": 2.993069578517782e-07, + "loss": 0.5486, + "step": 30157 + }, + { + "epoch": 0.9242981488292265, + "grad_norm": 1.9675960854255539, + "learning_rate": 2.990659656881556e-07, + "loss": 0.6651, + "step": 30158 + }, + { + "epoch": 0.9243287973519676, + "grad_norm": 0.6460760934834602, + "learning_rate": 2.9882506911032005e-07, + "loss": 0.534, + "step": 30159 + }, + { + "epoch": 0.9243594458747089, + "grad_norm": 1.9419485256604252, + "learning_rate": 2.985842681206441e-07, + "loss": 0.6319, + "step": 30160 + }, + { + "epoch": 0.92439009439745, + "grad_norm": 1.7597217670018503, + "learning_rate": 2.983435627214981e-07, + "loss": 0.5751, + "step": 30161 + }, + { + "epoch": 0.9244207429201913, + "grad_norm": 1.8159040124355794, + "learning_rate": 2.981029529152557e-07, + "loss": 0.5908, + "step": 30162 + }, + { + "epoch": 0.9244513914429324, + "grad_norm": 1.6727008143393474, + "learning_rate": 2.978624387042872e-07, + "loss": 0.6527, + "step": 30163 + }, + { + "epoch": 0.9244820399656737, + "grad_norm": 1.9308307042433857, + "learning_rate": 2.97622020090963e-07, + "loss": 0.6984, + "step": 30164 + }, + { + "epoch": 0.9245126884884148, + "grad_norm": 1.8068217215773803, + "learning_rate": 2.9738169707764994e-07, + "loss": 0.5965, + "step": 30165 + }, + { + "epoch": 0.9245433370111561, + "grad_norm": 1.673547822144462, + "learning_rate": 2.9714146966671854e-07, + "loss": 0.642, + "step": 30166 + }, + { + "epoch": 0.9245739855338972, + "grad_norm": 0.6847341326971405, + "learning_rate": 2.9690133786053566e-07, + "loss": 0.5229, + "step": 30167 + }, + { + "epoch": 0.9246046340566385, + "grad_norm": 1.6387789371619053, + "learning_rate": 2.966613016614661e-07, + "loss": 0.6144, + "step": 30168 + }, + { + "epoch": 0.9246352825793797, + "grad_norm": 1.8992932763637977, + "learning_rate": 2.964213610718747e-07, + "loss": 0.6599, + "step": 30169 + }, + { + "epoch": 0.9246659311021209, + "grad_norm": 2.0333079900612105, + "learning_rate": 2.9618151609412727e-07, + "loss": 0.5974, + "step": 30170 + }, + { + "epoch": 0.9246965796248621, + "grad_norm": 1.8280137614037932, + "learning_rate": 2.959417667305853e-07, + "loss": 0.6577, + "step": 30171 + }, + { + "epoch": 0.9247272281476033, + "grad_norm": 1.5665115223290778, + "learning_rate": 2.957021129836124e-07, + "loss": 0.6291, + "step": 30172 + }, + { + "epoch": 0.9247578766703445, + "grad_norm": 1.5930136935005446, + "learning_rate": 2.9546255485557006e-07, + "loss": 0.6253, + "step": 30173 + }, + { + "epoch": 0.9247885251930857, + "grad_norm": 1.5894447194433872, + "learning_rate": 2.952230923488164e-07, + "loss": 0.6505, + "step": 30174 + }, + { + "epoch": 0.9248191737158269, + "grad_norm": 2.011785811665092, + "learning_rate": 2.949837254657151e-07, + "loss": 0.7419, + "step": 30175 + }, + { + "epoch": 0.9248498222385682, + "grad_norm": 0.6345511824113116, + "learning_rate": 2.9474445420862195e-07, + "loss": 0.5275, + "step": 30176 + }, + { + "epoch": 0.9248804707613093, + "grad_norm": 1.7716694108866355, + "learning_rate": 2.94505278579893e-07, + "loss": 0.5689, + "step": 30177 + }, + { + "epoch": 0.9249111192840505, + "grad_norm": 1.8424729334085068, + "learning_rate": 2.942661985818884e-07, + "loss": 0.7307, + "step": 30178 + }, + { + "epoch": 0.9249417678067917, + "grad_norm": 0.6992710065835995, + "learning_rate": 2.9402721421696204e-07, + "loss": 0.5285, + "step": 30179 + }, + { + "epoch": 0.9249724163295329, + "grad_norm": 1.7228554767403141, + "learning_rate": 2.937883254874685e-07, + "loss": 0.648, + "step": 30180 + }, + { + "epoch": 0.9250030648522741, + "grad_norm": 1.5956297811944065, + "learning_rate": 2.9354953239576156e-07, + "loss": 0.5415, + "step": 30181 + }, + { + "epoch": 0.9250337133750153, + "grad_norm": 1.7647651051040179, + "learning_rate": 2.9331083494419486e-07, + "loss": 0.6893, + "step": 30182 + }, + { + "epoch": 0.9250643618977565, + "grad_norm": 1.8088303538189796, + "learning_rate": 2.93072233135121e-07, + "loss": 0.6166, + "step": 30183 + }, + { + "epoch": 0.9250950104204977, + "grad_norm": 1.6722838608243629, + "learning_rate": 2.928337269708892e-07, + "loss": 0.5453, + "step": 30184 + }, + { + "epoch": 0.925125658943239, + "grad_norm": 1.777271399178551, + "learning_rate": 2.9259531645384974e-07, + "loss": 0.6685, + "step": 30185 + }, + { + "epoch": 0.9251563074659801, + "grad_norm": 1.8467423436640382, + "learning_rate": 2.9235700158635414e-07, + "loss": 0.6909, + "step": 30186 + }, + { + "epoch": 0.9251869559887214, + "grad_norm": 0.6780703040179232, + "learning_rate": 2.921187823707483e-07, + "loss": 0.5267, + "step": 30187 + }, + { + "epoch": 0.9252176045114625, + "grad_norm": 1.8194998762636696, + "learning_rate": 2.918806588093781e-07, + "loss": 0.6993, + "step": 30188 + }, + { + "epoch": 0.9252482530342038, + "grad_norm": 1.536624835079196, + "learning_rate": 2.9164263090459386e-07, + "loss": 0.6464, + "step": 30189 + }, + { + "epoch": 0.9252789015569449, + "grad_norm": 1.8784127493452154, + "learning_rate": 2.9140469865873824e-07, + "loss": 0.6827, + "step": 30190 + }, + { + "epoch": 0.9253095500796862, + "grad_norm": 1.6741424145072787, + "learning_rate": 2.91166862074157e-07, + "loss": 0.5977, + "step": 30191 + }, + { + "epoch": 0.9253401986024273, + "grad_norm": 1.8541425607137396, + "learning_rate": 2.9092912115319063e-07, + "loss": 0.7517, + "step": 30192 + }, + { + "epoch": 0.9253708471251686, + "grad_norm": 1.836360248602983, + "learning_rate": 2.906914758981849e-07, + "loss": 0.6312, + "step": 30193 + }, + { + "epoch": 0.9254014956479097, + "grad_norm": 0.6573881888758272, + "learning_rate": 2.904539263114814e-07, + "loss": 0.5065, + "step": 30194 + }, + { + "epoch": 0.925432144170651, + "grad_norm": 1.8046665819599346, + "learning_rate": 2.902164723954182e-07, + "loss": 0.5702, + "step": 30195 + }, + { + "epoch": 0.9254627926933922, + "grad_norm": 1.7500099987925208, + "learning_rate": 2.8997911415233673e-07, + "loss": 0.6091, + "step": 30196 + }, + { + "epoch": 0.9254934412161334, + "grad_norm": 1.9233280462344848, + "learning_rate": 2.8974185158457623e-07, + "loss": 0.5477, + "step": 30197 + }, + { + "epoch": 0.9255240897388746, + "grad_norm": 1.6971312921545738, + "learning_rate": 2.8950468469447267e-07, + "loss": 0.6997, + "step": 30198 + }, + { + "epoch": 0.9255547382616158, + "grad_norm": 1.8837234687702968, + "learning_rate": 2.8926761348436416e-07, + "loss": 0.6398, + "step": 30199 + }, + { + "epoch": 0.925585386784357, + "grad_norm": 1.5990686010234823, + "learning_rate": 2.890306379565877e-07, + "loss": 0.6578, + "step": 30200 + }, + { + "epoch": 0.9256160353070982, + "grad_norm": 1.881318160776408, + "learning_rate": 2.887937581134748e-07, + "loss": 0.6781, + "step": 30201 + }, + { + "epoch": 0.9256466838298394, + "grad_norm": 1.5846260131585863, + "learning_rate": 2.885569739573635e-07, + "loss": 0.5795, + "step": 30202 + }, + { + "epoch": 0.9256773323525807, + "grad_norm": 0.6576879534343789, + "learning_rate": 2.8832028549058423e-07, + "loss": 0.5203, + "step": 30203 + }, + { + "epoch": 0.9257079808753218, + "grad_norm": 0.6326607530298286, + "learning_rate": 2.8808369271547065e-07, + "loss": 0.4842, + "step": 30204 + }, + { + "epoch": 0.9257386293980631, + "grad_norm": 1.9016888493038662, + "learning_rate": 2.8784719563435315e-07, + "loss": 0.5746, + "step": 30205 + }, + { + "epoch": 0.9257692779208042, + "grad_norm": 1.8284297410074357, + "learning_rate": 2.8761079424956205e-07, + "loss": 0.6415, + "step": 30206 + }, + { + "epoch": 0.9257999264435455, + "grad_norm": 1.8554524754516517, + "learning_rate": 2.8737448856342666e-07, + "loss": 0.668, + "step": 30207 + }, + { + "epoch": 0.9258305749662866, + "grad_norm": 1.9895410703673668, + "learning_rate": 2.8713827857827613e-07, + "loss": 0.5764, + "step": 30208 + }, + { + "epoch": 0.9258612234890278, + "grad_norm": 1.5758796206495538, + "learning_rate": 2.8690216429643646e-07, + "loss": 0.5492, + "step": 30209 + }, + { + "epoch": 0.925891872011769, + "grad_norm": 1.7557878260589423, + "learning_rate": 2.8666614572023577e-07, + "loss": 0.5922, + "step": 30210 + }, + { + "epoch": 0.9259225205345102, + "grad_norm": 1.8013259232108865, + "learning_rate": 2.8643022285199885e-07, + "loss": 0.6789, + "step": 30211 + }, + { + "epoch": 0.9259531690572514, + "grad_norm": 1.7457696199357338, + "learning_rate": 2.861943956940494e-07, + "loss": 0.6166, + "step": 30212 + }, + { + "epoch": 0.9259838175799926, + "grad_norm": 1.5354135646448395, + "learning_rate": 2.859586642487133e-07, + "loss": 0.6407, + "step": 30213 + }, + { + "epoch": 0.9260144661027339, + "grad_norm": 0.6463129584506809, + "learning_rate": 2.85723028518311e-07, + "loss": 0.5218, + "step": 30214 + }, + { + "epoch": 0.926045114625475, + "grad_norm": 1.7949234607855737, + "learning_rate": 2.8548748850516393e-07, + "loss": 0.6758, + "step": 30215 + }, + { + "epoch": 0.9260757631482163, + "grad_norm": 1.5657219746768143, + "learning_rate": 2.852520442115969e-07, + "loss": 0.6722, + "step": 30216 + }, + { + "epoch": 0.9261064116709574, + "grad_norm": 1.8568107130201001, + "learning_rate": 2.8501669563992473e-07, + "loss": 0.624, + "step": 30217 + }, + { + "epoch": 0.9261370601936987, + "grad_norm": 1.805252098326306, + "learning_rate": 2.8478144279246997e-07, + "loss": 0.7169, + "step": 30218 + }, + { + "epoch": 0.9261677087164398, + "grad_norm": 1.634762274789689, + "learning_rate": 2.8454628567154976e-07, + "loss": 0.5309, + "step": 30219 + }, + { + "epoch": 0.9261983572391811, + "grad_norm": 0.6777836121139095, + "learning_rate": 2.8431122427947987e-07, + "loss": 0.5117, + "step": 30220 + }, + { + "epoch": 0.9262290057619222, + "grad_norm": 1.722175152613215, + "learning_rate": 2.8407625861857854e-07, + "loss": 0.6453, + "step": 30221 + }, + { + "epoch": 0.9262596542846635, + "grad_norm": 1.5768463895070244, + "learning_rate": 2.838413886911584e-07, + "loss": 0.6421, + "step": 30222 + }, + { + "epoch": 0.9262903028074047, + "grad_norm": 1.72087169677179, + "learning_rate": 2.836066144995353e-07, + "loss": 0.6147, + "step": 30223 + }, + { + "epoch": 0.9263209513301459, + "grad_norm": 1.7452648618680788, + "learning_rate": 2.8337193604602296e-07, + "loss": 0.5767, + "step": 30224 + }, + { + "epoch": 0.9263515998528871, + "grad_norm": 2.143216974891482, + "learning_rate": 2.831373533329318e-07, + "loss": 0.6425, + "step": 30225 + }, + { + "epoch": 0.9263822483756283, + "grad_norm": 1.7759519266126775, + "learning_rate": 2.8290286636257546e-07, + "loss": 0.6863, + "step": 30226 + }, + { + "epoch": 0.9264128968983695, + "grad_norm": 1.504354244296424, + "learning_rate": 2.826684751372633e-07, + "loss": 0.5829, + "step": 30227 + }, + { + "epoch": 0.9264435454211107, + "grad_norm": 1.7456914333194273, + "learning_rate": 2.8243417965930555e-07, + "loss": 0.6362, + "step": 30228 + }, + { + "epoch": 0.9264741939438519, + "grad_norm": 1.6645313977658711, + "learning_rate": 2.821999799310082e-07, + "loss": 0.6071, + "step": 30229 + }, + { + "epoch": 0.9265048424665931, + "grad_norm": 1.8435968059007934, + "learning_rate": 2.8196587595468284e-07, + "loss": 0.6911, + "step": 30230 + }, + { + "epoch": 0.9265354909893343, + "grad_norm": 1.8507061924984833, + "learning_rate": 2.8173186773263307e-07, + "loss": 0.6947, + "step": 30231 + }, + { + "epoch": 0.9265661395120756, + "grad_norm": 1.667285891315012, + "learning_rate": 2.814979552671671e-07, + "loss": 0.6934, + "step": 30232 + }, + { + "epoch": 0.9265967880348167, + "grad_norm": 1.5097426879600122, + "learning_rate": 2.812641385605874e-07, + "loss": 0.5764, + "step": 30233 + }, + { + "epoch": 0.926627436557558, + "grad_norm": 0.721007907719413, + "learning_rate": 2.810304176151979e-07, + "loss": 0.552, + "step": 30234 + }, + { + "epoch": 0.9266580850802991, + "grad_norm": 0.6517678792666873, + "learning_rate": 2.807967924333044e-07, + "loss": 0.5256, + "step": 30235 + }, + { + "epoch": 0.9266887336030404, + "grad_norm": 1.4471277717619968, + "learning_rate": 2.8056326301720504e-07, + "loss": 0.5897, + "step": 30236 + }, + { + "epoch": 0.9267193821257815, + "grad_norm": 1.7631818374359027, + "learning_rate": 2.803298293692036e-07, + "loss": 0.6464, + "step": 30237 + }, + { + "epoch": 0.9267500306485228, + "grad_norm": 1.7170612967047363, + "learning_rate": 2.8009649149159934e-07, + "loss": 0.6791, + "step": 30238 + }, + { + "epoch": 0.9267806791712639, + "grad_norm": 1.6587943159341025, + "learning_rate": 2.798632493866904e-07, + "loss": 0.5854, + "step": 30239 + }, + { + "epoch": 0.9268113276940051, + "grad_norm": 1.577946634266407, + "learning_rate": 2.7963010305677606e-07, + "loss": 0.6341, + "step": 30240 + }, + { + "epoch": 0.9268419762167464, + "grad_norm": 1.6196373674001632, + "learning_rate": 2.793970525041545e-07, + "loss": 0.6667, + "step": 30241 + }, + { + "epoch": 0.9268726247394875, + "grad_norm": 1.7242896347966563, + "learning_rate": 2.791640977311194e-07, + "loss": 0.6404, + "step": 30242 + }, + { + "epoch": 0.9269032732622288, + "grad_norm": 1.9334747790701483, + "learning_rate": 2.7893123873996895e-07, + "loss": 0.6126, + "step": 30243 + }, + { + "epoch": 0.9269339217849699, + "grad_norm": 1.7101715300790232, + "learning_rate": 2.786984755329947e-07, + "loss": 0.6716, + "step": 30244 + }, + { + "epoch": 0.9269645703077112, + "grad_norm": 1.7259054074736466, + "learning_rate": 2.7846580811249356e-07, + "loss": 0.6555, + "step": 30245 + }, + { + "epoch": 0.9269952188304523, + "grad_norm": 1.7142439155201266, + "learning_rate": 2.7823323648075606e-07, + "loss": 0.6614, + "step": 30246 + }, + { + "epoch": 0.9270258673531936, + "grad_norm": 1.7559733471798706, + "learning_rate": 2.7800076064007255e-07, + "loss": 0.6599, + "step": 30247 + }, + { + "epoch": 0.9270565158759347, + "grad_norm": 1.9476152889984342, + "learning_rate": 2.777683805927356e-07, + "loss": 0.5913, + "step": 30248 + }, + { + "epoch": 0.927087164398676, + "grad_norm": 1.6371044700684123, + "learning_rate": 2.7753609634103453e-07, + "loss": 0.6472, + "step": 30249 + }, + { + "epoch": 0.9271178129214172, + "grad_norm": 1.7676764257307331, + "learning_rate": 2.773039078872575e-07, + "loss": 0.6607, + "step": 30250 + }, + { + "epoch": 0.9271484614441584, + "grad_norm": 1.7931738158540316, + "learning_rate": 2.7707181523369263e-07, + "loss": 0.5918, + "step": 30251 + }, + { + "epoch": 0.9271791099668996, + "grad_norm": 1.726473567154612, + "learning_rate": 2.768398183826271e-07, + "loss": 0.615, + "step": 30252 + }, + { + "epoch": 0.9272097584896408, + "grad_norm": 1.7559925847273603, + "learning_rate": 2.766079173363467e-07, + "loss": 0.7259, + "step": 30253 + }, + { + "epoch": 0.927240407012382, + "grad_norm": 1.8540323329247785, + "learning_rate": 2.7637611209713755e-07, + "loss": 0.6554, + "step": 30254 + }, + { + "epoch": 0.9272710555351232, + "grad_norm": 1.6097656115106307, + "learning_rate": 2.761444026672799e-07, + "loss": 0.565, + "step": 30255 + }, + { + "epoch": 0.9273017040578644, + "grad_norm": 1.6886028291678086, + "learning_rate": 2.7591278904906094e-07, + "loss": 0.5556, + "step": 30256 + }, + { + "epoch": 0.9273323525806056, + "grad_norm": 1.6826879825461976, + "learning_rate": 2.756812712447621e-07, + "loss": 0.6069, + "step": 30257 + }, + { + "epoch": 0.9273630011033468, + "grad_norm": 1.6038931668768095, + "learning_rate": 2.754498492566626e-07, + "loss": 0.5984, + "step": 30258 + }, + { + "epoch": 0.9273936496260881, + "grad_norm": 1.9863413805581953, + "learning_rate": 2.75218523087043e-07, + "loss": 0.6326, + "step": 30259 + }, + { + "epoch": 0.9274242981488292, + "grad_norm": 1.5439468449057472, + "learning_rate": 2.749872927381858e-07, + "loss": 0.6564, + "step": 30260 + }, + { + "epoch": 0.9274549466715705, + "grad_norm": 1.7954191522634753, + "learning_rate": 2.747561582123648e-07, + "loss": 0.6154, + "step": 30261 + }, + { + "epoch": 0.9274855951943116, + "grad_norm": 1.8060445966473113, + "learning_rate": 2.7452511951186036e-07, + "loss": 0.667, + "step": 30262 + }, + { + "epoch": 0.9275162437170529, + "grad_norm": 0.6468493706492098, + "learning_rate": 2.742941766389484e-07, + "loss": 0.5066, + "step": 30263 + }, + { + "epoch": 0.927546892239794, + "grad_norm": 1.7910937248681964, + "learning_rate": 2.74063329595905e-07, + "loss": 0.6301, + "step": 30264 + }, + { + "epoch": 0.9275775407625353, + "grad_norm": 1.721795955697498, + "learning_rate": 2.738325783850049e-07, + "loss": 0.613, + "step": 30265 + }, + { + "epoch": 0.9276081892852764, + "grad_norm": 1.9750862597411345, + "learning_rate": 2.736019230085185e-07, + "loss": 0.644, + "step": 30266 + }, + { + "epoch": 0.9276388378080177, + "grad_norm": 1.61544355432147, + "learning_rate": 2.733713634687218e-07, + "loss": 0.6336, + "step": 30267 + }, + { + "epoch": 0.9276694863307589, + "grad_norm": 1.8700739723759006, + "learning_rate": 2.731408997678875e-07, + "loss": 0.6649, + "step": 30268 + }, + { + "epoch": 0.9277001348535001, + "grad_norm": 1.5930174375944024, + "learning_rate": 2.7291053190828253e-07, + "loss": 0.6385, + "step": 30269 + }, + { + "epoch": 0.9277307833762413, + "grad_norm": 1.8153624074186498, + "learning_rate": 2.726802598921796e-07, + "loss": 0.6749, + "step": 30270 + }, + { + "epoch": 0.9277614318989824, + "grad_norm": 1.8496205436052757, + "learning_rate": 2.724500837218458e-07, + "loss": 0.6291, + "step": 30271 + }, + { + "epoch": 0.9277920804217237, + "grad_norm": 0.6442287523418693, + "learning_rate": 2.722200033995526e-07, + "loss": 0.4908, + "step": 30272 + }, + { + "epoch": 0.9278227289444648, + "grad_norm": 1.8955164652769936, + "learning_rate": 2.719900189275637e-07, + "loss": 0.678, + "step": 30273 + }, + { + "epoch": 0.9278533774672061, + "grad_norm": 1.6289026455899154, + "learning_rate": 2.7176013030814406e-07, + "loss": 0.5743, + "step": 30274 + }, + { + "epoch": 0.9278840259899472, + "grad_norm": 1.957327597264305, + "learning_rate": 2.7153033754356407e-07, + "loss": 0.6382, + "step": 30275 + }, + { + "epoch": 0.9279146745126885, + "grad_norm": 1.9157272194675916, + "learning_rate": 2.713006406360841e-07, + "loss": 0.6151, + "step": 30276 + }, + { + "epoch": 0.9279453230354296, + "grad_norm": 1.7465366173362773, + "learning_rate": 2.7107103958796677e-07, + "loss": 0.66, + "step": 30277 + }, + { + "epoch": 0.9279759715581709, + "grad_norm": 2.0497355602829614, + "learning_rate": 2.7084153440147584e-07, + "loss": 0.6825, + "step": 30278 + }, + { + "epoch": 0.9280066200809121, + "grad_norm": 1.8133757074323802, + "learning_rate": 2.706121250788729e-07, + "loss": 0.6275, + "step": 30279 + }, + { + "epoch": 0.9280372686036533, + "grad_norm": 1.748252241388168, + "learning_rate": 2.703828116224183e-07, + "loss": 0.6719, + "step": 30280 + }, + { + "epoch": 0.9280679171263945, + "grad_norm": 1.565143751080572, + "learning_rate": 2.7015359403437136e-07, + "loss": 0.5794, + "step": 30281 + }, + { + "epoch": 0.9280985656491357, + "grad_norm": 1.7354118212082559, + "learning_rate": 2.6992447231699027e-07, + "loss": 0.4972, + "step": 30282 + }, + { + "epoch": 0.9281292141718769, + "grad_norm": 1.5517824560274234, + "learning_rate": 2.696954464725332e-07, + "loss": 0.5338, + "step": 30283 + }, + { + "epoch": 0.9281598626946181, + "grad_norm": 1.780165333234839, + "learning_rate": 2.6946651650325727e-07, + "loss": 0.6108, + "step": 30284 + }, + { + "epoch": 0.9281905112173593, + "grad_norm": 0.6503222665558132, + "learning_rate": 2.6923768241141513e-07, + "loss": 0.4927, + "step": 30285 + }, + { + "epoch": 0.9282211597401006, + "grad_norm": 1.7701111732117336, + "learning_rate": 2.6900894419926607e-07, + "loss": 0.6986, + "step": 30286 + }, + { + "epoch": 0.9282518082628417, + "grad_norm": 1.8669961296280095, + "learning_rate": 2.6878030186906156e-07, + "loss": 0.6564, + "step": 30287 + }, + { + "epoch": 0.928282456785583, + "grad_norm": 1.8194030548279656, + "learning_rate": 2.685517554230532e-07, + "loss": 0.5363, + "step": 30288 + }, + { + "epoch": 0.9283131053083241, + "grad_norm": 1.623634454874966, + "learning_rate": 2.683233048634948e-07, + "loss": 0.6347, + "step": 30289 + }, + { + "epoch": 0.9283437538310654, + "grad_norm": 1.6514337097416527, + "learning_rate": 2.6809495019263665e-07, + "loss": 0.5917, + "step": 30290 + }, + { + "epoch": 0.9283744023538065, + "grad_norm": 1.7554765064303175, + "learning_rate": 2.6786669141273035e-07, + "loss": 0.5965, + "step": 30291 + }, + { + "epoch": 0.9284050508765478, + "grad_norm": 1.8224228583584978, + "learning_rate": 2.676385285260219e-07, + "loss": 0.6746, + "step": 30292 + }, + { + "epoch": 0.9284356993992889, + "grad_norm": 1.75497263161657, + "learning_rate": 2.6741046153476167e-07, + "loss": 0.6105, + "step": 30293 + }, + { + "epoch": 0.9284663479220302, + "grad_norm": 1.954309265209232, + "learning_rate": 2.671824904411968e-07, + "loss": 0.6786, + "step": 30294 + }, + { + "epoch": 0.9284969964447713, + "grad_norm": 1.6173860244756606, + "learning_rate": 2.669546152475733e-07, + "loss": 0.6388, + "step": 30295 + }, + { + "epoch": 0.9285276449675126, + "grad_norm": 1.8680383397118232, + "learning_rate": 2.667268359561348e-07, + "loss": 0.6166, + "step": 30296 + }, + { + "epoch": 0.9285582934902538, + "grad_norm": 0.6246024719981995, + "learning_rate": 2.664991525691285e-07, + "loss": 0.4991, + "step": 30297 + }, + { + "epoch": 0.928588942012995, + "grad_norm": 1.729610336508283, + "learning_rate": 2.662715650887959e-07, + "loss": 0.6117, + "step": 30298 + }, + { + "epoch": 0.9286195905357362, + "grad_norm": 1.941413586589406, + "learning_rate": 2.660440735173808e-07, + "loss": 0.633, + "step": 30299 + }, + { + "epoch": 0.9286502390584774, + "grad_norm": 1.7849034546595637, + "learning_rate": 2.658166778571236e-07, + "loss": 0.6539, + "step": 30300 + }, + { + "epoch": 0.9286808875812186, + "grad_norm": 1.8988698708879006, + "learning_rate": 2.6558937811026474e-07, + "loss": 0.6656, + "step": 30301 + }, + { + "epoch": 0.9287115361039597, + "grad_norm": 1.8111240593504618, + "learning_rate": 2.653621742790458e-07, + "loss": 0.663, + "step": 30302 + }, + { + "epoch": 0.928742184626701, + "grad_norm": 1.85757180940263, + "learning_rate": 2.651350663657026e-07, + "loss": 0.7183, + "step": 30303 + }, + { + "epoch": 0.9287728331494421, + "grad_norm": 1.636200245080196, + "learning_rate": 2.6490805437247357e-07, + "loss": 0.5788, + "step": 30304 + }, + { + "epoch": 0.9288034816721834, + "grad_norm": 0.6921417000046813, + "learning_rate": 2.64681138301599e-07, + "loss": 0.5366, + "step": 30305 + }, + { + "epoch": 0.9288341301949246, + "grad_norm": 1.71868777755501, + "learning_rate": 2.6445431815530943e-07, + "loss": 0.6005, + "step": 30306 + }, + { + "epoch": 0.9288647787176658, + "grad_norm": 1.569869463728276, + "learning_rate": 2.642275939358452e-07, + "loss": 0.5674, + "step": 30307 + }, + { + "epoch": 0.928895427240407, + "grad_norm": 1.7658917371496394, + "learning_rate": 2.6400096564543454e-07, + "loss": 0.663, + "step": 30308 + }, + { + "epoch": 0.9289260757631482, + "grad_norm": 1.6732092490443866, + "learning_rate": 2.637744332863146e-07, + "loss": 0.6413, + "step": 30309 + }, + { + "epoch": 0.9289567242858894, + "grad_norm": 2.0058937689059464, + "learning_rate": 2.6354799686071797e-07, + "loss": 0.5957, + "step": 30310 + }, + { + "epoch": 0.9289873728086306, + "grad_norm": 1.6724352516617045, + "learning_rate": 2.633216563708718e-07, + "loss": 0.6032, + "step": 30311 + }, + { + "epoch": 0.9290180213313718, + "grad_norm": 0.6584526079950127, + "learning_rate": 2.6309541181900875e-07, + "loss": 0.5113, + "step": 30312 + }, + { + "epoch": 0.929048669854113, + "grad_norm": 1.622271208827324, + "learning_rate": 2.628692632073593e-07, + "loss": 0.5494, + "step": 30313 + }, + { + "epoch": 0.9290793183768542, + "grad_norm": 0.694681390193124, + "learning_rate": 2.6264321053814933e-07, + "loss": 0.5234, + "step": 30314 + }, + { + "epoch": 0.9291099668995955, + "grad_norm": 0.7042178653593887, + "learning_rate": 2.6241725381360715e-07, + "loss": 0.5277, + "step": 30315 + }, + { + "epoch": 0.9291406154223366, + "grad_norm": 1.674191617106509, + "learning_rate": 2.6219139303595985e-07, + "loss": 0.6164, + "step": 30316 + }, + { + "epoch": 0.9291712639450779, + "grad_norm": 1.801954754239002, + "learning_rate": 2.619656282074323e-07, + "loss": 0.5807, + "step": 30317 + }, + { + "epoch": 0.929201912467819, + "grad_norm": 1.6726081865875013, + "learning_rate": 2.6173995933024943e-07, + "loss": 0.7156, + "step": 30318 + }, + { + "epoch": 0.9292325609905603, + "grad_norm": 1.496571325933477, + "learning_rate": 2.615143864066327e-07, + "loss": 0.6127, + "step": 30319 + }, + { + "epoch": 0.9292632095133014, + "grad_norm": 1.75482349904713, + "learning_rate": 2.6128890943880716e-07, + "loss": 0.5558, + "step": 30320 + }, + { + "epoch": 0.9292938580360427, + "grad_norm": 1.8857813644408996, + "learning_rate": 2.610635284289942e-07, + "loss": 0.6189, + "step": 30321 + }, + { + "epoch": 0.9293245065587838, + "grad_norm": 1.524477915393755, + "learning_rate": 2.608382433794143e-07, + "loss": 0.5551, + "step": 30322 + }, + { + "epoch": 0.9293551550815251, + "grad_norm": 1.8280034552053885, + "learning_rate": 2.606130542922858e-07, + "loss": 0.5468, + "step": 30323 + }, + { + "epoch": 0.9293858036042663, + "grad_norm": 2.041845170420877, + "learning_rate": 2.6038796116983014e-07, + "loss": 0.6412, + "step": 30324 + }, + { + "epoch": 0.9294164521270075, + "grad_norm": 1.8477571519788414, + "learning_rate": 2.601629640142633e-07, + "loss": 0.6613, + "step": 30325 + }, + { + "epoch": 0.9294471006497487, + "grad_norm": 1.522365805445448, + "learning_rate": 2.5993806282780254e-07, + "loss": 0.5963, + "step": 30326 + }, + { + "epoch": 0.9294777491724899, + "grad_norm": 1.613946591528, + "learning_rate": 2.5971325761266486e-07, + "loss": 0.601, + "step": 30327 + }, + { + "epoch": 0.9295083976952311, + "grad_norm": 1.6899896986542224, + "learning_rate": 2.594885483710641e-07, + "loss": 0.5906, + "step": 30328 + }, + { + "epoch": 0.9295390462179723, + "grad_norm": 1.6042289694454819, + "learning_rate": 2.592639351052162e-07, + "loss": 0.7031, + "step": 30329 + }, + { + "epoch": 0.9295696947407135, + "grad_norm": 0.6633312780035784, + "learning_rate": 2.5903941781733054e-07, + "loss": 0.4854, + "step": 30330 + }, + { + "epoch": 0.9296003432634548, + "grad_norm": 1.6922411827417445, + "learning_rate": 2.588149965096232e-07, + "loss": 0.5405, + "step": 30331 + }, + { + "epoch": 0.9296309917861959, + "grad_norm": 1.7515263269132646, + "learning_rate": 2.5859067118430446e-07, + "loss": 0.5856, + "step": 30332 + }, + { + "epoch": 0.929661640308937, + "grad_norm": 1.6825479459343098, + "learning_rate": 2.5836644184358384e-07, + "loss": 0.6599, + "step": 30333 + }, + { + "epoch": 0.9296922888316783, + "grad_norm": 1.6089764300825298, + "learning_rate": 2.581423084896706e-07, + "loss": 0.5501, + "step": 30334 + }, + { + "epoch": 0.9297229373544195, + "grad_norm": 1.9050542254145728, + "learning_rate": 2.579182711247752e-07, + "loss": 0.5938, + "step": 30335 + }, + { + "epoch": 0.9297535858771607, + "grad_norm": 1.6450518696723089, + "learning_rate": 2.5769432975110256e-07, + "loss": 0.6429, + "step": 30336 + }, + { + "epoch": 0.9297842343999019, + "grad_norm": 1.6728028638207215, + "learning_rate": 2.5747048437085977e-07, + "loss": 0.657, + "step": 30337 + }, + { + "epoch": 0.9298148829226431, + "grad_norm": 1.5537570096448778, + "learning_rate": 2.5724673498625506e-07, + "loss": 0.6697, + "step": 30338 + }, + { + "epoch": 0.9298455314453843, + "grad_norm": 1.7570761296101878, + "learning_rate": 2.5702308159948896e-07, + "loss": 0.7272, + "step": 30339 + }, + { + "epoch": 0.9298761799681255, + "grad_norm": 0.6610449666843243, + "learning_rate": 2.5679952421276964e-07, + "loss": 0.5028, + "step": 30340 + }, + { + "epoch": 0.9299068284908667, + "grad_norm": 1.564198230255503, + "learning_rate": 2.565760628282954e-07, + "loss": 0.5733, + "step": 30341 + }, + { + "epoch": 0.929937477013608, + "grad_norm": 2.202319290435388, + "learning_rate": 2.563526974482711e-07, + "loss": 0.7043, + "step": 30342 + }, + { + "epoch": 0.9299681255363491, + "grad_norm": 0.6766035164098541, + "learning_rate": 2.5612942807489714e-07, + "loss": 0.5292, + "step": 30343 + }, + { + "epoch": 0.9299987740590904, + "grad_norm": 2.0361557437791187, + "learning_rate": 2.559062547103719e-07, + "loss": 0.6703, + "step": 30344 + }, + { + "epoch": 0.9300294225818315, + "grad_norm": 1.7338831687788996, + "learning_rate": 2.5568317735689575e-07, + "loss": 0.5155, + "step": 30345 + }, + { + "epoch": 0.9300600711045728, + "grad_norm": 1.7043230415365145, + "learning_rate": 2.554601960166669e-07, + "loss": 0.6564, + "step": 30346 + }, + { + "epoch": 0.9300907196273139, + "grad_norm": 1.546596344601947, + "learning_rate": 2.5523731069188154e-07, + "loss": 0.5358, + "step": 30347 + }, + { + "epoch": 0.9301213681500552, + "grad_norm": 1.6381938620674608, + "learning_rate": 2.550145213847355e-07, + "loss": 0.5833, + "step": 30348 + }, + { + "epoch": 0.9301520166727963, + "grad_norm": 0.6905532322878428, + "learning_rate": 2.54791828097426e-07, + "loss": 0.526, + "step": 30349 + }, + { + "epoch": 0.9301826651955376, + "grad_norm": 0.6807337742103222, + "learning_rate": 2.545692308321457e-07, + "loss": 0.5339, + "step": 30350 + }, + { + "epoch": 0.9302133137182788, + "grad_norm": 1.7456282449043528, + "learning_rate": 2.5434672959108843e-07, + "loss": 0.6067, + "step": 30351 + }, + { + "epoch": 0.93024396224102, + "grad_norm": 0.672780066358356, + "learning_rate": 2.5412432437644687e-07, + "loss": 0.508, + "step": 30352 + }, + { + "epoch": 0.9302746107637612, + "grad_norm": 1.8216746997645354, + "learning_rate": 2.539020151904104e-07, + "loss": 0.6262, + "step": 30353 + }, + { + "epoch": 0.9303052592865024, + "grad_norm": 0.6561754525272844, + "learning_rate": 2.5367980203517273e-07, + "loss": 0.4978, + "step": 30354 + }, + { + "epoch": 0.9303359078092436, + "grad_norm": 1.7110029962253168, + "learning_rate": 2.5345768491292e-07, + "loss": 0.6262, + "step": 30355 + }, + { + "epoch": 0.9303665563319848, + "grad_norm": 1.8916606325654184, + "learning_rate": 2.532356638258426e-07, + "loss": 0.7663, + "step": 30356 + }, + { + "epoch": 0.930397204854726, + "grad_norm": 1.8196377997976136, + "learning_rate": 2.5301373877613e-07, + "loss": 0.6424, + "step": 30357 + }, + { + "epoch": 0.9304278533774673, + "grad_norm": 0.7043864138083056, + "learning_rate": 2.527919097659648e-07, + "loss": 0.5325, + "step": 30358 + }, + { + "epoch": 0.9304585019002084, + "grad_norm": 1.6945206778558024, + "learning_rate": 2.5257017679753636e-07, + "loss": 0.6287, + "step": 30359 + }, + { + "epoch": 0.9304891504229497, + "grad_norm": 1.925390574473247, + "learning_rate": 2.5234853987302744e-07, + "loss": 0.6363, + "step": 30360 + }, + { + "epoch": 0.9305197989456908, + "grad_norm": 1.8545410014840151, + "learning_rate": 2.521269989946218e-07, + "loss": 0.613, + "step": 30361 + }, + { + "epoch": 0.9305504474684321, + "grad_norm": 1.7286975679939658, + "learning_rate": 2.519055541645032e-07, + "loss": 0.599, + "step": 30362 + }, + { + "epoch": 0.9305810959911732, + "grad_norm": 2.009746175803532, + "learning_rate": 2.516842053848534e-07, + "loss": 0.5156, + "step": 30363 + }, + { + "epoch": 0.9306117445139144, + "grad_norm": 1.5673677087609454, + "learning_rate": 2.514629526578527e-07, + "loss": 0.5702, + "step": 30364 + }, + { + "epoch": 0.9306423930366556, + "grad_norm": 0.6662925345071683, + "learning_rate": 2.512417959856839e-07, + "loss": 0.5209, + "step": 30365 + }, + { + "epoch": 0.9306730415593968, + "grad_norm": 1.6027078526942606, + "learning_rate": 2.5102073537052186e-07, + "loss": 0.5997, + "step": 30366 + }, + { + "epoch": 0.930703690082138, + "grad_norm": 1.8037676619271412, + "learning_rate": 2.5079977081454707e-07, + "loss": 0.6223, + "step": 30367 + }, + { + "epoch": 0.9307343386048792, + "grad_norm": 1.6607708712971667, + "learning_rate": 2.5057890231993784e-07, + "loss": 0.5908, + "step": 30368 + }, + { + "epoch": 0.9307649871276205, + "grad_norm": 1.975533174550241, + "learning_rate": 2.5035812988886797e-07, + "loss": 0.6022, + "step": 30369 + }, + { + "epoch": 0.9307956356503616, + "grad_norm": 1.6058456624886563, + "learning_rate": 2.501374535235157e-07, + "loss": 0.5368, + "step": 30370 + }, + { + "epoch": 0.9308262841731029, + "grad_norm": 1.568125411712777, + "learning_rate": 2.4991687322605154e-07, + "loss": 0.6639, + "step": 30371 + }, + { + "epoch": 0.930856932695844, + "grad_norm": 1.8592889627035603, + "learning_rate": 2.496963889986526e-07, + "loss": 0.6501, + "step": 30372 + }, + { + "epoch": 0.9308875812185853, + "grad_norm": 0.6600690897304884, + "learning_rate": 2.494760008434893e-07, + "loss": 0.5155, + "step": 30373 + }, + { + "epoch": 0.9309182297413264, + "grad_norm": 1.6348774782946776, + "learning_rate": 2.492557087627334e-07, + "loss": 0.5634, + "step": 30374 + }, + { + "epoch": 0.9309488782640677, + "grad_norm": 1.8341143454857076, + "learning_rate": 2.490355127585564e-07, + "loss": 0.6873, + "step": 30375 + }, + { + "epoch": 0.9309795267868088, + "grad_norm": 1.7076414731644423, + "learning_rate": 2.488154128331277e-07, + "loss": 0.6139, + "step": 30376 + }, + { + "epoch": 0.9310101753095501, + "grad_norm": 0.6633486030285033, + "learning_rate": 2.4859540898861446e-07, + "loss": 0.51, + "step": 30377 + }, + { + "epoch": 0.9310408238322913, + "grad_norm": 1.820150492029403, + "learning_rate": 2.4837550122718603e-07, + "loss": 0.6256, + "step": 30378 + }, + { + "epoch": 0.9310714723550325, + "grad_norm": 1.9592995488956213, + "learning_rate": 2.4815568955100954e-07, + "loss": 0.6299, + "step": 30379 + }, + { + "epoch": 0.9311021208777737, + "grad_norm": 1.8145594544715848, + "learning_rate": 2.4793597396225e-07, + "loss": 0.6017, + "step": 30380 + }, + { + "epoch": 0.9311327694005149, + "grad_norm": 1.6672980631719492, + "learning_rate": 2.477163544630734e-07, + "loss": 0.6649, + "step": 30381 + }, + { + "epoch": 0.9311634179232561, + "grad_norm": 1.7649659852215074, + "learning_rate": 2.474968310556403e-07, + "loss": 0.6239, + "step": 30382 + }, + { + "epoch": 0.9311940664459973, + "grad_norm": 0.654669815665946, + "learning_rate": 2.4727740374211773e-07, + "loss": 0.4815, + "step": 30383 + }, + { + "epoch": 0.9312247149687385, + "grad_norm": 0.6703327087550786, + "learning_rate": 2.470580725246674e-07, + "loss": 0.5274, + "step": 30384 + }, + { + "epoch": 0.9312553634914797, + "grad_norm": 1.8292429698377972, + "learning_rate": 2.468388374054476e-07, + "loss": 0.6881, + "step": 30385 + }, + { + "epoch": 0.9312860120142209, + "grad_norm": 0.6654371538878955, + "learning_rate": 2.466196983866198e-07, + "loss": 0.5154, + "step": 30386 + }, + { + "epoch": 0.9313166605369622, + "grad_norm": 1.592734645565429, + "learning_rate": 2.4640065547034467e-07, + "loss": 0.5489, + "step": 30387 + }, + { + "epoch": 0.9313473090597033, + "grad_norm": 0.6770305035626504, + "learning_rate": 2.4618170865877924e-07, + "loss": 0.4907, + "step": 30388 + }, + { + "epoch": 0.9313779575824446, + "grad_norm": 1.674991648552753, + "learning_rate": 2.459628579540807e-07, + "loss": 0.5537, + "step": 30389 + }, + { + "epoch": 0.9314086061051857, + "grad_norm": 1.716471201960789, + "learning_rate": 2.4574410335840625e-07, + "loss": 0.5639, + "step": 30390 + }, + { + "epoch": 0.931439254627927, + "grad_norm": 0.7301237777971292, + "learning_rate": 2.4552544487391083e-07, + "loss": 0.5534, + "step": 30391 + }, + { + "epoch": 0.9314699031506681, + "grad_norm": 1.8117859098029359, + "learning_rate": 2.4530688250274935e-07, + "loss": 0.6471, + "step": 30392 + }, + { + "epoch": 0.9315005516734094, + "grad_norm": 1.5154262443915414, + "learning_rate": 2.450884162470735e-07, + "loss": 0.5868, + "step": 30393 + }, + { + "epoch": 0.9315312001961505, + "grad_norm": 0.6954683946607884, + "learning_rate": 2.448700461090392e-07, + "loss": 0.5235, + "step": 30394 + }, + { + "epoch": 0.9315618487188917, + "grad_norm": 1.7461950704620415, + "learning_rate": 2.4465177209079593e-07, + "loss": 0.6175, + "step": 30395 + }, + { + "epoch": 0.931592497241633, + "grad_norm": 1.6391097191830295, + "learning_rate": 2.444335941944942e-07, + "loss": 0.6785, + "step": 30396 + }, + { + "epoch": 0.9316231457643741, + "grad_norm": 0.6643948559160626, + "learning_rate": 2.442155124222845e-07, + "loss": 0.4853, + "step": 30397 + }, + { + "epoch": 0.9316537942871154, + "grad_norm": 1.8287470721681691, + "learning_rate": 2.4399752677631505e-07, + "loss": 0.5534, + "step": 30398 + }, + { + "epoch": 0.9316844428098565, + "grad_norm": 1.8340683419567685, + "learning_rate": 2.437796372587353e-07, + "loss": 0.6032, + "step": 30399 + }, + { + "epoch": 0.9317150913325978, + "grad_norm": 1.5897350989206092, + "learning_rate": 2.4356184387168913e-07, + "loss": 0.6624, + "step": 30400 + }, + { + "epoch": 0.9317457398553389, + "grad_norm": 1.6689087716681905, + "learning_rate": 2.433441466173259e-07, + "loss": 0.6835, + "step": 30401 + }, + { + "epoch": 0.9317763883780802, + "grad_norm": 0.6508105271006082, + "learning_rate": 2.4312654549778935e-07, + "loss": 0.485, + "step": 30402 + }, + { + "epoch": 0.9318070369008213, + "grad_norm": 1.7865194233305381, + "learning_rate": 2.4290904051522347e-07, + "loss": 0.6566, + "step": 30403 + }, + { + "epoch": 0.9318376854235626, + "grad_norm": 1.812117213720948, + "learning_rate": 2.426916316717698e-07, + "loss": 0.5885, + "step": 30404 + }, + { + "epoch": 0.9318683339463038, + "grad_norm": 1.8199980074478883, + "learning_rate": 2.4247431896957216e-07, + "loss": 0.5868, + "step": 30405 + }, + { + "epoch": 0.931898982469045, + "grad_norm": 1.5776441833175698, + "learning_rate": 2.4225710241077225e-07, + "loss": 0.5628, + "step": 30406 + }, + { + "epoch": 0.9319296309917862, + "grad_norm": 1.8596690250374057, + "learning_rate": 2.4203998199751057e-07, + "loss": 0.6074, + "step": 30407 + }, + { + "epoch": 0.9319602795145274, + "grad_norm": 1.7907189244930017, + "learning_rate": 2.418229577319242e-07, + "loss": 0.6448, + "step": 30408 + }, + { + "epoch": 0.9319909280372686, + "grad_norm": 1.8401393083085198, + "learning_rate": 2.4160602961615373e-07, + "loss": 0.6676, + "step": 30409 + }, + { + "epoch": 0.9320215765600098, + "grad_norm": 0.690262717527468, + "learning_rate": 2.4138919765233635e-07, + "loss": 0.4968, + "step": 30410 + }, + { + "epoch": 0.932052225082751, + "grad_norm": 1.8525128385026803, + "learning_rate": 2.411724618426081e-07, + "loss": 0.6633, + "step": 30411 + }, + { + "epoch": 0.9320828736054922, + "grad_norm": 1.8031856687940275, + "learning_rate": 2.4095582218910174e-07, + "loss": 0.6114, + "step": 30412 + }, + { + "epoch": 0.9321135221282334, + "grad_norm": 1.7405295238101488, + "learning_rate": 2.4073927869395773e-07, + "loss": 0.5948, + "step": 30413 + }, + { + "epoch": 0.9321441706509747, + "grad_norm": 1.7915768287394715, + "learning_rate": 2.4052283135930665e-07, + "loss": 0.6522, + "step": 30414 + }, + { + "epoch": 0.9321748191737158, + "grad_norm": 1.737246333023106, + "learning_rate": 2.40306480187279e-07, + "loss": 0.6897, + "step": 30415 + }, + { + "epoch": 0.9322054676964571, + "grad_norm": 1.795499408988413, + "learning_rate": 2.400902251800097e-07, + "loss": 0.6272, + "step": 30416 + }, + { + "epoch": 0.9322361162191982, + "grad_norm": 1.8994064177645997, + "learning_rate": 2.3987406633962815e-07, + "loss": 0.7304, + "step": 30417 + }, + { + "epoch": 0.9322667647419395, + "grad_norm": 1.6828719615253633, + "learning_rate": 2.396580036682661e-07, + "loss": 0.6144, + "step": 30418 + }, + { + "epoch": 0.9322974132646806, + "grad_norm": 1.5430193700605561, + "learning_rate": 2.394420371680495e-07, + "loss": 0.5967, + "step": 30419 + }, + { + "epoch": 0.9323280617874219, + "grad_norm": 1.7943733349588287, + "learning_rate": 2.3922616684110887e-07, + "loss": 0.7528, + "step": 30420 + }, + { + "epoch": 0.932358710310163, + "grad_norm": 1.66331503077077, + "learning_rate": 2.390103926895704e-07, + "loss": 0.6811, + "step": 30421 + }, + { + "epoch": 0.9323893588329043, + "grad_norm": 1.5486460451583601, + "learning_rate": 2.3879471471556e-07, + "loss": 0.5864, + "step": 30422 + }, + { + "epoch": 0.9324200073556455, + "grad_norm": 1.7709105058565624, + "learning_rate": 2.385791329212006e-07, + "loss": 0.5655, + "step": 30423 + }, + { + "epoch": 0.9324506558783867, + "grad_norm": 1.6244922258099874, + "learning_rate": 2.383636473086215e-07, + "loss": 0.6261, + "step": 30424 + }, + { + "epoch": 0.9324813044011279, + "grad_norm": 0.7258387755324308, + "learning_rate": 2.38148257879941e-07, + "loss": 0.5312, + "step": 30425 + }, + { + "epoch": 0.932511952923869, + "grad_norm": 1.6641347150156425, + "learning_rate": 2.379329646372841e-07, + "loss": 0.6226, + "step": 30426 + }, + { + "epoch": 0.9325426014466103, + "grad_norm": 0.6786254959119464, + "learning_rate": 2.377177675827713e-07, + "loss": 0.5192, + "step": 30427 + }, + { + "epoch": 0.9325732499693514, + "grad_norm": 1.7778603679391736, + "learning_rate": 2.3750266671852319e-07, + "loss": 0.6369, + "step": 30428 + }, + { + "epoch": 0.9326038984920927, + "grad_norm": 1.9286270495159274, + "learning_rate": 2.372876620466602e-07, + "loss": 0.5245, + "step": 30429 + }, + { + "epoch": 0.9326345470148338, + "grad_norm": 1.9820840149402226, + "learning_rate": 2.370727535692985e-07, + "loss": 0.6106, + "step": 30430 + }, + { + "epoch": 0.9326651955375751, + "grad_norm": 0.6849481163571226, + "learning_rate": 2.3685794128855632e-07, + "loss": 0.4895, + "step": 30431 + }, + { + "epoch": 0.9326958440603162, + "grad_norm": 1.8557273674385728, + "learning_rate": 2.3664322520655203e-07, + "loss": 0.6016, + "step": 30432 + }, + { + "epoch": 0.9327264925830575, + "grad_norm": 1.6546201487954706, + "learning_rate": 2.3642860532539946e-07, + "loss": 0.693, + "step": 30433 + }, + { + "epoch": 0.9327571411057987, + "grad_norm": 0.6753558984273446, + "learning_rate": 2.362140816472147e-07, + "loss": 0.5054, + "step": 30434 + }, + { + "epoch": 0.9327877896285399, + "grad_norm": 1.7537793643043287, + "learning_rate": 2.3599965417411052e-07, + "loss": 0.5851, + "step": 30435 + }, + { + "epoch": 0.9328184381512811, + "grad_norm": 1.5074425323534943, + "learning_rate": 2.3578532290819968e-07, + "loss": 0.5827, + "step": 30436 + }, + { + "epoch": 0.9328490866740223, + "grad_norm": 1.7500159357367113, + "learning_rate": 2.355710878515949e-07, + "loss": 0.5743, + "step": 30437 + }, + { + "epoch": 0.9328797351967635, + "grad_norm": 1.6301688830230883, + "learning_rate": 2.353569490064056e-07, + "loss": 0.679, + "step": 30438 + }, + { + "epoch": 0.9329103837195047, + "grad_norm": 1.6552178961666455, + "learning_rate": 2.3514290637474345e-07, + "loss": 0.5914, + "step": 30439 + }, + { + "epoch": 0.9329410322422459, + "grad_norm": 1.616919696447508, + "learning_rate": 2.349289599587168e-07, + "loss": 0.6411, + "step": 30440 + }, + { + "epoch": 0.9329716807649872, + "grad_norm": 0.6620033345203564, + "learning_rate": 2.3471510976043277e-07, + "loss": 0.5142, + "step": 30441 + }, + { + "epoch": 0.9330023292877283, + "grad_norm": 1.8605310363570995, + "learning_rate": 2.3450135578199972e-07, + "loss": 0.6741, + "step": 30442 + }, + { + "epoch": 0.9330329778104696, + "grad_norm": 1.8345483039003443, + "learning_rate": 2.3428769802552375e-07, + "loss": 0.6388, + "step": 30443 + }, + { + "epoch": 0.9330636263332107, + "grad_norm": 1.574623429801772, + "learning_rate": 2.3407413649310984e-07, + "loss": 0.5414, + "step": 30444 + }, + { + "epoch": 0.933094274855952, + "grad_norm": 1.7752778568981087, + "learning_rate": 2.3386067118686074e-07, + "loss": 0.6961, + "step": 30445 + }, + { + "epoch": 0.9331249233786931, + "grad_norm": 1.5863725687044787, + "learning_rate": 2.3364730210888363e-07, + "loss": 0.5799, + "step": 30446 + }, + { + "epoch": 0.9331555719014344, + "grad_norm": 1.8118556990803671, + "learning_rate": 2.334340292612769e-07, + "loss": 0.6606, + "step": 30447 + }, + { + "epoch": 0.9331862204241755, + "grad_norm": 1.9583445528323462, + "learning_rate": 2.3322085264614435e-07, + "loss": 0.6606, + "step": 30448 + }, + { + "epoch": 0.9332168689469168, + "grad_norm": 0.6588692950266398, + "learning_rate": 2.3300777226558436e-07, + "loss": 0.5011, + "step": 30449 + }, + { + "epoch": 0.933247517469658, + "grad_norm": 1.6582624407069466, + "learning_rate": 2.3279478812169853e-07, + "loss": 0.6652, + "step": 30450 + }, + { + "epoch": 0.9332781659923992, + "grad_norm": 1.6843115526271564, + "learning_rate": 2.3258190021658523e-07, + "loss": 0.5947, + "step": 30451 + }, + { + "epoch": 0.9333088145151404, + "grad_norm": 1.5068176228324925, + "learning_rate": 2.3236910855234053e-07, + "loss": 0.5497, + "step": 30452 + }, + { + "epoch": 0.9333394630378816, + "grad_norm": 1.6446775857736464, + "learning_rate": 2.3215641313106275e-07, + "loss": 0.5803, + "step": 30453 + }, + { + "epoch": 0.9333701115606228, + "grad_norm": 1.743263516743064, + "learning_rate": 2.3194381395484689e-07, + "loss": 0.588, + "step": 30454 + }, + { + "epoch": 0.933400760083364, + "grad_norm": 1.8762180802680317, + "learning_rate": 2.3173131102578793e-07, + "loss": 0.6743, + "step": 30455 + }, + { + "epoch": 0.9334314086061052, + "grad_norm": 0.6810229954943838, + "learning_rate": 2.315189043459809e-07, + "loss": 0.5116, + "step": 30456 + }, + { + "epoch": 0.9334620571288463, + "grad_norm": 1.9587044497864174, + "learning_rate": 2.313065939175152e-07, + "loss": 0.6803, + "step": 30457 + }, + { + "epoch": 0.9334927056515876, + "grad_norm": 1.7495356881867077, + "learning_rate": 2.3109437974248583e-07, + "loss": 0.6855, + "step": 30458 + }, + { + "epoch": 0.9335233541743287, + "grad_norm": 0.6712635545745907, + "learning_rate": 2.3088226182298445e-07, + "loss": 0.5106, + "step": 30459 + }, + { + "epoch": 0.93355400269707, + "grad_norm": 1.6196891724532116, + "learning_rate": 2.306702401610983e-07, + "loss": 0.6668, + "step": 30460 + }, + { + "epoch": 0.9335846512198112, + "grad_norm": 0.6558124412741764, + "learning_rate": 2.304583147589179e-07, + "loss": 0.5228, + "step": 30461 + }, + { + "epoch": 0.9336152997425524, + "grad_norm": 1.6676016437894619, + "learning_rate": 2.302464856185327e-07, + "loss": 0.6039, + "step": 30462 + }, + { + "epoch": 0.9336459482652936, + "grad_norm": 1.6421718400164809, + "learning_rate": 2.3003475274202657e-07, + "loss": 0.6601, + "step": 30463 + }, + { + "epoch": 0.9336765967880348, + "grad_norm": 1.6732080930665658, + "learning_rate": 2.29823116131489e-07, + "loss": 0.646, + "step": 30464 + }, + { + "epoch": 0.933707245310776, + "grad_norm": 0.7163983244069051, + "learning_rate": 2.2961157578900383e-07, + "loss": 0.5186, + "step": 30465 + }, + { + "epoch": 0.9337378938335172, + "grad_norm": 1.7089139432768168, + "learning_rate": 2.29400131716655e-07, + "loss": 0.6464, + "step": 30466 + }, + { + "epoch": 0.9337685423562584, + "grad_norm": 1.8125270633993131, + "learning_rate": 2.2918878391652854e-07, + "loss": 0.6867, + "step": 30467 + }, + { + "epoch": 0.9337991908789997, + "grad_norm": 1.5193019775080634, + "learning_rate": 2.2897753239070286e-07, + "loss": 0.6177, + "step": 30468 + }, + { + "epoch": 0.9338298394017408, + "grad_norm": 1.7434992652406622, + "learning_rate": 2.2876637714126182e-07, + "loss": 0.6472, + "step": 30469 + }, + { + "epoch": 0.9338604879244821, + "grad_norm": 1.728890898012457, + "learning_rate": 2.285553181702871e-07, + "loss": 0.6592, + "step": 30470 + }, + { + "epoch": 0.9338911364472232, + "grad_norm": 1.5836709161742808, + "learning_rate": 2.283443554798559e-07, + "loss": 0.6803, + "step": 30471 + }, + { + "epoch": 0.9339217849699645, + "grad_norm": 1.8969805124778234, + "learning_rate": 2.281334890720477e-07, + "loss": 0.7495, + "step": 30472 + }, + { + "epoch": 0.9339524334927056, + "grad_norm": 0.6854734819468918, + "learning_rate": 2.2792271894894192e-07, + "loss": 0.526, + "step": 30473 + }, + { + "epoch": 0.9339830820154469, + "grad_norm": 1.5867773600267971, + "learning_rate": 2.2771204511261247e-07, + "loss": 0.614, + "step": 30474 + }, + { + "epoch": 0.934013730538188, + "grad_norm": 1.577611422818222, + "learning_rate": 2.275014675651366e-07, + "loss": 0.6286, + "step": 30475 + }, + { + "epoch": 0.9340443790609293, + "grad_norm": 1.7836923313751978, + "learning_rate": 2.2729098630859038e-07, + "loss": 0.6511, + "step": 30476 + }, + { + "epoch": 0.9340750275836704, + "grad_norm": 1.5795692605763996, + "learning_rate": 2.270806013450455e-07, + "loss": 0.6751, + "step": 30477 + }, + { + "epoch": 0.9341056761064117, + "grad_norm": 1.8965188313821382, + "learning_rate": 2.26870312676577e-07, + "loss": 0.586, + "step": 30478 + }, + { + "epoch": 0.9341363246291529, + "grad_norm": 1.7826683787136877, + "learning_rate": 2.2666012030525318e-07, + "loss": 0.632, + "step": 30479 + }, + { + "epoch": 0.9341669731518941, + "grad_norm": 1.5664094276748226, + "learning_rate": 2.2645002423315132e-07, + "loss": 0.5664, + "step": 30480 + }, + { + "epoch": 0.9341976216746353, + "grad_norm": 1.558255734271246, + "learning_rate": 2.262400244623364e-07, + "loss": 0.5249, + "step": 30481 + }, + { + "epoch": 0.9342282701973765, + "grad_norm": 1.7066046765826413, + "learning_rate": 2.2603012099487898e-07, + "loss": 0.694, + "step": 30482 + }, + { + "epoch": 0.9342589187201177, + "grad_norm": 0.6941431486823593, + "learning_rate": 2.258203138328474e-07, + "loss": 0.5245, + "step": 30483 + }, + { + "epoch": 0.9342895672428589, + "grad_norm": 1.5816581177904214, + "learning_rate": 2.2561060297831006e-07, + "loss": 0.6421, + "step": 30484 + }, + { + "epoch": 0.9343202157656001, + "grad_norm": 1.7676171383213493, + "learning_rate": 2.2540098843333192e-07, + "loss": 0.6464, + "step": 30485 + }, + { + "epoch": 0.9343508642883414, + "grad_norm": 1.6603450046774526, + "learning_rate": 2.25191470199978e-07, + "loss": 0.6415, + "step": 30486 + }, + { + "epoch": 0.9343815128110825, + "grad_norm": 2.083324773146432, + "learning_rate": 2.2498204828031445e-07, + "loss": 0.732, + "step": 30487 + }, + { + "epoch": 0.9344121613338237, + "grad_norm": 0.6571865667585057, + "learning_rate": 2.2477272267640403e-07, + "loss": 0.5012, + "step": 30488 + }, + { + "epoch": 0.9344428098565649, + "grad_norm": 1.8676703515325273, + "learning_rate": 2.245634933903096e-07, + "loss": 0.5709, + "step": 30489 + }, + { + "epoch": 0.9344734583793061, + "grad_norm": 1.7548593662037764, + "learning_rate": 2.2435436042408942e-07, + "loss": 0.7321, + "step": 30490 + }, + { + "epoch": 0.9345041069020473, + "grad_norm": 0.6576109317880979, + "learning_rate": 2.241453237798097e-07, + "loss": 0.5112, + "step": 30491 + }, + { + "epoch": 0.9345347554247885, + "grad_norm": 1.4835584803933473, + "learning_rate": 2.239363834595265e-07, + "loss": 0.6261, + "step": 30492 + }, + { + "epoch": 0.9345654039475297, + "grad_norm": 1.6055650986111636, + "learning_rate": 2.2372753946529934e-07, + "loss": 0.6482, + "step": 30493 + }, + { + "epoch": 0.9345960524702709, + "grad_norm": 1.7499672479034838, + "learning_rate": 2.2351879179918656e-07, + "loss": 0.6125, + "step": 30494 + }, + { + "epoch": 0.9346267009930121, + "grad_norm": 1.7481564916106027, + "learning_rate": 2.233101404632443e-07, + "loss": 0.6463, + "step": 30495 + }, + { + "epoch": 0.9346573495157533, + "grad_norm": 1.7300365538417293, + "learning_rate": 2.2310158545952865e-07, + "loss": 0.6156, + "step": 30496 + }, + { + "epoch": 0.9346879980384946, + "grad_norm": 0.679935940313596, + "learning_rate": 2.2289312679009356e-07, + "loss": 0.5374, + "step": 30497 + }, + { + "epoch": 0.9347186465612357, + "grad_norm": 1.6082774857280024, + "learning_rate": 2.2268476445699516e-07, + "loss": 0.5601, + "step": 30498 + }, + { + "epoch": 0.934749295083977, + "grad_norm": 1.695770323859518, + "learning_rate": 2.2247649846228514e-07, + "loss": 0.5501, + "step": 30499 + }, + { + "epoch": 0.9347799436067181, + "grad_norm": 1.876689803110316, + "learning_rate": 2.222683288080163e-07, + "loss": 0.6461, + "step": 30500 + }, + { + "epoch": 0.9348105921294594, + "grad_norm": 1.8311598208389281, + "learning_rate": 2.2206025549623922e-07, + "loss": 0.645, + "step": 30501 + }, + { + "epoch": 0.9348412406522005, + "grad_norm": 1.6525989836601929, + "learning_rate": 2.2185227852900339e-07, + "loss": 0.5692, + "step": 30502 + }, + { + "epoch": 0.9348718891749418, + "grad_norm": 1.9231465997656325, + "learning_rate": 2.2164439790836044e-07, + "loss": 0.6932, + "step": 30503 + }, + { + "epoch": 0.934902537697683, + "grad_norm": 1.7532448616528258, + "learning_rate": 2.214366136363555e-07, + "loss": 0.5827, + "step": 30504 + }, + { + "epoch": 0.9349331862204242, + "grad_norm": 0.6851617151446876, + "learning_rate": 2.2122892571503794e-07, + "loss": 0.5222, + "step": 30505 + }, + { + "epoch": 0.9349638347431654, + "grad_norm": 0.6961482084662085, + "learning_rate": 2.2102133414645398e-07, + "loss": 0.5257, + "step": 30506 + }, + { + "epoch": 0.9349944832659066, + "grad_norm": 1.7768078034917822, + "learning_rate": 2.2081383893264974e-07, + "loss": 0.6527, + "step": 30507 + }, + { + "epoch": 0.9350251317886478, + "grad_norm": 1.8355091011377846, + "learning_rate": 2.2060644007566912e-07, + "loss": 0.635, + "step": 30508 + }, + { + "epoch": 0.935055780311389, + "grad_norm": 0.6798478045956362, + "learning_rate": 2.203991375775527e-07, + "loss": 0.5162, + "step": 30509 + }, + { + "epoch": 0.9350864288341302, + "grad_norm": 0.6981352228761311, + "learning_rate": 2.201919314403489e-07, + "loss": 0.5311, + "step": 30510 + }, + { + "epoch": 0.9351170773568714, + "grad_norm": 1.5607067981975609, + "learning_rate": 2.199848216660949e-07, + "loss": 0.6172, + "step": 30511 + }, + { + "epoch": 0.9351477258796126, + "grad_norm": 1.7260812288121636, + "learning_rate": 2.1977780825683248e-07, + "loss": 0.6399, + "step": 30512 + }, + { + "epoch": 0.9351783744023539, + "grad_norm": 1.6858416265867369, + "learning_rate": 2.1957089121460218e-07, + "loss": 0.5814, + "step": 30513 + }, + { + "epoch": 0.935209022925095, + "grad_norm": 1.8308966381876701, + "learning_rate": 2.1936407054144238e-07, + "loss": 0.7093, + "step": 30514 + }, + { + "epoch": 0.9352396714478363, + "grad_norm": 1.6053794418345573, + "learning_rate": 2.1915734623939032e-07, + "loss": 0.6696, + "step": 30515 + }, + { + "epoch": 0.9352703199705774, + "grad_norm": 1.7557750094015296, + "learning_rate": 2.189507183104833e-07, + "loss": 0.5973, + "step": 30516 + }, + { + "epoch": 0.9353009684933187, + "grad_norm": 1.8543890530089422, + "learning_rate": 2.1874418675675745e-07, + "loss": 0.6855, + "step": 30517 + }, + { + "epoch": 0.9353316170160598, + "grad_norm": 1.743743903942582, + "learning_rate": 2.1853775158024893e-07, + "loss": 0.6396, + "step": 30518 + }, + { + "epoch": 0.935362265538801, + "grad_norm": 1.6512492720103515, + "learning_rate": 2.1833141278299052e-07, + "loss": 0.6888, + "step": 30519 + }, + { + "epoch": 0.9353929140615422, + "grad_norm": 0.6791851854675527, + "learning_rate": 2.1812517036701396e-07, + "loss": 0.5228, + "step": 30520 + }, + { + "epoch": 0.9354235625842834, + "grad_norm": 1.6139481566132516, + "learning_rate": 2.179190243343543e-07, + "loss": 0.571, + "step": 30521 + }, + { + "epoch": 0.9354542111070246, + "grad_norm": 1.775214304289078, + "learning_rate": 2.177129746870421e-07, + "loss": 0.626, + "step": 30522 + }, + { + "epoch": 0.9354848596297658, + "grad_norm": 1.7200623830666073, + "learning_rate": 2.1750702142710468e-07, + "loss": 0.6662, + "step": 30523 + }, + { + "epoch": 0.9355155081525071, + "grad_norm": 1.7517190446074293, + "learning_rate": 2.173011645565748e-07, + "loss": 0.5969, + "step": 30524 + }, + { + "epoch": 0.9355461566752482, + "grad_norm": 1.7326283962401923, + "learning_rate": 2.1709540407747864e-07, + "loss": 0.6555, + "step": 30525 + }, + { + "epoch": 0.9355768051979895, + "grad_norm": 1.9204183635876793, + "learning_rate": 2.168897399918457e-07, + "loss": 0.708, + "step": 30526 + }, + { + "epoch": 0.9356074537207306, + "grad_norm": 1.7245580341327953, + "learning_rate": 2.1668417230169993e-07, + "loss": 0.6457, + "step": 30527 + }, + { + "epoch": 0.9356381022434719, + "grad_norm": 1.6511358906194362, + "learning_rate": 2.1647870100906854e-07, + "loss": 0.5906, + "step": 30528 + }, + { + "epoch": 0.935668750766213, + "grad_norm": 1.714953982800671, + "learning_rate": 2.162733261159766e-07, + "loss": 0.7115, + "step": 30529 + }, + { + "epoch": 0.9356993992889543, + "grad_norm": 2.005438710812585, + "learning_rate": 2.16068047624447e-07, + "loss": 0.7764, + "step": 30530 + }, + { + "epoch": 0.9357300478116954, + "grad_norm": 1.8849308095190251, + "learning_rate": 2.1586286553650137e-07, + "loss": 0.6386, + "step": 30531 + }, + { + "epoch": 0.9357606963344367, + "grad_norm": 1.5107756700690316, + "learning_rate": 2.1565777985416259e-07, + "loss": 0.6734, + "step": 30532 + }, + { + "epoch": 0.9357913448571779, + "grad_norm": 0.6409367773718708, + "learning_rate": 2.1545279057945124e-07, + "loss": 0.5175, + "step": 30533 + }, + { + "epoch": 0.9358219933799191, + "grad_norm": 0.6677841555780134, + "learning_rate": 2.152478977143868e-07, + "loss": 0.503, + "step": 30534 + }, + { + "epoch": 0.9358526419026603, + "grad_norm": 1.959900024266421, + "learning_rate": 2.1504310126098882e-07, + "loss": 0.6473, + "step": 30535 + }, + { + "epoch": 0.9358832904254015, + "grad_norm": 1.5145341265920893, + "learning_rate": 2.1483840122127341e-07, + "loss": 0.5857, + "step": 30536 + }, + { + "epoch": 0.9359139389481427, + "grad_norm": 2.0810066386012553, + "learning_rate": 2.1463379759726121e-07, + "loss": 0.5857, + "step": 30537 + }, + { + "epoch": 0.9359445874708839, + "grad_norm": 0.6656974043779451, + "learning_rate": 2.1442929039096395e-07, + "loss": 0.5055, + "step": 30538 + }, + { + "epoch": 0.9359752359936251, + "grad_norm": 1.5018928619860903, + "learning_rate": 2.1422487960439886e-07, + "loss": 0.6112, + "step": 30539 + }, + { + "epoch": 0.9360058845163663, + "grad_norm": 1.7532774653040946, + "learning_rate": 2.1402056523958104e-07, + "loss": 0.6892, + "step": 30540 + }, + { + "epoch": 0.9360365330391075, + "grad_norm": 2.0202981433672584, + "learning_rate": 2.1381634729852218e-07, + "loss": 0.6832, + "step": 30541 + }, + { + "epoch": 0.9360671815618488, + "grad_norm": 1.7798208295294806, + "learning_rate": 2.1361222578323293e-07, + "loss": 0.5994, + "step": 30542 + }, + { + "epoch": 0.9360978300845899, + "grad_norm": 1.7330409319969686, + "learning_rate": 2.134082006957283e-07, + "loss": 0.6524, + "step": 30543 + }, + { + "epoch": 0.9361284786073312, + "grad_norm": 1.9627349075486846, + "learning_rate": 2.1320427203801565e-07, + "loss": 0.6764, + "step": 30544 + }, + { + "epoch": 0.9361591271300723, + "grad_norm": 1.6728429879535942, + "learning_rate": 2.130004398121066e-07, + "loss": 0.6597, + "step": 30545 + }, + { + "epoch": 0.9361897756528136, + "grad_norm": 1.6233541154107072, + "learning_rate": 2.127967040200063e-07, + "loss": 0.6356, + "step": 30546 + }, + { + "epoch": 0.9362204241755547, + "grad_norm": 1.6948251153389629, + "learning_rate": 2.125930646637253e-07, + "loss": 0.6182, + "step": 30547 + }, + { + "epoch": 0.936251072698296, + "grad_norm": 1.746773597876938, + "learning_rate": 2.1238952174526982e-07, + "loss": 0.6119, + "step": 30548 + }, + { + "epoch": 0.9362817212210371, + "grad_norm": 1.6402232616389278, + "learning_rate": 2.121860752666438e-07, + "loss": 0.6112, + "step": 30549 + }, + { + "epoch": 0.9363123697437783, + "grad_norm": 1.7017422408039873, + "learning_rate": 2.119827252298523e-07, + "loss": 0.6805, + "step": 30550 + }, + { + "epoch": 0.9363430182665196, + "grad_norm": 1.9034732565323604, + "learning_rate": 2.1177947163690037e-07, + "loss": 0.5908, + "step": 30551 + }, + { + "epoch": 0.9363736667892607, + "grad_norm": 0.6706462711890453, + "learning_rate": 2.1157631448978978e-07, + "loss": 0.5048, + "step": 30552 + }, + { + "epoch": 0.936404315312002, + "grad_norm": 1.6448297141900665, + "learning_rate": 2.113732537905222e-07, + "loss": 0.6801, + "step": 30553 + }, + { + "epoch": 0.9364349638347431, + "grad_norm": 1.9667337611983329, + "learning_rate": 2.111702895410972e-07, + "loss": 0.6207, + "step": 30554 + }, + { + "epoch": 0.9364656123574844, + "grad_norm": 1.7155831350330588, + "learning_rate": 2.1096742174351647e-07, + "loss": 0.6699, + "step": 30555 + }, + { + "epoch": 0.9364962608802255, + "grad_norm": 1.5033572305975587, + "learning_rate": 2.1076465039977956e-07, + "loss": 0.5668, + "step": 30556 + }, + { + "epoch": 0.9365269094029668, + "grad_norm": 1.7296638473801274, + "learning_rate": 2.1056197551188262e-07, + "loss": 0.619, + "step": 30557 + }, + { + "epoch": 0.9365575579257079, + "grad_norm": 1.6990741424217664, + "learning_rate": 2.1035939708182184e-07, + "loss": 0.6515, + "step": 30558 + }, + { + "epoch": 0.9365882064484492, + "grad_norm": 1.8443522561614671, + "learning_rate": 2.1015691511159675e-07, + "loss": 0.7012, + "step": 30559 + }, + { + "epoch": 0.9366188549711904, + "grad_norm": 1.712145279072477, + "learning_rate": 2.0995452960319907e-07, + "loss": 0.583, + "step": 30560 + }, + { + "epoch": 0.9366495034939316, + "grad_norm": 0.6713427554685186, + "learning_rate": 2.0975224055862499e-07, + "loss": 0.4957, + "step": 30561 + }, + { + "epoch": 0.9366801520166728, + "grad_norm": 1.9123214052104893, + "learning_rate": 2.0955004797986733e-07, + "loss": 0.6589, + "step": 30562 + }, + { + "epoch": 0.936710800539414, + "grad_norm": 1.5755639139549398, + "learning_rate": 2.0934795186891677e-07, + "loss": 0.5813, + "step": 30563 + }, + { + "epoch": 0.9367414490621552, + "grad_norm": 0.676206838373896, + "learning_rate": 2.0914595222776724e-07, + "loss": 0.4858, + "step": 30564 + }, + { + "epoch": 0.9367720975848964, + "grad_norm": 1.650335224738484, + "learning_rate": 2.0894404905840714e-07, + "loss": 0.6252, + "step": 30565 + }, + { + "epoch": 0.9368027461076376, + "grad_norm": 1.9763675384174, + "learning_rate": 2.0874224236282604e-07, + "loss": 0.5457, + "step": 30566 + }, + { + "epoch": 0.9368333946303788, + "grad_norm": 1.960193164212852, + "learning_rate": 2.085405321430134e-07, + "loss": 0.593, + "step": 30567 + }, + { + "epoch": 0.93686404315312, + "grad_norm": 1.6231022098831804, + "learning_rate": 2.0833891840095542e-07, + "loss": 0.566, + "step": 30568 + }, + { + "epoch": 0.9368946916758613, + "grad_norm": 1.6225936582362155, + "learning_rate": 2.0813740113864056e-07, + "loss": 0.6384, + "step": 30569 + }, + { + "epoch": 0.9369253401986024, + "grad_norm": 1.7335178953573391, + "learning_rate": 2.0793598035805274e-07, + "loss": 0.6255, + "step": 30570 + }, + { + "epoch": 0.9369559887213437, + "grad_norm": 0.7022294615487908, + "learning_rate": 2.0773465606117703e-07, + "loss": 0.5308, + "step": 30571 + }, + { + "epoch": 0.9369866372440848, + "grad_norm": 1.7445129904844787, + "learning_rate": 2.0753342824999635e-07, + "loss": 0.7231, + "step": 30572 + }, + { + "epoch": 0.9370172857668261, + "grad_norm": 1.7753566562499354, + "learning_rate": 2.073322969264957e-07, + "loss": 0.6693, + "step": 30573 + }, + { + "epoch": 0.9370479342895672, + "grad_norm": 0.7215662759869533, + "learning_rate": 2.0713126209265466e-07, + "loss": 0.5453, + "step": 30574 + }, + { + "epoch": 0.9370785828123085, + "grad_norm": 0.6660951934216213, + "learning_rate": 2.0693032375045607e-07, + "loss": 0.5079, + "step": 30575 + }, + { + "epoch": 0.9371092313350496, + "grad_norm": 1.540757697228514, + "learning_rate": 2.0672948190187724e-07, + "loss": 0.6147, + "step": 30576 + }, + { + "epoch": 0.9371398798577909, + "grad_norm": 1.813088068472838, + "learning_rate": 2.0652873654889882e-07, + "loss": 0.6626, + "step": 30577 + }, + { + "epoch": 0.937170528380532, + "grad_norm": 1.8323854604596665, + "learning_rate": 2.0632808769349922e-07, + "loss": 0.6059, + "step": 30578 + }, + { + "epoch": 0.9372011769032733, + "grad_norm": 1.7385335974463758, + "learning_rate": 2.061275353376546e-07, + "loss": 0.6584, + "step": 30579 + }, + { + "epoch": 0.9372318254260145, + "grad_norm": 1.8671956886806658, + "learning_rate": 2.0592707948334012e-07, + "loss": 0.6611, + "step": 30580 + }, + { + "epoch": 0.9372624739487556, + "grad_norm": 0.6868573796922655, + "learning_rate": 2.0572672013253415e-07, + "loss": 0.5173, + "step": 30581 + }, + { + "epoch": 0.9372931224714969, + "grad_norm": 0.6530763011512245, + "learning_rate": 2.0552645728720733e-07, + "loss": 0.5192, + "step": 30582 + }, + { + "epoch": 0.937323770994238, + "grad_norm": 1.766376219286547, + "learning_rate": 2.0532629094933366e-07, + "loss": 0.6533, + "step": 30583 + }, + { + "epoch": 0.9373544195169793, + "grad_norm": 1.9789668174538129, + "learning_rate": 2.051262211208882e-07, + "loss": 0.5235, + "step": 30584 + }, + { + "epoch": 0.9373850680397204, + "grad_norm": 1.8330932192765073, + "learning_rate": 2.049262478038383e-07, + "loss": 0.6242, + "step": 30585 + }, + { + "epoch": 0.9374157165624617, + "grad_norm": 1.8040959219133355, + "learning_rate": 2.0472637100015792e-07, + "loss": 0.6161, + "step": 30586 + }, + { + "epoch": 0.9374463650852028, + "grad_norm": 0.6433197677498208, + "learning_rate": 2.0452659071181214e-07, + "loss": 0.4733, + "step": 30587 + }, + { + "epoch": 0.9374770136079441, + "grad_norm": 1.649338868374915, + "learning_rate": 2.0432690694077496e-07, + "loss": 0.6191, + "step": 30588 + }, + { + "epoch": 0.9375076621306853, + "grad_norm": 1.6830231431976392, + "learning_rate": 2.0412731968901033e-07, + "loss": 0.6683, + "step": 30589 + }, + { + "epoch": 0.9375383106534265, + "grad_norm": 1.7089209867301147, + "learning_rate": 2.0392782895848563e-07, + "loss": 0.6642, + "step": 30590 + }, + { + "epoch": 0.9375689591761677, + "grad_norm": 1.9587943052120695, + "learning_rate": 2.0372843475116589e-07, + "loss": 0.8619, + "step": 30591 + }, + { + "epoch": 0.9375996076989089, + "grad_norm": 0.6562331393712623, + "learning_rate": 2.0352913706901623e-07, + "loss": 0.5242, + "step": 30592 + }, + { + "epoch": 0.9376302562216501, + "grad_norm": 0.6744148429647084, + "learning_rate": 2.0332993591400063e-07, + "loss": 0.5526, + "step": 30593 + }, + { + "epoch": 0.9376609047443913, + "grad_norm": 1.6754404018234805, + "learning_rate": 2.0313083128808198e-07, + "loss": 0.6058, + "step": 30594 + }, + { + "epoch": 0.9376915532671325, + "grad_norm": 1.8448607522171487, + "learning_rate": 2.0293182319322314e-07, + "loss": 0.7841, + "step": 30595 + }, + { + "epoch": 0.9377222017898738, + "grad_norm": 1.778009148022056, + "learning_rate": 2.0273291163138142e-07, + "loss": 0.675, + "step": 30596 + }, + { + "epoch": 0.9377528503126149, + "grad_norm": 1.5658779412697084, + "learning_rate": 2.0253409660452083e-07, + "loss": 0.5982, + "step": 30597 + }, + { + "epoch": 0.9377834988353562, + "grad_norm": 1.5841409110412121, + "learning_rate": 2.023353781145976e-07, + "loss": 0.6415, + "step": 30598 + }, + { + "epoch": 0.9378141473580973, + "grad_norm": 0.6517631704035276, + "learning_rate": 2.0213675616357121e-07, + "loss": 0.5216, + "step": 30599 + }, + { + "epoch": 0.9378447958808386, + "grad_norm": 1.6779262714351575, + "learning_rate": 2.0193823075339902e-07, + "loss": 0.5752, + "step": 30600 + }, + { + "epoch": 0.9378754444035797, + "grad_norm": 1.7535570288237283, + "learning_rate": 2.0173980188603503e-07, + "loss": 0.5801, + "step": 30601 + }, + { + "epoch": 0.937906092926321, + "grad_norm": 1.910039982182691, + "learning_rate": 2.0154146956343546e-07, + "loss": 0.6775, + "step": 30602 + }, + { + "epoch": 0.9379367414490621, + "grad_norm": 1.5550794567630024, + "learning_rate": 2.013432337875565e-07, + "loss": 0.6183, + "step": 30603 + }, + { + "epoch": 0.9379673899718034, + "grad_norm": 0.6702681870297585, + "learning_rate": 2.011450945603488e-07, + "loss": 0.5234, + "step": 30604 + }, + { + "epoch": 0.9379980384945446, + "grad_norm": 1.5905901823675386, + "learning_rate": 2.009470518837664e-07, + "loss": 0.5767, + "step": 30605 + }, + { + "epoch": 0.9380286870172858, + "grad_norm": 1.7446267046493587, + "learning_rate": 2.007491057597577e-07, + "loss": 0.6846, + "step": 30606 + }, + { + "epoch": 0.938059335540027, + "grad_norm": 1.5763133406293834, + "learning_rate": 2.0055125619027672e-07, + "loss": 0.6217, + "step": 30607 + }, + { + "epoch": 0.9380899840627682, + "grad_norm": 1.6679499875497918, + "learning_rate": 2.0035350317727298e-07, + "loss": 0.6111, + "step": 30608 + }, + { + "epoch": 0.9381206325855094, + "grad_norm": 1.6932101929181202, + "learning_rate": 2.0015584672269161e-07, + "loss": 0.6348, + "step": 30609 + }, + { + "epoch": 0.9381512811082506, + "grad_norm": 1.9527411827149506, + "learning_rate": 1.9995828682848219e-07, + "loss": 0.7183, + "step": 30610 + }, + { + "epoch": 0.9381819296309918, + "grad_norm": 1.627790756341312, + "learning_rate": 1.99760823496592e-07, + "loss": 0.6047, + "step": 30611 + }, + { + "epoch": 0.938212578153733, + "grad_norm": 1.6311959843253216, + "learning_rate": 1.9956345672896504e-07, + "loss": 0.6496, + "step": 30612 + }, + { + "epoch": 0.9382432266764742, + "grad_norm": 1.6572936800341644, + "learning_rate": 1.9936618652754758e-07, + "loss": 0.6719, + "step": 30613 + }, + { + "epoch": 0.9382738751992153, + "grad_norm": 1.8146585932680637, + "learning_rate": 1.9916901289428136e-07, + "loss": 0.6748, + "step": 30614 + }, + { + "epoch": 0.9383045237219566, + "grad_norm": 1.5447845624648864, + "learning_rate": 1.9897193583111264e-07, + "loss": 0.6037, + "step": 30615 + }, + { + "epoch": 0.9383351722446978, + "grad_norm": 1.7631745819325964, + "learning_rate": 1.9877495533998092e-07, + "loss": 0.7469, + "step": 30616 + }, + { + "epoch": 0.938365820767439, + "grad_norm": 1.7430827516907204, + "learning_rate": 1.985780714228247e-07, + "loss": 0.5066, + "step": 30617 + }, + { + "epoch": 0.9383964692901802, + "grad_norm": 1.6099711014562088, + "learning_rate": 1.9838128408158908e-07, + "loss": 0.5704, + "step": 30618 + }, + { + "epoch": 0.9384271178129214, + "grad_norm": 1.8034823706567435, + "learning_rate": 1.9818459331821027e-07, + "loss": 0.5854, + "step": 30619 + }, + { + "epoch": 0.9384577663356626, + "grad_norm": 1.564943822374138, + "learning_rate": 1.9798799913462563e-07, + "loss": 0.5856, + "step": 30620 + }, + { + "epoch": 0.9384884148584038, + "grad_norm": 1.7785058863020695, + "learning_rate": 1.977915015327736e-07, + "loss": 0.671, + "step": 30621 + }, + { + "epoch": 0.938519063381145, + "grad_norm": 1.814869517124345, + "learning_rate": 1.9759510051459042e-07, + "loss": 0.7353, + "step": 30622 + }, + { + "epoch": 0.9385497119038863, + "grad_norm": 1.5389895610582744, + "learning_rate": 1.9739879608201008e-07, + "loss": 0.5666, + "step": 30623 + }, + { + "epoch": 0.9385803604266274, + "grad_norm": 1.6086774482503972, + "learning_rate": 1.972025882369677e-07, + "loss": 0.5345, + "step": 30624 + }, + { + "epoch": 0.9386110089493687, + "grad_norm": 1.7049823937661985, + "learning_rate": 1.9700647698139619e-07, + "loss": 0.6852, + "step": 30625 + }, + { + "epoch": 0.9386416574721098, + "grad_norm": 1.745567142132047, + "learning_rate": 1.9681046231722846e-07, + "loss": 0.6062, + "step": 30626 + }, + { + "epoch": 0.9386723059948511, + "grad_norm": 1.8101793144275404, + "learning_rate": 1.9661454424639625e-07, + "loss": 0.6132, + "step": 30627 + }, + { + "epoch": 0.9387029545175922, + "grad_norm": 1.5951830240710203, + "learning_rate": 1.9641872277082696e-07, + "loss": 0.5736, + "step": 30628 + }, + { + "epoch": 0.9387336030403335, + "grad_norm": 1.7171730205829119, + "learning_rate": 1.9622299789245457e-07, + "loss": 0.7175, + "step": 30629 + }, + { + "epoch": 0.9387642515630746, + "grad_norm": 1.6275405200584872, + "learning_rate": 1.9602736961320535e-07, + "loss": 0.6379, + "step": 30630 + }, + { + "epoch": 0.9387949000858159, + "grad_norm": 0.6710941166483327, + "learning_rate": 1.958318379350055e-07, + "loss": 0.5195, + "step": 30631 + }, + { + "epoch": 0.938825548608557, + "grad_norm": 0.6562856812358931, + "learning_rate": 1.9563640285978346e-07, + "loss": 0.5305, + "step": 30632 + }, + { + "epoch": 0.9388561971312983, + "grad_norm": 0.6678598509059293, + "learning_rate": 1.9544106438946443e-07, + "loss": 0.5223, + "step": 30633 + }, + { + "epoch": 0.9388868456540395, + "grad_norm": 2.0406805710487186, + "learning_rate": 1.9524582252597346e-07, + "loss": 0.6348, + "step": 30634 + }, + { + "epoch": 0.9389174941767807, + "grad_norm": 1.7177050278122254, + "learning_rate": 1.950506772712335e-07, + "loss": 0.5807, + "step": 30635 + }, + { + "epoch": 0.9389481426995219, + "grad_norm": 1.607945638017124, + "learning_rate": 1.9485562862716856e-07, + "loss": 0.6523, + "step": 30636 + }, + { + "epoch": 0.9389787912222631, + "grad_norm": 1.7428652849540158, + "learning_rate": 1.9466067659570042e-07, + "loss": 0.5884, + "step": 30637 + }, + { + "epoch": 0.9390094397450043, + "grad_norm": 1.5624214094533289, + "learning_rate": 1.9446582117874868e-07, + "loss": 0.5328, + "step": 30638 + }, + { + "epoch": 0.9390400882677455, + "grad_norm": 0.6797085099420171, + "learning_rate": 1.942710623782329e-07, + "loss": 0.5226, + "step": 30639 + }, + { + "epoch": 0.9390707367904867, + "grad_norm": 2.147333964320954, + "learning_rate": 1.94076400196076e-07, + "loss": 0.6152, + "step": 30640 + }, + { + "epoch": 0.939101385313228, + "grad_norm": 1.6153774962990959, + "learning_rate": 1.9388183463419085e-07, + "loss": 0.6537, + "step": 30641 + }, + { + "epoch": 0.9391320338359691, + "grad_norm": 1.9158137532739885, + "learning_rate": 1.936873656944982e-07, + "loss": 0.706, + "step": 30642 + }, + { + "epoch": 0.9391626823587104, + "grad_norm": 1.8278116544478282, + "learning_rate": 1.9349299337891315e-07, + "loss": 0.5985, + "step": 30643 + }, + { + "epoch": 0.9391933308814515, + "grad_norm": 1.7316216091599468, + "learning_rate": 1.932987176893497e-07, + "loss": 0.6141, + "step": 30644 + }, + { + "epoch": 0.9392239794041927, + "grad_norm": 1.9147798628168835, + "learning_rate": 1.9310453862772415e-07, + "loss": 0.6229, + "step": 30645 + }, + { + "epoch": 0.9392546279269339, + "grad_norm": 1.6972070256094807, + "learning_rate": 1.9291045619594827e-07, + "loss": 0.6677, + "step": 30646 + }, + { + "epoch": 0.9392852764496751, + "grad_norm": 1.6941530224581856, + "learning_rate": 1.92716470395935e-07, + "loss": 0.6556, + "step": 30647 + }, + { + "epoch": 0.9393159249724163, + "grad_norm": 1.863183073158232, + "learning_rate": 1.9252258122959611e-07, + "loss": 0.6576, + "step": 30648 + }, + { + "epoch": 0.9393465734951575, + "grad_norm": 1.7808269863980721, + "learning_rate": 1.923287886988412e-07, + "loss": 0.6826, + "step": 30649 + }, + { + "epoch": 0.9393772220178987, + "grad_norm": 1.7652476260688634, + "learning_rate": 1.9213509280557985e-07, + "loss": 0.5433, + "step": 30650 + }, + { + "epoch": 0.9394078705406399, + "grad_norm": 1.694957334378324, + "learning_rate": 1.9194149355172055e-07, + "loss": 0.627, + "step": 30651 + }, + { + "epoch": 0.9394385190633812, + "grad_norm": 1.9437243679373464, + "learning_rate": 1.9174799093917173e-07, + "loss": 0.6682, + "step": 30652 + }, + { + "epoch": 0.9394691675861223, + "grad_norm": 1.7170439262538617, + "learning_rate": 1.915545849698397e-07, + "loss": 0.6189, + "step": 30653 + }, + { + "epoch": 0.9394998161088636, + "grad_norm": 0.6877867438112152, + "learning_rate": 1.9136127564562956e-07, + "loss": 0.5346, + "step": 30654 + }, + { + "epoch": 0.9395304646316047, + "grad_norm": 1.9298546501389096, + "learning_rate": 1.9116806296844649e-07, + "loss": 0.6387, + "step": 30655 + }, + { + "epoch": 0.939561113154346, + "grad_norm": 1.751087969862821, + "learning_rate": 1.9097494694019558e-07, + "loss": 0.7338, + "step": 30656 + }, + { + "epoch": 0.9395917616770871, + "grad_norm": 1.9282767112521666, + "learning_rate": 1.9078192756277758e-07, + "loss": 0.6572, + "step": 30657 + }, + { + "epoch": 0.9396224101998284, + "grad_norm": 1.7331078604086334, + "learning_rate": 1.9058900483809318e-07, + "loss": 0.6612, + "step": 30658 + }, + { + "epoch": 0.9396530587225695, + "grad_norm": 0.68348716414892, + "learning_rate": 1.903961787680464e-07, + "loss": 0.5146, + "step": 30659 + }, + { + "epoch": 0.9396837072453108, + "grad_norm": 1.7844051154032181, + "learning_rate": 1.902034493545357e-07, + "loss": 0.6054, + "step": 30660 + }, + { + "epoch": 0.939714355768052, + "grad_norm": 1.900617163158079, + "learning_rate": 1.9001081659946185e-07, + "loss": 0.5965, + "step": 30661 + }, + { + "epoch": 0.9397450042907932, + "grad_norm": 1.8462838191762814, + "learning_rate": 1.8981828050471996e-07, + "loss": 0.7188, + "step": 30662 + }, + { + "epoch": 0.9397756528135344, + "grad_norm": 0.6575013411683615, + "learning_rate": 1.8962584107220849e-07, + "loss": 0.5275, + "step": 30663 + }, + { + "epoch": 0.9398063013362756, + "grad_norm": 0.6523303808369934, + "learning_rate": 1.8943349830382485e-07, + "loss": 0.5098, + "step": 30664 + }, + { + "epoch": 0.9398369498590168, + "grad_norm": 0.6961321076534851, + "learning_rate": 1.8924125220146195e-07, + "loss": 0.5223, + "step": 30665 + }, + { + "epoch": 0.939867598381758, + "grad_norm": 1.8159715081438963, + "learning_rate": 1.8904910276701492e-07, + "loss": 0.6552, + "step": 30666 + }, + { + "epoch": 0.9398982469044992, + "grad_norm": 0.6610105199920305, + "learning_rate": 1.8885705000237898e-07, + "loss": 0.4971, + "step": 30667 + }, + { + "epoch": 0.9399288954272405, + "grad_norm": 1.8353577136677723, + "learning_rate": 1.8866509390944365e-07, + "loss": 0.6672, + "step": 30668 + }, + { + "epoch": 0.9399595439499816, + "grad_norm": 1.7818937511615065, + "learning_rate": 1.884732344901008e-07, + "loss": 0.7265, + "step": 30669 + }, + { + "epoch": 0.9399901924727229, + "grad_norm": 1.888648863911179, + "learning_rate": 1.8828147174624334e-07, + "loss": 0.7036, + "step": 30670 + }, + { + "epoch": 0.940020840995464, + "grad_norm": 0.6679468019822178, + "learning_rate": 1.8808980567975754e-07, + "loss": 0.5239, + "step": 30671 + }, + { + "epoch": 0.9400514895182053, + "grad_norm": 0.658881948123826, + "learning_rate": 1.8789823629253412e-07, + "loss": 0.5143, + "step": 30672 + }, + { + "epoch": 0.9400821380409464, + "grad_norm": 1.6747067337534656, + "learning_rate": 1.8770676358645934e-07, + "loss": 0.6979, + "step": 30673 + }, + { + "epoch": 0.9401127865636877, + "grad_norm": 1.7907569804193577, + "learning_rate": 1.8751538756342058e-07, + "loss": 0.598, + "step": 30674 + }, + { + "epoch": 0.9401434350864288, + "grad_norm": 1.904336051600588, + "learning_rate": 1.873241082253041e-07, + "loss": 0.7309, + "step": 30675 + }, + { + "epoch": 0.94017408360917, + "grad_norm": 1.710808109915782, + "learning_rate": 1.8713292557399286e-07, + "loss": 0.6174, + "step": 30676 + }, + { + "epoch": 0.9402047321319112, + "grad_norm": 0.6916660650598653, + "learning_rate": 1.8694183961137203e-07, + "loss": 0.5077, + "step": 30677 + }, + { + "epoch": 0.9402353806546524, + "grad_norm": 1.881218595966987, + "learning_rate": 1.8675085033932448e-07, + "loss": 0.6793, + "step": 30678 + }, + { + "epoch": 0.9402660291773937, + "grad_norm": 0.6803424311639856, + "learning_rate": 1.86559957759731e-07, + "loss": 0.5182, + "step": 30679 + }, + { + "epoch": 0.9402966777001348, + "grad_norm": 1.8622868448439904, + "learning_rate": 1.8636916187447228e-07, + "loss": 0.5263, + "step": 30680 + }, + { + "epoch": 0.9403273262228761, + "grad_norm": 1.8110978072923725, + "learning_rate": 1.8617846268543126e-07, + "loss": 0.6703, + "step": 30681 + }, + { + "epoch": 0.9403579747456172, + "grad_norm": 1.7085568492526482, + "learning_rate": 1.859878601944831e-07, + "loss": 0.7041, + "step": 30682 + }, + { + "epoch": 0.9403886232683585, + "grad_norm": 1.7307582056723865, + "learning_rate": 1.8579735440350854e-07, + "loss": 0.6526, + "step": 30683 + }, + { + "epoch": 0.9404192717910996, + "grad_norm": 1.683494961064867, + "learning_rate": 1.8560694531438384e-07, + "loss": 0.6406, + "step": 30684 + }, + { + "epoch": 0.9404499203138409, + "grad_norm": 1.6169073049947051, + "learning_rate": 1.8541663292898414e-07, + "loss": 0.5655, + "step": 30685 + }, + { + "epoch": 0.940480568836582, + "grad_norm": 2.058413351478994, + "learning_rate": 1.8522641724918576e-07, + "loss": 0.5968, + "step": 30686 + }, + { + "epoch": 0.9405112173593233, + "grad_norm": 1.6651244156839387, + "learning_rate": 1.8503629827686276e-07, + "loss": 0.5965, + "step": 30687 + }, + { + "epoch": 0.9405418658820645, + "grad_norm": 1.7418680218935065, + "learning_rate": 1.8484627601388804e-07, + "loss": 0.6258, + "step": 30688 + }, + { + "epoch": 0.9405725144048057, + "grad_norm": 1.8376702828635625, + "learning_rate": 1.846563504621357e-07, + "loss": 0.7127, + "step": 30689 + }, + { + "epoch": 0.9406031629275469, + "grad_norm": 1.918451674255338, + "learning_rate": 1.8446652162347423e-07, + "loss": 0.6863, + "step": 30690 + }, + { + "epoch": 0.9406338114502881, + "grad_norm": 1.7968881424340861, + "learning_rate": 1.8427678949977658e-07, + "loss": 0.7922, + "step": 30691 + }, + { + "epoch": 0.9406644599730293, + "grad_norm": 1.6973408074942173, + "learning_rate": 1.8408715409291123e-07, + "loss": 0.6363, + "step": 30692 + }, + { + "epoch": 0.9406951084957705, + "grad_norm": 1.7047278169569053, + "learning_rate": 1.838976154047456e-07, + "loss": 0.6149, + "step": 30693 + }, + { + "epoch": 0.9407257570185117, + "grad_norm": 0.6883136003245859, + "learning_rate": 1.837081734371493e-07, + "loss": 0.5244, + "step": 30694 + }, + { + "epoch": 0.940756405541253, + "grad_norm": 1.6576473957901925, + "learning_rate": 1.835188281919875e-07, + "loss": 0.5709, + "step": 30695 + }, + { + "epoch": 0.9407870540639941, + "grad_norm": 1.6558794834331931, + "learning_rate": 1.833295796711254e-07, + "loss": 0.5092, + "step": 30696 + }, + { + "epoch": 0.9408177025867354, + "grad_norm": 1.695076895607168, + "learning_rate": 1.831404278764304e-07, + "loss": 0.6021, + "step": 30697 + }, + { + "epoch": 0.9408483511094765, + "grad_norm": 1.978959121468237, + "learning_rate": 1.8295137280976316e-07, + "loss": 0.6352, + "step": 30698 + }, + { + "epoch": 0.9408789996322178, + "grad_norm": 1.7701112360250353, + "learning_rate": 1.827624144729878e-07, + "loss": 0.6917, + "step": 30699 + }, + { + "epoch": 0.9409096481549589, + "grad_norm": 1.6560519269236913, + "learning_rate": 1.825735528679673e-07, + "loss": 0.5923, + "step": 30700 + }, + { + "epoch": 0.9409402966777002, + "grad_norm": 1.6493771332786051, + "learning_rate": 1.8238478799656123e-07, + "loss": 0.6311, + "step": 30701 + }, + { + "epoch": 0.9409709452004413, + "grad_norm": 1.6839553896996025, + "learning_rate": 1.8219611986063035e-07, + "loss": 0.5896, + "step": 30702 + }, + { + "epoch": 0.9410015937231826, + "grad_norm": 1.9540683294664825, + "learning_rate": 1.8200754846203207e-07, + "loss": 0.6443, + "step": 30703 + }, + { + "epoch": 0.9410322422459237, + "grad_norm": 1.7180992913703965, + "learning_rate": 1.8181907380262486e-07, + "loss": 0.5563, + "step": 30704 + }, + { + "epoch": 0.941062890768665, + "grad_norm": 1.79996379940842, + "learning_rate": 1.816306958842684e-07, + "loss": 0.7237, + "step": 30705 + }, + { + "epoch": 0.9410935392914062, + "grad_norm": 1.555497860756483, + "learning_rate": 1.8144241470881452e-07, + "loss": 0.5399, + "step": 30706 + }, + { + "epoch": 0.9411241878141473, + "grad_norm": 1.7141410077532255, + "learning_rate": 1.8125423027812174e-07, + "loss": 0.6458, + "step": 30707 + }, + { + "epoch": 0.9411548363368886, + "grad_norm": 1.6148132995960203, + "learning_rate": 1.8106614259404409e-07, + "loss": 0.5496, + "step": 30708 + }, + { + "epoch": 0.9411854848596297, + "grad_norm": 0.6592735698024419, + "learning_rate": 1.8087815165843347e-07, + "loss": 0.4919, + "step": 30709 + }, + { + "epoch": 0.941216133382371, + "grad_norm": 1.7731490480898462, + "learning_rate": 1.8069025747314172e-07, + "loss": 0.643, + "step": 30710 + }, + { + "epoch": 0.9412467819051121, + "grad_norm": 1.6529779498549921, + "learning_rate": 1.8050246004002293e-07, + "loss": 0.5132, + "step": 30711 + }, + { + "epoch": 0.9412774304278534, + "grad_norm": 1.7486149807177611, + "learning_rate": 1.8031475936092445e-07, + "loss": 0.6215, + "step": 30712 + }, + { + "epoch": 0.9413080789505945, + "grad_norm": 0.6772492984050014, + "learning_rate": 1.801271554376982e-07, + "loss": 0.5094, + "step": 30713 + }, + { + "epoch": 0.9413387274733358, + "grad_norm": 1.8197928573428035, + "learning_rate": 1.7993964827219047e-07, + "loss": 0.6312, + "step": 30714 + }, + { + "epoch": 0.941369375996077, + "grad_norm": 1.6864669450796173, + "learning_rate": 1.7975223786625085e-07, + "loss": 0.5397, + "step": 30715 + }, + { + "epoch": 0.9414000245188182, + "grad_norm": 1.754800684837076, + "learning_rate": 1.7956492422172455e-07, + "loss": 0.6748, + "step": 30716 + }, + { + "epoch": 0.9414306730415594, + "grad_norm": 1.7816835959573163, + "learning_rate": 1.793777073404579e-07, + "loss": 0.6681, + "step": 30717 + }, + { + "epoch": 0.9414613215643006, + "grad_norm": 1.5402090344772665, + "learning_rate": 1.7919058722429495e-07, + "loss": 0.5866, + "step": 30718 + }, + { + "epoch": 0.9414919700870418, + "grad_norm": 1.7058388688156476, + "learning_rate": 1.790035638750809e-07, + "loss": 0.7194, + "step": 30719 + }, + { + "epoch": 0.941522618609783, + "grad_norm": 1.640978354079405, + "learning_rate": 1.788166372946576e-07, + "loss": 0.6651, + "step": 30720 + }, + { + "epoch": 0.9415532671325242, + "grad_norm": 0.6826264573945495, + "learning_rate": 1.786298074848658e-07, + "loss": 0.5253, + "step": 30721 + }, + { + "epoch": 0.9415839156552654, + "grad_norm": 1.6997271838253059, + "learning_rate": 1.784430744475485e-07, + "loss": 0.6822, + "step": 30722 + }, + { + "epoch": 0.9416145641780066, + "grad_norm": 1.7455133709661252, + "learning_rate": 1.7825643818454307e-07, + "loss": 0.6555, + "step": 30723 + }, + { + "epoch": 0.9416452127007479, + "grad_norm": 1.6329152394383226, + "learning_rate": 1.7806989869769144e-07, + "loss": 0.5981, + "step": 30724 + }, + { + "epoch": 0.941675861223489, + "grad_norm": 1.7041160364209498, + "learning_rate": 1.778834559888287e-07, + "loss": 0.6518, + "step": 30725 + }, + { + "epoch": 0.9417065097462303, + "grad_norm": 1.6866348320768003, + "learning_rate": 1.7769711005979463e-07, + "loss": 0.5101, + "step": 30726 + }, + { + "epoch": 0.9417371582689714, + "grad_norm": 1.8952368079259452, + "learning_rate": 1.7751086091242432e-07, + "loss": 0.6336, + "step": 30727 + }, + { + "epoch": 0.9417678067917127, + "grad_norm": 0.6539282020115398, + "learning_rate": 1.7732470854855188e-07, + "loss": 0.5018, + "step": 30728 + }, + { + "epoch": 0.9417984553144538, + "grad_norm": 1.924636612919127, + "learning_rate": 1.7713865297001143e-07, + "loss": 0.6677, + "step": 30729 + }, + { + "epoch": 0.9418291038371951, + "grad_norm": 1.6508942255990744, + "learning_rate": 1.7695269417863926e-07, + "loss": 0.7133, + "step": 30730 + }, + { + "epoch": 0.9418597523599362, + "grad_norm": 0.6737811301733115, + "learning_rate": 1.767668321762639e-07, + "loss": 0.4972, + "step": 30731 + }, + { + "epoch": 0.9418904008826775, + "grad_norm": 1.5764284554651122, + "learning_rate": 1.7658106696471834e-07, + "loss": 0.5457, + "step": 30732 + }, + { + "epoch": 0.9419210494054187, + "grad_norm": 1.7277284020776729, + "learning_rate": 1.7639539854583333e-07, + "loss": 0.5983, + "step": 30733 + }, + { + "epoch": 0.9419516979281599, + "grad_norm": 1.723313813092794, + "learning_rate": 1.762098269214385e-07, + "loss": 0.616, + "step": 30734 + }, + { + "epoch": 0.9419823464509011, + "grad_norm": 1.8234342554326732, + "learning_rate": 1.7602435209336243e-07, + "loss": 0.6745, + "step": 30735 + }, + { + "epoch": 0.9420129949736423, + "grad_norm": 2.03782780989724, + "learning_rate": 1.758389740634292e-07, + "loss": 0.6241, + "step": 30736 + }, + { + "epoch": 0.9420436434963835, + "grad_norm": 0.632026555426609, + "learning_rate": 1.7565369283347067e-07, + "loss": 0.5234, + "step": 30737 + }, + { + "epoch": 0.9420742920191246, + "grad_norm": 1.5001552164634606, + "learning_rate": 1.7546850840530983e-07, + "loss": 0.5536, + "step": 30738 + }, + { + "epoch": 0.9421049405418659, + "grad_norm": 1.8134758051417996, + "learning_rate": 1.7528342078077076e-07, + "loss": 0.6588, + "step": 30739 + }, + { + "epoch": 0.942135589064607, + "grad_norm": 1.7795428068993195, + "learning_rate": 1.7509842996167758e-07, + "loss": 0.6038, + "step": 30740 + }, + { + "epoch": 0.9421662375873483, + "grad_norm": 1.5970767952368634, + "learning_rate": 1.7491353594985328e-07, + "loss": 0.6719, + "step": 30741 + }, + { + "epoch": 0.9421968861100894, + "grad_norm": 1.688255870444968, + "learning_rate": 1.747287387471208e-07, + "loss": 0.7557, + "step": 30742 + }, + { + "epoch": 0.9422275346328307, + "grad_norm": 1.772607338562823, + "learning_rate": 1.7454403835529875e-07, + "loss": 0.6439, + "step": 30743 + }, + { + "epoch": 0.9422581831555719, + "grad_norm": 1.8197011989427854, + "learning_rate": 1.7435943477620897e-07, + "loss": 0.6436, + "step": 30744 + }, + { + "epoch": 0.9422888316783131, + "grad_norm": 1.5061416069502676, + "learning_rate": 1.7417492801167e-07, + "loss": 0.6523, + "step": 30745 + }, + { + "epoch": 0.9423194802010543, + "grad_norm": 0.6914597938680627, + "learning_rate": 1.7399051806350043e-07, + "loss": 0.532, + "step": 30746 + }, + { + "epoch": 0.9423501287237955, + "grad_norm": 0.683619348789982, + "learning_rate": 1.738062049335143e-07, + "loss": 0.559, + "step": 30747 + }, + { + "epoch": 0.9423807772465367, + "grad_norm": 1.8965863733340107, + "learning_rate": 1.736219886235302e-07, + "loss": 0.7089, + "step": 30748 + }, + { + "epoch": 0.9424114257692779, + "grad_norm": 1.7671619174557551, + "learning_rate": 1.7343786913536333e-07, + "loss": 0.6654, + "step": 30749 + }, + { + "epoch": 0.9424420742920191, + "grad_norm": 1.7141246369756, + "learning_rate": 1.7325384647082776e-07, + "loss": 0.6867, + "step": 30750 + }, + { + "epoch": 0.9424727228147604, + "grad_norm": 1.7959854963549864, + "learning_rate": 1.7306992063173544e-07, + "loss": 0.6873, + "step": 30751 + }, + { + "epoch": 0.9425033713375015, + "grad_norm": 1.8811275126657447, + "learning_rate": 1.7288609161989933e-07, + "loss": 0.5992, + "step": 30752 + }, + { + "epoch": 0.9425340198602428, + "grad_norm": 1.5473222969401537, + "learning_rate": 1.7270235943713243e-07, + "loss": 0.4691, + "step": 30753 + }, + { + "epoch": 0.9425646683829839, + "grad_norm": 0.6474710817903387, + "learning_rate": 1.725187240852433e-07, + "loss": 0.4887, + "step": 30754 + }, + { + "epoch": 0.9425953169057252, + "grad_norm": 1.7635780333884958, + "learning_rate": 1.7233518556603935e-07, + "loss": 0.6568, + "step": 30755 + }, + { + "epoch": 0.9426259654284663, + "grad_norm": 1.8378275405641296, + "learning_rate": 1.721517438813336e-07, + "loss": 0.6389, + "step": 30756 + }, + { + "epoch": 0.9426566139512076, + "grad_norm": 1.8357810398064214, + "learning_rate": 1.7196839903293128e-07, + "loss": 0.7236, + "step": 30757 + }, + { + "epoch": 0.9426872624739487, + "grad_norm": 1.7505815938740923, + "learning_rate": 1.717851510226376e-07, + "loss": 0.5898, + "step": 30758 + }, + { + "epoch": 0.94271791099669, + "grad_norm": 1.9283440691306224, + "learning_rate": 1.7160199985226001e-07, + "loss": 0.5938, + "step": 30759 + }, + { + "epoch": 0.9427485595194312, + "grad_norm": 1.6172430244647682, + "learning_rate": 1.7141894552360262e-07, + "loss": 0.6212, + "step": 30760 + }, + { + "epoch": 0.9427792080421724, + "grad_norm": 1.7572012709152653, + "learning_rate": 1.7123598803846953e-07, + "loss": 0.5437, + "step": 30761 + }, + { + "epoch": 0.9428098565649136, + "grad_norm": 1.602164176175061, + "learning_rate": 1.7105312739866265e-07, + "loss": 0.641, + "step": 30762 + }, + { + "epoch": 0.9428405050876548, + "grad_norm": 1.663820109264866, + "learning_rate": 1.7087036360598385e-07, + "loss": 0.5344, + "step": 30763 + }, + { + "epoch": 0.942871153610396, + "grad_norm": 1.5617885770143647, + "learning_rate": 1.7068769666223617e-07, + "loss": 0.5424, + "step": 30764 + }, + { + "epoch": 0.9429018021331372, + "grad_norm": 1.6367870136377016, + "learning_rate": 1.7050512656921592e-07, + "loss": 0.6737, + "step": 30765 + }, + { + "epoch": 0.9429324506558784, + "grad_norm": 1.6427990114412323, + "learning_rate": 1.703226533287228e-07, + "loss": 0.5977, + "step": 30766 + }, + { + "epoch": 0.9429630991786196, + "grad_norm": 1.6054410706348385, + "learning_rate": 1.7014027694255752e-07, + "loss": 0.6533, + "step": 30767 + }, + { + "epoch": 0.9429937477013608, + "grad_norm": 1.7094696662669295, + "learning_rate": 1.699579974125143e-07, + "loss": 0.6619, + "step": 30768 + }, + { + "epoch": 0.943024396224102, + "grad_norm": 2.2127470719455187, + "learning_rate": 1.697758147403905e-07, + "loss": 0.6892, + "step": 30769 + }, + { + "epoch": 0.9430550447468432, + "grad_norm": 0.6656075985347527, + "learning_rate": 1.695937289279792e-07, + "loss": 0.5259, + "step": 30770 + }, + { + "epoch": 0.9430856932695844, + "grad_norm": 1.4868034746650107, + "learning_rate": 1.6941173997707782e-07, + "loss": 0.5559, + "step": 30771 + }, + { + "epoch": 0.9431163417923256, + "grad_norm": 1.7325535847255171, + "learning_rate": 1.6922984788947717e-07, + "loss": 0.6203, + "step": 30772 + }, + { + "epoch": 0.9431469903150668, + "grad_norm": 1.7162119085412784, + "learning_rate": 1.6904805266697023e-07, + "loss": 0.5782, + "step": 30773 + }, + { + "epoch": 0.943177638837808, + "grad_norm": 1.8535135344646767, + "learning_rate": 1.688663543113478e-07, + "loss": 0.7079, + "step": 30774 + }, + { + "epoch": 0.9432082873605492, + "grad_norm": 0.6759644969185382, + "learning_rate": 1.6868475282440177e-07, + "loss": 0.5435, + "step": 30775 + }, + { + "epoch": 0.9432389358832904, + "grad_norm": 1.7356117300040748, + "learning_rate": 1.6850324820791963e-07, + "loss": 0.7733, + "step": 30776 + }, + { + "epoch": 0.9432695844060316, + "grad_norm": 0.6799044979697147, + "learning_rate": 1.6832184046368883e-07, + "loss": 0.4984, + "step": 30777 + }, + { + "epoch": 0.9433002329287729, + "grad_norm": 0.682616532786839, + "learning_rate": 1.6814052959350125e-07, + "loss": 0.5062, + "step": 30778 + }, + { + "epoch": 0.943330881451514, + "grad_norm": 1.6218028406248124, + "learning_rate": 1.679593155991388e-07, + "loss": 0.5579, + "step": 30779 + }, + { + "epoch": 0.9433615299742553, + "grad_norm": 1.6161776196708708, + "learning_rate": 1.6777819848239007e-07, + "loss": 0.6806, + "step": 30780 + }, + { + "epoch": 0.9433921784969964, + "grad_norm": 1.7728065800155162, + "learning_rate": 1.6759717824503697e-07, + "loss": 0.6757, + "step": 30781 + }, + { + "epoch": 0.9434228270197377, + "grad_norm": 0.6328569196949553, + "learning_rate": 1.674162548888658e-07, + "loss": 0.4648, + "step": 30782 + }, + { + "epoch": 0.9434534755424788, + "grad_norm": 1.5619503441440554, + "learning_rate": 1.6723542841565743e-07, + "loss": 0.5605, + "step": 30783 + }, + { + "epoch": 0.9434841240652201, + "grad_norm": 1.7110490401042098, + "learning_rate": 1.6705469882719483e-07, + "loss": 0.5337, + "step": 30784 + }, + { + "epoch": 0.9435147725879612, + "grad_norm": 1.631599807651246, + "learning_rate": 1.6687406612525658e-07, + "loss": 0.5789, + "step": 30785 + }, + { + "epoch": 0.9435454211107025, + "grad_norm": 1.7677746785856407, + "learning_rate": 1.666935303116257e-07, + "loss": 0.57, + "step": 30786 + }, + { + "epoch": 0.9435760696334436, + "grad_norm": 1.871295671307115, + "learning_rate": 1.665130913880797e-07, + "loss": 0.6909, + "step": 30787 + }, + { + "epoch": 0.9436067181561849, + "grad_norm": 1.4432631970881367, + "learning_rate": 1.6633274935639488e-07, + "loss": 0.6073, + "step": 30788 + }, + { + "epoch": 0.9436373666789261, + "grad_norm": 1.7209338991813756, + "learning_rate": 1.6615250421835095e-07, + "loss": 0.6884, + "step": 30789 + }, + { + "epoch": 0.9436680152016673, + "grad_norm": 1.815654285180484, + "learning_rate": 1.6597235597572093e-07, + "loss": 0.7088, + "step": 30790 + }, + { + "epoch": 0.9436986637244085, + "grad_norm": 1.9775059906407013, + "learning_rate": 1.657923046302823e-07, + "loss": 0.6012, + "step": 30791 + }, + { + "epoch": 0.9437293122471497, + "grad_norm": 0.6871616835888696, + "learning_rate": 1.6561235018380807e-07, + "loss": 0.5257, + "step": 30792 + }, + { + "epoch": 0.9437599607698909, + "grad_norm": 1.8408605704806296, + "learning_rate": 1.6543249263807128e-07, + "loss": 0.6721, + "step": 30793 + }, + { + "epoch": 0.9437906092926321, + "grad_norm": 1.5624456104511033, + "learning_rate": 1.6525273199484603e-07, + "loss": 0.5581, + "step": 30794 + }, + { + "epoch": 0.9438212578153733, + "grad_norm": 1.8771590121313753, + "learning_rate": 1.6507306825589987e-07, + "loss": 0.673, + "step": 30795 + }, + { + "epoch": 0.9438519063381146, + "grad_norm": 1.8628854059744147, + "learning_rate": 1.6489350142300575e-07, + "loss": 0.7129, + "step": 30796 + }, + { + "epoch": 0.9438825548608557, + "grad_norm": 1.7994159200012132, + "learning_rate": 1.647140314979334e-07, + "loss": 0.6411, + "step": 30797 + }, + { + "epoch": 0.943913203383597, + "grad_norm": 1.6839153693999445, + "learning_rate": 1.645346584824492e-07, + "loss": 0.6033, + "step": 30798 + }, + { + "epoch": 0.9439438519063381, + "grad_norm": 1.6714008342068403, + "learning_rate": 1.643553823783217e-07, + "loss": 0.5755, + "step": 30799 + }, + { + "epoch": 0.9439745004290793, + "grad_norm": 1.9842448669851795, + "learning_rate": 1.641762031873173e-07, + "loss": 0.6781, + "step": 30800 + }, + { + "epoch": 0.9440051489518205, + "grad_norm": 1.87679008077384, + "learning_rate": 1.6399712091120125e-07, + "loss": 0.7134, + "step": 30801 + }, + { + "epoch": 0.9440357974745617, + "grad_norm": 1.8526772139055208, + "learning_rate": 1.6381813555173876e-07, + "loss": 0.5587, + "step": 30802 + }, + { + "epoch": 0.9440664459973029, + "grad_norm": 1.4752405024915403, + "learning_rate": 1.636392471106918e-07, + "loss": 0.4868, + "step": 30803 + }, + { + "epoch": 0.9440970945200441, + "grad_norm": 1.8973459315735122, + "learning_rate": 1.6346045558982448e-07, + "loss": 0.6763, + "step": 30804 + }, + { + "epoch": 0.9441277430427854, + "grad_norm": 1.7772184331797152, + "learning_rate": 1.6328176099089876e-07, + "loss": 0.6545, + "step": 30805 + }, + { + "epoch": 0.9441583915655265, + "grad_norm": 0.6743575222971924, + "learning_rate": 1.6310316331567323e-07, + "loss": 0.5252, + "step": 30806 + }, + { + "epoch": 0.9441890400882678, + "grad_norm": 1.828004834352826, + "learning_rate": 1.6292466256590978e-07, + "loss": 0.678, + "step": 30807 + }, + { + "epoch": 0.9442196886110089, + "grad_norm": 1.691230564094994, + "learning_rate": 1.6274625874336813e-07, + "loss": 0.6237, + "step": 30808 + }, + { + "epoch": 0.9442503371337502, + "grad_norm": 1.745826726008466, + "learning_rate": 1.6256795184980246e-07, + "loss": 0.6896, + "step": 30809 + }, + { + "epoch": 0.9442809856564913, + "grad_norm": 1.6573896838443776, + "learning_rate": 1.6238974188697354e-07, + "loss": 0.694, + "step": 30810 + }, + { + "epoch": 0.9443116341792326, + "grad_norm": 1.6782905859715394, + "learning_rate": 1.6221162885663332e-07, + "loss": 0.6184, + "step": 30811 + }, + { + "epoch": 0.9443422827019737, + "grad_norm": 1.8455743766255883, + "learning_rate": 1.620336127605404e-07, + "loss": 0.605, + "step": 30812 + }, + { + "epoch": 0.944372931224715, + "grad_norm": 1.866095711724978, + "learning_rate": 1.6185569360044783e-07, + "loss": 0.7261, + "step": 30813 + }, + { + "epoch": 0.9444035797474561, + "grad_norm": 1.6804444388575202, + "learning_rate": 1.6167787137810752e-07, + "loss": 0.6206, + "step": 30814 + }, + { + "epoch": 0.9444342282701974, + "grad_norm": 1.6667728581594083, + "learning_rate": 1.6150014609527253e-07, + "loss": 0.578, + "step": 30815 + }, + { + "epoch": 0.9444648767929386, + "grad_norm": 0.6787286760267531, + "learning_rate": 1.6132251775369478e-07, + "loss": 0.5285, + "step": 30816 + }, + { + "epoch": 0.9444955253156798, + "grad_norm": 1.7411689941504407, + "learning_rate": 1.6114498635512177e-07, + "loss": 0.6093, + "step": 30817 + }, + { + "epoch": 0.944526173838421, + "grad_norm": 1.7977758236942727, + "learning_rate": 1.6096755190130542e-07, + "loss": 0.6961, + "step": 30818 + }, + { + "epoch": 0.9445568223611622, + "grad_norm": 1.6987500681295897, + "learning_rate": 1.6079021439399434e-07, + "loss": 0.5797, + "step": 30819 + }, + { + "epoch": 0.9445874708839034, + "grad_norm": 1.840226992278724, + "learning_rate": 1.606129738349338e-07, + "loss": 0.6537, + "step": 30820 + }, + { + "epoch": 0.9446181194066446, + "grad_norm": 1.6456982388120227, + "learning_rate": 1.6043583022587127e-07, + "loss": 0.6232, + "step": 30821 + }, + { + "epoch": 0.9446487679293858, + "grad_norm": 1.6185364290093467, + "learning_rate": 1.6025878356855095e-07, + "loss": 0.5881, + "step": 30822 + }, + { + "epoch": 0.944679416452127, + "grad_norm": 0.6723590044716252, + "learning_rate": 1.600818338647203e-07, + "loss": 0.5095, + "step": 30823 + }, + { + "epoch": 0.9447100649748682, + "grad_norm": 1.849107853082545, + "learning_rate": 1.5990498111612018e-07, + "loss": 0.6563, + "step": 30824 + }, + { + "epoch": 0.9447407134976095, + "grad_norm": 1.7335775019775048, + "learning_rate": 1.5972822532449362e-07, + "loss": 0.5988, + "step": 30825 + }, + { + "epoch": 0.9447713620203506, + "grad_norm": 1.8592026343123638, + "learning_rate": 1.5955156649158254e-07, + "loss": 0.6039, + "step": 30826 + }, + { + "epoch": 0.9448020105430919, + "grad_norm": 0.68290166458759, + "learning_rate": 1.593750046191289e-07, + "loss": 0.5123, + "step": 30827 + }, + { + "epoch": 0.944832659065833, + "grad_norm": 1.6590101226187846, + "learning_rate": 1.5919853970887022e-07, + "loss": 0.6223, + "step": 30828 + }, + { + "epoch": 0.9448633075885743, + "grad_norm": 1.602822548560988, + "learning_rate": 1.590221717625462e-07, + "loss": 0.6447, + "step": 30829 + }, + { + "epoch": 0.9448939561113154, + "grad_norm": 1.657957400986655, + "learning_rate": 1.5884590078189543e-07, + "loss": 0.5493, + "step": 30830 + }, + { + "epoch": 0.9449246046340566, + "grad_norm": 0.6650691666150103, + "learning_rate": 1.5866972676865322e-07, + "loss": 0.5095, + "step": 30831 + }, + { + "epoch": 0.9449552531567978, + "grad_norm": 1.7265910555502202, + "learning_rate": 1.5849364972455594e-07, + "loss": 0.6298, + "step": 30832 + }, + { + "epoch": 0.944985901679539, + "grad_norm": 1.9434388296563465, + "learning_rate": 1.5831766965133887e-07, + "loss": 0.6578, + "step": 30833 + }, + { + "epoch": 0.9450165502022803, + "grad_norm": 1.7079829985655612, + "learning_rate": 1.581417865507362e-07, + "loss": 0.665, + "step": 30834 + }, + { + "epoch": 0.9450471987250214, + "grad_norm": 1.7879447765609517, + "learning_rate": 1.5796600042448095e-07, + "loss": 0.6861, + "step": 30835 + }, + { + "epoch": 0.9450778472477627, + "grad_norm": 1.6671647649178976, + "learning_rate": 1.577903112743051e-07, + "loss": 0.5874, + "step": 30836 + }, + { + "epoch": 0.9451084957705038, + "grad_norm": 1.5298069002447328, + "learning_rate": 1.5761471910193836e-07, + "loss": 0.5961, + "step": 30837 + }, + { + "epoch": 0.9451391442932451, + "grad_norm": 2.06055745431702, + "learning_rate": 1.574392239091127e-07, + "loss": 0.6273, + "step": 30838 + }, + { + "epoch": 0.9451697928159862, + "grad_norm": 1.7893755661302237, + "learning_rate": 1.5726382569755672e-07, + "loss": 0.637, + "step": 30839 + }, + { + "epoch": 0.9452004413387275, + "grad_norm": 1.749958252909433, + "learning_rate": 1.5708852446899902e-07, + "loss": 0.5154, + "step": 30840 + }, + { + "epoch": 0.9452310898614686, + "grad_norm": 1.5962220139252417, + "learning_rate": 1.5691332022516494e-07, + "loss": 0.5821, + "step": 30841 + }, + { + "epoch": 0.9452617383842099, + "grad_norm": 1.855260571681887, + "learning_rate": 1.5673821296778412e-07, + "loss": 0.6651, + "step": 30842 + }, + { + "epoch": 0.945292386906951, + "grad_norm": 1.564123639417341, + "learning_rate": 1.5656320269858083e-07, + "loss": 0.5663, + "step": 30843 + }, + { + "epoch": 0.9453230354296923, + "grad_norm": 0.673417785960624, + "learning_rate": 1.5638828941927697e-07, + "loss": 0.526, + "step": 30844 + }, + { + "epoch": 0.9453536839524335, + "grad_norm": 1.6546828831933036, + "learning_rate": 1.5621347313159895e-07, + "loss": 0.6564, + "step": 30845 + }, + { + "epoch": 0.9453843324751747, + "grad_norm": 1.5869994896740705, + "learning_rate": 1.5603875383726763e-07, + "loss": 0.535, + "step": 30846 + }, + { + "epoch": 0.9454149809979159, + "grad_norm": 1.933894435716, + "learning_rate": 1.5586413153800494e-07, + "loss": 0.7105, + "step": 30847 + }, + { + "epoch": 0.9454456295206571, + "grad_norm": 1.5688642503400543, + "learning_rate": 1.5568960623553176e-07, + "loss": 0.5694, + "step": 30848 + }, + { + "epoch": 0.9454762780433983, + "grad_norm": 1.6735943497699186, + "learning_rate": 1.555151779315689e-07, + "loss": 0.6618, + "step": 30849 + }, + { + "epoch": 0.9455069265661395, + "grad_norm": 1.882162753640192, + "learning_rate": 1.5534084662783277e-07, + "loss": 0.6273, + "step": 30850 + }, + { + "epoch": 0.9455375750888807, + "grad_norm": 1.7097268445934575, + "learning_rate": 1.5516661232604312e-07, + "loss": 0.6679, + "step": 30851 + }, + { + "epoch": 0.945568223611622, + "grad_norm": 1.7595184421172914, + "learning_rate": 1.5499247502791415e-07, + "loss": 0.5607, + "step": 30852 + }, + { + "epoch": 0.9455988721343631, + "grad_norm": 2.075872771969191, + "learning_rate": 1.5481843473516445e-07, + "loss": 0.6449, + "step": 30853 + }, + { + "epoch": 0.9456295206571044, + "grad_norm": 1.7706607065735929, + "learning_rate": 1.546444914495071e-07, + "loss": 0.6573, + "step": 30854 + }, + { + "epoch": 0.9456601691798455, + "grad_norm": 1.9244008769165188, + "learning_rate": 1.544706451726574e-07, + "loss": 0.6072, + "step": 30855 + }, + { + "epoch": 0.9456908177025868, + "grad_norm": 1.60055351295757, + "learning_rate": 1.5429689590632624e-07, + "loss": 0.622, + "step": 30856 + }, + { + "epoch": 0.9457214662253279, + "grad_norm": 1.9736971244590256, + "learning_rate": 1.5412324365222775e-07, + "loss": 0.7024, + "step": 30857 + }, + { + "epoch": 0.9457521147480692, + "grad_norm": 1.7248845647342739, + "learning_rate": 1.539496884120717e-07, + "loss": 0.6502, + "step": 30858 + }, + { + "epoch": 0.9457827632708103, + "grad_norm": 1.874939289085135, + "learning_rate": 1.5377623018756894e-07, + "loss": 0.6855, + "step": 30859 + }, + { + "epoch": 0.9458134117935516, + "grad_norm": 1.678894569041011, + "learning_rate": 1.536028689804281e-07, + "loss": 0.575, + "step": 30860 + }, + { + "epoch": 0.9458440603162928, + "grad_norm": 1.8697575670062405, + "learning_rate": 1.534296047923578e-07, + "loss": 0.7155, + "step": 30861 + }, + { + "epoch": 0.9458747088390339, + "grad_norm": 0.7122468655494175, + "learning_rate": 1.5325643762506558e-07, + "loss": 0.5337, + "step": 30862 + }, + { + "epoch": 0.9459053573617752, + "grad_norm": 1.741174500699386, + "learning_rate": 1.5308336748025564e-07, + "loss": 0.5575, + "step": 30863 + }, + { + "epoch": 0.9459360058845163, + "grad_norm": 0.6793500229094414, + "learning_rate": 1.529103943596355e-07, + "loss": 0.5204, + "step": 30864 + }, + { + "epoch": 0.9459666544072576, + "grad_norm": 1.9401369998086786, + "learning_rate": 1.5273751826490934e-07, + "loss": 0.5831, + "step": 30865 + }, + { + "epoch": 0.9459973029299987, + "grad_norm": 1.7297881064864755, + "learning_rate": 1.5256473919777803e-07, + "loss": 0.5532, + "step": 30866 + }, + { + "epoch": 0.94602795145274, + "grad_norm": 1.7226480138732585, + "learning_rate": 1.5239205715994687e-07, + "loss": 0.5608, + "step": 30867 + }, + { + "epoch": 0.9460585999754811, + "grad_norm": 1.7724860100011537, + "learning_rate": 1.5221947215311673e-07, + "loss": 0.6602, + "step": 30868 + }, + { + "epoch": 0.9460892484982224, + "grad_norm": 1.9540637277386592, + "learning_rate": 1.5204698417898844e-07, + "loss": 0.631, + "step": 30869 + }, + { + "epoch": 0.9461198970209636, + "grad_norm": 1.9043808439349046, + "learning_rate": 1.5187459323925958e-07, + "loss": 0.7327, + "step": 30870 + }, + { + "epoch": 0.9461505455437048, + "grad_norm": 1.9054042182062298, + "learning_rate": 1.5170229933562986e-07, + "loss": 0.5732, + "step": 30871 + }, + { + "epoch": 0.946181194066446, + "grad_norm": 1.7473908384475643, + "learning_rate": 1.5153010246979905e-07, + "loss": 0.6086, + "step": 30872 + }, + { + "epoch": 0.9462118425891872, + "grad_norm": 1.6855508552227527, + "learning_rate": 1.5135800264346134e-07, + "loss": 0.6846, + "step": 30873 + }, + { + "epoch": 0.9462424911119284, + "grad_norm": 0.6788348318930977, + "learning_rate": 1.5118599985831205e-07, + "loss": 0.5257, + "step": 30874 + }, + { + "epoch": 0.9462731396346696, + "grad_norm": 1.682524292946267, + "learning_rate": 1.5101409411604762e-07, + "loss": 0.6568, + "step": 30875 + }, + { + "epoch": 0.9463037881574108, + "grad_norm": 0.663831001865575, + "learning_rate": 1.5084228541836222e-07, + "loss": 0.5078, + "step": 30876 + }, + { + "epoch": 0.946334436680152, + "grad_norm": 1.9355228232361572, + "learning_rate": 1.5067057376694672e-07, + "loss": 0.6639, + "step": 30877 + }, + { + "epoch": 0.9463650852028932, + "grad_norm": 1.6351377983637536, + "learning_rate": 1.504989591634931e-07, + "loss": 0.5439, + "step": 30878 + }, + { + "epoch": 0.9463957337256345, + "grad_norm": 1.7692218849965549, + "learning_rate": 1.5032744160969448e-07, + "loss": 0.5981, + "step": 30879 + }, + { + "epoch": 0.9464263822483756, + "grad_norm": 1.8971040681119675, + "learning_rate": 1.501560211072406e-07, + "loss": 0.6085, + "step": 30880 + }, + { + "epoch": 0.9464570307711169, + "grad_norm": 1.8597706999804833, + "learning_rate": 1.4998469765781898e-07, + "loss": 0.622, + "step": 30881 + }, + { + "epoch": 0.946487679293858, + "grad_norm": 1.8300641175622017, + "learning_rate": 1.498134712631172e-07, + "loss": 0.6334, + "step": 30882 + }, + { + "epoch": 0.9465183278165993, + "grad_norm": 2.0305673624062175, + "learning_rate": 1.4964234192482496e-07, + "loss": 0.5733, + "step": 30883 + }, + { + "epoch": 0.9465489763393404, + "grad_norm": 0.6541931235949069, + "learning_rate": 1.4947130964462763e-07, + "loss": 0.5132, + "step": 30884 + }, + { + "epoch": 0.9465796248620817, + "grad_norm": 0.665786558994943, + "learning_rate": 1.4930037442420831e-07, + "loss": 0.4925, + "step": 30885 + }, + { + "epoch": 0.9466102733848228, + "grad_norm": 1.7755916089228165, + "learning_rate": 1.491295362652534e-07, + "loss": 0.6218, + "step": 30886 + }, + { + "epoch": 0.9466409219075641, + "grad_norm": 1.7457325908706194, + "learning_rate": 1.489587951694449e-07, + "loss": 0.62, + "step": 30887 + }, + { + "epoch": 0.9466715704303053, + "grad_norm": 0.7228563905127847, + "learning_rate": 1.48788151138467e-07, + "loss": 0.5609, + "step": 30888 + }, + { + "epoch": 0.9467022189530465, + "grad_norm": 1.8243337618363837, + "learning_rate": 1.486176041739995e-07, + "loss": 0.6142, + "step": 30889 + }, + { + "epoch": 0.9467328674757877, + "grad_norm": 1.7437766952627758, + "learning_rate": 1.4844715427772327e-07, + "loss": 0.5876, + "step": 30890 + }, + { + "epoch": 0.9467635159985289, + "grad_norm": 1.7618880769084975, + "learning_rate": 1.4827680145131918e-07, + "loss": 0.574, + "step": 30891 + }, + { + "epoch": 0.9467941645212701, + "grad_norm": 1.9131646002672373, + "learning_rate": 1.4810654569646255e-07, + "loss": 0.6272, + "step": 30892 + }, + { + "epoch": 0.9468248130440112, + "grad_norm": 1.5983920601809132, + "learning_rate": 1.4793638701483314e-07, + "loss": 0.638, + "step": 30893 + }, + { + "epoch": 0.9468554615667525, + "grad_norm": 1.8126413859127475, + "learning_rate": 1.4776632540810854e-07, + "loss": 0.6016, + "step": 30894 + }, + { + "epoch": 0.9468861100894936, + "grad_norm": 1.757597709113908, + "learning_rate": 1.475963608779618e-07, + "loss": 0.6271, + "step": 30895 + }, + { + "epoch": 0.9469167586122349, + "grad_norm": 1.5822233827263579, + "learning_rate": 1.474264934260694e-07, + "loss": 0.5309, + "step": 30896 + }, + { + "epoch": 0.946947407134976, + "grad_norm": 1.8056301485874435, + "learning_rate": 1.4725672305410442e-07, + "loss": 0.6294, + "step": 30897 + }, + { + "epoch": 0.9469780556577173, + "grad_norm": 1.799325352670263, + "learning_rate": 1.4708704976374e-07, + "loss": 0.7307, + "step": 30898 + }, + { + "epoch": 0.9470087041804585, + "grad_norm": 1.608226622661296, + "learning_rate": 1.469174735566492e-07, + "loss": 0.6137, + "step": 30899 + }, + { + "epoch": 0.9470393527031997, + "grad_norm": 1.8868363146274183, + "learning_rate": 1.467479944344996e-07, + "loss": 0.6737, + "step": 30900 + }, + { + "epoch": 0.9470700012259409, + "grad_norm": 0.7019202683237691, + "learning_rate": 1.465786123989632e-07, + "loss": 0.5521, + "step": 30901 + }, + { + "epoch": 0.9471006497486821, + "grad_norm": 1.8928934909325463, + "learning_rate": 1.4640932745171088e-07, + "loss": 0.6069, + "step": 30902 + }, + { + "epoch": 0.9471312982714233, + "grad_norm": 1.670177093858697, + "learning_rate": 1.4624013959440687e-07, + "loss": 0.5729, + "step": 30903 + }, + { + "epoch": 0.9471619467941645, + "grad_norm": 1.7970173049917328, + "learning_rate": 1.460710488287198e-07, + "loss": 0.6448, + "step": 30904 + }, + { + "epoch": 0.9471925953169057, + "grad_norm": 1.6772966576865487, + "learning_rate": 1.4590205515631728e-07, + "loss": 0.5443, + "step": 30905 + }, + { + "epoch": 0.947223243839647, + "grad_norm": 1.9196197462454092, + "learning_rate": 1.4573315857886127e-07, + "loss": 0.6147, + "step": 30906 + }, + { + "epoch": 0.9472538923623881, + "grad_norm": 1.6604696368077494, + "learning_rate": 1.4556435909801936e-07, + "loss": 0.5778, + "step": 30907 + }, + { + "epoch": 0.9472845408851294, + "grad_norm": 1.7400150403257577, + "learning_rate": 1.4539565671545242e-07, + "loss": 0.7405, + "step": 30908 + }, + { + "epoch": 0.9473151894078705, + "grad_norm": 1.7809553456863765, + "learning_rate": 1.4522705143282357e-07, + "loss": 0.6159, + "step": 30909 + }, + { + "epoch": 0.9473458379306118, + "grad_norm": 1.6546691905057602, + "learning_rate": 1.4505854325179368e-07, + "loss": 0.6431, + "step": 30910 + }, + { + "epoch": 0.9473764864533529, + "grad_norm": 1.5610867493141707, + "learning_rate": 1.448901321740237e-07, + "loss": 0.6411, + "step": 30911 + }, + { + "epoch": 0.9474071349760942, + "grad_norm": 1.718441838869165, + "learning_rate": 1.4472181820117336e-07, + "loss": 0.5985, + "step": 30912 + }, + { + "epoch": 0.9474377834988353, + "grad_norm": 1.712837436260795, + "learning_rate": 1.4455360133490025e-07, + "loss": 0.604, + "step": 30913 + }, + { + "epoch": 0.9474684320215766, + "grad_norm": 1.6264760026040241, + "learning_rate": 1.4438548157686195e-07, + "loss": 0.5234, + "step": 30914 + }, + { + "epoch": 0.9474990805443178, + "grad_norm": 1.681317268525713, + "learning_rate": 1.4421745892871487e-07, + "loss": 0.6591, + "step": 30915 + }, + { + "epoch": 0.947529729067059, + "grad_norm": 2.0759184232926753, + "learning_rate": 1.4404953339211548e-07, + "loss": 0.6337, + "step": 30916 + }, + { + "epoch": 0.9475603775898002, + "grad_norm": 0.6618327678546524, + "learning_rate": 1.4388170496871688e-07, + "loss": 0.4774, + "step": 30917 + }, + { + "epoch": 0.9475910261125414, + "grad_norm": 1.8691331463940826, + "learning_rate": 1.437139736601756e-07, + "loss": 0.6532, + "step": 30918 + }, + { + "epoch": 0.9476216746352826, + "grad_norm": 1.73307476326832, + "learning_rate": 1.4354633946814023e-07, + "loss": 0.6, + "step": 30919 + }, + { + "epoch": 0.9476523231580238, + "grad_norm": 1.8693912527338867, + "learning_rate": 1.4337880239426504e-07, + "loss": 0.6139, + "step": 30920 + }, + { + "epoch": 0.947682971680765, + "grad_norm": 1.8031936740957795, + "learning_rate": 1.4321136244020206e-07, + "loss": 0.5973, + "step": 30921 + }, + { + "epoch": 0.9477136202035062, + "grad_norm": 1.8276287418688613, + "learning_rate": 1.4304401960759773e-07, + "loss": 0.5853, + "step": 30922 + }, + { + "epoch": 0.9477442687262474, + "grad_norm": 1.8245824034608804, + "learning_rate": 1.4287677389810296e-07, + "loss": 0.6258, + "step": 30923 + }, + { + "epoch": 0.9477749172489885, + "grad_norm": 0.676327864325155, + "learning_rate": 1.427096253133664e-07, + "loss": 0.5062, + "step": 30924 + }, + { + "epoch": 0.9478055657717298, + "grad_norm": 1.9260045310739127, + "learning_rate": 1.4254257385503235e-07, + "loss": 0.6133, + "step": 30925 + }, + { + "epoch": 0.947836214294471, + "grad_norm": 1.7210084881068297, + "learning_rate": 1.4237561952474943e-07, + "loss": 0.6481, + "step": 30926 + }, + { + "epoch": 0.9478668628172122, + "grad_norm": 1.5745312166857763, + "learning_rate": 1.4220876232416193e-07, + "loss": 0.5796, + "step": 30927 + }, + { + "epoch": 0.9478975113399534, + "grad_norm": 0.6581413165350514, + "learning_rate": 1.4204200225491404e-07, + "loss": 0.53, + "step": 30928 + }, + { + "epoch": 0.9479281598626946, + "grad_norm": 1.6565354120613955, + "learning_rate": 1.4187533931864784e-07, + "loss": 0.5752, + "step": 30929 + }, + { + "epoch": 0.9479588083854358, + "grad_norm": 1.8692831096850866, + "learning_rate": 1.417087735170064e-07, + "loss": 0.6045, + "step": 30930 + }, + { + "epoch": 0.947989456908177, + "grad_norm": 1.4983872871398811, + "learning_rate": 1.4154230485163067e-07, + "loss": 0.7049, + "step": 30931 + }, + { + "epoch": 0.9480201054309182, + "grad_norm": 0.6462927064983343, + "learning_rate": 1.4137593332416155e-07, + "loss": 0.52, + "step": 30932 + }, + { + "epoch": 0.9480507539536595, + "grad_norm": 1.4717057794466952, + "learning_rate": 1.4120965893623662e-07, + "loss": 0.565, + "step": 30933 + }, + { + "epoch": 0.9480814024764006, + "grad_norm": 1.5952599691576674, + "learning_rate": 1.4104348168949567e-07, + "loss": 0.6364, + "step": 30934 + }, + { + "epoch": 0.9481120509991419, + "grad_norm": 1.7433488395626635, + "learning_rate": 1.4087740158557738e-07, + "loss": 0.5052, + "step": 30935 + }, + { + "epoch": 0.948142699521883, + "grad_norm": 1.7562248317337616, + "learning_rate": 1.4071141862611493e-07, + "loss": 0.682, + "step": 30936 + }, + { + "epoch": 0.9481733480446243, + "grad_norm": 1.892517048139292, + "learning_rate": 1.4054553281274586e-07, + "loss": 0.6558, + "step": 30937 + }, + { + "epoch": 0.9482039965673654, + "grad_norm": 1.6536485940061092, + "learning_rate": 1.4037974414710552e-07, + "loss": 0.6206, + "step": 30938 + }, + { + "epoch": 0.9482346450901067, + "grad_norm": 1.7769222691008477, + "learning_rate": 1.402140526308249e-07, + "loss": 0.6263, + "step": 30939 + }, + { + "epoch": 0.9482652936128478, + "grad_norm": 1.7810398013863404, + "learning_rate": 1.4004845826553814e-07, + "loss": 0.6495, + "step": 30940 + }, + { + "epoch": 0.9482959421355891, + "grad_norm": 2.011323730369059, + "learning_rate": 1.3988296105287736e-07, + "loss": 0.6399, + "step": 30941 + }, + { + "epoch": 0.9483265906583302, + "grad_norm": 1.7356077638033136, + "learning_rate": 1.397175609944712e-07, + "loss": 0.6716, + "step": 30942 + }, + { + "epoch": 0.9483572391810715, + "grad_norm": 0.6407693299049273, + "learning_rate": 1.3955225809195171e-07, + "loss": 0.5167, + "step": 30943 + }, + { + "epoch": 0.9483878877038127, + "grad_norm": 1.8117734680148618, + "learning_rate": 1.393870523469465e-07, + "loss": 0.6875, + "step": 30944 + }, + { + "epoch": 0.9484185362265539, + "grad_norm": 1.6912024322673078, + "learning_rate": 1.3922194376108423e-07, + "loss": 0.6205, + "step": 30945 + }, + { + "epoch": 0.9484491847492951, + "grad_norm": 1.9006599771823318, + "learning_rate": 1.3905693233599139e-07, + "loss": 0.724, + "step": 30946 + }, + { + "epoch": 0.9484798332720363, + "grad_norm": 1.8323210790775672, + "learning_rate": 1.3889201807329224e-07, + "loss": 0.6586, + "step": 30947 + }, + { + "epoch": 0.9485104817947775, + "grad_norm": 1.7305206549577032, + "learning_rate": 1.3872720097461435e-07, + "loss": 0.6019, + "step": 30948 + }, + { + "epoch": 0.9485411303175187, + "grad_norm": 0.6663744614449574, + "learning_rate": 1.3856248104157867e-07, + "loss": 0.5129, + "step": 30949 + }, + { + "epoch": 0.9485717788402599, + "grad_norm": 1.9396069597775702, + "learning_rate": 1.3839785827581164e-07, + "loss": 0.5892, + "step": 30950 + }, + { + "epoch": 0.9486024273630012, + "grad_norm": 1.6515699992795514, + "learning_rate": 1.3823333267893423e-07, + "loss": 0.5828, + "step": 30951 + }, + { + "epoch": 0.9486330758857423, + "grad_norm": 1.578858066546402, + "learning_rate": 1.3806890425256515e-07, + "loss": 0.6613, + "step": 30952 + }, + { + "epoch": 0.9486637244084836, + "grad_norm": 1.982718991652756, + "learning_rate": 1.3790457299832748e-07, + "loss": 0.4935, + "step": 30953 + }, + { + "epoch": 0.9486943729312247, + "grad_norm": 1.6485506004002195, + "learning_rate": 1.3774033891784e-07, + "loss": 0.6867, + "step": 30954 + }, + { + "epoch": 0.9487250214539659, + "grad_norm": 2.092138541168431, + "learning_rate": 1.3757620201271916e-07, + "loss": 0.634, + "step": 30955 + }, + { + "epoch": 0.9487556699767071, + "grad_norm": 1.7183644264874094, + "learning_rate": 1.3741216228458366e-07, + "loss": 0.5943, + "step": 30956 + }, + { + "epoch": 0.9487863184994483, + "grad_norm": 2.0279426747038953, + "learning_rate": 1.3724821973505e-07, + "loss": 0.6705, + "step": 30957 + }, + { + "epoch": 0.9488169670221895, + "grad_norm": 1.899230201116082, + "learning_rate": 1.3708437436573352e-07, + "loss": 0.6491, + "step": 30958 + }, + { + "epoch": 0.9488476155449307, + "grad_norm": 1.937844311786075, + "learning_rate": 1.3692062617824742e-07, + "loss": 0.6288, + "step": 30959 + }, + { + "epoch": 0.948878264067672, + "grad_norm": 1.629866762025114, + "learning_rate": 1.3675697517420482e-07, + "loss": 0.623, + "step": 30960 + }, + { + "epoch": 0.9489089125904131, + "grad_norm": 1.7965848818017442, + "learning_rate": 1.3659342135522225e-07, + "loss": 0.6582, + "step": 30961 + }, + { + "epoch": 0.9489395611131544, + "grad_norm": 1.5215207629589038, + "learning_rate": 1.3642996472290727e-07, + "loss": 0.5506, + "step": 30962 + }, + { + "epoch": 0.9489702096358955, + "grad_norm": 0.6642849124089266, + "learning_rate": 1.362666052788708e-07, + "loss": 0.5199, + "step": 30963 + }, + { + "epoch": 0.9490008581586368, + "grad_norm": 1.7183191076432327, + "learning_rate": 1.3610334302472273e-07, + "loss": 0.5983, + "step": 30964 + }, + { + "epoch": 0.9490315066813779, + "grad_norm": 0.6688589151209923, + "learning_rate": 1.3594017796207394e-07, + "loss": 0.5191, + "step": 30965 + }, + { + "epoch": 0.9490621552041192, + "grad_norm": 1.667346139190887, + "learning_rate": 1.357771100925287e-07, + "loss": 0.5705, + "step": 30966 + }, + { + "epoch": 0.9490928037268603, + "grad_norm": 1.7759247633714321, + "learning_rate": 1.3561413941769576e-07, + "loss": 0.6367, + "step": 30967 + }, + { + "epoch": 0.9491234522496016, + "grad_norm": 1.633115525299695, + "learning_rate": 1.3545126593918158e-07, + "loss": 0.5928, + "step": 30968 + }, + { + "epoch": 0.9491541007723427, + "grad_norm": 1.673388897694574, + "learning_rate": 1.352884896585893e-07, + "loss": 0.6746, + "step": 30969 + }, + { + "epoch": 0.949184749295084, + "grad_norm": 1.7938036276508411, + "learning_rate": 1.351258105775244e-07, + "loss": 0.6275, + "step": 30970 + }, + { + "epoch": 0.9492153978178252, + "grad_norm": 1.7386499580376595, + "learning_rate": 1.3496322869758772e-07, + "loss": 0.6479, + "step": 30971 + }, + { + "epoch": 0.9492460463405664, + "grad_norm": 0.6880768586628984, + "learning_rate": 1.3480074402038357e-07, + "loss": 0.5294, + "step": 30972 + }, + { + "epoch": 0.9492766948633076, + "grad_norm": 2.001444649806287, + "learning_rate": 1.3463835654751179e-07, + "loss": 0.6298, + "step": 30973 + }, + { + "epoch": 0.9493073433860488, + "grad_norm": 1.8126804145410016, + "learning_rate": 1.3447606628057108e-07, + "loss": 0.6566, + "step": 30974 + }, + { + "epoch": 0.94933799190879, + "grad_norm": 1.6574792636223126, + "learning_rate": 1.343138732211624e-07, + "loss": 0.7126, + "step": 30975 + }, + { + "epoch": 0.9493686404315312, + "grad_norm": 1.9497677337598278, + "learning_rate": 1.3415177737088336e-07, + "loss": 0.6423, + "step": 30976 + }, + { + "epoch": 0.9493992889542724, + "grad_norm": 1.812714856290376, + "learning_rate": 1.3398977873133268e-07, + "loss": 0.6905, + "step": 30977 + }, + { + "epoch": 0.9494299374770137, + "grad_norm": 1.90400917339067, + "learning_rate": 1.3382787730410352e-07, + "loss": 0.6884, + "step": 30978 + }, + { + "epoch": 0.9494605859997548, + "grad_norm": 1.756052966901787, + "learning_rate": 1.3366607309079238e-07, + "loss": 0.6627, + "step": 30979 + }, + { + "epoch": 0.9494912345224961, + "grad_norm": 1.8341895340298415, + "learning_rate": 1.3350436609299467e-07, + "loss": 0.6032, + "step": 30980 + }, + { + "epoch": 0.9495218830452372, + "grad_norm": 1.6088949770760874, + "learning_rate": 1.3334275631230353e-07, + "loss": 0.5721, + "step": 30981 + }, + { + "epoch": 0.9495525315679785, + "grad_norm": 1.5648684866977853, + "learning_rate": 1.3318124375030995e-07, + "loss": 0.6831, + "step": 30982 + }, + { + "epoch": 0.9495831800907196, + "grad_norm": 2.1371880208304885, + "learning_rate": 1.3301982840860482e-07, + "loss": 0.7672, + "step": 30983 + }, + { + "epoch": 0.9496138286134609, + "grad_norm": 1.7880381353711827, + "learning_rate": 1.328585102887825e-07, + "loss": 0.6629, + "step": 30984 + }, + { + "epoch": 0.949644477136202, + "grad_norm": 1.5980922588949051, + "learning_rate": 1.3269728939242722e-07, + "loss": 0.5811, + "step": 30985 + }, + { + "epoch": 0.9496751256589432, + "grad_norm": 1.8624262589577185, + "learning_rate": 1.3253616572113215e-07, + "loss": 0.6418, + "step": 30986 + }, + { + "epoch": 0.9497057741816844, + "grad_norm": 1.6773364025497193, + "learning_rate": 1.323751392764816e-07, + "loss": 0.7248, + "step": 30987 + }, + { + "epoch": 0.9497364227044256, + "grad_norm": 1.56487308768271, + "learning_rate": 1.322142100600643e-07, + "loss": 0.643, + "step": 30988 + }, + { + "epoch": 0.9497670712271669, + "grad_norm": 1.5858920774970018, + "learning_rate": 1.320533780734645e-07, + "loss": 0.546, + "step": 30989 + }, + { + "epoch": 0.949797719749908, + "grad_norm": 0.691690504294578, + "learning_rate": 1.318926433182688e-07, + "loss": 0.4956, + "step": 30990 + }, + { + "epoch": 0.9498283682726493, + "grad_norm": 1.6647293770944378, + "learning_rate": 1.3173200579605916e-07, + "loss": 0.5521, + "step": 30991 + }, + { + "epoch": 0.9498590167953904, + "grad_norm": 1.6652795336926405, + "learning_rate": 1.3157146550841882e-07, + "loss": 0.588, + "step": 30992 + }, + { + "epoch": 0.9498896653181317, + "grad_norm": 1.7378865258444822, + "learning_rate": 1.3141102245692982e-07, + "loss": 0.5647, + "step": 30993 + }, + { + "epoch": 0.9499203138408728, + "grad_norm": 1.5529916034181763, + "learning_rate": 1.3125067664317314e-07, + "loss": 0.5109, + "step": 30994 + }, + { + "epoch": 0.9499509623636141, + "grad_norm": 1.7815064195629629, + "learning_rate": 1.3109042806872752e-07, + "loss": 0.682, + "step": 30995 + }, + { + "epoch": 0.9499816108863552, + "grad_norm": 1.9090511770222314, + "learning_rate": 1.30930276735175e-07, + "loss": 0.6884, + "step": 30996 + }, + { + "epoch": 0.9500122594090965, + "grad_norm": 1.8590224534156559, + "learning_rate": 1.307702226440899e-07, + "loss": 0.523, + "step": 30997 + }, + { + "epoch": 0.9500429079318377, + "grad_norm": 1.6725331015835887, + "learning_rate": 1.3061026579705206e-07, + "loss": 0.6182, + "step": 30998 + }, + { + "epoch": 0.9500735564545789, + "grad_norm": 1.9745406541756605, + "learning_rate": 1.3045040619563576e-07, + "loss": 0.6919, + "step": 30999 + }, + { + "epoch": 0.9501042049773201, + "grad_norm": 1.572710508492769, + "learning_rate": 1.3029064384141753e-07, + "loss": 0.6319, + "step": 31000 + }, + { + "epoch": 0.9501348535000613, + "grad_norm": 0.6788160378359152, + "learning_rate": 1.3013097873596947e-07, + "loss": 0.5098, + "step": 31001 + }, + { + "epoch": 0.9501655020228025, + "grad_norm": 1.6744759745238729, + "learning_rate": 1.2997141088086696e-07, + "loss": 0.6756, + "step": 31002 + }, + { + "epoch": 0.9501961505455437, + "grad_norm": 1.7629447045742561, + "learning_rate": 1.2981194027768206e-07, + "loss": 0.6951, + "step": 31003 + }, + { + "epoch": 0.9502267990682849, + "grad_norm": 1.741927143997792, + "learning_rate": 1.2965256692798578e-07, + "loss": 0.6609, + "step": 31004 + }, + { + "epoch": 0.9502574475910261, + "grad_norm": 1.6926362282793952, + "learning_rate": 1.2949329083334683e-07, + "loss": 0.6601, + "step": 31005 + }, + { + "epoch": 0.9502880961137673, + "grad_norm": 1.6754350858557403, + "learning_rate": 1.2933411199533618e-07, + "loss": 0.6832, + "step": 31006 + }, + { + "epoch": 0.9503187446365086, + "grad_norm": 1.7301526015470965, + "learning_rate": 1.291750304155226e-07, + "loss": 0.6787, + "step": 31007 + }, + { + "epoch": 0.9503493931592497, + "grad_norm": 1.8731133561076738, + "learning_rate": 1.2901604609547258e-07, + "loss": 0.674, + "step": 31008 + }, + { + "epoch": 0.950380041681991, + "grad_norm": 1.6514047881137284, + "learning_rate": 1.2885715903675379e-07, + "loss": 0.6121, + "step": 31009 + }, + { + "epoch": 0.9504106902047321, + "grad_norm": 1.8086565970533954, + "learning_rate": 1.286983692409305e-07, + "loss": 0.7199, + "step": 31010 + }, + { + "epoch": 0.9504413387274734, + "grad_norm": 0.6596363996373991, + "learning_rate": 1.2853967670956924e-07, + "loss": 0.5273, + "step": 31011 + }, + { + "epoch": 0.9504719872502145, + "grad_norm": 1.5821081254013958, + "learning_rate": 1.283810814442299e-07, + "loss": 0.6545, + "step": 31012 + }, + { + "epoch": 0.9505026357729558, + "grad_norm": 1.860801766141471, + "learning_rate": 1.2822258344647897e-07, + "loss": 0.7105, + "step": 31013 + }, + { + "epoch": 0.950533284295697, + "grad_norm": 1.8659770309238521, + "learning_rate": 1.2806418271787636e-07, + "loss": 0.5333, + "step": 31014 + }, + { + "epoch": 0.9505639328184382, + "grad_norm": 1.7548322452261618, + "learning_rate": 1.27905879259983e-07, + "loss": 0.6442, + "step": 31015 + }, + { + "epoch": 0.9505945813411794, + "grad_norm": 0.6752946786686964, + "learning_rate": 1.2774767307435876e-07, + "loss": 0.5016, + "step": 31016 + }, + { + "epoch": 0.9506252298639205, + "grad_norm": 1.7247534609704642, + "learning_rate": 1.2758956416256352e-07, + "loss": 0.654, + "step": 31017 + }, + { + "epoch": 0.9506558783866618, + "grad_norm": 1.5869596439491014, + "learning_rate": 1.274315525261538e-07, + "loss": 0.6508, + "step": 31018 + }, + { + "epoch": 0.9506865269094029, + "grad_norm": 1.7505571178667467, + "learning_rate": 1.2727363816668615e-07, + "loss": 0.6351, + "step": 31019 + }, + { + "epoch": 0.9507171754321442, + "grad_norm": 1.862402325113081, + "learning_rate": 1.2711582108571817e-07, + "loss": 0.7802, + "step": 31020 + }, + { + "epoch": 0.9507478239548853, + "grad_norm": 1.6015315808211874, + "learning_rate": 1.2695810128480423e-07, + "loss": 0.5553, + "step": 31021 + }, + { + "epoch": 0.9507784724776266, + "grad_norm": 1.4775774026862403, + "learning_rate": 1.2680047876549863e-07, + "loss": 0.4795, + "step": 31022 + }, + { + "epoch": 0.9508091210003677, + "grad_norm": 1.8179129627667214, + "learning_rate": 1.2664295352935342e-07, + "loss": 0.6333, + "step": 31023 + }, + { + "epoch": 0.950839769523109, + "grad_norm": 1.752228063366808, + "learning_rate": 1.2648552557792183e-07, + "loss": 0.7321, + "step": 31024 + }, + { + "epoch": 0.9508704180458502, + "grad_norm": 1.5861589090255048, + "learning_rate": 1.263281949127537e-07, + "loss": 0.5541, + "step": 31025 + }, + { + "epoch": 0.9509010665685914, + "grad_norm": 0.6759168897294926, + "learning_rate": 1.261709615354012e-07, + "loss": 0.5077, + "step": 31026 + }, + { + "epoch": 0.9509317150913326, + "grad_norm": 1.6774392393187365, + "learning_rate": 1.2601382544741191e-07, + "loss": 0.596, + "step": 31027 + }, + { + "epoch": 0.9509623636140738, + "grad_norm": 1.7637203351814719, + "learning_rate": 1.2585678665033462e-07, + "loss": 0.6295, + "step": 31028 + }, + { + "epoch": 0.950993012136815, + "grad_norm": 1.6747421741651867, + "learning_rate": 1.2569984514571808e-07, + "loss": 0.6658, + "step": 31029 + }, + { + "epoch": 0.9510236606595562, + "grad_norm": 1.7807592124291622, + "learning_rate": 1.2554300093510553e-07, + "loss": 0.6227, + "step": 31030 + }, + { + "epoch": 0.9510543091822974, + "grad_norm": 1.6783212397713716, + "learning_rate": 1.2538625402004567e-07, + "loss": 0.7069, + "step": 31031 + }, + { + "epoch": 0.9510849577050386, + "grad_norm": 1.8600406458350183, + "learning_rate": 1.2522960440208176e-07, + "loss": 0.5865, + "step": 31032 + }, + { + "epoch": 0.9511156062277798, + "grad_norm": 1.6837785158053424, + "learning_rate": 1.250730520827559e-07, + "loss": 0.6245, + "step": 31033 + }, + { + "epoch": 0.9511462547505211, + "grad_norm": 1.8714317661352704, + "learning_rate": 1.2491659706361236e-07, + "loss": 0.718, + "step": 31034 + }, + { + "epoch": 0.9511769032732622, + "grad_norm": 1.857845895716478, + "learning_rate": 1.247602393461922e-07, + "loss": 0.612, + "step": 31035 + }, + { + "epoch": 0.9512075517960035, + "grad_norm": 1.7746697240032194, + "learning_rate": 1.2460397893203635e-07, + "loss": 0.6393, + "step": 31036 + }, + { + "epoch": 0.9512382003187446, + "grad_norm": 1.7244706448926859, + "learning_rate": 1.2444781582268471e-07, + "loss": 0.637, + "step": 31037 + }, + { + "epoch": 0.9512688488414859, + "grad_norm": 0.6951341069118201, + "learning_rate": 1.2429175001967497e-07, + "loss": 0.522, + "step": 31038 + }, + { + "epoch": 0.951299497364227, + "grad_norm": 1.839531041924484, + "learning_rate": 1.2413578152454476e-07, + "loss": 0.6378, + "step": 31039 + }, + { + "epoch": 0.9513301458869683, + "grad_norm": 2.0090988476339406, + "learning_rate": 1.2397991033883284e-07, + "loss": 0.6005, + "step": 31040 + }, + { + "epoch": 0.9513607944097094, + "grad_norm": 1.8444428299565805, + "learning_rate": 1.2382413646407244e-07, + "loss": 0.6143, + "step": 31041 + }, + { + "epoch": 0.9513914429324507, + "grad_norm": 1.9980717376139778, + "learning_rate": 1.236684599018001e-07, + "loss": 0.597, + "step": 31042 + }, + { + "epoch": 0.9514220914551919, + "grad_norm": 1.7003029116786736, + "learning_rate": 1.2351288065355015e-07, + "loss": 0.6411, + "step": 31043 + }, + { + "epoch": 0.9514527399779331, + "grad_norm": 1.721595982765769, + "learning_rate": 1.2335739872085474e-07, + "loss": 0.6179, + "step": 31044 + }, + { + "epoch": 0.9514833885006743, + "grad_norm": 1.5758928984694967, + "learning_rate": 1.232020141052459e-07, + "loss": 0.6595, + "step": 31045 + }, + { + "epoch": 0.9515140370234155, + "grad_norm": 1.8113155781301988, + "learning_rate": 1.2304672680825357e-07, + "loss": 0.6491, + "step": 31046 + }, + { + "epoch": 0.9515446855461567, + "grad_norm": 0.6540401037324629, + "learning_rate": 1.2289153683140987e-07, + "loss": 0.4955, + "step": 31047 + }, + { + "epoch": 0.9515753340688978, + "grad_norm": 1.6635754471815953, + "learning_rate": 1.2273644417624243e-07, + "loss": 0.5997, + "step": 31048 + }, + { + "epoch": 0.9516059825916391, + "grad_norm": 1.8925792400287376, + "learning_rate": 1.2258144884428114e-07, + "loss": 0.7529, + "step": 31049 + }, + { + "epoch": 0.9516366311143802, + "grad_norm": 1.7379367268562746, + "learning_rate": 1.2242655083705034e-07, + "loss": 0.6572, + "step": 31050 + }, + { + "epoch": 0.9516672796371215, + "grad_norm": 1.686546417026666, + "learning_rate": 1.2227175015607995e-07, + "loss": 0.5721, + "step": 31051 + }, + { + "epoch": 0.9516979281598626, + "grad_norm": 1.8453718413628546, + "learning_rate": 1.2211704680289204e-07, + "loss": 0.6722, + "step": 31052 + }, + { + "epoch": 0.9517285766826039, + "grad_norm": 1.8692400502562907, + "learning_rate": 1.219624407790121e-07, + "loss": 0.5779, + "step": 31053 + }, + { + "epoch": 0.9517592252053451, + "grad_norm": 1.7545363388458401, + "learning_rate": 1.2180793208596553e-07, + "loss": 0.5911, + "step": 31054 + }, + { + "epoch": 0.9517898737280863, + "grad_norm": 1.8044514143503323, + "learning_rate": 1.2165352072527116e-07, + "loss": 0.6321, + "step": 31055 + }, + { + "epoch": 0.9518205222508275, + "grad_norm": 1.695950907156294, + "learning_rate": 1.2149920669845217e-07, + "loss": 0.6196, + "step": 31056 + }, + { + "epoch": 0.9518511707735687, + "grad_norm": 1.6384127396055703, + "learning_rate": 1.213449900070296e-07, + "loss": 0.5373, + "step": 31057 + }, + { + "epoch": 0.9518818192963099, + "grad_norm": 1.6580750143213858, + "learning_rate": 1.2119087065252223e-07, + "loss": 0.5977, + "step": 31058 + }, + { + "epoch": 0.9519124678190511, + "grad_norm": 1.61843339985123, + "learning_rate": 1.2103684863644884e-07, + "loss": 0.5945, + "step": 31059 + }, + { + "epoch": 0.9519431163417923, + "grad_norm": 1.7640147514390203, + "learning_rate": 1.2088292396032598e-07, + "loss": 0.6425, + "step": 31060 + }, + { + "epoch": 0.9519737648645336, + "grad_norm": 1.6189258360382413, + "learning_rate": 1.2072909662567245e-07, + "loss": 0.6066, + "step": 31061 + }, + { + "epoch": 0.9520044133872747, + "grad_norm": 0.6665578710568866, + "learning_rate": 1.205753666340026e-07, + "loss": 0.5164, + "step": 31062 + }, + { + "epoch": 0.952035061910016, + "grad_norm": 1.716228281804362, + "learning_rate": 1.2042173398683187e-07, + "loss": 0.6217, + "step": 31063 + }, + { + "epoch": 0.9520657104327571, + "grad_norm": 2.142204407390708, + "learning_rate": 1.202681986856724e-07, + "loss": 0.7129, + "step": 31064 + }, + { + "epoch": 0.9520963589554984, + "grad_norm": 1.7466054298268099, + "learning_rate": 1.2011476073203964e-07, + "loss": 0.5559, + "step": 31065 + }, + { + "epoch": 0.9521270074782395, + "grad_norm": 1.9417143008925775, + "learning_rate": 1.199614201274435e-07, + "loss": 0.6572, + "step": 31066 + }, + { + "epoch": 0.9521576560009808, + "grad_norm": 1.6158376700738206, + "learning_rate": 1.1980817687339607e-07, + "loss": 0.6848, + "step": 31067 + }, + { + "epoch": 0.9521883045237219, + "grad_norm": 1.7383910945585288, + "learning_rate": 1.1965503097140507e-07, + "loss": 0.6831, + "step": 31068 + }, + { + "epoch": 0.9522189530464632, + "grad_norm": 0.665424887465409, + "learning_rate": 1.195019824229815e-07, + "loss": 0.509, + "step": 31069 + }, + { + "epoch": 0.9522496015692044, + "grad_norm": 1.5857052662046192, + "learning_rate": 1.1934903122963415e-07, + "loss": 0.576, + "step": 31070 + }, + { + "epoch": 0.9522802500919456, + "grad_norm": 1.756295265617958, + "learning_rate": 1.1919617739286738e-07, + "loss": 0.6402, + "step": 31071 + }, + { + "epoch": 0.9523108986146868, + "grad_norm": 1.9155503577064408, + "learning_rate": 1.1904342091418886e-07, + "loss": 0.6163, + "step": 31072 + }, + { + "epoch": 0.952341547137428, + "grad_norm": 0.6794686643128414, + "learning_rate": 1.1889076179510516e-07, + "loss": 0.5122, + "step": 31073 + }, + { + "epoch": 0.9523721956601692, + "grad_norm": 1.6814632376927707, + "learning_rate": 1.1873820003711734e-07, + "loss": 0.7442, + "step": 31074 + }, + { + "epoch": 0.9524028441829104, + "grad_norm": 1.7266949920069683, + "learning_rate": 1.1858573564173081e-07, + "loss": 0.7286, + "step": 31075 + }, + { + "epoch": 0.9524334927056516, + "grad_norm": 2.0888919810093816, + "learning_rate": 1.1843336861044774e-07, + "loss": 0.5699, + "step": 31076 + }, + { + "epoch": 0.9524641412283928, + "grad_norm": 1.636782525719394, + "learning_rate": 1.1828109894476914e-07, + "loss": 0.6351, + "step": 31077 + }, + { + "epoch": 0.952494789751134, + "grad_norm": 0.6764413399881375, + "learning_rate": 1.181289266461949e-07, + "loss": 0.506, + "step": 31078 + }, + { + "epoch": 0.9525254382738751, + "grad_norm": 0.672008426014271, + "learning_rate": 1.1797685171622386e-07, + "loss": 0.523, + "step": 31079 + }, + { + "epoch": 0.9525560867966164, + "grad_norm": 1.6777375613051126, + "learning_rate": 1.1782487415635591e-07, + "loss": 0.6291, + "step": 31080 + }, + { + "epoch": 0.9525867353193576, + "grad_norm": 1.6859306245798222, + "learning_rate": 1.1767299396808874e-07, + "loss": 0.6876, + "step": 31081 + }, + { + "epoch": 0.9526173838420988, + "grad_norm": 1.8498011406602692, + "learning_rate": 1.175212111529167e-07, + "loss": 0.6744, + "step": 31082 + }, + { + "epoch": 0.95264803236484, + "grad_norm": 1.8613784313242578, + "learning_rate": 1.1736952571233751e-07, + "loss": 0.6102, + "step": 31083 + }, + { + "epoch": 0.9526786808875812, + "grad_norm": 2.020765341498056, + "learning_rate": 1.1721793764784551e-07, + "loss": 0.6033, + "step": 31084 + }, + { + "epoch": 0.9527093294103224, + "grad_norm": 1.7638730707074541, + "learning_rate": 1.1706644696093283e-07, + "loss": 0.6714, + "step": 31085 + }, + { + "epoch": 0.9527399779330636, + "grad_norm": 1.9028765148096776, + "learning_rate": 1.1691505365309385e-07, + "loss": 0.6202, + "step": 31086 + }, + { + "epoch": 0.9527706264558048, + "grad_norm": 1.877124191843646, + "learning_rate": 1.167637577258185e-07, + "loss": 0.6759, + "step": 31087 + }, + { + "epoch": 0.952801274978546, + "grad_norm": 1.7419393903463112, + "learning_rate": 1.1661255918059889e-07, + "loss": 0.6006, + "step": 31088 + }, + { + "epoch": 0.9528319235012872, + "grad_norm": 1.606720434320391, + "learning_rate": 1.1646145801892606e-07, + "loss": 0.6505, + "step": 31089 + }, + { + "epoch": 0.9528625720240285, + "grad_norm": 2.042174099584643, + "learning_rate": 1.1631045424228548e-07, + "loss": 0.7744, + "step": 31090 + }, + { + "epoch": 0.9528932205467696, + "grad_norm": 1.8208955562707183, + "learning_rate": 1.1615954785216709e-07, + "loss": 0.6789, + "step": 31091 + }, + { + "epoch": 0.9529238690695109, + "grad_norm": 1.6501862106964351, + "learning_rate": 1.160087388500586e-07, + "loss": 0.5858, + "step": 31092 + }, + { + "epoch": 0.952954517592252, + "grad_norm": 1.5134704319157237, + "learning_rate": 1.1585802723744432e-07, + "loss": 0.5075, + "step": 31093 + }, + { + "epoch": 0.9529851661149933, + "grad_norm": 1.8426534204050755, + "learning_rate": 1.1570741301580867e-07, + "loss": 0.6282, + "step": 31094 + }, + { + "epoch": 0.9530158146377344, + "grad_norm": 1.7307857210589832, + "learning_rate": 1.1555689618663823e-07, + "loss": 0.5625, + "step": 31095 + }, + { + "epoch": 0.9530464631604757, + "grad_norm": 1.7281599758081736, + "learning_rate": 1.1540647675141514e-07, + "loss": 0.6708, + "step": 31096 + }, + { + "epoch": 0.9530771116832168, + "grad_norm": 1.7944883006603582, + "learning_rate": 1.1525615471162044e-07, + "loss": 0.5902, + "step": 31097 + }, + { + "epoch": 0.9531077602059581, + "grad_norm": 1.7881622535857684, + "learning_rate": 1.1510593006873516e-07, + "loss": 0.6537, + "step": 31098 + }, + { + "epoch": 0.9531384087286993, + "grad_norm": 1.703511617061211, + "learning_rate": 1.1495580282424146e-07, + "loss": 0.4709, + "step": 31099 + }, + { + "epoch": 0.9531690572514405, + "grad_norm": 1.8413317479126308, + "learning_rate": 1.1480577297961815e-07, + "loss": 0.6039, + "step": 31100 + }, + { + "epoch": 0.9531997057741817, + "grad_norm": 1.6237500284942077, + "learning_rate": 1.1465584053634071e-07, + "loss": 0.6198, + "step": 31101 + }, + { + "epoch": 0.9532303542969229, + "grad_norm": 1.5848592093323504, + "learning_rate": 1.1450600549588908e-07, + "loss": 0.5797, + "step": 31102 + }, + { + "epoch": 0.9532610028196641, + "grad_norm": 1.6688217476791118, + "learning_rate": 1.1435626785973986e-07, + "loss": 0.5825, + "step": 31103 + }, + { + "epoch": 0.9532916513424053, + "grad_norm": 1.9587709604510648, + "learning_rate": 1.142066276293674e-07, + "loss": 0.6439, + "step": 31104 + }, + { + "epoch": 0.9533222998651465, + "grad_norm": 0.6688962653534888, + "learning_rate": 1.1405708480624723e-07, + "loss": 0.5384, + "step": 31105 + }, + { + "epoch": 0.9533529483878878, + "grad_norm": 1.8520439691803496, + "learning_rate": 1.1390763939185035e-07, + "loss": 0.6266, + "step": 31106 + }, + { + "epoch": 0.9533835969106289, + "grad_norm": 1.8413944950562635, + "learning_rate": 1.1375829138765227e-07, + "loss": 0.6374, + "step": 31107 + }, + { + "epoch": 0.9534142454333702, + "grad_norm": 1.9169813920266106, + "learning_rate": 1.1360904079512291e-07, + "loss": 0.6431, + "step": 31108 + }, + { + "epoch": 0.9534448939561113, + "grad_norm": 1.900131980283888, + "learning_rate": 1.1345988761573334e-07, + "loss": 0.6975, + "step": 31109 + }, + { + "epoch": 0.9534755424788525, + "grad_norm": 1.656803049420239, + "learning_rate": 1.1331083185095238e-07, + "loss": 0.5994, + "step": 31110 + }, + { + "epoch": 0.9535061910015937, + "grad_norm": 1.648524060937901, + "learning_rate": 1.1316187350225105e-07, + "loss": 0.6125, + "step": 31111 + }, + { + "epoch": 0.9535368395243349, + "grad_norm": 2.220864717039143, + "learning_rate": 1.1301301257109376e-07, + "loss": 0.6093, + "step": 31112 + }, + { + "epoch": 0.9535674880470761, + "grad_norm": 1.6292154420428546, + "learning_rate": 1.1286424905894932e-07, + "loss": 0.6224, + "step": 31113 + }, + { + "epoch": 0.9535981365698173, + "grad_norm": 1.5487148657102259, + "learning_rate": 1.1271558296728324e-07, + "loss": 0.6148, + "step": 31114 + }, + { + "epoch": 0.9536287850925586, + "grad_norm": 1.6424237282787115, + "learning_rate": 1.1256701429756101e-07, + "loss": 0.5961, + "step": 31115 + }, + { + "epoch": 0.9536594336152997, + "grad_norm": 0.6916889278388318, + "learning_rate": 1.1241854305124477e-07, + "loss": 0.5042, + "step": 31116 + }, + { + "epoch": 0.953690082138041, + "grad_norm": 1.7915706689043778, + "learning_rate": 1.1227016922979894e-07, + "loss": 0.7026, + "step": 31117 + }, + { + "epoch": 0.9537207306607821, + "grad_norm": 1.7445146260362683, + "learning_rate": 1.1212189283468455e-07, + "loss": 0.6293, + "step": 31118 + }, + { + "epoch": 0.9537513791835234, + "grad_norm": 1.7574680063936108, + "learning_rate": 1.1197371386736377e-07, + "loss": 0.6447, + "step": 31119 + }, + { + "epoch": 0.9537820277062645, + "grad_norm": 1.8713716951005888, + "learning_rate": 1.1182563232929544e-07, + "loss": 0.5914, + "step": 31120 + }, + { + "epoch": 0.9538126762290058, + "grad_norm": 1.667808690896685, + "learning_rate": 1.1167764822193949e-07, + "loss": 0.6714, + "step": 31121 + }, + { + "epoch": 0.9538433247517469, + "grad_norm": 2.0666830011750537, + "learning_rate": 1.1152976154675365e-07, + "loss": 0.6097, + "step": 31122 + }, + { + "epoch": 0.9538739732744882, + "grad_norm": 1.6375317464017551, + "learning_rate": 1.1138197230519565e-07, + "loss": 0.6308, + "step": 31123 + }, + { + "epoch": 0.9539046217972293, + "grad_norm": 0.6742964448969117, + "learning_rate": 1.1123428049871987e-07, + "loss": 0.5473, + "step": 31124 + }, + { + "epoch": 0.9539352703199706, + "grad_norm": 1.9082050047596189, + "learning_rate": 1.1108668612878403e-07, + "loss": 0.6869, + "step": 31125 + }, + { + "epoch": 0.9539659188427118, + "grad_norm": 1.7487939966191004, + "learning_rate": 1.1093918919684033e-07, + "loss": 0.5429, + "step": 31126 + }, + { + "epoch": 0.953996567365453, + "grad_norm": 1.7280982957725768, + "learning_rate": 1.1079178970434423e-07, + "loss": 0.6239, + "step": 31127 + }, + { + "epoch": 0.9540272158881942, + "grad_norm": 1.6927114765684788, + "learning_rate": 1.1064448765274572e-07, + "loss": 0.6761, + "step": 31128 + }, + { + "epoch": 0.9540578644109354, + "grad_norm": 1.64005735799342, + "learning_rate": 1.1049728304349805e-07, + "loss": 0.6557, + "step": 31129 + }, + { + "epoch": 0.9540885129336766, + "grad_norm": 1.6547727730615927, + "learning_rate": 1.1035017587805119e-07, + "loss": 0.6726, + "step": 31130 + }, + { + "epoch": 0.9541191614564178, + "grad_norm": 0.6689424528631657, + "learning_rate": 1.1020316615785398e-07, + "loss": 0.5082, + "step": 31131 + }, + { + "epoch": 0.954149809979159, + "grad_norm": 1.7473886171565676, + "learning_rate": 1.1005625388435525e-07, + "loss": 0.6666, + "step": 31132 + }, + { + "epoch": 0.9541804585019003, + "grad_norm": 1.838077296269892, + "learning_rate": 1.0990943905900275e-07, + "loss": 0.5165, + "step": 31133 + }, + { + "epoch": 0.9542111070246414, + "grad_norm": 1.7800726542398788, + "learning_rate": 1.097627216832431e-07, + "loss": 0.6585, + "step": 31134 + }, + { + "epoch": 0.9542417555473827, + "grad_norm": 1.636099315582382, + "learning_rate": 1.0961610175852178e-07, + "loss": 0.6897, + "step": 31135 + }, + { + "epoch": 0.9542724040701238, + "grad_norm": 1.8703182411504296, + "learning_rate": 1.0946957928628432e-07, + "loss": 0.7068, + "step": 31136 + }, + { + "epoch": 0.9543030525928651, + "grad_norm": 1.7970857887097258, + "learning_rate": 1.0932315426797291e-07, + "loss": 0.5, + "step": 31137 + }, + { + "epoch": 0.9543337011156062, + "grad_norm": 1.6002060165214185, + "learning_rate": 1.0917682670503194e-07, + "loss": 0.6484, + "step": 31138 + }, + { + "epoch": 0.9543643496383475, + "grad_norm": 1.7871597930568073, + "learning_rate": 1.0903059659890025e-07, + "loss": 0.6869, + "step": 31139 + }, + { + "epoch": 0.9543949981610886, + "grad_norm": 1.8522339478238743, + "learning_rate": 1.0888446395102336e-07, + "loss": 0.7956, + "step": 31140 + }, + { + "epoch": 0.9544256466838298, + "grad_norm": 1.8957273126098337, + "learning_rate": 1.087384287628368e-07, + "loss": 0.708, + "step": 31141 + }, + { + "epoch": 0.954456295206571, + "grad_norm": 1.403262724971185, + "learning_rate": 1.0859249103578273e-07, + "loss": 0.4901, + "step": 31142 + }, + { + "epoch": 0.9544869437293122, + "grad_norm": 1.5409393553823985, + "learning_rate": 1.0844665077129668e-07, + "loss": 0.6166, + "step": 31143 + }, + { + "epoch": 0.9545175922520535, + "grad_norm": 1.9410508132254356, + "learning_rate": 1.0830090797081639e-07, + "loss": 0.7224, + "step": 31144 + }, + { + "epoch": 0.9545482407747946, + "grad_norm": 1.7417757896149009, + "learning_rate": 1.0815526263577958e-07, + "loss": 0.6824, + "step": 31145 + }, + { + "epoch": 0.9545788892975359, + "grad_norm": 2.027702377931104, + "learning_rate": 1.0800971476761845e-07, + "loss": 0.7029, + "step": 31146 + }, + { + "epoch": 0.954609537820277, + "grad_norm": 1.590193093105412, + "learning_rate": 1.0786426436776965e-07, + "loss": 0.6189, + "step": 31147 + }, + { + "epoch": 0.9546401863430183, + "grad_norm": 1.7949011469992908, + "learning_rate": 1.0771891143766533e-07, + "loss": 0.5636, + "step": 31148 + }, + { + "epoch": 0.9546708348657594, + "grad_norm": 0.6620965071885427, + "learning_rate": 1.0757365597873659e-07, + "loss": 0.5096, + "step": 31149 + }, + { + "epoch": 0.9547014833885007, + "grad_norm": 1.6677821857458006, + "learning_rate": 1.0742849799241561e-07, + "loss": 0.5548, + "step": 31150 + }, + { + "epoch": 0.9547321319112418, + "grad_norm": 1.8081005144518207, + "learning_rate": 1.0728343748013348e-07, + "loss": 0.5958, + "step": 31151 + }, + { + "epoch": 0.9547627804339831, + "grad_norm": 1.864939285967156, + "learning_rate": 1.0713847444331905e-07, + "loss": 0.6851, + "step": 31152 + }, + { + "epoch": 0.9547934289567243, + "grad_norm": 0.6842199223015584, + "learning_rate": 1.0699360888340005e-07, + "loss": 0.5145, + "step": 31153 + }, + { + "epoch": 0.9548240774794655, + "grad_norm": 1.5871135410260948, + "learning_rate": 1.0684884080180424e-07, + "loss": 0.6997, + "step": 31154 + }, + { + "epoch": 0.9548547260022067, + "grad_norm": 1.5715359574355945, + "learning_rate": 1.0670417019995716e-07, + "loss": 0.6581, + "step": 31155 + }, + { + "epoch": 0.9548853745249479, + "grad_norm": 1.7850655920745617, + "learning_rate": 1.0655959707928654e-07, + "loss": 0.6238, + "step": 31156 + }, + { + "epoch": 0.9549160230476891, + "grad_norm": 1.7511789885798121, + "learning_rate": 1.0641512144121568e-07, + "loss": 0.5875, + "step": 31157 + }, + { + "epoch": 0.9549466715704303, + "grad_norm": 1.5708638388765883, + "learning_rate": 1.062707432871668e-07, + "loss": 0.588, + "step": 31158 + }, + { + "epoch": 0.9549773200931715, + "grad_norm": 1.8129201898460663, + "learning_rate": 1.0612646261856541e-07, + "loss": 0.642, + "step": 31159 + }, + { + "epoch": 0.9550079686159128, + "grad_norm": 1.4735221755028776, + "learning_rate": 1.0598227943682926e-07, + "loss": 0.7471, + "step": 31160 + }, + { + "epoch": 0.9550386171386539, + "grad_norm": 1.631916106558414, + "learning_rate": 1.0583819374338278e-07, + "loss": 0.5583, + "step": 31161 + }, + { + "epoch": 0.9550692656613952, + "grad_norm": 1.8763591048065356, + "learning_rate": 1.0569420553964371e-07, + "loss": 0.6388, + "step": 31162 + }, + { + "epoch": 0.9550999141841363, + "grad_norm": 1.6755527035543425, + "learning_rate": 1.0555031482703093e-07, + "loss": 0.6694, + "step": 31163 + }, + { + "epoch": 0.9551305627068776, + "grad_norm": 1.8540063499354367, + "learning_rate": 1.0540652160696329e-07, + "loss": 0.6578, + "step": 31164 + }, + { + "epoch": 0.9551612112296187, + "grad_norm": 1.7979585084386822, + "learning_rate": 1.0526282588085634e-07, + "loss": 0.6446, + "step": 31165 + }, + { + "epoch": 0.95519185975236, + "grad_norm": 1.7128841585779477, + "learning_rate": 1.0511922765012561e-07, + "loss": 0.6763, + "step": 31166 + }, + { + "epoch": 0.9552225082751011, + "grad_norm": 1.8379970259777205, + "learning_rate": 1.0497572691618773e-07, + "loss": 0.6403, + "step": 31167 + }, + { + "epoch": 0.9552531567978424, + "grad_norm": 1.5968686537654546, + "learning_rate": 1.0483232368045603e-07, + "loss": 0.6094, + "step": 31168 + }, + { + "epoch": 0.9552838053205835, + "grad_norm": 1.5924015195721264, + "learning_rate": 1.0468901794434271e-07, + "loss": 0.588, + "step": 31169 + }, + { + "epoch": 0.9553144538433248, + "grad_norm": 0.6799384472562671, + "learning_rate": 1.0454580970925998e-07, + "loss": 0.5096, + "step": 31170 + }, + { + "epoch": 0.955345102366066, + "grad_norm": 1.6142694766287333, + "learning_rate": 1.0440269897662003e-07, + "loss": 0.561, + "step": 31171 + }, + { + "epoch": 0.9553757508888071, + "grad_norm": 1.6869397611526307, + "learning_rate": 1.0425968574783173e-07, + "loss": 0.5286, + "step": 31172 + }, + { + "epoch": 0.9554063994115484, + "grad_norm": 1.6369166797825307, + "learning_rate": 1.0411677002430509e-07, + "loss": 0.613, + "step": 31173 + }, + { + "epoch": 0.9554370479342895, + "grad_norm": 1.7962767290843955, + "learning_rate": 1.0397395180744785e-07, + "loss": 0.6613, + "step": 31174 + }, + { + "epoch": 0.9554676964570308, + "grad_norm": 1.6845092193350715, + "learning_rate": 1.0383123109866666e-07, + "loss": 0.6809, + "step": 31175 + }, + { + "epoch": 0.9554983449797719, + "grad_norm": 1.6770710184128004, + "learning_rate": 1.036886078993693e-07, + "loss": 0.6902, + "step": 31176 + }, + { + "epoch": 0.9555289935025132, + "grad_norm": 1.6638576984736413, + "learning_rate": 1.0354608221095907e-07, + "loss": 0.5923, + "step": 31177 + }, + { + "epoch": 0.9555596420252543, + "grad_norm": 1.8163510602749173, + "learning_rate": 1.0340365403484265e-07, + "loss": 0.6775, + "step": 31178 + }, + { + "epoch": 0.9555902905479956, + "grad_norm": 1.743469058359585, + "learning_rate": 1.0326132337242112e-07, + "loss": 0.6243, + "step": 31179 + }, + { + "epoch": 0.9556209390707368, + "grad_norm": 1.7496593240169445, + "learning_rate": 1.0311909022509781e-07, + "loss": 0.6001, + "step": 31180 + }, + { + "epoch": 0.955651587593478, + "grad_norm": 1.6280084374791524, + "learning_rate": 1.0297695459427493e-07, + "loss": 0.6724, + "step": 31181 + }, + { + "epoch": 0.9556822361162192, + "grad_norm": 1.9945115728535288, + "learning_rate": 1.0283491648135246e-07, + "loss": 0.6773, + "step": 31182 + }, + { + "epoch": 0.9557128846389604, + "grad_norm": 1.9204014545287276, + "learning_rate": 1.0269297588773041e-07, + "loss": 0.5729, + "step": 31183 + }, + { + "epoch": 0.9557435331617016, + "grad_norm": 1.6603212960640803, + "learning_rate": 1.0255113281480544e-07, + "loss": 0.5831, + "step": 31184 + }, + { + "epoch": 0.9557741816844428, + "grad_norm": 1.653338247558827, + "learning_rate": 1.0240938726397753e-07, + "loss": 0.6225, + "step": 31185 + }, + { + "epoch": 0.955804830207184, + "grad_norm": 1.6438777934172288, + "learning_rate": 1.0226773923664224e-07, + "loss": 0.588, + "step": 31186 + }, + { + "epoch": 0.9558354787299252, + "grad_norm": 0.6727043957101064, + "learning_rate": 1.0212618873419511e-07, + "loss": 0.5335, + "step": 31187 + }, + { + "epoch": 0.9558661272526664, + "grad_norm": 1.498182680276338, + "learning_rate": 1.0198473575803058e-07, + "loss": 0.4421, + "step": 31188 + }, + { + "epoch": 0.9558967757754077, + "grad_norm": 0.642962646382557, + "learning_rate": 1.0184338030954422e-07, + "loss": 0.4967, + "step": 31189 + }, + { + "epoch": 0.9559274242981488, + "grad_norm": 1.9568630129651128, + "learning_rate": 1.01702122390126e-07, + "loss": 0.5739, + "step": 31190 + }, + { + "epoch": 0.9559580728208901, + "grad_norm": 1.6011765269123819, + "learning_rate": 1.0156096200117039e-07, + "loss": 0.6524, + "step": 31191 + }, + { + "epoch": 0.9559887213436312, + "grad_norm": 1.7307016841774803, + "learning_rate": 1.0141989914406736e-07, + "loss": 0.6505, + "step": 31192 + }, + { + "epoch": 0.9560193698663725, + "grad_norm": 1.490722855740948, + "learning_rate": 1.0127893382020581e-07, + "loss": 0.5094, + "step": 31193 + }, + { + "epoch": 0.9560500183891136, + "grad_norm": 1.626680749944716, + "learning_rate": 1.0113806603097687e-07, + "loss": 0.5475, + "step": 31194 + }, + { + "epoch": 0.9560806669118549, + "grad_norm": 0.6721380521537633, + "learning_rate": 1.0099729577776607e-07, + "loss": 0.5218, + "step": 31195 + }, + { + "epoch": 0.956111315434596, + "grad_norm": 1.616007449534649, + "learning_rate": 1.0085662306196231e-07, + "loss": 0.6037, + "step": 31196 + }, + { + "epoch": 0.9561419639573373, + "grad_norm": 1.7490056137037608, + "learning_rate": 1.0071604788495227e-07, + "loss": 0.5782, + "step": 31197 + }, + { + "epoch": 0.9561726124800785, + "grad_norm": 1.6403658577379086, + "learning_rate": 1.0057557024811815e-07, + "loss": 0.7535, + "step": 31198 + }, + { + "epoch": 0.9562032610028197, + "grad_norm": 2.5025337545191935, + "learning_rate": 1.0043519015284553e-07, + "loss": 0.6114, + "step": 31199 + }, + { + "epoch": 0.9562339095255609, + "grad_norm": 1.6445463558496978, + "learning_rate": 1.0029490760051996e-07, + "loss": 0.556, + "step": 31200 + }, + { + "epoch": 0.9562645580483021, + "grad_norm": 1.9039503432939786, + "learning_rate": 1.0015472259251924e-07, + "loss": 0.7074, + "step": 31201 + }, + { + "epoch": 0.9562952065710433, + "grad_norm": 1.7124439173439625, + "learning_rate": 1.000146351302278e-07, + "loss": 0.6207, + "step": 31202 + }, + { + "epoch": 0.9563258550937844, + "grad_norm": 0.684792848250871, + "learning_rate": 9.987464521502566e-08, + "loss": 0.5357, + "step": 31203 + }, + { + "epoch": 0.9563565036165257, + "grad_norm": 1.7089192450665314, + "learning_rate": 9.97347528482917e-08, + "loss": 0.577, + "step": 31204 + }, + { + "epoch": 0.9563871521392668, + "grad_norm": 1.7825850751242496, + "learning_rate": 9.959495803140484e-08, + "loss": 0.6546, + "step": 31205 + }, + { + "epoch": 0.9564178006620081, + "grad_norm": 1.665617325316931, + "learning_rate": 9.945526076574063e-08, + "loss": 0.6754, + "step": 31206 + }, + { + "epoch": 0.9564484491847492, + "grad_norm": 0.6533350055637793, + "learning_rate": 9.931566105267799e-08, + "loss": 0.5158, + "step": 31207 + }, + { + "epoch": 0.9564790977074905, + "grad_norm": 1.5364947378274791, + "learning_rate": 9.917615889359134e-08, + "loss": 0.5756, + "step": 31208 + }, + { + "epoch": 0.9565097462302317, + "grad_norm": 1.7243060460584358, + "learning_rate": 9.903675428985405e-08, + "loss": 0.6364, + "step": 31209 + }, + { + "epoch": 0.9565403947529729, + "grad_norm": 1.8167921039898576, + "learning_rate": 9.889744724284167e-08, + "loss": 0.6308, + "step": 31210 + }, + { + "epoch": 0.9565710432757141, + "grad_norm": 1.7829093714037836, + "learning_rate": 9.875823775392645e-08, + "loss": 0.6711, + "step": 31211 + }, + { + "epoch": 0.9566016917984553, + "grad_norm": 1.9276239265112913, + "learning_rate": 9.861912582447841e-08, + "loss": 0.6356, + "step": 31212 + }, + { + "epoch": 0.9566323403211965, + "grad_norm": 0.6617834397363714, + "learning_rate": 9.848011145587088e-08, + "loss": 0.5137, + "step": 31213 + }, + { + "epoch": 0.9566629888439377, + "grad_norm": 1.9618119560980711, + "learning_rate": 9.834119464947056e-08, + "loss": 0.6713, + "step": 31214 + }, + { + "epoch": 0.9566936373666789, + "grad_norm": 1.5943217507673595, + "learning_rate": 9.820237540664967e-08, + "loss": 0.6395, + "step": 31215 + }, + { + "epoch": 0.9567242858894202, + "grad_norm": 1.5652778312341962, + "learning_rate": 9.80636537287738e-08, + "loss": 0.6114, + "step": 31216 + }, + { + "epoch": 0.9567549344121613, + "grad_norm": 1.7009326472518307, + "learning_rate": 9.792502961720963e-08, + "loss": 0.6193, + "step": 31217 + }, + { + "epoch": 0.9567855829349026, + "grad_norm": 1.6591506457793892, + "learning_rate": 9.778650307332494e-08, + "loss": 0.6167, + "step": 31218 + }, + { + "epoch": 0.9568162314576437, + "grad_norm": 1.6645673997750885, + "learning_rate": 9.764807409848199e-08, + "loss": 0.6024, + "step": 31219 + }, + { + "epoch": 0.956846879980385, + "grad_norm": 1.5388870420186271, + "learning_rate": 9.750974269404745e-08, + "loss": 0.589, + "step": 31220 + }, + { + "epoch": 0.9568775285031261, + "grad_norm": 1.8064548559347233, + "learning_rate": 9.737150886138136e-08, + "loss": 0.593, + "step": 31221 + }, + { + "epoch": 0.9569081770258674, + "grad_norm": 1.5887154879849716, + "learning_rate": 9.723337260184929e-08, + "loss": 0.5357, + "step": 31222 + }, + { + "epoch": 0.9569388255486085, + "grad_norm": 1.5734553325307774, + "learning_rate": 9.709533391681015e-08, + "loss": 0.5583, + "step": 31223 + }, + { + "epoch": 0.9569694740713498, + "grad_norm": 1.6181799701305948, + "learning_rate": 9.695739280762284e-08, + "loss": 0.6738, + "step": 31224 + }, + { + "epoch": 0.957000122594091, + "grad_norm": 1.7801616556902313, + "learning_rate": 9.681954927564962e-08, + "loss": 0.6704, + "step": 31225 + }, + { + "epoch": 0.9570307711168322, + "grad_norm": 1.665583177518374, + "learning_rate": 9.668180332224719e-08, + "loss": 0.6812, + "step": 31226 + }, + { + "epoch": 0.9570614196395734, + "grad_norm": 1.762558125896423, + "learning_rate": 9.654415494877334e-08, + "loss": 0.6833, + "step": 31227 + }, + { + "epoch": 0.9570920681623146, + "grad_norm": 1.7095960485697637, + "learning_rate": 9.640660415658254e-08, + "loss": 0.5691, + "step": 31228 + }, + { + "epoch": 0.9571227166850558, + "grad_norm": 0.6827812983369197, + "learning_rate": 9.62691509470326e-08, + "loss": 0.5205, + "step": 31229 + }, + { + "epoch": 0.957153365207797, + "grad_norm": 1.6938786691453684, + "learning_rate": 9.613179532147577e-08, + "loss": 0.5612, + "step": 31230 + }, + { + "epoch": 0.9571840137305382, + "grad_norm": 1.6574879316018336, + "learning_rate": 9.599453728126651e-08, + "loss": 0.6969, + "step": 31231 + }, + { + "epoch": 0.9572146622532794, + "grad_norm": 1.60161353736941, + "learning_rate": 9.585737682775708e-08, + "loss": 0.5494, + "step": 31232 + }, + { + "epoch": 0.9572453107760206, + "grad_norm": 0.6935656601999416, + "learning_rate": 9.572031396229975e-08, + "loss": 0.5312, + "step": 31233 + }, + { + "epoch": 0.9572759592987617, + "grad_norm": 1.7183463970471664, + "learning_rate": 9.558334868624342e-08, + "loss": 0.5276, + "step": 31234 + }, + { + "epoch": 0.957306607821503, + "grad_norm": 1.8458159552992013, + "learning_rate": 9.544648100093923e-08, + "loss": 0.7403, + "step": 31235 + }, + { + "epoch": 0.9573372563442442, + "grad_norm": 0.6816976639047183, + "learning_rate": 9.530971090773389e-08, + "loss": 0.5064, + "step": 31236 + }, + { + "epoch": 0.9573679048669854, + "grad_norm": 1.9242522719903596, + "learning_rate": 9.517303840797742e-08, + "loss": 0.7079, + "step": 31237 + }, + { + "epoch": 0.9573985533897266, + "grad_norm": 2.048955196616066, + "learning_rate": 9.503646350301543e-08, + "loss": 0.7187, + "step": 31238 + }, + { + "epoch": 0.9574292019124678, + "grad_norm": 1.9533903264199737, + "learning_rate": 9.489998619419239e-08, + "loss": 0.6745, + "step": 31239 + }, + { + "epoch": 0.957459850435209, + "grad_norm": 1.5670441368407477, + "learning_rate": 9.476360648285498e-08, + "loss": 0.4773, + "step": 31240 + }, + { + "epoch": 0.9574904989579502, + "grad_norm": 1.9526367152147146, + "learning_rate": 9.462732437034549e-08, + "loss": 0.6846, + "step": 31241 + }, + { + "epoch": 0.9575211474806914, + "grad_norm": 1.534077371575216, + "learning_rate": 9.449113985800729e-08, + "loss": 0.5082, + "step": 31242 + }, + { + "epoch": 0.9575517960034327, + "grad_norm": 1.815635907779636, + "learning_rate": 9.435505294718262e-08, + "loss": 0.6686, + "step": 31243 + }, + { + "epoch": 0.9575824445261738, + "grad_norm": 1.8294848164377262, + "learning_rate": 9.421906363921152e-08, + "loss": 0.5374, + "step": 31244 + }, + { + "epoch": 0.9576130930489151, + "grad_norm": 0.6515122721175823, + "learning_rate": 9.408317193543626e-08, + "loss": 0.5197, + "step": 31245 + }, + { + "epoch": 0.9576437415716562, + "grad_norm": 1.5988924280432864, + "learning_rate": 9.394737783719243e-08, + "loss": 0.5352, + "step": 31246 + }, + { + "epoch": 0.9576743900943975, + "grad_norm": 1.9816986805512584, + "learning_rate": 9.381168134582009e-08, + "loss": 0.6405, + "step": 31247 + }, + { + "epoch": 0.9577050386171386, + "grad_norm": 1.868158297597002, + "learning_rate": 9.367608246265591e-08, + "loss": 0.6759, + "step": 31248 + }, + { + "epoch": 0.9577356871398799, + "grad_norm": 0.7005908771479555, + "learning_rate": 9.354058118903552e-08, + "loss": 0.5346, + "step": 31249 + }, + { + "epoch": 0.957766335662621, + "grad_norm": 1.7500845012668342, + "learning_rate": 9.340517752629563e-08, + "loss": 0.6637, + "step": 31250 + }, + { + "epoch": 0.9577969841853623, + "grad_norm": 1.877934659520797, + "learning_rate": 9.326987147576738e-08, + "loss": 0.7076, + "step": 31251 + }, + { + "epoch": 0.9578276327081034, + "grad_norm": 1.666690192899966, + "learning_rate": 9.313466303878749e-08, + "loss": 0.6026, + "step": 31252 + }, + { + "epoch": 0.9578582812308447, + "grad_norm": 1.5194552566931674, + "learning_rate": 9.2999552216686e-08, + "loss": 0.5189, + "step": 31253 + }, + { + "epoch": 0.9578889297535859, + "grad_norm": 1.9892164356543218, + "learning_rate": 9.286453901079406e-08, + "loss": 0.6404, + "step": 31254 + }, + { + "epoch": 0.9579195782763271, + "grad_norm": 1.9917545014132367, + "learning_rate": 9.272962342244285e-08, + "loss": 0.681, + "step": 31255 + }, + { + "epoch": 0.9579502267990683, + "grad_norm": 1.6608532191237435, + "learning_rate": 9.259480545296239e-08, + "loss": 0.6508, + "step": 31256 + }, + { + "epoch": 0.9579808753218095, + "grad_norm": 1.7789092158718436, + "learning_rate": 9.246008510367943e-08, + "loss": 0.6096, + "step": 31257 + }, + { + "epoch": 0.9580115238445507, + "grad_norm": 1.735286284730563, + "learning_rate": 9.232546237592288e-08, + "loss": 0.6609, + "step": 31258 + }, + { + "epoch": 0.9580421723672919, + "grad_norm": 1.3658818507129236, + "learning_rate": 9.219093727101836e-08, + "loss": 0.5261, + "step": 31259 + }, + { + "epoch": 0.9580728208900331, + "grad_norm": 1.7933082469938166, + "learning_rate": 9.205650979029146e-08, + "loss": 0.5907, + "step": 31260 + }, + { + "epoch": 0.9581034694127744, + "grad_norm": 1.6400191453138167, + "learning_rate": 9.192217993506669e-08, + "loss": 0.6402, + "step": 31261 + }, + { + "epoch": 0.9581341179355155, + "grad_norm": 1.7116301797483984, + "learning_rate": 9.178794770666854e-08, + "loss": 0.603, + "step": 31262 + }, + { + "epoch": 0.9581647664582568, + "grad_norm": 1.7619123338771503, + "learning_rate": 9.165381310641708e-08, + "loss": 0.6911, + "step": 31263 + }, + { + "epoch": 0.9581954149809979, + "grad_norm": 0.6854442439298988, + "learning_rate": 9.15197761356379e-08, + "loss": 0.5319, + "step": 31264 + }, + { + "epoch": 0.9582260635037391, + "grad_norm": 1.7503795033140321, + "learning_rate": 9.138583679564772e-08, + "loss": 0.4942, + "step": 31265 + }, + { + "epoch": 0.9582567120264803, + "grad_norm": 1.721075152279246, + "learning_rate": 9.125199508776882e-08, + "loss": 0.583, + "step": 31266 + }, + { + "epoch": 0.9582873605492215, + "grad_norm": 1.8417233660236596, + "learning_rate": 9.111825101332017e-08, + "loss": 0.6421, + "step": 31267 + }, + { + "epoch": 0.9583180090719627, + "grad_norm": 1.73994522809725, + "learning_rate": 9.098460457361735e-08, + "loss": 0.7444, + "step": 31268 + }, + { + "epoch": 0.9583486575947039, + "grad_norm": 1.6053901205988388, + "learning_rate": 9.085105576997932e-08, + "loss": 0.6635, + "step": 31269 + }, + { + "epoch": 0.9583793061174452, + "grad_norm": 1.6016220211595655, + "learning_rate": 9.07176046037217e-08, + "loss": 0.573, + "step": 31270 + }, + { + "epoch": 0.9584099546401863, + "grad_norm": 1.7272392962162086, + "learning_rate": 9.058425107615787e-08, + "loss": 0.7209, + "step": 31271 + }, + { + "epoch": 0.9584406031629276, + "grad_norm": 1.7752668051757667, + "learning_rate": 9.045099518860346e-08, + "loss": 0.6093, + "step": 31272 + }, + { + "epoch": 0.9584712516856687, + "grad_norm": 1.8084347345700231, + "learning_rate": 9.031783694237073e-08, + "loss": 0.5965, + "step": 31273 + }, + { + "epoch": 0.95850190020841, + "grad_norm": 0.6360215435288685, + "learning_rate": 9.018477633877087e-08, + "loss": 0.5098, + "step": 31274 + }, + { + "epoch": 0.9585325487311511, + "grad_norm": 1.7485805176618217, + "learning_rate": 9.005181337911728e-08, + "loss": 0.5789, + "step": 31275 + }, + { + "epoch": 0.9585631972538924, + "grad_norm": 1.6231628131577023, + "learning_rate": 8.991894806471779e-08, + "loss": 0.5888, + "step": 31276 + }, + { + "epoch": 0.9585938457766335, + "grad_norm": 1.7759807047972658, + "learning_rate": 8.978618039688247e-08, + "loss": 0.6193, + "step": 31277 + }, + { + "epoch": 0.9586244942993748, + "grad_norm": 1.9137130523556583, + "learning_rate": 8.965351037692138e-08, + "loss": 0.713, + "step": 31278 + }, + { + "epoch": 0.958655142822116, + "grad_norm": 0.6839293081476424, + "learning_rate": 8.952093800613793e-08, + "loss": 0.5244, + "step": 31279 + }, + { + "epoch": 0.9586857913448572, + "grad_norm": 0.6998938604135047, + "learning_rate": 8.938846328584105e-08, + "loss": 0.5272, + "step": 31280 + }, + { + "epoch": 0.9587164398675984, + "grad_norm": 0.6873471392761052, + "learning_rate": 8.925608621733528e-08, + "loss": 0.526, + "step": 31281 + }, + { + "epoch": 0.9587470883903396, + "grad_norm": 1.7443182735295633, + "learning_rate": 8.912380680192512e-08, + "loss": 0.6689, + "step": 31282 + }, + { + "epoch": 0.9587777369130808, + "grad_norm": 1.7743223529969234, + "learning_rate": 8.899162504091396e-08, + "loss": 0.6779, + "step": 31283 + }, + { + "epoch": 0.958808385435822, + "grad_norm": 1.6990390990549313, + "learning_rate": 8.885954093560411e-08, + "loss": 0.6523, + "step": 31284 + }, + { + "epoch": 0.9588390339585632, + "grad_norm": 1.6367172312632865, + "learning_rate": 8.872755448729675e-08, + "loss": 0.6073, + "step": 31285 + }, + { + "epoch": 0.9588696824813044, + "grad_norm": 0.6591054899298027, + "learning_rate": 8.859566569729417e-08, + "loss": 0.4989, + "step": 31286 + }, + { + "epoch": 0.9589003310040456, + "grad_norm": 1.6898989059385305, + "learning_rate": 8.846387456689309e-08, + "loss": 0.6205, + "step": 31287 + }, + { + "epoch": 0.9589309795267869, + "grad_norm": 1.7952503066457903, + "learning_rate": 8.833218109739362e-08, + "loss": 0.6076, + "step": 31288 + }, + { + "epoch": 0.958961628049528, + "grad_norm": 1.7798272979128038, + "learning_rate": 8.820058529009356e-08, + "loss": 0.5892, + "step": 31289 + }, + { + "epoch": 0.9589922765722693, + "grad_norm": 1.8376132538765764, + "learning_rate": 8.806908714628859e-08, + "loss": 0.5455, + "step": 31290 + }, + { + "epoch": 0.9590229250950104, + "grad_norm": 1.7233038197078994, + "learning_rate": 8.793768666727542e-08, + "loss": 0.6385, + "step": 31291 + }, + { + "epoch": 0.9590535736177517, + "grad_norm": 1.928143781678082, + "learning_rate": 8.780638385434747e-08, + "loss": 0.6895, + "step": 31292 + }, + { + "epoch": 0.9590842221404928, + "grad_norm": 1.6110205371288886, + "learning_rate": 8.767517870880038e-08, + "loss": 0.6799, + "step": 31293 + }, + { + "epoch": 0.9591148706632341, + "grad_norm": 1.8939861022597373, + "learning_rate": 8.754407123192532e-08, + "loss": 0.6253, + "step": 31294 + }, + { + "epoch": 0.9591455191859752, + "grad_norm": 1.764867753617451, + "learning_rate": 8.741306142501571e-08, + "loss": 0.6652, + "step": 31295 + }, + { + "epoch": 0.9591761677087164, + "grad_norm": 1.6872485478134804, + "learning_rate": 8.728214928936052e-08, + "loss": 0.6, + "step": 31296 + }, + { + "epoch": 0.9592068162314576, + "grad_norm": 1.9740290590879992, + "learning_rate": 8.715133482625093e-08, + "loss": 0.7082, + "step": 31297 + }, + { + "epoch": 0.9592374647541988, + "grad_norm": 1.6569263128623626, + "learning_rate": 8.702061803697481e-08, + "loss": 0.6314, + "step": 31298 + }, + { + "epoch": 0.9592681132769401, + "grad_norm": 1.7891185437079802, + "learning_rate": 8.688999892282113e-08, + "loss": 0.6109, + "step": 31299 + }, + { + "epoch": 0.9592987617996812, + "grad_norm": 1.6738611699413144, + "learning_rate": 8.675947748507774e-08, + "loss": 0.6244, + "step": 31300 + }, + { + "epoch": 0.9593294103224225, + "grad_norm": 0.6824959636536269, + "learning_rate": 8.662905372502916e-08, + "loss": 0.5325, + "step": 31301 + }, + { + "epoch": 0.9593600588451636, + "grad_norm": 1.6899259773891888, + "learning_rate": 8.649872764396106e-08, + "loss": 0.5355, + "step": 31302 + }, + { + "epoch": 0.9593907073679049, + "grad_norm": 1.8616646738708769, + "learning_rate": 8.636849924315572e-08, + "loss": 0.5693, + "step": 31303 + }, + { + "epoch": 0.959421355890646, + "grad_norm": 0.6502235784959894, + "learning_rate": 8.623836852389989e-08, + "loss": 0.5092, + "step": 31304 + }, + { + "epoch": 0.9594520044133873, + "grad_norm": 2.1188195121218625, + "learning_rate": 8.610833548747477e-08, + "loss": 0.6545, + "step": 31305 + }, + { + "epoch": 0.9594826529361284, + "grad_norm": 1.7671431668447746, + "learning_rate": 8.597840013515934e-08, + "loss": 0.7182, + "step": 31306 + }, + { + "epoch": 0.9595133014588697, + "grad_norm": 1.5425830584426745, + "learning_rate": 8.584856246823481e-08, + "loss": 0.643, + "step": 31307 + }, + { + "epoch": 0.9595439499816109, + "grad_norm": 1.6373791261376138, + "learning_rate": 8.571882248798236e-08, + "loss": 0.5894, + "step": 31308 + }, + { + "epoch": 0.9595745985043521, + "grad_norm": 1.8566535706708591, + "learning_rate": 8.558918019567875e-08, + "loss": 0.7195, + "step": 31309 + }, + { + "epoch": 0.9596052470270933, + "grad_norm": 0.666331999077024, + "learning_rate": 8.545963559260073e-08, + "loss": 0.497, + "step": 31310 + }, + { + "epoch": 0.9596358955498345, + "grad_norm": 1.7126529980973964, + "learning_rate": 8.533018868002618e-08, + "loss": 0.612, + "step": 31311 + }, + { + "epoch": 0.9596665440725757, + "grad_norm": 0.6631711301342638, + "learning_rate": 8.520083945923074e-08, + "loss": 0.5096, + "step": 31312 + }, + { + "epoch": 0.9596971925953169, + "grad_norm": 1.8625497123172374, + "learning_rate": 8.507158793148784e-08, + "loss": 0.6759, + "step": 31313 + }, + { + "epoch": 0.9597278411180581, + "grad_norm": 1.8416242378799002, + "learning_rate": 8.49424340980709e-08, + "loss": 0.7561, + "step": 31314 + }, + { + "epoch": 0.9597584896407994, + "grad_norm": 1.6954783997087062, + "learning_rate": 8.481337796025335e-08, + "loss": 0.6747, + "step": 31315 + }, + { + "epoch": 0.9597891381635405, + "grad_norm": 1.8491762768761595, + "learning_rate": 8.46844195193075e-08, + "loss": 0.6533, + "step": 31316 + }, + { + "epoch": 0.9598197866862818, + "grad_norm": 1.5259981864740373, + "learning_rate": 8.455555877650234e-08, + "loss": 0.5397, + "step": 31317 + }, + { + "epoch": 0.9598504352090229, + "grad_norm": 1.6670329797130239, + "learning_rate": 8.442679573310686e-08, + "loss": 0.6013, + "step": 31318 + }, + { + "epoch": 0.9598810837317642, + "grad_norm": 1.9238027931103403, + "learning_rate": 8.429813039039336e-08, + "loss": 0.6422, + "step": 31319 + }, + { + "epoch": 0.9599117322545053, + "grad_norm": 1.6443537161483184, + "learning_rate": 8.41695627496264e-08, + "loss": 0.6463, + "step": 31320 + }, + { + "epoch": 0.9599423807772466, + "grad_norm": 1.6315479730651254, + "learning_rate": 8.404109281207273e-08, + "loss": 0.6831, + "step": 31321 + }, + { + "epoch": 0.9599730292999877, + "grad_norm": 1.9846377897088785, + "learning_rate": 8.391272057900025e-08, + "loss": 0.7197, + "step": 31322 + }, + { + "epoch": 0.960003677822729, + "grad_norm": 2.01176358856999, + "learning_rate": 8.378444605167346e-08, + "loss": 0.6053, + "step": 31323 + }, + { + "epoch": 0.9600343263454701, + "grad_norm": 1.7762300281192427, + "learning_rate": 8.365626923135584e-08, + "loss": 0.5426, + "step": 31324 + }, + { + "epoch": 0.9600649748682114, + "grad_norm": 1.9190821161986131, + "learning_rate": 8.352819011930968e-08, + "loss": 0.7289, + "step": 31325 + }, + { + "epoch": 0.9600956233909526, + "grad_norm": 0.68491573297499, + "learning_rate": 8.340020871679621e-08, + "loss": 0.5127, + "step": 31326 + }, + { + "epoch": 0.9601262719136937, + "grad_norm": 1.780004804844443, + "learning_rate": 8.327232502507998e-08, + "loss": 0.5274, + "step": 31327 + }, + { + "epoch": 0.960156920436435, + "grad_norm": 1.701905912717944, + "learning_rate": 8.314453904541775e-08, + "loss": 0.5792, + "step": 31328 + }, + { + "epoch": 0.9601875689591761, + "grad_norm": 1.7895134463252298, + "learning_rate": 8.301685077906962e-08, + "loss": 0.5631, + "step": 31329 + }, + { + "epoch": 0.9602182174819174, + "grad_norm": 1.79887740230298, + "learning_rate": 8.28892602272935e-08, + "loss": 0.6271, + "step": 31330 + }, + { + "epoch": 0.9602488660046585, + "grad_norm": 1.7176576131363848, + "learning_rate": 8.276176739134722e-08, + "loss": 0.6335, + "step": 31331 + }, + { + "epoch": 0.9602795145273998, + "grad_norm": 1.7470456508435195, + "learning_rate": 8.263437227248761e-08, + "loss": 0.7158, + "step": 31332 + }, + { + "epoch": 0.9603101630501409, + "grad_norm": 1.5311656149276183, + "learning_rate": 8.250707487196697e-08, + "loss": 0.5242, + "step": 31333 + }, + { + "epoch": 0.9603408115728822, + "grad_norm": 1.7311816583285127, + "learning_rate": 8.237987519104318e-08, + "loss": 0.5488, + "step": 31334 + }, + { + "epoch": 0.9603714600956234, + "grad_norm": 1.5544258642656368, + "learning_rate": 8.225277323096859e-08, + "loss": 0.6278, + "step": 31335 + }, + { + "epoch": 0.9604021086183646, + "grad_norm": 0.6700210942015213, + "learning_rate": 8.212576899299329e-08, + "loss": 0.5285, + "step": 31336 + }, + { + "epoch": 0.9604327571411058, + "grad_norm": 1.8488116799129384, + "learning_rate": 8.199886247837186e-08, + "loss": 0.5615, + "step": 31337 + }, + { + "epoch": 0.960463405663847, + "grad_norm": 1.6849562322161242, + "learning_rate": 8.187205368835216e-08, + "loss": 0.5077, + "step": 31338 + }, + { + "epoch": 0.9604940541865882, + "grad_norm": 1.9195111708845383, + "learning_rate": 8.174534262418543e-08, + "loss": 0.5797, + "step": 31339 + }, + { + "epoch": 0.9605247027093294, + "grad_norm": 2.0567168190116547, + "learning_rate": 8.161872928711956e-08, + "loss": 0.6771, + "step": 31340 + }, + { + "epoch": 0.9605553512320706, + "grad_norm": 2.052051014829999, + "learning_rate": 8.149221367840132e-08, + "loss": 0.6948, + "step": 31341 + }, + { + "epoch": 0.9605859997548118, + "grad_norm": 1.6611987993710986, + "learning_rate": 8.136579579927862e-08, + "loss": 0.548, + "step": 31342 + }, + { + "epoch": 0.960616648277553, + "grad_norm": 1.692950294219888, + "learning_rate": 8.12394756509971e-08, + "loss": 0.5454, + "step": 31343 + }, + { + "epoch": 0.9606472968002943, + "grad_norm": 0.6645928354842479, + "learning_rate": 8.111325323479913e-08, + "loss": 0.4923, + "step": 31344 + }, + { + "epoch": 0.9606779453230354, + "grad_norm": 1.9214038837059253, + "learning_rate": 8.098712855193147e-08, + "loss": 0.5317, + "step": 31345 + }, + { + "epoch": 0.9607085938457767, + "grad_norm": 1.854691411551355, + "learning_rate": 8.086110160363648e-08, + "loss": 0.7217, + "step": 31346 + }, + { + "epoch": 0.9607392423685178, + "grad_norm": 1.7934114557633183, + "learning_rate": 8.073517239115313e-08, + "loss": 0.697, + "step": 31347 + }, + { + "epoch": 0.9607698908912591, + "grad_norm": 1.7143587844973827, + "learning_rate": 8.060934091572492e-08, + "loss": 0.6222, + "step": 31348 + }, + { + "epoch": 0.9608005394140002, + "grad_norm": 1.7255648879557723, + "learning_rate": 8.048360717858972e-08, + "loss": 0.5366, + "step": 31349 + }, + { + "epoch": 0.9608311879367415, + "grad_norm": 0.6689525882092069, + "learning_rate": 8.035797118098876e-08, + "loss": 0.4996, + "step": 31350 + }, + { + "epoch": 0.9608618364594826, + "grad_norm": 1.7078270941723899, + "learning_rate": 8.023243292415884e-08, + "loss": 0.5023, + "step": 31351 + }, + { + "epoch": 0.9608924849822239, + "grad_norm": 1.5629904333954678, + "learning_rate": 8.010699240933672e-08, + "loss": 0.534, + "step": 31352 + }, + { + "epoch": 0.960923133504965, + "grad_norm": 1.7723930951230675, + "learning_rate": 7.998164963775812e-08, + "loss": 0.5932, + "step": 31353 + }, + { + "epoch": 0.9609537820277063, + "grad_norm": 1.992579468498936, + "learning_rate": 7.985640461065868e-08, + "loss": 0.6381, + "step": 31354 + }, + { + "epoch": 0.9609844305504475, + "grad_norm": 1.8535103781048046, + "learning_rate": 7.973125732927189e-08, + "loss": 0.6613, + "step": 31355 + }, + { + "epoch": 0.9610150790731887, + "grad_norm": 1.7941979371528622, + "learning_rate": 7.96062077948323e-08, + "loss": 0.5858, + "step": 31356 + }, + { + "epoch": 0.9610457275959299, + "grad_norm": 1.7252841737390494, + "learning_rate": 7.948125600857004e-08, + "loss": 0.5977, + "step": 31357 + }, + { + "epoch": 0.961076376118671, + "grad_norm": 1.6252849248600736, + "learning_rate": 7.935640197171745e-08, + "loss": 0.5476, + "step": 31358 + }, + { + "epoch": 0.9611070246414123, + "grad_norm": 0.6311284081580075, + "learning_rate": 7.923164568550468e-08, + "loss": 0.531, + "step": 31359 + }, + { + "epoch": 0.9611376731641534, + "grad_norm": 1.6983429486031636, + "learning_rate": 7.910698715115961e-08, + "loss": 0.6675, + "step": 31360 + }, + { + "epoch": 0.9611683216868947, + "grad_norm": 2.0899083470662276, + "learning_rate": 7.898242636991348e-08, + "loss": 0.7078, + "step": 31361 + }, + { + "epoch": 0.9611989702096359, + "grad_norm": 1.916705804477904, + "learning_rate": 7.885796334299089e-08, + "loss": 0.6552, + "step": 31362 + }, + { + "epoch": 0.9612296187323771, + "grad_norm": 1.9822732099859761, + "learning_rate": 7.873359807161973e-08, + "loss": 0.6319, + "step": 31363 + }, + { + "epoch": 0.9612602672551183, + "grad_norm": 1.5695202299571978, + "learning_rate": 7.860933055702569e-08, + "loss": 0.5494, + "step": 31364 + }, + { + "epoch": 0.9612909157778595, + "grad_norm": 1.6245369296732137, + "learning_rate": 7.848516080043112e-08, + "loss": 0.5535, + "step": 31365 + }, + { + "epoch": 0.9613215643006007, + "grad_norm": 1.5969696136805107, + "learning_rate": 7.836108880306059e-08, + "loss": 0.5425, + "step": 31366 + }, + { + "epoch": 0.9613522128233419, + "grad_norm": 1.6142162695994327, + "learning_rate": 7.823711456613758e-08, + "loss": 0.5807, + "step": 31367 + }, + { + "epoch": 0.9613828613460831, + "grad_norm": 1.631673309256973, + "learning_rate": 7.811323809088334e-08, + "loss": 0.643, + "step": 31368 + }, + { + "epoch": 0.9614135098688243, + "grad_norm": 0.6553199384394739, + "learning_rate": 7.798945937851688e-08, + "loss": 0.5141, + "step": 31369 + }, + { + "epoch": 0.9614441583915655, + "grad_norm": 1.9029942914849527, + "learning_rate": 7.786577843025944e-08, + "loss": 0.6402, + "step": 31370 + }, + { + "epoch": 0.9614748069143068, + "grad_norm": 1.9488184368523218, + "learning_rate": 7.774219524732895e-08, + "loss": 0.5741, + "step": 31371 + }, + { + "epoch": 0.9615054554370479, + "grad_norm": 1.7420738253930255, + "learning_rate": 7.761870983094443e-08, + "loss": 0.5834, + "step": 31372 + }, + { + "epoch": 0.9615361039597892, + "grad_norm": 1.8774093564076075, + "learning_rate": 7.749532218231937e-08, + "loss": 0.6483, + "step": 31373 + }, + { + "epoch": 0.9615667524825303, + "grad_norm": 1.73949740115079, + "learning_rate": 7.737203230267277e-08, + "loss": 0.6222, + "step": 31374 + }, + { + "epoch": 0.9615974010052716, + "grad_norm": 1.6547589825898643, + "learning_rate": 7.724884019321921e-08, + "loss": 0.7141, + "step": 31375 + }, + { + "epoch": 0.9616280495280127, + "grad_norm": 1.589937394040525, + "learning_rate": 7.712574585517108e-08, + "loss": 0.6369, + "step": 31376 + }, + { + "epoch": 0.961658698050754, + "grad_norm": 1.7520621681861759, + "learning_rate": 7.700274928974183e-08, + "loss": 0.5982, + "step": 31377 + }, + { + "epoch": 0.9616893465734951, + "grad_norm": 1.5833303037914943, + "learning_rate": 7.687985049814273e-08, + "loss": 0.6437, + "step": 31378 + }, + { + "epoch": 0.9617199950962364, + "grad_norm": 0.6728121971099271, + "learning_rate": 7.675704948158614e-08, + "loss": 0.5154, + "step": 31379 + }, + { + "epoch": 0.9617506436189776, + "grad_norm": 1.8120971054737136, + "learning_rate": 7.663434624128107e-08, + "loss": 0.604, + "step": 31380 + }, + { + "epoch": 0.9617812921417188, + "grad_norm": 1.7523847506870773, + "learning_rate": 7.651174077843659e-08, + "loss": 0.6317, + "step": 31381 + }, + { + "epoch": 0.96181194066446, + "grad_norm": 0.6850977831173152, + "learning_rate": 7.638923309426171e-08, + "loss": 0.5046, + "step": 31382 + }, + { + "epoch": 0.9618425891872012, + "grad_norm": 1.7383044433706831, + "learning_rate": 7.626682318996214e-08, + "loss": 0.7204, + "step": 31383 + }, + { + "epoch": 0.9618732377099424, + "grad_norm": 1.9110328642588257, + "learning_rate": 7.61445110667447e-08, + "loss": 0.6353, + "step": 31384 + }, + { + "epoch": 0.9619038862326836, + "grad_norm": 1.7438939250336487, + "learning_rate": 7.602229672581507e-08, + "loss": 0.631, + "step": 31385 + }, + { + "epoch": 0.9619345347554248, + "grad_norm": 1.8293282964062618, + "learning_rate": 7.590018016837675e-08, + "loss": 0.6829, + "step": 31386 + }, + { + "epoch": 0.961965183278166, + "grad_norm": 1.9223550556495652, + "learning_rate": 7.57781613956321e-08, + "loss": 0.5906, + "step": 31387 + }, + { + "epoch": 0.9619958318009072, + "grad_norm": 1.8261100724396644, + "learning_rate": 7.565624040878572e-08, + "loss": 0.7625, + "step": 31388 + }, + { + "epoch": 0.9620264803236483, + "grad_norm": 1.8137687190177074, + "learning_rate": 7.553441720903665e-08, + "loss": 0.5972, + "step": 31389 + }, + { + "epoch": 0.9620571288463896, + "grad_norm": 2.0490162658805113, + "learning_rate": 7.541269179758726e-08, + "loss": 0.6642, + "step": 31390 + }, + { + "epoch": 0.9620877773691308, + "grad_norm": 1.8253182448436613, + "learning_rate": 7.529106417563547e-08, + "loss": 0.6252, + "step": 31391 + }, + { + "epoch": 0.962118425891872, + "grad_norm": 1.9102913724882093, + "learning_rate": 7.516953434438035e-08, + "loss": 0.5869, + "step": 31392 + }, + { + "epoch": 0.9621490744146132, + "grad_norm": 1.7583816164145405, + "learning_rate": 7.504810230501869e-08, + "loss": 0.6096, + "step": 31393 + }, + { + "epoch": 0.9621797229373544, + "grad_norm": 0.6760357452296438, + "learning_rate": 7.492676805874732e-08, + "loss": 0.5267, + "step": 31394 + }, + { + "epoch": 0.9622103714600956, + "grad_norm": 1.6914623228234784, + "learning_rate": 7.480553160676196e-08, + "loss": 0.6804, + "step": 31395 + }, + { + "epoch": 0.9622410199828368, + "grad_norm": 1.8834419217273908, + "learning_rate": 7.468439295025831e-08, + "loss": 0.749, + "step": 31396 + }, + { + "epoch": 0.962271668505578, + "grad_norm": 0.6696769503053277, + "learning_rate": 7.456335209042765e-08, + "loss": 0.5161, + "step": 31397 + }, + { + "epoch": 0.9623023170283193, + "grad_norm": 1.5385983351895243, + "learning_rate": 7.444240902846456e-08, + "loss": 0.5881, + "step": 31398 + }, + { + "epoch": 0.9623329655510604, + "grad_norm": 0.6575548866221068, + "learning_rate": 7.432156376556033e-08, + "loss": 0.5082, + "step": 31399 + }, + { + "epoch": 0.9623636140738017, + "grad_norm": 1.744389786261286, + "learning_rate": 7.420081630290398e-08, + "loss": 0.6628, + "step": 31400 + }, + { + "epoch": 0.9623942625965428, + "grad_norm": 0.679826667038957, + "learning_rate": 7.408016664168682e-08, + "loss": 0.5412, + "step": 31401 + }, + { + "epoch": 0.9624249111192841, + "grad_norm": 1.7577744680257004, + "learning_rate": 7.395961478309899e-08, + "loss": 0.6254, + "step": 31402 + }, + { + "epoch": 0.9624555596420252, + "grad_norm": 1.651487822182127, + "learning_rate": 7.383916072832509e-08, + "loss": 0.6551, + "step": 31403 + }, + { + "epoch": 0.9624862081647665, + "grad_norm": 0.6376097439703019, + "learning_rate": 7.371880447855418e-08, + "loss": 0.5264, + "step": 31404 + }, + { + "epoch": 0.9625168566875076, + "grad_norm": 1.9638825433570284, + "learning_rate": 7.359854603497197e-08, + "loss": 0.6085, + "step": 31405 + }, + { + "epoch": 0.9625475052102489, + "grad_norm": 0.6461396041935467, + "learning_rate": 7.347838539876306e-08, + "loss": 0.4929, + "step": 31406 + }, + { + "epoch": 0.96257815373299, + "grad_norm": 0.6514265562910823, + "learning_rate": 7.335832257111098e-08, + "loss": 0.5219, + "step": 31407 + }, + { + "epoch": 0.9626088022557313, + "grad_norm": 1.814218539767835, + "learning_rate": 7.323835755319918e-08, + "loss": 0.5786, + "step": 31408 + }, + { + "epoch": 0.9626394507784725, + "grad_norm": 1.900968734155619, + "learning_rate": 7.31184903462101e-08, + "loss": 0.6848, + "step": 31409 + }, + { + "epoch": 0.9626700993012137, + "grad_norm": 0.6613688302732365, + "learning_rate": 7.299872095132498e-08, + "loss": 0.5327, + "step": 31410 + }, + { + "epoch": 0.9627007478239549, + "grad_norm": 1.9344357535110293, + "learning_rate": 7.287904936972179e-08, + "loss": 0.6613, + "step": 31411 + }, + { + "epoch": 0.9627313963466961, + "grad_norm": 1.6650286139495674, + "learning_rate": 7.275947560258179e-08, + "loss": 0.6183, + "step": 31412 + }, + { + "epoch": 0.9627620448694373, + "grad_norm": 1.6927805741183541, + "learning_rate": 7.263999965108404e-08, + "loss": 0.617, + "step": 31413 + }, + { + "epoch": 0.9627926933921785, + "grad_norm": 1.629013518433315, + "learning_rate": 7.252062151640316e-08, + "loss": 0.551, + "step": 31414 + }, + { + "epoch": 0.9628233419149197, + "grad_norm": 1.8419776685586828, + "learning_rate": 7.240134119971709e-08, + "loss": 0.5596, + "step": 31415 + }, + { + "epoch": 0.962853990437661, + "grad_norm": 1.6824016654512532, + "learning_rate": 7.228215870220045e-08, + "loss": 0.6115, + "step": 31416 + }, + { + "epoch": 0.9628846389604021, + "grad_norm": 1.774849608567634, + "learning_rate": 7.216307402502786e-08, + "loss": 0.6473, + "step": 31417 + }, + { + "epoch": 0.9629152874831434, + "grad_norm": 1.9324955611237047, + "learning_rate": 7.204408716937283e-08, + "loss": 0.6741, + "step": 31418 + }, + { + "epoch": 0.9629459360058845, + "grad_norm": 1.7672315285202231, + "learning_rate": 7.192519813640774e-08, + "loss": 0.714, + "step": 31419 + }, + { + "epoch": 0.9629765845286257, + "grad_norm": 1.7181764098467454, + "learning_rate": 7.180640692730278e-08, + "loss": 0.6796, + "step": 31420 + }, + { + "epoch": 0.9630072330513669, + "grad_norm": 0.6847014592697801, + "learning_rate": 7.168771354323034e-08, + "loss": 0.5398, + "step": 31421 + }, + { + "epoch": 0.9630378815741081, + "grad_norm": 1.8133111095039975, + "learning_rate": 7.156911798535949e-08, + "loss": 0.6472, + "step": 31422 + }, + { + "epoch": 0.9630685300968493, + "grad_norm": 1.9873630978829961, + "learning_rate": 7.145062025485817e-08, + "loss": 0.7387, + "step": 31423 + }, + { + "epoch": 0.9630991786195905, + "grad_norm": 0.649391535608456, + "learning_rate": 7.133222035289433e-08, + "loss": 0.5109, + "step": 31424 + }, + { + "epoch": 0.9631298271423318, + "grad_norm": 0.6476284710228384, + "learning_rate": 7.121391828063373e-08, + "loss": 0.5069, + "step": 31425 + }, + { + "epoch": 0.9631604756650729, + "grad_norm": 0.6577997354835632, + "learning_rate": 7.109571403924321e-08, + "loss": 0.5115, + "step": 31426 + }, + { + "epoch": 0.9631911241878142, + "grad_norm": 1.5097610953449556, + "learning_rate": 7.097760762988737e-08, + "loss": 0.5588, + "step": 31427 + }, + { + "epoch": 0.9632217727105553, + "grad_norm": 1.7566273136288557, + "learning_rate": 7.085959905372864e-08, + "loss": 0.6127, + "step": 31428 + }, + { + "epoch": 0.9632524212332966, + "grad_norm": 1.7525551134381798, + "learning_rate": 7.074168831193273e-08, + "loss": 0.6523, + "step": 31429 + }, + { + "epoch": 0.9632830697560377, + "grad_norm": 1.8072114827057884, + "learning_rate": 7.062387540565651e-08, + "loss": 0.719, + "step": 31430 + }, + { + "epoch": 0.963313718278779, + "grad_norm": 0.6446170817525403, + "learning_rate": 7.050616033606683e-08, + "loss": 0.4987, + "step": 31431 + }, + { + "epoch": 0.9633443668015201, + "grad_norm": 1.617061489720098, + "learning_rate": 7.038854310431942e-08, + "loss": 0.6167, + "step": 31432 + }, + { + "epoch": 0.9633750153242614, + "grad_norm": 1.7529890534765342, + "learning_rate": 7.027102371157335e-08, + "loss": 0.7177, + "step": 31433 + }, + { + "epoch": 0.9634056638470025, + "grad_norm": 1.9532955745174716, + "learning_rate": 7.015360215898769e-08, + "loss": 0.6902, + "step": 31434 + }, + { + "epoch": 0.9634363123697438, + "grad_norm": 1.6901511006571721, + "learning_rate": 7.003627844772044e-08, + "loss": 0.6303, + "step": 31435 + }, + { + "epoch": 0.963466960892485, + "grad_norm": 1.707839486554941, + "learning_rate": 6.991905257892617e-08, + "loss": 0.6121, + "step": 31436 + }, + { + "epoch": 0.9634976094152262, + "grad_norm": 1.8635537143993062, + "learning_rate": 6.980192455375956e-08, + "loss": 0.6177, + "step": 31437 + }, + { + "epoch": 0.9635282579379674, + "grad_norm": 1.6527403061926644, + "learning_rate": 6.968489437337522e-08, + "loss": 0.5361, + "step": 31438 + }, + { + "epoch": 0.9635589064607086, + "grad_norm": 1.7786801913196493, + "learning_rate": 6.956796203892668e-08, + "loss": 0.6241, + "step": 31439 + }, + { + "epoch": 0.9635895549834498, + "grad_norm": 2.0154085260331596, + "learning_rate": 6.945112755156635e-08, + "loss": 0.7625, + "step": 31440 + }, + { + "epoch": 0.963620203506191, + "grad_norm": 1.788563369500368, + "learning_rate": 6.933439091244332e-08, + "loss": 0.6221, + "step": 31441 + }, + { + "epoch": 0.9636508520289322, + "grad_norm": 1.6745172872479896, + "learning_rate": 6.921775212271108e-08, + "loss": 0.6068, + "step": 31442 + }, + { + "epoch": 0.9636815005516735, + "grad_norm": 2.4030795861916268, + "learning_rate": 6.910121118351764e-08, + "loss": 0.6977, + "step": 31443 + }, + { + "epoch": 0.9637121490744146, + "grad_norm": 1.6233942212629626, + "learning_rate": 6.898476809600985e-08, + "loss": 0.691, + "step": 31444 + }, + { + "epoch": 0.9637427975971559, + "grad_norm": 1.8879189531447105, + "learning_rate": 6.886842286133565e-08, + "loss": 0.6627, + "step": 31445 + }, + { + "epoch": 0.963773446119897, + "grad_norm": 1.8939081642032582, + "learning_rate": 6.875217548064305e-08, + "loss": 0.6113, + "step": 31446 + }, + { + "epoch": 0.9638040946426383, + "grad_norm": 1.7679808477157553, + "learning_rate": 6.863602595507556e-08, + "loss": 0.5531, + "step": 31447 + }, + { + "epoch": 0.9638347431653794, + "grad_norm": 1.6209400816080852, + "learning_rate": 6.851997428577783e-08, + "loss": 0.6209, + "step": 31448 + }, + { + "epoch": 0.9638653916881207, + "grad_norm": 1.5428820796774243, + "learning_rate": 6.84040204738945e-08, + "loss": 0.5463, + "step": 31449 + }, + { + "epoch": 0.9638960402108618, + "grad_norm": 1.845564557912998, + "learning_rate": 6.828816452056797e-08, + "loss": 0.7715, + "step": 31450 + }, + { + "epoch": 0.963926688733603, + "grad_norm": 1.6615870986437808, + "learning_rate": 6.817240642693845e-08, + "loss": 0.6156, + "step": 31451 + }, + { + "epoch": 0.9639573372563442, + "grad_norm": 1.6094373417731103, + "learning_rate": 6.805674619414726e-08, + "loss": 0.6699, + "step": 31452 + }, + { + "epoch": 0.9639879857790854, + "grad_norm": 1.8415225444436323, + "learning_rate": 6.79411838233346e-08, + "loss": 0.6824, + "step": 31453 + }, + { + "epoch": 0.9640186343018267, + "grad_norm": 0.6604916030736164, + "learning_rate": 6.782571931563952e-08, + "loss": 0.4938, + "step": 31454 + }, + { + "epoch": 0.9640492828245678, + "grad_norm": 1.7529699932964733, + "learning_rate": 6.771035267219784e-08, + "loss": 0.6328, + "step": 31455 + }, + { + "epoch": 0.9640799313473091, + "grad_norm": 1.5732135833254102, + "learning_rate": 6.759508389414749e-08, + "loss": 0.5401, + "step": 31456 + }, + { + "epoch": 0.9641105798700502, + "grad_norm": 1.5788144054788233, + "learning_rate": 6.747991298262313e-08, + "loss": 0.5897, + "step": 31457 + }, + { + "epoch": 0.9641412283927915, + "grad_norm": 2.1414779257976266, + "learning_rate": 6.736483993876274e-08, + "loss": 0.675, + "step": 31458 + }, + { + "epoch": 0.9641718769155326, + "grad_norm": 1.8165911343579348, + "learning_rate": 6.724986476369654e-08, + "loss": 0.5778, + "step": 31459 + }, + { + "epoch": 0.9642025254382739, + "grad_norm": 0.6465924319831827, + "learning_rate": 6.713498745855806e-08, + "loss": 0.4877, + "step": 31460 + }, + { + "epoch": 0.964233173961015, + "grad_norm": 1.4205259392197105, + "learning_rate": 6.702020802448195e-08, + "loss": 0.5736, + "step": 31461 + }, + { + "epoch": 0.9642638224837563, + "grad_norm": 1.6849048473949417, + "learning_rate": 6.690552646259618e-08, + "loss": 0.6224, + "step": 31462 + }, + { + "epoch": 0.9642944710064975, + "grad_norm": 1.701176556027401, + "learning_rate": 6.679094277403097e-08, + "loss": 0.6249, + "step": 31463 + }, + { + "epoch": 0.9643251195292387, + "grad_norm": 1.6841908182294476, + "learning_rate": 6.667645695991764e-08, + "loss": 0.7176, + "step": 31464 + }, + { + "epoch": 0.9643557680519799, + "grad_norm": 1.65515674646884, + "learning_rate": 6.656206902138195e-08, + "loss": 0.606, + "step": 31465 + }, + { + "epoch": 0.9643864165747211, + "grad_norm": 1.4800725966699093, + "learning_rate": 6.64477789595519e-08, + "loss": 0.5445, + "step": 31466 + }, + { + "epoch": 0.9644170650974623, + "grad_norm": 1.8478314050729319, + "learning_rate": 6.633358677555324e-08, + "loss": 0.6808, + "step": 31467 + }, + { + "epoch": 0.9644477136202035, + "grad_norm": 1.7535450887533865, + "learning_rate": 6.621949247051063e-08, + "loss": 0.5951, + "step": 31468 + }, + { + "epoch": 0.9644783621429447, + "grad_norm": 1.6330376115919423, + "learning_rate": 6.610549604555094e-08, + "loss": 0.5096, + "step": 31469 + }, + { + "epoch": 0.964509010665686, + "grad_norm": 1.6525765947485251, + "learning_rate": 6.599159750179441e-08, + "loss": 0.6223, + "step": 31470 + }, + { + "epoch": 0.9645396591884271, + "grad_norm": 1.6399358466544098, + "learning_rate": 6.587779684036455e-08, + "loss": 0.6212, + "step": 31471 + }, + { + "epoch": 0.9645703077111684, + "grad_norm": 1.7312705735918361, + "learning_rate": 6.576409406238271e-08, + "loss": 0.6996, + "step": 31472 + }, + { + "epoch": 0.9646009562339095, + "grad_norm": 1.741849300368423, + "learning_rate": 6.56504891689691e-08, + "loss": 0.6715, + "step": 31473 + }, + { + "epoch": 0.9646316047566508, + "grad_norm": 2.0188809672492933, + "learning_rate": 6.553698216124171e-08, + "loss": 0.7276, + "step": 31474 + }, + { + "epoch": 0.9646622532793919, + "grad_norm": 1.6753960668180083, + "learning_rate": 6.542357304032187e-08, + "loss": 0.5252, + "step": 31475 + }, + { + "epoch": 0.9646929018021332, + "grad_norm": 1.787035612150761, + "learning_rate": 6.531026180732426e-08, + "loss": 0.6965, + "step": 31476 + }, + { + "epoch": 0.9647235503248743, + "grad_norm": 1.644286756298497, + "learning_rate": 6.519704846336794e-08, + "loss": 0.7258, + "step": 31477 + }, + { + "epoch": 0.9647541988476156, + "grad_norm": 1.8233181761300634, + "learning_rate": 6.50839330095654e-08, + "loss": 0.7021, + "step": 31478 + }, + { + "epoch": 0.9647848473703567, + "grad_norm": 1.786674051003253, + "learning_rate": 6.497091544703349e-08, + "loss": 0.7707, + "step": 31479 + }, + { + "epoch": 0.964815495893098, + "grad_norm": 1.7088144580537057, + "learning_rate": 6.485799577688579e-08, + "loss": 0.6483, + "step": 31480 + }, + { + "epoch": 0.9648461444158392, + "grad_norm": 0.6773778293478012, + "learning_rate": 6.474517400023472e-08, + "loss": 0.5138, + "step": 31481 + }, + { + "epoch": 0.9648767929385803, + "grad_norm": 1.7515995166997718, + "learning_rate": 6.463245011818942e-08, + "loss": 0.7237, + "step": 31482 + }, + { + "epoch": 0.9649074414613216, + "grad_norm": 1.63958467130967, + "learning_rate": 6.451982413186452e-08, + "loss": 0.5442, + "step": 31483 + }, + { + "epoch": 0.9649380899840627, + "grad_norm": 1.6852774036081801, + "learning_rate": 6.440729604236695e-08, + "loss": 0.5877, + "step": 31484 + }, + { + "epoch": 0.964968738506804, + "grad_norm": 1.9769773534797517, + "learning_rate": 6.429486585080691e-08, + "loss": 0.6317, + "step": 31485 + }, + { + "epoch": 0.9649993870295451, + "grad_norm": 1.8598633438732435, + "learning_rate": 6.418253355829129e-08, + "loss": 0.6705, + "step": 31486 + }, + { + "epoch": 0.9650300355522864, + "grad_norm": 1.8251279025928429, + "learning_rate": 6.4070299165927e-08, + "loss": 0.7117, + "step": 31487 + }, + { + "epoch": 0.9650606840750275, + "grad_norm": 1.5945095417468607, + "learning_rate": 6.395816267482091e-08, + "loss": 0.603, + "step": 31488 + }, + { + "epoch": 0.9650913325977688, + "grad_norm": 1.6340861204810762, + "learning_rate": 6.384612408607771e-08, + "loss": 0.6206, + "step": 31489 + }, + { + "epoch": 0.96512198112051, + "grad_norm": 1.66916093527063, + "learning_rate": 6.373418340079984e-08, + "loss": 0.6426, + "step": 31490 + }, + { + "epoch": 0.9651526296432512, + "grad_norm": 0.6869499610940165, + "learning_rate": 6.362234062009198e-08, + "loss": 0.5291, + "step": 31491 + }, + { + "epoch": 0.9651832781659924, + "grad_norm": 1.4977412986348004, + "learning_rate": 6.351059574505547e-08, + "loss": 0.4553, + "step": 31492 + }, + { + "epoch": 0.9652139266887336, + "grad_norm": 1.9308346843163806, + "learning_rate": 6.339894877679165e-08, + "loss": 0.625, + "step": 31493 + }, + { + "epoch": 0.9652445752114748, + "grad_norm": 1.6290061580400232, + "learning_rate": 6.328739971639963e-08, + "loss": 0.5708, + "step": 31494 + }, + { + "epoch": 0.965275223734216, + "grad_norm": 2.056612717032471, + "learning_rate": 6.317594856497966e-08, + "loss": 0.6442, + "step": 31495 + }, + { + "epoch": 0.9653058722569572, + "grad_norm": 1.8994159852608772, + "learning_rate": 6.306459532362975e-08, + "loss": 0.6095, + "step": 31496 + }, + { + "epoch": 0.9653365207796984, + "grad_norm": 1.5556597749818177, + "learning_rate": 6.295333999344677e-08, + "loss": 0.6007, + "step": 31497 + }, + { + "epoch": 0.9653671693024396, + "grad_norm": 1.6915702114325837, + "learning_rate": 6.284218257552765e-08, + "loss": 0.5802, + "step": 31498 + }, + { + "epoch": 0.9653978178251809, + "grad_norm": 1.8983608406167514, + "learning_rate": 6.273112307096596e-08, + "loss": 0.7069, + "step": 31499 + }, + { + "epoch": 0.965428466347922, + "grad_norm": 1.9654497545120209, + "learning_rate": 6.262016148085748e-08, + "loss": 0.7757, + "step": 31500 + }, + { + "epoch": 0.9654591148706633, + "grad_norm": 1.5909165746430893, + "learning_rate": 6.250929780629467e-08, + "loss": 0.6115, + "step": 31501 + }, + { + "epoch": 0.9654897633934044, + "grad_norm": 1.5656678460247584, + "learning_rate": 6.23985320483711e-08, + "loss": 0.5595, + "step": 31502 + }, + { + "epoch": 0.9655204119161457, + "grad_norm": 1.7162360538266535, + "learning_rate": 6.228786420817701e-08, + "loss": 0.722, + "step": 31503 + }, + { + "epoch": 0.9655510604388868, + "grad_norm": 1.675686433857186, + "learning_rate": 6.217729428680375e-08, + "loss": 0.5894, + "step": 31504 + }, + { + "epoch": 0.9655817089616281, + "grad_norm": 0.6563135518615327, + "learning_rate": 6.206682228534045e-08, + "loss": 0.5214, + "step": 31505 + }, + { + "epoch": 0.9656123574843692, + "grad_norm": 1.723202692582435, + "learning_rate": 6.195644820487511e-08, + "loss": 0.5685, + "step": 31506 + }, + { + "epoch": 0.9656430060071105, + "grad_norm": 0.7125113374149881, + "learning_rate": 6.18461720464958e-08, + "loss": 0.5293, + "step": 31507 + }, + { + "epoch": 0.9656736545298517, + "grad_norm": 0.6736638919752269, + "learning_rate": 6.173599381128825e-08, + "loss": 0.5248, + "step": 31508 + }, + { + "epoch": 0.9657043030525929, + "grad_norm": 1.9365626333167054, + "learning_rate": 6.162591350033942e-08, + "loss": 0.6202, + "step": 31509 + }, + { + "epoch": 0.9657349515753341, + "grad_norm": 1.7101727161062203, + "learning_rate": 6.151593111473286e-08, + "loss": 0.6198, + "step": 31510 + }, + { + "epoch": 0.9657656000980753, + "grad_norm": 0.690569834252224, + "learning_rate": 6.140604665555327e-08, + "loss": 0.5547, + "step": 31511 + }, + { + "epoch": 0.9657962486208165, + "grad_norm": 1.970950671961889, + "learning_rate": 6.129626012388201e-08, + "loss": 0.5768, + "step": 31512 + }, + { + "epoch": 0.9658268971435576, + "grad_norm": 1.8757530751566998, + "learning_rate": 6.118657152080265e-08, + "loss": 0.6539, + "step": 31513 + }, + { + "epoch": 0.9658575456662989, + "grad_norm": 0.6585909717128724, + "learning_rate": 6.107698084739433e-08, + "loss": 0.5106, + "step": 31514 + }, + { + "epoch": 0.96588819418904, + "grad_norm": 0.6791031513208001, + "learning_rate": 6.09674881047373e-08, + "loss": 0.5504, + "step": 31515 + }, + { + "epoch": 0.9659188427117813, + "grad_norm": 1.8761243332534698, + "learning_rate": 6.085809329391069e-08, + "loss": 0.6296, + "step": 31516 + }, + { + "epoch": 0.9659494912345225, + "grad_norm": 1.6883775105548022, + "learning_rate": 6.074879641599252e-08, + "loss": 0.5681, + "step": 31517 + }, + { + "epoch": 0.9659801397572637, + "grad_norm": 1.585018662709661, + "learning_rate": 6.063959747205972e-08, + "loss": 0.7218, + "step": 31518 + }, + { + "epoch": 0.9660107882800049, + "grad_norm": 1.637680841204943, + "learning_rate": 6.05304964631881e-08, + "loss": 0.5949, + "step": 31519 + }, + { + "epoch": 0.9660414368027461, + "grad_norm": 1.76097399481257, + "learning_rate": 6.042149339045234e-08, + "loss": 0.6391, + "step": 31520 + }, + { + "epoch": 0.9660720853254873, + "grad_norm": 1.8174295902771307, + "learning_rate": 6.031258825492715e-08, + "loss": 0.6751, + "step": 31521 + }, + { + "epoch": 0.9661027338482285, + "grad_norm": 1.7146772254451732, + "learning_rate": 6.020378105768498e-08, + "loss": 0.6977, + "step": 31522 + }, + { + "epoch": 0.9661333823709697, + "grad_norm": 0.6808852636775025, + "learning_rate": 6.009507179979723e-08, + "loss": 0.5013, + "step": 31523 + }, + { + "epoch": 0.966164030893711, + "grad_norm": 1.4544860053464739, + "learning_rate": 5.998646048233747e-08, + "loss": 0.5025, + "step": 31524 + }, + { + "epoch": 0.9661946794164521, + "grad_norm": 0.7011821408683959, + "learning_rate": 5.987794710637374e-08, + "loss": 0.5456, + "step": 31525 + }, + { + "epoch": 0.9662253279391934, + "grad_norm": 1.6869353219487984, + "learning_rate": 5.976953167297628e-08, + "loss": 0.5398, + "step": 31526 + }, + { + "epoch": 0.9662559764619345, + "grad_norm": 1.761852935327699, + "learning_rate": 5.966121418321202e-08, + "loss": 0.6461, + "step": 31527 + }, + { + "epoch": 0.9662866249846758, + "grad_norm": 0.6972212060387929, + "learning_rate": 5.9552994638149006e-08, + "loss": 0.506, + "step": 31528 + }, + { + "epoch": 0.9663172735074169, + "grad_norm": 1.7971847885647272, + "learning_rate": 5.9444873038855264e-08, + "loss": 0.6334, + "step": 31529 + }, + { + "epoch": 0.9663479220301582, + "grad_norm": 1.7389005761872103, + "learning_rate": 5.933684938639328e-08, + "loss": 0.7053, + "step": 31530 + }, + { + "epoch": 0.9663785705528993, + "grad_norm": 1.8648030751425153, + "learning_rate": 5.9228923681828865e-08, + "loss": 0.6393, + "step": 31531 + }, + { + "epoch": 0.9664092190756406, + "grad_norm": 1.8383819823236567, + "learning_rate": 5.9121095926225615e-08, + "loss": 0.6909, + "step": 31532 + }, + { + "epoch": 0.9664398675983817, + "grad_norm": 1.664071193127192, + "learning_rate": 5.901336612064601e-08, + "loss": 0.5733, + "step": 31533 + }, + { + "epoch": 0.966470516121123, + "grad_norm": 1.942293961804067, + "learning_rate": 5.890573426615032e-08, + "loss": 0.695, + "step": 31534 + }, + { + "epoch": 0.9665011646438642, + "grad_norm": 1.8069959389731178, + "learning_rate": 5.8798200363801014e-08, + "loss": 0.6777, + "step": 31535 + }, + { + "epoch": 0.9665318131666054, + "grad_norm": 1.5281518065820763, + "learning_rate": 5.8690764414656155e-08, + "loss": 0.5052, + "step": 31536 + }, + { + "epoch": 0.9665624616893466, + "grad_norm": 1.7119006819639684, + "learning_rate": 5.8583426419774884e-08, + "loss": 0.6203, + "step": 31537 + }, + { + "epoch": 0.9665931102120878, + "grad_norm": 1.6408040318547392, + "learning_rate": 5.847618638021413e-08, + "loss": 0.5359, + "step": 31538 + }, + { + "epoch": 0.966623758734829, + "grad_norm": 1.7301607734499358, + "learning_rate": 5.836904429703194e-08, + "loss": 0.6084, + "step": 31539 + }, + { + "epoch": 0.9666544072575702, + "grad_norm": 0.6950515944700631, + "learning_rate": 5.826200017128303e-08, + "loss": 0.5422, + "step": 31540 + }, + { + "epoch": 0.9666850557803114, + "grad_norm": 1.8923680806970353, + "learning_rate": 5.815505400402211e-08, + "loss": 0.7133, + "step": 31541 + }, + { + "epoch": 0.9667157043030526, + "grad_norm": 1.6377521994863575, + "learning_rate": 5.804820579630388e-08, + "loss": 0.6474, + "step": 31542 + }, + { + "epoch": 0.9667463528257938, + "grad_norm": 1.4758949464559548, + "learning_rate": 5.794145554917974e-08, + "loss": 0.5826, + "step": 31543 + }, + { + "epoch": 0.966777001348535, + "grad_norm": 0.6734980557138984, + "learning_rate": 5.783480326370216e-08, + "loss": 0.5179, + "step": 31544 + }, + { + "epoch": 0.9668076498712762, + "grad_norm": 0.6718104122365092, + "learning_rate": 5.772824894092255e-08, + "loss": 0.5295, + "step": 31545 + }, + { + "epoch": 0.9668382983940174, + "grad_norm": 0.6936785116216969, + "learning_rate": 5.7621792581890047e-08, + "loss": 0.5302, + "step": 31546 + }, + { + "epoch": 0.9668689469167586, + "grad_norm": 0.6564051678666626, + "learning_rate": 5.751543418765382e-08, + "loss": 0.5015, + "step": 31547 + }, + { + "epoch": 0.9668995954394998, + "grad_norm": 0.7086560369466967, + "learning_rate": 5.740917375926192e-08, + "loss": 0.5074, + "step": 31548 + }, + { + "epoch": 0.966930243962241, + "grad_norm": 1.8118487024341274, + "learning_rate": 5.730301129776128e-08, + "loss": 0.6616, + "step": 31549 + }, + { + "epoch": 0.9669608924849822, + "grad_norm": 1.8854281236837116, + "learning_rate": 5.719694680419774e-08, + "loss": 0.5512, + "step": 31550 + }, + { + "epoch": 0.9669915410077234, + "grad_norm": 1.7978146600397213, + "learning_rate": 5.7090980279618233e-08, + "loss": 0.6786, + "step": 31551 + }, + { + "epoch": 0.9670221895304646, + "grad_norm": 0.648319630302696, + "learning_rate": 5.6985111725063044e-08, + "loss": 0.4991, + "step": 31552 + }, + { + "epoch": 0.9670528380532059, + "grad_norm": 1.7965679988040486, + "learning_rate": 5.687934114157912e-08, + "loss": 0.6569, + "step": 31553 + }, + { + "epoch": 0.967083486575947, + "grad_norm": 1.7633103490506588, + "learning_rate": 5.6773668530206715e-08, + "loss": 0.6402, + "step": 31554 + }, + { + "epoch": 0.9671141350986883, + "grad_norm": 0.6657761798780449, + "learning_rate": 5.6668093891987244e-08, + "loss": 0.5085, + "step": 31555 + }, + { + "epoch": 0.9671447836214294, + "grad_norm": 1.7868212264125427, + "learning_rate": 5.6562617227960967e-08, + "loss": 0.6674, + "step": 31556 + }, + { + "epoch": 0.9671754321441707, + "grad_norm": 0.667887357662684, + "learning_rate": 5.645723853916818e-08, + "loss": 0.5166, + "step": 31557 + }, + { + "epoch": 0.9672060806669118, + "grad_norm": 1.5499749931478928, + "learning_rate": 5.635195782664582e-08, + "loss": 0.5804, + "step": 31558 + }, + { + "epoch": 0.9672367291896531, + "grad_norm": 0.6844341636359722, + "learning_rate": 5.624677509143195e-08, + "loss": 0.544, + "step": 31559 + }, + { + "epoch": 0.9672673777123942, + "grad_norm": 1.564521575002133, + "learning_rate": 5.6141690334562405e-08, + "loss": 0.6035, + "step": 31560 + }, + { + "epoch": 0.9672980262351355, + "grad_norm": 1.5722254763094377, + "learning_rate": 5.6036703557074136e-08, + "loss": 0.6325, + "step": 31561 + }, + { + "epoch": 0.9673286747578766, + "grad_norm": 1.9074187192490328, + "learning_rate": 5.5931814759999645e-08, + "loss": 0.6839, + "step": 31562 + }, + { + "epoch": 0.9673593232806179, + "grad_norm": 1.375892592405149, + "learning_rate": 5.582702394437367e-08, + "loss": 0.5065, + "step": 31563 + }, + { + "epoch": 0.9673899718033591, + "grad_norm": 1.735143823885233, + "learning_rate": 5.57223311112276e-08, + "loss": 0.6268, + "step": 31564 + }, + { + "epoch": 0.9674206203261003, + "grad_norm": 1.7991089940068727, + "learning_rate": 5.561773626159395e-08, + "loss": 0.6573, + "step": 31565 + }, + { + "epoch": 0.9674512688488415, + "grad_norm": 1.7013701215880495, + "learning_rate": 5.5513239396504106e-08, + "loss": 0.7065, + "step": 31566 + }, + { + "epoch": 0.9674819173715827, + "grad_norm": 0.6807362740446778, + "learning_rate": 5.540884051698503e-08, + "loss": 0.548, + "step": 31567 + }, + { + "epoch": 0.9675125658943239, + "grad_norm": 1.6637521981716565, + "learning_rate": 5.530453962406812e-08, + "loss": 0.5272, + "step": 31568 + }, + { + "epoch": 0.9675432144170651, + "grad_norm": 2.0480730431757417, + "learning_rate": 5.520033671878033e-08, + "loss": 0.6416, + "step": 31569 + }, + { + "epoch": 0.9675738629398063, + "grad_norm": 1.704169708239062, + "learning_rate": 5.509623180214863e-08, + "loss": 0.6625, + "step": 31570 + }, + { + "epoch": 0.9676045114625476, + "grad_norm": 1.7800072171523504, + "learning_rate": 5.499222487519662e-08, + "loss": 0.6031, + "step": 31571 + }, + { + "epoch": 0.9676351599852887, + "grad_norm": 1.7460388393272819, + "learning_rate": 5.4888315938951275e-08, + "loss": 0.6353, + "step": 31572 + }, + { + "epoch": 0.96766580850803, + "grad_norm": 1.7903305593582954, + "learning_rate": 5.4784504994437324e-08, + "loss": 0.6798, + "step": 31573 + }, + { + "epoch": 0.9676964570307711, + "grad_norm": 0.6594723123487122, + "learning_rate": 5.4680792042673955e-08, + "loss": 0.4926, + "step": 31574 + }, + { + "epoch": 0.9677271055535123, + "grad_norm": 1.8865953473989794, + "learning_rate": 5.4577177084687016e-08, + "loss": 0.6223, + "step": 31575 + }, + { + "epoch": 0.9677577540762535, + "grad_norm": 1.8619036543843637, + "learning_rate": 5.4473660121494574e-08, + "loss": 0.6547, + "step": 31576 + }, + { + "epoch": 0.9677884025989947, + "grad_norm": 0.6907754118423715, + "learning_rate": 5.437024115411915e-08, + "loss": 0.5433, + "step": 31577 + }, + { + "epoch": 0.9678190511217359, + "grad_norm": 1.5250309486144473, + "learning_rate": 5.426692018357882e-08, + "loss": 0.6089, + "step": 31578 + }, + { + "epoch": 0.9678496996444771, + "grad_norm": 1.8352160660851808, + "learning_rate": 5.416369721088943e-08, + "loss": 0.5442, + "step": 31579 + }, + { + "epoch": 0.9678803481672184, + "grad_norm": 0.6758853631311259, + "learning_rate": 5.4060572237071286e-08, + "loss": 0.4777, + "step": 31580 + }, + { + "epoch": 0.9679109966899595, + "grad_norm": 1.7612626767268542, + "learning_rate": 5.3957545263138014e-08, + "loss": 0.6644, + "step": 31581 + }, + { + "epoch": 0.9679416452127008, + "grad_norm": 1.782115413641024, + "learning_rate": 5.385461629010658e-08, + "loss": 0.6423, + "step": 31582 + }, + { + "epoch": 0.9679722937354419, + "grad_norm": 1.7544851140418787, + "learning_rate": 5.37517853189895e-08, + "loss": 0.644, + "step": 31583 + }, + { + "epoch": 0.9680029422581832, + "grad_norm": 1.7316735127188394, + "learning_rate": 5.364905235080154e-08, + "loss": 0.6297, + "step": 31584 + }, + { + "epoch": 0.9680335907809243, + "grad_norm": 1.657360530408477, + "learning_rate": 5.354641738655519e-08, + "loss": 0.5581, + "step": 31585 + }, + { + "epoch": 0.9680642393036656, + "grad_norm": 1.9113119068205149, + "learning_rate": 5.344388042725968e-08, + "loss": 0.6839, + "step": 31586 + }, + { + "epoch": 0.9680948878264067, + "grad_norm": 1.7051770802793258, + "learning_rate": 5.33414414739275e-08, + "loss": 0.6669, + "step": 31587 + }, + { + "epoch": 0.968125536349148, + "grad_norm": 1.798503178333909, + "learning_rate": 5.323910052756676e-08, + "loss": 0.659, + "step": 31588 + }, + { + "epoch": 0.9681561848718891, + "grad_norm": 1.8449290229202309, + "learning_rate": 5.313685758918663e-08, + "loss": 0.6814, + "step": 31589 + }, + { + "epoch": 0.9681868333946304, + "grad_norm": 1.5636017710673935, + "learning_rate": 5.3034712659792985e-08, + "loss": 0.5973, + "step": 31590 + }, + { + "epoch": 0.9682174819173716, + "grad_norm": 2.143308628857476, + "learning_rate": 5.2932665740393905e-08, + "loss": 0.6376, + "step": 31591 + }, + { + "epoch": 0.9682481304401128, + "grad_norm": 1.7362560052324354, + "learning_rate": 5.283071683199414e-08, + "loss": 0.6959, + "step": 31592 + }, + { + "epoch": 0.968278778962854, + "grad_norm": 1.9088910455277943, + "learning_rate": 5.272886593559845e-08, + "loss": 0.7091, + "step": 31593 + }, + { + "epoch": 0.9683094274855952, + "grad_norm": 1.5734121530883558, + "learning_rate": 5.262711305221047e-08, + "loss": 0.6616, + "step": 31594 + }, + { + "epoch": 0.9683400760083364, + "grad_norm": 1.7964756725050512, + "learning_rate": 5.252545818283272e-08, + "loss": 0.6466, + "step": 31595 + }, + { + "epoch": 0.9683707245310776, + "grad_norm": 1.9420791960982493, + "learning_rate": 5.2423901328466643e-08, + "loss": 0.6594, + "step": 31596 + }, + { + "epoch": 0.9684013730538188, + "grad_norm": 1.7572140944659782, + "learning_rate": 5.232244249011253e-08, + "loss": 0.6364, + "step": 31597 + }, + { + "epoch": 0.96843202157656, + "grad_norm": 0.6688901005047582, + "learning_rate": 5.2221081668771824e-08, + "loss": 0.5278, + "step": 31598 + }, + { + "epoch": 0.9684626700993012, + "grad_norm": 0.6537711278081468, + "learning_rate": 5.211981886544148e-08, + "loss": 0.5083, + "step": 31599 + }, + { + "epoch": 0.9684933186220425, + "grad_norm": 0.6794944831817527, + "learning_rate": 5.201865408112072e-08, + "loss": 0.5054, + "step": 31600 + }, + { + "epoch": 0.9685239671447836, + "grad_norm": 1.621374534954329, + "learning_rate": 5.1917587316803186e-08, + "loss": 0.5842, + "step": 31601 + }, + { + "epoch": 0.9685546156675249, + "grad_norm": 1.6343128440926777, + "learning_rate": 5.1816618573489187e-08, + "loss": 0.6283, + "step": 31602 + }, + { + "epoch": 0.968585264190266, + "grad_norm": 1.8225912136365454, + "learning_rate": 5.171574785217015e-08, + "loss": 0.6217, + "step": 31603 + }, + { + "epoch": 0.9686159127130073, + "grad_norm": 1.6371730587056834, + "learning_rate": 5.1614975153841953e-08, + "loss": 0.5819, + "step": 31604 + }, + { + "epoch": 0.9686465612357484, + "grad_norm": 1.7315753055890863, + "learning_rate": 5.151430047949602e-08, + "loss": 0.5686, + "step": 31605 + }, + { + "epoch": 0.9686772097584896, + "grad_norm": 1.6817820693406904, + "learning_rate": 5.141372383012599e-08, + "loss": 0.5844, + "step": 31606 + }, + { + "epoch": 0.9687078582812308, + "grad_norm": 0.6836450135818868, + "learning_rate": 5.131324520672221e-08, + "loss": 0.5138, + "step": 31607 + }, + { + "epoch": 0.968738506803972, + "grad_norm": 2.167662114155139, + "learning_rate": 5.121286461027275e-08, + "loss": 0.6384, + "step": 31608 + }, + { + "epoch": 0.9687691553267133, + "grad_norm": 1.5307233059885528, + "learning_rate": 5.111258204177017e-08, + "loss": 0.6406, + "step": 31609 + }, + { + "epoch": 0.9687998038494544, + "grad_norm": 0.6465141726848013, + "learning_rate": 5.1012397502200327e-08, + "loss": 0.5024, + "step": 31610 + }, + { + "epoch": 0.9688304523721957, + "grad_norm": 1.7107820957093303, + "learning_rate": 5.091231099255023e-08, + "loss": 0.5797, + "step": 31611 + }, + { + "epoch": 0.9688611008949368, + "grad_norm": 1.8843471897710917, + "learning_rate": 5.0812322513807964e-08, + "loss": 0.6092, + "step": 31612 + }, + { + "epoch": 0.9688917494176781, + "grad_norm": 1.9257706926927562, + "learning_rate": 5.0712432066957197e-08, + "loss": 0.6189, + "step": 31613 + }, + { + "epoch": 0.9689223979404192, + "grad_norm": 1.7601520935966757, + "learning_rate": 5.0612639652981576e-08, + "loss": 0.6482, + "step": 31614 + }, + { + "epoch": 0.9689530464631605, + "grad_norm": 0.6639416098764425, + "learning_rate": 5.0512945272865876e-08, + "loss": 0.5353, + "step": 31615 + }, + { + "epoch": 0.9689836949859016, + "grad_norm": 1.9446036392076147, + "learning_rate": 5.041334892759153e-08, + "loss": 0.72, + "step": 31616 + }, + { + "epoch": 0.9690143435086429, + "grad_norm": 1.7894531952524835, + "learning_rate": 5.031385061814109e-08, + "loss": 0.5195, + "step": 31617 + }, + { + "epoch": 0.9690449920313841, + "grad_norm": 1.6805722696946048, + "learning_rate": 5.021445034549266e-08, + "loss": 0.6974, + "step": 31618 + }, + { + "epoch": 0.9690756405541253, + "grad_norm": 1.8946819890565378, + "learning_rate": 5.0115148110627674e-08, + "loss": 0.5434, + "step": 31619 + }, + { + "epoch": 0.9691062890768665, + "grad_norm": 1.9280962388472007, + "learning_rate": 5.001594391452424e-08, + "loss": 0.7404, + "step": 31620 + }, + { + "epoch": 0.9691369375996077, + "grad_norm": 1.681654729466018, + "learning_rate": 4.9916837758159366e-08, + "loss": 0.64, + "step": 31621 + }, + { + "epoch": 0.9691675861223489, + "grad_norm": 1.821844229849899, + "learning_rate": 4.981782964251003e-08, + "loss": 0.725, + "step": 31622 + }, + { + "epoch": 0.9691982346450901, + "grad_norm": 1.8020577814433936, + "learning_rate": 4.9718919568551014e-08, + "loss": 0.6311, + "step": 31623 + }, + { + "epoch": 0.9692288831678313, + "grad_norm": 1.739807478336511, + "learning_rate": 4.96201075372571e-08, + "loss": 0.6172, + "step": 31624 + }, + { + "epoch": 0.9692595316905726, + "grad_norm": 1.799245184381534, + "learning_rate": 4.952139354960195e-08, + "loss": 0.6205, + "step": 31625 + }, + { + "epoch": 0.9692901802133137, + "grad_norm": 1.626018160438453, + "learning_rate": 4.9422777606559225e-08, + "loss": 0.5795, + "step": 31626 + }, + { + "epoch": 0.969320828736055, + "grad_norm": 1.6598042002452293, + "learning_rate": 4.932425970909926e-08, + "loss": 0.5393, + "step": 31627 + }, + { + "epoch": 0.9693514772587961, + "grad_norm": 0.6799788803746845, + "learning_rate": 4.922583985819351e-08, + "loss": 0.4853, + "step": 31628 + }, + { + "epoch": 0.9693821257815374, + "grad_norm": 1.8101022542968443, + "learning_rate": 4.912751805481231e-08, + "loss": 0.7041, + "step": 31629 + }, + { + "epoch": 0.9694127743042785, + "grad_norm": 0.6828158193416775, + "learning_rate": 4.9029294299923755e-08, + "loss": 0.4996, + "step": 31630 + }, + { + "epoch": 0.9694434228270198, + "grad_norm": 1.8660184491935194, + "learning_rate": 4.893116859449487e-08, + "loss": 0.7275, + "step": 31631 + }, + { + "epoch": 0.9694740713497609, + "grad_norm": 1.6711460489659253, + "learning_rate": 4.883314093949265e-08, + "loss": 0.6136, + "step": 31632 + }, + { + "epoch": 0.9695047198725022, + "grad_norm": 2.0745588737571037, + "learning_rate": 4.8735211335885215e-08, + "loss": 0.5969, + "step": 31633 + }, + { + "epoch": 0.9695353683952433, + "grad_norm": 1.652403397229148, + "learning_rate": 4.863737978463512e-08, + "loss": 0.6069, + "step": 31634 + }, + { + "epoch": 0.9695660169179846, + "grad_norm": 1.8952617059189156, + "learning_rate": 4.8539646286707156e-08, + "loss": 0.7446, + "step": 31635 + }, + { + "epoch": 0.9695966654407258, + "grad_norm": 1.71930367510943, + "learning_rate": 4.844201084306388e-08, + "loss": 0.7238, + "step": 31636 + }, + { + "epoch": 0.9696273139634669, + "grad_norm": 1.6103467475787803, + "learning_rate": 4.8344473454667865e-08, + "loss": 0.6381, + "step": 31637 + }, + { + "epoch": 0.9696579624862082, + "grad_norm": 1.737624387995759, + "learning_rate": 4.824703412247944e-08, + "loss": 0.567, + "step": 31638 + }, + { + "epoch": 0.9696886110089493, + "grad_norm": 1.5522888534580062, + "learning_rate": 4.814969284746007e-08, + "loss": 0.6389, + "step": 31639 + }, + { + "epoch": 0.9697192595316906, + "grad_norm": 1.6474321716078912, + "learning_rate": 4.8052449630567874e-08, + "loss": 0.5366, + "step": 31640 + }, + { + "epoch": 0.9697499080544317, + "grad_norm": 1.8084861032854265, + "learning_rate": 4.7955304472760977e-08, + "loss": 0.6941, + "step": 31641 + }, + { + "epoch": 0.969780556577173, + "grad_norm": 2.0146439642525107, + "learning_rate": 4.7858257374997497e-08, + "loss": 0.5956, + "step": 31642 + }, + { + "epoch": 0.9698112050999141, + "grad_norm": 1.758277462832794, + "learning_rate": 4.7761308338232226e-08, + "loss": 0.731, + "step": 31643 + }, + { + "epoch": 0.9698418536226554, + "grad_norm": 1.6717640152560844, + "learning_rate": 4.766445736342107e-08, + "loss": 0.5844, + "step": 31644 + }, + { + "epoch": 0.9698725021453966, + "grad_norm": 1.797145473226257, + "learning_rate": 4.756770445151992e-08, + "loss": 0.5654, + "step": 31645 + }, + { + "epoch": 0.9699031506681378, + "grad_norm": 1.66966453267884, + "learning_rate": 4.7471049603478034e-08, + "loss": 0.6868, + "step": 31646 + }, + { + "epoch": 0.969933799190879, + "grad_norm": 1.7466716379059797, + "learning_rate": 4.7374492820252415e-08, + "loss": 0.6511, + "step": 31647 + }, + { + "epoch": 0.9699644477136202, + "grad_norm": 2.0286240237854445, + "learning_rate": 4.7278034102792305e-08, + "loss": 0.687, + "step": 31648 + }, + { + "epoch": 0.9699950962363614, + "grad_norm": 1.5331259180139492, + "learning_rate": 4.718167345204805e-08, + "loss": 0.5137, + "step": 31649 + }, + { + "epoch": 0.9700257447591026, + "grad_norm": 1.6140880634238453, + "learning_rate": 4.7085410868968896e-08, + "loss": 0.5849, + "step": 31650 + }, + { + "epoch": 0.9700563932818438, + "grad_norm": 1.6909162770373865, + "learning_rate": 4.6989246354504084e-08, + "loss": 0.6083, + "step": 31651 + }, + { + "epoch": 0.970087041804585, + "grad_norm": 1.9186709604594556, + "learning_rate": 4.6893179909599515e-08, + "loss": 0.6382, + "step": 31652 + }, + { + "epoch": 0.9701176903273262, + "grad_norm": 1.6003068924244013, + "learning_rate": 4.679721153520445e-08, + "loss": 0.6733, + "step": 31653 + }, + { + "epoch": 0.9701483388500675, + "grad_norm": 1.7216621073412155, + "learning_rate": 4.670134123226255e-08, + "loss": 0.67, + "step": 31654 + }, + { + "epoch": 0.9701789873728086, + "grad_norm": 1.6776045403694408, + "learning_rate": 4.6605569001719754e-08, + "loss": 0.6219, + "step": 31655 + }, + { + "epoch": 0.9702096358955499, + "grad_norm": 1.6205026409740269, + "learning_rate": 4.650989484451862e-08, + "loss": 0.6405, + "step": 31656 + }, + { + "epoch": 0.970240284418291, + "grad_norm": 1.6128984965670237, + "learning_rate": 4.641431876160174e-08, + "loss": 0.687, + "step": 31657 + }, + { + "epoch": 0.9702709329410323, + "grad_norm": 1.7771447205851887, + "learning_rate": 4.631884075391169e-08, + "loss": 0.6157, + "step": 31658 + }, + { + "epoch": 0.9703015814637734, + "grad_norm": 1.8556845721725534, + "learning_rate": 4.622346082238882e-08, + "loss": 0.6675, + "step": 31659 + }, + { + "epoch": 0.9703322299865147, + "grad_norm": 1.7157084665752365, + "learning_rate": 4.612817896797239e-08, + "loss": 0.5888, + "step": 31660 + }, + { + "epoch": 0.9703628785092558, + "grad_norm": 1.717871168334229, + "learning_rate": 4.603299519160276e-08, + "loss": 0.6782, + "step": 31661 + }, + { + "epoch": 0.9703935270319971, + "grad_norm": 0.6465899858601812, + "learning_rate": 4.593790949421695e-08, + "loss": 0.5215, + "step": 31662 + }, + { + "epoch": 0.9704241755547383, + "grad_norm": 1.8058251836933366, + "learning_rate": 4.584292187675088e-08, + "loss": 0.6, + "step": 31663 + }, + { + "epoch": 0.9704548240774795, + "grad_norm": 0.6869462092910932, + "learning_rate": 4.574803234014158e-08, + "loss": 0.5096, + "step": 31664 + }, + { + "epoch": 0.9704854726002207, + "grad_norm": 1.991300335008846, + "learning_rate": 4.5653240885323855e-08, + "loss": 0.7193, + "step": 31665 + }, + { + "epoch": 0.9705161211229619, + "grad_norm": 0.7288602981955903, + "learning_rate": 4.555854751323252e-08, + "loss": 0.5337, + "step": 31666 + }, + { + "epoch": 0.9705467696457031, + "grad_norm": 2.0114088944449793, + "learning_rate": 4.5463952224799044e-08, + "loss": 0.5866, + "step": 31667 + }, + { + "epoch": 0.9705774181684442, + "grad_norm": 2.030642737551774, + "learning_rate": 4.536945502095602e-08, + "loss": 0.5861, + "step": 31668 + }, + { + "epoch": 0.9706080666911855, + "grad_norm": 1.6765941687281722, + "learning_rate": 4.5275055902634924e-08, + "loss": 0.7288, + "step": 31669 + }, + { + "epoch": 0.9706387152139266, + "grad_norm": 1.6909554382583494, + "learning_rate": 4.5180754870766116e-08, + "loss": 0.5201, + "step": 31670 + }, + { + "epoch": 0.9706693637366679, + "grad_norm": 1.815543846368965, + "learning_rate": 4.5086551926277754e-08, + "loss": 0.5945, + "step": 31671 + }, + { + "epoch": 0.970700012259409, + "grad_norm": 1.6533221837961058, + "learning_rate": 4.499244707009909e-08, + "loss": 0.6454, + "step": 31672 + }, + { + "epoch": 0.9707306607821503, + "grad_norm": 1.7101266222542915, + "learning_rate": 4.489844030315604e-08, + "loss": 0.5717, + "step": 31673 + }, + { + "epoch": 0.9707613093048915, + "grad_norm": 0.6728767691687544, + "learning_rate": 4.4804531626377876e-08, + "loss": 0.5238, + "step": 31674 + }, + { + "epoch": 0.9707919578276327, + "grad_norm": 1.6851533359955546, + "learning_rate": 4.471072104068608e-08, + "loss": 0.6381, + "step": 31675 + }, + { + "epoch": 0.9708226063503739, + "grad_norm": 1.7568005532858773, + "learning_rate": 4.461700854700657e-08, + "loss": 0.5554, + "step": 31676 + }, + { + "epoch": 0.9708532548731151, + "grad_norm": 1.9112885113749358, + "learning_rate": 4.452339414626417e-08, + "loss": 0.6341, + "step": 31677 + }, + { + "epoch": 0.9708839033958563, + "grad_norm": 1.975440806128264, + "learning_rate": 4.442987783937924e-08, + "loss": 0.7219, + "step": 31678 + }, + { + "epoch": 0.9709145519185975, + "grad_norm": 1.685147249211554, + "learning_rate": 4.4336459627274396e-08, + "loss": 0.6461, + "step": 31679 + }, + { + "epoch": 0.9709452004413387, + "grad_norm": 1.4611952486812538, + "learning_rate": 4.424313951086889e-08, + "loss": 0.5731, + "step": 31680 + }, + { + "epoch": 0.97097584896408, + "grad_norm": 1.6369985516593342, + "learning_rate": 4.4149917491083106e-08, + "loss": 0.6602, + "step": 31681 + }, + { + "epoch": 0.9710064974868211, + "grad_norm": 1.959673933439422, + "learning_rate": 4.40567935688363e-08, + "loss": 0.623, + "step": 31682 + }, + { + "epoch": 0.9710371460095624, + "grad_norm": 1.8039683777625521, + "learning_rate": 4.396376774504441e-08, + "loss": 0.6726, + "step": 31683 + }, + { + "epoch": 0.9710677945323035, + "grad_norm": 0.6899694664949301, + "learning_rate": 4.387084002062447e-08, + "loss": 0.5452, + "step": 31684 + }, + { + "epoch": 0.9710984430550448, + "grad_norm": 1.628012137070674, + "learning_rate": 4.377801039649354e-08, + "loss": 0.6754, + "step": 31685 + }, + { + "epoch": 0.9711290915777859, + "grad_norm": 1.5980134795741008, + "learning_rate": 4.368527887356533e-08, + "loss": 0.6211, + "step": 31686 + }, + { + "epoch": 0.9711597401005272, + "grad_norm": 1.785759391837468, + "learning_rate": 4.3592645452753544e-08, + "loss": 0.5981, + "step": 31687 + }, + { + "epoch": 0.9711903886232683, + "grad_norm": 1.771162906561493, + "learning_rate": 4.35001101349708e-08, + "loss": 0.6774, + "step": 31688 + }, + { + "epoch": 0.9712210371460096, + "grad_norm": 1.9422546330040185, + "learning_rate": 4.340767292112857e-08, + "loss": 0.6008, + "step": 31689 + }, + { + "epoch": 0.9712516856687508, + "grad_norm": 1.800500386076776, + "learning_rate": 4.331533381213837e-08, + "loss": 0.7184, + "step": 31690 + }, + { + "epoch": 0.971282334191492, + "grad_norm": 1.621607360682048, + "learning_rate": 4.322309280890946e-08, + "loss": 0.6336, + "step": 31691 + }, + { + "epoch": 0.9713129827142332, + "grad_norm": 1.7051364787860321, + "learning_rate": 4.3130949912350005e-08, + "loss": 0.6618, + "step": 31692 + }, + { + "epoch": 0.9713436312369744, + "grad_norm": 1.8226838430959187, + "learning_rate": 4.303890512337039e-08, + "loss": 0.6435, + "step": 31693 + }, + { + "epoch": 0.9713742797597156, + "grad_norm": 1.7898556951492437, + "learning_rate": 4.294695844287544e-08, + "loss": 0.6618, + "step": 31694 + }, + { + "epoch": 0.9714049282824568, + "grad_norm": 0.6545624909424212, + "learning_rate": 4.285510987177221e-08, + "loss": 0.549, + "step": 31695 + }, + { + "epoch": 0.971435576805198, + "grad_norm": 1.8836571601731509, + "learning_rate": 4.2763359410964434e-08, + "loss": 0.5608, + "step": 31696 + }, + { + "epoch": 0.9714662253279392, + "grad_norm": 1.623748831802071, + "learning_rate": 4.267170706135804e-08, + "loss": 0.7083, + "step": 31697 + }, + { + "epoch": 0.9714968738506804, + "grad_norm": 1.9102818158061254, + "learning_rate": 4.258015282385342e-08, + "loss": 0.6755, + "step": 31698 + }, + { + "epoch": 0.9715275223734215, + "grad_norm": 1.7595096013992628, + "learning_rate": 4.24886966993554e-08, + "loss": 0.6427, + "step": 31699 + }, + { + "epoch": 0.9715581708961628, + "grad_norm": 1.6958469106468692, + "learning_rate": 4.239733868876439e-08, + "loss": 0.6338, + "step": 31700 + }, + { + "epoch": 0.971588819418904, + "grad_norm": 1.5438310768126873, + "learning_rate": 4.230607879297855e-08, + "loss": 0.6054, + "step": 31701 + }, + { + "epoch": 0.9716194679416452, + "grad_norm": 1.6262063416641008, + "learning_rate": 4.221491701290048e-08, + "loss": 0.6174, + "step": 31702 + }, + { + "epoch": 0.9716501164643864, + "grad_norm": 1.5775865090841235, + "learning_rate": 4.2123853349425036e-08, + "loss": 0.5191, + "step": 31703 + }, + { + "epoch": 0.9716807649871276, + "grad_norm": 1.7638541779286991, + "learning_rate": 4.20328878034526e-08, + "loss": 0.6417, + "step": 31704 + }, + { + "epoch": 0.9717114135098688, + "grad_norm": 1.7720980648665845, + "learning_rate": 4.194202037587691e-08, + "loss": 0.7286, + "step": 31705 + }, + { + "epoch": 0.97174206203261, + "grad_norm": 1.7186399445891576, + "learning_rate": 4.185125106759502e-08, + "loss": 0.5897, + "step": 31706 + }, + { + "epoch": 0.9717727105553512, + "grad_norm": 0.7016377735678487, + "learning_rate": 4.176057987950066e-08, + "loss": 0.5532, + "step": 31707 + }, + { + "epoch": 0.9718033590780925, + "grad_norm": 1.7735174575679185, + "learning_rate": 4.1670006812486454e-08, + "loss": 0.615, + "step": 31708 + }, + { + "epoch": 0.9718340076008336, + "grad_norm": 1.7087889748338518, + "learning_rate": 4.157953186744612e-08, + "loss": 0.6238, + "step": 31709 + }, + { + "epoch": 0.9718646561235749, + "grad_norm": 1.7290583907517927, + "learning_rate": 4.148915504527118e-08, + "loss": 0.6635, + "step": 31710 + }, + { + "epoch": 0.971895304646316, + "grad_norm": 1.7115298539476709, + "learning_rate": 4.13988763468498e-08, + "loss": 0.5757, + "step": 31711 + }, + { + "epoch": 0.9719259531690573, + "grad_norm": 1.8936737040457274, + "learning_rate": 4.130869577307572e-08, + "loss": 0.5755, + "step": 31712 + }, + { + "epoch": 0.9719566016917984, + "grad_norm": 1.805594172674365, + "learning_rate": 4.121861332483379e-08, + "loss": 0.622, + "step": 31713 + }, + { + "epoch": 0.9719872502145397, + "grad_norm": 2.0072693877576993, + "learning_rate": 4.1128629003012176e-08, + "loss": 0.6356, + "step": 31714 + }, + { + "epoch": 0.9720178987372808, + "grad_norm": 1.6632844844106978, + "learning_rate": 4.103874280850018e-08, + "loss": 0.6013, + "step": 31715 + }, + { + "epoch": 0.9720485472600221, + "grad_norm": 1.7289918644098798, + "learning_rate": 4.0948954742180416e-08, + "loss": 0.594, + "step": 31716 + }, + { + "epoch": 0.9720791957827633, + "grad_norm": 1.5769897852768628, + "learning_rate": 4.085926480493774e-08, + "loss": 0.5623, + "step": 31717 + }, + { + "epoch": 0.9721098443055045, + "grad_norm": 0.7124981139268732, + "learning_rate": 4.0769672997659217e-08, + "loss": 0.5628, + "step": 31718 + }, + { + "epoch": 0.9721404928282457, + "grad_norm": 1.6653893516888736, + "learning_rate": 4.0680179321223036e-08, + "loss": 0.6124, + "step": 31719 + }, + { + "epoch": 0.9721711413509869, + "grad_norm": 1.9762620379054743, + "learning_rate": 4.0590783776515154e-08, + "loss": 0.6484, + "step": 31720 + }, + { + "epoch": 0.9722017898737281, + "grad_norm": 1.787823625307929, + "learning_rate": 4.050148636441375e-08, + "loss": 0.6808, + "step": 31721 + }, + { + "epoch": 0.9722324383964693, + "grad_norm": 1.8623097626754759, + "learning_rate": 4.041228708579925e-08, + "loss": 0.6474, + "step": 31722 + }, + { + "epoch": 0.9722630869192105, + "grad_norm": 1.7381915269337187, + "learning_rate": 4.032318594155094e-08, + "loss": 0.6749, + "step": 31723 + }, + { + "epoch": 0.9722937354419517, + "grad_norm": 1.90139968884063, + "learning_rate": 4.0234182932545886e-08, + "loss": 0.622, + "step": 31724 + }, + { + "epoch": 0.9723243839646929, + "grad_norm": 1.800856936627193, + "learning_rate": 4.0145278059662283e-08, + "loss": 0.6457, + "step": 31725 + }, + { + "epoch": 0.9723550324874342, + "grad_norm": 1.6043902609659255, + "learning_rate": 4.00564713237761e-08, + "loss": 0.5917, + "step": 31726 + }, + { + "epoch": 0.9723856810101753, + "grad_norm": 1.6582360672392602, + "learning_rate": 3.9967762725761084e-08, + "loss": 0.593, + "step": 31727 + }, + { + "epoch": 0.9724163295329166, + "grad_norm": 2.0527305859407763, + "learning_rate": 3.987915226649208e-08, + "loss": 0.6066, + "step": 31728 + }, + { + "epoch": 0.9724469780556577, + "grad_norm": 0.6873631345747736, + "learning_rate": 3.9790639946842846e-08, + "loss": 0.521, + "step": 31729 + }, + { + "epoch": 0.9724776265783989, + "grad_norm": 1.8653079717179553, + "learning_rate": 3.97022257676849e-08, + "loss": 0.5803, + "step": 31730 + }, + { + "epoch": 0.9725082751011401, + "grad_norm": 1.850781845733335, + "learning_rate": 3.9613909729888655e-08, + "loss": 0.6146, + "step": 31731 + }, + { + "epoch": 0.9725389236238813, + "grad_norm": 1.8226537202678885, + "learning_rate": 3.952569183432564e-08, + "loss": 0.6105, + "step": 31732 + }, + { + "epoch": 0.9725695721466225, + "grad_norm": 0.697678061577626, + "learning_rate": 3.943757208186405e-08, + "loss": 0.5002, + "step": 31733 + }, + { + "epoch": 0.9726002206693637, + "grad_norm": 0.6900332796923371, + "learning_rate": 3.934955047337319e-08, + "loss": 0.5076, + "step": 31734 + }, + { + "epoch": 0.972630869192105, + "grad_norm": 1.812841944805589, + "learning_rate": 3.926162700971903e-08, + "loss": 0.6272, + "step": 31735 + }, + { + "epoch": 0.9726615177148461, + "grad_norm": 1.7116660156791583, + "learning_rate": 3.9173801691768655e-08, + "loss": 0.6304, + "step": 31736 + }, + { + "epoch": 0.9726921662375874, + "grad_norm": 1.7945540890401377, + "learning_rate": 3.908607452038804e-08, + "loss": 0.6115, + "step": 31737 + }, + { + "epoch": 0.9727228147603285, + "grad_norm": 1.7567378616445213, + "learning_rate": 3.899844549643983e-08, + "loss": 0.5783, + "step": 31738 + }, + { + "epoch": 0.9727534632830698, + "grad_norm": 1.8600085612821506, + "learning_rate": 3.891091462078889e-08, + "loss": 0.7663, + "step": 31739 + }, + { + "epoch": 0.9727841118058109, + "grad_norm": 1.7172006043697, + "learning_rate": 3.882348189429896e-08, + "loss": 0.6134, + "step": 31740 + }, + { + "epoch": 0.9728147603285522, + "grad_norm": 1.6648459508264515, + "learning_rate": 3.873614731782826e-08, + "loss": 0.5516, + "step": 31741 + }, + { + "epoch": 0.9728454088512933, + "grad_norm": 1.6976504028825565, + "learning_rate": 3.8648910892239435e-08, + "loss": 0.541, + "step": 31742 + }, + { + "epoch": 0.9728760573740346, + "grad_norm": 2.0816096464808904, + "learning_rate": 3.856177261839178e-08, + "loss": 0.6641, + "step": 31743 + }, + { + "epoch": 0.9729067058967757, + "grad_norm": 1.6962087879182688, + "learning_rate": 3.847473249714351e-08, + "loss": 0.5709, + "step": 31744 + }, + { + "epoch": 0.972937354419517, + "grad_norm": 0.6892019416543042, + "learning_rate": 3.838779052935282e-08, + "loss": 0.5357, + "step": 31745 + }, + { + "epoch": 0.9729680029422582, + "grad_norm": 1.7903971186988261, + "learning_rate": 3.8300946715875695e-08, + "loss": 0.5509, + "step": 31746 + }, + { + "epoch": 0.9729986514649994, + "grad_norm": 1.5863802105859535, + "learning_rate": 3.8214201057568126e-08, + "loss": 0.5594, + "step": 31747 + }, + { + "epoch": 0.9730292999877406, + "grad_norm": 1.8525287668547616, + "learning_rate": 3.812755355528497e-08, + "loss": 0.5356, + "step": 31748 + }, + { + "epoch": 0.9730599485104818, + "grad_norm": 0.7117705771401515, + "learning_rate": 3.804100420987999e-08, + "loss": 0.5222, + "step": 31749 + }, + { + "epoch": 0.973090597033223, + "grad_norm": 1.776058539714462, + "learning_rate": 3.7954553022205853e-08, + "loss": 0.6901, + "step": 31750 + }, + { + "epoch": 0.9731212455559642, + "grad_norm": 0.6799418232844859, + "learning_rate": 3.786819999311409e-08, + "loss": 0.5245, + "step": 31751 + }, + { + "epoch": 0.9731518940787054, + "grad_norm": 1.74173758223837, + "learning_rate": 3.778194512345623e-08, + "loss": 0.6912, + "step": 31752 + }, + { + "epoch": 0.9731825426014467, + "grad_norm": 2.0228415894761254, + "learning_rate": 3.769578841408161e-08, + "loss": 0.6763, + "step": 31753 + }, + { + "epoch": 0.9732131911241878, + "grad_norm": 1.7731222054670221, + "learning_rate": 3.760972986583955e-08, + "loss": 0.7094, + "step": 31754 + }, + { + "epoch": 0.9732438396469291, + "grad_norm": 1.8891698775834904, + "learning_rate": 3.7523769479577146e-08, + "loss": 0.7063, + "step": 31755 + }, + { + "epoch": 0.9732744881696702, + "grad_norm": 1.8166234768542722, + "learning_rate": 3.7437907256142605e-08, + "loss": 0.6609, + "step": 31756 + }, + { + "epoch": 0.9733051366924115, + "grad_norm": 1.7665850881435134, + "learning_rate": 3.735214319638192e-08, + "loss": 0.6108, + "step": 31757 + }, + { + "epoch": 0.9733357852151526, + "grad_norm": 1.850235743604127, + "learning_rate": 3.726647730113886e-08, + "loss": 0.5821, + "step": 31758 + }, + { + "epoch": 0.9733664337378939, + "grad_norm": 1.6413957650137034, + "learning_rate": 3.7180909571258304e-08, + "loss": 0.6437, + "step": 31759 + }, + { + "epoch": 0.973397082260635, + "grad_norm": 1.666258205812335, + "learning_rate": 3.7095440007584026e-08, + "loss": 0.6323, + "step": 31760 + }, + { + "epoch": 0.9734277307833762, + "grad_norm": 0.6874288444263181, + "learning_rate": 3.701006861095646e-08, + "loss": 0.5018, + "step": 31761 + }, + { + "epoch": 0.9734583793061174, + "grad_norm": 1.9415862196641074, + "learning_rate": 3.692479538221827e-08, + "loss": 0.6497, + "step": 31762 + }, + { + "epoch": 0.9734890278288586, + "grad_norm": 1.7080588430031627, + "learning_rate": 3.683962032220989e-08, + "loss": 0.5874, + "step": 31763 + }, + { + "epoch": 0.9735196763515999, + "grad_norm": 1.7136573991439412, + "learning_rate": 3.675454343176954e-08, + "loss": 0.5537, + "step": 31764 + }, + { + "epoch": 0.973550324874341, + "grad_norm": 1.953630579331653, + "learning_rate": 3.666956471173544e-08, + "loss": 0.6856, + "step": 31765 + }, + { + "epoch": 0.9735809733970823, + "grad_norm": 0.667402099403243, + "learning_rate": 3.658468416294469e-08, + "loss": 0.5073, + "step": 31766 + }, + { + "epoch": 0.9736116219198234, + "grad_norm": 1.5113555746670266, + "learning_rate": 3.6499901786235524e-08, + "loss": 0.6115, + "step": 31767 + }, + { + "epoch": 0.9736422704425647, + "grad_norm": 1.5808283370196818, + "learning_rate": 3.641521758244171e-08, + "loss": 0.5379, + "step": 31768 + }, + { + "epoch": 0.9736729189653058, + "grad_norm": 1.6518435998270182, + "learning_rate": 3.633063155239813e-08, + "loss": 0.6852, + "step": 31769 + }, + { + "epoch": 0.9737035674880471, + "grad_norm": 1.6863410189824681, + "learning_rate": 3.624614369693857e-08, + "loss": 0.708, + "step": 31770 + }, + { + "epoch": 0.9737342160107882, + "grad_norm": 0.6716866749118482, + "learning_rate": 3.616175401689459e-08, + "loss": 0.5365, + "step": 31771 + }, + { + "epoch": 0.9737648645335295, + "grad_norm": 1.7054269977451046, + "learning_rate": 3.607746251309885e-08, + "loss": 0.6597, + "step": 31772 + }, + { + "epoch": 0.9737955130562707, + "grad_norm": 1.7751361748793422, + "learning_rate": 3.5993269186379574e-08, + "loss": 0.6666, + "step": 31773 + }, + { + "epoch": 0.9738261615790119, + "grad_norm": 1.4569398125973998, + "learning_rate": 3.590917403756944e-08, + "loss": 0.5682, + "step": 31774 + }, + { + "epoch": 0.9738568101017531, + "grad_norm": 1.7911839971592232, + "learning_rate": 3.5825177067495554e-08, + "loss": 0.7053, + "step": 31775 + }, + { + "epoch": 0.9738874586244943, + "grad_norm": 1.6851769004754429, + "learning_rate": 3.574127827698504e-08, + "loss": 0.5998, + "step": 31776 + }, + { + "epoch": 0.9739181071472355, + "grad_norm": 1.9123585061285486, + "learning_rate": 3.565747766686611e-08, + "loss": 0.7008, + "step": 31777 + }, + { + "epoch": 0.9739487556699767, + "grad_norm": 1.7372822243540824, + "learning_rate": 3.5573775237962573e-08, + "loss": 0.5917, + "step": 31778 + }, + { + "epoch": 0.9739794041927179, + "grad_norm": 1.6646516301590444, + "learning_rate": 3.549017099110042e-08, + "loss": 0.6977, + "step": 31779 + }, + { + "epoch": 0.9740100527154592, + "grad_norm": 1.9342184102250426, + "learning_rate": 3.540666492710343e-08, + "loss": 0.6144, + "step": 31780 + }, + { + "epoch": 0.9740407012382003, + "grad_norm": 1.7629000253304956, + "learning_rate": 3.532325704679429e-08, + "loss": 0.6665, + "step": 31781 + }, + { + "epoch": 0.9740713497609416, + "grad_norm": 1.7953727090565734, + "learning_rate": 3.5239947350993456e-08, + "loss": 0.6752, + "step": 31782 + }, + { + "epoch": 0.9741019982836827, + "grad_norm": 1.7635262099761764, + "learning_rate": 3.5156735840524703e-08, + "loss": 0.6593, + "step": 31783 + }, + { + "epoch": 0.974132646806424, + "grad_norm": 1.733962326937424, + "learning_rate": 3.5073622516205164e-08, + "loss": 0.7363, + "step": 31784 + }, + { + "epoch": 0.9741632953291651, + "grad_norm": 1.6021172776098391, + "learning_rate": 3.499060737885529e-08, + "loss": 0.6692, + "step": 31785 + }, + { + "epoch": 0.9741939438519064, + "grad_norm": 1.8917044873282365, + "learning_rate": 3.4907690429292204e-08, + "loss": 0.6819, + "step": 31786 + }, + { + "epoch": 0.9742245923746475, + "grad_norm": 1.6880136896639708, + "learning_rate": 3.482487166833304e-08, + "loss": 0.6888, + "step": 31787 + }, + { + "epoch": 0.9742552408973888, + "grad_norm": 1.9971486663788238, + "learning_rate": 3.474215109679491e-08, + "loss": 0.6121, + "step": 31788 + }, + { + "epoch": 0.97428588942013, + "grad_norm": 1.6585887396083872, + "learning_rate": 3.4659528715492717e-08, + "loss": 0.6833, + "step": 31789 + }, + { + "epoch": 0.9743165379428712, + "grad_norm": 1.6886518440401619, + "learning_rate": 3.4577004525238044e-08, + "loss": 0.6656, + "step": 31790 + }, + { + "epoch": 0.9743471864656124, + "grad_norm": 1.8833969278819065, + "learning_rate": 3.44945785268469e-08, + "loss": 0.574, + "step": 31791 + }, + { + "epoch": 0.9743778349883535, + "grad_norm": 1.9694286922845914, + "learning_rate": 3.4412250721130854e-08, + "loss": 0.6511, + "step": 31792 + }, + { + "epoch": 0.9744084835110948, + "grad_norm": 1.9360267239558275, + "learning_rate": 3.433002110890038e-08, + "loss": 0.6872, + "step": 31793 + }, + { + "epoch": 0.9744391320338359, + "grad_norm": 1.9994864416321665, + "learning_rate": 3.4247889690965927e-08, + "loss": 0.618, + "step": 31794 + }, + { + "epoch": 0.9744697805565772, + "grad_norm": 0.6650314923402075, + "learning_rate": 3.416585646813686e-08, + "loss": 0.5181, + "step": 31795 + }, + { + "epoch": 0.9745004290793183, + "grad_norm": 1.9604007167403914, + "learning_rate": 3.4083921441221415e-08, + "loss": 0.5392, + "step": 31796 + }, + { + "epoch": 0.9745310776020596, + "grad_norm": 1.6058693318890536, + "learning_rate": 3.400208461102672e-08, + "loss": 0.5575, + "step": 31797 + }, + { + "epoch": 0.9745617261248007, + "grad_norm": 1.6735792515527808, + "learning_rate": 3.3920345978359916e-08, + "loss": 0.5644, + "step": 31798 + }, + { + "epoch": 0.974592374647542, + "grad_norm": 1.7963541864679229, + "learning_rate": 3.383870554402591e-08, + "loss": 0.7189, + "step": 31799 + }, + { + "epoch": 0.9746230231702832, + "grad_norm": 1.8759476731800064, + "learning_rate": 3.37571633088285e-08, + "loss": 0.6208, + "step": 31800 + }, + { + "epoch": 0.9746536716930244, + "grad_norm": 1.5582592250686091, + "learning_rate": 3.3675719273572607e-08, + "loss": 0.6159, + "step": 31801 + }, + { + "epoch": 0.9746843202157656, + "grad_norm": 1.7320546319307446, + "learning_rate": 3.3594373439058694e-08, + "loss": 0.6366, + "step": 31802 + }, + { + "epoch": 0.9747149687385068, + "grad_norm": 1.7079882076302042, + "learning_rate": 3.3513125806090565e-08, + "loss": 0.6498, + "step": 31803 + }, + { + "epoch": 0.974745617261248, + "grad_norm": 1.670040048835406, + "learning_rate": 3.343197637546758e-08, + "loss": 0.6549, + "step": 31804 + }, + { + "epoch": 0.9747762657839892, + "grad_norm": 1.5880709874345158, + "learning_rate": 3.33509251479891e-08, + "loss": 0.556, + "step": 31805 + }, + { + "epoch": 0.9748069143067304, + "grad_norm": 1.900195789824448, + "learning_rate": 3.326997212445338e-08, + "loss": 0.6382, + "step": 31806 + }, + { + "epoch": 0.9748375628294716, + "grad_norm": 0.6711050825928842, + "learning_rate": 3.318911730565977e-08, + "loss": 0.5255, + "step": 31807 + }, + { + "epoch": 0.9748682113522128, + "grad_norm": 1.7115685133227452, + "learning_rate": 3.3108360692403195e-08, + "loss": 0.6768, + "step": 31808 + }, + { + "epoch": 0.9748988598749541, + "grad_norm": 1.8209248076517348, + "learning_rate": 3.302770228547969e-08, + "loss": 0.693, + "step": 31809 + }, + { + "epoch": 0.9749295083976952, + "grad_norm": 2.0050769398133674, + "learning_rate": 3.294714208568528e-08, + "loss": 0.6563, + "step": 31810 + }, + { + "epoch": 0.9749601569204365, + "grad_norm": 1.7020058373170757, + "learning_rate": 3.286668009381267e-08, + "loss": 0.6084, + "step": 31811 + }, + { + "epoch": 0.9749908054431776, + "grad_norm": 0.6979410919985699, + "learning_rate": 3.278631631065454e-08, + "loss": 0.5451, + "step": 31812 + }, + { + "epoch": 0.9750214539659189, + "grad_norm": 1.786306526534258, + "learning_rate": 3.270605073700362e-08, + "loss": 0.6894, + "step": 31813 + }, + { + "epoch": 0.97505210248866, + "grad_norm": 1.7081763871749363, + "learning_rate": 3.2625883373649245e-08, + "loss": 0.579, + "step": 31814 + }, + { + "epoch": 0.9750827510114013, + "grad_norm": 1.6604119118298444, + "learning_rate": 3.254581422138303e-08, + "loss": 0.7092, + "step": 31815 + }, + { + "epoch": 0.9751133995341424, + "grad_norm": 1.9116248413846104, + "learning_rate": 3.2465843280994333e-08, + "loss": 0.6842, + "step": 31816 + }, + { + "epoch": 0.9751440480568837, + "grad_norm": 1.9174017212273755, + "learning_rate": 3.2385970553268084e-08, + "loss": 0.6858, + "step": 31817 + }, + { + "epoch": 0.9751746965796249, + "grad_norm": 0.6758111509086259, + "learning_rate": 3.230619603899365e-08, + "loss": 0.5177, + "step": 31818 + }, + { + "epoch": 0.9752053451023661, + "grad_norm": 1.735115489503543, + "learning_rate": 3.222651973895707e-08, + "loss": 0.6239, + "step": 31819 + }, + { + "epoch": 0.9752359936251073, + "grad_norm": 1.4420519321021015, + "learning_rate": 3.214694165394328e-08, + "loss": 0.5524, + "step": 31820 + }, + { + "epoch": 0.9752666421478485, + "grad_norm": 1.731484163226121, + "learning_rate": 3.206746178473497e-08, + "loss": 0.6163, + "step": 31821 + }, + { + "epoch": 0.9752972906705897, + "grad_norm": 1.7714883462517297, + "learning_rate": 3.198808013211707e-08, + "loss": 0.6546, + "step": 31822 + }, + { + "epoch": 0.9753279391933308, + "grad_norm": 1.9234309511718761, + "learning_rate": 3.190879669687008e-08, + "loss": 0.6664, + "step": 31823 + }, + { + "epoch": 0.9753585877160721, + "grad_norm": 1.7731274080519628, + "learning_rate": 3.182961147977781e-08, + "loss": 0.6102, + "step": 31824 + }, + { + "epoch": 0.9753892362388132, + "grad_norm": 1.9868943958307836, + "learning_rate": 3.175052448161742e-08, + "loss": 0.7635, + "step": 31825 + }, + { + "epoch": 0.9754198847615545, + "grad_norm": 1.6953287006449973, + "learning_rate": 3.16715357031705e-08, + "loss": 0.6635, + "step": 31826 + }, + { + "epoch": 0.9754505332842957, + "grad_norm": 1.7727185892943491, + "learning_rate": 3.159264514521421e-08, + "loss": 0.6673, + "step": 31827 + }, + { + "epoch": 0.9754811818070369, + "grad_norm": 1.6277980788629351, + "learning_rate": 3.1513852808525704e-08, + "loss": 0.7181, + "step": 31828 + }, + { + "epoch": 0.9755118303297781, + "grad_norm": 1.940864744709951, + "learning_rate": 3.143515869388214e-08, + "loss": 0.6305, + "step": 31829 + }, + { + "epoch": 0.9755424788525193, + "grad_norm": 1.9770688975954689, + "learning_rate": 3.1356562802058456e-08, + "loss": 0.6898, + "step": 31830 + }, + { + "epoch": 0.9755731273752605, + "grad_norm": 1.694562765633907, + "learning_rate": 3.1278065133829586e-08, + "loss": 0.6374, + "step": 31831 + }, + { + "epoch": 0.9756037758980017, + "grad_norm": 2.0605591964180316, + "learning_rate": 3.119966568996713e-08, + "loss": 0.5782, + "step": 31832 + }, + { + "epoch": 0.9756344244207429, + "grad_norm": 1.7008293534712742, + "learning_rate": 3.112136447124603e-08, + "loss": 0.6219, + "step": 31833 + }, + { + "epoch": 0.9756650729434841, + "grad_norm": 1.5981996445924886, + "learning_rate": 3.104316147843678e-08, + "loss": 0.6142, + "step": 31834 + }, + { + "epoch": 0.9756957214662253, + "grad_norm": 1.6609662760260886, + "learning_rate": 3.096505671230987e-08, + "loss": 0.5475, + "step": 31835 + }, + { + "epoch": 0.9757263699889666, + "grad_norm": 1.7941615009827414, + "learning_rate": 3.0887050173634693e-08, + "loss": 0.6643, + "step": 31836 + }, + { + "epoch": 0.9757570185117077, + "grad_norm": 2.187307498164206, + "learning_rate": 3.080914186318063e-08, + "loss": 0.6603, + "step": 31837 + }, + { + "epoch": 0.975787667034449, + "grad_norm": 1.6569134284300249, + "learning_rate": 3.073133178171484e-08, + "loss": 0.6791, + "step": 31838 + }, + { + "epoch": 0.9758183155571901, + "grad_norm": 1.8870447073186478, + "learning_rate": 3.065361993000338e-08, + "loss": 0.6396, + "step": 31839 + }, + { + "epoch": 0.9758489640799314, + "grad_norm": 1.6734823493862108, + "learning_rate": 3.057600630881341e-08, + "loss": 0.5972, + "step": 31840 + }, + { + "epoch": 0.9758796126026725, + "grad_norm": 1.743176526676469, + "learning_rate": 3.049849091890767e-08, + "loss": 0.5434, + "step": 31841 + }, + { + "epoch": 0.9759102611254138, + "grad_norm": 1.7535159973832721, + "learning_rate": 3.0421073761052186e-08, + "loss": 0.6659, + "step": 31842 + }, + { + "epoch": 0.9759409096481549, + "grad_norm": 1.619407436582987, + "learning_rate": 3.034375483600749e-08, + "loss": 0.6248, + "step": 31843 + }, + { + "epoch": 0.9759715581708962, + "grad_norm": 1.9086915748942626, + "learning_rate": 3.02665341445374e-08, + "loss": 0.6365, + "step": 31844 + }, + { + "epoch": 0.9760022066936374, + "grad_norm": 1.9127901459986185, + "learning_rate": 3.018941168740242e-08, + "loss": 0.6784, + "step": 31845 + }, + { + "epoch": 0.9760328552163786, + "grad_norm": 0.6725993306536746, + "learning_rate": 3.011238746536194e-08, + "loss": 0.522, + "step": 31846 + }, + { + "epoch": 0.9760635037391198, + "grad_norm": 1.9071254556892603, + "learning_rate": 3.003546147917424e-08, + "loss": 0.6898, + "step": 31847 + }, + { + "epoch": 0.976094152261861, + "grad_norm": 1.698103630082701, + "learning_rate": 2.995863372959873e-08, + "loss": 0.722, + "step": 31848 + }, + { + "epoch": 0.9761248007846022, + "grad_norm": 1.6384015594011283, + "learning_rate": 2.9881904217391454e-08, + "loss": 0.6638, + "step": 31849 + }, + { + "epoch": 0.9761554493073434, + "grad_norm": 1.717321581630154, + "learning_rate": 2.980527294330848e-08, + "loss": 0.6336, + "step": 31850 + }, + { + "epoch": 0.9761860978300846, + "grad_norm": 1.8435709221665098, + "learning_rate": 2.9728739908105876e-08, + "loss": 0.7176, + "step": 31851 + }, + { + "epoch": 0.9762167463528258, + "grad_norm": 1.7792964234077604, + "learning_rate": 2.9652305112536362e-08, + "loss": 0.588, + "step": 31852 + }, + { + "epoch": 0.976247394875567, + "grad_norm": 1.645923821775571, + "learning_rate": 2.9575968557353783e-08, + "loss": 0.649, + "step": 31853 + }, + { + "epoch": 0.9762780433983081, + "grad_norm": 1.8998227613758871, + "learning_rate": 2.9499730243310875e-08, + "loss": 0.6807, + "step": 31854 + }, + { + "epoch": 0.9763086919210494, + "grad_norm": 0.6688379688800127, + "learning_rate": 2.9423590171157034e-08, + "loss": 0.5156, + "step": 31855 + }, + { + "epoch": 0.9763393404437906, + "grad_norm": 1.681832191255811, + "learning_rate": 2.9347548341644993e-08, + "loss": 0.6309, + "step": 31856 + }, + { + "epoch": 0.9763699889665318, + "grad_norm": 1.521454799992831, + "learning_rate": 2.927160475552193e-08, + "loss": 0.5431, + "step": 31857 + }, + { + "epoch": 0.976400637489273, + "grad_norm": 1.680322363628826, + "learning_rate": 2.919575941353725e-08, + "loss": 0.614, + "step": 31858 + }, + { + "epoch": 0.9764312860120142, + "grad_norm": 1.527812632170121, + "learning_rate": 2.912001231643702e-08, + "loss": 0.6127, + "step": 31859 + }, + { + "epoch": 0.9764619345347554, + "grad_norm": 1.6052371280828008, + "learning_rate": 2.9044363464968418e-08, + "loss": 0.6533, + "step": 31860 + }, + { + "epoch": 0.9764925830574966, + "grad_norm": 0.7042739513890139, + "learning_rate": 2.8968812859877516e-08, + "loss": 0.5185, + "step": 31861 + }, + { + "epoch": 0.9765232315802378, + "grad_norm": 1.7509068196242485, + "learning_rate": 2.8893360501908164e-08, + "loss": 0.5918, + "step": 31862 + }, + { + "epoch": 0.976553880102979, + "grad_norm": 1.566864140031773, + "learning_rate": 2.881800639180421e-08, + "loss": 0.6001, + "step": 31863 + }, + { + "epoch": 0.9765845286257202, + "grad_norm": 1.4812792782081663, + "learning_rate": 2.8742750530307285e-08, + "loss": 0.6249, + "step": 31864 + }, + { + "epoch": 0.9766151771484615, + "grad_norm": 1.7512863510454506, + "learning_rate": 2.8667592918159017e-08, + "loss": 0.6067, + "step": 31865 + }, + { + "epoch": 0.9766458256712026, + "grad_norm": 1.9053517356677854, + "learning_rate": 2.859253355609992e-08, + "loss": 0.6434, + "step": 31866 + }, + { + "epoch": 0.9766764741939439, + "grad_norm": 1.8064789024837384, + "learning_rate": 2.8517572444870522e-08, + "loss": 0.6935, + "step": 31867 + }, + { + "epoch": 0.976707122716685, + "grad_norm": 0.6599135572719064, + "learning_rate": 2.8442709585208008e-08, + "loss": 0.5084, + "step": 31868 + }, + { + "epoch": 0.9767377712394263, + "grad_norm": 1.6610030179168311, + "learning_rate": 2.836794497785178e-08, + "loss": 0.572, + "step": 31869 + }, + { + "epoch": 0.9767684197621674, + "grad_norm": 1.9645545685658508, + "learning_rate": 2.8293278623536812e-08, + "loss": 0.6669, + "step": 31870 + }, + { + "epoch": 0.9767990682849087, + "grad_norm": 1.5929661150825276, + "learning_rate": 2.821871052300029e-08, + "loss": 0.6912, + "step": 31871 + }, + { + "epoch": 0.9768297168076499, + "grad_norm": 0.6724754699239768, + "learning_rate": 2.8144240676976076e-08, + "loss": 0.5073, + "step": 31872 + }, + { + "epoch": 0.9768603653303911, + "grad_norm": 1.7075846693976784, + "learning_rate": 2.806986908619691e-08, + "loss": 0.6176, + "step": 31873 + }, + { + "epoch": 0.9768910138531323, + "grad_norm": 1.6067906820029534, + "learning_rate": 2.7995595751397764e-08, + "loss": 0.5523, + "step": 31874 + }, + { + "epoch": 0.9769216623758735, + "grad_norm": 2.2125326421362304, + "learning_rate": 2.7921420673309164e-08, + "loss": 0.7042, + "step": 31875 + }, + { + "epoch": 0.9769523108986147, + "grad_norm": 0.651880829411456, + "learning_rate": 2.7847343852662746e-08, + "loss": 0.4947, + "step": 31876 + }, + { + "epoch": 0.9769829594213559, + "grad_norm": 1.656199492969915, + "learning_rate": 2.7773365290186815e-08, + "loss": 0.6027, + "step": 31877 + }, + { + "epoch": 0.9770136079440971, + "grad_norm": 0.6635031793273394, + "learning_rate": 2.7699484986613013e-08, + "loss": 0.509, + "step": 31878 + }, + { + "epoch": 0.9770442564668383, + "grad_norm": 1.5828778618230268, + "learning_rate": 2.7625702942666312e-08, + "loss": 0.6304, + "step": 31879 + }, + { + "epoch": 0.9770749049895795, + "grad_norm": 1.9431785035887497, + "learning_rate": 2.7552019159076126e-08, + "loss": 0.5704, + "step": 31880 + }, + { + "epoch": 0.9771055535123208, + "grad_norm": 1.6559051188755325, + "learning_rate": 2.7478433636566325e-08, + "loss": 0.618, + "step": 31881 + }, + { + "epoch": 0.9771362020350619, + "grad_norm": 0.6527774710027304, + "learning_rate": 2.7404946375864106e-08, + "loss": 0.5133, + "step": 31882 + }, + { + "epoch": 0.9771668505578032, + "grad_norm": 0.6682343984279618, + "learning_rate": 2.733155737769222e-08, + "loss": 0.5217, + "step": 31883 + }, + { + "epoch": 0.9771974990805443, + "grad_norm": 1.9808741165681727, + "learning_rate": 2.7258266642774532e-08, + "loss": 0.6515, + "step": 31884 + }, + { + "epoch": 0.9772281476032855, + "grad_norm": 1.6084469210798547, + "learning_rate": 2.7185074171831584e-08, + "loss": 0.6442, + "step": 31885 + }, + { + "epoch": 0.9772587961260267, + "grad_norm": 1.5907496203689113, + "learning_rate": 2.711197996558723e-08, + "loss": 0.626, + "step": 31886 + }, + { + "epoch": 0.9772894446487679, + "grad_norm": 1.5440306232772179, + "learning_rate": 2.7038984024759795e-08, + "loss": 0.6751, + "step": 31887 + }, + { + "epoch": 0.9773200931715091, + "grad_norm": 1.5979588040968085, + "learning_rate": 2.696608635006759e-08, + "loss": 0.5573, + "step": 31888 + }, + { + "epoch": 0.9773507416942503, + "grad_norm": 1.762031452312396, + "learning_rate": 2.6893286942232254e-08, + "loss": 0.6421, + "step": 31889 + }, + { + "epoch": 0.9773813902169916, + "grad_norm": 1.6756506020580573, + "learning_rate": 2.682058580196767e-08, + "loss": 0.6086, + "step": 31890 + }, + { + "epoch": 0.9774120387397327, + "grad_norm": 1.6850133819749384, + "learning_rate": 2.6747982929992145e-08, + "loss": 0.6374, + "step": 31891 + }, + { + "epoch": 0.977442687262474, + "grad_norm": 1.6266643834381806, + "learning_rate": 2.6675478327020666e-08, + "loss": 0.5703, + "step": 31892 + }, + { + "epoch": 0.9774733357852151, + "grad_norm": 1.6505280540715048, + "learning_rate": 2.6603071993767105e-08, + "loss": 0.6415, + "step": 31893 + }, + { + "epoch": 0.9775039843079564, + "grad_norm": 0.639583274169885, + "learning_rate": 2.6530763930945337e-08, + "loss": 0.4874, + "step": 31894 + }, + { + "epoch": 0.9775346328306975, + "grad_norm": 0.6541489162350249, + "learning_rate": 2.6458554139268124e-08, + "loss": 0.5308, + "step": 31895 + }, + { + "epoch": 0.9775652813534388, + "grad_norm": 0.6825594980116713, + "learning_rate": 2.6386442619446008e-08, + "loss": 0.5428, + "step": 31896 + }, + { + "epoch": 0.9775959298761799, + "grad_norm": 0.6851694433857674, + "learning_rate": 2.6314429372190642e-08, + "loss": 0.5205, + "step": 31897 + }, + { + "epoch": 0.9776265783989212, + "grad_norm": 1.722850687665541, + "learning_rate": 2.624251439821146e-08, + "loss": 0.66, + "step": 31898 + }, + { + "epoch": 0.9776572269216623, + "grad_norm": 1.598748088435302, + "learning_rate": 2.6170697698215673e-08, + "loss": 0.5864, + "step": 31899 + }, + { + "epoch": 0.9776878754444036, + "grad_norm": 1.650176164199381, + "learning_rate": 2.6098979272912716e-08, + "loss": 0.6493, + "step": 31900 + }, + { + "epoch": 0.9777185239671448, + "grad_norm": 1.7778182834354928, + "learning_rate": 2.6027359123007578e-08, + "loss": 0.6964, + "step": 31901 + }, + { + "epoch": 0.977749172489886, + "grad_norm": 1.6998972488286153, + "learning_rate": 2.595583724920747e-08, + "loss": 0.5402, + "step": 31902 + }, + { + "epoch": 0.9777798210126272, + "grad_norm": 1.7867611770015142, + "learning_rate": 2.5884413652216277e-08, + "loss": 0.6426, + "step": 31903 + }, + { + "epoch": 0.9778104695353684, + "grad_norm": 1.68211461883258, + "learning_rate": 2.581308833273788e-08, + "loss": 0.6671, + "step": 31904 + }, + { + "epoch": 0.9778411180581096, + "grad_norm": 1.8468170854496986, + "learning_rate": 2.5741861291476156e-08, + "loss": 0.6691, + "step": 31905 + }, + { + "epoch": 0.9778717665808508, + "grad_norm": 1.6566457175759064, + "learning_rate": 2.567073252913055e-08, + "loss": 0.5455, + "step": 31906 + }, + { + "epoch": 0.977902415103592, + "grad_norm": 1.5158571226363586, + "learning_rate": 2.559970204640383e-08, + "loss": 0.631, + "step": 31907 + }, + { + "epoch": 0.9779330636263333, + "grad_norm": 1.679168318804525, + "learning_rate": 2.5528769843995436e-08, + "loss": 0.6256, + "step": 31908 + }, + { + "epoch": 0.9779637121490744, + "grad_norm": 1.866064209494311, + "learning_rate": 2.5457935922603706e-08, + "loss": 0.6609, + "step": 31909 + }, + { + "epoch": 0.9779943606718157, + "grad_norm": 1.7576799620568877, + "learning_rate": 2.538720028292696e-08, + "loss": 0.6144, + "step": 31910 + }, + { + "epoch": 0.9780250091945568, + "grad_norm": 1.7354093715525114, + "learning_rate": 2.5316562925662424e-08, + "loss": 0.591, + "step": 31911 + }, + { + "epoch": 0.9780556577172981, + "grad_norm": 1.6298528097632654, + "learning_rate": 2.5246023851506208e-08, + "loss": 0.7095, + "step": 31912 + }, + { + "epoch": 0.9780863062400392, + "grad_norm": 1.9934773357627604, + "learning_rate": 2.5175583061153307e-08, + "loss": 0.6339, + "step": 31913 + }, + { + "epoch": 0.9781169547627805, + "grad_norm": 1.7574229669709178, + "learning_rate": 2.5105240555296506e-08, + "loss": 0.7104, + "step": 31914 + }, + { + "epoch": 0.9781476032855216, + "grad_norm": 1.6990450075068018, + "learning_rate": 2.5034996334630802e-08, + "loss": 0.619, + "step": 31915 + }, + { + "epoch": 0.9781782518082628, + "grad_norm": 1.6025777984283183, + "learning_rate": 2.4964850399847862e-08, + "loss": 0.6389, + "step": 31916 + }, + { + "epoch": 0.978208900331004, + "grad_norm": 1.7090489251857772, + "learning_rate": 2.4894802751637137e-08, + "loss": 0.6203, + "step": 31917 + }, + { + "epoch": 0.9782395488537452, + "grad_norm": 1.6803584193513093, + "learning_rate": 2.4824853390691404e-08, + "loss": 0.5845, + "step": 31918 + }, + { + "epoch": 0.9782701973764865, + "grad_norm": 1.698737840532721, + "learning_rate": 2.4755002317697895e-08, + "loss": 0.6348, + "step": 31919 + }, + { + "epoch": 0.9783008458992276, + "grad_norm": 1.8276245658983792, + "learning_rate": 2.4685249533346057e-08, + "loss": 0.5645, + "step": 31920 + }, + { + "epoch": 0.9783314944219689, + "grad_norm": 1.6371487720214297, + "learning_rate": 2.4615595038323116e-08, + "loss": 0.6182, + "step": 31921 + }, + { + "epoch": 0.97836214294471, + "grad_norm": 1.6564824164783045, + "learning_rate": 2.454603883331408e-08, + "loss": 0.6247, + "step": 31922 + }, + { + "epoch": 0.9783927914674513, + "grad_norm": 1.9021394991227838, + "learning_rate": 2.4476580919005065e-08, + "loss": 0.6596, + "step": 31923 + }, + { + "epoch": 0.9784234399901924, + "grad_norm": 1.9566303259086555, + "learning_rate": 2.4407221296082196e-08, + "loss": 0.7637, + "step": 31924 + }, + { + "epoch": 0.9784540885129337, + "grad_norm": 0.6512291596229778, + "learning_rate": 2.433795996522603e-08, + "loss": 0.4834, + "step": 31925 + }, + { + "epoch": 0.9784847370356748, + "grad_norm": 1.7867050263728586, + "learning_rate": 2.4268796927120477e-08, + "loss": 0.5958, + "step": 31926 + }, + { + "epoch": 0.9785153855584161, + "grad_norm": 1.6989824956999613, + "learning_rate": 2.41997321824472e-08, + "loss": 0.6031, + "step": 31927 + }, + { + "epoch": 0.9785460340811573, + "grad_norm": 1.9765680844919644, + "learning_rate": 2.4130765731885665e-08, + "loss": 0.7131, + "step": 31928 + }, + { + "epoch": 0.9785766826038985, + "grad_norm": 0.6664004486635321, + "learning_rate": 2.4061897576117543e-08, + "loss": 0.5238, + "step": 31929 + }, + { + "epoch": 0.9786073311266397, + "grad_norm": 1.707105796186302, + "learning_rate": 2.3993127715818964e-08, + "loss": 0.6085, + "step": 31930 + }, + { + "epoch": 0.9786379796493809, + "grad_norm": 1.6222609861537638, + "learning_rate": 2.3924456151668273e-08, + "loss": 0.6024, + "step": 31931 + }, + { + "epoch": 0.9786686281721221, + "grad_norm": 1.8727354248441708, + "learning_rate": 2.3855882884343816e-08, + "loss": 0.6468, + "step": 31932 + }, + { + "epoch": 0.9786992766948633, + "grad_norm": 0.6617583154879276, + "learning_rate": 2.378740791451839e-08, + "loss": 0.5338, + "step": 31933 + }, + { + "epoch": 0.9787299252176045, + "grad_norm": 2.098379886297526, + "learning_rate": 2.371903124286923e-08, + "loss": 0.7352, + "step": 31934 + }, + { + "epoch": 0.9787605737403458, + "grad_norm": 1.6431165561412564, + "learning_rate": 2.3650752870068016e-08, + "loss": 0.5409, + "step": 31935 + }, + { + "epoch": 0.9787912222630869, + "grad_norm": 2.1350527738845955, + "learning_rate": 2.358257279678866e-08, + "loss": 0.6398, + "step": 31936 + }, + { + "epoch": 0.9788218707858282, + "grad_norm": 1.6735130964051945, + "learning_rate": 2.3514491023702846e-08, + "loss": 0.6065, + "step": 31937 + }, + { + "epoch": 0.9788525193085693, + "grad_norm": 1.7113376861625742, + "learning_rate": 2.3446507551482257e-08, + "loss": 0.5892, + "step": 31938 + }, + { + "epoch": 0.9788831678313106, + "grad_norm": 0.660024799949207, + "learning_rate": 2.3378622380795248e-08, + "loss": 0.5063, + "step": 31939 + }, + { + "epoch": 0.9789138163540517, + "grad_norm": 1.779068574823389, + "learning_rate": 2.331083551231128e-08, + "loss": 0.626, + "step": 31940 + }, + { + "epoch": 0.978944464876793, + "grad_norm": 1.9804658213708273, + "learning_rate": 2.3243146946697602e-08, + "loss": 0.7328, + "step": 31941 + }, + { + "epoch": 0.9789751133995341, + "grad_norm": 0.6880861704473402, + "learning_rate": 2.3175556684622568e-08, + "loss": 0.5429, + "step": 31942 + }, + { + "epoch": 0.9790057619222754, + "grad_norm": 1.5744807338676097, + "learning_rate": 2.310806472675231e-08, + "loss": 0.6352, + "step": 31943 + }, + { + "epoch": 0.9790364104450165, + "grad_norm": 1.5796493720757978, + "learning_rate": 2.3040671073749632e-08, + "loss": 0.5272, + "step": 31944 + }, + { + "epoch": 0.9790670589677578, + "grad_norm": 1.5692280134853607, + "learning_rate": 2.2973375726279557e-08, + "loss": 0.6491, + "step": 31945 + }, + { + "epoch": 0.979097707490499, + "grad_norm": 1.7001215581784612, + "learning_rate": 2.290617868500711e-08, + "loss": 0.6266, + "step": 31946 + }, + { + "epoch": 0.9791283560132401, + "grad_norm": 1.5574219362853994, + "learning_rate": 2.2839079950591757e-08, + "loss": 0.6139, + "step": 31947 + }, + { + "epoch": 0.9791590045359814, + "grad_norm": 1.7095115086030657, + "learning_rate": 2.277207952369631e-08, + "loss": 0.5535, + "step": 31948 + }, + { + "epoch": 0.9791896530587225, + "grad_norm": 1.7245335497254337, + "learning_rate": 2.270517740498024e-08, + "loss": 0.6337, + "step": 31949 + }, + { + "epoch": 0.9792203015814638, + "grad_norm": 1.8115145579338834, + "learning_rate": 2.2638373595101904e-08, + "loss": 0.7044, + "step": 31950 + }, + { + "epoch": 0.9792509501042049, + "grad_norm": 1.9921779813126836, + "learning_rate": 2.2571668094721887e-08, + "loss": 0.7455, + "step": 31951 + }, + { + "epoch": 0.9792815986269462, + "grad_norm": 1.5633152045598306, + "learning_rate": 2.2505060904495224e-08, + "loss": 0.5625, + "step": 31952 + }, + { + "epoch": 0.9793122471496873, + "grad_norm": 1.5224154200943936, + "learning_rate": 2.2438552025079163e-08, + "loss": 0.5978, + "step": 31953 + }, + { + "epoch": 0.9793428956724286, + "grad_norm": 1.8590727277997834, + "learning_rate": 2.2372141457128738e-08, + "loss": 0.7274, + "step": 31954 + }, + { + "epoch": 0.9793735441951698, + "grad_norm": 1.884932751226465, + "learning_rate": 2.2305829201298978e-08, + "loss": 0.5746, + "step": 31955 + }, + { + "epoch": 0.979404192717911, + "grad_norm": 1.7559092047203988, + "learning_rate": 2.2239615258242696e-08, + "loss": 0.7029, + "step": 31956 + }, + { + "epoch": 0.9794348412406522, + "grad_norm": 2.0068882062385467, + "learning_rate": 2.2173499628612703e-08, + "loss": 0.6362, + "step": 31957 + }, + { + "epoch": 0.9794654897633934, + "grad_norm": 1.8911078172833986, + "learning_rate": 2.210748231305959e-08, + "loss": 0.657, + "step": 31958 + }, + { + "epoch": 0.9794961382861346, + "grad_norm": 1.772275934829005, + "learning_rate": 2.204156331223395e-08, + "loss": 0.6607, + "step": 31959 + }, + { + "epoch": 0.9795267868088758, + "grad_norm": 0.656364396915822, + "learning_rate": 2.1975742626786366e-08, + "loss": 0.515, + "step": 31960 + }, + { + "epoch": 0.979557435331617, + "grad_norm": 1.6993383394705701, + "learning_rate": 2.1910020257365216e-08, + "loss": 0.6351, + "step": 31961 + }, + { + "epoch": 0.9795880838543582, + "grad_norm": 1.661959935733638, + "learning_rate": 2.1844396204617756e-08, + "loss": 0.6091, + "step": 31962 + }, + { + "epoch": 0.9796187323770994, + "grad_norm": 1.7011326638834543, + "learning_rate": 2.1778870469189027e-08, + "loss": 0.7262, + "step": 31963 + }, + { + "epoch": 0.9796493808998407, + "grad_norm": 1.7577254434596026, + "learning_rate": 2.1713443051727402e-08, + "loss": 0.6696, + "step": 31964 + }, + { + "epoch": 0.9796800294225818, + "grad_norm": 0.6804932364111069, + "learning_rate": 2.1648113952875692e-08, + "loss": 0.5155, + "step": 31965 + }, + { + "epoch": 0.9797106779453231, + "grad_norm": 1.6642008560359016, + "learning_rate": 2.1582883173278944e-08, + "loss": 0.5796, + "step": 31966 + }, + { + "epoch": 0.9797413264680642, + "grad_norm": 1.7891639767178673, + "learning_rate": 2.1517750713578867e-08, + "loss": 0.7264, + "step": 31967 + }, + { + "epoch": 0.9797719749908055, + "grad_norm": 1.6865420397843272, + "learning_rate": 2.1452716574417166e-08, + "loss": 0.5288, + "step": 31968 + }, + { + "epoch": 0.9798026235135466, + "grad_norm": 1.6931555438317847, + "learning_rate": 2.138778075643444e-08, + "loss": 0.6604, + "step": 31969 + }, + { + "epoch": 0.9798332720362879, + "grad_norm": 1.7492266628102502, + "learning_rate": 2.1322943260271288e-08, + "loss": 0.684, + "step": 31970 + }, + { + "epoch": 0.979863920559029, + "grad_norm": 1.7377439460916335, + "learning_rate": 2.1258204086567204e-08, + "loss": 0.6809, + "step": 31971 + }, + { + "epoch": 0.9798945690817703, + "grad_norm": 1.7407518822769121, + "learning_rate": 2.1193563235958336e-08, + "loss": 0.653, + "step": 31972 + }, + { + "epoch": 0.9799252176045115, + "grad_norm": 0.6672473140013113, + "learning_rate": 2.112902070908307e-08, + "loss": 0.5055, + "step": 31973 + }, + { + "epoch": 0.9799558661272527, + "grad_norm": 1.7208330320650573, + "learning_rate": 2.106457650657645e-08, + "loss": 0.6676, + "step": 31974 + }, + { + "epoch": 0.9799865146499939, + "grad_norm": 0.6903038242489441, + "learning_rate": 2.1000230629073526e-08, + "loss": 0.5083, + "step": 31975 + }, + { + "epoch": 0.9800171631727351, + "grad_norm": 1.6835070019914138, + "learning_rate": 2.0935983077209344e-08, + "loss": 0.5629, + "step": 31976 + }, + { + "epoch": 0.9800478116954763, + "grad_norm": 2.015896577614777, + "learning_rate": 2.087183385161562e-08, + "loss": 0.6925, + "step": 31977 + }, + { + "epoch": 0.9800784602182174, + "grad_norm": 1.7764589219820357, + "learning_rate": 2.080778295292518e-08, + "loss": 0.6448, + "step": 31978 + }, + { + "epoch": 0.9801091087409587, + "grad_norm": 1.6100692621116044, + "learning_rate": 2.0743830381768637e-08, + "loss": 0.5737, + "step": 31979 + }, + { + "epoch": 0.9801397572636998, + "grad_norm": 1.7454700747899803, + "learning_rate": 2.067997613877659e-08, + "loss": 0.6923, + "step": 31980 + }, + { + "epoch": 0.9801704057864411, + "grad_norm": 1.8034853484965248, + "learning_rate": 2.0616220224578542e-08, + "loss": 0.6609, + "step": 31981 + }, + { + "epoch": 0.9802010543091823, + "grad_norm": 1.573330392825073, + "learning_rate": 2.0552562639801766e-08, + "loss": 0.5831, + "step": 31982 + }, + { + "epoch": 0.9802317028319235, + "grad_norm": 1.6592416657377693, + "learning_rate": 2.0489003385073536e-08, + "loss": 0.6368, + "step": 31983 + }, + { + "epoch": 0.9802623513546647, + "grad_norm": 1.756915326825829, + "learning_rate": 2.042554246102113e-08, + "loss": 0.626, + "step": 31984 + }, + { + "epoch": 0.9802929998774059, + "grad_norm": 0.6695104684505041, + "learning_rate": 2.0362179868268495e-08, + "loss": 0.4959, + "step": 31985 + }, + { + "epoch": 0.9803236484001471, + "grad_norm": 1.8244083384448901, + "learning_rate": 2.0298915607441795e-08, + "loss": 0.6104, + "step": 31986 + }, + { + "epoch": 0.9803542969228883, + "grad_norm": 1.7727606067182435, + "learning_rate": 2.0235749679162753e-08, + "loss": 0.6971, + "step": 31987 + }, + { + "epoch": 0.9803849454456295, + "grad_norm": 1.6205739985027918, + "learning_rate": 2.017268208405421e-08, + "loss": 0.6355, + "step": 31988 + }, + { + "epoch": 0.9804155939683707, + "grad_norm": 0.6864177615959073, + "learning_rate": 2.0109712822737882e-08, + "loss": 0.5109, + "step": 31989 + }, + { + "epoch": 0.9804462424911119, + "grad_norm": 1.8446381561723775, + "learning_rate": 2.004684189583439e-08, + "loss": 0.6114, + "step": 31990 + }, + { + "epoch": 0.9804768910138532, + "grad_norm": 1.9670630970698457, + "learning_rate": 1.998406930396213e-08, + "loss": 0.6553, + "step": 31991 + }, + { + "epoch": 0.9805075395365943, + "grad_norm": 1.8695130431872136, + "learning_rate": 1.99213950477406e-08, + "loss": 0.6767, + "step": 31992 + }, + { + "epoch": 0.9805381880593356, + "grad_norm": 0.6550474318474048, + "learning_rate": 1.9858819127787087e-08, + "loss": 0.4957, + "step": 31993 + }, + { + "epoch": 0.9805688365820767, + "grad_norm": 1.690513260241534, + "learning_rate": 1.9796341544717766e-08, + "loss": 0.6592, + "step": 31994 + }, + { + "epoch": 0.980599485104818, + "grad_norm": 1.8945399088745534, + "learning_rate": 1.973396229914881e-08, + "loss": 0.6063, + "step": 31995 + }, + { + "epoch": 0.9806301336275591, + "grad_norm": 1.7288586160407244, + "learning_rate": 1.9671681391695285e-08, + "loss": 0.6543, + "step": 31996 + }, + { + "epoch": 0.9806607821503004, + "grad_norm": 1.7037359992879857, + "learning_rate": 1.960949882297003e-08, + "loss": 0.6761, + "step": 31997 + }, + { + "epoch": 0.9806914306730415, + "grad_norm": 1.6010946562125141, + "learning_rate": 1.954741459358589e-08, + "loss": 0.6475, + "step": 31998 + }, + { + "epoch": 0.9807220791957828, + "grad_norm": 1.8586874363833776, + "learning_rate": 1.9485428704154595e-08, + "loss": 0.7419, + "step": 31999 + }, + { + "epoch": 0.980752727718524, + "grad_norm": 1.7962112627777291, + "learning_rate": 1.9423541155286774e-08, + "loss": 0.6258, + "step": 32000 + }, + { + "epoch": 0.9807833762412652, + "grad_norm": 1.6854951082076324, + "learning_rate": 1.9361751947591933e-08, + "loss": 0.6516, + "step": 32001 + }, + { + "epoch": 0.9808140247640064, + "grad_norm": 1.8300440610062814, + "learning_rate": 1.9300061081680698e-08, + "loss": 0.7277, + "step": 32002 + }, + { + "epoch": 0.9808446732867476, + "grad_norm": 0.6773791267516247, + "learning_rate": 1.923846855815925e-08, + "loss": 0.5188, + "step": 32003 + }, + { + "epoch": 0.9808753218094888, + "grad_norm": 1.9663586807531055, + "learning_rate": 1.9176974377633773e-08, + "loss": 0.6662, + "step": 32004 + }, + { + "epoch": 0.98090597033223, + "grad_norm": 1.7045210042936858, + "learning_rate": 1.9115578540712665e-08, + "loss": 0.6123, + "step": 32005 + }, + { + "epoch": 0.9809366188549712, + "grad_norm": 1.8174222316500945, + "learning_rate": 1.905428104799878e-08, + "loss": 0.6632, + "step": 32006 + }, + { + "epoch": 0.9809672673777124, + "grad_norm": 1.7622626911888464, + "learning_rate": 1.899308190009719e-08, + "loss": 0.6013, + "step": 32007 + }, + { + "epoch": 0.9809979159004536, + "grad_norm": 0.6707718076146891, + "learning_rate": 1.893198109761074e-08, + "loss": 0.5219, + "step": 32008 + }, + { + "epoch": 0.9810285644231947, + "grad_norm": 0.6630649391291433, + "learning_rate": 1.887097864114007e-08, + "loss": 0.4985, + "step": 32009 + }, + { + "epoch": 0.981059212945936, + "grad_norm": 0.6353028102864977, + "learning_rate": 1.8810074531289136e-08, + "loss": 0.4948, + "step": 32010 + }, + { + "epoch": 0.9810898614686772, + "grad_norm": 1.7593947747481, + "learning_rate": 1.874926876865524e-08, + "loss": 0.5988, + "step": 32011 + }, + { + "epoch": 0.9811205099914184, + "grad_norm": 1.8918456661971392, + "learning_rate": 1.8688561353837897e-08, + "loss": 0.626, + "step": 32012 + }, + { + "epoch": 0.9811511585141596, + "grad_norm": 1.7872175875829508, + "learning_rate": 1.8627952287437746e-08, + "loss": 0.6846, + "step": 32013 + }, + { + "epoch": 0.9811818070369008, + "grad_norm": 1.6323596397037459, + "learning_rate": 1.856744157004875e-08, + "loss": 0.5673, + "step": 32014 + }, + { + "epoch": 0.981212455559642, + "grad_norm": 1.7327987754199226, + "learning_rate": 1.850702920226932e-08, + "loss": 0.5964, + "step": 32015 + }, + { + "epoch": 0.9812431040823832, + "grad_norm": 0.6862288044363074, + "learning_rate": 1.8446715184694543e-08, + "loss": 0.5251, + "step": 32016 + }, + { + "epoch": 0.9812737526051244, + "grad_norm": 1.9181114974027134, + "learning_rate": 1.8386499517917267e-08, + "loss": 0.617, + "step": 32017 + }, + { + "epoch": 0.9813044011278657, + "grad_norm": 1.5271667500221302, + "learning_rate": 1.8326382202531468e-08, + "loss": 0.6486, + "step": 32018 + }, + { + "epoch": 0.9813350496506068, + "grad_norm": 1.9813953042566177, + "learning_rate": 1.8266363239130003e-08, + "loss": 0.6383, + "step": 32019 + }, + { + "epoch": 0.9813656981733481, + "grad_norm": 1.7891467656766733, + "learning_rate": 1.820644262830462e-08, + "loss": 0.6006, + "step": 32020 + }, + { + "epoch": 0.9813963466960892, + "grad_norm": 1.7008851730213417, + "learning_rate": 1.814662037064485e-08, + "loss": 0.7171, + "step": 32021 + }, + { + "epoch": 0.9814269952188305, + "grad_norm": 1.6297820763616253, + "learning_rate": 1.8086896466740223e-08, + "loss": 0.6787, + "step": 32022 + }, + { + "epoch": 0.9814576437415716, + "grad_norm": 0.6514166967999566, + "learning_rate": 1.802727091717915e-08, + "loss": 0.5011, + "step": 32023 + }, + { + "epoch": 0.9814882922643129, + "grad_norm": 1.7426311255309213, + "learning_rate": 1.7967743722550057e-08, + "loss": 0.6014, + "step": 32024 + }, + { + "epoch": 0.981518940787054, + "grad_norm": 1.9477861727624393, + "learning_rate": 1.7908314883438028e-08, + "loss": 0.6707, + "step": 32025 + }, + { + "epoch": 0.9815495893097953, + "grad_norm": 2.112569338870807, + "learning_rate": 1.784898440042926e-08, + "loss": 0.6244, + "step": 32026 + }, + { + "epoch": 0.9815802378325365, + "grad_norm": 0.6458906873261845, + "learning_rate": 1.778975227410884e-08, + "loss": 0.4759, + "step": 32027 + }, + { + "epoch": 0.9816108863552777, + "grad_norm": 1.5517311267011797, + "learning_rate": 1.7730618505060748e-08, + "loss": 0.6146, + "step": 32028 + }, + { + "epoch": 0.9816415348780189, + "grad_norm": 1.5344136808084845, + "learning_rate": 1.767158309386674e-08, + "loss": 0.5609, + "step": 32029 + }, + { + "epoch": 0.9816721834007601, + "grad_norm": 1.6785494776403662, + "learning_rate": 1.7612646041107462e-08, + "loss": 0.5686, + "step": 32030 + }, + { + "epoch": 0.9817028319235013, + "grad_norm": 1.7220363157914653, + "learning_rate": 1.7553807347366892e-08, + "loss": 0.6909, + "step": 32031 + }, + { + "epoch": 0.9817334804462425, + "grad_norm": 2.0524563832477742, + "learning_rate": 1.7495067013221235e-08, + "loss": 0.6517, + "step": 32032 + }, + { + "epoch": 0.9817641289689837, + "grad_norm": 0.6763048736481608, + "learning_rate": 1.7436425039251137e-08, + "loss": 0.5266, + "step": 32033 + }, + { + "epoch": 0.981794777491725, + "grad_norm": 1.6241632061874682, + "learning_rate": 1.7377881426033915e-08, + "loss": 0.5634, + "step": 32034 + }, + { + "epoch": 0.9818254260144661, + "grad_norm": 1.8211294919387793, + "learning_rate": 1.7319436174147996e-08, + "loss": 0.6635, + "step": 32035 + }, + { + "epoch": 0.9818560745372074, + "grad_norm": 1.8106940510485972, + "learning_rate": 1.7261089284166256e-08, + "loss": 0.6036, + "step": 32036 + }, + { + "epoch": 0.9818867230599485, + "grad_norm": 0.7094570859086947, + "learning_rate": 1.7202840756666007e-08, + "loss": 0.5238, + "step": 32037 + }, + { + "epoch": 0.9819173715826898, + "grad_norm": 1.8047452892143607, + "learning_rate": 1.7144690592219016e-08, + "loss": 0.7204, + "step": 32038 + }, + { + "epoch": 0.9819480201054309, + "grad_norm": 1.7057958367474106, + "learning_rate": 1.7086638791401487e-08, + "loss": 0.5316, + "step": 32039 + }, + { + "epoch": 0.9819786686281721, + "grad_norm": 1.7682385964052436, + "learning_rate": 1.702868535478297e-08, + "loss": 0.6402, + "step": 32040 + }, + { + "epoch": 0.9820093171509133, + "grad_norm": 1.9239062409554384, + "learning_rate": 1.6970830282934113e-08, + "loss": 0.7122, + "step": 32041 + }, + { + "epoch": 0.9820399656736545, + "grad_norm": 1.6273785714782651, + "learning_rate": 1.6913073576426687e-08, + "loss": 0.6775, + "step": 32042 + }, + { + "epoch": 0.9820706141963957, + "grad_norm": 1.9803817320598855, + "learning_rate": 1.685541523582912e-08, + "loss": 0.7636, + "step": 32043 + }, + { + "epoch": 0.9821012627191369, + "grad_norm": 0.6983784224419965, + "learning_rate": 1.679785526170985e-08, + "loss": 0.5422, + "step": 32044 + }, + { + "epoch": 0.9821319112418782, + "grad_norm": 1.6331564354177033, + "learning_rate": 1.674039365463509e-08, + "loss": 0.5541, + "step": 32045 + }, + { + "epoch": 0.9821625597646193, + "grad_norm": 1.7160731793359865, + "learning_rate": 1.6683030415171053e-08, + "loss": 0.6516, + "step": 32046 + }, + { + "epoch": 0.9821932082873606, + "grad_norm": 1.6559000277946865, + "learning_rate": 1.6625765543883952e-08, + "loss": 0.6241, + "step": 32047 + }, + { + "epoch": 0.9822238568101017, + "grad_norm": 1.7807129140584592, + "learning_rate": 1.6568599041337784e-08, + "loss": 0.6387, + "step": 32048 + }, + { + "epoch": 0.982254505332843, + "grad_norm": 0.6578720063181769, + "learning_rate": 1.651153090809543e-08, + "loss": 0.5146, + "step": 32049 + }, + { + "epoch": 0.9822851538555841, + "grad_norm": 1.533142916634892, + "learning_rate": 1.6454561144718663e-08, + "loss": 0.6335, + "step": 32050 + }, + { + "epoch": 0.9823158023783254, + "grad_norm": 1.9764048348751635, + "learning_rate": 1.6397689751770364e-08, + "loss": 0.6703, + "step": 32051 + }, + { + "epoch": 0.9823464509010665, + "grad_norm": 1.6701377535932267, + "learning_rate": 1.6340916729810086e-08, + "loss": 0.645, + "step": 32052 + }, + { + "epoch": 0.9823770994238078, + "grad_norm": 1.7004346056975688, + "learning_rate": 1.6284242079396272e-08, + "loss": 0.6218, + "step": 32053 + }, + { + "epoch": 0.982407747946549, + "grad_norm": 1.7424195604799615, + "learning_rate": 1.6227665801088478e-08, + "loss": 0.5878, + "step": 32054 + }, + { + "epoch": 0.9824383964692902, + "grad_norm": 1.7701205832956626, + "learning_rate": 1.6171187895445138e-08, + "loss": 0.589, + "step": 32055 + }, + { + "epoch": 0.9824690449920314, + "grad_norm": 1.7132265649497274, + "learning_rate": 1.6114808363020263e-08, + "loss": 0.6196, + "step": 32056 + }, + { + "epoch": 0.9824996935147726, + "grad_norm": 1.7813636199796776, + "learning_rate": 1.6058527204371176e-08, + "loss": 0.7112, + "step": 32057 + }, + { + "epoch": 0.9825303420375138, + "grad_norm": 1.7637199753464061, + "learning_rate": 1.6002344420051884e-08, + "loss": 0.5706, + "step": 32058 + }, + { + "epoch": 0.982560990560255, + "grad_norm": 1.6686205908915783, + "learning_rate": 1.5946260010616386e-08, + "loss": 0.6954, + "step": 32059 + }, + { + "epoch": 0.9825916390829962, + "grad_norm": 0.6766170834237419, + "learning_rate": 1.5890273976616464e-08, + "loss": 0.5339, + "step": 32060 + }, + { + "epoch": 0.9826222876057374, + "grad_norm": 1.9400641043843248, + "learning_rate": 1.58343863186039e-08, + "loss": 0.6526, + "step": 32061 + }, + { + "epoch": 0.9826529361284786, + "grad_norm": 0.6314945580069345, + "learning_rate": 1.5778597037130473e-08, + "loss": 0.485, + "step": 32062 + }, + { + "epoch": 0.9826835846512199, + "grad_norm": 1.7865944267251161, + "learning_rate": 1.5722906132744632e-08, + "loss": 0.6544, + "step": 32063 + }, + { + "epoch": 0.982714233173961, + "grad_norm": 1.7091808927378043, + "learning_rate": 1.5667313605995936e-08, + "loss": 0.6403, + "step": 32064 + }, + { + "epoch": 0.9827448816967023, + "grad_norm": 2.01375554874256, + "learning_rate": 1.5611819457431732e-08, + "loss": 0.6478, + "step": 32065 + }, + { + "epoch": 0.9827755302194434, + "grad_norm": 1.7742389450008444, + "learning_rate": 1.5556423687598245e-08, + "loss": 0.6564, + "step": 32066 + }, + { + "epoch": 0.9828061787421847, + "grad_norm": 1.6874754161349261, + "learning_rate": 1.5501126297042813e-08, + "loss": 0.591, + "step": 32067 + }, + { + "epoch": 0.9828368272649258, + "grad_norm": 1.6057237983930965, + "learning_rate": 1.5445927286308338e-08, + "loss": 0.6768, + "step": 32068 + }, + { + "epoch": 0.9828674757876671, + "grad_norm": 1.6760858420393008, + "learning_rate": 1.539082665594105e-08, + "loss": 0.6739, + "step": 32069 + }, + { + "epoch": 0.9828981243104082, + "grad_norm": 1.6453379586291763, + "learning_rate": 1.5335824406481625e-08, + "loss": 0.5835, + "step": 32070 + }, + { + "epoch": 0.9829287728331494, + "grad_norm": 1.4937781671515322, + "learning_rate": 1.5280920538474075e-08, + "loss": 0.5815, + "step": 32071 + }, + { + "epoch": 0.9829594213558907, + "grad_norm": 1.8809248424417366, + "learning_rate": 1.5226115052456857e-08, + "loss": 0.5686, + "step": 32072 + }, + { + "epoch": 0.9829900698786318, + "grad_norm": 1.9013737029328364, + "learning_rate": 1.517140794897287e-08, + "loss": 0.5975, + "step": 32073 + }, + { + "epoch": 0.9830207184013731, + "grad_norm": 0.6729287018493748, + "learning_rate": 1.5116799228559464e-08, + "loss": 0.5208, + "step": 32074 + }, + { + "epoch": 0.9830513669241142, + "grad_norm": 1.7169012523121248, + "learning_rate": 1.5062288891753986e-08, + "loss": 0.6015, + "step": 32075 + }, + { + "epoch": 0.9830820154468555, + "grad_norm": 0.6516143095810951, + "learning_rate": 1.5007876939094888e-08, + "loss": 0.5075, + "step": 32076 + }, + { + "epoch": 0.9831126639695966, + "grad_norm": 1.5237262330688908, + "learning_rate": 1.495356337111842e-08, + "loss": 0.6204, + "step": 32077 + }, + { + "epoch": 0.9831433124923379, + "grad_norm": 1.730718618046435, + "learning_rate": 1.4899348188359696e-08, + "loss": 0.6868, + "step": 32078 + }, + { + "epoch": 0.983173961015079, + "grad_norm": 1.668530317454035, + "learning_rate": 1.4845231391351634e-08, + "loss": 0.6302, + "step": 32079 + }, + { + "epoch": 0.9832046095378203, + "grad_norm": 1.7383952466102297, + "learning_rate": 1.4791212980628244e-08, + "loss": 0.6952, + "step": 32080 + }, + { + "epoch": 0.9832352580605614, + "grad_norm": 2.016246823361771, + "learning_rate": 1.4737292956722437e-08, + "loss": 0.7071, + "step": 32081 + }, + { + "epoch": 0.9832659065833027, + "grad_norm": 1.7095063582642833, + "learning_rate": 1.46834713201649e-08, + "loss": 0.5739, + "step": 32082 + }, + { + "epoch": 0.9832965551060439, + "grad_norm": 1.71456104559669, + "learning_rate": 1.4629748071485205e-08, + "loss": 0.6603, + "step": 32083 + }, + { + "epoch": 0.9833272036287851, + "grad_norm": 2.0709642995302495, + "learning_rate": 1.4576123211214043e-08, + "loss": 0.7173, + "step": 32084 + }, + { + "epoch": 0.9833578521515263, + "grad_norm": 0.6725476982025758, + "learning_rate": 1.4522596739879879e-08, + "loss": 0.5423, + "step": 32085 + }, + { + "epoch": 0.9833885006742675, + "grad_norm": 2.1382907019666364, + "learning_rate": 1.4469168658007848e-08, + "loss": 0.8398, + "step": 32086 + }, + { + "epoch": 0.9834191491970087, + "grad_norm": 1.9638483661353021, + "learning_rate": 1.4415838966127526e-08, + "loss": 0.6108, + "step": 32087 + }, + { + "epoch": 0.9834497977197499, + "grad_norm": 1.9017091295414519, + "learning_rate": 1.436260766476183e-08, + "loss": 0.6901, + "step": 32088 + }, + { + "epoch": 0.9834804462424911, + "grad_norm": 1.6659157232898187, + "learning_rate": 1.4309474754437003e-08, + "loss": 0.6345, + "step": 32089 + }, + { + "epoch": 0.9835110947652324, + "grad_norm": 0.6346945722108376, + "learning_rate": 1.425644023567596e-08, + "loss": 0.4976, + "step": 32090 + }, + { + "epoch": 0.9835417432879735, + "grad_norm": 1.6314762223849162, + "learning_rate": 1.420350410900051e-08, + "loss": 0.6273, + "step": 32091 + }, + { + "epoch": 0.9835723918107148, + "grad_norm": 1.6801218323990075, + "learning_rate": 1.4150666374933564e-08, + "loss": 0.6002, + "step": 32092 + }, + { + "epoch": 0.9836030403334559, + "grad_norm": 1.929931283306849, + "learning_rate": 1.4097927033994708e-08, + "loss": 0.5905, + "step": 32093 + }, + { + "epoch": 0.9836336888561972, + "grad_norm": 0.6583770979392998, + "learning_rate": 1.4045286086703525e-08, + "loss": 0.5303, + "step": 32094 + }, + { + "epoch": 0.9836643373789383, + "grad_norm": 2.409688806270703, + "learning_rate": 1.39927435335796e-08, + "loss": 0.5959, + "step": 32095 + }, + { + "epoch": 0.9836949859016796, + "grad_norm": 0.6753916186545729, + "learning_rate": 1.3940299375140298e-08, + "loss": 0.501, + "step": 32096 + }, + { + "epoch": 0.9837256344244207, + "grad_norm": 1.7956749296200063, + "learning_rate": 1.3887953611901872e-08, + "loss": 0.6878, + "step": 32097 + }, + { + "epoch": 0.983756282947162, + "grad_norm": 1.7056535025099233, + "learning_rate": 1.3835706244381685e-08, + "loss": 0.6208, + "step": 32098 + }, + { + "epoch": 0.9837869314699031, + "grad_norm": 1.6623411914690158, + "learning_rate": 1.3783557273092662e-08, + "loss": 0.6463, + "step": 32099 + }, + { + "epoch": 0.9838175799926444, + "grad_norm": 1.793448644212292, + "learning_rate": 1.3731506698548836e-08, + "loss": 0.7113, + "step": 32100 + }, + { + "epoch": 0.9838482285153856, + "grad_norm": 2.028853975787812, + "learning_rate": 1.3679554521263127e-08, + "loss": 0.6542, + "step": 32101 + }, + { + "epoch": 0.9838788770381267, + "grad_norm": 1.7274311535206355, + "learning_rate": 1.3627700741749573e-08, + "loss": 0.6894, + "step": 32102 + }, + { + "epoch": 0.983909525560868, + "grad_norm": 1.7530365263235115, + "learning_rate": 1.357594536051554e-08, + "loss": 0.5321, + "step": 32103 + }, + { + "epoch": 0.9839401740836091, + "grad_norm": 0.6367509300683423, + "learning_rate": 1.3524288378073957e-08, + "loss": 0.5072, + "step": 32104 + }, + { + "epoch": 0.9839708226063504, + "grad_norm": 1.8127007630525995, + "learning_rate": 1.3472729794933303e-08, + "loss": 0.6423, + "step": 32105 + }, + { + "epoch": 0.9840014711290915, + "grad_norm": 1.8089453933064579, + "learning_rate": 1.3421269611599841e-08, + "loss": 0.6258, + "step": 32106 + }, + { + "epoch": 0.9840321196518328, + "grad_norm": 1.8124893392935253, + "learning_rate": 1.3369907828582052e-08, + "loss": 0.6265, + "step": 32107 + }, + { + "epoch": 0.9840627681745739, + "grad_norm": 1.9357083080902495, + "learning_rate": 1.3318644446386197e-08, + "loss": 0.751, + "step": 32108 + }, + { + "epoch": 0.9840934166973152, + "grad_norm": 1.611664303340604, + "learning_rate": 1.326747946551632e-08, + "loss": 0.5891, + "step": 32109 + }, + { + "epoch": 0.9841240652200564, + "grad_norm": 1.8731141501023212, + "learning_rate": 1.321641288647757e-08, + "loss": 0.7183, + "step": 32110 + }, + { + "epoch": 0.9841547137427976, + "grad_norm": 1.6468090180067272, + "learning_rate": 1.316544470977288e-08, + "loss": 0.6149, + "step": 32111 + }, + { + "epoch": 0.9841853622655388, + "grad_norm": 1.8480471715442544, + "learning_rate": 1.3114574935904068e-08, + "loss": 0.5593, + "step": 32112 + }, + { + "epoch": 0.98421601078828, + "grad_norm": 1.6354775293254835, + "learning_rate": 1.3063803565372956e-08, + "loss": 0.6297, + "step": 32113 + }, + { + "epoch": 0.9842466593110212, + "grad_norm": 1.7636861188172093, + "learning_rate": 1.3013130598679147e-08, + "loss": 0.7225, + "step": 32114 + }, + { + "epoch": 0.9842773078337624, + "grad_norm": 1.6772376831681288, + "learning_rate": 1.2962556036322238e-08, + "loss": 0.6262, + "step": 32115 + }, + { + "epoch": 0.9843079563565036, + "grad_norm": 1.6607473736605127, + "learning_rate": 1.291207987880072e-08, + "loss": 0.6789, + "step": 32116 + }, + { + "epoch": 0.9843386048792448, + "grad_norm": 1.7133867247601875, + "learning_rate": 1.2861702126610864e-08, + "loss": 0.6743, + "step": 32117 + }, + { + "epoch": 0.984369253401986, + "grad_norm": 1.6365455885035596, + "learning_rate": 1.281142278025116e-08, + "loss": 0.5673, + "step": 32118 + }, + { + "epoch": 0.9843999019247273, + "grad_norm": 1.7234836050186293, + "learning_rate": 1.2761241840215654e-08, + "loss": 0.6423, + "step": 32119 + }, + { + "epoch": 0.9844305504474684, + "grad_norm": 1.8876149564555256, + "learning_rate": 1.27111593069984e-08, + "loss": 0.6798, + "step": 32120 + }, + { + "epoch": 0.9844611989702097, + "grad_norm": 1.992982570593176, + "learning_rate": 1.2661175181093443e-08, + "loss": 0.6613, + "step": 32121 + }, + { + "epoch": 0.9844918474929508, + "grad_norm": 1.9837996146703247, + "learning_rate": 1.2611289462993725e-08, + "loss": 0.6466, + "step": 32122 + }, + { + "epoch": 0.9845224960156921, + "grad_norm": 1.8534718934270336, + "learning_rate": 1.2561502153189964e-08, + "loss": 0.6512, + "step": 32123 + }, + { + "epoch": 0.9845531445384332, + "grad_norm": 1.6975510730541514, + "learning_rate": 1.2511813252173988e-08, + "loss": 0.5794, + "step": 32124 + }, + { + "epoch": 0.9845837930611745, + "grad_norm": 1.6972108311406018, + "learning_rate": 1.2462222760434295e-08, + "loss": 0.6829, + "step": 32125 + }, + { + "epoch": 0.9846144415839156, + "grad_norm": 0.6538637743359798, + "learning_rate": 1.2412730678459383e-08, + "loss": 0.5044, + "step": 32126 + }, + { + "epoch": 0.9846450901066569, + "grad_norm": 1.6800967842050445, + "learning_rate": 1.2363337006736643e-08, + "loss": 0.7414, + "step": 32127 + }, + { + "epoch": 0.9846757386293981, + "grad_norm": 1.747344286014292, + "learning_rate": 1.2314041745754568e-08, + "loss": 0.6684, + "step": 32128 + }, + { + "epoch": 0.9847063871521393, + "grad_norm": 1.84226572332802, + "learning_rate": 1.226484489599722e-08, + "loss": 0.6726, + "step": 32129 + }, + { + "epoch": 0.9847370356748805, + "grad_norm": 1.864815502432115, + "learning_rate": 1.2215746457949763e-08, + "loss": 0.5761, + "step": 32130 + }, + { + "epoch": 0.9847676841976217, + "grad_norm": 1.6956075098383565, + "learning_rate": 1.2166746432096255e-08, + "loss": 0.5886, + "step": 32131 + }, + { + "epoch": 0.9847983327203629, + "grad_norm": 1.73259476449078, + "learning_rate": 1.2117844818918534e-08, + "loss": 0.6577, + "step": 32132 + }, + { + "epoch": 0.984828981243104, + "grad_norm": 1.6973826834258114, + "learning_rate": 1.2069041618899545e-08, + "loss": 0.7429, + "step": 32133 + }, + { + "epoch": 0.9848596297658453, + "grad_norm": 2.149237312027545, + "learning_rate": 1.202033683252002e-08, + "loss": 0.6754, + "step": 32134 + }, + { + "epoch": 0.9848902782885864, + "grad_norm": 1.617824985715767, + "learning_rate": 1.1971730460259568e-08, + "loss": 0.6358, + "step": 32135 + }, + { + "epoch": 0.9849209268113277, + "grad_norm": 0.6572210394358761, + "learning_rate": 1.1923222502597809e-08, + "loss": 0.5142, + "step": 32136 + }, + { + "epoch": 0.9849515753340689, + "grad_norm": 1.4341636304244119, + "learning_rate": 1.1874812960012139e-08, + "loss": 0.5859, + "step": 32137 + }, + { + "epoch": 0.9849822238568101, + "grad_norm": 1.6772056635128925, + "learning_rate": 1.1826501832977733e-08, + "loss": 0.5973, + "step": 32138 + }, + { + "epoch": 0.9850128723795513, + "grad_norm": 1.6314307759090718, + "learning_rate": 1.1778289121974206e-08, + "loss": 0.6107, + "step": 32139 + }, + { + "epoch": 0.9850435209022925, + "grad_norm": 1.6979862456123136, + "learning_rate": 1.1730174827474517e-08, + "loss": 0.6458, + "step": 32140 + }, + { + "epoch": 0.9850741694250337, + "grad_norm": 1.664770890156751, + "learning_rate": 1.1682158949952727e-08, + "loss": 0.6426, + "step": 32141 + }, + { + "epoch": 0.9851048179477749, + "grad_norm": 1.914564330185277, + "learning_rate": 1.1634241489881792e-08, + "loss": 0.6237, + "step": 32142 + }, + { + "epoch": 0.9851354664705161, + "grad_norm": 1.7160532632619017, + "learning_rate": 1.1586422447734668e-08, + "loss": 0.6327, + "step": 32143 + }, + { + "epoch": 0.9851661149932573, + "grad_norm": 1.5801094392269155, + "learning_rate": 1.1538701823982091e-08, + "loss": 0.6777, + "step": 32144 + }, + { + "epoch": 0.9851967635159985, + "grad_norm": 1.6308572833071995, + "learning_rate": 1.1491079619094792e-08, + "loss": 0.6228, + "step": 32145 + }, + { + "epoch": 0.9852274120387398, + "grad_norm": 1.6611935760858205, + "learning_rate": 1.1443555833541286e-08, + "loss": 0.628, + "step": 32146 + }, + { + "epoch": 0.9852580605614809, + "grad_norm": 1.8767441310428454, + "learning_rate": 1.139613046779009e-08, + "loss": 0.665, + "step": 32147 + }, + { + "epoch": 0.9852887090842222, + "grad_norm": 1.6627311107328708, + "learning_rate": 1.1348803522308604e-08, + "loss": 0.5216, + "step": 32148 + }, + { + "epoch": 0.9853193576069633, + "grad_norm": 0.6587926583224172, + "learning_rate": 1.1301574997563125e-08, + "loss": 0.5115, + "step": 32149 + }, + { + "epoch": 0.9853500061297046, + "grad_norm": 0.6715922061459383, + "learning_rate": 1.1254444894018835e-08, + "loss": 0.5289, + "step": 32150 + }, + { + "epoch": 0.9853806546524457, + "grad_norm": 1.7059391616189796, + "learning_rate": 1.1207413212139805e-08, + "loss": 0.614, + "step": 32151 + }, + { + "epoch": 0.985411303175187, + "grad_norm": 1.6588983099467618, + "learning_rate": 1.1160479952390114e-08, + "loss": 0.6487, + "step": 32152 + }, + { + "epoch": 0.9854419516979281, + "grad_norm": 1.8723178754803704, + "learning_rate": 1.1113645115231608e-08, + "loss": 0.5772, + "step": 32153 + }, + { + "epoch": 0.9854726002206694, + "grad_norm": 1.656702964273756, + "learning_rate": 1.1066908701127255e-08, + "loss": 0.6551, + "step": 32154 + }, + { + "epoch": 0.9855032487434106, + "grad_norm": 0.7088532011713846, + "learning_rate": 1.1020270710535575e-08, + "loss": 0.4934, + "step": 32155 + }, + { + "epoch": 0.9855338972661518, + "grad_norm": 1.7475346334866937, + "learning_rate": 1.097373114391731e-08, + "loss": 0.6759, + "step": 32156 + }, + { + "epoch": 0.985564545788893, + "grad_norm": 1.6684025929721458, + "learning_rate": 1.0927290001729874e-08, + "loss": 0.7206, + "step": 32157 + }, + { + "epoch": 0.9855951943116342, + "grad_norm": 1.719229387537384, + "learning_rate": 1.0880947284432897e-08, + "loss": 0.6073, + "step": 32158 + }, + { + "epoch": 0.9856258428343754, + "grad_norm": 0.6920186129386553, + "learning_rate": 1.0834702992481572e-08, + "loss": 0.5097, + "step": 32159 + }, + { + "epoch": 0.9856564913571166, + "grad_norm": 1.8428828082531696, + "learning_rate": 1.0788557126331089e-08, + "loss": 0.6989, + "step": 32160 + }, + { + "epoch": 0.9856871398798578, + "grad_norm": 0.6621187168779483, + "learning_rate": 1.0742509686436642e-08, + "loss": 0.5336, + "step": 32161 + }, + { + "epoch": 0.985717788402599, + "grad_norm": 1.7052800753545627, + "learning_rate": 1.069656067325342e-08, + "loss": 0.5793, + "step": 32162 + }, + { + "epoch": 0.9857484369253402, + "grad_norm": 1.890615504839018, + "learning_rate": 1.0650710087231063e-08, + "loss": 0.7233, + "step": 32163 + }, + { + "epoch": 0.9857790854480813, + "grad_norm": 1.741752265877202, + "learning_rate": 1.0604957928824766e-08, + "loss": 0.7455, + "step": 32164 + }, + { + "epoch": 0.9858097339708226, + "grad_norm": 0.6909814941181165, + "learning_rate": 1.0559304198483056e-08, + "loss": 0.5252, + "step": 32165 + }, + { + "epoch": 0.9858403824935638, + "grad_norm": 1.5493078374147706, + "learning_rate": 1.0513748896656683e-08, + "loss": 0.5739, + "step": 32166 + }, + { + "epoch": 0.985871031016305, + "grad_norm": 1.9868059302760048, + "learning_rate": 1.046829202379418e-08, + "loss": 0.6327, + "step": 32167 + }, + { + "epoch": 0.9859016795390462, + "grad_norm": 1.7447167144296922, + "learning_rate": 1.0422933580342965e-08, + "loss": 0.6359, + "step": 32168 + }, + { + "epoch": 0.9859323280617874, + "grad_norm": 1.7822539396338322, + "learning_rate": 1.0377673566750457e-08, + "loss": 0.5714, + "step": 32169 + }, + { + "epoch": 0.9859629765845286, + "grad_norm": 1.6633152031508478, + "learning_rate": 1.0332511983462968e-08, + "loss": 0.5879, + "step": 32170 + }, + { + "epoch": 0.9859936251072698, + "grad_norm": 1.7018712859370906, + "learning_rate": 1.0287448830925695e-08, + "loss": 0.5597, + "step": 32171 + }, + { + "epoch": 0.986024273630011, + "grad_norm": 1.5810765958903437, + "learning_rate": 1.0242484109581619e-08, + "loss": 0.58, + "step": 32172 + }, + { + "epoch": 0.9860549221527523, + "grad_norm": 2.0067778938521115, + "learning_rate": 1.0197617819874828e-08, + "loss": 0.6832, + "step": 32173 + }, + { + "epoch": 0.9860855706754934, + "grad_norm": 1.7969716914093152, + "learning_rate": 1.015284996224608e-08, + "loss": 0.6457, + "step": 32174 + }, + { + "epoch": 0.9861162191982347, + "grad_norm": 1.7423403958439792, + "learning_rate": 1.0108180537138356e-08, + "loss": 0.654, + "step": 32175 + }, + { + "epoch": 0.9861468677209758, + "grad_norm": 0.704680976117916, + "learning_rate": 1.0063609544990194e-08, + "loss": 0.5129, + "step": 32176 + }, + { + "epoch": 0.9861775162437171, + "grad_norm": 1.6610152204805821, + "learning_rate": 1.001913698624124e-08, + "loss": 0.6671, + "step": 32177 + }, + { + "epoch": 0.9862081647664582, + "grad_norm": 0.6554652853697672, + "learning_rate": 9.974762861330035e-09, + "loss": 0.535, + "step": 32178 + }, + { + "epoch": 0.9862388132891995, + "grad_norm": 1.6339132642194318, + "learning_rate": 9.930487170692893e-09, + "loss": 0.5836, + "step": 32179 + }, + { + "epoch": 0.9862694618119406, + "grad_norm": 1.7257279673016663, + "learning_rate": 9.886309914768355e-09, + "loss": 0.6198, + "step": 32180 + }, + { + "epoch": 0.9863001103346819, + "grad_norm": 1.7284528537143238, + "learning_rate": 9.842231093988297e-09, + "loss": 0.6267, + "step": 32181 + }, + { + "epoch": 0.986330758857423, + "grad_norm": 0.651167751585903, + "learning_rate": 9.798250708790147e-09, + "loss": 0.4917, + "step": 32182 + }, + { + "epoch": 0.9863614073801643, + "grad_norm": 1.8476616677184847, + "learning_rate": 9.75436875960467e-09, + "loss": 0.6651, + "step": 32183 + }, + { + "epoch": 0.9863920559029055, + "grad_norm": 1.496840917566711, + "learning_rate": 9.710585246865966e-09, + "loss": 0.5581, + "step": 32184 + }, + { + "epoch": 0.9864227044256467, + "grad_norm": 1.6594746146372723, + "learning_rate": 9.666900171005911e-09, + "loss": 0.6349, + "step": 32185 + }, + { + "epoch": 0.9864533529483879, + "grad_norm": 1.7920019899365205, + "learning_rate": 9.623313532453049e-09, + "loss": 0.6528, + "step": 32186 + }, + { + "epoch": 0.9864840014711291, + "grad_norm": 1.665357933753125, + "learning_rate": 9.579825331638149e-09, + "loss": 0.6736, + "step": 32187 + }, + { + "epoch": 0.9865146499938703, + "grad_norm": 1.688521009443354, + "learning_rate": 9.536435568989755e-09, + "loss": 0.6858, + "step": 32188 + }, + { + "epoch": 0.9865452985166115, + "grad_norm": 1.9164291936677882, + "learning_rate": 9.493144244934194e-09, + "loss": 0.5647, + "step": 32189 + }, + { + "epoch": 0.9865759470393527, + "grad_norm": 1.799465487670078, + "learning_rate": 9.4499513598989e-09, + "loss": 0.6321, + "step": 32190 + }, + { + "epoch": 0.986606595562094, + "grad_norm": 1.929043058902464, + "learning_rate": 9.406856914310203e-09, + "loss": 0.5876, + "step": 32191 + }, + { + "epoch": 0.9866372440848351, + "grad_norm": 1.657366976352708, + "learning_rate": 9.363860908591094e-09, + "loss": 0.6408, + "step": 32192 + }, + { + "epoch": 0.9866678926075764, + "grad_norm": 1.648331276018568, + "learning_rate": 9.320963343166789e-09, + "loss": 0.6685, + "step": 32193 + }, + { + "epoch": 0.9866985411303175, + "grad_norm": 1.9365842759416791, + "learning_rate": 9.278164218459174e-09, + "loss": 0.6726, + "step": 32194 + }, + { + "epoch": 0.9867291896530587, + "grad_norm": 1.850185187642492, + "learning_rate": 9.235463534890133e-09, + "loss": 0.6137, + "step": 32195 + }, + { + "epoch": 0.9867598381757999, + "grad_norm": 1.66916401241521, + "learning_rate": 9.192861292879329e-09, + "loss": 0.5781, + "step": 32196 + }, + { + "epoch": 0.9867904866985411, + "grad_norm": 1.8271034984368222, + "learning_rate": 9.150357492848649e-09, + "loss": 0.644, + "step": 32197 + }, + { + "epoch": 0.9868211352212823, + "grad_norm": 1.685383058547836, + "learning_rate": 9.107952135215536e-09, + "loss": 0.5949, + "step": 32198 + }, + { + "epoch": 0.9868517837440235, + "grad_norm": 1.703395618658721, + "learning_rate": 9.065645220397434e-09, + "loss": 0.6139, + "step": 32199 + }, + { + "epoch": 0.9868824322667648, + "grad_norm": 1.7116972321043546, + "learning_rate": 9.023436748812897e-09, + "loss": 0.5934, + "step": 32200 + }, + { + "epoch": 0.9869130807895059, + "grad_norm": 1.6171782280729428, + "learning_rate": 8.981326720876038e-09, + "loss": 0.5635, + "step": 32201 + }, + { + "epoch": 0.9869437293122472, + "grad_norm": 1.6814233531397023, + "learning_rate": 8.939315137002081e-09, + "loss": 0.6598, + "step": 32202 + }, + { + "epoch": 0.9869743778349883, + "grad_norm": 1.7134807852044447, + "learning_rate": 8.897401997606248e-09, + "loss": 0.5869, + "step": 32203 + }, + { + "epoch": 0.9870050263577296, + "grad_norm": 1.9728125615444956, + "learning_rate": 8.855587303100433e-09, + "loss": 0.7257, + "step": 32204 + }, + { + "epoch": 0.9870356748804707, + "grad_norm": 1.6538932318823525, + "learning_rate": 8.813871053896528e-09, + "loss": 0.5918, + "step": 32205 + }, + { + "epoch": 0.987066323403212, + "grad_norm": 1.8936969146391804, + "learning_rate": 8.772253250405316e-09, + "loss": 0.7179, + "step": 32206 + }, + { + "epoch": 0.9870969719259531, + "grad_norm": 0.6749369482910463, + "learning_rate": 8.73073389303869e-09, + "loss": 0.4942, + "step": 32207 + }, + { + "epoch": 0.9871276204486944, + "grad_norm": 1.8036923313620592, + "learning_rate": 8.6893129822041e-09, + "loss": 0.6906, + "step": 32208 + }, + { + "epoch": 0.9871582689714355, + "grad_norm": 1.965484503745772, + "learning_rate": 8.647990518310112e-09, + "loss": 0.6894, + "step": 32209 + }, + { + "epoch": 0.9871889174941768, + "grad_norm": 0.6624922094819287, + "learning_rate": 8.606766501763065e-09, + "loss": 0.5407, + "step": 32210 + }, + { + "epoch": 0.987219566016918, + "grad_norm": 0.6731911503354154, + "learning_rate": 8.56564093297152e-09, + "loss": 0.5341, + "step": 32211 + }, + { + "epoch": 0.9872502145396592, + "grad_norm": 1.5472395137654296, + "learning_rate": 8.524613812337379e-09, + "loss": 0.6339, + "step": 32212 + }, + { + "epoch": 0.9872808630624004, + "grad_norm": 1.8939284868210378, + "learning_rate": 8.483685140268094e-09, + "loss": 0.6924, + "step": 32213 + }, + { + "epoch": 0.9873115115851416, + "grad_norm": 1.6442810995714823, + "learning_rate": 8.442854917164456e-09, + "loss": 0.7044, + "step": 32214 + }, + { + "epoch": 0.9873421601078828, + "grad_norm": 1.720045196952708, + "learning_rate": 8.402123143430585e-09, + "loss": 0.6421, + "step": 32215 + }, + { + "epoch": 0.987372808630624, + "grad_norm": 1.7525676667023617, + "learning_rate": 8.361489819467272e-09, + "loss": 0.694, + "step": 32216 + }, + { + "epoch": 0.9874034571533652, + "grad_norm": 1.7566538979650168, + "learning_rate": 8.320954945674198e-09, + "loss": 0.7678, + "step": 32217 + }, + { + "epoch": 0.9874341056761065, + "grad_norm": 1.7660083897736985, + "learning_rate": 8.280518522451042e-09, + "loss": 0.6975, + "step": 32218 + }, + { + "epoch": 0.9874647541988476, + "grad_norm": 0.6865557289318778, + "learning_rate": 8.240180550196374e-09, + "loss": 0.5364, + "step": 32219 + }, + { + "epoch": 0.9874954027215889, + "grad_norm": 1.7708920496259457, + "learning_rate": 8.199941029307656e-09, + "loss": 0.6272, + "step": 32220 + }, + { + "epoch": 0.98752605124433, + "grad_norm": 1.6857378731397805, + "learning_rate": 8.159799960182347e-09, + "loss": 0.6403, + "step": 32221 + }, + { + "epoch": 0.9875566997670713, + "grad_norm": 1.6152926529391083, + "learning_rate": 8.119757343214573e-09, + "loss": 0.7025, + "step": 32222 + }, + { + "epoch": 0.9875873482898124, + "grad_norm": 0.6435355824183693, + "learning_rate": 8.079813178798468e-09, + "loss": 0.5223, + "step": 32223 + }, + { + "epoch": 0.9876179968125537, + "grad_norm": 1.9235551692779047, + "learning_rate": 8.039967467329268e-09, + "loss": 0.7123, + "step": 32224 + }, + { + "epoch": 0.9876486453352948, + "grad_norm": 1.8209980204020904, + "learning_rate": 8.000220209198883e-09, + "loss": 0.5556, + "step": 32225 + }, + { + "epoch": 0.987679293858036, + "grad_norm": 0.6664673464311393, + "learning_rate": 7.960571404799222e-09, + "loss": 0.5158, + "step": 32226 + }, + { + "epoch": 0.9877099423807773, + "grad_norm": 1.7729618751390341, + "learning_rate": 7.921021054519972e-09, + "loss": 0.6052, + "step": 32227 + }, + { + "epoch": 0.9877405909035184, + "grad_norm": 1.628477515748329, + "learning_rate": 7.881569158751933e-09, + "loss": 0.653, + "step": 32228 + }, + { + "epoch": 0.9877712394262597, + "grad_norm": 1.8282701215240023, + "learning_rate": 7.842215717882574e-09, + "loss": 0.6636, + "step": 32229 + }, + { + "epoch": 0.9878018879490008, + "grad_norm": 1.8617996194022066, + "learning_rate": 7.80296073230158e-09, + "loss": 0.6618, + "step": 32230 + }, + { + "epoch": 0.9878325364717421, + "grad_norm": 1.8213700821933583, + "learning_rate": 7.763804202394198e-09, + "loss": 0.6099, + "step": 32231 + }, + { + "epoch": 0.9878631849944832, + "grad_norm": 1.6734826382547072, + "learning_rate": 7.72474612854679e-09, + "loss": 0.6016, + "step": 32232 + }, + { + "epoch": 0.9878938335172245, + "grad_norm": 2.072433837261627, + "learning_rate": 7.68578651114349e-09, + "loss": 0.6344, + "step": 32233 + }, + { + "epoch": 0.9879244820399656, + "grad_norm": 1.7123477985979803, + "learning_rate": 7.646925350569544e-09, + "loss": 0.6542, + "step": 32234 + }, + { + "epoch": 0.9879551305627069, + "grad_norm": 1.7536013965014017, + "learning_rate": 7.608162647206873e-09, + "loss": 0.6076, + "step": 32235 + }, + { + "epoch": 0.987985779085448, + "grad_norm": 1.653856995668964, + "learning_rate": 7.569498401437392e-09, + "loss": 0.6662, + "step": 32236 + }, + { + "epoch": 0.9880164276081893, + "grad_norm": 0.678532581405941, + "learning_rate": 7.530932613641905e-09, + "loss": 0.5035, + "step": 32237 + }, + { + "epoch": 0.9880470761309305, + "grad_norm": 1.588857231115619, + "learning_rate": 7.492465284201222e-09, + "loss": 0.6151, + "step": 32238 + }, + { + "epoch": 0.9880777246536717, + "grad_norm": 1.7654094638548075, + "learning_rate": 7.454096413493927e-09, + "loss": 0.6091, + "step": 32239 + }, + { + "epoch": 0.9881083731764129, + "grad_norm": 0.6607445290612972, + "learning_rate": 7.415826001898607e-09, + "loss": 0.502, + "step": 32240 + }, + { + "epoch": 0.9881390216991541, + "grad_norm": 1.7300643580539719, + "learning_rate": 7.377654049791627e-09, + "loss": 0.6104, + "step": 32241 + }, + { + "epoch": 0.9881696702218953, + "grad_norm": 1.6262302031049796, + "learning_rate": 7.3395805575493525e-09, + "loss": 0.679, + "step": 32242 + }, + { + "epoch": 0.9882003187446365, + "grad_norm": 1.7533869017871266, + "learning_rate": 7.3016055255470396e-09, + "loss": 0.6353, + "step": 32243 + }, + { + "epoch": 0.9882309672673777, + "grad_norm": 0.6625490256536467, + "learning_rate": 7.263728954157723e-09, + "loss": 0.5244, + "step": 32244 + }, + { + "epoch": 0.988261615790119, + "grad_norm": 1.737474330109666, + "learning_rate": 7.225950843756657e-09, + "loss": 0.5898, + "step": 32245 + }, + { + "epoch": 0.9882922643128601, + "grad_norm": 1.6730260836040916, + "learning_rate": 7.1882711947146575e-09, + "loss": 0.5913, + "step": 32246 + }, + { + "epoch": 0.9883229128356014, + "grad_norm": 1.8338521124681295, + "learning_rate": 7.150690007403649e-09, + "loss": 0.6573, + "step": 32247 + }, + { + "epoch": 0.9883535613583425, + "grad_norm": 1.6614136415392848, + "learning_rate": 7.113207282194446e-09, + "loss": 0.6516, + "step": 32248 + }, + { + "epoch": 0.9883842098810838, + "grad_norm": 1.611750817020008, + "learning_rate": 7.075823019454531e-09, + "loss": 0.6008, + "step": 32249 + }, + { + "epoch": 0.9884148584038249, + "grad_norm": 1.8189746570194352, + "learning_rate": 7.038537219553609e-09, + "loss": 0.5607, + "step": 32250 + }, + { + "epoch": 0.9884455069265662, + "grad_norm": 0.7042107012048059, + "learning_rate": 7.001349882859165e-09, + "loss": 0.4976, + "step": 32251 + }, + { + "epoch": 0.9884761554493073, + "grad_norm": 2.0949948588442755, + "learning_rate": 6.964261009736462e-09, + "loss": 0.6735, + "step": 32252 + }, + { + "epoch": 0.9885068039720486, + "grad_norm": 1.849843361676157, + "learning_rate": 6.927270600551872e-09, + "loss": 0.5897, + "step": 32253 + }, + { + "epoch": 0.9885374524947897, + "grad_norm": 1.6506479590274699, + "learning_rate": 6.89037865566955e-09, + "loss": 0.6268, + "step": 32254 + }, + { + "epoch": 0.988568101017531, + "grad_norm": 0.6412763214528598, + "learning_rate": 6.8535851754536476e-09, + "loss": 0.5215, + "step": 32255 + }, + { + "epoch": 0.9885987495402722, + "grad_norm": 1.962938944280979, + "learning_rate": 6.8168901602660985e-09, + "loss": 0.755, + "step": 32256 + }, + { + "epoch": 0.9886293980630133, + "grad_norm": 1.761635561304819, + "learning_rate": 6.780293610468835e-09, + "loss": 0.5991, + "step": 32257 + }, + { + "epoch": 0.9886600465857546, + "grad_norm": 1.7582914333696342, + "learning_rate": 6.743795526422681e-09, + "loss": 0.6694, + "step": 32258 + }, + { + "epoch": 0.9886906951084957, + "grad_norm": 1.7058950207176198, + "learning_rate": 6.707395908486236e-09, + "loss": 0.636, + "step": 32259 + }, + { + "epoch": 0.988721343631237, + "grad_norm": 1.8215062539980185, + "learning_rate": 6.671094757018104e-09, + "loss": 0.6932, + "step": 32260 + }, + { + "epoch": 0.9887519921539781, + "grad_norm": 1.9909952735310423, + "learning_rate": 6.6348920723768865e-09, + "loss": 0.6725, + "step": 32261 + }, + { + "epoch": 0.9887826406767194, + "grad_norm": 1.8262222735347649, + "learning_rate": 6.598787854918965e-09, + "loss": 0.61, + "step": 32262 + }, + { + "epoch": 0.9888132891994605, + "grad_norm": 1.569678008079925, + "learning_rate": 6.56278210500072e-09, + "loss": 0.6542, + "step": 32263 + }, + { + "epoch": 0.9888439377222018, + "grad_norm": 1.648603274532505, + "learning_rate": 6.526874822976315e-09, + "loss": 0.6384, + "step": 32264 + }, + { + "epoch": 0.988874586244943, + "grad_norm": 2.085853343491697, + "learning_rate": 6.491066009198799e-09, + "loss": 0.6445, + "step": 32265 + }, + { + "epoch": 0.9889052347676842, + "grad_norm": 1.7331464651110378, + "learning_rate": 6.455355664022333e-09, + "loss": 0.5781, + "step": 32266 + }, + { + "epoch": 0.9889358832904254, + "grad_norm": 1.572709095033614, + "learning_rate": 6.41974378779775e-09, + "loss": 0.6147, + "step": 32267 + }, + { + "epoch": 0.9889665318131666, + "grad_norm": 0.660447444946209, + "learning_rate": 6.384230380876988e-09, + "loss": 0.5289, + "step": 32268 + }, + { + "epoch": 0.9889971803359078, + "grad_norm": 1.653418516900295, + "learning_rate": 6.348815443608658e-09, + "loss": 0.6803, + "step": 32269 + }, + { + "epoch": 0.989027828858649, + "grad_norm": 1.7793610076716344, + "learning_rate": 6.31349897634248e-09, + "loss": 0.6439, + "step": 32270 + }, + { + "epoch": 0.9890584773813902, + "grad_norm": 1.7554901741321554, + "learning_rate": 6.278280979427065e-09, + "loss": 0.6525, + "step": 32271 + }, + { + "epoch": 0.9890891259041314, + "grad_norm": 1.636254495870848, + "learning_rate": 6.243161453208802e-09, + "loss": 0.5341, + "step": 32272 + }, + { + "epoch": 0.9891197744268726, + "grad_norm": 1.6093758540282044, + "learning_rate": 6.208140398032969e-09, + "loss": 0.504, + "step": 32273 + }, + { + "epoch": 0.9891504229496139, + "grad_norm": 1.7756452378744096, + "learning_rate": 6.173217814245958e-09, + "loss": 0.6865, + "step": 32274 + }, + { + "epoch": 0.989181071472355, + "grad_norm": 1.7161751043259983, + "learning_rate": 6.138393702190826e-09, + "loss": 0.6542, + "step": 32275 + }, + { + "epoch": 0.9892117199950963, + "grad_norm": 1.5820856829619445, + "learning_rate": 6.103668062210632e-09, + "loss": 0.6103, + "step": 32276 + }, + { + "epoch": 0.9892423685178374, + "grad_norm": 1.8439709370798478, + "learning_rate": 6.069040894649547e-09, + "loss": 0.5855, + "step": 32277 + }, + { + "epoch": 0.9892730170405787, + "grad_norm": 1.6896258856595299, + "learning_rate": 6.034512199846187e-09, + "loss": 0.5258, + "step": 32278 + }, + { + "epoch": 0.9893036655633198, + "grad_norm": 1.7921504752620692, + "learning_rate": 6.000081978142502e-09, + "loss": 0.4993, + "step": 32279 + }, + { + "epoch": 0.9893343140860611, + "grad_norm": 0.670305509812176, + "learning_rate": 5.9657502298759994e-09, + "loss": 0.5401, + "step": 32280 + }, + { + "epoch": 0.9893649626088022, + "grad_norm": 1.7762994454016467, + "learning_rate": 5.931516955386407e-09, + "loss": 0.6384, + "step": 32281 + }, + { + "epoch": 0.9893956111315435, + "grad_norm": 1.7700385129121408, + "learning_rate": 5.897382155011233e-09, + "loss": 0.6554, + "step": 32282 + }, + { + "epoch": 0.9894262596542847, + "grad_norm": 1.6253856383304712, + "learning_rate": 5.863345829085765e-09, + "loss": 0.548, + "step": 32283 + }, + { + "epoch": 0.9894569081770259, + "grad_norm": 1.7019258567144537, + "learning_rate": 5.829407977946399e-09, + "loss": 0.6294, + "step": 32284 + }, + { + "epoch": 0.9894875566997671, + "grad_norm": 0.6872259157172796, + "learning_rate": 5.795568601926205e-09, + "loss": 0.5175, + "step": 32285 + }, + { + "epoch": 0.9895182052225083, + "grad_norm": 1.6468554318467277, + "learning_rate": 5.7618277013604675e-09, + "loss": 0.5943, + "step": 32286 + }, + { + "epoch": 0.9895488537452495, + "grad_norm": 1.7461598483475171, + "learning_rate": 5.728185276580034e-09, + "loss": 0.641, + "step": 32287 + }, + { + "epoch": 0.9895795022679906, + "grad_norm": 1.7136138269018635, + "learning_rate": 5.6946413279168615e-09, + "loss": 0.6505, + "step": 32288 + }, + { + "epoch": 0.9896101507907319, + "grad_norm": 1.7422593067078356, + "learning_rate": 5.6611958557017954e-09, + "loss": 0.6684, + "step": 32289 + }, + { + "epoch": 0.989640799313473, + "grad_norm": 2.1662519980409054, + "learning_rate": 5.627848860263463e-09, + "loss": 0.6795, + "step": 32290 + }, + { + "epoch": 0.9896714478362143, + "grad_norm": 1.5723338074324267, + "learning_rate": 5.5946003419316e-09, + "loss": 0.595, + "step": 32291 + }, + { + "epoch": 0.9897020963589555, + "grad_norm": 0.658235329791122, + "learning_rate": 5.5614503010337216e-09, + "loss": 0.4807, + "step": 32292 + }, + { + "epoch": 0.9897327448816967, + "grad_norm": 1.9624093147863846, + "learning_rate": 5.528398737895125e-09, + "loss": 0.6275, + "step": 32293 + }, + { + "epoch": 0.9897633934044379, + "grad_norm": 1.60640457836508, + "learning_rate": 5.495445652843323e-09, + "loss": 0.5995, + "step": 32294 + }, + { + "epoch": 0.9897940419271791, + "grad_norm": 1.6522601950680107, + "learning_rate": 5.462591046201393e-09, + "loss": 0.5575, + "step": 32295 + }, + { + "epoch": 0.9898246904499203, + "grad_norm": 1.6493651688418334, + "learning_rate": 5.4298349182935194e-09, + "loss": 0.6849, + "step": 32296 + }, + { + "epoch": 0.9898553389726615, + "grad_norm": 1.5747095654091665, + "learning_rate": 5.3971772694438875e-09, + "loss": 0.6519, + "step": 32297 + }, + { + "epoch": 0.9898859874954027, + "grad_norm": 0.6862394563672565, + "learning_rate": 5.364618099972241e-09, + "loss": 0.533, + "step": 32298 + }, + { + "epoch": 0.989916636018144, + "grad_norm": 0.6671752841291704, + "learning_rate": 5.332157410200545e-09, + "loss": 0.5343, + "step": 32299 + }, + { + "epoch": 0.9899472845408851, + "grad_norm": 1.9691907482305158, + "learning_rate": 5.299795200447433e-09, + "loss": 0.6892, + "step": 32300 + }, + { + "epoch": 0.9899779330636264, + "grad_norm": 1.7523679195141293, + "learning_rate": 5.26753147103376e-09, + "loss": 0.6102, + "step": 32301 + }, + { + "epoch": 0.9900085815863675, + "grad_norm": 1.9127566705029255, + "learning_rate": 5.2353662222759395e-09, + "loss": 0.7615, + "step": 32302 + }, + { + "epoch": 0.9900392301091088, + "grad_norm": 1.749262532247695, + "learning_rate": 5.203299454491495e-09, + "loss": 0.6501, + "step": 32303 + }, + { + "epoch": 0.9900698786318499, + "grad_norm": 0.641882808338267, + "learning_rate": 5.1713311679968405e-09, + "loss": 0.4952, + "step": 32304 + }, + { + "epoch": 0.9901005271545912, + "grad_norm": 1.7141987567786097, + "learning_rate": 5.1394613631061685e-09, + "loss": 0.6738, + "step": 32305 + }, + { + "epoch": 0.9901311756773323, + "grad_norm": 1.7576490415624197, + "learning_rate": 5.107690040132562e-09, + "loss": 0.657, + "step": 32306 + }, + { + "epoch": 0.9901618242000736, + "grad_norm": 1.6947240532996637, + "learning_rate": 5.076017199391326e-09, + "loss": 0.5222, + "step": 32307 + }, + { + "epoch": 0.9901924727228147, + "grad_norm": 1.6023188194115074, + "learning_rate": 5.04444284119221e-09, + "loss": 0.5425, + "step": 32308 + }, + { + "epoch": 0.990223121245556, + "grad_norm": 1.75048891757693, + "learning_rate": 5.0129669658482985e-09, + "loss": 0.6427, + "step": 32309 + }, + { + "epoch": 0.9902537697682972, + "grad_norm": 1.8092355460023228, + "learning_rate": 4.981589573669343e-09, + "loss": 0.6536, + "step": 32310 + }, + { + "epoch": 0.9902844182910384, + "grad_norm": 1.6161355047533832, + "learning_rate": 4.950310664962876e-09, + "loss": 0.5986, + "step": 32311 + }, + { + "epoch": 0.9903150668137796, + "grad_norm": 1.8830871120491228, + "learning_rate": 4.91913024003976e-09, + "loss": 0.5991, + "step": 32312 + }, + { + "epoch": 0.9903457153365208, + "grad_norm": 1.8155587314738768, + "learning_rate": 4.8880482992053054e-09, + "loss": 0.6152, + "step": 32313 + }, + { + "epoch": 0.990376363859262, + "grad_norm": 0.665941521159746, + "learning_rate": 4.857064842765935e-09, + "loss": 0.5088, + "step": 32314 + }, + { + "epoch": 0.9904070123820032, + "grad_norm": 1.7183007028912305, + "learning_rate": 4.826179871028069e-09, + "loss": 0.6551, + "step": 32315 + }, + { + "epoch": 0.9904376609047444, + "grad_norm": 0.6632302237933909, + "learning_rate": 4.7953933842936895e-09, + "loss": 0.5029, + "step": 32316 + }, + { + "epoch": 0.9904683094274856, + "grad_norm": 1.5784289603769222, + "learning_rate": 4.764705382869217e-09, + "loss": 0.5956, + "step": 32317 + }, + { + "epoch": 0.9904989579502268, + "grad_norm": 1.7878019876163087, + "learning_rate": 4.734115867054412e-09, + "loss": 0.6445, + "step": 32318 + }, + { + "epoch": 0.990529606472968, + "grad_norm": 1.6467517354497534, + "learning_rate": 4.703624837152365e-09, + "loss": 0.6461, + "step": 32319 + }, + { + "epoch": 0.9905602549957092, + "grad_norm": 1.7419045540627616, + "learning_rate": 4.6732322934628374e-09, + "loss": 0.651, + "step": 32320 + }, + { + "epoch": 0.9905909035184504, + "grad_norm": 1.8490330614512431, + "learning_rate": 4.642938236285588e-09, + "loss": 0.5368, + "step": 32321 + }, + { + "epoch": 0.9906215520411916, + "grad_norm": 1.7452301127433725, + "learning_rate": 4.612742665918157e-09, + "loss": 0.6107, + "step": 32322 + }, + { + "epoch": 0.9906522005639328, + "grad_norm": 1.6790969207776731, + "learning_rate": 4.582645582660306e-09, + "loss": 0.6753, + "step": 32323 + }, + { + "epoch": 0.990682849086674, + "grad_norm": 1.739046560599462, + "learning_rate": 4.552646986805131e-09, + "loss": 0.6138, + "step": 32324 + }, + { + "epoch": 0.9907134976094152, + "grad_norm": 1.6738882811600428, + "learning_rate": 4.522746878651285e-09, + "loss": 0.669, + "step": 32325 + }, + { + "epoch": 0.9907441461321564, + "grad_norm": 1.7238626344237802, + "learning_rate": 4.492945258491865e-09, + "loss": 0.5999, + "step": 32326 + }, + { + "epoch": 0.9907747946548976, + "grad_norm": 1.7100595273809729, + "learning_rate": 4.463242126621081e-09, + "loss": 0.5549, + "step": 32327 + }, + { + "epoch": 0.9908054431776389, + "grad_norm": 1.747062077419453, + "learning_rate": 4.4336374833320315e-09, + "loss": 0.7489, + "step": 32328 + }, + { + "epoch": 0.99083609170038, + "grad_norm": 1.6920830299898033, + "learning_rate": 4.404131328915595e-09, + "loss": 0.6779, + "step": 32329 + }, + { + "epoch": 0.9908667402231213, + "grad_norm": 1.6537727474791073, + "learning_rate": 4.3747236636615395e-09, + "loss": 0.531, + "step": 32330 + }, + { + "epoch": 0.9908973887458624, + "grad_norm": 1.892976096449143, + "learning_rate": 4.345414487861854e-09, + "loss": 0.6348, + "step": 32331 + }, + { + "epoch": 0.9909280372686037, + "grad_norm": 1.67298154925118, + "learning_rate": 4.316203801804087e-09, + "loss": 0.6573, + "step": 32332 + }, + { + "epoch": 0.9909586857913448, + "grad_norm": 0.6588863893518925, + "learning_rate": 4.287091605776894e-09, + "loss": 0.5109, + "step": 32333 + }, + { + "epoch": 0.9909893343140861, + "grad_norm": 1.5853870566154036, + "learning_rate": 4.2580779000656045e-09, + "loss": 0.5946, + "step": 32334 + }, + { + "epoch": 0.9910199828368272, + "grad_norm": 1.7066219042256456, + "learning_rate": 4.229162684957766e-09, + "loss": 0.646, + "step": 32335 + }, + { + "epoch": 0.9910506313595685, + "grad_norm": 1.7028019563756498, + "learning_rate": 4.200345960736485e-09, + "loss": 0.6434, + "step": 32336 + }, + { + "epoch": 0.9910812798823097, + "grad_norm": 1.9952816416098687, + "learning_rate": 4.171627727688199e-09, + "loss": 0.5951, + "step": 32337 + }, + { + "epoch": 0.9911119284050509, + "grad_norm": 1.5051797890910132, + "learning_rate": 4.143007986092684e-09, + "loss": 0.6078, + "step": 32338 + }, + { + "epoch": 0.9911425769277921, + "grad_norm": 1.6999601127360302, + "learning_rate": 4.114486736235268e-09, + "loss": 0.6041, + "step": 32339 + }, + { + "epoch": 0.9911732254505333, + "grad_norm": 1.6467116835092044, + "learning_rate": 4.086063978394616e-09, + "loss": 0.5818, + "step": 32340 + }, + { + "epoch": 0.9912038739732745, + "grad_norm": 1.7149112542607665, + "learning_rate": 4.057739712851616e-09, + "loss": 0.6589, + "step": 32341 + }, + { + "epoch": 0.9912345224960157, + "grad_norm": 1.8569108130395302, + "learning_rate": 4.029513939884933e-09, + "loss": 0.7346, + "step": 32342 + }, + { + "epoch": 0.9912651710187569, + "grad_norm": 1.6222578645591383, + "learning_rate": 4.001386659773232e-09, + "loss": 0.6553, + "step": 32343 + }, + { + "epoch": 0.9912958195414981, + "grad_norm": 1.6878305406988277, + "learning_rate": 3.97335787279296e-09, + "loss": 0.5893, + "step": 32344 + }, + { + "epoch": 0.9913264680642393, + "grad_norm": 1.6224182631899076, + "learning_rate": 3.945427579221672e-09, + "loss": 0.5768, + "step": 32345 + }, + { + "epoch": 0.9913571165869806, + "grad_norm": 1.8036958468648228, + "learning_rate": 3.917595779333594e-09, + "loss": 0.6551, + "step": 32346 + }, + { + "epoch": 0.9913877651097217, + "grad_norm": 1.8113194764797989, + "learning_rate": 3.88986247340295e-09, + "loss": 0.6704, + "step": 32347 + }, + { + "epoch": 0.991418413632463, + "grad_norm": 1.77371996082685, + "learning_rate": 3.862227661702855e-09, + "loss": 0.6381, + "step": 32348 + }, + { + "epoch": 0.9914490621552041, + "grad_norm": 0.6608021879790401, + "learning_rate": 3.834691344505315e-09, + "loss": 0.5253, + "step": 32349 + }, + { + "epoch": 0.9914797106779453, + "grad_norm": 1.7411338945732495, + "learning_rate": 3.807253522083443e-09, + "loss": 0.7163, + "step": 32350 + }, + { + "epoch": 0.9915103592006865, + "grad_norm": 1.7620872072688751, + "learning_rate": 3.779914194705914e-09, + "loss": 0.5429, + "step": 32351 + }, + { + "epoch": 0.9915410077234277, + "grad_norm": 1.8917898607497983, + "learning_rate": 3.752673362642512e-09, + "loss": 0.6808, + "step": 32352 + }, + { + "epoch": 0.9915716562461689, + "grad_norm": 0.664120250636762, + "learning_rate": 3.7255310261608e-09, + "loss": 0.5177, + "step": 32353 + }, + { + "epoch": 0.9916023047689101, + "grad_norm": 1.577873466665401, + "learning_rate": 3.698487185530564e-09, + "loss": 0.5851, + "step": 32354 + }, + { + "epoch": 0.9916329532916514, + "grad_norm": 1.8344057863287604, + "learning_rate": 3.6715418410160353e-09, + "loss": 0.6533, + "step": 32355 + }, + { + "epoch": 0.9916636018143925, + "grad_norm": 1.5946757546206716, + "learning_rate": 3.6446949928836685e-09, + "loss": 0.5381, + "step": 32356 + }, + { + "epoch": 0.9916942503371338, + "grad_norm": 1.8177257922657266, + "learning_rate": 3.617946641396586e-09, + "loss": 0.6494, + "step": 32357 + }, + { + "epoch": 0.9917248988598749, + "grad_norm": 1.6671556592397594, + "learning_rate": 3.591296786821241e-09, + "loss": 0.658, + "step": 32358 + }, + { + "epoch": 0.9917555473826162, + "grad_norm": 1.8527351223341186, + "learning_rate": 3.5647454294174264e-09, + "loss": 0.5931, + "step": 32359 + }, + { + "epoch": 0.9917861959053573, + "grad_norm": 1.797021346762967, + "learning_rate": 3.538292569448265e-09, + "loss": 0.6243, + "step": 32360 + }, + { + "epoch": 0.9918168444280986, + "grad_norm": 1.6994054988514258, + "learning_rate": 3.511938207174659e-09, + "loss": 0.6, + "step": 32361 + }, + { + "epoch": 0.9918474929508397, + "grad_norm": 2.2030120594516585, + "learning_rate": 3.48568234285529e-09, + "loss": 0.6893, + "step": 32362 + }, + { + "epoch": 0.991878141473581, + "grad_norm": 1.7956521525000324, + "learning_rate": 3.4595249767488405e-09, + "loss": 0.6565, + "step": 32363 + }, + { + "epoch": 0.9919087899963221, + "grad_norm": 0.6602377849811036, + "learning_rate": 3.433466109112882e-09, + "loss": 0.5248, + "step": 32364 + }, + { + "epoch": 0.9919394385190634, + "grad_norm": 1.5194768413172042, + "learning_rate": 3.407505740206096e-09, + "loss": 0.5764, + "step": 32365 + }, + { + "epoch": 0.9919700870418046, + "grad_norm": 0.6648965780965111, + "learning_rate": 3.3816438702816145e-09, + "loss": 0.506, + "step": 32366 + }, + { + "epoch": 0.9920007355645458, + "grad_norm": 1.751159344482061, + "learning_rate": 3.3558804995958982e-09, + "loss": 0.6105, + "step": 32367 + }, + { + "epoch": 0.992031384087287, + "grad_norm": 1.6994820982977756, + "learning_rate": 3.3302156284031885e-09, + "loss": 0.6269, + "step": 32368 + }, + { + "epoch": 0.9920620326100282, + "grad_norm": 1.9050004854525766, + "learning_rate": 3.3046492569555057e-09, + "loss": 0.5618, + "step": 32369 + }, + { + "epoch": 0.9920926811327694, + "grad_norm": 1.733134417121312, + "learning_rate": 3.27918138550376e-09, + "loss": 0.6453, + "step": 32370 + }, + { + "epoch": 0.9921233296555106, + "grad_norm": 1.732687056545012, + "learning_rate": 3.253812014301083e-09, + "loss": 0.6167, + "step": 32371 + }, + { + "epoch": 0.9921539781782518, + "grad_norm": 0.6839628177830916, + "learning_rate": 3.2285411435961646e-09, + "loss": 0.5221, + "step": 32372 + }, + { + "epoch": 0.992184626700993, + "grad_norm": 1.8292198023614628, + "learning_rate": 3.203368773637694e-09, + "loss": 0.6876, + "step": 32373 + }, + { + "epoch": 0.9922152752237342, + "grad_norm": 1.7250964731409189, + "learning_rate": 3.1782949046743618e-09, + "loss": 0.5737, + "step": 32374 + }, + { + "epoch": 0.9922459237464755, + "grad_norm": 1.5201684032036853, + "learning_rate": 3.1533195369537474e-09, + "loss": 0.6216, + "step": 32375 + }, + { + "epoch": 0.9922765722692166, + "grad_norm": 1.8685835344748254, + "learning_rate": 3.12844267072121e-09, + "loss": 0.6802, + "step": 32376 + }, + { + "epoch": 0.9923072207919579, + "grad_norm": 0.6842644805149183, + "learning_rate": 3.1036643062209993e-09, + "loss": 0.5311, + "step": 32377 + }, + { + "epoch": 0.992337869314699, + "grad_norm": 1.565890209577882, + "learning_rate": 3.078984443698474e-09, + "loss": 0.6397, + "step": 32378 + }, + { + "epoch": 0.9923685178374403, + "grad_norm": 1.980070730812153, + "learning_rate": 3.054403083396773e-09, + "loss": 0.593, + "step": 32379 + }, + { + "epoch": 0.9923991663601814, + "grad_norm": 1.704948574536718, + "learning_rate": 3.0299202255579253e-09, + "loss": 0.586, + "step": 32380 + }, + { + "epoch": 0.9924298148829226, + "grad_norm": 2.160414602776429, + "learning_rate": 3.005535870423959e-09, + "loss": 0.7354, + "step": 32381 + }, + { + "epoch": 0.9924604634056639, + "grad_norm": 1.682007803050603, + "learning_rate": 2.981250018232462e-09, + "loss": 0.6026, + "step": 32382 + }, + { + "epoch": 0.992491111928405, + "grad_norm": 1.8097393456603605, + "learning_rate": 2.9570626692265735e-09, + "loss": 0.6544, + "step": 32383 + }, + { + "epoch": 0.9925217604511463, + "grad_norm": 1.7134680406736482, + "learning_rate": 2.93297382364055e-09, + "loss": 0.6746, + "step": 32384 + }, + { + "epoch": 0.9925524089738874, + "grad_norm": 1.9356240385294572, + "learning_rate": 2.9089834817153106e-09, + "loss": 0.556, + "step": 32385 + }, + { + "epoch": 0.9925830574966287, + "grad_norm": 1.708969899727692, + "learning_rate": 2.885091643685112e-09, + "loss": 0.665, + "step": 32386 + }, + { + "epoch": 0.9926137060193698, + "grad_norm": 0.648030522675159, + "learning_rate": 2.8612983097864312e-09, + "loss": 0.5231, + "step": 32387 + }, + { + "epoch": 0.9926443545421111, + "grad_norm": 1.657995202720453, + "learning_rate": 2.8376034802524154e-09, + "loss": 0.5937, + "step": 32388 + }, + { + "epoch": 0.9926750030648522, + "grad_norm": 1.8088256951859465, + "learning_rate": 2.8140071553184324e-09, + "loss": 0.7483, + "step": 32389 + }, + { + "epoch": 0.9927056515875935, + "grad_norm": 1.6995487424285365, + "learning_rate": 2.790509335215408e-09, + "loss": 0.6304, + "step": 32390 + }, + { + "epoch": 0.9927363001103346, + "grad_norm": 1.5422961682983347, + "learning_rate": 2.7671100201753785e-09, + "loss": 0.6282, + "step": 32391 + }, + { + "epoch": 0.9927669486330759, + "grad_norm": 1.6296448680832736, + "learning_rate": 2.743809210428161e-09, + "loss": 0.5887, + "step": 32392 + }, + { + "epoch": 0.9927975971558171, + "grad_norm": 0.6770245749065749, + "learning_rate": 2.7206069062046814e-09, + "loss": 0.5034, + "step": 32393 + }, + { + "epoch": 0.9928282456785583, + "grad_norm": 1.9163889327953783, + "learning_rate": 2.6975031077336457e-09, + "loss": 0.6908, + "step": 32394 + }, + { + "epoch": 0.9928588942012995, + "grad_norm": 1.7108244821935126, + "learning_rate": 2.674497815241539e-09, + "loss": 0.6876, + "step": 32395 + }, + { + "epoch": 0.9928895427240407, + "grad_norm": 1.737821977606983, + "learning_rate": 2.6515910289548476e-09, + "loss": 0.6042, + "step": 32396 + }, + { + "epoch": 0.9929201912467819, + "grad_norm": 1.6792804329130657, + "learning_rate": 2.6287827491011663e-09, + "loss": 0.6396, + "step": 32397 + }, + { + "epoch": 0.9929508397695231, + "grad_norm": 0.6971838422627492, + "learning_rate": 2.6060729759036506e-09, + "loss": 0.5331, + "step": 32398 + }, + { + "epoch": 0.9929814882922643, + "grad_norm": 1.7879302154392127, + "learning_rate": 2.5834617095865657e-09, + "loss": 0.7093, + "step": 32399 + }, + { + "epoch": 0.9930121368150056, + "grad_norm": 2.0342288692903123, + "learning_rate": 2.5609489503719554e-09, + "loss": 0.8062, + "step": 32400 + }, + { + "epoch": 0.9930427853377467, + "grad_norm": 1.6332666725808211, + "learning_rate": 2.538534698482975e-09, + "loss": 0.5816, + "step": 32401 + }, + { + "epoch": 0.993073433860488, + "grad_norm": 1.833026297764216, + "learning_rate": 2.5162189541394487e-09, + "loss": 0.6825, + "step": 32402 + }, + { + "epoch": 0.9931040823832291, + "grad_norm": 1.643114518729959, + "learning_rate": 2.4940017175612007e-09, + "loss": 0.5996, + "step": 32403 + }, + { + "epoch": 0.9931347309059704, + "grad_norm": 1.8715993369137136, + "learning_rate": 2.471882988968055e-09, + "loss": 0.5946, + "step": 32404 + }, + { + "epoch": 0.9931653794287115, + "grad_norm": 1.597102304369746, + "learning_rate": 2.4498627685765055e-09, + "loss": 0.6534, + "step": 32405 + }, + { + "epoch": 0.9931960279514528, + "grad_norm": 1.9038171566249156, + "learning_rate": 2.427941056605265e-09, + "loss": 0.7206, + "step": 32406 + }, + { + "epoch": 0.9932266764741939, + "grad_norm": 0.6715126177208038, + "learning_rate": 2.406117853269718e-09, + "loss": 0.5043, + "step": 32407 + }, + { + "epoch": 0.9932573249969352, + "grad_norm": 1.4791620759807123, + "learning_rate": 2.3843931587841374e-09, + "loss": 0.6074, + "step": 32408 + }, + { + "epoch": 0.9932879735196763, + "grad_norm": 2.0551118496939185, + "learning_rate": 2.3627669733639058e-09, + "loss": 0.6349, + "step": 32409 + }, + { + "epoch": 0.9933186220424176, + "grad_norm": 1.8253985399774013, + "learning_rate": 2.341239297219966e-09, + "loss": 0.7641, + "step": 32410 + }, + { + "epoch": 0.9933492705651588, + "grad_norm": 0.6809102663643476, + "learning_rate": 2.319810130566591e-09, + "loss": 0.5303, + "step": 32411 + }, + { + "epoch": 0.9933799190878999, + "grad_norm": 1.8867704706482549, + "learning_rate": 2.298479473614723e-09, + "loss": 0.5847, + "step": 32412 + }, + { + "epoch": 0.9934105676106412, + "grad_norm": 1.7307238252683776, + "learning_rate": 2.2772473265730843e-09, + "loss": 0.6212, + "step": 32413 + }, + { + "epoch": 0.9934412161333823, + "grad_norm": 1.5856448180110436, + "learning_rate": 2.256113689652617e-09, + "loss": 0.6058, + "step": 32414 + }, + { + "epoch": 0.9934718646561236, + "grad_norm": 1.7307552696605624, + "learning_rate": 2.2350785630598225e-09, + "loss": 0.6506, + "step": 32415 + }, + { + "epoch": 0.9935025131788647, + "grad_norm": 1.573842532403511, + "learning_rate": 2.214141947003423e-09, + "loss": 0.6338, + "step": 32416 + }, + { + "epoch": 0.993533161701606, + "grad_norm": 1.6312022129700394, + "learning_rate": 2.1933038416888096e-09, + "loss": 0.7377, + "step": 32417 + }, + { + "epoch": 0.9935638102243471, + "grad_norm": 1.675653211903925, + "learning_rate": 2.1725642473213736e-09, + "loss": 0.5094, + "step": 32418 + }, + { + "epoch": 0.9935944587470884, + "grad_norm": 1.7079823321524803, + "learning_rate": 2.1519231641065065e-09, + "loss": 0.6291, + "step": 32419 + }, + { + "epoch": 0.9936251072698296, + "grad_norm": 0.7033260665017912, + "learning_rate": 2.131380592246268e-09, + "loss": 0.5023, + "step": 32420 + }, + { + "epoch": 0.9936557557925708, + "grad_norm": 0.6446041934624875, + "learning_rate": 2.1109365319438304e-09, + "loss": 0.4875, + "step": 32421 + }, + { + "epoch": 0.993686404315312, + "grad_norm": 2.0165711467207794, + "learning_rate": 2.0905909834001426e-09, + "loss": 0.5745, + "step": 32422 + }, + { + "epoch": 0.9937170528380532, + "grad_norm": 2.1587231597685244, + "learning_rate": 2.070343946816156e-09, + "loss": 0.6635, + "step": 32423 + }, + { + "epoch": 0.9937477013607944, + "grad_norm": 1.812010517603448, + "learning_rate": 2.0501954223905996e-09, + "loss": 0.614, + "step": 32424 + }, + { + "epoch": 0.9937783498835356, + "grad_norm": 1.787778038491466, + "learning_rate": 2.0301454103233144e-09, + "loss": 0.7314, + "step": 32425 + }, + { + "epoch": 0.9938089984062768, + "grad_norm": 1.73506240500684, + "learning_rate": 2.0101939108108094e-09, + "loss": 0.5493, + "step": 32426 + }, + { + "epoch": 0.993839646929018, + "grad_norm": 1.8760166417873485, + "learning_rate": 1.990340924049594e-09, + "loss": 0.6777, + "step": 32427 + }, + { + "epoch": 0.9938702954517592, + "grad_norm": 1.7107501215801533, + "learning_rate": 1.9705864502361783e-09, + "loss": 0.6292, + "step": 32428 + }, + { + "epoch": 0.9939009439745005, + "grad_norm": 0.6449821375560949, + "learning_rate": 1.9509304895637403e-09, + "loss": 0.4991, + "step": 32429 + }, + { + "epoch": 0.9939315924972416, + "grad_norm": 1.6351789789385232, + "learning_rate": 1.93137304222768e-09, + "loss": 0.5578, + "step": 32430 + }, + { + "epoch": 0.9939622410199829, + "grad_norm": 1.8416972353173546, + "learning_rate": 1.9119141084200654e-09, + "loss": 0.6622, + "step": 32431 + }, + { + "epoch": 0.993992889542724, + "grad_norm": 1.9302346329591553, + "learning_rate": 1.892553688331855e-09, + "loss": 0.6194, + "step": 32432 + }, + { + "epoch": 0.9940235380654653, + "grad_norm": 1.7799129205860384, + "learning_rate": 1.8732917821551177e-09, + "loss": 0.6838, + "step": 32433 + }, + { + "epoch": 0.9940541865882064, + "grad_norm": 1.7874193593730232, + "learning_rate": 1.8541283900785912e-09, + "loss": 0.683, + "step": 32434 + }, + { + "epoch": 0.9940848351109477, + "grad_norm": 1.9658291003448907, + "learning_rate": 1.8350635122921235e-09, + "loss": 0.5997, + "step": 32435 + }, + { + "epoch": 0.9941154836336888, + "grad_norm": 1.6659969729778394, + "learning_rate": 1.816097148982232e-09, + "loss": 0.5204, + "step": 32436 + }, + { + "epoch": 0.9941461321564301, + "grad_norm": 0.6722634875087697, + "learning_rate": 1.7972293003365448e-09, + "loss": 0.5208, + "step": 32437 + }, + { + "epoch": 0.9941767806791713, + "grad_norm": 1.4571761807481522, + "learning_rate": 1.7784599665415791e-09, + "loss": 0.5212, + "step": 32438 + }, + { + "epoch": 0.9942074292019125, + "grad_norm": 1.8304986245796273, + "learning_rate": 1.7597891477805217e-09, + "loss": 0.642, + "step": 32439 + }, + { + "epoch": 0.9942380777246537, + "grad_norm": 0.6848494794211326, + "learning_rate": 1.74121684423878e-09, + "loss": 0.5165, + "step": 32440 + }, + { + "epoch": 0.9942687262473949, + "grad_norm": 1.8430999041601415, + "learning_rate": 1.7227430560995406e-09, + "loss": 0.6548, + "step": 32441 + }, + { + "epoch": 0.9942993747701361, + "grad_norm": 1.59322687356463, + "learning_rate": 1.70436778354377e-09, + "loss": 0.6132, + "step": 32442 + }, + { + "epoch": 0.9943300232928772, + "grad_norm": 1.622883510488724, + "learning_rate": 1.6860910267535446e-09, + "loss": 0.5601, + "step": 32443 + }, + { + "epoch": 0.9943606718156185, + "grad_norm": 2.0082269146720164, + "learning_rate": 1.6679127859076105e-09, + "loss": 0.6635, + "step": 32444 + }, + { + "epoch": 0.9943913203383596, + "grad_norm": 1.808030696946194, + "learning_rate": 1.6498330611858239e-09, + "loss": 0.6077, + "step": 32445 + }, + { + "epoch": 0.9944219688611009, + "grad_norm": 1.710889721099604, + "learning_rate": 1.6318518527669302e-09, + "loss": 0.7228, + "step": 32446 + }, + { + "epoch": 0.994452617383842, + "grad_norm": 1.6075421845588527, + "learning_rate": 1.6139691608285657e-09, + "loss": 0.6487, + "step": 32447 + }, + { + "epoch": 0.9944832659065833, + "grad_norm": 1.5601499858312524, + "learning_rate": 1.596184985545035e-09, + "loss": 0.6554, + "step": 32448 + }, + { + "epoch": 0.9945139144293245, + "grad_norm": 1.905813796443634, + "learning_rate": 1.5784993270917537e-09, + "loss": 0.6181, + "step": 32449 + }, + { + "epoch": 0.9945445629520657, + "grad_norm": 1.847727037807901, + "learning_rate": 1.5609121856452468e-09, + "loss": 0.567, + "step": 32450 + }, + { + "epoch": 0.9945752114748069, + "grad_norm": 1.6482778165677854, + "learning_rate": 1.543423561375379e-09, + "loss": 0.622, + "step": 32451 + }, + { + "epoch": 0.9946058599975481, + "grad_norm": 1.9299047680288364, + "learning_rate": 1.526033454457565e-09, + "loss": 0.6463, + "step": 32452 + }, + { + "epoch": 0.9946365085202893, + "grad_norm": 1.621319680083407, + "learning_rate": 1.5087418650627793e-09, + "loss": 0.5216, + "step": 32453 + }, + { + "epoch": 0.9946671570430305, + "grad_norm": 1.8010365876610808, + "learning_rate": 1.4915487933586658e-09, + "loss": 0.6687, + "step": 32454 + }, + { + "epoch": 0.9946978055657717, + "grad_norm": 1.6813075614398496, + "learning_rate": 1.4744542395184193e-09, + "loss": 0.6107, + "step": 32455 + }, + { + "epoch": 0.994728454088513, + "grad_norm": 1.6499944379829423, + "learning_rate": 1.4574582037074625e-09, + "loss": 0.6211, + "step": 32456 + }, + { + "epoch": 0.9947591026112541, + "grad_norm": 1.7153393102812986, + "learning_rate": 1.4405606860945499e-09, + "loss": 0.6445, + "step": 32457 + }, + { + "epoch": 0.9947897511339954, + "grad_norm": 1.6694472916218823, + "learning_rate": 1.4237616868462146e-09, + "loss": 0.6747, + "step": 32458 + }, + { + "epoch": 0.9948203996567365, + "grad_norm": 1.8650990328432184, + "learning_rate": 1.40706120612788e-09, + "loss": 0.6331, + "step": 32459 + }, + { + "epoch": 0.9948510481794778, + "grad_norm": 1.6348105717682437, + "learning_rate": 1.3904592441038588e-09, + "loss": 0.598, + "step": 32460 + }, + { + "epoch": 0.9948816967022189, + "grad_norm": 0.6534711545790128, + "learning_rate": 1.3739558009384645e-09, + "loss": 0.5076, + "step": 32461 + }, + { + "epoch": 0.9949123452249602, + "grad_norm": 1.7934252639529267, + "learning_rate": 1.3575508767926793e-09, + "loss": 0.6334, + "step": 32462 + }, + { + "epoch": 0.9949429937477013, + "grad_norm": 1.8458619785228205, + "learning_rate": 1.3412444718297058e-09, + "loss": 0.6046, + "step": 32463 + }, + { + "epoch": 0.9949736422704426, + "grad_norm": 1.8494854512607735, + "learning_rate": 1.325036586209416e-09, + "loss": 0.7155, + "step": 32464 + }, + { + "epoch": 0.9950042907931838, + "grad_norm": 1.644202452623491, + "learning_rate": 1.3089272200927927e-09, + "loss": 0.7882, + "step": 32465 + }, + { + "epoch": 0.995034939315925, + "grad_norm": 1.7513022354335772, + "learning_rate": 1.292916373636377e-09, + "loss": 0.5011, + "step": 32466 + }, + { + "epoch": 0.9950655878386662, + "grad_norm": 1.528357151895249, + "learning_rate": 1.2770040470000412e-09, + "loss": 0.5432, + "step": 32467 + }, + { + "epoch": 0.9950962363614074, + "grad_norm": 0.6582458664975137, + "learning_rate": 1.2611902403392161e-09, + "loss": 0.5146, + "step": 32468 + }, + { + "epoch": 0.9951268848841486, + "grad_norm": 1.8138714049789872, + "learning_rate": 1.2454749538104439e-09, + "loss": 0.5846, + "step": 32469 + }, + { + "epoch": 0.9951575334068898, + "grad_norm": 1.6030114548019605, + "learning_rate": 1.2298581875680449e-09, + "loss": 0.6192, + "step": 32470 + }, + { + "epoch": 0.995188181929631, + "grad_norm": 1.8638831402019276, + "learning_rate": 1.2143399417663405e-09, + "loss": 0.6113, + "step": 32471 + }, + { + "epoch": 0.9952188304523722, + "grad_norm": 1.5813483218528521, + "learning_rate": 1.198920216557431e-09, + "loss": 0.5578, + "step": 32472 + }, + { + "epoch": 0.9952494789751134, + "grad_norm": 1.4966290787259975, + "learning_rate": 1.1835990120945273e-09, + "loss": 0.6027, + "step": 32473 + }, + { + "epoch": 0.9952801274978545, + "grad_norm": 1.8032608566794208, + "learning_rate": 1.1683763285275096e-09, + "loss": 0.6551, + "step": 32474 + }, + { + "epoch": 0.9953107760205958, + "grad_norm": 1.7888880892466763, + "learning_rate": 1.1532521660073682e-09, + "loss": 0.638, + "step": 32475 + }, + { + "epoch": 0.995341424543337, + "grad_norm": 1.630266453868284, + "learning_rate": 1.1382265246828728e-09, + "loss": 0.6546, + "step": 32476 + }, + { + "epoch": 0.9953720730660782, + "grad_norm": 1.96352541742801, + "learning_rate": 1.123299404700573e-09, + "loss": 0.677, + "step": 32477 + }, + { + "epoch": 0.9954027215888194, + "grad_norm": 2.0284983646067825, + "learning_rate": 1.1084708062092386e-09, + "loss": 0.6767, + "step": 32478 + }, + { + "epoch": 0.9954333701115606, + "grad_norm": 1.723086289476193, + "learning_rate": 1.093740729354309e-09, + "loss": 0.5857, + "step": 32479 + }, + { + "epoch": 0.9954640186343018, + "grad_norm": 1.725393281819515, + "learning_rate": 1.0791091742812232e-09, + "loss": 0.6552, + "step": 32480 + }, + { + "epoch": 0.995494667157043, + "grad_norm": 1.842186874962294, + "learning_rate": 1.0645761411343103e-09, + "loss": 0.5298, + "step": 32481 + }, + { + "epoch": 0.9955253156797842, + "grad_norm": 1.7537485352248563, + "learning_rate": 1.0501416300567891e-09, + "loss": 0.614, + "step": 32482 + }, + { + "epoch": 0.9955559642025255, + "grad_norm": 1.6834288197169498, + "learning_rate": 1.0358056411896578e-09, + "loss": 0.6434, + "step": 32483 + }, + { + "epoch": 0.9955866127252666, + "grad_norm": 1.8097041160979324, + "learning_rate": 1.021568174675025e-09, + "loss": 0.6587, + "step": 32484 + }, + { + "epoch": 0.9956172612480079, + "grad_norm": 1.5878266519580067, + "learning_rate": 1.0074292306538892e-09, + "loss": 0.5937, + "step": 32485 + }, + { + "epoch": 0.995647909770749, + "grad_norm": 1.4687240120728935, + "learning_rate": 9.93388809265028e-10, + "loss": 0.6874, + "step": 32486 + }, + { + "epoch": 0.9956785582934903, + "grad_norm": 1.6389061682269779, + "learning_rate": 9.794469106461092e-10, + "loss": 0.6222, + "step": 32487 + }, + { + "epoch": 0.9957092068162314, + "grad_norm": 1.7766140867239775, + "learning_rate": 9.656035349348004e-10, + "loss": 0.6591, + "step": 32488 + }, + { + "epoch": 0.9957398553389727, + "grad_norm": 1.5833534219730308, + "learning_rate": 9.518586822687692e-10, + "loss": 0.6693, + "step": 32489 + }, + { + "epoch": 0.9957705038617138, + "grad_norm": 1.3670416897352666, + "learning_rate": 9.382123527812425e-10, + "loss": 0.5373, + "step": 32490 + }, + { + "epoch": 0.9958011523844551, + "grad_norm": 1.7902676052747322, + "learning_rate": 9.246645466087778e-10, + "loss": 0.6845, + "step": 32491 + }, + { + "epoch": 0.9958318009071963, + "grad_norm": 0.6542727207451593, + "learning_rate": 9.112152638834914e-10, + "loss": 0.5021, + "step": 32492 + }, + { + "epoch": 0.9958624494299375, + "grad_norm": 1.408431986556368, + "learning_rate": 8.978645047386104e-10, + "loss": 0.4647, + "step": 32493 + }, + { + "epoch": 0.9958930979526787, + "grad_norm": 1.708299065187149, + "learning_rate": 8.846122693051407e-10, + "loss": 0.6829, + "step": 32494 + }, + { + "epoch": 0.9959237464754199, + "grad_norm": 1.5924822608035465, + "learning_rate": 8.714585577140889e-10, + "loss": 0.5246, + "step": 32495 + }, + { + "epoch": 0.9959543949981611, + "grad_norm": 1.6572180415800317, + "learning_rate": 8.584033700953509e-10, + "loss": 0.6183, + "step": 32496 + }, + { + "epoch": 0.9959850435209023, + "grad_norm": 0.6809550444246961, + "learning_rate": 8.454467065766025e-10, + "loss": 0.515, + "step": 32497 + }, + { + "epoch": 0.9960156920436435, + "grad_norm": 1.7260744033110245, + "learning_rate": 8.325885672866296e-10, + "loss": 0.688, + "step": 32498 + }, + { + "epoch": 0.9960463405663847, + "grad_norm": 1.5751719367745065, + "learning_rate": 8.198289523519975e-10, + "loss": 0.7, + "step": 32499 + }, + { + "epoch": 0.9960769890891259, + "grad_norm": 1.804592707067899, + "learning_rate": 8.071678618970514e-10, + "loss": 0.6986, + "step": 32500 + }, + { + "epoch": 0.9961076376118672, + "grad_norm": 1.685878371860818, + "learning_rate": 7.946052960472462e-10, + "loss": 0.6424, + "step": 32501 + }, + { + "epoch": 0.9961382861346083, + "grad_norm": 1.5737965568924288, + "learning_rate": 7.821412549269269e-10, + "loss": 0.622, + "step": 32502 + }, + { + "epoch": 0.9961689346573496, + "grad_norm": 1.6974336845632179, + "learning_rate": 7.697757386593286e-10, + "loss": 0.5984, + "step": 32503 + }, + { + "epoch": 0.9961995831800907, + "grad_norm": 1.9017008041303305, + "learning_rate": 7.57508747364355e-10, + "loss": 0.6452, + "step": 32504 + }, + { + "epoch": 0.9962302317028319, + "grad_norm": 1.7846299730234798, + "learning_rate": 7.45340281165241e-10, + "loss": 0.7029, + "step": 32505 + }, + { + "epoch": 0.9962608802255731, + "grad_norm": 1.6058267375353001, + "learning_rate": 7.332703401796704e-10, + "loss": 0.6066, + "step": 32506 + }, + { + "epoch": 0.9962915287483143, + "grad_norm": 2.0090949411633363, + "learning_rate": 7.212989245286572e-10, + "loss": 0.6467, + "step": 32507 + }, + { + "epoch": 0.9963221772710555, + "grad_norm": 1.5356692238303984, + "learning_rate": 7.094260343276649e-10, + "loss": 0.7115, + "step": 32508 + }, + { + "epoch": 0.9963528257937967, + "grad_norm": 1.5749712404724816, + "learning_rate": 6.976516696965973e-10, + "loss": 0.6423, + "step": 32509 + }, + { + "epoch": 0.996383474316538, + "grad_norm": 1.776716615012549, + "learning_rate": 6.859758307486975e-10, + "loss": 0.6692, + "step": 32510 + }, + { + "epoch": 0.9964141228392791, + "grad_norm": 1.681603324667517, + "learning_rate": 6.743985176016487e-10, + "loss": 0.6488, + "step": 32511 + }, + { + "epoch": 0.9964447713620204, + "grad_norm": 0.6567841157140754, + "learning_rate": 6.629197303675838e-10, + "loss": 0.5083, + "step": 32512 + }, + { + "epoch": 0.9964754198847615, + "grad_norm": 1.5253823935089519, + "learning_rate": 6.515394691597454e-10, + "loss": 0.6324, + "step": 32513 + }, + { + "epoch": 0.9965060684075028, + "grad_norm": 1.6738355161918357, + "learning_rate": 6.402577340913763e-10, + "loss": 0.6269, + "step": 32514 + }, + { + "epoch": 0.9965367169302439, + "grad_norm": 1.62725534369927, + "learning_rate": 6.290745252723885e-10, + "loss": 0.5916, + "step": 32515 + }, + { + "epoch": 0.9965673654529852, + "grad_norm": 1.693913522304502, + "learning_rate": 6.179898428138042e-10, + "loss": 0.5634, + "step": 32516 + }, + { + "epoch": 0.9965980139757263, + "grad_norm": 1.6232027821962105, + "learning_rate": 6.070036868255358e-10, + "loss": 0.584, + "step": 32517 + }, + { + "epoch": 0.9966286624984676, + "grad_norm": 1.9069050894465907, + "learning_rate": 5.961160574141645e-10, + "loss": 0.6155, + "step": 32518 + }, + { + "epoch": 0.9966593110212087, + "grad_norm": 1.6675504934918215, + "learning_rate": 5.853269546873818e-10, + "loss": 0.5674, + "step": 32519 + }, + { + "epoch": 0.99668995954395, + "grad_norm": 0.6767211472094231, + "learning_rate": 5.746363787517695e-10, + "loss": 0.5362, + "step": 32520 + }, + { + "epoch": 0.9967206080666912, + "grad_norm": 1.8682050283190947, + "learning_rate": 5.640443297139086e-10, + "loss": 0.6531, + "step": 32521 + }, + { + "epoch": 0.9967512565894324, + "grad_norm": 1.6288644976059015, + "learning_rate": 5.535508076759399e-10, + "loss": 0.6183, + "step": 32522 + }, + { + "epoch": 0.9967819051121736, + "grad_norm": 1.6679395359131686, + "learning_rate": 5.431558127422243e-10, + "loss": 0.6726, + "step": 32523 + }, + { + "epoch": 0.9968125536349148, + "grad_norm": 1.6867776041552376, + "learning_rate": 5.328593450160124e-10, + "loss": 0.5608, + "step": 32524 + }, + { + "epoch": 0.996843202157656, + "grad_norm": 0.666863028122964, + "learning_rate": 5.226614045972244e-10, + "loss": 0.5184, + "step": 32525 + }, + { + "epoch": 0.9968738506803972, + "grad_norm": 1.8627832101360657, + "learning_rate": 5.125619915868907e-10, + "loss": 0.6925, + "step": 32526 + }, + { + "epoch": 0.9969044992031384, + "grad_norm": 1.7213322550382832, + "learning_rate": 5.025611060860413e-10, + "loss": 0.6656, + "step": 32527 + }, + { + "epoch": 0.9969351477258797, + "grad_norm": 1.7917878184013822, + "learning_rate": 4.926587481912659e-10, + "loss": 0.625, + "step": 32528 + }, + { + "epoch": 0.9969657962486208, + "grad_norm": 1.777140849338918, + "learning_rate": 4.828549180002639e-10, + "loss": 0.6567, + "step": 32529 + }, + { + "epoch": 0.9969964447713621, + "grad_norm": 1.7718329266169612, + "learning_rate": 4.731496156107352e-10, + "loss": 0.6358, + "step": 32530 + }, + { + "epoch": 0.9970270932941032, + "grad_norm": 1.8053172737956658, + "learning_rate": 4.6354284111815863e-10, + "loss": 0.643, + "step": 32531 + }, + { + "epoch": 0.9970577418168445, + "grad_norm": 1.722037897491795, + "learning_rate": 4.5403459461579314e-10, + "loss": 0.5973, + "step": 32532 + }, + { + "epoch": 0.9970883903395856, + "grad_norm": 1.87729461754771, + "learning_rate": 4.446248761991179e-10, + "loss": 0.6094, + "step": 32533 + }, + { + "epoch": 0.9971190388623269, + "grad_norm": 1.7531369346776968, + "learning_rate": 4.3531368596028136e-10, + "loss": 0.7109, + "step": 32534 + }, + { + "epoch": 0.997149687385068, + "grad_norm": 1.7874445317979648, + "learning_rate": 4.261010239903218e-10, + "loss": 0.6282, + "step": 32535 + }, + { + "epoch": 0.9971803359078092, + "grad_norm": 0.6759686187449647, + "learning_rate": 4.169868903802776e-10, + "loss": 0.4993, + "step": 32536 + }, + { + "epoch": 0.9972109844305505, + "grad_norm": 1.6507034625412074, + "learning_rate": 4.079712852200768e-10, + "loss": 0.6955, + "step": 32537 + }, + { + "epoch": 0.9972416329532916, + "grad_norm": 1.6724077506397808, + "learning_rate": 3.990542085996474e-10, + "loss": 0.6177, + "step": 32538 + }, + { + "epoch": 0.9972722814760329, + "grad_norm": 1.5155278245943573, + "learning_rate": 3.902356606044766e-10, + "loss": 0.6146, + "step": 32539 + }, + { + "epoch": 0.997302929998774, + "grad_norm": 1.659999347825354, + "learning_rate": 3.815156413233823e-10, + "loss": 0.6729, + "step": 32540 + }, + { + "epoch": 0.9973335785215153, + "grad_norm": 2.790816195950015, + "learning_rate": 3.7289415084185156e-10, + "loss": 0.524, + "step": 32541 + }, + { + "epoch": 0.9973642270442564, + "grad_norm": 0.6735236556157835, + "learning_rate": 3.6437118924537164e-10, + "loss": 0.4833, + "step": 32542 + }, + { + "epoch": 0.9973948755669977, + "grad_norm": 1.8293622697746428, + "learning_rate": 3.55946756616099e-10, + "loss": 0.7243, + "step": 32543 + }, + { + "epoch": 0.9974255240897388, + "grad_norm": 1.7061659435050485, + "learning_rate": 3.4762085303841063e-10, + "loss": 0.5645, + "step": 32544 + }, + { + "epoch": 0.9974561726124801, + "grad_norm": 1.6012115003774288, + "learning_rate": 3.39393478594463e-10, + "loss": 0.5009, + "step": 32545 + }, + { + "epoch": 0.9974868211352212, + "grad_norm": 1.6120539136130467, + "learning_rate": 3.312646333653025e-10, + "loss": 0.5487, + "step": 32546 + }, + { + "epoch": 0.9975174696579625, + "grad_norm": 1.8400007771499935, + "learning_rate": 3.23234317430865e-10, + "loss": 0.5952, + "step": 32547 + }, + { + "epoch": 0.9975481181807037, + "grad_norm": 2.0624732349140937, + "learning_rate": 3.153025308688662e-10, + "loss": 0.57, + "step": 32548 + }, + { + "epoch": 0.9975787667034449, + "grad_norm": 1.7854511537054867, + "learning_rate": 3.0746927375924216e-10, + "loss": 0.633, + "step": 32549 + }, + { + "epoch": 0.9976094152261861, + "grad_norm": 0.6345967684505331, + "learning_rate": 2.9973454617970854e-10, + "loss": 0.5159, + "step": 32550 + }, + { + "epoch": 0.9976400637489273, + "grad_norm": 1.5560456404762613, + "learning_rate": 2.9209834820465023e-10, + "loss": 0.5172, + "step": 32551 + }, + { + "epoch": 0.9976707122716685, + "grad_norm": 1.6467192243999649, + "learning_rate": 2.8456067990956236e-10, + "loss": 0.5848, + "step": 32552 + }, + { + "epoch": 0.9977013607944097, + "grad_norm": 1.4916440831804905, + "learning_rate": 2.771215413699402e-10, + "loss": 0.5251, + "step": 32553 + }, + { + "epoch": 0.9977320093171509, + "grad_norm": 1.778292052931338, + "learning_rate": 2.697809326579481e-10, + "loss": 0.6919, + "step": 32554 + }, + { + "epoch": 0.9977626578398922, + "grad_norm": 1.6492442975147243, + "learning_rate": 2.6253885384686093e-10, + "loss": 0.6637, + "step": 32555 + }, + { + "epoch": 0.9977933063626333, + "grad_norm": 1.6605223158955125, + "learning_rate": 2.553953050066227e-10, + "loss": 0.7093, + "step": 32556 + }, + { + "epoch": 0.9978239548853746, + "grad_norm": 1.7735999430707676, + "learning_rate": 2.483502862093978e-10, + "loss": 0.7331, + "step": 32557 + }, + { + "epoch": 0.9978546034081157, + "grad_norm": 1.8551122280758137, + "learning_rate": 2.4140379752291e-10, + "loss": 0.6674, + "step": 32558 + }, + { + "epoch": 0.997885251930857, + "grad_norm": 1.6193008193983753, + "learning_rate": 2.345558390171032e-10, + "loss": 0.5492, + "step": 32559 + }, + { + "epoch": 0.9979159004535981, + "grad_norm": 1.5393222542622536, + "learning_rate": 2.278064107585909e-10, + "loss": 0.6135, + "step": 32560 + }, + { + "epoch": 0.9979465489763394, + "grad_norm": 0.6762882586396185, + "learning_rate": 2.211555128139864e-10, + "loss": 0.5143, + "step": 32561 + }, + { + "epoch": 0.9979771974990805, + "grad_norm": 1.6773701921400925, + "learning_rate": 2.1460314524990312e-10, + "loss": 0.5887, + "step": 32562 + }, + { + "epoch": 0.9980078460218218, + "grad_norm": 1.7657374513950905, + "learning_rate": 2.0814930812851353e-10, + "loss": 0.6337, + "step": 32563 + }, + { + "epoch": 0.998038494544563, + "grad_norm": 1.8372004735569882, + "learning_rate": 2.0179400151532081e-10, + "loss": 0.6887, + "step": 32564 + }, + { + "epoch": 0.9980691430673042, + "grad_norm": 0.6773033572287505, + "learning_rate": 1.9553722547249743e-10, + "loss": 0.5411, + "step": 32565 + }, + { + "epoch": 0.9980997915900454, + "grad_norm": 1.6336966588258826, + "learning_rate": 1.8937898006110567e-10, + "loss": 0.5927, + "step": 32566 + }, + { + "epoch": 0.9981304401127865, + "grad_norm": 1.6655463568063076, + "learning_rate": 1.8331926534220778e-10, + "loss": 0.6055, + "step": 32567 + }, + { + "epoch": 0.9981610886355278, + "grad_norm": 1.6432657924603906, + "learning_rate": 1.7735808137686606e-10, + "loss": 0.6032, + "step": 32568 + }, + { + "epoch": 0.9981917371582689, + "grad_norm": 1.892167737269995, + "learning_rate": 1.7149542822170185e-10, + "loss": 0.7015, + "step": 32569 + }, + { + "epoch": 0.9982223856810102, + "grad_norm": 1.5440449687908802, + "learning_rate": 1.6573130593555697e-10, + "loss": 0.6258, + "step": 32570 + }, + { + "epoch": 0.9982530342037513, + "grad_norm": 1.4786031147733871, + "learning_rate": 1.600657145739426e-10, + "loss": 0.488, + "step": 32571 + }, + { + "epoch": 0.9982836827264926, + "grad_norm": 1.5671152738179883, + "learning_rate": 1.5449865419570054e-10, + "loss": 0.5576, + "step": 32572 + }, + { + "epoch": 0.9983143312492337, + "grad_norm": 0.6870250286956615, + "learning_rate": 1.4903012485190106e-10, + "loss": 0.5259, + "step": 32573 + }, + { + "epoch": 0.998344979771975, + "grad_norm": 0.6708570367447184, + "learning_rate": 1.4366012659916552e-10, + "loss": 0.5233, + "step": 32574 + }, + { + "epoch": 0.9983756282947162, + "grad_norm": 2.0137658966023646, + "learning_rate": 1.3838865948967439e-10, + "loss": 0.6372, + "step": 32575 + }, + { + "epoch": 0.9984062768174574, + "grad_norm": 0.6478597627160173, + "learning_rate": 1.3321572357560818e-10, + "loss": 0.4859, + "step": 32576 + }, + { + "epoch": 0.9984369253401986, + "grad_norm": 0.6940783169051805, + "learning_rate": 1.2814131890692693e-10, + "loss": 0.4988, + "step": 32577 + }, + { + "epoch": 0.9984675738629398, + "grad_norm": 1.7851710031612074, + "learning_rate": 1.2316544553359066e-10, + "loss": 0.5964, + "step": 32578 + }, + { + "epoch": 0.998498222385681, + "grad_norm": 1.6166826500753808, + "learning_rate": 1.1828810350666964e-10, + "loss": 0.591, + "step": 32579 + }, + { + "epoch": 0.9985288709084222, + "grad_norm": 0.6496364062347579, + "learning_rate": 1.13509292871683e-10, + "loss": 0.5112, + "step": 32580 + }, + { + "epoch": 0.9985595194311634, + "grad_norm": 1.7749569809197, + "learning_rate": 1.0882901367748056e-10, + "loss": 0.6039, + "step": 32581 + }, + { + "epoch": 0.9985901679539047, + "grad_norm": 1.7145513753301282, + "learning_rate": 1.0424726596958145e-10, + "loss": 0.6654, + "step": 32582 + }, + { + "epoch": 0.9986208164766458, + "grad_norm": 1.5864047311965535, + "learning_rate": 9.976404979350485e-11, + "loss": 0.6689, + "step": 32583 + }, + { + "epoch": 0.9986514649993871, + "grad_norm": 1.8163840556092938, + "learning_rate": 9.537936519254942e-11, + "loss": 0.5557, + "step": 32584 + }, + { + "epoch": 0.9986821135221282, + "grad_norm": 0.6603049343069944, + "learning_rate": 9.109321221001388e-11, + "loss": 0.5072, + "step": 32585 + }, + { + "epoch": 0.9987127620448695, + "grad_norm": 1.6764456045772806, + "learning_rate": 8.690559088919693e-11, + "loss": 0.5932, + "step": 32586 + }, + { + "epoch": 0.9987434105676106, + "grad_norm": 0.6543308748094413, + "learning_rate": 8.281650127006657e-11, + "loss": 0.5172, + "step": 32587 + }, + { + "epoch": 0.9987740590903519, + "grad_norm": 0.6561997715882928, + "learning_rate": 7.882594339370109e-11, + "loss": 0.4965, + "step": 32588 + }, + { + "epoch": 0.998804707613093, + "grad_norm": 1.7090427572912963, + "learning_rate": 7.493391729895827e-11, + "loss": 0.6961, + "step": 32589 + }, + { + "epoch": 0.9988353561358343, + "grad_norm": 1.743877652896337, + "learning_rate": 7.114042302580615e-11, + "loss": 0.6889, + "step": 32590 + }, + { + "epoch": 0.9988660046585754, + "grad_norm": 0.6863659170911217, + "learning_rate": 6.744546060866163e-11, + "loss": 0.52, + "step": 32591 + }, + { + "epoch": 0.9988966531813167, + "grad_norm": 1.8148321722736807, + "learning_rate": 6.384903008638255e-11, + "loss": 0.699, + "step": 32592 + }, + { + "epoch": 0.9989273017040579, + "grad_norm": 1.6856547126717716, + "learning_rate": 6.035113149338579e-11, + "loss": 0.5933, + "step": 32593 + }, + { + "epoch": 0.9989579502267991, + "grad_norm": 1.7579267390521236, + "learning_rate": 5.695176486519849e-11, + "loss": 0.5801, + "step": 32594 + }, + { + "epoch": 0.9989885987495403, + "grad_norm": 1.655135608748172, + "learning_rate": 5.365093023401713e-11, + "loss": 0.5983, + "step": 32595 + }, + { + "epoch": 0.9990192472722815, + "grad_norm": 1.7921584114467877, + "learning_rate": 5.044862763203817e-11, + "loss": 0.6458, + "step": 32596 + }, + { + "epoch": 0.9990498957950227, + "grad_norm": 1.6108214890372394, + "learning_rate": 4.734485709256831e-11, + "loss": 0.6511, + "step": 32597 + }, + { + "epoch": 0.9990805443177638, + "grad_norm": 1.839097232117445, + "learning_rate": 4.433961864447334e-11, + "loss": 0.6848, + "step": 32598 + }, + { + "epoch": 0.9991111928405051, + "grad_norm": 1.8658197262208152, + "learning_rate": 4.143291231772928e-11, + "loss": 0.5848, + "step": 32599 + }, + { + "epoch": 0.9991418413632462, + "grad_norm": 1.764915881840393, + "learning_rate": 3.862473814231216e-11, + "loss": 0.5962, + "step": 32600 + }, + { + "epoch": 0.9991724898859875, + "grad_norm": 1.6446843325902425, + "learning_rate": 3.5915096144867323e-11, + "loss": 0.6038, + "step": 32601 + }, + { + "epoch": 0.9992031384087287, + "grad_norm": 1.6758036944387904, + "learning_rate": 3.330398635204013e-11, + "loss": 0.5673, + "step": 32602 + }, + { + "epoch": 0.9992337869314699, + "grad_norm": 1.621602108489643, + "learning_rate": 3.0791408789365705e-11, + "loss": 0.6308, + "step": 32603 + }, + { + "epoch": 0.9992644354542111, + "grad_norm": 1.9522107773935171, + "learning_rate": 2.837736348126896e-11, + "loss": 0.6272, + "step": 32604 + }, + { + "epoch": 0.9992950839769523, + "grad_norm": 1.6540626752568077, + "learning_rate": 2.606185045328502e-11, + "loss": 0.5131, + "step": 32605 + }, + { + "epoch": 0.9993257324996935, + "grad_norm": 1.8093401212521443, + "learning_rate": 2.3844869726508126e-11, + "loss": 0.7235, + "step": 32606 + }, + { + "epoch": 0.9993563810224347, + "grad_norm": 1.706897047626367, + "learning_rate": 2.1726421324252956e-11, + "loss": 0.6393, + "step": 32607 + }, + { + "epoch": 0.9993870295451759, + "grad_norm": 1.7792625943148719, + "learning_rate": 1.9706505265393304e-11, + "loss": 0.6044, + "step": 32608 + }, + { + "epoch": 0.9994176780679171, + "grad_norm": 0.6778717602844317, + "learning_rate": 1.7785121572133635e-11, + "loss": 0.5368, + "step": 32609 + }, + { + "epoch": 0.9994483265906583, + "grad_norm": 2.0715423505982784, + "learning_rate": 1.5962270261127288e-11, + "loss": 0.7025, + "step": 32610 + }, + { + "epoch": 0.9994789751133996, + "grad_norm": 1.764035024769314, + "learning_rate": 1.4237951352358281e-11, + "loss": 0.5954, + "step": 32611 + }, + { + "epoch": 0.9995096236361407, + "grad_norm": 1.6715024669301353, + "learning_rate": 1.2612164861369736e-11, + "loss": 0.6964, + "step": 32612 + }, + { + "epoch": 0.999540272158882, + "grad_norm": 1.5116170110712923, + "learning_rate": 1.1084910804814997e-11, + "loss": 0.5633, + "step": 32613 + }, + { + "epoch": 0.9995709206816231, + "grad_norm": 0.660084537968539, + "learning_rate": 9.656189198237187e-12, + "loss": 0.5168, + "step": 32614 + }, + { + "epoch": 0.9996015692043644, + "grad_norm": 0.6782817243231997, + "learning_rate": 8.32600005384876e-12, + "loss": 0.5081, + "step": 32615 + }, + { + "epoch": 0.9996322177271055, + "grad_norm": 1.9682051346415534, + "learning_rate": 7.094343387192837e-12, + "loss": 0.6473, + "step": 32616 + }, + { + "epoch": 0.9996628662498468, + "grad_norm": 1.6512906295686534, + "learning_rate": 5.961219208261426e-12, + "loss": 0.651, + "step": 32617 + }, + { + "epoch": 0.9996935147725879, + "grad_norm": 1.6645880580997279, + "learning_rate": 4.926627530377204e-12, + "loss": 0.5475, + "step": 32618 + }, + { + "epoch": 0.9997241632953292, + "grad_norm": 1.6972321363570118, + "learning_rate": 3.990568361311731e-12, + "loss": 0.6827, + "step": 32619 + }, + { + "epoch": 0.9997548118180704, + "grad_norm": 1.6413070340222495, + "learning_rate": 3.1530417121672374e-12, + "loss": 0.6313, + "step": 32620 + }, + { + "epoch": 0.9997854603408116, + "grad_norm": 1.6991048444332444, + "learning_rate": 2.414047590715285e-12, + "loss": 0.6695, + "step": 32621 + }, + { + "epoch": 0.9998161088635528, + "grad_norm": 1.7088281419925284, + "learning_rate": 1.7735860036172114e-12, + "loss": 0.637, + "step": 32622 + }, + { + "epoch": 0.999846757386294, + "grad_norm": 1.7080934175358822, + "learning_rate": 1.231656958644578e-12, + "loss": 0.522, + "step": 32623 + }, + { + "epoch": 0.9998774059090352, + "grad_norm": 1.6165886198752055, + "learning_rate": 7.882604591280541e-13, + "loss": 0.5679, + "step": 32624 + }, + { + "epoch": 0.9999080544317764, + "grad_norm": 1.4561165714615436, + "learning_rate": 4.433965106187543e-13, + "loss": 0.5362, + "step": 32625 + }, + { + "epoch": 0.9999387029545176, + "grad_norm": 1.6575476967370761, + "learning_rate": 1.9706511644734806e-13, + "loss": 0.6427, + "step": 32626 + }, + { + "epoch": 0.9999693514772588, + "grad_norm": 0.9488578759568795, + "learning_rate": 4.926627883428125e-14, + "loss": 0.5467, + "step": 32627 + }, + { + "epoch": 1.0, + "grad_norm": 1.8381822860568406, + "learning_rate": 0.0, + "loss": 0.6039, + "step": 32628 + }, + { + "epoch": 1.0, + "step": 32628, + "total_flos": 7086529681850368.0, + "train_loss": 0.7074702348081932, + "train_runtime": 179540.8432, + "train_samples_per_second": 23.261, + "train_steps_per_second": 0.182 + } + ], + "logging_steps": 1.0, + "max_steps": 32628, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7086529681850368.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}