{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9997091755125782, "eval_steps": 500, "global_step": 3438, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005816489748436818, "grad_norm": NaN, "learning_rate": 1.0000000000000002e-06, "loss": 0.0, "step": 1 }, { "epoch": 0.0011632979496873637, "grad_norm": NaN, "learning_rate": 2.0000000000000003e-06, "loss": 0.0, "step": 2 }, { "epoch": 0.0017449469245310455, "grad_norm": NaN, "learning_rate": 3e-06, "loss": 0.0, "step": 3 }, { "epoch": 0.0023265958993747273, "grad_norm": NaN, "learning_rate": 4.000000000000001e-06, "loss": 0.0, "step": 4 }, { "epoch": 0.0029082448742184094, "grad_norm": NaN, "learning_rate": 5e-06, "loss": 0.0, "step": 5 }, { "epoch": 0.003489893849062091, "grad_norm": NaN, "learning_rate": 6e-06, "loss": 0.0, "step": 6 }, { "epoch": 0.004071542823905773, "grad_norm": NaN, "learning_rate": 7.000000000000001e-06, "loss": 0.0, "step": 7 }, { "epoch": 0.004653191798749455, "grad_norm": NaN, "learning_rate": 8.000000000000001e-06, "loss": 0.0, "step": 8 }, { "epoch": 0.005234840773593137, "grad_norm": NaN, "learning_rate": 9e-06, "loss": 0.0, "step": 9 }, { "epoch": 0.005816489748436819, "grad_norm": NaN, "learning_rate": 1e-05, "loss": 0.0, "step": 10 }, { "epoch": 0.0063981387232805, "grad_norm": NaN, "learning_rate": 1.1000000000000001e-05, "loss": 0.0, "step": 11 }, { "epoch": 0.006979787698124182, "grad_norm": NaN, "learning_rate": 1.2e-05, "loss": 0.0, "step": 12 }, { "epoch": 0.007561436672967864, "grad_norm": NaN, "learning_rate": 1.3000000000000001e-05, "loss": 0.0, "step": 13 }, { "epoch": 0.008143085647811545, "grad_norm": NaN, "learning_rate": 1.4000000000000001e-05, "loss": 0.0, "step": 14 }, { "epoch": 0.008724734622655227, "grad_norm": NaN, "learning_rate": 1.5e-05, "loss": 0.0, "step": 15 }, { "epoch": 0.00930638359749891, "grad_norm": NaN, "learning_rate": 1.6000000000000003e-05, "loss": 0.0, "step": 16 }, { "epoch": 0.009888032572342591, "grad_norm": NaN, "learning_rate": 1.7000000000000003e-05, "loss": 0.0, "step": 17 }, { "epoch": 0.010469681547186274, "grad_norm": NaN, "learning_rate": 1.8e-05, "loss": 0.0, "step": 18 }, { "epoch": 0.011051330522029956, "grad_norm": NaN, "learning_rate": 1.9e-05, "loss": 0.0, "step": 19 }, { "epoch": 0.011632979496873638, "grad_norm": NaN, "learning_rate": 2e-05, "loss": 0.0, "step": 20 }, { "epoch": 0.012214628471717318, "grad_norm": NaN, "learning_rate": 2.1e-05, "loss": 0.0, "step": 21 }, { "epoch": 0.012796277446561, "grad_norm": NaN, "learning_rate": 2.2000000000000003e-05, "loss": 0.0, "step": 22 }, { "epoch": 0.013377926421404682, "grad_norm": NaN, "learning_rate": 2.3000000000000003e-05, "loss": 0.0, "step": 23 }, { "epoch": 0.013959575396248364, "grad_norm": NaN, "learning_rate": 2.4e-05, "loss": 0.0, "step": 24 }, { "epoch": 0.014541224371092046, "grad_norm": NaN, "learning_rate": 2.5e-05, "loss": 0.0, "step": 25 }, { "epoch": 0.015122873345935728, "grad_norm": NaN, "learning_rate": 2.6000000000000002e-05, "loss": 0.0, "step": 26 }, { "epoch": 0.01570452232077941, "grad_norm": NaN, "learning_rate": 2.7000000000000002e-05, "loss": 0.0, "step": 27 }, { "epoch": 0.01628617129562309, "grad_norm": NaN, "learning_rate": 2.8000000000000003e-05, "loss": 0.0, "step": 28 }, { "epoch": 0.016867820270466773, "grad_norm": NaN, "learning_rate": 2.9e-05, "loss": 0.0, "step": 29 }, { "epoch": 0.017449469245310455, "grad_norm": NaN, "learning_rate": 3e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.018031118220154137, "grad_norm": NaN, "learning_rate": 3.1e-05, "loss": 0.0, "step": 31 }, { "epoch": 0.01861276719499782, "grad_norm": NaN, "learning_rate": 3.2000000000000005e-05, "loss": 0.0, "step": 32 }, { "epoch": 0.0191944161698415, "grad_norm": NaN, "learning_rate": 3.3e-05, "loss": 0.0, "step": 33 }, { "epoch": 0.019776065144685183, "grad_norm": NaN, "learning_rate": 3.4000000000000007e-05, "loss": 0.0, "step": 34 }, { "epoch": 0.020357714119528865, "grad_norm": NaN, "learning_rate": 3.5e-05, "loss": 0.0, "step": 35 }, { "epoch": 0.020939363094372547, "grad_norm": NaN, "learning_rate": 3.6e-05, "loss": 0.0, "step": 36 }, { "epoch": 0.02152101206921623, "grad_norm": NaN, "learning_rate": 3.7e-05, "loss": 0.0, "step": 37 }, { "epoch": 0.02210266104405991, "grad_norm": NaN, "learning_rate": 3.8e-05, "loss": 0.0, "step": 38 }, { "epoch": 0.022684310018903593, "grad_norm": NaN, "learning_rate": 3.9000000000000006e-05, "loss": 0.0, "step": 39 }, { "epoch": 0.023265958993747275, "grad_norm": NaN, "learning_rate": 4e-05, "loss": 0.0, "step": 40 }, { "epoch": 0.023847607968590954, "grad_norm": NaN, "learning_rate": 4.1e-05, "loss": 0.0, "step": 41 }, { "epoch": 0.024429256943434636, "grad_norm": NaN, "learning_rate": 4.2e-05, "loss": 0.0, "step": 42 }, { "epoch": 0.025010905918278318, "grad_norm": NaN, "learning_rate": 4.3e-05, "loss": 0.0, "step": 43 }, { "epoch": 0.025592554893122, "grad_norm": NaN, "learning_rate": 4.4000000000000006e-05, "loss": 0.0, "step": 44 }, { "epoch": 0.026174203867965682, "grad_norm": NaN, "learning_rate": 4.5e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.026755852842809364, "grad_norm": NaN, "learning_rate": 4.600000000000001e-05, "loss": 0.0, "step": 46 }, { "epoch": 0.027337501817653046, "grad_norm": NaN, "learning_rate": 4.7e-05, "loss": 0.0, "step": 47 }, { "epoch": 0.027919150792496728, "grad_norm": NaN, "learning_rate": 4.8e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.02850079976734041, "grad_norm": NaN, "learning_rate": 4.9e-05, "loss": 0.0, "step": 49 }, { "epoch": 0.029082448742184092, "grad_norm": NaN, "learning_rate": 5e-05, "loss": 0.0, "step": 50 }, { "epoch": 0.029664097717027774, "grad_norm": NaN, "learning_rate": 5.1000000000000006e-05, "loss": 0.0, "step": 51 }, { "epoch": 0.030245746691871456, "grad_norm": NaN, "learning_rate": 5.2000000000000004e-05, "loss": 0.0, "step": 52 }, { "epoch": 0.03082739566671514, "grad_norm": NaN, "learning_rate": 5.300000000000001e-05, "loss": 0.0, "step": 53 }, { "epoch": 0.03140904464155882, "grad_norm": NaN, "learning_rate": 5.4000000000000005e-05, "loss": 0.0, "step": 54 }, { "epoch": 0.0319906936164025, "grad_norm": NaN, "learning_rate": 5.500000000000001e-05, "loss": 0.0, "step": 55 }, { "epoch": 0.03257234259124618, "grad_norm": NaN, "learning_rate": 5.6000000000000006e-05, "loss": 0.0, "step": 56 }, { "epoch": 0.03315399156608986, "grad_norm": NaN, "learning_rate": 5.6999999999999996e-05, "loss": 0.0, "step": 57 }, { "epoch": 0.033735640540933545, "grad_norm": NaN, "learning_rate": 5.8e-05, "loss": 0.0, "step": 58 }, { "epoch": 0.03431728951577723, "grad_norm": NaN, "learning_rate": 5.9e-05, "loss": 0.0, "step": 59 }, { "epoch": 0.03489893849062091, "grad_norm": NaN, "learning_rate": 6e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.03548058746546459, "grad_norm": NaN, "learning_rate": 6.1e-05, "loss": 0.0, "step": 61 }, { "epoch": 0.03606223644030827, "grad_norm": NaN, "learning_rate": 6.2e-05, "loss": 0.0, "step": 62 }, { "epoch": 0.036643885415151956, "grad_norm": NaN, "learning_rate": 6.3e-05, "loss": 0.0, "step": 63 }, { "epoch": 0.03722553438999564, "grad_norm": NaN, "learning_rate": 6.400000000000001e-05, "loss": 0.0, "step": 64 }, { "epoch": 0.03780718336483932, "grad_norm": NaN, "learning_rate": 6.500000000000001e-05, "loss": 0.0, "step": 65 }, { "epoch": 0.038388832339683, "grad_norm": NaN, "learning_rate": 6.6e-05, "loss": 0.0, "step": 66 }, { "epoch": 0.038970481314526684, "grad_norm": NaN, "learning_rate": 6.7e-05, "loss": 0.0, "step": 67 }, { "epoch": 0.039552130289370366, "grad_norm": NaN, "learning_rate": 6.800000000000001e-05, "loss": 0.0, "step": 68 }, { "epoch": 0.04013377926421405, "grad_norm": NaN, "learning_rate": 6.9e-05, "loss": 0.0, "step": 69 }, { "epoch": 0.04071542823905773, "grad_norm": NaN, "learning_rate": 7e-05, "loss": 0.0, "step": 70 }, { "epoch": 0.04129707721390141, "grad_norm": NaN, "learning_rate": 7.1e-05, "loss": 0.0, "step": 71 }, { "epoch": 0.041878726188745094, "grad_norm": NaN, "learning_rate": 7.2e-05, "loss": 0.0, "step": 72 }, { "epoch": 0.042460375163588776, "grad_norm": NaN, "learning_rate": 7.3e-05, "loss": 0.0, "step": 73 }, { "epoch": 0.04304202413843246, "grad_norm": NaN, "learning_rate": 7.4e-05, "loss": 0.0, "step": 74 }, { "epoch": 0.04362367311327614, "grad_norm": NaN, "learning_rate": 7.500000000000001e-05, "loss": 0.0, "step": 75 }, { "epoch": 0.04420532208811982, "grad_norm": NaN, "learning_rate": 7.6e-05, "loss": 0.0, "step": 76 }, { "epoch": 0.044786971062963504, "grad_norm": NaN, "learning_rate": 7.7e-05, "loss": 0.0, "step": 77 }, { "epoch": 0.045368620037807186, "grad_norm": NaN, "learning_rate": 7.800000000000001e-05, "loss": 0.0, "step": 78 }, { "epoch": 0.04595026901265087, "grad_norm": NaN, "learning_rate": 7.900000000000001e-05, "loss": 0.0, "step": 79 }, { "epoch": 0.04653191798749455, "grad_norm": NaN, "learning_rate": 8e-05, "loss": 0.0, "step": 80 }, { "epoch": 0.047113566962338226, "grad_norm": NaN, "learning_rate": 8.1e-05, "loss": 0.0, "step": 81 }, { "epoch": 0.04769521593718191, "grad_norm": NaN, "learning_rate": 8.2e-05, "loss": 0.0, "step": 82 }, { "epoch": 0.04827686491202559, "grad_norm": NaN, "learning_rate": 8.3e-05, "loss": 0.0, "step": 83 }, { "epoch": 0.04885851388686927, "grad_norm": NaN, "learning_rate": 8.4e-05, "loss": 0.0, "step": 84 }, { "epoch": 0.049440162861712954, "grad_norm": NaN, "learning_rate": 8.5e-05, "loss": 0.0, "step": 85 }, { "epoch": 0.050021811836556636, "grad_norm": NaN, "learning_rate": 8.6e-05, "loss": 0.0, "step": 86 }, { "epoch": 0.05060346081140032, "grad_norm": NaN, "learning_rate": 8.7e-05, "loss": 0.0, "step": 87 }, { "epoch": 0.051185109786244, "grad_norm": NaN, "learning_rate": 8.800000000000001e-05, "loss": 0.0, "step": 88 }, { "epoch": 0.05176675876108768, "grad_norm": NaN, "learning_rate": 8.900000000000001e-05, "loss": 0.0, "step": 89 }, { "epoch": 0.052348407735931364, "grad_norm": NaN, "learning_rate": 9e-05, "loss": 0.0, "step": 90 }, { "epoch": 0.052930056710775046, "grad_norm": NaN, "learning_rate": 9.1e-05, "loss": 0.0, "step": 91 }, { "epoch": 0.05351170568561873, "grad_norm": NaN, "learning_rate": 9.200000000000001e-05, "loss": 0.0, "step": 92 }, { "epoch": 0.05409335466046241, "grad_norm": NaN, "learning_rate": 9.300000000000001e-05, "loss": 0.0, "step": 93 }, { "epoch": 0.05467500363530609, "grad_norm": NaN, "learning_rate": 9.4e-05, "loss": 0.0, "step": 94 }, { "epoch": 0.055256652610149774, "grad_norm": NaN, "learning_rate": 9.5e-05, "loss": 0.0, "step": 95 }, { "epoch": 0.055838301584993456, "grad_norm": NaN, "learning_rate": 9.6e-05, "loss": 0.0, "step": 96 }, { "epoch": 0.05641995055983714, "grad_norm": NaN, "learning_rate": 9.7e-05, "loss": 0.0, "step": 97 }, { "epoch": 0.05700159953468082, "grad_norm": NaN, "learning_rate": 9.8e-05, "loss": 0.0, "step": 98 }, { "epoch": 0.0575832485095245, "grad_norm": NaN, "learning_rate": 9.900000000000001e-05, "loss": 0.0, "step": 99 }, { "epoch": 0.058164897484368185, "grad_norm": NaN, "learning_rate": 0.0001, "loss": 0.0, "step": 100 }, { "epoch": 0.05874654645921187, "grad_norm": NaN, "learning_rate": 9.999997785543991e-05, "loss": 0.0, "step": 101 }, { "epoch": 0.05932819543405555, "grad_norm": NaN, "learning_rate": 9.999991142177925e-05, "loss": 0.0, "step": 102 }, { "epoch": 0.05990984440889923, "grad_norm": NaN, "learning_rate": 9.999980069907687e-05, "loss": 0.0, "step": 103 }, { "epoch": 0.06049149338374291, "grad_norm": NaN, "learning_rate": 9.999964568743084e-05, "loss": 0.0, "step": 104 }, { "epoch": 0.061073142358586595, "grad_norm": NaN, "learning_rate": 9.999944638697846e-05, "loss": 0.0, "step": 105 }, { "epoch": 0.06165479133343028, "grad_norm": NaN, "learning_rate": 9.99992027978963e-05, "loss": 0.0, "step": 106 }, { "epoch": 0.06223644030827396, "grad_norm": NaN, "learning_rate": 9.99989149204001e-05, "loss": 0.0, "step": 107 }, { "epoch": 0.06281808928311763, "grad_norm": NaN, "learning_rate": 9.999858275474485e-05, "loss": 0.0, "step": 108 }, { "epoch": 0.06339973825796132, "grad_norm": NaN, "learning_rate": 9.99982063012248e-05, "loss": 0.0, "step": 109 }, { "epoch": 0.063981387232805, "grad_norm": NaN, "learning_rate": 9.999778556017339e-05, "loss": 0.0, "step": 110 }, { "epoch": 0.06456303620764868, "grad_norm": NaN, "learning_rate": 9.999732053196329e-05, "loss": 0.0, "step": 111 }, { "epoch": 0.06514468518249236, "grad_norm": NaN, "learning_rate": 9.999681121700646e-05, "loss": 0.0, "step": 112 }, { "epoch": 0.06572633415733604, "grad_norm": NaN, "learning_rate": 9.9996257615754e-05, "loss": 0.0, "step": 113 }, { "epoch": 0.06630798313217973, "grad_norm": NaN, "learning_rate": 9.99956597286963e-05, "loss": 0.0, "step": 114 }, { "epoch": 0.06688963210702341, "grad_norm": NaN, "learning_rate": 9.999501755636293e-05, "loss": 0.0, "step": 115 }, { "epoch": 0.06747128108186709, "grad_norm": NaN, "learning_rate": 9.999433109932277e-05, "loss": 0.0, "step": 116 }, { "epoch": 0.06805293005671077, "grad_norm": NaN, "learning_rate": 9.999360035818381e-05, "loss": 0.0, "step": 117 }, { "epoch": 0.06863457903155445, "grad_norm": NaN, "learning_rate": 9.999282533359338e-05, "loss": 0.0, "step": 118 }, { "epoch": 0.06921622800639814, "grad_norm": NaN, "learning_rate": 9.999200602623795e-05, "loss": 0.0, "step": 119 }, { "epoch": 0.06979787698124182, "grad_norm": NaN, "learning_rate": 9.999114243684327e-05, "loss": 0.0, "step": 120 }, { "epoch": 0.0703795259560855, "grad_norm": NaN, "learning_rate": 9.999023456617427e-05, "loss": 0.0, "step": 121 }, { "epoch": 0.07096117493092918, "grad_norm": NaN, "learning_rate": 9.998928241503513e-05, "loss": 0.0, "step": 122 }, { "epoch": 0.07154282390577286, "grad_norm": NaN, "learning_rate": 9.998828598426926e-05, "loss": 0.0, "step": 123 }, { "epoch": 0.07212447288061655, "grad_norm": NaN, "learning_rate": 9.998724527475929e-05, "loss": 0.0, "step": 124 }, { "epoch": 0.07270612185546023, "grad_norm": NaN, "learning_rate": 9.998616028742702e-05, "loss": 0.0, "step": 125 }, { "epoch": 0.07328777083030391, "grad_norm": NaN, "learning_rate": 9.998503102323356e-05, "loss": 0.0, "step": 126 }, { "epoch": 0.0738694198051476, "grad_norm": NaN, "learning_rate": 9.998385748317917e-05, "loss": 0.0, "step": 127 }, { "epoch": 0.07445106877999128, "grad_norm": NaN, "learning_rate": 9.998263966830334e-05, "loss": 0.0, "step": 128 }, { "epoch": 0.07503271775483496, "grad_norm": NaN, "learning_rate": 9.998137757968482e-05, "loss": 0.0, "step": 129 }, { "epoch": 0.07561436672967864, "grad_norm": NaN, "learning_rate": 9.998007121844153e-05, "loss": 0.0, "step": 130 }, { "epoch": 0.07619601570452232, "grad_norm": NaN, "learning_rate": 9.997872058573061e-05, "loss": 0.0, "step": 131 }, { "epoch": 0.076777664679366, "grad_norm": NaN, "learning_rate": 9.997732568274845e-05, "loss": 0.0, "step": 132 }, { "epoch": 0.07735931365420969, "grad_norm": NaN, "learning_rate": 9.99758865107306e-05, "loss": 0.0, "step": 133 }, { "epoch": 0.07794096262905337, "grad_norm": NaN, "learning_rate": 9.997440307095189e-05, "loss": 0.0, "step": 134 }, { "epoch": 0.07852261160389705, "grad_norm": NaN, "learning_rate": 9.997287536472629e-05, "loss": 0.0, "step": 135 }, { "epoch": 0.07910426057874073, "grad_norm": NaN, "learning_rate": 9.997130339340705e-05, "loss": 0.0, "step": 136 }, { "epoch": 0.07968590955358441, "grad_norm": NaN, "learning_rate": 9.996968715838657e-05, "loss": 0.0, "step": 137 }, { "epoch": 0.0802675585284281, "grad_norm": NaN, "learning_rate": 9.996802666109647e-05, "loss": 0.0, "step": 138 }, { "epoch": 0.08084920750327178, "grad_norm": NaN, "learning_rate": 9.996632190300762e-05, "loss": 0.0, "step": 139 }, { "epoch": 0.08143085647811546, "grad_norm": NaN, "learning_rate": 9.996457288563006e-05, "loss": 0.0, "step": 140 }, { "epoch": 0.08201250545295914, "grad_norm": NaN, "learning_rate": 9.996277961051302e-05, "loss": 0.0, "step": 141 }, { "epoch": 0.08259415442780282, "grad_norm": NaN, "learning_rate": 9.996094207924496e-05, "loss": 0.0, "step": 142 }, { "epoch": 0.0831758034026465, "grad_norm": NaN, "learning_rate": 9.995906029345355e-05, "loss": 0.0, "step": 143 }, { "epoch": 0.08375745237749019, "grad_norm": NaN, "learning_rate": 9.995713425480562e-05, "loss": 0.0, "step": 144 }, { "epoch": 0.08433910135233387, "grad_norm": NaN, "learning_rate": 9.995516396500722e-05, "loss": 0.0, "step": 145 }, { "epoch": 0.08492075032717755, "grad_norm": NaN, "learning_rate": 9.995314942580362e-05, "loss": 0.0, "step": 146 }, { "epoch": 0.08550239930202123, "grad_norm": NaN, "learning_rate": 9.995109063897925e-05, "loss": 0.0, "step": 147 }, { "epoch": 0.08608404827686492, "grad_norm": NaN, "learning_rate": 9.994898760635775e-05, "loss": 0.0, "step": 148 }, { "epoch": 0.0866656972517086, "grad_norm": NaN, "learning_rate": 9.994684032980195e-05, "loss": 0.0, "step": 149 }, { "epoch": 0.08724734622655228, "grad_norm": NaN, "learning_rate": 9.994464881121387e-05, "loss": 0.0, "step": 150 }, { "epoch": 0.08782899520139596, "grad_norm": NaN, "learning_rate": 9.99424130525347e-05, "loss": 0.0, "step": 151 }, { "epoch": 0.08841064417623964, "grad_norm": NaN, "learning_rate": 9.994013305574484e-05, "loss": 0.0, "step": 152 }, { "epoch": 0.08899229315108333, "grad_norm": NaN, "learning_rate": 9.99378088228639e-05, "loss": 0.0, "step": 153 }, { "epoch": 0.08957394212592701, "grad_norm": NaN, "learning_rate": 9.993544035595064e-05, "loss": 0.0, "step": 154 }, { "epoch": 0.09015559110077069, "grad_norm": NaN, "learning_rate": 9.993302765710297e-05, "loss": 0.0, "step": 155 }, { "epoch": 0.09073724007561437, "grad_norm": NaN, "learning_rate": 9.993057072845806e-05, "loss": 0.0, "step": 156 }, { "epoch": 0.09131888905045805, "grad_norm": NaN, "learning_rate": 9.992806957219219e-05, "loss": 0.0, "step": 157 }, { "epoch": 0.09190053802530174, "grad_norm": NaN, "learning_rate": 9.992552419052083e-05, "loss": 0.0, "step": 158 }, { "epoch": 0.09248218700014542, "grad_norm": NaN, "learning_rate": 9.992293458569866e-05, "loss": 0.0, "step": 159 }, { "epoch": 0.0930638359749891, "grad_norm": NaN, "learning_rate": 9.992030076001947e-05, "loss": 0.0, "step": 160 }, { "epoch": 0.09364548494983277, "grad_norm": NaN, "learning_rate": 9.991762271581632e-05, "loss": 0.0, "step": 161 }, { "epoch": 0.09422713392467645, "grad_norm": NaN, "learning_rate": 9.991490045546131e-05, "loss": 0.0, "step": 162 }, { "epoch": 0.09480878289952013, "grad_norm": NaN, "learning_rate": 9.991213398136581e-05, "loss": 0.0, "step": 163 }, { "epoch": 0.09539043187436382, "grad_norm": NaN, "learning_rate": 9.990932329598029e-05, "loss": 0.0, "step": 164 }, { "epoch": 0.0959720808492075, "grad_norm": NaN, "learning_rate": 9.990646840179442e-05, "loss": 0.0, "step": 165 }, { "epoch": 0.09655372982405118, "grad_norm": NaN, "learning_rate": 9.9903569301337e-05, "loss": 0.0, "step": 166 }, { "epoch": 0.09713537879889486, "grad_norm": NaN, "learning_rate": 9.990062599717603e-05, "loss": 0.0, "step": 167 }, { "epoch": 0.09771702777373854, "grad_norm": NaN, "learning_rate": 9.98976384919186e-05, "loss": 0.0, "step": 168 }, { "epoch": 0.09829867674858223, "grad_norm": NaN, "learning_rate": 9.989460678821103e-05, "loss": 0.0, "step": 169 }, { "epoch": 0.09888032572342591, "grad_norm": NaN, "learning_rate": 9.989153088873871e-05, "loss": 0.0, "step": 170 }, { "epoch": 0.09946197469826959, "grad_norm": NaN, "learning_rate": 9.988841079622625e-05, "loss": 0.0, "step": 171 }, { "epoch": 0.10004362367311327, "grad_norm": NaN, "learning_rate": 9.988524651343736e-05, "loss": 0.0, "step": 172 }, { "epoch": 0.10062527264795695, "grad_norm": NaN, "learning_rate": 9.988203804317491e-05, "loss": 0.0, "step": 173 }, { "epoch": 0.10120692162280064, "grad_norm": NaN, "learning_rate": 9.987878538828088e-05, "loss": 0.0, "step": 174 }, { "epoch": 0.10178857059764432, "grad_norm": NaN, "learning_rate": 9.987548855163646e-05, "loss": 0.0, "step": 175 }, { "epoch": 0.102370219572488, "grad_norm": NaN, "learning_rate": 9.98721475361619e-05, "loss": 0.0, "step": 176 }, { "epoch": 0.10295186854733168, "grad_norm": NaN, "learning_rate": 9.986876234481661e-05, "loss": 0.0, "step": 177 }, { "epoch": 0.10353351752217536, "grad_norm": NaN, "learning_rate": 9.986533298059914e-05, "loss": 0.0, "step": 178 }, { "epoch": 0.10411516649701905, "grad_norm": NaN, "learning_rate": 9.986185944654719e-05, "loss": 0.0, "step": 179 }, { "epoch": 0.10469681547186273, "grad_norm": NaN, "learning_rate": 9.985834174573751e-05, "loss": 0.0, "step": 180 }, { "epoch": 0.10527846444670641, "grad_norm": NaN, "learning_rate": 9.985477988128603e-05, "loss": 0.0, "step": 181 }, { "epoch": 0.10586011342155009, "grad_norm": NaN, "learning_rate": 9.985117385634779e-05, "loss": 0.0, "step": 182 }, { "epoch": 0.10644176239639377, "grad_norm": NaN, "learning_rate": 9.984752367411696e-05, "loss": 0.0, "step": 183 }, { "epoch": 0.10702341137123746, "grad_norm": NaN, "learning_rate": 9.984382933782679e-05, "loss": 0.0, "step": 184 }, { "epoch": 0.10760506034608114, "grad_norm": NaN, "learning_rate": 9.984009085074966e-05, "loss": 0.0, "step": 185 }, { "epoch": 0.10818670932092482, "grad_norm": NaN, "learning_rate": 9.983630821619706e-05, "loss": 0.0, "step": 186 }, { "epoch": 0.1087683582957685, "grad_norm": NaN, "learning_rate": 9.983248143751956e-05, "loss": 0.0, "step": 187 }, { "epoch": 0.10935000727061218, "grad_norm": NaN, "learning_rate": 9.982861051810689e-05, "loss": 0.0, "step": 188 }, { "epoch": 0.10993165624545587, "grad_norm": NaN, "learning_rate": 9.982469546138782e-05, "loss": 0.0, "step": 189 }, { "epoch": 0.11051330522029955, "grad_norm": NaN, "learning_rate": 9.982073627083024e-05, "loss": 0.0, "step": 190 }, { "epoch": 0.11109495419514323, "grad_norm": NaN, "learning_rate": 9.981673294994115e-05, "loss": 0.0, "step": 191 }, { "epoch": 0.11167660316998691, "grad_norm": NaN, "learning_rate": 9.981268550226658e-05, "loss": 0.0, "step": 192 }, { "epoch": 0.1122582521448306, "grad_norm": NaN, "learning_rate": 9.980859393139173e-05, "loss": 0.0, "step": 193 }, { "epoch": 0.11283990111967428, "grad_norm": NaN, "learning_rate": 9.980445824094081e-05, "loss": 0.0, "step": 194 }, { "epoch": 0.11342155009451796, "grad_norm": NaN, "learning_rate": 9.980027843457717e-05, "loss": 0.0, "step": 195 }, { "epoch": 0.11400319906936164, "grad_norm": NaN, "learning_rate": 9.979605451600318e-05, "loss": 0.0, "step": 196 }, { "epoch": 0.11458484804420532, "grad_norm": NaN, "learning_rate": 9.979178648896034e-05, "loss": 0.0, "step": 197 }, { "epoch": 0.115166497019049, "grad_norm": NaN, "learning_rate": 9.978747435722917e-05, "loss": 0.0, "step": 198 }, { "epoch": 0.11574814599389269, "grad_norm": NaN, "learning_rate": 9.97831181246293e-05, "loss": 0.0, "step": 199 }, { "epoch": 0.11632979496873637, "grad_norm": NaN, "learning_rate": 9.977871779501939e-05, "loss": 0.0, "step": 200 }, { "epoch": 0.11691144394358005, "grad_norm": NaN, "learning_rate": 9.97742733722972e-05, "loss": 0.0, "step": 201 }, { "epoch": 0.11749309291842373, "grad_norm": NaN, "learning_rate": 9.976978486039948e-05, "loss": 0.0, "step": 202 }, { "epoch": 0.11807474189326742, "grad_norm": NaN, "learning_rate": 9.976525226330209e-05, "loss": 0.0, "step": 203 }, { "epoch": 0.1186563908681111, "grad_norm": NaN, "learning_rate": 9.976067558501995e-05, "loss": 0.0, "step": 204 }, { "epoch": 0.11923803984295478, "grad_norm": NaN, "learning_rate": 9.975605482960697e-05, "loss": 0.0, "step": 205 }, { "epoch": 0.11981968881779846, "grad_norm": NaN, "learning_rate": 9.975139000115615e-05, "loss": 0.0, "step": 206 }, { "epoch": 0.12040133779264214, "grad_norm": NaN, "learning_rate": 9.974668110379952e-05, "loss": 0.0, "step": 207 }, { "epoch": 0.12098298676748583, "grad_norm": NaN, "learning_rate": 9.974192814170811e-05, "loss": 0.0, "step": 208 }, { "epoch": 0.12156463574232951, "grad_norm": NaN, "learning_rate": 9.973713111909205e-05, "loss": 0.0, "step": 209 }, { "epoch": 0.12214628471717319, "grad_norm": NaN, "learning_rate": 9.973229004020042e-05, "loss": 0.0, "step": 210 }, { "epoch": 0.12272793369201687, "grad_norm": NaN, "learning_rate": 9.972740490932138e-05, "loss": 0.0, "step": 211 }, { "epoch": 0.12330958266686055, "grad_norm": NaN, "learning_rate": 9.972247573078208e-05, "loss": 0.0, "step": 212 }, { "epoch": 0.12389123164170424, "grad_norm": NaN, "learning_rate": 9.971750250894871e-05, "loss": 0.0, "step": 213 }, { "epoch": 0.12447288061654792, "grad_norm": NaN, "learning_rate": 9.971248524822648e-05, "loss": 0.0, "step": 214 }, { "epoch": 0.1250545295913916, "grad_norm": NaN, "learning_rate": 9.970742395305957e-05, "loss": 0.0, "step": 215 }, { "epoch": 0.12563617856623527, "grad_norm": NaN, "learning_rate": 9.97023186279312e-05, "loss": 0.0, "step": 216 }, { "epoch": 0.12621782754107896, "grad_norm": NaN, "learning_rate": 9.969716927736355e-05, "loss": 0.0, "step": 217 }, { "epoch": 0.12679947651592263, "grad_norm": NaN, "learning_rate": 9.969197590591785e-05, "loss": 0.0, "step": 218 }, { "epoch": 0.12738112549076633, "grad_norm": NaN, "learning_rate": 9.96867385181943e-05, "loss": 0.0, "step": 219 }, { "epoch": 0.12796277446561, "grad_norm": NaN, "learning_rate": 9.968145711883205e-05, "loss": 0.0, "step": 220 }, { "epoch": 0.1285444234404537, "grad_norm": NaN, "learning_rate": 9.96761317125093e-05, "loss": 0.0, "step": 221 }, { "epoch": 0.12912607241529736, "grad_norm": NaN, "learning_rate": 9.967076230394322e-05, "loss": 0.0, "step": 222 }, { "epoch": 0.12970772139014106, "grad_norm": NaN, "learning_rate": 9.96653488978899e-05, "loss": 0.0, "step": 223 }, { "epoch": 0.13028937036498472, "grad_norm": NaN, "learning_rate": 9.965989149914445e-05, "loss": 0.0, "step": 224 }, { "epoch": 0.13087101933982842, "grad_norm": NaN, "learning_rate": 9.965439011254096e-05, "loss": 0.0, "step": 225 }, { "epoch": 0.1314526683146721, "grad_norm": NaN, "learning_rate": 9.964884474295243e-05, "loss": 0.0, "step": 226 }, { "epoch": 0.13203431728951578, "grad_norm": NaN, "learning_rate": 9.964325539529087e-05, "loss": 0.0, "step": 227 }, { "epoch": 0.13261596626435945, "grad_norm": NaN, "learning_rate": 9.96376220745072e-05, "loss": 0.0, "step": 228 }, { "epoch": 0.13319761523920315, "grad_norm": NaN, "learning_rate": 9.963194478559136e-05, "loss": 0.0, "step": 229 }, { "epoch": 0.13377926421404682, "grad_norm": NaN, "learning_rate": 9.962622353357216e-05, "loss": 0.0, "step": 230 }, { "epoch": 0.1343609131888905, "grad_norm": NaN, "learning_rate": 9.96204583235174e-05, "loss": 0.0, "step": 231 }, { "epoch": 0.13494256216373418, "grad_norm": NaN, "learning_rate": 9.961464916053378e-05, "loss": 0.0, "step": 232 }, { "epoch": 0.13552421113857788, "grad_norm": NaN, "learning_rate": 9.960879604976698e-05, "loss": 0.0, "step": 233 }, { "epoch": 0.13610586011342155, "grad_norm": NaN, "learning_rate": 9.960289899640159e-05, "loss": 0.0, "step": 234 }, { "epoch": 0.13668750908826524, "grad_norm": NaN, "learning_rate": 9.959695800566107e-05, "loss": 0.0, "step": 235 }, { "epoch": 0.1372691580631089, "grad_norm": NaN, "learning_rate": 9.959097308280787e-05, "loss": 0.0, "step": 236 }, { "epoch": 0.1378508070379526, "grad_norm": NaN, "learning_rate": 9.958494423314335e-05, "loss": 0.0, "step": 237 }, { "epoch": 0.13843245601279627, "grad_norm": NaN, "learning_rate": 9.957887146200774e-05, "loss": 0.0, "step": 238 }, { "epoch": 0.13901410498763997, "grad_norm": NaN, "learning_rate": 9.957275477478018e-05, "loss": 0.0, "step": 239 }, { "epoch": 0.13959575396248364, "grad_norm": NaN, "learning_rate": 9.956659417687878e-05, "loss": 0.0, "step": 240 }, { "epoch": 0.14017740293732733, "grad_norm": NaN, "learning_rate": 9.95603896737604e-05, "loss": 0.0, "step": 241 }, { "epoch": 0.140759051912171, "grad_norm": NaN, "learning_rate": 9.955414127092095e-05, "loss": 0.0, "step": 242 }, { "epoch": 0.1413407008870147, "grad_norm": NaN, "learning_rate": 9.954784897389513e-05, "loss": 0.0, "step": 243 }, { "epoch": 0.14192234986185837, "grad_norm": NaN, "learning_rate": 9.954151278825655e-05, "loss": 0.0, "step": 244 }, { "epoch": 0.14250399883670206, "grad_norm": NaN, "learning_rate": 9.95351327196177e-05, "loss": 0.0, "step": 245 }, { "epoch": 0.14308564781154573, "grad_norm": NaN, "learning_rate": 9.95287087736299e-05, "loss": 0.0, "step": 246 }, { "epoch": 0.14366729678638943, "grad_norm": NaN, "learning_rate": 9.952224095598341e-05, "loss": 0.0, "step": 247 }, { "epoch": 0.1442489457612331, "grad_norm": NaN, "learning_rate": 9.951572927240729e-05, "loss": 0.0, "step": 248 }, { "epoch": 0.1448305947360768, "grad_norm": NaN, "learning_rate": 9.950917372866948e-05, "loss": 0.0, "step": 249 }, { "epoch": 0.14541224371092046, "grad_norm": NaN, "learning_rate": 9.950257433057675e-05, "loss": 0.0, "step": 250 }, { "epoch": 0.14599389268576415, "grad_norm": NaN, "learning_rate": 9.949593108397475e-05, "loss": 0.0, "step": 251 }, { "epoch": 0.14657554166060782, "grad_norm": NaN, "learning_rate": 9.948924399474794e-05, "loss": 0.0, "step": 252 }, { "epoch": 0.14715719063545152, "grad_norm": NaN, "learning_rate": 9.948251306881963e-05, "loss": 0.0, "step": 253 }, { "epoch": 0.1477388396102952, "grad_norm": NaN, "learning_rate": 9.947573831215195e-05, "loss": 0.0, "step": 254 }, { "epoch": 0.14832048858513888, "grad_norm": NaN, "learning_rate": 9.946891973074586e-05, "loss": 0.0, "step": 255 }, { "epoch": 0.14890213755998255, "grad_norm": NaN, "learning_rate": 9.946205733064116e-05, "loss": 0.0, "step": 256 }, { "epoch": 0.14948378653482625, "grad_norm": NaN, "learning_rate": 9.945515111791641e-05, "loss": 0.0, "step": 257 }, { "epoch": 0.15006543550966991, "grad_norm": NaN, "learning_rate": 9.944820109868904e-05, "loss": 0.0, "step": 258 }, { "epoch": 0.15064708448451358, "grad_norm": NaN, "learning_rate": 9.944120727911524e-05, "loss": 0.0, "step": 259 }, { "epoch": 0.15122873345935728, "grad_norm": NaN, "learning_rate": 9.943416966539002e-05, "loss": 0.0, "step": 260 }, { "epoch": 0.15181038243420095, "grad_norm": NaN, "learning_rate": 9.942708826374716e-05, "loss": 0.0, "step": 261 }, { "epoch": 0.15239203140904464, "grad_norm": NaN, "learning_rate": 9.941996308045927e-05, "loss": 0.0, "step": 262 }, { "epoch": 0.1529736803838883, "grad_norm": NaN, "learning_rate": 9.941279412183768e-05, "loss": 0.0, "step": 263 }, { "epoch": 0.153555329358732, "grad_norm": NaN, "learning_rate": 9.940558139423255e-05, "loss": 0.0, "step": 264 }, { "epoch": 0.15413697833357567, "grad_norm": NaN, "learning_rate": 9.939832490403277e-05, "loss": 0.0, "step": 265 }, { "epoch": 0.15471862730841937, "grad_norm": NaN, "learning_rate": 9.939102465766602e-05, "loss": 0.0, "step": 266 }, { "epoch": 0.15530027628326304, "grad_norm": NaN, "learning_rate": 9.938368066159873e-05, "loss": 0.0, "step": 267 }, { "epoch": 0.15588192525810673, "grad_norm": NaN, "learning_rate": 9.937629292233608e-05, "loss": 0.0, "step": 268 }, { "epoch": 0.1564635742329504, "grad_norm": NaN, "learning_rate": 9.936886144642201e-05, "loss": 0.0, "step": 269 }, { "epoch": 0.1570452232077941, "grad_norm": NaN, "learning_rate": 9.936138624043918e-05, "loss": 0.0, "step": 270 }, { "epoch": 0.15762687218263777, "grad_norm": NaN, "learning_rate": 9.935386731100899e-05, "loss": 0.0, "step": 271 }, { "epoch": 0.15820852115748146, "grad_norm": NaN, "learning_rate": 9.934630466479158e-05, "loss": 0.0, "step": 272 }, { "epoch": 0.15879017013232513, "grad_norm": NaN, "learning_rate": 9.933869830848581e-05, "loss": 0.0, "step": 273 }, { "epoch": 0.15937181910716883, "grad_norm": NaN, "learning_rate": 9.933104824882928e-05, "loss": 0.0, "step": 274 }, { "epoch": 0.1599534680820125, "grad_norm": NaN, "learning_rate": 9.932335449259824e-05, "loss": 0.0, "step": 275 }, { "epoch": 0.1605351170568562, "grad_norm": NaN, "learning_rate": 9.93156170466077e-05, "loss": 0.0, "step": 276 }, { "epoch": 0.16111676603169986, "grad_norm": NaN, "learning_rate": 9.930783591771136e-05, "loss": 0.0, "step": 277 }, { "epoch": 0.16169841500654356, "grad_norm": NaN, "learning_rate": 9.930001111280161e-05, "loss": 0.0, "step": 278 }, { "epoch": 0.16228006398138722, "grad_norm": NaN, "learning_rate": 9.92921426388095e-05, "loss": 0.0, "step": 279 }, { "epoch": 0.16286171295623092, "grad_norm": NaN, "learning_rate": 9.92842305027048e-05, "loss": 0.0, "step": 280 }, { "epoch": 0.1634433619310746, "grad_norm": NaN, "learning_rate": 9.927627471149595e-05, "loss": 0.0, "step": 281 }, { "epoch": 0.16402501090591828, "grad_norm": NaN, "learning_rate": 9.926827527223004e-05, "loss": 0.0, "step": 282 }, { "epoch": 0.16460665988076195, "grad_norm": NaN, "learning_rate": 9.926023219199283e-05, "loss": 0.0, "step": 283 }, { "epoch": 0.16518830885560565, "grad_norm": NaN, "learning_rate": 9.925214547790875e-05, "loss": 0.0, "step": 284 }, { "epoch": 0.16576995783044932, "grad_norm": NaN, "learning_rate": 9.924401513714086e-05, "loss": 0.0, "step": 285 }, { "epoch": 0.166351606805293, "grad_norm": NaN, "learning_rate": 9.923584117689089e-05, "loss": 0.0, "step": 286 }, { "epoch": 0.16693325578013668, "grad_norm": NaN, "learning_rate": 9.922762360439916e-05, "loss": 0.0, "step": 287 }, { "epoch": 0.16751490475498038, "grad_norm": NaN, "learning_rate": 9.921936242694467e-05, "loss": 0.0, "step": 288 }, { "epoch": 0.16809655372982404, "grad_norm": NaN, "learning_rate": 9.921105765184502e-05, "loss": 0.0, "step": 289 }, { "epoch": 0.16867820270466774, "grad_norm": NaN, "learning_rate": 9.920270928645644e-05, "loss": 0.0, "step": 290 }, { "epoch": 0.1692598516795114, "grad_norm": NaN, "learning_rate": 9.919431733817376e-05, "loss": 0.0, "step": 291 }, { "epoch": 0.1698415006543551, "grad_norm": NaN, "learning_rate": 9.918588181443043e-05, "loss": 0.0, "step": 292 }, { "epoch": 0.17042314962919877, "grad_norm": NaN, "learning_rate": 9.917740272269847e-05, "loss": 0.0, "step": 293 }, { "epoch": 0.17100479860404247, "grad_norm": NaN, "learning_rate": 9.916888007048853e-05, "loss": 0.0, "step": 294 }, { "epoch": 0.17158644757888614, "grad_norm": NaN, "learning_rate": 9.916031386534979e-05, "loss": 0.0, "step": 295 }, { "epoch": 0.17216809655372983, "grad_norm": NaN, "learning_rate": 9.915170411487009e-05, "loss": 0.0, "step": 296 }, { "epoch": 0.1727497455285735, "grad_norm": NaN, "learning_rate": 9.914305082667576e-05, "loss": 0.0, "step": 297 }, { "epoch": 0.1733313945034172, "grad_norm": NaN, "learning_rate": 9.913435400843176e-05, "loss": 0.0, "step": 298 }, { "epoch": 0.17391304347826086, "grad_norm": NaN, "learning_rate": 9.912561366784157e-05, "loss": 0.0, "step": 299 }, { "epoch": 0.17449469245310456, "grad_norm": NaN, "learning_rate": 9.911682981264722e-05, "loss": 0.0, "step": 300 }, { "epoch": 0.17507634142794823, "grad_norm": NaN, "learning_rate": 9.910800245062929e-05, "loss": 0.0, "step": 301 }, { "epoch": 0.17565799040279192, "grad_norm": NaN, "learning_rate": 9.909913158960691e-05, "loss": 0.0, "step": 302 }, { "epoch": 0.1762396393776356, "grad_norm": NaN, "learning_rate": 9.909021723743773e-05, "loss": 0.0, "step": 303 }, { "epoch": 0.1768212883524793, "grad_norm": NaN, "learning_rate": 9.908125940201793e-05, "loss": 0.0, "step": 304 }, { "epoch": 0.17740293732732296, "grad_norm": NaN, "learning_rate": 9.907225809128222e-05, "loss": 0.0, "step": 305 }, { "epoch": 0.17798458630216665, "grad_norm": NaN, "learning_rate": 9.906321331320377e-05, "loss": 0.0, "step": 306 }, { "epoch": 0.17856623527701032, "grad_norm": NaN, "learning_rate": 9.90541250757943e-05, "loss": 0.0, "step": 307 }, { "epoch": 0.17914788425185402, "grad_norm": NaN, "learning_rate": 9.9044993387104e-05, "loss": 0.0, "step": 308 }, { "epoch": 0.17972953322669769, "grad_norm": NaN, "learning_rate": 9.903581825522159e-05, "loss": 0.0, "step": 309 }, { "epoch": 0.18031118220154138, "grad_norm": NaN, "learning_rate": 9.902659968827421e-05, "loss": 0.0, "step": 310 }, { "epoch": 0.18089283117638505, "grad_norm": NaN, "learning_rate": 9.901733769442752e-05, "loss": 0.0, "step": 311 }, { "epoch": 0.18147448015122875, "grad_norm": NaN, "learning_rate": 9.900803228188562e-05, "loss": 0.0, "step": 312 }, { "epoch": 0.1820561291260724, "grad_norm": NaN, "learning_rate": 9.899868345889109e-05, "loss": 0.0, "step": 313 }, { "epoch": 0.1826377781009161, "grad_norm": NaN, "learning_rate": 9.898929123372493e-05, "loss": 0.0, "step": 314 }, { "epoch": 0.18321942707575978, "grad_norm": NaN, "learning_rate": 9.897985561470665e-05, "loss": 0.0, "step": 315 }, { "epoch": 0.18380107605060347, "grad_norm": NaN, "learning_rate": 9.897037661019413e-05, "loss": 0.0, "step": 316 }, { "epoch": 0.18438272502544714, "grad_norm": NaN, "learning_rate": 9.89608542285837e-05, "loss": 0.0, "step": 317 }, { "epoch": 0.18496437400029084, "grad_norm": NaN, "learning_rate": 9.895128847831014e-05, "loss": 0.0, "step": 318 }, { "epoch": 0.1855460229751345, "grad_norm": NaN, "learning_rate": 9.894167936784659e-05, "loss": 0.0, "step": 319 }, { "epoch": 0.1861276719499782, "grad_norm": NaN, "learning_rate": 9.893202690570467e-05, "loss": 0.0, "step": 320 }, { "epoch": 0.18670932092482187, "grad_norm": NaN, "learning_rate": 9.892233110043431e-05, "loss": 0.0, "step": 321 }, { "epoch": 0.18729096989966554, "grad_norm": NaN, "learning_rate": 9.891259196062394e-05, "loss": 0.0, "step": 322 }, { "epoch": 0.18787261887450923, "grad_norm": NaN, "learning_rate": 9.890280949490029e-05, "loss": 0.0, "step": 323 }, { "epoch": 0.1884542678493529, "grad_norm": NaN, "learning_rate": 9.88929837119285e-05, "loss": 0.0, "step": 324 }, { "epoch": 0.1890359168241966, "grad_norm": NaN, "learning_rate": 9.888311462041206e-05, "loss": 0.0, "step": 325 }, { "epoch": 0.18961756579904027, "grad_norm": NaN, "learning_rate": 9.887320222909287e-05, "loss": 0.0, "step": 326 }, { "epoch": 0.19019921477388396, "grad_norm": NaN, "learning_rate": 9.886324654675115e-05, "loss": 0.0, "step": 327 }, { "epoch": 0.19078086374872763, "grad_norm": NaN, "learning_rate": 9.885324758220543e-05, "loss": 0.0, "step": 328 }, { "epoch": 0.19136251272357133, "grad_norm": NaN, "learning_rate": 9.884320534431264e-05, "loss": 0.0, "step": 329 }, { "epoch": 0.191944161698415, "grad_norm": NaN, "learning_rate": 9.883311984196801e-05, "loss": 0.0, "step": 330 }, { "epoch": 0.1925258106732587, "grad_norm": NaN, "learning_rate": 9.882299108410513e-05, "loss": 0.0, "step": 331 }, { "epoch": 0.19310745964810236, "grad_norm": NaN, "learning_rate": 9.881281907969583e-05, "loss": 0.0, "step": 332 }, { "epoch": 0.19368910862294605, "grad_norm": NaN, "learning_rate": 9.880260383775033e-05, "loss": 0.0, "step": 333 }, { "epoch": 0.19427075759778972, "grad_norm": NaN, "learning_rate": 9.87923453673171e-05, "loss": 0.0, "step": 334 }, { "epoch": 0.19485240657263342, "grad_norm": NaN, "learning_rate": 9.87820436774829e-05, "loss": 0.0, "step": 335 }, { "epoch": 0.1954340555474771, "grad_norm": NaN, "learning_rate": 9.87716987773728e-05, "loss": 0.0, "step": 336 }, { "epoch": 0.19601570452232078, "grad_norm": NaN, "learning_rate": 9.876131067615013e-05, "loss": 0.0, "step": 337 }, { "epoch": 0.19659735349716445, "grad_norm": NaN, "learning_rate": 9.875087938301648e-05, "loss": 0.0, "step": 338 }, { "epoch": 0.19717900247200815, "grad_norm": NaN, "learning_rate": 9.87404049072117e-05, "loss": 0.0, "step": 339 }, { "epoch": 0.19776065144685182, "grad_norm": NaN, "learning_rate": 9.872988725801392e-05, "loss": 0.0, "step": 340 }, { "epoch": 0.1983423004216955, "grad_norm": NaN, "learning_rate": 9.871932644473947e-05, "loss": 0.0, "step": 341 }, { "epoch": 0.19892394939653918, "grad_norm": NaN, "learning_rate": 9.870872247674295e-05, "loss": 0.0, "step": 342 }, { "epoch": 0.19950559837138288, "grad_norm": NaN, "learning_rate": 9.869807536341714e-05, "loss": 0.0, "step": 343 }, { "epoch": 0.20008724734622654, "grad_norm": NaN, "learning_rate": 9.868738511419307e-05, "loss": 0.0, "step": 344 }, { "epoch": 0.20066889632107024, "grad_norm": NaN, "learning_rate": 9.867665173854e-05, "loss": 0.0, "step": 345 }, { "epoch": 0.2012505452959139, "grad_norm": NaN, "learning_rate": 9.866587524596535e-05, "loss": 0.0, "step": 346 }, { "epoch": 0.2018321942707576, "grad_norm": NaN, "learning_rate": 9.865505564601474e-05, "loss": 0.0, "step": 347 }, { "epoch": 0.20241384324560127, "grad_norm": NaN, "learning_rate": 9.864419294827198e-05, "loss": 0.0, "step": 348 }, { "epoch": 0.20299549222044497, "grad_norm": NaN, "learning_rate": 9.86332871623591e-05, "loss": 0.0, "step": 349 }, { "epoch": 0.20357714119528864, "grad_norm": NaN, "learning_rate": 9.862233829793619e-05, "loss": 0.0, "step": 350 }, { "epoch": 0.20415879017013233, "grad_norm": NaN, "learning_rate": 9.861134636470159e-05, "loss": 0.0, "step": 351 }, { "epoch": 0.204740439144976, "grad_norm": NaN, "learning_rate": 9.860031137239179e-05, "loss": 0.0, "step": 352 }, { "epoch": 0.2053220881198197, "grad_norm": NaN, "learning_rate": 9.858923333078133e-05, "loss": 0.0, "step": 353 }, { "epoch": 0.20590373709466336, "grad_norm": NaN, "learning_rate": 9.857811224968301e-05, "loss": 0.0, "step": 354 }, { "epoch": 0.20648538606950706, "grad_norm": NaN, "learning_rate": 9.856694813894762e-05, "loss": 0.0, "step": 355 }, { "epoch": 0.20706703504435073, "grad_norm": NaN, "learning_rate": 9.855574100846419e-05, "loss": 0.0, "step": 356 }, { "epoch": 0.20764868401919442, "grad_norm": NaN, "learning_rate": 9.854449086815978e-05, "loss": 0.0, "step": 357 }, { "epoch": 0.2082303329940381, "grad_norm": NaN, "learning_rate": 9.853319772799956e-05, "loss": 0.0, "step": 358 }, { "epoch": 0.2088119819688818, "grad_norm": NaN, "learning_rate": 9.852186159798679e-05, "loss": 0.0, "step": 359 }, { "epoch": 0.20939363094372546, "grad_norm": NaN, "learning_rate": 9.851048248816283e-05, "loss": 0.0, "step": 360 }, { "epoch": 0.20997527991856915, "grad_norm": NaN, "learning_rate": 9.84990604086071e-05, "loss": 0.0, "step": 361 }, { "epoch": 0.21055692889341282, "grad_norm": NaN, "learning_rate": 9.848759536943705e-05, "loss": 0.0, "step": 362 }, { "epoch": 0.21113857786825652, "grad_norm": NaN, "learning_rate": 9.847608738080825e-05, "loss": 0.0, "step": 363 }, { "epoch": 0.21172022684310018, "grad_norm": NaN, "learning_rate": 9.846453645291422e-05, "loss": 0.0, "step": 364 }, { "epoch": 0.21230187581794388, "grad_norm": NaN, "learning_rate": 9.845294259598661e-05, "loss": 0.0, "step": 365 }, { "epoch": 0.21288352479278755, "grad_norm": NaN, "learning_rate": 9.844130582029505e-05, "loss": 0.0, "step": 366 }, { "epoch": 0.21346517376763124, "grad_norm": NaN, "learning_rate": 9.842962613614716e-05, "loss": 0.0, "step": 367 }, { "epoch": 0.2140468227424749, "grad_norm": NaN, "learning_rate": 9.841790355388864e-05, "loss": 0.0, "step": 368 }, { "epoch": 0.2146284717173186, "grad_norm": NaN, "learning_rate": 9.840613808390314e-05, "loss": 0.0, "step": 369 }, { "epoch": 0.21521012069216228, "grad_norm": NaN, "learning_rate": 9.839432973661226e-05, "loss": 0.0, "step": 370 }, { "epoch": 0.21579176966700597, "grad_norm": NaN, "learning_rate": 9.838247852247571e-05, "loss": 0.0, "step": 371 }, { "epoch": 0.21637341864184964, "grad_norm": NaN, "learning_rate": 9.837058445199102e-05, "loss": 0.0, "step": 372 }, { "epoch": 0.21695506761669334, "grad_norm": NaN, "learning_rate": 9.835864753569375e-05, "loss": 0.0, "step": 373 }, { "epoch": 0.217536716591537, "grad_norm": NaN, "learning_rate": 9.834666778415745e-05, "loss": 0.0, "step": 374 }, { "epoch": 0.2181183655663807, "grad_norm": NaN, "learning_rate": 9.833464520799353e-05, "loss": 0.0, "step": 375 }, { "epoch": 0.21870001454122437, "grad_norm": NaN, "learning_rate": 9.83225798178514e-05, "loss": 0.0, "step": 376 }, { "epoch": 0.21928166351606806, "grad_norm": NaN, "learning_rate": 9.831047162441838e-05, "loss": 0.0, "step": 377 }, { "epoch": 0.21986331249091173, "grad_norm": NaN, "learning_rate": 9.829832063841968e-05, "loss": 0.0, "step": 378 }, { "epoch": 0.22044496146575543, "grad_norm": NaN, "learning_rate": 9.828612687061842e-05, "loss": 0.0, "step": 379 }, { "epoch": 0.2210266104405991, "grad_norm": NaN, "learning_rate": 9.827389033181564e-05, "loss": 0.0, "step": 380 }, { "epoch": 0.2216082594154428, "grad_norm": NaN, "learning_rate": 9.826161103285027e-05, "loss": 0.0, "step": 381 }, { "epoch": 0.22218990839028646, "grad_norm": NaN, "learning_rate": 9.824928898459906e-05, "loss": 0.0, "step": 382 }, { "epoch": 0.22277155736513016, "grad_norm": NaN, "learning_rate": 9.823692419797668e-05, "loss": 0.0, "step": 383 }, { "epoch": 0.22335320633997383, "grad_norm": NaN, "learning_rate": 9.822451668393561e-05, "loss": 0.0, "step": 384 }, { "epoch": 0.22393485531481752, "grad_norm": NaN, "learning_rate": 9.821206645346628e-05, "loss": 0.0, "step": 385 }, { "epoch": 0.2245165042896612, "grad_norm": NaN, "learning_rate": 9.819957351759682e-05, "loss": 0.0, "step": 386 }, { "epoch": 0.22509815326450486, "grad_norm": NaN, "learning_rate": 9.818703788739327e-05, "loss": 0.0, "step": 387 }, { "epoch": 0.22567980223934855, "grad_norm": NaN, "learning_rate": 9.817445957395948e-05, "loss": 0.0, "step": 388 }, { "epoch": 0.22626145121419222, "grad_norm": NaN, "learning_rate": 9.816183858843709e-05, "loss": 0.0, "step": 389 }, { "epoch": 0.22684310018903592, "grad_norm": NaN, "learning_rate": 9.814917494200556e-05, "loss": 0.0, "step": 390 }, { "epoch": 0.22742474916387959, "grad_norm": NaN, "learning_rate": 9.813646864588211e-05, "loss": 0.0, "step": 391 }, { "epoch": 0.22800639813872328, "grad_norm": NaN, "learning_rate": 9.812371971132174e-05, "loss": 0.0, "step": 392 }, { "epoch": 0.22858804711356695, "grad_norm": NaN, "learning_rate": 9.811092814961728e-05, "loss": 0.0, "step": 393 }, { "epoch": 0.22916969608841065, "grad_norm": NaN, "learning_rate": 9.809809397209923e-05, "loss": 0.0, "step": 394 }, { "epoch": 0.22975134506325431, "grad_norm": NaN, "learning_rate": 9.808521719013588e-05, "loss": 0.0, "step": 395 }, { "epoch": 0.230332994038098, "grad_norm": NaN, "learning_rate": 9.807229781513329e-05, "loss": 0.0, "step": 396 }, { "epoch": 0.23091464301294168, "grad_norm": NaN, "learning_rate": 9.805933585853517e-05, "loss": 0.0, "step": 397 }, { "epoch": 0.23149629198778537, "grad_norm": NaN, "learning_rate": 9.804633133182301e-05, "loss": 0.0, "step": 398 }, { "epoch": 0.23207794096262904, "grad_norm": NaN, "learning_rate": 9.803328424651602e-05, "loss": 0.0, "step": 399 }, { "epoch": 0.23265958993747274, "grad_norm": NaN, "learning_rate": 9.802019461417105e-05, "loss": 0.0, "step": 400 }, { "epoch": 0.2332412389123164, "grad_norm": NaN, "learning_rate": 9.800706244638264e-05, "loss": 0.0, "step": 401 }, { "epoch": 0.2338228878871601, "grad_norm": NaN, "learning_rate": 9.799388775478309e-05, "loss": 0.0, "step": 402 }, { "epoch": 0.23440453686200377, "grad_norm": NaN, "learning_rate": 9.798067055104227e-05, "loss": 0.0, "step": 403 }, { "epoch": 0.23498618583684747, "grad_norm": NaN, "learning_rate": 9.796741084686778e-05, "loss": 0.0, "step": 404 }, { "epoch": 0.23556783481169113, "grad_norm": NaN, "learning_rate": 9.795410865400481e-05, "loss": 0.0, "step": 405 }, { "epoch": 0.23614948378653483, "grad_norm": NaN, "learning_rate": 9.79407639842362e-05, "loss": 0.0, "step": 406 }, { "epoch": 0.2367311327613785, "grad_norm": NaN, "learning_rate": 9.792737684938243e-05, "loss": 0.0, "step": 407 }, { "epoch": 0.2373127817362222, "grad_norm": NaN, "learning_rate": 9.791394726130158e-05, "loss": 0.0, "step": 408 }, { "epoch": 0.23789443071106586, "grad_norm": NaN, "learning_rate": 9.790047523188938e-05, "loss": 0.0, "step": 409 }, { "epoch": 0.23847607968590956, "grad_norm": NaN, "learning_rate": 9.788696077307909e-05, "loss": 0.0, "step": 410 }, { "epoch": 0.23905772866075323, "grad_norm": NaN, "learning_rate": 9.787340389684158e-05, "loss": 0.0, "step": 411 }, { "epoch": 0.23963937763559692, "grad_norm": NaN, "learning_rate": 9.78598046151853e-05, "loss": 0.0, "step": 412 }, { "epoch": 0.2402210266104406, "grad_norm": NaN, "learning_rate": 9.784616294015622e-05, "loss": 0.0, "step": 413 }, { "epoch": 0.2408026755852843, "grad_norm": NaN, "learning_rate": 9.783247888383795e-05, "loss": 0.0, "step": 414 }, { "epoch": 0.24138432456012796, "grad_norm": NaN, "learning_rate": 9.781875245835157e-05, "loss": 0.0, "step": 415 }, { "epoch": 0.24196597353497165, "grad_norm": NaN, "learning_rate": 9.780498367585568e-05, "loss": 0.0, "step": 416 }, { "epoch": 0.24254762250981532, "grad_norm": NaN, "learning_rate": 9.779117254854644e-05, "loss": 0.0, "step": 417 }, { "epoch": 0.24312927148465902, "grad_norm": NaN, "learning_rate": 9.777731908865751e-05, "loss": 0.0, "step": 418 }, { "epoch": 0.24371092045950268, "grad_norm": NaN, "learning_rate": 9.776342330846004e-05, "loss": 0.0, "step": 419 }, { "epoch": 0.24429256943434638, "grad_norm": NaN, "learning_rate": 9.774948522026267e-05, "loss": 0.0, "step": 420 }, { "epoch": 0.24487421840919005, "grad_norm": NaN, "learning_rate": 9.77355048364115e-05, "loss": 0.0, "step": 421 }, { "epoch": 0.24545586738403374, "grad_norm": NaN, "learning_rate": 9.772148216929011e-05, "loss": 0.0, "step": 422 }, { "epoch": 0.2460375163588774, "grad_norm": NaN, "learning_rate": 9.770741723131955e-05, "loss": 0.0, "step": 423 }, { "epoch": 0.2466191653337211, "grad_norm": NaN, "learning_rate": 9.769331003495828e-05, "loss": 0.0, "step": 424 }, { "epoch": 0.24720081430856478, "grad_norm": NaN, "learning_rate": 9.76791605927022e-05, "loss": 0.0, "step": 425 }, { "epoch": 0.24778246328340847, "grad_norm": NaN, "learning_rate": 9.766496891708466e-05, "loss": 0.0, "step": 426 }, { "epoch": 0.24836411225825214, "grad_norm": NaN, "learning_rate": 9.765073502067636e-05, "loss": 0.0, "step": 427 }, { "epoch": 0.24894576123309584, "grad_norm": NaN, "learning_rate": 9.763645891608548e-05, "loss": 0.0, "step": 428 }, { "epoch": 0.2495274102079395, "grad_norm": NaN, "learning_rate": 9.762214061595751e-05, "loss": 0.0, "step": 429 }, { "epoch": 0.2501090591827832, "grad_norm": NaN, "learning_rate": 9.760778013297535e-05, "loss": 0.0, "step": 430 }, { "epoch": 0.2506907081576269, "grad_norm": NaN, "learning_rate": 9.759337747985929e-05, "loss": 0.0, "step": 431 }, { "epoch": 0.25127235713247054, "grad_norm": NaN, "learning_rate": 9.75789326693669e-05, "loss": 0.0, "step": 432 }, { "epoch": 0.25185400610731423, "grad_norm": NaN, "learning_rate": 9.756444571429318e-05, "loss": 0.0, "step": 433 }, { "epoch": 0.25243565508215793, "grad_norm": NaN, "learning_rate": 9.754991662747042e-05, "loss": 0.0, "step": 434 }, { "epoch": 0.2530173040570016, "grad_norm": NaN, "learning_rate": 9.753534542176818e-05, "loss": 0.0, "step": 435 }, { "epoch": 0.25359895303184526, "grad_norm": NaN, "learning_rate": 9.752073211009344e-05, "loss": 0.0, "step": 436 }, { "epoch": 0.25418060200668896, "grad_norm": NaN, "learning_rate": 9.750607670539038e-05, "loss": 0.0, "step": 437 }, { "epoch": 0.25476225098153266, "grad_norm": NaN, "learning_rate": 9.74913792206405e-05, "loss": 0.0, "step": 438 }, { "epoch": 0.25534389995637635, "grad_norm": NaN, "learning_rate": 9.74766396688626e-05, "loss": 0.0, "step": 439 }, { "epoch": 0.25592554893122, "grad_norm": NaN, "learning_rate": 9.74618580631127e-05, "loss": 0.0, "step": 440 }, { "epoch": 0.2565071979060637, "grad_norm": NaN, "learning_rate": 9.744703441648406e-05, "loss": 0.0, "step": 441 }, { "epoch": 0.2570888468809074, "grad_norm": NaN, "learning_rate": 9.743216874210723e-05, "loss": 0.0, "step": 442 }, { "epoch": 0.2576704958557511, "grad_norm": NaN, "learning_rate": 9.741726105314997e-05, "loss": 0.0, "step": 443 }, { "epoch": 0.2582521448305947, "grad_norm": NaN, "learning_rate": 9.740231136281723e-05, "loss": 0.0, "step": 444 }, { "epoch": 0.2588337938054384, "grad_norm": NaN, "learning_rate": 9.738731968435121e-05, "loss": 0.0, "step": 445 }, { "epoch": 0.2594154427802821, "grad_norm": NaN, "learning_rate": 9.737228603103123e-05, "loss": 0.0, "step": 446 }, { "epoch": 0.2599970917551258, "grad_norm": NaN, "learning_rate": 9.735721041617389e-05, "loss": 0.0, "step": 447 }, { "epoch": 0.26057874072996945, "grad_norm": NaN, "learning_rate": 9.734209285313286e-05, "loss": 0.0, "step": 448 }, { "epoch": 0.26116038970481315, "grad_norm": NaN, "learning_rate": 9.732693335529904e-05, "loss": 0.0, "step": 449 }, { "epoch": 0.26174203867965684, "grad_norm": NaN, "learning_rate": 9.731173193610041e-05, "loss": 0.0, "step": 450 }, { "epoch": 0.2623236876545005, "grad_norm": NaN, "learning_rate": 9.729648860900216e-05, "loss": 0.0, "step": 451 }, { "epoch": 0.2629053366293442, "grad_norm": NaN, "learning_rate": 9.728120338750655e-05, "loss": 0.0, "step": 452 }, { "epoch": 0.2634869856041879, "grad_norm": NaN, "learning_rate": 9.726587628515294e-05, "loss": 0.0, "step": 453 }, { "epoch": 0.26406863457903157, "grad_norm": NaN, "learning_rate": 9.725050731551783e-05, "loss": 0.0, "step": 454 }, { "epoch": 0.2646502835538752, "grad_norm": NaN, "learning_rate": 9.723509649221479e-05, "loss": 0.0, "step": 455 }, { "epoch": 0.2652319325287189, "grad_norm": NaN, "learning_rate": 9.721964382889442e-05, "loss": 0.0, "step": 456 }, { "epoch": 0.2658135815035626, "grad_norm": NaN, "learning_rate": 9.720414933924445e-05, "loss": 0.0, "step": 457 }, { "epoch": 0.2663952304784063, "grad_norm": NaN, "learning_rate": 9.718861303698961e-05, "loss": 0.0, "step": 458 }, { "epoch": 0.26697687945324994, "grad_norm": NaN, "learning_rate": 9.717303493589169e-05, "loss": 0.0, "step": 459 }, { "epoch": 0.26755852842809363, "grad_norm": NaN, "learning_rate": 9.715741504974951e-05, "loss": 0.0, "step": 460 }, { "epoch": 0.26814017740293733, "grad_norm": NaN, "learning_rate": 9.714175339239887e-05, "loss": 0.0, "step": 461 }, { "epoch": 0.268721826377781, "grad_norm": NaN, "learning_rate": 9.712604997771258e-05, "loss": 0.0, "step": 462 }, { "epoch": 0.26930347535262467, "grad_norm": NaN, "learning_rate": 9.711030481960048e-05, "loss": 0.0, "step": 463 }, { "epoch": 0.26988512432746836, "grad_norm": NaN, "learning_rate": 9.709451793200935e-05, "loss": 0.0, "step": 464 }, { "epoch": 0.27046677330231206, "grad_norm": NaN, "learning_rate": 9.70786893289229e-05, "loss": 0.0, "step": 465 }, { "epoch": 0.27104842227715575, "grad_norm": NaN, "learning_rate": 9.706281902436187e-05, "loss": 0.0, "step": 466 }, { "epoch": 0.2716300712519994, "grad_norm": NaN, "learning_rate": 9.704690703238389e-05, "loss": 0.0, "step": 467 }, { "epoch": 0.2722117202268431, "grad_norm": NaN, "learning_rate": 9.703095336708349e-05, "loss": 0.0, "step": 468 }, { "epoch": 0.2727933692016868, "grad_norm": NaN, "learning_rate": 9.701495804259217e-05, "loss": 0.0, "step": 469 }, { "epoch": 0.2733750181765305, "grad_norm": NaN, "learning_rate": 9.699892107307833e-05, "loss": 0.0, "step": 470 }, { "epoch": 0.2739566671513741, "grad_norm": NaN, "learning_rate": 9.69828424727472e-05, "loss": 0.0, "step": 471 }, { "epoch": 0.2745383161262178, "grad_norm": NaN, "learning_rate": 9.696672225584093e-05, "loss": 0.0, "step": 472 }, { "epoch": 0.2751199651010615, "grad_norm": NaN, "learning_rate": 9.695056043663854e-05, "loss": 0.0, "step": 473 }, { "epoch": 0.2757016140759052, "grad_norm": NaN, "learning_rate": 9.693435702945586e-05, "loss": 0.0, "step": 474 }, { "epoch": 0.27628326305074885, "grad_norm": NaN, "learning_rate": 9.69181120486456e-05, "loss": 0.0, "step": 475 }, { "epoch": 0.27686491202559255, "grad_norm": NaN, "learning_rate": 9.690182550859728e-05, "loss": 0.0, "step": 476 }, { "epoch": 0.27744656100043624, "grad_norm": NaN, "learning_rate": 9.688549742373723e-05, "loss": 0.0, "step": 477 }, { "epoch": 0.27802820997527994, "grad_norm": NaN, "learning_rate": 9.686912780852856e-05, "loss": 0.0, "step": 478 }, { "epoch": 0.2786098589501236, "grad_norm": NaN, "learning_rate": 9.68527166774712e-05, "loss": 0.0, "step": 479 }, { "epoch": 0.2791915079249673, "grad_norm": NaN, "learning_rate": 9.683626404510186e-05, "loss": 0.0, "step": 480 }, { "epoch": 0.27977315689981097, "grad_norm": NaN, "learning_rate": 9.681976992599395e-05, "loss": 0.0, "step": 481 }, { "epoch": 0.28035480587465467, "grad_norm": NaN, "learning_rate": 9.680323433475772e-05, "loss": 0.0, "step": 482 }, { "epoch": 0.2809364548494983, "grad_norm": NaN, "learning_rate": 9.678665728604007e-05, "loss": 0.0, "step": 483 }, { "epoch": 0.281518103824342, "grad_norm": NaN, "learning_rate": 9.677003879452468e-05, "loss": 0.0, "step": 484 }, { "epoch": 0.2820997527991857, "grad_norm": NaN, "learning_rate": 9.67533788749319e-05, "loss": 0.0, "step": 485 }, { "epoch": 0.2826814017740294, "grad_norm": NaN, "learning_rate": 9.673667754201879e-05, "loss": 0.0, "step": 486 }, { "epoch": 0.28326305074887304, "grad_norm": NaN, "learning_rate": 9.671993481057913e-05, "loss": 0.0, "step": 487 }, { "epoch": 0.28384469972371673, "grad_norm": NaN, "learning_rate": 9.670315069544329e-05, "loss": 0.0, "step": 488 }, { "epoch": 0.2844263486985604, "grad_norm": NaN, "learning_rate": 9.668632521147838e-05, "loss": 0.0, "step": 489 }, { "epoch": 0.2850079976734041, "grad_norm": NaN, "learning_rate": 9.666945837358811e-05, "loss": 0.0, "step": 490 }, { "epoch": 0.28558964664824776, "grad_norm": NaN, "learning_rate": 9.665255019671282e-05, "loss": 0.0, "step": 491 }, { "epoch": 0.28617129562309146, "grad_norm": NaN, "learning_rate": 9.663560069582949e-05, "loss": 0.0, "step": 492 }, { "epoch": 0.28675294459793516, "grad_norm": NaN, "learning_rate": 9.661860988595167e-05, "loss": 0.0, "step": 493 }, { "epoch": 0.28733459357277885, "grad_norm": NaN, "learning_rate": 9.660157778212952e-05, "loss": 0.0, "step": 494 }, { "epoch": 0.2879162425476225, "grad_norm": NaN, "learning_rate": 9.658450439944979e-05, "loss": 0.0, "step": 495 }, { "epoch": 0.2884978915224662, "grad_norm": NaN, "learning_rate": 9.656738975303578e-05, "loss": 0.0, "step": 496 }, { "epoch": 0.2890795404973099, "grad_norm": NaN, "learning_rate": 9.655023385804735e-05, "loss": 0.0, "step": 497 }, { "epoch": 0.2896611894721536, "grad_norm": NaN, "learning_rate": 9.653303672968088e-05, "loss": 0.0, "step": 498 }, { "epoch": 0.2902428384469972, "grad_norm": NaN, "learning_rate": 9.651579838316929e-05, "loss": 0.0, "step": 499 }, { "epoch": 0.2908244874218409, "grad_norm": NaN, "learning_rate": 9.649851883378199e-05, "loss": 0.0, "step": 500 }, { "epoch": 0.2914061363966846, "grad_norm": NaN, "learning_rate": 9.648119809682493e-05, "loss": 0.0, "step": 501 }, { "epoch": 0.2919877853715283, "grad_norm": NaN, "learning_rate": 9.646383618764048e-05, "loss": 0.0, "step": 502 }, { "epoch": 0.29256943434637195, "grad_norm": NaN, "learning_rate": 9.644643312160751e-05, "loss": 0.0, "step": 503 }, { "epoch": 0.29315108332121564, "grad_norm": NaN, "learning_rate": 9.642898891414139e-05, "loss": 0.0, "step": 504 }, { "epoch": 0.29373273229605934, "grad_norm": NaN, "learning_rate": 9.641150358069386e-05, "loss": 0.0, "step": 505 }, { "epoch": 0.29431438127090304, "grad_norm": NaN, "learning_rate": 9.639397713675314e-05, "loss": 0.0, "step": 506 }, { "epoch": 0.2948960302457467, "grad_norm": NaN, "learning_rate": 9.637640959784384e-05, "loss": 0.0, "step": 507 }, { "epoch": 0.2954776792205904, "grad_norm": NaN, "learning_rate": 9.635880097952697e-05, "loss": 0.0, "step": 508 }, { "epoch": 0.29605932819543407, "grad_norm": NaN, "learning_rate": 9.634115129739992e-05, "loss": 0.0, "step": 509 }, { "epoch": 0.29664097717027776, "grad_norm": NaN, "learning_rate": 9.63234605670965e-05, "loss": 0.0, "step": 510 }, { "epoch": 0.2972226261451214, "grad_norm": NaN, "learning_rate": 9.630572880428684e-05, "loss": 0.0, "step": 511 }, { "epoch": 0.2978042751199651, "grad_norm": NaN, "learning_rate": 9.62879560246774e-05, "loss": 0.0, "step": 512 }, { "epoch": 0.2983859240948088, "grad_norm": NaN, "learning_rate": 9.6270142244011e-05, "loss": 0.0, "step": 513 }, { "epoch": 0.2989675730696525, "grad_norm": NaN, "learning_rate": 9.625228747806681e-05, "loss": 0.0, "step": 514 }, { "epoch": 0.29954922204449613, "grad_norm": NaN, "learning_rate": 9.623439174266024e-05, "loss": 0.0, "step": 515 }, { "epoch": 0.30013087101933983, "grad_norm": NaN, "learning_rate": 9.621645505364302e-05, "loss": 0.0, "step": 516 }, { "epoch": 0.3007125199941835, "grad_norm": NaN, "learning_rate": 9.619847742690315e-05, "loss": 0.0, "step": 517 }, { "epoch": 0.30129416896902717, "grad_norm": NaN, "learning_rate": 9.618045887836489e-05, "loss": 0.0, "step": 518 }, { "epoch": 0.30187581794387086, "grad_norm": NaN, "learning_rate": 9.616239942398878e-05, "loss": 0.0, "step": 519 }, { "epoch": 0.30245746691871456, "grad_norm": NaN, "learning_rate": 9.614429907977153e-05, "loss": 0.0, "step": 520 }, { "epoch": 0.30303911589355825, "grad_norm": NaN, "learning_rate": 9.612615786174614e-05, "loss": 0.0, "step": 521 }, { "epoch": 0.3036207648684019, "grad_norm": NaN, "learning_rate": 9.610797578598177e-05, "loss": 0.0, "step": 522 }, { "epoch": 0.3042024138432456, "grad_norm": NaN, "learning_rate": 9.608975286858377e-05, "loss": 0.0, "step": 523 }, { "epoch": 0.3047840628180893, "grad_norm": NaN, "learning_rate": 9.607148912569368e-05, "loss": 0.0, "step": 524 }, { "epoch": 0.305365711792933, "grad_norm": NaN, "learning_rate": 9.605318457348923e-05, "loss": 0.0, "step": 525 }, { "epoch": 0.3059473607677766, "grad_norm": NaN, "learning_rate": 9.603483922818424e-05, "loss": 0.0, "step": 526 }, { "epoch": 0.3065290097426203, "grad_norm": NaN, "learning_rate": 9.601645310602871e-05, "loss": 0.0, "step": 527 }, { "epoch": 0.307110658717464, "grad_norm": NaN, "learning_rate": 9.599802622330876e-05, "loss": 0.0, "step": 528 }, { "epoch": 0.3076923076923077, "grad_norm": NaN, "learning_rate": 9.597955859634656e-05, "loss": 0.0, "step": 529 }, { "epoch": 0.30827395666715135, "grad_norm": NaN, "learning_rate": 9.596105024150041e-05, "loss": 0.0, "step": 530 }, { "epoch": 0.30885560564199505, "grad_norm": NaN, "learning_rate": 9.594250117516472e-05, "loss": 0.0, "step": 531 }, { "epoch": 0.30943725461683874, "grad_norm": NaN, "learning_rate": 9.59239114137699e-05, "loss": 0.0, "step": 532 }, { "epoch": 0.31001890359168244, "grad_norm": NaN, "learning_rate": 9.590528097378246e-05, "loss": 0.0, "step": 533 }, { "epoch": 0.3106005525665261, "grad_norm": NaN, "learning_rate": 9.588660987170489e-05, "loss": 0.0, "step": 534 }, { "epoch": 0.3111822015413698, "grad_norm": NaN, "learning_rate": 9.586789812407573e-05, "loss": 0.0, "step": 535 }, { "epoch": 0.31176385051621347, "grad_norm": NaN, "learning_rate": 9.584914574746951e-05, "loss": 0.0, "step": 536 }, { "epoch": 0.31234549949105717, "grad_norm": NaN, "learning_rate": 9.583035275849678e-05, "loss": 0.0, "step": 537 }, { "epoch": 0.3129271484659008, "grad_norm": NaN, "learning_rate": 9.581151917380401e-05, "loss": 0.0, "step": 538 }, { "epoch": 0.3135087974407445, "grad_norm": NaN, "learning_rate": 9.579264501007368e-05, "loss": 0.0, "step": 539 }, { "epoch": 0.3140904464155882, "grad_norm": NaN, "learning_rate": 9.577373028402416e-05, "loss": 0.0, "step": 540 }, { "epoch": 0.3146720953904319, "grad_norm": NaN, "learning_rate": 9.575477501240983e-05, "loss": 0.0, "step": 541 }, { "epoch": 0.31525374436527553, "grad_norm": NaN, "learning_rate": 9.573577921202091e-05, "loss": 0.0, "step": 542 }, { "epoch": 0.31583539334011923, "grad_norm": NaN, "learning_rate": 9.571674289968353e-05, "loss": 0.0, "step": 543 }, { "epoch": 0.3164170423149629, "grad_norm": NaN, "learning_rate": 9.569766609225975e-05, "loss": 0.0, "step": 544 }, { "epoch": 0.3169986912898066, "grad_norm": NaN, "learning_rate": 9.567854880664745e-05, "loss": 0.0, "step": 545 }, { "epoch": 0.31758034026465026, "grad_norm": NaN, "learning_rate": 9.565939105978038e-05, "loss": 0.0, "step": 546 }, { "epoch": 0.31816198923949396, "grad_norm": NaN, "learning_rate": 9.564019286862817e-05, "loss": 0.0, "step": 547 }, { "epoch": 0.31874363821433765, "grad_norm": NaN, "learning_rate": 9.562095425019622e-05, "loss": 0.0, "step": 548 }, { "epoch": 0.31932528718918135, "grad_norm": NaN, "learning_rate": 9.560167522152573e-05, "loss": 0.0, "step": 549 }, { "epoch": 0.319906936164025, "grad_norm": NaN, "learning_rate": 9.558235579969377e-05, "loss": 0.0, "step": 550 }, { "epoch": 0.3204885851388687, "grad_norm": NaN, "learning_rate": 9.55629960018131e-05, "loss": 0.0, "step": 551 }, { "epoch": 0.3210702341137124, "grad_norm": NaN, "learning_rate": 9.554359584503235e-05, "loss": 0.0, "step": 552 }, { "epoch": 0.3216518830885561, "grad_norm": NaN, "learning_rate": 9.552415534653578e-05, "loss": 0.0, "step": 553 }, { "epoch": 0.3222335320633997, "grad_norm": NaN, "learning_rate": 9.550467452354346e-05, "loss": 0.0, "step": 554 }, { "epoch": 0.3228151810382434, "grad_norm": NaN, "learning_rate": 9.54851533933112e-05, "loss": 0.0, "step": 555 }, { "epoch": 0.3233968300130871, "grad_norm": NaN, "learning_rate": 9.546559197313038e-05, "loss": 0.0, "step": 556 }, { "epoch": 0.3239784789879308, "grad_norm": NaN, "learning_rate": 9.544599028032824e-05, "loss": 0.0, "step": 557 }, { "epoch": 0.32456012796277445, "grad_norm": NaN, "learning_rate": 9.54263483322676e-05, "loss": 0.0, "step": 558 }, { "epoch": 0.32514177693761814, "grad_norm": NaN, "learning_rate": 9.540666614634695e-05, "loss": 0.0, "step": 559 }, { "epoch": 0.32572342591246184, "grad_norm": NaN, "learning_rate": 9.53869437400004e-05, "loss": 0.0, "step": 560 }, { "epoch": 0.32630507488730554, "grad_norm": NaN, "learning_rate": 9.536718113069773e-05, "loss": 0.0, "step": 561 }, { "epoch": 0.3268867238621492, "grad_norm": NaN, "learning_rate": 9.534737833594433e-05, "loss": 0.0, "step": 562 }, { "epoch": 0.32746837283699287, "grad_norm": NaN, "learning_rate": 9.532753537328113e-05, "loss": 0.0, "step": 563 }, { "epoch": 0.32805002181183657, "grad_norm": NaN, "learning_rate": 9.530765226028471e-05, "loss": 0.0, "step": 564 }, { "epoch": 0.32863167078668026, "grad_norm": NaN, "learning_rate": 9.528772901456715e-05, "loss": 0.0, "step": 565 }, { "epoch": 0.3292133197615239, "grad_norm": NaN, "learning_rate": 9.526776565377613e-05, "loss": 0.0, "step": 566 }, { "epoch": 0.3297949687363676, "grad_norm": NaN, "learning_rate": 9.524776219559484e-05, "loss": 0.0, "step": 567 }, { "epoch": 0.3303766177112113, "grad_norm": NaN, "learning_rate": 9.5227718657742e-05, "loss": 0.0, "step": 568 }, { "epoch": 0.330958266686055, "grad_norm": NaN, "learning_rate": 9.520763505797181e-05, "loss": 0.0, "step": 569 }, { "epoch": 0.33153991566089863, "grad_norm": NaN, "learning_rate": 9.518751141407397e-05, "loss": 0.0, "step": 570 }, { "epoch": 0.33212156463574233, "grad_norm": NaN, "learning_rate": 9.516734774387366e-05, "loss": 0.0, "step": 571 }, { "epoch": 0.332703213610586, "grad_norm": NaN, "learning_rate": 9.51471440652315e-05, "loss": 0.0, "step": 572 }, { "epoch": 0.3332848625854297, "grad_norm": NaN, "learning_rate": 9.512690039604356e-05, "loss": 0.0, "step": 573 }, { "epoch": 0.33386651156027336, "grad_norm": NaN, "learning_rate": 9.510661675424129e-05, "loss": 0.0, "step": 574 }, { "epoch": 0.33444816053511706, "grad_norm": NaN, "learning_rate": 9.508629315779163e-05, "loss": 0.0, "step": 575 }, { "epoch": 0.33502980950996075, "grad_norm": NaN, "learning_rate": 9.506592962469684e-05, "loss": 0.0, "step": 576 }, { "epoch": 0.33561145848480445, "grad_norm": NaN, "learning_rate": 9.504552617299457e-05, "loss": 0.0, "step": 577 }, { "epoch": 0.3361931074596481, "grad_norm": NaN, "learning_rate": 9.502508282075787e-05, "loss": 0.0, "step": 578 }, { "epoch": 0.3367747564344918, "grad_norm": NaN, "learning_rate": 9.500459958609509e-05, "loss": 0.0, "step": 579 }, { "epoch": 0.3373564054093355, "grad_norm": NaN, "learning_rate": 9.498407648714989e-05, "loss": 0.0, "step": 580 }, { "epoch": 0.3379380543841791, "grad_norm": NaN, "learning_rate": 9.496351354210132e-05, "loss": 0.0, "step": 581 }, { "epoch": 0.3385197033590228, "grad_norm": NaN, "learning_rate": 9.494291076916362e-05, "loss": 0.0, "step": 582 }, { "epoch": 0.3391013523338665, "grad_norm": NaN, "learning_rate": 9.49222681865864e-05, "loss": 0.0, "step": 583 }, { "epoch": 0.3396830013087102, "grad_norm": NaN, "learning_rate": 9.49015858126545e-05, "loss": 0.0, "step": 584 }, { "epoch": 0.34026465028355385, "grad_norm": NaN, "learning_rate": 9.488086366568797e-05, "loss": 0.0, "step": 585 }, { "epoch": 0.34084629925839754, "grad_norm": NaN, "learning_rate": 9.486010176404215e-05, "loss": 0.0, "step": 586 }, { "epoch": 0.34142794823324124, "grad_norm": NaN, "learning_rate": 9.483930012610756e-05, "loss": 0.0, "step": 587 }, { "epoch": 0.34200959720808494, "grad_norm": NaN, "learning_rate": 9.481845877030993e-05, "loss": 0.0, "step": 588 }, { "epoch": 0.3425912461829286, "grad_norm": NaN, "learning_rate": 9.479757771511017e-05, "loss": 0.0, "step": 589 }, { "epoch": 0.3431728951577723, "grad_norm": NaN, "learning_rate": 9.477665697900434e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.34375454413261597, "grad_norm": NaN, "learning_rate": 9.475569658052366e-05, "loss": 0.0, "step": 591 }, { "epoch": 0.34433619310745966, "grad_norm": NaN, "learning_rate": 9.473469653823448e-05, "loss": 0.0, "step": 592 }, { "epoch": 0.3449178420823033, "grad_norm": NaN, "learning_rate": 9.471365687073828e-05, "loss": 0.0, "step": 593 }, { "epoch": 0.345499491057147, "grad_norm": NaN, "learning_rate": 9.46925775966716e-05, "loss": 0.0, "step": 594 }, { "epoch": 0.3460811400319907, "grad_norm": NaN, "learning_rate": 9.467145873470613e-05, "loss": 0.0, "step": 595 }, { "epoch": 0.3466627890068344, "grad_norm": NaN, "learning_rate": 9.465030030354857e-05, "loss": 0.0, "step": 596 }, { "epoch": 0.34724443798167803, "grad_norm": NaN, "learning_rate": 9.462910232194068e-05, "loss": 0.0, "step": 597 }, { "epoch": 0.34782608695652173, "grad_norm": NaN, "learning_rate": 9.460786480865925e-05, "loss": 0.0, "step": 598 }, { "epoch": 0.3484077359313654, "grad_norm": NaN, "learning_rate": 9.458658778251612e-05, "loss": 0.0, "step": 599 }, { "epoch": 0.3489893849062091, "grad_norm": NaN, "learning_rate": 9.456527126235808e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.34957103388105276, "grad_norm": NaN, "learning_rate": 9.454391526706696e-05, "loss": 0.0, "step": 601 }, { "epoch": 0.35015268285589646, "grad_norm": NaN, "learning_rate": 9.452251981555948e-05, "loss": 0.0, "step": 602 }, { "epoch": 0.35073433183074015, "grad_norm": NaN, "learning_rate": 9.45010849267874e-05, "loss": 0.0, "step": 603 }, { "epoch": 0.35131598080558385, "grad_norm": NaN, "learning_rate": 9.447961061973735e-05, "loss": 0.0, "step": 604 }, { "epoch": 0.3518976297804275, "grad_norm": NaN, "learning_rate": 9.445809691343088e-05, "loss": 0.0, "step": 605 }, { "epoch": 0.3524792787552712, "grad_norm": NaN, "learning_rate": 9.443654382692447e-05, "loss": 0.0, "step": 606 }, { "epoch": 0.3530609277301149, "grad_norm": NaN, "learning_rate": 9.441495137930946e-05, "loss": 0.0, "step": 607 }, { "epoch": 0.3536425767049586, "grad_norm": NaN, "learning_rate": 9.439331958971205e-05, "loss": 0.0, "step": 608 }, { "epoch": 0.3542242256798022, "grad_norm": NaN, "learning_rate": 9.437164847729332e-05, "loss": 0.0, "step": 609 }, { "epoch": 0.3548058746546459, "grad_norm": NaN, "learning_rate": 9.434993806124914e-05, "loss": 0.0, "step": 610 }, { "epoch": 0.3553875236294896, "grad_norm": NaN, "learning_rate": 9.432818836081021e-05, "loss": 0.0, "step": 611 }, { "epoch": 0.3559691726043333, "grad_norm": NaN, "learning_rate": 9.430639939524206e-05, "loss": 0.0, "step": 612 }, { "epoch": 0.35655082157917695, "grad_norm": NaN, "learning_rate": 9.428457118384495e-05, "loss": 0.0, "step": 613 }, { "epoch": 0.35713247055402064, "grad_norm": NaN, "learning_rate": 9.426270374595393e-05, "loss": 0.0, "step": 614 }, { "epoch": 0.35771411952886434, "grad_norm": NaN, "learning_rate": 9.42407971009388e-05, "loss": 0.0, "step": 615 }, { "epoch": 0.35829576850370803, "grad_norm": NaN, "learning_rate": 9.421885126820406e-05, "loss": 0.0, "step": 616 }, { "epoch": 0.3588774174785517, "grad_norm": NaN, "learning_rate": 9.419686626718897e-05, "loss": 0.0, "step": 617 }, { "epoch": 0.35945906645339537, "grad_norm": NaN, "learning_rate": 9.417484211736744e-05, "loss": 0.0, "step": 618 }, { "epoch": 0.36004071542823907, "grad_norm": NaN, "learning_rate": 9.415277883824807e-05, "loss": 0.0, "step": 619 }, { "epoch": 0.36062236440308276, "grad_norm": NaN, "learning_rate": 9.413067644937414e-05, "loss": 0.0, "step": 620 }, { "epoch": 0.3612040133779264, "grad_norm": NaN, "learning_rate": 9.410853497032355e-05, "loss": 0.0, "step": 621 }, { "epoch": 0.3617856623527701, "grad_norm": NaN, "learning_rate": 9.408635442070883e-05, "loss": 0.0, "step": 622 }, { "epoch": 0.3623673113276138, "grad_norm": NaN, "learning_rate": 9.40641348201771e-05, "loss": 0.0, "step": 623 }, { "epoch": 0.3629489603024575, "grad_norm": NaN, "learning_rate": 9.404187618841015e-05, "loss": 0.0, "step": 624 }, { "epoch": 0.36353060927730113, "grad_norm": NaN, "learning_rate": 9.401957854512424e-05, "loss": 0.0, "step": 625 }, { "epoch": 0.3641122582521448, "grad_norm": NaN, "learning_rate": 9.399724191007022e-05, "loss": 0.0, "step": 626 }, { "epoch": 0.3646939072269885, "grad_norm": NaN, "learning_rate": 9.397486630303352e-05, "loss": 0.0, "step": 627 }, { "epoch": 0.3652755562018322, "grad_norm": NaN, "learning_rate": 9.395245174383404e-05, "loss": 0.0, "step": 628 }, { "epoch": 0.36585720517667586, "grad_norm": NaN, "learning_rate": 9.392999825232621e-05, "loss": 0.0, "step": 629 }, { "epoch": 0.36643885415151956, "grad_norm": NaN, "learning_rate": 9.390750584839893e-05, "loss": 0.0, "step": 630 }, { "epoch": 0.36702050312636325, "grad_norm": NaN, "learning_rate": 9.388497455197557e-05, "loss": 0.0, "step": 631 }, { "epoch": 0.36760215210120695, "grad_norm": NaN, "learning_rate": 9.386240438301399e-05, "loss": 0.0, "step": 632 }, { "epoch": 0.3681838010760506, "grad_norm": NaN, "learning_rate": 9.383979536150643e-05, "loss": 0.0, "step": 633 }, { "epoch": 0.3687654500508943, "grad_norm": NaN, "learning_rate": 9.381714750747954e-05, "loss": 0.0, "step": 634 }, { "epoch": 0.369347099025738, "grad_norm": NaN, "learning_rate": 9.379446084099441e-05, "loss": 0.0, "step": 635 }, { "epoch": 0.3699287480005817, "grad_norm": NaN, "learning_rate": 9.37717353821465e-05, "loss": 0.0, "step": 636 }, { "epoch": 0.3705103969754253, "grad_norm": NaN, "learning_rate": 9.37489711510656e-05, "loss": 0.0, "step": 637 }, { "epoch": 0.371092045950269, "grad_norm": NaN, "learning_rate": 9.372616816791588e-05, "loss": 0.0, "step": 638 }, { "epoch": 0.3716736949251127, "grad_norm": NaN, "learning_rate": 9.370332645289581e-05, "loss": 0.0, "step": 639 }, { "epoch": 0.3722553438999564, "grad_norm": NaN, "learning_rate": 9.368044602623818e-05, "loss": 0.0, "step": 640 }, { "epoch": 0.37283699287480004, "grad_norm": NaN, "learning_rate": 9.36575269082101e-05, "loss": 0.0, "step": 641 }, { "epoch": 0.37341864184964374, "grad_norm": NaN, "learning_rate": 9.363456911911288e-05, "loss": 0.0, "step": 642 }, { "epoch": 0.37400029082448744, "grad_norm": NaN, "learning_rate": 9.361157267928215e-05, "loss": 0.0, "step": 643 }, { "epoch": 0.3745819397993311, "grad_norm": NaN, "learning_rate": 9.358853760908774e-05, "loss": 0.0, "step": 644 }, { "epoch": 0.37516358877417477, "grad_norm": NaN, "learning_rate": 9.356546392893372e-05, "loss": 0.0, "step": 645 }, { "epoch": 0.37574523774901847, "grad_norm": NaN, "learning_rate": 9.354235165925833e-05, "loss": 0.0, "step": 646 }, { "epoch": 0.37632688672386216, "grad_norm": NaN, "learning_rate": 9.351920082053405e-05, "loss": 0.0, "step": 647 }, { "epoch": 0.3769085356987058, "grad_norm": NaN, "learning_rate": 9.349601143326745e-05, "loss": 0.0, "step": 648 }, { "epoch": 0.3774901846735495, "grad_norm": NaN, "learning_rate": 9.347278351799929e-05, "loss": 0.0, "step": 649 }, { "epoch": 0.3780718336483932, "grad_norm": NaN, "learning_rate": 9.344951709530445e-05, "loss": 0.0, "step": 650 }, { "epoch": 0.3786534826232369, "grad_norm": NaN, "learning_rate": 9.342621218579192e-05, "loss": 0.0, "step": 651 }, { "epoch": 0.37923513159808053, "grad_norm": NaN, "learning_rate": 9.340286881010478e-05, "loss": 0.0, "step": 652 }, { "epoch": 0.37981678057292423, "grad_norm": NaN, "learning_rate": 9.337948698892018e-05, "loss": 0.0, "step": 653 }, { "epoch": 0.3803984295477679, "grad_norm": NaN, "learning_rate": 9.335606674294932e-05, "loss": 0.0, "step": 654 }, { "epoch": 0.3809800785226116, "grad_norm": NaN, "learning_rate": 9.333260809293745e-05, "loss": 0.0, "step": 655 }, { "epoch": 0.38156172749745526, "grad_norm": NaN, "learning_rate": 9.330911105966382e-05, "loss": 0.0, "step": 656 }, { "epoch": 0.38214337647229896, "grad_norm": NaN, "learning_rate": 9.328557566394171e-05, "loss": 0.0, "step": 657 }, { "epoch": 0.38272502544714265, "grad_norm": NaN, "learning_rate": 9.326200192661833e-05, "loss": 0.0, "step": 658 }, { "epoch": 0.38330667442198635, "grad_norm": NaN, "learning_rate": 9.32383898685749e-05, "loss": 0.0, "step": 659 }, { "epoch": 0.38388832339683, "grad_norm": NaN, "learning_rate": 9.321473951072657e-05, "loss": 0.0, "step": 660 }, { "epoch": 0.3844699723716737, "grad_norm": NaN, "learning_rate": 9.31910508740224e-05, "loss": 0.0, "step": 661 }, { "epoch": 0.3850516213465174, "grad_norm": NaN, "learning_rate": 9.316732397944536e-05, "loss": 0.0, "step": 662 }, { "epoch": 0.3856332703213611, "grad_norm": NaN, "learning_rate": 9.314355884801234e-05, "loss": 0.0, "step": 663 }, { "epoch": 0.3862149192962047, "grad_norm": NaN, "learning_rate": 9.311975550077406e-05, "loss": 0.0, "step": 664 }, { "epoch": 0.3867965682710484, "grad_norm": NaN, "learning_rate": 9.309591395881509e-05, "loss": 0.0, "step": 665 }, { "epoch": 0.3873782172458921, "grad_norm": NaN, "learning_rate": 9.307203424325387e-05, "loss": 0.0, "step": 666 }, { "epoch": 0.3879598662207358, "grad_norm": NaN, "learning_rate": 9.304811637524262e-05, "loss": 0.0, "step": 667 }, { "epoch": 0.38854151519557945, "grad_norm": NaN, "learning_rate": 9.30241603759674e-05, "loss": 0.0, "step": 668 }, { "epoch": 0.38912316417042314, "grad_norm": NaN, "learning_rate": 9.300016626664798e-05, "loss": 0.0, "step": 669 }, { "epoch": 0.38970481314526684, "grad_norm": NaN, "learning_rate": 9.297613406853791e-05, "loss": 0.0, "step": 670 }, { "epoch": 0.39028646212011053, "grad_norm": NaN, "learning_rate": 9.295206380292452e-05, "loss": 0.0, "step": 671 }, { "epoch": 0.3908681110949542, "grad_norm": NaN, "learning_rate": 9.292795549112879e-05, "loss": 0.0, "step": 672 }, { "epoch": 0.39144976006979787, "grad_norm": NaN, "learning_rate": 9.290380915450547e-05, "loss": 0.0, "step": 673 }, { "epoch": 0.39203140904464157, "grad_norm": NaN, "learning_rate": 9.287962481444297e-05, "loss": 0.0, "step": 674 }, { "epoch": 0.39261305801948526, "grad_norm": NaN, "learning_rate": 9.285540249236331e-05, "loss": 0.0, "step": 675 }, { "epoch": 0.3931947069943289, "grad_norm": NaN, "learning_rate": 9.283114220972224e-05, "loss": 0.0, "step": 676 }, { "epoch": 0.3937763559691726, "grad_norm": NaN, "learning_rate": 9.280684398800906e-05, "loss": 0.0, "step": 677 }, { "epoch": 0.3943580049440163, "grad_norm": NaN, "learning_rate": 9.278250784874672e-05, "loss": 0.0, "step": 678 }, { "epoch": 0.39493965391886, "grad_norm": NaN, "learning_rate": 9.275813381349175e-05, "loss": 0.0, "step": 679 }, { "epoch": 0.39552130289370363, "grad_norm": NaN, "learning_rate": 9.273372190383422e-05, "loss": 0.0, "step": 680 }, { "epoch": 0.3961029518685473, "grad_norm": NaN, "learning_rate": 9.270927214139778e-05, "loss": 0.0, "step": 681 }, { "epoch": 0.396684600843391, "grad_norm": NaN, "learning_rate": 9.268478454783963e-05, "loss": 0.0, "step": 682 }, { "epoch": 0.3972662498182347, "grad_norm": NaN, "learning_rate": 9.26602591448504e-05, "loss": 0.0, "step": 683 }, { "epoch": 0.39784789879307836, "grad_norm": NaN, "learning_rate": 9.263569595415429e-05, "loss": 0.0, "step": 684 }, { "epoch": 0.39842954776792205, "grad_norm": NaN, "learning_rate": 9.261109499750895e-05, "loss": 0.0, "step": 685 }, { "epoch": 0.39901119674276575, "grad_norm": NaN, "learning_rate": 9.258645629670544e-05, "loss": 0.0, "step": 686 }, { "epoch": 0.39959284571760945, "grad_norm": NaN, "learning_rate": 9.256177987356832e-05, "loss": 0.0, "step": 687 }, { "epoch": 0.4001744946924531, "grad_norm": NaN, "learning_rate": 9.253706574995551e-05, "loss": 0.0, "step": 688 }, { "epoch": 0.4007561436672968, "grad_norm": NaN, "learning_rate": 9.251231394775837e-05, "loss": 0.0, "step": 689 }, { "epoch": 0.4013377926421405, "grad_norm": NaN, "learning_rate": 9.24875244889016e-05, "loss": 0.0, "step": 690 }, { "epoch": 0.4019194416169842, "grad_norm": NaN, "learning_rate": 9.246269739534324e-05, "loss": 0.0, "step": 691 }, { "epoch": 0.4025010905918278, "grad_norm": NaN, "learning_rate": 9.243783268907471e-05, "loss": 0.0, "step": 692 }, { "epoch": 0.4030827395666715, "grad_norm": NaN, "learning_rate": 9.241293039212075e-05, "loss": 0.0, "step": 693 }, { "epoch": 0.4036643885415152, "grad_norm": NaN, "learning_rate": 9.238799052653937e-05, "loss": 0.0, "step": 694 }, { "epoch": 0.4042460375163589, "grad_norm": NaN, "learning_rate": 9.236301311442183e-05, "loss": 0.0, "step": 695 }, { "epoch": 0.40482768649120254, "grad_norm": NaN, "learning_rate": 9.233799817789272e-05, "loss": 0.0, "step": 696 }, { "epoch": 0.40540933546604624, "grad_norm": NaN, "learning_rate": 9.231294573910982e-05, "loss": 0.0, "step": 697 }, { "epoch": 0.40599098444088993, "grad_norm": NaN, "learning_rate": 9.228785582026414e-05, "loss": 0.0, "step": 698 }, { "epoch": 0.40657263341573363, "grad_norm": NaN, "learning_rate": 9.226272844357989e-05, "loss": 0.0, "step": 699 }, { "epoch": 0.40715428239057727, "grad_norm": NaN, "learning_rate": 9.223756363131444e-05, "loss": 0.0, "step": 700 }, { "epoch": 0.40773593136542097, "grad_norm": NaN, "learning_rate": 9.221236140575835e-05, "loss": 0.0, "step": 701 }, { "epoch": 0.40831758034026466, "grad_norm": NaN, "learning_rate": 9.218712178923532e-05, "loss": 0.0, "step": 702 }, { "epoch": 0.40889922931510836, "grad_norm": NaN, "learning_rate": 9.216184480410214e-05, "loss": 0.0, "step": 703 }, { "epoch": 0.409480878289952, "grad_norm": NaN, "learning_rate": 9.213653047274872e-05, "loss": 0.0, "step": 704 }, { "epoch": 0.4100625272647957, "grad_norm": NaN, "learning_rate": 9.211117881759807e-05, "loss": 0.0, "step": 705 }, { "epoch": 0.4106441762396394, "grad_norm": NaN, "learning_rate": 9.208578986110623e-05, "loss": 0.0, "step": 706 }, { "epoch": 0.41122582521448303, "grad_norm": NaN, "learning_rate": 9.206036362576227e-05, "loss": 0.0, "step": 707 }, { "epoch": 0.4118074741893267, "grad_norm": NaN, "learning_rate": 9.203490013408833e-05, "loss": 0.0, "step": 708 }, { "epoch": 0.4123891231641704, "grad_norm": NaN, "learning_rate": 9.200939940863951e-05, "loss": 0.0, "step": 709 }, { "epoch": 0.4129707721390141, "grad_norm": NaN, "learning_rate": 9.198386147200391e-05, "loss": 0.0, "step": 710 }, { "epoch": 0.41355242111385776, "grad_norm": NaN, "learning_rate": 9.195828634680257e-05, "loss": 0.0, "step": 711 }, { "epoch": 0.41413407008870146, "grad_norm": NaN, "learning_rate": 9.193267405568949e-05, "loss": 0.0, "step": 712 }, { "epoch": 0.41471571906354515, "grad_norm": NaN, "learning_rate": 9.190702462135161e-05, "loss": 0.0, "step": 713 }, { "epoch": 0.41529736803838885, "grad_norm": NaN, "learning_rate": 9.188133806650872e-05, "loss": 0.0, "step": 714 }, { "epoch": 0.4158790170132325, "grad_norm": NaN, "learning_rate": 9.185561441391353e-05, "loss": 0.0, "step": 715 }, { "epoch": 0.4164606659880762, "grad_norm": NaN, "learning_rate": 9.18298536863516e-05, "loss": 0.0, "step": 716 }, { "epoch": 0.4170423149629199, "grad_norm": NaN, "learning_rate": 9.180405590664131e-05, "loss": 0.0, "step": 717 }, { "epoch": 0.4176239639377636, "grad_norm": NaN, "learning_rate": 9.177822109763392e-05, "loss": 0.0, "step": 718 }, { "epoch": 0.4182056129126072, "grad_norm": NaN, "learning_rate": 9.175234928221341e-05, "loss": 0.0, "step": 719 }, { "epoch": 0.4187872618874509, "grad_norm": NaN, "learning_rate": 9.172644048329662e-05, "loss": 0.0, "step": 720 }, { "epoch": 0.4193689108622946, "grad_norm": NaN, "learning_rate": 9.170049472383305e-05, "loss": 0.0, "step": 721 }, { "epoch": 0.4199505598371383, "grad_norm": NaN, "learning_rate": 9.167451202680503e-05, "loss": 0.0, "step": 722 }, { "epoch": 0.42053220881198194, "grad_norm": NaN, "learning_rate": 9.16484924152276e-05, "loss": 0.0, "step": 723 }, { "epoch": 0.42111385778682564, "grad_norm": NaN, "learning_rate": 9.162243591214843e-05, "loss": 0.0, "step": 724 }, { "epoch": 0.42169550676166934, "grad_norm": NaN, "learning_rate": 9.159634254064795e-05, "loss": 0.0, "step": 725 }, { "epoch": 0.42227715573651303, "grad_norm": NaN, "learning_rate": 9.157021232383918e-05, "loss": 0.0, "step": 726 }, { "epoch": 0.4228588047113567, "grad_norm": NaN, "learning_rate": 9.15440452848678e-05, "loss": 0.0, "step": 727 }, { "epoch": 0.42344045368620037, "grad_norm": NaN, "learning_rate": 9.151784144691216e-05, "loss": 0.0, "step": 728 }, { "epoch": 0.42402210266104406, "grad_norm": NaN, "learning_rate": 9.149160083318311e-05, "loss": 0.0, "step": 729 }, { "epoch": 0.42460375163588776, "grad_norm": NaN, "learning_rate": 9.146532346692414e-05, "loss": 0.0, "step": 730 }, { "epoch": 0.4251854006107314, "grad_norm": NaN, "learning_rate": 9.143900937141128e-05, "loss": 0.0, "step": 731 }, { "epoch": 0.4257670495855751, "grad_norm": NaN, "learning_rate": 9.14126585699531e-05, "loss": 0.0, "step": 732 }, { "epoch": 0.4263486985604188, "grad_norm": NaN, "learning_rate": 9.138627108589067e-05, "loss": 0.0, "step": 733 }, { "epoch": 0.4269303475352625, "grad_norm": NaN, "learning_rate": 9.135984694259756e-05, "loss": 0.0, "step": 734 }, { "epoch": 0.42751199651010613, "grad_norm": NaN, "learning_rate": 9.13333861634798e-05, "loss": 0.0, "step": 735 }, { "epoch": 0.4280936454849498, "grad_norm": NaN, "learning_rate": 9.13068887719759e-05, "loss": 0.0, "step": 736 }, { "epoch": 0.4286752944597935, "grad_norm": NaN, "learning_rate": 9.128035479155679e-05, "loss": 0.0, "step": 737 }, { "epoch": 0.4292569434346372, "grad_norm": NaN, "learning_rate": 9.125378424572578e-05, "loss": 0.0, "step": 738 }, { "epoch": 0.42983859240948086, "grad_norm": NaN, "learning_rate": 9.122717715801859e-05, "loss": 0.0, "step": 739 }, { "epoch": 0.43042024138432455, "grad_norm": NaN, "learning_rate": 9.120053355200331e-05, "loss": 0.0, "step": 740 }, { "epoch": 0.43100189035916825, "grad_norm": NaN, "learning_rate": 9.11738534512804e-05, "loss": 0.0, "step": 741 }, { "epoch": 0.43158353933401195, "grad_norm": NaN, "learning_rate": 9.114713687948262e-05, "loss": 0.0, "step": 742 }, { "epoch": 0.4321651883088556, "grad_norm": NaN, "learning_rate": 9.112038386027502e-05, "loss": 0.0, "step": 743 }, { "epoch": 0.4327468372836993, "grad_norm": NaN, "learning_rate": 9.109359441735498e-05, "loss": 0.0, "step": 744 }, { "epoch": 0.433328486258543, "grad_norm": NaN, "learning_rate": 9.106676857445209e-05, "loss": 0.0, "step": 745 }, { "epoch": 0.4339101352333867, "grad_norm": NaN, "learning_rate": 9.103990635532823e-05, "loss": 0.0, "step": 746 }, { "epoch": 0.4344917842082303, "grad_norm": NaN, "learning_rate": 9.101300778377747e-05, "loss": 0.0, "step": 747 }, { "epoch": 0.435073433183074, "grad_norm": NaN, "learning_rate": 9.098607288362607e-05, "loss": 0.0, "step": 748 }, { "epoch": 0.4356550821579177, "grad_norm": NaN, "learning_rate": 9.095910167873254e-05, "loss": 0.0, "step": 749 }, { "epoch": 0.4362367311327614, "grad_norm": NaN, "learning_rate": 9.093209419298746e-05, "loss": 0.0, "step": 750 }, { "epoch": 0.43681838010760504, "grad_norm": NaN, "learning_rate": 9.090505045031362e-05, "loss": 0.0, "step": 751 }, { "epoch": 0.43740002908244874, "grad_norm": NaN, "learning_rate": 9.087797047466584e-05, "loss": 0.0, "step": 752 }, { "epoch": 0.43798167805729243, "grad_norm": NaN, "learning_rate": 9.085085429003113e-05, "loss": 0.0, "step": 753 }, { "epoch": 0.43856332703213613, "grad_norm": NaN, "learning_rate": 9.082370192042853e-05, "loss": 0.0, "step": 754 }, { "epoch": 0.43914497600697977, "grad_norm": NaN, "learning_rate": 9.079651338990909e-05, "loss": 0.0, "step": 755 }, { "epoch": 0.43972662498182347, "grad_norm": NaN, "learning_rate": 9.076928872255597e-05, "loss": 0.0, "step": 756 }, { "epoch": 0.44030827395666716, "grad_norm": NaN, "learning_rate": 9.07420279424843e-05, "loss": 0.0, "step": 757 }, { "epoch": 0.44088992293151086, "grad_norm": NaN, "learning_rate": 9.071473107384117e-05, "loss": 0.0, "step": 758 }, { "epoch": 0.4414715719063545, "grad_norm": NaN, "learning_rate": 9.068739814080569e-05, "loss": 0.0, "step": 759 }, { "epoch": 0.4420532208811982, "grad_norm": NaN, "learning_rate": 9.066002916758889e-05, "loss": 0.0, "step": 760 }, { "epoch": 0.4426348698560419, "grad_norm": NaN, "learning_rate": 9.063262417843372e-05, "loss": 0.0, "step": 761 }, { "epoch": 0.4432165188308856, "grad_norm": NaN, "learning_rate": 9.060518319761504e-05, "loss": 0.0, "step": 762 }, { "epoch": 0.4437981678057292, "grad_norm": NaN, "learning_rate": 9.057770624943958e-05, "loss": 0.0, "step": 763 }, { "epoch": 0.4443798167805729, "grad_norm": NaN, "learning_rate": 9.055019335824595e-05, "loss": 0.0, "step": 764 }, { "epoch": 0.4449614657554166, "grad_norm": NaN, "learning_rate": 9.052264454840458e-05, "loss": 0.0, "step": 765 }, { "epoch": 0.4455431147302603, "grad_norm": NaN, "learning_rate": 9.049505984431771e-05, "loss": 0.0, "step": 766 }, { "epoch": 0.44612476370510395, "grad_norm": NaN, "learning_rate": 9.04674392704194e-05, "loss": 0.0, "step": 767 }, { "epoch": 0.44670641267994765, "grad_norm": NaN, "learning_rate": 9.043978285117547e-05, "loss": 0.0, "step": 768 }, { "epoch": 0.44728806165479135, "grad_norm": NaN, "learning_rate": 9.041209061108347e-05, "loss": 0.0, "step": 769 }, { "epoch": 0.44786971062963504, "grad_norm": NaN, "learning_rate": 9.038436257467271e-05, "loss": 0.0, "step": 770 }, { "epoch": 0.4484513596044787, "grad_norm": NaN, "learning_rate": 9.035659876650419e-05, "loss": 0.0, "step": 771 }, { "epoch": 0.4490330085793224, "grad_norm": NaN, "learning_rate": 9.032879921117064e-05, "loss": 0.0, "step": 772 }, { "epoch": 0.4496146575541661, "grad_norm": NaN, "learning_rate": 9.030096393329637e-05, "loss": 0.0, "step": 773 }, { "epoch": 0.4501963065290097, "grad_norm": NaN, "learning_rate": 9.027309295753739e-05, "loss": 0.0, "step": 774 }, { "epoch": 0.4507779555038534, "grad_norm": NaN, "learning_rate": 9.024518630858134e-05, "loss": 0.0, "step": 775 }, { "epoch": 0.4513596044786971, "grad_norm": NaN, "learning_rate": 9.021724401114742e-05, "loss": 0.0, "step": 776 }, { "epoch": 0.4519412534535408, "grad_norm": NaN, "learning_rate": 9.018926608998643e-05, "loss": 0.0, "step": 777 }, { "epoch": 0.45252290242838444, "grad_norm": NaN, "learning_rate": 9.016125256988073e-05, "loss": 0.0, "step": 778 }, { "epoch": 0.45310455140322814, "grad_norm": NaN, "learning_rate": 9.013320347564418e-05, "loss": 0.0, "step": 779 }, { "epoch": 0.45368620037807184, "grad_norm": NaN, "learning_rate": 9.010511883212221e-05, "loss": 0.0, "step": 780 }, { "epoch": 0.45426784935291553, "grad_norm": NaN, "learning_rate": 9.007699866419166e-05, "loss": 0.0, "step": 781 }, { "epoch": 0.45484949832775917, "grad_norm": NaN, "learning_rate": 9.00488429967609e-05, "loss": 0.0, "step": 782 }, { "epoch": 0.45543114730260287, "grad_norm": NaN, "learning_rate": 9.002065185476973e-05, "loss": 0.0, "step": 783 }, { "epoch": 0.45601279627744656, "grad_norm": NaN, "learning_rate": 8.999242526318938e-05, "loss": 0.0, "step": 784 }, { "epoch": 0.45659444525229026, "grad_norm": NaN, "learning_rate": 8.996416324702243e-05, "loss": 0.0, "step": 785 }, { "epoch": 0.4571760942271339, "grad_norm": NaN, "learning_rate": 8.993586583130292e-05, "loss": 0.0, "step": 786 }, { "epoch": 0.4577577432019776, "grad_norm": NaN, "learning_rate": 8.990753304109618e-05, "loss": 0.0, "step": 787 }, { "epoch": 0.4583393921768213, "grad_norm": NaN, "learning_rate": 8.987916490149888e-05, "loss": 0.0, "step": 788 }, { "epoch": 0.458921041151665, "grad_norm": NaN, "learning_rate": 8.985076143763904e-05, "loss": 0.0, "step": 789 }, { "epoch": 0.45950269012650863, "grad_norm": NaN, "learning_rate": 8.982232267467597e-05, "loss": 0.0, "step": 790 }, { "epoch": 0.4600843391013523, "grad_norm": NaN, "learning_rate": 8.979384863780018e-05, "loss": 0.0, "step": 791 }, { "epoch": 0.460665988076196, "grad_norm": NaN, "learning_rate": 8.97653393522335e-05, "loss": 0.0, "step": 792 }, { "epoch": 0.4612476370510397, "grad_norm": NaN, "learning_rate": 8.973679484322894e-05, "loss": 0.0, "step": 793 }, { "epoch": 0.46182928602588336, "grad_norm": NaN, "learning_rate": 8.970821513607073e-05, "loss": 0.0, "step": 794 }, { "epoch": 0.46241093500072705, "grad_norm": NaN, "learning_rate": 8.967960025607426e-05, "loss": 0.0, "step": 795 }, { "epoch": 0.46299258397557075, "grad_norm": NaN, "learning_rate": 8.965095022858612e-05, "loss": 0.0, "step": 796 }, { "epoch": 0.46357423295041444, "grad_norm": NaN, "learning_rate": 8.962226507898397e-05, "loss": 0.0, "step": 797 }, { "epoch": 0.4641558819252581, "grad_norm": NaN, "learning_rate": 8.959354483267662e-05, "loss": 0.0, "step": 798 }, { "epoch": 0.4647375309001018, "grad_norm": NaN, "learning_rate": 8.956478951510397e-05, "loss": 0.0, "step": 799 }, { "epoch": 0.4653191798749455, "grad_norm": NaN, "learning_rate": 8.953599915173694e-05, "loss": 0.0, "step": 800 }, { "epoch": 0.4659008288497892, "grad_norm": NaN, "learning_rate": 8.950717376807757e-05, "loss": 0.0, "step": 801 }, { "epoch": 0.4664824778246328, "grad_norm": NaN, "learning_rate": 8.947831338965884e-05, "loss": 0.0, "step": 802 }, { "epoch": 0.4670641267994765, "grad_norm": NaN, "learning_rate": 8.94494180420448e-05, "loss": 0.0, "step": 803 }, { "epoch": 0.4676457757743202, "grad_norm": NaN, "learning_rate": 8.942048775083041e-05, "loss": 0.0, "step": 804 }, { "epoch": 0.4682274247491639, "grad_norm": NaN, "learning_rate": 8.939152254164164e-05, "loss": 0.0, "step": 805 }, { "epoch": 0.46880907372400754, "grad_norm": NaN, "learning_rate": 8.936252244013535e-05, "loss": 0.0, "step": 806 }, { "epoch": 0.46939072269885124, "grad_norm": NaN, "learning_rate": 8.93334874719993e-05, "loss": 0.0, "step": 807 }, { "epoch": 0.46997237167369493, "grad_norm": NaN, "learning_rate": 8.93044176629522e-05, "loss": 0.0, "step": 808 }, { "epoch": 0.47055402064853863, "grad_norm": NaN, "learning_rate": 8.927531303874352e-05, "loss": 0.0, "step": 809 }, { "epoch": 0.47113566962338227, "grad_norm": NaN, "learning_rate": 8.924617362515366e-05, "loss": 0.0, "step": 810 }, { "epoch": 0.47171731859822597, "grad_norm": NaN, "learning_rate": 8.921699944799379e-05, "loss": 0.0, "step": 811 }, { "epoch": 0.47229896757306966, "grad_norm": NaN, "learning_rate": 8.918779053310588e-05, "loss": 0.0, "step": 812 }, { "epoch": 0.47288061654791336, "grad_norm": NaN, "learning_rate": 8.915854690636269e-05, "loss": 0.0, "step": 813 }, { "epoch": 0.473462265522757, "grad_norm": NaN, "learning_rate": 8.912926859366769e-05, "loss": 0.0, "step": 814 }, { "epoch": 0.4740439144976007, "grad_norm": NaN, "learning_rate": 8.909995562095511e-05, "loss": 0.0, "step": 815 }, { "epoch": 0.4746255634724444, "grad_norm": NaN, "learning_rate": 8.907060801418984e-05, "loss": 0.0, "step": 816 }, { "epoch": 0.4752072124472881, "grad_norm": NaN, "learning_rate": 8.904122579936752e-05, "loss": 0.0, "step": 817 }, { "epoch": 0.4757888614221317, "grad_norm": NaN, "learning_rate": 8.901180900251433e-05, "loss": 0.0, "step": 818 }, { "epoch": 0.4763705103969754, "grad_norm": NaN, "learning_rate": 8.898235764968722e-05, "loss": 0.0, "step": 819 }, { "epoch": 0.4769521593718191, "grad_norm": NaN, "learning_rate": 8.895287176697364e-05, "loss": 0.0, "step": 820 }, { "epoch": 0.4775338083466628, "grad_norm": NaN, "learning_rate": 8.892335138049168e-05, "loss": 0.0, "step": 821 }, { "epoch": 0.47811545732150645, "grad_norm": NaN, "learning_rate": 8.889379651638997e-05, "loss": 0.0, "step": 822 }, { "epoch": 0.47869710629635015, "grad_norm": NaN, "learning_rate": 8.886420720084769e-05, "loss": 0.0, "step": 823 }, { "epoch": 0.47927875527119385, "grad_norm": NaN, "learning_rate": 8.883458346007456e-05, "loss": 0.0, "step": 824 }, { "epoch": 0.47986040424603754, "grad_norm": NaN, "learning_rate": 8.880492532031073e-05, "loss": 0.0, "step": 825 }, { "epoch": 0.4804420532208812, "grad_norm": NaN, "learning_rate": 8.877523280782688e-05, "loss": 0.0, "step": 826 }, { "epoch": 0.4810237021957249, "grad_norm": NaN, "learning_rate": 8.87455059489241e-05, "loss": 0.0, "step": 827 }, { "epoch": 0.4816053511705686, "grad_norm": NaN, "learning_rate": 8.871574476993394e-05, "loss": 0.0, "step": 828 }, { "epoch": 0.48218700014541227, "grad_norm": NaN, "learning_rate": 8.868594929721832e-05, "loss": 0.0, "step": 829 }, { "epoch": 0.4827686491202559, "grad_norm": NaN, "learning_rate": 8.865611955716954e-05, "loss": 0.0, "step": 830 }, { "epoch": 0.4833502980950996, "grad_norm": NaN, "learning_rate": 8.862625557621028e-05, "loss": 0.0, "step": 831 }, { "epoch": 0.4839319470699433, "grad_norm": NaN, "learning_rate": 8.859635738079349e-05, "loss": 0.0, "step": 832 }, { "epoch": 0.484513596044787, "grad_norm": NaN, "learning_rate": 8.856642499740248e-05, "loss": 0.0, "step": 833 }, { "epoch": 0.48509524501963064, "grad_norm": NaN, "learning_rate": 8.853645845255085e-05, "loss": 0.0, "step": 834 }, { "epoch": 0.48567689399447433, "grad_norm": NaN, "learning_rate": 8.850645777278242e-05, "loss": 0.0, "step": 835 }, { "epoch": 0.48625854296931803, "grad_norm": NaN, "learning_rate": 8.847642298467125e-05, "loss": 0.0, "step": 836 }, { "epoch": 0.48684019194416167, "grad_norm": NaN, "learning_rate": 8.844635411482167e-05, "loss": 0.0, "step": 837 }, { "epoch": 0.48742184091900537, "grad_norm": NaN, "learning_rate": 8.841625118986812e-05, "loss": 0.0, "step": 838 }, { "epoch": 0.48800348989384906, "grad_norm": NaN, "learning_rate": 8.838611423647525e-05, "loss": 0.0, "step": 839 }, { "epoch": 0.48858513886869276, "grad_norm": NaN, "learning_rate": 8.835594328133785e-05, "loss": 0.0, "step": 840 }, { "epoch": 0.4891667878435364, "grad_norm": NaN, "learning_rate": 8.83257383511808e-05, "loss": 0.0, "step": 841 }, { "epoch": 0.4897484368183801, "grad_norm": NaN, "learning_rate": 8.829549947275913e-05, "loss": 0.0, "step": 842 }, { "epoch": 0.4903300857932238, "grad_norm": NaN, "learning_rate": 8.826522667285788e-05, "loss": 0.0, "step": 843 }, { "epoch": 0.4909117347680675, "grad_norm": NaN, "learning_rate": 8.823491997829216e-05, "loss": 0.0, "step": 844 }, { "epoch": 0.4914933837429111, "grad_norm": NaN, "learning_rate": 8.820457941590714e-05, "loss": 0.0, "step": 845 }, { "epoch": 0.4920750327177548, "grad_norm": NaN, "learning_rate": 8.817420501257792e-05, "loss": 0.0, "step": 846 }, { "epoch": 0.4926566816925985, "grad_norm": NaN, "learning_rate": 8.814379679520962e-05, "loss": 0.0, "step": 847 }, { "epoch": 0.4932383306674422, "grad_norm": NaN, "learning_rate": 8.811335479073732e-05, "loss": 0.0, "step": 848 }, { "epoch": 0.49381997964228586, "grad_norm": NaN, "learning_rate": 8.808287902612599e-05, "loss": 0.0, "step": 849 }, { "epoch": 0.49440162861712955, "grad_norm": NaN, "learning_rate": 8.805236952837055e-05, "loss": 0.0, "step": 850 }, { "epoch": 0.49498327759197325, "grad_norm": NaN, "learning_rate": 8.802182632449576e-05, "loss": 0.0, "step": 851 }, { "epoch": 0.49556492656681694, "grad_norm": NaN, "learning_rate": 8.799124944155626e-05, "loss": 0.0, "step": 852 }, { "epoch": 0.4961465755416606, "grad_norm": NaN, "learning_rate": 8.79606389066365e-05, "loss": 0.0, "step": 853 }, { "epoch": 0.4967282245165043, "grad_norm": NaN, "learning_rate": 8.792999474685077e-05, "loss": 0.0, "step": 854 }, { "epoch": 0.497309873491348, "grad_norm": NaN, "learning_rate": 8.789931698934313e-05, "loss": 0.0, "step": 855 }, { "epoch": 0.49789152246619167, "grad_norm": NaN, "learning_rate": 8.786860566128738e-05, "loss": 0.0, "step": 856 }, { "epoch": 0.4984731714410353, "grad_norm": NaN, "learning_rate": 8.783786078988709e-05, "loss": 0.0, "step": 857 }, { "epoch": 0.499054820415879, "grad_norm": NaN, "learning_rate": 8.780708240237551e-05, "loss": 0.0, "step": 858 }, { "epoch": 0.4996364693907227, "grad_norm": NaN, "learning_rate": 8.777627052601562e-05, "loss": 0.0, "step": 859 }, { "epoch": 0.5002181183655664, "grad_norm": NaN, "learning_rate": 8.774542518810001e-05, "loss": 0.0, "step": 860 }, { "epoch": 0.5007997673404101, "grad_norm": NaN, "learning_rate": 8.771454641595096e-05, "loss": 0.0, "step": 861 }, { "epoch": 0.5013814163152538, "grad_norm": NaN, "learning_rate": 8.768363423692031e-05, "loss": 0.0, "step": 862 }, { "epoch": 0.5019630652900974, "grad_norm": NaN, "learning_rate": 8.765268867838958e-05, "loss": 0.0, "step": 863 }, { "epoch": 0.5025447142649411, "grad_norm": NaN, "learning_rate": 8.762170976776975e-05, "loss": 0.0, "step": 864 }, { "epoch": 0.5031263632397848, "grad_norm": NaN, "learning_rate": 8.759069753250142e-05, "loss": 0.0, "step": 865 }, { "epoch": 0.5037080122146285, "grad_norm": NaN, "learning_rate": 8.755965200005466e-05, "loss": 0.0, "step": 866 }, { "epoch": 0.5042896611894722, "grad_norm": NaN, "learning_rate": 8.752857319792907e-05, "loss": 0.0, "step": 867 }, { "epoch": 0.5048713101643159, "grad_norm": NaN, "learning_rate": 8.749746115365371e-05, "loss": 0.0, "step": 868 }, { "epoch": 0.5054529591391596, "grad_norm": NaN, "learning_rate": 8.746631589478709e-05, "loss": 0.0, "step": 869 }, { "epoch": 0.5060346081140032, "grad_norm": NaN, "learning_rate": 8.743513744891711e-05, "loss": 0.0, "step": 870 }, { "epoch": 0.5066162570888468, "grad_norm": NaN, "learning_rate": 8.74039258436611e-05, "loss": 0.0, "step": 871 }, { "epoch": 0.5071979060636905, "grad_norm": NaN, "learning_rate": 8.737268110666575e-05, "loss": 0.0, "step": 872 }, { "epoch": 0.5077795550385342, "grad_norm": NaN, "learning_rate": 8.734140326560711e-05, "loss": 0.0, "step": 873 }, { "epoch": 0.5083612040133779, "grad_norm": NaN, "learning_rate": 8.731009234819051e-05, "loss": 0.0, "step": 874 }, { "epoch": 0.5089428529882216, "grad_norm": NaN, "learning_rate": 8.727874838215063e-05, "loss": 0.0, "step": 875 }, { "epoch": 0.5095245019630653, "grad_norm": NaN, "learning_rate": 8.724737139525141e-05, "loss": 0.0, "step": 876 }, { "epoch": 0.510106150937909, "grad_norm": NaN, "learning_rate": 8.721596141528603e-05, "loss": 0.0, "step": 877 }, { "epoch": 0.5106877999127527, "grad_norm": NaN, "learning_rate": 8.718451847007689e-05, "loss": 0.0, "step": 878 }, { "epoch": 0.5112694488875963, "grad_norm": NaN, "learning_rate": 8.715304258747559e-05, "loss": 0.0, "step": 879 }, { "epoch": 0.51185109786244, "grad_norm": NaN, "learning_rate": 8.712153379536293e-05, "loss": 0.0, "step": 880 }, { "epoch": 0.5124327468372837, "grad_norm": NaN, "learning_rate": 8.708999212164884e-05, "loss": 0.0, "step": 881 }, { "epoch": 0.5130143958121274, "grad_norm": NaN, "learning_rate": 8.705841759427237e-05, "loss": 0.0, "step": 882 }, { "epoch": 0.5135960447869711, "grad_norm": NaN, "learning_rate": 8.702681024120169e-05, "loss": 0.0, "step": 883 }, { "epoch": 0.5141776937618148, "grad_norm": NaN, "learning_rate": 8.699517009043404e-05, "loss": 0.0, "step": 884 }, { "epoch": 0.5147593427366585, "grad_norm": NaN, "learning_rate": 8.696349716999568e-05, "loss": 0.0, "step": 885 }, { "epoch": 0.5153409917115022, "grad_norm": NaN, "learning_rate": 8.693179150794198e-05, "loss": 0.0, "step": 886 }, { "epoch": 0.5159226406863457, "grad_norm": NaN, "learning_rate": 8.690005313235721e-05, "loss": 0.0, "step": 887 }, { "epoch": 0.5165042896611894, "grad_norm": NaN, "learning_rate": 8.686828207135469e-05, "loss": 0.0, "step": 888 }, { "epoch": 0.5170859386360331, "grad_norm": NaN, "learning_rate": 8.683647835307664e-05, "loss": 0.0, "step": 889 }, { "epoch": 0.5176675876108768, "grad_norm": NaN, "learning_rate": 8.680464200569427e-05, "loss": 0.0, "step": 890 }, { "epoch": 0.5182492365857205, "grad_norm": NaN, "learning_rate": 8.677277305740763e-05, "loss": 0.0, "step": 891 }, { "epoch": 0.5188308855605642, "grad_norm": NaN, "learning_rate": 8.674087153644569e-05, "loss": 0.0, "step": 892 }, { "epoch": 0.5194125345354079, "grad_norm": NaN, "learning_rate": 8.670893747106624e-05, "loss": 0.0, "step": 893 }, { "epoch": 0.5199941835102516, "grad_norm": NaN, "learning_rate": 8.667697088955593e-05, "loss": 0.0, "step": 894 }, { "epoch": 0.5205758324850952, "grad_norm": NaN, "learning_rate": 8.664497182023017e-05, "loss": 0.0, "step": 895 }, { "epoch": 0.5211574814599389, "grad_norm": NaN, "learning_rate": 8.66129402914332e-05, "loss": 0.0, "step": 896 }, { "epoch": 0.5217391304347826, "grad_norm": NaN, "learning_rate": 8.658087633153798e-05, "loss": 0.0, "step": 897 }, { "epoch": 0.5223207794096263, "grad_norm": NaN, "learning_rate": 8.654877996894619e-05, "loss": 0.0, "step": 898 }, { "epoch": 0.52290242838447, "grad_norm": NaN, "learning_rate": 8.65166512320882e-05, "loss": 0.0, "step": 899 }, { "epoch": 0.5234840773593137, "grad_norm": NaN, "learning_rate": 8.648449014942315e-05, "loss": 0.0, "step": 900 }, { "epoch": 0.5240657263341574, "grad_norm": NaN, "learning_rate": 8.64522967494387e-05, "loss": 0.0, "step": 901 }, { "epoch": 0.524647375309001, "grad_norm": NaN, "learning_rate": 8.642007106065121e-05, "loss": 0.0, "step": 902 }, { "epoch": 0.5252290242838447, "grad_norm": NaN, "learning_rate": 8.638781311160565e-05, "loss": 0.0, "step": 903 }, { "epoch": 0.5258106732586884, "grad_norm": NaN, "learning_rate": 8.63555229308755e-05, "loss": 0.0, "step": 904 }, { "epoch": 0.526392322233532, "grad_norm": NaN, "learning_rate": 8.632320054706289e-05, "loss": 0.0, "step": 905 }, { "epoch": 0.5269739712083757, "grad_norm": NaN, "learning_rate": 8.629084598879837e-05, "loss": 0.0, "step": 906 }, { "epoch": 0.5275556201832194, "grad_norm": NaN, "learning_rate": 8.625845928474107e-05, "loss": 0.0, "step": 907 }, { "epoch": 0.5281372691580631, "grad_norm": NaN, "learning_rate": 8.622604046357856e-05, "loss": 0.0, "step": 908 }, { "epoch": 0.5287189181329068, "grad_norm": NaN, "learning_rate": 8.619358955402685e-05, "loss": 0.0, "step": 909 }, { "epoch": 0.5293005671077504, "grad_norm": NaN, "learning_rate": 8.616110658483038e-05, "loss": 0.0, "step": 910 }, { "epoch": 0.5298822160825941, "grad_norm": NaN, "learning_rate": 8.6128591584762e-05, "loss": 0.0, "step": 911 }, { "epoch": 0.5304638650574378, "grad_norm": NaN, "learning_rate": 8.609604458262292e-05, "loss": 0.0, "step": 912 }, { "epoch": 0.5310455140322815, "grad_norm": NaN, "learning_rate": 8.606346560724272e-05, "loss": 0.0, "step": 913 }, { "epoch": 0.5316271630071252, "grad_norm": NaN, "learning_rate": 8.603085468747925e-05, "loss": 0.0, "step": 914 }, { "epoch": 0.5322088119819689, "grad_norm": NaN, "learning_rate": 8.599821185221872e-05, "loss": 0.0, "step": 915 }, { "epoch": 0.5327904609568126, "grad_norm": NaN, "learning_rate": 8.596553713037559e-05, "loss": 0.0, "step": 916 }, { "epoch": 0.5333721099316563, "grad_norm": NaN, "learning_rate": 8.59328305508925e-05, "loss": 0.0, "step": 917 }, { "epoch": 0.5339537589064999, "grad_norm": NaN, "learning_rate": 8.59000921427404e-05, "loss": 0.0, "step": 918 }, { "epoch": 0.5345354078813436, "grad_norm": NaN, "learning_rate": 8.58673219349184e-05, "loss": 0.0, "step": 919 }, { "epoch": 0.5351170568561873, "grad_norm": NaN, "learning_rate": 8.583451995645375e-05, "loss": 0.0, "step": 920 }, { "epoch": 0.535698705831031, "grad_norm": NaN, "learning_rate": 8.58016862364019e-05, "loss": 0.0, "step": 921 }, { "epoch": 0.5362803548058747, "grad_norm": NaN, "learning_rate": 8.576882080384634e-05, "loss": 0.0, "step": 922 }, { "epoch": 0.5368620037807184, "grad_norm": NaN, "learning_rate": 8.573592368789873e-05, "loss": 0.0, "step": 923 }, { "epoch": 0.537443652755562, "grad_norm": NaN, "learning_rate": 8.570299491769874e-05, "loss": 0.0, "step": 924 }, { "epoch": 0.5380253017304057, "grad_norm": NaN, "learning_rate": 8.567003452241407e-05, "loss": 0.0, "step": 925 }, { "epoch": 0.5386069507052493, "grad_norm": NaN, "learning_rate": 8.563704253124049e-05, "loss": 0.0, "step": 926 }, { "epoch": 0.539188599680093, "grad_norm": NaN, "learning_rate": 8.560401897340171e-05, "loss": 0.0, "step": 927 }, { "epoch": 0.5397702486549367, "grad_norm": NaN, "learning_rate": 8.557096387814945e-05, "loss": 0.0, "step": 928 }, { "epoch": 0.5403518976297804, "grad_norm": NaN, "learning_rate": 8.553787727476328e-05, "loss": 0.0, "step": 929 }, { "epoch": 0.5409335466046241, "grad_norm": NaN, "learning_rate": 8.550475919255077e-05, "loss": 0.0, "step": 930 }, { "epoch": 0.5415151955794678, "grad_norm": NaN, "learning_rate": 8.54716096608473e-05, "loss": 0.0, "step": 931 }, { "epoch": 0.5420968445543115, "grad_norm": NaN, "learning_rate": 8.54384287090162e-05, "loss": 0.0, "step": 932 }, { "epoch": 0.5426784935291552, "grad_norm": NaN, "learning_rate": 8.540521636644852e-05, "loss": 0.0, "step": 933 }, { "epoch": 0.5432601425039988, "grad_norm": NaN, "learning_rate": 8.537197266256316e-05, "loss": 0.0, "step": 934 }, { "epoch": 0.5438417914788425, "grad_norm": NaN, "learning_rate": 8.533869762680686e-05, "loss": 0.0, "step": 935 }, { "epoch": 0.5444234404536862, "grad_norm": NaN, "learning_rate": 8.530539128865402e-05, "loss": 0.0, "step": 936 }, { "epoch": 0.5450050894285299, "grad_norm": NaN, "learning_rate": 8.527205367760683e-05, "loss": 0.0, "step": 937 }, { "epoch": 0.5455867384033736, "grad_norm": NaN, "learning_rate": 8.523868482319515e-05, "loss": 0.0, "step": 938 }, { "epoch": 0.5461683873782173, "grad_norm": NaN, "learning_rate": 8.520528475497654e-05, "loss": 0.0, "step": 939 }, { "epoch": 0.546750036353061, "grad_norm": NaN, "learning_rate": 8.517185350253615e-05, "loss": 0.0, "step": 940 }, { "epoch": 0.5473316853279047, "grad_norm": NaN, "learning_rate": 8.513839109548683e-05, "loss": 0.0, "step": 941 }, { "epoch": 0.5479133343027482, "grad_norm": NaN, "learning_rate": 8.510489756346899e-05, "loss": 0.0, "step": 942 }, { "epoch": 0.5484949832775919, "grad_norm": NaN, "learning_rate": 8.50713729361506e-05, "loss": 0.0, "step": 943 }, { "epoch": 0.5490766322524356, "grad_norm": NaN, "learning_rate": 8.503781724322721e-05, "loss": 0.0, "step": 944 }, { "epoch": 0.5496582812272793, "grad_norm": NaN, "learning_rate": 8.500423051442183e-05, "loss": 0.0, "step": 945 }, { "epoch": 0.550239930202123, "grad_norm": NaN, "learning_rate": 8.4970612779485e-05, "loss": 0.0, "step": 946 }, { "epoch": 0.5508215791769667, "grad_norm": NaN, "learning_rate": 8.493696406819473e-05, "loss": 0.0, "step": 947 }, { "epoch": 0.5514032281518104, "grad_norm": NaN, "learning_rate": 8.490328441035646e-05, "loss": 0.0, "step": 948 }, { "epoch": 0.5519848771266541, "grad_norm": NaN, "learning_rate": 8.4869573835803e-05, "loss": 0.0, "step": 949 }, { "epoch": 0.5525665261014977, "grad_norm": NaN, "learning_rate": 8.483583237439465e-05, "loss": 0.0, "step": 950 }, { "epoch": 0.5531481750763414, "grad_norm": NaN, "learning_rate": 8.480206005601893e-05, "loss": 0.0, "step": 951 }, { "epoch": 0.5537298240511851, "grad_norm": NaN, "learning_rate": 8.476825691059082e-05, "loss": 0.0, "step": 952 }, { "epoch": 0.5543114730260288, "grad_norm": NaN, "learning_rate": 8.473442296805254e-05, "loss": 0.0, "step": 953 }, { "epoch": 0.5548931220008725, "grad_norm": NaN, "learning_rate": 8.47005582583736e-05, "loss": 0.0, "step": 954 }, { "epoch": 0.5554747709757162, "grad_norm": NaN, "learning_rate": 8.466666281155074e-05, "loss": 0.0, "step": 955 }, { "epoch": 0.5560564199505599, "grad_norm": NaN, "learning_rate": 8.463273665760796e-05, "loss": 0.0, "step": 956 }, { "epoch": 0.5566380689254036, "grad_norm": NaN, "learning_rate": 8.459877982659648e-05, "loss": 0.0, "step": 957 }, { "epoch": 0.5572197179002472, "grad_norm": NaN, "learning_rate": 8.456479234859463e-05, "loss": 0.0, "step": 958 }, { "epoch": 0.5578013668750909, "grad_norm": NaN, "learning_rate": 8.453077425370792e-05, "loss": 0.0, "step": 959 }, { "epoch": 0.5583830158499345, "grad_norm": NaN, "learning_rate": 8.4496725572069e-05, "loss": 0.0, "step": 960 }, { "epoch": 0.5589646648247782, "grad_norm": NaN, "learning_rate": 8.44626463338376e-05, "loss": 0.0, "step": 961 }, { "epoch": 0.5595463137996219, "grad_norm": NaN, "learning_rate": 8.442853656920046e-05, "loss": 0.0, "step": 962 }, { "epoch": 0.5601279627744656, "grad_norm": NaN, "learning_rate": 8.439439630837147e-05, "loss": 0.0, "step": 963 }, { "epoch": 0.5607096117493093, "grad_norm": NaN, "learning_rate": 8.436022558159145e-05, "loss": 0.0, "step": 964 }, { "epoch": 0.5612912607241529, "grad_norm": NaN, "learning_rate": 8.43260244191282e-05, "loss": 0.0, "step": 965 }, { "epoch": 0.5618729096989966, "grad_norm": NaN, "learning_rate": 8.429179285127656e-05, "loss": 0.0, "step": 966 }, { "epoch": 0.5624545586738403, "grad_norm": NaN, "learning_rate": 8.42575309083582e-05, "loss": 0.0, "step": 967 }, { "epoch": 0.563036207648684, "grad_norm": NaN, "learning_rate": 8.422323862072177e-05, "loss": 0.0, "step": 968 }, { "epoch": 0.5636178566235277, "grad_norm": NaN, "learning_rate": 8.418891601874278e-05, "loss": 0.0, "step": 969 }, { "epoch": 0.5641995055983714, "grad_norm": NaN, "learning_rate": 8.415456313282357e-05, "loss": 0.0, "step": 970 }, { "epoch": 0.5647811545732151, "grad_norm": NaN, "learning_rate": 8.412017999339335e-05, "loss": 0.0, "step": 971 }, { "epoch": 0.5653628035480588, "grad_norm": NaN, "learning_rate": 8.408576663090805e-05, "loss": 0.0, "step": 972 }, { "epoch": 0.5659444525229024, "grad_norm": NaN, "learning_rate": 8.405132307585048e-05, "loss": 0.0, "step": 973 }, { "epoch": 0.5665261014977461, "grad_norm": NaN, "learning_rate": 8.401684935873011e-05, "loss": 0.0, "step": 974 }, { "epoch": 0.5671077504725898, "grad_norm": NaN, "learning_rate": 8.398234551008315e-05, "loss": 0.0, "step": 975 }, { "epoch": 0.5676893994474335, "grad_norm": NaN, "learning_rate": 8.394781156047249e-05, "loss": 0.0, "step": 976 }, { "epoch": 0.5682710484222772, "grad_norm": NaN, "learning_rate": 8.391324754048771e-05, "loss": 0.0, "step": 977 }, { "epoch": 0.5688526973971209, "grad_norm": NaN, "learning_rate": 8.3878653480745e-05, "loss": 0.0, "step": 978 }, { "epoch": 0.5694343463719646, "grad_norm": NaN, "learning_rate": 8.384402941188718e-05, "loss": 0.0, "step": 979 }, { "epoch": 0.5700159953468082, "grad_norm": NaN, "learning_rate": 8.380937536458365e-05, "loss": 0.0, "step": 980 }, { "epoch": 0.5705976443216518, "grad_norm": NaN, "learning_rate": 8.377469136953034e-05, "loss": 0.0, "step": 981 }, { "epoch": 0.5711792932964955, "grad_norm": NaN, "learning_rate": 8.373997745744973e-05, "loss": 0.0, "step": 982 }, { "epoch": 0.5717609422713392, "grad_norm": NaN, "learning_rate": 8.370523365909077e-05, "loss": 0.0, "step": 983 }, { "epoch": 0.5723425912461829, "grad_norm": NaN, "learning_rate": 8.367046000522894e-05, "loss": 0.0, "step": 984 }, { "epoch": 0.5729242402210266, "grad_norm": NaN, "learning_rate": 8.363565652666609e-05, "loss": 0.0, "step": 985 }, { "epoch": 0.5735058891958703, "grad_norm": NaN, "learning_rate": 8.360082325423057e-05, "loss": 0.0, "step": 986 }, { "epoch": 0.574087538170714, "grad_norm": NaN, "learning_rate": 8.356596021877707e-05, "loss": 0.0, "step": 987 }, { "epoch": 0.5746691871455577, "grad_norm": NaN, "learning_rate": 8.353106745118664e-05, "loss": 0.0, "step": 988 }, { "epoch": 0.5752508361204013, "grad_norm": NaN, "learning_rate": 8.349614498236669e-05, "loss": 0.0, "step": 989 }, { "epoch": 0.575832485095245, "grad_norm": NaN, "learning_rate": 8.346119284325091e-05, "loss": 0.0, "step": 990 }, { "epoch": 0.5764141340700887, "grad_norm": NaN, "learning_rate": 8.34262110647993e-05, "loss": 0.0, "step": 991 }, { "epoch": 0.5769957830449324, "grad_norm": NaN, "learning_rate": 8.339119967799814e-05, "loss": 0.0, "step": 992 }, { "epoch": 0.5775774320197761, "grad_norm": NaN, "learning_rate": 8.335615871385984e-05, "loss": 0.0, "step": 993 }, { "epoch": 0.5781590809946198, "grad_norm": NaN, "learning_rate": 8.33210882034231e-05, "loss": 0.0, "step": 994 }, { "epoch": 0.5787407299694635, "grad_norm": NaN, "learning_rate": 8.328598817775277e-05, "loss": 0.0, "step": 995 }, { "epoch": 0.5793223789443072, "grad_norm": NaN, "learning_rate": 8.325085866793982e-05, "loss": 0.0, "step": 996 }, { "epoch": 0.5799040279191507, "grad_norm": NaN, "learning_rate": 8.321569970510135e-05, "loss": 0.0, "step": 997 }, { "epoch": 0.5804856768939944, "grad_norm": NaN, "learning_rate": 8.318051132038055e-05, "loss": 0.0, "step": 998 }, { "epoch": 0.5810673258688381, "grad_norm": NaN, "learning_rate": 8.31452935449467e-05, "loss": 0.0, "step": 999 }, { "epoch": 0.5816489748436818, "grad_norm": NaN, "learning_rate": 8.311004640999504e-05, "loss": 0.0, "step": 1000 }, { "epoch": 0.5822306238185255, "grad_norm": NaN, "learning_rate": 8.307476994674689e-05, "loss": 0.0, "step": 1001 }, { "epoch": 0.5828122727933692, "grad_norm": NaN, "learning_rate": 8.303946418644951e-05, "loss": 0.0, "step": 1002 }, { "epoch": 0.5833939217682129, "grad_norm": NaN, "learning_rate": 8.300412916037614e-05, "loss": 0.0, "step": 1003 }, { "epoch": 0.5839755707430566, "grad_norm": NaN, "learning_rate": 8.296876489982591e-05, "loss": 0.0, "step": 1004 }, { "epoch": 0.5845572197179002, "grad_norm": NaN, "learning_rate": 8.293337143612387e-05, "loss": 0.0, "step": 1005 }, { "epoch": 0.5851388686927439, "grad_norm": NaN, "learning_rate": 8.289794880062089e-05, "loss": 0.0, "step": 1006 }, { "epoch": 0.5857205176675876, "grad_norm": NaN, "learning_rate": 8.286249702469378e-05, "loss": 0.0, "step": 1007 }, { "epoch": 0.5863021666424313, "grad_norm": NaN, "learning_rate": 8.282701613974504e-05, "loss": 0.0, "step": 1008 }, { "epoch": 0.586883815617275, "grad_norm": NaN, "learning_rate": 8.279150617720304e-05, "loss": 0.0, "step": 1009 }, { "epoch": 0.5874654645921187, "grad_norm": NaN, "learning_rate": 8.27559671685219e-05, "loss": 0.0, "step": 1010 }, { "epoch": 0.5880471135669624, "grad_norm": NaN, "learning_rate": 8.272039914518139e-05, "loss": 0.0, "step": 1011 }, { "epoch": 0.5886287625418061, "grad_norm": NaN, "learning_rate": 8.268480213868711e-05, "loss": 0.0, "step": 1012 }, { "epoch": 0.5892104115166497, "grad_norm": NaN, "learning_rate": 8.264917618057022e-05, "loss": 0.0, "step": 1013 }, { "epoch": 0.5897920604914934, "grad_norm": NaN, "learning_rate": 8.261352130238757e-05, "loss": 0.0, "step": 1014 }, { "epoch": 0.590373709466337, "grad_norm": NaN, "learning_rate": 8.257783753572164e-05, "loss": 0.0, "step": 1015 }, { "epoch": 0.5909553584411807, "grad_norm": NaN, "learning_rate": 8.254212491218045e-05, "loss": 0.0, "step": 1016 }, { "epoch": 0.5915370074160244, "grad_norm": NaN, "learning_rate": 8.250638346339766e-05, "loss": 0.0, "step": 1017 }, { "epoch": 0.5921186563908681, "grad_norm": NaN, "learning_rate": 8.247061322103237e-05, "loss": 0.0, "step": 1018 }, { "epoch": 0.5927003053657118, "grad_norm": NaN, "learning_rate": 8.243481421676926e-05, "loss": 0.0, "step": 1019 }, { "epoch": 0.5932819543405555, "grad_norm": NaN, "learning_rate": 8.239898648231845e-05, "loss": 0.0, "step": 1020 }, { "epoch": 0.5938636033153991, "grad_norm": NaN, "learning_rate": 8.236313004941552e-05, "loss": 0.0, "step": 1021 }, { "epoch": 0.5944452522902428, "grad_norm": NaN, "learning_rate": 8.232724494982145e-05, "loss": 0.0, "step": 1022 }, { "epoch": 0.5950269012650865, "grad_norm": NaN, "learning_rate": 8.229133121532267e-05, "loss": 0.0, "step": 1023 }, { "epoch": 0.5956085502399302, "grad_norm": NaN, "learning_rate": 8.22553888777309e-05, "loss": 0.0, "step": 1024 }, { "epoch": 0.5961901992147739, "grad_norm": NaN, "learning_rate": 8.221941796888322e-05, "loss": 0.0, "step": 1025 }, { "epoch": 0.5967718481896176, "grad_norm": NaN, "learning_rate": 8.218341852064206e-05, "loss": 0.0, "step": 1026 }, { "epoch": 0.5973534971644613, "grad_norm": NaN, "learning_rate": 8.21473905648951e-05, "loss": 0.0, "step": 1027 }, { "epoch": 0.597935146139305, "grad_norm": NaN, "learning_rate": 8.211133413355524e-05, "loss": 0.0, "step": 1028 }, { "epoch": 0.5985167951141486, "grad_norm": NaN, "learning_rate": 8.207524925856065e-05, "loss": 0.0, "step": 1029 }, { "epoch": 0.5990984440889923, "grad_norm": NaN, "learning_rate": 8.203913597187466e-05, "loss": 0.0, "step": 1030 }, { "epoch": 0.599680093063836, "grad_norm": NaN, "learning_rate": 8.200299430548581e-05, "loss": 0.0, "step": 1031 }, { "epoch": 0.6002617420386797, "grad_norm": NaN, "learning_rate": 8.196682429140773e-05, "loss": 0.0, "step": 1032 }, { "epoch": 0.6008433910135234, "grad_norm": NaN, "learning_rate": 8.19306259616792e-05, "loss": 0.0, "step": 1033 }, { "epoch": 0.601425039988367, "grad_norm": NaN, "learning_rate": 8.189439934836406e-05, "loss": 0.0, "step": 1034 }, { "epoch": 0.6020066889632107, "grad_norm": NaN, "learning_rate": 8.18581444835512e-05, "loss": 0.0, "step": 1035 }, { "epoch": 0.6025883379380543, "grad_norm": NaN, "learning_rate": 8.182186139935453e-05, "loss": 0.0, "step": 1036 }, { "epoch": 0.603169986912898, "grad_norm": NaN, "learning_rate": 8.1785550127913e-05, "loss": 0.0, "step": 1037 }, { "epoch": 0.6037516358877417, "grad_norm": NaN, "learning_rate": 8.174921070139045e-05, "loss": 0.0, "step": 1038 }, { "epoch": 0.6043332848625854, "grad_norm": NaN, "learning_rate": 8.171284315197573e-05, "loss": 0.0, "step": 1039 }, { "epoch": 0.6049149338374291, "grad_norm": NaN, "learning_rate": 8.167644751188261e-05, "loss": 0.0, "step": 1040 }, { "epoch": 0.6054965828122728, "grad_norm": NaN, "learning_rate": 8.164002381334966e-05, "loss": 0.0, "step": 1041 }, { "epoch": 0.6060782317871165, "grad_norm": NaN, "learning_rate": 8.160357208864036e-05, "loss": 0.0, "step": 1042 }, { "epoch": 0.6066598807619602, "grad_norm": NaN, "learning_rate": 8.156709237004301e-05, "loss": 0.0, "step": 1043 }, { "epoch": 0.6072415297368038, "grad_norm": NaN, "learning_rate": 8.15305846898707e-05, "loss": 0.0, "step": 1044 }, { "epoch": 0.6078231787116475, "grad_norm": NaN, "learning_rate": 8.149404908046128e-05, "loss": 0.0, "step": 1045 }, { "epoch": 0.6084048276864912, "grad_norm": NaN, "learning_rate": 8.145748557417739e-05, "loss": 0.0, "step": 1046 }, { "epoch": 0.6089864766613349, "grad_norm": NaN, "learning_rate": 8.142089420340631e-05, "loss": 0.0, "step": 1047 }, { "epoch": 0.6095681256361786, "grad_norm": NaN, "learning_rate": 8.138427500056002e-05, "loss": 0.0, "step": 1048 }, { "epoch": 0.6101497746110223, "grad_norm": NaN, "learning_rate": 8.13476279980752e-05, "loss": 0.0, "step": 1049 }, { "epoch": 0.610731423585866, "grad_norm": NaN, "learning_rate": 8.13109532284131e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.6113130725607097, "grad_norm": NaN, "learning_rate": 8.127425072405959e-05, "loss": 0.0, "step": 1051 }, { "epoch": 0.6118947215355532, "grad_norm": NaN, "learning_rate": 8.12375205175251e-05, "loss": 0.0, "step": 1052 }, { "epoch": 0.6124763705103969, "grad_norm": NaN, "learning_rate": 8.12007626413446e-05, "loss": 0.0, "step": 1053 }, { "epoch": 0.6130580194852406, "grad_norm": NaN, "learning_rate": 8.116397712807757e-05, "loss": 0.0, "step": 1054 }, { "epoch": 0.6136396684600843, "grad_norm": NaN, "learning_rate": 8.112716401030798e-05, "loss": 0.0, "step": 1055 }, { "epoch": 0.614221317434928, "grad_norm": NaN, "learning_rate": 8.109032332064421e-05, "loss": 0.0, "step": 1056 }, { "epoch": 0.6148029664097717, "grad_norm": NaN, "learning_rate": 8.105345509171915e-05, "loss": 0.0, "step": 1057 }, { "epoch": 0.6153846153846154, "grad_norm": NaN, "learning_rate": 8.101655935618997e-05, "loss": 0.0, "step": 1058 }, { "epoch": 0.6159662643594591, "grad_norm": NaN, "learning_rate": 8.09796361467383e-05, "loss": 0.0, "step": 1059 }, { "epoch": 0.6165479133343027, "grad_norm": NaN, "learning_rate": 8.094268549607008e-05, "loss": 0.0, "step": 1060 }, { "epoch": 0.6171295623091464, "grad_norm": NaN, "learning_rate": 8.090570743691548e-05, "loss": 0.0, "step": 1061 }, { "epoch": 0.6177112112839901, "grad_norm": NaN, "learning_rate": 8.08687020020291e-05, "loss": 0.0, "step": 1062 }, { "epoch": 0.6182928602588338, "grad_norm": NaN, "learning_rate": 8.083166922418963e-05, "loss": 0.0, "step": 1063 }, { "epoch": 0.6188745092336775, "grad_norm": NaN, "learning_rate": 8.079460913620011e-05, "loss": 0.0, "step": 1064 }, { "epoch": 0.6194561582085212, "grad_norm": NaN, "learning_rate": 8.075752177088769e-05, "loss": 0.0, "step": 1065 }, { "epoch": 0.6200378071833649, "grad_norm": NaN, "learning_rate": 8.07204071611037e-05, "loss": 0.0, "step": 1066 }, { "epoch": 0.6206194561582086, "grad_norm": NaN, "learning_rate": 8.06832653397236e-05, "loss": 0.0, "step": 1067 }, { "epoch": 0.6212011051330522, "grad_norm": NaN, "learning_rate": 8.0646096339647e-05, "loss": 0.0, "step": 1068 }, { "epoch": 0.6217827541078959, "grad_norm": NaN, "learning_rate": 8.06089001937975e-05, "loss": 0.0, "step": 1069 }, { "epoch": 0.6223644030827395, "grad_norm": NaN, "learning_rate": 8.057167693512282e-05, "loss": 0.0, "step": 1070 }, { "epoch": 0.6229460520575832, "grad_norm": NaN, "learning_rate": 8.053442659659465e-05, "loss": 0.0, "step": 1071 }, { "epoch": 0.6235277010324269, "grad_norm": NaN, "learning_rate": 8.04971492112087e-05, "loss": 0.0, "step": 1072 }, { "epoch": 0.6241093500072706, "grad_norm": NaN, "learning_rate": 8.045984481198463e-05, "loss": 0.0, "step": 1073 }, { "epoch": 0.6246909989821143, "grad_norm": NaN, "learning_rate": 8.0422513431966e-05, "loss": 0.0, "step": 1074 }, { "epoch": 0.625272647956958, "grad_norm": NaN, "learning_rate": 8.038515510422029e-05, "loss": 0.0, "step": 1075 }, { "epoch": 0.6258542969318016, "grad_norm": NaN, "learning_rate": 8.034776986183886e-05, "loss": 0.0, "step": 1076 }, { "epoch": 0.6264359459066453, "grad_norm": NaN, "learning_rate": 8.03103577379369e-05, "loss": 0.0, "step": 1077 }, { "epoch": 0.627017594881489, "grad_norm": NaN, "learning_rate": 8.02729187656534e-05, "loss": 0.0, "step": 1078 }, { "epoch": 0.6275992438563327, "grad_norm": NaN, "learning_rate": 8.023545297815116e-05, "loss": 0.0, "step": 1079 }, { "epoch": 0.6281808928311764, "grad_norm": NaN, "learning_rate": 8.01979604086167e-05, "loss": 0.0, "step": 1080 }, { "epoch": 0.6287625418060201, "grad_norm": NaN, "learning_rate": 8.016044109026027e-05, "loss": 0.0, "step": 1081 }, { "epoch": 0.6293441907808638, "grad_norm": NaN, "learning_rate": 8.012289505631585e-05, "loss": 0.0, "step": 1082 }, { "epoch": 0.6299258397557075, "grad_norm": NaN, "learning_rate": 8.008532234004104e-05, "loss": 0.0, "step": 1083 }, { "epoch": 0.6305074887305511, "grad_norm": NaN, "learning_rate": 8.00477229747171e-05, "loss": 0.0, "step": 1084 }, { "epoch": 0.6310891377053948, "grad_norm": NaN, "learning_rate": 8.001009699364886e-05, "loss": 0.0, "step": 1085 }, { "epoch": 0.6316707866802385, "grad_norm": NaN, "learning_rate": 7.997244443016479e-05, "loss": 0.0, "step": 1086 }, { "epoch": 0.6322524356550822, "grad_norm": NaN, "learning_rate": 7.993476531761683e-05, "loss": 0.0, "step": 1087 }, { "epoch": 0.6328340846299259, "grad_norm": NaN, "learning_rate": 7.989705968938051e-05, "loss": 0.0, "step": 1088 }, { "epoch": 0.6334157336047695, "grad_norm": NaN, "learning_rate": 7.98593275788548e-05, "loss": 0.0, "step": 1089 }, { "epoch": 0.6339973825796132, "grad_norm": NaN, "learning_rate": 7.982156901946212e-05, "loss": 0.0, "step": 1090 }, { "epoch": 0.6345790315544569, "grad_norm": NaN, "learning_rate": 7.978378404464834e-05, "loss": 0.0, "step": 1091 }, { "epoch": 0.6351606805293005, "grad_norm": NaN, "learning_rate": 7.974597268788277e-05, "loss": 0.0, "step": 1092 }, { "epoch": 0.6357423295041442, "grad_norm": NaN, "learning_rate": 7.970813498265799e-05, "loss": 0.0, "step": 1093 }, { "epoch": 0.6363239784789879, "grad_norm": NaN, "learning_rate": 7.967027096249001e-05, "loss": 0.0, "step": 1094 }, { "epoch": 0.6369056274538316, "grad_norm": NaN, "learning_rate": 7.96323806609181e-05, "loss": 0.0, "step": 1095 }, { "epoch": 0.6374872764286753, "grad_norm": NaN, "learning_rate": 7.959446411150481e-05, "loss": 0.0, "step": 1096 }, { "epoch": 0.638068925403519, "grad_norm": NaN, "learning_rate": 7.955652134783598e-05, "loss": 0.0, "step": 1097 }, { "epoch": 0.6386505743783627, "grad_norm": NaN, "learning_rate": 7.951855240352062e-05, "loss": 0.0, "step": 1098 }, { "epoch": 0.6392322233532063, "grad_norm": NaN, "learning_rate": 7.948055731219095e-05, "loss": 0.0, "step": 1099 }, { "epoch": 0.63981387232805, "grad_norm": NaN, "learning_rate": 7.944253610750236e-05, "loss": 0.0, "step": 1100 }, { "epoch": 0.6403955213028937, "grad_norm": NaN, "learning_rate": 7.940448882313338e-05, "loss": 0.0, "step": 1101 }, { "epoch": 0.6409771702777374, "grad_norm": NaN, "learning_rate": 7.93664154927856e-05, "loss": 0.0, "step": 1102 }, { "epoch": 0.6415588192525811, "grad_norm": NaN, "learning_rate": 7.932831615018373e-05, "loss": 0.0, "step": 1103 }, { "epoch": 0.6421404682274248, "grad_norm": NaN, "learning_rate": 7.929019082907548e-05, "loss": 0.0, "step": 1104 }, { "epoch": 0.6427221172022685, "grad_norm": NaN, "learning_rate": 7.925203956323159e-05, "loss": 0.0, "step": 1105 }, { "epoch": 0.6433037661771122, "grad_norm": NaN, "learning_rate": 7.921386238644579e-05, "loss": 0.0, "step": 1106 }, { "epoch": 0.6438854151519557, "grad_norm": NaN, "learning_rate": 7.917565933253475e-05, "loss": 0.0, "step": 1107 }, { "epoch": 0.6444670641267994, "grad_norm": NaN, "learning_rate": 7.913743043533806e-05, "loss": 0.0, "step": 1108 }, { "epoch": 0.6450487131016431, "grad_norm": NaN, "learning_rate": 7.90991757287182e-05, "loss": 0.0, "step": 1109 }, { "epoch": 0.6456303620764868, "grad_norm": NaN, "learning_rate": 7.906089524656048e-05, "loss": 0.0, "step": 1110 }, { "epoch": 0.6462120110513305, "grad_norm": NaN, "learning_rate": 7.902258902277315e-05, "loss": 0.0, "step": 1111 }, { "epoch": 0.6467936600261742, "grad_norm": NaN, "learning_rate": 7.898425709128716e-05, "loss": 0.0, "step": 1112 }, { "epoch": 0.6473753090010179, "grad_norm": NaN, "learning_rate": 7.894589948605625e-05, "loss": 0.0, "step": 1113 }, { "epoch": 0.6479569579758616, "grad_norm": NaN, "learning_rate": 7.890751624105691e-05, "loss": 0.0, "step": 1114 }, { "epoch": 0.6485386069507052, "grad_norm": NaN, "learning_rate": 7.886910739028835e-05, "loss": 0.0, "step": 1115 }, { "epoch": 0.6491202559255489, "grad_norm": NaN, "learning_rate": 7.883067296777247e-05, "loss": 0.0, "step": 1116 }, { "epoch": 0.6497019049003926, "grad_norm": NaN, "learning_rate": 7.879221300755378e-05, "loss": 0.0, "step": 1117 }, { "epoch": 0.6502835538752363, "grad_norm": NaN, "learning_rate": 7.875372754369944e-05, "loss": 0.0, "step": 1118 }, { "epoch": 0.65086520285008, "grad_norm": NaN, "learning_rate": 7.871521661029919e-05, "loss": 0.0, "step": 1119 }, { "epoch": 0.6514468518249237, "grad_norm": NaN, "learning_rate": 7.867668024146537e-05, "loss": 0.0, "step": 1120 }, { "epoch": 0.6520285007997674, "grad_norm": NaN, "learning_rate": 7.863811847133279e-05, "loss": 0.0, "step": 1121 }, { "epoch": 0.6526101497746111, "grad_norm": NaN, "learning_rate": 7.859953133405879e-05, "loss": 0.0, "step": 1122 }, { "epoch": 0.6531917987494547, "grad_norm": NaN, "learning_rate": 7.856091886382318e-05, "loss": 0.0, "step": 1123 }, { "epoch": 0.6537734477242984, "grad_norm": NaN, "learning_rate": 7.852228109482822e-05, "loss": 0.0, "step": 1124 }, { "epoch": 0.654355096699142, "grad_norm": NaN, "learning_rate": 7.848361806129854e-05, "loss": 0.0, "step": 1125 }, { "epoch": 0.6549367456739857, "grad_norm": NaN, "learning_rate": 7.844492979748119e-05, "loss": 0.0, "step": 1126 }, { "epoch": 0.6555183946488294, "grad_norm": NaN, "learning_rate": 7.840621633764556e-05, "loss": 0.0, "step": 1127 }, { "epoch": 0.6561000436236731, "grad_norm": NaN, "learning_rate": 7.836747771608334e-05, "loss": 0.0, "step": 1128 }, { "epoch": 0.6566816925985168, "grad_norm": NaN, "learning_rate": 7.832871396710852e-05, "loss": 0.0, "step": 1129 }, { "epoch": 0.6572633415733605, "grad_norm": NaN, "learning_rate": 7.828992512505737e-05, "loss": 0.0, "step": 1130 }, { "epoch": 0.6578449905482041, "grad_norm": NaN, "learning_rate": 7.825111122428832e-05, "loss": 0.0, "step": 1131 }, { "epoch": 0.6584266395230478, "grad_norm": NaN, "learning_rate": 7.821227229918207e-05, "loss": 0.0, "step": 1132 }, { "epoch": 0.6590082884978915, "grad_norm": NaN, "learning_rate": 7.817340838414144e-05, "loss": 0.0, "step": 1133 }, { "epoch": 0.6595899374727352, "grad_norm": NaN, "learning_rate": 7.813451951359143e-05, "loss": 0.0, "step": 1134 }, { "epoch": 0.6601715864475789, "grad_norm": NaN, "learning_rate": 7.809560572197909e-05, "loss": 0.0, "step": 1135 }, { "epoch": 0.6607532354224226, "grad_norm": NaN, "learning_rate": 7.805666704377358e-05, "loss": 0.0, "step": 1136 }, { "epoch": 0.6613348843972663, "grad_norm": NaN, "learning_rate": 7.801770351346609e-05, "loss": 0.0, "step": 1137 }, { "epoch": 0.66191653337211, "grad_norm": NaN, "learning_rate": 7.797871516556985e-05, "loss": 0.0, "step": 1138 }, { "epoch": 0.6624981823469536, "grad_norm": NaN, "learning_rate": 7.793970203462003e-05, "loss": 0.0, "step": 1139 }, { "epoch": 0.6630798313217973, "grad_norm": NaN, "learning_rate": 7.790066415517379e-05, "loss": 0.0, "step": 1140 }, { "epoch": 0.663661480296641, "grad_norm": NaN, "learning_rate": 7.786160156181019e-05, "loss": 0.0, "step": 1141 }, { "epoch": 0.6642431292714847, "grad_norm": NaN, "learning_rate": 7.782251428913019e-05, "loss": 0.0, "step": 1142 }, { "epoch": 0.6648247782463284, "grad_norm": NaN, "learning_rate": 7.778340237175661e-05, "loss": 0.0, "step": 1143 }, { "epoch": 0.665406427221172, "grad_norm": NaN, "learning_rate": 7.774426584433409e-05, "loss": 0.0, "step": 1144 }, { "epoch": 0.6659880761960157, "grad_norm": NaN, "learning_rate": 7.770510474152908e-05, "loss": 0.0, "step": 1145 }, { "epoch": 0.6665697251708594, "grad_norm": NaN, "learning_rate": 7.766591909802979e-05, "loss": 0.0, "step": 1146 }, { "epoch": 0.667151374145703, "grad_norm": NaN, "learning_rate": 7.76267089485462e-05, "loss": 0.0, "step": 1147 }, { "epoch": 0.6677330231205467, "grad_norm": NaN, "learning_rate": 7.758747432780994e-05, "loss": 0.0, "step": 1148 }, { "epoch": 0.6683146720953904, "grad_norm": NaN, "learning_rate": 7.754821527057437e-05, "loss": 0.0, "step": 1149 }, { "epoch": 0.6688963210702341, "grad_norm": NaN, "learning_rate": 7.750893181161444e-05, "loss": 0.0, "step": 1150 }, { "epoch": 0.6694779700450778, "grad_norm": NaN, "learning_rate": 7.746962398572679e-05, "loss": 0.0, "step": 1151 }, { "epoch": 0.6700596190199215, "grad_norm": NaN, "learning_rate": 7.743029182772957e-05, "loss": 0.0, "step": 1152 }, { "epoch": 0.6706412679947652, "grad_norm": NaN, "learning_rate": 7.739093537246251e-05, "loss": 0.0, "step": 1153 }, { "epoch": 0.6712229169696089, "grad_norm": NaN, "learning_rate": 7.73515546547869e-05, "loss": 0.0, "step": 1154 }, { "epoch": 0.6718045659444525, "grad_norm": NaN, "learning_rate": 7.731214970958545e-05, "loss": 0.0, "step": 1155 }, { "epoch": 0.6723862149192962, "grad_norm": NaN, "learning_rate": 7.727272057176238e-05, "loss": 0.0, "step": 1156 }, { "epoch": 0.6729678638941399, "grad_norm": NaN, "learning_rate": 7.723326727624334e-05, "loss": 0.0, "step": 1157 }, { "epoch": 0.6735495128689836, "grad_norm": NaN, "learning_rate": 7.719378985797533e-05, "loss": 0.0, "step": 1158 }, { "epoch": 0.6741311618438273, "grad_norm": NaN, "learning_rate": 7.71542883519268e-05, "loss": 0.0, "step": 1159 }, { "epoch": 0.674712810818671, "grad_norm": NaN, "learning_rate": 7.711476279308745e-05, "loss": 0.0, "step": 1160 }, { "epoch": 0.6752944597935147, "grad_norm": NaN, "learning_rate": 7.707521321646833e-05, "loss": 0.0, "step": 1161 }, { "epoch": 0.6758761087683582, "grad_norm": NaN, "learning_rate": 7.703563965710179e-05, "loss": 0.0, "step": 1162 }, { "epoch": 0.6764577577432019, "grad_norm": NaN, "learning_rate": 7.699604215004134e-05, "loss": 0.0, "step": 1163 }, { "epoch": 0.6770394067180456, "grad_norm": NaN, "learning_rate": 7.69564207303618e-05, "loss": 0.0, "step": 1164 }, { "epoch": 0.6776210556928893, "grad_norm": NaN, "learning_rate": 7.691677543315911e-05, "loss": 0.0, "step": 1165 }, { "epoch": 0.678202704667733, "grad_norm": NaN, "learning_rate": 7.687710629355038e-05, "loss": 0.0, "step": 1166 }, { "epoch": 0.6787843536425767, "grad_norm": NaN, "learning_rate": 7.683741334667385e-05, "loss": 0.0, "step": 1167 }, { "epoch": 0.6793660026174204, "grad_norm": NaN, "learning_rate": 7.679769662768879e-05, "loss": 0.0, "step": 1168 }, { "epoch": 0.6799476515922641, "grad_norm": NaN, "learning_rate": 7.675795617177561e-05, "loss": 0.0, "step": 1169 }, { "epoch": 0.6805293005671077, "grad_norm": NaN, "learning_rate": 7.67181920141357e-05, "loss": 0.0, "step": 1170 }, { "epoch": 0.6811109495419514, "grad_norm": NaN, "learning_rate": 7.667840418999144e-05, "loss": 0.0, "step": 1171 }, { "epoch": 0.6816925985167951, "grad_norm": NaN, "learning_rate": 7.66385927345862e-05, "loss": 0.0, "step": 1172 }, { "epoch": 0.6822742474916388, "grad_norm": NaN, "learning_rate": 7.659875768318425e-05, "loss": 0.0, "step": 1173 }, { "epoch": 0.6828558964664825, "grad_norm": NaN, "learning_rate": 7.655889907107076e-05, "loss": 0.0, "step": 1174 }, { "epoch": 0.6834375454413262, "grad_norm": NaN, "learning_rate": 7.651901693355185e-05, "loss": 0.0, "step": 1175 }, { "epoch": 0.6840191944161699, "grad_norm": NaN, "learning_rate": 7.647911130595436e-05, "loss": 0.0, "step": 1176 }, { "epoch": 0.6846008433910136, "grad_norm": NaN, "learning_rate": 7.643918222362603e-05, "loss": 0.0, "step": 1177 }, { "epoch": 0.6851824923658572, "grad_norm": NaN, "learning_rate": 7.639922972193529e-05, "loss": 0.0, "step": 1178 }, { "epoch": 0.6857641413407009, "grad_norm": NaN, "learning_rate": 7.635925383627141e-05, "loss": 0.0, "step": 1179 }, { "epoch": 0.6863457903155445, "grad_norm": NaN, "learning_rate": 7.63192546020443e-05, "loss": 0.0, "step": 1180 }, { "epoch": 0.6869274392903882, "grad_norm": NaN, "learning_rate": 7.62792320546846e-05, "loss": 0.0, "step": 1181 }, { "epoch": 0.6875090882652319, "grad_norm": NaN, "learning_rate": 7.623918622964355e-05, "loss": 0.0, "step": 1182 }, { "epoch": 0.6880907372400756, "grad_norm": NaN, "learning_rate": 7.619911716239303e-05, "loss": 0.0, "step": 1183 }, { "epoch": 0.6886723862149193, "grad_norm": NaN, "learning_rate": 7.615902488842556e-05, "loss": 0.0, "step": 1184 }, { "epoch": 0.689254035189763, "grad_norm": NaN, "learning_rate": 7.611890944325418e-05, "loss": 0.0, "step": 1185 }, { "epoch": 0.6898356841646066, "grad_norm": NaN, "learning_rate": 7.607877086241237e-05, "loss": 0.0, "step": 1186 }, { "epoch": 0.6904173331394503, "grad_norm": NaN, "learning_rate": 7.603860918145423e-05, "loss": 0.0, "step": 1187 }, { "epoch": 0.690998982114294, "grad_norm": NaN, "learning_rate": 7.599842443595425e-05, "loss": 0.0, "step": 1188 }, { "epoch": 0.6915806310891377, "grad_norm": NaN, "learning_rate": 7.59582166615074e-05, "loss": 0.0, "step": 1189 }, { "epoch": 0.6921622800639814, "grad_norm": NaN, "learning_rate": 7.5917985893729e-05, "loss": 0.0, "step": 1190 }, { "epoch": 0.6927439290388251, "grad_norm": NaN, "learning_rate": 7.587773216825474e-05, "loss": 0.0, "step": 1191 }, { "epoch": 0.6933255780136688, "grad_norm": NaN, "learning_rate": 7.583745552074069e-05, "loss": 0.0, "step": 1192 }, { "epoch": 0.6939072269885125, "grad_norm": NaN, "learning_rate": 7.579715598686318e-05, "loss": 0.0, "step": 1193 }, { "epoch": 0.6944888759633561, "grad_norm": NaN, "learning_rate": 7.575683360231884e-05, "loss": 0.0, "step": 1194 }, { "epoch": 0.6950705249381998, "grad_norm": NaN, "learning_rate": 7.571648840282451e-05, "loss": 0.0, "step": 1195 }, { "epoch": 0.6956521739130435, "grad_norm": NaN, "learning_rate": 7.567612042411728e-05, "loss": 0.0, "step": 1196 }, { "epoch": 0.6962338228878872, "grad_norm": NaN, "learning_rate": 7.563572970195437e-05, "loss": 0.0, "step": 1197 }, { "epoch": 0.6968154718627309, "grad_norm": NaN, "learning_rate": 7.55953162721132e-05, "loss": 0.0, "step": 1198 }, { "epoch": 0.6973971208375745, "grad_norm": NaN, "learning_rate": 7.555488017039126e-05, "loss": 0.0, "step": 1199 }, { "epoch": 0.6979787698124182, "grad_norm": NaN, "learning_rate": 7.551442143260612e-05, "loss": 0.0, "step": 1200 }, { "epoch": 0.6985604187872619, "grad_norm": NaN, "learning_rate": 7.547394009459545e-05, "loss": 0.0, "step": 1201 }, { "epoch": 0.6991420677621055, "grad_norm": NaN, "learning_rate": 7.543343619221688e-05, "loss": 0.0, "step": 1202 }, { "epoch": 0.6997237167369492, "grad_norm": NaN, "learning_rate": 7.539290976134807e-05, "loss": 0.0, "step": 1203 }, { "epoch": 0.7003053657117929, "grad_norm": NaN, "learning_rate": 7.535236083788664e-05, "loss": 0.0, "step": 1204 }, { "epoch": 0.7008870146866366, "grad_norm": NaN, "learning_rate": 7.531178945775007e-05, "loss": 0.0, "step": 1205 }, { "epoch": 0.7014686636614803, "grad_norm": NaN, "learning_rate": 7.527119565687577e-05, "loss": 0.0, "step": 1206 }, { "epoch": 0.702050312636324, "grad_norm": NaN, "learning_rate": 7.523057947122107e-05, "loss": 0.0, "step": 1207 }, { "epoch": 0.7026319616111677, "grad_norm": NaN, "learning_rate": 7.518994093676302e-05, "loss": 0.0, "step": 1208 }, { "epoch": 0.7032136105860114, "grad_norm": NaN, "learning_rate": 7.514928008949856e-05, "loss": 0.0, "step": 1209 }, { "epoch": 0.703795259560855, "grad_norm": NaN, "learning_rate": 7.510859696544432e-05, "loss": 0.0, "step": 1210 }, { "epoch": 0.7043769085356987, "grad_norm": NaN, "learning_rate": 7.50678916006367e-05, "loss": 0.0, "step": 1211 }, { "epoch": 0.7049585575105424, "grad_norm": NaN, "learning_rate": 7.502716403113182e-05, "loss": 0.0, "step": 1212 }, { "epoch": 0.7055402064853861, "grad_norm": NaN, "learning_rate": 7.498641429300541e-05, "loss": 0.0, "step": 1213 }, { "epoch": 0.7061218554602298, "grad_norm": NaN, "learning_rate": 7.494564242235289e-05, "loss": 0.0, "step": 1214 }, { "epoch": 0.7067035044350735, "grad_norm": NaN, "learning_rate": 7.490484845528926e-05, "loss": 0.0, "step": 1215 }, { "epoch": 0.7072851534099172, "grad_norm": NaN, "learning_rate": 7.486403242794913e-05, "loss": 0.0, "step": 1216 }, { "epoch": 0.7078668023847609, "grad_norm": NaN, "learning_rate": 7.482319437648657e-05, "loss": 0.0, "step": 1217 }, { "epoch": 0.7084484513596044, "grad_norm": NaN, "learning_rate": 7.478233433707523e-05, "loss": 0.0, "step": 1218 }, { "epoch": 0.7090301003344481, "grad_norm": NaN, "learning_rate": 7.474145234590822e-05, "loss": 0.0, "step": 1219 }, { "epoch": 0.7096117493092918, "grad_norm": NaN, "learning_rate": 7.470054843919807e-05, "loss": 0.0, "step": 1220 }, { "epoch": 0.7101933982841355, "grad_norm": NaN, "learning_rate": 7.465962265317676e-05, "loss": 0.0, "step": 1221 }, { "epoch": 0.7107750472589792, "grad_norm": NaN, "learning_rate": 7.461867502409562e-05, "loss": 0.0, "step": 1222 }, { "epoch": 0.7113566962338229, "grad_norm": NaN, "learning_rate": 7.457770558822534e-05, "loss": 0.0, "step": 1223 }, { "epoch": 0.7119383452086666, "grad_norm": NaN, "learning_rate": 7.453671438185594e-05, "loss": 0.0, "step": 1224 }, { "epoch": 0.7125199941835102, "grad_norm": NaN, "learning_rate": 7.449570144129667e-05, "loss": 0.0, "step": 1225 }, { "epoch": 0.7131016431583539, "grad_norm": NaN, "learning_rate": 7.445466680287611e-05, "loss": 0.0, "step": 1226 }, { "epoch": 0.7136832921331976, "grad_norm": NaN, "learning_rate": 7.441361050294202e-05, "loss": 0.0, "step": 1227 }, { "epoch": 0.7142649411080413, "grad_norm": NaN, "learning_rate": 7.437253257786133e-05, "loss": 0.0, "step": 1228 }, { "epoch": 0.714846590082885, "grad_norm": NaN, "learning_rate": 7.433143306402014e-05, "loss": 0.0, "step": 1229 }, { "epoch": 0.7154282390577287, "grad_norm": NaN, "learning_rate": 7.429031199782369e-05, "loss": 0.0, "step": 1230 }, { "epoch": 0.7160098880325724, "grad_norm": NaN, "learning_rate": 7.42491694156963e-05, "loss": 0.0, "step": 1231 }, { "epoch": 0.7165915370074161, "grad_norm": NaN, "learning_rate": 7.420800535408135e-05, "loss": 0.0, "step": 1232 }, { "epoch": 0.7171731859822597, "grad_norm": NaN, "learning_rate": 7.41668198494412e-05, "loss": 0.0, "step": 1233 }, { "epoch": 0.7177548349571033, "grad_norm": NaN, "learning_rate": 7.412561293825728e-05, "loss": 0.0, "step": 1234 }, { "epoch": 0.718336483931947, "grad_norm": NaN, "learning_rate": 7.408438465702996e-05, "loss": 0.0, "step": 1235 }, { "epoch": 0.7189181329067907, "grad_norm": NaN, "learning_rate": 7.40431350422785e-05, "loss": 0.0, "step": 1236 }, { "epoch": 0.7194997818816344, "grad_norm": NaN, "learning_rate": 7.400186413054109e-05, "loss": 0.0, "step": 1237 }, { "epoch": 0.7200814308564781, "grad_norm": NaN, "learning_rate": 7.396057195837478e-05, "loss": 0.0, "step": 1238 }, { "epoch": 0.7206630798313218, "grad_norm": NaN, "learning_rate": 7.391925856235544e-05, "loss": 0.0, "step": 1239 }, { "epoch": 0.7212447288061655, "grad_norm": NaN, "learning_rate": 7.387792397907776e-05, "loss": 0.0, "step": 1240 }, { "epoch": 0.7218263777810091, "grad_norm": NaN, "learning_rate": 7.38365682451552e-05, "loss": 0.0, "step": 1241 }, { "epoch": 0.7224080267558528, "grad_norm": NaN, "learning_rate": 7.37951913972199e-05, "loss": 0.0, "step": 1242 }, { "epoch": 0.7229896757306965, "grad_norm": NaN, "learning_rate": 7.375379347192277e-05, "loss": 0.0, "step": 1243 }, { "epoch": 0.7235713247055402, "grad_norm": NaN, "learning_rate": 7.371237450593337e-05, "loss": 0.0, "step": 1244 }, { "epoch": 0.7241529736803839, "grad_norm": NaN, "learning_rate": 7.367093453593989e-05, "loss": 0.0, "step": 1245 }, { "epoch": 0.7247346226552276, "grad_norm": NaN, "learning_rate": 7.362947359864912e-05, "loss": 0.0, "step": 1246 }, { "epoch": 0.7253162716300713, "grad_norm": NaN, "learning_rate": 7.358799173078641e-05, "loss": 0.0, "step": 1247 }, { "epoch": 0.725897920604915, "grad_norm": NaN, "learning_rate": 7.35464889690957e-05, "loss": 0.0, "step": 1248 }, { "epoch": 0.7264795695797586, "grad_norm": NaN, "learning_rate": 7.35049653503394e-05, "loss": 0.0, "step": 1249 }, { "epoch": 0.7270612185546023, "grad_norm": NaN, "learning_rate": 7.346342091129838e-05, "loss": 0.0, "step": 1250 }, { "epoch": 0.727642867529446, "grad_norm": NaN, "learning_rate": 7.3421855688772e-05, "loss": 0.0, "step": 1251 }, { "epoch": 0.7282245165042897, "grad_norm": NaN, "learning_rate": 7.338026971957798e-05, "loss": 0.0, "step": 1252 }, { "epoch": 0.7288061654791333, "grad_norm": NaN, "learning_rate": 7.333866304055243e-05, "loss": 0.0, "step": 1253 }, { "epoch": 0.729387814453977, "grad_norm": NaN, "learning_rate": 7.329703568854987e-05, "loss": 0.0, "step": 1254 }, { "epoch": 0.7299694634288207, "grad_norm": NaN, "learning_rate": 7.325538770044303e-05, "loss": 0.0, "step": 1255 }, { "epoch": 0.7305511124036644, "grad_norm": NaN, "learning_rate": 7.321371911312296e-05, "loss": 0.0, "step": 1256 }, { "epoch": 0.731132761378508, "grad_norm": NaN, "learning_rate": 7.317202996349898e-05, "loss": 0.0, "step": 1257 }, { "epoch": 0.7317144103533517, "grad_norm": NaN, "learning_rate": 7.313032028849862e-05, "loss": 0.0, "step": 1258 }, { "epoch": 0.7322960593281954, "grad_norm": NaN, "learning_rate": 7.308859012506753e-05, "loss": 0.0, "step": 1259 }, { "epoch": 0.7328777083030391, "grad_norm": NaN, "learning_rate": 7.30468395101696e-05, "loss": 0.0, "step": 1260 }, { "epoch": 0.7334593572778828, "grad_norm": NaN, "learning_rate": 7.300506848078677e-05, "loss": 0.0, "step": 1261 }, { "epoch": 0.7340410062527265, "grad_norm": NaN, "learning_rate": 7.296327707391908e-05, "loss": 0.0, "step": 1262 }, { "epoch": 0.7346226552275702, "grad_norm": NaN, "learning_rate": 7.292146532658463e-05, "loss": 0.0, "step": 1263 }, { "epoch": 0.7352043042024139, "grad_norm": NaN, "learning_rate": 7.287963327581952e-05, "loss": 0.0, "step": 1264 }, { "epoch": 0.7357859531772575, "grad_norm": NaN, "learning_rate": 7.283778095867786e-05, "loss": 0.0, "step": 1265 }, { "epoch": 0.7363676021521012, "grad_norm": NaN, "learning_rate": 7.279590841223169e-05, "loss": 0.0, "step": 1266 }, { "epoch": 0.7369492511269449, "grad_norm": NaN, "learning_rate": 7.275401567357096e-05, "loss": 0.0, "step": 1267 }, { "epoch": 0.7375309001017886, "grad_norm": NaN, "learning_rate": 7.271210277980355e-05, "loss": 0.0, "step": 1268 }, { "epoch": 0.7381125490766323, "grad_norm": NaN, "learning_rate": 7.267016976805514e-05, "loss": 0.0, "step": 1269 }, { "epoch": 0.738694198051476, "grad_norm": NaN, "learning_rate": 7.262821667546926e-05, "loss": 0.0, "step": 1270 }, { "epoch": 0.7392758470263197, "grad_norm": NaN, "learning_rate": 7.258624353920722e-05, "loss": 0.0, "step": 1271 }, { "epoch": 0.7398574960011634, "grad_norm": NaN, "learning_rate": 7.254425039644808e-05, "loss": 0.0, "step": 1272 }, { "epoch": 0.7404391449760069, "grad_norm": NaN, "learning_rate": 7.250223728438865e-05, "loss": 0.0, "step": 1273 }, { "epoch": 0.7410207939508506, "grad_norm": NaN, "learning_rate": 7.24602042402434e-05, "loss": 0.0, "step": 1274 }, { "epoch": 0.7416024429256943, "grad_norm": NaN, "learning_rate": 7.241815130124444e-05, "loss": 0.0, "step": 1275 }, { "epoch": 0.742184091900538, "grad_norm": NaN, "learning_rate": 7.237607850464152e-05, "loss": 0.0, "step": 1276 }, { "epoch": 0.7427657408753817, "grad_norm": NaN, "learning_rate": 7.233398588770202e-05, "loss": 0.0, "step": 1277 }, { "epoch": 0.7433473898502254, "grad_norm": NaN, "learning_rate": 7.229187348771081e-05, "loss": 0.0, "step": 1278 }, { "epoch": 0.7439290388250691, "grad_norm": NaN, "learning_rate": 7.224974134197031e-05, "loss": 0.0, "step": 1279 }, { "epoch": 0.7445106877999128, "grad_norm": NaN, "learning_rate": 7.220758948780046e-05, "loss": 0.0, "step": 1280 }, { "epoch": 0.7450923367747564, "grad_norm": NaN, "learning_rate": 7.21654179625386e-05, "loss": 0.0, "step": 1281 }, { "epoch": 0.7456739857496001, "grad_norm": NaN, "learning_rate": 7.212322680353953e-05, "loss": 0.0, "step": 1282 }, { "epoch": 0.7462556347244438, "grad_norm": NaN, "learning_rate": 7.208101604817546e-05, "loss": 0.0, "step": 1283 }, { "epoch": 0.7468372836992875, "grad_norm": NaN, "learning_rate": 7.203878573383594e-05, "loss": 0.0, "step": 1284 }, { "epoch": 0.7474189326741312, "grad_norm": NaN, "learning_rate": 7.199653589792779e-05, "loss": 0.0, "step": 1285 }, { "epoch": 0.7480005816489749, "grad_norm": NaN, "learning_rate": 7.19542665778752e-05, "loss": 0.0, "step": 1286 }, { "epoch": 0.7485822306238186, "grad_norm": NaN, "learning_rate": 7.191197781111959e-05, "loss": 0.0, "step": 1287 }, { "epoch": 0.7491638795986622, "grad_norm": NaN, "learning_rate": 7.186966963511962e-05, "loss": 0.0, "step": 1288 }, { "epoch": 0.7497455285735058, "grad_norm": NaN, "learning_rate": 7.18273420873511e-05, "loss": 0.0, "step": 1289 }, { "epoch": 0.7503271775483495, "grad_norm": NaN, "learning_rate": 7.178499520530704e-05, "loss": 0.0, "step": 1290 }, { "epoch": 0.7509088265231932, "grad_norm": NaN, "learning_rate": 7.174262902649758e-05, "loss": 0.0, "step": 1291 }, { "epoch": 0.7514904754980369, "grad_norm": NaN, "learning_rate": 7.17002435884499e-05, "loss": 0.0, "step": 1292 }, { "epoch": 0.7520721244728806, "grad_norm": NaN, "learning_rate": 7.16578389287083e-05, "loss": 0.0, "step": 1293 }, { "epoch": 0.7526537734477243, "grad_norm": NaN, "learning_rate": 7.161541508483408e-05, "loss": 0.0, "step": 1294 }, { "epoch": 0.753235422422568, "grad_norm": NaN, "learning_rate": 7.157297209440553e-05, "loss": 0.0, "step": 1295 }, { "epoch": 0.7538170713974116, "grad_norm": NaN, "learning_rate": 7.15305099950179e-05, "loss": 0.0, "step": 1296 }, { "epoch": 0.7543987203722553, "grad_norm": NaN, "learning_rate": 7.148802882428336e-05, "loss": 0.0, "step": 1297 }, { "epoch": 0.754980369347099, "grad_norm": NaN, "learning_rate": 7.144552861983103e-05, "loss": 0.0, "step": 1298 }, { "epoch": 0.7555620183219427, "grad_norm": NaN, "learning_rate": 7.140300941930679e-05, "loss": 0.0, "step": 1299 }, { "epoch": 0.7561436672967864, "grad_norm": NaN, "learning_rate": 7.136047126037342e-05, "loss": 0.0, "step": 1300 }, { "epoch": 0.7567253162716301, "grad_norm": NaN, "learning_rate": 7.131791418071048e-05, "loss": 0.0, "step": 1301 }, { "epoch": 0.7573069652464738, "grad_norm": NaN, "learning_rate": 7.127533821801428e-05, "loss": 0.0, "step": 1302 }, { "epoch": 0.7578886142213175, "grad_norm": NaN, "learning_rate": 7.123274340999785e-05, "loss": 0.0, "step": 1303 }, { "epoch": 0.7584702631961611, "grad_norm": NaN, "learning_rate": 7.119012979439092e-05, "loss": 0.0, "step": 1304 }, { "epoch": 0.7590519121710048, "grad_norm": NaN, "learning_rate": 7.114749740893988e-05, "loss": 0.0, "step": 1305 }, { "epoch": 0.7596335611458485, "grad_norm": NaN, "learning_rate": 7.110484629140776e-05, "loss": 0.0, "step": 1306 }, { "epoch": 0.7602152101206922, "grad_norm": NaN, "learning_rate": 7.106217647957418e-05, "loss": 0.0, "step": 1307 }, { "epoch": 0.7607968590955358, "grad_norm": NaN, "learning_rate": 7.101948801123528e-05, "loss": 0.0, "step": 1308 }, { "epoch": 0.7613785080703795, "grad_norm": NaN, "learning_rate": 7.097678092420377e-05, "loss": 0.0, "step": 1309 }, { "epoch": 0.7619601570452232, "grad_norm": NaN, "learning_rate": 7.093405525630881e-05, "loss": 0.0, "step": 1310 }, { "epoch": 0.7625418060200669, "grad_norm": NaN, "learning_rate": 7.089131104539609e-05, "loss": 0.0, "step": 1311 }, { "epoch": 0.7631234549949105, "grad_norm": NaN, "learning_rate": 7.084854832932766e-05, "loss": 0.0, "step": 1312 }, { "epoch": 0.7637051039697542, "grad_norm": NaN, "learning_rate": 7.080576714598198e-05, "loss": 0.0, "step": 1313 }, { "epoch": 0.7642867529445979, "grad_norm": NaN, "learning_rate": 7.076296753325383e-05, "loss": 0.0, "step": 1314 }, { "epoch": 0.7648684019194416, "grad_norm": NaN, "learning_rate": 7.072014952905442e-05, "loss": 0.0, "step": 1315 }, { "epoch": 0.7654500508942853, "grad_norm": NaN, "learning_rate": 7.067731317131115e-05, "loss": 0.0, "step": 1316 }, { "epoch": 0.766031699869129, "grad_norm": NaN, "learning_rate": 7.063445849796773e-05, "loss": 0.0, "step": 1317 }, { "epoch": 0.7666133488439727, "grad_norm": NaN, "learning_rate": 7.059158554698406e-05, "loss": 0.0, "step": 1318 }, { "epoch": 0.7671949978188164, "grad_norm": NaN, "learning_rate": 7.054869435633623e-05, "loss": 0.0, "step": 1319 }, { "epoch": 0.76777664679366, "grad_norm": NaN, "learning_rate": 7.050578496401653e-05, "loss": 0.0, "step": 1320 }, { "epoch": 0.7683582957685037, "grad_norm": NaN, "learning_rate": 7.046285740803334e-05, "loss": 0.0, "step": 1321 }, { "epoch": 0.7689399447433474, "grad_norm": NaN, "learning_rate": 7.041991172641114e-05, "loss": 0.0, "step": 1322 }, { "epoch": 0.7695215937181911, "grad_norm": NaN, "learning_rate": 7.037694795719042e-05, "loss": 0.0, "step": 1323 }, { "epoch": 0.7701032426930348, "grad_norm": NaN, "learning_rate": 7.033396613842777e-05, "loss": 0.0, "step": 1324 }, { "epoch": 0.7706848916678785, "grad_norm": NaN, "learning_rate": 7.029096630819572e-05, "loss": 0.0, "step": 1325 }, { "epoch": 0.7712665406427222, "grad_norm": NaN, "learning_rate": 7.024794850458277e-05, "loss": 0.0, "step": 1326 }, { "epoch": 0.7718481896175658, "grad_norm": NaN, "learning_rate": 7.020491276569332e-05, "loss": 0.0, "step": 1327 }, { "epoch": 0.7724298385924094, "grad_norm": NaN, "learning_rate": 7.016185912964767e-05, "loss": 0.0, "step": 1328 }, { "epoch": 0.7730114875672531, "grad_norm": NaN, "learning_rate": 7.011878763458198e-05, "loss": 0.0, "step": 1329 }, { "epoch": 0.7735931365420968, "grad_norm": NaN, "learning_rate": 7.007569831864822e-05, "loss": 0.0, "step": 1330 }, { "epoch": 0.7741747855169405, "grad_norm": NaN, "learning_rate": 7.003259122001416e-05, "loss": 0.0, "step": 1331 }, { "epoch": 0.7747564344917842, "grad_norm": NaN, "learning_rate": 6.998946637686327e-05, "loss": 0.0, "step": 1332 }, { "epoch": 0.7753380834666279, "grad_norm": NaN, "learning_rate": 6.994632382739483e-05, "loss": 0.0, "step": 1333 }, { "epoch": 0.7759197324414716, "grad_norm": NaN, "learning_rate": 6.990316360982371e-05, "loss": 0.0, "step": 1334 }, { "epoch": 0.7765013814163153, "grad_norm": NaN, "learning_rate": 6.985998576238049e-05, "loss": 0.0, "step": 1335 }, { "epoch": 0.7770830303911589, "grad_norm": NaN, "learning_rate": 6.981679032331135e-05, "loss": 0.0, "step": 1336 }, { "epoch": 0.7776646793660026, "grad_norm": NaN, "learning_rate": 6.977357733087805e-05, "loss": 0.0, "step": 1337 }, { "epoch": 0.7782463283408463, "grad_norm": NaN, "learning_rate": 6.973034682335789e-05, "loss": 0.0, "step": 1338 }, { "epoch": 0.77882797731569, "grad_norm": NaN, "learning_rate": 6.968709883904368e-05, "loss": 0.0, "step": 1339 }, { "epoch": 0.7794096262905337, "grad_norm": NaN, "learning_rate": 6.964383341624376e-05, "loss": 0.0, "step": 1340 }, { "epoch": 0.7799912752653774, "grad_norm": NaN, "learning_rate": 6.960055059328185e-05, "loss": 0.0, "step": 1341 }, { "epoch": 0.7805729242402211, "grad_norm": NaN, "learning_rate": 6.955725040849712e-05, "loss": 0.0, "step": 1342 }, { "epoch": 0.7811545732150648, "grad_norm": NaN, "learning_rate": 6.95139329002441e-05, "loss": 0.0, "step": 1343 }, { "epoch": 0.7817362221899083, "grad_norm": NaN, "learning_rate": 6.947059810689272e-05, "loss": 0.0, "step": 1344 }, { "epoch": 0.782317871164752, "grad_norm": NaN, "learning_rate": 6.942724606682814e-05, "loss": 0.0, "step": 1345 }, { "epoch": 0.7828995201395957, "grad_norm": NaN, "learning_rate": 6.938387681845084e-05, "loss": 0.0, "step": 1346 }, { "epoch": 0.7834811691144394, "grad_norm": NaN, "learning_rate": 6.934049040017652e-05, "loss": 0.0, "step": 1347 }, { "epoch": 0.7840628180892831, "grad_norm": NaN, "learning_rate": 6.929708685043614e-05, "loss": 0.0, "step": 1348 }, { "epoch": 0.7846444670641268, "grad_norm": NaN, "learning_rate": 6.92536662076758e-05, "loss": 0.0, "step": 1349 }, { "epoch": 0.7852261160389705, "grad_norm": NaN, "learning_rate": 6.92102285103567e-05, "loss": 0.0, "step": 1350 }, { "epoch": 0.7858077650138141, "grad_norm": NaN, "learning_rate": 6.916677379695524e-05, "loss": 0.0, "step": 1351 }, { "epoch": 0.7863894139886578, "grad_norm": NaN, "learning_rate": 6.91233021059628e-05, "loss": 0.0, "step": 1352 }, { "epoch": 0.7869710629635015, "grad_norm": NaN, "learning_rate": 6.907981347588586e-05, "loss": 0.0, "step": 1353 }, { "epoch": 0.7875527119383452, "grad_norm": NaN, "learning_rate": 6.903630794524588e-05, "loss": 0.0, "step": 1354 }, { "epoch": 0.7881343609131889, "grad_norm": NaN, "learning_rate": 6.899278555257929e-05, "loss": 0.0, "step": 1355 }, { "epoch": 0.7887160098880326, "grad_norm": NaN, "learning_rate": 6.894924633643746e-05, "loss": 0.0, "step": 1356 }, { "epoch": 0.7892976588628763, "grad_norm": NaN, "learning_rate": 6.890569033538665e-05, "loss": 0.0, "step": 1357 }, { "epoch": 0.78987930783772, "grad_norm": NaN, "learning_rate": 6.886211758800802e-05, "loss": 0.0, "step": 1358 }, { "epoch": 0.7904609568125636, "grad_norm": NaN, "learning_rate": 6.881852813289754e-05, "loss": 0.0, "step": 1359 }, { "epoch": 0.7910426057874073, "grad_norm": NaN, "learning_rate": 6.877492200866598e-05, "loss": 0.0, "step": 1360 }, { "epoch": 0.791624254762251, "grad_norm": NaN, "learning_rate": 6.873129925393888e-05, "loss": 0.0, "step": 1361 }, { "epoch": 0.7922059037370947, "grad_norm": NaN, "learning_rate": 6.868765990735648e-05, "loss": 0.0, "step": 1362 }, { "epoch": 0.7927875527119383, "grad_norm": NaN, "learning_rate": 6.864400400757378e-05, "loss": 0.0, "step": 1363 }, { "epoch": 0.793369201686782, "grad_norm": NaN, "learning_rate": 6.86003315932604e-05, "loss": 0.0, "step": 1364 }, { "epoch": 0.7939508506616257, "grad_norm": NaN, "learning_rate": 6.855664270310059e-05, "loss": 0.0, "step": 1365 }, { "epoch": 0.7945324996364694, "grad_norm": NaN, "learning_rate": 6.851293737579321e-05, "loss": 0.0, "step": 1366 }, { "epoch": 0.795114148611313, "grad_norm": NaN, "learning_rate": 6.846921565005165e-05, "loss": 0.0, "step": 1367 }, { "epoch": 0.7956957975861567, "grad_norm": NaN, "learning_rate": 6.842547756460385e-05, "loss": 0.0, "step": 1368 }, { "epoch": 0.7962774465610004, "grad_norm": NaN, "learning_rate": 6.838172315819227e-05, "loss": 0.0, "step": 1369 }, { "epoch": 0.7968590955358441, "grad_norm": NaN, "learning_rate": 6.833795246957375e-05, "loss": 0.0, "step": 1370 }, { "epoch": 0.7974407445106878, "grad_norm": NaN, "learning_rate": 6.829416553751961e-05, "loss": 0.0, "step": 1371 }, { "epoch": 0.7980223934855315, "grad_norm": NaN, "learning_rate": 6.825036240081557e-05, "loss": 0.0, "step": 1372 }, { "epoch": 0.7986040424603752, "grad_norm": NaN, "learning_rate": 6.820654309826162e-05, "loss": 0.0, "step": 1373 }, { "epoch": 0.7991856914352189, "grad_norm": NaN, "learning_rate": 6.816270766867219e-05, "loss": 0.0, "step": 1374 }, { "epoch": 0.7997673404100625, "grad_norm": NaN, "learning_rate": 6.81188561508759e-05, "loss": 0.0, "step": 1375 }, { "epoch": 0.8003489893849062, "grad_norm": NaN, "learning_rate": 6.807498858371563e-05, "loss": 0.0, "step": 1376 }, { "epoch": 0.8009306383597499, "grad_norm": NaN, "learning_rate": 6.803110500604856e-05, "loss": 0.0, "step": 1377 }, { "epoch": 0.8015122873345936, "grad_norm": NaN, "learning_rate": 6.798720545674595e-05, "loss": 0.0, "step": 1378 }, { "epoch": 0.8020939363094373, "grad_norm": NaN, "learning_rate": 6.794328997469325e-05, "loss": 0.0, "step": 1379 }, { "epoch": 0.802675585284281, "grad_norm": NaN, "learning_rate": 6.789935859879003e-05, "loss": 0.0, "step": 1380 }, { "epoch": 0.8032572342591247, "grad_norm": NaN, "learning_rate": 6.785541136794991e-05, "loss": 0.0, "step": 1381 }, { "epoch": 0.8038388832339683, "grad_norm": NaN, "learning_rate": 6.78114483211006e-05, "loss": 0.0, "step": 1382 }, { "epoch": 0.8044205322088119, "grad_norm": NaN, "learning_rate": 6.77674694971838e-05, "loss": 0.0, "step": 1383 }, { "epoch": 0.8050021811836556, "grad_norm": NaN, "learning_rate": 6.772347493515514e-05, "loss": 0.0, "step": 1384 }, { "epoch": 0.8055838301584993, "grad_norm": NaN, "learning_rate": 6.767946467398427e-05, "loss": 0.0, "step": 1385 }, { "epoch": 0.806165479133343, "grad_norm": NaN, "learning_rate": 6.763543875265466e-05, "loss": 0.0, "step": 1386 }, { "epoch": 0.8067471281081867, "grad_norm": NaN, "learning_rate": 6.759139721016374e-05, "loss": 0.0, "step": 1387 }, { "epoch": 0.8073287770830304, "grad_norm": NaN, "learning_rate": 6.754734008552271e-05, "loss": 0.0, "step": 1388 }, { "epoch": 0.8079104260578741, "grad_norm": NaN, "learning_rate": 6.75032674177566e-05, "loss": 0.0, "step": 1389 }, { "epoch": 0.8084920750327178, "grad_norm": NaN, "learning_rate": 6.745917924590422e-05, "loss": 0.0, "step": 1390 }, { "epoch": 0.8090737240075614, "grad_norm": NaN, "learning_rate": 6.741507560901805e-05, "loss": 0.0, "step": 1391 }, { "epoch": 0.8096553729824051, "grad_norm": NaN, "learning_rate": 6.737095654616438e-05, "loss": 0.0, "step": 1392 }, { "epoch": 0.8102370219572488, "grad_norm": NaN, "learning_rate": 6.732682209642305e-05, "loss": 0.0, "step": 1393 }, { "epoch": 0.8108186709320925, "grad_norm": NaN, "learning_rate": 6.72826722988876e-05, "loss": 0.0, "step": 1394 }, { "epoch": 0.8114003199069362, "grad_norm": NaN, "learning_rate": 6.723850719266514e-05, "loss": 0.0, "step": 1395 }, { "epoch": 0.8119819688817799, "grad_norm": NaN, "learning_rate": 6.719432681687633e-05, "loss": 0.0, "step": 1396 }, { "epoch": 0.8125636178566236, "grad_norm": NaN, "learning_rate": 6.71501312106554e-05, "loss": 0.0, "step": 1397 }, { "epoch": 0.8131452668314673, "grad_norm": NaN, "learning_rate": 6.710592041315001e-05, "loss": 0.0, "step": 1398 }, { "epoch": 0.8137269158063108, "grad_norm": NaN, "learning_rate": 6.706169446352133e-05, "loss": 0.0, "step": 1399 }, { "epoch": 0.8143085647811545, "grad_norm": NaN, "learning_rate": 6.70174534009439e-05, "loss": 0.0, "step": 1400 }, { "epoch": 0.8148902137559982, "grad_norm": NaN, "learning_rate": 6.697319726460572e-05, "loss": 0.0, "step": 1401 }, { "epoch": 0.8154718627308419, "grad_norm": NaN, "learning_rate": 6.692892609370805e-05, "loss": 0.0, "step": 1402 }, { "epoch": 0.8160535117056856, "grad_norm": NaN, "learning_rate": 6.688463992746554e-05, "loss": 0.0, "step": 1403 }, { "epoch": 0.8166351606805293, "grad_norm": NaN, "learning_rate": 6.68403388051061e-05, "loss": 0.0, "step": 1404 }, { "epoch": 0.817216809655373, "grad_norm": NaN, "learning_rate": 6.679602276587084e-05, "loss": 0.0, "step": 1405 }, { "epoch": 0.8177984586302167, "grad_norm": NaN, "learning_rate": 6.675169184901418e-05, "loss": 0.0, "step": 1406 }, { "epoch": 0.8183801076050603, "grad_norm": NaN, "learning_rate": 6.670734609380366e-05, "loss": 0.0, "step": 1407 }, { "epoch": 0.818961756579904, "grad_norm": NaN, "learning_rate": 6.666298553951997e-05, "loss": 0.0, "step": 1408 }, { "epoch": 0.8195434055547477, "grad_norm": NaN, "learning_rate": 6.661861022545685e-05, "loss": 0.0, "step": 1409 }, { "epoch": 0.8201250545295914, "grad_norm": NaN, "learning_rate": 6.657422019092126e-05, "loss": 0.0, "step": 1410 }, { "epoch": 0.8207067035044351, "grad_norm": NaN, "learning_rate": 6.652981547523306e-05, "loss": 0.0, "step": 1411 }, { "epoch": 0.8212883524792788, "grad_norm": NaN, "learning_rate": 6.648539611772518e-05, "loss": 0.0, "step": 1412 }, { "epoch": 0.8218700014541225, "grad_norm": NaN, "learning_rate": 6.644096215774349e-05, "loss": 0.0, "step": 1413 }, { "epoch": 0.8224516504289661, "grad_norm": NaN, "learning_rate": 6.639651363464681e-05, "loss": 0.0, "step": 1414 }, { "epoch": 0.8230332994038098, "grad_norm": NaN, "learning_rate": 6.63520505878069e-05, "loss": 0.0, "step": 1415 }, { "epoch": 0.8236149483786535, "grad_norm": NaN, "learning_rate": 6.63075730566083e-05, "loss": 0.0, "step": 1416 }, { "epoch": 0.8241965973534972, "grad_norm": NaN, "learning_rate": 6.626308108044844e-05, "loss": 0.0, "step": 1417 }, { "epoch": 0.8247782463283408, "grad_norm": NaN, "learning_rate": 6.621857469873753e-05, "loss": 0.0, "step": 1418 }, { "epoch": 0.8253598953031845, "grad_norm": NaN, "learning_rate": 6.617405395089854e-05, "loss": 0.0, "step": 1419 }, { "epoch": 0.8259415442780282, "grad_norm": NaN, "learning_rate": 6.612951887636717e-05, "loss": 0.0, "step": 1420 }, { "epoch": 0.8265231932528719, "grad_norm": NaN, "learning_rate": 6.60849695145918e-05, "loss": 0.0, "step": 1421 }, { "epoch": 0.8271048422277155, "grad_norm": NaN, "learning_rate": 6.604040590503346e-05, "loss": 0.0, "step": 1422 }, { "epoch": 0.8276864912025592, "grad_norm": NaN, "learning_rate": 6.599582808716584e-05, "loss": 0.0, "step": 1423 }, { "epoch": 0.8282681401774029, "grad_norm": NaN, "learning_rate": 6.595123610047513e-05, "loss": 0.0, "step": 1424 }, { "epoch": 0.8288497891522466, "grad_norm": NaN, "learning_rate": 6.59066299844602e-05, "loss": 0.0, "step": 1425 }, { "epoch": 0.8294314381270903, "grad_norm": NaN, "learning_rate": 6.586200977863233e-05, "loss": 0.0, "step": 1426 }, { "epoch": 0.830013087101934, "grad_norm": NaN, "learning_rate": 6.58173755225153e-05, "loss": 0.0, "step": 1427 }, { "epoch": 0.8305947360767777, "grad_norm": NaN, "learning_rate": 6.577272725564535e-05, "loss": 0.0, "step": 1428 }, { "epoch": 0.8311763850516214, "grad_norm": NaN, "learning_rate": 6.572806501757115e-05, "loss": 0.0, "step": 1429 }, { "epoch": 0.831758034026465, "grad_norm": NaN, "learning_rate": 6.568338884785371e-05, "loss": 0.0, "step": 1430 }, { "epoch": 0.8323396830013087, "grad_norm": NaN, "learning_rate": 6.563869878606642e-05, "loss": 0.0, "step": 1431 }, { "epoch": 0.8329213319761524, "grad_norm": NaN, "learning_rate": 6.55939948717949e-05, "loss": 0.0, "step": 1432 }, { "epoch": 0.8335029809509961, "grad_norm": NaN, "learning_rate": 6.554927714463712e-05, "loss": 0.0, "step": 1433 }, { "epoch": 0.8340846299258398, "grad_norm": NaN, "learning_rate": 6.550454564420327e-05, "loss": 0.0, "step": 1434 }, { "epoch": 0.8346662789006835, "grad_norm": NaN, "learning_rate": 6.545980041011571e-05, "loss": 0.0, "step": 1435 }, { "epoch": 0.8352479278755272, "grad_norm": NaN, "learning_rate": 6.541504148200899e-05, "loss": 0.0, "step": 1436 }, { "epoch": 0.8358295768503708, "grad_norm": NaN, "learning_rate": 6.537026889952976e-05, "loss": 0.0, "step": 1437 }, { "epoch": 0.8364112258252144, "grad_norm": NaN, "learning_rate": 6.532548270233679e-05, "loss": 0.0, "step": 1438 }, { "epoch": 0.8369928748000581, "grad_norm": NaN, "learning_rate": 6.528068293010094e-05, "loss": 0.0, "step": 1439 }, { "epoch": 0.8375745237749018, "grad_norm": NaN, "learning_rate": 6.523586962250502e-05, "loss": 0.0, "step": 1440 }, { "epoch": 0.8381561727497455, "grad_norm": NaN, "learning_rate": 6.519104281924388e-05, "loss": 0.0, "step": 1441 }, { "epoch": 0.8387378217245892, "grad_norm": NaN, "learning_rate": 6.514620256002433e-05, "loss": 0.0, "step": 1442 }, { "epoch": 0.8393194706994329, "grad_norm": NaN, "learning_rate": 6.510134888456505e-05, "loss": 0.0, "step": 1443 }, { "epoch": 0.8399011196742766, "grad_norm": NaN, "learning_rate": 6.505648183259667e-05, "loss": 0.0, "step": 1444 }, { "epoch": 0.8404827686491203, "grad_norm": NaN, "learning_rate": 6.501160144386162e-05, "loss": 0.0, "step": 1445 }, { "epoch": 0.8410644176239639, "grad_norm": NaN, "learning_rate": 6.496670775811417e-05, "loss": 0.0, "step": 1446 }, { "epoch": 0.8416460665988076, "grad_norm": NaN, "learning_rate": 6.492180081512034e-05, "loss": 0.0, "step": 1447 }, { "epoch": 0.8422277155736513, "grad_norm": NaN, "learning_rate": 6.487688065465792e-05, "loss": 0.0, "step": 1448 }, { "epoch": 0.842809364548495, "grad_norm": NaN, "learning_rate": 6.483194731651639e-05, "loss": 0.0, "step": 1449 }, { "epoch": 0.8433910135233387, "grad_norm": NaN, "learning_rate": 6.478700084049692e-05, "loss": 0.0, "step": 1450 }, { "epoch": 0.8439726624981824, "grad_norm": NaN, "learning_rate": 6.47420412664123e-05, "loss": 0.0, "step": 1451 }, { "epoch": 0.8445543114730261, "grad_norm": NaN, "learning_rate": 6.46970686340869e-05, "loss": 0.0, "step": 1452 }, { "epoch": 0.8451359604478698, "grad_norm": NaN, "learning_rate": 6.465208298335676e-05, "loss": 0.0, "step": 1453 }, { "epoch": 0.8457176094227133, "grad_norm": NaN, "learning_rate": 6.460708435406933e-05, "loss": 0.0, "step": 1454 }, { "epoch": 0.846299258397557, "grad_norm": NaN, "learning_rate": 6.456207278608361e-05, "loss": 0.0, "step": 1455 }, { "epoch": 0.8468809073724007, "grad_norm": NaN, "learning_rate": 6.451704831927005e-05, "loss": 0.0, "step": 1456 }, { "epoch": 0.8474625563472444, "grad_norm": NaN, "learning_rate": 6.447201099351052e-05, "loss": 0.0, "step": 1457 }, { "epoch": 0.8480442053220881, "grad_norm": NaN, "learning_rate": 6.442696084869833e-05, "loss": 0.0, "step": 1458 }, { "epoch": 0.8486258542969318, "grad_norm": NaN, "learning_rate": 6.438189792473808e-05, "loss": 0.0, "step": 1459 }, { "epoch": 0.8492075032717755, "grad_norm": NaN, "learning_rate": 6.43368222615457e-05, "loss": 0.0, "step": 1460 }, { "epoch": 0.8497891522466192, "grad_norm": NaN, "learning_rate": 6.429173389904844e-05, "loss": 0.0, "step": 1461 }, { "epoch": 0.8503708012214628, "grad_norm": NaN, "learning_rate": 6.424663287718479e-05, "loss": 0.0, "step": 1462 }, { "epoch": 0.8509524501963065, "grad_norm": NaN, "learning_rate": 6.420151923590441e-05, "loss": 0.0, "step": 1463 }, { "epoch": 0.8515340991711502, "grad_norm": NaN, "learning_rate": 6.41563930151682e-05, "loss": 0.0, "step": 1464 }, { "epoch": 0.8521157481459939, "grad_norm": NaN, "learning_rate": 6.411125425494815e-05, "loss": 0.0, "step": 1465 }, { "epoch": 0.8526973971208376, "grad_norm": NaN, "learning_rate": 6.406610299522737e-05, "loss": 0.0, "step": 1466 }, { "epoch": 0.8532790460956813, "grad_norm": NaN, "learning_rate": 6.402093927600009e-05, "loss": 0.0, "step": 1467 }, { "epoch": 0.853860695070525, "grad_norm": NaN, "learning_rate": 6.397576313727151e-05, "loss": 0.0, "step": 1468 }, { "epoch": 0.8544423440453687, "grad_norm": NaN, "learning_rate": 6.393057461905788e-05, "loss": 0.0, "step": 1469 }, { "epoch": 0.8550239930202123, "grad_norm": NaN, "learning_rate": 6.388537376138636e-05, "loss": 0.0, "step": 1470 }, { "epoch": 0.855605641995056, "grad_norm": NaN, "learning_rate": 6.38401606042951e-05, "loss": 0.0, "step": 1471 }, { "epoch": 0.8561872909698997, "grad_norm": NaN, "learning_rate": 6.379493518783312e-05, "loss": 0.0, "step": 1472 }, { "epoch": 0.8567689399447433, "grad_norm": NaN, "learning_rate": 6.374969755206028e-05, "loss": 0.0, "step": 1473 }, { "epoch": 0.857350588919587, "grad_norm": NaN, "learning_rate": 6.37044477370473e-05, "loss": 0.0, "step": 1474 }, { "epoch": 0.8579322378944307, "grad_norm": NaN, "learning_rate": 6.365918578287565e-05, "loss": 0.0, "step": 1475 }, { "epoch": 0.8585138868692744, "grad_norm": NaN, "learning_rate": 6.36139117296376e-05, "loss": 0.0, "step": 1476 }, { "epoch": 0.8590955358441181, "grad_norm": NaN, "learning_rate": 6.35686256174361e-05, "loss": 0.0, "step": 1477 }, { "epoch": 0.8596771848189617, "grad_norm": NaN, "learning_rate": 6.352332748638477e-05, "loss": 0.0, "step": 1478 }, { "epoch": 0.8602588337938054, "grad_norm": NaN, "learning_rate": 6.347801737660793e-05, "loss": 0.0, "step": 1479 }, { "epoch": 0.8608404827686491, "grad_norm": NaN, "learning_rate": 6.343269532824043e-05, "loss": 0.0, "step": 1480 }, { "epoch": 0.8614221317434928, "grad_norm": NaN, "learning_rate": 6.33873613814278e-05, "loss": 0.0, "step": 1481 }, { "epoch": 0.8620037807183365, "grad_norm": NaN, "learning_rate": 6.334201557632601e-05, "loss": 0.0, "step": 1482 }, { "epoch": 0.8625854296931802, "grad_norm": NaN, "learning_rate": 6.329665795310163e-05, "loss": 0.0, "step": 1483 }, { "epoch": 0.8631670786680239, "grad_norm": NaN, "learning_rate": 6.325128855193157e-05, "loss": 0.0, "step": 1484 }, { "epoch": 0.8637487276428675, "grad_norm": NaN, "learning_rate": 6.32059074130033e-05, "loss": 0.0, "step": 1485 }, { "epoch": 0.8643303766177112, "grad_norm": NaN, "learning_rate": 6.316051457651462e-05, "loss": 0.0, "step": 1486 }, { "epoch": 0.8649120255925549, "grad_norm": NaN, "learning_rate": 6.31151100826737e-05, "loss": 0.0, "step": 1487 }, { "epoch": 0.8654936745673986, "grad_norm": NaN, "learning_rate": 6.306969397169905e-05, "loss": 0.0, "step": 1488 }, { "epoch": 0.8660753235422423, "grad_norm": NaN, "learning_rate": 6.302426628381944e-05, "loss": 0.0, "step": 1489 }, { "epoch": 0.866656972517086, "grad_norm": NaN, "learning_rate": 6.297882705927396e-05, "loss": 0.0, "step": 1490 }, { "epoch": 0.8672386214919297, "grad_norm": NaN, "learning_rate": 6.293337633831185e-05, "loss": 0.0, "step": 1491 }, { "epoch": 0.8678202704667733, "grad_norm": NaN, "learning_rate": 6.288791416119255e-05, "loss": 0.0, "step": 1492 }, { "epoch": 0.8684019194416169, "grad_norm": NaN, "learning_rate": 6.284244056818567e-05, "loss": 0.0, "step": 1493 }, { "epoch": 0.8689835684164606, "grad_norm": NaN, "learning_rate": 6.279695559957091e-05, "loss": 0.0, "step": 1494 }, { "epoch": 0.8695652173913043, "grad_norm": NaN, "learning_rate": 6.275145929563806e-05, "loss": 0.0, "step": 1495 }, { "epoch": 0.870146866366148, "grad_norm": NaN, "learning_rate": 6.270595169668694e-05, "loss": 0.0, "step": 1496 }, { "epoch": 0.8707285153409917, "grad_norm": NaN, "learning_rate": 6.266043284302741e-05, "loss": 0.0, "step": 1497 }, { "epoch": 0.8713101643158354, "grad_norm": NaN, "learning_rate": 6.261490277497921e-05, "loss": 0.0, "step": 1498 }, { "epoch": 0.8718918132906791, "grad_norm": NaN, "learning_rate": 6.256936153287212e-05, "loss": 0.0, "step": 1499 }, { "epoch": 0.8724734622655228, "grad_norm": NaN, "learning_rate": 6.252380915704577e-05, "loss": 0.0, "step": 1500 }, { "epoch": 0.8730551112403664, "grad_norm": NaN, "learning_rate": 6.247824568784963e-05, "loss": 0.0, "step": 1501 }, { "epoch": 0.8736367602152101, "grad_norm": NaN, "learning_rate": 6.243267116564304e-05, "loss": 0.0, "step": 1502 }, { "epoch": 0.8742184091900538, "grad_norm": NaN, "learning_rate": 6.23870856307951e-05, "loss": 0.0, "step": 1503 }, { "epoch": 0.8748000581648975, "grad_norm": NaN, "learning_rate": 6.234148912368468e-05, "loss": 0.0, "step": 1504 }, { "epoch": 0.8753817071397412, "grad_norm": NaN, "learning_rate": 6.229588168470035e-05, "loss": 0.0, "step": 1505 }, { "epoch": 0.8759633561145849, "grad_norm": NaN, "learning_rate": 6.225026335424037e-05, "loss": 0.0, "step": 1506 }, { "epoch": 0.8765450050894286, "grad_norm": NaN, "learning_rate": 6.22046341727127e-05, "loss": 0.0, "step": 1507 }, { "epoch": 0.8771266540642723, "grad_norm": NaN, "learning_rate": 6.215899418053482e-05, "loss": 0.0, "step": 1508 }, { "epoch": 0.8777083030391158, "grad_norm": NaN, "learning_rate": 6.211334341813386e-05, "loss": 0.0, "step": 1509 }, { "epoch": 0.8782899520139595, "grad_norm": NaN, "learning_rate": 6.206768192594644e-05, "loss": 0.0, "step": 1510 }, { "epoch": 0.8788716009888032, "grad_norm": NaN, "learning_rate": 6.202200974441873e-05, "loss": 0.0, "step": 1511 }, { "epoch": 0.8794532499636469, "grad_norm": NaN, "learning_rate": 6.197632691400631e-05, "loss": 0.0, "step": 1512 }, { "epoch": 0.8800348989384906, "grad_norm": NaN, "learning_rate": 6.193063347517427e-05, "loss": 0.0, "step": 1513 }, { "epoch": 0.8806165479133343, "grad_norm": NaN, "learning_rate": 6.188492946839703e-05, "loss": 0.0, "step": 1514 }, { "epoch": 0.881198196888178, "grad_norm": NaN, "learning_rate": 6.183921493415839e-05, "loss": 0.0, "step": 1515 }, { "epoch": 0.8817798458630217, "grad_norm": NaN, "learning_rate": 6.17934899129515e-05, "loss": 0.0, "step": 1516 }, { "epoch": 0.8823614948378653, "grad_norm": NaN, "learning_rate": 6.174775444527876e-05, "loss": 0.0, "step": 1517 }, { "epoch": 0.882943143812709, "grad_norm": NaN, "learning_rate": 6.170200857165184e-05, "loss": 0.0, "step": 1518 }, { "epoch": 0.8835247927875527, "grad_norm": NaN, "learning_rate": 6.165625233259165e-05, "loss": 0.0, "step": 1519 }, { "epoch": 0.8841064417623964, "grad_norm": NaN, "learning_rate": 6.161048576862825e-05, "loss": 0.0, "step": 1520 }, { "epoch": 0.8846880907372401, "grad_norm": NaN, "learning_rate": 6.156470892030085e-05, "loss": 0.0, "step": 1521 }, { "epoch": 0.8852697397120838, "grad_norm": NaN, "learning_rate": 6.15189218281578e-05, "loss": 0.0, "step": 1522 }, { "epoch": 0.8858513886869275, "grad_norm": NaN, "learning_rate": 6.147312453275646e-05, "loss": 0.0, "step": 1523 }, { "epoch": 0.8864330376617712, "grad_norm": NaN, "learning_rate": 6.142731707466333e-05, "loss": 0.0, "step": 1524 }, { "epoch": 0.8870146866366148, "grad_norm": NaN, "learning_rate": 6.13814994944538e-05, "loss": 0.0, "step": 1525 }, { "epoch": 0.8875963356114585, "grad_norm": NaN, "learning_rate": 6.133567183271227e-05, "loss": 0.0, "step": 1526 }, { "epoch": 0.8881779845863021, "grad_norm": NaN, "learning_rate": 6.128983413003212e-05, "loss": 0.0, "step": 1527 }, { "epoch": 0.8887596335611458, "grad_norm": NaN, "learning_rate": 6.124398642701555e-05, "loss": 0.0, "step": 1528 }, { "epoch": 0.8893412825359895, "grad_norm": NaN, "learning_rate": 6.119812876427365e-05, "loss": 0.0, "step": 1529 }, { "epoch": 0.8899229315108332, "grad_norm": NaN, "learning_rate": 6.115226118242635e-05, "loss": 0.0, "step": 1530 }, { "epoch": 0.8905045804856769, "grad_norm": NaN, "learning_rate": 6.110638372210232e-05, "loss": 0.0, "step": 1531 }, { "epoch": 0.8910862294605206, "grad_norm": NaN, "learning_rate": 6.106049642393903e-05, "loss": 0.0, "step": 1532 }, { "epoch": 0.8916678784353642, "grad_norm": NaN, "learning_rate": 6.1014599328582634e-05, "loss": 0.0, "step": 1533 }, { "epoch": 0.8922495274102079, "grad_norm": NaN, "learning_rate": 6.096869247668796e-05, "loss": 0.0, "step": 1534 }, { "epoch": 0.8928311763850516, "grad_norm": NaN, "learning_rate": 6.092277590891849e-05, "loss": 0.0, "step": 1535 }, { "epoch": 0.8934128253598953, "grad_norm": NaN, "learning_rate": 6.087684966594634e-05, "loss": 0.0, "step": 1536 }, { "epoch": 0.893994474334739, "grad_norm": NaN, "learning_rate": 6.083091378845215e-05, "loss": 0.0, "step": 1537 }, { "epoch": 0.8945761233095827, "grad_norm": NaN, "learning_rate": 6.0784968317125104e-05, "loss": 0.0, "step": 1538 }, { "epoch": 0.8951577722844264, "grad_norm": NaN, "learning_rate": 6.073901329266289e-05, "loss": 0.0, "step": 1539 }, { "epoch": 0.8957394212592701, "grad_norm": NaN, "learning_rate": 6.0693048755771674e-05, "loss": 0.0, "step": 1540 }, { "epoch": 0.8963210702341137, "grad_norm": NaN, "learning_rate": 6.0647074747166034e-05, "loss": 0.0, "step": 1541 }, { "epoch": 0.8969027192089574, "grad_norm": NaN, "learning_rate": 6.0601091307568926e-05, "loss": 0.0, "step": 1542 }, { "epoch": 0.8974843681838011, "grad_norm": NaN, "learning_rate": 6.055509847771168e-05, "loss": 0.0, "step": 1543 }, { "epoch": 0.8980660171586448, "grad_norm": NaN, "learning_rate": 6.0509096298333944e-05, "loss": 0.0, "step": 1544 }, { "epoch": 0.8986476661334885, "grad_norm": NaN, "learning_rate": 6.046308481018361e-05, "loss": 0.0, "step": 1545 }, { "epoch": 0.8992293151083322, "grad_norm": NaN, "learning_rate": 6.041706405401687e-05, "loss": 0.0, "step": 1546 }, { "epoch": 0.8998109640831758, "grad_norm": NaN, "learning_rate": 6.03710340705981e-05, "loss": 0.0, "step": 1547 }, { "epoch": 0.9003926130580194, "grad_norm": NaN, "learning_rate": 6.0324994900699836e-05, "loss": 0.0, "step": 1548 }, { "epoch": 0.9009742620328631, "grad_norm": NaN, "learning_rate": 6.027894658510277e-05, "loss": 0.0, "step": 1549 }, { "epoch": 0.9015559110077068, "grad_norm": NaN, "learning_rate": 6.0232889164595694e-05, "loss": 0.0, "step": 1550 }, { "epoch": 0.9021375599825505, "grad_norm": NaN, "learning_rate": 6.018682267997546e-05, "loss": 0.0, "step": 1551 }, { "epoch": 0.9027192089573942, "grad_norm": NaN, "learning_rate": 6.014074717204694e-05, "loss": 0.0, "step": 1552 }, { "epoch": 0.9033008579322379, "grad_norm": NaN, "learning_rate": 6.009466268162303e-05, "loss": 0.0, "step": 1553 }, { "epoch": 0.9038825069070816, "grad_norm": NaN, "learning_rate": 6.0048569249524524e-05, "loss": 0.0, "step": 1554 }, { "epoch": 0.9044641558819253, "grad_norm": NaN, "learning_rate": 6.00024669165802e-05, "loss": 0.0, "step": 1555 }, { "epoch": 0.9050458048567689, "grad_norm": NaN, "learning_rate": 5.99563557236267e-05, "loss": 0.0, "step": 1556 }, { "epoch": 0.9056274538316126, "grad_norm": NaN, "learning_rate": 5.9910235711508486e-05, "loss": 0.0, "step": 1557 }, { "epoch": 0.9062091028064563, "grad_norm": NaN, "learning_rate": 5.986410692107787e-05, "loss": 0.0, "step": 1558 }, { "epoch": 0.9067907517813, "grad_norm": NaN, "learning_rate": 5.981796939319491e-05, "loss": 0.0, "step": 1559 }, { "epoch": 0.9073724007561437, "grad_norm": NaN, "learning_rate": 5.977182316872742e-05, "loss": 0.0, "step": 1560 }, { "epoch": 0.9079540497309874, "grad_norm": NaN, "learning_rate": 5.972566828855092e-05, "loss": 0.0, "step": 1561 }, { "epoch": 0.9085356987058311, "grad_norm": NaN, "learning_rate": 5.967950479354858e-05, "loss": 0.0, "step": 1562 }, { "epoch": 0.9091173476806748, "grad_norm": NaN, "learning_rate": 5.963333272461121e-05, "loss": 0.0, "step": 1563 }, { "epoch": 0.9096989966555183, "grad_norm": NaN, "learning_rate": 5.958715212263724e-05, "loss": 0.0, "step": 1564 }, { "epoch": 0.910280645630362, "grad_norm": NaN, "learning_rate": 5.954096302853261e-05, "loss": 0.0, "step": 1565 }, { "epoch": 0.9108622946052057, "grad_norm": NaN, "learning_rate": 5.9494765483210825e-05, "loss": 0.0, "step": 1566 }, { "epoch": 0.9114439435800494, "grad_norm": NaN, "learning_rate": 5.944855952759285e-05, "loss": 0.0, "step": 1567 }, { "epoch": 0.9120255925548931, "grad_norm": NaN, "learning_rate": 5.940234520260708e-05, "loss": 0.0, "step": 1568 }, { "epoch": 0.9126072415297368, "grad_norm": NaN, "learning_rate": 5.9356122549189396e-05, "loss": 0.0, "step": 1569 }, { "epoch": 0.9131888905045805, "grad_norm": NaN, "learning_rate": 5.9309891608283e-05, "loss": 0.0, "step": 1570 }, { "epoch": 0.9137705394794242, "grad_norm": NaN, "learning_rate": 5.926365242083842e-05, "loss": 0.0, "step": 1571 }, { "epoch": 0.9143521884542678, "grad_norm": NaN, "learning_rate": 5.921740502781353e-05, "loss": 0.0, "step": 1572 }, { "epoch": 0.9149338374291115, "grad_norm": NaN, "learning_rate": 5.917114947017347e-05, "loss": 0.0, "step": 1573 }, { "epoch": 0.9155154864039552, "grad_norm": NaN, "learning_rate": 5.912488578889059e-05, "loss": 0.0, "step": 1574 }, { "epoch": 0.9160971353787989, "grad_norm": NaN, "learning_rate": 5.907861402494445e-05, "loss": 0.0, "step": 1575 }, { "epoch": 0.9166787843536426, "grad_norm": NaN, "learning_rate": 5.903233421932174e-05, "loss": 0.0, "step": 1576 }, { "epoch": 0.9172604333284863, "grad_norm": NaN, "learning_rate": 5.898604641301631e-05, "loss": 0.0, "step": 1577 }, { "epoch": 0.91784208230333, "grad_norm": NaN, "learning_rate": 5.89397506470291e-05, "loss": 0.0, "step": 1578 }, { "epoch": 0.9184237312781737, "grad_norm": NaN, "learning_rate": 5.889344696236807e-05, "loss": 0.0, "step": 1579 }, { "epoch": 0.9190053802530173, "grad_norm": NaN, "learning_rate": 5.884713540004822e-05, "loss": 0.0, "step": 1580 }, { "epoch": 0.919587029227861, "grad_norm": NaN, "learning_rate": 5.8800816001091505e-05, "loss": 0.0, "step": 1581 }, { "epoch": 0.9201686782027046, "grad_norm": NaN, "learning_rate": 5.875448880652683e-05, "loss": 0.0, "step": 1582 }, { "epoch": 0.9207503271775483, "grad_norm": NaN, "learning_rate": 5.8708153857390025e-05, "loss": 0.0, "step": 1583 }, { "epoch": 0.921331976152392, "grad_norm": NaN, "learning_rate": 5.866181119472377e-05, "loss": 0.0, "step": 1584 }, { "epoch": 0.9219136251272357, "grad_norm": NaN, "learning_rate": 5.8615460859577565e-05, "loss": 0.0, "step": 1585 }, { "epoch": 0.9224952741020794, "grad_norm": NaN, "learning_rate": 5.8569102893007734e-05, "loss": 0.0, "step": 1586 }, { "epoch": 0.9230769230769231, "grad_norm": NaN, "learning_rate": 5.8522737336077346e-05, "loss": 0.0, "step": 1587 }, { "epoch": 0.9236585720517667, "grad_norm": NaN, "learning_rate": 5.8476364229856196e-05, "loss": 0.0, "step": 1588 }, { "epoch": 0.9242402210266104, "grad_norm": NaN, "learning_rate": 5.842998361542077e-05, "loss": 0.0, "step": 1589 }, { "epoch": 0.9248218700014541, "grad_norm": NaN, "learning_rate": 5.8383595533854185e-05, "loss": 0.0, "step": 1590 }, { "epoch": 0.9254035189762978, "grad_norm": NaN, "learning_rate": 5.83372000262462e-05, "loss": 0.0, "step": 1591 }, { "epoch": 0.9259851679511415, "grad_norm": NaN, "learning_rate": 5.829079713369313e-05, "loss": 0.0, "step": 1592 }, { "epoch": 0.9265668169259852, "grad_norm": NaN, "learning_rate": 5.824438689729785e-05, "loss": 0.0, "step": 1593 }, { "epoch": 0.9271484659008289, "grad_norm": NaN, "learning_rate": 5.8197969358169734e-05, "loss": 0.0, "step": 1594 }, { "epoch": 0.9277301148756726, "grad_norm": NaN, "learning_rate": 5.815154455742462e-05, "loss": 0.0, "step": 1595 }, { "epoch": 0.9283117638505162, "grad_norm": NaN, "learning_rate": 5.8105112536184744e-05, "loss": 0.0, "step": 1596 }, { "epoch": 0.9288934128253599, "grad_norm": NaN, "learning_rate": 5.805867333557883e-05, "loss": 0.0, "step": 1597 }, { "epoch": 0.9294750618002036, "grad_norm": NaN, "learning_rate": 5.801222699674187e-05, "loss": 0.0, "step": 1598 }, { "epoch": 0.9300567107750473, "grad_norm": NaN, "learning_rate": 5.796577356081523e-05, "loss": 0.0, "step": 1599 }, { "epoch": 0.930638359749891, "grad_norm": NaN, "learning_rate": 5.7919313068946525e-05, "loss": 0.0, "step": 1600 }, { "epoch": 0.9312200087247346, "grad_norm": NaN, "learning_rate": 5.7872845562289665e-05, "loss": 0.0, "step": 1601 }, { "epoch": 0.9318016576995783, "grad_norm": NaN, "learning_rate": 5.7826371082004736e-05, "loss": 0.0, "step": 1602 }, { "epoch": 0.932383306674422, "grad_norm": NaN, "learning_rate": 5.777988966925803e-05, "loss": 0.0, "step": 1603 }, { "epoch": 0.9329649556492656, "grad_norm": NaN, "learning_rate": 5.773340136522194e-05, "loss": 0.0, "step": 1604 }, { "epoch": 0.9335466046241093, "grad_norm": NaN, "learning_rate": 5.768690621107499e-05, "loss": 0.0, "step": 1605 }, { "epoch": 0.934128253598953, "grad_norm": NaN, "learning_rate": 5.764040424800179e-05, "loss": 0.0, "step": 1606 }, { "epoch": 0.9347099025737967, "grad_norm": NaN, "learning_rate": 5.7593895517192955e-05, "loss": 0.0, "step": 1607 }, { "epoch": 0.9352915515486404, "grad_norm": NaN, "learning_rate": 5.754738005984509e-05, "loss": 0.0, "step": 1608 }, { "epoch": 0.9358732005234841, "grad_norm": NaN, "learning_rate": 5.7500857917160776e-05, "loss": 0.0, "step": 1609 }, { "epoch": 0.9364548494983278, "grad_norm": NaN, "learning_rate": 5.745432913034851e-05, "loss": 0.0, "step": 1610 }, { "epoch": 0.9370364984731714, "grad_norm": NaN, "learning_rate": 5.740779374062266e-05, "loss": 0.0, "step": 1611 }, { "epoch": 0.9376181474480151, "grad_norm": NaN, "learning_rate": 5.7361251789203475e-05, "loss": 0.0, "step": 1612 }, { "epoch": 0.9381997964228588, "grad_norm": NaN, "learning_rate": 5.731470331731698e-05, "loss": 0.0, "step": 1613 }, { "epoch": 0.9387814453977025, "grad_norm": NaN, "learning_rate": 5.726814836619498e-05, "loss": 0.0, "step": 1614 }, { "epoch": 0.9393630943725462, "grad_norm": NaN, "learning_rate": 5.722158697707508e-05, "loss": 0.0, "step": 1615 }, { "epoch": 0.9399447433473899, "grad_norm": NaN, "learning_rate": 5.7175019191200496e-05, "loss": 0.0, "step": 1616 }, { "epoch": 0.9405263923222336, "grad_norm": NaN, "learning_rate": 5.712844504982018e-05, "loss": 0.0, "step": 1617 }, { "epoch": 0.9411080412970773, "grad_norm": NaN, "learning_rate": 5.708186459418866e-05, "loss": 0.0, "step": 1618 }, { "epoch": 0.9416896902719208, "grad_norm": NaN, "learning_rate": 5.703527786556609e-05, "loss": 0.0, "step": 1619 }, { "epoch": 0.9422713392467645, "grad_norm": NaN, "learning_rate": 5.698868490521819e-05, "loss": 0.0, "step": 1620 }, { "epoch": 0.9428529882216082, "grad_norm": NaN, "learning_rate": 5.694208575441619e-05, "loss": 0.0, "step": 1621 }, { "epoch": 0.9434346371964519, "grad_norm": NaN, "learning_rate": 5.6895480454436764e-05, "loss": 0.0, "step": 1622 }, { "epoch": 0.9440162861712956, "grad_norm": NaN, "learning_rate": 5.684886904656209e-05, "loss": 0.0, "step": 1623 }, { "epoch": 0.9445979351461393, "grad_norm": NaN, "learning_rate": 5.680225157207973e-05, "loss": 0.0, "step": 1624 }, { "epoch": 0.945179584120983, "grad_norm": NaN, "learning_rate": 5.675562807228264e-05, "loss": 0.0, "step": 1625 }, { "epoch": 0.9457612330958267, "grad_norm": NaN, "learning_rate": 5.670899858846906e-05, "loss": 0.0, "step": 1626 }, { "epoch": 0.9463428820706703, "grad_norm": NaN, "learning_rate": 5.666236316194259e-05, "loss": 0.0, "step": 1627 }, { "epoch": 0.946924531045514, "grad_norm": NaN, "learning_rate": 5.661572183401206e-05, "loss": 0.0, "step": 1628 }, { "epoch": 0.9475061800203577, "grad_norm": NaN, "learning_rate": 5.656907464599155e-05, "loss": 0.0, "step": 1629 }, { "epoch": 0.9480878289952014, "grad_norm": NaN, "learning_rate": 5.65224216392003e-05, "loss": 0.0, "step": 1630 }, { "epoch": 0.9486694779700451, "grad_norm": NaN, "learning_rate": 5.647576285496274e-05, "loss": 0.0, "step": 1631 }, { "epoch": 0.9492511269448888, "grad_norm": NaN, "learning_rate": 5.6429098334608385e-05, "loss": 0.0, "step": 1632 }, { "epoch": 0.9498327759197325, "grad_norm": NaN, "learning_rate": 5.638242811947185e-05, "loss": 0.0, "step": 1633 }, { "epoch": 0.9504144248945762, "grad_norm": NaN, "learning_rate": 5.633575225089282e-05, "loss": 0.0, "step": 1634 }, { "epoch": 0.9509960738694198, "grad_norm": NaN, "learning_rate": 5.62890707702159e-05, "loss": 0.0, "step": 1635 }, { "epoch": 0.9515777228442635, "grad_norm": NaN, "learning_rate": 5.624238371879078e-05, "loss": 0.0, "step": 1636 }, { "epoch": 0.9521593718191071, "grad_norm": NaN, "learning_rate": 5.619569113797199e-05, "loss": 0.0, "step": 1637 }, { "epoch": 0.9527410207939508, "grad_norm": NaN, "learning_rate": 5.6148993069119015e-05, "loss": 0.0, "step": 1638 }, { "epoch": 0.9533226697687945, "grad_norm": NaN, "learning_rate": 5.610228955359619e-05, "loss": 0.0, "step": 1639 }, { "epoch": 0.9539043187436382, "grad_norm": NaN, "learning_rate": 5.605558063277264e-05, "loss": 0.0, "step": 1640 }, { "epoch": 0.9544859677184819, "grad_norm": NaN, "learning_rate": 5.600886634802234e-05, "loss": 0.0, "step": 1641 }, { "epoch": 0.9550676166933256, "grad_norm": NaN, "learning_rate": 5.596214674072394e-05, "loss": 0.0, "step": 1642 }, { "epoch": 0.9556492656681692, "grad_norm": NaN, "learning_rate": 5.591542185226088e-05, "loss": 0.0, "step": 1643 }, { "epoch": 0.9562309146430129, "grad_norm": NaN, "learning_rate": 5.586869172402124e-05, "loss": 0.0, "step": 1644 }, { "epoch": 0.9568125636178566, "grad_norm": NaN, "learning_rate": 5.582195639739773e-05, "loss": 0.0, "step": 1645 }, { "epoch": 0.9573942125927003, "grad_norm": NaN, "learning_rate": 5.5775215913787704e-05, "loss": 0.0, "step": 1646 }, { "epoch": 0.957975861567544, "grad_norm": NaN, "learning_rate": 5.5728470314593015e-05, "loss": 0.0, "step": 1647 }, { "epoch": 0.9585575105423877, "grad_norm": NaN, "learning_rate": 5.568171964122014e-05, "loss": 0.0, "step": 1648 }, { "epoch": 0.9591391595172314, "grad_norm": NaN, "learning_rate": 5.5634963935079986e-05, "loss": 0.0, "step": 1649 }, { "epoch": 0.9597208084920751, "grad_norm": NaN, "learning_rate": 5.558820323758793e-05, "loss": 0.0, "step": 1650 }, { "epoch": 0.9603024574669187, "grad_norm": NaN, "learning_rate": 5.5541437590163766e-05, "loss": 0.0, "step": 1651 }, { "epoch": 0.9608841064417624, "grad_norm": NaN, "learning_rate": 5.549466703423172e-05, "loss": 0.0, "step": 1652 }, { "epoch": 0.9614657554166061, "grad_norm": NaN, "learning_rate": 5.544789161122028e-05, "loss": 0.0, "step": 1653 }, { "epoch": 0.9620474043914498, "grad_norm": NaN, "learning_rate": 5.540111136256232e-05, "loss": 0.0, "step": 1654 }, { "epoch": 0.9626290533662935, "grad_norm": NaN, "learning_rate": 5.535432632969496e-05, "loss": 0.0, "step": 1655 }, { "epoch": 0.9632107023411371, "grad_norm": NaN, "learning_rate": 5.5307536554059535e-05, "loss": 0.0, "step": 1656 }, { "epoch": 0.9637923513159808, "grad_norm": NaN, "learning_rate": 5.5260742077101634e-05, "loss": 0.0, "step": 1657 }, { "epoch": 0.9643740002908245, "grad_norm": NaN, "learning_rate": 5.521394294027098e-05, "loss": 0.0, "step": 1658 }, { "epoch": 0.9649556492656681, "grad_norm": NaN, "learning_rate": 5.516713918502141e-05, "loss": 0.0, "step": 1659 }, { "epoch": 0.9655372982405118, "grad_norm": NaN, "learning_rate": 5.5120330852810874e-05, "loss": 0.0, "step": 1660 }, { "epoch": 0.9661189472153555, "grad_norm": NaN, "learning_rate": 5.5073517985101364e-05, "loss": 0.0, "step": 1661 }, { "epoch": 0.9667005961901992, "grad_norm": NaN, "learning_rate": 5.5026700623358894e-05, "loss": 0.0, "step": 1662 }, { "epoch": 0.9672822451650429, "grad_norm": NaN, "learning_rate": 5.497987880905347e-05, "loss": 0.0, "step": 1663 }, { "epoch": 0.9678638941398866, "grad_norm": NaN, "learning_rate": 5.493305258365903e-05, "loss": 0.0, "step": 1664 }, { "epoch": 0.9684455431147303, "grad_norm": NaN, "learning_rate": 5.4886221988653394e-05, "loss": 0.0, "step": 1665 }, { "epoch": 0.969027192089574, "grad_norm": NaN, "learning_rate": 5.4839387065518324e-05, "loss": 0.0, "step": 1666 }, { "epoch": 0.9696088410644176, "grad_norm": NaN, "learning_rate": 5.479254785573934e-05, "loss": 0.0, "step": 1667 }, { "epoch": 0.9701904900392613, "grad_norm": NaN, "learning_rate": 5.47457044008058e-05, "loss": 0.0, "step": 1668 }, { "epoch": 0.970772139014105, "grad_norm": NaN, "learning_rate": 5.46988567422108e-05, "loss": 0.0, "step": 1669 }, { "epoch": 0.9713537879889487, "grad_norm": NaN, "learning_rate": 5.465200492145117e-05, "loss": 0.0, "step": 1670 }, { "epoch": 0.9719354369637924, "grad_norm": NaN, "learning_rate": 5.4605148980027455e-05, "loss": 0.0, "step": 1671 }, { "epoch": 0.9725170859386361, "grad_norm": NaN, "learning_rate": 5.4558288959443804e-05, "loss": 0.0, "step": 1672 }, { "epoch": 0.9730987349134798, "grad_norm": NaN, "learning_rate": 5.451142490120801e-05, "loss": 0.0, "step": 1673 }, { "epoch": 0.9736803838883233, "grad_norm": NaN, "learning_rate": 5.446455684683141e-05, "loss": 0.0, "step": 1674 }, { "epoch": 0.974262032863167, "grad_norm": NaN, "learning_rate": 5.441768483782892e-05, "loss": 0.0, "step": 1675 }, { "epoch": 0.9748436818380107, "grad_norm": NaN, "learning_rate": 5.437080891571893e-05, "loss": 0.0, "step": 1676 }, { "epoch": 0.9754253308128544, "grad_norm": NaN, "learning_rate": 5.432392912202332e-05, "loss": 0.0, "step": 1677 }, { "epoch": 0.9760069797876981, "grad_norm": NaN, "learning_rate": 5.427704549826739e-05, "loss": 0.0, "step": 1678 }, { "epoch": 0.9765886287625418, "grad_norm": NaN, "learning_rate": 5.423015808597979e-05, "loss": 0.0, "step": 1679 }, { "epoch": 0.9771702777373855, "grad_norm": NaN, "learning_rate": 5.418326692669261e-05, "loss": 0.0, "step": 1680 }, { "epoch": 0.9777519267122292, "grad_norm": NaN, "learning_rate": 5.41363720619412e-05, "loss": 0.0, "step": 1681 }, { "epoch": 0.9783335756870728, "grad_norm": NaN, "learning_rate": 5.408947353326419e-05, "loss": 0.0, "step": 1682 }, { "epoch": 0.9789152246619165, "grad_norm": NaN, "learning_rate": 5.40425713822035e-05, "loss": 0.0, "step": 1683 }, { "epoch": 0.9794968736367602, "grad_norm": NaN, "learning_rate": 5.399566565030419e-05, "loss": 0.0, "step": 1684 }, { "epoch": 0.9800785226116039, "grad_norm": NaN, "learning_rate": 5.3948756379114573e-05, "loss": 0.0, "step": 1685 }, { "epoch": 0.9806601715864476, "grad_norm": NaN, "learning_rate": 5.3901843610186046e-05, "loss": 0.0, "step": 1686 }, { "epoch": 0.9812418205612913, "grad_norm": NaN, "learning_rate": 5.3854927385073096e-05, "loss": 0.0, "step": 1687 }, { "epoch": 0.981823469536135, "grad_norm": NaN, "learning_rate": 5.38080077453333e-05, "loss": 0.0, "step": 1688 }, { "epoch": 0.9824051185109787, "grad_norm": NaN, "learning_rate": 5.376108473252726e-05, "loss": 0.0, "step": 1689 }, { "epoch": 0.9829867674858223, "grad_norm": NaN, "learning_rate": 5.371415838821853e-05, "loss": 0.0, "step": 1690 }, { "epoch": 0.983568416460666, "grad_norm": NaN, "learning_rate": 5.366722875397366e-05, "loss": 0.0, "step": 1691 }, { "epoch": 0.9841500654355096, "grad_norm": NaN, "learning_rate": 5.3620295871362104e-05, "loss": 0.0, "step": 1692 }, { "epoch": 0.9847317144103533, "grad_norm": NaN, "learning_rate": 5.3573359781956143e-05, "loss": 0.0, "step": 1693 }, { "epoch": 0.985313363385197, "grad_norm": NaN, "learning_rate": 5.352642052733099e-05, "loss": 0.0, "step": 1694 }, { "epoch": 0.9858950123600407, "grad_norm": NaN, "learning_rate": 5.347947814906458e-05, "loss": 0.0, "step": 1695 }, { "epoch": 0.9864766613348844, "grad_norm": NaN, "learning_rate": 5.343253268873765e-05, "loss": 0.0, "step": 1696 }, { "epoch": 0.9870583103097281, "grad_norm": NaN, "learning_rate": 5.3385584187933655e-05, "loss": 0.0, "step": 1697 }, { "epoch": 0.9876399592845717, "grad_norm": NaN, "learning_rate": 5.3338632688238765e-05, "loss": 0.0, "step": 1698 }, { "epoch": 0.9882216082594154, "grad_norm": NaN, "learning_rate": 5.3291678231241794e-05, "loss": 0.0, "step": 1699 }, { "epoch": 0.9888032572342591, "grad_norm": NaN, "learning_rate": 5.324472085853416e-05, "loss": 0.0, "step": 1700 }, { "epoch": 0.9893849062091028, "grad_norm": NaN, "learning_rate": 5.319776061170989e-05, "loss": 0.0, "step": 1701 }, { "epoch": 0.9899665551839465, "grad_norm": NaN, "learning_rate": 5.315079753236554e-05, "loss": 0.0, "step": 1702 }, { "epoch": 0.9905482041587902, "grad_norm": NaN, "learning_rate": 5.3103831662100165e-05, "loss": 0.0, "step": 1703 }, { "epoch": 0.9911298531336339, "grad_norm": NaN, "learning_rate": 5.305686304251534e-05, "loss": 0.0, "step": 1704 }, { "epoch": 0.9917115021084776, "grad_norm": NaN, "learning_rate": 5.300989171521501e-05, "loss": 0.0, "step": 1705 }, { "epoch": 0.9922931510833212, "grad_norm": NaN, "learning_rate": 5.296291772180557e-05, "loss": 0.0, "step": 1706 }, { "epoch": 0.9928748000581649, "grad_norm": NaN, "learning_rate": 5.2915941103895725e-05, "loss": 0.0, "step": 1707 }, { "epoch": 0.9934564490330086, "grad_norm": NaN, "learning_rate": 5.286896190309658e-05, "loss": 0.0, "step": 1708 }, { "epoch": 0.9940380980078523, "grad_norm": NaN, "learning_rate": 5.282198016102147e-05, "loss": 0.0, "step": 1709 }, { "epoch": 0.994619746982696, "grad_norm": NaN, "learning_rate": 5.2774995919285984e-05, "loss": 0.0, "step": 1710 }, { "epoch": 0.9952013959575396, "grad_norm": NaN, "learning_rate": 5.2728009219507936e-05, "loss": 0.0, "step": 1711 }, { "epoch": 0.9957830449323833, "grad_norm": NaN, "learning_rate": 5.2681020103307324e-05, "loss": 0.0, "step": 1712 }, { "epoch": 0.996364693907227, "grad_norm": NaN, "learning_rate": 5.263402861230629e-05, "loss": 0.0, "step": 1713 }, { "epoch": 0.9969463428820706, "grad_norm": NaN, "learning_rate": 5.258703478812905e-05, "loss": 0.0, "step": 1714 }, { "epoch": 0.9975279918569143, "grad_norm": NaN, "learning_rate": 5.2540038672401925e-05, "loss": 0.0, "step": 1715 }, { "epoch": 0.998109640831758, "grad_norm": NaN, "learning_rate": 5.249304030675323e-05, "loss": 0.0, "step": 1716 }, { "epoch": 0.9986912898066017, "grad_norm": NaN, "learning_rate": 5.2446039732813304e-05, "loss": 0.0, "step": 1717 }, { "epoch": 0.9992729387814454, "grad_norm": NaN, "learning_rate": 5.239903699221443e-05, "loss": 0.0, "step": 1718 }, { "epoch": 0.9998545877562891, "grad_norm": NaN, "learning_rate": 5.23520321265908e-05, "loss": 0.0, "step": 1719 }, { "epoch": 0.9998545877562891, "eval_loss": NaN, "eval_runtime": 16.8002, "eval_samples_per_second": 43.095, "eval_steps_per_second": 21.547, "step": 1719 }, { "epoch": 1.0004362367311328, "grad_norm": NaN, "learning_rate": 5.23050251775785e-05, "loss": 0.0, "step": 1720 }, { "epoch": 1.0010178857059764, "grad_norm": NaN, "learning_rate": 5.225801618681545e-05, "loss": 0.0, "step": 1721 }, { "epoch": 1.0015995346808202, "grad_norm": NaN, "learning_rate": 5.22110051959414e-05, "loss": 0.0, "step": 1722 }, { "epoch": 1.0021811836556638, "grad_norm": NaN, "learning_rate": 5.2163992246597856e-05, "loss": 0.0, "step": 1723 }, { "epoch": 1.0027628326305076, "grad_norm": NaN, "learning_rate": 5.2116977380428056e-05, "loss": 0.0, "step": 1724 }, { "epoch": 1.0033444816053512, "grad_norm": NaN, "learning_rate": 5.206996063907693e-05, "loss": 0.0, "step": 1725 }, { "epoch": 1.0039261305801948, "grad_norm": NaN, "learning_rate": 5.202294206419112e-05, "loss": 0.0, "step": 1726 }, { "epoch": 1.0045077795550386, "grad_norm": NaN, "learning_rate": 5.19759216974188e-05, "loss": 0.0, "step": 1727 }, { "epoch": 1.0050894285298821, "grad_norm": NaN, "learning_rate": 5.1928899580409826e-05, "loss": 0.0, "step": 1728 }, { "epoch": 1.005671077504726, "grad_norm": NaN, "learning_rate": 5.1881875754815556e-05, "loss": 0.0, "step": 1729 }, { "epoch": 1.0062527264795695, "grad_norm": NaN, "learning_rate": 5.1834850262288825e-05, "loss": 0.0, "step": 1730 }, { "epoch": 1.0068343754544133, "grad_norm": NaN, "learning_rate": 5.1787823144484036e-05, "loss": 0.0, "step": 1731 }, { "epoch": 1.007416024429257, "grad_norm": NaN, "learning_rate": 5.174079444305697e-05, "loss": 0.0, "step": 1732 }, { "epoch": 1.0079976734041005, "grad_norm": NaN, "learning_rate": 5.169376419966482e-05, "loss": 0.0, "step": 1733 }, { "epoch": 1.0085793223789443, "grad_norm": NaN, "learning_rate": 5.164673245596613e-05, "loss": 0.0, "step": 1734 }, { "epoch": 1.009160971353788, "grad_norm": NaN, "learning_rate": 5.159969925362081e-05, "loss": 0.0, "step": 1735 }, { "epoch": 1.0097426203286317, "grad_norm": NaN, "learning_rate": 5.155266463429005e-05, "loss": 0.0, "step": 1736 }, { "epoch": 1.0103242693034753, "grad_norm": NaN, "learning_rate": 5.150562863963627e-05, "loss": 0.0, "step": 1737 }, { "epoch": 1.010905918278319, "grad_norm": NaN, "learning_rate": 5.145859131132314e-05, "loss": 0.0, "step": 1738 }, { "epoch": 1.0114875672531627, "grad_norm": NaN, "learning_rate": 5.1411552691015485e-05, "loss": 0.0, "step": 1739 }, { "epoch": 1.0120692162280065, "grad_norm": NaN, "learning_rate": 5.136451282037929e-05, "loss": 0.0, "step": 1740 }, { "epoch": 1.01265086520285, "grad_norm": NaN, "learning_rate": 5.131747174108167e-05, "loss": 0.0, "step": 1741 }, { "epoch": 1.0132325141776937, "grad_norm": NaN, "learning_rate": 5.127042949479075e-05, "loss": 0.0, "step": 1742 }, { "epoch": 1.0138141631525375, "grad_norm": NaN, "learning_rate": 5.1223386123175745e-05, "loss": 0.0, "step": 1743 }, { "epoch": 1.014395812127381, "grad_norm": NaN, "learning_rate": 5.117634166790682e-05, "loss": 0.0, "step": 1744 }, { "epoch": 1.0149774611022249, "grad_norm": NaN, "learning_rate": 5.112929617065517e-05, "loss": 0.0, "step": 1745 }, { "epoch": 1.0155591100770684, "grad_norm": NaN, "learning_rate": 5.108224967309283e-05, "loss": 0.0, "step": 1746 }, { "epoch": 1.0161407590519123, "grad_norm": NaN, "learning_rate": 5.103520221689279e-05, "loss": 0.0, "step": 1747 }, { "epoch": 1.0167224080267558, "grad_norm": NaN, "learning_rate": 5.0988153843728826e-05, "loss": 0.0, "step": 1748 }, { "epoch": 1.0173040570015994, "grad_norm": NaN, "learning_rate": 5.094110459527557e-05, "loss": 0.0, "step": 1749 }, { "epoch": 1.0178857059764432, "grad_norm": NaN, "learning_rate": 5.089405451320843e-05, "loss": 0.0, "step": 1750 }, { "epoch": 1.0184673549512868, "grad_norm": NaN, "learning_rate": 5.084700363920354e-05, "loss": 0.0, "step": 1751 }, { "epoch": 1.0190490039261306, "grad_norm": NaN, "learning_rate": 5.079995201493772e-05, "loss": 0.0, "step": 1752 }, { "epoch": 1.0196306529009742, "grad_norm": NaN, "learning_rate": 5.0752899682088484e-05, "loss": 0.0, "step": 1753 }, { "epoch": 1.020212301875818, "grad_norm": NaN, "learning_rate": 5.070584668233397e-05, "loss": 0.0, "step": 1754 }, { "epoch": 1.0207939508506616, "grad_norm": NaN, "learning_rate": 5.065879305735287e-05, "loss": 0.0, "step": 1755 }, { "epoch": 1.0213755998255054, "grad_norm": NaN, "learning_rate": 5.061173884882449e-05, "loss": 0.0, "step": 1756 }, { "epoch": 1.021957248800349, "grad_norm": NaN, "learning_rate": 5.056468409842859e-05, "loss": 0.0, "step": 1757 }, { "epoch": 1.0225388977751926, "grad_norm": NaN, "learning_rate": 5.0517628847845444e-05, "loss": 0.0, "step": 1758 }, { "epoch": 1.0231205467500364, "grad_norm": NaN, "learning_rate": 5.0470573138755784e-05, "loss": 0.0, "step": 1759 }, { "epoch": 1.02370219572488, "grad_norm": NaN, "learning_rate": 5.042351701284072e-05, "loss": 0.0, "step": 1760 }, { "epoch": 1.0242838446997238, "grad_norm": NaN, "learning_rate": 5.037646051178173e-05, "loss": 0.0, "step": 1761 }, { "epoch": 1.0248654936745674, "grad_norm": NaN, "learning_rate": 5.032940367726066e-05, "loss": 0.0, "step": 1762 }, { "epoch": 1.0254471426494112, "grad_norm": NaN, "learning_rate": 5.028234655095958e-05, "loss": 0.0, "step": 1763 }, { "epoch": 1.0260287916242548, "grad_norm": NaN, "learning_rate": 5.023528917456091e-05, "loss": 0.0, "step": 1764 }, { "epoch": 1.0266104405990983, "grad_norm": NaN, "learning_rate": 5.018823158974724e-05, "loss": 0.0, "step": 1765 }, { "epoch": 1.0271920895739421, "grad_norm": NaN, "learning_rate": 5.014117383820133e-05, "loss": 0.0, "step": 1766 }, { "epoch": 1.0277737385487857, "grad_norm": NaN, "learning_rate": 5.00941159616061e-05, "loss": 0.0, "step": 1767 }, { "epoch": 1.0283553875236295, "grad_norm": NaN, "learning_rate": 5.004705800164463e-05, "loss": 0.0, "step": 1768 }, { "epoch": 1.0289370364984731, "grad_norm": NaN, "learning_rate": 5e-05, "loss": 0.0, "step": 1769 }, { "epoch": 1.029518685473317, "grad_norm": NaN, "learning_rate": 4.995294199835538e-05, "loss": 0.0, "step": 1770 }, { "epoch": 1.0301003344481605, "grad_norm": NaN, "learning_rate": 4.9905884038393905e-05, "loss": 0.0, "step": 1771 }, { "epoch": 1.0306819834230043, "grad_norm": NaN, "learning_rate": 4.98588261617987e-05, "loss": 0.0, "step": 1772 }, { "epoch": 1.031263632397848, "grad_norm": NaN, "learning_rate": 4.9811768410252794e-05, "loss": 0.0, "step": 1773 }, { "epoch": 1.0318452813726915, "grad_norm": NaN, "learning_rate": 4.9764710825439084e-05, "loss": 0.0, "step": 1774 }, { "epoch": 1.0324269303475353, "grad_norm": NaN, "learning_rate": 4.971765344904042e-05, "loss": 0.0, "step": 1775 }, { "epoch": 1.0330085793223789, "grad_norm": NaN, "learning_rate": 4.967059632273936e-05, "loss": 0.0, "step": 1776 }, { "epoch": 1.0335902282972227, "grad_norm": NaN, "learning_rate": 4.962353948821828e-05, "loss": 0.0, "step": 1777 }, { "epoch": 1.0341718772720663, "grad_norm": NaN, "learning_rate": 4.9576482987159295e-05, "loss": 0.0, "step": 1778 }, { "epoch": 1.03475352624691, "grad_norm": NaN, "learning_rate": 4.952942686124423e-05, "loss": 0.0, "step": 1779 }, { "epoch": 1.0353351752217537, "grad_norm": NaN, "learning_rate": 4.948237115215457e-05, "loss": 0.0, "step": 1780 }, { "epoch": 1.0359168241965973, "grad_norm": NaN, "learning_rate": 4.943531590157143e-05, "loss": 0.0, "step": 1781 }, { "epoch": 1.036498473171441, "grad_norm": NaN, "learning_rate": 4.9388261151175536e-05, "loss": 0.0, "step": 1782 }, { "epoch": 1.0370801221462846, "grad_norm": NaN, "learning_rate": 4.934120694264712e-05, "loss": 0.0, "step": 1783 }, { "epoch": 1.0376617711211285, "grad_norm": NaN, "learning_rate": 4.929415331766604e-05, "loss": 0.0, "step": 1784 }, { "epoch": 1.038243420095972, "grad_norm": NaN, "learning_rate": 4.9247100317911514e-05, "loss": 0.0, "step": 1785 }, { "epoch": 1.0388250690708158, "grad_norm": NaN, "learning_rate": 4.9200047985062284e-05, "loss": 0.0, "step": 1786 }, { "epoch": 1.0394067180456594, "grad_norm": NaN, "learning_rate": 4.915299636079647e-05, "loss": 0.0, "step": 1787 }, { "epoch": 1.039988367020503, "grad_norm": NaN, "learning_rate": 4.910594548679158e-05, "loss": 0.0, "step": 1788 }, { "epoch": 1.0405700159953468, "grad_norm": NaN, "learning_rate": 4.905889540472444e-05, "loss": 0.0, "step": 1789 }, { "epoch": 1.0411516649701904, "grad_norm": NaN, "learning_rate": 4.90118461562712e-05, "loss": 0.0, "step": 1790 }, { "epoch": 1.0417333139450342, "grad_norm": NaN, "learning_rate": 4.8964797783107236e-05, "loss": 0.0, "step": 1791 }, { "epoch": 1.0423149629198778, "grad_norm": NaN, "learning_rate": 4.8917750326907187e-05, "loss": 0.0, "step": 1792 }, { "epoch": 1.0428966118947216, "grad_norm": NaN, "learning_rate": 4.8870703829344834e-05, "loss": 0.0, "step": 1793 }, { "epoch": 1.0434782608695652, "grad_norm": NaN, "learning_rate": 4.8823658332093175e-05, "loss": 0.0, "step": 1794 }, { "epoch": 1.044059909844409, "grad_norm": NaN, "learning_rate": 4.877661387682427e-05, "loss": 0.0, "step": 1795 }, { "epoch": 1.0446415588192526, "grad_norm": NaN, "learning_rate": 4.872957050520926e-05, "loss": 0.0, "step": 1796 }, { "epoch": 1.0452232077940962, "grad_norm": NaN, "learning_rate": 4.8682528258918345e-05, "loss": 0.0, "step": 1797 }, { "epoch": 1.04580485676894, "grad_norm": NaN, "learning_rate": 4.863548717962071e-05, "loss": 0.0, "step": 1798 }, { "epoch": 1.0463865057437836, "grad_norm": NaN, "learning_rate": 4.858844730898453e-05, "loss": 0.0, "step": 1799 }, { "epoch": 1.0469681547186274, "grad_norm": NaN, "learning_rate": 4.854140868867688e-05, "loss": 0.0, "step": 1800 }, { "epoch": 1.047549803693471, "grad_norm": NaN, "learning_rate": 4.849437136036375e-05, "loss": 0.0, "step": 1801 }, { "epoch": 1.0481314526683148, "grad_norm": NaN, "learning_rate": 4.844733536570995e-05, "loss": 0.0, "step": 1802 }, { "epoch": 1.0487131016431583, "grad_norm": NaN, "learning_rate": 4.8400300746379193e-05, "loss": 0.0, "step": 1803 }, { "epoch": 1.0492947506180021, "grad_norm": NaN, "learning_rate": 4.835326754403387e-05, "loss": 0.0, "step": 1804 }, { "epoch": 1.0498763995928457, "grad_norm": NaN, "learning_rate": 4.83062358003352e-05, "loss": 0.0, "step": 1805 }, { "epoch": 1.0504580485676893, "grad_norm": NaN, "learning_rate": 4.825920555694304e-05, "loss": 0.0, "step": 1806 }, { "epoch": 1.0510396975425331, "grad_norm": NaN, "learning_rate": 4.8212176855515975e-05, "loss": 0.0, "step": 1807 }, { "epoch": 1.0516213465173767, "grad_norm": NaN, "learning_rate": 4.816514973771119e-05, "loss": 0.0, "step": 1808 }, { "epoch": 1.0522029954922205, "grad_norm": NaN, "learning_rate": 4.811812424518447e-05, "loss": 0.0, "step": 1809 }, { "epoch": 1.052784644467064, "grad_norm": NaN, "learning_rate": 4.8071100419590186e-05, "loss": 0.0, "step": 1810 }, { "epoch": 1.053366293441908, "grad_norm": NaN, "learning_rate": 4.802407830258119e-05, "loss": 0.0, "step": 1811 }, { "epoch": 1.0539479424167515, "grad_norm": NaN, "learning_rate": 4.7977057935808886e-05, "loss": 0.0, "step": 1812 }, { "epoch": 1.054529591391595, "grad_norm": NaN, "learning_rate": 4.793003936092306e-05, "loss": 0.0, "step": 1813 }, { "epoch": 1.0551112403664389, "grad_norm": NaN, "learning_rate": 4.7883022619571955e-05, "loss": 0.0, "step": 1814 }, { "epoch": 1.0556928893412825, "grad_norm": NaN, "learning_rate": 4.7836007753402156e-05, "loss": 0.0, "step": 1815 }, { "epoch": 1.0562745383161263, "grad_norm": NaN, "learning_rate": 4.778899480405861e-05, "loss": 0.0, "step": 1816 }, { "epoch": 1.0568561872909699, "grad_norm": NaN, "learning_rate": 4.774198381318456e-05, "loss": 0.0, "step": 1817 }, { "epoch": 1.0574378362658137, "grad_norm": NaN, "learning_rate": 4.7694974822421516e-05, "loss": 0.0, "step": 1818 }, { "epoch": 1.0580194852406573, "grad_norm": NaN, "learning_rate": 4.764796787340922e-05, "loss": 0.0, "step": 1819 }, { "epoch": 1.0586011342155008, "grad_norm": NaN, "learning_rate": 4.7600963007785574e-05, "loss": 0.0, "step": 1820 }, { "epoch": 1.0591827831903446, "grad_norm": NaN, "learning_rate": 4.7553960267186694e-05, "loss": 0.0, "step": 1821 }, { "epoch": 1.0597644321651882, "grad_norm": NaN, "learning_rate": 4.750695969324677e-05, "loss": 0.0, "step": 1822 }, { "epoch": 1.060346081140032, "grad_norm": NaN, "learning_rate": 4.7459961327598086e-05, "loss": 0.0, "step": 1823 }, { "epoch": 1.0609277301148756, "grad_norm": NaN, "learning_rate": 4.741296521187096e-05, "loss": 0.0, "step": 1824 }, { "epoch": 1.0615093790897194, "grad_norm": NaN, "learning_rate": 4.736597138769372e-05, "loss": 0.0, "step": 1825 }, { "epoch": 1.062091028064563, "grad_norm": NaN, "learning_rate": 4.731897989669269e-05, "loss": 0.0, "step": 1826 }, { "epoch": 1.0626726770394068, "grad_norm": NaN, "learning_rate": 4.727199078049208e-05, "loss": 0.0, "step": 1827 }, { "epoch": 1.0632543260142504, "grad_norm": NaN, "learning_rate": 4.722500408071404e-05, "loss": 0.0, "step": 1828 }, { "epoch": 1.063835974989094, "grad_norm": NaN, "learning_rate": 4.717801983897855e-05, "loss": 0.0, "step": 1829 }, { "epoch": 1.0644176239639378, "grad_norm": NaN, "learning_rate": 4.713103809690342e-05, "loss": 0.0, "step": 1830 }, { "epoch": 1.0649992729387814, "grad_norm": NaN, "learning_rate": 4.7084058896104273e-05, "loss": 0.0, "step": 1831 }, { "epoch": 1.0655809219136252, "grad_norm": NaN, "learning_rate": 4.703708227819445e-05, "loss": 0.0, "step": 1832 }, { "epoch": 1.0661625708884688, "grad_norm": NaN, "learning_rate": 4.6990108284785e-05, "loss": 0.0, "step": 1833 }, { "epoch": 1.0667442198633126, "grad_norm": NaN, "learning_rate": 4.6943136957484674e-05, "loss": 0.0, "step": 1834 }, { "epoch": 1.0673258688381562, "grad_norm": NaN, "learning_rate": 4.6896168337899846e-05, "loss": 0.0, "step": 1835 }, { "epoch": 1.0679075178129998, "grad_norm": NaN, "learning_rate": 4.684920246763448e-05, "loss": 0.0, "step": 1836 }, { "epoch": 1.0684891667878436, "grad_norm": NaN, "learning_rate": 4.6802239388290126e-05, "loss": 0.0, "step": 1837 }, { "epoch": 1.0690708157626871, "grad_norm": NaN, "learning_rate": 4.675527914146586e-05, "loss": 0.0, "step": 1838 }, { "epoch": 1.069652464737531, "grad_norm": NaN, "learning_rate": 4.6708321768758204e-05, "loss": 0.0, "step": 1839 }, { "epoch": 1.0702341137123745, "grad_norm": NaN, "learning_rate": 4.6661367311761233e-05, "loss": 0.0, "step": 1840 }, { "epoch": 1.0708157626872183, "grad_norm": NaN, "learning_rate": 4.661441581206635e-05, "loss": 0.0, "step": 1841 }, { "epoch": 1.071397411662062, "grad_norm": NaN, "learning_rate": 4.656746731126236e-05, "loss": 0.0, "step": 1842 }, { "epoch": 1.0719790606369055, "grad_norm": NaN, "learning_rate": 4.652052185093543e-05, "loss": 0.0, "step": 1843 }, { "epoch": 1.0725607096117493, "grad_norm": NaN, "learning_rate": 4.647357947266902e-05, "loss": 0.0, "step": 1844 }, { "epoch": 1.073142358586593, "grad_norm": NaN, "learning_rate": 4.642664021804386e-05, "loss": 0.0, "step": 1845 }, { "epoch": 1.0737240075614367, "grad_norm": NaN, "learning_rate": 4.637970412863792e-05, "loss": 0.0, "step": 1846 }, { "epoch": 1.0743056565362803, "grad_norm": NaN, "learning_rate": 4.633277124602635e-05, "loss": 0.0, "step": 1847 }, { "epoch": 1.074887305511124, "grad_norm": NaN, "learning_rate": 4.628584161178147e-05, "loss": 0.0, "step": 1848 }, { "epoch": 1.0754689544859677, "grad_norm": NaN, "learning_rate": 4.623891526747275e-05, "loss": 0.0, "step": 1849 }, { "epoch": 1.0760506034608115, "grad_norm": NaN, "learning_rate": 4.61919922546667e-05, "loss": 0.0, "step": 1850 }, { "epoch": 1.076632252435655, "grad_norm": NaN, "learning_rate": 4.6145072614926915e-05, "loss": 0.0, "step": 1851 }, { "epoch": 1.0772139014104987, "grad_norm": NaN, "learning_rate": 4.6098156389813966e-05, "loss": 0.0, "step": 1852 }, { "epoch": 1.0777955503853425, "grad_norm": NaN, "learning_rate": 4.605124362088544e-05, "loss": 0.0, "step": 1853 }, { "epoch": 1.078377199360186, "grad_norm": NaN, "learning_rate": 4.600433434969581e-05, "loss": 0.0, "step": 1854 }, { "epoch": 1.0789588483350299, "grad_norm": NaN, "learning_rate": 4.595742861779653e-05, "loss": 0.0, "step": 1855 }, { "epoch": 1.0795404973098734, "grad_norm": NaN, "learning_rate": 4.5910526466735826e-05, "loss": 0.0, "step": 1856 }, { "epoch": 1.0801221462847173, "grad_norm": NaN, "learning_rate": 4.586362793805883e-05, "loss": 0.0, "step": 1857 }, { "epoch": 1.0807037952595608, "grad_norm": NaN, "learning_rate": 4.5816733073307396e-05, "loss": 0.0, "step": 1858 }, { "epoch": 1.0812854442344046, "grad_norm": NaN, "learning_rate": 4.576984191402021e-05, "loss": 0.0, "step": 1859 }, { "epoch": 1.0818670932092482, "grad_norm": NaN, "learning_rate": 4.572295450173263e-05, "loss": 0.0, "step": 1860 }, { "epoch": 1.0824487421840918, "grad_norm": NaN, "learning_rate": 4.567607087797669e-05, "loss": 0.0, "step": 1861 }, { "epoch": 1.0830303911589356, "grad_norm": NaN, "learning_rate": 4.562919108428108e-05, "loss": 0.0, "step": 1862 }, { "epoch": 1.0836120401337792, "grad_norm": NaN, "learning_rate": 4.55823151621711e-05, "loss": 0.0, "step": 1863 }, { "epoch": 1.084193689108623, "grad_norm": NaN, "learning_rate": 4.553544315316861e-05, "loss": 0.0, "step": 1864 }, { "epoch": 1.0847753380834666, "grad_norm": NaN, "learning_rate": 4.5488575098792016e-05, "loss": 0.0, "step": 1865 }, { "epoch": 1.0853569870583104, "grad_norm": NaN, "learning_rate": 4.5441711040556214e-05, "loss": 0.0, "step": 1866 }, { "epoch": 1.085938636033154, "grad_norm": NaN, "learning_rate": 4.539485101997254e-05, "loss": 0.0, "step": 1867 }, { "epoch": 1.0865202850079976, "grad_norm": NaN, "learning_rate": 4.534799507854883e-05, "loss": 0.0, "step": 1868 }, { "epoch": 1.0871019339828414, "grad_norm": NaN, "learning_rate": 4.5301143257789216e-05, "loss": 0.0, "step": 1869 }, { "epoch": 1.087683582957685, "grad_norm": NaN, "learning_rate": 4.525429559919422e-05, "loss": 0.0, "step": 1870 }, { "epoch": 1.0882652319325288, "grad_norm": NaN, "learning_rate": 4.5207452144260674e-05, "loss": 0.0, "step": 1871 }, { "epoch": 1.0888468809073724, "grad_norm": NaN, "learning_rate": 4.516061293448169e-05, "loss": 0.0, "step": 1872 }, { "epoch": 1.0894285298822162, "grad_norm": NaN, "learning_rate": 4.511377801134661e-05, "loss": 0.0, "step": 1873 }, { "epoch": 1.0900101788570598, "grad_norm": NaN, "learning_rate": 4.5066947416340996e-05, "loss": 0.0, "step": 1874 }, { "epoch": 1.0905918278319033, "grad_norm": NaN, "learning_rate": 4.502012119094655e-05, "loss": 0.0, "step": 1875 }, { "epoch": 1.0911734768067471, "grad_norm": NaN, "learning_rate": 4.4973299376641104e-05, "loss": 0.0, "step": 1876 }, { "epoch": 1.0917551257815907, "grad_norm": NaN, "learning_rate": 4.492648201489864e-05, "loss": 0.0, "step": 1877 }, { "epoch": 1.0923367747564345, "grad_norm": NaN, "learning_rate": 4.487966914718914e-05, "loss": 0.0, "step": 1878 }, { "epoch": 1.0929184237312781, "grad_norm": NaN, "learning_rate": 4.48328608149786e-05, "loss": 0.0, "step": 1879 }, { "epoch": 1.093500072706122, "grad_norm": NaN, "learning_rate": 4.4786057059729034e-05, "loss": 0.0, "step": 1880 }, { "epoch": 1.0940817216809655, "grad_norm": NaN, "learning_rate": 4.473925792289837e-05, "loss": 0.0, "step": 1881 }, { "epoch": 1.0946633706558093, "grad_norm": NaN, "learning_rate": 4.4692463445940476e-05, "loss": 0.0, "step": 1882 }, { "epoch": 1.095245019630653, "grad_norm": NaN, "learning_rate": 4.4645673670305064e-05, "loss": 0.0, "step": 1883 }, { "epoch": 1.0958266686054965, "grad_norm": NaN, "learning_rate": 4.4598888637437705e-05, "loss": 0.0, "step": 1884 }, { "epoch": 1.0964083175803403, "grad_norm": NaN, "learning_rate": 4.4552108388779725e-05, "loss": 0.0, "step": 1885 }, { "epoch": 1.0969899665551839, "grad_norm": NaN, "learning_rate": 4.450533296576829e-05, "loss": 0.0, "step": 1886 }, { "epoch": 1.0975716155300277, "grad_norm": NaN, "learning_rate": 4.445856240983623e-05, "loss": 0.0, "step": 1887 }, { "epoch": 1.0981532645048713, "grad_norm": NaN, "learning_rate": 4.4411796762412075e-05, "loss": 0.0, "step": 1888 }, { "epoch": 1.098734913479715, "grad_norm": NaN, "learning_rate": 4.4365036064920025e-05, "loss": 0.0, "step": 1889 }, { "epoch": 1.0993165624545587, "grad_norm": NaN, "learning_rate": 4.431828035877987e-05, "loss": 0.0, "step": 1890 }, { "epoch": 1.0998982114294025, "grad_norm": NaN, "learning_rate": 4.427152968540699e-05, "loss": 0.0, "step": 1891 }, { "epoch": 1.100479860404246, "grad_norm": NaN, "learning_rate": 4.422478408621232e-05, "loss": 0.0, "step": 1892 }, { "epoch": 1.1010615093790896, "grad_norm": NaN, "learning_rate": 4.417804360260228e-05, "loss": 0.0, "step": 1893 }, { "epoch": 1.1016431583539334, "grad_norm": NaN, "learning_rate": 4.4131308275978775e-05, "loss": 0.0, "step": 1894 }, { "epoch": 1.102224807328777, "grad_norm": NaN, "learning_rate": 4.4084578147739115e-05, "loss": 0.0, "step": 1895 }, { "epoch": 1.1028064563036208, "grad_norm": NaN, "learning_rate": 4.403785325927606e-05, "loss": 0.0, "step": 1896 }, { "epoch": 1.1033881052784644, "grad_norm": NaN, "learning_rate": 4.3991133651977675e-05, "loss": 0.0, "step": 1897 }, { "epoch": 1.103969754253308, "grad_norm": NaN, "learning_rate": 4.3944419367227364e-05, "loss": 0.0, "step": 1898 }, { "epoch": 1.1045514032281518, "grad_norm": NaN, "learning_rate": 4.3897710446403824e-05, "loss": 0.0, "step": 1899 }, { "epoch": 1.1051330522029954, "grad_norm": NaN, "learning_rate": 4.3851006930881e-05, "loss": 0.0, "step": 1900 }, { "epoch": 1.1057147011778392, "grad_norm": NaN, "learning_rate": 4.380430886202802e-05, "loss": 0.0, "step": 1901 }, { "epoch": 1.1062963501526828, "grad_norm": NaN, "learning_rate": 4.375761628120924e-05, "loss": 0.0, "step": 1902 }, { "epoch": 1.1068779991275266, "grad_norm": NaN, "learning_rate": 4.3710929229784116e-05, "loss": 0.0, "step": 1903 }, { "epoch": 1.1074596481023702, "grad_norm": NaN, "learning_rate": 4.366424774910719e-05, "loss": 0.0, "step": 1904 }, { "epoch": 1.108041297077214, "grad_norm": NaN, "learning_rate": 4.361757188052815e-05, "loss": 0.0, "step": 1905 }, { "epoch": 1.1086229460520576, "grad_norm": NaN, "learning_rate": 4.357090166539162e-05, "loss": 0.0, "step": 1906 }, { "epoch": 1.1092045950269012, "grad_norm": NaN, "learning_rate": 4.352423714503727e-05, "loss": 0.0, "step": 1907 }, { "epoch": 1.109786244001745, "grad_norm": NaN, "learning_rate": 4.3477578360799706e-05, "loss": 0.0, "step": 1908 }, { "epoch": 1.1103678929765886, "grad_norm": NaN, "learning_rate": 4.343092535400847e-05, "loss": 0.0, "step": 1909 }, { "epoch": 1.1109495419514324, "grad_norm": NaN, "learning_rate": 4.3384278165987955e-05, "loss": 0.0, "step": 1910 }, { "epoch": 1.111531190926276, "grad_norm": NaN, "learning_rate": 4.333763683805743e-05, "loss": 0.0, "step": 1911 }, { "epoch": 1.1121128399011198, "grad_norm": NaN, "learning_rate": 4.329100141153096e-05, "loss": 0.0, "step": 1912 }, { "epoch": 1.1126944888759633, "grad_norm": NaN, "learning_rate": 4.3244371927717365e-05, "loss": 0.0, "step": 1913 }, { "epoch": 1.1132761378508071, "grad_norm": NaN, "learning_rate": 4.319774842792026e-05, "loss": 0.0, "step": 1914 }, { "epoch": 1.1138577868256507, "grad_norm": NaN, "learning_rate": 4.315113095343791e-05, "loss": 0.0, "step": 1915 }, { "epoch": 1.1144394358004943, "grad_norm": NaN, "learning_rate": 4.310451954556324e-05, "loss": 0.0, "step": 1916 }, { "epoch": 1.1150210847753381, "grad_norm": NaN, "learning_rate": 4.3057914245583825e-05, "loss": 0.0, "step": 1917 }, { "epoch": 1.1156027337501817, "grad_norm": NaN, "learning_rate": 4.3011315094781815e-05, "loss": 0.0, "step": 1918 }, { "epoch": 1.1161843827250255, "grad_norm": NaN, "learning_rate": 4.296472213443391e-05, "loss": 0.0, "step": 1919 }, { "epoch": 1.116766031699869, "grad_norm": NaN, "learning_rate": 4.291813540581136e-05, "loss": 0.0, "step": 1920 }, { "epoch": 1.117347680674713, "grad_norm": NaN, "learning_rate": 4.287155495017984e-05, "loss": 0.0, "step": 1921 }, { "epoch": 1.1179293296495565, "grad_norm": NaN, "learning_rate": 4.2824980808799496e-05, "loss": 0.0, "step": 1922 }, { "epoch": 1.1185109786244, "grad_norm": NaN, "learning_rate": 4.277841302292492e-05, "loss": 0.0, "step": 1923 }, { "epoch": 1.1190926275992439, "grad_norm": NaN, "learning_rate": 4.273185163380501e-05, "loss": 0.0, "step": 1924 }, { "epoch": 1.1196742765740875, "grad_norm": NaN, "learning_rate": 4.268529668268303e-05, "loss": 0.0, "step": 1925 }, { "epoch": 1.1202559255489313, "grad_norm": NaN, "learning_rate": 4.263874821079654e-05, "loss": 0.0, "step": 1926 }, { "epoch": 1.1208375745237749, "grad_norm": NaN, "learning_rate": 4.259220625937736e-05, "loss": 0.0, "step": 1927 }, { "epoch": 1.1214192234986187, "grad_norm": NaN, "learning_rate": 4.254567086965151e-05, "loss": 0.0, "step": 1928 }, { "epoch": 1.1220008724734623, "grad_norm": NaN, "learning_rate": 4.2499142082839235e-05, "loss": 0.0, "step": 1929 }, { "epoch": 1.1225825214483058, "grad_norm": NaN, "learning_rate": 4.245261994015493e-05, "loss": 0.0, "step": 1930 }, { "epoch": 1.1231641704231496, "grad_norm": NaN, "learning_rate": 4.240610448280707e-05, "loss": 0.0, "step": 1931 }, { "epoch": 1.1237458193979932, "grad_norm": NaN, "learning_rate": 4.235959575199821e-05, "loss": 0.0, "step": 1932 }, { "epoch": 1.124327468372837, "grad_norm": NaN, "learning_rate": 4.231309378892501e-05, "loss": 0.0, "step": 1933 }, { "epoch": 1.1249091173476806, "grad_norm": NaN, "learning_rate": 4.226659863477808e-05, "loss": 0.0, "step": 1934 }, { "epoch": 1.1254907663225244, "grad_norm": NaN, "learning_rate": 4.2220110330741985e-05, "loss": 0.0, "step": 1935 }, { "epoch": 1.126072415297368, "grad_norm": NaN, "learning_rate": 4.2173628917995276e-05, "loss": 0.0, "step": 1936 }, { "epoch": 1.1266540642722118, "grad_norm": NaN, "learning_rate": 4.212715443771035e-05, "loss": 0.0, "step": 1937 }, { "epoch": 1.1272357132470554, "grad_norm": NaN, "learning_rate": 4.208068693105348e-05, "loss": 0.0, "step": 1938 }, { "epoch": 1.127817362221899, "grad_norm": NaN, "learning_rate": 4.203422643918479e-05, "loss": 0.0, "step": 1939 }, { "epoch": 1.1283990111967428, "grad_norm": NaN, "learning_rate": 4.198777300325815e-05, "loss": 0.0, "step": 1940 }, { "epoch": 1.1289806601715864, "grad_norm": NaN, "learning_rate": 4.1941326664421164e-05, "loss": 0.0, "step": 1941 }, { "epoch": 1.1295623091464302, "grad_norm": NaN, "learning_rate": 4.1894887463815254e-05, "loss": 0.0, "step": 1942 }, { "epoch": 1.1301439581212738, "grad_norm": NaN, "learning_rate": 4.1848455442575394e-05, "loss": 0.0, "step": 1943 }, { "epoch": 1.1307256070961176, "grad_norm": NaN, "learning_rate": 4.180203064183028e-05, "loss": 0.0, "step": 1944 }, { "epoch": 1.1313072560709612, "grad_norm": NaN, "learning_rate": 4.1755613102702155e-05, "loss": 0.0, "step": 1945 }, { "epoch": 1.131888905045805, "grad_norm": NaN, "learning_rate": 4.170920286630688e-05, "loss": 0.0, "step": 1946 }, { "epoch": 1.1324705540206486, "grad_norm": NaN, "learning_rate": 4.166279997375382e-05, "loss": 0.0, "step": 1947 }, { "epoch": 1.1330522029954921, "grad_norm": NaN, "learning_rate": 4.1616404466145834e-05, "loss": 0.0, "step": 1948 }, { "epoch": 1.133633851970336, "grad_norm": NaN, "learning_rate": 4.157001638457925e-05, "loss": 0.0, "step": 1949 }, { "epoch": 1.1342155009451795, "grad_norm": NaN, "learning_rate": 4.15236357701438e-05, "loss": 0.0, "step": 1950 }, { "epoch": 1.1347971499200233, "grad_norm": NaN, "learning_rate": 4.147726266392266e-05, "loss": 0.0, "step": 1951 }, { "epoch": 1.135378798894867, "grad_norm": NaN, "learning_rate": 4.1430897106992264e-05, "loss": 0.0, "step": 1952 }, { "epoch": 1.1359604478697105, "grad_norm": NaN, "learning_rate": 4.138453914042244e-05, "loss": 0.0, "step": 1953 }, { "epoch": 1.1365420968445543, "grad_norm": NaN, "learning_rate": 4.133818880527624e-05, "loss": 0.0, "step": 1954 }, { "epoch": 1.137123745819398, "grad_norm": NaN, "learning_rate": 4.1291846142609986e-05, "loss": 0.0, "step": 1955 }, { "epoch": 1.1377053947942417, "grad_norm": NaN, "learning_rate": 4.124551119347318e-05, "loss": 0.0, "step": 1956 }, { "epoch": 1.1382870437690853, "grad_norm": NaN, "learning_rate": 4.119918399890851e-05, "loss": 0.0, "step": 1957 }, { "epoch": 1.138868692743929, "grad_norm": NaN, "learning_rate": 4.11528645999518e-05, "loss": 0.0, "step": 1958 }, { "epoch": 1.1394503417187727, "grad_norm": NaN, "learning_rate": 4.110655303763195e-05, "loss": 0.0, "step": 1959 }, { "epoch": 1.1400319906936165, "grad_norm": NaN, "learning_rate": 4.1060249352970905e-05, "loss": 0.0, "step": 1960 }, { "epoch": 1.14061363966846, "grad_norm": NaN, "learning_rate": 4.101395358698369e-05, "loss": 0.0, "step": 1961 }, { "epoch": 1.1411952886433037, "grad_norm": NaN, "learning_rate": 4.096766578067827e-05, "loss": 0.0, "step": 1962 }, { "epoch": 1.1417769376181475, "grad_norm": NaN, "learning_rate": 4.092138597505557e-05, "loss": 0.0, "step": 1963 }, { "epoch": 1.142358586592991, "grad_norm": NaN, "learning_rate": 4.087511421110942e-05, "loss": 0.0, "step": 1964 }, { "epoch": 1.1429402355678349, "grad_norm": NaN, "learning_rate": 4.0828850529826537e-05, "loss": 0.0, "step": 1965 }, { "epoch": 1.1435218845426784, "grad_norm": NaN, "learning_rate": 4.078259497218648e-05, "loss": 0.0, "step": 1966 }, { "epoch": 1.1441035335175223, "grad_norm": NaN, "learning_rate": 4.07363475791616e-05, "loss": 0.0, "step": 1967 }, { "epoch": 1.1446851824923658, "grad_norm": NaN, "learning_rate": 4.069010839171703e-05, "loss": 0.0, "step": 1968 }, { "epoch": 1.1452668314672096, "grad_norm": NaN, "learning_rate": 4.0643877450810595e-05, "loss": 0.0, "step": 1969 }, { "epoch": 1.1458484804420532, "grad_norm": NaN, "learning_rate": 4.0597654797392914e-05, "loss": 0.0, "step": 1970 }, { "epoch": 1.1464301294168968, "grad_norm": NaN, "learning_rate": 4.055144047240716e-05, "loss": 0.0, "step": 1971 }, { "epoch": 1.1470117783917406, "grad_norm": NaN, "learning_rate": 4.050523451678918e-05, "loss": 0.0, "step": 1972 }, { "epoch": 1.1475934273665842, "grad_norm": NaN, "learning_rate": 4.04590369714674e-05, "loss": 0.0, "step": 1973 }, { "epoch": 1.148175076341428, "grad_norm": NaN, "learning_rate": 4.041284787736277e-05, "loss": 0.0, "step": 1974 }, { "epoch": 1.1487567253162716, "grad_norm": NaN, "learning_rate": 4.03666672753888e-05, "loss": 0.0, "step": 1975 }, { "epoch": 1.1493383742911154, "grad_norm": NaN, "learning_rate": 4.032049520645144e-05, "loss": 0.0, "step": 1976 }, { "epoch": 1.149920023265959, "grad_norm": NaN, "learning_rate": 4.027433171144911e-05, "loss": 0.0, "step": 1977 }, { "epoch": 1.1505016722408028, "grad_norm": NaN, "learning_rate": 4.022817683127259e-05, "loss": 0.0, "step": 1978 }, { "epoch": 1.1510833212156464, "grad_norm": NaN, "learning_rate": 4.01820306068051e-05, "loss": 0.0, "step": 1979 }, { "epoch": 1.15166497019049, "grad_norm": NaN, "learning_rate": 4.0135893078922136e-05, "loss": 0.0, "step": 1980 }, { "epoch": 1.1522466191653338, "grad_norm": NaN, "learning_rate": 4.008976428849152e-05, "loss": 0.0, "step": 1981 }, { "epoch": 1.1528282681401774, "grad_norm": NaN, "learning_rate": 4.004364427637331e-05, "loss": 0.0, "step": 1982 }, { "epoch": 1.1534099171150212, "grad_norm": NaN, "learning_rate": 3.999753308341981e-05, "loss": 0.0, "step": 1983 }, { "epoch": 1.1539915660898648, "grad_norm": NaN, "learning_rate": 3.995143075047549e-05, "loss": 0.0, "step": 1984 }, { "epoch": 1.1545732150647083, "grad_norm": NaN, "learning_rate": 3.9905337318377e-05, "loss": 0.0, "step": 1985 }, { "epoch": 1.1551548640395521, "grad_norm": NaN, "learning_rate": 3.985925282795308e-05, "loss": 0.0, "step": 1986 }, { "epoch": 1.1557365130143957, "grad_norm": NaN, "learning_rate": 3.981317732002454e-05, "loss": 0.0, "step": 1987 }, { "epoch": 1.1563181619892395, "grad_norm": NaN, "learning_rate": 3.976711083540431e-05, "loss": 0.0, "step": 1988 }, { "epoch": 1.1568998109640831, "grad_norm": NaN, "learning_rate": 3.972105341489723e-05, "loss": 0.0, "step": 1989 }, { "epoch": 1.157481459938927, "grad_norm": NaN, "learning_rate": 3.967500509930017e-05, "loss": 0.0, "step": 1990 }, { "epoch": 1.1580631089137705, "grad_norm": NaN, "learning_rate": 3.962896592940191e-05, "loss": 0.0, "step": 1991 }, { "epoch": 1.1586447578886143, "grad_norm": NaN, "learning_rate": 3.958293594598313e-05, "loss": 0.0, "step": 1992 }, { "epoch": 1.159226406863458, "grad_norm": NaN, "learning_rate": 3.95369151898164e-05, "loss": 0.0, "step": 1993 }, { "epoch": 1.1598080558383015, "grad_norm": NaN, "learning_rate": 3.9490903701666074e-05, "loss": 0.0, "step": 1994 }, { "epoch": 1.1603897048131453, "grad_norm": NaN, "learning_rate": 3.944490152228834e-05, "loss": 0.0, "step": 1995 }, { "epoch": 1.1609713537879889, "grad_norm": NaN, "learning_rate": 3.939890869243109e-05, "loss": 0.0, "step": 1996 }, { "epoch": 1.1615530027628327, "grad_norm": NaN, "learning_rate": 3.9352925252833964e-05, "loss": 0.0, "step": 1997 }, { "epoch": 1.1621346517376763, "grad_norm": NaN, "learning_rate": 3.930695124422833e-05, "loss": 0.0, "step": 1998 }, { "epoch": 1.16271630071252, "grad_norm": NaN, "learning_rate": 3.926098670733711e-05, "loss": 0.0, "step": 1999 }, { "epoch": 1.1632979496873637, "grad_norm": NaN, "learning_rate": 3.921503168287491e-05, "loss": 0.0, "step": 2000 }, { "epoch": 1.1638795986622075, "grad_norm": NaN, "learning_rate": 3.916908621154787e-05, "loss": 0.0, "step": 2001 }, { "epoch": 1.164461247637051, "grad_norm": NaN, "learning_rate": 3.912315033405367e-05, "loss": 0.0, "step": 2002 }, { "epoch": 1.1650428966118946, "grad_norm": NaN, "learning_rate": 3.907722409108152e-05, "loss": 0.0, "step": 2003 }, { "epoch": 1.1656245455867384, "grad_norm": NaN, "learning_rate": 3.903130752331206e-05, "loss": 0.0, "step": 2004 }, { "epoch": 1.166206194561582, "grad_norm": NaN, "learning_rate": 3.898540067141739e-05, "loss": 0.0, "step": 2005 }, { "epoch": 1.1667878435364258, "grad_norm": NaN, "learning_rate": 3.893950357606097e-05, "loss": 0.0, "step": 2006 }, { "epoch": 1.1673694925112694, "grad_norm": NaN, "learning_rate": 3.889361627789768e-05, "loss": 0.0, "step": 2007 }, { "epoch": 1.167951141486113, "grad_norm": NaN, "learning_rate": 3.884773881757366e-05, "loss": 0.0, "step": 2008 }, { "epoch": 1.1685327904609568, "grad_norm": NaN, "learning_rate": 3.8801871235726355e-05, "loss": 0.0, "step": 2009 }, { "epoch": 1.1691144394358004, "grad_norm": NaN, "learning_rate": 3.8756013572984465e-05, "loss": 0.0, "step": 2010 }, { "epoch": 1.1696960884106442, "grad_norm": NaN, "learning_rate": 3.8710165869967894e-05, "loss": 0.0, "step": 2011 }, { "epoch": 1.1702777373854878, "grad_norm": NaN, "learning_rate": 3.866432816728774e-05, "loss": 0.0, "step": 2012 }, { "epoch": 1.1708593863603316, "grad_norm": NaN, "learning_rate": 3.861850050554623e-05, "loss": 0.0, "step": 2013 }, { "epoch": 1.1714410353351752, "grad_norm": NaN, "learning_rate": 3.857268292533669e-05, "loss": 0.0, "step": 2014 }, { "epoch": 1.172022684310019, "grad_norm": NaN, "learning_rate": 3.852687546724353e-05, "loss": 0.0, "step": 2015 }, { "epoch": 1.1726043332848626, "grad_norm": NaN, "learning_rate": 3.848107817184221e-05, "loss": 0.0, "step": 2016 }, { "epoch": 1.1731859822597062, "grad_norm": NaN, "learning_rate": 3.8435291079699156e-05, "loss": 0.0, "step": 2017 }, { "epoch": 1.17376763123455, "grad_norm": NaN, "learning_rate": 3.8389514231371766e-05, "loss": 0.0, "step": 2018 }, { "epoch": 1.1743492802093936, "grad_norm": NaN, "learning_rate": 3.834374766740836e-05, "loss": 0.0, "step": 2019 }, { "epoch": 1.1749309291842374, "grad_norm": NaN, "learning_rate": 3.8297991428348173e-05, "loss": 0.0, "step": 2020 }, { "epoch": 1.175512578159081, "grad_norm": NaN, "learning_rate": 3.825224555472127e-05, "loss": 0.0, "step": 2021 }, { "epoch": 1.1760942271339248, "grad_norm": NaN, "learning_rate": 3.820651008704853e-05, "loss": 0.0, "step": 2022 }, { "epoch": 1.1766758761087683, "grad_norm": NaN, "learning_rate": 3.8160785065841634e-05, "loss": 0.0, "step": 2023 }, { "epoch": 1.1772575250836121, "grad_norm": NaN, "learning_rate": 3.811507053160299e-05, "loss": 0.0, "step": 2024 }, { "epoch": 1.1778391740584557, "grad_norm": NaN, "learning_rate": 3.806936652482573e-05, "loss": 0.0, "step": 2025 }, { "epoch": 1.1784208230332993, "grad_norm": NaN, "learning_rate": 3.8023673085993684e-05, "loss": 0.0, "step": 2026 }, { "epoch": 1.1790024720081431, "grad_norm": NaN, "learning_rate": 3.797799025558128e-05, "loss": 0.0, "step": 2027 }, { "epoch": 1.1795841209829867, "grad_norm": NaN, "learning_rate": 3.793231807405356e-05, "loss": 0.0, "step": 2028 }, { "epoch": 1.1801657699578305, "grad_norm": NaN, "learning_rate": 3.7886656581866156e-05, "loss": 0.0, "step": 2029 }, { "epoch": 1.180747418932674, "grad_norm": NaN, "learning_rate": 3.784100581946519e-05, "loss": 0.0, "step": 2030 }, { "epoch": 1.181329067907518, "grad_norm": NaN, "learning_rate": 3.779536582728731e-05, "loss": 0.0, "step": 2031 }, { "epoch": 1.1819107168823615, "grad_norm": NaN, "learning_rate": 3.774973664575964e-05, "loss": 0.0, "step": 2032 }, { "epoch": 1.1824923658572053, "grad_norm": NaN, "learning_rate": 3.770411831529967e-05, "loss": 0.0, "step": 2033 }, { "epoch": 1.1830740148320489, "grad_norm": NaN, "learning_rate": 3.765851087631533e-05, "loss": 0.0, "step": 2034 }, { "epoch": 1.1836556638068925, "grad_norm": NaN, "learning_rate": 3.76129143692049e-05, "loss": 0.0, "step": 2035 }, { "epoch": 1.1842373127817363, "grad_norm": NaN, "learning_rate": 3.756732883435696e-05, "loss": 0.0, "step": 2036 }, { "epoch": 1.1848189617565799, "grad_norm": NaN, "learning_rate": 3.752175431215037e-05, "loss": 0.0, "step": 2037 }, { "epoch": 1.1854006107314237, "grad_norm": NaN, "learning_rate": 3.747619084295424e-05, "loss": 0.0, "step": 2038 }, { "epoch": 1.1859822597062673, "grad_norm": NaN, "learning_rate": 3.743063846712789e-05, "loss": 0.0, "step": 2039 }, { "epoch": 1.1865639086811108, "grad_norm": NaN, "learning_rate": 3.73850972250208e-05, "loss": 0.0, "step": 2040 }, { "epoch": 1.1871455576559546, "grad_norm": NaN, "learning_rate": 3.733956715697262e-05, "loss": 0.0, "step": 2041 }, { "epoch": 1.1877272066307982, "grad_norm": NaN, "learning_rate": 3.7294048303313076e-05, "loss": 0.0, "step": 2042 }, { "epoch": 1.188308855605642, "grad_norm": NaN, "learning_rate": 3.7248540704361954e-05, "loss": 0.0, "step": 2043 }, { "epoch": 1.1888905045804856, "grad_norm": NaN, "learning_rate": 3.7203044400429096e-05, "loss": 0.0, "step": 2044 }, { "epoch": 1.1894721535553294, "grad_norm": NaN, "learning_rate": 3.715755943181434e-05, "loss": 0.0, "step": 2045 }, { "epoch": 1.190053802530173, "grad_norm": NaN, "learning_rate": 3.7112085838807463e-05, "loss": 0.0, "step": 2046 }, { "epoch": 1.1906354515050168, "grad_norm": NaN, "learning_rate": 3.7066623661688164e-05, "loss": 0.0, "step": 2047 }, { "epoch": 1.1912171004798604, "grad_norm": NaN, "learning_rate": 3.7021172940726054e-05, "loss": 0.0, "step": 2048 }, { "epoch": 1.191798749454704, "grad_norm": NaN, "learning_rate": 3.697573371618057e-05, "loss": 0.0, "step": 2049 }, { "epoch": 1.1923803984295478, "grad_norm": NaN, "learning_rate": 3.6930306028300974e-05, "loss": 0.0, "step": 2050 }, { "epoch": 1.1929620474043914, "grad_norm": NaN, "learning_rate": 3.688488991732633e-05, "loss": 0.0, "step": 2051 }, { "epoch": 1.1935436963792352, "grad_norm": NaN, "learning_rate": 3.6839485423485395e-05, "loss": 0.0, "step": 2052 }, { "epoch": 1.1941253453540788, "grad_norm": NaN, "learning_rate": 3.6794092586996704e-05, "loss": 0.0, "step": 2053 }, { "epoch": 1.1947069943289226, "grad_norm": NaN, "learning_rate": 3.6748711448068435e-05, "loss": 0.0, "step": 2054 }, { "epoch": 1.1952886433037662, "grad_norm": NaN, "learning_rate": 3.670334204689839e-05, "loss": 0.0, "step": 2055 }, { "epoch": 1.19587029227861, "grad_norm": NaN, "learning_rate": 3.6657984423673984e-05, "loss": 0.0, "step": 2056 }, { "epoch": 1.1964519412534536, "grad_norm": NaN, "learning_rate": 3.661263861857221e-05, "loss": 0.0, "step": 2057 }, { "epoch": 1.1970335902282971, "grad_norm": NaN, "learning_rate": 3.656730467175958e-05, "loss": 0.0, "step": 2058 }, { "epoch": 1.197615239203141, "grad_norm": NaN, "learning_rate": 3.65219826233921e-05, "loss": 0.0, "step": 2059 }, { "epoch": 1.1981968881779845, "grad_norm": NaN, "learning_rate": 3.647667251361525e-05, "loss": 0.0, "step": 2060 }, { "epoch": 1.1987785371528283, "grad_norm": NaN, "learning_rate": 3.643137438256393e-05, "loss": 0.0, "step": 2061 }, { "epoch": 1.199360186127672, "grad_norm": NaN, "learning_rate": 3.6386088270362415e-05, "loss": 0.0, "step": 2062 }, { "epoch": 1.1999418351025155, "grad_norm": NaN, "learning_rate": 3.634081421712435e-05, "loss": 0.0, "step": 2063 }, { "epoch": 1.2005234840773593, "grad_norm": NaN, "learning_rate": 3.629555226295271e-05, "loss": 0.0, "step": 2064 }, { "epoch": 1.201105133052203, "grad_norm": NaN, "learning_rate": 3.6250302447939726e-05, "loss": 0.0, "step": 2065 }, { "epoch": 1.2016867820270467, "grad_norm": NaN, "learning_rate": 3.6205064812166894e-05, "loss": 0.0, "step": 2066 }, { "epoch": 1.2022684310018903, "grad_norm": NaN, "learning_rate": 3.615983939570491e-05, "loss": 0.0, "step": 2067 }, { "epoch": 1.202850079976734, "grad_norm": NaN, "learning_rate": 3.611462623861365e-05, "loss": 0.0, "step": 2068 }, { "epoch": 1.2034317289515777, "grad_norm": NaN, "learning_rate": 3.6069425380942144e-05, "loss": 0.0, "step": 2069 }, { "epoch": 1.2040133779264215, "grad_norm": NaN, "learning_rate": 3.60242368627285e-05, "loss": 0.0, "step": 2070 }, { "epoch": 1.204595026901265, "grad_norm": NaN, "learning_rate": 3.5979060723999914e-05, "loss": 0.0, "step": 2071 }, { "epoch": 1.2051766758761087, "grad_norm": NaN, "learning_rate": 3.5933897004772634e-05, "loss": 0.0, "step": 2072 }, { "epoch": 1.2057583248509525, "grad_norm": NaN, "learning_rate": 3.5888745745051865e-05, "loss": 0.0, "step": 2073 }, { "epoch": 1.206339973825796, "grad_norm": NaN, "learning_rate": 3.584360698483181e-05, "loss": 0.0, "step": 2074 }, { "epoch": 1.2069216228006399, "grad_norm": NaN, "learning_rate": 3.57984807640956e-05, "loss": 0.0, "step": 2075 }, { "epoch": 1.2075032717754834, "grad_norm": NaN, "learning_rate": 3.575336712281523e-05, "loss": 0.0, "step": 2076 }, { "epoch": 1.2080849207503273, "grad_norm": NaN, "learning_rate": 3.5708266100951574e-05, "loss": 0.0, "step": 2077 }, { "epoch": 1.2086665697251708, "grad_norm": NaN, "learning_rate": 3.566317773845432e-05, "loss": 0.0, "step": 2078 }, { "epoch": 1.2092482187000146, "grad_norm": NaN, "learning_rate": 3.561810207526195e-05, "loss": 0.0, "step": 2079 }, { "epoch": 1.2098298676748582, "grad_norm": NaN, "learning_rate": 3.557303915130169e-05, "loss": 0.0, "step": 2080 }, { "epoch": 1.2104115166497018, "grad_norm": NaN, "learning_rate": 3.552798900648949e-05, "loss": 0.0, "step": 2081 }, { "epoch": 1.2109931656245456, "grad_norm": NaN, "learning_rate": 3.5482951680729966e-05, "loss": 0.0, "step": 2082 }, { "epoch": 1.2115748145993892, "grad_norm": NaN, "learning_rate": 3.543792721391641e-05, "loss": 0.0, "step": 2083 }, { "epoch": 1.212156463574233, "grad_norm": NaN, "learning_rate": 3.539291564593068e-05, "loss": 0.0, "step": 2084 }, { "epoch": 1.2127381125490766, "grad_norm": NaN, "learning_rate": 3.534791701664325e-05, "loss": 0.0, "step": 2085 }, { "epoch": 1.2133197615239204, "grad_norm": NaN, "learning_rate": 3.53029313659131e-05, "loss": 0.0, "step": 2086 }, { "epoch": 1.213901410498764, "grad_norm": NaN, "learning_rate": 3.525795873358773e-05, "loss": 0.0, "step": 2087 }, { "epoch": 1.2144830594736078, "grad_norm": NaN, "learning_rate": 3.5212999159503115e-05, "loss": 0.0, "step": 2088 }, { "epoch": 1.2150647084484514, "grad_norm": NaN, "learning_rate": 3.5168052683483626e-05, "loss": 0.0, "step": 2089 }, { "epoch": 1.215646357423295, "grad_norm": NaN, "learning_rate": 3.5123119345342097e-05, "loss": 0.0, "step": 2090 }, { "epoch": 1.2162280063981388, "grad_norm": NaN, "learning_rate": 3.5078199184879675e-05, "loss": 0.0, "step": 2091 }, { "epoch": 1.2168096553729824, "grad_norm": NaN, "learning_rate": 3.503329224188584e-05, "loss": 0.0, "step": 2092 }, { "epoch": 1.2173913043478262, "grad_norm": NaN, "learning_rate": 3.498839855613838e-05, "loss": 0.0, "step": 2093 }, { "epoch": 1.2179729533226697, "grad_norm": NaN, "learning_rate": 3.4943518167403334e-05, "loss": 0.0, "step": 2094 }, { "epoch": 1.2185546022975133, "grad_norm": NaN, "learning_rate": 3.4898651115434956e-05, "loss": 0.0, "step": 2095 }, { "epoch": 1.2191362512723571, "grad_norm": NaN, "learning_rate": 3.4853797439975686e-05, "loss": 0.0, "step": 2096 }, { "epoch": 1.2197179002472007, "grad_norm": NaN, "learning_rate": 3.4808957180756134e-05, "loss": 0.0, "step": 2097 }, { "epoch": 1.2202995492220445, "grad_norm": NaN, "learning_rate": 3.4764130377495e-05, "loss": 0.0, "step": 2098 }, { "epoch": 1.2208811981968881, "grad_norm": NaN, "learning_rate": 3.4719317069899074e-05, "loss": 0.0, "step": 2099 }, { "epoch": 1.221462847171732, "grad_norm": NaN, "learning_rate": 3.4674517297663215e-05, "loss": 0.0, "step": 2100 }, { "epoch": 1.2220444961465755, "grad_norm": NaN, "learning_rate": 3.4629731100470254e-05, "loss": 0.0, "step": 2101 }, { "epoch": 1.2226261451214193, "grad_norm": NaN, "learning_rate": 3.4584958517991025e-05, "loss": 0.0, "step": 2102 }, { "epoch": 1.223207794096263, "grad_norm": NaN, "learning_rate": 3.45401995898843e-05, "loss": 0.0, "step": 2103 }, { "epoch": 1.2237894430711065, "grad_norm": NaN, "learning_rate": 3.449545435579674e-05, "loss": 0.0, "step": 2104 }, { "epoch": 1.2243710920459503, "grad_norm": NaN, "learning_rate": 3.4450722855362885e-05, "loss": 0.0, "step": 2105 }, { "epoch": 1.2249527410207939, "grad_norm": NaN, "learning_rate": 3.440600512820512e-05, "loss": 0.0, "step": 2106 }, { "epoch": 1.2255343899956377, "grad_norm": NaN, "learning_rate": 3.4361301213933616e-05, "loss": 0.0, "step": 2107 }, { "epoch": 1.2261160389704813, "grad_norm": NaN, "learning_rate": 3.4316611152146294e-05, "loss": 0.0, "step": 2108 }, { "epoch": 1.226697687945325, "grad_norm": NaN, "learning_rate": 3.427193498242886e-05, "loss": 0.0, "step": 2109 }, { "epoch": 1.2272793369201687, "grad_norm": NaN, "learning_rate": 3.4227272744354665e-05, "loss": 0.0, "step": 2110 }, { "epoch": 1.2278609858950125, "grad_norm": NaN, "learning_rate": 3.4182624477484716e-05, "loss": 0.0, "step": 2111 }, { "epoch": 1.228442634869856, "grad_norm": NaN, "learning_rate": 3.413799022136769e-05, "loss": 0.0, "step": 2112 }, { "epoch": 1.2290242838446996, "grad_norm": NaN, "learning_rate": 3.409337001553981e-05, "loss": 0.0, "step": 2113 }, { "epoch": 1.2296059328195434, "grad_norm": NaN, "learning_rate": 3.404876389952487e-05, "loss": 0.0, "step": 2114 }, { "epoch": 1.230187581794387, "grad_norm": NaN, "learning_rate": 3.400417191283419e-05, "loss": 0.0, "step": 2115 }, { "epoch": 1.2307692307692308, "grad_norm": NaN, "learning_rate": 3.3959594094966565e-05, "loss": 0.0, "step": 2116 }, { "epoch": 1.2313508797440744, "grad_norm": NaN, "learning_rate": 3.3915030485408214e-05, "loss": 0.0, "step": 2117 }, { "epoch": 1.2319325287189182, "grad_norm": NaN, "learning_rate": 3.387048112363285e-05, "loss": 0.0, "step": 2118 }, { "epoch": 1.2325141776937618, "grad_norm": NaN, "learning_rate": 3.382594604910147e-05, "loss": 0.0, "step": 2119 }, { "epoch": 1.2330958266686056, "grad_norm": NaN, "learning_rate": 3.378142530126247e-05, "loss": 0.0, "step": 2120 }, { "epoch": 1.2336774756434492, "grad_norm": NaN, "learning_rate": 3.3736918919551566e-05, "loss": 0.0, "step": 2121 }, { "epoch": 1.2342591246182928, "grad_norm": NaN, "learning_rate": 3.369242694339171e-05, "loss": 0.0, "step": 2122 }, { "epoch": 1.2348407735931366, "grad_norm": NaN, "learning_rate": 3.3647949412193114e-05, "loss": 0.0, "step": 2123 }, { "epoch": 1.2354224225679802, "grad_norm": NaN, "learning_rate": 3.36034863653532e-05, "loss": 0.0, "step": 2124 }, { "epoch": 1.236004071542824, "grad_norm": NaN, "learning_rate": 3.355903784225654e-05, "loss": 0.0, "step": 2125 }, { "epoch": 1.2365857205176676, "grad_norm": NaN, "learning_rate": 3.3514603882274845e-05, "loss": 0.0, "step": 2126 }, { "epoch": 1.2371673694925112, "grad_norm": NaN, "learning_rate": 3.3470184524766956e-05, "loss": 0.0, "step": 2127 }, { "epoch": 1.237749018467355, "grad_norm": NaN, "learning_rate": 3.342577980907875e-05, "loss": 0.0, "step": 2128 }, { "epoch": 1.2383306674421986, "grad_norm": NaN, "learning_rate": 3.338138977454315e-05, "loss": 0.0, "step": 2129 }, { "epoch": 1.2389123164170424, "grad_norm": NaN, "learning_rate": 3.333701446048005e-05, "loss": 0.0, "step": 2130 }, { "epoch": 1.239493965391886, "grad_norm": NaN, "learning_rate": 3.329265390619635e-05, "loss": 0.0, "step": 2131 }, { "epoch": 1.2400756143667298, "grad_norm": NaN, "learning_rate": 3.324830815098582e-05, "loss": 0.0, "step": 2132 }, { "epoch": 1.2406572633415733, "grad_norm": NaN, "learning_rate": 3.320397723412917e-05, "loss": 0.0, "step": 2133 }, { "epoch": 1.2412389123164171, "grad_norm": NaN, "learning_rate": 3.315966119489393e-05, "loss": 0.0, "step": 2134 }, { "epoch": 1.2418205612912607, "grad_norm": NaN, "learning_rate": 3.311536007253448e-05, "loss": 0.0, "step": 2135 }, { "epoch": 1.2424022102661043, "grad_norm": NaN, "learning_rate": 3.3071073906291964e-05, "loss": 0.0, "step": 2136 }, { "epoch": 1.2429838592409481, "grad_norm": NaN, "learning_rate": 3.3026802735394295e-05, "loss": 0.0, "step": 2137 }, { "epoch": 1.2435655082157917, "grad_norm": NaN, "learning_rate": 3.298254659905611e-05, "loss": 0.0, "step": 2138 }, { "epoch": 1.2441471571906355, "grad_norm": NaN, "learning_rate": 3.2938305536478674e-05, "loss": 0.0, "step": 2139 }, { "epoch": 1.244728806165479, "grad_norm": NaN, "learning_rate": 3.2894079586849996e-05, "loss": 0.0, "step": 2140 }, { "epoch": 1.245310455140323, "grad_norm": NaN, "learning_rate": 3.284986878934462e-05, "loss": 0.0, "step": 2141 }, { "epoch": 1.2458921041151665, "grad_norm": NaN, "learning_rate": 3.280567318312368e-05, "loss": 0.0, "step": 2142 }, { "epoch": 1.2464737530900103, "grad_norm": NaN, "learning_rate": 3.2761492807334884e-05, "loss": 0.0, "step": 2143 }, { "epoch": 1.2470554020648539, "grad_norm": NaN, "learning_rate": 3.271732770111242e-05, "loss": 0.0, "step": 2144 }, { "epoch": 1.2476370510396975, "grad_norm": NaN, "learning_rate": 3.267317790357696e-05, "loss": 0.0, "step": 2145 }, { "epoch": 1.2482187000145413, "grad_norm": NaN, "learning_rate": 3.262904345383563e-05, "loss": 0.0, "step": 2146 }, { "epoch": 1.2488003489893849, "grad_norm": NaN, "learning_rate": 3.258492439098195e-05, "loss": 0.0, "step": 2147 }, { "epoch": 1.2493819979642287, "grad_norm": NaN, "learning_rate": 3.25408207540958e-05, "loss": 0.0, "step": 2148 }, { "epoch": 1.2499636469390722, "grad_norm": NaN, "learning_rate": 3.2496732582243405e-05, "loss": 0.0, "step": 2149 }, { "epoch": 1.2505452959139158, "grad_norm": NaN, "learning_rate": 3.24526599144773e-05, "loss": 0.0, "step": 2150 }, { "epoch": 1.2511269448887596, "grad_norm": NaN, "learning_rate": 3.2408602789836274e-05, "loss": 0.0, "step": 2151 }, { "epoch": 1.2517085938636034, "grad_norm": NaN, "learning_rate": 3.236456124734535e-05, "loss": 0.0, "step": 2152 }, { "epoch": 1.252290242838447, "grad_norm": NaN, "learning_rate": 3.232053532601576e-05, "loss": 0.0, "step": 2153 }, { "epoch": 1.2528718918132906, "grad_norm": NaN, "learning_rate": 3.227652506484486e-05, "loss": 0.0, "step": 2154 }, { "epoch": 1.2534535407881344, "grad_norm": NaN, "learning_rate": 3.2232530502816215e-05, "loss": 0.0, "step": 2155 }, { "epoch": 1.254035189762978, "grad_norm": NaN, "learning_rate": 3.2188551678899406e-05, "loss": 0.0, "step": 2156 }, { "epoch": 1.2546168387378218, "grad_norm": NaN, "learning_rate": 3.21445886320501e-05, "loss": 0.0, "step": 2157 }, { "epoch": 1.2551984877126654, "grad_norm": NaN, "learning_rate": 3.2100641401209984e-05, "loss": 0.0, "step": 2158 }, { "epoch": 1.255780136687509, "grad_norm": NaN, "learning_rate": 3.205671002530676e-05, "loss": 0.0, "step": 2159 }, { "epoch": 1.2563617856623528, "grad_norm": NaN, "learning_rate": 3.2012794543254064e-05, "loss": 0.0, "step": 2160 }, { "epoch": 1.2569434346371964, "grad_norm": NaN, "learning_rate": 3.196889499395145e-05, "loss": 0.0, "step": 2161 }, { "epoch": 1.2575250836120402, "grad_norm": NaN, "learning_rate": 3.1925011416284364e-05, "loss": 0.0, "step": 2162 }, { "epoch": 1.2581067325868838, "grad_norm": NaN, "learning_rate": 3.188114384912412e-05, "loss": 0.0, "step": 2163 }, { "epoch": 1.2586883815617276, "grad_norm": NaN, "learning_rate": 3.183729233132782e-05, "loss": 0.0, "step": 2164 }, { "epoch": 1.2592700305365712, "grad_norm": NaN, "learning_rate": 3.1793456901738387e-05, "loss": 0.0, "step": 2165 }, { "epoch": 1.259851679511415, "grad_norm": NaN, "learning_rate": 3.174963759918446e-05, "loss": 0.0, "step": 2166 }, { "epoch": 1.2604333284862586, "grad_norm": NaN, "learning_rate": 3.17058344624804e-05, "loss": 0.0, "step": 2167 }, { "epoch": 1.2610149774611021, "grad_norm": NaN, "learning_rate": 3.166204753042626e-05, "loss": 0.0, "step": 2168 }, { "epoch": 1.261596626435946, "grad_norm": NaN, "learning_rate": 3.1618276841807746e-05, "loss": 0.0, "step": 2169 }, { "epoch": 1.2621782754107895, "grad_norm": NaN, "learning_rate": 3.157452243539615e-05, "loss": 0.0, "step": 2170 }, { "epoch": 1.2627599243856333, "grad_norm": NaN, "learning_rate": 3.1530784349948365e-05, "loss": 0.0, "step": 2171 }, { "epoch": 1.263341573360477, "grad_norm": NaN, "learning_rate": 3.148706262420682e-05, "loss": 0.0, "step": 2172 }, { "epoch": 1.2639232223353205, "grad_norm": NaN, "learning_rate": 3.1443357296899414e-05, "loss": 0.0, "step": 2173 }, { "epoch": 1.2645048713101643, "grad_norm": NaN, "learning_rate": 3.139966840673961e-05, "loss": 0.0, "step": 2174 }, { "epoch": 1.2650865202850081, "grad_norm": NaN, "learning_rate": 3.135599599242623e-05, "loss": 0.0, "step": 2175 }, { "epoch": 1.2656681692598517, "grad_norm": NaN, "learning_rate": 3.1312340092643535e-05, "loss": 0.0, "step": 2176 }, { "epoch": 1.2662498182346953, "grad_norm": NaN, "learning_rate": 3.126870074606114e-05, "loss": 0.0, "step": 2177 }, { "epoch": 1.266831467209539, "grad_norm": NaN, "learning_rate": 3.1225077991334026e-05, "loss": 0.0, "step": 2178 }, { "epoch": 1.2674131161843827, "grad_norm": NaN, "learning_rate": 3.1181471867102465e-05, "loss": 0.0, "step": 2179 }, { "epoch": 1.2679947651592265, "grad_norm": NaN, "learning_rate": 3.1137882411991983e-05, "loss": 0.0, "step": 2180 }, { "epoch": 1.26857641413407, "grad_norm": NaN, "learning_rate": 3.1094309664613366e-05, "loss": 0.0, "step": 2181 }, { "epoch": 1.2691580631089137, "grad_norm": NaN, "learning_rate": 3.105075366356256e-05, "loss": 0.0, "step": 2182 }, { "epoch": 1.2697397120837575, "grad_norm": NaN, "learning_rate": 3.100721444742073e-05, "loss": 0.0, "step": 2183 }, { "epoch": 1.2703213610586013, "grad_norm": NaN, "learning_rate": 3.0963692054754135e-05, "loss": 0.0, "step": 2184 }, { "epoch": 1.2709030100334449, "grad_norm": NaN, "learning_rate": 3.0920186524114156e-05, "loss": 0.0, "step": 2185 }, { "epoch": 1.2714846590082884, "grad_norm": NaN, "learning_rate": 3.087669789403722e-05, "loss": 0.0, "step": 2186 }, { "epoch": 1.2720663079831322, "grad_norm": NaN, "learning_rate": 3.0833226203044774e-05, "loss": 0.0, "step": 2187 }, { "epoch": 1.2726479569579758, "grad_norm": NaN, "learning_rate": 3.07897714896433e-05, "loss": 0.0, "step": 2188 }, { "epoch": 1.2732296059328196, "grad_norm": NaN, "learning_rate": 3.0746333792324225e-05, "loss": 0.0, "step": 2189 }, { "epoch": 1.2738112549076632, "grad_norm": NaN, "learning_rate": 3.070291314956387e-05, "loss": 0.0, "step": 2190 }, { "epoch": 1.2743929038825068, "grad_norm": NaN, "learning_rate": 3.065950959982348e-05, "loss": 0.0, "step": 2191 }, { "epoch": 1.2749745528573506, "grad_norm": NaN, "learning_rate": 3.061612318154918e-05, "loss": 0.0, "step": 2192 }, { "epoch": 1.2755562018321942, "grad_norm": NaN, "learning_rate": 3.057275393317188e-05, "loss": 0.0, "step": 2193 }, { "epoch": 1.276137850807038, "grad_norm": NaN, "learning_rate": 3.052940189310729e-05, "loss": 0.0, "step": 2194 }, { "epoch": 1.2767194997818816, "grad_norm": NaN, "learning_rate": 3.0486067099755906e-05, "loss": 0.0, "step": 2195 }, { "epoch": 1.2773011487567252, "grad_norm": NaN, "learning_rate": 3.0442749591502894e-05, "loss": 0.0, "step": 2196 }, { "epoch": 1.277882797731569, "grad_norm": NaN, "learning_rate": 3.0399449406718167e-05, "loss": 0.0, "step": 2197 }, { "epoch": 1.2784644467064128, "grad_norm": NaN, "learning_rate": 3.035616658375626e-05, "loss": 0.0, "step": 2198 }, { "epoch": 1.2790460956812564, "grad_norm": NaN, "learning_rate": 3.031290116095633e-05, "loss": 0.0, "step": 2199 }, { "epoch": 1.2796277446561, "grad_norm": NaN, "learning_rate": 3.0269653176642132e-05, "loss": 0.0, "step": 2200 }, { "epoch": 1.2802093936309438, "grad_norm": NaN, "learning_rate": 3.0226422669121957e-05, "loss": 0.0, "step": 2201 }, { "epoch": 1.2807910426057874, "grad_norm": NaN, "learning_rate": 3.0183209676688652e-05, "loss": 0.0, "step": 2202 }, { "epoch": 1.2813726915806312, "grad_norm": NaN, "learning_rate": 3.0140014237619512e-05, "loss": 0.0, "step": 2203 }, { "epoch": 1.2819543405554747, "grad_norm": NaN, "learning_rate": 3.0096836390176296e-05, "loss": 0.0, "step": 2204 }, { "epoch": 1.2825359895303183, "grad_norm": NaN, "learning_rate": 3.0053676172605183e-05, "loss": 0.0, "step": 2205 }, { "epoch": 1.2831176385051621, "grad_norm": NaN, "learning_rate": 3.001053362313673e-05, "loss": 0.0, "step": 2206 }, { "epoch": 1.283699287480006, "grad_norm": NaN, "learning_rate": 2.9967408779985857e-05, "loss": 0.0, "step": 2207 }, { "epoch": 1.2842809364548495, "grad_norm": NaN, "learning_rate": 2.9924301681351785e-05, "loss": 0.0, "step": 2208 }, { "epoch": 1.2848625854296931, "grad_norm": NaN, "learning_rate": 2.988121236541803e-05, "loss": 0.0, "step": 2209 }, { "epoch": 1.285444234404537, "grad_norm": NaN, "learning_rate": 2.9838140870352328e-05, "loss": 0.0, "step": 2210 }, { "epoch": 1.2860258833793805, "grad_norm": NaN, "learning_rate": 2.9795087234306684e-05, "loss": 0.0, "step": 2211 }, { "epoch": 1.2866075323542243, "grad_norm": NaN, "learning_rate": 2.9752051495417233e-05, "loss": 0.0, "step": 2212 }, { "epoch": 1.287189181329068, "grad_norm": NaN, "learning_rate": 2.9709033691804276e-05, "loss": 0.0, "step": 2213 }, { "epoch": 1.2877708303039115, "grad_norm": NaN, "learning_rate": 2.9666033861572234e-05, "loss": 0.0, "step": 2214 }, { "epoch": 1.2883524792787553, "grad_norm": NaN, "learning_rate": 2.9623052042809584e-05, "loss": 0.0, "step": 2215 }, { "epoch": 1.2889341282535989, "grad_norm": NaN, "learning_rate": 2.958008827358888e-05, "loss": 0.0, "step": 2216 }, { "epoch": 1.2895157772284427, "grad_norm": NaN, "learning_rate": 2.9537142591966672e-05, "loss": 0.0, "step": 2217 }, { "epoch": 1.2900974262032863, "grad_norm": NaN, "learning_rate": 2.949421503598348e-05, "loss": 0.0, "step": 2218 }, { "epoch": 1.29067907517813, "grad_norm": NaN, "learning_rate": 2.9451305643663773e-05, "loss": 0.0, "step": 2219 }, { "epoch": 1.2912607241529737, "grad_norm": NaN, "learning_rate": 2.9408414453015954e-05, "loss": 0.0, "step": 2220 }, { "epoch": 1.2918423731278175, "grad_norm": NaN, "learning_rate": 2.9365541502032278e-05, "loss": 0.0, "step": 2221 }, { "epoch": 1.292424022102661, "grad_norm": NaN, "learning_rate": 2.932268682868885e-05, "loss": 0.0, "step": 2222 }, { "epoch": 1.2930056710775046, "grad_norm": NaN, "learning_rate": 2.9279850470945593e-05, "loss": 0.0, "step": 2223 }, { "epoch": 1.2935873200523484, "grad_norm": NaN, "learning_rate": 2.923703246674617e-05, "loss": 0.0, "step": 2224 }, { "epoch": 1.294168969027192, "grad_norm": NaN, "learning_rate": 2.9194232854018045e-05, "loss": 0.0, "step": 2225 }, { "epoch": 1.2947506180020358, "grad_norm": NaN, "learning_rate": 2.9151451670672352e-05, "loss": 0.0, "step": 2226 }, { "epoch": 1.2953322669768794, "grad_norm": NaN, "learning_rate": 2.9108688954603914e-05, "loss": 0.0, "step": 2227 }, { "epoch": 1.295913915951723, "grad_norm": NaN, "learning_rate": 2.906594474369117e-05, "loss": 0.0, "step": 2228 }, { "epoch": 1.2964955649265668, "grad_norm": NaN, "learning_rate": 2.9023219075796227e-05, "loss": 0.0, "step": 2229 }, { "epoch": 1.2970772139014106, "grad_norm": NaN, "learning_rate": 2.8980511988764712e-05, "loss": 0.0, "step": 2230 }, { "epoch": 1.2976588628762542, "grad_norm": NaN, "learning_rate": 2.8937823520425817e-05, "loss": 0.0, "step": 2231 }, { "epoch": 1.2982405118510978, "grad_norm": NaN, "learning_rate": 2.8895153708592228e-05, "loss": 0.0, "step": 2232 }, { "epoch": 1.2988221608259416, "grad_norm": NaN, "learning_rate": 2.8852502591060117e-05, "loss": 0.0, "step": 2233 }, { "epoch": 1.2994038098007852, "grad_norm": NaN, "learning_rate": 2.880987020560909e-05, "loss": 0.0, "step": 2234 }, { "epoch": 1.299985458775629, "grad_norm": NaN, "learning_rate": 2.8767256590002166e-05, "loss": 0.0, "step": 2235 }, { "epoch": 1.3005671077504726, "grad_norm": NaN, "learning_rate": 2.8724661781985736e-05, "loss": 0.0, "step": 2236 }, { "epoch": 1.3011487567253162, "grad_norm": NaN, "learning_rate": 2.8682085819289528e-05, "loss": 0.0, "step": 2237 }, { "epoch": 1.30173040570016, "grad_norm": NaN, "learning_rate": 2.8639528739626577e-05, "loss": 0.0, "step": 2238 }, { "epoch": 1.3023120546750038, "grad_norm": NaN, "learning_rate": 2.8596990580693205e-05, "loss": 0.0, "step": 2239 }, { "epoch": 1.3028937036498474, "grad_norm": NaN, "learning_rate": 2.855447138016898e-05, "loss": 0.0, "step": 2240 }, { "epoch": 1.303475352624691, "grad_norm": NaN, "learning_rate": 2.8511971175716634e-05, "loss": 0.0, "step": 2241 }, { "epoch": 1.3040570015995347, "grad_norm": NaN, "learning_rate": 2.8469490004982107e-05, "loss": 0.0, "step": 2242 }, { "epoch": 1.3046386505743783, "grad_norm": NaN, "learning_rate": 2.8427027905594482e-05, "loss": 0.0, "step": 2243 }, { "epoch": 1.3052202995492221, "grad_norm": NaN, "learning_rate": 2.838458491516593e-05, "loss": 0.0, "step": 2244 }, { "epoch": 1.3058019485240657, "grad_norm": NaN, "learning_rate": 2.8342161071291705e-05, "loss": 0.0, "step": 2245 }, { "epoch": 1.3063835974989093, "grad_norm": NaN, "learning_rate": 2.829975641155011e-05, "loss": 0.0, "step": 2246 }, { "epoch": 1.3069652464737531, "grad_norm": NaN, "learning_rate": 2.8257370973502416e-05, "loss": 0.0, "step": 2247 }, { "epoch": 1.3075468954485967, "grad_norm": NaN, "learning_rate": 2.821500479469295e-05, "loss": 0.0, "step": 2248 }, { "epoch": 1.3081285444234405, "grad_norm": NaN, "learning_rate": 2.8172657912648898e-05, "loss": 0.0, "step": 2249 }, { "epoch": 1.308710193398284, "grad_norm": NaN, "learning_rate": 2.8130330364880387e-05, "loss": 0.0, "step": 2250 }, { "epoch": 1.3092918423731277, "grad_norm": NaN, "learning_rate": 2.808802218888041e-05, "loss": 0.0, "step": 2251 }, { "epoch": 1.3098734913479715, "grad_norm": NaN, "learning_rate": 2.804573342212481e-05, "loss": 0.0, "step": 2252 }, { "epoch": 1.3104551403228153, "grad_norm": NaN, "learning_rate": 2.8003464102072226e-05, "loss": 0.0, "step": 2253 }, { "epoch": 1.3110367892976589, "grad_norm": NaN, "learning_rate": 2.7961214266164082e-05, "loss": 0.0, "step": 2254 }, { "epoch": 1.3116184382725025, "grad_norm": NaN, "learning_rate": 2.791898395182454e-05, "loss": 0.0, "step": 2255 }, { "epoch": 1.3122000872473463, "grad_norm": NaN, "learning_rate": 2.787677319646045e-05, "loss": 0.0, "step": 2256 }, { "epoch": 1.3127817362221899, "grad_norm": NaN, "learning_rate": 2.7834582037461398e-05, "loss": 0.0, "step": 2257 }, { "epoch": 1.3133633851970337, "grad_norm": NaN, "learning_rate": 2.779241051219954e-05, "loss": 0.0, "step": 2258 }, { "epoch": 1.3139450341718772, "grad_norm": NaN, "learning_rate": 2.775025865802968e-05, "loss": 0.0, "step": 2259 }, { "epoch": 1.3145266831467208, "grad_norm": NaN, "learning_rate": 2.7708126512289197e-05, "loss": 0.0, "step": 2260 }, { "epoch": 1.3151083321215646, "grad_norm": NaN, "learning_rate": 2.766601411229799e-05, "loss": 0.0, "step": 2261 }, { "epoch": 1.3156899810964084, "grad_norm": NaN, "learning_rate": 2.7623921495358473e-05, "loss": 0.0, "step": 2262 }, { "epoch": 1.316271630071252, "grad_norm": NaN, "learning_rate": 2.7581848698755573e-05, "loss": 0.0, "step": 2263 }, { "epoch": 1.3168532790460956, "grad_norm": NaN, "learning_rate": 2.7539795759756614e-05, "loss": 0.0, "step": 2264 }, { "epoch": 1.3174349280209394, "grad_norm": NaN, "learning_rate": 2.7497762715611353e-05, "loss": 0.0, "step": 2265 }, { "epoch": 1.318016576995783, "grad_norm": NaN, "learning_rate": 2.74557496035519e-05, "loss": 0.0, "step": 2266 }, { "epoch": 1.3185982259706268, "grad_norm": NaN, "learning_rate": 2.7413756460792772e-05, "loss": 0.0, "step": 2267 }, { "epoch": 1.3191798749454704, "grad_norm": NaN, "learning_rate": 2.737178332453074e-05, "loss": 0.0, "step": 2268 }, { "epoch": 1.319761523920314, "grad_norm": NaN, "learning_rate": 2.7329830231944864e-05, "loss": 0.0, "step": 2269 }, { "epoch": 1.3203431728951578, "grad_norm": NaN, "learning_rate": 2.7287897220196455e-05, "loss": 0.0, "step": 2270 }, { "epoch": 1.3209248218700014, "grad_norm": NaN, "learning_rate": 2.724598432642904e-05, "loss": 0.0, "step": 2271 }, { "epoch": 1.3215064708448452, "grad_norm": NaN, "learning_rate": 2.720409158776832e-05, "loss": 0.0, "step": 2272 }, { "epoch": 1.3220881198196888, "grad_norm": NaN, "learning_rate": 2.7162219041322144e-05, "loss": 0.0, "step": 2273 }, { "epoch": 1.3226697687945326, "grad_norm": NaN, "learning_rate": 2.712036672418049e-05, "loss": 0.0, "step": 2274 }, { "epoch": 1.3232514177693762, "grad_norm": NaN, "learning_rate": 2.707853467341537e-05, "loss": 0.0, "step": 2275 }, { "epoch": 1.32383306674422, "grad_norm": NaN, "learning_rate": 2.7036722926080916e-05, "loss": 0.0, "step": 2276 }, { "epoch": 1.3244147157190636, "grad_norm": NaN, "learning_rate": 2.699493151921323e-05, "loss": 0.0, "step": 2277 }, { "epoch": 1.3249963646939071, "grad_norm": NaN, "learning_rate": 2.6953160489830397e-05, "loss": 0.0, "step": 2278 }, { "epoch": 1.325578013668751, "grad_norm": NaN, "learning_rate": 2.691140987493247e-05, "loss": 0.0, "step": 2279 }, { "epoch": 1.3261596626435945, "grad_norm": NaN, "learning_rate": 2.6869679711501395e-05, "loss": 0.0, "step": 2280 }, { "epoch": 1.3267413116184383, "grad_norm": NaN, "learning_rate": 2.682797003650102e-05, "loss": 0.0, "step": 2281 }, { "epoch": 1.327322960593282, "grad_norm": NaN, "learning_rate": 2.6786280886877045e-05, "loss": 0.0, "step": 2282 }, { "epoch": 1.3279046095681255, "grad_norm": NaN, "learning_rate": 2.6744612299556987e-05, "loss": 0.0, "step": 2283 }, { "epoch": 1.3284862585429693, "grad_norm": NaN, "learning_rate": 2.6702964311450125e-05, "loss": 0.0, "step": 2284 }, { "epoch": 1.3290679075178131, "grad_norm": NaN, "learning_rate": 2.6661336959447552e-05, "loss": 0.0, "step": 2285 }, { "epoch": 1.3296495564926567, "grad_norm": NaN, "learning_rate": 2.6619730280422028e-05, "loss": 0.0, "step": 2286 }, { "epoch": 1.3302312054675003, "grad_norm": NaN, "learning_rate": 2.657814431122801e-05, "loss": 0.0, "step": 2287 }, { "epoch": 1.330812854442344, "grad_norm": NaN, "learning_rate": 2.6536579088701624e-05, "loss": 0.0, "step": 2288 }, { "epoch": 1.3313945034171877, "grad_norm": NaN, "learning_rate": 2.6495034649660612e-05, "loss": 0.0, "step": 2289 }, { "epoch": 1.3319761523920315, "grad_norm": NaN, "learning_rate": 2.6453511030904306e-05, "loss": 0.0, "step": 2290 }, { "epoch": 1.332557801366875, "grad_norm": NaN, "learning_rate": 2.6412008269213595e-05, "loss": 0.0, "step": 2291 }, { "epoch": 1.3331394503417187, "grad_norm": NaN, "learning_rate": 2.6370526401350898e-05, "loss": 0.0, "step": 2292 }, { "epoch": 1.3337210993165625, "grad_norm": NaN, "learning_rate": 2.6329065464060105e-05, "loss": 0.0, "step": 2293 }, { "epoch": 1.3343027482914063, "grad_norm": NaN, "learning_rate": 2.628762549406662e-05, "loss": 0.0, "step": 2294 }, { "epoch": 1.3348843972662499, "grad_norm": NaN, "learning_rate": 2.6246206528077222e-05, "loss": 0.0, "step": 2295 }, { "epoch": 1.3354660462410934, "grad_norm": NaN, "learning_rate": 2.62048086027801e-05, "loss": 0.0, "step": 2296 }, { "epoch": 1.3360476952159372, "grad_norm": NaN, "learning_rate": 2.6163431754844814e-05, "loss": 0.0, "step": 2297 }, { "epoch": 1.3366293441907808, "grad_norm": NaN, "learning_rate": 2.612207602092224e-05, "loss": 0.0, "step": 2298 }, { "epoch": 1.3372109931656246, "grad_norm": NaN, "learning_rate": 2.608074143764456e-05, "loss": 0.0, "step": 2299 }, { "epoch": 1.3377926421404682, "grad_norm": NaN, "learning_rate": 2.6039428041625225e-05, "loss": 0.0, "step": 2300 }, { "epoch": 1.3383742911153118, "grad_norm": NaN, "learning_rate": 2.5998135869458917e-05, "loss": 0.0, "step": 2301 }, { "epoch": 1.3389559400901556, "grad_norm": NaN, "learning_rate": 2.5956864957721505e-05, "loss": 0.0, "step": 2302 }, { "epoch": 1.3395375890649992, "grad_norm": NaN, "learning_rate": 2.5915615342970028e-05, "loss": 0.0, "step": 2303 }, { "epoch": 1.340119238039843, "grad_norm": NaN, "learning_rate": 2.58743870617427e-05, "loss": 0.0, "step": 2304 }, { "epoch": 1.3407008870146866, "grad_norm": NaN, "learning_rate": 2.5833180150558796e-05, "loss": 0.0, "step": 2305 }, { "epoch": 1.3412825359895304, "grad_norm": NaN, "learning_rate": 2.579199464591866e-05, "loss": 0.0, "step": 2306 }, { "epoch": 1.341864184964374, "grad_norm": NaN, "learning_rate": 2.57508305843037e-05, "loss": 0.0, "step": 2307 }, { "epoch": 1.3424458339392178, "grad_norm": NaN, "learning_rate": 2.57096880021763e-05, "loss": 0.0, "step": 2308 }, { "epoch": 1.3430274829140614, "grad_norm": NaN, "learning_rate": 2.5668566935979864e-05, "loss": 0.0, "step": 2309 }, { "epoch": 1.343609131888905, "grad_norm": NaN, "learning_rate": 2.5627467422138684e-05, "loss": 0.0, "step": 2310 }, { "epoch": 1.3441907808637488, "grad_norm": NaN, "learning_rate": 2.5586389497057995e-05, "loss": 0.0, "step": 2311 }, { "epoch": 1.3447724298385924, "grad_norm": NaN, "learning_rate": 2.5545333197123877e-05, "loss": 0.0, "step": 2312 }, { "epoch": 1.3453540788134362, "grad_norm": NaN, "learning_rate": 2.5504298558703322e-05, "loss": 0.0, "step": 2313 }, { "epoch": 1.3459357277882797, "grad_norm": NaN, "learning_rate": 2.5463285618144072e-05, "loss": 0.0, "step": 2314 }, { "epoch": 1.3465173767631233, "grad_norm": NaN, "learning_rate": 2.542229441177466e-05, "loss": 0.0, "step": 2315 }, { "epoch": 1.3470990257379671, "grad_norm": NaN, "learning_rate": 2.5381324975904385e-05, "loss": 0.0, "step": 2316 }, { "epoch": 1.347680674712811, "grad_norm": NaN, "learning_rate": 2.5340377346823247e-05, "loss": 0.0, "step": 2317 }, { "epoch": 1.3482623236876545, "grad_norm": NaN, "learning_rate": 2.5299451560801934e-05, "loss": 0.0, "step": 2318 }, { "epoch": 1.3488439726624981, "grad_norm": NaN, "learning_rate": 2.5258547654091792e-05, "loss": 0.0, "step": 2319 }, { "epoch": 1.349425621637342, "grad_norm": NaN, "learning_rate": 2.5217665662924778e-05, "loss": 0.0, "step": 2320 }, { "epoch": 1.3500072706121855, "grad_norm": NaN, "learning_rate": 2.517680562351342e-05, "loss": 0.0, "step": 2321 }, { "epoch": 1.3505889195870293, "grad_norm": NaN, "learning_rate": 2.5135967572050868e-05, "loss": 0.0, "step": 2322 }, { "epoch": 1.351170568561873, "grad_norm": NaN, "learning_rate": 2.5095151544710726e-05, "loss": 0.0, "step": 2323 }, { "epoch": 1.3517522175367165, "grad_norm": NaN, "learning_rate": 2.5054357577647103e-05, "loss": 0.0, "step": 2324 }, { "epoch": 1.3523338665115603, "grad_norm": NaN, "learning_rate": 2.5013585706994592e-05, "loss": 0.0, "step": 2325 }, { "epoch": 1.3529155154864039, "grad_norm": NaN, "learning_rate": 2.4972835968868193e-05, "loss": 0.0, "step": 2326 }, { "epoch": 1.3534971644612477, "grad_norm": NaN, "learning_rate": 2.4932108399363302e-05, "loss": 0.0, "step": 2327 }, { "epoch": 1.3540788134360913, "grad_norm": NaN, "learning_rate": 2.4891403034555693e-05, "loss": 0.0, "step": 2328 }, { "epoch": 1.354660462410935, "grad_norm": NaN, "learning_rate": 2.4850719910501452e-05, "loss": 0.0, "step": 2329 }, { "epoch": 1.3552421113857787, "grad_norm": NaN, "learning_rate": 2.4810059063236962e-05, "loss": 0.0, "step": 2330 }, { "epoch": 1.3558237603606225, "grad_norm": NaN, "learning_rate": 2.4769420528778923e-05, "loss": 0.0, "step": 2331 }, { "epoch": 1.356405409335466, "grad_norm": NaN, "learning_rate": 2.472880434312421e-05, "loss": 0.0, "step": 2332 }, { "epoch": 1.3569870583103096, "grad_norm": NaN, "learning_rate": 2.468821054224994e-05, "loss": 0.0, "step": 2333 }, { "epoch": 1.3575687072851534, "grad_norm": NaN, "learning_rate": 2.4647639162113366e-05, "loss": 0.0, "step": 2334 }, { "epoch": 1.358150356259997, "grad_norm": NaN, "learning_rate": 2.460709023865192e-05, "loss": 0.0, "step": 2335 }, { "epoch": 1.3587320052348408, "grad_norm": NaN, "learning_rate": 2.4566563807783116e-05, "loss": 0.0, "step": 2336 }, { "epoch": 1.3593136542096844, "grad_norm": NaN, "learning_rate": 2.452605990540456e-05, "loss": 0.0, "step": 2337 }, { "epoch": 1.359895303184528, "grad_norm": NaN, "learning_rate": 2.4485578567393884e-05, "loss": 0.0, "step": 2338 }, { "epoch": 1.3604769521593718, "grad_norm": NaN, "learning_rate": 2.444511982960876e-05, "loss": 0.0, "step": 2339 }, { "epoch": 1.3610586011342156, "grad_norm": NaN, "learning_rate": 2.440468372788679e-05, "loss": 0.0, "step": 2340 }, { "epoch": 1.3616402501090592, "grad_norm": NaN, "learning_rate": 2.4364270298045616e-05, "loss": 0.0, "step": 2341 }, { "epoch": 1.3622218990839028, "grad_norm": NaN, "learning_rate": 2.4323879575882717e-05, "loss": 0.0, "step": 2342 }, { "epoch": 1.3628035480587466, "grad_norm": NaN, "learning_rate": 2.4283511597175486e-05, "loss": 0.0, "step": 2343 }, { "epoch": 1.3633851970335902, "grad_norm": NaN, "learning_rate": 2.4243166397681165e-05, "loss": 0.0, "step": 2344 }, { "epoch": 1.363966846008434, "grad_norm": NaN, "learning_rate": 2.4202844013136822e-05, "loss": 0.0, "step": 2345 }, { "epoch": 1.3645484949832776, "grad_norm": NaN, "learning_rate": 2.4162544479259314e-05, "loss": 0.0, "step": 2346 }, { "epoch": 1.3651301439581212, "grad_norm": NaN, "learning_rate": 2.4122267831745267e-05, "loss": 0.0, "step": 2347 }, { "epoch": 1.365711792932965, "grad_norm": NaN, "learning_rate": 2.4082014106271016e-05, "loss": 0.0, "step": 2348 }, { "epoch": 1.3662934419078088, "grad_norm": NaN, "learning_rate": 2.4041783338492595e-05, "loss": 0.0, "step": 2349 }, { "epoch": 1.3668750908826524, "grad_norm": NaN, "learning_rate": 2.400157556404574e-05, "loss": 0.0, "step": 2350 }, { "epoch": 1.367456739857496, "grad_norm": NaN, "learning_rate": 2.3961390818545776e-05, "loss": 0.0, "step": 2351 }, { "epoch": 1.3680383888323397, "grad_norm": NaN, "learning_rate": 2.3921229137587637e-05, "loss": 0.0, "step": 2352 }, { "epoch": 1.3686200378071833, "grad_norm": NaN, "learning_rate": 2.388109055674584e-05, "loss": 0.0, "step": 2353 }, { "epoch": 1.3692016867820271, "grad_norm": NaN, "learning_rate": 2.3840975111574425e-05, "loss": 0.0, "step": 2354 }, { "epoch": 1.3697833357568707, "grad_norm": NaN, "learning_rate": 2.380088283760696e-05, "loss": 0.0, "step": 2355 }, { "epoch": 1.3703649847317143, "grad_norm": NaN, "learning_rate": 2.3760813770356466e-05, "loss": 0.0, "step": 2356 }, { "epoch": 1.3709466337065581, "grad_norm": NaN, "learning_rate": 2.3720767945315418e-05, "loss": 0.0, "step": 2357 }, { "epoch": 1.3715282826814017, "grad_norm": NaN, "learning_rate": 2.368074539795569e-05, "loss": 0.0, "step": 2358 }, { "epoch": 1.3721099316562455, "grad_norm": NaN, "learning_rate": 2.364074616372859e-05, "loss": 0.0, "step": 2359 }, { "epoch": 1.372691580631089, "grad_norm": NaN, "learning_rate": 2.3600770278064705e-05, "loss": 0.0, "step": 2360 }, { "epoch": 1.373273229605933, "grad_norm": NaN, "learning_rate": 2.3560817776373977e-05, "loss": 0.0, "step": 2361 }, { "epoch": 1.3738548785807765, "grad_norm": NaN, "learning_rate": 2.3520888694045635e-05, "loss": 0.0, "step": 2362 }, { "epoch": 1.3744365275556203, "grad_norm": NaN, "learning_rate": 2.3480983066448154e-05, "loss": 0.0, "step": 2363 }, { "epoch": 1.3750181765304639, "grad_norm": NaN, "learning_rate": 2.3441100928929227e-05, "loss": 0.0, "step": 2364 }, { "epoch": 1.3755998255053075, "grad_norm": NaN, "learning_rate": 2.3401242316815764e-05, "loss": 0.0, "step": 2365 }, { "epoch": 1.3761814744801513, "grad_norm": NaN, "learning_rate": 2.3361407265413815e-05, "loss": 0.0, "step": 2366 }, { "epoch": 1.3767631234549949, "grad_norm": NaN, "learning_rate": 2.3321595810008568e-05, "loss": 0.0, "step": 2367 }, { "epoch": 1.3773447724298387, "grad_norm": NaN, "learning_rate": 2.3281807985864297e-05, "loss": 0.0, "step": 2368 }, { "epoch": 1.3779264214046822, "grad_norm": NaN, "learning_rate": 2.324204382822438e-05, "loss": 0.0, "step": 2369 }, { "epoch": 1.3785080703795258, "grad_norm": NaN, "learning_rate": 2.3202303372311202e-05, "loss": 0.0, "step": 2370 }, { "epoch": 1.3790897193543696, "grad_norm": NaN, "learning_rate": 2.3162586653326157e-05, "loss": 0.0, "step": 2371 }, { "epoch": 1.3796713683292134, "grad_norm": NaN, "learning_rate": 2.312289370644961e-05, "loss": 0.0, "step": 2372 }, { "epoch": 1.380253017304057, "grad_norm": NaN, "learning_rate": 2.3083224566840884e-05, "loss": 0.0, "step": 2373 }, { "epoch": 1.3808346662789006, "grad_norm": NaN, "learning_rate": 2.30435792696382e-05, "loss": 0.0, "step": 2374 }, { "epoch": 1.3814163152537444, "grad_norm": NaN, "learning_rate": 2.300395784995866e-05, "loss": 0.0, "step": 2375 }, { "epoch": 1.381997964228588, "grad_norm": NaN, "learning_rate": 2.2964360342898233e-05, "loss": 0.0, "step": 2376 }, { "epoch": 1.3825796132034318, "grad_norm": NaN, "learning_rate": 2.292478678353166e-05, "loss": 0.0, "step": 2377 }, { "epoch": 1.3831612621782754, "grad_norm": NaN, "learning_rate": 2.288523720691255e-05, "loss": 0.0, "step": 2378 }, { "epoch": 1.383742911153119, "grad_norm": NaN, "learning_rate": 2.284571164807321e-05, "loss": 0.0, "step": 2379 }, { "epoch": 1.3843245601279628, "grad_norm": NaN, "learning_rate": 2.2806210142024663e-05, "loss": 0.0, "step": 2380 }, { "epoch": 1.3849062091028064, "grad_norm": NaN, "learning_rate": 2.2766732723756674e-05, "loss": 0.0, "step": 2381 }, { "epoch": 1.3854878580776502, "grad_norm": NaN, "learning_rate": 2.2727279428237623e-05, "loss": 0.0, "step": 2382 }, { "epoch": 1.3860695070524938, "grad_norm": NaN, "learning_rate": 2.268785029041456e-05, "loss": 0.0, "step": 2383 }, { "epoch": 1.3866511560273376, "grad_norm": NaN, "learning_rate": 2.264844534521311e-05, "loss": 0.0, "step": 2384 }, { "epoch": 1.3872328050021812, "grad_norm": NaN, "learning_rate": 2.260906462753749e-05, "loss": 0.0, "step": 2385 }, { "epoch": 1.387814453977025, "grad_norm": NaN, "learning_rate": 2.2569708172270425e-05, "loss": 0.0, "step": 2386 }, { "epoch": 1.3883961029518685, "grad_norm": NaN, "learning_rate": 2.2530376014273208e-05, "loss": 0.0, "step": 2387 }, { "epoch": 1.3889777519267121, "grad_norm": NaN, "learning_rate": 2.249106818838555e-05, "loss": 0.0, "step": 2388 }, { "epoch": 1.389559400901556, "grad_norm": NaN, "learning_rate": 2.2451784729425635e-05, "loss": 0.0, "step": 2389 }, { "epoch": 1.3901410498763995, "grad_norm": NaN, "learning_rate": 2.241252567219006e-05, "loss": 0.0, "step": 2390 }, { "epoch": 1.3907226988512433, "grad_norm": NaN, "learning_rate": 2.237329105145381e-05, "loss": 0.0, "step": 2391 }, { "epoch": 1.391304347826087, "grad_norm": NaN, "learning_rate": 2.2334080901970212e-05, "loss": 0.0, "step": 2392 }, { "epoch": 1.3918859968009305, "grad_norm": NaN, "learning_rate": 2.2294895258470934e-05, "loss": 0.0, "step": 2393 }, { "epoch": 1.3924676457757743, "grad_norm": NaN, "learning_rate": 2.2255734155665923e-05, "loss": 0.0, "step": 2394 }, { "epoch": 1.3930492947506181, "grad_norm": NaN, "learning_rate": 2.221659762824339e-05, "loss": 0.0, "step": 2395 }, { "epoch": 1.3936309437254617, "grad_norm": NaN, "learning_rate": 2.2177485710869804e-05, "loss": 0.0, "step": 2396 }, { "epoch": 1.3942125927003053, "grad_norm": NaN, "learning_rate": 2.2138398438189804e-05, "loss": 0.0, "step": 2397 }, { "epoch": 1.394794241675149, "grad_norm": NaN, "learning_rate": 2.209933584482621e-05, "loss": 0.0, "step": 2398 }, { "epoch": 1.3953758906499927, "grad_norm": NaN, "learning_rate": 2.206029796537997e-05, "loss": 0.0, "step": 2399 }, { "epoch": 1.3959575396248365, "grad_norm": NaN, "learning_rate": 2.202128483443015e-05, "loss": 0.0, "step": 2400 }, { "epoch": 1.39653918859968, "grad_norm": NaN, "learning_rate": 2.198229648653391e-05, "loss": 0.0, "step": 2401 }, { "epoch": 1.3971208375745237, "grad_norm": NaN, "learning_rate": 2.1943332956226432e-05, "loss": 0.0, "step": 2402 }, { "epoch": 1.3977024865493675, "grad_norm": NaN, "learning_rate": 2.1904394278020923e-05, "loss": 0.0, "step": 2403 }, { "epoch": 1.3982841355242113, "grad_norm": NaN, "learning_rate": 2.1865480486408587e-05, "loss": 0.0, "step": 2404 }, { "epoch": 1.3988657844990549, "grad_norm": NaN, "learning_rate": 2.1826591615858548e-05, "loss": 0.0, "step": 2405 }, { "epoch": 1.3994474334738984, "grad_norm": NaN, "learning_rate": 2.178772770081793e-05, "loss": 0.0, "step": 2406 }, { "epoch": 1.4000290824487422, "grad_norm": NaN, "learning_rate": 2.1748888775711684e-05, "loss": 0.0, "step": 2407 }, { "epoch": 1.4006107314235858, "grad_norm": NaN, "learning_rate": 2.171007487494264e-05, "loss": 0.0, "step": 2408 }, { "epoch": 1.4011923803984296, "grad_norm": NaN, "learning_rate": 2.1671286032891474e-05, "loss": 0.0, "step": 2409 }, { "epoch": 1.4017740293732732, "grad_norm": NaN, "learning_rate": 2.1632522283916655e-05, "loss": 0.0, "step": 2410 }, { "epoch": 1.4023556783481168, "grad_norm": NaN, "learning_rate": 2.159378366235444e-05, "loss": 0.0, "step": 2411 }, { "epoch": 1.4029373273229606, "grad_norm": NaN, "learning_rate": 2.1555070202518806e-05, "loss": 0.0, "step": 2412 }, { "epoch": 1.4035189762978042, "grad_norm": NaN, "learning_rate": 2.151638193870147e-05, "loss": 0.0, "step": 2413 }, { "epoch": 1.404100625272648, "grad_norm": NaN, "learning_rate": 2.147771890517178e-05, "loss": 0.0, "step": 2414 }, { "epoch": 1.4046822742474916, "grad_norm": NaN, "learning_rate": 2.143908113617681e-05, "loss": 0.0, "step": 2415 }, { "epoch": 1.4052639232223354, "grad_norm": NaN, "learning_rate": 2.1400468665941208e-05, "loss": 0.0, "step": 2416 }, { "epoch": 1.405845572197179, "grad_norm": NaN, "learning_rate": 2.1361881528667216e-05, "loss": 0.0, "step": 2417 }, { "epoch": 1.4064272211720228, "grad_norm": NaN, "learning_rate": 2.1323319758534638e-05, "loss": 0.0, "step": 2418 }, { "epoch": 1.4070088701468664, "grad_norm": NaN, "learning_rate": 2.1284783389700814e-05, "loss": 0.0, "step": 2419 }, { "epoch": 1.40759051912171, "grad_norm": NaN, "learning_rate": 2.1246272456300576e-05, "loss": 0.0, "step": 2420 }, { "epoch": 1.4081721680965538, "grad_norm": NaN, "learning_rate": 2.120778699244624e-05, "loss": 0.0, "step": 2421 }, { "epoch": 1.4087538170713974, "grad_norm": NaN, "learning_rate": 2.116932703222755e-05, "loss": 0.0, "step": 2422 }, { "epoch": 1.4093354660462412, "grad_norm": NaN, "learning_rate": 2.1130892609711638e-05, "loss": 0.0, "step": 2423 }, { "epoch": 1.4099171150210847, "grad_norm": NaN, "learning_rate": 2.1092483758943088e-05, "loss": 0.0, "step": 2424 }, { "epoch": 1.4104987639959283, "grad_norm": NaN, "learning_rate": 2.105410051394375e-05, "loss": 0.0, "step": 2425 }, { "epoch": 1.4110804129707721, "grad_norm": NaN, "learning_rate": 2.1015742908712837e-05, "loss": 0.0, "step": 2426 }, { "epoch": 1.411662061945616, "grad_norm": NaN, "learning_rate": 2.0977410977226842e-05, "loss": 0.0, "step": 2427 }, { "epoch": 1.4122437109204595, "grad_norm": NaN, "learning_rate": 2.0939104753439508e-05, "loss": 0.0, "step": 2428 }, { "epoch": 1.4128253598953031, "grad_norm": NaN, "learning_rate": 2.0900824271281828e-05, "loss": 0.0, "step": 2429 }, { "epoch": 1.413407008870147, "grad_norm": NaN, "learning_rate": 2.086256956466196e-05, "loss": 0.0, "step": 2430 }, { "epoch": 1.4139886578449905, "grad_norm": NaN, "learning_rate": 2.082434066746526e-05, "loss": 0.0, "step": 2431 }, { "epoch": 1.4145703068198343, "grad_norm": NaN, "learning_rate": 2.0786137613554213e-05, "loss": 0.0, "step": 2432 }, { "epoch": 1.415151955794678, "grad_norm": NaN, "learning_rate": 2.0747960436768398e-05, "loss": 0.0, "step": 2433 }, { "epoch": 1.4157336047695215, "grad_norm": NaN, "learning_rate": 2.070980917092451e-05, "loss": 0.0, "step": 2434 }, { "epoch": 1.4163152537443653, "grad_norm": NaN, "learning_rate": 2.0671683849816264e-05, "loss": 0.0, "step": 2435 }, { "epoch": 1.416896902719209, "grad_norm": NaN, "learning_rate": 2.0633584507214392e-05, "loss": 0.0, "step": 2436 }, { "epoch": 1.4174785516940527, "grad_norm": NaN, "learning_rate": 2.0595511176866622e-05, "loss": 0.0, "step": 2437 }, { "epoch": 1.4180602006688963, "grad_norm": NaN, "learning_rate": 2.055746389249764e-05, "loss": 0.0, "step": 2438 }, { "epoch": 1.41864184964374, "grad_norm": NaN, "learning_rate": 2.051944268780906e-05, "loss": 0.0, "step": 2439 }, { "epoch": 1.4192234986185837, "grad_norm": NaN, "learning_rate": 2.0481447596479397e-05, "loss": 0.0, "step": 2440 }, { "epoch": 1.4198051475934275, "grad_norm": NaN, "learning_rate": 2.0443478652164037e-05, "loss": 0.0, "step": 2441 }, { "epoch": 1.420386796568271, "grad_norm": NaN, "learning_rate": 2.040553588849518e-05, "loss": 0.0, "step": 2442 }, { "epoch": 1.4209684455431146, "grad_norm": NaN, "learning_rate": 2.0367619339081902e-05, "loss": 0.0, "step": 2443 }, { "epoch": 1.4215500945179584, "grad_norm": NaN, "learning_rate": 2.0329729037509986e-05, "loss": 0.0, "step": 2444 }, { "epoch": 1.422131743492802, "grad_norm": NaN, "learning_rate": 2.0291865017342006e-05, "loss": 0.0, "step": 2445 }, { "epoch": 1.4227133924676458, "grad_norm": NaN, "learning_rate": 2.0254027312117235e-05, "loss": 0.0, "step": 2446 }, { "epoch": 1.4232950414424894, "grad_norm": NaN, "learning_rate": 2.021621595535166e-05, "loss": 0.0, "step": 2447 }, { "epoch": 1.423876690417333, "grad_norm": NaN, "learning_rate": 2.0178430980537895e-05, "loss": 0.0, "step": 2448 }, { "epoch": 1.4244583393921768, "grad_norm": NaN, "learning_rate": 2.0140672421145224e-05, "loss": 0.0, "step": 2449 }, { "epoch": 1.4250399883670206, "grad_norm": NaN, "learning_rate": 2.01029403106195e-05, "loss": 0.0, "step": 2450 }, { "epoch": 1.4256216373418642, "grad_norm": NaN, "learning_rate": 2.0065234682383178e-05, "loss": 0.0, "step": 2451 }, { "epoch": 1.4262032863167078, "grad_norm": NaN, "learning_rate": 2.002755556983521e-05, "loss": 0.0, "step": 2452 }, { "epoch": 1.4267849352915516, "grad_norm": NaN, "learning_rate": 1.9989903006351136e-05, "loss": 0.0, "step": 2453 }, { "epoch": 1.4273665842663952, "grad_norm": NaN, "learning_rate": 1.9952277025282906e-05, "loss": 0.0, "step": 2454 }, { "epoch": 1.427948233241239, "grad_norm": NaN, "learning_rate": 1.991467765995896e-05, "loss": 0.0, "step": 2455 }, { "epoch": 1.4285298822160826, "grad_norm": NaN, "learning_rate": 1.987710494368415e-05, "loss": 0.0, "step": 2456 }, { "epoch": 1.4291115311909262, "grad_norm": NaN, "learning_rate": 1.9839558909739732e-05, "loss": 0.0, "step": 2457 }, { "epoch": 1.42969318016577, "grad_norm": NaN, "learning_rate": 1.9802039591383314e-05, "loss": 0.0, "step": 2458 }, { "epoch": 1.4302748291406138, "grad_norm": NaN, "learning_rate": 1.9764547021848855e-05, "loss": 0.0, "step": 2459 }, { "epoch": 1.4308564781154574, "grad_norm": NaN, "learning_rate": 1.9727081234346607e-05, "loss": 0.0, "step": 2460 }, { "epoch": 1.431438127090301, "grad_norm": NaN, "learning_rate": 1.9689642262063097e-05, "loss": 0.0, "step": 2461 }, { "epoch": 1.4320197760651447, "grad_norm": NaN, "learning_rate": 1.9652230138161136e-05, "loss": 0.0, "step": 2462 }, { "epoch": 1.4326014250399883, "grad_norm": NaN, "learning_rate": 1.961484489577971e-05, "loss": 0.0, "step": 2463 }, { "epoch": 1.4331830740148321, "grad_norm": NaN, "learning_rate": 1.9577486568034005e-05, "loss": 0.0, "step": 2464 }, { "epoch": 1.4337647229896757, "grad_norm": NaN, "learning_rate": 1.9540155188015375e-05, "loss": 0.0, "step": 2465 }, { "epoch": 1.4343463719645193, "grad_norm": NaN, "learning_rate": 1.95028507887913e-05, "loss": 0.0, "step": 2466 }, { "epoch": 1.4349280209393631, "grad_norm": NaN, "learning_rate": 1.946557340340535e-05, "loss": 0.0, "step": 2467 }, { "epoch": 1.4355096699142067, "grad_norm": NaN, "learning_rate": 1.9428323064877196e-05, "loss": 0.0, "step": 2468 }, { "epoch": 1.4360913188890505, "grad_norm": NaN, "learning_rate": 1.939109980620252e-05, "loss": 0.0, "step": 2469 }, { "epoch": 1.436672967863894, "grad_norm": NaN, "learning_rate": 1.9353903660353024e-05, "loss": 0.0, "step": 2470 }, { "epoch": 1.437254616838738, "grad_norm": NaN, "learning_rate": 1.9316734660276393e-05, "loss": 0.0, "step": 2471 }, { "epoch": 1.4378362658135815, "grad_norm": NaN, "learning_rate": 1.927959283889631e-05, "loss": 0.0, "step": 2472 }, { "epoch": 1.4384179147884253, "grad_norm": NaN, "learning_rate": 1.9242478229112316e-05, "loss": 0.0, "step": 2473 }, { "epoch": 1.4389995637632689, "grad_norm": NaN, "learning_rate": 1.9205390863799887e-05, "loss": 0.0, "step": 2474 }, { "epoch": 1.4395812127381125, "grad_norm": NaN, "learning_rate": 1.9168330775810368e-05, "loss": 0.0, "step": 2475 }, { "epoch": 1.4401628617129563, "grad_norm": NaN, "learning_rate": 1.9131297997970914e-05, "loss": 0.0, "step": 2476 }, { "epoch": 1.4407445106877999, "grad_norm": NaN, "learning_rate": 1.909429256308452e-05, "loss": 0.0, "step": 2477 }, { "epoch": 1.4413261596626437, "grad_norm": NaN, "learning_rate": 1.9057314503929946e-05, "loss": 0.0, "step": 2478 }, { "epoch": 1.4419078086374872, "grad_norm": NaN, "learning_rate": 1.902036385326171e-05, "loss": 0.0, "step": 2479 }, { "epoch": 1.4424894576123308, "grad_norm": NaN, "learning_rate": 1.8983440643810026e-05, "loss": 0.0, "step": 2480 }, { "epoch": 1.4430711065871746, "grad_norm": NaN, "learning_rate": 1.8946544908280855e-05, "loss": 0.0, "step": 2481 }, { "epoch": 1.4436527555620184, "grad_norm": NaN, "learning_rate": 1.890967667935578e-05, "loss": 0.0, "step": 2482 }, { "epoch": 1.444234404536862, "grad_norm": NaN, "learning_rate": 1.8872835989692028e-05, "loss": 0.0, "step": 2483 }, { "epoch": 1.4448160535117056, "grad_norm": NaN, "learning_rate": 1.8836022871922432e-05, "loss": 0.0, "step": 2484 }, { "epoch": 1.4453977024865494, "grad_norm": NaN, "learning_rate": 1.879923735865541e-05, "loss": 0.0, "step": 2485 }, { "epoch": 1.445979351461393, "grad_norm": NaN, "learning_rate": 1.8762479482474908e-05, "loss": 0.0, "step": 2486 }, { "epoch": 1.4465610004362368, "grad_norm": NaN, "learning_rate": 1.8725749275940417e-05, "loss": 0.0, "step": 2487 }, { "epoch": 1.4471426494110804, "grad_norm": NaN, "learning_rate": 1.8689046771586905e-05, "loss": 0.0, "step": 2488 }, { "epoch": 1.447724298385924, "grad_norm": NaN, "learning_rate": 1.8652372001924806e-05, "loss": 0.0, "step": 2489 }, { "epoch": 1.4483059473607678, "grad_norm": NaN, "learning_rate": 1.8615724999439966e-05, "loss": 0.0, "step": 2490 }, { "epoch": 1.4488875963356116, "grad_norm": NaN, "learning_rate": 1.8579105796593694e-05, "loss": 0.0, "step": 2491 }, { "epoch": 1.4494692453104552, "grad_norm": NaN, "learning_rate": 1.854251442582261e-05, "loss": 0.0, "step": 2492 }, { "epoch": 1.4500508942852988, "grad_norm": NaN, "learning_rate": 1.8505950919538712e-05, "loss": 0.0, "step": 2493 }, { "epoch": 1.4506325432601426, "grad_norm": NaN, "learning_rate": 1.8469415310129314e-05, "loss": 0.0, "step": 2494 }, { "epoch": 1.4512141922349862, "grad_norm": NaN, "learning_rate": 1.8432907629957007e-05, "loss": 0.0, "step": 2495 }, { "epoch": 1.45179584120983, "grad_norm": NaN, "learning_rate": 1.8396427911359658e-05, "loss": 0.0, "step": 2496 }, { "epoch": 1.4523774901846735, "grad_norm": NaN, "learning_rate": 1.835997618665036e-05, "loss": 0.0, "step": 2497 }, { "epoch": 1.4529591391595171, "grad_norm": NaN, "learning_rate": 1.83235524881174e-05, "loss": 0.0, "step": 2498 }, { "epoch": 1.453540788134361, "grad_norm": NaN, "learning_rate": 1.828715684802425e-05, "loss": 0.0, "step": 2499 }, { "epoch": 1.4541224371092045, "grad_norm": NaN, "learning_rate": 1.8250789298609544e-05, "loss": 0.0, "step": 2500 }, { "epoch": 1.4547040860840483, "grad_norm": NaN, "learning_rate": 1.8214449872087013e-05, "loss": 0.0, "step": 2501 }, { "epoch": 1.455285735058892, "grad_norm": NaN, "learning_rate": 1.817813860064547e-05, "loss": 0.0, "step": 2502 }, { "epoch": 1.4558673840337357, "grad_norm": NaN, "learning_rate": 1.8141855516448812e-05, "loss": 0.0, "step": 2503 }, { "epoch": 1.4564490330085793, "grad_norm": NaN, "learning_rate": 1.810560065163595e-05, "loss": 0.0, "step": 2504 }, { "epoch": 1.4570306819834231, "grad_norm": NaN, "learning_rate": 1.8069374038320803e-05, "loss": 0.0, "step": 2505 }, { "epoch": 1.4576123309582667, "grad_norm": NaN, "learning_rate": 1.8033175708592277e-05, "loss": 0.0, "step": 2506 }, { "epoch": 1.4581939799331103, "grad_norm": NaN, "learning_rate": 1.79970056945142e-05, "loss": 0.0, "step": 2507 }, { "epoch": 1.458775628907954, "grad_norm": NaN, "learning_rate": 1.796086402812535e-05, "loss": 0.0, "step": 2508 }, { "epoch": 1.4593572778827977, "grad_norm": NaN, "learning_rate": 1.7924750741439355e-05, "loss": 0.0, "step": 2509 }, { "epoch": 1.4599389268576415, "grad_norm": NaN, "learning_rate": 1.7888665866444765e-05, "loss": 0.0, "step": 2510 }, { "epoch": 1.460520575832485, "grad_norm": NaN, "learning_rate": 1.7852609435104905e-05, "loss": 0.0, "step": 2511 }, { "epoch": 1.4611022248073287, "grad_norm": NaN, "learning_rate": 1.781658147935793e-05, "loss": 0.0, "step": 2512 }, { "epoch": 1.4616838737821725, "grad_norm": NaN, "learning_rate": 1.7780582031116776e-05, "loss": 0.0, "step": 2513 }, { "epoch": 1.4622655227570163, "grad_norm": NaN, "learning_rate": 1.774461112226911e-05, "loss": 0.0, "step": 2514 }, { "epoch": 1.4628471717318599, "grad_norm": NaN, "learning_rate": 1.770866878467734e-05, "loss": 0.0, "step": 2515 }, { "epoch": 1.4634288207067034, "grad_norm": NaN, "learning_rate": 1.7672755050178552e-05, "loss": 0.0, "step": 2516 }, { "epoch": 1.4640104696815472, "grad_norm": NaN, "learning_rate": 1.7636869950584496e-05, "loss": 0.0, "step": 2517 }, { "epoch": 1.4645921186563908, "grad_norm": NaN, "learning_rate": 1.760101351768155e-05, "loss": 0.0, "step": 2518 }, { "epoch": 1.4651737676312346, "grad_norm": NaN, "learning_rate": 1.756518578323074e-05, "loss": 0.0, "step": 2519 }, { "epoch": 1.4657554166060782, "grad_norm": NaN, "learning_rate": 1.7529386778967633e-05, "loss": 0.0, "step": 2520 }, { "epoch": 1.4663370655809218, "grad_norm": NaN, "learning_rate": 1.7493616536602353e-05, "loss": 0.0, "step": 2521 }, { "epoch": 1.4669187145557656, "grad_norm": NaN, "learning_rate": 1.7457875087819552e-05, "loss": 0.0, "step": 2522 }, { "epoch": 1.4675003635306092, "grad_norm": NaN, "learning_rate": 1.7422162464278376e-05, "loss": 0.0, "step": 2523 }, { "epoch": 1.468082012505453, "grad_norm": NaN, "learning_rate": 1.7386478697612436e-05, "loss": 0.0, "step": 2524 }, { "epoch": 1.4686636614802966, "grad_norm": NaN, "learning_rate": 1.735082381942979e-05, "loss": 0.0, "step": 2525 }, { "epoch": 1.4692453104551404, "grad_norm": NaN, "learning_rate": 1.7315197861312898e-05, "loss": 0.0, "step": 2526 }, { "epoch": 1.469826959429984, "grad_norm": NaN, "learning_rate": 1.7279600854818613e-05, "loss": 0.0, "step": 2527 }, { "epoch": 1.4704086084048278, "grad_norm": NaN, "learning_rate": 1.7244032831478106e-05, "loss": 0.0, "step": 2528 }, { "epoch": 1.4709902573796714, "grad_norm": NaN, "learning_rate": 1.7208493822796957e-05, "loss": 0.0, "step": 2529 }, { "epoch": 1.471571906354515, "grad_norm": NaN, "learning_rate": 1.7172983860254965e-05, "loss": 0.0, "step": 2530 }, { "epoch": 1.4721535553293588, "grad_norm": NaN, "learning_rate": 1.7137502975306236e-05, "loss": 0.0, "step": 2531 }, { "epoch": 1.4727352043042023, "grad_norm": NaN, "learning_rate": 1.710205119937911e-05, "loss": 0.0, "step": 2532 }, { "epoch": 1.4733168532790462, "grad_norm": NaN, "learning_rate": 1.706662856387615e-05, "loss": 0.0, "step": 2533 }, { "epoch": 1.4738985022538897, "grad_norm": NaN, "learning_rate": 1.7031235100174097e-05, "loss": 0.0, "step": 2534 }, { "epoch": 1.4744801512287333, "grad_norm": NaN, "learning_rate": 1.6995870839623868e-05, "loss": 0.0, "step": 2535 }, { "epoch": 1.4750618002035771, "grad_norm": NaN, "learning_rate": 1.6960535813550498e-05, "loss": 0.0, "step": 2536 }, { "epoch": 1.475643449178421, "grad_norm": NaN, "learning_rate": 1.692523005325311e-05, "loss": 0.0, "step": 2537 }, { "epoch": 1.4762250981532645, "grad_norm": NaN, "learning_rate": 1.6889953590004964e-05, "loss": 0.0, "step": 2538 }, { "epoch": 1.476806747128108, "grad_norm": NaN, "learning_rate": 1.6854706455053316e-05, "loss": 0.0, "step": 2539 }, { "epoch": 1.477388396102952, "grad_norm": NaN, "learning_rate": 1.6819488679619454e-05, "loss": 0.0, "step": 2540 }, { "epoch": 1.4779700450777955, "grad_norm": NaN, "learning_rate": 1.6784300294898666e-05, "loss": 0.0, "step": 2541 }, { "epoch": 1.4785516940526393, "grad_norm": NaN, "learning_rate": 1.6749141332060196e-05, "loss": 0.0, "step": 2542 }, { "epoch": 1.479133343027483, "grad_norm": NaN, "learning_rate": 1.6714011822247245e-05, "loss": 0.0, "step": 2543 }, { "epoch": 1.4797149920023265, "grad_norm": NaN, "learning_rate": 1.6678911796576906e-05, "loss": 0.0, "step": 2544 }, { "epoch": 1.4802966409771703, "grad_norm": NaN, "learning_rate": 1.664384128614017e-05, "loss": 0.0, "step": 2545 }, { "epoch": 1.480878289952014, "grad_norm": NaN, "learning_rate": 1.660880032200188e-05, "loss": 0.0, "step": 2546 }, { "epoch": 1.4814599389268577, "grad_norm": NaN, "learning_rate": 1.6573788935200686e-05, "loss": 0.0, "step": 2547 }, { "epoch": 1.4820415879017013, "grad_norm": NaN, "learning_rate": 1.6538807156749092e-05, "loss": 0.0, "step": 2548 }, { "epoch": 1.482623236876545, "grad_norm": NaN, "learning_rate": 1.650385501763332e-05, "loss": 0.0, "step": 2549 }, { "epoch": 1.4832048858513887, "grad_norm": NaN, "learning_rate": 1.6468932548813367e-05, "loss": 0.0, "step": 2550 }, { "epoch": 1.4837865348262325, "grad_norm": NaN, "learning_rate": 1.6434039781222938e-05, "loss": 0.0, "step": 2551 }, { "epoch": 1.484368183801076, "grad_norm": NaN, "learning_rate": 1.6399176745769428e-05, "loss": 0.0, "step": 2552 }, { "epoch": 1.4849498327759196, "grad_norm": NaN, "learning_rate": 1.636434347333391e-05, "loss": 0.0, "step": 2553 }, { "epoch": 1.4855314817507634, "grad_norm": NaN, "learning_rate": 1.6329539994771082e-05, "loss": 0.0, "step": 2554 }, { "epoch": 1.486113130725607, "grad_norm": NaN, "learning_rate": 1.6294766340909246e-05, "loss": 0.0, "step": 2555 }, { "epoch": 1.4866947797004508, "grad_norm": NaN, "learning_rate": 1.626002254255028e-05, "loss": 0.0, "step": 2556 }, { "epoch": 1.4872764286752944, "grad_norm": NaN, "learning_rate": 1.622530863046966e-05, "loss": 0.0, "step": 2557 }, { "epoch": 1.4878580776501382, "grad_norm": NaN, "learning_rate": 1.619062463541635e-05, "loss": 0.0, "step": 2558 }, { "epoch": 1.4884397266249818, "grad_norm": NaN, "learning_rate": 1.615597058811281e-05, "loss": 0.0, "step": 2559 }, { "epoch": 1.4890213755998256, "grad_norm": NaN, "learning_rate": 1.6121346519255e-05, "loss": 0.0, "step": 2560 }, { "epoch": 1.4896030245746692, "grad_norm": NaN, "learning_rate": 1.60867524595123e-05, "loss": 0.0, "step": 2561 }, { "epoch": 1.4901846735495128, "grad_norm": NaN, "learning_rate": 1.605218843952752e-05, "loss": 0.0, "step": 2562 }, { "epoch": 1.4907663225243566, "grad_norm": NaN, "learning_rate": 1.6017654489916865e-05, "loss": 0.0, "step": 2563 }, { "epoch": 1.4913479714992002, "grad_norm": NaN, "learning_rate": 1.59831506412699e-05, "loss": 0.0, "step": 2564 }, { "epoch": 1.491929620474044, "grad_norm": NaN, "learning_rate": 1.5948676924149523e-05, "loss": 0.0, "step": 2565 }, { "epoch": 1.4925112694488876, "grad_norm": NaN, "learning_rate": 1.5914233369091935e-05, "loss": 0.0, "step": 2566 }, { "epoch": 1.4930929184237312, "grad_norm": NaN, "learning_rate": 1.5879820006606662e-05, "loss": 0.0, "step": 2567 }, { "epoch": 1.493674567398575, "grad_norm": NaN, "learning_rate": 1.584543686717643e-05, "loss": 0.0, "step": 2568 }, { "epoch": 1.4942562163734188, "grad_norm": NaN, "learning_rate": 1.581108398125723e-05, "loss": 0.0, "step": 2569 }, { "epoch": 1.4948378653482624, "grad_norm": NaN, "learning_rate": 1.5776761379278238e-05, "loss": 0.0, "step": 2570 }, { "epoch": 1.495419514323106, "grad_norm": NaN, "learning_rate": 1.5742469091641815e-05, "loss": 0.0, "step": 2571 }, { "epoch": 1.4960011632979497, "grad_norm": NaN, "learning_rate": 1.5708207148723463e-05, "loss": 0.0, "step": 2572 }, { "epoch": 1.4965828122727933, "grad_norm": NaN, "learning_rate": 1.5673975580871808e-05, "loss": 0.0, "step": 2573 }, { "epoch": 1.4971644612476371, "grad_norm": NaN, "learning_rate": 1.563977441840857e-05, "loss": 0.0, "step": 2574 }, { "epoch": 1.4977461102224807, "grad_norm": NaN, "learning_rate": 1.5605603691628524e-05, "loss": 0.0, "step": 2575 }, { "epoch": 1.4983277591973243, "grad_norm": NaN, "learning_rate": 1.5571463430799528e-05, "loss": 0.0, "step": 2576 }, { "epoch": 1.4989094081721681, "grad_norm": NaN, "learning_rate": 1.5537353666162413e-05, "loss": 0.0, "step": 2577 }, { "epoch": 1.4994910571470117, "grad_norm": NaN, "learning_rate": 1.5503274427930996e-05, "loss": 0.0, "step": 2578 }, { "epoch": 1.5000727061218555, "grad_norm": NaN, "learning_rate": 1.5469225746292082e-05, "loss": 0.0, "step": 2579 }, { "epoch": 1.500654355096699, "grad_norm": NaN, "learning_rate": 1.5435207651405386e-05, "loss": 0.0, "step": 2580 }, { "epoch": 1.5012360040715427, "grad_norm": NaN, "learning_rate": 1.5401220173403537e-05, "loss": 0.0, "step": 2581 }, { "epoch": 1.5018176530463865, "grad_norm": NaN, "learning_rate": 1.5367263342392045e-05, "loss": 0.0, "step": 2582 }, { "epoch": 1.5023993020212303, "grad_norm": NaN, "learning_rate": 1.533333718844928e-05, "loss": 0.0, "step": 2583 }, { "epoch": 1.5029809509960739, "grad_norm": NaN, "learning_rate": 1.5299441741626426e-05, "loss": 0.0, "step": 2584 }, { "epoch": 1.5035625999709175, "grad_norm": NaN, "learning_rate": 1.5265577031947452e-05, "loss": 0.0, "step": 2585 }, { "epoch": 1.5041442489457613, "grad_norm": NaN, "learning_rate": 1.5231743089409167e-05, "loss": 0.0, "step": 2586 }, { "epoch": 1.504725897920605, "grad_norm": NaN, "learning_rate": 1.519793994398106e-05, "loss": 0.0, "step": 2587 }, { "epoch": 1.5053075468954487, "grad_norm": NaN, "learning_rate": 1.5164167625605363e-05, "loss": 0.0, "step": 2588 }, { "epoch": 1.5058891958702922, "grad_norm": NaN, "learning_rate": 1.5130426164196997e-05, "loss": 0.0, "step": 2589 }, { "epoch": 1.5064708448451358, "grad_norm": NaN, "learning_rate": 1.5096715589643557e-05, "loss": 0.0, "step": 2590 }, { "epoch": 1.5070524938199796, "grad_norm": NaN, "learning_rate": 1.506303593180528e-05, "loss": 0.0, "step": 2591 }, { "epoch": 1.5076341427948234, "grad_norm": NaN, "learning_rate": 1.5029387220515012e-05, "loss": 0.0, "step": 2592 }, { "epoch": 1.508215791769667, "grad_norm": NaN, "learning_rate": 1.4995769485578193e-05, "loss": 0.0, "step": 2593 }, { "epoch": 1.5087974407445106, "grad_norm": NaN, "learning_rate": 1.4962182756772797e-05, "loss": 0.0, "step": 2594 }, { "epoch": 1.5093790897193544, "grad_norm": NaN, "learning_rate": 1.4928627063849393e-05, "loss": 0.0, "step": 2595 }, { "epoch": 1.509960738694198, "grad_norm": NaN, "learning_rate": 1.489510243653101e-05, "loss": 0.0, "step": 2596 }, { "epoch": 1.5105423876690418, "grad_norm": NaN, "learning_rate": 1.4861608904513174e-05, "loss": 0.0, "step": 2597 }, { "epoch": 1.5111240366438854, "grad_norm": NaN, "learning_rate": 1.4828146497463858e-05, "loss": 0.0, "step": 2598 }, { "epoch": 1.511705685618729, "grad_norm": NaN, "learning_rate": 1.4794715245023482e-05, "loss": 0.0, "step": 2599 }, { "epoch": 1.5122873345935728, "grad_norm": NaN, "learning_rate": 1.4761315176804858e-05, "loss": 0.0, "step": 2600 }, { "epoch": 1.5128689835684166, "grad_norm": NaN, "learning_rate": 1.4727946322393182e-05, "loss": 0.0, "step": 2601 }, { "epoch": 1.5134506325432602, "grad_norm": NaN, "learning_rate": 1.4694608711345992e-05, "loss": 0.0, "step": 2602 }, { "epoch": 1.5140322815181038, "grad_norm": NaN, "learning_rate": 1.4661302373193159e-05, "loss": 0.0, "step": 2603 }, { "epoch": 1.5146139304929473, "grad_norm": NaN, "learning_rate": 1.4628027337436833e-05, "loss": 0.0, "step": 2604 }, { "epoch": 1.5151955794677912, "grad_norm": NaN, "learning_rate": 1.4594783633551496e-05, "loss": 0.0, "step": 2605 }, { "epoch": 1.515777228442635, "grad_norm": NaN, "learning_rate": 1.456157129098381e-05, "loss": 0.0, "step": 2606 }, { "epoch": 1.5163588774174785, "grad_norm": NaN, "learning_rate": 1.4528390339152692e-05, "loss": 0.0, "step": 2607 }, { "epoch": 1.5169405263923221, "grad_norm": NaN, "learning_rate": 1.4495240807449245e-05, "loss": 0.0, "step": 2608 }, { "epoch": 1.517522175367166, "grad_norm": NaN, "learning_rate": 1.4462122725236731e-05, "loss": 0.0, "step": 2609 }, { "epoch": 1.5181038243420097, "grad_norm": NaN, "learning_rate": 1.442903612185057e-05, "loss": 0.0, "step": 2610 }, { "epoch": 1.5186854733168533, "grad_norm": NaN, "learning_rate": 1.4395981026598299e-05, "loss": 0.0, "step": 2611 }, { "epoch": 1.519267122291697, "grad_norm": NaN, "learning_rate": 1.4362957468759531e-05, "loss": 0.0, "step": 2612 }, { "epoch": 1.5198487712665405, "grad_norm": NaN, "learning_rate": 1.4329965477585955e-05, "loss": 0.0, "step": 2613 }, { "epoch": 1.5204304202413843, "grad_norm": NaN, "learning_rate": 1.4297005082301285e-05, "loss": 0.0, "step": 2614 }, { "epoch": 1.5210120692162281, "grad_norm": NaN, "learning_rate": 1.4264076312101282e-05, "loss": 0.0, "step": 2615 }, { "epoch": 1.5215937181910717, "grad_norm": NaN, "learning_rate": 1.423117919615366e-05, "loss": 0.0, "step": 2616 }, { "epoch": 1.5221753671659153, "grad_norm": NaN, "learning_rate": 1.4198313763598115e-05, "loss": 0.0, "step": 2617 }, { "epoch": 1.522757016140759, "grad_norm": NaN, "learning_rate": 1.4165480043546254e-05, "loss": 0.0, "step": 2618 }, { "epoch": 1.523338665115603, "grad_norm": NaN, "learning_rate": 1.4132678065081611e-05, "loss": 0.0, "step": 2619 }, { "epoch": 1.5239203140904465, "grad_norm": NaN, "learning_rate": 1.4099907857259609e-05, "loss": 0.0, "step": 2620 }, { "epoch": 1.52450196306529, "grad_norm": NaN, "learning_rate": 1.4067169449107514e-05, "loss": 0.0, "step": 2621 }, { "epoch": 1.5250836120401337, "grad_norm": NaN, "learning_rate": 1.4034462869624438e-05, "loss": 0.0, "step": 2622 }, { "epoch": 1.5256652610149775, "grad_norm": NaN, "learning_rate": 1.4001788147781275e-05, "loss": 0.0, "step": 2623 }, { "epoch": 1.5262469099898213, "grad_norm": NaN, "learning_rate": 1.3969145312520749e-05, "loss": 0.0, "step": 2624 }, { "epoch": 1.5268285589646649, "grad_norm": NaN, "learning_rate": 1.393653439275729e-05, "loss": 0.0, "step": 2625 }, { "epoch": 1.5274102079395084, "grad_norm": NaN, "learning_rate": 1.3903955417377085e-05, "loss": 0.0, "step": 2626 }, { "epoch": 1.5279918569143522, "grad_norm": NaN, "learning_rate": 1.3871408415238013e-05, "loss": 0.0, "step": 2627 }, { "epoch": 1.5285735058891958, "grad_norm": NaN, "learning_rate": 1.3838893415169635e-05, "loss": 0.0, "step": 2628 }, { "epoch": 1.5291551548640396, "grad_norm": NaN, "learning_rate": 1.3806410445973173e-05, "loss": 0.0, "step": 2629 }, { "epoch": 1.5297368038388832, "grad_norm": NaN, "learning_rate": 1.3773959536421454e-05, "loss": 0.0, "step": 2630 }, { "epoch": 1.5303184528137268, "grad_norm": NaN, "learning_rate": 1.3741540715258939e-05, "loss": 0.0, "step": 2631 }, { "epoch": 1.5309001017885706, "grad_norm": NaN, "learning_rate": 1.3709154011201641e-05, "loss": 0.0, "step": 2632 }, { "epoch": 1.5314817507634144, "grad_norm": NaN, "learning_rate": 1.3676799452937118e-05, "loss": 0.0, "step": 2633 }, { "epoch": 1.532063399738258, "grad_norm": NaN, "learning_rate": 1.36444770691245e-05, "loss": 0.0, "step": 2634 }, { "epoch": 1.5326450487131016, "grad_norm": NaN, "learning_rate": 1.3612186888394369e-05, "loss": 0.0, "step": 2635 }, { "epoch": 1.5332266976879452, "grad_norm": NaN, "learning_rate": 1.3579928939348802e-05, "loss": 0.0, "step": 2636 }, { "epoch": 1.533808346662789, "grad_norm": NaN, "learning_rate": 1.3547703250561316e-05, "loss": 0.0, "step": 2637 }, { "epoch": 1.5343899956376328, "grad_norm": NaN, "learning_rate": 1.3515509850576868e-05, "loss": 0.0, "step": 2638 }, { "epoch": 1.5349716446124764, "grad_norm": NaN, "learning_rate": 1.3483348767911803e-05, "loss": 0.0, "step": 2639 }, { "epoch": 1.53555329358732, "grad_norm": NaN, "learning_rate": 1.3451220031053841e-05, "loss": 0.0, "step": 2640 }, { "epoch": 1.5361349425621638, "grad_norm": NaN, "learning_rate": 1.3419123668462047e-05, "loss": 0.0, "step": 2641 }, { "epoch": 1.5367165915370076, "grad_norm": NaN, "learning_rate": 1.3387059708566801e-05, "loss": 0.0, "step": 2642 }, { "epoch": 1.5372982405118512, "grad_norm": NaN, "learning_rate": 1.3355028179769829e-05, "loss": 0.0, "step": 2643 }, { "epoch": 1.5378798894866947, "grad_norm": NaN, "learning_rate": 1.3323029110444079e-05, "loss": 0.0, "step": 2644 }, { "epoch": 1.5384615384615383, "grad_norm": NaN, "learning_rate": 1.3291062528933768e-05, "loss": 0.0, "step": 2645 }, { "epoch": 1.5390431874363821, "grad_norm": NaN, "learning_rate": 1.3259128463554321e-05, "loss": 0.0, "step": 2646 }, { "epoch": 1.539624836411226, "grad_norm": NaN, "learning_rate": 1.3227226942592374e-05, "loss": 0.0, "step": 2647 }, { "epoch": 1.5402064853860695, "grad_norm": NaN, "learning_rate": 1.319535799430574e-05, "loss": 0.0, "step": 2648 }, { "epoch": 1.540788134360913, "grad_norm": NaN, "learning_rate": 1.3163521646923366e-05, "loss": 0.0, "step": 2649 }, { "epoch": 1.541369783335757, "grad_norm": NaN, "learning_rate": 1.3131717928645325e-05, "loss": 0.0, "step": 2650 }, { "epoch": 1.5419514323106005, "grad_norm": NaN, "learning_rate": 1.3099946867642804e-05, "loss": 0.0, "step": 2651 }, { "epoch": 1.5425330812854443, "grad_norm": NaN, "learning_rate": 1.3068208492058026e-05, "loss": 0.0, "step": 2652 }, { "epoch": 1.543114730260288, "grad_norm": NaN, "learning_rate": 1.3036502830004316e-05, "loss": 0.0, "step": 2653 }, { "epoch": 1.5436963792351315, "grad_norm": NaN, "learning_rate": 1.3004829909565975e-05, "loss": 0.0, "step": 2654 }, { "epoch": 1.5442780282099753, "grad_norm": NaN, "learning_rate": 1.2973189758798321e-05, "loss": 0.0, "step": 2655 }, { "epoch": 1.544859677184819, "grad_norm": NaN, "learning_rate": 1.2941582405727642e-05, "loss": 0.0, "step": 2656 }, { "epoch": 1.5454413261596627, "grad_norm": NaN, "learning_rate": 1.2910007878351177e-05, "loss": 0.0, "step": 2657 }, { "epoch": 1.5460229751345063, "grad_norm": NaN, "learning_rate": 1.2878466204637086e-05, "loss": 0.0, "step": 2658 }, { "epoch": 1.5466046241093498, "grad_norm": NaN, "learning_rate": 1.284695741252443e-05, "loss": 0.0, "step": 2659 }, { "epoch": 1.5471862730841937, "grad_norm": NaN, "learning_rate": 1.2815481529923135e-05, "loss": 0.0, "step": 2660 }, { "epoch": 1.5477679220590375, "grad_norm": NaN, "learning_rate": 1.2784038584713981e-05, "loss": 0.0, "step": 2661 }, { "epoch": 1.548349571033881, "grad_norm": NaN, "learning_rate": 1.2752628604748596e-05, "loss": 0.0, "step": 2662 }, { "epoch": 1.5489312200087246, "grad_norm": NaN, "learning_rate": 1.2721251617849373e-05, "loss": 0.0, "step": 2663 }, { "epoch": 1.5495128689835684, "grad_norm": NaN, "learning_rate": 1.2689907651809501e-05, "loss": 0.0, "step": 2664 }, { "epoch": 1.5500945179584122, "grad_norm": NaN, "learning_rate": 1.2658596734392909e-05, "loss": 0.0, "step": 2665 }, { "epoch": 1.5506761669332558, "grad_norm": NaN, "learning_rate": 1.2627318893334261e-05, "loss": 0.0, "step": 2666 }, { "epoch": 1.5512578159080994, "grad_norm": NaN, "learning_rate": 1.259607415633891e-05, "loss": 0.0, "step": 2667 }, { "epoch": 1.551839464882943, "grad_norm": NaN, "learning_rate": 1.2564862551082902e-05, "loss": 0.0, "step": 2668 }, { "epoch": 1.5524211138577868, "grad_norm": NaN, "learning_rate": 1.2533684105212928e-05, "loss": 0.0, "step": 2669 }, { "epoch": 1.5530027628326306, "grad_norm": NaN, "learning_rate": 1.2502538846346302e-05, "loss": 0.0, "step": 2670 }, { "epoch": 1.5535844118074742, "grad_norm": NaN, "learning_rate": 1.2471426802070929e-05, "loss": 0.0, "step": 2671 }, { "epoch": 1.5541660607823178, "grad_norm": NaN, "learning_rate": 1.2440347999945351e-05, "loss": 0.0, "step": 2672 }, { "epoch": 1.5547477097571616, "grad_norm": NaN, "learning_rate": 1.2409302467498595e-05, "loss": 0.0, "step": 2673 }, { "epoch": 1.5553293587320054, "grad_norm": NaN, "learning_rate": 1.2378290232230262e-05, "loss": 0.0, "step": 2674 }, { "epoch": 1.555911007706849, "grad_norm": NaN, "learning_rate": 1.2347311321610432e-05, "loss": 0.0, "step": 2675 }, { "epoch": 1.5564926566816926, "grad_norm": NaN, "learning_rate": 1.2316365763079685e-05, "loss": 0.0, "step": 2676 }, { "epoch": 1.5570743056565362, "grad_norm": NaN, "learning_rate": 1.2285453584049056e-05, "loss": 0.0, "step": 2677 }, { "epoch": 1.55765595463138, "grad_norm": NaN, "learning_rate": 1.22545748119e-05, "loss": 0.0, "step": 2678 }, { "epoch": 1.5582376036062238, "grad_norm": NaN, "learning_rate": 1.2223729473984396e-05, "loss": 0.0, "step": 2679 }, { "epoch": 1.5588192525810673, "grad_norm": NaN, "learning_rate": 1.2192917597624482e-05, "loss": 0.0, "step": 2680 }, { "epoch": 1.559400901555911, "grad_norm": NaN, "learning_rate": 1.2162139210112916e-05, "loss": 0.0, "step": 2681 }, { "epoch": 1.5599825505307547, "grad_norm": NaN, "learning_rate": 1.213139433871262e-05, "loss": 0.0, "step": 2682 }, { "epoch": 1.5605641995055983, "grad_norm": NaN, "learning_rate": 1.2100683010656877e-05, "loss": 0.0, "step": 2683 }, { "epoch": 1.5611458484804421, "grad_norm": NaN, "learning_rate": 1.2070005253149235e-05, "loss": 0.0, "step": 2684 }, { "epoch": 1.5617274974552857, "grad_norm": NaN, "learning_rate": 1.203936109336351e-05, "loss": 0.0, "step": 2685 }, { "epoch": 1.5623091464301293, "grad_norm": NaN, "learning_rate": 1.200875055844376e-05, "loss": 0.0, "step": 2686 }, { "epoch": 1.562890795404973, "grad_norm": NaN, "learning_rate": 1.1978173675504257e-05, "loss": 0.0, "step": 2687 }, { "epoch": 1.563472444379817, "grad_norm": NaN, "learning_rate": 1.1947630471629467e-05, "loss": 0.0, "step": 2688 }, { "epoch": 1.5640540933546605, "grad_norm": NaN, "learning_rate": 1.1917120973874025e-05, "loss": 0.0, "step": 2689 }, { "epoch": 1.564635742329504, "grad_norm": NaN, "learning_rate": 1.1886645209262692e-05, "loss": 0.0, "step": 2690 }, { "epoch": 1.5652173913043477, "grad_norm": NaN, "learning_rate": 1.185620320479039e-05, "loss": 0.0, "step": 2691 }, { "epoch": 1.5657990402791915, "grad_norm": NaN, "learning_rate": 1.1825794987422095e-05, "loss": 0.0, "step": 2692 }, { "epoch": 1.5663806892540353, "grad_norm": NaN, "learning_rate": 1.179542058409288e-05, "loss": 0.0, "step": 2693 }, { "epoch": 1.5669623382288789, "grad_norm": NaN, "learning_rate": 1.1765080021707847e-05, "loss": 0.0, "step": 2694 }, { "epoch": 1.5675439872037225, "grad_norm": NaN, "learning_rate": 1.173477332714214e-05, "loss": 0.0, "step": 2695 }, { "epoch": 1.5681256361785663, "grad_norm": NaN, "learning_rate": 1.1704500527240892e-05, "loss": 0.0, "step": 2696 }, { "epoch": 1.56870728515341, "grad_norm": NaN, "learning_rate": 1.1674261648819212e-05, "loss": 0.0, "step": 2697 }, { "epoch": 1.5692889341282537, "grad_norm": NaN, "learning_rate": 1.1644056718662177e-05, "loss": 0.0, "step": 2698 }, { "epoch": 1.5698705831030972, "grad_norm": NaN, "learning_rate": 1.1613885763524756e-05, "loss": 0.0, "step": 2699 }, { "epoch": 1.5704522320779408, "grad_norm": NaN, "learning_rate": 1.158374881013189e-05, "loss": 0.0, "step": 2700 }, { "epoch": 1.5710338810527846, "grad_norm": NaN, "learning_rate": 1.1553645885178338e-05, "loss": 0.0, "step": 2701 }, { "epoch": 1.5716155300276284, "grad_norm": NaN, "learning_rate": 1.1523577015328752e-05, "loss": 0.0, "step": 2702 }, { "epoch": 1.572197179002472, "grad_norm": NaN, "learning_rate": 1.1493542227217597e-05, "loss": 0.0, "step": 2703 }, { "epoch": 1.5727788279773156, "grad_norm": NaN, "learning_rate": 1.146354154744917e-05, "loss": 0.0, "step": 2704 }, { "epoch": 1.5733604769521594, "grad_norm": NaN, "learning_rate": 1.1433575002597535e-05, "loss": 0.0, "step": 2705 }, { "epoch": 1.573942125927003, "grad_norm": NaN, "learning_rate": 1.1403642619206533e-05, "loss": 0.0, "step": 2706 }, { "epoch": 1.5745237749018468, "grad_norm": NaN, "learning_rate": 1.137374442378975e-05, "loss": 0.0, "step": 2707 }, { "epoch": 1.5751054238766904, "grad_norm": NaN, "learning_rate": 1.1343880442830473e-05, "loss": 0.0, "step": 2708 }, { "epoch": 1.575687072851534, "grad_norm": NaN, "learning_rate": 1.1314050702781682e-05, "loss": 0.0, "step": 2709 }, { "epoch": 1.5762687218263778, "grad_norm": NaN, "learning_rate": 1.128425523006606e-05, "loss": 0.0, "step": 2710 }, { "epoch": 1.5768503708012216, "grad_norm": NaN, "learning_rate": 1.1254494051075903e-05, "loss": 0.0, "step": 2711 }, { "epoch": 1.5774320197760652, "grad_norm": NaN, "learning_rate": 1.1224767192173135e-05, "loss": 0.0, "step": 2712 }, { "epoch": 1.5780136687509088, "grad_norm": NaN, "learning_rate": 1.1195074679689282e-05, "loss": 0.0, "step": 2713 }, { "epoch": 1.5785953177257523, "grad_norm": NaN, "learning_rate": 1.116541653992546e-05, "loss": 0.0, "step": 2714 }, { "epoch": 1.5791769667005962, "grad_norm": NaN, "learning_rate": 1.1135792799152317e-05, "loss": 0.0, "step": 2715 }, { "epoch": 1.57975861567544, "grad_norm": NaN, "learning_rate": 1.1106203483610045e-05, "loss": 0.0, "step": 2716 }, { "epoch": 1.5803402646502835, "grad_norm": NaN, "learning_rate": 1.107664861950834e-05, "loss": 0.0, "step": 2717 }, { "epoch": 1.5809219136251271, "grad_norm": NaN, "learning_rate": 1.1047128233026366e-05, "loss": 0.0, "step": 2718 }, { "epoch": 1.581503562599971, "grad_norm": NaN, "learning_rate": 1.101764235031279e-05, "loss": 0.0, "step": 2719 }, { "epoch": 1.5820852115748147, "grad_norm": NaN, "learning_rate": 1.098819099748567e-05, "loss": 0.0, "step": 2720 }, { "epoch": 1.5826668605496583, "grad_norm": NaN, "learning_rate": 1.0958774200632505e-05, "loss": 0.0, "step": 2721 }, { "epoch": 1.583248509524502, "grad_norm": NaN, "learning_rate": 1.0929391985810167e-05, "loss": 0.0, "step": 2722 }, { "epoch": 1.5838301584993455, "grad_norm": NaN, "learning_rate": 1.0900044379044905e-05, "loss": 0.0, "step": 2723 }, { "epoch": 1.5844118074741893, "grad_norm": NaN, "learning_rate": 1.0870731406332324e-05, "loss": 0.0, "step": 2724 }, { "epoch": 1.5849934564490331, "grad_norm": NaN, "learning_rate": 1.0841453093637327e-05, "loss": 0.0, "step": 2725 }, { "epoch": 1.5855751054238767, "grad_norm": NaN, "learning_rate": 1.081220946689413e-05, "loss": 0.0, "step": 2726 }, { "epoch": 1.5861567543987203, "grad_norm": NaN, "learning_rate": 1.0783000552006234e-05, "loss": 0.0, "step": 2727 }, { "epoch": 1.586738403373564, "grad_norm": NaN, "learning_rate": 1.0753826374846355e-05, "loss": 0.0, "step": 2728 }, { "epoch": 1.587320052348408, "grad_norm": NaN, "learning_rate": 1.0724686961256496e-05, "loss": 0.0, "step": 2729 }, { "epoch": 1.5879017013232515, "grad_norm": NaN, "learning_rate": 1.0695582337047827e-05, "loss": 0.0, "step": 2730 }, { "epoch": 1.588483350298095, "grad_norm": NaN, "learning_rate": 1.066651252800071e-05, "loss": 0.0, "step": 2731 }, { "epoch": 1.5890649992729386, "grad_norm": NaN, "learning_rate": 1.0637477559864673e-05, "loss": 0.0, "step": 2732 }, { "epoch": 1.5896466482477825, "grad_norm": NaN, "learning_rate": 1.0608477458358373e-05, "loss": 0.0, "step": 2733 }, { "epoch": 1.5902282972226263, "grad_norm": NaN, "learning_rate": 1.0579512249169599e-05, "loss": 0.0, "step": 2734 }, { "epoch": 1.5908099461974698, "grad_norm": NaN, "learning_rate": 1.0550581957955218e-05, "loss": 0.0, "step": 2735 }, { "epoch": 1.5913915951723134, "grad_norm": NaN, "learning_rate": 1.0521686610341169e-05, "loss": 0.0, "step": 2736 }, { "epoch": 1.5919732441471572, "grad_norm": NaN, "learning_rate": 1.0492826231922437e-05, "loss": 0.0, "step": 2737 }, { "epoch": 1.5925548931220008, "grad_norm": NaN, "learning_rate": 1.0464000848263061e-05, "loss": 0.0, "step": 2738 }, { "epoch": 1.5931365420968446, "grad_norm": NaN, "learning_rate": 1.0435210484896046e-05, "loss": 0.0, "step": 2739 }, { "epoch": 1.5937181910716882, "grad_norm": NaN, "learning_rate": 1.0406455167323386e-05, "loss": 0.0, "step": 2740 }, { "epoch": 1.5942998400465318, "grad_norm": NaN, "learning_rate": 1.037773492101604e-05, "loss": 0.0, "step": 2741 }, { "epoch": 1.5948814890213756, "grad_norm": NaN, "learning_rate": 1.0349049771413894e-05, "loss": 0.0, "step": 2742 }, { "epoch": 1.5954631379962194, "grad_norm": NaN, "learning_rate": 1.0320399743925747e-05, "loss": 0.0, "step": 2743 }, { "epoch": 1.596044786971063, "grad_norm": NaN, "learning_rate": 1.0291784863929288e-05, "loss": 0.0, "step": 2744 }, { "epoch": 1.5966264359459066, "grad_norm": NaN, "learning_rate": 1.0263205156771083e-05, "loss": 0.0, "step": 2745 }, { "epoch": 1.5972080849207502, "grad_norm": NaN, "learning_rate": 1.0234660647766531e-05, "loss": 0.0, "step": 2746 }, { "epoch": 1.597789733895594, "grad_norm": NaN, "learning_rate": 1.0206151362199835e-05, "loss": 0.0, "step": 2747 }, { "epoch": 1.5983713828704378, "grad_norm": NaN, "learning_rate": 1.0177677325324048e-05, "loss": 0.0, "step": 2748 }, { "epoch": 1.5989530318452814, "grad_norm": NaN, "learning_rate": 1.0149238562360962e-05, "loss": 0.0, "step": 2749 }, { "epoch": 1.599534680820125, "grad_norm": NaN, "learning_rate": 1.012083509850113e-05, "loss": 0.0, "step": 2750 }, { "epoch": 1.6001163297949688, "grad_norm": NaN, "learning_rate": 1.009246695890384e-05, "loss": 0.0, "step": 2751 }, { "epoch": 1.6006979787698126, "grad_norm": NaN, "learning_rate": 1.0064134168697093e-05, "loss": 0.0, "step": 2752 }, { "epoch": 1.6012796277446562, "grad_norm": NaN, "learning_rate": 1.0035836752977573e-05, "loss": 0.0, "step": 2753 }, { "epoch": 1.6018612767194997, "grad_norm": NaN, "learning_rate": 1.0007574736810638e-05, "loss": 0.0, "step": 2754 }, { "epoch": 1.6024429256943433, "grad_norm": NaN, "learning_rate": 9.979348145230278e-06, "loss": 0.0, "step": 2755 }, { "epoch": 1.6030245746691871, "grad_norm": NaN, "learning_rate": 9.951157003239103e-06, "loss": 0.0, "step": 2756 }, { "epoch": 1.603606223644031, "grad_norm": NaN, "learning_rate": 9.923001335808352e-06, "loss": 0.0, "step": 2757 }, { "epoch": 1.6041878726188745, "grad_norm": NaN, "learning_rate": 9.894881167877812e-06, "loss": 0.0, "step": 2758 }, { "epoch": 1.604769521593718, "grad_norm": NaN, "learning_rate": 9.866796524355826e-06, "loss": 0.0, "step": 2759 }, { "epoch": 1.605351170568562, "grad_norm": NaN, "learning_rate": 9.838747430119288e-06, "loss": 0.0, "step": 2760 }, { "epoch": 1.6059328195434057, "grad_norm": NaN, "learning_rate": 9.810733910013582e-06, "loss": 0.0, "step": 2761 }, { "epoch": 1.6065144685182493, "grad_norm": NaN, "learning_rate": 9.782755988852593e-06, "loss": 0.0, "step": 2762 }, { "epoch": 1.607096117493093, "grad_norm": NaN, "learning_rate": 9.754813691418679e-06, "loss": 0.0, "step": 2763 }, { "epoch": 1.6076777664679365, "grad_norm": NaN, "learning_rate": 9.726907042462625e-06, "loss": 0.0, "step": 2764 }, { "epoch": 1.6082594154427803, "grad_norm": NaN, "learning_rate": 9.699036066703653e-06, "loss": 0.0, "step": 2765 }, { "epoch": 1.608841064417624, "grad_norm": NaN, "learning_rate": 9.671200788829371e-06, "loss": 0.0, "step": 2766 }, { "epoch": 1.6094227133924677, "grad_norm": NaN, "learning_rate": 9.643401233495809e-06, "loss": 0.0, "step": 2767 }, { "epoch": 1.6100043623673113, "grad_norm": NaN, "learning_rate": 9.6156374253273e-06, "loss": 0.0, "step": 2768 }, { "epoch": 1.6105860113421548, "grad_norm": NaN, "learning_rate": 9.58790938891655e-06, "loss": 0.0, "step": 2769 }, { "epoch": 1.6111676603169987, "grad_norm": NaN, "learning_rate": 9.560217148824551e-06, "loss": 0.0, "step": 2770 }, { "epoch": 1.6117493092918425, "grad_norm": NaN, "learning_rate": 9.532560729580614e-06, "loss": 0.0, "step": 2771 }, { "epoch": 1.612330958266686, "grad_norm": NaN, "learning_rate": 9.504940155682307e-06, "loss": 0.0, "step": 2772 }, { "epoch": 1.6129126072415296, "grad_norm": NaN, "learning_rate": 9.477355451595443e-06, "loss": 0.0, "step": 2773 }, { "epoch": 1.6134942562163734, "grad_norm": NaN, "learning_rate": 9.449806641754067e-06, "loss": 0.0, "step": 2774 }, { "epoch": 1.6140759051912172, "grad_norm": NaN, "learning_rate": 9.422293750560423e-06, "loss": 0.0, "step": 2775 }, { "epoch": 1.6146575541660608, "grad_norm": NaN, "learning_rate": 9.39481680238497e-06, "loss": 0.0, "step": 2776 }, { "epoch": 1.6152392031409044, "grad_norm": NaN, "learning_rate": 9.367375821566288e-06, "loss": 0.0, "step": 2777 }, { "epoch": 1.615820852115748, "grad_norm": NaN, "learning_rate": 9.33997083241112e-06, "loss": 0.0, "step": 2778 }, { "epoch": 1.6164025010905918, "grad_norm": NaN, "learning_rate": 9.31260185919432e-06, "loss": 0.0, "step": 2779 }, { "epoch": 1.6169841500654356, "grad_norm": NaN, "learning_rate": 9.285268926158847e-06, "loss": 0.0, "step": 2780 }, { "epoch": 1.6175657990402792, "grad_norm": NaN, "learning_rate": 9.257972057515724e-06, "loss": 0.0, "step": 2781 }, { "epoch": 1.6181474480151228, "grad_norm": NaN, "learning_rate": 9.230711277444049e-06, "loss": 0.0, "step": 2782 }, { "epoch": 1.6187290969899666, "grad_norm": NaN, "learning_rate": 9.203486610090928e-06, "loss": 0.0, "step": 2783 }, { "epoch": 1.6193107459648104, "grad_norm": NaN, "learning_rate": 9.1762980795715e-06, "loss": 0.0, "step": 2784 }, { "epoch": 1.619892394939654, "grad_norm": NaN, "learning_rate": 9.149145709968876e-06, "loss": 0.0, "step": 2785 }, { "epoch": 1.6204740439144976, "grad_norm": NaN, "learning_rate": 9.122029525334168e-06, "loss": 0.0, "step": 2786 }, { "epoch": 1.6210556928893411, "grad_norm": NaN, "learning_rate": 9.094949549686404e-06, "loss": 0.0, "step": 2787 }, { "epoch": 1.621637341864185, "grad_norm": NaN, "learning_rate": 9.06790580701255e-06, "loss": 0.0, "step": 2788 }, { "epoch": 1.6222189908390288, "grad_norm": NaN, "learning_rate": 9.040898321267477e-06, "loss": 0.0, "step": 2789 }, { "epoch": 1.6228006398138723, "grad_norm": NaN, "learning_rate": 9.013927116373939e-06, "loss": 0.0, "step": 2790 }, { "epoch": 1.623382288788716, "grad_norm": NaN, "learning_rate": 8.986992216222562e-06, "loss": 0.0, "step": 2791 }, { "epoch": 1.6239639377635597, "grad_norm": NaN, "learning_rate": 8.960093644671797e-06, "loss": 0.0, "step": 2792 }, { "epoch": 1.6245455867384033, "grad_norm": NaN, "learning_rate": 8.933231425547934e-06, "loss": 0.0, "step": 2793 }, { "epoch": 1.6251272357132471, "grad_norm": NaN, "learning_rate": 8.906405582645033e-06, "loss": 0.0, "step": 2794 }, { "epoch": 1.6257088846880907, "grad_norm": NaN, "learning_rate": 8.879616139724984e-06, "loss": 0.0, "step": 2795 }, { "epoch": 1.6262905336629343, "grad_norm": NaN, "learning_rate": 8.852863120517391e-06, "loss": 0.0, "step": 2796 }, { "epoch": 1.626872182637778, "grad_norm": NaN, "learning_rate": 8.826146548719604e-06, "loss": 0.0, "step": 2797 }, { "epoch": 1.627453831612622, "grad_norm": NaN, "learning_rate": 8.799466447996696e-06, "loss": 0.0, "step": 2798 }, { "epoch": 1.6280354805874655, "grad_norm": NaN, "learning_rate": 8.772822841981438e-06, "loss": 0.0, "step": 2799 }, { "epoch": 1.628617129562309, "grad_norm": NaN, "learning_rate": 8.746215754274251e-06, "loss": 0.0, "step": 2800 }, { "epoch": 1.6291987785371527, "grad_norm": NaN, "learning_rate": 8.719645208443233e-06, "loss": 0.0, "step": 2801 }, { "epoch": 1.6297804275119965, "grad_norm": NaN, "learning_rate": 8.693111228024109e-06, "loss": 0.0, "step": 2802 }, { "epoch": 1.6303620764868403, "grad_norm": NaN, "learning_rate": 8.6666138365202e-06, "loss": 0.0, "step": 2803 }, { "epoch": 1.6309437254616839, "grad_norm": NaN, "learning_rate": 8.640153057402451e-06, "loss": 0.0, "step": 2804 }, { "epoch": 1.6315253744365275, "grad_norm": NaN, "learning_rate": 8.613728914109337e-06, "loss": 0.0, "step": 2805 }, { "epoch": 1.6321070234113713, "grad_norm": NaN, "learning_rate": 8.587341430046908e-06, "loss": 0.0, "step": 2806 }, { "epoch": 1.632688672386215, "grad_norm": NaN, "learning_rate": 8.560990628588728e-06, "loss": 0.0, "step": 2807 }, { "epoch": 1.6332703213610587, "grad_norm": NaN, "learning_rate": 8.534676533075875e-06, "loss": 0.0, "step": 2808 }, { "epoch": 1.6338519703359022, "grad_norm": NaN, "learning_rate": 8.50839916681691e-06, "loss": 0.0, "step": 2809 }, { "epoch": 1.6344336193107458, "grad_norm": NaN, "learning_rate": 8.482158553087866e-06, "loss": 0.0, "step": 2810 }, { "epoch": 1.6350152682855896, "grad_norm": NaN, "learning_rate": 8.455954715132208e-06, "loss": 0.0, "step": 2811 }, { "epoch": 1.6355969172604334, "grad_norm": NaN, "learning_rate": 8.429787676160845e-06, "loss": 0.0, "step": 2812 }, { "epoch": 1.636178566235277, "grad_norm": NaN, "learning_rate": 8.403657459352065e-06, "loss": 0.0, "step": 2813 }, { "epoch": 1.6367602152101206, "grad_norm": NaN, "learning_rate": 8.377564087851575e-06, "loss": 0.0, "step": 2814 }, { "epoch": 1.6373418641849644, "grad_norm": NaN, "learning_rate": 8.351507584772411e-06, "loss": 0.0, "step": 2815 }, { "epoch": 1.6379235131598082, "grad_norm": NaN, "learning_rate": 8.325487973194967e-06, "loss": 0.0, "step": 2816 }, { "epoch": 1.6385051621346518, "grad_norm": NaN, "learning_rate": 8.299505276166964e-06, "loss": 0.0, "step": 2817 }, { "epoch": 1.6390868111094954, "grad_norm": NaN, "learning_rate": 8.273559516703406e-06, "loss": 0.0, "step": 2818 }, { "epoch": 1.639668460084339, "grad_norm": NaN, "learning_rate": 8.247650717786598e-06, "loss": 0.0, "step": 2819 }, { "epoch": 1.6402501090591828, "grad_norm": NaN, "learning_rate": 8.221778902366096e-06, "loss": 0.0, "step": 2820 }, { "epoch": 1.6408317580340266, "grad_norm": NaN, "learning_rate": 8.195944093358698e-06, "loss": 0.0, "step": 2821 }, { "epoch": 1.6414134070088702, "grad_norm": NaN, "learning_rate": 8.170146313648408e-06, "loss": 0.0, "step": 2822 }, { "epoch": 1.6419950559837138, "grad_norm": NaN, "learning_rate": 8.144385586086479e-06, "loss": 0.0, "step": 2823 }, { "epoch": 1.6425767049585576, "grad_norm": NaN, "learning_rate": 8.118661933491295e-06, "loss": 0.0, "step": 2824 }, { "epoch": 1.6431583539334012, "grad_norm": NaN, "learning_rate": 8.092975378648403e-06, "loss": 0.0, "step": 2825 }, { "epoch": 1.643740002908245, "grad_norm": NaN, "learning_rate": 8.067325944310517e-06, "loss": 0.0, "step": 2826 }, { "epoch": 1.6443216518830885, "grad_norm": NaN, "learning_rate": 8.041713653197447e-06, "loss": 0.0, "step": 2827 }, { "epoch": 1.6449033008579321, "grad_norm": NaN, "learning_rate": 8.016138527996114e-06, "loss": 0.0, "step": 2828 }, { "epoch": 1.645484949832776, "grad_norm": NaN, "learning_rate": 7.990600591360509e-06, "loss": 0.0, "step": 2829 }, { "epoch": 1.6460665988076197, "grad_norm": NaN, "learning_rate": 7.965099865911685e-06, "loss": 0.0, "step": 2830 }, { "epoch": 1.6466482477824633, "grad_norm": NaN, "learning_rate": 7.93963637423773e-06, "loss": 0.0, "step": 2831 }, { "epoch": 1.647229896757307, "grad_norm": NaN, "learning_rate": 7.91421013889378e-06, "loss": 0.0, "step": 2832 }, { "epoch": 1.6478115457321505, "grad_norm": NaN, "learning_rate": 7.888821182401934e-06, "loss": 0.0, "step": 2833 }, { "epoch": 1.6483931947069943, "grad_norm": NaN, "learning_rate": 7.863469527251283e-06, "loss": 0.0, "step": 2834 }, { "epoch": 1.648974843681838, "grad_norm": NaN, "learning_rate": 7.838155195897873e-06, "loss": 0.0, "step": 2835 }, { "epoch": 1.6495564926566817, "grad_norm": NaN, "learning_rate": 7.812878210764695e-06, "loss": 0.0, "step": 2836 }, { "epoch": 1.6501381416315253, "grad_norm": NaN, "learning_rate": 7.787638594241663e-06, "loss": 0.0, "step": 2837 }, { "epoch": 1.650719790606369, "grad_norm": NaN, "learning_rate": 7.762436368685584e-06, "loss": 0.0, "step": 2838 }, { "epoch": 1.651301439581213, "grad_norm": NaN, "learning_rate": 7.737271556420134e-06, "loss": 0.0, "step": 2839 }, { "epoch": 1.6518830885560565, "grad_norm": NaN, "learning_rate": 7.712144179735864e-06, "loss": 0.0, "step": 2840 }, { "epoch": 1.6524647375309, "grad_norm": NaN, "learning_rate": 7.68705426089018e-06, "loss": 0.0, "step": 2841 }, { "epoch": 1.6530463865057436, "grad_norm": NaN, "learning_rate": 7.662001822107285e-06, "loss": 0.0, "step": 2842 }, { "epoch": 1.6536280354805875, "grad_norm": NaN, "learning_rate": 7.636986885578174e-06, "loss": 0.0, "step": 2843 }, { "epoch": 1.6542096844554313, "grad_norm": NaN, "learning_rate": 7.612009473460646e-06, "loss": 0.0, "step": 2844 }, { "epoch": 1.6547913334302748, "grad_norm": NaN, "learning_rate": 7.58706960787926e-06, "loss": 0.0, "step": 2845 }, { "epoch": 1.6553729824051184, "grad_norm": NaN, "learning_rate": 7.5621673109253e-06, "loss": 0.0, "step": 2846 }, { "epoch": 1.6559546313799622, "grad_norm": NaN, "learning_rate": 7.537302604656782e-06, "loss": 0.0, "step": 2847 }, { "epoch": 1.6565362803548058, "grad_norm": NaN, "learning_rate": 7.512475511098432e-06, "loss": 0.0, "step": 2848 }, { "epoch": 1.6571179293296496, "grad_norm": NaN, "learning_rate": 7.487686052241649e-06, "loss": 0.0, "step": 2849 }, { "epoch": 1.6576995783044932, "grad_norm": NaN, "learning_rate": 7.46293425004449e-06, "loss": 0.0, "step": 2850 }, { "epoch": 1.6582812272793368, "grad_norm": NaN, "learning_rate": 7.438220126431688e-06, "loss": 0.0, "step": 2851 }, { "epoch": 1.6588628762541806, "grad_norm": NaN, "learning_rate": 7.413543703294568e-06, "loss": 0.0, "step": 2852 }, { "epoch": 1.6594445252290244, "grad_norm": NaN, "learning_rate": 7.388905002491065e-06, "loss": 0.0, "step": 2853 }, { "epoch": 1.660026174203868, "grad_norm": NaN, "learning_rate": 7.364304045845716e-06, "loss": 0.0, "step": 2854 }, { "epoch": 1.6606078231787116, "grad_norm": NaN, "learning_rate": 7.339740855149607e-06, "loss": 0.0, "step": 2855 }, { "epoch": 1.6611894721535552, "grad_norm": NaN, "learning_rate": 7.31521545216039e-06, "loss": 0.0, "step": 2856 }, { "epoch": 1.661771121128399, "grad_norm": NaN, "learning_rate": 7.2907278586022256e-06, "loss": 0.0, "step": 2857 }, { "epoch": 1.6623527701032428, "grad_norm": NaN, "learning_rate": 7.266278096165802e-06, "loss": 0.0, "step": 2858 }, { "epoch": 1.6629344190780864, "grad_norm": NaN, "learning_rate": 7.241866186508267e-06, "loss": 0.0, "step": 2859 }, { "epoch": 1.66351606805293, "grad_norm": NaN, "learning_rate": 7.217492151253291e-06, "loss": 0.0, "step": 2860 }, { "epoch": 1.6640977170277738, "grad_norm": NaN, "learning_rate": 7.193156011990954e-06, "loss": 0.0, "step": 2861 }, { "epoch": 1.6646793660026176, "grad_norm": NaN, "learning_rate": 7.168857790277777e-06, "loss": 0.0, "step": 2862 }, { "epoch": 1.6652610149774612, "grad_norm": NaN, "learning_rate": 7.144597507636697e-06, "loss": 0.0, "step": 2863 }, { "epoch": 1.6658426639523047, "grad_norm": NaN, "learning_rate": 7.120375185557049e-06, "loss": 0.0, "step": 2864 }, { "epoch": 1.6664243129271483, "grad_norm": NaN, "learning_rate": 7.096190845494532e-06, "loss": 0.0, "step": 2865 }, { "epoch": 1.6670059619019921, "grad_norm": NaN, "learning_rate": 7.072044508871223e-06, "loss": 0.0, "step": 2866 }, { "epoch": 1.667587610876836, "grad_norm": NaN, "learning_rate": 7.047936197075505e-06, "loss": 0.0, "step": 2867 }, { "epoch": 1.6681692598516795, "grad_norm": NaN, "learning_rate": 7.023865931462098e-06, "loss": 0.0, "step": 2868 }, { "epoch": 1.668750908826523, "grad_norm": NaN, "learning_rate": 6.99983373335204e-06, "loss": 0.0, "step": 2869 }, { "epoch": 1.669332557801367, "grad_norm": NaN, "learning_rate": 6.975839624032615e-06, "loss": 0.0, "step": 2870 }, { "epoch": 1.6699142067762107, "grad_norm": NaN, "learning_rate": 6.951883624757377e-06, "loss": 0.0, "step": 2871 }, { "epoch": 1.6704958557510543, "grad_norm": NaN, "learning_rate": 6.927965756746141e-06, "loss": 0.0, "step": 2872 }, { "epoch": 1.6710775047258979, "grad_norm": NaN, "learning_rate": 6.904086041184926e-06, "loss": 0.0, "step": 2873 }, { "epoch": 1.6716591537007415, "grad_norm": NaN, "learning_rate": 6.8802444992259684e-06, "loss": 0.0, "step": 2874 }, { "epoch": 1.6722408026755853, "grad_norm": NaN, "learning_rate": 6.85644115198768e-06, "loss": 0.0, "step": 2875 }, { "epoch": 1.672822451650429, "grad_norm": NaN, "learning_rate": 6.832676020554657e-06, "loss": 0.0, "step": 2876 }, { "epoch": 1.6734041006252727, "grad_norm": NaN, "learning_rate": 6.80894912597762e-06, "loss": 0.0, "step": 2877 }, { "epoch": 1.6739857496001163, "grad_norm": NaN, "learning_rate": 6.785260489273438e-06, "loss": 0.0, "step": 2878 }, { "epoch": 1.67456739857496, "grad_norm": NaN, "learning_rate": 6.7616101314251e-06, "loss": 0.0, "step": 2879 }, { "epoch": 1.6751490475498036, "grad_norm": NaN, "learning_rate": 6.737998073381674e-06, "loss": 0.0, "step": 2880 }, { "epoch": 1.6757306965246475, "grad_norm": NaN, "learning_rate": 6.7144243360583e-06, "loss": 0.0, "step": 2881 }, { "epoch": 1.676312345499491, "grad_norm": NaN, "learning_rate": 6.690888940336187e-06, "loss": 0.0, "step": 2882 }, { "epoch": 1.6768939944743346, "grad_norm": NaN, "learning_rate": 6.667391907062565e-06, "loss": 0.0, "step": 2883 }, { "epoch": 1.6774756434491784, "grad_norm": NaN, "learning_rate": 6.643933257050694e-06, "loss": 0.0, "step": 2884 }, { "epoch": 1.6780572924240222, "grad_norm": NaN, "learning_rate": 6.620513011079837e-06, "loss": 0.0, "step": 2885 }, { "epoch": 1.6786389413988658, "grad_norm": NaN, "learning_rate": 6.5971311898952396e-06, "loss": 0.0, "step": 2886 }, { "epoch": 1.6792205903737094, "grad_norm": NaN, "learning_rate": 6.573787814208088e-06, "loss": 0.0, "step": 2887 }, { "epoch": 1.679802239348553, "grad_norm": NaN, "learning_rate": 6.55048290469556e-06, "loss": 0.0, "step": 2888 }, { "epoch": 1.6803838883233968, "grad_norm": NaN, "learning_rate": 6.527216482000725e-06, "loss": 0.0, "step": 2889 }, { "epoch": 1.6809655372982406, "grad_norm": NaN, "learning_rate": 6.503988566732566e-06, "loss": 0.0, "step": 2890 }, { "epoch": 1.6815471862730842, "grad_norm": NaN, "learning_rate": 6.4807991794659625e-06, "loss": 0.0, "step": 2891 }, { "epoch": 1.6821288352479278, "grad_norm": NaN, "learning_rate": 6.457648340741673e-06, "loss": 0.0, "step": 2892 }, { "epoch": 1.6827104842227716, "grad_norm": NaN, "learning_rate": 6.434536071066294e-06, "loss": 0.0, "step": 2893 }, { "epoch": 1.6832921331976154, "grad_norm": NaN, "learning_rate": 6.41146239091227e-06, "loss": 0.0, "step": 2894 }, { "epoch": 1.683873782172459, "grad_norm": NaN, "learning_rate": 6.388427320717866e-06, "loss": 0.0, "step": 2895 }, { "epoch": 1.6844554311473026, "grad_norm": NaN, "learning_rate": 6.365430880887124e-06, "loss": 0.0, "step": 2896 }, { "epoch": 1.6850370801221461, "grad_norm": NaN, "learning_rate": 6.342473091789908e-06, "loss": 0.0, "step": 2897 }, { "epoch": 1.68561872909699, "grad_norm": NaN, "learning_rate": 6.319553973761816e-06, "loss": 0.0, "step": 2898 }, { "epoch": 1.6862003780718338, "grad_norm": NaN, "learning_rate": 6.296673547104198e-06, "loss": 0.0, "step": 2899 }, { "epoch": 1.6867820270466773, "grad_norm": NaN, "learning_rate": 6.273831832084137e-06, "loss": 0.0, "step": 2900 }, { "epoch": 1.687363676021521, "grad_norm": NaN, "learning_rate": 6.251028848934415e-06, "loss": 0.0, "step": 2901 }, { "epoch": 1.6879453249963647, "grad_norm": NaN, "learning_rate": 6.228264617853519e-06, "loss": 0.0, "step": 2902 }, { "epoch": 1.6885269739712083, "grad_norm": NaN, "learning_rate": 6.205539159005602e-06, "loss": 0.0, "step": 2903 }, { "epoch": 1.6891086229460521, "grad_norm": NaN, "learning_rate": 6.18285249252048e-06, "loss": 0.0, "step": 2904 }, { "epoch": 1.6896902719208957, "grad_norm": NaN, "learning_rate": 6.160204638493583e-06, "loss": 0.0, "step": 2905 }, { "epoch": 1.6902719208957393, "grad_norm": NaN, "learning_rate": 6.137595616986008e-06, "loss": 0.0, "step": 2906 }, { "epoch": 1.690853569870583, "grad_norm": NaN, "learning_rate": 6.11502544802442e-06, "loss": 0.0, "step": 2907 }, { "epoch": 1.691435218845427, "grad_norm": NaN, "learning_rate": 6.09249415160108e-06, "loss": 0.0, "step": 2908 }, { "epoch": 1.6920168678202705, "grad_norm": NaN, "learning_rate": 6.070001747673804e-06, "loss": 0.0, "step": 2909 }, { "epoch": 1.692598516795114, "grad_norm": NaN, "learning_rate": 6.047548256165974e-06, "loss": 0.0, "step": 2910 }, { "epoch": 1.6931801657699577, "grad_norm": NaN, "learning_rate": 6.0251336969664955e-06, "loss": 0.0, "step": 2911 }, { "epoch": 1.6937618147448015, "grad_norm": NaN, "learning_rate": 6.002758089929794e-06, "loss": 0.0, "step": 2912 }, { "epoch": 1.6943434637196453, "grad_norm": NaN, "learning_rate": 5.980421454875784e-06, "loss": 0.0, "step": 2913 }, { "epoch": 1.6949251126944889, "grad_norm": NaN, "learning_rate": 5.958123811589866e-06, "loss": 0.0, "step": 2914 }, { "epoch": 1.6955067616693325, "grad_norm": NaN, "learning_rate": 5.935865179822892e-06, "loss": 0.0, "step": 2915 }, { "epoch": 1.6960884106441763, "grad_norm": NaN, "learning_rate": 5.913645579291183e-06, "loss": 0.0, "step": 2916 }, { "epoch": 1.69667005961902, "grad_norm": NaN, "learning_rate": 5.891465029676463e-06, "loss": 0.0, "step": 2917 }, { "epoch": 1.6972517085938637, "grad_norm": NaN, "learning_rate": 5.869323550625872e-06, "loss": 0.0, "step": 2918 }, { "epoch": 1.6978333575687072, "grad_norm": NaN, "learning_rate": 5.847221161751942e-06, "loss": 0.0, "step": 2919 }, { "epoch": 1.6984150065435508, "grad_norm": NaN, "learning_rate": 5.825157882632576e-06, "loss": 0.0, "step": 2920 }, { "epoch": 1.6989966555183946, "grad_norm": NaN, "learning_rate": 5.8031337328110446e-06, "loss": 0.0, "step": 2921 }, { "epoch": 1.6995783044932384, "grad_norm": NaN, "learning_rate": 5.781148731795955e-06, "loss": 0.0, "step": 2922 }, { "epoch": 1.700159953468082, "grad_norm": NaN, "learning_rate": 5.759202899061222e-06, "loss": 0.0, "step": 2923 }, { "epoch": 1.7007416024429256, "grad_norm": NaN, "learning_rate": 5.737296254046076e-06, "loss": 0.0, "step": 2924 }, { "epoch": 1.7013232514177694, "grad_norm": NaN, "learning_rate": 5.71542881615506e-06, "loss": 0.0, "step": 2925 }, { "epoch": 1.7019049003926132, "grad_norm": NaN, "learning_rate": 5.693600604757954e-06, "loss": 0.0, "step": 2926 }, { "epoch": 1.7024865493674568, "grad_norm": NaN, "learning_rate": 5.671811639189795e-06, "loss": 0.0, "step": 2927 }, { "epoch": 1.7030681983423004, "grad_norm": NaN, "learning_rate": 5.650061938750878e-06, "loss": 0.0, "step": 2928 }, { "epoch": 1.703649847317144, "grad_norm": NaN, "learning_rate": 5.6283515227066985e-06, "loss": 0.0, "step": 2929 }, { "epoch": 1.7042314962919878, "grad_norm": NaN, "learning_rate": 5.606680410287962e-06, "loss": 0.0, "step": 2930 }, { "epoch": 1.7048131452668316, "grad_norm": NaN, "learning_rate": 5.5850486206905585e-06, "loss": 0.0, "step": 2931 }, { "epoch": 1.7053947942416752, "grad_norm": NaN, "learning_rate": 5.563456173075543e-06, "loss": 0.0, "step": 2932 }, { "epoch": 1.7059764432165188, "grad_norm": NaN, "learning_rate": 5.541903086569122e-06, "loss": 0.0, "step": 2933 }, { "epoch": 1.7065580921913626, "grad_norm": NaN, "learning_rate": 5.520389380262659e-06, "loss": 0.0, "step": 2934 }, { "epoch": 1.7071397411662061, "grad_norm": NaN, "learning_rate": 5.498915073212602e-06, "loss": 0.0, "step": 2935 }, { "epoch": 1.70772139014105, "grad_norm": NaN, "learning_rate": 5.47748018444052e-06, "loss": 0.0, "step": 2936 }, { "epoch": 1.7083030391158935, "grad_norm": NaN, "learning_rate": 5.456084732933054e-06, "loss": 0.0, "step": 2937 }, { "epoch": 1.7088846880907371, "grad_norm": NaN, "learning_rate": 5.434728737641925e-06, "loss": 0.0, "step": 2938 }, { "epoch": 1.709466337065581, "grad_norm": NaN, "learning_rate": 5.413412217483893e-06, "loss": 0.0, "step": 2939 }, { "epoch": 1.7100479860404247, "grad_norm": NaN, "learning_rate": 5.392135191340758e-06, "loss": 0.0, "step": 2940 }, { "epoch": 1.7106296350152683, "grad_norm": NaN, "learning_rate": 5.370897678059339e-06, "loss": 0.0, "step": 2941 }, { "epoch": 1.711211283990112, "grad_norm": NaN, "learning_rate": 5.349699696451438e-06, "loss": 0.0, "step": 2942 }, { "epoch": 1.7117929329649555, "grad_norm": NaN, "learning_rate": 5.328541265293874e-06, "loss": 0.0, "step": 2943 }, { "epoch": 1.7123745819397993, "grad_norm": NaN, "learning_rate": 5.307422403328399e-06, "loss": 0.0, "step": 2944 }, { "epoch": 1.712956230914643, "grad_norm": NaN, "learning_rate": 5.286343129261734e-06, "loss": 0.0, "step": 2945 }, { "epoch": 1.7135378798894867, "grad_norm": NaN, "learning_rate": 5.2653034617655365e-06, "loss": 0.0, "step": 2946 }, { "epoch": 1.7141195288643303, "grad_norm": NaN, "learning_rate": 5.244303419476359e-06, "loss": 0.0, "step": 2947 }, { "epoch": 1.714701177839174, "grad_norm": NaN, "learning_rate": 5.223343020995675e-06, "loss": 0.0, "step": 2948 }, { "epoch": 1.715282826814018, "grad_norm": NaN, "learning_rate": 5.202422284889841e-06, "loss": 0.0, "step": 2949 }, { "epoch": 1.7158644757888615, "grad_norm": NaN, "learning_rate": 5.181541229690073e-06, "loss": 0.0, "step": 2950 }, { "epoch": 1.716446124763705, "grad_norm": NaN, "learning_rate": 5.1606998738924435e-06, "loss": 0.0, "step": 2951 }, { "epoch": 1.7170277737385486, "grad_norm": NaN, "learning_rate": 5.139898235957852e-06, "loss": 0.0, "step": 2952 }, { "epoch": 1.7176094227133925, "grad_norm": NaN, "learning_rate": 5.119136334312036e-06, "loss": 0.0, "step": 2953 }, { "epoch": 1.7181910716882363, "grad_norm": NaN, "learning_rate": 5.098414187345518e-06, "loss": 0.0, "step": 2954 }, { "epoch": 1.7187727206630798, "grad_norm": NaN, "learning_rate": 5.077731813413611e-06, "loss": 0.0, "step": 2955 }, { "epoch": 1.7193543696379234, "grad_norm": NaN, "learning_rate": 5.057089230836393e-06, "loss": 0.0, "step": 2956 }, { "epoch": 1.7199360186127672, "grad_norm": NaN, "learning_rate": 5.036486457898704e-06, "loss": 0.0, "step": 2957 }, { "epoch": 1.7205176675876108, "grad_norm": NaN, "learning_rate": 5.015923512850118e-06, "loss": 0.0, "step": 2958 }, { "epoch": 1.7210993165624546, "grad_norm": NaN, "learning_rate": 4.995400413904933e-06, "loss": 0.0, "step": 2959 }, { "epoch": 1.7216809655372982, "grad_norm": NaN, "learning_rate": 4.974917179242139e-06, "loss": 0.0, "step": 2960 }, { "epoch": 1.7222626145121418, "grad_norm": NaN, "learning_rate": 4.954473827005429e-06, "loss": 0.0, "step": 2961 }, { "epoch": 1.7228442634869856, "grad_norm": NaN, "learning_rate": 4.934070375303174e-06, "loss": 0.0, "step": 2962 }, { "epoch": 1.7234259124618294, "grad_norm": NaN, "learning_rate": 4.913706842208382e-06, "loss": 0.0, "step": 2963 }, { "epoch": 1.724007561436673, "grad_norm": NaN, "learning_rate": 4.89338324575872e-06, "loss": 0.0, "step": 2964 }, { "epoch": 1.7245892104115166, "grad_norm": NaN, "learning_rate": 4.873099603956466e-06, "loss": 0.0, "step": 2965 }, { "epoch": 1.7251708593863602, "grad_norm": NaN, "learning_rate": 4.852855934768513e-06, "loss": 0.0, "step": 2966 }, { "epoch": 1.725752508361204, "grad_norm": NaN, "learning_rate": 4.832652256126352e-06, "loss": 0.0, "step": 2967 }, { "epoch": 1.7263341573360478, "grad_norm": NaN, "learning_rate": 4.81248858592604e-06, "loss": 0.0, "step": 2968 }, { "epoch": 1.7269158063108914, "grad_norm": NaN, "learning_rate": 4.792364942028205e-06, "loss": 0.0, "step": 2969 }, { "epoch": 1.727497455285735, "grad_norm": NaN, "learning_rate": 4.772281342258006e-06, "loss": 0.0, "step": 2970 }, { "epoch": 1.7280791042605788, "grad_norm": NaN, "learning_rate": 4.752237804405163e-06, "loss": 0.0, "step": 2971 }, { "epoch": 1.7286607532354226, "grad_norm": NaN, "learning_rate": 4.732234346223879e-06, "loss": 0.0, "step": 2972 }, { "epoch": 1.7292424022102661, "grad_norm": NaN, "learning_rate": 4.712270985432865e-06, "loss": 0.0, "step": 2973 }, { "epoch": 1.7298240511851097, "grad_norm": NaN, "learning_rate": 4.692347739715308e-06, "loss": 0.0, "step": 2974 }, { "epoch": 1.7304057001599533, "grad_norm": NaN, "learning_rate": 4.672464626718881e-06, "loss": 0.0, "step": 2975 }, { "epoch": 1.7309873491347971, "grad_norm": NaN, "learning_rate": 4.652621664055684e-06, "loss": 0.0, "step": 2976 }, { "epoch": 1.731568998109641, "grad_norm": NaN, "learning_rate": 4.632818869302274e-06, "loss": 0.0, "step": 2977 }, { "epoch": 1.7321506470844845, "grad_norm": NaN, "learning_rate": 4.6130562599996155e-06, "loss": 0.0, "step": 2978 }, { "epoch": 1.732732296059328, "grad_norm": NaN, "learning_rate": 4.5933338536530705e-06, "loss": 0.0, "step": 2979 }, { "epoch": 1.733313945034172, "grad_norm": NaN, "learning_rate": 4.573651667732404e-06, "loss": 0.0, "step": 2980 }, { "epoch": 1.7338955940090157, "grad_norm": NaN, "learning_rate": 4.554009719671759e-06, "loss": 0.0, "step": 2981 }, { "epoch": 1.7344772429838593, "grad_norm": NaN, "learning_rate": 4.5344080268696244e-06, "loss": 0.0, "step": 2982 }, { "epoch": 1.7350588919587029, "grad_norm": NaN, "learning_rate": 4.514846606688827e-06, "loss": 0.0, "step": 2983 }, { "epoch": 1.7356405409335465, "grad_norm": NaN, "learning_rate": 4.495325476456535e-06, "loss": 0.0, "step": 2984 }, { "epoch": 1.7362221899083903, "grad_norm": NaN, "learning_rate": 4.475844653464223e-06, "loss": 0.0, "step": 2985 }, { "epoch": 1.736803838883234, "grad_norm": NaN, "learning_rate": 4.456404154967658e-06, "loss": 0.0, "step": 2986 }, { "epoch": 1.7373854878580777, "grad_norm": NaN, "learning_rate": 4.437003998186895e-06, "loss": 0.0, "step": 2987 }, { "epoch": 1.7379671368329213, "grad_norm": NaN, "learning_rate": 4.417644200306248e-06, "loss": 0.0, "step": 2988 }, { "epoch": 1.738548785807765, "grad_norm": NaN, "learning_rate": 4.398324778474272e-06, "loss": 0.0, "step": 2989 }, { "epoch": 1.7391304347826086, "grad_norm": NaN, "learning_rate": 4.379045749803795e-06, "loss": 0.0, "step": 2990 }, { "epoch": 1.7397120837574525, "grad_norm": NaN, "learning_rate": 4.359807131371835e-06, "loss": 0.0, "step": 2991 }, { "epoch": 1.740293732732296, "grad_norm": NaN, "learning_rate": 4.340608940219615e-06, "loss": 0.0, "step": 2992 }, { "epoch": 1.7408753817071396, "grad_norm": NaN, "learning_rate": 4.32145119335256e-06, "loss": 0.0, "step": 2993 }, { "epoch": 1.7414570306819834, "grad_norm": NaN, "learning_rate": 4.302333907740263e-06, "loss": 0.0, "step": 2994 }, { "epoch": 1.7420386796568272, "grad_norm": NaN, "learning_rate": 4.2832571003164786e-06, "loss": 0.0, "step": 2995 }, { "epoch": 1.7426203286316708, "grad_norm": NaN, "learning_rate": 4.2642207879791064e-06, "loss": 0.0, "step": 2996 }, { "epoch": 1.7432019776065144, "grad_norm": NaN, "learning_rate": 4.245224987590179e-06, "loss": 0.0, "step": 2997 }, { "epoch": 1.743783626581358, "grad_norm": NaN, "learning_rate": 4.226269715975834e-06, "loss": 0.0, "step": 2998 }, { "epoch": 1.7443652755562018, "grad_norm": NaN, "learning_rate": 4.207354989926332e-06, "loss": 0.0, "step": 2999 }, { "epoch": 1.7449469245310456, "grad_norm": NaN, "learning_rate": 4.188480826195995e-06, "loss": 0.0, "step": 3000 }, { "epoch": 1.7455285735058892, "grad_norm": NaN, "learning_rate": 4.169647241503233e-06, "loss": 0.0, "step": 3001 }, { "epoch": 1.7461102224807328, "grad_norm": NaN, "learning_rate": 4.150854252530495e-06, "loss": 0.0, "step": 3002 }, { "epoch": 1.7466918714555766, "grad_norm": NaN, "learning_rate": 4.132101875924282e-06, "loss": 0.0, "step": 3003 }, { "epoch": 1.7472735204304204, "grad_norm": NaN, "learning_rate": 4.113390128295125e-06, "loss": 0.0, "step": 3004 }, { "epoch": 1.747855169405264, "grad_norm": NaN, "learning_rate": 4.094719026217553e-06, "loss": 0.0, "step": 3005 }, { "epoch": 1.7484368183801076, "grad_norm": NaN, "learning_rate": 4.076088586230104e-06, "loss": 0.0, "step": 3006 }, { "epoch": 1.7490184673549511, "grad_norm": NaN, "learning_rate": 4.057498824835282e-06, "loss": 0.0, "step": 3007 }, { "epoch": 1.749600116329795, "grad_norm": NaN, "learning_rate": 4.038949758499588e-06, "loss": 0.0, "step": 3008 }, { "epoch": 1.7501817653046388, "grad_norm": NaN, "learning_rate": 4.020441403653458e-06, "loss": 0.0, "step": 3009 }, { "epoch": 1.7507634142794823, "grad_norm": NaN, "learning_rate": 4.001973776691259e-06, "loss": 0.0, "step": 3010 }, { "epoch": 1.751345063254326, "grad_norm": NaN, "learning_rate": 3.983546893971291e-06, "loss": 0.0, "step": 3011 }, { "epoch": 1.7519267122291697, "grad_norm": NaN, "learning_rate": 3.965160771815762e-06, "loss": 0.0, "step": 3012 }, { "epoch": 1.7525083612040135, "grad_norm": NaN, "learning_rate": 3.94681542651078e-06, "loss": 0.0, "step": 3013 }, { "epoch": 1.7530900101788571, "grad_norm": NaN, "learning_rate": 3.9285108743063235e-06, "loss": 0.0, "step": 3014 }, { "epoch": 1.7536716591537007, "grad_norm": NaN, "learning_rate": 3.91024713141625e-06, "loss": 0.0, "step": 3015 }, { "epoch": 1.7542533081285443, "grad_norm": NaN, "learning_rate": 3.892024214018253e-06, "loss": 0.0, "step": 3016 }, { "epoch": 1.754834957103388, "grad_norm": NaN, "learning_rate": 3.87384213825387e-06, "loss": 0.0, "step": 3017 }, { "epoch": 1.755416606078232, "grad_norm": NaN, "learning_rate": 3.855700920228472e-06, "loss": 0.0, "step": 3018 }, { "epoch": 1.7559982550530755, "grad_norm": NaN, "learning_rate": 3.837600576011236e-06, "loss": 0.0, "step": 3019 }, { "epoch": 1.756579904027919, "grad_norm": NaN, "learning_rate": 3.819541121635117e-06, "loss": 0.0, "step": 3020 }, { "epoch": 1.7571615530027627, "grad_norm": NaN, "learning_rate": 3.8015225730968662e-06, "loss": 0.0, "step": 3021 }, { "epoch": 1.7577432019776065, "grad_norm": NaN, "learning_rate": 3.7835449463569927e-06, "loss": 0.0, "step": 3022 }, { "epoch": 1.7583248509524503, "grad_norm": NaN, "learning_rate": 3.76560825733977e-06, "loss": 0.0, "step": 3023 }, { "epoch": 1.7589064999272939, "grad_norm": NaN, "learning_rate": 3.7477125219331945e-06, "loss": 0.0, "step": 3024 }, { "epoch": 1.7594881489021374, "grad_norm": NaN, "learning_rate": 3.7298577559890023e-06, "loss": 0.0, "step": 3025 }, { "epoch": 1.7600697978769813, "grad_norm": NaN, "learning_rate": 3.71204397532261e-06, "loss": 0.0, "step": 3026 }, { "epoch": 1.760651446851825, "grad_norm": NaN, "learning_rate": 3.694271195713178e-06, "loss": 0.0, "step": 3027 }, { "epoch": 1.7612330958266686, "grad_norm": NaN, "learning_rate": 3.6765394329035086e-06, "loss": 0.0, "step": 3028 }, { "epoch": 1.7618147448015122, "grad_norm": NaN, "learning_rate": 3.658848702600087e-06, "loss": 0.0, "step": 3029 }, { "epoch": 1.7623963937763558, "grad_norm": NaN, "learning_rate": 3.6411990204730516e-06, "loss": 0.0, "step": 3030 }, { "epoch": 1.7629780427511996, "grad_norm": NaN, "learning_rate": 3.623590402156174e-06, "loss": 0.0, "step": 3031 }, { "epoch": 1.7635596917260434, "grad_norm": NaN, "learning_rate": 3.6060228632468664e-06, "loss": 0.0, "step": 3032 }, { "epoch": 1.764141340700887, "grad_norm": NaN, "learning_rate": 3.588496419306142e-06, "loss": 0.0, "step": 3033 }, { "epoch": 1.7647229896757306, "grad_norm": NaN, "learning_rate": 3.571011085858622e-06, "loss": 0.0, "step": 3034 }, { "epoch": 1.7653046386505744, "grad_norm": NaN, "learning_rate": 3.5535668783924893e-06, "loss": 0.0, "step": 3035 }, { "epoch": 1.7658862876254182, "grad_norm": NaN, "learning_rate": 3.5361638123595385e-06, "loss": 0.0, "step": 3036 }, { "epoch": 1.7664679366002618, "grad_norm": NaN, "learning_rate": 3.5188019031750897e-06, "loss": 0.0, "step": 3037 }, { "epoch": 1.7670495855751054, "grad_norm": NaN, "learning_rate": 3.5014811662180127e-06, "loss": 0.0, "step": 3038 }, { "epoch": 1.767631234549949, "grad_norm": NaN, "learning_rate": 3.4842016168307178e-06, "loss": 0.0, "step": 3039 }, { "epoch": 1.7682128835247928, "grad_norm": NaN, "learning_rate": 3.4669632703191234e-06, "loss": 0.0, "step": 3040 }, { "epoch": 1.7687945324996366, "grad_norm": NaN, "learning_rate": 3.449766141952654e-06, "loss": 0.0, "step": 3041 }, { "epoch": 1.7693761814744802, "grad_norm": NaN, "learning_rate": 3.4326102469642243e-06, "loss": 0.0, "step": 3042 }, { "epoch": 1.7699578304493238, "grad_norm": NaN, "learning_rate": 3.415495600550217e-06, "loss": 0.0, "step": 3043 }, { "epoch": 1.7705394794241676, "grad_norm": NaN, "learning_rate": 3.3984222178704894e-06, "loss": 0.0, "step": 3044 }, { "epoch": 1.7711211283990111, "grad_norm": NaN, "learning_rate": 3.381390114048344e-06, "loss": 0.0, "step": 3045 }, { "epoch": 1.771702777373855, "grad_norm": NaN, "learning_rate": 3.3643993041705233e-06, "loss": 0.0, "step": 3046 }, { "epoch": 1.7722844263486985, "grad_norm": NaN, "learning_rate": 3.347449803287184e-06, "loss": 0.0, "step": 3047 }, { "epoch": 1.7728660753235421, "grad_norm": NaN, "learning_rate": 3.3305416264118983e-06, "loss": 0.0, "step": 3048 }, { "epoch": 1.773447724298386, "grad_norm": NaN, "learning_rate": 3.313674788521626e-06, "loss": 0.0, "step": 3049 }, { "epoch": 1.7740293732732297, "grad_norm": NaN, "learning_rate": 3.296849304556715e-06, "loss": 0.0, "step": 3050 }, { "epoch": 1.7746110222480733, "grad_norm": NaN, "learning_rate": 3.280065189420889e-06, "loss": 0.0, "step": 3051 }, { "epoch": 1.775192671222917, "grad_norm": NaN, "learning_rate": 3.263322457981216e-06, "loss": 0.0, "step": 3052 }, { "epoch": 1.7757743201977605, "grad_norm": NaN, "learning_rate": 3.2466211250681165e-06, "loss": 0.0, "step": 3053 }, { "epoch": 1.7763559691726043, "grad_norm": NaN, "learning_rate": 3.2299612054753292e-06, "loss": 0.0, "step": 3054 }, { "epoch": 1.776937618147448, "grad_norm": NaN, "learning_rate": 3.2133427139599327e-06, "loss": 0.0, "step": 3055 }, { "epoch": 1.7775192671222917, "grad_norm": NaN, "learning_rate": 3.196765665242285e-06, "loss": 0.0, "step": 3056 }, { "epoch": 1.7781009160971353, "grad_norm": NaN, "learning_rate": 3.1802300740060466e-06, "loss": 0.0, "step": 3057 }, { "epoch": 1.778682565071979, "grad_norm": NaN, "learning_rate": 3.163735954898156e-06, "loss": 0.0, "step": 3058 }, { "epoch": 1.779264214046823, "grad_norm": NaN, "learning_rate": 3.1472833225288046e-06, "loss": 0.0, "step": 3059 }, { "epoch": 1.7798458630216665, "grad_norm": NaN, "learning_rate": 3.1308721914714513e-06, "loss": 0.0, "step": 3060 }, { "epoch": 1.78042751199651, "grad_norm": NaN, "learning_rate": 3.11450257626279e-06, "loss": 0.0, "step": 3061 }, { "epoch": 1.7810091609713536, "grad_norm": NaN, "learning_rate": 3.098174491402728e-06, "loss": 0.0, "step": 3062 }, { "epoch": 1.7815908099461975, "grad_norm": NaN, "learning_rate": 3.081887951354401e-06, "loss": 0.0, "step": 3063 }, { "epoch": 1.7821724589210413, "grad_norm": NaN, "learning_rate": 3.0656429705441413e-06, "loss": 0.0, "step": 3064 }, { "epoch": 1.7827541078958848, "grad_norm": NaN, "learning_rate": 3.049439563361467e-06, "loss": 0.0, "step": 3065 }, { "epoch": 1.7833357568707284, "grad_norm": NaN, "learning_rate": 3.0332777441590685e-06, "loss": 0.0, "step": 3066 }, { "epoch": 1.7839174058455722, "grad_norm": NaN, "learning_rate": 3.017157527252806e-06, "loss": 0.0, "step": 3067 }, { "epoch": 1.784499054820416, "grad_norm": NaN, "learning_rate": 3.00107892692168e-06, "loss": 0.0, "step": 3068 }, { "epoch": 1.7850807037952596, "grad_norm": NaN, "learning_rate": 2.98504195740783e-06, "loss": 0.0, "step": 3069 }, { "epoch": 1.7856623527701032, "grad_norm": NaN, "learning_rate": 2.9690466329165223e-06, "loss": 0.0, "step": 3070 }, { "epoch": 1.7862440017449468, "grad_norm": NaN, "learning_rate": 2.9530929676161336e-06, "loss": 0.0, "step": 3071 }, { "epoch": 1.7868256507197906, "grad_norm": NaN, "learning_rate": 2.9371809756381374e-06, "loss": 0.0, "step": 3072 }, { "epoch": 1.7874072996946344, "grad_norm": NaN, "learning_rate": 2.9213106710771043e-06, "loss": 0.0, "step": 3073 }, { "epoch": 1.787988948669478, "grad_norm": NaN, "learning_rate": 2.9054820679906726e-06, "loss": 0.0, "step": 3074 }, { "epoch": 1.7885705976443216, "grad_norm": NaN, "learning_rate": 2.8896951803995263e-06, "loss": 0.0, "step": 3075 }, { "epoch": 1.7891522466191654, "grad_norm": NaN, "learning_rate": 2.873950022287425e-06, "loss": 0.0, "step": 3076 }, { "epoch": 1.789733895594009, "grad_norm": NaN, "learning_rate": 2.858246607601145e-06, "loss": 0.0, "step": 3077 }, { "epoch": 1.7903155445688528, "grad_norm": NaN, "learning_rate": 2.842584950250504e-06, "loss": 0.0, "step": 3078 }, { "epoch": 1.7908971935436964, "grad_norm": NaN, "learning_rate": 2.826965064108311e-06, "loss": 0.0, "step": 3079 }, { "epoch": 1.79147884251854, "grad_norm": NaN, "learning_rate": 2.811386963010393e-06, "loss": 0.0, "step": 3080 }, { "epoch": 1.7920604914933838, "grad_norm": NaN, "learning_rate": 2.795850660755556e-06, "loss": 0.0, "step": 3081 }, { "epoch": 1.7926421404682276, "grad_norm": NaN, "learning_rate": 2.78035617110558e-06, "loss": 0.0, "step": 3082 }, { "epoch": 1.7932237894430711, "grad_norm": NaN, "learning_rate": 2.7649035077852213e-06, "loss": 0.0, "step": 3083 }, { "epoch": 1.7938054384179147, "grad_norm": NaN, "learning_rate": 2.749492684482169e-06, "loss": 0.0, "step": 3084 }, { "epoch": 1.7943870873927583, "grad_norm": NaN, "learning_rate": 2.734123714847059e-06, "loss": 0.0, "step": 3085 }, { "epoch": 1.7949687363676021, "grad_norm": NaN, "learning_rate": 2.718796612493463e-06, "loss": 0.0, "step": 3086 }, { "epoch": 1.795550385342446, "grad_norm": NaN, "learning_rate": 2.7035113909978426e-06, "loss": 0.0, "step": 3087 }, { "epoch": 1.7961320343172895, "grad_norm": NaN, "learning_rate": 2.688268063899596e-06, "loss": 0.0, "step": 3088 }, { "epoch": 1.796713683292133, "grad_norm": NaN, "learning_rate": 2.6730666447009833e-06, "loss": 0.0, "step": 3089 }, { "epoch": 1.797295332266977, "grad_norm": NaN, "learning_rate": 2.6579071468671503e-06, "loss": 0.0, "step": 3090 }, { "epoch": 1.7978769812418207, "grad_norm": NaN, "learning_rate": 2.6427895838261162e-06, "loss": 0.0, "step": 3091 }, { "epoch": 1.7984586302166643, "grad_norm": NaN, "learning_rate": 2.627713968968765e-06, "loss": 0.0, "step": 3092 }, { "epoch": 1.7990402791915079, "grad_norm": NaN, "learning_rate": 2.612680315648802e-06, "loss": 0.0, "step": 3093 }, { "epoch": 1.7996219281663515, "grad_norm": NaN, "learning_rate": 2.597688637182766e-06, "loss": 0.0, "step": 3094 }, { "epoch": 1.8002035771411953, "grad_norm": NaN, "learning_rate": 2.582738946850033e-06, "loss": 0.0, "step": 3095 }, { "epoch": 1.800785226116039, "grad_norm": NaN, "learning_rate": 2.5678312578927723e-06, "loss": 0.0, "step": 3096 }, { "epoch": 1.8013668750908827, "grad_norm": NaN, "learning_rate": 2.5529655835159482e-06, "loss": 0.0, "step": 3097 }, { "epoch": 1.8019485240657263, "grad_norm": NaN, "learning_rate": 2.538141936887317e-06, "loss": 0.0, "step": 3098 }, { "epoch": 1.80253017304057, "grad_norm": NaN, "learning_rate": 2.523360331137403e-06, "loss": 0.0, "step": 3099 }, { "epoch": 1.8031118220154136, "grad_norm": NaN, "learning_rate": 2.5086207793594885e-06, "loss": 0.0, "step": 3100 }, { "epoch": 1.8036934709902575, "grad_norm": NaN, "learning_rate": 2.493923294609618e-06, "loss": 0.0, "step": 3101 }, { "epoch": 1.804275119965101, "grad_norm": NaN, "learning_rate": 2.479267889906561e-06, "loss": 0.0, "step": 3102 }, { "epoch": 1.8048567689399446, "grad_norm": NaN, "learning_rate": 2.4646545782318177e-06, "loss": 0.0, "step": 3103 }, { "epoch": 1.8054384179147884, "grad_norm": NaN, "learning_rate": 2.450083372529599e-06, "loss": 0.0, "step": 3104 }, { "epoch": 1.8060200668896322, "grad_norm": NaN, "learning_rate": 2.435554285706826e-06, "loss": 0.0, "step": 3105 }, { "epoch": 1.8066017158644758, "grad_norm": NaN, "learning_rate": 2.4210673306331087e-06, "loss": 0.0, "step": 3106 }, { "epoch": 1.8071833648393194, "grad_norm": NaN, "learning_rate": 2.406622520140733e-06, "loss": 0.0, "step": 3107 }, { "epoch": 1.807765013814163, "grad_norm": NaN, "learning_rate": 2.3922198670246587e-06, "loss": 0.0, "step": 3108 }, { "epoch": 1.8083466627890068, "grad_norm": NaN, "learning_rate": 2.3778593840424967e-06, "loss": 0.0, "step": 3109 }, { "epoch": 1.8089283117638506, "grad_norm": NaN, "learning_rate": 2.363541083914528e-06, "loss": 0.0, "step": 3110 }, { "epoch": 1.8095099607386942, "grad_norm": NaN, "learning_rate": 2.3492649793236397e-06, "loss": 0.0, "step": 3111 }, { "epoch": 1.8100916097135378, "grad_norm": NaN, "learning_rate": 2.335031082915351e-06, "loss": 0.0, "step": 3112 }, { "epoch": 1.8106732586883816, "grad_norm": NaN, "learning_rate": 2.3208394072978034e-06, "loss": 0.0, "step": 3113 }, { "epoch": 1.8112549076632254, "grad_norm": NaN, "learning_rate": 2.3066899650417306e-06, "loss": 0.0, "step": 3114 }, { "epoch": 1.811836556638069, "grad_norm": NaN, "learning_rate": 2.2925827686804568e-06, "loss": 0.0, "step": 3115 }, { "epoch": 1.8124182056129126, "grad_norm": NaN, "learning_rate": 2.278517830709892e-06, "loss": 0.0, "step": 3116 }, { "epoch": 1.8129998545877561, "grad_norm": NaN, "learning_rate": 2.264495163588509e-06, "loss": 0.0, "step": 3117 }, { "epoch": 1.8135815035626, "grad_norm": NaN, "learning_rate": 2.2505147797373395e-06, "loss": 0.0, "step": 3118 }, { "epoch": 1.8141631525374438, "grad_norm": NaN, "learning_rate": 2.23657669153996e-06, "loss": 0.0, "step": 3119 }, { "epoch": 1.8147448015122873, "grad_norm": NaN, "learning_rate": 2.2226809113424895e-06, "loss": 0.0, "step": 3120 }, { "epoch": 1.815326450487131, "grad_norm": NaN, "learning_rate": 2.20882745145356e-06, "loss": 0.0, "step": 3121 }, { "epoch": 1.8159080994619747, "grad_norm": NaN, "learning_rate": 2.1950163241443277e-06, "loss": 0.0, "step": 3122 }, { "epoch": 1.8164897484368185, "grad_norm": NaN, "learning_rate": 2.1812475416484456e-06, "loss": 0.0, "step": 3123 }, { "epoch": 1.8170713974116621, "grad_norm": NaN, "learning_rate": 2.167521116162052e-06, "loss": 0.0, "step": 3124 }, { "epoch": 1.8176530463865057, "grad_norm": NaN, "learning_rate": 2.1538370598437773e-06, "loss": 0.0, "step": 3125 }, { "epoch": 1.8182346953613493, "grad_norm": NaN, "learning_rate": 2.1401953848147195e-06, "loss": 0.0, "step": 3126 }, { "epoch": 1.818816344336193, "grad_norm": NaN, "learning_rate": 2.1265961031584346e-06, "loss": 0.0, "step": 3127 }, { "epoch": 1.819397993311037, "grad_norm": NaN, "learning_rate": 2.1130392269209155e-06, "loss": 0.0, "step": 3128 }, { "epoch": 1.8199796422858805, "grad_norm": NaN, "learning_rate": 2.099524768110622e-06, "loss": 0.0, "step": 3129 }, { "epoch": 1.820561291260724, "grad_norm": NaN, "learning_rate": 2.086052738698413e-06, "loss": 0.0, "step": 3130 }, { "epoch": 1.8211429402355679, "grad_norm": NaN, "learning_rate": 2.072623150617581e-06, "loss": 0.0, "step": 3131 }, { "epoch": 1.8217245892104115, "grad_norm": NaN, "learning_rate": 2.0592360157638213e-06, "loss": 0.0, "step": 3132 }, { "epoch": 1.8223062381852553, "grad_norm": NaN, "learning_rate": 2.04589134599521e-06, "loss": 0.0, "step": 3133 }, { "epoch": 1.8228878871600989, "grad_norm": NaN, "learning_rate": 2.0325891531322305e-06, "loss": 0.0, "step": 3134 }, { "epoch": 1.8234695361349424, "grad_norm": NaN, "learning_rate": 2.0193294489577295e-06, "loss": 0.0, "step": 3135 }, { "epoch": 1.8240511851097863, "grad_norm": NaN, "learning_rate": 2.006112245216918e-06, "loss": 0.0, "step": 3136 }, { "epoch": 1.82463283408463, "grad_norm": NaN, "learning_rate": 1.9929375536173576e-06, "loss": 0.0, "step": 3137 }, { "epoch": 1.8252144830594736, "grad_norm": NaN, "learning_rate": 1.9798053858289645e-06, "loss": 0.0, "step": 3138 }, { "epoch": 1.8257961320343172, "grad_norm": NaN, "learning_rate": 1.9667157534839887e-06, "loss": 0.0, "step": 3139 }, { "epoch": 1.8263777810091608, "grad_norm": NaN, "learning_rate": 1.953668668176983e-06, "loss": 0.0, "step": 3140 }, { "epoch": 1.8269594299840046, "grad_norm": NaN, "learning_rate": 1.9406641414648374e-06, "loss": 0.0, "step": 3141 }, { "epoch": 1.8275410789588484, "grad_norm": NaN, "learning_rate": 1.9277021848667253e-06, "loss": 0.0, "step": 3142 }, { "epoch": 1.828122727933692, "grad_norm": NaN, "learning_rate": 1.9147828098641185e-06, "loss": 0.0, "step": 3143 }, { "epoch": 1.8287043769085356, "grad_norm": NaN, "learning_rate": 1.9019060279007794e-06, "loss": 0.0, "step": 3144 }, { "epoch": 1.8292860258833794, "grad_norm": NaN, "learning_rate": 1.8890718503827287e-06, "loss": 0.0, "step": 3145 }, { "epoch": 1.8298676748582232, "grad_norm": NaN, "learning_rate": 1.8762802886782504e-06, "loss": 0.0, "step": 3146 }, { "epoch": 1.8304493238330668, "grad_norm": NaN, "learning_rate": 1.8635313541178977e-06, "loss": 0.0, "step": 3147 }, { "epoch": 1.8310309728079104, "grad_norm": NaN, "learning_rate": 1.8508250579944485e-06, "loss": 0.0, "step": 3148 }, { "epoch": 1.831612621782754, "grad_norm": NaN, "learning_rate": 1.838161411562911e-06, "loss": 0.0, "step": 3149 }, { "epoch": 1.8321942707575978, "grad_norm": NaN, "learning_rate": 1.8255404260405284e-06, "loss": 0.0, "step": 3150 }, { "epoch": 1.8327759197324416, "grad_norm": NaN, "learning_rate": 1.8129621126067365e-06, "loss": 0.0, "step": 3151 }, { "epoch": 1.8333575687072852, "grad_norm": NaN, "learning_rate": 1.8004264824031948e-06, "loss": 0.0, "step": 3152 }, { "epoch": 1.8339392176821288, "grad_norm": NaN, "learning_rate": 1.7879335465337377e-06, "loss": 0.0, "step": 3153 }, { "epoch": 1.8345208666569726, "grad_norm": NaN, "learning_rate": 1.7754833160643858e-06, "loss": 0.0, "step": 3154 }, { "epoch": 1.8351025156318161, "grad_norm": NaN, "learning_rate": 1.7630758020233396e-06, "loss": 0.0, "step": 3155 }, { "epoch": 1.83568416460666, "grad_norm": NaN, "learning_rate": 1.750711015400952e-06, "loss": 0.0, "step": 3156 }, { "epoch": 1.8362658135815035, "grad_norm": NaN, "learning_rate": 1.73838896714974e-06, "loss": 0.0, "step": 3157 }, { "epoch": 1.8368474625563471, "grad_norm": NaN, "learning_rate": 1.7261096681843559e-06, "loss": 0.0, "step": 3158 }, { "epoch": 1.837429111531191, "grad_norm": NaN, "learning_rate": 1.7138731293815825e-06, "loss": 0.0, "step": 3159 }, { "epoch": 1.8380107605060347, "grad_norm": NaN, "learning_rate": 1.701679361580333e-06, "loss": 0.0, "step": 3160 }, { "epoch": 1.8385924094808783, "grad_norm": NaN, "learning_rate": 1.6895283755816283e-06, "loss": 0.0, "step": 3161 }, { "epoch": 1.839174058455722, "grad_norm": NaN, "learning_rate": 1.6774201821486034e-06, "loss": 0.0, "step": 3162 }, { "epoch": 1.8397557074305655, "grad_norm": NaN, "learning_rate": 1.665354792006485e-06, "loss": 0.0, "step": 3163 }, { "epoch": 1.8403373564054093, "grad_norm": NaN, "learning_rate": 1.6533322158425735e-06, "loss": 0.0, "step": 3164 }, { "epoch": 1.840919005380253, "grad_norm": NaN, "learning_rate": 1.6413524643062562e-06, "loss": 0.0, "step": 3165 }, { "epoch": 1.8415006543550967, "grad_norm": NaN, "learning_rate": 1.629415548009e-06, "loss": 0.0, "step": 3166 }, { "epoch": 1.8420823033299403, "grad_norm": NaN, "learning_rate": 1.617521477524303e-06, "loss": 0.0, "step": 3167 }, { "epoch": 1.842663952304784, "grad_norm": NaN, "learning_rate": 1.6056702633877318e-06, "loss": 0.0, "step": 3168 }, { "epoch": 1.8432456012796279, "grad_norm": NaN, "learning_rate": 1.5938619160968726e-06, "loss": 0.0, "step": 3169 }, { "epoch": 1.8438272502544715, "grad_norm": NaN, "learning_rate": 1.5820964461113642e-06, "loss": 0.0, "step": 3170 }, { "epoch": 1.844408899229315, "grad_norm": NaN, "learning_rate": 1.570373863852842e-06, "loss": 0.0, "step": 3171 }, { "epoch": 1.8449905482041586, "grad_norm": NaN, "learning_rate": 1.558694179704967e-06, "loss": 0.0, "step": 3172 }, { "epoch": 1.8455721971790024, "grad_norm": NaN, "learning_rate": 1.5470574040134022e-06, "loss": 0.0, "step": 3173 }, { "epoch": 1.8461538461538463, "grad_norm": NaN, "learning_rate": 1.535463547085786e-06, "loss": 0.0, "step": 3174 }, { "epoch": 1.8467354951286898, "grad_norm": NaN, "learning_rate": 1.523912619191764e-06, "loss": 0.0, "step": 3175 }, { "epoch": 1.8473171441035334, "grad_norm": NaN, "learning_rate": 1.512404630562947e-06, "loss": 0.0, "step": 3176 }, { "epoch": 1.8478987930783772, "grad_norm": NaN, "learning_rate": 1.5009395913929024e-06, "loss": 0.0, "step": 3177 }, { "epoch": 1.848480442053221, "grad_norm": NaN, "learning_rate": 1.4895175118371629e-06, "loss": 0.0, "step": 3178 }, { "epoch": 1.8490620910280646, "grad_norm": NaN, "learning_rate": 1.4781384020132072e-06, "loss": 0.0, "step": 3179 }, { "epoch": 1.8496437400029082, "grad_norm": NaN, "learning_rate": 1.4668022720004448e-06, "loss": 0.0, "step": 3180 }, { "epoch": 1.8502253889777518, "grad_norm": NaN, "learning_rate": 1.4555091318402269e-06, "loss": 0.0, "step": 3181 }, { "epoch": 1.8508070379525956, "grad_norm": NaN, "learning_rate": 1.4442589915358128e-06, "loss": 0.0, "step": 3182 }, { "epoch": 1.8513886869274394, "grad_norm": NaN, "learning_rate": 1.4330518610523812e-06, "loss": 0.0, "step": 3183 }, { "epoch": 1.851970335902283, "grad_norm": NaN, "learning_rate": 1.4218877503170024e-06, "loss": 0.0, "step": 3184 }, { "epoch": 1.8525519848771266, "grad_norm": NaN, "learning_rate": 1.410766669218666e-06, "loss": 0.0, "step": 3185 }, { "epoch": 1.8531336338519704, "grad_norm": NaN, "learning_rate": 1.3996886276082254e-06, "loss": 0.0, "step": 3186 }, { "epoch": 1.853715282826814, "grad_norm": NaN, "learning_rate": 1.3886536352984036e-06, "loss": 0.0, "step": 3187 }, { "epoch": 1.8542969318016578, "grad_norm": NaN, "learning_rate": 1.377661702063815e-06, "loss": 0.0, "step": 3188 }, { "epoch": 1.8548785807765014, "grad_norm": NaN, "learning_rate": 1.3667128376409156e-06, "loss": 0.0, "step": 3189 }, { "epoch": 1.855460229751345, "grad_norm": NaN, "learning_rate": 1.355807051728014e-06, "loss": 0.0, "step": 3190 }, { "epoch": 1.8560418787261888, "grad_norm": NaN, "learning_rate": 1.3449443539852724e-06, "loss": 0.0, "step": 3191 }, { "epoch": 1.8566235277010326, "grad_norm": NaN, "learning_rate": 1.3341247540346657e-06, "loss": 0.0, "step": 3192 }, { "epoch": 1.8572051766758761, "grad_norm": NaN, "learning_rate": 1.3233482614600113e-06, "loss": 0.0, "step": 3193 }, { "epoch": 1.8577868256507197, "grad_norm": NaN, "learning_rate": 1.3126148858069342e-06, "loss": 0.0, "step": 3194 }, { "epoch": 1.8583684746255633, "grad_norm": NaN, "learning_rate": 1.3019246365828796e-06, "loss": 0.0, "step": 3195 }, { "epoch": 1.8589501236004071, "grad_norm": NaN, "learning_rate": 1.2912775232570672e-06, "loss": 0.0, "step": 3196 }, { "epoch": 1.859531772575251, "grad_norm": NaN, "learning_rate": 1.2806735552605365e-06, "loss": 0.0, "step": 3197 }, { "epoch": 1.8601134215500945, "grad_norm": NaN, "learning_rate": 1.2701127419860847e-06, "loss": 0.0, "step": 3198 }, { "epoch": 1.860695070524938, "grad_norm": NaN, "learning_rate": 1.2595950927883015e-06, "loss": 0.0, "step": 3199 }, { "epoch": 1.861276719499782, "grad_norm": NaN, "learning_rate": 1.249120616983529e-06, "loss": 0.0, "step": 3200 }, { "epoch": 1.8618583684746257, "grad_norm": NaN, "learning_rate": 1.2386893238498788e-06, "loss": 0.0, "step": 3201 }, { "epoch": 1.8624400174494693, "grad_norm": NaN, "learning_rate": 1.228301222627204e-06, "loss": 0.0, "step": 3202 }, { "epoch": 1.8630216664243129, "grad_norm": NaN, "learning_rate": 1.2179563225170998e-06, "loss": 0.0, "step": 3203 }, { "epoch": 1.8636033153991565, "grad_norm": NaN, "learning_rate": 1.2076546326829085e-06, "loss": 0.0, "step": 3204 }, { "epoch": 1.8641849643740003, "grad_norm": NaN, "learning_rate": 1.1973961622496755e-06, "loss": 0.0, "step": 3205 }, { "epoch": 1.864766613348844, "grad_norm": NaN, "learning_rate": 1.1871809203041707e-06, "loss": 0.0, "step": 3206 }, { "epoch": 1.8653482623236877, "grad_norm": NaN, "learning_rate": 1.177008915894884e-06, "loss": 0.0, "step": 3207 }, { "epoch": 1.8659299112985313, "grad_norm": NaN, "learning_rate": 1.166880158031991e-06, "loss": 0.0, "step": 3208 }, { "epoch": 1.866511560273375, "grad_norm": NaN, "learning_rate": 1.156794655687371e-06, "loss": 0.0, "step": 3209 }, { "epoch": 1.8670932092482189, "grad_norm": NaN, "learning_rate": 1.1467524177945832e-06, "loss": 0.0, "step": 3210 }, { "epoch": 1.8676748582230625, "grad_norm": NaN, "learning_rate": 1.1367534532488677e-06, "loss": 0.0, "step": 3211 }, { "epoch": 1.868256507197906, "grad_norm": NaN, "learning_rate": 1.1267977709071288e-06, "loss": 0.0, "step": 3212 }, { "epoch": 1.8688381561727496, "grad_norm": NaN, "learning_rate": 1.116885379587934e-06, "loss": 0.0, "step": 3213 }, { "epoch": 1.8694198051475934, "grad_norm": NaN, "learning_rate": 1.1070162880715052e-06, "loss": 0.0, "step": 3214 }, { "epoch": 1.8700014541224372, "grad_norm": NaN, "learning_rate": 1.0971905050997155e-06, "loss": 0.0, "step": 3215 }, { "epoch": 1.8705831030972808, "grad_norm": NaN, "learning_rate": 1.0874080393760644e-06, "loss": 0.0, "step": 3216 }, { "epoch": 1.8711647520721244, "grad_norm": NaN, "learning_rate": 1.077668899565687e-06, "loss": 0.0, "step": 3217 }, { "epoch": 1.871746401046968, "grad_norm": NaN, "learning_rate": 1.0679730942953492e-06, "loss": 0.0, "step": 3218 }, { "epoch": 1.8723280500218118, "grad_norm": NaN, "learning_rate": 1.05832063215342e-06, "loss": 0.0, "step": 3219 }, { "epoch": 1.8729096989966556, "grad_norm": NaN, "learning_rate": 1.0487115216898768e-06, "loss": 0.0, "step": 3220 }, { "epoch": 1.8734913479714992, "grad_norm": NaN, "learning_rate": 1.0391457714163055e-06, "loss": 0.0, "step": 3221 }, { "epoch": 1.8740729969463428, "grad_norm": NaN, "learning_rate": 1.0296233898058783e-06, "loss": 0.0, "step": 3222 }, { "epoch": 1.8746546459211866, "grad_norm": NaN, "learning_rate": 1.0201443852933535e-06, "loss": 0.0, "step": 3223 }, { "epoch": 1.8752362948960304, "grad_norm": NaN, "learning_rate": 1.0107087662750703e-06, "loss": 0.0, "step": 3224 }, { "epoch": 1.875817943870874, "grad_norm": NaN, "learning_rate": 1.0013165411089266e-06, "loss": 0.0, "step": 3225 }, { "epoch": 1.8763995928457176, "grad_norm": NaN, "learning_rate": 9.91967718114395e-07, "loss": 0.0, "step": 3226 }, { "epoch": 1.8769812418205611, "grad_norm": NaN, "learning_rate": 9.826623055724959e-07, "loss": 0.0, "step": 3227 }, { "epoch": 1.877562890795405, "grad_norm": NaN, "learning_rate": 9.734003117257974e-07, "loss": 0.0, "step": 3228 }, { "epoch": 1.8781445397702488, "grad_norm": NaN, "learning_rate": 9.641817447784195e-07, "loss": 0.0, "step": 3229 }, { "epoch": 1.8787261887450923, "grad_norm": NaN, "learning_rate": 9.550066128959978e-07, "loss": 0.0, "step": 3230 }, { "epoch": 1.879307837719936, "grad_norm": NaN, "learning_rate": 9.458749242057085e-07, "loss": 0.0, "step": 3231 }, { "epoch": 1.8798894866947797, "grad_norm": NaN, "learning_rate": 9.367866867962427e-07, "loss": 0.0, "step": 3232 }, { "epoch": 1.8804711356696235, "grad_norm": NaN, "learning_rate": 9.277419087177885e-07, "loss": 0.0, "step": 3233 }, { "epoch": 1.8810527846444671, "grad_norm": NaN, "learning_rate": 9.187405979820651e-07, "loss": 0.0, "step": 3234 }, { "epoch": 1.8816344336193107, "grad_norm": NaN, "learning_rate": 9.097827625622723e-07, "loss": 0.0, "step": 3235 }, { "epoch": 1.8822160825941543, "grad_norm": NaN, "learning_rate": 9.008684103930964e-07, "loss": 0.0, "step": 3236 }, { "epoch": 1.882797731568998, "grad_norm": NaN, "learning_rate": 8.919975493707211e-07, "loss": 0.0, "step": 3237 }, { "epoch": 1.883379380543842, "grad_norm": NaN, "learning_rate": 8.831701873527942e-07, "loss": 0.0, "step": 3238 }, { "epoch": 1.8839610295186855, "grad_norm": NaN, "learning_rate": 8.743863321584334e-07, "loss": 0.0, "step": 3239 }, { "epoch": 1.884542678493529, "grad_norm": NaN, "learning_rate": 8.656459915682369e-07, "loss": 0.0, "step": 3240 }, { "epoch": 1.8851243274683729, "grad_norm": NaN, "learning_rate": 8.569491733242341e-07, "loss": 0.0, "step": 3241 }, { "epoch": 1.8857059764432165, "grad_norm": NaN, "learning_rate": 8.482958851299127e-07, "loss": 0.0, "step": 3242 }, { "epoch": 1.8862876254180603, "grad_norm": NaN, "learning_rate": 8.396861346502138e-07, "loss": 0.0, "step": 3243 }, { "epoch": 1.8868692743929039, "grad_norm": NaN, "learning_rate": 8.311199295114924e-07, "loss": 0.0, "step": 3244 }, { "epoch": 1.8874509233677474, "grad_norm": NaN, "learning_rate": 8.225972773015456e-07, "loss": 0.0, "step": 3245 }, { "epoch": 1.8880325723425913, "grad_norm": NaN, "learning_rate": 8.141181855695845e-07, "loss": 0.0, "step": 3246 }, { "epoch": 1.888614221317435, "grad_norm": NaN, "learning_rate": 8.056826618262459e-07, "loss": 0.0, "step": 3247 }, { "epoch": 1.8891958702922786, "grad_norm": NaN, "learning_rate": 7.972907135435637e-07, "loss": 0.0, "step": 3248 }, { "epoch": 1.8897775192671222, "grad_norm": NaN, "learning_rate": 7.889423481549862e-07, "loss": 0.0, "step": 3249 }, { "epoch": 1.8903591682419658, "grad_norm": NaN, "learning_rate": 7.806375730553372e-07, "loss": 0.0, "step": 3250 }, { "epoch": 1.8909408172168096, "grad_norm": NaN, "learning_rate": 7.72376395600849e-07, "loss": 0.0, "step": 3251 }, { "epoch": 1.8915224661916534, "grad_norm": NaN, "learning_rate": 7.641588231091234e-07, "loss": 0.0, "step": 3252 }, { "epoch": 1.892104115166497, "grad_norm": NaN, "learning_rate": 7.559848628591437e-07, "loss": 0.0, "step": 3253 }, { "epoch": 1.8926857641413406, "grad_norm": NaN, "learning_rate": 7.47854522091257e-07, "loss": 0.0, "step": 3254 }, { "epoch": 1.8932674131161844, "grad_norm": NaN, "learning_rate": 7.397678080071746e-07, "loss": 0.0, "step": 3255 }, { "epoch": 1.8938490620910282, "grad_norm": NaN, "learning_rate": 7.317247277699724e-07, "loss": 0.0, "step": 3256 }, { "epoch": 1.8944307110658718, "grad_norm": NaN, "learning_rate": 7.237252885040624e-07, "loss": 0.0, "step": 3257 }, { "epoch": 1.8950123600407154, "grad_norm": NaN, "learning_rate": 7.157694972952045e-07, "loss": 0.0, "step": 3258 }, { "epoch": 1.895594009015559, "grad_norm": NaN, "learning_rate": 7.078573611905115e-07, "loss": 0.0, "step": 3259 }, { "epoch": 1.8961756579904028, "grad_norm": NaN, "learning_rate": 6.999888871984106e-07, "loss": 0.0, "step": 3260 }, { "epoch": 1.8967573069652466, "grad_norm": NaN, "learning_rate": 6.921640822886488e-07, "loss": 0.0, "step": 3261 }, { "epoch": 1.8973389559400902, "grad_norm": NaN, "learning_rate": 6.843829533923041e-07, "loss": 0.0, "step": 3262 }, { "epoch": 1.8979206049149338, "grad_norm": NaN, "learning_rate": 6.766455074017686e-07, "loss": 0.0, "step": 3263 }, { "epoch": 1.8985022538897776, "grad_norm": NaN, "learning_rate": 6.689517511707322e-07, "loss": 0.0, "step": 3264 }, { "epoch": 1.8990839028646214, "grad_norm": NaN, "learning_rate": 6.613016915141879e-07, "loss": 0.0, "step": 3265 }, { "epoch": 1.899665551839465, "grad_norm": NaN, "learning_rate": 6.536953352084263e-07, "loss": 0.0, "step": 3266 }, { "epoch": 1.9002472008143085, "grad_norm": NaN, "learning_rate": 6.461326889910191e-07, "loss": 0.0, "step": 3267 }, { "epoch": 1.9008288497891521, "grad_norm": NaN, "learning_rate": 6.386137595608299e-07, "loss": 0.0, "step": 3268 }, { "epoch": 1.901410498763996, "grad_norm": NaN, "learning_rate": 6.311385535779979e-07, "loss": 0.0, "step": 3269 }, { "epoch": 1.9019921477388397, "grad_norm": NaN, "learning_rate": 6.237070776639209e-07, "loss": 0.0, "step": 3270 }, { "epoch": 1.9025737967136833, "grad_norm": NaN, "learning_rate": 6.163193384012722e-07, "loss": 0.0, "step": 3271 }, { "epoch": 1.903155445688527, "grad_norm": NaN, "learning_rate": 6.089753423339839e-07, "loss": 0.0, "step": 3272 }, { "epoch": 1.9037370946633707, "grad_norm": NaN, "learning_rate": 6.016750959672357e-07, "loss": 0.0, "step": 3273 }, { "epoch": 1.9043187436382143, "grad_norm": NaN, "learning_rate": 5.944186057674606e-07, "loss": 0.0, "step": 3274 }, { "epoch": 1.904900392613058, "grad_norm": NaN, "learning_rate": 5.872058781623224e-07, "loss": 0.0, "step": 3275 }, { "epoch": 1.9054820415879017, "grad_norm": NaN, "learning_rate": 5.80036919540733e-07, "loss": 0.0, "step": 3276 }, { "epoch": 1.9060636905627453, "grad_norm": NaN, "learning_rate": 5.729117362528346e-07, "loss": 0.0, "step": 3277 }, { "epoch": 1.906645339537589, "grad_norm": NaN, "learning_rate": 5.658303346099847e-07, "loss": 0.0, "step": 3278 }, { "epoch": 1.9072269885124329, "grad_norm": NaN, "learning_rate": 5.587927208847599e-07, "loss": 0.0, "step": 3279 }, { "epoch": 1.9078086374872765, "grad_norm": NaN, "learning_rate": 5.517989013109626e-07, "loss": 0.0, "step": 3280 }, { "epoch": 1.90839028646212, "grad_norm": NaN, "learning_rate": 5.448488820835929e-07, "loss": 0.0, "step": 3281 }, { "epoch": 1.9089719354369636, "grad_norm": NaN, "learning_rate": 5.379426693588485e-07, "loss": 0.0, "step": 3282 }, { "epoch": 1.9095535844118074, "grad_norm": NaN, "learning_rate": 5.310802692541416e-07, "loss": 0.0, "step": 3283 }, { "epoch": 1.9101352333866513, "grad_norm": NaN, "learning_rate": 5.242616878480599e-07, "loss": 0.0, "step": 3284 }, { "epoch": 1.9107168823614948, "grad_norm": NaN, "learning_rate": 5.174869311803831e-07, "loss": 0.0, "step": 3285 }, { "epoch": 1.9112985313363384, "grad_norm": NaN, "learning_rate": 5.107560052520665e-07, "loss": 0.0, "step": 3286 }, { "epoch": 1.9118801803111822, "grad_norm": NaN, "learning_rate": 5.040689160252576e-07, "loss": 0.0, "step": 3287 }, { "epoch": 1.912461829286026, "grad_norm": NaN, "learning_rate": 4.974256694232515e-07, "loss": 0.0, "step": 3288 }, { "epoch": 1.9130434782608696, "grad_norm": NaN, "learning_rate": 4.908262713305301e-07, "loss": 0.0, "step": 3289 }, { "epoch": 1.9136251272357132, "grad_norm": NaN, "learning_rate": 4.842707275927116e-07, "loss": 0.0, "step": 3290 }, { "epoch": 1.9142067762105568, "grad_norm": NaN, "learning_rate": 4.777590440165902e-07, "loss": 0.0, "step": 3291 }, { "epoch": 1.9147884251854006, "grad_norm": NaN, "learning_rate": 4.712912263701019e-07, "loss": 0.0, "step": 3292 }, { "epoch": 1.9153700741602444, "grad_norm": NaN, "learning_rate": 4.6486728038231397e-07, "loss": 0.0, "step": 3293 }, { "epoch": 1.915951723135088, "grad_norm": NaN, "learning_rate": 4.58487211743458e-07, "loss": 0.0, "step": 3294 }, { "epoch": 1.9165333721099316, "grad_norm": NaN, "learning_rate": 4.521510261048745e-07, "loss": 0.0, "step": 3295 }, { "epoch": 1.9171150210847754, "grad_norm": NaN, "learning_rate": 4.4585872907905723e-07, "loss": 0.0, "step": 3296 }, { "epoch": 1.917696670059619, "grad_norm": NaN, "learning_rate": 4.396103262396034e-07, "loss": 0.0, "step": 3297 }, { "epoch": 1.9182783190344628, "grad_norm": NaN, "learning_rate": 4.3340582312124123e-07, "loss": 0.0, "step": 3298 }, { "epoch": 1.9188599680093064, "grad_norm": NaN, "learning_rate": 4.2724522521981337e-07, "loss": 0.0, "step": 3299 }, { "epoch": 1.91944161698415, "grad_norm": NaN, "learning_rate": 4.211285379922658e-07, "loss": 0.0, "step": 3300 }, { "epoch": 1.9200232659589938, "grad_norm": NaN, "learning_rate": 4.150557668566535e-07, "loss": 0.0, "step": 3301 }, { "epoch": 1.9206049149338376, "grad_norm": NaN, "learning_rate": 4.0902691719212903e-07, "loss": 0.0, "step": 3302 }, { "epoch": 1.9211865639086811, "grad_norm": NaN, "learning_rate": 4.0304199433894296e-07, "loss": 0.0, "step": 3303 }, { "epoch": 1.9217682128835247, "grad_norm": NaN, "learning_rate": 3.9710100359842683e-07, "loss": 0.0, "step": 3304 }, { "epoch": 1.9223498618583683, "grad_norm": NaN, "learning_rate": 3.9120395023302114e-07, "loss": 0.0, "step": 3305 }, { "epoch": 1.9229315108332121, "grad_norm": NaN, "learning_rate": 3.8535083946621976e-07, "loss": 0.0, "step": 3306 }, { "epoch": 1.923513159808056, "grad_norm": NaN, "learning_rate": 3.7954167648260874e-07, "loss": 0.0, "step": 3307 }, { "epoch": 1.9240948087828995, "grad_norm": NaN, "learning_rate": 3.737764664278443e-07, "loss": 0.0, "step": 3308 }, { "epoch": 1.924676457757743, "grad_norm": NaN, "learning_rate": 3.680552144086469e-07, "loss": 0.0, "step": 3309 }, { "epoch": 1.925258106732587, "grad_norm": NaN, "learning_rate": 3.6237792549279613e-07, "loss": 0.0, "step": 3310 }, { "epoch": 1.9258397557074307, "grad_norm": NaN, "learning_rate": 3.567446047091416e-07, "loss": 0.0, "step": 3311 }, { "epoch": 1.9264214046822743, "grad_norm": NaN, "learning_rate": 3.511552570475807e-07, "loss": 0.0, "step": 3312 }, { "epoch": 1.9270030536571179, "grad_norm": NaN, "learning_rate": 3.4560988745904744e-07, "loss": 0.0, "step": 3313 }, { "epoch": 1.9275847026319615, "grad_norm": NaN, "learning_rate": 3.40108500855546e-07, "loss": 0.0, "step": 3314 }, { "epoch": 1.9281663516068053, "grad_norm": NaN, "learning_rate": 3.3465110211010065e-07, "loss": 0.0, "step": 3315 }, { "epoch": 1.928748000581649, "grad_norm": NaN, "learning_rate": 3.2923769605678335e-07, "loss": 0.0, "step": 3316 }, { "epoch": 1.9293296495564927, "grad_norm": NaN, "learning_rate": 3.238682874906918e-07, "loss": 0.0, "step": 3317 }, { "epoch": 1.9299112985313363, "grad_norm": NaN, "learning_rate": 3.185428811679492e-07, "loss": 0.0, "step": 3318 }, { "epoch": 1.93049294750618, "grad_norm": NaN, "learning_rate": 3.132614818057156e-07, "loss": 0.0, "step": 3319 }, { "epoch": 1.9310745964810239, "grad_norm": NaN, "learning_rate": 3.0802409408215436e-07, "loss": 0.0, "step": 3320 }, { "epoch": 1.9316562454558674, "grad_norm": NaN, "learning_rate": 3.028307226364546e-07, "loss": 0.0, "step": 3321 }, { "epoch": 1.932237894430711, "grad_norm": NaN, "learning_rate": 2.976813720688143e-07, "loss": 0.0, "step": 3322 }, { "epoch": 1.9328195434055546, "grad_norm": NaN, "learning_rate": 2.925760469404293e-07, "loss": 0.0, "step": 3323 }, { "epoch": 1.9334011923803984, "grad_norm": NaN, "learning_rate": 2.8751475177352106e-07, "loss": 0.0, "step": 3324 }, { "epoch": 1.9339828413552422, "grad_norm": NaN, "learning_rate": 2.8249749105128673e-07, "loss": 0.0, "step": 3325 }, { "epoch": 1.9345644903300858, "grad_norm": NaN, "learning_rate": 2.7752426921792695e-07, "loss": 0.0, "step": 3326 }, { "epoch": 1.9351461393049294, "grad_norm": NaN, "learning_rate": 2.7259509067863456e-07, "loss": 0.0, "step": 3327 }, { "epoch": 1.9357277882797732, "grad_norm": NaN, "learning_rate": 2.6770995979959467e-07, "loss": 0.0, "step": 3328 }, { "epoch": 1.9363094372546168, "grad_norm": NaN, "learning_rate": 2.628688809079682e-07, "loss": 0.0, "step": 3329 }, { "epoch": 1.9368910862294606, "grad_norm": NaN, "learning_rate": 2.5807185829189173e-07, "loss": 0.0, "step": 3330 }, { "epoch": 1.9374727352043042, "grad_norm": NaN, "learning_rate": 2.533188962004884e-07, "loss": 0.0, "step": 3331 }, { "epoch": 1.9380543841791478, "grad_norm": NaN, "learning_rate": 2.486099988438462e-07, "loss": 0.0, "step": 3332 }, { "epoch": 1.9386360331539916, "grad_norm": NaN, "learning_rate": 2.4394517039302844e-07, "loss": 0.0, "step": 3333 }, { "epoch": 1.9392176821288354, "grad_norm": NaN, "learning_rate": 2.3932441498005775e-07, "loss": 0.0, "step": 3334 }, { "epoch": 1.939799331103679, "grad_norm": NaN, "learning_rate": 2.3474773669791007e-07, "loss": 0.0, "step": 3335 }, { "epoch": 1.9403809800785226, "grad_norm": NaN, "learning_rate": 2.3021513960053143e-07, "loss": 0.0, "step": 3336 }, { "epoch": 1.9409626290533661, "grad_norm": NaN, "learning_rate": 2.2572662770281584e-07, "loss": 0.0, "step": 3337 }, { "epoch": 1.94154427802821, "grad_norm": NaN, "learning_rate": 2.2128220498061069e-07, "loss": 0.0, "step": 3338 }, { "epoch": 1.9421259270030538, "grad_norm": NaN, "learning_rate": 2.1688187537070581e-07, "loss": 0.0, "step": 3339 }, { "epoch": 1.9427075759778973, "grad_norm": NaN, "learning_rate": 2.125256427708333e-07, "loss": 0.0, "step": 3340 }, { "epoch": 1.943289224952741, "grad_norm": NaN, "learning_rate": 2.0821351103966768e-07, "loss": 0.0, "step": 3341 }, { "epoch": 1.9438708739275847, "grad_norm": NaN, "learning_rate": 2.0394548399682024e-07, "loss": 0.0, "step": 3342 }, { "epoch": 1.9444525229024285, "grad_norm": NaN, "learning_rate": 1.9972156542283904e-07, "loss": 0.0, "step": 3343 }, { "epoch": 1.9450341718772721, "grad_norm": NaN, "learning_rate": 1.9554175905919237e-07, "loss": 0.0, "step": 3344 }, { "epoch": 1.9456158208521157, "grad_norm": NaN, "learning_rate": 1.9140606860827969e-07, "loss": 0.0, "step": 3345 }, { "epoch": 1.9461974698269593, "grad_norm": NaN, "learning_rate": 1.8731449773342625e-07, "loss": 0.0, "step": 3346 }, { "epoch": 1.946779118801803, "grad_norm": NaN, "learning_rate": 1.8326705005886624e-07, "loss": 0.0, "step": 3347 }, { "epoch": 1.947360767776647, "grad_norm": NaN, "learning_rate": 1.7926372916975965e-07, "loss": 0.0, "step": 3348 }, { "epoch": 1.9479424167514905, "grad_norm": NaN, "learning_rate": 1.7530453861218098e-07, "loss": 0.0, "step": 3349 }, { "epoch": 1.948524065726334, "grad_norm": NaN, "learning_rate": 1.7138948189311387e-07, "loss": 0.0, "step": 3350 }, { "epoch": 1.9491057147011779, "grad_norm": NaN, "learning_rate": 1.6751856248043985e-07, "loss": 0.0, "step": 3351 }, { "epoch": 1.9496873636760215, "grad_norm": NaN, "learning_rate": 1.6369178380294948e-07, "loss": 0.0, "step": 3352 }, { "epoch": 1.9502690126508653, "grad_norm": NaN, "learning_rate": 1.5990914925034794e-07, "loss": 0.0, "step": 3353 }, { "epoch": 1.9508506616257089, "grad_norm": NaN, "learning_rate": 1.561706621732162e-07, "loss": 0.0, "step": 3354 }, { "epoch": 1.9514323106005524, "grad_norm": NaN, "learning_rate": 1.5247632588304417e-07, "loss": 0.0, "step": 3355 }, { "epoch": 1.9520139595753963, "grad_norm": NaN, "learning_rate": 1.488261436522087e-07, "loss": 0.0, "step": 3356 }, { "epoch": 1.95259560855024, "grad_norm": NaN, "learning_rate": 1.45220118713979e-07, "loss": 0.0, "step": 3357 }, { "epoch": 1.9531772575250836, "grad_norm": NaN, "learning_rate": 1.4165825426250557e-07, "loss": 0.0, "step": 3358 }, { "epoch": 1.9537589064999272, "grad_norm": NaN, "learning_rate": 1.3814055345282573e-07, "loss": 0.0, "step": 3359 }, { "epoch": 1.9543405554747708, "grad_norm": NaN, "learning_rate": 1.3466701940085259e-07, "loss": 0.0, "step": 3360 }, { "epoch": 1.9549222044496146, "grad_norm": NaN, "learning_rate": 1.3123765518339716e-07, "loss": 0.0, "step": 3361 }, { "epoch": 1.9555038534244584, "grad_norm": NaN, "learning_rate": 1.2785246383811288e-07, "loss": 0.0, "step": 3362 }, { "epoch": 1.956085502399302, "grad_norm": NaN, "learning_rate": 1.2451144836355123e-07, "loss": 0.0, "step": 3363 }, { "epoch": 1.9566671513741456, "grad_norm": NaN, "learning_rate": 1.2121461171912262e-07, "loss": 0.0, "step": 3364 }, { "epoch": 1.9572488003489894, "grad_norm": NaN, "learning_rate": 1.179619568251078e-07, "loss": 0.0, "step": 3365 }, { "epoch": 1.9578304493238332, "grad_norm": NaN, "learning_rate": 1.147534865626465e-07, "loss": 0.0, "step": 3366 }, { "epoch": 1.9584120982986768, "grad_norm": NaN, "learning_rate": 1.1158920377375426e-07, "loss": 0.0, "step": 3367 }, { "epoch": 1.9589937472735204, "grad_norm": NaN, "learning_rate": 1.08469111261289e-07, "loss": 0.0, "step": 3368 }, { "epoch": 1.959575396248364, "grad_norm": NaN, "learning_rate": 1.0539321178897888e-07, "loss": 0.0, "step": 3369 }, { "epoch": 1.9601570452232078, "grad_norm": NaN, "learning_rate": 1.0236150808139999e-07, "loss": 0.0, "step": 3370 }, { "epoch": 1.9607386941980516, "grad_norm": NaN, "learning_rate": 9.937400282398201e-08, "loss": 0.0, "step": 3371 }, { "epoch": 1.9613203431728952, "grad_norm": NaN, "learning_rate": 9.643069866300259e-08, "loss": 0.0, "step": 3372 }, { "epoch": 1.9619019921477387, "grad_norm": NaN, "learning_rate": 9.353159820559287e-08, "loss": 0.0, "step": 3373 }, { "epoch": 1.9624836411225826, "grad_norm": NaN, "learning_rate": 9.067670401972095e-08, "loss": 0.0, "step": 3374 }, { "epoch": 1.9630652900974264, "grad_norm": NaN, "learning_rate": 8.786601863420286e-08, "loss": 0.0, "step": 3375 }, { "epoch": 1.96364693907227, "grad_norm": NaN, "learning_rate": 8.509954453869152e-08, "loss": 0.0, "step": 3376 }, { "epoch": 1.9642285880471135, "grad_norm": NaN, "learning_rate": 8.23772841836934e-08, "loss": 0.0, "step": 3377 }, { "epoch": 1.9648102370219571, "grad_norm": NaN, "learning_rate": 7.96992399805241e-08, "loss": 0.0, "step": 3378 }, { "epoch": 1.965391885996801, "grad_norm": NaN, "learning_rate": 7.706541430135273e-08, "loss": 0.0, "step": 3379 }, { "epoch": 1.9659735349716447, "grad_norm": NaN, "learning_rate": 7.447580947917975e-08, "loss": 0.0, "step": 3380 }, { "epoch": 1.9665551839464883, "grad_norm": NaN, "learning_rate": 7.193042780782588e-08, "loss": 0.0, "step": 3381 }, { "epoch": 1.967136832921332, "grad_norm": NaN, "learning_rate": 6.942927154194867e-08, "loss": 0.0, "step": 3382 }, { "epoch": 1.9677184818961757, "grad_norm": NaN, "learning_rate": 6.697234289703147e-08, "loss": 0.0, "step": 3383 }, { "epoch": 1.9683001308710193, "grad_norm": NaN, "learning_rate": 6.455964404937232e-08, "loss": 0.0, "step": 3384 }, { "epoch": 1.968881779845863, "grad_norm": NaN, "learning_rate": 6.219117713610056e-08, "loss": 0.0, "step": 3385 }, { "epoch": 1.9694634288207067, "grad_norm": NaN, "learning_rate": 5.986694425516026e-08, "loss": 0.0, "step": 3386 }, { "epoch": 1.9700450777955503, "grad_norm": NaN, "learning_rate": 5.7586947465315675e-08, "loss": 0.0, "step": 3387 }, { "epoch": 1.970626726770394, "grad_norm": NaN, "learning_rate": 5.535118878615131e-08, "loss": 0.0, "step": 3388 }, { "epoch": 1.9712083757452379, "grad_norm": NaN, "learning_rate": 5.315967019806078e-08, "loss": 0.0, "step": 3389 }, { "epoch": 1.9717900247200815, "grad_norm": NaN, "learning_rate": 5.101239364225796e-08, "loss": 0.0, "step": 3390 }, { "epoch": 1.972371673694925, "grad_norm": NaN, "learning_rate": 4.890936102075472e-08, "loss": 0.0, "step": 3391 }, { "epoch": 1.9729533226697686, "grad_norm": NaN, "learning_rate": 4.685057419638317e-08, "loss": 0.0, "step": 3392 }, { "epoch": 1.9735349716446124, "grad_norm": NaN, "learning_rate": 4.4836034992779e-08, "loss": 0.0, "step": 3393 }, { "epoch": 1.9741166206194563, "grad_norm": NaN, "learning_rate": 4.286574519438702e-08, "loss": 0.0, "step": 3394 }, { "epoch": 1.9746982695942998, "grad_norm": NaN, "learning_rate": 4.0939706546461175e-08, "loss": 0.0, "step": 3395 }, { "epoch": 1.9752799185691434, "grad_norm": NaN, "learning_rate": 3.905792075504233e-08, "loss": 0.0, "step": 3396 }, { "epoch": 1.9758615675439872, "grad_norm": NaN, "learning_rate": 3.722038948698603e-08, "loss": 0.0, "step": 3397 }, { "epoch": 1.976443216518831, "grad_norm": NaN, "learning_rate": 3.542711436995139e-08, "loss": 0.0, "step": 3398 }, { "epoch": 1.9770248654936746, "grad_norm": NaN, "learning_rate": 3.3678096992384446e-08, "loss": 0.0, "step": 3399 }, { "epoch": 1.9776065144685182, "grad_norm": NaN, "learning_rate": 3.197333890353482e-08, "loss": 0.0, "step": 3400 }, { "epoch": 1.9781881634433618, "grad_norm": NaN, "learning_rate": 3.031284161344461e-08, "loss": 0.0, "step": 3401 }, { "epoch": 1.9787698124182056, "grad_norm": NaN, "learning_rate": 2.8696606592959475e-08, "loss": 0.0, "step": 3402 }, { "epoch": 1.9793514613930494, "grad_norm": NaN, "learning_rate": 2.7124635273712006e-08, "loss": 0.0, "step": 3403 }, { "epoch": 1.979933110367893, "grad_norm": NaN, "learning_rate": 2.5596929048116168e-08, "loss": 0.0, "step": 3404 }, { "epoch": 1.9805147593427366, "grad_norm": NaN, "learning_rate": 2.41134892694006e-08, "loss": 0.0, "step": 3405 }, { "epoch": 1.9810964083175804, "grad_norm": NaN, "learning_rate": 2.2674317251558664e-08, "loss": 0.0, "step": 3406 }, { "epoch": 1.981678057292424, "grad_norm": NaN, "learning_rate": 2.127941426938729e-08, "loss": 0.0, "step": 3407 }, { "epoch": 1.9822597062672678, "grad_norm": NaN, "learning_rate": 1.9928781558475883e-08, "loss": 0.0, "step": 3408 }, { "epoch": 1.9828413552421114, "grad_norm": NaN, "learning_rate": 1.862242031517858e-08, "loss": 0.0, "step": 3409 }, { "epoch": 1.983423004216955, "grad_norm": NaN, "learning_rate": 1.7360331696653075e-08, "loss": 0.0, "step": 3410 }, { "epoch": 1.9840046531917988, "grad_norm": NaN, "learning_rate": 1.614251682083845e-08, "loss": 0.0, "step": 3411 }, { "epoch": 1.9845863021666426, "grad_norm": NaN, "learning_rate": 1.496897676644404e-08, "loss": 0.0, "step": 3412 }, { "epoch": 1.9851679511414861, "grad_norm": NaN, "learning_rate": 1.3839712572977227e-08, "loss": 0.0, "step": 3413 }, { "epoch": 1.9857496001163297, "grad_norm": NaN, "learning_rate": 1.275472524072674e-08, "loss": 0.0, "step": 3414 }, { "epoch": 1.9863312490911733, "grad_norm": NaN, "learning_rate": 1.1714015730740492e-08, "loss": 0.0, "step": 3415 }, { "epoch": 1.9869128980660171, "grad_norm": NaN, "learning_rate": 1.0717584964869964e-08, "loss": 0.0, "step": 3416 }, { "epoch": 1.987494547040861, "grad_norm": NaN, "learning_rate": 9.765433825736914e-09, "loss": 0.0, "step": 3417 }, { "epoch": 1.9880761960157045, "grad_norm": NaN, "learning_rate": 8.857563156738913e-09, "loss": 0.0, "step": 3418 }, { "epoch": 1.988657844990548, "grad_norm": NaN, "learning_rate": 7.993973762049356e-09, "loss": 0.0, "step": 3419 }, { "epoch": 1.989239493965392, "grad_norm": NaN, "learning_rate": 7.1746664066230094e-09, "loss": 0.0, "step": 3420 }, { "epoch": 1.9898211429402357, "grad_norm": NaN, "learning_rate": 6.399641816184909e-09, "loss": 0.0, "step": 3421 }, { "epoch": 1.9904027919150793, "grad_norm": NaN, "learning_rate": 5.668900677235911e-09, "loss": 0.0, "step": 3422 }, { "epoch": 1.9909844408899229, "grad_norm": NaN, "learning_rate": 4.982443637063794e-09, "loss": 0.0, "step": 3423 }, { "epoch": 1.9915660898647665, "grad_norm": NaN, "learning_rate": 4.340271303715504e-09, "loss": 0.0, "step": 3424 }, { "epoch": 1.9921477388396103, "grad_norm": NaN, "learning_rate": 3.742384246008257e-09, "loss": 0.0, "step": 3425 }, { "epoch": 1.992729387814454, "grad_norm": NaN, "learning_rate": 3.188782993551742e-09, "loss": 0.0, "step": 3426 }, { "epoch": 1.9933110367892977, "grad_norm": NaN, "learning_rate": 2.679468036709265e-09, "loss": 0.0, "step": 3427 }, { "epoch": 1.9938926857641412, "grad_norm": NaN, "learning_rate": 2.2144398266199518e-09, "loss": 0.0, "step": 3428 }, { "epoch": 1.994474334738985, "grad_norm": NaN, "learning_rate": 1.7936987752098511e-09, "loss": 0.0, "step": 3429 }, { "epoch": 1.9950559837138289, "grad_norm": NaN, "learning_rate": 1.417245255153077e-09, "loss": 0.0, "step": 3430 }, { "epoch": 1.9956376326886724, "grad_norm": NaN, "learning_rate": 1.0850795999051143e-09, "loss": 0.0, "step": 3431 }, { "epoch": 1.996219281663516, "grad_norm": NaN, "learning_rate": 7.972021036972699e-10, "loss": 0.0, "step": 3432 }, { "epoch": 1.9968009306383596, "grad_norm": NaN, "learning_rate": 5.536130215311186e-10, "loss": 0.0, "step": 3433 }, { "epoch": 1.9973825796132034, "grad_norm": NaN, "learning_rate": 3.5431256916185207e-10, "loss": 0.0, "step": 3434 }, { "epoch": 1.9979642285880472, "grad_norm": NaN, "learning_rate": 1.9930092313158455e-10, "loss": 0.0, "step": 3435 }, { "epoch": 1.9985458775628908, "grad_norm": NaN, "learning_rate": 8.857822075269973e-11, "loss": 0.0, "step": 3436 }, { "epoch": 1.9991275265377344, "grad_norm": NaN, "learning_rate": 2.2144560091197363e-11, "loss": 0.0, "step": 3437 }, { "epoch": 1.9997091755125782, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3438 } ], "logging_steps": 1, "max_steps": 3438, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.203855633350656e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }