diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,17233 +1,17149 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.8004395604395604, + "epoch": 1.8000883262181657, "eval_steps": 500, - "global_step": 12288, + "global_step": 12228, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.00014652014652014652, - "grad_norm": 2.4159312562266457, - "learning_rate": 9.765625e-09, - "loss": 0.6521, + "epoch": 0.00014721036360959813, + "grad_norm": 2.728368243686464, + "learning_rate": 9.813542688910697e-09, + "loss": 0.6728, "step": 1 }, { - "epoch": 0.0007326007326007326, - "grad_norm": 2.242931374608585, - "learning_rate": 4.8828125e-08, - "loss": 0.6751, + "epoch": 0.0007360518180479906, + "grad_norm": 2.36436263451345, + "learning_rate": 4.9067713444553494e-08, + "loss": 0.6616, "step": 5 }, { - "epoch": 0.0014652014652014652, - "grad_norm": 2.0137413845879233, - "learning_rate": 9.765625e-08, - "loss": 0.6577, + "epoch": 0.0014721036360959812, + "grad_norm": 2.435737051160779, + "learning_rate": 9.813542688910699e-08, + "loss": 0.6802, "step": 10 }, { - "epoch": 0.002197802197802198, - "grad_norm": 2.4615827770889775, - "learning_rate": 1.4648437500000001e-07, - "loss": 0.6693, + "epoch": 0.002208155454143972, + "grad_norm": 2.4940211075102177, + "learning_rate": 1.4720314033366047e-07, + "loss": 0.6746, "step": 15 }, { - "epoch": 0.0029304029304029304, - "grad_norm": 2.0564880448453433, - "learning_rate": 1.953125e-07, - "loss": 0.6672, + "epoch": 0.0029442072721919624, + "grad_norm": 2.3800685236179864, + "learning_rate": 1.9627085377821398e-07, + "loss": 0.6693, "step": 20 }, { - "epoch": 0.003663003663003663, - "grad_norm": 2.0796070464691887, - "learning_rate": 2.44140625e-07, - "loss": 0.6476, + "epoch": 0.003680259090239953, + "grad_norm": 1.7854052217014194, + "learning_rate": 2.4533856722276743e-07, + "loss": 0.6401, "step": 25 }, { - "epoch": 0.004395604395604396, - "grad_norm": 1.6194878179523382, - "learning_rate": 2.9296875000000003e-07, - "loss": 0.6567, + "epoch": 0.004416310908287944, + "grad_norm": 1.8729460237639912, + "learning_rate": 2.9440628066732094e-07, + "loss": 0.6544, "step": 30 }, { - "epoch": 0.005128205128205128, - "grad_norm": 1.5050734017216072, - "learning_rate": 3.4179687500000005e-07, - "loss": 0.6363, + "epoch": 0.005152362726335934, + "grad_norm": 1.3749340111576625, + "learning_rate": 3.434739941118744e-07, + "loss": 0.6246, "step": 35 }, { - "epoch": 0.005860805860805861, - "grad_norm": 1.4347029801195341, - "learning_rate": 3.90625e-07, - "loss": 0.6413, + "epoch": 0.005888414544383925, + "grad_norm": 1.548613812409993, + "learning_rate": 3.9254170755642795e-07, + "loss": 0.6268, "step": 40 }, { - "epoch": 0.006593406593406593, - "grad_norm": 1.1673856057290772, - "learning_rate": 4.3945312500000004e-07, - "loss": 0.6301, + "epoch": 0.006624466362431915, + "grad_norm": 1.2162198549255423, + "learning_rate": 4.4160942100098135e-07, + "loss": 0.632, "step": 45 }, { - "epoch": 0.007326007326007326, - "grad_norm": 1.09382389234995, - "learning_rate": 4.8828125e-07, - "loss": 0.5953, + "epoch": 0.007360518180479906, + "grad_norm": 1.1344896752663411, + "learning_rate": 4.906771344455349e-07, + "loss": 0.5938, "step": 50 }, { - "epoch": 0.00805860805860806, - "grad_norm": 0.95912234695919, - "learning_rate": 5.37109375e-07, - "loss": 0.5915, + "epoch": 0.008096569998527897, + "grad_norm": 1.0153229071086318, + "learning_rate": 5.397448478900884e-07, + "loss": 0.6086, "step": 55 }, { - "epoch": 0.008791208791208791, - "grad_norm": 0.8028087842621966, - "learning_rate": 5.859375000000001e-07, - "loss": 0.5964, + "epoch": 0.008832621816575887, + "grad_norm": 0.8911853226301731, + "learning_rate": 5.888125613346419e-07, + "loss": 0.5814, "step": 60 }, { - "epoch": 0.009523809523809525, - "grad_norm": 0.7618383092986746, - "learning_rate": 6.347656250000001e-07, - "loss": 0.5709, + "epoch": 0.009568673634623878, + "grad_norm": 0.7253317192060568, + "learning_rate": 6.378802747791954e-07, + "loss": 0.5606, "step": 65 }, { - "epoch": 0.010256410256410256, - "grad_norm": 0.6938568928414701, - "learning_rate": 6.835937500000001e-07, - "loss": 0.5726, + "epoch": 0.010304725452671868, + "grad_norm": 0.7901414889935009, + "learning_rate": 6.869479882237488e-07, + "loss": 0.5596, "step": 70 }, { - "epoch": 0.01098901098901099, - "grad_norm": 0.6871846255942565, - "learning_rate": 7.32421875e-07, - "loss": 0.5876, + "epoch": 0.011040777270719858, + "grad_norm": 0.7005773323630946, + "learning_rate": 7.360157016683022e-07, + "loss": 0.5637, "step": 75 }, { - "epoch": 0.011721611721611722, - "grad_norm": 0.6696248589211605, - "learning_rate": 7.8125e-07, - "loss": 0.5407, + "epoch": 0.01177682908876785, + "grad_norm": 0.7482787893329246, + "learning_rate": 7.850834151128559e-07, + "loss": 0.5626, "step": 80 }, { - "epoch": 0.012454212454212455, - "grad_norm": 0.6969345857899127, - "learning_rate": 8.300781250000001e-07, - "loss": 0.5514, + "epoch": 0.01251288090681584, + "grad_norm": 0.6103751688535668, + "learning_rate": 8.341511285574093e-07, + "loss": 0.5411, "step": 85 }, { - "epoch": 0.013186813186813187, - "grad_norm": 0.7080168030720344, - "learning_rate": 8.789062500000001e-07, - "loss": 0.5593, + "epoch": 0.01324893272486383, + "grad_norm": 0.6140626656209314, + "learning_rate": 8.832188420019627e-07, + "loss": 0.5437, "step": 90 }, { - "epoch": 0.01391941391941392, - "grad_norm": 0.6234493870275205, - "learning_rate": 9.277343750000001e-07, - "loss": 0.5358, + "epoch": 0.01398498454291182, + "grad_norm": 0.6232501535340139, + "learning_rate": 9.322865554465163e-07, + "loss": 0.5418, "step": 95 }, { - "epoch": 0.014652014652014652, - "grad_norm": 0.6423243268742604, - "learning_rate": 9.765625e-07, - "loss": 0.5233, + "epoch": 0.014721036360959812, + "grad_norm": 0.6084600926693361, + "learning_rate": 9.813542688910697e-07, + "loss": 0.5351, "step": 100 }, { - "epoch": 0.015384615384615385, - "grad_norm": 0.6445591567631446, - "learning_rate": 1.0253906250000001e-06, - "loss": 0.5232, + "epoch": 0.015457088179007802, + "grad_norm": 0.6302523646733236, + "learning_rate": 1.0304219823356233e-06, + "loss": 0.5262, "step": 105 }, { - "epoch": 0.01611721611721612, - "grad_norm": 0.5895225400800269, - "learning_rate": 1.07421875e-06, - "loss": 0.5491, + "epoch": 0.016193139997055794, + "grad_norm": 0.6096300857012811, + "learning_rate": 1.0794896957801768e-06, + "loss": 0.5136, "step": 110 }, { - "epoch": 0.01684981684981685, - "grad_norm": 0.6024446639287304, - "learning_rate": 1.1230468750000002e-06, - "loss": 0.5249, + "epoch": 0.016929191815103783, + "grad_norm": 0.6011309359210671, + "learning_rate": 1.1285574092247302e-06, + "loss": 0.5163, "step": 115 }, { - "epoch": 0.017582417582417582, - "grad_norm": 0.6352242041819796, - "learning_rate": 1.1718750000000001e-06, - "loss": 0.5268, + "epoch": 0.017665243633151775, + "grad_norm": 0.6104693046051632, + "learning_rate": 1.1776251226692837e-06, + "loss": 0.5253, "step": 120 }, { - "epoch": 0.018315018315018316, - "grad_norm": 0.5743873313770402, - "learning_rate": 1.220703125e-06, - "loss": 0.5359, + "epoch": 0.018401295451199763, + "grad_norm": 0.6049006080080384, + "learning_rate": 1.2266928361138373e-06, + "loss": 0.5071, "step": 125 }, { - "epoch": 0.01904761904761905, - "grad_norm": 0.6295853399822839, - "learning_rate": 1.2695312500000002e-06, - "loss": 0.5086, + "epoch": 0.019137347269247755, + "grad_norm": 0.6135220924932349, + "learning_rate": 1.2757605495583909e-06, + "loss": 0.5341, "step": 130 }, { - "epoch": 0.01978021978021978, - "grad_norm": 0.5942595247940665, - "learning_rate": 1.318359375e-06, - "loss": 0.5291, + "epoch": 0.019873399087295747, + "grad_norm": 0.5741753057894418, + "learning_rate": 1.324828263002944e-06, + "loss": 0.5184, "step": 135 }, { - "epoch": 0.020512820512820513, - "grad_norm": 0.5970756645818605, - "learning_rate": 1.3671875000000002e-06, - "loss": 0.5179, + "epoch": 0.020609450905343735, + "grad_norm": 0.5682793875483976, + "learning_rate": 1.3738959764474976e-06, + "loss": 0.5146, "step": 140 }, { - "epoch": 0.021245421245421246, - "grad_norm": 0.6016303784928376, - "learning_rate": 1.4160156250000001e-06, - "loss": 0.5169, + "epoch": 0.021345502723391727, + "grad_norm": 0.5801604412057352, + "learning_rate": 1.4229636898920513e-06, + "loss": 0.5006, "step": 145 }, { - "epoch": 0.02197802197802198, - "grad_norm": 0.6041929264356795, - "learning_rate": 1.46484375e-06, - "loss": 0.5093, + "epoch": 0.022081554541439716, + "grad_norm": 0.5716040330610084, + "learning_rate": 1.4720314033366045e-06, + "loss": 0.4972, "step": 150 }, { - "epoch": 0.02271062271062271, - "grad_norm": 0.6056640633462855, - "learning_rate": 1.5136718750000002e-06, - "loss": 0.5233, + "epoch": 0.022817606359487708, + "grad_norm": 0.6383262495009793, + "learning_rate": 1.521099116781158e-06, + "loss": 0.5134, "step": 155 }, { - "epoch": 0.023443223443223443, - "grad_norm": 0.5955769301786994, - "learning_rate": 1.5625e-06, - "loss": 0.4934, + "epoch": 0.0235536581775357, + "grad_norm": 0.574501385772374, + "learning_rate": 1.5701668302257118e-06, + "loss": 0.5305, "step": 160 }, { - "epoch": 0.024175824175824177, - "grad_norm": 0.5834862533300711, - "learning_rate": 1.6113281250000002e-06, - "loss": 0.515, + "epoch": 0.024289709995583688, + "grad_norm": 0.624077819074781, + "learning_rate": 1.619234543670265e-06, + "loss": 0.5052, "step": 165 }, { - "epoch": 0.02490842490842491, - "grad_norm": 0.5741046251131711, - "learning_rate": 1.6601562500000001e-06, - "loss": 0.5026, + "epoch": 0.02502576181363168, + "grad_norm": 0.5798590790669421, + "learning_rate": 1.6683022571148185e-06, + "loss": 0.501, "step": 170 }, { - "epoch": 0.02564102564102564, - "grad_norm": 0.6100284855554404, - "learning_rate": 1.708984375e-06, - "loss": 0.5162, + "epoch": 0.025761813631679672, + "grad_norm": 0.58108200938206, + "learning_rate": 1.717369970559372e-06, + "loss": 0.5067, "step": 175 }, { - "epoch": 0.026373626373626374, - "grad_norm": 0.5674946945185096, - "learning_rate": 1.7578125000000002e-06, - "loss": 0.4726, + "epoch": 0.02649786544972766, + "grad_norm": 0.591325748468392, + "learning_rate": 1.7664376840039254e-06, + "loss": 0.5129, "step": 180 }, { - "epoch": 0.027106227106227107, - "grad_norm": 0.6049877055480813, - "learning_rate": 1.806640625e-06, - "loss": 0.4993, + "epoch": 0.027233917267775652, + "grad_norm": 0.6038913900244864, + "learning_rate": 1.815505397448479e-06, + "loss": 0.4816, "step": 185 }, { - "epoch": 0.02783882783882784, - "grad_norm": 0.6305774506688272, - "learning_rate": 1.8554687500000002e-06, - "loss": 0.5038, + "epoch": 0.02796996908582364, + "grad_norm": 0.5775965197667471, + "learning_rate": 1.8645731108930325e-06, + "loss": 0.4966, "step": 190 }, { - "epoch": 0.02857142857142857, - "grad_norm": 0.5922615313022872, - "learning_rate": 1.9042968750000001e-06, - "loss": 0.5033, + "epoch": 0.028706020903871633, + "grad_norm": 0.6133721061273785, + "learning_rate": 1.913640824337586e-06, + "loss": 0.5071, "step": 195 }, { - "epoch": 0.029304029304029304, - "grad_norm": 0.5836180906085862, - "learning_rate": 1.953125e-06, - "loss": 0.4966, + "epoch": 0.029442072721919624, + "grad_norm": 0.5922070936085064, + "learning_rate": 1.9627085377821394e-06, + "loss": 0.4921, "step": 200 }, { - "epoch": 0.030036630036630037, - "grad_norm": 0.6174338649362718, - "learning_rate": 2.001953125e-06, - "loss": 0.504, + "epoch": 0.030178124539967613, + "grad_norm": 0.5917430267113624, + "learning_rate": 2.011776251226693e-06, + "loss": 0.4921, "step": 205 }, { - "epoch": 0.03076923076923077, - "grad_norm": 0.576153770808032, - "learning_rate": 2.0507812500000003e-06, - "loss": 0.4887, + "epoch": 0.030914176358015605, + "grad_norm": 0.6221645979156015, + "learning_rate": 2.0608439646712466e-06, + "loss": 0.4859, "step": 210 }, { - "epoch": 0.0315018315018315, - "grad_norm": 0.6030157714600828, - "learning_rate": 2.099609375e-06, - "loss": 0.5043, + "epoch": 0.0316502281760636, + "grad_norm": 0.554467637353109, + "learning_rate": 2.1099116781158e-06, + "loss": 0.5035, "step": 215 }, { - "epoch": 0.03223443223443224, - "grad_norm": 0.5486182063481856, - "learning_rate": 2.1484375e-06, - "loss": 0.465, + "epoch": 0.03238627999411159, + "grad_norm": 0.6227780076273214, + "learning_rate": 2.1589793915603537e-06, + "loss": 0.4956, "step": 220 }, { - "epoch": 0.03296703296703297, - "grad_norm": 0.5825942750922115, - "learning_rate": 2.1972656250000003e-06, - "loss": 0.4991, + "epoch": 0.033122331812159574, + "grad_norm": 0.5912071689774797, + "learning_rate": 2.208047105004907e-06, + "loss": 0.5034, "step": 225 }, { - "epoch": 0.0336996336996337, - "grad_norm": 0.5771727389731394, - "learning_rate": 2.2460937500000004e-06, - "loss": 0.5067, + "epoch": 0.033858383630207566, + "grad_norm": 0.5710270435221912, + "learning_rate": 2.2571148184494604e-06, + "loss": 0.5062, "step": 230 }, { - "epoch": 0.034432234432234435, - "grad_norm": 0.6439214652767457, - "learning_rate": 2.294921875e-06, - "loss": 0.5181, + "epoch": 0.03459443544825556, + "grad_norm": 0.5723517153838513, + "learning_rate": 2.306182531894014e-06, + "loss": 0.4973, "step": 235 }, { - "epoch": 0.035164835164835165, - "grad_norm": 0.5874429834389425, - "learning_rate": 2.3437500000000002e-06, - "loss": 0.4784, + "epoch": 0.03533048726630355, + "grad_norm": 0.6239526253603609, + "learning_rate": 2.3552502453385675e-06, + "loss": 0.4966, "step": 240 }, { - "epoch": 0.035897435897435895, - "grad_norm": 0.6170127311772752, - "learning_rate": 2.3925781250000003e-06, - "loss": 0.5006, + "epoch": 0.03606653908435154, + "grad_norm": 0.5840342287619873, + "learning_rate": 2.404317958783121e-06, + "loss": 0.4749, "step": 245 }, { - "epoch": 0.03663003663003663, - "grad_norm": 0.613101392672586, - "learning_rate": 2.44140625e-06, - "loss": 0.4976, + "epoch": 0.036802590902399526, + "grad_norm": 0.5853430651319891, + "learning_rate": 2.4533856722276746e-06, + "loss": 0.4985, "step": 250 }, { - "epoch": 0.03736263736263736, - "grad_norm": 0.5753422378290689, - "learning_rate": 2.490234375e-06, - "loss": 0.4885, + "epoch": 0.03753864272044752, + "grad_norm": 0.583836894163469, + "learning_rate": 2.5024533856722278e-06, + "loss": 0.5003, "step": 255 }, { - "epoch": 0.0380952380952381, - "grad_norm": 0.6133895496160483, - "learning_rate": 2.5390625000000003e-06, - "loss": 0.5008, + "epoch": 0.03827469453849551, + "grad_norm": 0.6068565691460917, + "learning_rate": 2.5515210991167817e-06, + "loss": 0.4752, "step": 260 }, { - "epoch": 0.03882783882783883, - "grad_norm": 0.5964068990574745, - "learning_rate": 2.587890625e-06, - "loss": 0.5151, + "epoch": 0.0390107463565435, + "grad_norm": 0.6260409519393997, + "learning_rate": 2.600588812561335e-06, + "loss": 0.5041, "step": 265 }, { - "epoch": 0.03956043956043956, - "grad_norm": 0.6182569257932172, - "learning_rate": 2.63671875e-06, - "loss": 0.4983, + "epoch": 0.039746798174591494, + "grad_norm": 0.6125760382103062, + "learning_rate": 2.649656526005888e-06, + "loss": 0.5063, "step": 270 }, { - "epoch": 0.040293040293040296, - "grad_norm": 0.5736263115770767, - "learning_rate": 2.6855468750000003e-06, - "loss": 0.4892, + "epoch": 0.04048284999263948, + "grad_norm": 0.5944574529155787, + "learning_rate": 2.698724239450442e-06, + "loss": 0.4808, "step": 275 }, { - "epoch": 0.041025641025641026, - "grad_norm": 0.619235921760934, - "learning_rate": 2.7343750000000004e-06, - "loss": 0.4738, + "epoch": 0.04121890181068747, + "grad_norm": 0.6456076986145468, + "learning_rate": 2.747791952894995e-06, + "loss": 0.4952, "step": 280 }, { - "epoch": 0.041758241758241756, - "grad_norm": 0.6539149573620286, - "learning_rate": 2.783203125e-06, - "loss": 0.4922, + "epoch": 0.04195495362873546, + "grad_norm": 0.9915769812640363, + "learning_rate": 2.7968596663395487e-06, + "loss": 0.501, "step": 285 }, { - "epoch": 0.04249084249084249, - "grad_norm": 0.61689092854405, - "learning_rate": 2.8320312500000002e-06, - "loss": 0.4976, + "epoch": 0.042691005446783455, + "grad_norm": 0.5920463349789087, + "learning_rate": 2.8459273797841027e-06, + "loss": 0.4931, "step": 290 }, { - "epoch": 0.04322344322344322, - "grad_norm": 0.628660225181727, - "learning_rate": 2.8808593750000004e-06, - "loss": 0.4826, + "epoch": 0.043427057264831447, + "grad_norm": 0.6575309659536147, + "learning_rate": 2.894995093228656e-06, + "loss": 0.4983, "step": 295 }, { - "epoch": 0.04395604395604396, - "grad_norm": 0.6617557087521471, - "learning_rate": 2.9296875e-06, - "loss": 0.4932, + "epoch": 0.04416310908287943, + "grad_norm": 0.5667173217760384, + "learning_rate": 2.944062806673209e-06, + "loss": 0.4904, "step": 300 }, { - "epoch": 0.04468864468864469, - "grad_norm": 0.5622158080515355, - "learning_rate": 2.978515625e-06, - "loss": 0.4662, + "epoch": 0.04489916090092742, + "grad_norm": 0.5821180180751933, + "learning_rate": 2.993130520117763e-06, + "loss": 0.4919, "step": 305 }, { - "epoch": 0.04542124542124542, - "grad_norm": 0.6464987442594975, - "learning_rate": 3.0273437500000003e-06, - "loss": 0.4891, + "epoch": 0.045635212718975415, + "grad_norm": 0.6231991536660229, + "learning_rate": 3.042198233562316e-06, + "loss": 0.4888, "step": 310 }, { - "epoch": 0.046153846153846156, - "grad_norm": 0.6216580067513613, - "learning_rate": 3.0761718750000004e-06, - "loss": 0.4831, + "epoch": 0.04637126453702341, + "grad_norm": 0.59956643814679, + "learning_rate": 3.0912659470068696e-06, + "loss": 0.4598, "step": 315 }, { - "epoch": 0.046886446886446886, - "grad_norm": 0.5807795396390456, - "learning_rate": 3.125e-06, - "loss": 0.4658, + "epoch": 0.0471073163550714, + "grad_norm": 0.5779052883486501, + "learning_rate": 3.1403336604514236e-06, + "loss": 0.4779, "step": 320 }, { - "epoch": 0.047619047619047616, - "grad_norm": 0.6324392259846056, - "learning_rate": 3.1738281250000003e-06, - "loss": 0.506, + "epoch": 0.04784336817311939, + "grad_norm": 0.6056761160995231, + "learning_rate": 3.1894013738959768e-06, + "loss": 0.4994, "step": 325 }, { - "epoch": 0.04835164835164835, - "grad_norm": 0.5599658664516196, - "learning_rate": 3.2226562500000004e-06, - "loss": 0.4736, + "epoch": 0.048579419991167376, + "grad_norm": 0.599035762897119, + "learning_rate": 3.23846908734053e-06, + "loss": 0.4785, "step": 330 }, { - "epoch": 0.04908424908424908, - "grad_norm": 0.6260406463036369, - "learning_rate": 3.271484375e-06, - "loss": 0.4856, + "epoch": 0.04931547180921537, + "grad_norm": 0.6302501472689347, + "learning_rate": 3.287536800785084e-06, + "loss": 0.4761, "step": 335 }, { - "epoch": 0.04981684981684982, - "grad_norm": 0.6106796656996849, - "learning_rate": 3.3203125000000002e-06, - "loss": 0.4745, + "epoch": 0.05005152362726336, + "grad_norm": 0.6243951085037258, + "learning_rate": 3.336604514229637e-06, + "loss": 0.4978, "step": 340 }, { - "epoch": 0.05054945054945055, - "grad_norm": 0.5770205864950204, - "learning_rate": 3.3691406250000004e-06, - "loss": 0.4841, + "epoch": 0.05078757544531135, + "grad_norm": 0.6106406332182736, + "learning_rate": 3.3856722276741906e-06, + "loss": 0.4869, "step": 345 }, { - "epoch": 0.05128205128205128, - "grad_norm": 0.5935897914996285, - "learning_rate": 3.41796875e-06, - "loss": 0.4745, + "epoch": 0.051523627263359344, + "grad_norm": 0.6749576604983416, + "learning_rate": 3.434739941118744e-06, + "loss": 0.4839, "step": 350 }, { - "epoch": 0.05201465201465202, - "grad_norm": 0.5871039026588047, - "learning_rate": 3.466796875e-06, - "loss": 0.4853, + "epoch": 0.05225967908140733, + "grad_norm": 0.6327724849785807, + "learning_rate": 3.4838076545632977e-06, + "loss": 0.4885, "step": 355 }, { - "epoch": 0.05274725274725275, - "grad_norm": 0.5882570675337384, - "learning_rate": 3.5156250000000003e-06, - "loss": 0.4748, + "epoch": 0.05299573089945532, + "grad_norm": 0.5768211285301043, + "learning_rate": 3.532875368007851e-06, + "loss": 0.4615, "step": 360 }, { - "epoch": 0.05347985347985348, - "grad_norm": 0.6008326193899312, - "learning_rate": 3.5644531250000005e-06, - "loss": 0.4793, + "epoch": 0.05373178271750331, + "grad_norm": 0.5919583609982937, + "learning_rate": 3.581943081452405e-06, + "loss": 0.4765, "step": 365 }, { - "epoch": 0.054212454212454214, - "grad_norm": 0.5964595164304082, - "learning_rate": 3.61328125e-06, - "loss": 0.4792, + "epoch": 0.054467834535551304, + "grad_norm": 0.5871500912160249, + "learning_rate": 3.631010794896958e-06, + "loss": 0.4806, "step": 370 }, { - "epoch": 0.054945054945054944, - "grad_norm": 0.613105851356729, - "learning_rate": 3.6621093750000003e-06, - "loss": 0.4778, + "epoch": 0.055203886353599296, + "grad_norm": 0.6106136580240804, + "learning_rate": 3.6800785083415115e-06, + "loss": 0.4826, "step": 375 }, { - "epoch": 0.05567765567765568, - "grad_norm": 0.6294016643255824, - "learning_rate": 3.7109375000000004e-06, - "loss": 0.4755, + "epoch": 0.05593993817164728, + "grad_norm": 0.5941821734755603, + "learning_rate": 3.729146221786065e-06, + "loss": 0.4762, "step": 380 }, { - "epoch": 0.05641025641025641, - "grad_norm": 0.5905210786928944, - "learning_rate": 3.759765625e-06, - "loss": 0.4751, + "epoch": 0.05667598998969527, + "grad_norm": 0.6119338751306809, + "learning_rate": 3.7782139352306186e-06, + "loss": 0.4984, "step": 385 }, { - "epoch": 0.05714285714285714, - "grad_norm": 0.572125914373807, - "learning_rate": 3.8085937500000002e-06, - "loss": 0.4857, + "epoch": 0.057412041807743265, + "grad_norm": 0.5792835900775687, + "learning_rate": 3.827281648675172e-06, + "loss": 0.4742, "step": 390 }, { - "epoch": 0.05787545787545788, - "grad_norm": 0.5797543518358743, - "learning_rate": 3.857421875e-06, - "loss": 0.473, + "epoch": 0.05814809362579126, + "grad_norm": 0.5857316557188246, + "learning_rate": 3.876349362119725e-06, + "loss": 0.487, "step": 395 }, { - "epoch": 0.05860805860805861, - "grad_norm": 0.6375925669051277, - "learning_rate": 3.90625e-06, - "loss": 0.4856, + "epoch": 0.05888414544383925, + "grad_norm": 0.5871381247935139, + "learning_rate": 3.925417075564279e-06, + "loss": 0.4664, "step": 400 }, { - "epoch": 0.05934065934065934, - "grad_norm": 0.5984195282406634, - "learning_rate": 3.955078125000001e-06, - "loss": 0.4912, + "epoch": 0.059620197261887234, + "grad_norm": 0.5996770410892924, + "learning_rate": 3.9744847890088324e-06, + "loss": 0.4818, "step": 405 }, { - "epoch": 0.060073260073260075, - "grad_norm": 0.6610747118232397, - "learning_rate": 4.00390625e-06, - "loss": 0.4769, + "epoch": 0.060356249079935226, + "grad_norm": 0.6078308418061451, + "learning_rate": 4.023552502453386e-06, + "loss": 0.4723, "step": 410 }, { - "epoch": 0.060805860805860805, - "grad_norm": 0.5884293595488747, - "learning_rate": 4.052734375e-06, - "loss": 0.4761, + "epoch": 0.06109230089798322, + "grad_norm": 0.6072936425940079, + "learning_rate": 4.0726202158979396e-06, + "loss": 0.4947, "step": 415 }, { - "epoch": 0.06153846153846154, - "grad_norm": 0.6043701478533586, - "learning_rate": 4.101562500000001e-06, - "loss": 0.4793, + "epoch": 0.06182835271603121, + "grad_norm": 0.5854312934656148, + "learning_rate": 4.121687929342493e-06, + "loss": 0.4721, "step": 420 }, { - "epoch": 0.06227106227106227, - "grad_norm": 0.5822030559431073, - "learning_rate": 4.150390625e-06, - "loss": 0.4633, + "epoch": 0.0625644045340792, + "grad_norm": 0.5912636415377042, + "learning_rate": 4.170755642787047e-06, + "loss": 0.483, "step": 425 }, { - "epoch": 0.063003663003663, - "grad_norm": 0.5959155128039212, - "learning_rate": 4.19921875e-06, - "loss": 0.4733, + "epoch": 0.0633004563521272, + "grad_norm": 0.5368794998653236, + "learning_rate": 4.2198233562316e-06, + "loss": 0.4685, "step": 430 }, { - "epoch": 0.06373626373626373, - "grad_norm": 0.6188697338791965, - "learning_rate": 4.2480468750000006e-06, - "loss": 0.491, + "epoch": 0.06403650817017519, + "grad_norm": 0.5951573811609723, + "learning_rate": 4.268891069676153e-06, + "loss": 0.4773, "step": 435 }, { - "epoch": 0.06446886446886448, - "grad_norm": 0.6097716090590869, - "learning_rate": 4.296875e-06, - "loss": 0.474, + "epoch": 0.06477255998822318, + "grad_norm": 0.5891969626171352, + "learning_rate": 4.317958783120707e-06, + "loss": 0.4511, "step": 440 }, { - "epoch": 0.0652014652014652, - "grad_norm": 0.6123749603827808, - "learning_rate": 4.345703125e-06, - "loss": 0.4624, + "epoch": 0.06550861180627116, + "grad_norm": 0.6085822185086244, + "learning_rate": 4.36702649656526e-06, + "loss": 0.4742, "step": 445 }, { - "epoch": 0.06593406593406594, - "grad_norm": 0.5873918820746148, - "learning_rate": 4.3945312500000005e-06, - "loss": 0.4831, + "epoch": 0.06624466362431915, + "grad_norm": 0.5767003298984905, + "learning_rate": 4.416094210009814e-06, + "loss": 0.4721, "step": 450 }, { - "epoch": 0.06666666666666667, - "grad_norm": 0.5894168087718852, - "learning_rate": 4.443359375e-06, - "loss": 0.4898, + "epoch": 0.06698071544236714, + "grad_norm": 0.5902641756754682, + "learning_rate": 4.465161923454367e-06, + "loss": 0.465, "step": 455 }, { - "epoch": 0.0673992673992674, - "grad_norm": 0.5678993089140597, - "learning_rate": 4.492187500000001e-06, - "loss": 0.4717, + "epoch": 0.06771676726041513, + "grad_norm": 0.5689500427434829, + "learning_rate": 4.514229636898921e-06, + "loss": 0.4501, "step": 460 }, { - "epoch": 0.06813186813186813, - "grad_norm": 0.6204084821274443, - "learning_rate": 4.5410156250000005e-06, - "loss": 0.4733, + "epoch": 0.06845281907846312, + "grad_norm": 0.6103508671952677, + "learning_rate": 4.563297350343474e-06, + "loss": 0.4821, "step": 465 }, { - "epoch": 0.06886446886446887, - "grad_norm": 0.5906487740302222, - "learning_rate": 4.58984375e-06, - "loss": 0.493, + "epoch": 0.06918887089651111, + "grad_norm": 0.6380519885930872, + "learning_rate": 4.612365063788028e-06, + "loss": 0.4709, "step": 470 }, { - "epoch": 0.0695970695970696, - "grad_norm": 0.5953739735003926, - "learning_rate": 4.638671875000001e-06, - "loss": 0.4949, + "epoch": 0.0699249227145591, + "grad_norm": 0.6310313258517151, + "learning_rate": 4.6614327772325814e-06, + "loss": 0.4927, "step": 475 }, { - "epoch": 0.07032967032967033, - "grad_norm": 0.5728951469349395, - "learning_rate": 4.6875000000000004e-06, - "loss": 0.4738, + "epoch": 0.0706609745326071, + "grad_norm": 0.6181666793600091, + "learning_rate": 4.710500490677135e-06, + "loss": 0.5014, "step": 480 }, { - "epoch": 0.07106227106227106, - "grad_norm": 0.6061175641189019, - "learning_rate": 4.736328125e-06, - "loss": 0.4758, + "epoch": 0.07139702635065509, + "grad_norm": 0.585653829471279, + "learning_rate": 4.7595682041216886e-06, + "loss": 0.4846, "step": 485 }, { - "epoch": 0.07179487179487179, - "grad_norm": 0.6124214135481375, - "learning_rate": 4.785156250000001e-06, - "loss": 0.5028, + "epoch": 0.07213307816870308, + "grad_norm": 0.5922456519304234, + "learning_rate": 4.808635917566242e-06, + "loss": 0.4759, "step": 490 }, { - "epoch": 0.07252747252747253, - "grad_norm": 0.6074050788211793, - "learning_rate": 4.833984375e-06, - "loss": 0.4825, + "epoch": 0.07286912998675106, + "grad_norm": 0.5891457619852838, + "learning_rate": 4.857703631010796e-06, + "loss": 0.4624, "step": 495 }, { - "epoch": 0.07326007326007326, - "grad_norm": 0.5641153492172148, - "learning_rate": 4.8828125e-06, - "loss": 0.4791, + "epoch": 0.07360518180479905, + "grad_norm": 0.5845369377813938, + "learning_rate": 4.906771344455349e-06, + "loss": 0.4669, "step": 500 }, { - "epoch": 0.073992673992674, - "grad_norm": 0.589145847070636, - "learning_rate": 4.931640625000001e-06, - "loss": 0.4775, + "epoch": 0.07434123362284704, + "grad_norm": 0.56432460580195, + "learning_rate": 4.955839057899902e-06, + "loss": 0.4937, "step": 505 }, { - "epoch": 0.07472527472527472, - "grad_norm": 0.6081676703097589, - "learning_rate": 4.98046875e-06, - "loss": 0.48, + "epoch": 0.07507728544089504, + "grad_norm": 0.5659440877842081, + "learning_rate": 5.0049067713444555e-06, + "loss": 0.4657, "step": 510 }, { - "epoch": 0.07545787545787545, - "grad_norm": 0.5812914515901026, - "learning_rate": 5.029296875e-06, - "loss": 0.48, + "epoch": 0.07581333725894303, + "grad_norm": 0.5505888303562148, + "learning_rate": 5.053974484789009e-06, + "loss": 0.467, "step": 515 }, { - "epoch": 0.0761904761904762, - "grad_norm": 0.6167465872303627, - "learning_rate": 5.078125000000001e-06, - "loss": 0.479, + "epoch": 0.07654938907699102, + "grad_norm": 0.5868518238421928, + "learning_rate": 5.1030421982335635e-06, + "loss": 0.4826, "step": 520 }, { - "epoch": 0.07692307692307693, - "grad_norm": 0.6100423988314366, - "learning_rate": 5.126953125e-06, - "loss": 0.4855, + "epoch": 0.07728544089503901, + "grad_norm": 0.6102754783922901, + "learning_rate": 5.152109911678116e-06, + "loss": 0.4727, "step": 525 }, { - "epoch": 0.07765567765567766, - "grad_norm": 0.5873756076460772, - "learning_rate": 5.17578125e-06, - "loss": 0.4705, + "epoch": 0.078021492713087, + "grad_norm": 0.5513354797822135, + "learning_rate": 5.20117762512267e-06, + "loss": 0.4825, "step": 530 }, { - "epoch": 0.07838827838827839, - "grad_norm": 0.6379594116334866, - "learning_rate": 5.2246093750000006e-06, - "loss": 0.4789, + "epoch": 0.078757544531135, + "grad_norm": 0.5952133347377497, + "learning_rate": 5.250245338567223e-06, + "loss": 0.4666, "step": 535 }, { - "epoch": 0.07912087912087912, - "grad_norm": 0.6043795568282792, - "learning_rate": 5.2734375e-06, - "loss": 0.4904, + "epoch": 0.07949359634918299, + "grad_norm": 0.6057839823052446, + "learning_rate": 5.299313052011776e-06, + "loss": 0.4859, "step": 540 }, { - "epoch": 0.07985347985347985, - "grad_norm": 0.5627871797826735, - "learning_rate": 5.322265625000001e-06, - "loss": 0.4732, + "epoch": 0.08022964816723098, + "grad_norm": 0.6149832759183071, + "learning_rate": 5.3483807654563304e-06, + "loss": 0.4627, "step": 545 }, { - "epoch": 0.08058608058608059, - "grad_norm": 0.5772953048482563, - "learning_rate": 5.3710937500000005e-06, - "loss": 0.4714, + "epoch": 0.08096569998527896, + "grad_norm": 0.5625685874675828, + "learning_rate": 5.397448478900884e-06, + "loss": 0.4795, "step": 550 }, { - "epoch": 0.08131868131868132, - "grad_norm": 0.5989279963185531, - "learning_rate": 5.419921875e-06, - "loss": 0.48, + "epoch": 0.08170175180332695, + "grad_norm": 0.5695634420904787, + "learning_rate": 5.446516192345437e-06, + "loss": 0.4917, "step": 555 }, { - "epoch": 0.08205128205128205, - "grad_norm": 0.5603152309391928, - "learning_rate": 5.468750000000001e-06, - "loss": 0.48, + "epoch": 0.08243780362137494, + "grad_norm": 0.5882207985851441, + "learning_rate": 5.49558390578999e-06, + "loss": 0.4904, "step": 560 }, { - "epoch": 0.08278388278388278, - "grad_norm": 0.548779034619482, - "learning_rate": 5.5175781250000005e-06, - "loss": 0.4693, + "epoch": 0.08317385543942293, + "grad_norm": 0.565711510623109, + "learning_rate": 5.544651619234545e-06, + "loss": 0.4682, "step": 565 }, { - "epoch": 0.08351648351648351, - "grad_norm": 0.5994436565592611, - "learning_rate": 5.56640625e-06, - "loss": 0.4813, + "epoch": 0.08390990725747093, + "grad_norm": 0.5825425744408332, + "learning_rate": 5.593719332679097e-06, + "loss": 0.47, "step": 570 }, { - "epoch": 0.08424908424908426, - "grad_norm": 0.6314474035157444, - "learning_rate": 5.615234375000001e-06, - "loss": 0.4784, + "epoch": 0.08464595907551892, + "grad_norm": 0.5816544517858435, + "learning_rate": 5.642787046123651e-06, + "loss": 0.4719, "step": 575 }, { - "epoch": 0.08498168498168499, - "grad_norm": 0.6246412942815367, - "learning_rate": 5.6640625000000005e-06, - "loss": 0.4741, + "epoch": 0.08538201089356691, + "grad_norm": 0.5896285603216627, + "learning_rate": 5.691854759568205e-06, + "loss": 0.4912, "step": 580 }, { - "epoch": 0.08571428571428572, - "grad_norm": 0.5866396752374676, - "learning_rate": 5.712890625e-06, - "loss": 0.4802, + "epoch": 0.0861180627116149, + "grad_norm": 0.6117545163317246, + "learning_rate": 5.740922473012758e-06, + "loss": 0.4792, "step": 585 }, { - "epoch": 0.08644688644688645, - "grad_norm": 0.5904192652410284, - "learning_rate": 5.761718750000001e-06, - "loss": 0.4565, + "epoch": 0.08685411452966289, + "grad_norm": 0.5773620560230253, + "learning_rate": 5.789990186457312e-06, + "loss": 0.4877, "step": 590 }, { - "epoch": 0.08717948717948718, - "grad_norm": 0.5519252352856109, - "learning_rate": 5.810546875e-06, - "loss": 0.4475, + "epoch": 0.08759016634771088, + "grad_norm": 0.6601154780260813, + "learning_rate": 5.839057899901865e-06, + "loss": 0.4691, "step": 595 }, { - "epoch": 0.08791208791208792, - "grad_norm": 0.5948751037148314, - "learning_rate": 5.859375e-06, - "loss": 0.4717, + "epoch": 0.08832621816575886, + "grad_norm": 0.5633119014086018, + "learning_rate": 5.888125613346418e-06, + "loss": 0.4685, "step": 600 }, { - "epoch": 0.08864468864468865, - "grad_norm": 0.5827642155370751, - "learning_rate": 5.908203125000001e-06, - "loss": 0.47, + "epoch": 0.08906226998380685, + "grad_norm": 0.5798833556082699, + "learning_rate": 5.937193326790972e-06, + "loss": 0.4845, "step": 605 }, { - "epoch": 0.08937728937728938, - "grad_norm": 0.5904415040624938, - "learning_rate": 5.95703125e-06, - "loss": 0.4668, + "epoch": 0.08979832180185485, + "grad_norm": 0.6091643398421105, + "learning_rate": 5.986261040235526e-06, + "loss": 0.4524, "step": 610 }, { - "epoch": 0.09010989010989011, - "grad_norm": 0.5761997438099439, - "learning_rate": 6.005859375e-06, - "loss": 0.4752, + "epoch": 0.09053437361990284, + "grad_norm": 0.5565575718179494, + "learning_rate": 6.035328753680079e-06, + "loss": 0.4557, "step": 615 }, { - "epoch": 0.09084249084249084, - "grad_norm": 0.5489736943110936, - "learning_rate": 6.054687500000001e-06, - "loss": 0.4566, + "epoch": 0.09127042543795083, + "grad_norm": 0.5644936245127977, + "learning_rate": 6.084396467124632e-06, + "loss": 0.4732, "step": 620 }, { - "epoch": 0.09157509157509157, - "grad_norm": 0.5597636269927064, - "learning_rate": 6.103515625e-06, - "loss": 0.4813, + "epoch": 0.09200647725599882, + "grad_norm": 0.5790776725696446, + "learning_rate": 6.1334641805691865e-06, + "loss": 0.478, "step": 625 }, { - "epoch": 0.09230769230769231, - "grad_norm": 0.5714338556216557, - "learning_rate": 6.152343750000001e-06, - "loss": 0.4735, + "epoch": 0.09274252907404681, + "grad_norm": 0.5643831879058268, + "learning_rate": 6.182531894013739e-06, + "loss": 0.4586, "step": 630 }, { - "epoch": 0.09304029304029304, - "grad_norm": 0.5952178258097818, - "learning_rate": 6.201171875000001e-06, - "loss": 0.4826, + "epoch": 0.0934785808920948, + "grad_norm": 0.5837458919285372, + "learning_rate": 6.231599607458293e-06, + "loss": 0.4866, "step": 635 }, { - "epoch": 0.09377289377289377, - "grad_norm": 0.5884694242344789, - "learning_rate": 6.25e-06, - "loss": 0.4646, + "epoch": 0.0942146327101428, + "grad_norm": 0.5566898853661357, + "learning_rate": 6.280667320902847e-06, + "loss": 0.4685, "step": 640 }, { - "epoch": 0.0945054945054945, - "grad_norm": 0.5933386033268863, - "learning_rate": 6.298828125000001e-06, - "loss": 0.4608, + "epoch": 0.09495068452819079, + "grad_norm": 0.5812564266093952, + "learning_rate": 6.3297350343474e-06, + "loss": 0.4749, "step": 645 }, { - "epoch": 0.09523809523809523, - "grad_norm": 0.6037443715273233, - "learning_rate": 6.3476562500000006e-06, - "loss": 0.4637, + "epoch": 0.09568673634623878, + "grad_norm": 0.5903324398235352, + "learning_rate": 6.3788027477919535e-06, + "loss": 0.4765, "step": 650 }, { - "epoch": 0.09597069597069598, - "grad_norm": 0.6163005967307136, - "learning_rate": 6.396484375e-06, - "loss": 0.4731, + "epoch": 0.09642278816428676, + "grad_norm": 0.6304091374610179, + "learning_rate": 6.427870461236507e-06, + "loss": 0.4756, "step": 655 }, { - "epoch": 0.0967032967032967, - "grad_norm": 0.5453447185366223, - "learning_rate": 6.445312500000001e-06, - "loss": 0.469, + "epoch": 0.09715883998233475, + "grad_norm": 0.586486674113841, + "learning_rate": 6.47693817468106e-06, + "loss": 0.4681, "step": 660 }, { - "epoch": 0.09743589743589744, - "grad_norm": 0.6212943621659923, - "learning_rate": 6.4941406250000005e-06, - "loss": 0.466, + "epoch": 0.09789489180038274, + "grad_norm": 0.5893707646862878, + "learning_rate": 6.526005888125614e-06, + "loss": 0.4815, "step": 665 }, { - "epoch": 0.09816849816849817, - "grad_norm": 0.5309135046878034, - "learning_rate": 6.54296875e-06, - "loss": 0.4626, + "epoch": 0.09863094361843074, + "grad_norm": 0.5642675537976032, + "learning_rate": 6.575073601570168e-06, + "loss": 0.4681, "step": 670 }, { - "epoch": 0.0989010989010989, - "grad_norm": 0.5382683389928992, - "learning_rate": 6.591796875000001e-06, - "loss": 0.4838, + "epoch": 0.09936699543647873, + "grad_norm": 0.5612248341901227, + "learning_rate": 6.6241413150147205e-06, + "loss": 0.4846, "step": 675 }, { - "epoch": 0.09963369963369964, - "grad_norm": 0.5827291346723401, - "learning_rate": 6.6406250000000005e-06, - "loss": 0.4905, + "epoch": 0.10010304725452672, + "grad_norm": 0.5834664312871857, + "learning_rate": 6.673209028459274e-06, + "loss": 0.4834, "step": 680 }, { - "epoch": 0.10036630036630037, - "grad_norm": 0.532784806632675, - "learning_rate": 6.689453125e-06, - "loss": 0.4744, + "epoch": 0.10083909907257471, + "grad_norm": 0.629128155687521, + "learning_rate": 6.722276741903828e-06, + "loss": 0.4722, "step": 685 }, { - "epoch": 0.1010989010989011, - "grad_norm": 0.5838167742764417, - "learning_rate": 6.738281250000001e-06, - "loss": 0.4703, + "epoch": 0.1015751508906227, + "grad_norm": 0.5828728916316157, + "learning_rate": 6.771344455348381e-06, + "loss": 0.4645, "step": 690 }, { - "epoch": 0.10183150183150183, - "grad_norm": 0.5376363462627745, - "learning_rate": 6.7871093750000004e-06, - "loss": 0.4661, + "epoch": 0.1023112027086707, + "grad_norm": 0.6490735802843812, + "learning_rate": 6.820412168792935e-06, + "loss": 0.4759, "step": 695 }, { - "epoch": 0.10256410256410256, - "grad_norm": 0.5239263973505636, - "learning_rate": 6.8359375e-06, - "loss": 0.4579, + "epoch": 0.10304725452671869, + "grad_norm": 0.5807842256629684, + "learning_rate": 6.869479882237488e-06, + "loss": 0.472, "step": 700 }, { - "epoch": 0.10329670329670329, - "grad_norm": 0.5905645370609754, - "learning_rate": 6.884765625000001e-06, - "loss": 0.4619, + "epoch": 0.10378330634476667, + "grad_norm": 0.6076870406724216, + "learning_rate": 6.918547595682041e-06, + "loss": 0.4553, "step": 705 }, { - "epoch": 0.10402930402930403, - "grad_norm": 0.5700211959784208, - "learning_rate": 6.93359375e-06, - "loss": 0.4557, + "epoch": 0.10451935816281466, + "grad_norm": 0.5648532124295207, + "learning_rate": 6.967615309126595e-06, + "loss": 0.4807, "step": 710 }, { - "epoch": 0.10476190476190476, - "grad_norm": 0.5903415816955252, - "learning_rate": 6.982421875000001e-06, - "loss": 0.4571, + "epoch": 0.10525540998086265, + "grad_norm": 0.5969143940699317, + "learning_rate": 7.016683022571149e-06, + "loss": 0.473, "step": 715 }, { - "epoch": 0.1054945054945055, - "grad_norm": 0.5574565092969472, - "learning_rate": 7.031250000000001e-06, - "loss": 0.4606, + "epoch": 0.10599146179891064, + "grad_norm": 0.6022013779305591, + "learning_rate": 7.065750736015702e-06, + "loss": 0.4619, "step": 720 }, { - "epoch": 0.10622710622710622, - "grad_norm": 0.5861130971423574, - "learning_rate": 7.080078125e-06, - "loss": 0.4594, + "epoch": 0.10672751361695863, + "grad_norm": 0.5458678918831419, + "learning_rate": 7.114818449460256e-06, + "loss": 0.4833, "step": 725 }, { - "epoch": 0.10695970695970695, - "grad_norm": 0.5486960784949657, - "learning_rate": 7.128906250000001e-06, - "loss": 0.4984, + "epoch": 0.10746356543500662, + "grad_norm": 0.5355193744561354, + "learning_rate": 7.16388616290481e-06, + "loss": 0.4761, "step": 730 }, { - "epoch": 0.1076923076923077, - "grad_norm": 0.557733033021095, - "learning_rate": 7.177734375000001e-06, - "loss": 0.4655, + "epoch": 0.10819961725305462, + "grad_norm": 0.573431459253995, + "learning_rate": 7.212953876349362e-06, + "loss": 0.4681, "step": 735 }, { - "epoch": 0.10842490842490843, - "grad_norm": 0.5436179967978834, - "learning_rate": 7.2265625e-06, - "loss": 0.4686, + "epoch": 0.10893566907110261, + "grad_norm": 0.5288123343977066, + "learning_rate": 7.262021589793916e-06, + "loss": 0.4631, "step": 740 }, { - "epoch": 0.10915750915750916, - "grad_norm": 0.5724129400787233, - "learning_rate": 7.275390625000001e-06, - "loss": 0.4661, + "epoch": 0.1096717208891506, + "grad_norm": 0.5697904981825487, + "learning_rate": 7.31108930323847e-06, + "loss": 0.4715, "step": 745 }, { - "epoch": 0.10989010989010989, - "grad_norm": 0.5466105215938837, - "learning_rate": 7.3242187500000006e-06, - "loss": 0.4731, + "epoch": 0.11040777270719859, + "grad_norm": 0.5208534768593922, + "learning_rate": 7.360157016683023e-06, + "loss": 0.4665, "step": 750 }, { - "epoch": 0.11062271062271062, - "grad_norm": 0.5768341262782312, - "learning_rate": 7.373046875e-06, - "loss": 0.4822, + "epoch": 0.11114382452524657, + "grad_norm": 0.5498377158829724, + "learning_rate": 7.4092247301275766e-06, + "loss": 0.4714, "step": 755 }, { - "epoch": 0.11135531135531136, - "grad_norm": 0.6159551594829057, - "learning_rate": 7.421875000000001e-06, - "loss": 0.476, + "epoch": 0.11187987634329456, + "grad_norm": 0.5675244832493291, + "learning_rate": 7.45829244357213e-06, + "loss": 0.457, "step": 760 }, { - "epoch": 0.11208791208791209, - "grad_norm": 0.6072974251996277, - "learning_rate": 7.4707031250000005e-06, - "loss": 0.4782, + "epoch": 0.11261592816134255, + "grad_norm": 0.5347391538454078, + "learning_rate": 7.507360157016683e-06, + "loss": 0.4657, "step": 765 }, { - "epoch": 0.11282051282051282, - "grad_norm": 0.5871442389262, - "learning_rate": 7.51953125e-06, - "loss": 0.4494, + "epoch": 0.11335197997939055, + "grad_norm": 0.5583228209281907, + "learning_rate": 7.556427870461237e-06, + "loss": 0.4764, "step": 770 }, { - "epoch": 0.11355311355311355, - "grad_norm": 0.602204799204848, - "learning_rate": 7.568359375000001e-06, - "loss": 0.4719, + "epoch": 0.11408803179743854, + "grad_norm": 0.5461223520236801, + "learning_rate": 7.605495583905791e-06, + "loss": 0.4667, "step": 775 }, { - "epoch": 0.11428571428571428, - "grad_norm": 0.5813186170396549, - "learning_rate": 7.6171875000000005e-06, - "loss": 0.4788, + "epoch": 0.11482408361548653, + "grad_norm": 0.5663027522324963, + "learning_rate": 7.654563297350344e-06, + "loss": 0.4701, "step": 780 }, { - "epoch": 0.11501831501831501, - "grad_norm": 0.6304842458401531, - "learning_rate": 7.666015625000001e-06, - "loss": 0.492, + "epoch": 0.11556013543353452, + "grad_norm": 0.5556645456848583, + "learning_rate": 7.703631010794898e-06, + "loss": 0.475, "step": 785 }, { - "epoch": 0.11575091575091576, - "grad_norm": 0.5975861262153394, - "learning_rate": 7.71484375e-06, - "loss": 0.4716, + "epoch": 0.11629618725158251, + "grad_norm": 0.5202037674063839, + "learning_rate": 7.75269872423945e-06, + "loss": 0.4582, "step": 790 }, { - "epoch": 0.11648351648351649, - "grad_norm": 0.5356317621558694, - "learning_rate": 7.763671875e-06, - "loss": 0.4504, + "epoch": 0.1170322390696305, + "grad_norm": 0.5621800088114947, + "learning_rate": 7.801766437684003e-06, + "loss": 0.4745, "step": 795 }, { - "epoch": 0.11721611721611722, - "grad_norm": 0.5559709134630877, - "learning_rate": 7.8125e-06, - "loss": 0.4712, + "epoch": 0.1177682908876785, + "grad_norm": 0.5357544493332862, + "learning_rate": 7.850834151128558e-06, + "loss": 0.4654, "step": 800 }, { - "epoch": 0.11794871794871795, - "grad_norm": 0.5629653822120899, - "learning_rate": 7.861328125e-06, - "loss": 0.4678, + "epoch": 0.11850434270572649, + "grad_norm": 0.5419172883253839, + "learning_rate": 7.899901864573112e-06, + "loss": 0.4735, "step": 805 }, { - "epoch": 0.11868131868131868, - "grad_norm": 0.5403593741271583, - "learning_rate": 7.910156250000001e-06, - "loss": 0.4771, + "epoch": 0.11924039452377447, + "grad_norm": 0.5525060417009166, + "learning_rate": 7.948969578017665e-06, + "loss": 0.4709, "step": 810 }, { - "epoch": 0.11941391941391942, - "grad_norm": 0.5386233851491723, - "learning_rate": 7.958984375000001e-06, - "loss": 0.4762, + "epoch": 0.11997644634182246, + "grad_norm": 0.5448040937044886, + "learning_rate": 7.998037291462218e-06, + "loss": 0.4664, "step": 815 }, { - "epoch": 0.12014652014652015, - "grad_norm": 0.5134313568682624, - "learning_rate": 8.0078125e-06, - "loss": 0.4646, + "epoch": 0.12071249815987045, + "grad_norm": 0.5486116584488951, + "learning_rate": 8.047105004906772e-06, + "loss": 0.4677, "step": 820 }, { - "epoch": 0.12087912087912088, - "grad_norm": 0.5707071480675963, - "learning_rate": 8.056640625e-06, - "loss": 0.4813, + "epoch": 0.12144854997791844, + "grad_norm": 0.5965699643929866, + "learning_rate": 8.096172718351325e-06, + "loss": 0.4677, "step": 825 }, { - "epoch": 0.12161172161172161, - "grad_norm": 1.057465377362691, - "learning_rate": 8.10546875e-06, - "loss": 0.4772, + "epoch": 0.12218460179596644, + "grad_norm": 0.7738268323135488, + "learning_rate": 8.145240431795879e-06, + "loss": 0.4896, "step": 830 }, { - "epoch": 0.12234432234432234, - "grad_norm": 0.7969984191209428, - "learning_rate": 8.154296875000001e-06, - "loss": 0.4616, + "epoch": 0.12292065361401443, + "grad_norm": 0.5502489638196433, + "learning_rate": 8.194308145240434e-06, + "loss": 0.4704, "step": 835 }, { - "epoch": 0.12307692307692308, - "grad_norm": 0.5508120508001506, - "learning_rate": 8.203125000000001e-06, - "loss": 0.4805, + "epoch": 0.12365670543206242, + "grad_norm": 0.5641949189118746, + "learning_rate": 8.243375858684986e-06, + "loss": 0.4764, "step": 840 }, { - "epoch": 0.12380952380952381, - "grad_norm": 0.5450863996688757, - "learning_rate": 8.251953125000001e-06, - "loss": 0.4853, + "epoch": 0.12439275725011041, + "grad_norm": 0.5289076252333902, + "learning_rate": 8.292443572129539e-06, + "loss": 0.4751, "step": 845 }, { - "epoch": 0.12454212454212454, - "grad_norm": 0.548584060543633, - "learning_rate": 8.30078125e-06, - "loss": 0.4635, + "epoch": 0.1251288090681584, + "grad_norm": 0.5476369574245666, + "learning_rate": 8.341511285574093e-06, + "loss": 0.4683, "step": 850 }, { - "epoch": 0.12527472527472527, - "grad_norm": 0.54496105370757, - "learning_rate": 8.349609375e-06, - "loss": 0.4769, + "epoch": 0.12586486088620638, + "grad_norm": 0.5498288749893551, + "learning_rate": 8.390578999018646e-06, + "loss": 0.494, "step": 855 }, { - "epoch": 0.126007326007326, - "grad_norm": 0.6056793412035149, - "learning_rate": 8.3984375e-06, - "loss": 0.4714, + "epoch": 0.1266009127042544, + "grad_norm": 0.5249158510086049, + "learning_rate": 8.4396467124632e-06, + "loss": 0.4874, "step": 860 }, { - "epoch": 0.12673992673992673, - "grad_norm": 0.5649345221606744, - "learning_rate": 8.447265625000001e-06, - "loss": 0.483, + "epoch": 0.12733696452230236, + "grad_norm": 0.5826503894307695, + "learning_rate": 8.488714425907753e-06, + "loss": 0.4794, "step": 865 }, { - "epoch": 0.12747252747252746, - "grad_norm": 0.5616348293821443, - "learning_rate": 8.496093750000001e-06, - "loss": 0.4757, + "epoch": 0.12807301634035037, + "grad_norm": 0.6067612119654865, + "learning_rate": 8.537782139352306e-06, + "loss": 0.465, "step": 870 }, { - "epoch": 0.1282051282051282, - "grad_norm": 0.5611441794672326, - "learning_rate": 8.544921875e-06, - "loss": 0.469, + "epoch": 0.12880906815839835, + "grad_norm": 0.5417276894889486, + "learning_rate": 8.58684985279686e-06, + "loss": 0.4674, "step": 875 }, { - "epoch": 0.12893772893772895, - "grad_norm": 0.5405096788454254, - "learning_rate": 8.59375e-06, - "loss": 0.4847, + "epoch": 0.12954511997644635, + "grad_norm": 0.523838175362927, + "learning_rate": 8.635917566241415e-06, + "loss": 0.469, "step": 880 }, { - "epoch": 0.12967032967032968, - "grad_norm": 0.5593871863023577, - "learning_rate": 8.642578125e-06, - "loss": 0.4709, + "epoch": 0.13028117179449433, + "grad_norm": 0.5409098167007628, + "learning_rate": 8.684985279685967e-06, + "loss": 0.4749, "step": 885 }, { - "epoch": 0.1304029304029304, - "grad_norm": 0.5219134179461249, - "learning_rate": 8.69140625e-06, - "loss": 0.4763, + "epoch": 0.1310172236125423, + "grad_norm": 0.5404589881617674, + "learning_rate": 8.73405299313052e-06, + "loss": 0.4659, "step": 890 }, { - "epoch": 0.13113553113553114, - "grad_norm": 0.5402339547204121, - "learning_rate": 8.740234375000001e-06, - "loss": 0.4665, + "epoch": 0.13175327543059032, + "grad_norm": 0.5783971691158564, + "learning_rate": 8.783120706575075e-06, + "loss": 0.4452, "step": 895 }, { - "epoch": 0.13186813186813187, - "grad_norm": 0.5017739581429062, - "learning_rate": 8.789062500000001e-06, - "loss": 0.4655, + "epoch": 0.1324893272486383, + "grad_norm": 0.5182432824868518, + "learning_rate": 8.832188420019627e-06, + "loss": 0.4683, "step": 900 }, { - "epoch": 0.1326007326007326, - "grad_norm": 0.5398065464611551, - "learning_rate": 8.837890625e-06, - "loss": 0.4685, + "epoch": 0.1332253790666863, + "grad_norm": 0.5385422506386383, + "learning_rate": 8.881256133464182e-06, + "loss": 0.4607, "step": 905 }, { - "epoch": 0.13333333333333333, - "grad_norm": 0.539672747550422, - "learning_rate": 8.88671875e-06, - "loss": 0.4646, + "epoch": 0.13396143088473428, + "grad_norm": 0.570114872389286, + "learning_rate": 8.930323846908734e-06, + "loss": 0.4725, "step": 910 }, { - "epoch": 0.13406593406593406, - "grad_norm": 0.5649222602250018, - "learning_rate": 8.935546875e-06, - "loss": 0.4712, + "epoch": 0.13469748270278228, + "grad_norm": 0.5638061126176769, + "learning_rate": 8.979391560353287e-06, + "loss": 0.4894, "step": 915 }, { - "epoch": 0.1347985347985348, - "grad_norm": 0.555978425517813, - "learning_rate": 8.984375000000002e-06, - "loss": 0.4797, + "epoch": 0.13543353452083026, + "grad_norm": 0.5284798481462458, + "learning_rate": 9.028459273797842e-06, + "loss": 0.4599, "step": 920 }, { - "epoch": 0.13553113553113552, - "grad_norm": 0.5451150959046317, - "learning_rate": 9.033203125000001e-06, - "loss": 0.4608, + "epoch": 0.13616958633887827, + "grad_norm": 0.545300441979569, + "learning_rate": 9.077526987242396e-06, + "loss": 0.4736, "step": 925 }, { - "epoch": 0.13626373626373625, - "grad_norm": 0.6136799089282742, - "learning_rate": 9.082031250000001e-06, - "loss": 0.465, + "epoch": 0.13690563815692625, + "grad_norm": 0.5443738398973984, + "learning_rate": 9.126594700686949e-06, + "loss": 0.475, "step": 930 }, { - "epoch": 0.136996336996337, - "grad_norm": 0.5392919758975416, - "learning_rate": 9.130859375e-06, - "loss": 0.4737, + "epoch": 0.13764168997497425, + "grad_norm": 0.5282020418222877, + "learning_rate": 9.175662414131501e-06, + "loss": 0.4671, "step": 935 }, { - "epoch": 0.13772893772893774, - "grad_norm": 0.5433299746227306, - "learning_rate": 9.1796875e-06, - "loss": 0.4618, + "epoch": 0.13837774179302223, + "grad_norm": 0.5228118303753154, + "learning_rate": 9.224730127576056e-06, + "loss": 0.4723, "step": 940 }, { - "epoch": 0.13846153846153847, - "grad_norm": 0.5269797392518154, - "learning_rate": 9.228515625e-06, - "loss": 0.4663, + "epoch": 0.1391137936110702, + "grad_norm": 0.5191901819627673, + "learning_rate": 9.273797841020608e-06, + "loss": 0.4808, "step": 945 }, { - "epoch": 0.1391941391941392, - "grad_norm": 0.5303507680071278, - "learning_rate": 9.277343750000001e-06, - "loss": 0.4793, + "epoch": 0.1398498454291182, + "grad_norm": 0.5293478366717721, + "learning_rate": 9.322865554465163e-06, + "loss": 0.4766, "step": 950 }, { - "epoch": 0.13992673992673993, - "grad_norm": 0.5739728476514406, - "learning_rate": 9.326171875000001e-06, - "loss": 0.4891, + "epoch": 0.1405858972471662, + "grad_norm": 0.5354175876597523, + "learning_rate": 9.371933267909716e-06, + "loss": 0.4813, "step": 955 }, { - "epoch": 0.14065934065934066, - "grad_norm": 0.5720121486255587, - "learning_rate": 9.375000000000001e-06, - "loss": 0.4781, + "epoch": 0.1413219490652142, + "grad_norm": 0.5605277944405026, + "learning_rate": 9.42100098135427e-06, + "loss": 0.4662, "step": 960 }, { - "epoch": 0.1413919413919414, - "grad_norm": 0.5226798686905856, - "learning_rate": 9.423828125e-06, - "loss": 0.4633, + "epoch": 0.14205800088326218, + "grad_norm": 0.5248861064318012, + "learning_rate": 9.470068694798823e-06, + "loss": 0.4652, "step": 965 }, { - "epoch": 0.14212454212454212, - "grad_norm": 0.5183949318801034, - "learning_rate": 9.47265625e-06, - "loss": 0.4488, + "epoch": 0.14279405270131018, + "grad_norm": 0.515307674108716, + "learning_rate": 9.519136408243377e-06, + "loss": 0.4571, "step": 970 }, { - "epoch": 0.14285714285714285, - "grad_norm": 0.530336983746849, - "learning_rate": 9.521484375e-06, - "loss": 0.4526, + "epoch": 0.14353010451935816, + "grad_norm": 0.5538399997823142, + "learning_rate": 9.56820412168793e-06, + "loss": 0.4708, "step": 975 }, { - "epoch": 0.14358974358974358, - "grad_norm": 0.5436835706956455, - "learning_rate": 9.570312500000001e-06, - "loss": 0.4716, + "epoch": 0.14426615633740617, + "grad_norm": 0.5093828223702961, + "learning_rate": 9.617271835132484e-06, + "loss": 0.4546, "step": 980 }, { - "epoch": 0.1443223443223443, - "grad_norm": 0.6066464639380588, - "learning_rate": 9.619140625000001e-06, - "loss": 0.4836, + "epoch": 0.14500220815545414, + "grad_norm": 0.5508697416192083, + "learning_rate": 9.666339548577037e-06, + "loss": 0.4627, "step": 985 }, { - "epoch": 0.14505494505494507, - "grad_norm": 0.5554488216735801, - "learning_rate": 9.66796875e-06, - "loss": 0.4524, + "epoch": 0.14573825997350212, + "grad_norm": 0.5480456861919574, + "learning_rate": 9.715407262021591e-06, + "loss": 0.4589, "step": 990 }, { - "epoch": 0.1457875457875458, - "grad_norm": 0.5478213717376119, - "learning_rate": 9.716796875e-06, - "loss": 0.4821, + "epoch": 0.14647431179155013, + "grad_norm": 0.5357831384000477, + "learning_rate": 9.764474975466144e-06, + "loss": 0.4737, "step": 995 }, { - "epoch": 0.14652014652014653, - "grad_norm": 0.5471748160993797, - "learning_rate": 9.765625e-06, - "loss": 0.488, + "epoch": 0.1472103636095981, + "grad_norm": 0.5743875070998096, + "learning_rate": 9.813542688910698e-06, + "loss": 0.4664, "step": 1000 }, { - "epoch": 0.14725274725274726, - "grad_norm": 0.5200793639005971, - "learning_rate": 9.814453125000002e-06, - "loss": 0.4639, + "epoch": 0.1479464154276461, + "grad_norm": 0.5285569772929545, + "learning_rate": 9.862610402355251e-06, + "loss": 0.4684, "step": 1005 }, { - "epoch": 0.147985347985348, - "grad_norm": 0.5416684777484602, - "learning_rate": 9.863281250000001e-06, - "loss": 0.4661, + "epoch": 0.1486824672456941, + "grad_norm": 0.5658895722799637, + "learning_rate": 9.911678115799804e-06, + "loss": 0.4727, "step": 1010 }, { - "epoch": 0.14871794871794872, - "grad_norm": 0.5416594075142821, - "learning_rate": 9.912109375000001e-06, - "loss": 0.4564, + "epoch": 0.1494185190637421, + "grad_norm": 0.5046840364272455, + "learning_rate": 9.960745829244358e-06, + "loss": 0.4691, "step": 1015 }, { - "epoch": 0.14945054945054945, - "grad_norm": 0.5303288476884193, - "learning_rate": 9.9609375e-06, - "loss": 0.461, + "epoch": 0.15015457088179007, + "grad_norm": 0.5063812234251259, + "learning_rate": 1.0009813542688911e-05, + "loss": 0.4661, "step": 1020 }, { - "epoch": 0.15018315018315018, - "grad_norm": 0.5571714281369051, - "learning_rate": 1.0009765625e-05, - "loss": 0.4854, + "epoch": 0.15089062269983808, + "grad_norm": 0.5282738769028222, + "learning_rate": 1.0058881256133465e-05, + "loss": 0.4872, "step": 1025 }, { - "epoch": 0.1509157509157509, - "grad_norm": 0.6193668370642765, - "learning_rate": 1.005859375e-05, - "loss": 0.4803, + "epoch": 0.15162667451788606, + "grad_norm": 0.6513678207446305, + "learning_rate": 1.0107948969578018e-05, + "loss": 0.4664, "step": 1030 }, { - "epoch": 0.15164835164835164, - "grad_norm": 0.5383083934721935, - "learning_rate": 1.0107421875000002e-05, - "loss": 0.4604, + "epoch": 0.15236272633593406, + "grad_norm": 0.5162653018671886, + "learning_rate": 1.0157016683022571e-05, + "loss": 0.458, "step": 1035 }, { - "epoch": 0.1523809523809524, - "grad_norm": 0.5663562446139142, - "learning_rate": 1.0156250000000001e-05, - "loss": 0.4758, + "epoch": 0.15309877815398204, + "grad_norm": 0.5153888899075103, + "learning_rate": 1.0206084396467127e-05, + "loss": 0.4868, "step": 1040 }, { - "epoch": 0.15311355311355312, - "grad_norm": 0.5668737710162076, - "learning_rate": 1.0205078125000001e-05, - "loss": 0.4543, + "epoch": 0.15383482997203002, + "grad_norm": 0.5622003345509735, + "learning_rate": 1.025515210991168e-05, + "loss": 0.4898, "step": 1045 }, { - "epoch": 0.15384615384615385, - "grad_norm": 0.5453268015424697, - "learning_rate": 1.025390625e-05, - "loss": 0.4595, + "epoch": 0.15457088179007802, + "grad_norm": 0.5053927687579358, + "learning_rate": 1.0304219823356232e-05, + "loss": 0.4604, "step": 1050 }, { - "epoch": 0.15457875457875458, - "grad_norm": 0.5671479089435382, - "learning_rate": 1.0302734375e-05, - "loss": 0.4682, + "epoch": 0.155306933608126, + "grad_norm": 0.510564493666499, + "learning_rate": 1.0353287536800787e-05, + "loss": 0.4585, "step": 1055 }, { - "epoch": 0.15531135531135531, - "grad_norm": 0.559268834190801, - "learning_rate": 1.03515625e-05, - "loss": 0.4856, + "epoch": 0.156042985426174, + "grad_norm": 0.5342815365135547, + "learning_rate": 1.040235525024534e-05, + "loss": 0.4658, "step": 1060 }, { - "epoch": 0.15604395604395604, - "grad_norm": 0.5276944813849548, - "learning_rate": 1.0400390625000001e-05, - "loss": 0.4461, + "epoch": 0.15677903724422199, + "grad_norm": 0.5303796569780999, + "learning_rate": 1.0451422963689892e-05, + "loss": 0.468, "step": 1065 }, { - "epoch": 0.15677655677655677, - "grad_norm": 0.5155281991905493, - "learning_rate": 1.0449218750000001e-05, - "loss": 0.4772, + "epoch": 0.15751508906227, + "grad_norm": 0.5435896518221282, + "learning_rate": 1.0500490677134447e-05, + "loss": 0.4664, "step": 1070 }, { - "epoch": 0.1575091575091575, - "grad_norm": 0.5184923436594232, - "learning_rate": 1.0498046875000001e-05, - "loss": 0.4556, + "epoch": 0.15825114088031797, + "grad_norm": 0.5127641089890831, + "learning_rate": 1.0549558390579e-05, + "loss": 0.4613, "step": 1075 }, { - "epoch": 0.15824175824175823, - "grad_norm": 0.5029598122850704, - "learning_rate": 1.0546875e-05, - "loss": 0.4613, + "epoch": 0.15898719269836598, + "grad_norm": 0.580969232971526, + "learning_rate": 1.0598626104023552e-05, + "loss": 0.4552, "step": 1080 }, { - "epoch": 0.15897435897435896, - "grad_norm": 0.5286665458480232, - "learning_rate": 1.0595703125e-05, - "loss": 0.4559, + "epoch": 0.15972324451641395, + "grad_norm": 0.49770589445853974, + "learning_rate": 1.0647693817468108e-05, + "loss": 0.4667, "step": 1085 }, { - "epoch": 0.1597069597069597, - "grad_norm": 0.5047397009957366, - "learning_rate": 1.0644531250000002e-05, - "loss": 0.4586, + "epoch": 0.16045929633446196, + "grad_norm": 0.5434030253983654, + "learning_rate": 1.0696761530912661e-05, + "loss": 0.4681, "step": 1090 }, { - "epoch": 0.16043956043956045, - "grad_norm": 0.7228349992269166, - "learning_rate": 1.0693359375000001e-05, - "loss": 0.4678, + "epoch": 0.16119534815250994, + "grad_norm": 0.5222809668848786, + "learning_rate": 1.0745829244357214e-05, + "loss": 0.4753, "step": 1095 }, { - "epoch": 0.16117216117216118, - "grad_norm": 0.5229916349907864, - "learning_rate": 1.0742187500000001e-05, - "loss": 0.4904, + "epoch": 0.16193139997055792, + "grad_norm": 0.5224958676817909, + "learning_rate": 1.0794896957801768e-05, + "loss": 0.4828, "step": 1100 }, { - "epoch": 0.1619047619047619, - "grad_norm": 0.5036353858856911, - "learning_rate": 1.0791015625e-05, - "loss": 0.4688, + "epoch": 0.16266745178860592, + "grad_norm": 0.5087197860162646, + "learning_rate": 1.084396467124632e-05, + "loss": 0.465, "step": 1105 }, { - "epoch": 0.16263736263736264, - "grad_norm": 0.5122903895008505, - "learning_rate": 1.083984375e-05, - "loss": 0.4757, + "epoch": 0.1634035036066539, + "grad_norm": 0.5258709460092567, + "learning_rate": 1.0893032384690873e-05, + "loss": 0.478, "step": 1110 }, { - "epoch": 0.16336996336996337, - "grad_norm": 0.497493881419283, - "learning_rate": 1.0888671875e-05, - "loss": 0.4535, + "epoch": 0.1641395554247019, + "grad_norm": 0.5013511959010691, + "learning_rate": 1.0942100098135428e-05, + "loss": 0.462, "step": 1115 }, { - "epoch": 0.1641025641025641, - "grad_norm": 0.5287816022219953, - "learning_rate": 1.0937500000000002e-05, - "loss": 0.4546, + "epoch": 0.16487560724274988, + "grad_norm": 0.523215723727446, + "learning_rate": 1.099116781157998e-05, + "loss": 0.4622, "step": 1120 }, { - "epoch": 0.16483516483516483, - "grad_norm": 0.5393853431608534, - "learning_rate": 1.0986328125000001e-05, - "loss": 0.4613, + "epoch": 0.1656116590607979, + "grad_norm": 0.4993724186877467, + "learning_rate": 1.1040235525024535e-05, + "loss": 0.4878, "step": 1125 }, { - "epoch": 0.16556776556776556, - "grad_norm": 0.5175948331294432, - "learning_rate": 1.1035156250000001e-05, - "loss": 0.4819, + "epoch": 0.16634771087884587, + "grad_norm": 0.5149066945719283, + "learning_rate": 1.108930323846909e-05, + "loss": 0.4719, "step": 1130 }, { - "epoch": 0.1663003663003663, - "grad_norm": 0.5202860471821462, - "learning_rate": 1.1083984375e-05, - "loss": 0.4674, + "epoch": 0.16708376269689387, + "grad_norm": 0.5324143493051305, + "learning_rate": 1.1138370951913642e-05, + "loss": 0.4589, "step": 1135 }, { - "epoch": 0.16703296703296702, - "grad_norm": 0.7149468935941481, - "learning_rate": 1.11328125e-05, - "loss": 0.4721, + "epoch": 0.16781981451494185, + "grad_norm": 0.5104651877601035, + "learning_rate": 1.1187438665358195e-05, + "loss": 0.4578, "step": 1140 }, { - "epoch": 0.16776556776556775, - "grad_norm": 0.5208583785619997, - "learning_rate": 1.1181640625e-05, - "loss": 0.4848, + "epoch": 0.16855586633298983, + "grad_norm": 0.5136825933725584, + "learning_rate": 1.123650637880275e-05, + "loss": 0.4602, "step": 1145 }, { - "epoch": 0.1684981684981685, - "grad_norm": 0.5240976003205658, - "learning_rate": 1.1230468750000002e-05, - "loss": 0.4805, + "epoch": 0.16929191815103783, + "grad_norm": 0.5932405914639006, + "learning_rate": 1.1285574092247302e-05, + "loss": 0.4806, "step": 1150 }, { - "epoch": 0.16923076923076924, - "grad_norm": 0.6131879110902204, - "learning_rate": 1.1279296875000001e-05, - "loss": 0.4747, + "epoch": 0.1700279699690858, + "grad_norm": 0.4987604697030047, + "learning_rate": 1.1334641805691855e-05, + "loss": 0.4708, "step": 1155 }, { - "epoch": 0.16996336996336997, - "grad_norm": 0.5194993444884493, - "learning_rate": 1.1328125000000001e-05, - "loss": 0.4723, + "epoch": 0.17076402178713382, + "grad_norm": 0.5120281390736848, + "learning_rate": 1.138370951913641e-05, + "loss": 0.4747, "step": 1160 }, { - "epoch": 0.1706959706959707, - "grad_norm": 0.5366643091240741, - "learning_rate": 1.1376953125e-05, - "loss": 0.4599, + "epoch": 0.1715000736051818, + "grad_norm": 0.5160816666730633, + "learning_rate": 1.1432777232580963e-05, + "loss": 0.4636, "step": 1165 }, { - "epoch": 0.17142857142857143, - "grad_norm": 0.5307151335800977, - "learning_rate": 1.142578125e-05, - "loss": 0.4764, + "epoch": 0.1722361254232298, + "grad_norm": 0.5050377908699278, + "learning_rate": 1.1481844946025516e-05, + "loss": 0.4571, "step": 1170 }, { - "epoch": 0.17216117216117216, - "grad_norm": 0.5404046903160628, - "learning_rate": 1.1474609375000002e-05, - "loss": 0.4746, + "epoch": 0.17297217724127778, + "grad_norm": 0.5056895497995904, + "learning_rate": 1.153091265947007e-05, + "loss": 0.4632, "step": 1175 }, { - "epoch": 0.1728937728937729, - "grad_norm": 0.5051709515526519, - "learning_rate": 1.1523437500000001e-05, - "loss": 0.4521, + "epoch": 0.17370822905932579, + "grad_norm": 0.5335108204710897, + "learning_rate": 1.1579980372914623e-05, + "loss": 0.4746, "step": 1180 }, { - "epoch": 0.17362637362637362, - "grad_norm": 0.5868257177183442, - "learning_rate": 1.1572265625000001e-05, - "loss": 0.4842, + "epoch": 0.17444428087737376, + "grad_norm": 0.516518365889526, + "learning_rate": 1.1629048086359176e-05, + "loss": 0.457, "step": 1185 }, { - "epoch": 0.17435897435897435, - "grad_norm": 0.6728971850067752, - "learning_rate": 1.162109375e-05, - "loss": 0.4532, + "epoch": 0.17518033269542177, + "grad_norm": 0.5052625349340393, + "learning_rate": 1.167811579980373e-05, + "loss": 0.4698, "step": 1190 }, { - "epoch": 0.17509157509157508, - "grad_norm": 0.5583627103602447, - "learning_rate": 1.1669921875e-05, - "loss": 0.4647, + "epoch": 0.17591638451346975, + "grad_norm": 0.5451467203953263, + "learning_rate": 1.1727183513248283e-05, + "loss": 0.4629, "step": 1195 }, { - "epoch": 0.17582417582417584, - "grad_norm": 0.5167154137386244, - "learning_rate": 1.171875e-05, - "loss": 0.4612, + "epoch": 0.17665243633151773, + "grad_norm": 0.5335149299750542, + "learning_rate": 1.1776251226692836e-05, + "loss": 0.4553, "step": 1200 }, { - "epoch": 0.17655677655677657, - "grad_norm": 0.4849028929167034, - "learning_rate": 1.1767578125000002e-05, - "loss": 0.467, + "epoch": 0.17738848814956573, + "grad_norm": 0.5652693128753509, + "learning_rate": 1.1825318940137392e-05, + "loss": 0.4659, "step": 1205 }, { - "epoch": 0.1772893772893773, - "grad_norm": 0.49902210105050515, - "learning_rate": 1.1816406250000001e-05, - "loss": 0.4617, + "epoch": 0.1781245399676137, + "grad_norm": 0.5068344293397069, + "learning_rate": 1.1874386653581945e-05, + "loss": 0.4499, "step": 1210 }, { - "epoch": 0.17802197802197803, - "grad_norm": 0.4823129924518987, - "learning_rate": 1.1865234375000001e-05, - "loss": 0.4626, + "epoch": 0.17886059178566172, + "grad_norm": 0.48440022275092465, + "learning_rate": 1.1923454367026497e-05, + "loss": 0.4591, "step": 1215 }, { - "epoch": 0.17875457875457876, - "grad_norm": 0.5300239765467367, - "learning_rate": 1.19140625e-05, - "loss": 0.4659, + "epoch": 0.1795966436037097, + "grad_norm": 0.8941425323645812, + "learning_rate": 1.1972522080471052e-05, + "loss": 0.4764, "step": 1220 }, { - "epoch": 0.1794871794871795, - "grad_norm": 0.6088645484486718, - "learning_rate": 1.1962890625e-05, - "loss": 0.4694, + "epoch": 0.1803326954217577, + "grad_norm": 0.5367647624912888, + "learning_rate": 1.2021589793915604e-05, + "loss": 0.4443, "step": 1225 }, { - "epoch": 0.18021978021978022, - "grad_norm": 0.5455240800251795, - "learning_rate": 1.201171875e-05, - "loss": 0.4845, + "epoch": 0.18106874723980568, + "grad_norm": 0.5514766921493407, + "learning_rate": 1.2070657507360157e-05, + "loss": 0.4774, "step": 1230 }, { - "epoch": 0.18095238095238095, - "grad_norm": 0.5424304480123273, - "learning_rate": 1.2060546875000002e-05, - "loss": 0.4668, + "epoch": 0.18180479905785368, + "grad_norm": 0.5388082425030042, + "learning_rate": 1.2119725220804712e-05, + "loss": 0.4594, "step": 1235 }, { - "epoch": 0.18168498168498168, - "grad_norm": 0.5203368741702424, - "learning_rate": 1.2109375000000001e-05, - "loss": 0.4862, + "epoch": 0.18254085087590166, + "grad_norm": 0.5068247119187569, + "learning_rate": 1.2168792934249264e-05, + "loss": 0.4802, "step": 1240 }, { - "epoch": 0.1824175824175824, - "grad_norm": 0.6584898777407595, - "learning_rate": 1.2158203125000001e-05, - "loss": 0.4531, + "epoch": 0.18327690269394967, + "grad_norm": 0.5227838084909681, + "learning_rate": 1.2217860647693817e-05, + "loss": 0.4691, "step": 1245 }, { - "epoch": 0.18315018315018314, - "grad_norm": 0.5327149440973011, - "learning_rate": 1.220703125e-05, - "loss": 0.4563, + "epoch": 0.18401295451199765, + "grad_norm": 0.4938061071127395, + "learning_rate": 1.2266928361138373e-05, + "loss": 0.4746, "step": 1250 }, { - "epoch": 0.1838827838827839, - "grad_norm": 0.5277598765377318, - "learning_rate": 1.2255859375e-05, - "loss": 0.4699, + "epoch": 0.18474900633004562, + "grad_norm": 0.5238294647370162, + "learning_rate": 1.2315996074582926e-05, + "loss": 0.4668, "step": 1255 }, { - "epoch": 0.18461538461538463, - "grad_norm": 0.5167807191476441, - "learning_rate": 1.2304687500000002e-05, - "loss": 0.4652, + "epoch": 0.18548505814809363, + "grad_norm": 0.46614497230235846, + "learning_rate": 1.2365063788027479e-05, + "loss": 0.4612, "step": 1260 }, { - "epoch": 0.18534798534798536, - "grad_norm": 0.4808989318720859, - "learning_rate": 1.2353515625000001e-05, - "loss": 0.4825, + "epoch": 0.1862211099661416, + "grad_norm": 0.4985552878100185, + "learning_rate": 1.2414131501472033e-05, + "loss": 0.4817, "step": 1265 }, { - "epoch": 0.18608058608058609, - "grad_norm": 0.5097882629770788, - "learning_rate": 1.2402343750000001e-05, - "loss": 0.4759, + "epoch": 0.1869571617841896, + "grad_norm": 0.5156607573559511, + "learning_rate": 1.2463199214916586e-05, + "loss": 0.4766, "step": 1270 }, { - "epoch": 0.18681318681318682, - "grad_norm": 0.5456506028065036, - "learning_rate": 1.2451171875000001e-05, - "loss": 0.4788, + "epoch": 0.1876932136022376, + "grad_norm": 0.4894863185953375, + "learning_rate": 1.2512266928361138e-05, + "loss": 0.4651, "step": 1275 }, { - "epoch": 0.18754578754578755, - "grad_norm": 0.5107241981918527, - "learning_rate": 1.25e-05, - "loss": 0.483, + "epoch": 0.1884292654202856, + "grad_norm": 0.5231954839271257, + "learning_rate": 1.2561334641805694e-05, + "loss": 0.4826, "step": 1280 }, { - "epoch": 0.18827838827838828, - "grad_norm": 0.49376810757312745, - "learning_rate": 1.2548828125e-05, - "loss": 0.4593, + "epoch": 0.18916531723833357, + "grad_norm": 0.49809529774347094, + "learning_rate": 1.2610402355250247e-05, + "loss": 0.464, "step": 1285 }, { - "epoch": 0.189010989010989, - "grad_norm": 0.5133797804113845, - "learning_rate": 1.2597656250000002e-05, - "loss": 0.4631, + "epoch": 0.18990136905638158, + "grad_norm": 0.5234554810941109, + "learning_rate": 1.26594700686948e-05, + "loss": 0.4733, "step": 1290 }, { - "epoch": 0.18974358974358974, - "grad_norm": 0.5008331295590094, - "learning_rate": 1.2646484375000001e-05, - "loss": 0.4802, + "epoch": 0.19063742087442956, + "grad_norm": 0.5051586556741555, + "learning_rate": 1.2708537782139354e-05, + "loss": 0.4738, "step": 1295 }, { - "epoch": 0.19047619047619047, - "grad_norm": 0.5675292443089186, - "learning_rate": 1.2695312500000001e-05, - "loss": 0.4888, + "epoch": 0.19137347269247756, + "grad_norm": 0.49517034893999295, + "learning_rate": 1.2757605495583907e-05, + "loss": 0.47, "step": 1300 }, { - "epoch": 0.1912087912087912, - "grad_norm": 0.5005818050999626, - "learning_rate": 1.2744140625e-05, - "loss": 0.4689, + "epoch": 0.19210952451052554, + "grad_norm": 0.5915470507381195, + "learning_rate": 1.280667320902846e-05, + "loss": 0.4781, "step": 1305 }, { - "epoch": 0.19194139194139195, - "grad_norm": 0.5283882256001375, - "learning_rate": 1.279296875e-05, - "loss": 0.4465, + "epoch": 0.19284557632857352, + "grad_norm": 0.508338735620736, + "learning_rate": 1.2855740922473014e-05, + "loss": 0.4707, "step": 1310 }, { - "epoch": 0.19267399267399268, - "grad_norm": 0.5193448027983114, - "learning_rate": 1.2841796875e-05, - "loss": 0.4678, + "epoch": 0.19358162814662153, + "grad_norm": 0.4990887547861896, + "learning_rate": 1.2904808635917567e-05, + "loss": 0.4835, "step": 1315 }, { - "epoch": 0.1934065934065934, - "grad_norm": 0.6159454505341463, - "learning_rate": 1.2890625000000002e-05, - "loss": 0.4606, + "epoch": 0.1943176799646695, + "grad_norm": 0.48691875103080917, + "learning_rate": 1.295387634936212e-05, + "loss": 0.4674, "step": 1320 }, { - "epoch": 0.19413919413919414, - "grad_norm": 0.5086536030516351, - "learning_rate": 1.2939453125000001e-05, - "loss": 0.4589, + "epoch": 0.1950537317827175, + "grad_norm": 0.4841195176880159, + "learning_rate": 1.3002944062806676e-05, + "loss": 0.4729, "step": 1325 }, { - "epoch": 0.19487179487179487, - "grad_norm": 0.48860047105264637, - "learning_rate": 1.2988281250000001e-05, - "loss": 0.4541, + "epoch": 0.1957897836007655, + "grad_norm": 0.4992178518108465, + "learning_rate": 1.3052011776251228e-05, + "loss": 0.4688, "step": 1330 }, { - "epoch": 0.1956043956043956, - "grad_norm": 0.46316718079861624, - "learning_rate": 1.3037109375e-05, - "loss": 0.4544, + "epoch": 0.1965258354188135, + "grad_norm": 0.47943066480788776, + "learning_rate": 1.3101079489695781e-05, + "loss": 0.4699, "step": 1335 }, { - "epoch": 0.19633699633699633, - "grad_norm": 0.49587456775643657, - "learning_rate": 1.30859375e-05, - "loss": 0.4623, + "epoch": 0.19726188723686147, + "grad_norm": 0.5012063847835244, + "learning_rate": 1.3150147203140335e-05, + "loss": 0.4661, "step": 1340 }, { - "epoch": 0.19706959706959706, - "grad_norm": 0.501454631805098, - "learning_rate": 1.3134765625000002e-05, - "loss": 0.4507, + "epoch": 0.19799793905490948, + "grad_norm": 0.535826158477013, + "learning_rate": 1.3199214916584888e-05, + "loss": 0.4706, "step": 1345 }, { - "epoch": 0.1978021978021978, - "grad_norm": 0.5000449976017295, - "learning_rate": 1.3183593750000002e-05, - "loss": 0.4512, + "epoch": 0.19873399087295746, + "grad_norm": 0.5071529638069743, + "learning_rate": 1.3248282630029441e-05, + "loss": 0.4625, "step": 1350 }, { - "epoch": 0.19853479853479852, - "grad_norm": 0.4839379008548153, - "learning_rate": 1.3232421875000001e-05, - "loss": 0.4862, + "epoch": 0.19947004269100543, + "grad_norm": 0.5179383619569404, + "learning_rate": 1.3297350343473995e-05, + "loss": 0.4759, "step": 1355 }, { - "epoch": 0.19926739926739928, - "grad_norm": 0.5208252130809964, - "learning_rate": 1.3281250000000001e-05, - "loss": 0.4676, + "epoch": 0.20020609450905344, + "grad_norm": 0.48405439133748607, + "learning_rate": 1.3346418056918548e-05, + "loss": 0.4761, "step": 1360 }, { - "epoch": 0.2, - "grad_norm": 0.5745959281687488, - "learning_rate": 1.3330078125e-05, - "loss": 0.4762, + "epoch": 0.20094214632710142, + "grad_norm": 0.5154094066647193, + "learning_rate": 1.33954857703631e-05, + "loss": 0.478, "step": 1365 }, { - "epoch": 0.20073260073260074, - "grad_norm": 0.6043469530792722, - "learning_rate": 1.337890625e-05, - "loss": 0.4704, + "epoch": 0.20167819814514942, + "grad_norm": 0.5039502624189186, + "learning_rate": 1.3444553483807657e-05, + "loss": 0.4465, "step": 1370 }, { - "epoch": 0.20146520146520147, - "grad_norm": 0.5334372627977081, - "learning_rate": 1.3427734375000002e-05, - "loss": 0.4716, + "epoch": 0.2024142499631974, + "grad_norm": 0.5107787451227336, + "learning_rate": 1.349362119725221e-05, + "loss": 0.4544, "step": 1375 }, { - "epoch": 0.2021978021978022, - "grad_norm": 0.5390143553108346, - "learning_rate": 1.3476562500000001e-05, - "loss": 0.4735, + "epoch": 0.2031503017812454, + "grad_norm": 0.5182898230453957, + "learning_rate": 1.3542688910696762e-05, + "loss": 0.4776, "step": 1380 }, { - "epoch": 0.20293040293040293, - "grad_norm": 0.4736959203098974, - "learning_rate": 1.3525390625000001e-05, - "loss": 0.4774, + "epoch": 0.20388635359929339, + "grad_norm": 0.5025863501160254, + "learning_rate": 1.3591756624141317e-05, + "loss": 0.4681, "step": 1385 }, { - "epoch": 0.20366300366300366, - "grad_norm": 0.49280583683920803, - "learning_rate": 1.3574218750000001e-05, - "loss": 0.4652, + "epoch": 0.2046224054173414, + "grad_norm": 0.4894363132630262, + "learning_rate": 1.364082433758587e-05, + "loss": 0.4675, "step": 1390 }, { - "epoch": 0.2043956043956044, - "grad_norm": 0.525459455539279, - "learning_rate": 1.3623046875e-05, - "loss": 0.4822, + "epoch": 0.20535845723538937, + "grad_norm": 0.5433477313290025, + "learning_rate": 1.3689892051030422e-05, + "loss": 0.4811, "step": 1395 }, { - "epoch": 0.20512820512820512, - "grad_norm": 0.48689775993494483, - "learning_rate": 1.3671875e-05, - "loss": 0.4719, + "epoch": 0.20609450905343737, + "grad_norm": 0.4967075561653421, + "learning_rate": 1.3738959764474977e-05, + "loss": 0.4458, "step": 1400 }, { - "epoch": 0.20586080586080585, - "grad_norm": 0.5212181067359885, - "learning_rate": 1.3720703125000002e-05, - "loss": 0.4709, + "epoch": 0.20683056087148535, + "grad_norm": 0.5244064638537597, + "learning_rate": 1.378802747791953e-05, + "loss": 0.469, "step": 1405 }, { - "epoch": 0.20659340659340658, - "grad_norm": 0.508003690634916, - "learning_rate": 1.3769531250000001e-05, - "loss": 0.4737, + "epoch": 0.20756661268953333, + "grad_norm": 0.5204337598099966, + "learning_rate": 1.3837095191364082e-05, + "loss": 0.4758, "step": 1410 }, { - "epoch": 0.20732600732600734, - "grad_norm": 0.5073601130842522, - "learning_rate": 1.3818359375000001e-05, - "loss": 0.4799, + "epoch": 0.20830266450758134, + "grad_norm": 0.5080676476650419, + "learning_rate": 1.3886162904808638e-05, + "loss": 0.473, "step": 1415 }, { - "epoch": 0.20805860805860807, - "grad_norm": 0.5355556977525563, - "learning_rate": 1.38671875e-05, - "loss": 0.474, + "epoch": 0.20903871632562931, + "grad_norm": 0.47828452516531716, + "learning_rate": 1.393523061825319e-05, + "loss": 0.4816, "step": 1420 }, { - "epoch": 0.2087912087912088, - "grad_norm": 0.4990582643295359, - "learning_rate": 1.3916015625e-05, - "loss": 0.4854, + "epoch": 0.20977476814367732, + "grad_norm": 0.48532948614032556, + "learning_rate": 1.3984298331697743e-05, + "loss": 0.4714, "step": 1425 }, { - "epoch": 0.20952380952380953, - "grad_norm": 0.5093226857332248, - "learning_rate": 1.3964843750000002e-05, - "loss": 0.4555, + "epoch": 0.2105108199617253, + "grad_norm": 0.5086062877140418, + "learning_rate": 1.4033366045142298e-05, + "loss": 0.4667, "step": 1430 }, { - "epoch": 0.21025641025641026, - "grad_norm": 0.502584313631928, - "learning_rate": 1.4013671875000002e-05, - "loss": 0.4732, + "epoch": 0.2112468717797733, + "grad_norm": 0.5050717781580496, + "learning_rate": 1.408243375858685e-05, + "loss": 0.478, "step": 1435 }, { - "epoch": 0.210989010989011, - "grad_norm": 0.526309212370889, - "learning_rate": 1.4062500000000001e-05, - "loss": 0.4728, + "epoch": 0.21198292359782128, + "grad_norm": 0.48821477274253927, + "learning_rate": 1.4131501472031403e-05, + "loss": 0.4588, "step": 1440 }, { - "epoch": 0.21172161172161172, - "grad_norm": 0.5176074132776409, - "learning_rate": 1.4111328125000001e-05, - "loss": 0.4664, + "epoch": 0.2127189754158693, + "grad_norm": 0.48266677376219413, + "learning_rate": 1.418056918547596e-05, + "loss": 0.4557, "step": 1445 }, { - "epoch": 0.21245421245421245, - "grad_norm": 0.4879365025716744, - "learning_rate": 1.416015625e-05, - "loss": 0.467, + "epoch": 0.21345502723391727, + "grad_norm": 0.5512455514452225, + "learning_rate": 1.4229636898920512e-05, + "loss": 0.4672, "step": 1450 }, { - "epoch": 0.21318681318681318, - "grad_norm": 1.9306549484896947, - "learning_rate": 1.4208984375e-05, - "loss": 0.4649, + "epoch": 0.21419107905196527, + "grad_norm": 0.5083481320196631, + "learning_rate": 1.4278704612365065e-05, + "loss": 0.4661, "step": 1455 }, { - "epoch": 0.2139194139194139, - "grad_norm": 0.4955066630748684, - "learning_rate": 1.4257812500000002e-05, - "loss": 0.4601, + "epoch": 0.21492713087001325, + "grad_norm": 0.5595410982557921, + "learning_rate": 1.432777232580962e-05, + "loss": 0.4708, "step": 1460 }, { - "epoch": 0.21465201465201464, - "grad_norm": 0.5025180598567222, - "learning_rate": 1.4306640625000002e-05, - "loss": 0.4608, + "epoch": 0.21566318268806123, + "grad_norm": 0.5224743589963625, + "learning_rate": 1.4376840039254172e-05, + "loss": 0.4759, "step": 1465 }, { - "epoch": 0.2153846153846154, - "grad_norm": 0.49793528536390486, - "learning_rate": 1.4355468750000001e-05, - "loss": 0.4799, + "epoch": 0.21639923450610923, + "grad_norm": 0.5195549182987274, + "learning_rate": 1.4425907752698725e-05, + "loss": 0.4649, "step": 1470 }, { - "epoch": 0.21611721611721613, - "grad_norm": 1.8297240222248374, - "learning_rate": 1.4404296875000001e-05, - "loss": 0.4689, + "epoch": 0.2171352863241572, + "grad_norm": 0.5320486190932664, + "learning_rate": 1.4474975466143279e-05, + "loss": 0.4527, "step": 1475 }, { - "epoch": 0.21684981684981686, - "grad_norm": 0.4705591944470831, - "learning_rate": 1.4453125e-05, - "loss": 0.4828, + "epoch": 0.21787133814220522, + "grad_norm": 0.5152466166857504, + "learning_rate": 1.4524043179587832e-05, + "loss": 0.4706, "step": 1480 }, { - "epoch": 0.2175824175824176, - "grad_norm": 0.4835356918254959, - "learning_rate": 1.4501953125e-05, - "loss": 0.4661, + "epoch": 0.2186073899602532, + "grad_norm": 0.4971011294748948, + "learning_rate": 1.4573110893032384e-05, + "loss": 0.4701, "step": 1485 }, { - "epoch": 0.21831501831501832, - "grad_norm": 0.510433847664988, - "learning_rate": 1.4550781250000002e-05, - "loss": 0.4748, + "epoch": 0.2193434417783012, + "grad_norm": 0.468694375011289, + "learning_rate": 1.462217860647694e-05, + "loss": 0.4653, "step": 1490 }, { - "epoch": 0.21904761904761905, - "grad_norm": 0.5216647874966567, - "learning_rate": 1.4599609375000001e-05, - "loss": 0.475, + "epoch": 0.22007949359634918, + "grad_norm": 0.47946916582322097, + "learning_rate": 1.4671246319921493e-05, + "loss": 0.4674, "step": 1495 }, { - "epoch": 0.21978021978021978, - "grad_norm": 0.4944348257107384, - "learning_rate": 1.4648437500000001e-05, - "loss": 0.4768, + "epoch": 0.22081554541439719, + "grad_norm": 0.5032322510285128, + "learning_rate": 1.4720314033366046e-05, + "loss": 0.4583, "step": 1500 }, { - "epoch": 0.2205128205128205, - "grad_norm": 0.4945475138346755, - "learning_rate": 1.4697265625000001e-05, - "loss": 0.4655, + "epoch": 0.22155159723244516, + "grad_norm": 0.5256011707393451, + "learning_rate": 1.47693817468106e-05, + "loss": 0.4835, "step": 1505 }, { - "epoch": 0.22124542124542124, - "grad_norm": 0.5006384611474771, - "learning_rate": 1.474609375e-05, - "loss": 0.4794, + "epoch": 0.22228764905049314, + "grad_norm": 0.5339877468693773, + "learning_rate": 1.4818449460255153e-05, + "loss": 0.4807, "step": 1510 }, { - "epoch": 0.22197802197802197, - "grad_norm": 0.5466308398968402, - "learning_rate": 1.4794921875000002e-05, - "loss": 0.4753, + "epoch": 0.22302370086854115, + "grad_norm": 0.5160851206234418, + "learning_rate": 1.4867517173699706e-05, + "loss": 0.4785, "step": 1515 }, { - "epoch": 0.22271062271062272, - "grad_norm": 0.5348130721491957, - "learning_rate": 1.4843750000000002e-05, - "loss": 0.4655, + "epoch": 0.22375975268658913, + "grad_norm": 0.47860088752170427, + "learning_rate": 1.491658488714426e-05, + "loss": 0.4794, "step": 1520 }, { - "epoch": 0.22344322344322345, - "grad_norm": 0.48173601863057236, - "learning_rate": 1.4892578125000001e-05, - "loss": 0.471, + "epoch": 0.22449580450463713, + "grad_norm": 0.5243776797101112, + "learning_rate": 1.4965652600588813e-05, + "loss": 0.4767, "step": 1525 }, { - "epoch": 0.22417582417582418, - "grad_norm": 0.5217530092063394, - "learning_rate": 1.4941406250000001e-05, - "loss": 0.4598, + "epoch": 0.2252318563226851, + "grad_norm": 0.5179508095461343, + "learning_rate": 1.5014720314033366e-05, + "loss": 0.4847, "step": 1530 }, { - "epoch": 0.22490842490842491, - "grad_norm": 0.4802076358174074, - "learning_rate": 1.4990234375e-05, - "loss": 0.4695, + "epoch": 0.22596790814073311, + "grad_norm": 0.5001243630855052, + "learning_rate": 1.5063788027477922e-05, + "loss": 0.4675, "step": 1535 }, { - "epoch": 0.22564102564102564, - "grad_norm": 0.49840080694366656, - "learning_rate": 1.50390625e-05, - "loss": 0.4668, + "epoch": 0.2267039599587811, + "grad_norm": 0.48838181870103403, + "learning_rate": 1.5112855740922475e-05, + "loss": 0.4765, "step": 1540 }, { - "epoch": 0.22637362637362637, - "grad_norm": 0.48517238694140485, - "learning_rate": 1.5087890625000002e-05, - "loss": 0.4545, + "epoch": 0.2274400117768291, + "grad_norm": 0.5093776483261092, + "learning_rate": 1.5161923454367027e-05, + "loss": 0.4804, "step": 1545 }, { - "epoch": 0.2271062271062271, - "grad_norm": 0.5112587584402484, - "learning_rate": 1.5136718750000002e-05, - "loss": 0.4818, + "epoch": 0.22817606359487708, + "grad_norm": 0.49757765788276226, + "learning_rate": 1.5210991167811582e-05, + "loss": 0.4602, "step": 1550 }, { - "epoch": 0.22783882783882783, - "grad_norm": 0.4622437817284304, - "learning_rate": 1.5185546875000001e-05, - "loss": 0.4778, + "epoch": 0.22891211541292508, + "grad_norm": 0.6042817011562104, + "learning_rate": 1.5260058881256136e-05, + "loss": 0.4679, "step": 1555 }, { - "epoch": 0.22857142857142856, - "grad_norm": 0.4998516140224748, - "learning_rate": 1.5234375000000001e-05, - "loss": 0.4523, + "epoch": 0.22964816723097306, + "grad_norm": 0.5333395009103001, + "learning_rate": 1.5309126594700687e-05, + "loss": 0.4774, "step": 1560 }, { - "epoch": 0.2293040293040293, - "grad_norm": 0.5313317202642523, - "learning_rate": 1.5283203125e-05, - "loss": 0.4691, + "epoch": 0.23038421904902104, + "grad_norm": 0.4880655438801243, + "learning_rate": 1.535819430814524e-05, + "loss": 0.4656, "step": 1565 }, { - "epoch": 0.23003663003663002, - "grad_norm": 0.5042407860552259, - "learning_rate": 1.5332031250000002e-05, - "loss": 0.4845, + "epoch": 0.23112027086706904, + "grad_norm": 0.49098355944447114, + "learning_rate": 1.5407262021589796e-05, + "loss": 0.4675, "step": 1570 }, { - "epoch": 0.23076923076923078, - "grad_norm": 0.49760097793140207, - "learning_rate": 1.5380859375e-05, - "loss": 0.4682, + "epoch": 0.23185632268511702, + "grad_norm": 0.5067158803640502, + "learning_rate": 1.5456329735034347e-05, + "loss": 0.4609, "step": 1575 }, { - "epoch": 0.2315018315018315, - "grad_norm": 0.4826040802476265, - "learning_rate": 1.54296875e-05, - "loss": 0.4577, + "epoch": 0.23259237450316503, + "grad_norm": 0.512616066005067, + "learning_rate": 1.55053974484789e-05, + "loss": 0.4768, "step": 1580 }, { - "epoch": 0.23223443223443224, - "grad_norm": 0.4785399152650899, - "learning_rate": 1.5478515625000003e-05, - "loss": 0.4667, + "epoch": 0.233328426321213, + "grad_norm": 0.7154629590386294, + "learning_rate": 1.5554465161923456e-05, + "loss": 0.4737, "step": 1585 }, { - "epoch": 0.23296703296703297, - "grad_norm": 0.5071525975487288, - "learning_rate": 1.552734375e-05, - "loss": 0.448, + "epoch": 0.234064478139261, + "grad_norm": 0.47648906066367264, + "learning_rate": 1.5603532875368007e-05, + "loss": 0.4498, "step": 1590 }, { - "epoch": 0.2336996336996337, - "grad_norm": 0.4765305380475655, - "learning_rate": 1.5576171875000002e-05, - "loss": 0.4486, + "epoch": 0.234800529957309, + "grad_norm": 0.4982638347719357, + "learning_rate": 1.5652600588812565e-05, + "loss": 0.4487, "step": 1595 }, { - "epoch": 0.23443223443223443, - "grad_norm": 0.506737028891706, - "learning_rate": 1.5625e-05, - "loss": 0.4594, + "epoch": 0.235536581775357, + "grad_norm": 0.5214363004304297, + "learning_rate": 1.5701668302257116e-05, + "loss": 0.4567, "step": 1600 }, { - "epoch": 0.23516483516483516, - "grad_norm": 0.5033365268409765, - "learning_rate": 1.5673828125000002e-05, - "loss": 0.4857, + "epoch": 0.23627263359340497, + "grad_norm": 0.5281970704860975, + "learning_rate": 1.575073601570167e-05, + "loss": 0.4871, "step": 1605 }, { - "epoch": 0.2358974358974359, - "grad_norm": 0.5533174142914994, - "learning_rate": 1.572265625e-05, - "loss": 0.4713, + "epoch": 0.23700868541145298, + "grad_norm": 0.47438263533599084, + "learning_rate": 1.5799803729146224e-05, + "loss": 0.4744, "step": 1610 }, { - "epoch": 0.23663003663003662, - "grad_norm": 0.49950173686881316, - "learning_rate": 1.5771484375e-05, - "loss": 0.4835, + "epoch": 0.23774473722950096, + "grad_norm": 0.49674603424275676, + "learning_rate": 1.5848871442590775e-05, + "loss": 0.4779, "step": 1615 }, { - "epoch": 0.23736263736263735, - "grad_norm": 0.477522695620854, - "learning_rate": 1.5820312500000003e-05, - "loss": 0.5004, + "epoch": 0.23848078904754894, + "grad_norm": 0.5051623027599355, + "learning_rate": 1.589793915603533e-05, + "loss": 0.4652, "step": 1620 }, { - "epoch": 0.23809523809523808, - "grad_norm": 0.4596092750042302, - "learning_rate": 1.5869140625e-05, - "loss": 0.4735, + "epoch": 0.23921684086559694, + "grad_norm": 0.5229696639451223, + "learning_rate": 1.5947006869479884e-05, + "loss": 0.4788, "step": 1625 }, { - "epoch": 0.23882783882783884, - "grad_norm": 0.4864862846797984, - "learning_rate": 1.5917968750000002e-05, - "loss": 0.48, + "epoch": 0.23995289268364492, + "grad_norm": 0.5036740790381153, + "learning_rate": 1.5996074582924435e-05, + "loss": 0.4763, "step": 1630 }, { - "epoch": 0.23956043956043957, - "grad_norm": 0.47289872856899984, - "learning_rate": 1.5966796875e-05, - "loss": 0.4747, + "epoch": 0.24068894450169293, + "grad_norm": 0.5035354462935039, + "learning_rate": 1.604514229636899e-05, + "loss": 0.4678, "step": 1635 }, { - "epoch": 0.2402930402930403, - "grad_norm": 0.46942665161369906, - "learning_rate": 1.6015625e-05, - "loss": 0.4725, + "epoch": 0.2414249963197409, + "grad_norm": 0.4900212380645429, + "learning_rate": 1.6094210009813544e-05, + "loss": 0.4696, "step": 1640 }, { - "epoch": 0.24102564102564103, - "grad_norm": 0.4781748865843972, - "learning_rate": 1.6064453125000003e-05, - "loss": 0.4726, + "epoch": 0.2421610481377889, + "grad_norm": 0.5150211975147684, + "learning_rate": 1.61432777232581e-05, + "loss": 0.4548, "step": 1645 }, { - "epoch": 0.24175824175824176, - "grad_norm": 0.4957955173984609, - "learning_rate": 1.611328125e-05, - "loss": 0.4618, + "epoch": 0.2428970999558369, + "grad_norm": 0.4981942201415043, + "learning_rate": 1.619234543670265e-05, + "loss": 0.4836, "step": 1650 }, { - "epoch": 0.2424908424908425, - "grad_norm": 0.4751687434528722, - "learning_rate": 1.6162109375000002e-05, - "loss": 0.4771, + "epoch": 0.2436331517738849, + "grad_norm": 0.5226568976328225, + "learning_rate": 1.6241413150147204e-05, + "loss": 0.4708, "step": 1655 }, { - "epoch": 0.24322344322344322, - "grad_norm": 0.5229761322805487, - "learning_rate": 1.62109375e-05, - "loss": 0.4827, + "epoch": 0.24436920359193287, + "grad_norm": 0.5157607231064318, + "learning_rate": 1.6290480863591758e-05, + "loss": 0.4601, "step": 1660 }, { - "epoch": 0.24395604395604395, - "grad_norm": 0.5022996752707977, - "learning_rate": 1.6259765625e-05, - "loss": 0.4686, + "epoch": 0.24510525540998085, + "grad_norm": 0.5395851933711503, + "learning_rate": 1.633954857703631e-05, + "loss": 0.4752, "step": 1665 }, { - "epoch": 0.24468864468864468, - "grad_norm": 0.4641070216865701, - "learning_rate": 1.6308593750000003e-05, - "loss": 0.4697, + "epoch": 0.24584130722802885, + "grad_norm": 0.4751495401729902, + "learning_rate": 1.6388616290480867e-05, + "loss": 0.4705, "step": 1670 }, { - "epoch": 0.2454212454212454, - "grad_norm": 0.49275154239134633, - "learning_rate": 1.6357421875e-05, - "loss": 0.474, + "epoch": 0.24657735904607683, + "grad_norm": 0.5237146203386382, + "learning_rate": 1.6437684003925418e-05, + "loss": 0.4816, "step": 1675 }, { - "epoch": 0.24615384615384617, - "grad_norm": 0.4661966887748862, - "learning_rate": 1.6406250000000002e-05, - "loss": 0.4929, + "epoch": 0.24731341086412484, + "grad_norm": 0.5033688649754753, + "learning_rate": 1.6486751717369972e-05, + "loss": 0.4712, "step": 1680 }, { - "epoch": 0.2468864468864469, - "grad_norm": 0.5012839182422457, - "learning_rate": 1.6455078125e-05, - "loss": 0.4713, + "epoch": 0.24804946268217282, + "grad_norm": 0.514702849413646, + "learning_rate": 1.6535819430814527e-05, + "loss": 0.4837, "step": 1685 }, { - "epoch": 0.24761904761904763, - "grad_norm": 0.49505852648352355, - "learning_rate": 1.6503906250000002e-05, - "loss": 0.459, + "epoch": 0.24878551450022082, + "grad_norm": 0.5023097725691634, + "learning_rate": 1.6584887144259078e-05, + "loss": 0.4738, "step": 1690 }, { - "epoch": 0.24835164835164836, - "grad_norm": 0.574214037509089, - "learning_rate": 1.6552734375e-05, - "loss": 0.4909, + "epoch": 0.2495215663182688, + "grad_norm": 0.5183562617108541, + "learning_rate": 1.6633954857703632e-05, + "loss": 0.4683, "step": 1695 }, { - "epoch": 0.2490842490842491, - "grad_norm": 0.5046382196659095, - "learning_rate": 1.66015625e-05, - "loss": 0.4499, + "epoch": 0.2502576181363168, + "grad_norm": 0.5360790864899261, + "learning_rate": 1.6683022571148187e-05, + "loss": 0.4743, "step": 1700 }, { - "epoch": 0.24981684981684982, - "grad_norm": 0.49535699131303024, - "learning_rate": 1.6650390625000003e-05, - "loss": 0.4721, + "epoch": 0.2509936699543648, + "grad_norm": 0.4907804756360193, + "learning_rate": 1.6732090284592738e-05, + "loss": 0.4676, "step": 1705 }, { - "epoch": 0.25054945054945055, - "grad_norm": 0.528858810537546, - "learning_rate": 1.669921875e-05, - "loss": 0.4783, + "epoch": 0.25172972177241276, + "grad_norm": 0.46744276329021295, + "learning_rate": 1.6781157998037292e-05, + "loss": 0.4716, "step": 1710 }, { - "epoch": 0.2512820512820513, - "grad_norm": 0.4971493188600044, - "learning_rate": 1.6748046875000002e-05, - "loss": 0.4619, + "epoch": 0.2524657735904608, + "grad_norm": 0.47718721326723956, + "learning_rate": 1.6830225711481847e-05, + "loss": 0.4871, "step": 1715 }, { - "epoch": 0.252014652014652, - "grad_norm": 0.4871153410339573, - "learning_rate": 1.6796875e-05, - "loss": 0.4639, + "epoch": 0.2532018254085088, + "grad_norm": 0.5248079760485729, + "learning_rate": 1.68792934249264e-05, + "loss": 0.4566, "step": 1720 }, { - "epoch": 0.25274725274725274, - "grad_norm": 0.4753062852302058, - "learning_rate": 1.6845703125e-05, - "loss": 0.4678, + "epoch": 0.25393787722655675, + "grad_norm": 0.49252041030933036, + "learning_rate": 1.6928361138370952e-05, + "loss": 0.4711, "step": 1725 }, { - "epoch": 0.25347985347985347, - "grad_norm": 0.47868759947721007, - "learning_rate": 1.6894531250000003e-05, - "loss": 0.4608, + "epoch": 0.25467392904460473, + "grad_norm": 0.4869784177371722, + "learning_rate": 1.6977428851815506e-05, + "loss": 0.4677, "step": 1730 }, { - "epoch": 0.2542124542124542, - "grad_norm": 0.4698689149576196, - "learning_rate": 1.6943359375e-05, - "loss": 0.4544, + "epoch": 0.2554099808626527, + "grad_norm": 0.5497380707317164, + "learning_rate": 1.702649656526006e-05, + "loss": 0.4673, "step": 1735 }, { - "epoch": 0.2549450549450549, - "grad_norm": 0.48882593711349437, - "learning_rate": 1.6992187500000002e-05, - "loss": 0.4728, + "epoch": 0.25614603268070074, + "grad_norm": 0.4677135024107484, + "learning_rate": 1.7075564278704612e-05, + "loss": 0.4561, "step": 1740 }, { - "epoch": 0.25567765567765566, - "grad_norm": 0.500011982347738, - "learning_rate": 1.7041015625e-05, - "loss": 0.4677, + "epoch": 0.2568820844987487, + "grad_norm": 0.5031203169744454, + "learning_rate": 1.7124631992149166e-05, + "loss": 0.4571, "step": 1745 }, { - "epoch": 0.2564102564102564, - "grad_norm": 0.49598465314699264, - "learning_rate": 1.708984375e-05, - "loss": 0.4772, + "epoch": 0.2576181363167967, + "grad_norm": 0.6191590969953277, + "learning_rate": 1.717369970559372e-05, + "loss": 0.4568, "step": 1750 }, { - "epoch": 0.2571428571428571, - "grad_norm": 0.4329945364963151, - "learning_rate": 1.7138671875000003e-05, - "loss": 0.4648, + "epoch": 0.2583541881348447, + "grad_norm": 0.4908953840225422, + "learning_rate": 1.722276741903827e-05, + "loss": 0.4819, "step": 1755 }, { - "epoch": 0.2578754578754579, - "grad_norm": 0.48232118635122256, - "learning_rate": 1.71875e-05, - "loss": 0.4954, + "epoch": 0.2590902399528927, + "grad_norm": 0.47019441958924985, + "learning_rate": 1.727183513248283e-05, + "loss": 0.4773, "step": 1760 }, { - "epoch": 0.25860805860805863, - "grad_norm": 0.4922967812176913, - "learning_rate": 1.7236328125000002e-05, - "loss": 0.493, + "epoch": 0.2598262917709407, + "grad_norm": 0.4745018245300634, + "learning_rate": 1.732090284592738e-05, + "loss": 0.4685, "step": 1765 }, { - "epoch": 0.25934065934065936, - "grad_norm": 0.49419218230058926, - "learning_rate": 1.728515625e-05, - "loss": 0.4778, + "epoch": 0.26056234358898867, + "grad_norm": 0.5598599406025719, + "learning_rate": 1.7369970559371935e-05, + "loss": 0.4841, "step": 1770 }, { - "epoch": 0.2600732600732601, - "grad_norm": 0.4610463309322283, - "learning_rate": 1.7333984375000002e-05, - "loss": 0.458, + "epoch": 0.26129839540703664, + "grad_norm": 0.4657021893443309, + "learning_rate": 1.741903827281649e-05, + "loss": 0.471, "step": 1775 }, { - "epoch": 0.2608058608058608, - "grad_norm": 0.6298389104020712, - "learning_rate": 1.73828125e-05, - "loss": 0.4724, + "epoch": 0.2620344472250846, + "grad_norm": 0.4731011863702337, + "learning_rate": 1.746810598626104e-05, + "loss": 0.4721, "step": 1780 }, { - "epoch": 0.26153846153846155, - "grad_norm": 0.44564141424220477, - "learning_rate": 1.7431640625e-05, - "loss": 0.4547, + "epoch": 0.26277049904313265, + "grad_norm": 0.4811101419462721, + "learning_rate": 1.7517173699705595e-05, + "loss": 0.4698, "step": 1785 }, { - "epoch": 0.2622710622710623, - "grad_norm": 0.4943282192514032, - "learning_rate": 1.7480468750000003e-05, - "loss": 0.4682, + "epoch": 0.26350655086118063, + "grad_norm": 0.493675477012419, + "learning_rate": 1.756624141315015e-05, + "loss": 0.4847, "step": 1790 }, { - "epoch": 0.263003663003663, - "grad_norm": 0.5296359933117363, - "learning_rate": 1.7529296875e-05, - "loss": 0.4668, + "epoch": 0.2642426026792286, + "grad_norm": 0.5003651025617639, + "learning_rate": 1.76153091265947e-05, + "loss": 0.4877, "step": 1795 }, { - "epoch": 0.26373626373626374, - "grad_norm": 0.47784329131477293, - "learning_rate": 1.7578125000000002e-05, - "loss": 0.4623, + "epoch": 0.2649786544972766, + "grad_norm": 0.6143919387458073, + "learning_rate": 1.7664376840039255e-05, + "loss": 0.4648, "step": 1800 }, { - "epoch": 0.2644688644688645, - "grad_norm": 0.474034871482463, - "learning_rate": 1.7626953125e-05, - "loss": 0.4637, + "epoch": 0.2657147063153246, + "grad_norm": 0.4927430508144176, + "learning_rate": 1.771344455348381e-05, + "loss": 0.4813, "step": 1805 }, { - "epoch": 0.2652014652014652, - "grad_norm": 0.46948293169273314, - "learning_rate": 1.767578125e-05, - "loss": 0.4659, + "epoch": 0.2664507581333726, + "grad_norm": 0.48103388838746014, + "learning_rate": 1.7762512266928363e-05, + "loss": 0.4674, "step": 1810 }, { - "epoch": 0.26593406593406593, - "grad_norm": 0.45953721175077844, - "learning_rate": 1.7724609375000003e-05, - "loss": 0.4866, + "epoch": 0.2671868099514206, + "grad_norm": 0.4499372204828534, + "learning_rate": 1.7811579980372914e-05, + "loss": 0.4642, "step": 1815 }, { - "epoch": 0.26666666666666666, - "grad_norm": 0.5001978170812945, - "learning_rate": 1.77734375e-05, - "loss": 0.4675, + "epoch": 0.26792286176946856, + "grad_norm": 0.4971451354459214, + "learning_rate": 1.786064769381747e-05, + "loss": 0.4616, "step": 1820 }, { - "epoch": 0.2673992673992674, - "grad_norm": 0.4749703185073931, - "learning_rate": 1.7822265625000002e-05, - "loss": 0.4985, + "epoch": 0.26865891358751653, + "grad_norm": 0.458431302762374, + "learning_rate": 1.7909715407262023e-05, + "loss": 0.4688, "step": 1825 }, { - "epoch": 0.2681318681318681, - "grad_norm": 0.49066995039881806, - "learning_rate": 1.787109375e-05, - "loss": 0.4921, + "epoch": 0.26939496540556457, + "grad_norm": 0.4969338641891289, + "learning_rate": 1.7958783120706574e-05, + "loss": 0.4754, "step": 1830 }, { - "epoch": 0.26886446886446885, - "grad_norm": 0.49405211470211396, - "learning_rate": 1.7919921875e-05, - "loss": 0.4892, + "epoch": 0.27013101722361255, + "grad_norm": 0.4600582361624074, + "learning_rate": 1.8007850834151132e-05, + "loss": 0.4709, "step": 1835 }, { - "epoch": 0.2695970695970696, - "grad_norm": 0.49291065382018007, - "learning_rate": 1.7968750000000003e-05, - "loss": 0.4742, + "epoch": 0.2708670690416605, + "grad_norm": 0.5108060982398376, + "learning_rate": 1.8056918547595683e-05, + "loss": 0.4747, "step": 1840 }, { - "epoch": 0.2703296703296703, - "grad_norm": 0.4494775993205646, - "learning_rate": 1.8017578125e-05, - "loss": 0.4536, + "epoch": 0.2716031208597085, + "grad_norm": 0.4460882058437207, + "learning_rate": 1.8105986261040237e-05, + "loss": 0.479, "step": 1845 }, { - "epoch": 0.27106227106227104, - "grad_norm": 0.4488385326855789, - "learning_rate": 1.8066406250000002e-05, - "loss": 0.4906, + "epoch": 0.27233917267775654, + "grad_norm": 0.4723634907192592, + "learning_rate": 1.8155053974484792e-05, + "loss": 0.4683, "step": 1850 }, { - "epoch": 0.2717948717948718, - "grad_norm": 0.46467724220767814, - "learning_rate": 1.8115234375e-05, - "loss": 0.4752, + "epoch": 0.2730752244958045, + "grad_norm": 0.46938877208903224, + "learning_rate": 1.8204121687929343e-05, + "loss": 0.4674, "step": 1855 }, { - "epoch": 0.2725274725274725, - "grad_norm": 0.4676235296258921, - "learning_rate": 1.8164062500000002e-05, - "loss": 0.4689, + "epoch": 0.2738112763138525, + "grad_norm": 0.551824744493149, + "learning_rate": 1.8253189401373897e-05, + "loss": 0.4763, "step": 1860 }, { - "epoch": 0.27326007326007323, - "grad_norm": 0.5393139967406888, - "learning_rate": 1.8212890625e-05, - "loss": 0.4697, + "epoch": 0.27454732813190047, + "grad_norm": 0.48403544448997265, + "learning_rate": 1.830225711481845e-05, + "loss": 0.4756, "step": 1865 }, { - "epoch": 0.273992673992674, - "grad_norm": 0.5861718805708979, - "learning_rate": 1.826171875e-05, - "loss": 0.4635, + "epoch": 0.2752833799499485, + "grad_norm": 0.46983314682824356, + "learning_rate": 1.8351324828263003e-05, + "loss": 0.4819, "step": 1870 }, { - "epoch": 0.27472527472527475, - "grad_norm": 0.8103200245272049, - "learning_rate": 1.8310546875000003e-05, - "loss": 0.4793, + "epoch": 0.2760194317679965, + "grad_norm": 0.4576607658725547, + "learning_rate": 1.8400392541707557e-05, + "loss": 0.4455, "step": 1875 }, { - "epoch": 0.2754578754578755, - "grad_norm": 0.4880340601172291, - "learning_rate": 1.8359375e-05, - "loss": 0.466, + "epoch": 0.27675548358604446, + "grad_norm": 0.47118086211235977, + "learning_rate": 1.844946025515211e-05, + "loss": 0.4694, "step": 1880 }, { - "epoch": 0.2761904761904762, - "grad_norm": 0.4790298158701428, - "learning_rate": 1.8408203125000002e-05, - "loss": 0.4923, + "epoch": 0.27749153540409244, + "grad_norm": 0.49239146345942914, + "learning_rate": 1.8498527968596666e-05, + "loss": 0.4805, "step": 1885 }, { - "epoch": 0.27692307692307694, - "grad_norm": 0.6603329163970332, - "learning_rate": 1.845703125e-05, - "loss": 0.4881, + "epoch": 0.2782275872221404, + "grad_norm": 0.5594939354842817, + "learning_rate": 1.8547595682041217e-05, + "loss": 0.4754, "step": 1890 }, { - "epoch": 0.27765567765567767, - "grad_norm": 0.493241053754715, - "learning_rate": 1.8505859375e-05, - "loss": 0.4857, + "epoch": 0.27896363904018845, + "grad_norm": 0.47899677266183416, + "learning_rate": 1.859666339548577e-05, + "loss": 0.471, "step": 1895 }, { - "epoch": 0.2783882783882784, - "grad_norm": 0.4681650600538861, - "learning_rate": 1.8554687500000003e-05, - "loss": 0.4762, + "epoch": 0.2796996908582364, + "grad_norm": 0.6334955053175579, + "learning_rate": 1.8645731108930326e-05, + "loss": 0.4782, "step": 1900 }, { - "epoch": 0.27912087912087913, - "grad_norm": 0.4633678790580011, - "learning_rate": 1.8603515625e-05, - "loss": 0.4609, + "epoch": 0.2804357426762844, + "grad_norm": 0.46276680124375486, + "learning_rate": 1.8694798822374877e-05, + "loss": 0.47, "step": 1905 }, { - "epoch": 0.27985347985347986, - "grad_norm": 0.4735824848064331, - "learning_rate": 1.8652343750000002e-05, - "loss": 0.4459, + "epoch": 0.2811717944943324, + "grad_norm": 0.46323911418641445, + "learning_rate": 1.874386653581943e-05, + "loss": 0.4599, "step": 1910 }, { - "epoch": 0.2805860805860806, - "grad_norm": 0.46703566408803404, - "learning_rate": 1.8701171875e-05, - "loss": 0.4624, + "epoch": 0.2819078463123804, + "grad_norm": 0.476336202980536, + "learning_rate": 1.8792934249263986e-05, + "loss": 0.4801, "step": 1915 }, { - "epoch": 0.2813186813186813, - "grad_norm": 0.4720185289135887, - "learning_rate": 1.8750000000000002e-05, - "loss": 0.4775, + "epoch": 0.2826438981304284, + "grad_norm": 0.4498718281230744, + "learning_rate": 1.884200196270854e-05, + "loss": 0.4563, "step": 1920 }, { - "epoch": 0.28205128205128205, - "grad_norm": 0.47311678768321663, - "learning_rate": 1.8798828125000003e-05, - "loss": 0.4738, + "epoch": 0.2833799499484764, + "grad_norm": 0.4671774377376596, + "learning_rate": 1.8891069676153094e-05, + "loss": 0.4792, "step": 1925 }, { - "epoch": 0.2827838827838828, - "grad_norm": 0.4572599200236833, - "learning_rate": 1.884765625e-05, - "loss": 0.48, + "epoch": 0.28411600176652435, + "grad_norm": 0.4500480347092648, + "learning_rate": 1.8940137389597645e-05, + "loss": 0.4743, "step": 1930 }, { - "epoch": 0.2835164835164835, - "grad_norm": 0.4818085410272587, - "learning_rate": 1.8896484375000003e-05, - "loss": 0.4726, + "epoch": 0.28485205358457233, + "grad_norm": 0.5107626573555243, + "learning_rate": 1.89892051030422e-05, + "loss": 0.48, "step": 1935 }, { - "epoch": 0.28424908424908424, - "grad_norm": 0.4629029786842517, - "learning_rate": 1.89453125e-05, - "loss": 0.4715, + "epoch": 0.28558810540262036, + "grad_norm": 0.517903581914278, + "learning_rate": 1.9038272816486754e-05, + "loss": 0.4892, "step": 1940 }, { - "epoch": 0.28498168498168497, - "grad_norm": 0.4873451515455, - "learning_rate": 1.8994140625000002e-05, - "loss": 0.4564, + "epoch": 0.28632415722066834, + "grad_norm": 0.4766111115870996, + "learning_rate": 1.9087340529931305e-05, + "loss": 0.4692, "step": 1945 }, { - "epoch": 0.2857142857142857, - "grad_norm": 0.4664172453174398, - "learning_rate": 1.904296875e-05, - "loss": 0.4585, + "epoch": 0.2870602090387163, + "grad_norm": 0.4825649575499548, + "learning_rate": 1.913640824337586e-05, + "loss": 0.49, "step": 1950 }, { - "epoch": 0.28644688644688643, - "grad_norm": 0.4696566885216601, - "learning_rate": 1.9091796875e-05, - "loss": 0.4603, + "epoch": 0.2877962608567643, + "grad_norm": 0.4724648176127981, + "learning_rate": 1.9185475956820414e-05, + "loss": 0.4932, "step": 1955 }, { - "epoch": 0.28717948717948716, - "grad_norm": 0.46259581185803655, - "learning_rate": 1.9140625000000003e-05, - "loss": 0.4616, + "epoch": 0.28853231267481233, + "grad_norm": 0.47881279516123754, + "learning_rate": 1.923454367026497e-05, + "loss": 0.4801, "step": 1960 }, { - "epoch": 0.2879120879120879, - "grad_norm": 0.46970335979866645, - "learning_rate": 1.9189453125e-05, - "loss": 0.4736, + "epoch": 0.2892683644928603, + "grad_norm": 0.45468198120850223, + "learning_rate": 1.9283611383709523e-05, + "loss": 0.4806, "step": 1965 }, { - "epoch": 0.2886446886446886, - "grad_norm": 0.47296502986396777, - "learning_rate": 1.9238281250000002e-05, - "loss": 0.4831, + "epoch": 0.2900044163109083, + "grad_norm": 0.44628501808614907, + "learning_rate": 1.9332679097154074e-05, + "loss": 0.4642, "step": 1970 }, { - "epoch": 0.2893772893772894, - "grad_norm": 0.5115242848392085, - "learning_rate": 1.9287109375e-05, - "loss": 0.4844, + "epoch": 0.29074046812895626, + "grad_norm": 0.44328776399808534, + "learning_rate": 1.9381746810598628e-05, + "loss": 0.4578, "step": 1975 }, { - "epoch": 0.29010989010989013, - "grad_norm": 0.4705181315677594, - "learning_rate": 1.93359375e-05, - "loss": 0.4854, + "epoch": 0.29147651994700424, + "grad_norm": 0.4684544494472312, + "learning_rate": 1.9430814524043183e-05, + "loss": 0.4675, "step": 1980 }, { - "epoch": 0.29084249084249086, - "grad_norm": 0.47843755815200834, - "learning_rate": 1.9384765625000003e-05, - "loss": 0.4663, + "epoch": 0.2922125717650523, + "grad_norm": 0.4352182628674537, + "learning_rate": 1.9479882237487734e-05, + "loss": 0.4735, "step": 1985 }, { - "epoch": 0.2915750915750916, - "grad_norm": 0.45771995006184946, - "learning_rate": 1.943359375e-05, - "loss": 0.4613, + "epoch": 0.29294862358310025, + "grad_norm": 0.45561466921902033, + "learning_rate": 1.9528949950932288e-05, + "loss": 0.4741, "step": 1990 }, { - "epoch": 0.2923076923076923, - "grad_norm": 0.48868203559391954, - "learning_rate": 1.9482421875000002e-05, - "loss": 0.4744, + "epoch": 0.29368467540114823, + "grad_norm": 0.43295581864349225, + "learning_rate": 1.9578017664376843e-05, + "loss": 0.4602, "step": 1995 }, { - "epoch": 0.29304029304029305, - "grad_norm": 0.46424047982029926, - "learning_rate": 1.953125e-05, - "loss": 0.4742, + "epoch": 0.2944207272191962, + "grad_norm": 0.4316839171230612, + "learning_rate": 1.9627085377821397e-05, + "loss": 0.467, "step": 2000 }, { - "epoch": 0.2937728937728938, - "grad_norm": 0.48114634444665394, - "learning_rate": 1.9580078125000002e-05, - "loss": 0.453, + "epoch": 0.29515677903724424, + "grad_norm": 0.4569846908548342, + "learning_rate": 1.9676153091265948e-05, + "loss": 0.4721, "step": 2005 }, { - "epoch": 0.2945054945054945, - "grad_norm": 0.4356027962155649, - "learning_rate": 1.9628906250000003e-05, - "loss": 0.4662, + "epoch": 0.2958928308552922, + "grad_norm": 0.4616736815476582, + "learning_rate": 1.9725220804710502e-05, + "loss": 0.4711, "step": 2010 }, { - "epoch": 0.29523809523809524, - "grad_norm": 0.45641072197702454, - "learning_rate": 1.9677734375e-05, - "loss": 0.4703, + "epoch": 0.2966288826733402, + "grad_norm": 0.4335580397496052, + "learning_rate": 1.9774288518155057e-05, + "loss": 0.4773, "step": 2015 }, { - "epoch": 0.295970695970696, - "grad_norm": 0.47139505467873855, - "learning_rate": 1.9726562500000003e-05, - "loss": 0.4728, + "epoch": 0.2973649344913882, + "grad_norm": 0.5072829009356221, + "learning_rate": 1.9823356231599608e-05, + "loss": 0.4618, "step": 2020 }, { - "epoch": 0.2967032967032967, - "grad_norm": 0.5130247107540543, - "learning_rate": 1.9775390625e-05, - "loss": 0.4827, + "epoch": 0.2981009863094362, + "grad_norm": 0.4553026268639488, + "learning_rate": 1.9872423945044162e-05, + "loss": 0.487, "step": 2025 }, { - "epoch": 0.29743589743589743, - "grad_norm": 0.47811321943368856, - "learning_rate": 1.9824218750000002e-05, - "loss": 0.4734, + "epoch": 0.2988370381274842, + "grad_norm": 0.46481213419200135, + "learning_rate": 1.9921491658488717e-05, + "loss": 0.4682, "step": 2030 }, { - "epoch": 0.29816849816849816, - "grad_norm": 0.47001518772631856, - "learning_rate": 1.9873046875e-05, - "loss": 0.4782, + "epoch": 0.29957308994553217, + "grad_norm": 0.4350216051138614, + "learning_rate": 1.9970559371933268e-05, + "loss": 0.4869, "step": 2035 }, { - "epoch": 0.2989010989010989, - "grad_norm": 0.4691655671055533, - "learning_rate": 1.9921875e-05, - "loss": 0.4534, + "epoch": 0.30030914176358015, + "grad_norm": 0.44647682822227974, + "learning_rate": 1.9999999413208634e-05, + "loss": 0.4687, "step": 2040 }, { - "epoch": 0.2996336996336996, - "grad_norm": 0.46638939567865445, - "learning_rate": 1.9970703125000003e-05, - "loss": 0.4727, + "epoch": 0.3010451935816281, + "grad_norm": 0.4715234199947958, + "learning_rate": 1.9999992811806558e-05, + "loss": 0.4681, "step": 2045 }, { - "epoch": 0.30036630036630035, - "grad_norm": 0.4849584243099219, - "learning_rate": 1.9999999418673043e-05, - "loss": 0.486, + "epoch": 0.30178124539967616, + "grad_norm": 0.4332618051435828, + "learning_rate": 1.9999978875518057e-05, + "loss": 0.4492, "step": 2050 }, { - "epoch": 0.3010989010989011, - "grad_norm": 0.46608548906747865, - "learning_rate": 1.9999992878745508e-05, - "loss": 0.4817, + "epoch": 0.30251729721772413, + "grad_norm": 0.48395635327674913, + "learning_rate": 1.9999957604353346e-05, + "loss": 0.485, "step": 2055 }, { - "epoch": 0.3018315018315018, - "grad_norm": 0.4582597387899196, - "learning_rate": 1.9999979072236512e-05, - "loss": 0.4655, + "epoch": 0.3032533490357721, + "grad_norm": 0.4799025445525673, + "learning_rate": 1.999992899832804e-05, + "loss": 0.4728, "step": 2060 }, { - "epoch": 0.30256410256410254, - "grad_norm": 0.4498963606835548, - "learning_rate": 1.9999957999156077e-05, - "loss": 0.4648, + "epoch": 0.3039894008538201, + "grad_norm": 0.4596658733934185, + "learning_rate": 1.999989305746311e-05, + "loss": 0.452, "step": 2065 }, { - "epoch": 0.3032967032967033, - "grad_norm": 0.4881316781230909, - "learning_rate": 1.9999929659519525e-05, - "loss": 0.4869, + "epoch": 0.3047254526718681, + "grad_norm": 0.46794477322830674, + "learning_rate": 1.999984978178492e-05, + "loss": 0.4726, "step": 2070 }, { - "epoch": 0.304029304029304, - "grad_norm": 0.47231720891197954, - "learning_rate": 1.9999894053347445e-05, - "loss": 0.4633, + "epoch": 0.3054615044899161, + "grad_norm": 0.47232714732023295, + "learning_rate": 1.999979917132522e-05, + "loss": 0.4924, "step": 2075 }, { - "epoch": 0.3047619047619048, - "grad_norm": 0.440552640547169, - "learning_rate": 1.999985118066571e-05, - "loss": 0.4732, + "epoch": 0.3061975563079641, + "grad_norm": 0.4756519580313491, + "learning_rate": 1.9999741226121124e-05, + "loss": 0.4748, "step": 2080 }, { - "epoch": 0.3054945054945055, - "grad_norm": 0.4430343759090646, - "learning_rate": 1.999980104150548e-05, - "loss": 0.4857, + "epoch": 0.30693360812601206, + "grad_norm": 0.5423454091606669, + "learning_rate": 1.9999675946215134e-05, + "loss": 0.4536, "step": 2085 }, { - "epoch": 0.30622710622710625, - "grad_norm": 0.4694057908906834, - "learning_rate": 1.999974363590318e-05, - "loss": 0.5014, + "epoch": 0.30766965994406004, + "grad_norm": 0.47056906278754096, + "learning_rate": 1.9999603331655143e-05, + "loss": 0.4932, "step": 2090 }, { - "epoch": 0.306959706959707, - "grad_norm": 0.429012323732836, - "learning_rate": 1.9999678963900533e-05, - "loss": 0.471, + "epoch": 0.30840571176210807, + "grad_norm": 0.43973383442548275, + "learning_rate": 1.9999523382494397e-05, + "loss": 0.4736, "step": 2095 }, { - "epoch": 0.3076923076923077, - "grad_norm": 0.4650830657293966, - "learning_rate": 1.9999607025544525e-05, - "loss": 0.4774, + "epoch": 0.30914176358015605, + "grad_norm": 0.46132412885474183, + "learning_rate": 1.999943609879155e-05, + "loss": 0.478, "step": 2100 }, { - "epoch": 0.30842490842490844, - "grad_norm": 0.49607542309669084, - "learning_rate": 1.9999527820887437e-05, - "loss": 0.4641, + "epoch": 0.309877815398204, + "grad_norm": 0.43501549406649326, + "learning_rate": 1.999934148061062e-05, + "loss": 0.4678, "step": 2105 }, { - "epoch": 0.30915750915750917, - "grad_norm": 0.44564274747775723, - "learning_rate": 1.999944134998682e-05, - "loss": 0.489, + "epoch": 0.310613867216252, + "grad_norm": 0.4401058980449031, + "learning_rate": 1.999923952802101e-05, + "loss": 0.4765, "step": 2110 }, { - "epoch": 0.3098901098901099, - "grad_norm": 0.4544573494793725, - "learning_rate": 1.999934761290551e-05, - "loss": 0.4845, + "epoch": 0.31134991903430004, + "grad_norm": 0.4394249742033585, + "learning_rate": 1.999913024109749e-05, + "loss": 0.4856, "step": 2115 }, { - "epoch": 0.31062271062271063, - "grad_norm": 0.47085329248475644, - "learning_rate": 1.9999246609711624e-05, - "loss": 0.4879, + "epoch": 0.312085970852348, + "grad_norm": 0.4435086725351014, + "learning_rate": 1.999901361992024e-05, + "loss": 0.4754, "step": 2120 }, { - "epoch": 0.31135531135531136, - "grad_norm": 0.4776391928865398, - "learning_rate": 1.9999138340478554e-05, - "loss": 0.4801, + "epoch": 0.312822022670396, + "grad_norm": 0.4541298259035805, + "learning_rate": 1.9998889664574785e-05, + "loss": 0.4743, "step": 2125 }, { - "epoch": 0.3120879120879121, - "grad_norm": 0.4703485930609568, - "learning_rate": 1.9999022805284977e-05, - "loss": 0.4695, + "epoch": 0.31355807448844397, + "grad_norm": 0.4567073617700798, + "learning_rate": 1.9998758375152052e-05, + "loss": 0.4769, "step": 2130 }, { - "epoch": 0.3128205128205128, - "grad_norm": 0.47262214629039767, - "learning_rate": 1.9998900004214845e-05, - "loss": 0.4788, + "epoch": 0.31429412630649195, + "grad_norm": 0.48915117920176165, + "learning_rate": 1.9998619751748338e-05, + "loss": 0.4576, "step": 2135 }, { - "epoch": 0.31355311355311355, - "grad_norm": 0.46721083494958504, - "learning_rate": 1.9998769937357394e-05, - "loss": 0.4781, + "epoch": 0.31503017812454, + "grad_norm": 0.44619255283557624, + "learning_rate": 1.999847379446532e-05, + "loss": 0.4562, "step": 2140 }, { - "epoch": 0.3142857142857143, - "grad_norm": 0.46564165100287497, - "learning_rate": 1.9998632604807138e-05, - "loss": 0.4591, + "epoch": 0.31576622994258796, + "grad_norm": 0.4450124222386723, + "learning_rate": 1.9998320503410064e-05, + "loss": 0.4905, "step": 2145 }, { - "epoch": 0.315018315018315, - "grad_norm": 0.46052659281533487, - "learning_rate": 1.999848800666387e-05, - "loss": 0.4615, + "epoch": 0.31650228176063594, + "grad_norm": 0.4404465778071448, + "learning_rate": 1.9998159878694997e-05, + "loss": 0.4815, "step": 2150 }, { - "epoch": 0.31575091575091574, - "grad_norm": 0.4523921077473231, - "learning_rate": 1.999833614303267e-05, - "loss": 0.4945, + "epoch": 0.3172383335786839, + "grad_norm": 0.5675612692588065, + "learning_rate": 1.9997991920437943e-05, + "loss": 0.4999, "step": 2155 }, { - "epoch": 0.31648351648351647, - "grad_norm": 0.6224759141299284, - "learning_rate": 1.999817701402388e-05, - "loss": 0.4865, + "epoch": 0.31797438539673195, + "grad_norm": 0.43280904474802123, + "learning_rate": 1.999781662876209e-05, + "loss": 0.476, "step": 2160 }, { - "epoch": 0.3172161172161172, - "grad_norm": 0.4549954778580932, - "learning_rate": 1.9998010619753138e-05, - "loss": 0.4703, + "epoch": 0.31871043721477993, + "grad_norm": 0.4235352582007157, + "learning_rate": 1.9997634003796025e-05, + "loss": 0.4675, "step": 2165 }, { - "epoch": 0.31794871794871793, - "grad_norm": 0.44869160471700487, - "learning_rate": 1.9997836960341356e-05, - "loss": 0.4916, + "epoch": 0.3194464890328279, + "grad_norm": 0.4688991793652292, + "learning_rate": 1.999744404567369e-05, + "loss": 0.4742, "step": 2170 }, { - "epoch": 0.31868131868131866, - "grad_norm": 0.48138120519581834, - "learning_rate": 1.999765603591473e-05, - "loss": 0.4735, + "epoch": 0.3201825408508759, + "grad_norm": 0.48925261025186434, + "learning_rate": 1.999724675453442e-05, + "loss": 0.4888, "step": 2175 }, { - "epoch": 0.3194139194139194, - "grad_norm": 0.4606953753029427, - "learning_rate": 1.999746784660472e-05, - "loss": 0.4655, + "epoch": 0.3209185926689239, + "grad_norm": 0.4495508697602979, + "learning_rate": 1.999704213052293e-05, + "loss": 0.4768, "step": 2180 }, { - "epoch": 0.3201465201465201, - "grad_norm": 0.4960164215656271, - "learning_rate": 1.999727239254808e-05, - "loss": 0.4874, + "epoch": 0.3216546444869719, + "grad_norm": 0.4609517564296356, + "learning_rate": 1.99968301737893e-05, + "loss": 0.4664, "step": 2185 }, { - "epoch": 0.3208791208791209, - "grad_norm": 0.4970242069390641, - "learning_rate": 1.9997069673886843e-05, - "loss": 0.4989, + "epoch": 0.3223906963050199, + "grad_norm": 0.4467866715727226, + "learning_rate": 1.999661088448901e-05, + "loss": 0.4849, "step": 2190 }, { - "epoch": 0.32161172161172163, - "grad_norm": 0.4453023673170512, - "learning_rate": 1.9996859690768307e-05, - "loss": 0.4887, + "epoch": 0.32312674812306785, + "grad_norm": 0.4652266644750633, + "learning_rate": 1.99963842627829e-05, + "loss": 0.472, "step": 2195 }, { - "epoch": 0.32234432234432236, - "grad_norm": 0.4762842181398665, - "learning_rate": 1.9996642443345067e-05, - "loss": 0.467, + "epoch": 0.32386279994111583, + "grad_norm": 0.4687520169018355, + "learning_rate": 1.9996150308837194e-05, + "loss": 0.4689, "step": 2200 }, { - "epoch": 0.3230769230769231, - "grad_norm": 0.45617671867468856, - "learning_rate": 1.9996417931774986e-05, - "loss": 0.4616, + "epoch": 0.32459885175916386, + "grad_norm": 0.4562403617598054, + "learning_rate": 1.9995909022823497e-05, + "loss": 0.4636, "step": 2205 }, { - "epoch": 0.3238095238095238, - "grad_norm": 0.4577829813582058, - "learning_rate": 1.9996186156221202e-05, - "loss": 0.4681, + "epoch": 0.32533490357721184, + "grad_norm": 0.46461524609801136, + "learning_rate": 1.9995660404918787e-05, + "loss": 0.4887, "step": 2210 }, { - "epoch": 0.32454212454212455, - "grad_norm": 0.4667373496580333, - "learning_rate": 1.999594711685214e-05, - "loss": 0.4811, + "epoch": 0.3260709553952598, + "grad_norm": 0.44292774850519984, + "learning_rate": 1.9995404455305426e-05, + "loss": 0.4836, "step": 2215 }, { - "epoch": 0.3252747252747253, - "grad_norm": 0.44437406789561146, - "learning_rate": 1.99957008138415e-05, - "loss": 0.4898, + "epoch": 0.3268070072133078, + "grad_norm": 0.40967463773562945, + "learning_rate": 1.999514117417115e-05, + "loss": 0.4792, "step": 2220 }, { - "epoch": 0.326007326007326, - "grad_norm": 0.4748532696317031, - "learning_rate": 1.9995447247368265e-05, - "loss": 0.4617, + "epoch": 0.32754305903135583, + "grad_norm": 0.4363305863192649, + "learning_rate": 1.9994870561709064e-05, + "loss": 0.4901, "step": 2225 }, { - "epoch": 0.32673992673992674, - "grad_norm": 0.4589811457098767, - "learning_rate": 1.9995186417616682e-05, - "loss": 0.4863, + "epoch": 0.3282791108494038, + "grad_norm": 0.4440205284030912, + "learning_rate": 1.9994592618117673e-05, + "loss": 0.4678, "step": 2230 }, { - "epoch": 0.3274725274725275, - "grad_norm": 0.4493781747279523, - "learning_rate": 1.999491832477629e-05, - "loss": 0.4594, + "epoch": 0.3290151626674518, + "grad_norm": 0.4637447567088612, + "learning_rate": 1.9994307343600838e-05, + "loss": 0.4536, "step": 2235 }, { - "epoch": 0.3282051282051282, - "grad_norm": 0.4471943486082735, - "learning_rate": 1.9994642969041902e-05, - "loss": 0.4692, + "epoch": 0.32975121448549977, + "grad_norm": 0.457851261301874, + "learning_rate": 1.9994014738367806e-05, + "loss": 0.4691, "step": 2240 }, { - "epoch": 0.32893772893772893, - "grad_norm": 0.4471860716440031, - "learning_rate": 1.9994360350613605e-05, - "loss": 0.4541, + "epoch": 0.33048726630354774, + "grad_norm": 0.4639933895918874, + "learning_rate": 1.99937148026332e-05, + "loss": 0.4733, "step": 2245 }, { - "epoch": 0.32967032967032966, - "grad_norm": 0.4415395734304109, - "learning_rate": 1.9994070469696768e-05, - "loss": 0.4606, + "epoch": 0.3312233181215958, + "grad_norm": 0.5048664352458206, + "learning_rate": 1.999340753661702e-05, + "loss": 0.4726, "step": 2250 }, { - "epoch": 0.3304029304029304, - "grad_norm": 0.44838585468171893, - "learning_rate": 1.9993773326502037e-05, - "loss": 0.4647, + "epoch": 0.33195936993964376, + "grad_norm": 0.4796488629165394, + "learning_rate": 1.9993092940544637e-05, + "loss": 0.4654, "step": 2255 }, { - "epoch": 0.3311355311355311, - "grad_norm": 0.48306770352552575, - "learning_rate": 1.9993468921245332e-05, - "loss": 0.4702, + "epoch": 0.33269542175769173, + "grad_norm": 0.46805300188648025, + "learning_rate": 1.9992771014646816e-05, + "loss": 0.458, "step": 2260 }, { - "epoch": 0.33186813186813185, - "grad_norm": 0.46716439355838063, - "learning_rate": 1.9993157254147847e-05, - "loss": 0.4748, + "epoch": 0.3334314735757397, + "grad_norm": 0.43093945222637614, + "learning_rate": 1.9992441759159673e-05, + "loss": 0.4428, "step": 2265 }, { - "epoch": 0.3326007326007326, - "grad_norm": 0.4395836964883837, - "learning_rate": 1.9992838325436062e-05, - "loss": 0.4801, + "epoch": 0.33416752539378775, + "grad_norm": 0.426941370229423, + "learning_rate": 1.9992105174324722e-05, + "loss": 0.4851, "step": 2270 }, { - "epoch": 0.3333333333333333, - "grad_norm": 0.4516346952215036, - "learning_rate": 1.9992512135341735e-05, - "loss": 0.4907, + "epoch": 0.3349035772118357, + "grad_norm": 0.4591334473999698, + "learning_rate": 1.999176126038884e-05, + "loss": 0.4898, "step": 2275 }, { - "epoch": 0.33406593406593404, - "grad_norm": 0.47750780474177107, - "learning_rate": 1.9992178684101885e-05, - "loss": 0.4744, + "epoch": 0.3356396290298837, + "grad_norm": 0.4251506129306176, + "learning_rate": 1.9991410017604283e-05, + "loss": 0.4658, "step": 2280 }, { - "epoch": 0.3347985347985348, - "grad_norm": 0.46827910368482406, - "learning_rate": 1.9991837971958823e-05, - "loss": 0.4682, + "epoch": 0.3363756808479317, + "grad_norm": 0.4782181713266746, + "learning_rate": 1.999105144622869e-05, + "loss": 0.4666, "step": 2285 }, { - "epoch": 0.3355311355311355, - "grad_norm": 0.4284253365569513, - "learning_rate": 1.999148999916013e-05, - "loss": 0.4682, + "epoch": 0.33711173266597966, + "grad_norm": 0.4479398981455512, + "learning_rate": 1.9990685546525065e-05, + "loss": 0.4619, "step": 2290 }, { - "epoch": 0.3362637362637363, - "grad_norm": 0.44894863625304804, - "learning_rate": 1.999113476595866e-05, - "loss": 0.4641, + "epoch": 0.3378477844840277, + "grad_norm": 0.48245664385321907, + "learning_rate": 1.9990312318761788e-05, + "loss": 0.4861, "step": 2295 }, { - "epoch": 0.336996336996337, - "grad_norm": 0.4319941290239276, - "learning_rate": 1.9990772272612545e-05, - "loss": 0.4766, + "epoch": 0.33858383630207567, + "grad_norm": 0.43048953402489026, + "learning_rate": 1.9989931763212628e-05, + "loss": 0.4792, "step": 2300 }, { - "epoch": 0.33772893772893775, - "grad_norm": 0.43325498368336673, - "learning_rate": 1.9990402519385204e-05, - "loss": 0.4652, + "epoch": 0.33931988812012365, + "grad_norm": 0.5985939942681147, + "learning_rate": 1.998954388015671e-05, + "loss": 0.4781, "step": 2305 }, { - "epoch": 0.3384615384615385, - "grad_norm": 0.44734423832754516, - "learning_rate": 1.9990025506545315e-05, - "loss": 0.4717, + "epoch": 0.3400559399381716, + "grad_norm": 0.43754557176144243, + "learning_rate": 1.9989148669878537e-05, + "loss": 0.4747, "step": 2310 }, { - "epoch": 0.3391941391941392, - "grad_norm": 0.4331449566526826, - "learning_rate": 1.998964123436683e-05, - "loss": 0.4785, + "epoch": 0.34079199175621966, + "grad_norm": 0.4373253784689262, + "learning_rate": 1.9988746132668006e-05, + "loss": 0.4438, "step": 2315 }, { - "epoch": 0.33992673992673994, - "grad_norm": 0.4439465535289741, - "learning_rate": 1.9989249703128998e-05, - "loss": 0.4612, + "epoch": 0.34152804357426764, + "grad_norm": 0.4768187921022628, + "learning_rate": 1.9988336268820365e-05, + "loss": 0.4595, "step": 2320 }, { - "epoch": 0.34065934065934067, - "grad_norm": 0.4318055127992744, - "learning_rate": 1.998885091311632e-05, - "loss": 0.4707, + "epoch": 0.3422640953923156, + "grad_norm": 0.4470593284289592, + "learning_rate": 1.9987919078636242e-05, + "loss": 0.4702, "step": 2325 }, { - "epoch": 0.3413919413919414, - "grad_norm": 0.4412776315633937, - "learning_rate": 1.998844486461858e-05, - "loss": 0.4844, + "epoch": 0.3430001472103636, + "grad_norm": 0.44728625386910265, + "learning_rate": 1.998749456242165e-05, + "loss": 0.4918, "step": 2330 }, { - "epoch": 0.34212454212454213, - "grad_norm": 0.4496185459470404, - "learning_rate": 1.998803155793084e-05, - "loss": 0.4755, + "epoch": 0.3437361990284116, + "grad_norm": 0.4514444055802256, + "learning_rate": 1.9987062720487956e-05, + "loss": 0.4826, "step": 2335 }, { - "epoch": 0.34285714285714286, - "grad_norm": 0.46607308652238927, - "learning_rate": 1.9987610993353436e-05, - "loss": 0.4609, + "epoch": 0.3444722508464596, + "grad_norm": 0.4535206317389082, + "learning_rate": 1.998662355315192e-05, + "loss": 0.4675, "step": 2340 }, { - "epoch": 0.3435897435897436, - "grad_norm": 0.44165834082679384, - "learning_rate": 1.9987183171191966e-05, - "loss": 0.4552, + "epoch": 0.3452083026645076, + "grad_norm": 0.4189857578216396, + "learning_rate": 1.9986177060735666e-05, + "loss": 0.4737, "step": 2345 }, { - "epoch": 0.3443223443223443, - "grad_norm": 0.45333703568193734, - "learning_rate": 1.9986748091757315e-05, - "loss": 0.4684, + "epoch": 0.34594435448255556, + "grad_norm": 0.5019851500218824, + "learning_rate": 1.998572324356669e-05, + "loss": 0.4694, "step": 2350 }, { - "epoch": 0.34505494505494505, - "grad_norm": 0.4567861600300713, - "learning_rate": 1.9986305755365638e-05, - "loss": 0.4701, + "epoch": 0.34668040630060354, + "grad_norm": 0.6105382979046713, + "learning_rate": 1.998526210197786e-05, + "loss": 0.471, "step": 2355 }, { - "epoch": 0.3457875457875458, - "grad_norm": 0.42216281070142847, - "learning_rate": 1.9985856162338365e-05, - "loss": 0.4814, + "epoch": 0.34741645811865157, + "grad_norm": 0.4403970102714735, + "learning_rate": 1.998479363630742e-05, + "loss": 0.451, "step": 2360 }, { - "epoch": 0.3465201465201465, - "grad_norm": 0.4614463501828505, - "learning_rate": 1.998539931300219e-05, - "loss": 0.4724, + "epoch": 0.34815250993669955, + "grad_norm": 0.4360708417057406, + "learning_rate": 1.998431784689898e-05, + "loss": 0.4756, "step": 2365 }, { - "epoch": 0.34725274725274724, - "grad_norm": 0.48197598208732645, - "learning_rate": 1.998493520768909e-05, - "loss": 0.4759, + "epoch": 0.34888856175474753, + "grad_norm": 0.4342094719915312, + "learning_rate": 1.998383473410154e-05, + "loss": 0.4816, "step": 2370 }, { - "epoch": 0.34798534798534797, - "grad_norm": 0.43198815185153344, - "learning_rate": 1.9984463846736313e-05, - "loss": 0.4602, + "epoch": 0.3496246135727955, + "grad_norm": 0.4558318975401987, + "learning_rate": 1.998334429826944e-05, + "loss": 0.4605, "step": 2375 }, { - "epoch": 0.3487179487179487, - "grad_norm": 0.44829414019934727, - "learning_rate": 1.9983985230486372e-05, - "loss": 0.4643, + "epoch": 0.35036066539084354, + "grad_norm": 0.4459264432189479, + "learning_rate": 1.9982846539762422e-05, + "loss": 0.4838, "step": 2380 }, { - "epoch": 0.34945054945054943, - "grad_norm": 0.4417248573626436, - "learning_rate": 1.9983499359287063e-05, - "loss": 0.4658, + "epoch": 0.3510967172088915, + "grad_norm": 0.657558154988274, + "learning_rate": 1.9982341458945584e-05, + "loss": 0.4727, "step": 2385 }, { - "epoch": 0.35018315018315016, - "grad_norm": 0.42631469243015624, - "learning_rate": 1.998300623349145e-05, - "loss": 0.466, + "epoch": 0.3518327690269395, + "grad_norm": 0.41325825632624885, + "learning_rate": 1.998182905618939e-05, + "loss": 0.4772, "step": 2390 }, { - "epoch": 0.3509157509157509, - "grad_norm": 0.40101767103579056, - "learning_rate": 1.9982505853457862e-05, - "loss": 0.4593, + "epoch": 0.3525688208449875, + "grad_norm": 0.434298057981475, + "learning_rate": 1.9981309331869693e-05, + "loss": 0.4701, "step": 2395 }, { - "epoch": 0.3516483516483517, - "grad_norm": 0.4405886955018248, - "learning_rate": 1.998199821954991e-05, - "loss": 0.4866, + "epoch": 0.35330487266303545, + "grad_norm": 0.44706761089730784, + "learning_rate": 1.99807822863677e-05, + "loss": 0.4798, "step": 2400 }, { - "epoch": 0.3523809523809524, - "grad_norm": 0.5123330356572352, - "learning_rate": 1.998148333213646e-05, - "loss": 0.4684, + "epoch": 0.3540409244810835, + "grad_norm": 0.45226798144264396, + "learning_rate": 1.9980247920069987e-05, + "loss": 0.4805, "step": 2405 }, { - "epoch": 0.35311355311355314, - "grad_norm": 0.45916305279498376, - "learning_rate": 1.9980961191591674e-05, - "loss": 0.4845, + "epoch": 0.35477697629913146, + "grad_norm": 0.4130884443923589, + "learning_rate": 1.9979706233368518e-05, + "loss": 0.4731, "step": 2410 }, { - "epoch": 0.35384615384615387, - "grad_norm": 0.4934563308167938, - "learning_rate": 1.998043179829496e-05, - "loss": 0.4559, + "epoch": 0.35551302811717944, + "grad_norm": 0.42720376029824986, + "learning_rate": 1.9979157226660605e-05, + "loss": 0.4663, "step": 2415 }, { - "epoch": 0.3545787545787546, - "grad_norm": 0.44456323846327644, - "learning_rate": 1.9979895152631005e-05, - "loss": 0.4708, + "epoch": 0.3562490799352274, + "grad_norm": 0.4506910508944987, + "learning_rate": 1.9978600900348947e-05, + "loss": 0.4736, "step": 2420 }, { - "epoch": 0.3553113553113553, - "grad_norm": 0.4464955895356425, - "learning_rate": 1.9979351254989775e-05, - "loss": 0.478, + "epoch": 0.35698513175327545, + "grad_norm": 0.4936781621144996, + "learning_rate": 1.997803725484159e-05, + "loss": 0.4715, "step": 2425 }, { - "epoch": 0.35604395604395606, - "grad_norm": 0.4634970837041198, - "learning_rate": 1.9978800105766493e-05, - "loss": 0.4802, + "epoch": 0.35772118357132343, + "grad_norm": 0.44991510071626706, + "learning_rate": 1.9977466290551976e-05, + "loss": 0.464, "step": 2430 }, { - "epoch": 0.3567765567765568, - "grad_norm": 0.4243954559204476, - "learning_rate": 1.997824170536166e-05, - "loss": 0.4808, + "epoch": 0.3584572353893714, + "grad_norm": 0.4373424349432953, + "learning_rate": 1.9976888007898893e-05, + "loss": 0.473, "step": 2435 }, { - "epoch": 0.3575091575091575, - "grad_norm": 0.4451848668333986, - "learning_rate": 1.9977676054181033e-05, - "loss": 0.4589, + "epoch": 0.3591932872074194, + "grad_norm": 0.43508431133521286, + "learning_rate": 1.997630240730651e-05, + "loss": 0.4728, "step": 2440 }, { - "epoch": 0.35824175824175825, - "grad_norm": 0.44164023449977263, - "learning_rate": 1.9977103152635656e-05, - "loss": 0.4761, + "epoch": 0.35992933902546737, + "grad_norm": 0.44562067928514576, + "learning_rate": 1.997570948920435e-05, + "loss": 0.4781, "step": 2445 }, { - "epoch": 0.358974358974359, - "grad_norm": 0.4592316037675001, - "learning_rate": 1.9976523001141835e-05, - "loss": 0.4728, + "epoch": 0.3606653908435154, + "grad_norm": 0.4361128978836239, + "learning_rate": 1.9975109254027318e-05, + "loss": 0.4596, "step": 2450 }, { - "epoch": 0.3597069597069597, - "grad_norm": 0.4592825783574398, - "learning_rate": 1.9975935600121134e-05, - "loss": 0.4673, + "epoch": 0.3614014426615634, + "grad_norm": 0.46848167821114756, + "learning_rate": 1.9974501702215683e-05, + "loss": 0.4802, "step": 2455 }, { - "epoch": 0.36043956043956044, - "grad_norm": 0.4588396167146063, - "learning_rate": 1.99753409500004e-05, - "loss": 0.4605, + "epoch": 0.36213749447961135, + "grad_norm": 0.4507629685143818, + "learning_rate": 1.9973886834215076e-05, + "loss": 0.4765, "step": 2460 }, { - "epoch": 0.36117216117216117, - "grad_norm": 0.4523330376762571, - "learning_rate": 1.9974739051211738e-05, - "loss": 0.4793, + "epoch": 0.36287354629765933, + "grad_norm": 0.44318965289648626, + "learning_rate": 1.997326465047649e-05, + "loss": 0.4729, "step": 2465 }, { - "epoch": 0.3619047619047619, - "grad_norm": 0.4505742161300693, - "learning_rate": 1.9974129904192525e-05, - "loss": 0.4757, + "epoch": 0.36360959811570737, + "grad_norm": 0.41925778256104673, + "learning_rate": 1.9972635151456296e-05, + "loss": 0.4832, "step": 2470 }, { - "epoch": 0.3626373626373626, - "grad_norm": 0.4171150087533455, - "learning_rate": 1.9973513509385394e-05, - "loss": 0.455, + "epoch": 0.36434564993375534, + "grad_norm": 0.43678667449158226, + "learning_rate": 1.9971998337616222e-05, + "loss": 0.4744, "step": 2475 }, { - "epoch": 0.36336996336996336, - "grad_norm": 0.42463363919184804, - "learning_rate": 1.9972889867238264e-05, - "loss": 0.461, + "epoch": 0.3650817017518033, + "grad_norm": 0.4715447103249681, + "learning_rate": 1.9971354209423368e-05, + "loss": 0.4699, "step": 2480 }, { - "epoch": 0.3641025641025641, - "grad_norm": 0.47370760239418574, - "learning_rate": 1.9972258978204304e-05, - "loss": 0.474, + "epoch": 0.3658177535698513, + "grad_norm": 0.43313415803213506, + "learning_rate": 1.9970702767350194e-05, + "loss": 0.4654, "step": 2485 }, { - "epoch": 0.3648351648351648, - "grad_norm": 0.4499155574302461, - "learning_rate": 1.997162084274196e-05, - "loss": 0.4791, + "epoch": 0.36655380538789933, + "grad_norm": 0.506790783037607, + "learning_rate": 1.997004401187452e-05, + "loss": 0.4691, "step": 2490 }, { - "epoch": 0.36556776556776555, - "grad_norm": 0.4696631194673513, - "learning_rate": 1.9970975461314933e-05, - "loss": 0.491, + "epoch": 0.3672898572059473, + "grad_norm": 0.48156503079351776, + "learning_rate": 1.9969377943479543e-05, + "loss": 0.4737, "step": 2495 }, { - "epoch": 0.3663003663003663, - "grad_norm": 0.44712091155797246, - "learning_rate": 1.99703228343922e-05, - "loss": 0.4902, + "epoch": 0.3680259090239953, + "grad_norm": 0.44798228820681174, + "learning_rate": 1.996870456265381e-05, + "loss": 0.4592, "step": 2500 }, { - "epoch": 0.367032967032967, - "grad_norm": 0.4348061740364967, - "learning_rate": 1.9969662962448e-05, - "loss": 0.4746, + "epoch": 0.36876196084204327, + "grad_norm": 0.4637716878626953, + "learning_rate": 1.9968023869891245e-05, + "loss": 0.4681, "step": 2505 }, { - "epoch": 0.3677655677655678, - "grad_norm": 0.5086079541138988, - "learning_rate": 1.9968995845961824e-05, - "loss": 0.4529, + "epoch": 0.36949801266009125, + "grad_norm": 0.4470703626223196, + "learning_rate": 1.9967335865691128e-05, + "loss": 0.4732, "step": 2510 }, { - "epoch": 0.3684981684981685, - "grad_norm": 0.44339962250971227, - "learning_rate": 1.996832148541845e-05, - "loss": 0.4601, + "epoch": 0.3702340644781393, + "grad_norm": 0.43703534594365284, + "learning_rate": 1.99666405505581e-05, + "loss": 0.4648, "step": 2515 }, { - "epoch": 0.36923076923076925, - "grad_norm": 0.4327290016860837, - "learning_rate": 1.9967639881307897e-05, - "loss": 0.4762, + "epoch": 0.37097011629618726, + "grad_norm": 0.4242609070008135, + "learning_rate": 1.9965937925002167e-05, + "loss": 0.4647, "step": 2520 }, { - "epoch": 0.36996336996337, - "grad_norm": 0.4582585890635615, - "learning_rate": 1.9966951034125466e-05, - "loss": 0.4788, + "epoch": 0.37170616811423524, + "grad_norm": 0.44885516226659466, + "learning_rate": 1.99652279895387e-05, + "loss": 0.4828, "step": 2525 }, { - "epoch": 0.3706959706959707, - "grad_norm": 0.4425251767984628, - "learning_rate": 1.9966254944371713e-05, - "loss": 0.4724, + "epoch": 0.3724422199322832, + "grad_norm": 0.4359073224351897, + "learning_rate": 1.9964510744688422e-05, + "loss": 0.4775, "step": 2530 }, { - "epoch": 0.37142857142857144, - "grad_norm": 0.41646091581942785, - "learning_rate": 1.9965551612552455e-05, - "loss": 0.4696, + "epoch": 0.37317827175033125, + "grad_norm": 0.46498654397417966, + "learning_rate": 1.9963786190977434e-05, + "loss": 0.4716, "step": 2535 }, { - "epoch": 0.37216117216117217, - "grad_norm": 0.40403561466112603, - "learning_rate": 1.996484103917878e-05, - "loss": 0.4581, + "epoch": 0.3739143235683792, + "grad_norm": 0.41129957424529356, + "learning_rate": 1.9963054328937184e-05, + "loss": 0.4751, "step": 2540 }, { - "epoch": 0.3728937728937729, - "grad_norm": 0.4138783207320578, - "learning_rate": 1.9964123224767023e-05, - "loss": 0.4691, + "epoch": 0.3746503753864272, + "grad_norm": 0.4350576054674013, + "learning_rate": 1.9962315159104485e-05, + "loss": 0.4567, "step": 2545 }, { - "epoch": 0.37362637362637363, - "grad_norm": 0.4582236609088255, - "learning_rate": 1.9963398169838794e-05, - "loss": 0.4598, + "epoch": 0.3753864272044752, + "grad_norm": 0.444822730056749, + "learning_rate": 1.9961568682021508e-05, + "loss": 0.4703, "step": 2550 }, { - "epoch": 0.37435897435897436, - "grad_norm": 0.43861877305531866, - "learning_rate": 1.9962665874920964e-05, - "loss": 0.4715, + "epoch": 0.37612247902252316, + "grad_norm": 0.43892132977734233, + "learning_rate": 1.996081489823579e-05, + "loss": 0.4749, "step": 2555 }, { - "epoch": 0.3750915750915751, - "grad_norm": 0.426211153181784, - "learning_rate": 1.996192634054566e-05, - "loss": 0.4752, + "epoch": 0.3768585308405712, + "grad_norm": 0.42200264359981277, + "learning_rate": 1.996005380830022e-05, + "loss": 0.4658, "step": 2560 }, { - "epoch": 0.3758241758241758, - "grad_norm": 0.4353446290075632, - "learning_rate": 1.9961179567250267e-05, - "loss": 0.4768, + "epoch": 0.37759458265861917, + "grad_norm": 0.4557070379670827, + "learning_rate": 1.995928541277305e-05, + "loss": 0.4542, "step": 2565 }, { - "epoch": 0.37655677655677655, - "grad_norm": 0.4218728291102029, - "learning_rate": 1.996042555557744e-05, - "loss": 0.464, + "epoch": 0.37833063447666715, + "grad_norm": 0.4463535286579343, + "learning_rate": 1.995850971221789e-05, + "loss": 0.4859, "step": 2570 }, { - "epoch": 0.3772893772893773, - "grad_norm": 0.43312052410700363, - "learning_rate": 1.9959664306075084e-05, - "loss": 0.4666, + "epoch": 0.3790666862947151, + "grad_norm": 0.4545292326370332, + "learning_rate": 1.9957726707203706e-05, + "loss": 0.4818, "step": 2575 }, { - "epoch": 0.378021978021978, - "grad_norm": 0.4158441193547152, - "learning_rate": 1.9958895819296368e-05, - "loss": 0.4506, + "epoch": 0.37980273811276316, + "grad_norm": 0.48012598712851023, + "learning_rate": 1.995693639830483e-05, + "loss": 0.4767, "step": 2580 }, { - "epoch": 0.37875457875457874, - "grad_norm": 0.45065981507325853, - "learning_rate": 1.9958120095799724e-05, - "loss": 0.4836, + "epoch": 0.38053878993081114, + "grad_norm": 0.4360662866815463, + "learning_rate": 1.995613878610094e-05, + "loss": 0.4742, "step": 2585 }, { - "epoch": 0.37948717948717947, - "grad_norm": 0.42564408981825813, - "learning_rate": 1.995733713614883e-05, - "loss": 0.4658, + "epoch": 0.3812748417488591, + "grad_norm": 0.4766069613257336, + "learning_rate": 1.995533387117707e-05, + "loss": 0.4757, "step": 2590 }, { - "epoch": 0.3802197802197802, - "grad_norm": 0.4214338330830984, - "learning_rate": 1.9956546940912638e-05, - "loss": 0.4671, + "epoch": 0.3820108935669071, + "grad_norm": 0.428997216193116, + "learning_rate": 1.9954521654123627e-05, + "loss": 0.471, "step": 2595 }, { - "epoch": 0.38095238095238093, - "grad_norm": 0.4304888906650203, - "learning_rate": 1.995574951066535e-05, - "loss": 0.4786, + "epoch": 0.38274694538495513, + "grad_norm": 0.4214116969762896, + "learning_rate": 1.995370213553636e-05, + "loss": 0.4641, "step": 2600 }, { - "epoch": 0.38168498168498166, - "grad_norm": 0.412768839745328, - "learning_rate": 1.9954944845986416e-05, - "loss": 0.4554, + "epoch": 0.3834829972030031, + "grad_norm": 0.41477266318843403, + "learning_rate": 1.9952875316016376e-05, + "loss": 0.4804, "step": 2605 }, { - "epoch": 0.3824175824175824, - "grad_norm": 0.41692261832569244, - "learning_rate": 1.9954132947460562e-05, - "loss": 0.4724, + "epoch": 0.3842190490210511, + "grad_norm": 0.4232486648727509, + "learning_rate": 1.995204119617014e-05, + "loss": 0.4739, "step": 2610 }, { - "epoch": 0.3831501831501832, - "grad_norm": 0.42952204546591416, - "learning_rate": 1.9953313815677763e-05, - "loss": 0.462, + "epoch": 0.38495510083909906, + "grad_norm": 0.42908297616282554, + "learning_rate": 1.995119977660947e-05, + "loss": 0.4659, "step": 2615 }, { - "epoch": 0.3838827838827839, - "grad_norm": 0.45479760018622895, - "learning_rate": 1.995248745123324e-05, - "loss": 0.4895, + "epoch": 0.38569115265714704, + "grad_norm": 0.4367130508945953, + "learning_rate": 1.995035105795153e-05, + "loss": 0.4646, "step": 2620 }, { - "epoch": 0.38461538461538464, - "grad_norm": 0.4106595241460115, - "learning_rate": 1.995165385472748e-05, - "loss": 0.4516, + "epoch": 0.3864272044751951, + "grad_norm": 0.43916168441046405, + "learning_rate": 1.994949504081886e-05, + "loss": 0.4645, "step": 2625 }, { - "epoch": 0.38534798534798537, - "grad_norm": 0.46724827471487923, - "learning_rate": 1.995081302676623e-05, - "loss": 0.4676, + "epoch": 0.38716325629324305, + "grad_norm": 0.4179134009051416, + "learning_rate": 1.9948631725839326e-05, + "loss": 0.4818, "step": 2630 }, { - "epoch": 0.3860805860805861, - "grad_norm": 0.43642050328749066, - "learning_rate": 1.9949964967960472e-05, - "loss": 0.4691, + "epoch": 0.38789930811129103, + "grad_norm": 0.42355447856295925, + "learning_rate": 1.9947761113646166e-05, + "loss": 0.4632, "step": 2635 }, { - "epoch": 0.3868131868131868, - "grad_norm": 0.4342906280447318, - "learning_rate": 1.9949109678926472e-05, - "loss": 0.5037, + "epoch": 0.388635359929339, + "grad_norm": 0.4251933072853647, + "learning_rate": 1.994688320487797e-05, + "loss": 0.4729, "step": 2640 }, { - "epoch": 0.38754578754578756, - "grad_norm": 0.4545020175156162, - "learning_rate": 1.9948247160285717e-05, - "loss": 0.4757, + "epoch": 0.38937141174738704, + "grad_norm": 0.4467923689627895, + "learning_rate": 1.9945998000178663e-05, + "loss": 0.4616, "step": 2645 }, { - "epoch": 0.3882783882783883, - "grad_norm": 0.4375133844709041, - "learning_rate": 1.9947377412664974e-05, - "loss": 0.4713, + "epoch": 0.390107463565435, + "grad_norm": 0.4491808533304436, + "learning_rate": 1.994510550019754e-05, + "loss": 0.4976, "step": 2650 }, { - "epoch": 0.389010989010989, - "grad_norm": 0.4448354117733457, - "learning_rate": 1.9946500436696248e-05, - "loss": 0.4697, + "epoch": 0.390843515383483, + "grad_norm": 0.42429793760927365, + "learning_rate": 1.994420570558924e-05, + "loss": 0.4673, "step": 2655 }, { - "epoch": 0.38974358974358975, - "grad_norm": 0.42405644917844576, - "learning_rate": 1.9945616233016803e-05, - "loss": 0.4746, + "epoch": 0.391579567201531, + "grad_norm": 0.4322490576948786, + "learning_rate": 1.9943298617013755e-05, + "loss": 0.4916, "step": 2660 }, { - "epoch": 0.3904761904761905, - "grad_norm": 0.43918153514576697, - "learning_rate": 1.9944724802269155e-05, - "loss": 0.4507, + "epoch": 0.39231561901957895, + "grad_norm": 0.42197433316013055, + "learning_rate": 1.9942384235136418e-05, + "loss": 0.4707, "step": 2665 }, { - "epoch": 0.3912087912087912, - "grad_norm": 0.437319551455226, - "learning_rate": 1.9943826145101065e-05, - "loss": 0.4876, + "epoch": 0.393051670837627, + "grad_norm": 0.4273479256117159, + "learning_rate": 1.9941462560627924e-05, + "loss": 0.4591, "step": 2670 }, { - "epoch": 0.39194139194139194, - "grad_norm": 0.4428182093921229, - "learning_rate": 1.9942920262165554e-05, - "loss": 0.4669, + "epoch": 0.39378772265567497, + "grad_norm": 0.4352470069161979, + "learning_rate": 1.994053359416431e-05, + "loss": 0.4788, "step": 2675 }, { - "epoch": 0.39267399267399267, - "grad_norm": 0.4498586125749959, - "learning_rate": 1.994200715412089e-05, - "loss": 0.457, + "epoch": 0.39452377447372294, + "grad_norm": 0.42524739307120096, + "learning_rate": 1.9939597336426958e-05, + "loss": 0.4708, "step": 2680 }, { - "epoch": 0.3934065934065934, - "grad_norm": 0.44351385651904046, - "learning_rate": 1.9941086821630585e-05, - "loss": 0.4838, + "epoch": 0.3952598262917709, + "grad_norm": 0.4778174570858514, + "learning_rate": 1.993865378810261e-05, + "loss": 0.4737, "step": 2685 }, { - "epoch": 0.3941391941391941, - "grad_norm": 0.4078717462375167, - "learning_rate": 1.9940159265363415e-05, - "loss": 0.4566, + "epoch": 0.39599587810981896, + "grad_norm": 0.43701707594895806, + "learning_rate": 1.9937702949883343e-05, + "loss": 0.454, "step": 2690 }, { - "epoch": 0.39487179487179486, - "grad_norm": 0.4011687882917101, - "learning_rate": 1.9939224485993393e-05, - "loss": 0.4543, + "epoch": 0.39673192992786693, + "grad_norm": 0.4408193981864741, + "learning_rate": 1.993674482246659e-05, + "loss": 0.4941, "step": 2695 }, { - "epoch": 0.3956043956043956, - "grad_norm": 0.42486716225410437, - "learning_rate": 1.993828248419978e-05, - "loss": 0.4583, + "epoch": 0.3974679817459149, + "grad_norm": 0.44887014268199094, + "learning_rate": 1.9935779406555126e-05, + "loss": 0.4811, "step": 2700 }, { - "epoch": 0.3963369963369963, - "grad_norm": 0.42652221818799463, - "learning_rate": 1.9937333260667095e-05, - "loss": 0.4601, + "epoch": 0.3982040335639629, + "grad_norm": 0.4313218301535768, + "learning_rate": 1.993480670285707e-05, + "loss": 0.4619, "step": 2705 }, { - "epoch": 0.39706959706959705, - "grad_norm": 0.4768230911766403, - "learning_rate": 1.99363768160851e-05, - "loss": 0.4732, + "epoch": 0.39894008538201087, + "grad_norm": 0.4373516450492796, + "learning_rate": 1.9933826712085898e-05, + "loss": 0.4689, "step": 2710 }, { - "epoch": 0.3978021978021978, - "grad_norm": 0.4351470474079274, - "learning_rate": 1.9935413151148802e-05, - "loss": 0.4642, + "epoch": 0.3996761372000589, + "grad_norm": 0.39934662785688674, + "learning_rate": 1.9932839434960414e-05, + "loss": 0.4729, "step": 2715 }, { - "epoch": 0.39853479853479856, - "grad_norm": 0.430677839074461, - "learning_rate": 1.993444226655846e-05, - "loss": 0.4647, + "epoch": 0.4004121890181069, + "grad_norm": 0.41606780448949654, + "learning_rate": 1.993184487220478e-05, + "loss": 0.4635, "step": 2720 }, { - "epoch": 0.3992673992673993, - "grad_norm": 0.4200612552021059, - "learning_rate": 1.9933464163019568e-05, - "loss": 0.472, + "epoch": 0.40114824083615486, + "grad_norm": 0.46256374395781535, + "learning_rate": 1.993084302454849e-05, + "loss": 0.4734, "step": 2725 }, { - "epoch": 0.4, - "grad_norm": 0.4526322616576567, - "learning_rate": 1.993247884124288e-05, - "loss": 0.4772, + "epoch": 0.40188429265420283, + "grad_norm": 0.4197100615354211, + "learning_rate": 1.9929833892726398e-05, + "loss": 0.4627, "step": 2730 }, { - "epoch": 0.40073260073260075, - "grad_norm": 0.42669669732174814, - "learning_rate": 1.9931486301944388e-05, - "loss": 0.4702, + "epoch": 0.40262034447225087, + "grad_norm": 0.4379080643054496, + "learning_rate": 1.9928817477478687e-05, + "loss": 0.4611, "step": 2735 }, { - "epoch": 0.4014652014652015, - "grad_norm": 0.4315295987179508, - "learning_rate": 1.9930486545845324e-05, - "loss": 0.4705, + "epoch": 0.40335639629029885, + "grad_norm": 0.4084960104593173, + "learning_rate": 1.9927793779550886e-05, + "loss": 0.4558, "step": 2740 }, { - "epoch": 0.4021978021978022, - "grad_norm": 0.5545099109486473, - "learning_rate": 1.992947957367217e-05, - "loss": 0.4567, + "epoch": 0.4040924481083468, + "grad_norm": 0.42345583706123907, + "learning_rate": 1.9926762799693865e-05, + "loss": 0.4749, "step": 2745 }, { - "epoch": 0.40293040293040294, - "grad_norm": 0.47415426519542037, - "learning_rate": 1.992846538615666e-05, - "loss": 0.4576, + "epoch": 0.4048284999263948, + "grad_norm": 0.40237746882884434, + "learning_rate": 1.992572453866384e-05, + "loss": 0.4521, "step": 2750 }, { - "epoch": 0.40366300366300367, - "grad_norm": 0.4510239651090376, - "learning_rate": 1.992744398403576e-05, - "loss": 0.4796, + "epoch": 0.40556455174444284, + "grad_norm": 0.43271978633491137, + "learning_rate": 1.9924678997222364e-05, + "loss": 0.4746, "step": 2755 }, { - "epoch": 0.4043956043956044, - "grad_norm": 0.419403060450085, - "learning_rate": 1.992641536805167e-05, - "loss": 0.4595, + "epoch": 0.4063006035624908, + "grad_norm": 0.4181052235978989, + "learning_rate": 1.992362617613633e-05, + "loss": 0.4798, "step": 2760 }, { - "epoch": 0.40512820512820513, - "grad_norm": 0.46325220121502125, - "learning_rate": 1.992537953895185e-05, - "loss": 0.4616, + "epoch": 0.4070366553805388, + "grad_norm": 0.4524920937481242, + "learning_rate": 1.9922566076177964e-05, + "loss": 0.4703, "step": 2765 }, { - "epoch": 0.40586080586080586, - "grad_norm": 0.41964023342762674, - "learning_rate": 1.9924336497489e-05, - "loss": 0.4749, + "epoch": 0.40777270719858677, + "grad_norm": 0.42518121450361884, + "learning_rate": 1.9921498698124847e-05, + "loss": 0.4523, "step": 2770 }, { - "epoch": 0.4065934065934066, - "grad_norm": 0.4140452604597255, - "learning_rate": 1.9923286244421045e-05, - "loss": 0.4766, + "epoch": 0.40850875901663475, + "grad_norm": 0.42126470170814195, + "learning_rate": 1.992042404275989e-05, + "loss": 0.4755, "step": 2775 }, { - "epoch": 0.4073260073260073, - "grad_norm": 0.4089490254304085, - "learning_rate": 1.9922228780511164e-05, - "loss": 0.4761, + "epoch": 0.4092448108346828, + "grad_norm": 0.44297126766174794, + "learning_rate": 1.9919342110871333e-05, + "loss": 0.4797, "step": 2780 }, { - "epoch": 0.40805860805860805, - "grad_norm": 0.42570100124391697, - "learning_rate": 1.9921164106527773e-05, - "loss": 0.4649, + "epoch": 0.40998086265273076, + "grad_norm": 0.4713457406940533, + "learning_rate": 1.9918252903252764e-05, + "loss": 0.481, "step": 2785 }, { - "epoch": 0.4087912087912088, - "grad_norm": 0.42213967282368353, - "learning_rate": 1.9920092223244526e-05, - "loss": 0.4545, + "epoch": 0.41071691447077874, + "grad_norm": 0.40506369456948327, + "learning_rate": 1.991715642070311e-05, + "loss": 0.4617, "step": 2790 }, { - "epoch": 0.4095238095238095, - "grad_norm": 0.4229435158499598, - "learning_rate": 1.991901313144032e-05, - "loss": 0.4672, + "epoch": 0.4114529662888267, + "grad_norm": 0.41396699426312916, + "learning_rate": 1.9916052664026623e-05, + "loss": 0.4559, "step": 2795 }, { - "epoch": 0.41025641025641024, - "grad_norm": 0.41948017040271546, - "learning_rate": 1.9917926831899283e-05, - "loss": 0.444, + "epoch": 0.41218901810687475, + "grad_norm": 0.41040000834914764, + "learning_rate": 1.99149416340329e-05, + "loss": 0.4859, "step": 2800 }, { - "epoch": 0.41098901098901097, - "grad_norm": 0.42482483007703387, - "learning_rate": 1.991683332541078e-05, - "loss": 0.4749, + "epoch": 0.4129250699249227, + "grad_norm": 0.4445291093214514, + "learning_rate": 1.9913823331536866e-05, + "loss": 0.4841, "step": 2805 }, { - "epoch": 0.4117216117216117, - "grad_norm": 0.40192909545353095, - "learning_rate": 1.9915732612769422e-05, - "loss": 0.4655, + "epoch": 0.4136611217429707, + "grad_norm": 0.48970415093420555, + "learning_rate": 1.9912697757358787e-05, + "loss": 0.4753, "step": 2810 }, { - "epoch": 0.41245421245421243, - "grad_norm": 0.5431509754402679, - "learning_rate": 1.9914624694775055e-05, - "loss": 0.4629, + "epoch": 0.4143971735610187, + "grad_norm": 0.4328230899901993, + "learning_rate": 1.9911564912324262e-05, + "loss": 0.4745, "step": 2815 }, { - "epoch": 0.41318681318681316, - "grad_norm": 0.4382504187524325, - "learning_rate": 1.9913509572232745e-05, - "loss": 0.4775, + "epoch": 0.41513322537906666, + "grad_norm": 0.4290769164771048, + "learning_rate": 1.9910424797264212e-05, + "loss": 0.4549, "step": 2820 }, { - "epoch": 0.4139194139194139, - "grad_norm": 0.446368115249646, - "learning_rate": 1.991238724595282e-05, - "loss": 0.4708, + "epoch": 0.4158692771971147, + "grad_norm": 0.4464613687929015, + "learning_rate": 1.9909277413014908e-05, + "loss": 0.4618, "step": 2825 }, { - "epoch": 0.4146520146520147, - "grad_norm": 0.4597753061899568, - "learning_rate": 1.9911257716750816e-05, - "loss": 0.4776, + "epoch": 0.4166053290151627, + "grad_norm": 0.7532038501882797, + "learning_rate": 1.990812276041794e-05, + "loss": 0.4695, "step": 2830 }, { - "epoch": 0.4153846153846154, - "grad_norm": 0.4151626162955841, - "learning_rate": 1.9910120985447525e-05, - "loss": 0.4601, + "epoch": 0.41734138083321065, + "grad_norm": 0.4010934042836865, + "learning_rate": 1.990696084032023e-05, + "loss": 0.4733, "step": 2835 }, { - "epoch": 0.41611721611721614, - "grad_norm": 0.46047399733159433, - "learning_rate": 1.9908977052868956e-05, - "loss": 0.485, + "epoch": 0.41807743265125863, + "grad_norm": 0.48258845578057696, + "learning_rate": 1.990579165357404e-05, + "loss": 0.4714, "step": 2840 }, { - "epoch": 0.41684981684981687, - "grad_norm": 0.4241080283573777, - "learning_rate": 1.990782591984636e-05, - "loss": 0.466, + "epoch": 0.41881348446930666, + "grad_norm": 0.44701405970311925, + "learning_rate": 1.9904615201036958e-05, + "loss": 0.4538, "step": 2845 }, { - "epoch": 0.4175824175824176, - "grad_norm": 0.4809486308461704, - "learning_rate": 1.9906667587216216e-05, - "loss": 0.4649, + "epoch": 0.41954953628735464, + "grad_norm": 0.4163801328729629, + "learning_rate": 1.9903431483571887e-05, + "loss": 0.4654, "step": 2850 }, { - "epoch": 0.4183150183150183, - "grad_norm": 0.4281122076011044, - "learning_rate": 1.9905502055820237e-05, - "loss": 0.4724, + "epoch": 0.4202855881054026, + "grad_norm": 0.43828348076014584, + "learning_rate": 1.990224050204708e-05, + "loss": 0.4783, "step": 2855 }, { - "epoch": 0.41904761904761906, - "grad_norm": 0.4264996088139275, - "learning_rate": 1.990432932650537e-05, - "loss": 0.48, + "epoch": 0.4210216399234506, + "grad_norm": 0.4304881069425013, + "learning_rate": 1.9901042257336107e-05, + "loss": 0.4826, "step": 2860 }, { - "epoch": 0.4197802197802198, - "grad_norm": 0.4300567168814666, - "learning_rate": 1.990314940012379e-05, - "loss": 0.4689, + "epoch": 0.4217576917414986, + "grad_norm": 0.45782879456871134, + "learning_rate": 1.9899836750317865e-05, + "loss": 0.4798, "step": 2865 }, { - "epoch": 0.4205128205128205, - "grad_norm": 0.4480456883196755, - "learning_rate": 1.990196227753289e-05, - "loss": 0.4722, + "epoch": 0.4224937435595466, + "grad_norm": 0.43195005386091423, + "learning_rate": 1.9898623981876587e-05, + "loss": 0.4643, "step": 2870 }, { - "epoch": 0.42124542124542125, - "grad_norm": 0.4322204562192915, - "learning_rate": 1.9900767959595312e-05, - "loss": 0.5072, + "epoch": 0.4232297953775946, + "grad_norm": 0.45983030843494205, + "learning_rate": 1.9897403952901822e-05, + "loss": 0.4711, "step": 2875 }, { - "epoch": 0.421978021978022, - "grad_norm": 0.4050762891489174, - "learning_rate": 1.9899566447178914e-05, - "loss": 0.4644, + "epoch": 0.42396584719564256, + "grad_norm": 0.43173486287916685, + "learning_rate": 1.9896176664288447e-05, + "loss": 0.4624, "step": 2880 }, { - "epoch": 0.4227106227106227, - "grad_norm": 0.4381041319495102, - "learning_rate": 1.9898357741156788e-05, - "loss": 0.4705, + "epoch": 0.42470189901369054, + "grad_norm": 0.44218985490594753, + "learning_rate": 1.9894942116936666e-05, + "loss": 0.4754, "step": 2885 }, { - "epoch": 0.42344322344322344, - "grad_norm": 0.41659237129980764, - "learning_rate": 1.989714184240725e-05, - "loss": 0.4513, + "epoch": 0.4254379508317386, + "grad_norm": 0.3865084959037182, + "learning_rate": 1.9893700311752003e-05, + "loss": 0.4492, "step": 2890 }, { - "epoch": 0.42417582417582417, - "grad_norm": 0.43285927087871323, - "learning_rate": 1.9895918751813843e-05, - "loss": 0.4599, + "epoch": 0.42617400264978655, + "grad_norm": 0.41897147151455905, + "learning_rate": 1.989245124964531e-05, + "loss": 0.4541, "step": 2895 }, { - "epoch": 0.4249084249084249, - "grad_norm": 1.1240756278162536, - "learning_rate": 1.9894688470265335e-05, - "loss": 0.475, + "epoch": 0.42691005446783453, + "grad_norm": 0.43255821511679104, + "learning_rate": 1.9891194931532768e-05, + "loss": 0.4645, "step": 2900 }, { - "epoch": 0.4256410256410256, - "grad_norm": 0.43100131199062985, - "learning_rate": 1.9893450998655723e-05, - "loss": 0.4781, + "epoch": 0.4276461062858825, + "grad_norm": 0.44304923672780167, + "learning_rate": 1.988993135833586e-05, + "loss": 0.4695, "step": 2905 }, { - "epoch": 0.42637362637362636, - "grad_norm": 0.42432854968802913, - "learning_rate": 1.9892206337884226e-05, - "loss": 0.4711, + "epoch": 0.42838215810393054, + "grad_norm": 0.41268218699985837, + "learning_rate": 1.9888660530981413e-05, + "loss": 0.4635, "step": 2910 }, { - "epoch": 0.4271062271062271, - "grad_norm": 0.4479110111360452, - "learning_rate": 1.9890954488855288e-05, - "loss": 0.4697, + "epoch": 0.4291182099219785, + "grad_norm": 0.4377162556413914, + "learning_rate": 1.9887382450401563e-05, + "loss": 0.4784, "step": 2915 }, { - "epoch": 0.4278388278388278, - "grad_norm": 0.44549474371119435, - "learning_rate": 1.9889695452478573e-05, - "loss": 0.4618, + "epoch": 0.4298542617400265, + "grad_norm": 0.4318335871287204, + "learning_rate": 1.9886097117533764e-05, + "loss": 0.4758, "step": 2920 }, { - "epoch": 0.42857142857142855, - "grad_norm": 0.42161761546436355, - "learning_rate": 1.9888429229668972e-05, - "loss": 0.4613, + "epoch": 0.4305903135580745, + "grad_norm": 0.4305024561550163, + "learning_rate": 1.98848045333208e-05, + "loss": 0.4675, "step": 2925 }, { - "epoch": 0.4293040293040293, - "grad_norm": 0.4215544190235201, - "learning_rate": 1.9887155821346597e-05, - "loss": 0.4508, + "epoch": 0.43132636537612246, + "grad_norm": 0.44025037364780273, + "learning_rate": 1.9883504698710764e-05, + "loss": 0.4566, "step": 2930 }, { - "epoch": 0.43003663003663006, - "grad_norm": 0.4423258878717002, - "learning_rate": 1.988587522843678e-05, - "loss": 0.4674, + "epoch": 0.4320624171941705, + "grad_norm": 0.44310453288236495, + "learning_rate": 1.9882197614657067e-05, + "loss": 0.4607, "step": 2935 }, { - "epoch": 0.4307692307692308, - "grad_norm": 0.42574662815844067, - "learning_rate": 1.9884587451870082e-05, - "loss": 0.4596, + "epoch": 0.43279846901221847, + "grad_norm": 0.4320374381203412, + "learning_rate": 1.988088328211845e-05, + "loss": 0.4703, "step": 2940 }, { - "epoch": 0.4315018315018315, - "grad_norm": 0.4170768435563345, - "learning_rate": 1.9883292492582268e-05, - "loss": 0.4608, + "epoch": 0.43353452083026645, + "grad_norm": 0.4360481931394084, + "learning_rate": 1.9879561702058954e-05, + "loss": 0.4709, "step": 2945 }, { - "epoch": 0.43223443223443225, - "grad_norm": 0.44697757313895964, - "learning_rate": 1.9881990351514333e-05, - "loss": 0.4673, + "epoch": 0.4342705726483144, + "grad_norm": 0.3961083811911451, + "learning_rate": 1.9878232875447948e-05, + "loss": 0.4668, "step": 2950 }, { - "epoch": 0.432967032967033, - "grad_norm": 0.41995177242580917, - "learning_rate": 1.9880681029612494e-05, - "loss": 0.468, + "epoch": 0.43500662446636246, + "grad_norm": 0.4331840104260549, + "learning_rate": 1.987689680326011e-05, + "loss": 0.4605, "step": 2955 }, { - "epoch": 0.4336996336996337, - "grad_norm": 0.43402289944455125, - "learning_rate": 1.9879364527828178e-05, - "loss": 0.4517, + "epoch": 0.43574267628441044, + "grad_norm": 0.43775242213997434, + "learning_rate": 1.9875553486475434e-05, + "loss": 0.4652, "step": 2960 }, { - "epoch": 0.43443223443223444, - "grad_norm": 0.46212386496174346, - "learning_rate": 1.987804084711803e-05, - "loss": 0.4849, + "epoch": 0.4364787281024584, + "grad_norm": 0.39381836032976314, + "learning_rate": 1.9874202926079226e-05, + "loss": 0.4509, "step": 2965 }, { - "epoch": 0.4351648351648352, - "grad_norm": 0.5112733415377284, - "learning_rate": 1.987670998844392e-05, - "loss": 0.4721, + "epoch": 0.4372147799205064, + "grad_norm": 0.46469421855299614, + "learning_rate": 1.9872845123062117e-05, + "loss": 0.4649, "step": 2970 }, { - "epoch": 0.4358974358974359, - "grad_norm": 0.4285494192095629, - "learning_rate": 1.9875371952772922e-05, - "loss": 0.4541, + "epoch": 0.43795083173855437, + "grad_norm": 0.43420109025858905, + "learning_rate": 1.987148007842003e-05, + "loss": 0.4707, "step": 2975 }, { - "epoch": 0.43663003663003663, - "grad_norm": 0.43764286187833223, - "learning_rate": 1.9874026741077336e-05, - "loss": 0.4777, + "epoch": 0.4386868835566024, + "grad_norm": 0.41017038513244725, + "learning_rate": 1.9870107793154215e-05, + "loss": 0.4659, "step": 2980 }, { - "epoch": 0.43736263736263736, - "grad_norm": 0.41258038663230506, - "learning_rate": 1.9872674354334667e-05, - "loss": 0.4538, + "epoch": 0.4394229353746504, + "grad_norm": 0.42378395928333984, + "learning_rate": 1.986872826827123e-05, + "loss": 0.4779, "step": 2985 }, { - "epoch": 0.4380952380952381, - "grad_norm": 0.4271331651523818, - "learning_rate": 1.987131479352764e-05, - "loss": 0.475, + "epoch": 0.44015898719269836, + "grad_norm": 0.41958986445827784, + "learning_rate": 1.9867341504782936e-05, + "loss": 0.4555, "step": 2990 }, { - "epoch": 0.4388278388278388, - "grad_norm": 0.4006631405996531, - "learning_rate": 1.9869948059644197e-05, - "loss": 0.4584, + "epoch": 0.44089503901074634, + "grad_norm": 0.44111437989525354, + "learning_rate": 1.9865947503706517e-05, + "loss": 0.4724, "step": 2995 }, { - "epoch": 0.43956043956043955, - "grad_norm": 0.44954746312551097, - "learning_rate": 1.986857415367748e-05, - "loss": 0.4723, + "epoch": 0.44163109082879437, + "grad_norm": 0.43488335562692976, + "learning_rate": 1.986454626606445e-05, + "loss": 0.4525, "step": 3000 }, { - "epoch": 0.4402930402930403, - "grad_norm": 0.4097449870862522, - "learning_rate": 1.9867193076625852e-05, - "loss": 0.4599, + "epoch": 0.44236714264684235, + "grad_norm": 0.4334718080918571, + "learning_rate": 1.9863137792884534e-05, + "loss": 0.464, "step": 3005 }, { - "epoch": 0.441025641025641, - "grad_norm": 0.4421570174944609, - "learning_rate": 1.9865804829492883e-05, - "loss": 0.4506, + "epoch": 0.4431031944648903, + "grad_norm": 0.40573764527765027, + "learning_rate": 1.9861722085199866e-05, + "loss": 0.474, "step": 3010 }, { - "epoch": 0.44175824175824174, - "grad_norm": 0.4265771508229422, - "learning_rate": 1.986440941328736e-05, - "loss": 0.4611, + "epoch": 0.4438392462829383, + "grad_norm": 0.416306792206869, + "learning_rate": 1.986029914404885e-05, + "loss": 0.4616, "step": 3015 }, { - "epoch": 0.4424908424908425, - "grad_norm": 0.4185455430733579, - "learning_rate": 1.986300682902327e-05, - "loss": 0.4593, + "epoch": 0.4445752981009863, + "grad_norm": 0.4109439567274265, + "learning_rate": 1.9858868970475202e-05, + "loss": 0.4683, "step": 3020 }, { - "epoch": 0.4432234432234432, - "grad_norm": 0.43628127572576264, - "learning_rate": 1.9861597077719818e-05, - "loss": 0.5056, + "epoch": 0.4453113499190343, + "grad_norm": 0.4281915217845688, + "learning_rate": 1.9857431565527936e-05, + "loss": 0.4473, "step": 3025 }, { - "epoch": 0.44395604395604393, - "grad_norm": 0.41242286663396627, - "learning_rate": 1.98601801604014e-05, - "loss": 0.4594, + "epoch": 0.4460474017370823, + "grad_norm": 0.3945272194503782, + "learning_rate": 1.9855986930261374e-05, + "loss": 0.4602, "step": 3030 }, { - "epoch": 0.44468864468864466, - "grad_norm": 0.4468118929304783, - "learning_rate": 1.9858756078097644e-05, - "loss": 0.4787, + "epoch": 0.44678345355513027, + "grad_norm": 0.44107881912248864, + "learning_rate": 1.9854535065735137e-05, + "loss": 0.4632, "step": 3035 }, { - "epoch": 0.44542124542124545, - "grad_norm": 0.4236707817709085, - "learning_rate": 1.985732483184337e-05, - "loss": 0.4664, + "epoch": 0.44751950537317825, + "grad_norm": 0.40426276608883266, + "learning_rate": 1.985307597301416e-05, + "loss": 0.4739, "step": 3040 }, { - "epoch": 0.4461538461538462, - "grad_norm": 0.41568305822108564, - "learning_rate": 1.9855886422678598e-05, - "loss": 0.4556, + "epoch": 0.4482555571912263, + "grad_norm": 0.4039687613674645, + "learning_rate": 1.9851609653168664e-05, + "loss": 0.4626, "step": 3045 }, { - "epoch": 0.4468864468864469, - "grad_norm": 0.4031732738635877, - "learning_rate": 1.9854440851648566e-05, - "loss": 0.4639, + "epoch": 0.44899160900927426, + "grad_norm": 0.42987297721657564, + "learning_rate": 1.9850136107274182e-05, + "loss": 0.4722, "step": 3050 }, { - "epoch": 0.44761904761904764, - "grad_norm": 0.42845350632694124, - "learning_rate": 1.9852988119803708e-05, - "loss": 0.4816, + "epoch": 0.44972766082732224, + "grad_norm": 0.4799009985252744, + "learning_rate": 1.984865533641154e-05, + "loss": 0.4498, "step": 3055 }, { - "epoch": 0.44835164835164837, - "grad_norm": 0.4182498366755355, - "learning_rate": 1.985152822819967e-05, - "loss": 0.4721, + "epoch": 0.4504637126453702, + "grad_norm": 0.4261866300237137, + "learning_rate": 1.984716734166688e-05, + "loss": 0.4703, "step": 3060 }, { - "epoch": 0.4490842490842491, - "grad_norm": 2.0996781684996297, - "learning_rate": 1.985006117789729e-05, - "loss": 0.4682, + "epoch": 0.45119976446341825, + "grad_norm": 0.41273386694642783, + "learning_rate": 1.9845672124131613e-05, + "loss": 0.4647, "step": 3065 }, { - "epoch": 0.44981684981684983, - "grad_norm": 0.41820013155916524, - "learning_rate": 1.9848586969962612e-05, - "loss": 0.4709, + "epoch": 0.45193581628146623, + "grad_norm": 0.44425449264384115, + "learning_rate": 1.9844169684902473e-05, + "loss": 0.4624, "step": 3070 }, { - "epoch": 0.45054945054945056, - "grad_norm": 0.4170880105786003, - "learning_rate": 1.984710560546688e-05, - "loss": 0.4801, + "epoch": 0.4526718680995142, + "grad_norm": 0.40456413118707896, + "learning_rate": 1.9842660025081485e-05, + "loss": 0.456, "step": 3075 }, { - "epoch": 0.4512820512820513, - "grad_norm": 0.41308092270816765, - "learning_rate": 1.984561708548655e-05, - "loss": 0.4724, + "epoch": 0.4534079199175622, + "grad_norm": 0.40770054840443004, + "learning_rate": 1.9841143145775966e-05, + "loss": 0.4561, "step": 3080 }, { - "epoch": 0.452014652014652, - "grad_norm": 0.42432685224665, - "learning_rate": 1.984412141110326e-05, - "loss": 0.524, + "epoch": 0.45414397173561016, + "grad_norm": 0.4342418217242728, + "learning_rate": 1.983961904809853e-05, + "loss": 0.4774, "step": 3085 }, { - "epoch": 0.45274725274725275, - "grad_norm": 0.4335010203313694, - "learning_rate": 1.9842618583403857e-05, - "loss": 0.4665, + "epoch": 0.4548800235536582, + "grad_norm": 0.41929000606256295, + "learning_rate": 1.9838087733167088e-05, + "loss": 0.4647, "step": 3090 }, { - "epoch": 0.4534798534798535, - "grad_norm": 0.4126926537895775, - "learning_rate": 1.984110860348038e-05, - "loss": 0.488, + "epoch": 0.4556160753717062, + "grad_norm": 0.4004519524235956, + "learning_rate": 1.983654920210484e-05, + "loss": 0.4636, "step": 3095 }, { - "epoch": 0.4542124542124542, - "grad_norm": 0.477986799745578, - "learning_rate": 1.9839591472430076e-05, - "loss": 0.48, + "epoch": 0.45635212718975415, + "grad_norm": 0.41124674907739295, + "learning_rate": 1.983500345604028e-05, + "loss": 0.4787, "step": 3100 }, { - "epoch": 0.45494505494505494, - "grad_norm": 0.3938638046525444, - "learning_rate": 1.9838067191355377e-05, - "loss": 0.4686, + "epoch": 0.45708817900780213, + "grad_norm": 0.4708532248959944, + "learning_rate": 1.9833450496107203e-05, + "loss": 0.4767, "step": 3105 }, { - "epoch": 0.45567765567765567, - "grad_norm": 0.422316248221422, - "learning_rate": 1.9836535761363914e-05, - "loss": 0.4792, + "epoch": 0.45782423082585016, + "grad_norm": 0.458182926506385, + "learning_rate": 1.9831890323444686e-05, + "loss": 0.461, "step": 3110 }, { - "epoch": 0.4564102564102564, - "grad_norm": 0.42758442770500454, - "learning_rate": 1.9834997183568517e-05, - "loss": 0.4652, + "epoch": 0.45856028264389814, + "grad_norm": 0.4283861415071619, + "learning_rate": 1.9830322939197094e-05, + "loss": 0.4612, "step": 3115 }, { - "epoch": 0.45714285714285713, - "grad_norm": 0.41505650486941165, - "learning_rate": 1.9833451459087207e-05, - "loss": 0.4694, + "epoch": 0.4592963344619461, + "grad_norm": 0.4223663432196678, + "learning_rate": 1.9828748344514087e-05, + "loss": 0.4561, "step": 3120 }, { - "epoch": 0.45787545787545786, - "grad_norm": 0.4313708557250346, - "learning_rate": 1.9831898589043195e-05, - "loss": 0.4721, + "epoch": 0.4600323862799941, + "grad_norm": 0.4386331225059637, + "learning_rate": 1.9827166540550622e-05, + "loss": 0.4732, "step": 3125 }, { - "epoch": 0.4586080586080586, - "grad_norm": 0.4039460331210853, - "learning_rate": 1.983033857456489e-05, - "loss": 0.4745, + "epoch": 0.4607684380980421, + "grad_norm": 0.4128996398421751, + "learning_rate": 1.9825577528466925e-05, + "loss": 0.4693, "step": 3130 }, { - "epoch": 0.4593406593406593, - "grad_norm": 0.41533105180381913, - "learning_rate": 1.982877141678589e-05, - "loss": 0.4589, + "epoch": 0.4615044899160901, + "grad_norm": 0.41584310994050183, + "learning_rate": 1.9823981309428525e-05, + "loss": 0.4742, "step": 3135 }, { - "epoch": 0.46007326007326005, - "grad_norm": 0.40794998171916896, - "learning_rate": 1.982719711684498e-05, - "loss": 0.4809, + "epoch": 0.4622405417341381, + "grad_norm": 0.40372530977646753, + "learning_rate": 1.9822377884606227e-05, + "loss": 0.4657, "step": 3140 }, { - "epoch": 0.4608058608058608, - "grad_norm": 0.4437074981748962, - "learning_rate": 1.9825615675886142e-05, - "loss": 0.4666, + "epoch": 0.46297659355218607, + "grad_norm": 0.4116788800619764, + "learning_rate": 1.982076725517613e-05, + "loss": 0.4768, "step": 3145 }, { - "epoch": 0.46153846153846156, - "grad_norm": 0.41963545077807324, - "learning_rate": 1.9824027095058546e-05, - "loss": 0.4577, + "epoch": 0.46371264537023404, + "grad_norm": 0.4164112439477256, + "learning_rate": 1.981914942231961e-05, + "loss": 0.4615, "step": 3150 }, { - "epoch": 0.4622710622710623, - "grad_norm": 0.41695879640414035, - "learning_rate": 1.9822431375516543e-05, - "loss": 0.4646, + "epoch": 0.4644486971882821, + "grad_norm": 0.41310344930399845, + "learning_rate": 1.9817524387223335e-05, + "loss": 0.4496, "step": 3155 }, { - "epoch": 0.463003663003663, - "grad_norm": 0.45304494814251517, - "learning_rate": 1.982082851841968e-05, - "loss": 0.4647, + "epoch": 0.46518474900633006, + "grad_norm": 0.39542517683227685, + "learning_rate": 1.9815892151079242e-05, + "loss": 0.4493, "step": 3160 }, { - "epoch": 0.46373626373626375, - "grad_norm": 0.42268156465950907, - "learning_rate": 1.9819218524932685e-05, - "loss": 0.5243, + "epoch": 0.46592080082437803, + "grad_norm": 0.39877143833113265, + "learning_rate": 1.9814252715084567e-05, + "loss": 0.4722, "step": 3165 }, { - "epoch": 0.4644688644688645, - "grad_norm": 0.4330357382344699, - "learning_rate": 1.981760139622548e-05, - "loss": 0.4628, + "epoch": 0.466656852642426, + "grad_norm": 0.4286432019131809, + "learning_rate": 1.981260608044181e-05, + "loss": 0.4814, "step": 3170 }, { - "epoch": 0.4652014652014652, - "grad_norm": 0.3912091812312883, - "learning_rate": 1.9815977133473153e-05, - "loss": 0.4655, + "epoch": 0.467392904460474, + "grad_norm": 0.4228249800132515, + "learning_rate": 1.981095224835877e-05, + "loss": 0.4496, "step": 3175 }, { - "epoch": 0.46593406593406594, - "grad_norm": 0.4117875491925943, - "learning_rate": 1.9814345737856e-05, - "loss": 0.4706, + "epoch": 0.468128956278522, + "grad_norm": 0.4002611631369483, + "learning_rate": 1.980929122004851e-05, + "loss": 0.4708, "step": 3180 }, { - "epoch": 0.4666666666666667, - "grad_norm": 0.4194813796894191, - "learning_rate": 1.9812707210559482e-05, - "loss": 0.4664, + "epoch": 0.46886500809657, + "grad_norm": 0.4193718325228932, + "learning_rate": 1.980762299672937e-05, + "loss": 0.4716, "step": 3185 }, { - "epoch": 0.4673992673992674, - "grad_norm": 0.44104772544766263, - "learning_rate": 1.9811061552774254e-05, - "loss": 0.4499, + "epoch": 0.469601059914618, + "grad_norm": 0.41774347369097076, + "learning_rate": 1.9805947579624986e-05, + "loss": 0.473, "step": 3190 }, { - "epoch": 0.46813186813186813, - "grad_norm": 0.39534418481458466, - "learning_rate": 1.980940876569614e-05, - "loss": 0.4544, + "epoch": 0.47033711173266596, + "grad_norm": 0.4180941873133464, + "learning_rate": 1.980426496996425e-05, + "loss": 0.4617, "step": 3195 }, { - "epoch": 0.46886446886446886, - "grad_norm": 0.406074463901355, - "learning_rate": 1.980774885052616e-05, - "loss": 0.4769, + "epoch": 0.471073163550714, + "grad_norm": 0.401915602088444, + "learning_rate": 1.980257516898134e-05, + "loss": 0.4661, "step": 3200 }, { - "epoch": 0.4695970695970696, - "grad_norm": 0.44974837591152544, - "learning_rate": 1.9806081808470507e-05, - "loss": 0.4722, + "epoch": 0.47180921536876197, + "grad_norm": 0.38887379149008394, + "learning_rate": 1.9800878177915705e-05, + "loss": 0.456, "step": 3205 }, { - "epoch": 0.4703296703296703, - "grad_norm": 0.44829907029092797, - "learning_rate": 1.980440764074054e-05, - "loss": 0.4734, + "epoch": 0.47254526718680995, + "grad_norm": 0.4058785283380266, + "learning_rate": 1.9799173998012072e-05, + "loss": 0.4676, "step": 3210 }, { - "epoch": 0.47106227106227105, - "grad_norm": 0.40035861121791594, - "learning_rate": 1.980272634855282e-05, - "loss": 0.4585, + "epoch": 0.4732813190048579, + "grad_norm": 0.40844557947363974, + "learning_rate": 1.9797462630520437e-05, + "loss": 0.464, "step": 3215 }, { - "epoch": 0.4717948717948718, - "grad_norm": 0.44003860092573194, - "learning_rate": 1.980103793312907e-05, - "loss": 0.4666, + "epoch": 0.47401737082290596, + "grad_norm": 0.4042514657643234, + "learning_rate": 1.979574407669607e-05, + "loss": 0.4791, "step": 3220 }, { - "epoch": 0.4725274725274725, - "grad_norm": 0.7529828149259594, - "learning_rate": 1.9799342395696185e-05, - "loss": 0.4473, + "epoch": 0.47475342264095394, + "grad_norm": 0.42841802368536447, + "learning_rate": 1.9794018337799517e-05, + "loss": 0.476, "step": 3225 }, { - "epoch": 0.47326007326007324, - "grad_norm": 0.4151198080685876, - "learning_rate": 1.9797639737486246e-05, - "loss": 0.4752, + "epoch": 0.4754894744590019, + "grad_norm": 0.40088970264481133, + "learning_rate": 1.9792285415096576e-05, + "loss": 0.4481, "step": 3230 }, { - "epoch": 0.473992673992674, - "grad_norm": 0.39959614249652226, - "learning_rate": 1.9795929959736502e-05, - "loss": 0.4599, + "epoch": 0.4762255262770499, + "grad_norm": 0.40476069582920854, + "learning_rate": 1.9790545309858336e-05, + "loss": 0.4633, "step": 3235 }, { - "epoch": 0.4747252747252747, - "grad_norm": 0.4551376232704039, - "learning_rate": 1.9794213063689386e-05, - "loss": 0.4625, + "epoch": 0.47696157809509787, + "grad_norm": 0.4199593822897928, + "learning_rate": 1.978879802336115e-05, + "loss": 0.468, "step": 3240 }, { - "epoch": 0.47545787545787543, - "grad_norm": 0.43462040045084416, - "learning_rate": 1.9792489050592486e-05, - "loss": 0.4518, + "epoch": 0.4776976299131459, + "grad_norm": 0.4085284137295676, + "learning_rate": 1.9787043556886622e-05, + "loss": 0.4604, "step": 3245 }, { - "epoch": 0.47619047619047616, - "grad_norm": 0.47841139977456976, - "learning_rate": 1.979075792169858e-05, - "loss": 0.4641, + "epoch": 0.4784336817311939, + "grad_norm": 0.44961470043396845, + "learning_rate": 1.9785281911721643e-05, + "loss": 0.4658, "step": 3250 }, { - "epoch": 0.47692307692307695, - "grad_norm": 0.39672143880466443, - "learning_rate": 1.9789019678265595e-05, - "loss": 0.4545, + "epoch": 0.47916973354924186, + "grad_norm": 0.4061982836140069, + "learning_rate": 1.9783513089158356e-05, + "loss": 0.4562, "step": 3255 }, { - "epoch": 0.4776556776556777, - "grad_norm": 0.4321837925413481, - "learning_rate": 1.978727432155665e-05, - "loss": 0.4552, + "epoch": 0.47990578536728984, + "grad_norm": 0.4195112377167201, + "learning_rate": 1.9781737090494176e-05, + "loss": 0.4583, "step": 3260 }, { - "epoch": 0.4783882783882784, - "grad_norm": 0.4403586310764908, - "learning_rate": 1.9785521852840025e-05, - "loss": 0.4728, + "epoch": 0.4806418371853379, + "grad_norm": 0.43364546064085246, + "learning_rate": 1.9779953917031776e-05, + "loss": 0.4586, "step": 3265 }, { - "epoch": 0.47912087912087914, - "grad_norm": 0.40406904762288254, - "learning_rate": 1.9783762273389162e-05, - "loss": 0.4605, + "epoch": 0.48137788900338585, + "grad_norm": 0.4115588808981016, + "learning_rate": 1.9778163570079095e-05, + "loss": 0.4724, "step": 3270 }, { - "epoch": 0.47985347985347987, - "grad_norm": 0.4251693515779485, - "learning_rate": 1.9781995584482676e-05, - "loss": 0.4555, + "epoch": 0.48211394082143383, + "grad_norm": 0.40816394060622097, + "learning_rate": 1.9776366050949333e-05, + "loss": 0.4464, "step": 3275 }, { - "epoch": 0.4805860805860806, - "grad_norm": 0.41008050156432474, - "learning_rate": 1.9780221787404343e-05, - "loss": 0.4699, + "epoch": 0.4828499926394818, + "grad_norm": 0.467843462744796, + "learning_rate": 1.9774561360960955e-05, + "loss": 0.4725, "step": 3280 }, { - "epoch": 0.48131868131868133, - "grad_norm": 0.39802159163681655, - "learning_rate": 1.977844088344312e-05, - "loss": 0.4763, + "epoch": 0.4835860444575298, + "grad_norm": 0.41759683676633286, + "learning_rate": 1.977274950143768e-05, + "loss": 0.4611, "step": 3285 }, { - "epoch": 0.48205128205128206, - "grad_norm": 0.4173141548284825, - "learning_rate": 1.97766528738931e-05, - "loss": 0.4664, + "epoch": 0.4843220962755778, + "grad_norm": 0.45394616999310433, + "learning_rate": 1.977093047370848e-05, + "loss": 0.4586, "step": 3290 }, { - "epoch": 0.4827838827838828, - "grad_norm": 0.39535352109619026, - "learning_rate": 1.977485776005357e-05, - "loss": 0.4553, + "epoch": 0.4850581480936258, + "grad_norm": 0.3971437956948848, + "learning_rate": 1.97691042791076e-05, + "loss": 0.445, "step": 3295 }, { - "epoch": 0.4835164835164835, - "grad_norm": 0.403127280029853, - "learning_rate": 1.977305554322895e-05, - "loss": 0.4638, + "epoch": 0.4857941999116738, + "grad_norm": 0.4417562604444158, + "learning_rate": 1.9767270918974533e-05, + "loss": 0.4631, "step": 3300 }, { - "epoch": 0.48424908424908425, - "grad_norm": 0.4315228635100888, - "learning_rate": 1.9771246224728853e-05, - "loss": 0.4738, + "epoch": 0.48653025172972175, + "grad_norm": 0.402271545011745, + "learning_rate": 1.9765430394654027e-05, + "loss": 0.4655, "step": 3305 }, { - "epoch": 0.484981684981685, - "grad_norm": 0.44872850655100005, - "learning_rate": 1.9769429805868027e-05, - "loss": 0.4533, + "epoch": 0.4872663035477698, + "grad_norm": 0.42477604801229096, + "learning_rate": 1.976358270749609e-05, + "loss": 0.4688, "step": 3310 }, { - "epoch": 0.4857142857142857, - "grad_norm": 0.4079127867380826, - "learning_rate": 1.9767606287966384e-05, - "loss": 0.445, + "epoch": 0.48800235536581776, + "grad_norm": 0.4329126934730056, + "learning_rate": 1.9761727858855973e-05, + "loss": 0.4474, "step": 3315 }, { - "epoch": 0.48644688644688644, - "grad_norm": 0.42517621903353997, - "learning_rate": 1.9765775672349012e-05, - "loss": 0.4701, + "epoch": 0.48873840718386574, + "grad_norm": 0.43263812510031907, + "learning_rate": 1.9759865850094197e-05, + "loss": 0.4742, "step": 3320 }, { - "epoch": 0.48717948717948717, - "grad_norm": 0.39543291608601666, - "learning_rate": 1.9763937960346132e-05, - "loss": 0.4561, + "epoch": 0.4894744590019137, + "grad_norm": 0.41666249042657116, + "learning_rate": 1.9757996682576515e-05, + "loss": 0.463, "step": 3325 }, { - "epoch": 0.4879120879120879, - "grad_norm": 0.43589487175440456, - "learning_rate": 1.9762093153293142e-05, - "loss": 0.4557, + "epoch": 0.4902105108199617, + "grad_norm": 0.4265013419951412, + "learning_rate": 1.975612035767395e-05, + "loss": 0.4717, "step": 3330 }, { - "epoch": 0.48864468864468863, - "grad_norm": 0.4129391636519809, - "learning_rate": 1.976024125253058e-05, - "loss": 0.4765, + "epoch": 0.49094656263800973, + "grad_norm": 0.4130363534076511, + "learning_rate": 1.9754236876762763e-05, + "loss": 0.4549, "step": 3335 }, { - "epoch": 0.48937728937728936, - "grad_norm": 0.4356046878850061, - "learning_rate": 1.975838225940415e-05, - "loss": 0.4509, + "epoch": 0.4916826144560577, + "grad_norm": 0.41706839132995893, + "learning_rate": 1.9752346241224466e-05, + "loss": 0.4857, "step": 3340 }, { - "epoch": 0.4901098901098901, - "grad_norm": 0.39839856694377546, - "learning_rate": 1.9756516175264703e-05, - "loss": 0.454, + "epoch": 0.4924186662741057, + "grad_norm": 0.396662264423611, + "learning_rate": 1.975044845244582e-05, + "loss": 0.4386, "step": 3345 }, { - "epoch": 0.4908424908424908, - "grad_norm": 0.4019660081498156, - "learning_rate": 1.9754643001468247e-05, - "loss": 0.4688, + "epoch": 0.49315471809215367, + "grad_norm": 0.40161162089841823, + "learning_rate": 1.9748543511818835e-05, + "loss": 0.4459, "step": 3350 }, { - "epoch": 0.49157509157509155, - "grad_norm": 0.42725169158973203, - "learning_rate": 1.9752762739375938e-05, - "loss": 0.4764, + "epoch": 0.4938907699102017, + "grad_norm": 0.3915110302234934, + "learning_rate": 1.974663142074076e-05, + "loss": 0.4625, "step": 3355 }, { - "epoch": 0.49230769230769234, - "grad_norm": 0.41230781357917834, - "learning_rate": 1.9750875390354083e-05, - "loss": 0.4759, + "epoch": 0.4946268217282497, + "grad_norm": 0.43822507627955265, + "learning_rate": 1.9744712180614093e-05, + "loss": 0.4733, "step": 3360 }, { - "epoch": 0.49304029304029307, - "grad_norm": 0.4358085904919089, - "learning_rate": 1.9748980955774148e-05, - "loss": 0.4675, + "epoch": 0.49536287354629766, + "grad_norm": 0.389001674944297, + "learning_rate": 1.974278579284658e-05, + "loss": 0.4633, "step": 3365 }, { - "epoch": 0.4937728937728938, - "grad_norm": 0.41400889915347355, - "learning_rate": 1.9747079437012736e-05, - "loss": 0.4754, + "epoch": 0.49609892536434563, + "grad_norm": 0.39998826272002275, + "learning_rate": 1.9740852258851203e-05, + "loss": 0.4656, "step": 3370 }, { - "epoch": 0.4945054945054945, - "grad_norm": 0.39535489205011043, - "learning_rate": 1.9745170835451598e-05, - "loss": 0.4843, + "epoch": 0.49683497718239367, + "grad_norm": 0.41163651340635726, + "learning_rate": 1.9738911580046185e-05, + "loss": 0.4749, "step": 3375 }, { - "epoch": 0.49523809523809526, - "grad_norm": 0.3875877035122525, - "learning_rate": 1.974325515247764e-05, - "loss": 0.4519, + "epoch": 0.49757102900044164, + "grad_norm": 0.4068628138795416, + "learning_rate": 1.9736963757855e-05, + "loss": 0.4476, "step": 3380 }, { - "epoch": 0.495970695970696, - "grad_norm": 0.40011808170012764, - "learning_rate": 1.9741332389482915e-05, - "loss": 0.4705, + "epoch": 0.4983070808184896, + "grad_norm": 0.39802458927173445, + "learning_rate": 1.973500879370635e-05, + "loss": 0.4591, "step": 3385 }, { - "epoch": 0.4967032967032967, - "grad_norm": 0.3875608965577219, - "learning_rate": 1.9739402547864605e-05, - "loss": 0.4612, + "epoch": 0.4990431326365376, + "grad_norm": 0.3959109892997859, + "learning_rate": 1.9733046689034182e-05, + "loss": 0.4696, "step": 3390 }, { - "epoch": 0.49743589743589745, - "grad_norm": 0.40605035088734287, - "learning_rate": 1.9737465629025054e-05, - "loss": 0.4729, + "epoch": 0.4997791844545856, + "grad_norm": 0.3837127945016217, + "learning_rate": 1.9731077445277684e-05, + "loss": 0.4583, "step": 3395 }, { - "epoch": 0.4981684981684982, - "grad_norm": 0.4076032885761923, - "learning_rate": 1.9735521634371734e-05, - "loss": 0.4601, + "epoch": 0.5005152362726336, + "grad_norm": 0.4205751599188156, + "learning_rate": 1.972910106388126e-05, + "loss": 0.4766, "step": 3400 }, { - "epoch": 0.4989010989010989, - "grad_norm": 0.4058404149960834, - "learning_rate": 1.973357056531727e-05, - "loss": 0.4699, + "epoch": 0.5012512880906815, + "grad_norm": 0.41793031062362607, + "learning_rate": 1.9727117546294573e-05, + "loss": 0.4594, "step": 3405 }, { - "epoch": 0.49963369963369964, - "grad_norm": 0.38441775513851356, - "learning_rate": 1.9731612423279425e-05, - "loss": 0.4395, + "epoch": 0.5019873399087296, + "grad_norm": 0.43789613525420124, + "learning_rate": 1.9725126893972514e-05, + "loss": 0.456, "step": 3410 }, { - "epoch": 0.5003663003663004, - "grad_norm": 0.3856852354737355, - "learning_rate": 1.9729647209681095e-05, - "loss": 0.4415, + "epoch": 0.5027233917267776, + "grad_norm": 0.4327081367462604, + "learning_rate": 1.9723129108375203e-05, + "loss": 0.4528, "step": 3415 }, { - "epoch": 0.5010989010989011, - "grad_norm": 0.40733571237223243, - "learning_rate": 1.9727674925950322e-05, - "loss": 0.4776, + "epoch": 0.5034594435448255, + "grad_norm": 0.40465484943979296, + "learning_rate": 1.9721124190967995e-05, + "loss": 0.4635, "step": 3420 }, { - "epoch": 0.5018315018315018, - "grad_norm": 0.4160928717740055, - "learning_rate": 1.9725695573520286e-05, - "loss": 0.4759, + "epoch": 0.5041954953628736, + "grad_norm": 0.39001525778029733, + "learning_rate": 1.9719112143221472e-05, + "loss": 0.4515, "step": 3425 }, { - "epoch": 0.5025641025641026, - "grad_norm": 0.39267972694890735, - "learning_rate": 1.9723709153829295e-05, - "loss": 0.4444, + "epoch": 0.5049315471809216, + "grad_norm": 0.4142644311024722, + "learning_rate": 1.971709296661145e-05, + "loss": 0.4528, "step": 3430 }, { - "epoch": 0.5032967032967033, - "grad_norm": 0.39638394361802154, - "learning_rate": 1.97217156683208e-05, - "loss": 0.4372, + "epoch": 0.5056675989989695, + "grad_norm": 0.4233936755271006, + "learning_rate": 1.9715066662618974e-05, + "loss": 0.4522, "step": 3435 }, { - "epoch": 0.504029304029304, - "grad_norm": 0.42218569169595366, - "learning_rate": 1.9719715118443386e-05, - "loss": 0.4581, + "epoch": 0.5064036508170175, + "grad_norm": 0.41635368549993773, + "learning_rate": 1.9713033232730318e-05, + "loss": 0.4635, "step": 3440 }, { - "epoch": 0.5047619047619047, - "grad_norm": 0.42583544248704563, - "learning_rate": 1.9717707505650767e-05, - "loss": 0.4585, + "epoch": 0.5071397026350655, + "grad_norm": 0.42385806677774274, + "learning_rate": 1.971099267843698e-05, + "loss": 0.4588, "step": 3445 }, { - "epoch": 0.5054945054945055, - "grad_norm": 0.40510679362327034, - "learning_rate": 1.9715692831401798e-05, - "loss": 0.4595, + "epoch": 0.5078757544531135, + "grad_norm": 0.39366113592704577, + "learning_rate": 1.9708945001235686e-05, + "loss": 0.4635, "step": 3450 }, { - "epoch": 0.5062271062271062, - "grad_norm": 0.4244850714320826, - "learning_rate": 1.9713671097160454e-05, - "loss": 0.4637, + "epoch": 0.5086118062711615, + "grad_norm": 0.4285306151705861, + "learning_rate": 1.9706890202628376e-05, + "loss": 0.4563, "step": 3455 }, { - "epoch": 0.5069597069597069, - "grad_norm": 0.3959484392053746, - "learning_rate": 1.971164230439585e-05, - "loss": 0.4503, + "epoch": 0.5093478580892095, + "grad_norm": 0.41035525935027833, + "learning_rate": 1.970482828412223e-05, + "loss": 0.4585, "step": 3460 }, { - "epoch": 0.5076923076923077, - "grad_norm": 0.4020029057429197, - "learning_rate": 1.970960645458222e-05, - "loss": 0.456, + "epoch": 0.5100839099072575, + "grad_norm": 0.4001206582628323, + "learning_rate": 1.9702759247229647e-05, + "loss": 0.4612, "step": 3465 }, { - "epoch": 0.5084249084249084, - "grad_norm": 0.3865009439791744, - "learning_rate": 1.9707563549198934e-05, - "loss": 0.444, + "epoch": 0.5108199617253054, + "grad_norm": 0.4076277925400673, + "learning_rate": 1.9700683093468235e-05, + "loss": 0.4656, "step": 3470 }, { - "epoch": 0.5091575091575091, - "grad_norm": 0.3824361919325873, - "learning_rate": 1.970551358973049e-05, - "loss": 0.4502, + "epoch": 0.5115560135433534, + "grad_norm": 0.40262365010775447, + "learning_rate": 1.9698599824360832e-05, + "loss": 0.4513, "step": 3475 }, { - "epoch": 0.5098901098901099, - "grad_norm": 0.39293252412701707, - "learning_rate": 1.9703456577666507e-05, - "loss": 0.4575, + "epoch": 0.5122920653614015, + "grad_norm": 0.547089339379227, + "learning_rate": 1.9696509441435498e-05, + "loss": 0.4561, "step": 3480 }, { - "epoch": 0.5106227106227106, - "grad_norm": 0.37927190798663696, - "learning_rate": 1.970139251450173e-05, - "loss": 0.4571, + "epoch": 0.5130281171794494, + "grad_norm": 0.44020060733759014, + "learning_rate": 1.9694411946225502e-05, + "loss": 0.4565, "step": 3485 }, { - "epoch": 0.5113553113553113, - "grad_norm": 0.40747726547713325, - "learning_rate": 1.9699321401736028e-05, - "loss": 0.4598, + "epoch": 0.5137641689974974, + "grad_norm": 0.41610521756030733, + "learning_rate": 1.9692307340269334e-05, + "loss": 0.4832, "step": 3490 }, { - "epoch": 0.512087912087912, - "grad_norm": 0.42005617221329955, - "learning_rate": 1.9697243240874394e-05, - "loss": 0.4582, + "epoch": 0.5145002208155454, + "grad_norm": 0.41417230996854715, + "learning_rate": 1.96901956251107e-05, + "loss": 0.446, "step": 3495 }, { - "epoch": 0.5128205128205128, - "grad_norm": 0.4117478480032374, - "learning_rate": 1.9695158033426945e-05, - "loss": 0.4634, + "epoch": 0.5152362726335934, + "grad_norm": 0.40750264954931703, + "learning_rate": 1.9688076802298525e-05, + "loss": 0.4465, "step": 3500 }, { - "epoch": 0.5135531135531135, - "grad_norm": 0.40341267325790553, - "learning_rate": 1.9693065780908907e-05, - "loss": 0.4708, + "epoch": 0.5159723244516414, + "grad_norm": 0.4170889737094148, + "learning_rate": 1.9685950873386935e-05, + "loss": 0.4539, "step": 3505 }, { - "epoch": 0.5142857142857142, - "grad_norm": 0.4306074517094488, - "learning_rate": 1.9690966484840645e-05, - "loss": 0.4583, + "epoch": 0.5167083762696894, + "grad_norm": 0.4148491025631686, + "learning_rate": 1.9683817839935278e-05, + "loss": 0.4607, "step": 3510 }, { - "epoch": 0.515018315018315, - "grad_norm": 0.40995384275820096, - "learning_rate": 1.9688860146747616e-05, - "loss": 0.4667, + "epoch": 0.5174444280877374, + "grad_norm": 0.4163873194725317, + "learning_rate": 1.968167770350812e-05, + "loss": 0.4762, "step": 3515 }, { - "epoch": 0.5157509157509158, - "grad_norm": 0.4180393839855888, - "learning_rate": 1.9686746768160422e-05, - "loss": 0.4695, + "epoch": 0.5181804799057854, + "grad_norm": 0.42547828840980223, + "learning_rate": 1.9679530465675213e-05, + "loss": 0.458, "step": 3520 }, { - "epoch": 0.5164835164835165, - "grad_norm": 0.3939921437930165, - "learning_rate": 1.9684626350614756e-05, - "loss": 0.4703, + "epoch": 0.5189165317238333, + "grad_norm": 0.43469177209885107, + "learning_rate": 1.967737612801154e-05, + "loss": 0.462, "step": 3525 }, { - "epoch": 0.5172161172161173, - "grad_norm": 0.39002670431542874, - "learning_rate": 1.968249889565145e-05, - "loss": 0.4504, + "epoch": 0.5196525835418814, + "grad_norm": 0.4100192594814962, + "learning_rate": 1.9675214692097285e-05, + "loss": 0.4417, "step": 3530 }, { - "epoch": 0.517948717948718, - "grad_norm": 0.40762066174533135, - "learning_rate": 1.968036440481643e-05, - "loss": 0.4597, + "epoch": 0.5203886353599293, + "grad_norm": 0.3912658697938237, + "learning_rate": 1.9673046159517838e-05, + "loss": 0.4667, "step": 3535 }, { - "epoch": 0.5186813186813187, - "grad_norm": 0.3970346723297381, - "learning_rate": 1.9678222879660745e-05, - "loss": 0.4592, + "epoch": 0.5211246871779773, + "grad_norm": 0.3903472216559458, + "learning_rate": 1.9670870531863795e-05, + "loss": 0.4689, "step": 3540 }, { - "epoch": 0.5194139194139195, - "grad_norm": 0.3962972174341633, - "learning_rate": 1.967607432174055e-05, - "loss": 0.4693, + "epoch": 0.5218607389960254, + "grad_norm": 0.4161083312677932, + "learning_rate": 1.966868781073095e-05, + "loss": 0.4584, "step": 3545 }, { - "epoch": 0.5201465201465202, - "grad_norm": 0.41614465682076274, - "learning_rate": 1.9673918732617116e-05, - "loss": 0.4396, + "epoch": 0.5225967908140733, + "grad_norm": 0.3938751548055341, + "learning_rate": 1.9666497997720312e-05, + "loss": 0.4574, "step": 3550 }, { - "epoch": 0.5208791208791209, - "grad_norm": 0.3931314447365937, - "learning_rate": 1.967175611385682e-05, - "loss": 0.4729, + "epoch": 0.5233328426321213, + "grad_norm": 0.39683016345756705, + "learning_rate": 1.9664301094438082e-05, + "loss": 0.454, "step": 3555 }, { - "epoch": 0.5216117216117216, - "grad_norm": 0.4307490611740214, - "learning_rate": 1.966958646703115e-05, - "loss": 0.457, + "epoch": 0.5240688944501692, + "grad_norm": 0.3798854852370915, + "learning_rate": 1.966209710249566e-05, + "loss": 0.4635, "step": 3560 }, { - "epoch": 0.5223443223443224, - "grad_norm": 0.5263107663262924, - "learning_rate": 1.966740979371669e-05, - "loss": 0.4663, + "epoch": 0.5248049462682173, + "grad_norm": 0.37628706188680294, + "learning_rate": 1.965988602350966e-05, + "loss": 0.4638, "step": 3565 }, { - "epoch": 0.5230769230769231, - "grad_norm": 0.40406648280499774, - "learning_rate": 1.9665226095495145e-05, - "loss": 0.4532, + "epoch": 0.5255409980862653, + "grad_norm": 0.37866479579838463, + "learning_rate": 1.965766785910188e-05, + "loss": 0.4651, "step": 3570 }, { - "epoch": 0.5238095238095238, - "grad_norm": 0.4000541126808561, - "learning_rate": 1.966303537395332e-05, - "loss": 0.4739, + "epoch": 0.5262770499043132, + "grad_norm": 0.40679176520597915, + "learning_rate": 1.9655442610899314e-05, + "loss": 0.4561, "step": 3575 }, { - "epoch": 0.5245421245421246, - "grad_norm": 0.4218513937972455, - "learning_rate": 1.9660837630683122e-05, - "loss": 0.4599, + "epoch": 0.5270131017223613, + "grad_norm": 0.40261444513151734, + "learning_rate": 1.9653210280534165e-05, + "loss": 0.457, "step": 3580 }, { - "epoch": 0.5252747252747253, - "grad_norm": 0.39734014409545276, - "learning_rate": 1.9658632867281554e-05, - "loss": 0.4604, + "epoch": 0.5277491535404093, + "grad_norm": 0.5761751860941852, + "learning_rate": 1.965097086964382e-05, + "loss": 0.4637, "step": 3585 }, { - "epoch": 0.526007326007326, - "grad_norm": 0.43566685531130656, - "learning_rate": 1.965642108535073e-05, - "loss": 0.4599, + "epoch": 0.5284852053584572, + "grad_norm": 0.40247146010633544, + "learning_rate": 1.9648724379870864e-05, + "loss": 0.4559, "step": 3590 }, { - "epoch": 0.5267399267399268, - "grad_norm": 0.3877333961498012, - "learning_rate": 1.965420228649786e-05, - "loss": 0.4514, + "epoch": 0.5292212571765053, + "grad_norm": 0.4075287152163133, + "learning_rate": 1.9646470812863076e-05, + "loss": 0.4677, "step": 3595 }, { - "epoch": 0.5274725274725275, - "grad_norm": 0.4037996593369899, - "learning_rate": 1.9651976472335253e-05, - "loss": 0.4733, + "epoch": 0.5299573089945532, + "grad_norm": 0.36811491522127043, + "learning_rate": 1.9644210170273414e-05, + "loss": 0.4343, "step": 3600 }, { - "epoch": 0.5282051282051282, - "grad_norm": 0.4097352848327507, - "learning_rate": 1.9649743644480322e-05, - "loss": 0.4504, + "epoch": 0.5306933608126012, + "grad_norm": 0.4078901945434575, + "learning_rate": 1.964194245376004e-05, + "loss": 0.4563, "step": 3605 }, { - "epoch": 0.528937728937729, - "grad_norm": 0.40968685362670326, - "learning_rate": 1.9647503804555562e-05, - "loss": 0.4689, + "epoch": 0.5314294126306492, + "grad_norm": 0.4002015071997187, + "learning_rate": 1.9639667664986303e-05, + "loss": 0.4488, "step": 3610 }, { - "epoch": 0.5296703296703297, - "grad_norm": 0.40407274799067217, - "learning_rate": 1.9645256954188578e-05, - "loss": 0.4809, + "epoch": 0.5321654644486972, + "grad_norm": 0.40452853995295185, + "learning_rate": 1.963738580562073e-05, + "loss": 0.4767, "step": 3615 }, { - "epoch": 0.5304029304029304, - "grad_norm": 0.43432136759647494, - "learning_rate": 1.9643003095012058e-05, - "loss": 0.4247, + "epoch": 0.5329015162667452, + "grad_norm": 0.48302106579477866, + "learning_rate": 1.9635096877337045e-05, + "loss": 0.4577, "step": 3620 }, { - "epoch": 0.5311355311355311, - "grad_norm": 0.3846967561165999, - "learning_rate": 1.9640742228663793e-05, - "loss": 0.4532, + "epoch": 0.5336375680847931, + "grad_norm": 0.37366996425582205, + "learning_rate": 1.9632800881814146e-05, + "loss": 0.4453, "step": 3625 }, { - "epoch": 0.5318681318681319, - "grad_norm": 0.39222900221330453, - "learning_rate": 1.963847435678666e-05, - "loss": 0.4707, + "epoch": 0.5343736199028412, + "grad_norm": 0.38055230387120575, + "learning_rate": 1.9630497820736128e-05, + "loss": 0.4683, "step": 3630 }, { - "epoch": 0.5326007326007326, - "grad_norm": 0.39753060827257874, - "learning_rate": 1.963619948102863e-05, - "loss": 0.4667, + "epoch": 0.5351096717208892, + "grad_norm": 0.4032298844501685, + "learning_rate": 1.9628187695792257e-05, + "loss": 0.4763, "step": 3635 }, { - "epoch": 0.5333333333333333, - "grad_norm": 0.39453610064769346, - "learning_rate": 1.9633917603042757e-05, - "loss": 0.4495, + "epoch": 0.5358457235389371, + "grad_norm": 0.39467956429798245, + "learning_rate": 1.9625870508676982e-05, + "loss": 0.4417, "step": 3640 }, { - "epoch": 0.5340659340659341, - "grad_norm": 0.42353928664626905, - "learning_rate": 1.9631628724487185e-05, - "loss": 0.5042, + "epoch": 0.5365817753569851, + "grad_norm": 0.4039574603614267, + "learning_rate": 1.9623546261089942e-05, + "loss": 0.471, "step": 3645 }, { - "epoch": 0.5347985347985348, - "grad_norm": 0.4759710558557821, - "learning_rate": 1.9629332847025154e-05, - "loss": 0.4788, + "epoch": 0.5373178271750331, + "grad_norm": 0.4038268554128066, + "learning_rate": 1.9621214954735944e-05, + "loss": 0.4745, "step": 3650 }, { - "epoch": 0.5355311355311355, - "grad_norm": 0.4128618181070763, - "learning_rate": 1.9627029972324983e-05, - "loss": 0.4623, + "epoch": 0.5380538789930811, + "grad_norm": 0.4225586718667442, + "learning_rate": 1.9618876591324973e-05, + "loss": 0.4424, "step": 3655 }, { - "epoch": 0.5362637362637362, - "grad_norm": 0.4047288745230385, - "learning_rate": 1.962472010206007e-05, - "loss": 0.4401, + "epoch": 0.5387899308111291, + "grad_norm": 0.39746799943722094, + "learning_rate": 1.9616531172572198e-05, + "loss": 0.4522, "step": 3660 }, { - "epoch": 0.536996336996337, - "grad_norm": 0.4116700375902194, - "learning_rate": 1.9622403237908907e-05, - "loss": 0.4547, + "epoch": 0.5395259826291771, + "grad_norm": 0.411921110031647, + "learning_rate": 1.9614178700197955e-05, + "loss": 0.454, "step": 3665 }, { - "epoch": 0.5377289377289377, - "grad_norm": 0.39702761823883265, - "learning_rate": 1.9620079381555062e-05, - "loss": 0.4664, + "epoch": 0.5402620344472251, + "grad_norm": 0.39706878989697264, + "learning_rate": 1.961181917592776e-05, + "loss": 0.4643, "step": 3670 }, { - "epoch": 0.5384615384615384, - "grad_norm": 0.4006365566597177, - "learning_rate": 1.9617748534687188e-05, - "loss": 0.4483, + "epoch": 0.5409980862652731, + "grad_norm": 0.39859880762184235, + "learning_rate": 1.9609452601492293e-05, + "loss": 0.4578, "step": 3675 }, { - "epoch": 0.5391941391941392, - "grad_norm": 0.4053964771147014, - "learning_rate": 1.961541069899901e-05, - "loss": 0.461, + "epoch": 0.541734138083321, + "grad_norm": 0.3925765914722752, + "learning_rate": 1.9607078978627417e-05, + "loss": 0.4457, "step": 3680 }, { - "epoch": 0.5399267399267399, - "grad_norm": 0.6423049012045474, - "learning_rate": 1.961306587618934e-05, - "loss": 0.495, + "epoch": 0.5424701899013691, + "grad_norm": 0.3973018128767815, + "learning_rate": 1.9604698309074154e-05, + "loss": 0.4636, "step": 3685 }, { - "epoch": 0.5406593406593406, - "grad_norm": 0.3910992104613754, - "learning_rate": 1.961071406796206e-05, - "loss": 0.4634, + "epoch": 0.543206241719417, + "grad_norm": 0.41295053325560216, + "learning_rate": 1.9602310594578705e-05, + "loss": 0.4673, "step": 3690 }, { - "epoch": 0.5413919413919414, - "grad_norm": 0.41479888215520155, - "learning_rate": 1.9608355276026137e-05, - "loss": 0.464, + "epoch": 0.543942293537465, + "grad_norm": 0.40261677890369485, + "learning_rate": 1.9599915836892424e-05, + "loss": 0.4554, "step": 3695 }, { - "epoch": 0.5421245421245421, - "grad_norm": 0.38381801157498935, - "learning_rate": 1.9605989502095607e-05, - "loss": 0.458, + "epoch": 0.5446783453555131, + "grad_norm": 0.4162673811273489, + "learning_rate": 1.959751403777185e-05, + "loss": 0.4709, "step": 3700 }, { - "epoch": 0.5428571428571428, - "grad_norm": 0.3726198460602332, - "learning_rate": 1.9603616747889577e-05, - "loss": 0.4591, + "epoch": 0.545414397173561, + "grad_norm": 0.3955559642902411, + "learning_rate": 1.9595105198978666e-05, + "loss": 0.4588, "step": 3705 }, { - "epoch": 0.5435897435897435, - "grad_norm": 0.409146983712297, - "learning_rate": 1.960123701513223e-05, + "epoch": 0.546150448991609, + "grad_norm": 0.4015043828053614, + "learning_rate": 1.959268932227974e-05, "loss": 0.4692, "step": 3710 }, { - "epoch": 0.5443223443223443, - "grad_norm": 0.40176825120030335, - "learning_rate": 1.9598850305552824e-05, - "loss": 0.4717, + "epoch": 0.546886500809657, + "grad_norm": 0.3852610487294835, + "learning_rate": 1.959026640944708e-05, + "loss": 0.4506, "step": 3715 }, { - "epoch": 0.545054945054945, - "grad_norm": 0.38283812772967896, - "learning_rate": 1.9596456620885675e-05, - "loss": 0.4486, + "epoch": 0.547622552627705, + "grad_norm": 0.4528320271943359, + "learning_rate": 1.9587836462257878e-05, + "loss": 0.4545, "step": 3720 }, { - "epoch": 0.5457875457875457, - "grad_norm": 0.41477686180698137, - "learning_rate": 1.9594055962870178e-05, - "loss": 0.4645, + "epoch": 0.548358604445753, + "grad_norm": 0.3882142221048134, + "learning_rate": 1.9585399482494468e-05, + "loss": 0.464, "step": 3725 }, { - "epoch": 0.5465201465201465, - "grad_norm": 0.39579938529403774, - "learning_rate": 1.9591648333250793e-05, - "loss": 0.4583, + "epoch": 0.5490946562638009, + "grad_norm": 0.4367497179561277, + "learning_rate": 1.9582955471944345e-05, + "loss": 0.4678, "step": 3730 }, { - "epoch": 0.5472527472527473, - "grad_norm": 0.38826954570377586, - "learning_rate": 1.9589233733777045e-05, - "loss": 0.463, + "epoch": 0.549830708081849, + "grad_norm": 0.3975678237393734, + "learning_rate": 1.958050443240017e-05, + "loss": 0.4541, "step": 3735 }, { - "epoch": 0.547985347985348, - "grad_norm": 0.39398242670127886, - "learning_rate": 1.958681216620352e-05, - "loss": 0.4574, + "epoch": 0.550566759899897, + "grad_norm": 0.40424599351055596, + "learning_rate": 1.957804636565975e-05, + "loss": 0.4536, "step": 3740 }, { - "epoch": 0.5487179487179488, - "grad_norm": 0.41501647675382003, - "learning_rate": 1.9584383632289877e-05, - "loss": 0.4666, + "epoch": 0.5513028117179449, + "grad_norm": 0.40267105164143574, + "learning_rate": 1.9575581273526055e-05, + "loss": 0.4648, "step": 3745 }, { - "epoch": 0.5494505494505495, - "grad_norm": 1.564754790229026, - "learning_rate": 1.9581948133800822e-05, - "loss": 0.4761, + "epoch": 0.552038863535993, + "grad_norm": 0.4116144060323841, + "learning_rate": 1.95731091578072e-05, + "loss": 0.4536, "step": 3750 }, { - "epoch": 0.5501831501831502, - "grad_norm": 0.4085577720422966, - "learning_rate": 1.9579505672506143e-05, - "loss": 0.4731, + "epoch": 0.5527749153540409, + "grad_norm": 0.3931753386822875, + "learning_rate": 1.9570630020316455e-05, + "loss": 0.4718, "step": 3755 }, { - "epoch": 0.550915750915751, - "grad_norm": 0.3965996616045536, - "learning_rate": 1.9577056250180667e-05, - "loss": 0.4641, + "epoch": 0.5535109671720889, + "grad_norm": 0.391232231865654, + "learning_rate": 1.956814386287224e-05, + "loss": 0.4694, "step": 3760 }, { - "epoch": 0.5516483516483517, - "grad_norm": 0.38306772647374016, - "learning_rate": 1.9574599868604292e-05, - "loss": 0.4632, + "epoch": 0.554247018990137, + "grad_norm": 0.391889438154536, + "learning_rate": 1.956565068729813e-05, + "loss": 0.4624, "step": 3765 }, { - "epoch": 0.5523809523809524, - "grad_norm": 0.39692838594704005, - "learning_rate": 1.957213652956197e-05, - "loss": 0.4682, + "epoch": 0.5549830708081849, + "grad_norm": 0.38870877866166037, + "learning_rate": 1.9563150495422834e-05, + "loss": 0.4646, "step": 3770 }, { - "epoch": 0.5531135531135531, - "grad_norm": 0.4139888092877681, - "learning_rate": 1.95696662348437e-05, - "loss": 0.4565, + "epoch": 0.5557191226262329, + "grad_norm": 0.40984342682564273, + "learning_rate": 1.956064328908022e-05, + "loss": 0.4616, "step": 3775 }, { - "epoch": 0.5538461538461539, - "grad_norm": 0.4038218231406527, - "learning_rate": 1.9567188986244552e-05, - "loss": 0.4542, + "epoch": 0.5564551744442808, + "grad_norm": 0.4094591347657577, + "learning_rate": 1.9558129070109294e-05, + "loss": 0.4668, "step": 3780 }, { - "epoch": 0.5545787545787546, - "grad_norm": 0.4183645918853025, - "learning_rate": 1.9564704785564634e-05, - "loss": 0.4671, + "epoch": 0.5571912262623289, + "grad_norm": 0.4107014394366221, + "learning_rate": 1.9555607840354217e-05, + "loss": 0.4706, "step": 3785 }, { - "epoch": 0.5553113553113553, - "grad_norm": 0.3880015641068925, - "learning_rate": 1.9562213634609118e-05, - "loss": 0.4651, + "epoch": 0.5579272780803769, + "grad_norm": 0.41844199725210224, + "learning_rate": 1.9553079601664274e-05, + "loss": 0.4521, "step": 3790 }, { - "epoch": 0.5560439560439561, - "grad_norm": 0.406983465349988, - "learning_rate": 1.9559715535188216e-05, - "loss": 0.4378, + "epoch": 0.5586633298984248, + "grad_norm": 0.3708737602656749, + "learning_rate": 1.9550544355893902e-05, + "loss": 0.449, "step": 3795 }, { - "epoch": 0.5567765567765568, - "grad_norm": 0.3789384861043702, - "learning_rate": 1.955721048911719e-05, - "loss": 0.4609, + "epoch": 0.5593993817164729, + "grad_norm": 0.3912874868887559, + "learning_rate": 1.954800210490268e-05, + "loss": 0.447, "step": 3800 }, { - "epoch": 0.5575091575091575, - "grad_norm": 0.3746455880435221, - "learning_rate": 1.9554698498216362e-05, - "loss": 0.4552, + "epoch": 0.5601354335345208, + "grad_norm": 0.3922403999244812, + "learning_rate": 1.9545452850555323e-05, + "loss": 0.4547, "step": 3805 }, { - "epoch": 0.5582417582417583, - "grad_norm": 0.4021508409555428, - "learning_rate": 1.9552179564311088e-05, - "loss": 0.4597, + "epoch": 0.5608714853525688, + "grad_norm": 0.4726372690020183, + "learning_rate": 1.9542896594721678e-05, + "loss": 0.4688, "step": 3810 }, { - "epoch": 0.558974358974359, - "grad_norm": 0.41079276918661684, - "learning_rate": 1.954965368923177e-05, - "loss": 0.4669, + "epoch": 0.5616075371706168, + "grad_norm": 0.3912213717737321, + "learning_rate": 1.9540333339276726e-05, + "loss": 0.4432, "step": 3815 }, { - "epoch": 0.5597069597069597, - "grad_norm": 0.4101630906550437, - "learning_rate": 1.9547120874813862e-05, - "loss": 0.4678, + "epoch": 0.5623435889886648, + "grad_norm": 0.3772805758009394, + "learning_rate": 1.9537763086100598e-05, + "loss": 0.4585, "step": 3820 }, { - "epoch": 0.5604395604395604, - "grad_norm": 0.3612610892397642, - "learning_rate": 1.9544581122897854e-05, - "loss": 0.4554, + "epoch": 0.5630796408067128, + "grad_norm": 0.38255828351165105, + "learning_rate": 1.953518583707854e-05, + "loss": 0.4326, "step": 3825 }, { - "epoch": 0.5611721611721612, - "grad_norm": 0.40697790168410697, - "learning_rate": 1.9542034435329276e-05, - "loss": 0.4623, + "epoch": 0.5638156926247608, + "grad_norm": 0.4029940798806384, + "learning_rate": 1.9532601594100936e-05, + "loss": 0.4619, "step": 3830 }, { - "epoch": 0.5619047619047619, - "grad_norm": 0.4282245049901043, - "learning_rate": 1.9539480813958703e-05, - "loss": 0.4513, + "epoch": 0.5645517444428088, + "grad_norm": 0.406308443969887, + "learning_rate": 1.9530010359063306e-05, + "loss": 0.4533, "step": 3835 }, { - "epoch": 0.5626373626373626, - "grad_norm": 0.3954324592961608, - "learning_rate": 1.953692026064175e-05, - "loss": 0.4499, + "epoch": 0.5652877962608568, + "grad_norm": 0.39335542730985024, + "learning_rate": 1.9527412133866285e-05, + "loss": 0.4503, "step": 3840 }, { - "epoch": 0.5633699633699634, - "grad_norm": 0.40670314241955646, - "learning_rate": 1.9534352777239055e-05, - "loss": 0.4541, + "epoch": 0.5660238480789047, + "grad_norm": 0.38634711158644086, + "learning_rate": 1.9524806920415646e-05, + "loss": 0.4512, "step": 3845 }, { - "epoch": 0.5641025641025641, - "grad_norm": 0.4161193541654707, - "learning_rate": 1.9531778365616315e-05, - "loss": 0.4721, + "epoch": 0.5667598998969527, + "grad_norm": 0.4022091151221169, + "learning_rate": 1.952219472062229e-05, + "loss": 0.4538, "step": 3850 }, { - "epoch": 0.5648351648351648, - "grad_norm": 0.411357018786779, - "learning_rate": 1.952919702764424e-05, - "loss": 0.4613, + "epoch": 0.5674959517150008, + "grad_norm": 0.4061506497101422, + "learning_rate": 1.951957553640223e-05, + "loss": 0.4561, "step": 3855 }, { - "epoch": 0.5655677655677656, - "grad_norm": 0.3839854092932068, - "learning_rate": 1.952660876519858e-05, - "loss": 0.4631, + "epoch": 0.5682320035330487, + "grad_norm": 0.392747642013472, + "learning_rate": 1.9516949369676613e-05, + "loss": 0.456, "step": 3860 }, { - "epoch": 0.5663003663003663, - "grad_norm": 0.4085926286619489, - "learning_rate": 1.952401358016012e-05, - "loss": 0.4626, + "epoch": 0.5689680553510967, + "grad_norm": 0.3917136099359976, + "learning_rate": 1.9514316222371703e-05, + "loss": 0.4628, "step": 3865 }, { - "epoch": 0.567032967032967, - "grad_norm": 0.44557607925036696, - "learning_rate": 1.9521411474414683e-05, - "loss": 0.4656, + "epoch": 0.5697041071691447, + "grad_norm": 0.38856963846046494, + "learning_rate": 1.9511676096418884e-05, + "loss": 0.4402, "step": 3870 }, { - "epoch": 0.5677655677655677, - "grad_norm": 0.45987074599106403, - "learning_rate": 1.95188024498531e-05, - "loss": 0.4495, + "epoch": 0.5704401589871927, + "grad_norm": 0.37840817197647325, + "learning_rate": 1.950902899375466e-05, + "loss": 0.447, "step": 3875 }, { - "epoch": 0.5684981684981685, - "grad_norm": 0.43349497128485376, - "learning_rate": 1.951618650837124e-05, - "loss": 0.4531, + "epoch": 0.5711762108052407, + "grad_norm": 0.4117374694793402, + "learning_rate": 1.9506374916320655e-05, + "loss": 0.4486, "step": 3880 }, { - "epoch": 0.5692307692307692, - "grad_norm": 0.3976789014010414, - "learning_rate": 1.9513563651870015e-05, - "loss": 0.462, + "epoch": 0.5719122626232886, + "grad_norm": 0.3961584937171518, + "learning_rate": 1.9503713866063606e-05, + "loss": 0.4724, "step": 3885 }, { - "epoch": 0.5699633699633699, - "grad_norm": 0.407893568851839, - "learning_rate": 1.951093388225533e-05, - "loss": 0.4486, + "epoch": 0.5726483144413367, + "grad_norm": 0.39437169614191947, + "learning_rate": 1.950104584493536e-05, + "loss": 0.4427, "step": 3890 }, { - "epoch": 0.5706959706959707, - "grad_norm": 0.3879247914118072, - "learning_rate": 1.950829720143814e-05, - "loss": 0.4664, + "epoch": 0.5733843662593847, + "grad_norm": 0.38806710304574166, + "learning_rate": 1.9498370854892882e-05, + "loss": 0.4675, "step": 3895 }, { - "epoch": 0.5714285714285714, - "grad_norm": 0.39537459394340074, - "learning_rate": 1.95056536113344e-05, - "loss": 0.4664, + "epoch": 0.5741204180774326, + "grad_norm": 0.40014580513422415, + "learning_rate": 1.9495688897898252e-05, + "loss": 0.4606, "step": 3900 }, { - "epoch": 0.5721611721611721, - "grad_norm": 0.39366888693656604, - "learning_rate": 1.9503003113865112e-05, - "loss": 0.4516, + "epoch": 0.5748564698954807, + "grad_norm": 0.3966916281178557, + "learning_rate": 1.9492999975918655e-05, + "loss": 0.4704, "step": 3905 }, { - "epoch": 0.5728937728937729, - "grad_norm": 0.3782777435360429, - "learning_rate": 1.9500345710956278e-05, - "loss": 0.4397, + "epoch": 0.5755925217135286, + "grad_norm": 0.3888609942340231, + "learning_rate": 1.9490304090926388e-05, + "loss": 0.4663, "step": 3910 }, { - "epoch": 0.5736263736263736, - "grad_norm": 0.4016868473347594, - "learning_rate": 1.949768140453892e-05, - "loss": 0.4573, + "epoch": 0.5763285735315766, + "grad_norm": 0.39360525777740146, + "learning_rate": 1.948760124489885e-05, + "loss": 0.4682, "step": 3915 }, { - "epoch": 0.5743589743589743, - "grad_norm": 0.40596335251444, - "learning_rate": 1.9495010196549082e-05, - "loss": 0.4649, + "epoch": 0.5770646253496247, + "grad_norm": 0.37978437058213976, + "learning_rate": 1.948489143981855e-05, + "loss": 0.4394, "step": 3920 }, { - "epoch": 0.575091575091575, - "grad_norm": 0.39015529793982856, - "learning_rate": 1.9492332088927818e-05, - "loss": 0.4516, + "epoch": 0.5778006771676726, + "grad_norm": 0.4076077124842079, + "learning_rate": 1.9482174677673104e-05, + "loss": 0.4319, "step": 3925 }, { - "epoch": 0.5758241758241758, - "grad_norm": 0.38703627226223575, - "learning_rate": 1.94896470836212e-05, - "loss": 0.4557, + "epoch": 0.5785367289857206, + "grad_norm": 0.4150972595842849, + "learning_rate": 1.947945096045522e-05, + "loss": 0.4487, "step": 3930 }, { - "epoch": 0.5765567765567765, - "grad_norm": 0.39090512141444317, - "learning_rate": 1.948695518258031e-05, - "loss": 0.4496, + "epoch": 0.5792727808037685, + "grad_norm": 0.4588906803973213, + "learning_rate": 1.9476720290162724e-05, + "loss": 0.4616, "step": 3935 }, { - "epoch": 0.5772893772893772, - "grad_norm": 0.3959545873598666, - "learning_rate": 1.948425638776124e-05, - "loss": 0.4554, + "epoch": 0.5800088326218166, + "grad_norm": 0.41074938388442134, + "learning_rate": 1.947398266879853e-05, + "loss": 0.4666, "step": 3940 }, { - "epoch": 0.578021978021978, - "grad_norm": 0.394645558625364, - "learning_rate": 1.9481550701125095e-05, - "loss": 0.4622, + "epoch": 0.5807448844398646, + "grad_norm": 0.3933742489177997, + "learning_rate": 1.947123809837065e-05, + "loss": 0.4609, "step": 3945 }, { - "epoch": 0.5787545787545788, - "grad_norm": 0.38045088173518693, - "learning_rate": 1.9478838124637982e-05, - "loss": 0.4321, + "epoch": 0.5814809362579125, + "grad_norm": 0.3909008868409509, + "learning_rate": 1.9468486580892204e-05, + "loss": 0.4537, "step": 3950 }, { - "epoch": 0.5794871794871795, - "grad_norm": 0.417808747673523, - "learning_rate": 1.9476118660271023e-05, - "loss": 0.4543, + "epoch": 0.5822169880759606, + "grad_norm": 0.3974581298450175, + "learning_rate": 1.9465728118381388e-05, + "loss": 0.462, "step": 3955 }, { - "epoch": 0.5802197802197803, - "grad_norm": 0.40359896669430806, - "learning_rate": 1.9473392310000337e-05, - "loss": 0.4615, + "epoch": 0.5829530398940085, + "grad_norm": 0.43181893833837104, + "learning_rate": 1.9462962712861518e-05, + "loss": 0.4716, "step": 3960 }, { - "epoch": 0.580952380952381, - "grad_norm": 0.4001322368419974, - "learning_rate": 1.947065907580705e-05, - "loss": 0.4521, + "epoch": 0.5836890917120565, + "grad_norm": 0.39145520327042055, + "learning_rate": 1.946019036636098e-05, + "loss": 0.4915, "step": 3965 }, { - "epoch": 0.5816849816849817, - "grad_norm": 0.40688228006534505, - "learning_rate": 1.9467918959677294e-05, - "loss": 0.4522, + "epoch": 0.5844251435301046, + "grad_norm": 0.4547161522085445, + "learning_rate": 1.9457411080913267e-05, + "loss": 0.473, "step": 3970 }, { - "epoch": 0.5824175824175825, - "grad_norm": 0.4085036595801529, - "learning_rate": 1.9465171963602193e-05, - "loss": 0.4577, + "epoch": 0.5851611953481525, + "grad_norm": 0.37696073979755373, + "learning_rate": 1.945462485855695e-05, + "loss": 0.4642, "step": 3975 }, { - "epoch": 0.5831501831501832, - "grad_norm": 0.3675283160404854, - "learning_rate": 1.9462418089577876e-05, - "loss": 0.449, + "epoch": 0.5858972471662005, + "grad_norm": 0.3850517375475203, + "learning_rate": 1.945183170133569e-05, + "loss": 0.4446, "step": 3980 }, { - "epoch": 0.5838827838827839, - "grad_norm": 0.39340033926463114, - "learning_rate": 1.9459657339605475e-05, - "loss": 0.442, + "epoch": 0.5866332989842485, + "grad_norm": 0.3896736319504672, + "learning_rate": 1.9449031611298245e-05, + "loss": 0.4613, "step": 3985 }, { - "epoch": 0.5846153846153846, - "grad_norm": 0.38981845508151175, - "learning_rate": 1.9456889715691107e-05, - "loss": 0.4502, + "epoch": 0.5873693508022965, + "grad_norm": 0.37559385194345996, + "learning_rate": 1.9446224590498447e-05, + "loss": 0.4565, "step": 3990 }, { - "epoch": 0.5853479853479854, - "grad_norm": 0.39502082453513393, - "learning_rate": 1.9454115219845895e-05, - "loss": 0.4892, + "epoch": 0.5881054026203445, + "grad_norm": 0.4316997268846546, + "learning_rate": 1.9443410640995213e-05, + "loss": 0.45, "step": 3995 }, { - "epoch": 0.5860805860805861, - "grad_norm": 0.38910933952745375, - "learning_rate": 1.9451333854085945e-05, - "loss": 0.4575, + "epoch": 0.5888414544383924, + "grad_norm": 0.4020080816861019, + "learning_rate": 1.9440589764852552e-05, + "loss": 0.457, "step": 4000 }, { - "epoch": 0.5868131868131868, - "grad_norm": 0.3872505317049606, - "learning_rate": 1.9448545620432364e-05, - "loss": 0.4572, + "epoch": 0.5895775062564405, + "grad_norm": 0.3968685516640876, + "learning_rate": 1.943776196413954e-05, + "loss": 0.4549, "step": 4005 }, { - "epoch": 0.5875457875457876, - "grad_norm": 0.4154623026662341, - "learning_rate": 1.9445750520911245e-05, - "loss": 0.4364, + "epoch": 0.5903135580744885, + "grad_norm": 0.4383342608575663, + "learning_rate": 1.9434927240930336e-05, + "loss": 0.4515, "step": 4010 }, { - "epoch": 0.5882783882783883, - "grad_norm": 0.40700084808831655, - "learning_rate": 1.944294855755367e-05, - "loss": 0.4646, + "epoch": 0.5910496098925364, + "grad_norm": 0.39025368242219294, + "learning_rate": 1.9432085597304184e-05, + "loss": 0.4563, "step": 4015 }, { - "epoch": 0.589010989010989, - "grad_norm": 0.38914455931333786, - "learning_rate": 1.9440139732395712e-05, - "loss": 0.4498, + "epoch": 0.5917856617105844, + "grad_norm": 0.38303033158729466, + "learning_rate": 1.9429237035345396e-05, + "loss": 0.4507, "step": 4020 }, { - "epoch": 0.5897435897435898, - "grad_norm": 0.3927240714457741, - "learning_rate": 1.9437324047478426e-05, - "loss": 0.4706, + "epoch": 0.5925217135286324, + "grad_norm": 0.3715174410319025, + "learning_rate": 1.9426381557143364e-05, + "loss": 0.4637, "step": 4025 }, { - "epoch": 0.5904761904761905, - "grad_norm": 0.38701060865579207, - "learning_rate": 1.943450150484786e-05, - "loss": 0.4518, + "epoch": 0.5932577653466804, + "grad_norm": 0.425662209279403, + "learning_rate": 1.9423519164792547e-05, + "loss": 0.4546, "step": 4030 }, { - "epoch": 0.5912087912087912, - "grad_norm": 0.37099875040863006, - "learning_rate": 1.9431672106555033e-05, - "loss": 0.4522, + "epoch": 0.5939938171647284, + "grad_norm": 0.3821996726394133, + "learning_rate": 1.942064986039248e-05, + "loss": 0.4449, "step": 4035 }, { - "epoch": 0.591941391941392, - "grad_norm": 0.3931898441591413, - "learning_rate": 1.942883585465595e-05, - "loss": 0.4649, + "epoch": 0.5947298689827764, + "grad_norm": 0.39339832871267766, + "learning_rate": 1.941777364604777e-05, + "loss": 0.4543, "step": 4040 }, { - "epoch": 0.5926739926739927, - "grad_norm": 0.390981673455716, - "learning_rate": 1.9425992751211605e-05, - "loss": 0.4564, + "epoch": 0.5954659208008244, + "grad_norm": 0.40259414666017407, + "learning_rate": 1.9414890523868084e-05, + "loss": 0.4654, "step": 4045 }, { - "epoch": 0.5934065934065934, - "grad_norm": 0.3851165832154698, - "learning_rate": 1.9423142798287958e-05, - "loss": 0.4556, + "epoch": 0.5962019726188724, + "grad_norm": 0.3880847307330742, + "learning_rate": 1.9412000495968165e-05, + "loss": 0.4596, "step": 4050 }, { - "epoch": 0.5941391941391941, - "grad_norm": 0.6688175508134159, - "learning_rate": 1.9420285997955953e-05, - "loss": 0.4733, + "epoch": 0.5969380244369203, + "grad_norm": 0.398812390374236, + "learning_rate": 1.9409103564467813e-05, + "loss": 0.4561, "step": 4055 }, { - "epoch": 0.5948717948717949, - "grad_norm": 0.38458780122196795, - "learning_rate": 1.941742235229151e-05, - "loss": 0.4545, + "epoch": 0.5976740762549684, + "grad_norm": 0.4026502125088563, + "learning_rate": 1.9406199731491898e-05, + "loss": 0.4623, "step": 4060 }, { - "epoch": 0.5956043956043956, - "grad_norm": 0.4032597652712211, - "learning_rate": 1.9414551863375524e-05, - "loss": 0.4498, + "epoch": 0.5984101280730163, + "grad_norm": 0.39108455961406485, + "learning_rate": 1.9403288999170353e-05, + "loss": 0.4559, "step": 4065 }, { - "epoch": 0.5963369963369963, - "grad_norm": 0.45669759618264094, - "learning_rate": 1.941167453329386e-05, - "loss": 0.4591, + "epoch": 0.5991461798910643, + "grad_norm": 0.3927115580987639, + "learning_rate": 1.9400371369638164e-05, + "loss": 0.4596, "step": 4070 }, { - "epoch": 0.5970695970695971, - "grad_norm": 0.4024185340241739, - "learning_rate": 1.9408790364137345e-05, - "loss": 0.4642, + "epoch": 0.5998822317091124, + "grad_norm": 0.38912467663104383, + "learning_rate": 1.9397446845035384e-05, + "loss": 0.4533, "step": 4075 }, { - "epoch": 0.5978021978021978, - "grad_norm": 0.3828519017237699, - "learning_rate": 1.94058993580018e-05, - "loss": 0.4671, + "epoch": 0.6006182835271603, + "grad_norm": 0.3971049468366541, + "learning_rate": 1.9394515427507117e-05, + "loss": 0.4434, "step": 4080 }, { - "epoch": 0.5985347985347985, - "grad_norm": 0.4034508466964574, - "learning_rate": 1.9403001516987992e-05, - "loss": 0.4558, + "epoch": 0.6013543353452083, + "grad_norm": 0.48869481454910163, + "learning_rate": 1.939157711920353e-05, + "loss": 0.4528, "step": 4085 }, { - "epoch": 0.5992673992673992, - "grad_norm": 0.38983586585639907, - "learning_rate": 1.9400096843201665e-05, - "loss": 0.4697, + "epoch": 0.6020903871632562, + "grad_norm": 0.39438450042318346, + "learning_rate": 1.9388631922279835e-05, + "loss": 0.4719, "step": 4090 }, { - "epoch": 0.6, - "grad_norm": 0.38934409647363133, - "learning_rate": 1.939718533875352e-05, - "loss": 0.4674, + "epoch": 0.6028264389813043, + "grad_norm": 0.3889958357897293, + "learning_rate": 1.9385679838896304e-05, + "loss": 0.4533, "step": 4095 }, { - "epoch": 0.6007326007326007, - "grad_norm": 0.40769464285287194, - "learning_rate": 1.9394267005759236e-05, - "loss": 0.4743, + "epoch": 0.6035624907993523, + "grad_norm": 0.4065933272164289, + "learning_rate": 1.938272087121826e-05, + "loss": 0.4566, "step": 4100 }, { - "epoch": 0.6014652014652014, - "grad_norm": 0.418201033254751, - "learning_rate": 1.9391341846339435e-05, - "loss": 0.4559, + "epoch": 0.6042985426174002, + "grad_norm": 0.40036695766534475, + "learning_rate": 1.9379755021416077e-05, + "loss": 0.4684, "step": 4105 }, { - "epoch": 0.6021978021978022, - "grad_norm": 0.39055369127735, - "learning_rate": 1.9388409862619716e-05, - "loss": 0.4668, + "epoch": 0.6050345944354483, + "grad_norm": 0.4185967221049071, + "learning_rate": 1.9376782291665165e-05, + "loss": 0.4687, "step": 4110 }, { - "epoch": 0.6029304029304029, - "grad_norm": 0.39385123346625744, - "learning_rate": 1.938547105673063e-05, - "loss": 0.4493, + "epoch": 0.6057706462534962, + "grad_norm": 0.3647349786411818, + "learning_rate": 1.9373802684145997e-05, + "loss": 0.43, "step": 4115 }, { - "epoch": 0.6036630036630036, - "grad_norm": 0.3778931973342424, - "learning_rate": 1.9382525430807684e-05, - "loss": 0.4612, + "epoch": 0.6065066980715442, + "grad_norm": 0.37151788676087305, + "learning_rate": 1.9370816201044074e-05, + "loss": 0.438, "step": 4120 }, { - "epoch": 0.6043956043956044, - "grad_norm": 0.3653423338067703, - "learning_rate": 1.9379572986991342e-05, - "loss": 0.4517, + "epoch": 0.6072427498895923, + "grad_norm": 0.398271370044487, + "learning_rate": 1.9367822844549963e-05, + "loss": 0.4749, "step": 4125 }, { - "epoch": 0.6051282051282051, - "grad_norm": 0.3823440864145797, - "learning_rate": 1.9376613727427025e-05, - "loss": 0.4806, + "epoch": 0.6079788017076402, + "grad_norm": 0.3891198356257357, + "learning_rate": 1.9364822616859243e-05, + "loss": 0.4516, "step": 4130 }, { - "epoch": 0.6058608058608058, - "grad_norm": 0.3942364943467247, - "learning_rate": 1.93736476542651e-05, - "loss": 0.4692, + "epoch": 0.6087148535256882, + "grad_norm": 0.39378864796931595, + "learning_rate": 1.936181552017256e-05, + "loss": 0.4797, "step": 4135 }, { - "epoch": 0.6065934065934065, - "grad_norm": 0.413109206537163, - "learning_rate": 1.93706747696609e-05, - "loss": 0.4471, + "epoch": 0.6094509053437362, + "grad_norm": 0.3933386067929528, + "learning_rate": 1.935880155669558e-05, + "loss": 0.4619, "step": 4140 }, { - "epoch": 0.6073260073260073, - "grad_norm": 0.39856129006035784, - "learning_rate": 1.936769507577468e-05, - "loss": 0.4596, + "epoch": 0.6101869571617842, + "grad_norm": 0.37594319073788085, + "learning_rate": 1.935578072863902e-05, + "loss": 0.4587, "step": 4145 }, { - "epoch": 0.608058608058608, - "grad_norm": 0.41314992268721973, - "learning_rate": 1.936470857477168e-05, - "loss": 0.4552, + "epoch": 0.6109230089798322, + "grad_norm": 0.40009248916342544, + "learning_rate": 1.935275303821861e-05, + "loss": 0.4795, "step": 4150 }, { - "epoch": 0.6087912087912087, - "grad_norm": 0.3914732298327829, - "learning_rate": 1.9361715268822056e-05, - "loss": 0.4686, + "epoch": 0.6116590607978801, + "grad_norm": 0.3877757563012712, + "learning_rate": 1.9349718487655145e-05, + "loss": 0.4585, "step": 4155 }, { - "epoch": 0.6095238095238096, - "grad_norm": 0.3889002217098044, - "learning_rate": 1.935871516010092e-05, - "loss": 0.4724, + "epoch": 0.6123951126159282, + "grad_norm": 0.39613251424470064, + "learning_rate": 1.934667707917443e-05, + "loss": 0.449, "step": 4160 }, { - "epoch": 0.6102564102564103, - "grad_norm": 0.38348773145064013, - "learning_rate": 1.9355708250788326e-05, - "loss": 0.444, + "epoch": 0.6131311644339762, + "grad_norm": 0.4474668269916825, + "learning_rate": 1.9343628815007294e-05, + "loss": 0.457, "step": 4165 }, { - "epoch": 0.610989010989011, - "grad_norm": 0.45372994439130565, - "learning_rate": 1.9352694543069278e-05, - "loss": 0.4467, + "epoch": 0.6138672162520241, + "grad_norm": 0.398639026676931, + "learning_rate": 1.9340573697389624e-05, + "loss": 0.4688, "step": 4170 }, { - "epoch": 0.6117216117216118, - "grad_norm": 0.4063317369558111, - "learning_rate": 1.9349674039133706e-05, - "loss": 0.4899, + "epoch": 0.6146032680700722, + "grad_norm": 0.39381851068614493, + "learning_rate": 1.9337511728562304e-05, + "loss": 0.4627, "step": 4175 }, { - "epoch": 0.6124542124542125, - "grad_norm": 0.38693500581456275, - "learning_rate": 1.9346646741176487e-05, - "loss": 0.4607, + "epoch": 0.6153393198881201, + "grad_norm": 0.3847963629778539, + "learning_rate": 1.9334442910771252e-05, + "loss": 0.4456, "step": 4180 }, { - "epoch": 0.6131868131868132, - "grad_norm": 0.4298047483385306, - "learning_rate": 1.9343612651397435e-05, - "loss": 0.458, + "epoch": 0.6160753717061681, + "grad_norm": 0.3906474233560638, + "learning_rate": 1.9331367246267425e-05, + "loss": 0.4397, "step": 4185 }, { - "epoch": 0.613919413919414, - "grad_norm": 0.39996348193380593, - "learning_rate": 1.9340571772001295e-05, - "loss": 0.458, + "epoch": 0.6168114235242161, + "grad_norm": 0.40814516018326935, + "learning_rate": 1.932828473730678e-05, + "loss": 0.4547, "step": 4190 }, { - "epoch": 0.6146520146520147, - "grad_norm": 0.4093122753079218, - "learning_rate": 1.933752410519775e-05, - "loss": 0.4482, + "epoch": 0.6175474753422641, + "grad_norm": 0.39056453129841734, + "learning_rate": 1.9325195386150305e-05, + "loss": 0.4419, "step": 4195 }, { - "epoch": 0.6153846153846154, - "grad_norm": 0.38555333230727856, - "learning_rate": 1.9334469653201412e-05, - "loss": 0.4591, + "epoch": 0.6182835271603121, + "grad_norm": 0.3708542016030073, + "learning_rate": 1.932209919506401e-05, + "loss": 0.4402, "step": 4200 }, { - "epoch": 0.6161172161172161, - "grad_norm": 0.37088920956413135, - "learning_rate": 1.9331408418231826e-05, - "loss": 0.4534, + "epoch": 0.6190195789783601, + "grad_norm": 0.3906520277972606, + "learning_rate": 1.9318996166318915e-05, + "loss": 0.4549, "step": 4205 }, { - "epoch": 0.6168498168498169, - "grad_norm": 0.4050551280386644, - "learning_rate": 1.9328340402513467e-05, - "loss": 0.4523, + "epoch": 0.619755630796408, + "grad_norm": 0.3873093062714683, + "learning_rate": 1.9315886302191056e-05, + "loss": 0.4529, "step": 4210 }, { - "epoch": 0.6175824175824176, - "grad_norm": 0.44848146017548013, - "learning_rate": 1.9325265608275736e-05, - "loss": 0.4624, + "epoch": 0.6204916826144561, + "grad_norm": 0.3724665558860097, + "learning_rate": 1.9312769604961486e-05, + "loss": 0.4708, "step": 4215 }, { - "epoch": 0.6183150183150183, - "grad_norm": 0.3832437196495983, - "learning_rate": 1.932218403775295e-05, - "loss": 0.4483, + "epoch": 0.621227734432504, + "grad_norm": 0.3789009595800291, + "learning_rate": 1.930964607691627e-05, + "loss": 0.4558, "step": 4220 }, { - "epoch": 0.6190476190476191, - "grad_norm": 0.38659377362158687, - "learning_rate": 1.9319095693184367e-05, - "loss": 0.4601, + "epoch": 0.621963786250552, + "grad_norm": 0.37528776730234276, + "learning_rate": 1.9306515720346485e-05, + "loss": 0.4578, "step": 4225 }, { - "epoch": 0.6197802197802198, - "grad_norm": 0.38946115154622146, - "learning_rate": 1.931600057681416e-05, - "loss": 0.4665, + "epoch": 0.6226998380686001, + "grad_norm": 0.39111547258192264, + "learning_rate": 1.93033785375482e-05, + "loss": 0.4421, "step": 4230 }, { - "epoch": 0.6205128205128205, - "grad_norm": 0.396423053727006, - "learning_rate": 1.9312898690891416e-05, - "loss": 0.4485, + "epoch": 0.623435889886648, + "grad_norm": 0.38703708176094, + "learning_rate": 1.9300234530822522e-05, + "loss": 0.4691, "step": 4235 }, { - "epoch": 0.6212454212454213, - "grad_norm": 0.36984615870705156, - "learning_rate": 1.9309790037670152e-05, - "loss": 0.4681, + "epoch": 0.624171941704696, + "grad_norm": 0.40072906015264564, + "learning_rate": 1.929708370247554e-05, + "loss": 0.4407, "step": 4240 }, { - "epoch": 0.621978021978022, - "grad_norm": 0.3906259863054413, - "learning_rate": 1.9306674619409297e-05, - "loss": 0.4562, + "epoch": 0.624907993522744, + "grad_norm": 0.4031722160372937, + "learning_rate": 1.9293926054818346e-05, + "loss": 0.4685, "step": 4245 }, { - "epoch": 0.6227106227106227, - "grad_norm": 0.3777272852941437, - "learning_rate": 1.9303552438372698e-05, - "loss": 0.4423, + "epoch": 0.625644045340792, + "grad_norm": 0.39123399596163366, + "learning_rate": 1.9290761590167047e-05, + "loss": 0.4409, "step": 4250 }, { - "epoch": 0.6234432234432234, - "grad_norm": 0.3685244570964051, - "learning_rate": 1.9300423496829112e-05, - "loss": 0.4373, + "epoch": 0.62638009715884, + "grad_norm": 0.3931115302109413, + "learning_rate": 1.9287590310842742e-05, + "loss": 0.4546, "step": 4255 }, { - "epoch": 0.6241758241758242, - "grad_norm": 0.3942685902577395, - "learning_rate": 1.929728779705221e-05, - "loss": 0.4655, + "epoch": 0.6271161489768879, + "grad_norm": 0.38239123144484477, + "learning_rate": 1.9284412219171527e-05, + "loss": 0.451, "step": 4260 }, { - "epoch": 0.6249084249084249, - "grad_norm": 0.3903998305825669, - "learning_rate": 1.929414534132058e-05, - "loss": 0.4307, + "epoch": 0.627852200794936, + "grad_norm": 0.38417626777381236, + "learning_rate": 1.9281227317484505e-05, + "loss": 0.4561, "step": 4265 }, { - "epoch": 0.6256410256410256, - "grad_norm": 0.40182735650557905, - "learning_rate": 1.9290996131917712e-05, - "loss": 0.4333, + "epoch": 0.6285882526129839, + "grad_norm": 0.3815448864691956, + "learning_rate": 1.9278035608117757e-05, + "loss": 0.4703, "step": 4270 }, { - "epoch": 0.6263736263736264, - "grad_norm": 0.40889635313254835, - "learning_rate": 1.9287840171132007e-05, - "loss": 0.4669, + "epoch": 0.6293243044310319, + "grad_norm": 0.3826401045233026, + "learning_rate": 1.9274837093412377e-05, + "loss": 0.4466, "step": 4275 }, { - "epoch": 0.6271062271062271, - "grad_norm": 0.38070505593169707, - "learning_rate": 1.9284677461256774e-05, - "loss": 0.4553, + "epoch": 0.63006035624908, + "grad_norm": 0.408795179267246, + "learning_rate": 1.9271631775714435e-05, + "loss": 0.4383, "step": 4280 }, { - "epoch": 0.6278388278388278, - "grad_norm": 0.38871611338253864, - "learning_rate": 1.928150800459022e-05, - "loss": 0.44, + "epoch": 0.6307964080671279, + "grad_norm": 0.3738144718546381, + "learning_rate": 1.9268419657374998e-05, + "loss": 0.4628, "step": 4285 }, { - "epoch": 0.6285714285714286, - "grad_norm": 0.3996118603513095, - "learning_rate": 1.9278331803435456e-05, - "loss": 0.4554, + "epoch": 0.6315324598851759, + "grad_norm": 0.41880972530769106, + "learning_rate": 1.926520074075012e-05, + "loss": 0.4519, "step": 4290 }, { - "epoch": 0.6293040293040293, - "grad_norm": 0.39601564233233194, - "learning_rate": 1.92751488601005e-05, - "loss": 0.4575, + "epoch": 0.632268511703224, + "grad_norm": 0.3929317274722913, + "learning_rate": 1.9261975028200845e-05, + "loss": 0.4562, "step": 4295 }, { - "epoch": 0.63003663003663, - "grad_norm": 0.3861021385250994, - "learning_rate": 1.9271959176898266e-05, - "loss": 0.4486, + "epoch": 0.6330045635212719, + "grad_norm": 0.3941719954305297, + "learning_rate": 1.9258742522093193e-05, + "loss": 0.4535, "step": 4300 }, { - "epoch": 0.6307692307692307, - "grad_norm": 0.3889731285104663, - "learning_rate": 1.9268762756146563e-05, - "loss": 0.4657, + "epoch": 0.6337406153393199, + "grad_norm": 0.3962349391567529, + "learning_rate": 1.9255503224798174e-05, + "loss": 0.4411, "step": 4305 }, { - "epoch": 0.6315018315018315, - "grad_norm": 0.3694210604845663, - "learning_rate": 1.9265559600168097e-05, - "loss": 0.4494, + "epoch": 0.6344766671573678, + "grad_norm": 0.39061925912957995, + "learning_rate": 1.925225713869178e-05, + "loss": 0.4616, "step": 4310 }, { - "epoch": 0.6322344322344322, - "grad_norm": 0.39217980261947694, - "learning_rate": 1.926234971129047e-05, - "loss": 0.45, + "epoch": 0.6352127189754159, + "grad_norm": 0.39295510353538005, + "learning_rate": 1.9249004266154973e-05, + "loss": 0.4517, "step": 4315 }, { - "epoch": 0.6329670329670329, - "grad_norm": 0.3806373972050949, - "learning_rate": 1.9259133091846175e-05, - "loss": 0.4551, + "epoch": 0.6359487707934639, + "grad_norm": 0.3833489356645117, + "learning_rate": 1.924574460957371e-05, + "loss": 0.4549, "step": 4320 }, { - "epoch": 0.6336996336996337, - "grad_norm": 0.39546782249074924, - "learning_rate": 1.92559097441726e-05, - "loss": 0.4525, + "epoch": 0.6366848226115118, + "grad_norm": 0.40517556279328376, + "learning_rate": 1.9242478171338903e-05, + "loss": 0.4553, "step": 4325 }, { - "epoch": 0.6344322344322344, - "grad_norm": 0.377373667713864, - "learning_rate": 1.9252679670612012e-05, - "loss": 0.4503, + "epoch": 0.6374208744295599, + "grad_norm": 0.3922899929303947, + "learning_rate": 1.9239204953846456e-05, + "loss": 0.465, "step": 4330 }, { - "epoch": 0.6351648351648351, - "grad_norm": 0.38167574564074974, - "learning_rate": 1.924944287351158e-05, - "loss": 0.4367, + "epoch": 0.6381569262476078, + "grad_norm": 0.395268937479294, + "learning_rate": 1.9235924959497237e-05, + "loss": 0.4562, "step": 4335 }, { - "epoch": 0.6358974358974359, - "grad_norm": 0.39229244686000464, - "learning_rate": 1.9246199355223344e-05, - "loss": 0.4479, + "epoch": 0.6388929780656558, + "grad_norm": 0.3809884756259666, + "learning_rate": 1.923263819069709e-05, + "loss": 0.4498, "step": 4340 }, { - "epoch": 0.6366300366300366, - "grad_norm": 0.39043896265781963, - "learning_rate": 1.924294911810424e-05, - "loss": 0.4542, + "epoch": 0.6396290298837038, + "grad_norm": 0.37874360253029615, + "learning_rate": 1.922934464985682e-05, + "loss": 0.4539, "step": 4345 }, { - "epoch": 0.6373626373626373, - "grad_norm": 0.38448022559450346, - "learning_rate": 1.923969216451607e-05, - "loss": 0.4502, + "epoch": 0.6403650817017518, + "grad_norm": 0.38874389786317726, + "learning_rate": 1.92260443393922e-05, + "loss": 0.4511, "step": 4350 }, { - "epoch": 0.638095238095238, - "grad_norm": 0.3836800759490605, - "learning_rate": 1.9236428496825543e-05, - "loss": 0.4598, + "epoch": 0.6411011335197998, + "grad_norm": 0.39098938560232144, + "learning_rate": 1.922273726172398e-05, + "loss": 0.4489, "step": 4355 }, { - "epoch": 0.6388278388278388, - "grad_norm": 0.37757456425964253, - "learning_rate": 1.9233158117404224e-05, - "loss": 0.4539, + "epoch": 0.6418371853378478, + "grad_norm": 0.4129270007904453, + "learning_rate": 1.921942341927786e-05, + "loss": 0.473, "step": 4360 }, { - "epoch": 0.6395604395604395, - "grad_norm": 0.3903909062744032, - "learning_rate": 1.922988102862856e-05, - "loss": 0.4756, + "epoch": 0.6425732371558958, + "grad_norm": 0.3942484438457897, + "learning_rate": 1.921610281448451e-05, + "loss": 0.4671, "step": 4365 }, { - "epoch": 0.6402930402930402, - "grad_norm": 0.4178460038790184, - "learning_rate": 1.9226597232879876e-05, - "loss": 0.4507, + "epoch": 0.6433092889739438, + "grad_norm": 0.37943256505154993, + "learning_rate": 1.921277544977956e-05, + "loss": 0.4634, "step": 4370 }, { - "epoch": 0.6410256410256411, - "grad_norm": 0.35841063994877587, - "learning_rate": 1.9223306732544375e-05, - "loss": 0.4541, + "epoch": 0.6440453407919917, + "grad_norm": 0.36557974071879495, + "learning_rate": 1.9209441327603587e-05, + "loss": 0.4491, "step": 4375 }, { - "epoch": 0.6417582417582418, - "grad_norm": 0.41456546802807315, - "learning_rate": 1.9220009530013123e-05, - "loss": 0.4571, + "epoch": 0.6447813926100397, + "grad_norm": 0.3834531941887774, + "learning_rate": 1.9206100450402142e-05, + "loss": 0.4412, "step": 4380 }, { - "epoch": 0.6424908424908425, - "grad_norm": 0.4001438732473654, - "learning_rate": 1.921670562768207e-05, - "loss": 0.4538, + "epoch": 0.6455174444280878, + "grad_norm": 0.3739752373908016, + "learning_rate": 1.920275282062572e-05, + "loss": 0.451, "step": 4385 }, { - "epoch": 0.6432234432234433, - "grad_norm": 0.4124870743139893, - "learning_rate": 1.9213395027952014e-05, - "loss": 0.4454, + "epoch": 0.6462534962461357, + "grad_norm": 0.3862654437042714, + "learning_rate": 1.9199398440729775e-05, + "loss": 0.4472, "step": 4390 }, { - "epoch": 0.643956043956044, - "grad_norm": 0.39453924510346333, - "learning_rate": 1.9210077733228634e-05, - "loss": 0.4392, + "epoch": 0.6469895480641837, + "grad_norm": 0.3798904378427613, + "learning_rate": 1.91960373131747e-05, + "loss": 0.4631, "step": 4395 }, { - "epoch": 0.6446886446886447, - "grad_norm": 0.3869409287281537, - "learning_rate": 1.9206753745922474e-05, - "loss": 0.4557, + "epoch": 0.6477255998822317, + "grad_norm": 0.37053600436508793, + "learning_rate": 1.919266944042585e-05, + "loss": 0.4446, "step": 4400 }, { - "epoch": 0.6454212454212455, - "grad_norm": 0.38039297534887273, - "learning_rate": 1.920342306844894e-05, - "loss": 0.47, + "epoch": 0.6484616517002797, + "grad_norm": 0.38483557594087386, + "learning_rate": 1.9189294824953522e-05, + "loss": 0.444, "step": 4405 }, { - "epoch": 0.6461538461538462, - "grad_norm": 0.39272104310666833, - "learning_rate": 1.920008570322829e-05, - "loss": 0.4389, + "epoch": 0.6491977035183277, + "grad_norm": 0.3774301947302271, + "learning_rate": 1.918591346923296e-05, + "loss": 0.4772, "step": 4410 }, { - "epoch": 0.6468864468864469, - "grad_norm": 0.38105753403110915, - "learning_rate": 1.9196741652685656e-05, - "loss": 0.4542, + "epoch": 0.6499337553363757, + "grad_norm": 0.38992859254977136, + "learning_rate": 1.9182525375744357e-05, + "loss": 0.4553, "step": 4415 }, { - "epoch": 0.6476190476190476, - "grad_norm": 0.40105121340517647, - "learning_rate": 1.9193390919251018e-05, - "loss": 0.4625, + "epoch": 0.6506698071544237, + "grad_norm": 0.3858816731551356, + "learning_rate": 1.917913054697284e-05, + "loss": 0.4428, "step": 4420 }, { - "epoch": 0.6483516483516484, - "grad_norm": 0.370705169564649, - "learning_rate": 1.9190033505359217e-05, - "loss": 0.4632, + "epoch": 0.6514058589724716, + "grad_norm": 0.39217801204631847, + "learning_rate": 1.9175728985408475e-05, + "loss": 0.4639, "step": 4425 }, { - "epoch": 0.6490842490842491, - "grad_norm": 0.3862392288450474, - "learning_rate": 1.9186669413449947e-05, - "loss": 0.4605, + "epoch": 0.6521419107905196, + "grad_norm": 0.36854030701391716, + "learning_rate": 1.9172320693546275e-05, + "loss": 0.4514, "step": 4430 }, { - "epoch": 0.6498168498168498, - "grad_norm": 0.3923304956651707, - "learning_rate": 1.918329864596775e-05, - "loss": 0.4465, + "epoch": 0.6528779626085677, + "grad_norm": 0.38816202609188655, + "learning_rate": 1.916890567388618e-05, + "loss": 0.4523, "step": 4435 }, { - "epoch": 0.6505494505494506, - "grad_norm": 0.40276713300420847, - "learning_rate": 1.917992120536203e-05, - "loss": 0.4757, + "epoch": 0.6536140144266156, + "grad_norm": 0.3924972362295436, + "learning_rate": 1.9165483928933076e-05, + "loss": 0.4303, "step": 4440 }, { - "epoch": 0.6512820512820513, - "grad_norm": 0.3884293748786176, - "learning_rate": 1.917653709408703e-05, - "loss": 0.4562, + "epoch": 0.6543500662446636, + "grad_norm": 0.3977940095412845, + "learning_rate": 1.9162055461196773e-05, + "loss": 0.4334, "step": 4445 }, { - "epoch": 0.652014652014652, - "grad_norm": 0.4057062403750789, - "learning_rate": 1.9173146314601847e-05, - "loss": 0.4601, + "epoch": 0.6550861180627117, + "grad_norm": 0.3966482233204859, + "learning_rate": 1.9158620273192013e-05, + "loss": 0.4636, "step": 4450 }, { - "epoch": 0.6527472527472528, - "grad_norm": 0.40246283304255126, - "learning_rate": 1.9169748869370417e-05, - "loss": 0.4515, + "epoch": 0.6558221698807596, + "grad_norm": 0.3776812728523491, + "learning_rate": 1.9155178367438477e-05, + "loss": 0.4441, "step": 4455 }, { - "epoch": 0.6534798534798535, - "grad_norm": 0.3843492378662538, - "learning_rate": 1.916634476086152e-05, - "loss": 0.463, + "epoch": 0.6565582216988076, + "grad_norm": 0.3943576820565779, + "learning_rate": 1.9151729746460755e-05, + "loss": 0.4471, "step": 4460 }, { - "epoch": 0.6542124542124542, - "grad_norm": 0.3684455758527936, - "learning_rate": 1.9162933991548792e-05, - "loss": 0.4606, + "epoch": 0.6572942735168555, + "grad_norm": 0.3984824508339129, + "learning_rate": 1.9148274412788384e-05, + "loss": 0.4491, "step": 4465 }, { - "epoch": 0.654945054945055, - "grad_norm": 0.4051332136256459, - "learning_rate": 1.9159516563910686e-05, - "loss": 0.4693, + "epoch": 0.6580303253349036, + "grad_norm": 0.379015524155088, + "learning_rate": 1.91448123689558e-05, + "loss": 0.4516, "step": 4470 }, { - "epoch": 0.6556776556776557, - "grad_norm": 0.38075983769129534, - "learning_rate": 1.915609248043051e-05, - "loss": 0.4528, + "epoch": 0.6587663771529516, + "grad_norm": 0.41906622965058576, + "learning_rate": 1.914134361750239e-05, + "loss": 0.4687, "step": 4475 }, { - "epoch": 0.6564102564102564, - "grad_norm": 0.3936521233161132, - "learning_rate": 1.9152661743596405e-05, - "loss": 0.4568, + "epoch": 0.6595024289709995, + "grad_norm": 0.36877550481815546, + "learning_rate": 1.9137868160972436e-05, + "loss": 0.4464, "step": 4480 }, { - "epoch": 0.6571428571428571, - "grad_norm": 0.4033142175594591, - "learning_rate": 1.9149224355901345e-05, - "loss": 0.4684, + "epoch": 0.6602384807890476, + "grad_norm": 0.40075594059466546, + "learning_rate": 1.913438600191515e-05, + "loss": 0.4645, "step": 4485 }, { - "epoch": 0.6578754578754579, - "grad_norm": 0.39258181662510266, - "learning_rate": 1.9145780319843138e-05, - "loss": 0.4537, + "epoch": 0.6609745326070955, + "grad_norm": 0.392668878233667, + "learning_rate": 1.9130897142884658e-05, + "loss": 0.4593, "step": 4490 }, { - "epoch": 0.6586080586080586, - "grad_norm": 0.38314748902028634, - "learning_rate": 1.9142329637924423e-05, - "loss": 0.4463, + "epoch": 0.6617105844251435, + "grad_norm": 0.3872265344456865, + "learning_rate": 1.912740158644e-05, + "loss": 0.4638, "step": 4495 }, { - "epoch": 0.6593406593406593, - "grad_norm": 0.37835249978141144, - "learning_rate": 1.9138872312652665e-05, - "loss": 0.4578, + "epoch": 0.6624466362431916, + "grad_norm": 0.37492574952816243, + "learning_rate": 1.9123899335145132e-05, + "loss": 0.4392, "step": 4500 }, { - "epoch": 0.6600732600732601, - "grad_norm": 0.39233915642483885, - "learning_rate": 1.913540834654016e-05, - "loss": 0.4425, + "epoch": 0.6631826880612395, + "grad_norm": 0.37314352488191793, + "learning_rate": 1.912039039156891e-05, + "loss": 0.4497, "step": 4505 }, { - "epoch": 0.6608058608058608, - "grad_norm": 0.3948802796808528, - "learning_rate": 1.9131937742104034e-05, - "loss": 0.4459, + "epoch": 0.6639187398792875, + "grad_norm": 0.3740219032255518, + "learning_rate": 1.9116874758285113e-05, + "loss": 0.4524, "step": 4510 }, { - "epoch": 0.6615384615384615, - "grad_norm": 0.3837580929885009, - "learning_rate": 1.9128460501866227e-05, - "loss": 0.4741, + "epoch": 0.6646547916973355, + "grad_norm": 0.38093178849398496, + "learning_rate": 1.9113352437872416e-05, + "loss": 0.4559, "step": 4515 }, { - "epoch": 0.6622710622710622, - "grad_norm": 0.39146826558693193, - "learning_rate": 1.9124976628353504e-05, - "loss": 0.4475, + "epoch": 0.6653908435153835, + "grad_norm": 0.3942264775237251, + "learning_rate": 1.9109823432914407e-05, + "loss": 0.465, "step": 4520 }, { - "epoch": 0.663003663003663, - "grad_norm": 0.414172032887318, - "learning_rate": 1.9121486124097457e-05, - "loss": 0.4505, + "epoch": 0.6661268953334315, + "grad_norm": 0.3826711034606891, + "learning_rate": 1.9106287745999567e-05, + "loss": 0.4605, "step": 4525 }, { - "epoch": 0.6637362637362637, - "grad_norm": 0.3925713507074972, - "learning_rate": 1.9117988991634488e-05, - "loss": 0.4607, + "epoch": 0.6668629471514794, + "grad_norm": 0.39725631489634533, + "learning_rate": 1.910274537972129e-05, + "loss": 0.4551, "step": 4530 }, { - "epoch": 0.6644688644688644, - "grad_norm": 0.3568564070162253, - "learning_rate": 1.911448523350582e-05, - "loss": 0.4512, + "epoch": 0.6675989989695275, + "grad_norm": 0.38788306742870987, + "learning_rate": 1.9099196336677864e-05, + "loss": 0.4511, "step": 4535 }, { - "epoch": 0.6652014652014652, - "grad_norm": 0.37946337855975426, - "learning_rate": 1.911097485225749e-05, - "loss": 0.4367, + "epoch": 0.6683350507875755, + "grad_norm": 0.35713665308184506, + "learning_rate": 1.9095640619472467e-05, + "loss": 0.451, "step": 4540 }, { - "epoch": 0.6659340659340659, - "grad_norm": 0.39191564372249904, - "learning_rate": 1.9107457850440343e-05, - "loss": 0.447, + "epoch": 0.6690711026056234, + "grad_norm": 0.36416995161535204, + "learning_rate": 1.9092078230713184e-05, + "loss": 0.4494, "step": 4545 }, { - "epoch": 0.6666666666666666, - "grad_norm": 0.38309908021603784, - "learning_rate": 1.910393423061004e-05, - "loss": 0.4556, + "epoch": 0.6698071544236714, + "grad_norm": 0.40150764715503834, + "learning_rate": 1.9088509173012985e-05, + "loss": 0.4615, "step": 4550 }, { - "epoch": 0.6673992673992674, - "grad_norm": 0.3651262663209095, - "learning_rate": 1.9100403995327057e-05, - "loss": 0.4427, + "epoch": 0.6705432062417194, + "grad_norm": 0.39907922416888963, + "learning_rate": 1.9084933448989734e-05, + "loss": 0.4589, "step": 4555 }, { - "epoch": 0.6681318681318681, - "grad_norm": 0.384790548297463, - "learning_rate": 1.909686714715666e-05, - "loss": 0.453, + "epoch": 0.6712792580597674, + "grad_norm": 0.3794968612658755, + "learning_rate": 1.9081351061266194e-05, + "loss": 0.4602, "step": 4560 }, { - "epoch": 0.6688644688644688, - "grad_norm": 0.36691825995705835, - "learning_rate": 1.9093323688668935e-05, - "loss": 0.4522, + "epoch": 0.6720153098778154, + "grad_norm": 0.3958971089527187, + "learning_rate": 1.9077762012470004e-05, + "loss": 0.4479, "step": 4565 }, { - "epoch": 0.6695970695970695, - "grad_norm": 0.3708935734293259, - "learning_rate": 1.908977362243877e-05, - "loss": 0.4443, + "epoch": 0.6727513616958634, + "grad_norm": 0.38697737531582765, + "learning_rate": 1.9074166305233684e-05, + "loss": 0.4464, "step": 4570 }, { - "epoch": 0.6703296703296703, - "grad_norm": 0.3674641558501077, - "learning_rate": 1.9086216951045846e-05, - "loss": 0.4473, + "epoch": 0.6734874135139114, + "grad_norm": 0.38627229075882585, + "learning_rate": 1.9070563942194658e-05, + "loss": 0.456, "step": 4575 }, { - "epoch": 0.671062271062271, - "grad_norm": 0.37713691526144866, - "learning_rate": 1.908265367707465e-05, - "loss": 0.4443, + "epoch": 0.6742234653319593, + "grad_norm": 0.3809169910379719, + "learning_rate": 1.906695492599522e-05, + "loss": 0.4553, "step": 4580 }, { - "epoch": 0.6717948717948717, - "grad_norm": 0.3893239948236134, - "learning_rate": 1.907908380311447e-05, - "loss": 0.4497, + "epoch": 0.6749595171500073, + "grad_norm": 0.3926000334263371, + "learning_rate": 1.9063339259282532e-05, + "loss": 0.4414, "step": 4585 }, { - "epoch": 0.6725274725274726, - "grad_norm": 0.3866933308377136, - "learning_rate": 1.9075507331759376e-05, - "loss": 0.471, + "epoch": 0.6756955689680554, + "grad_norm": 0.3938132421731681, + "learning_rate": 1.905971694470866e-05, + "loss": 0.4565, "step": 4590 }, { - "epoch": 0.6732600732600733, - "grad_norm": 0.3842129913868808, - "learning_rate": 1.9071924265608253e-05, - "loss": 0.4486, + "epoch": 0.6764316207861033, + "grad_norm": 0.40668102910719195, + "learning_rate": 1.9056087984930524e-05, + "loss": 0.471, "step": 4595 }, { - "epoch": 0.673992673992674, - "grad_norm": 0.3914051257219504, - "learning_rate": 1.9068334607264765e-05, - "loss": 0.4695, + "epoch": 0.6771676726041513, + "grad_norm": 0.3866271529330411, + "learning_rate": 1.905245238260993e-05, + "loss": 0.454, "step": 4600 }, { - "epoch": 0.6747252747252748, - "grad_norm": 0.36762571486296486, - "learning_rate": 1.906473835933736e-05, - "loss": 0.442, + "epoch": 0.6779037244221994, + "grad_norm": 0.3765934301591741, + "learning_rate": 1.9048810140413553e-05, + "loss": 0.4386, "step": 4605 }, { - "epoch": 0.6754578754578755, - "grad_norm": 0.38947108354293425, - "learning_rate": 1.9061135524439296e-05, - "loss": 0.4547, + "epoch": 0.6786397762402473, + "grad_norm": 0.3909238186923048, + "learning_rate": 1.9045161261012937e-05, + "loss": 0.4577, "step": 4610 }, { - "epoch": 0.6761904761904762, - "grad_norm": 0.3727955645617599, - "learning_rate": 1.9057526105188593e-05, - "loss": 0.453, + "epoch": 0.6793758280582953, + "grad_norm": 0.3560298704169209, + "learning_rate": 1.9041505747084497e-05, + "loss": 0.4633, "step": 4615 }, { - "epoch": 0.676923076923077, - "grad_norm": 0.38259607470463275, - "learning_rate": 1.9053910104208073e-05, - "loss": 0.4576, + "epoch": 0.6801118798763433, + "grad_norm": 0.37333945690798426, + "learning_rate": 1.9037843601309513e-05, + "loss": 0.4483, "step": 4620 }, { - "epoch": 0.6776556776556777, - "grad_norm": 0.3720365889474071, - "learning_rate": 1.905028752412533e-05, - "loss": 0.451, + "epoch": 0.6808479316943913, + "grad_norm": 0.3749886090724038, + "learning_rate": 1.9034174826374126e-05, + "loss": 0.4494, "step": 4625 }, { - "epoch": 0.6783882783882784, - "grad_norm": 0.3827944226686561, - "learning_rate": 1.9046658367572743e-05, - "loss": 0.4516, + "epoch": 0.6815839835124393, + "grad_norm": 0.3815012969349644, + "learning_rate": 1.9030499424969346e-05, + "loss": 0.4434, "step": 4630 }, { - "epoch": 0.6791208791208792, - "grad_norm": 0.39885198041841713, - "learning_rate": 1.9043022637187478e-05, - "loss": 0.4703, + "epoch": 0.6823200353304872, + "grad_norm": 0.41318111288780823, + "learning_rate": 1.902681739979104e-05, + "loss": 0.4528, "step": 4635 }, { - "epoch": 0.6798534798534799, - "grad_norm": 0.4834737339082414, - "learning_rate": 1.903938033561146e-05, - "loss": 0.453, + "epoch": 0.6830560871485353, + "grad_norm": 0.37845100927533615, + "learning_rate": 1.902312875353993e-05, + "loss": 0.464, "step": 4640 }, { - "epoch": 0.6805860805860806, - "grad_norm": 0.40334535967342927, - "learning_rate": 1.9035731465491404e-05, - "loss": 0.4645, + "epoch": 0.6837921389665832, + "grad_norm": 0.37294320606052356, + "learning_rate": 1.9019433488921603e-05, + "loss": 0.4513, "step": 4645 }, { - "epoch": 0.6813186813186813, - "grad_norm": 0.3811030815586793, - "learning_rate": 1.9032076029478794e-05, - "loss": 0.4396, + "epoch": 0.6845281907846312, + "grad_norm": 0.4047075759083381, + "learning_rate": 1.9015731608646495e-05, + "loss": 0.4606, "step": 4650 }, { - "epoch": 0.6820512820512821, - "grad_norm": 0.37187644492402144, - "learning_rate": 1.9028414030229885e-05, - "loss": 0.4481, + "epoch": 0.6852642426026793, + "grad_norm": 0.37858929794206153, + "learning_rate": 1.9012023115429892e-05, + "loss": 0.4436, "step": 4655 }, { - "epoch": 0.6827838827838828, - "grad_norm": 0.45571558554350805, - "learning_rate": 1.902474547040569e-05, - "loss": 0.464, + "epoch": 0.6860002944207272, + "grad_norm": 0.5284243788863681, + "learning_rate": 1.9008308011991934e-05, + "loss": 0.4651, "step": 4660 }, { - "epoch": 0.6835164835164835, - "grad_norm": 0.3898194135816126, - "learning_rate": 1.9021070352672018e-05, - "loss": 0.4326, + "epoch": 0.6867363462387752, + "grad_norm": 0.3578598682305859, + "learning_rate": 1.900458630105761e-05, + "loss": 0.4583, "step": 4665 }, { - "epoch": 0.6842490842490843, - "grad_norm": 0.373343816023147, - "learning_rate": 1.901738867969941e-05, - "loss": 0.4446, + "epoch": 0.6874723980568233, + "grad_norm": 0.36339367791316657, + "learning_rate": 1.9000857985356757e-05, + "loss": 0.4299, "step": 4670 }, { - "epoch": 0.684981684981685, - "grad_norm": 0.3967216092671176, - "learning_rate": 1.9013700454163194e-05, - "loss": 0.4621, + "epoch": 0.6882084498748712, + "grad_norm": 0.41633037540042483, + "learning_rate": 1.8997123067624052e-05, + "loss": 0.4713, "step": 4675 }, { - "epoch": 0.6857142857142857, - "grad_norm": 0.3821556837245336, - "learning_rate": 1.9010005678743445e-05, - "loss": 0.4521, + "epoch": 0.6889445016929192, + "grad_norm": 0.3710006501995171, + "learning_rate": 1.8993381550599017e-05, + "loss": 0.4741, "step": 4680 }, { - "epoch": 0.6864468864468865, - "grad_norm": 0.36149493353457285, - "learning_rate": 1.9006304356125007e-05, - "loss": 0.4532, + "epoch": 0.6896805535109671, + "grad_norm": 0.38508491341111795, + "learning_rate": 1.898963343702601e-05, + "loss": 0.4435, "step": 4685 }, { - "epoch": 0.6871794871794872, - "grad_norm": 0.3916146499177383, - "learning_rate": 1.9002596488997476e-05, - "loss": 0.469, + "epoch": 0.6904166053290152, + "grad_norm": 0.3681664224415523, + "learning_rate": 1.8985878729654242e-05, + "loss": 0.4392, "step": 4690 }, { - "epoch": 0.6879120879120879, - "grad_norm": 0.3697458703049274, - "learning_rate": 1.8998882080055208e-05, - "loss": 0.4514, + "epoch": 0.6911526571470632, + "grad_norm": 0.36978501750409004, + "learning_rate": 1.898211743123774e-05, + "loss": 0.4449, "step": 4695 }, { - "epoch": 0.6886446886446886, - "grad_norm": 0.3756463360666905, - "learning_rate": 1.899516113199731e-05, - "loss": 0.4385, + "epoch": 0.6918887089651111, + "grad_norm": 0.3981217967566912, + "learning_rate": 1.897834954453538e-05, + "loss": 0.4601, "step": 4700 }, { - "epoch": 0.6893772893772894, - "grad_norm": 0.373606856904118, - "learning_rate": 1.899143364752764e-05, - "loss": 0.4665, + "epoch": 0.6926247607831592, + "grad_norm": 0.36252916603801644, + "learning_rate": 1.8974575072310865e-05, + "loss": 0.4535, "step": 4705 }, { - "epoch": 0.6901098901098901, - "grad_norm": 0.3898021195128183, - "learning_rate": 1.898769962935481e-05, - "loss": 0.4418, + "epoch": 0.6933608126012071, + "grad_norm": 0.3863137853908461, + "learning_rate": 1.8970794017332732e-05, + "loss": 0.4546, "step": 4710 }, { - "epoch": 0.6908424908424908, - "grad_norm": 0.35755569340730087, - "learning_rate": 1.898395908019217e-05, - "loss": 0.4443, + "epoch": 0.6940968644192551, + "grad_norm": 0.42458899935521194, + "learning_rate": 1.896700638237434e-05, + "loss": 0.4636, "step": 4715 }, { - "epoch": 0.6915750915750916, - "grad_norm": 0.3615767437972971, - "learning_rate": 1.8980212002757832e-05, - "loss": 0.4562, + "epoch": 0.6948329162373031, + "grad_norm": 0.407244809353056, + "learning_rate": 1.896321217021388e-05, + "loss": 0.4563, "step": 4720 }, { - "epoch": 0.6923076923076923, - "grad_norm": 0.4006984323153798, - "learning_rate": 1.8976458399774637e-05, - "loss": 0.4211, + "epoch": 0.6955689680553511, + "grad_norm": 0.38883704310300615, + "learning_rate": 1.8959411383634368e-05, + "loss": 0.4553, "step": 4725 }, { - "epoch": 0.693040293040293, - "grad_norm": 0.42919622224646925, - "learning_rate": 1.897269827397017e-05, - "loss": 0.4516, + "epoch": 0.6963050198733991, + "grad_norm": 0.38064156287812523, + "learning_rate": 1.8955604025423636e-05, + "loss": 0.473, "step": 4730 }, { - "epoch": 0.6937728937728938, - "grad_norm": 0.3778206577833697, - "learning_rate": 1.8968931628076762e-05, - "loss": 0.4626, + "epoch": 0.697041071691447, + "grad_norm": 0.42671362604664703, + "learning_rate": 1.8951790098374342e-05, + "loss": 0.4629, "step": 4735 }, { - "epoch": 0.6945054945054945, - "grad_norm": 0.39098805861244557, - "learning_rate": 1.8965158464831482e-05, - "loss": 0.4572, + "epoch": 0.6977771235094951, + "grad_norm": 0.387449450624925, + "learning_rate": 1.894796960528396e-05, + "loss": 0.4436, "step": 4740 }, { - "epoch": 0.6952380952380952, - "grad_norm": 0.38792694325380767, - "learning_rate": 1.8961378786976127e-05, - "loss": 0.4484, + "epoch": 0.6985131753275431, + "grad_norm": 0.46819007252561434, + "learning_rate": 1.8944142548954784e-05, + "loss": 0.4379, "step": 4745 }, { - "epoch": 0.6959706959706959, - "grad_norm": 0.38773365524372716, - "learning_rate": 1.8957592597257233e-05, - "loss": 0.4427, + "epoch": 0.699249227145591, + "grad_norm": 0.37059307285585424, + "learning_rate": 1.8940308932193908e-05, + "loss": 0.4432, "step": 4750 }, { - "epoch": 0.6967032967032967, - "grad_norm": 0.40388591163772436, - "learning_rate": 1.8953799898426074e-05, - "loss": 0.4581, + "epoch": 0.699985278963639, + "grad_norm": 0.37315949296564255, + "learning_rate": 1.893646875781326e-05, + "loss": 0.4499, "step": 4755 }, { - "epoch": 0.6974358974358974, - "grad_norm": 0.4246963886620943, - "learning_rate": 1.895000069323864e-05, - "loss": 0.4629, + "epoch": 0.7007213307816871, + "grad_norm": 0.3998305870136644, + "learning_rate": 1.8932622028629557e-05, + "loss": 0.4654, "step": 4760 }, { - "epoch": 0.6981684981684981, - "grad_norm": 0.3704772638447488, - "learning_rate": 1.8946194984455656e-05, - "loss": 0.4554, + "epoch": 0.701457382599735, + "grad_norm": 0.38356851430459643, + "learning_rate": 1.892876874746434e-05, + "loss": 0.4544, "step": 4765 }, { - "epoch": 0.6989010989010989, - "grad_norm": 0.38894583359585044, - "learning_rate": 1.894238277484258e-05, - "loss": 0.4508, + "epoch": 0.702193434417783, + "grad_norm": 0.37100455436607055, + "learning_rate": 1.8924908917143946e-05, + "loss": 0.4521, "step": 4770 }, { - "epoch": 0.6996336996336996, - "grad_norm": 0.39049511016031907, - "learning_rate": 1.8938564067169584e-05, - "loss": 0.4565, + "epoch": 0.702929486235831, + "grad_norm": 0.38343900260974045, + "learning_rate": 1.8921042540499515e-05, + "loss": 0.4502, "step": 4775 }, { - "epoch": 0.7003663003663003, - "grad_norm": 0.38434378064854174, - "learning_rate": 1.893473886421156e-05, - "loss": 0.4452, + "epoch": 0.703665538053879, + "grad_norm": 0.3995468827419847, + "learning_rate": 1.8917169620367003e-05, + "loss": 0.4711, "step": 4780 }, { - "epoch": 0.701098901098901, - "grad_norm": 0.3807732418526466, - "learning_rate": 1.8930907168748134e-05, - "loss": 0.453, + "epoch": 0.704401589871927, + "grad_norm": 0.37961563979427065, + "learning_rate": 1.8913290159587143e-05, + "loss": 0.456, "step": 4785 }, { - "epoch": 0.7018315018315018, - "grad_norm": 0.38048565611635704, - "learning_rate": 1.8927068983563633e-05, - "loss": 0.4445, + "epoch": 0.705137641689975, + "grad_norm": 0.36862889514517616, + "learning_rate": 1.8909404161005486e-05, + "loss": 0.4436, "step": 4790 }, { - "epoch": 0.7025641025641025, - "grad_norm": 0.3985839982100088, - "learning_rate": 1.892322431144711e-05, - "loss": 0.4435, + "epoch": 0.705873693508023, + "grad_norm": 0.37699874981750026, + "learning_rate": 1.8905511627472365e-05, + "loss": 0.4587, "step": 4795 }, { - "epoch": 0.7032967032967034, - "grad_norm": 0.3701985887177081, - "learning_rate": 1.891937315519233e-05, - "loss": 0.4335, + "epoch": 0.7066097453260709, + "grad_norm": 0.3908195086062283, + "learning_rate": 1.8901612561842915e-05, + "loss": 0.4513, "step": 4800 }, { - "epoch": 0.7040293040293041, - "grad_norm": 0.37760096364078544, - "learning_rate": 1.891551551759777e-05, - "loss": 0.4399, + "epoch": 0.7073457971441189, + "grad_norm": 0.3665398737233992, + "learning_rate": 1.8897706966977058e-05, + "loss": 0.4518, "step": 4805 }, { - "epoch": 0.7047619047619048, - "grad_norm": 0.4047001591947668, - "learning_rate": 1.8911651401466617e-05, - "loss": 0.4585, + "epoch": 0.708081848962167, + "grad_norm": 0.3609870619410624, + "learning_rate": 1.88937948457395e-05, + "loss": 0.4411, "step": 4810 }, { - "epoch": 0.7054945054945055, - "grad_norm": 0.35180736741582225, - "learning_rate": 1.8907780809606757e-05, - "loss": 0.4374, + "epoch": 0.7088179007802149, + "grad_norm": 0.38895035996514304, + "learning_rate": 1.8889876200999744e-05, + "loss": 0.4551, "step": 4815 }, { - "epoch": 0.7062271062271063, - "grad_norm": 0.3965065752973784, - "learning_rate": 1.8903903744830797e-05, - "loss": 0.4712, + "epoch": 0.7095539525982629, + "grad_norm": 0.35603852102578454, + "learning_rate": 1.8885951035632075e-05, + "loss": 0.4648, "step": 4820 }, { - "epoch": 0.706959706959707, - "grad_norm": 0.4056638023244465, - "learning_rate": 1.8900020209956035e-05, - "loss": 0.4517, + "epoch": 0.710290004416311, + "grad_norm": 0.42671666362664806, + "learning_rate": 1.8882019352515555e-05, + "loss": 0.4887, "step": 4825 }, { - "epoch": 0.7076923076923077, - "grad_norm": 0.37618097111880616, - "learning_rate": 1.889613020780448e-05, - "loss": 0.4591, + "epoch": 0.7110260562343589, + "grad_norm": 0.3516709921941014, + "learning_rate": 1.8878081154534035e-05, + "loss": 0.4473, "step": 4830 }, { - "epoch": 0.7084249084249085, - "grad_norm": 0.36476485229020567, - "learning_rate": 1.8892233741202834e-05, - "loss": 0.4576, + "epoch": 0.7117621080524069, + "grad_norm": 0.37682190495104745, + "learning_rate": 1.887413644457614e-05, + "loss": 0.4448, "step": 4835 }, { - "epoch": 0.7091575091575092, - "grad_norm": 0.41114552994910686, - "learning_rate": 1.8888330812982493e-05, - "loss": 0.4514, + "epoch": 0.7124981598704548, + "grad_norm": 0.3734482896287629, + "learning_rate": 1.887018522553527e-05, + "loss": 0.4576, "step": 4840 }, { - "epoch": 0.7098901098901099, - "grad_norm": 0.3938823356135875, - "learning_rate": 1.888442142597956e-05, - "loss": 0.4489, + "epoch": 0.7132342116885029, + "grad_norm": 0.36700338444436587, + "learning_rate": 1.8866227500309598e-05, + "loss": 0.4463, "step": 4845 }, { - "epoch": 0.7106227106227107, - "grad_norm": 0.3616177240649825, - "learning_rate": 1.888050558303482e-05, - "loss": 0.4365, + "epoch": 0.7139702635065509, + "grad_norm": 0.37609698891657933, + "learning_rate": 1.886226327180208e-05, + "loss": 0.4568, "step": 4850 }, { - "epoch": 0.7113553113553114, - "grad_norm": 0.5594879931276083, - "learning_rate": 1.8876583286993758e-05, - "loss": 0.4475, + "epoch": 0.7147063153245988, + "grad_norm": 0.3708912459699521, + "learning_rate": 1.8858292542920435e-05, + "loss": 0.4551, "step": 4855 }, { - "epoch": 0.7120879120879121, - "grad_norm": 0.3803372257837885, - "learning_rate": 1.887265454070655e-05, - "loss": 0.4488, + "epoch": 0.7154423671426469, + "grad_norm": 0.3523567619488166, + "learning_rate": 1.8854315316577142e-05, + "loss": 0.4385, "step": 4860 }, { - "epoch": 0.7128205128205128, - "grad_norm": 0.3842306891124848, - "learning_rate": 1.886871934702804e-05, - "loss": 0.4498, + "epoch": 0.7161784189606948, + "grad_norm": 0.39825711110012857, + "learning_rate": 1.885033159568946e-05, + "loss": 0.4661, "step": 4865 }, { - "epoch": 0.7135531135531136, - "grad_norm": 0.37564182357370485, - "learning_rate": 1.886477770881778e-05, - "loss": 0.4516, + "epoch": 0.7169144707787428, + "grad_norm": 0.37288272822218954, + "learning_rate": 1.8846341383179398e-05, + "loss": 0.4517, "step": 4870 }, { - "epoch": 0.7142857142857143, - "grad_norm": 0.3664584796382565, - "learning_rate": 1.8860829628939988e-05, - "loss": 0.4355, + "epoch": 0.7176505225967909, + "grad_norm": 0.4250134249290028, + "learning_rate": 1.8842344681973742e-05, + "loss": 0.4719, "step": 4875 }, { - "epoch": 0.715018315018315, - "grad_norm": 0.37290235571696095, - "learning_rate": 1.885687511026358e-05, - "loss": 0.4469, + "epoch": 0.7183865744148388, + "grad_norm": 0.36864907984912687, + "learning_rate": 1.8838341495004023e-05, + "loss": 0.4427, "step": 4880 }, { - "epoch": 0.7157509157509158, - "grad_norm": 0.4064105840730172, - "learning_rate": 1.8852914155662133e-05, - "loss": 0.4382, + "epoch": 0.7191226262328868, + "grad_norm": 0.39950862586857455, + "learning_rate": 1.883433182520654e-05, + "loss": 0.4451, "step": 4885 }, { - "epoch": 0.7164835164835165, - "grad_norm": 0.3842875164436108, - "learning_rate": 1.8848946768013915e-05, - "loss": 0.4408, + "epoch": 0.7198586780509347, + "grad_norm": 0.38114084933695436, + "learning_rate": 1.883031567552234e-05, + "loss": 0.4481, "step": 4890 }, { - "epoch": 0.7172161172161172, - "grad_norm": 0.38081609641953207, - "learning_rate": 1.8844972950201862e-05, - "loss": 0.4593, + "epoch": 0.7205947298689828, + "grad_norm": 0.4040185481089537, + "learning_rate": 1.8826293048897224e-05, + "loss": 0.4328, "step": 4895 }, { - "epoch": 0.717948717948718, - "grad_norm": 0.47127324509102547, - "learning_rate": 1.884099270511358e-05, - "loss": 0.4521, + "epoch": 0.7213307816870308, + "grad_norm": 0.3656004947367902, + "learning_rate": 1.8822263948281746e-05, + "loss": 0.4485, "step": 4900 }, { - "epoch": 0.7186813186813187, - "grad_norm": 0.37652291233531765, - "learning_rate": 1.883700603564135e-05, - "loss": 0.4587, + "epoch": 0.7220668335050787, + "grad_norm": 0.3553643062230974, + "learning_rate": 1.881822837663121e-05, + "loss": 0.4431, "step": 4905 }, { - "epoch": 0.7194139194139194, - "grad_norm": 0.3615301757266598, - "learning_rate": 1.883301294468212e-05, - "loss": 0.4501, + "epoch": 0.7228028853231268, + "grad_norm": 0.39429759042069384, + "learning_rate": 1.8814186336905663e-05, + "loss": 0.461, "step": 4910 }, { - "epoch": 0.7201465201465201, - "grad_norm": 0.36022573004294006, - "learning_rate": 1.8829013435137506e-05, - "loss": 0.4558, + "epoch": 0.7235389371411748, + "grad_norm": 0.42350656930755187, + "learning_rate": 1.8810137832069896e-05, + "loss": 0.4621, "step": 4915 }, { - "epoch": 0.7208791208791209, - "grad_norm": 0.35055657248154937, - "learning_rate": 1.882500750991378e-05, - "loss": 0.4582, + "epoch": 0.7242749889592227, + "grad_norm": 0.37015861640127484, + "learning_rate": 1.880608286509344e-05, + "loss": 0.4409, "step": 4920 }, { - "epoch": 0.7216117216117216, - "grad_norm": 0.3552183693090387, - "learning_rate": 1.882099517192189e-05, - "loss": 0.4413, + "epoch": 0.7250110407772707, + "grad_norm": 0.3759334193858358, + "learning_rate": 1.880202143895058e-05, + "loss": 0.4443, "step": 4925 }, { - "epoch": 0.7223443223443223, - "grad_norm": 0.3674515573775624, - "learning_rate": 1.881697642407743e-05, - "loss": 0.4502, + "epoch": 0.7257470925953187, + "grad_norm": 0.3833862965964792, + "learning_rate": 1.8797953556620315e-05, + "loss": 0.4413, "step": 4930 }, { - "epoch": 0.7230769230769231, - "grad_norm": 0.375113043902126, - "learning_rate": 1.8812951269300658e-05, - "loss": 0.4593, + "epoch": 0.7264831444133667, + "grad_norm": 0.3772971808673379, + "learning_rate": 1.8793879221086403e-05, + "loss": 0.4629, "step": 4935 }, { - "epoch": 0.7238095238095238, - "grad_norm": 0.4099788031048028, - "learning_rate": 1.8808919710516495e-05, - "loss": 0.4375, + "epoch": 0.7272191962314147, + "grad_norm": 0.3874088731230548, + "learning_rate": 1.8789798435337322e-05, + "loss": 0.471, "step": 4940 }, { - "epoch": 0.7245421245421245, - "grad_norm": 0.3691481226512916, - "learning_rate": 1.8804881750654504e-05, - "loss": 0.4314, + "epoch": 0.7279552480494627, + "grad_norm": 0.3756942626441261, + "learning_rate": 1.878571120236628e-05, + "loss": 0.4357, "step": 4945 }, { - "epoch": 0.7252747252747253, - "grad_norm": 0.36505639763282793, - "learning_rate": 1.88008373926489e-05, - "loss": 0.4424, + "epoch": 0.7286912998675107, + "grad_norm": 0.39184131451221743, + "learning_rate": 1.878161752517122e-05, + "loss": 0.4451, "step": 4950 }, { - "epoch": 0.726007326007326, - "grad_norm": 0.40079042557698097, - "learning_rate": 1.879678663943856e-05, - "loss": 0.4488, + "epoch": 0.7294273516855586, + "grad_norm": 0.3765062835007046, + "learning_rate": 1.8777517406754817e-05, + "loss": 0.4571, "step": 4955 }, { - "epoch": 0.7267399267399267, - "grad_norm": 0.35847162619593637, - "learning_rate": 1.8792729493966985e-05, - "loss": 0.4495, + "epoch": 0.7301634035036066, + "grad_norm": 0.38876516628659347, + "learning_rate": 1.8773410850124454e-05, + "loss": 0.4579, "step": 4960 }, { - "epoch": 0.7274725274725274, - "grad_norm": 0.3927400030593109, - "learning_rate": 1.8788665959182345e-05, - "loss": 0.438, + "epoch": 0.7308994553216547, + "grad_norm": 0.37049204158950866, + "learning_rate": 1.876929785829225e-05, + "loss": 0.449, "step": 4965 }, { - "epoch": 0.7282051282051282, - "grad_norm": 0.3767857839509493, - "learning_rate": 1.878459603803744e-05, - "loss": 0.4504, + "epoch": 0.7316355071397026, + "grad_norm": 0.36971484242293473, + "learning_rate": 1.876517843427504e-05, + "loss": 0.4539, "step": 4970 }, { - "epoch": 0.7289377289377289, - "grad_norm": 0.3853596665238205, - "learning_rate": 1.8780519733489715e-05, - "loss": 0.4373, + "epoch": 0.7323715589577506, + "grad_norm": 0.39244436955036255, + "learning_rate": 1.8761052581094378e-05, + "loss": 0.4658, "step": 4975 }, { - "epoch": 0.7296703296703296, - "grad_norm": 0.3689570683299289, - "learning_rate": 1.877643704850125e-05, - "loss": 0.4542, + "epoch": 0.7331076107757987, + "grad_norm": 0.40362849889587354, + "learning_rate": 1.875692030177653e-05, + "loss": 0.4564, "step": 4980 }, { - "epoch": 0.7304029304029304, - "grad_norm": 0.37575696161804173, - "learning_rate": 1.877234798603876e-05, - "loss": 0.473, + "epoch": 0.7338436625938466, + "grad_norm": 0.4008458908739168, + "learning_rate": 1.8752781599352476e-05, + "loss": 0.45, "step": 4985 }, { - "epoch": 0.7311355311355311, - "grad_norm": 0.3725794967777069, - "learning_rate": 1.876825254907361e-05, - "loss": 0.4725, + "epoch": 0.7345797144118946, + "grad_norm": 0.40270604908220076, + "learning_rate": 1.8748636476857913e-05, + "loss": 0.446, "step": 4990 }, { - "epoch": 0.7318681318681318, - "grad_norm": 0.4074849563026, - "learning_rate": 1.8764150740581772e-05, - "loss": 0.4524, + "epoch": 0.7353157662299425, + "grad_norm": 0.42604148951230386, + "learning_rate": 1.874448493733325e-05, + "loss": 0.4706, "step": 4995 }, { - "epoch": 0.7326007326007326, - "grad_norm": 0.37640767192398394, - "learning_rate": 1.8760042563543862e-05, - "loss": 0.4671, + "epoch": 0.7360518180479906, + "grad_norm": 0.41486119749955624, + "learning_rate": 1.874032698382358e-05, + "loss": 0.4368, "step": 5000 }, { - "epoch": 0.7333333333333333, - "grad_norm": 0.3778473707730308, - "learning_rate": 1.8755928020945134e-05, - "loss": 0.4414, + "epoch": 0.7367878698660386, + "grad_norm": 0.37245519856096193, + "learning_rate": 1.873616261937873e-05, + "loss": 0.4455, "step": 5005 }, { - "epoch": 0.734065934065934, - "grad_norm": 0.3817578090582604, - "learning_rate": 1.875180711577544e-05, - "loss": 0.4695, + "epoch": 0.7375239216840865, + "grad_norm": 0.3818563871959328, + "learning_rate": 1.8731991847053213e-05, + "loss": 0.4618, "step": 5010 }, { - "epoch": 0.7347985347985349, - "grad_norm": 0.36071974765757886, - "learning_rate": 1.8747679851029282e-05, - "loss": 0.462, + "epoch": 0.7382599735021346, + "grad_norm": 0.380589725275835, + "learning_rate": 1.8727814669906244e-05, + "loss": 0.4334, "step": 5015 }, { - "epoch": 0.7355311355311356, - "grad_norm": 0.368934043679171, - "learning_rate": 1.874354622970577e-05, - "loss": 0.4552, + "epoch": 0.7389960253201825, + "grad_norm": 0.347189931778005, + "learning_rate": 1.8723631091001736e-05, + "loss": 0.4538, "step": 5020 }, { - "epoch": 0.7362637362637363, - "grad_norm": 0.36533841497706593, - "learning_rate": 1.8739406254808637e-05, - "loss": 0.4576, + "epoch": 0.7397320771382305, + "grad_norm": 0.3898337158360767, + "learning_rate": 1.8719441113408304e-05, + "loss": 0.4525, "step": 5025 }, { - "epoch": 0.736996336996337, - "grad_norm": 0.36771301086535146, - "learning_rate": 1.873525992934623e-05, - "loss": 0.4612, + "epoch": 0.7404681289562786, + "grad_norm": 0.38327819805222985, + "learning_rate": 1.8715244740199247e-05, + "loss": 0.4497, "step": 5030 }, { - "epoch": 0.7377289377289378, - "grad_norm": 0.3791449694508501, - "learning_rate": 1.873110725633151e-05, - "loss": 0.4517, + "epoch": 0.7412041807743265, + "grad_norm": 0.3945510025537431, + "learning_rate": 1.871104197445256e-05, + "loss": 0.4618, "step": 5035 }, { - "epoch": 0.7384615384615385, - "grad_norm": 0.3727427986764495, - "learning_rate": 1.8726948238782057e-05, - "loss": 0.4383, + "epoch": 0.7419402325923745, + "grad_norm": 0.3806548660495654, + "learning_rate": 1.8706832819250923e-05, + "loss": 0.4563, "step": 5040 }, { - "epoch": 0.7391941391941392, - "grad_norm": 0.3781575792567377, - "learning_rate": 1.8722782879720058e-05, - "loss": 0.4623, + "epoch": 0.7426762844104225, + "grad_norm": 0.36540110870625, + "learning_rate": 1.8702617277681713e-05, + "loss": 0.4511, "step": 5045 }, { - "epoch": 0.73992673992674, - "grad_norm": 0.359064891528205, - "learning_rate": 1.8718611182172298e-05, - "loss": 0.449, + "epoch": 0.7434123362284705, + "grad_norm": 0.4287468254008677, + "learning_rate": 1.8698395352836982e-05, + "loss": 0.4437, "step": 5050 }, { - "epoch": 0.7406593406593407, - "grad_norm": 0.44843334665346996, - "learning_rate": 1.871443314917019e-05, - "loss": 0.4493, + "epoch": 0.7441483880465185, + "grad_norm": 0.3913820437377021, + "learning_rate": 1.869416704781346e-05, + "loss": 0.4685, "step": 5055 }, { - "epoch": 0.7413919413919414, - "grad_norm": 0.3957264108268295, - "learning_rate": 1.871024878374973e-05, - "loss": 0.4561, + "epoch": 0.7448844398645664, + "grad_norm": 0.39129385276647827, + "learning_rate": 1.868993236571257e-05, + "loss": 0.45, "step": 5060 }, { - "epoch": 0.7421245421245422, - "grad_norm": 0.37828618552992577, - "learning_rate": 1.8706058088951523e-05, - "loss": 0.4557, + "epoch": 0.7456204916826145, + "grad_norm": 0.3673533380962962, + "learning_rate": 1.86856913096404e-05, + "loss": 0.4959, "step": 5065 }, { - "epoch": 0.7428571428571429, - "grad_norm": 0.4035863638705251, - "learning_rate": 1.8701861067820777e-05, - "loss": 0.447, + "epoch": 0.7463565435006625, + "grad_norm": 0.4092195675059342, + "learning_rate": 1.8681443882707727e-05, + "loss": 0.4521, "step": 5070 }, { - "epoch": 0.7435897435897436, - "grad_norm": 0.3750533796195528, - "learning_rate": 1.8697657723407297e-05, - "loss": 0.4583, + "epoch": 0.7470925953187104, + "grad_norm": 0.39590745577112624, + "learning_rate": 1.867719008802998e-05, + "loss": 0.4514, "step": 5075 }, { - "epoch": 0.7443223443223443, - "grad_norm": 0.37723043217670793, - "learning_rate": 1.869344805876547e-05, - "loss": 0.451, + "epoch": 0.7478286471367585, + "grad_norm": 0.38196263810623365, + "learning_rate": 1.867292992872728e-05, + "loss": 0.4511, "step": 5080 }, { - "epoch": 0.7450549450549451, - "grad_norm": 0.39718945678044637, - "learning_rate": 1.8689232076954293e-05, - "loss": 0.4639, + "epoch": 0.7485646989548064, + "grad_norm": 0.4043595516480884, + "learning_rate": 1.866866340792441e-05, + "loss": 0.459, "step": 5085 }, { - "epoch": 0.7457875457875458, - "grad_norm": 0.3791498431905776, - "learning_rate": 1.8685009781037344e-05, - "loss": 0.4927, + "epoch": 0.7493007507728544, + "grad_norm": 0.402427574093436, + "learning_rate": 1.866439052875081e-05, + "loss": 0.4419, "step": 5090 }, { - "epoch": 0.7465201465201465, - "grad_norm": 0.3799852737272029, - "learning_rate": 1.8680781174082788e-05, - "loss": 0.4501, + "epoch": 0.7500368025909024, + "grad_norm": 0.3612962663270505, + "learning_rate": 1.8660111294340596e-05, + "loss": 0.4448, "step": 5095 }, { - "epoch": 0.7472527472527473, - "grad_norm": 0.36068963179554164, - "learning_rate": 1.8676546259163383e-05, - "loss": 0.4516, + "epoch": 0.7507728544089504, + "grad_norm": 0.3855820456686503, + "learning_rate": 1.8655825707832535e-05, + "loss": 0.4501, "step": 5100 }, { - "epoch": 0.747985347985348, - "grad_norm": 0.35788361997095947, - "learning_rate": 1.8672305039356465e-05, - "loss": 0.4522, + "epoch": 0.7515089062269984, + "grad_norm": 0.37589164340013, + "learning_rate": 1.8651533772370062e-05, + "loss": 0.4479, "step": 5105 }, { - "epoch": 0.7487179487179487, - "grad_norm": 0.3433115907360614, - "learning_rate": 1.866805751774395e-05, - "loss": 0.4414, + "epoch": 0.7522449580450463, + "grad_norm": 0.38967281407068854, + "learning_rate": 1.8647235491101265e-05, + "loss": 0.4383, "step": 5110 }, { - "epoch": 0.7494505494505495, - "grad_norm": 0.4014853165182206, - "learning_rate": 1.866380369741234e-05, - "loss": 0.4405, + "epoch": 0.7529810098630944, + "grad_norm": 0.3836742905405019, + "learning_rate": 1.8642930867178887e-05, + "loss": 0.4441, "step": 5115 }, { - "epoch": 0.7501831501831502, - "grad_norm": 0.38277765018604176, - "learning_rate": 1.865954358145271e-05, - "loss": 0.45, + "epoch": 0.7537170616811424, + "grad_norm": 0.3730181494757174, + "learning_rate": 1.8638619903760316e-05, + "loss": 0.4478, "step": 5120 }, { - "epoch": 0.7509157509157509, - "grad_norm": 0.3801569195926829, - "learning_rate": 1.865527717296071e-05, - "loss": 0.4372, + "epoch": 0.7544531134991903, + "grad_norm": 0.3925730698628966, + "learning_rate": 1.8634302604007608e-05, + "loss": 0.448, "step": 5125 }, { - "epoch": 0.7516483516483516, - "grad_norm": 0.3755925269128549, - "learning_rate": 1.8651004475036564e-05, - "loss": 0.4533, + "epoch": 0.7551891653172383, + "grad_norm": 0.37571986510259725, + "learning_rate": 1.8629978971087447e-05, + "loss": 0.4543, "step": 5130 }, { - "epoch": 0.7523809523809524, - "grad_norm": 0.3611593354999632, - "learning_rate": 1.864672549078506e-05, - "loss": 0.4687, + "epoch": 0.7559252171352864, + "grad_norm": 0.3941350023240938, + "learning_rate": 1.862564900817118e-05, + "loss": 0.4657, "step": 5135 }, { - "epoch": 0.7531135531135531, - "grad_norm": 0.36469816725356635, - "learning_rate": 1.8642440223315567e-05, - "loss": 0.4404, + "epoch": 0.7566612689533343, + "grad_norm": 0.3930544565143214, + "learning_rate": 1.862131271843478e-05, + "loss": 0.4459, "step": 5140 }, { - "epoch": 0.7538461538461538, - "grad_norm": 0.36037064742670877, - "learning_rate": 1.8638148675742003e-05, - "loss": 0.4422, + "epoch": 0.7573973207713823, + "grad_norm": 0.3871592158191773, + "learning_rate": 1.8616970105058868e-05, + "loss": 0.4655, "step": 5145 }, { - "epoch": 0.7545787545787546, - "grad_norm": 0.37843169049672115, - "learning_rate": 1.8633850851182864e-05, - "loss": 0.4359, + "epoch": 0.7581333725894303, + "grad_norm": 0.3686584661352915, + "learning_rate": 1.8612621171228706e-05, + "loss": 0.4463, "step": 5150 }, { - "epoch": 0.7553113553113553, - "grad_norm": 0.369373339711034, - "learning_rate": 1.86295467527612e-05, - "loss": 0.4388, + "epoch": 0.7588694244074783, + "grad_norm": 0.37756209216050757, + "learning_rate": 1.860826592013419e-05, + "loss": 0.4412, "step": 5155 }, { - "epoch": 0.756043956043956, - "grad_norm": 0.36148931406656815, - "learning_rate": 1.8625236383604618e-05, - "loss": 0.4331, + "epoch": 0.7596054762255263, + "grad_norm": 0.41869147081399966, + "learning_rate": 1.860390435496986e-05, + "loss": 0.4518, "step": 5160 }, { - "epoch": 0.7567765567765568, - "grad_norm": 0.3593933995850175, - "learning_rate": 1.8620919746845292e-05, - "loss": 0.4437, + "epoch": 0.7603415280435742, + "grad_norm": 0.36431124316624397, + "learning_rate": 1.8599536478934857e-05, + "loss": 0.4488, "step": 5165 }, { - "epoch": 0.7575091575091575, - "grad_norm": 0.3778438132302129, - "learning_rate": 1.861659684561994e-05, - "loss": 0.458, + "epoch": 0.7610775798616223, + "grad_norm": 0.45365567304602106, + "learning_rate": 1.8595162295232985e-05, + "loss": 0.4367, "step": 5170 }, { - "epoch": 0.7582417582417582, - "grad_norm": 0.36715635699796345, - "learning_rate": 1.8612267683069833e-05, - "loss": 0.4391, + "epoch": 0.7618136316796702, + "grad_norm": 0.37125941809574287, + "learning_rate": 1.8590781807072653e-05, + "loss": 0.4437, "step": 5175 }, { - "epoch": 0.7589743589743589, - "grad_norm": 0.38118576475659105, - "learning_rate": 1.8607932262340797e-05, - "loss": 0.4476, + "epoch": 0.7625496834977182, + "grad_norm": 0.3670272715328077, + "learning_rate": 1.8586395017666907e-05, + "loss": 0.4559, "step": 5180 }, { - "epoch": 0.7597069597069597, - "grad_norm": 0.36684997930037677, - "learning_rate": 1.8603590586583203e-05, - "loss": 0.4356, + "epoch": 0.7632857353157663, + "grad_norm": 0.3603956656614328, + "learning_rate": 1.8582001930233408e-05, + "loss": 0.4459, "step": 5185 }, { - "epoch": 0.7604395604395604, - "grad_norm": 0.3761906469625915, - "learning_rate": 1.859924265895197e-05, - "loss": 0.5002, + "epoch": 0.7640217871338142, + "grad_norm": 0.3684551799200026, + "learning_rate": 1.8577602547994432e-05, + "loss": 0.4339, "step": 5190 }, { - "epoch": 0.7611721611721611, - "grad_norm": 0.4942240977546921, - "learning_rate": 1.8594888482606553e-05, - "loss": 0.4955, + "epoch": 0.7647578389518622, + "grad_norm": 0.34139189157603933, + "learning_rate": 1.857319687417689e-05, + "loss": 0.4515, "step": 5195 }, { - "epoch": 0.7619047619047619, - "grad_norm": 0.3877521828288198, - "learning_rate": 1.8590528060710955e-05, - "loss": 0.4414, + "epoch": 0.7654938907699103, + "grad_norm": 0.3628155667104919, + "learning_rate": 1.8568784912012287e-05, + "loss": 0.4527, "step": 5200 }, { - "epoch": 0.7626373626373626, - "grad_norm": 0.3887164371745051, - "learning_rate": 1.858616139643371e-05, - "loss": 0.4483, + "epoch": 0.7662299425879582, + "grad_norm": 0.3856449838688728, + "learning_rate": 1.856436666473675e-05, + "loss": 0.4607, "step": 5205 }, { - "epoch": 0.7633699633699633, - "grad_norm": 0.370497863473674, - "learning_rate": 1.85817884929479e-05, - "loss": 0.4555, + "epoch": 0.7669659944060062, + "grad_norm": 0.3608153117696857, + "learning_rate": 1.855994213559102e-05, + "loss": 0.4362, "step": 5210 }, { - "epoch": 0.764102564102564, - "grad_norm": 0.37618089203333643, - "learning_rate": 1.8577409353431125e-05, - "loss": 0.4419, + "epoch": 0.7677020462240541, + "grad_norm": 0.40070515487703645, + "learning_rate": 1.855551132782044e-05, + "loss": 0.449, "step": 5215 }, { - "epoch": 0.7648351648351648, - "grad_norm": 0.37049761322207986, - "learning_rate": 1.857302398106553e-05, - "loss": 0.4486, + "epoch": 0.7684380980421022, + "grad_norm": 0.3677756171763214, + "learning_rate": 1.8551074244674955e-05, + "loss": 0.4561, "step": 5220 }, { - "epoch": 0.7655677655677655, - "grad_norm": 0.3700544252440904, - "learning_rate": 1.856863237903778e-05, - "loss": 0.4559, + "epoch": 0.7691741498601502, + "grad_norm": 0.3781817453838708, + "learning_rate": 1.8546630889409123e-05, + "loss": 0.4517, "step": 5225 }, { - "epoch": 0.7663003663003664, - "grad_norm": 0.388488061559954, - "learning_rate": 1.856423455053907e-05, - "loss": 0.4328, + "epoch": 0.7699102016781981, + "grad_norm": 0.3627035431665186, + "learning_rate": 1.8542181265282094e-05, + "loss": 0.4436, "step": 5230 }, { - "epoch": 0.7670329670329671, - "grad_norm": 0.3653284188280524, - "learning_rate": 1.8559830498765126e-05, - "loss": 0.4335, + "epoch": 0.7706462534962462, + "grad_norm": 0.37592957274051353, + "learning_rate": 1.853772537555762e-05, + "loss": 0.4564, "step": 5235 }, { - "epoch": 0.7677655677655678, - "grad_norm": 0.35741335636323357, - "learning_rate": 1.8555420226916186e-05, - "loss": 0.4351, + "epoch": 0.7713823053142941, + "grad_norm": 0.3885122459222397, + "learning_rate": 1.8533263223504046e-05, + "loss": 0.4371, "step": 5240 }, { - "epoch": 0.7684981684981685, - "grad_norm": 0.38870008021616226, - "learning_rate": 1.8551003738197017e-05, - "loss": 0.4258, + "epoch": 0.7721183571323421, + "grad_norm": 0.36563992082775965, + "learning_rate": 1.8528794812394316e-05, + "loss": 0.4597, "step": 5245 }, { - "epoch": 0.7692307692307693, - "grad_norm": 0.35448825136665196, - "learning_rate": 1.8546581035816894e-05, - "loss": 0.4682, + "epoch": 0.7728544089503901, + "grad_norm": 0.3711770613797605, + "learning_rate": 1.852432014550596e-05, + "loss": 0.4513, "step": 5250 }, { - "epoch": 0.76996336996337, - "grad_norm": 0.3647680958110189, - "learning_rate": 1.854215212298961e-05, - "loss": 0.4454, + "epoch": 0.7735904607684381, + "grad_norm": 0.3914413582610382, + "learning_rate": 1.8519839226121092e-05, + "loss": 0.4517, "step": 5255 }, { - "epoch": 0.7706959706959707, - "grad_norm": 0.3699915302758118, - "learning_rate": 1.853771700293348e-05, - "loss": 0.4744, + "epoch": 0.7743265125864861, + "grad_norm": 0.35992934113945746, + "learning_rate": 1.8515352057526423e-05, + "loss": 0.4344, "step": 5260 }, { - "epoch": 0.7714285714285715, - "grad_norm": 0.40320021190841804, - "learning_rate": 1.8533275678871316e-05, - "loss": 0.475, + "epoch": 0.775062564404534, + "grad_norm": 0.3764103775263548, + "learning_rate": 1.8510858643013245e-05, + "loss": 0.4399, "step": 5265 }, { - "epoch": 0.7721611721611722, - "grad_norm": 0.39419311147330366, - "learning_rate": 1.8528828154030446e-05, - "loss": 0.4567, + "epoch": 0.7757986162225821, + "grad_norm": 0.3511428578042135, + "learning_rate": 1.8506358985877427e-05, + "loss": 0.4523, "step": 5270 }, { - "epoch": 0.7728937728937729, - "grad_norm": 0.3716502661782266, - "learning_rate": 1.8524374431642705e-05, - "loss": 0.4585, + "epoch": 0.7765346680406301, + "grad_norm": 0.3972820359769529, + "learning_rate": 1.8501853089419413e-05, + "loss": 0.4543, "step": 5275 }, { - "epoch": 0.7736263736263737, - "grad_norm": 0.39443663328176837, - "learning_rate": 1.8519914514944428e-05, - "loss": 0.4466, + "epoch": 0.777270719858678, + "grad_norm": 0.3756548072147889, + "learning_rate": 1.849734095694424e-05, + "loss": 0.4699, "step": 5280 }, { - "epoch": 0.7743589743589744, - "grad_norm": 0.39711950445167926, - "learning_rate": 1.8515448407176453e-05, - "loss": 0.4628, + "epoch": 0.778006771676726, + "grad_norm": 0.3747943458424141, + "learning_rate": 1.8492822591761497e-05, + "loss": 0.4367, "step": 5285 }, { - "epoch": 0.7750915750915751, - "grad_norm": 0.384051731059323, - "learning_rate": 1.8510976111584113e-05, - "loss": 0.436, + "epoch": 0.7787428234947741, + "grad_norm": 0.37093375853708366, + "learning_rate": 1.8488297997185362e-05, + "loss": 0.4661, "step": 5290 }, { - "epoch": 0.7758241758241758, - "grad_norm": 0.38157072979128603, - "learning_rate": 1.8506497631417243e-05, - "loss": 0.482, + "epoch": 0.779478875312822, + "grad_norm": 0.38644659694052486, + "learning_rate": 1.8483767176534576e-05, + "loss": 0.4555, "step": 5295 }, { - "epoch": 0.7765567765567766, - "grad_norm": 0.3870879852096543, - "learning_rate": 1.8502012969930167e-05, - "loss": 0.4362, + "epoch": 0.78021492713087, + "grad_norm": 0.38387569939452526, + "learning_rate": 1.8479230133132447e-05, + "loss": 0.447, "step": 5300 }, { - "epoch": 0.7772893772893773, - "grad_norm": 0.353015417387719, - "learning_rate": 1.8497522130381705e-05, - "loss": 0.4504, + "epoch": 0.780950978948918, + "grad_norm": 0.376852133988765, + "learning_rate": 1.8474686870306847e-05, + "loss": 0.4349, "step": 5305 }, { - "epoch": 0.778021978021978, - "grad_norm": 0.36233329639826845, - "learning_rate": 1.8493025116035164e-05, - "loss": 0.4502, + "epoch": 0.781687030766966, + "grad_norm": 0.38169512123303556, + "learning_rate": 1.8470137391390207e-05, + "loss": 0.4414, "step": 5310 }, { - "epoch": 0.7787545787545788, - "grad_norm": 0.3568028276113107, - "learning_rate": 1.848852193015834e-05, - "loss": 0.4433, + "epoch": 0.782423082585014, + "grad_norm": 0.3689428108576067, + "learning_rate": 1.8465581699719524e-05, + "loss": 0.4636, "step": 5315 }, { - "epoch": 0.7794871794871795, - "grad_norm": 0.3601597074511503, - "learning_rate": 1.8484012576023506e-05, - "loss": 0.4318, + "epoch": 0.783159134403062, + "grad_norm": 0.371240196462308, + "learning_rate": 1.846101979863635e-05, + "loss": 0.4482, "step": 5320 }, { - "epoch": 0.7802197802197802, - "grad_norm": 0.3641022279761321, - "learning_rate": 1.847949705690743e-05, - "loss": 0.4501, + "epoch": 0.78389518622111, + "grad_norm": 0.39432719446589, + "learning_rate": 1.8456451691486786e-05, + "loss": 0.4498, "step": 5325 }, { - "epoch": 0.780952380952381, - "grad_norm": 0.3467781441532753, - "learning_rate": 1.8474975376091353e-05, - "loss": 0.4453, + "epoch": 0.7846312380391579, + "grad_norm": 0.3657710193307759, + "learning_rate": 1.845187738162149e-05, + "loss": 0.4494, "step": 5330 }, { - "epoch": 0.7816849816849817, - "grad_norm": 0.3858985111673107, - "learning_rate": 1.847044753686099e-05, - "loss": 0.445, + "epoch": 0.7853672898572059, + "grad_norm": 0.3680275296634426, + "learning_rate": 1.8447296872395673e-05, + "loss": 0.4501, "step": 5335 }, { - "epoch": 0.7824175824175824, - "grad_norm": 0.3807176887377528, - "learning_rate": 1.8465913542506535e-05, - "loss": 0.4391, + "epoch": 0.786103341675254, + "grad_norm": 0.37786387115319886, + "learning_rate": 1.844271016716908e-05, + "loss": 0.4612, "step": 5340 }, { - "epoch": 0.7831501831501831, - "grad_norm": 0.35993120225254266, - "learning_rate": 1.8461373396322656e-05, - "loss": 0.455, + "epoch": 0.7868393934933019, + "grad_norm": 0.39300182510769294, + "learning_rate": 1.8438117269306016e-05, + "loss": 0.4625, "step": 5345 }, { - "epoch": 0.7838827838827839, - "grad_norm": 0.3579613273358578, - "learning_rate": 1.845682710160849e-05, - "loss": 0.4485, + "epoch": 0.7875754453113499, + "grad_norm": 0.3571373651113558, + "learning_rate": 1.843351818217532e-05, + "loss": 0.4491, "step": 5350 }, { - "epoch": 0.7846153846153846, - "grad_norm": 0.7095086087647203, - "learning_rate": 1.845227466166764e-05, - "loss": 0.4382, + "epoch": 0.788311497129398, + "grad_norm": 0.35989435857796004, + "learning_rate": 1.8428912909150378e-05, + "loss": 0.4383, "step": 5355 }, { - "epoch": 0.7853479853479853, - "grad_norm": 0.3672460689680624, - "learning_rate": 1.8447716079808173e-05, - "loss": 0.4448, + "epoch": 0.7890475489474459, + "grad_norm": 0.36017249210285296, + "learning_rate": 1.84243014536091e-05, + "loss": 0.4561, "step": 5360 }, { - "epoch": 0.7860805860805861, - "grad_norm": 0.37486097935123897, - "learning_rate": 1.8443151359342628e-05, - "loss": 0.4482, + "epoch": 0.7897836007654939, + "grad_norm": 0.3599029744954481, + "learning_rate": 1.8419683818933943e-05, + "loss": 0.4512, "step": 5365 }, { - "epoch": 0.7868131868131868, - "grad_norm": 0.36079331013575355, - "learning_rate": 1.843858050358799e-05, - "loss": 0.4352, + "epoch": 0.7905196525835418, + "grad_norm": 0.3731630654712982, + "learning_rate": 1.8415060008511888e-05, + "loss": 0.4525, "step": 5370 }, { - "epoch": 0.7875457875457875, - "grad_norm": 0.39482594783223374, - "learning_rate": 1.8434003515865723e-05, - "loss": 0.4567, + "epoch": 0.7912557044015899, + "grad_norm": 0.3700359157670555, + "learning_rate": 1.8410430025734455e-05, + "loss": 0.4587, "step": 5375 }, { - "epoch": 0.7882783882783883, - "grad_norm": 0.3999742065119079, - "learning_rate": 1.8429420399501726e-05, - "loss": 0.4613, + "epoch": 0.7919917562196379, + "grad_norm": 0.40329273308198865, + "learning_rate": 1.840579387399769e-05, + "loss": 0.4299, "step": 5380 }, { - "epoch": 0.789010989010989, - "grad_norm": 0.3838801617736062, - "learning_rate": 1.8424831157826367e-05, - "loss": 0.4642, + "epoch": 0.7927278080376858, + "grad_norm": 0.3557122428580658, + "learning_rate": 1.8401151556702147e-05, + "loss": 0.4356, "step": 5385 }, { - "epoch": 0.7897435897435897, - "grad_norm": 0.3614566587995, - "learning_rate": 1.842023579417445e-05, - "loss": 0.4446, + "epoch": 0.7934638598557339, + "grad_norm": 0.3785726951882142, + "learning_rate": 1.8396503077252922e-05, + "loss": 0.4405, "step": 5390 }, { - "epoch": 0.7904761904761904, - "grad_norm": 0.3858603380855061, - "learning_rate": 1.841563431188524e-05, - "loss": 0.4496, + "epoch": 0.7941999116737818, + "grad_norm": 0.3869038189914558, + "learning_rate": 1.8391848439059627e-05, + "loss": 0.4445, "step": 5395 }, { - "epoch": 0.7912087912087912, - "grad_norm": 0.3598539639497055, - "learning_rate": 1.8411026714302442e-05, - "loss": 0.4609, + "epoch": 0.7949359634918298, + "grad_norm": 0.3539505443058832, + "learning_rate": 1.838718764553639e-05, + "loss": 0.4293, "step": 5400 }, { - "epoch": 0.7919413919413919, - "grad_norm": 0.3938895475003292, - "learning_rate": 1.840641300477421e-05, - "loss": 0.439, + "epoch": 0.7956720153098779, + "grad_norm": 0.40425072185625943, + "learning_rate": 1.8382520700101842e-05, + "loss": 0.4567, "step": 5405 }, { - "epoch": 0.7926739926739926, - "grad_norm": 0.6150085736480287, - "learning_rate": 1.8401793186653137e-05, - "loss": 0.4785, + "epoch": 0.7964080671279258, + "grad_norm": 0.3711515438134564, + "learning_rate": 1.837784760617915e-05, + "loss": 0.437, "step": 5410 }, { - "epoch": 0.7934065934065934, - "grad_norm": 0.35701126359919005, - "learning_rate": 1.8397167263296248e-05, - "loss": 0.453, + "epoch": 0.7971441189459738, + "grad_norm": 0.3818939300370364, + "learning_rate": 1.8373168367195967e-05, + "loss": 0.4459, "step": 5415 }, { - "epoch": 0.7941391941391941, - "grad_norm": 0.38688320595265163, - "learning_rate": 1.8392535238065016e-05, - "loss": 0.4546, + "epoch": 0.7978801707640217, + "grad_norm": 0.365958939061682, + "learning_rate": 1.8368482986584474e-05, + "loss": 0.4463, "step": 5420 }, { - "epoch": 0.7948717948717948, - "grad_norm": 0.37797055568968796, - "learning_rate": 1.8387897114325337e-05, - "loss": 0.4566, + "epoch": 0.7986162225820698, + "grad_norm": 0.34264355403477026, + "learning_rate": 1.836379146778134e-05, + "loss": 0.4365, "step": 5425 }, { - "epoch": 0.7956043956043956, - "grad_norm": 0.3709536305983092, - "learning_rate": 1.838325289544755e-05, - "loss": 0.4515, + "epoch": 0.7993522744001178, + "grad_norm": 0.3827506711198045, + "learning_rate": 1.8359093814227743e-05, + "loss": 0.4464, "step": 5430 }, { - "epoch": 0.7963369963369963, - "grad_norm": 0.3974020225169862, - "learning_rate": 1.837860258480641e-05, - "loss": 0.4595, + "epoch": 0.8000883262181657, + "grad_norm": 0.3737144115110153, + "learning_rate": 1.8354390029369367e-05, + "loss": 0.4439, "step": 5435 }, { - "epoch": 0.7970695970695971, - "grad_norm": 0.3727483028673619, - "learning_rate": 1.837394618578111e-05, - "loss": 0.4514, + "epoch": 0.8008243780362138, + "grad_norm": 0.3660125324278533, + "learning_rate": 1.8349680116656383e-05, + "loss": 0.4347, "step": 5440 }, { - "epoch": 0.7978021978021979, - "grad_norm": 0.36935258408678506, - "learning_rate": 1.8369283701755264e-05, - "loss": 0.4473, + "epoch": 0.8015604298542618, + "grad_norm": 0.35499618388758564, + "learning_rate": 1.834496407954346e-05, + "loss": 0.4455, "step": 5445 }, { - "epoch": 0.7985347985347986, - "grad_norm": 0.3899646603873487, - "learning_rate": 1.8364615136116902e-05, - "loss": 0.4619, + "epoch": 0.8022964816723097, + "grad_norm": 0.391762324948392, + "learning_rate": 1.8340241921489762e-05, + "loss": 0.4508, "step": 5450 }, { - "epoch": 0.7992673992673993, - "grad_norm": 0.3938468777203018, - "learning_rate": 1.8359940492258482e-05, - "loss": 0.465, + "epoch": 0.8030325334903577, + "grad_norm": 0.3675686249642425, + "learning_rate": 1.833551364595894e-05, + "loss": 0.4541, "step": 5455 }, { - "epoch": 0.8, - "grad_norm": 0.39489444569801624, - "learning_rate": 1.835525977357687e-05, - "loss": 0.4531, + "epoch": 0.8037685853084057, + "grad_norm": 0.35284461393648797, + "learning_rate": 1.8330779256419133e-05, + "loss": 0.4223, "step": 5460 }, { - "epoch": 0.8007326007326008, - "grad_norm": 0.36553764851582793, - "learning_rate": 1.8350572983473353e-05, - "loss": 0.4537, + "epoch": 0.8045046371264537, + "grad_norm": 0.3572095074816016, + "learning_rate": 1.8326038756342967e-05, + "loss": 0.4287, "step": 5465 }, { - "epoch": 0.8014652014652015, - "grad_norm": 0.35314847287217677, - "learning_rate": 1.834588012535363e-05, - "loss": 0.4433, + "epoch": 0.8052406889445017, + "grad_norm": 0.36797577285222993, + "learning_rate": 1.8321292149207542e-05, + "loss": 0.4338, "step": 5470 }, { - "epoch": 0.8021978021978022, - "grad_norm": 0.35878611079441036, - "learning_rate": 1.8341181202627806e-05, - "loss": 0.4475, + "epoch": 0.8059767407625497, + "grad_norm": 0.3992223730063507, + "learning_rate": 1.831653943849445e-05, + "loss": 0.4508, "step": 5475 }, { - "epoch": 0.802930402930403, - "grad_norm": 0.3539347328291622, - "learning_rate": 1.833647621871039e-05, - "loss": 0.4386, + "epoch": 0.8067127925805977, + "grad_norm": 0.35710109084837255, + "learning_rate": 1.8311780627689746e-05, + "loss": 0.442, "step": 5480 }, { - "epoch": 0.8036630036630037, - "grad_norm": 0.37458522095393504, - "learning_rate": 1.83317651770203e-05, - "loss": 0.4488, + "epoch": 0.8074488443986456, + "grad_norm": 0.36857684404559116, + "learning_rate": 1.830701572028397e-05, + "loss": 0.4393, "step": 5485 }, { - "epoch": 0.8043956043956044, - "grad_norm": 0.36047244047702937, - "learning_rate": 1.832704808098086e-05, - "loss": 0.4472, + "epoch": 0.8081848962166936, + "grad_norm": 0.365337760645847, + "learning_rate": 1.8302244719772128e-05, + "loss": 0.4584, "step": 5490 }, { - "epoch": 0.8051282051282052, - "grad_norm": 0.3681356297525558, - "learning_rate": 1.8322324934019783e-05, - "loss": 0.4329, + "epoch": 0.8089209480347417, + "grad_norm": 0.36272299441325834, + "learning_rate": 1.82974676296537e-05, + "loss": 0.4414, "step": 5495 }, { - "epoch": 0.8058608058608059, - "grad_norm": 0.35249822895051003, - "learning_rate": 1.831759573956919e-05, - "loss": 0.4422, + "epoch": 0.8096569998527896, + "grad_norm": 0.3808089020887524, + "learning_rate": 1.829268445343263e-05, + "loss": 0.4494, "step": 5500 }, { - "epoch": 0.8065934065934066, - "grad_norm": 0.3787470928657058, - "learning_rate": 1.831286050106559e-05, - "loss": 0.4525, + "epoch": 0.8103930516708376, + "grad_norm": 0.36515311233950665, + "learning_rate": 1.828789519461732e-05, + "loss": 0.4304, "step": 5505 }, { - "epoch": 0.8073260073260073, - "grad_norm": 0.3614213832013432, - "learning_rate": 1.8308119221949883e-05, - "loss": 0.4449, + "epoch": 0.8111291034888857, + "grad_norm": 0.3883250866338634, + "learning_rate": 1.8283099856720644e-05, + "loss": 0.4531, "step": 5510 }, { - "epoch": 0.8080586080586081, - "grad_norm": 0.3622051651540198, - "learning_rate": 1.830337190566736e-05, - "loss": 0.4237, + "epoch": 0.8118651553069336, + "grad_norm": 0.36668979698642856, + "learning_rate": 1.8278298443259933e-05, + "loss": 0.4543, "step": 5515 }, { - "epoch": 0.8087912087912088, - "grad_norm": 0.3590555015654966, - "learning_rate": 1.8298618555667702e-05, - "loss": 0.4327, + "epoch": 0.8126012071249816, + "grad_norm": 0.36618403443345937, + "learning_rate": 1.827349095775697e-05, + "loss": 0.4531, "step": 5520 }, { - "epoch": 0.8095238095238095, - "grad_norm": 0.37340106282476715, - "learning_rate": 1.8293859175404967e-05, - "loss": 0.4384, + "epoch": 0.8133372589430296, + "grad_norm": 0.347384132693151, + "learning_rate": 1.8268677403737987e-05, + "loss": 0.4284, "step": 5525 }, { - "epoch": 0.8102564102564103, - "grad_norm": 0.36540095124697075, - "learning_rate": 1.828909376833761e-05, - "loss": 0.438, + "epoch": 0.8140733107610776, + "grad_norm": 0.35052747224037406, + "learning_rate": 1.826385778473368e-05, + "loss": 0.4692, "step": 5530 }, { - "epoch": 0.810989010989011, - "grad_norm": 0.3656833313182153, - "learning_rate": 1.8284322337928447e-05, - "loss": 0.4369, + "epoch": 0.8148093625791256, + "grad_norm": 0.3775231856628442, + "learning_rate": 1.8259032104279192e-05, + "loss": 0.4519, "step": 5535 }, { - "epoch": 0.8117216117216117, - "grad_norm": 0.38878197123203745, - "learning_rate": 1.827954488764468e-05, - "loss": 0.4432, + "epoch": 0.8155454143971735, + "grad_norm": 0.38341860673581035, + "learning_rate": 1.8254200365914095e-05, + "loss": 0.4513, "step": 5540 }, { - "epoch": 0.8124542124542125, - "grad_norm": 0.40991346099163334, - "learning_rate": 1.8274761420957885e-05, - "loss": 0.4442, + "epoch": 0.8162814662152216, + "grad_norm": 0.3757091428549333, + "learning_rate": 1.8249362573182426e-05, + "loss": 0.4421, "step": 5545 }, { - "epoch": 0.8131868131868132, - "grad_norm": 0.37087513786427795, - "learning_rate": 1.8269971941344015e-05, - "loss": 0.4441, + "epoch": 0.8170175180332695, + "grad_norm": 0.37270354415776735, + "learning_rate": 1.8244518729632653e-05, + "loss": 0.4304, "step": 5550 }, { - "epoch": 0.8139194139194139, - "grad_norm": 0.36966756582618115, - "learning_rate": 1.8265176452283376e-05, - "loss": 0.4529, + "epoch": 0.8177535698513175, + "grad_norm": 0.3523588981852036, + "learning_rate": 1.823966883881768e-05, + "loss": 0.421, "step": 5555 }, { - "epoch": 0.8146520146520146, - "grad_norm": 0.36065429278785466, - "learning_rate": 1.8260374957260663e-05, - "loss": 0.452, + "epoch": 0.8184896216693656, + "grad_norm": 0.36890743395894915, + "learning_rate": 1.8234812904294848e-05, + "loss": 0.4385, "step": 5560 }, { - "epoch": 0.8153846153846154, - "grad_norm": 0.3755528851797531, - "learning_rate": 1.8255567459764915e-05, - "loss": 0.4495, + "epoch": 0.8192256734874135, + "grad_norm": 0.3679958989076562, + "learning_rate": 1.8229950929625937e-05, + "loss": 0.4527, "step": 5565 }, { - "epoch": 0.8161172161172161, - "grad_norm": 0.38585828003647493, - "learning_rate": 1.825075396328955e-05, - "loss": 0.4563, + "epoch": 0.8199617253054615, + "grad_norm": 0.3757095310963074, + "learning_rate": 1.822508291837715e-05, + "loss": 0.4502, "step": 5570 }, { - "epoch": 0.8168498168498168, - "grad_norm": 0.38564318808684317, - "learning_rate": 1.8245934471332328e-05, - "loss": 0.4674, + "epoch": 0.8206977771235094, + "grad_norm": 0.36416547211663713, + "learning_rate": 1.8220208874119124e-05, + "loss": 0.4401, "step": 5575 }, { - "epoch": 0.8175824175824176, - "grad_norm": 0.3852320217786936, - "learning_rate": 1.8241108987395383e-05, - "loss": 0.4577, + "epoch": 0.8214338289415575, + "grad_norm": 0.38246500140389866, + "learning_rate": 1.821532880042691e-05, + "loss": 0.4553, "step": 5580 }, { - "epoch": 0.8183150183150183, - "grad_norm": 0.36745376533563195, - "learning_rate": 1.8236277514985187e-05, - "loss": 0.4577, + "epoch": 0.8221698807596055, + "grad_norm": 0.37685866397775736, + "learning_rate": 1.8210442700879993e-05, + "loss": 0.4506, "step": 5585 }, { - "epoch": 0.819047619047619, - "grad_norm": 0.35681374230355783, - "learning_rate": 1.8231440057612578e-05, - "loss": 0.4525, + "epoch": 0.8229059325776534, + "grad_norm": 0.36557315146939534, + "learning_rate": 1.8205550579062277e-05, + "loss": 0.4456, "step": 5590 }, { - "epoch": 0.8197802197802198, - "grad_norm": 0.3858709344287839, - "learning_rate": 1.8226596618792735e-05, - "loss": 0.4462, + "epoch": 0.8236419843957015, + "grad_norm": 0.361921077104969, + "learning_rate": 1.820065243856208e-05, + "loss": 0.4566, "step": 5595 }, { - "epoch": 0.8205128205128205, - "grad_norm": 0.3713342510413519, - "learning_rate": 1.8221747202045182e-05, - "loss": 0.4614, + "epoch": 0.8243780362137495, + "grad_norm": 0.3875910863569535, + "learning_rate": 1.8195748282972136e-05, + "loss": 0.4402, "step": 5600 }, { - "epoch": 0.8212454212454212, - "grad_norm": 0.37547437011409723, - "learning_rate": 1.821689181089379e-05, - "loss": 0.4526, + "epoch": 0.8251140880317974, + "grad_norm": 0.37048417394215916, + "learning_rate": 1.819083811588958e-05, + "loss": 0.4537, "step": 5605 }, { - "epoch": 0.8219780219780219, - "grad_norm": 0.3618467967252856, - "learning_rate": 1.8212030448866774e-05, - "loss": 0.4319, + "epoch": 0.8258501398498455, + "grad_norm": 0.35012768784241244, + "learning_rate": 1.8185921940915977e-05, + "loss": 0.4365, "step": 5610 }, { - "epoch": 0.8227106227106227, - "grad_norm": 0.3448704961367217, - "learning_rate": 1.820716311949668e-05, - "loss": 0.4887, + "epoch": 0.8265861916678934, + "grad_norm": 0.3705116290320156, + "learning_rate": 1.8180999761657286e-05, + "loss": 0.437, "step": 5615 }, { - "epoch": 0.8234432234432234, - "grad_norm": 0.3567342227035512, - "learning_rate": 1.8202289826320396e-05, - "loss": 0.447, + "epoch": 0.8273222434859414, + "grad_norm": 0.3587912899651877, + "learning_rate": 1.8176071581723872e-05, + "loss": 0.4319, "step": 5620 }, { - "epoch": 0.8241758241758241, - "grad_norm": 0.3590067657015829, - "learning_rate": 1.819741057287915e-05, - "loss": 0.4389, + "epoch": 0.8280582953039894, + "grad_norm": 0.3838217342869182, + "learning_rate": 1.8171137404730497e-05, + "loss": 0.4501, "step": 5625 }, { - "epoch": 0.8249084249084249, - "grad_norm": 0.38231454074130955, - "learning_rate": 1.8192525362718486e-05, - "loss": 0.4415, + "epoch": 0.8287943471220374, + "grad_norm": 0.3715639137108274, + "learning_rate": 1.816619723429633e-05, + "loss": 0.4485, "step": 5630 }, { - "epoch": 0.8256410256410256, - "grad_norm": 0.3746456485110328, - "learning_rate": 1.8187634199388284e-05, - "loss": 0.4397, + "epoch": 0.8295303989400854, + "grad_norm": 0.3483529315906707, + "learning_rate": 1.8161251074044938e-05, + "loss": 0.4464, "step": 5635 }, { - "epoch": 0.8263736263736263, - "grad_norm": 0.36611002508904406, - "learning_rate": 1.8182737086442757e-05, - "loss": 0.45, + "epoch": 0.8302664507581333, + "grad_norm": 0.36112338980486547, + "learning_rate": 1.8156298927604265e-05, + "loss": 0.4251, "step": 5640 }, { - "epoch": 0.827106227106227, - "grad_norm": 0.37341336111571827, - "learning_rate": 1.817783402744043e-05, - "loss": 0.492, + "epoch": 0.8310025025761814, + "grad_norm": 0.35503588938112196, + "learning_rate": 1.8151340798606664e-05, + "loss": 0.4565, "step": 5645 }, { - "epoch": 0.8278388278388278, - "grad_norm": 0.7970515407489609, - "learning_rate": 1.8172925025944153e-05, - "loss": 0.4431, + "epoch": 0.8317385543942294, + "grad_norm": 0.35872582895347865, + "learning_rate": 1.8146376690688867e-05, + "loss": 0.452, "step": 5650 }, { - "epoch": 0.8285714285714286, - "grad_norm": 0.36273365672650953, - "learning_rate": 1.8168010085521092e-05, - "loss": 0.4464, + "epoch": 0.8324746062122773, + "grad_norm": 0.39098541892175553, + "learning_rate": 1.8141406607491994e-05, + "loss": 0.4332, "step": 5655 }, { - "epoch": 0.8293040293040294, - "grad_norm": 0.35503852594146296, - "learning_rate": 1.8163089209742738e-05, - "loss": 0.4419, + "epoch": 0.8332106580303253, + "grad_norm": 0.34949566666207704, + "learning_rate": 1.8136430552661548e-05, + "loss": 0.4243, "step": 5660 }, { - "epoch": 0.8300366300366301, - "grad_norm": 0.35280501209823245, - "learning_rate": 1.8158162402184885e-05, - "loss": 0.4412, + "epoch": 0.8339467098483734, + "grad_norm": 0.3674696893778593, + "learning_rate": 1.8131448529847408e-05, + "loss": 0.4335, "step": 5665 }, { - "epoch": 0.8307692307692308, - "grad_norm": 0.381689499406052, - "learning_rate": 1.8153229666427637e-05, - "loss": 0.4331, + "epoch": 0.8346827616664213, + "grad_norm": 0.35916143870324896, + "learning_rate": 1.812646054270384e-05, + "loss": 0.4565, "step": 5670 }, { - "epoch": 0.8315018315018315, - "grad_norm": 0.3563507854326389, - "learning_rate": 1.8148291006055414e-05, - "loss": 0.4367, + "epoch": 0.8354188134844693, + "grad_norm": 0.38713495753289473, + "learning_rate": 1.812146659488947e-05, + "loss": 0.4559, "step": 5675 }, { - "epoch": 0.8322344322344323, - "grad_norm": 0.39339940600097917, - "learning_rate": 1.8143346424656934e-05, - "loss": 0.4355, + "epoch": 0.8361548653025173, + "grad_norm": 0.35110284160129296, + "learning_rate": 1.8116466690067305e-05, + "loss": 0.4377, "step": 5680 }, { - "epoch": 0.832967032967033, - "grad_norm": 0.3647447079242811, - "learning_rate": 1.8138395925825218e-05, - "loss": 0.4456, + "epoch": 0.8368909171205653, + "grad_norm": 0.3776798856757189, + "learning_rate": 1.8111460831904734e-05, + "loss": 0.4369, "step": 5685 }, { - "epoch": 0.8336996336996337, - "grad_norm": 0.3630697500736815, - "learning_rate": 1.8133439513157593e-05, - "loss": 0.4477, + "epoch": 0.8376269689386133, + "grad_norm": 0.36307720486779066, + "learning_rate": 1.8106449024073487e-05, + "loss": 0.4657, "step": 5690 }, { - "epoch": 0.8344322344322345, - "grad_norm": 0.3509428195025076, - "learning_rate": 1.8128477190255677e-05, - "loss": 0.4365, + "epoch": 0.8383630207566612, + "grad_norm": 0.37569502905454044, + "learning_rate": 1.8101431270249678e-05, + "loss": 0.4414, "step": 5695 }, { - "epoch": 0.8351648351648352, - "grad_norm": 0.37631260789384546, - "learning_rate": 1.812350896072538e-05, - "loss": 0.4362, + "epoch": 0.8390990725747093, + "grad_norm": 0.38106182035348096, + "learning_rate": 1.8096407574113766e-05, + "loss": 0.4506, "step": 5700 }, { - "epoch": 0.8358974358974359, - "grad_norm": 0.34402078961448856, - "learning_rate": 1.8118534828176922e-05, - "loss": 0.4538, + "epoch": 0.8398351243927572, + "grad_norm": 0.37332006831666176, + "learning_rate": 1.8091377939350585e-05, + "loss": 0.4356, "step": 5705 }, { - "epoch": 0.8366300366300367, - "grad_norm": 0.36987338499672917, - "learning_rate": 1.8113554796224787e-05, - "loss": 0.4626, + "epoch": 0.8405711762108052, + "grad_norm": 0.38142312594246786, + "learning_rate": 1.808634236964932e-05, + "loss": 0.4533, "step": 5710 }, { - "epoch": 0.8373626373626374, - "grad_norm": 0.37382664447171465, - "learning_rate": 1.8108568868487767e-05, - "loss": 0.4375, + "epoch": 0.8413072280288533, + "grad_norm": 0.35027498208673846, + "learning_rate": 1.80813008687035e-05, + "loss": 0.4288, "step": 5715 }, { - "epoch": 0.8380952380952381, - "grad_norm": 0.49701574346598654, - "learning_rate": 1.810357704858892e-05, - "loss": 0.4584, + "epoch": 0.8420432798469012, + "grad_norm": 0.36118089061149933, + "learning_rate": 1.8076253440211015e-05, + "loss": 0.463, "step": 5720 }, { - "epoch": 0.8388278388278388, - "grad_norm": 0.3946556829023878, - "learning_rate": 1.8098579340155603e-05, - "loss": 0.4536, + "epoch": 0.8427793316649492, + "grad_norm": 0.3479218004425318, + "learning_rate": 1.8071200087874096e-05, + "loss": 0.4247, "step": 5725 }, { - "epoch": 0.8395604395604396, - "grad_norm": 0.37085792154551794, - "learning_rate": 1.8093575746819443e-05, - "loss": 0.4339, + "epoch": 0.8435153834829971, + "grad_norm": 0.3647558131692801, + "learning_rate": 1.8066140815399325e-05, + "loss": 0.4499, "step": 5730 }, { - "epoch": 0.8402930402930403, - "grad_norm": 0.3996995534274054, - "learning_rate": 1.8088566272216344e-05, - "loss": 0.4593, + "epoch": 0.8442514353010452, + "grad_norm": 0.36710243654144514, + "learning_rate": 1.8061075626497623e-05, + "loss": 0.4456, "step": 5735 }, { - "epoch": 0.841025641025641, - "grad_norm": 0.36609811623203165, - "learning_rate": 1.8083550919986483e-05, - "loss": 0.4393, + "epoch": 0.8449874871190932, + "grad_norm": 0.37048760818431076, + "learning_rate": 1.8056004524884256e-05, + "loss": 0.4583, "step": 5740 }, { - "epoch": 0.8417582417582418, - "grad_norm": 0.36578815772035955, - "learning_rate": 1.8078529693774308e-05, - "loss": 0.4886, + "epoch": 0.8457235389371411, + "grad_norm": 2.407465284178379, + "learning_rate": 1.8050927514278813e-05, + "loss": 0.446, "step": 5745 }, { - "epoch": 0.8424908424908425, - "grad_norm": 0.3943288970973504, - "learning_rate": 1.807350259722854e-05, - "loss": 0.4413, + "epoch": 0.8464595907551892, + "grad_norm": 0.39853603765662204, + "learning_rate": 1.8045844598405233e-05, + "loss": 0.4483, "step": 5750 }, { - "epoch": 0.8432234432234432, - "grad_norm": 0.3404081783087885, - "learning_rate": 1.806846963400216e-05, - "loss": 0.4443, + "epoch": 0.8471956425732372, + "grad_norm": 0.3913222133525507, + "learning_rate": 1.8040755780991776e-05, + "loss": 0.4388, "step": 5755 }, { - "epoch": 0.843956043956044, - "grad_norm": 0.35096481604460117, - "learning_rate": 1.8063430807752414e-05, - "loss": 0.4433, + "epoch": 0.8479316943912851, + "grad_norm": 0.35525574871247056, + "learning_rate": 1.803566106577104e-05, + "loss": 0.4328, "step": 5760 }, { - "epoch": 0.8446886446886447, - "grad_norm": 0.38710053606474926, - "learning_rate": 1.805838612214081e-05, - "loss": 0.4396, + "epoch": 0.8486677462093332, + "grad_norm": 0.3620075978660273, + "learning_rate": 1.8030560456479933e-05, + "loss": 0.4395, "step": 5765 }, { - "epoch": 0.8454212454212454, - "grad_norm": 0.3651629348457347, - "learning_rate": 1.8053335580833112e-05, - "loss": 0.4475, + "epoch": 0.8494037980273811, + "grad_norm": 0.3518378202004742, + "learning_rate": 1.8025453956859707e-05, + "loss": 0.4345, "step": 5770 }, { - "epoch": 0.8461538461538461, - "grad_norm": 0.3987845742126223, - "learning_rate": 1.804827918749934e-05, - "loss": 0.4409, + "epoch": 0.8501398498454291, + "grad_norm": 0.3810142137831866, + "learning_rate": 1.802034157065592e-05, + "loss": 0.4457, "step": 5775 }, { - "epoch": 0.8468864468864469, - "grad_norm": 0.37633061700939063, - "learning_rate": 1.8043216945813764e-05, - "loss": 0.4495, + "epoch": 0.8508759016634772, + "grad_norm": 0.3417713865113361, + "learning_rate": 1.801522330161845e-05, + "loss": 0.4427, "step": 5780 }, { - "epoch": 0.8476190476190476, - "grad_norm": 0.37078702743335046, - "learning_rate": 1.803814885945491e-05, - "loss": 0.45, + "epoch": 0.8516119534815251, + "grad_norm": 0.3434090925556998, + "learning_rate": 1.8010099153501496e-05, + "loss": 0.4376, "step": 5785 }, { - "epoch": 0.8483516483516483, - "grad_norm": 0.36461446476347853, - "learning_rate": 1.8033074932105542e-05, - "loss": 0.4506, + "epoch": 0.8523480052995731, + "grad_norm": 0.37073730240412717, + "learning_rate": 1.8004969130063562e-05, + "loss": 0.4445, "step": 5790 }, { - "epoch": 0.8490842490842491, - "grad_norm": 0.3777899347748043, - "learning_rate": 1.8027995167452678e-05, - "loss": 0.4647, + "epoch": 0.853084057117621, + "grad_norm": 0.3706926685948443, + "learning_rate": 1.7999833235067466e-05, + "loss": 0.4364, "step": 5795 }, { - "epoch": 0.8498168498168498, - "grad_norm": 0.36394643705921864, - "learning_rate": 1.802290956918757e-05, - "loss": 0.4509, + "epoch": 0.8538201089356691, + "grad_norm": 0.35952848302399276, + "learning_rate": 1.799469147228033e-05, + "loss": 0.4594, "step": 5800 }, { - "epoch": 0.8505494505494505, - "grad_norm": 0.3685824062385075, - "learning_rate": 1.8017818141005712e-05, - "loss": 0.4452, + "epoch": 0.8545561607537171, + "grad_norm": 0.366251188214727, + "learning_rate": 1.7989543845473585e-05, + "loss": 0.4568, "step": 5805 }, { - "epoch": 0.8512820512820513, - "grad_norm": 0.37407351999618116, - "learning_rate": 1.8012720886606834e-05, - "loss": 0.429, + "epoch": 0.855292212571765, + "grad_norm": 0.37387266078880926, + "learning_rate": 1.7984390358422957e-05, + "loss": 0.4522, "step": 5810 }, { - "epoch": 0.852014652014652, - "grad_norm": 0.35006965931360773, - "learning_rate": 1.80076178096949e-05, - "loss": 0.4525, + "epoch": 0.856028264389813, + "grad_norm": 0.35727442551212146, + "learning_rate": 1.7979231014908475e-05, + "loss": 0.4547, "step": 5815 }, { - "epoch": 0.8527472527472527, - "grad_norm": 0.35092174273004206, - "learning_rate": 1.800250891397811e-05, - "loss": 0.4406, + "epoch": 0.8567643162078611, + "grad_norm": 0.35751741935540254, + "learning_rate": 1.7974065818714456e-05, + "loss": 0.4556, "step": 5820 }, { - "epoch": 0.8534798534798534, - "grad_norm": 0.37239973812022864, - "learning_rate": 1.799739420316888e-05, - "loss": 0.4521, + "epoch": 0.857500368025909, + "grad_norm": 0.3548710093691114, + "learning_rate": 1.7968894773629516e-05, + "loss": 0.435, "step": 5825 }, { - "epoch": 0.8542124542124542, - "grad_norm": 0.385803878666077, - "learning_rate": 1.7992273680983864e-05, - "loss": 0.441, + "epoch": 0.858236419843957, + "grad_norm": 0.34775308478188827, + "learning_rate": 1.796371788344657e-05, + "loss": 0.4367, "step": 5830 }, { - "epoch": 0.8549450549450549, - "grad_norm": 0.3685372405718134, - "learning_rate": 1.7987147351143933e-05, - "loss": 0.4475, + "epoch": 0.858972471662005, + "grad_norm": 0.3811514664148062, + "learning_rate": 1.7958535151962803e-05, + "loss": 0.4521, "step": 5835 }, { - "epoch": 0.8556776556776556, - "grad_norm": 0.37185126386807804, - "learning_rate": 1.7982015217374176e-05, - "loss": 0.4393, + "epoch": 0.859708523480053, + "grad_norm": 0.3669226247199922, + "learning_rate": 1.795334658297969e-05, + "loss": 0.4397, "step": 5840 }, { - "epoch": 0.8564102564102564, - "grad_norm": 0.3473113321126282, - "learning_rate": 1.7976877283403906e-05, - "loss": 0.4145, + "epoch": 0.860444575298101, + "grad_norm": 0.35854880100158387, + "learning_rate": 1.7948152180303e-05, + "loss": 0.4238, "step": 5845 }, { - "epoch": 0.8571428571428571, - "grad_norm": 0.36999708682756344, - "learning_rate": 1.7971733552966645e-05, - "loss": 0.4394, + "epoch": 0.861180627116149, + "grad_norm": 0.3654599811899801, + "learning_rate": 1.7942951947742763e-05, + "loss": 0.4304, "step": 5850 }, { - "epoch": 0.8578754578754578, - "grad_norm": 0.3793725160321135, - "learning_rate": 1.7966584029800135e-05, - "loss": 0.4557, + "epoch": 0.861916678934197, + "grad_norm": 0.34496532912792455, + "learning_rate": 1.7937745889113292e-05, + "loss": 0.4309, "step": 5855 }, { - "epoch": 0.8586080586080586, - "grad_norm": 0.35404015757458934, - "learning_rate": 1.796142871764631e-05, - "loss": 0.4283, + "epoch": 0.8626527307522449, + "grad_norm": 0.37889428636763145, + "learning_rate": 1.7932534008233182e-05, + "loss": 0.4609, "step": 5860 }, { - "epoch": 0.8593406593406593, - "grad_norm": 0.356545090974131, - "learning_rate": 1.7956267620251334e-05, - "loss": 0.4399, + "epoch": 0.863388782570293, + "grad_norm": 0.38209494491819235, + "learning_rate": 1.7927316308925283e-05, + "loss": 0.4531, "step": 5865 }, { - "epoch": 0.8600732600732601, - "grad_norm": 0.3533846397534374, - "learning_rate": 1.7951100741365555e-05, - "loss": 0.4363, + "epoch": 0.864124834388341, + "grad_norm": 0.36509393103751375, + "learning_rate": 1.7922092795016732e-05, + "loss": 0.4378, "step": 5870 }, { - "epoch": 0.8608058608058609, - "grad_norm": 0.3751400459972393, - "learning_rate": 1.7945928084743533e-05, - "loss": 0.4481, + "epoch": 0.8648608862063889, + "grad_norm": 0.34771616466055044, + "learning_rate": 1.791686347033891e-05, + "loss": 0.4419, "step": 5875 }, { - "epoch": 0.8615384615384616, - "grad_norm": 0.3559244611269117, - "learning_rate": 1.7940749654144026e-05, - "loss": 0.4361, + "epoch": 0.8655969380244369, + "grad_norm": 0.3674692070761661, + "learning_rate": 1.791162833872747e-05, + "loss": 0.4398, "step": 5880 }, { - "epoch": 0.8622710622710623, - "grad_norm": 0.3758775698301192, - "learning_rate": 1.7935565453329983e-05, - "loss": 0.4554, + "epoch": 0.8663329898424849, + "grad_norm": 0.3520396685118552, + "learning_rate": 1.790638740402233e-05, + "loss": 0.4416, "step": 5885 }, { - "epoch": 0.863003663003663, - "grad_norm": 0.6880224103063644, - "learning_rate": 1.793037548606855e-05, - "loss": 0.449, + "epoch": 0.8670690416605329, + "grad_norm": 0.3719988966714447, + "learning_rate": 1.7901140670067656e-05, + "loss": 0.4554, "step": 5890 }, { - "epoch": 0.8637362637362638, - "grad_norm": 0.3539214046988438, - "learning_rate": 1.792517975613106e-05, - "loss": 0.4345, + "epoch": 0.8678050934785809, + "grad_norm": 0.3690570201357628, + "learning_rate": 1.789588814071187e-05, + "loss": 0.4331, "step": 5895 }, { - "epoch": 0.8644688644688645, - "grad_norm": 0.35642889205036643, - "learning_rate": 1.7919978267293038e-05, - "loss": 0.4458, + "epoch": 0.8685411452966288, + "grad_norm": 0.34978248618101204, + "learning_rate": 1.789062981980765e-05, + "loss": 0.439, "step": 5900 }, { - "epoch": 0.8652014652014652, - "grad_norm": 0.3942719622252805, - "learning_rate": 1.7914771023334186e-05, - "loss": 0.4469, + "epoch": 0.8692771971146769, + "grad_norm": 0.36624291745278736, + "learning_rate": 1.788536571121191e-05, + "loss": 0.4323, "step": 5905 }, { - "epoch": 0.865934065934066, - "grad_norm": 0.3912235103465084, - "learning_rate": 1.7909558028038397e-05, - "loss": 0.4382, + "epoch": 0.8700132489327249, + "grad_norm": 0.46229638344586804, + "learning_rate": 1.788009581878582e-05, + "loss": 0.4477, "step": 5910 }, { - "epoch": 0.8666666666666667, - "grad_norm": 0.36516670112938615, - "learning_rate": 1.7904339285193738e-05, - "loss": 0.4473, + "epoch": 0.8707493007507728, + "grad_norm": 0.37396607227419815, + "learning_rate": 1.787482014639479e-05, + "loss": 0.4393, "step": 5915 }, { - "epoch": 0.8673992673992674, - "grad_norm": 0.3551416896853839, - "learning_rate": 1.7899114798592455e-05, - "loss": 0.4524, + "epoch": 0.8714853525688209, + "grad_norm": 0.37122982449214265, + "learning_rate": 1.7869538697908466e-05, + "loss": 0.4402, "step": 5920 }, { - "epoch": 0.8681318681318682, - "grad_norm": 0.3642603079549456, - "learning_rate": 1.7893884572030963e-05, - "loss": 0.4514, + "epoch": 0.8722214043868688, + "grad_norm": 0.37120657056831224, + "learning_rate": 1.7864251477200738e-05, + "loss": 0.4437, "step": 5925 }, { - "epoch": 0.8688644688644689, - "grad_norm": 0.36531254956832565, - "learning_rate": 1.7888648609309855e-05, - "loss": 0.4402, + "epoch": 0.8729574562049168, + "grad_norm": 0.3585818869382761, + "learning_rate": 1.7858958488149724e-05, + "loss": 0.4368, "step": 5930 }, { - "epoch": 0.8695970695970696, - "grad_norm": 0.3614156415868774, - "learning_rate": 1.7883406914233887e-05, - "loss": 0.4391, + "epoch": 0.8736935080229649, + "grad_norm": 0.3698141558742395, + "learning_rate": 1.7853659734637766e-05, + "loss": 0.4413, "step": 5935 }, { - "epoch": 0.8703296703296703, - "grad_norm": 0.3504318199203323, - "learning_rate": 1.7878159490611985e-05, - "loss": 0.4274, + "epoch": 0.8744295598410128, + "grad_norm": 0.3855876831267745, + "learning_rate": 1.7848355220551456e-05, + "loss": 0.4387, "step": 5940 }, { - "epoch": 0.8710622710622711, - "grad_norm": 0.37194708447240693, - "learning_rate": 1.787290634225723e-05, - "loss": 0.4446, + "epoch": 0.8751656116590608, + "grad_norm": 0.36621342556791076, + "learning_rate": 1.7843044949781583e-05, + "loss": 0.4479, "step": 5945 }, { - "epoch": 0.8717948717948718, - "grad_norm": 0.3806751249182061, - "learning_rate": 1.7867647472986867e-05, - "loss": 0.4355, + "epoch": 0.8759016634771087, + "grad_norm": 0.3424070085839823, + "learning_rate": 1.7837728926223186e-05, + "loss": 0.4428, "step": 5950 }, { - "epoch": 0.8725274725274725, - "grad_norm": 0.3858153470499486, - "learning_rate": 1.7862382886622306e-05, - "loss": 0.4309, + "epoch": 0.8766377152951568, + "grad_norm": 0.35449364017215884, + "learning_rate": 1.7832407153775502e-05, + "loss": 0.4522, "step": 5955 }, { - "epoch": 0.8732600732600733, - "grad_norm": 0.3539393871318372, - "learning_rate": 1.78571125869891e-05, - "loss": 0.4282, + "epoch": 0.8773737671132048, + "grad_norm": 0.3525405003036111, + "learning_rate": 1.7827079636342e-05, + "loss": 0.4311, "step": 5960 }, { - "epoch": 0.873992673992674, - "grad_norm": 0.3698727253718363, - "learning_rate": 1.785183657791696e-05, - "loss": 0.438, + "epoch": 0.8781098189312527, + "grad_norm": 0.35645386727273587, + "learning_rate": 1.782174637783035e-05, + "loss": 0.4411, "step": 5965 }, { - "epoch": 0.8747252747252747, - "grad_norm": 0.36033087121528645, - "learning_rate": 1.784655486323974e-05, - "loss": 0.4463, + "epoch": 0.8788458707493008, + "grad_norm": 0.3666268050359117, + "learning_rate": 1.7816407382152443e-05, + "loss": 0.447, "step": 5970 }, { - "epoch": 0.8754578754578755, - "grad_norm": 0.3696278035088527, - "learning_rate": 1.7841267446795444e-05, - "loss": 0.4459, + "epoch": 0.8795819225673488, + "grad_norm": 0.37250456874223414, + "learning_rate": 1.7811062653224374e-05, + "loss": 0.4655, "step": 5975 }, { - "epoch": 0.8761904761904762, - "grad_norm": 0.3641746049449239, - "learning_rate": 1.7835974332426224e-05, - "loss": 0.4212, + "epoch": 0.8803179743853967, + "grad_norm": 0.36092929099158944, + "learning_rate": 1.7805712194966443e-05, + "loss": 0.4566, "step": 5980 }, { - "epoch": 0.8769230769230769, - "grad_norm": 0.3496867702032214, - "learning_rate": 1.783067552397836e-05, - "loss": 0.4407, + "epoch": 0.8810540262034448, + "grad_norm": 0.33567737532624775, + "learning_rate": 1.7800356011303156e-05, + "loss": 0.4474, "step": 5985 }, { - "epoch": 0.8776556776556776, - "grad_norm": 0.3614859564979733, - "learning_rate": 1.782537102530228e-05, - "loss": 0.4522, + "epoch": 0.8817900780214927, + "grad_norm": 0.3653761820052591, + "learning_rate": 1.7794994106163212e-05, + "loss": 0.4493, "step": 5990 }, { - "epoch": 0.8783882783882784, - "grad_norm": 0.36421283641217894, - "learning_rate": 1.782006084025255e-05, - "loss": 0.4285, + "epoch": 0.8825261298395407, + "grad_norm": 0.35656753170206057, + "learning_rate": 1.778962648347951e-05, + "loss": 0.4358, "step": 5995 }, { - "epoch": 0.8791208791208791, - "grad_norm": 0.419513631468471, - "learning_rate": 1.7814744972687857e-05, - "loss": 0.4455, + "epoch": 0.8832621816575887, + "grad_norm": 0.37457391578364474, + "learning_rate": 1.7784253147189146e-05, + "loss": 0.4259, "step": 6000 }, { - "epoch": 0.8798534798534798, - "grad_norm": 0.354330410359772, - "learning_rate": 1.7809423426471018e-05, - "loss": 0.4656, + "epoch": 0.8839982334756367, + "grad_norm": 0.3566126547130828, + "learning_rate": 1.7778874101233406e-05, + "loss": 0.4465, "step": 6005 }, { - "epoch": 0.8805860805860806, - "grad_norm": 0.35900884915326187, - "learning_rate": 1.7804096205468987e-05, - "loss": 0.4473, + "epoch": 0.8847342852936847, + "grad_norm": 0.34727232006464764, + "learning_rate": 1.7773489349557756e-05, + "loss": 0.4375, "step": 6010 }, { - "epoch": 0.8813186813186813, - "grad_norm": 0.36804505720762803, - "learning_rate": 1.7798763313552832e-05, - "loss": 0.4381, + "epoch": 0.8854703371117326, + "grad_norm": 0.3486190773511903, + "learning_rate": 1.7768098896111854e-05, + "loss": 0.446, "step": 6015 }, { - "epoch": 0.882051282051282, - "grad_norm": 0.36697586215792644, - "learning_rate": 1.779342475459775e-05, - "loss": 0.4416, + "epoch": 0.8862063889297807, + "grad_norm": 0.3655089745027463, + "learning_rate": 1.7762702744849546e-05, + "loss": 0.4559, "step": 6020 }, { - "epoch": 0.8827838827838828, - "grad_norm": 0.3616003666688806, - "learning_rate": 1.7788080532483043e-05, - "loss": 0.4402, + "epoch": 0.8869424407478287, + "grad_norm": 0.3878902418363454, + "learning_rate": 1.7757300899728842e-05, + "loss": 0.4387, "step": 6025 }, { - "epoch": 0.8835164835164835, - "grad_norm": 0.3625678232884831, - "learning_rate": 1.7782730651092143e-05, - "loss": 0.4272, + "epoch": 0.8876784925658766, + "grad_norm": 0.3715951742442913, + "learning_rate": 1.775189336471194e-05, + "loss": 0.4461, "step": 6030 }, { - "epoch": 0.8842490842490842, - "grad_norm": 0.3525429739793186, - "learning_rate": 1.7777375114312584e-05, - "loss": 0.4376, + "epoch": 0.8884145443839246, + "grad_norm": 0.352923429841573, + "learning_rate": 1.774648014376521e-05, + "loss": 0.4571, "step": 6035 }, { - "epoch": 0.884981684981685, - "grad_norm": 0.37246107416507285, - "learning_rate": 1.7772013926036016e-05, - "loss": 0.4341, + "epoch": 0.8891505962019726, + "grad_norm": 0.37039532769096317, + "learning_rate": 1.774106124085919e-05, + "loss": 0.4529, "step": 6040 }, { - "epoch": 0.8857142857142857, - "grad_norm": 0.34048150719409837, - "learning_rate": 1.7766647090158193e-05, - "loss": 0.4323, + "epoch": 0.8898866480200206, + "grad_norm": 0.5768260851908226, + "learning_rate": 1.773563665996859e-05, + "loss": 0.4527, "step": 6045 }, { - "epoch": 0.8864468864468864, - "grad_norm": 0.37398024115120954, - "learning_rate": 1.7761274610578968e-05, - "loss": 0.4434, + "epoch": 0.8906226998380686, + "grad_norm": 0.4870960586341211, + "learning_rate": 1.7730206405072272e-05, + "loss": 0.4468, "step": 6050 }, { - "epoch": 0.8871794871794871, - "grad_norm": 0.35873272657760785, - "learning_rate": 1.7755896491202303e-05, - "loss": 0.4494, + "epoch": 0.8913587516561166, + "grad_norm": 0.36762519408457883, + "learning_rate": 1.772477048015328e-05, + "loss": 0.4568, "step": 6055 }, { - "epoch": 0.8879120879120879, - "grad_norm": 0.35741385020399524, - "learning_rate": 1.7750512735936257e-05, - "loss": 0.4421, + "epoch": 0.8920948034741646, + "grad_norm": 0.3637046517058373, + "learning_rate": 1.7719328889198798e-05, + "loss": 0.433, "step": 6060 }, { - "epoch": 0.8886446886446886, - "grad_norm": 0.3656854547321221, - "learning_rate": 1.774512334869298e-05, - "loss": 0.4566, + "epoch": 0.8928308552922126, + "grad_norm": 0.3714092685023693, + "learning_rate": 1.7713881636200177e-05, + "loss": 0.4553, "step": 6065 }, { - "epoch": 0.8893772893772893, - "grad_norm": 0.4183610536274334, - "learning_rate": 1.7739728333388718e-05, - "loss": 0.4478, + "epoch": 0.8935669071102605, + "grad_norm": 0.3560384199032731, + "learning_rate": 1.7708428725152917e-05, + "loss": 0.4411, "step": 6070 }, { - "epoch": 0.8901098901098901, - "grad_norm": 0.3429267484869677, - "learning_rate": 1.7734327693943805e-05, - "loss": 0.4319, + "epoch": 0.8943029589283086, + "grad_norm": 0.3570093335300102, + "learning_rate": 1.7702970160056675e-05, + "loss": 0.4531, "step": 6075 }, { - "epoch": 0.8908424908424909, - "grad_norm": 0.36437477716150785, - "learning_rate": 1.7728921434282662e-05, - "loss": 0.4519, + "epoch": 0.8950390107463565, + "grad_norm": 0.36570811442375184, + "learning_rate": 1.769750594491524e-05, + "loss": 0.458, "step": 6080 }, { - "epoch": 0.8915750915750916, - "grad_norm": 0.3620889390157865, - "learning_rate": 1.7723509558333797e-05, - "loss": 0.4305, + "epoch": 0.8957750625644045, + "grad_norm": 0.3625464342620715, + "learning_rate": 1.769203608373656e-05, + "loss": 0.4533, "step": 6085 }, { - "epoch": 0.8923076923076924, - "grad_norm": 0.36916306992865705, - "learning_rate": 1.771809207002979e-05, - "loss": 0.4648, + "epoch": 0.8965111143824526, + "grad_norm": 0.34712557080290696, + "learning_rate": 1.7686560580532718e-05, + "loss": 0.4398, "step": 6090 }, { - "epoch": 0.8930402930402931, - "grad_norm": 0.36276907808748354, - "learning_rate": 1.7712668973307314e-05, - "loss": 0.4446, + "epoch": 0.8972471662005005, + "grad_norm": 0.3434525247932784, + "learning_rate": 1.768107943931994e-05, + "loss": 0.4451, "step": 6095 }, { - "epoch": 0.8937728937728938, - "grad_norm": 0.3713824748145057, - "learning_rate": 1.7707240272107103e-05, - "loss": 0.4376, + "epoch": 0.8979832180185485, + "grad_norm": 0.49155308252521906, + "learning_rate": 1.7675592664118576e-05, + "loss": 0.4711, "step": 6100 }, { - "epoch": 0.8945054945054945, - "grad_norm": 0.3853780727628513, - "learning_rate": 1.7701805970373973e-05, - "loss": 0.4524, + "epoch": 0.8987192698365964, + "grad_norm": 2.8143041060928846, + "learning_rate": 1.7670100258953122e-05, + "loss": 0.5056, "step": 6105 }, { - "epoch": 0.8952380952380953, - "grad_norm": 0.37390910022738727, - "learning_rate": 1.7696366072056807e-05, - "loss": 0.4338, + "epoch": 0.8994553216546445, + "grad_norm": 0.37123973097542096, + "learning_rate": 1.7664602227852197e-05, + "loss": 0.4635, "step": 6110 }, { - "epoch": 0.895970695970696, - "grad_norm": 0.36797190937530155, - "learning_rate": 1.7690920581108554e-05, - "loss": 0.4503, + "epoch": 0.9001913734726925, + "grad_norm": 0.3388106319630038, + "learning_rate": 1.7659098574848545e-05, + "loss": 0.4525, "step": 6115 }, { - "epoch": 0.8967032967032967, - "grad_norm": 0.3600886980839345, - "learning_rate": 1.7685469501486223e-05, - "loss": 0.4346, + "epoch": 0.9009274252907404, + "grad_norm": 0.3229785070032047, + "learning_rate": 1.7653589303979037e-05, + "loss": 0.4424, "step": 6120 }, { - "epoch": 0.8974358974358975, - "grad_norm": 0.36132169150003796, - "learning_rate": 1.768001283715089e-05, - "loss": 0.4362, + "epoch": 0.9016634771087885, + "grad_norm": 0.35178580499584966, + "learning_rate": 1.7648074419284667e-05, + "loss": 0.4564, "step": 6125 }, { - "epoch": 0.8981684981684982, - "grad_norm": 0.3594715208945071, - "learning_rate": 1.7674550592067697e-05, - "loss": 0.45, + "epoch": 0.9023995289268365, + "grad_norm": 0.38055354355902876, + "learning_rate": 1.7642553924810536e-05, + "loss": 0.4374, "step": 6130 }, { - "epoch": 0.8989010989010989, - "grad_norm": 0.35705956611102607, - "learning_rate": 1.7669082770205816e-05, - "loss": 0.4471, + "epoch": 0.9031355807448844, + "grad_norm": 0.33434831557500305, + "learning_rate": 1.7637027824605874e-05, + "loss": 0.4462, "step": 6135 }, { - "epoch": 0.8996336996336997, - "grad_norm": 0.3651317182837795, - "learning_rate": 1.7663609375538497e-05, - "loss": 0.444, + "epoch": 0.9038716325629325, + "grad_norm": 0.4707904364370907, + "learning_rate": 1.7631496122724013e-05, + "loss": 0.4366, "step": 6140 }, { - "epoch": 0.9003663003663004, - "grad_norm": 0.3460089635371655, - "learning_rate": 1.7658130412043028e-05, - "loss": 0.4356, + "epoch": 0.9046076843809804, + "grad_norm": 0.36720105019429955, + "learning_rate": 1.7625958823222392e-05, + "loss": 0.4316, "step": 6145 }, { - "epoch": 0.9010989010989011, - "grad_norm": 0.49053288325000116, - "learning_rate": 1.7652645883700743e-05, - "loss": 0.459, + "epoch": 0.9053437361990284, + "grad_norm": 0.3957096565472825, + "learning_rate": 1.7620415930162566e-05, + "loss": 0.4609, "step": 6150 }, { - "epoch": 0.9018315018315018, - "grad_norm": 0.35973716200334555, - "learning_rate": 1.7647155794497023e-05, - "loss": 0.4436, + "epoch": 0.9060797880170764, + "grad_norm": 0.39061760884331537, + "learning_rate": 1.761486744761019e-05, + "loss": 0.4433, "step": 6155 }, { - "epoch": 0.9025641025641026, - "grad_norm": 0.36711220324788274, - "learning_rate": 1.7641660148421292e-05, - "loss": 0.4347, + "epoch": 0.9068158398351244, + "grad_norm": 0.38308404334046636, + "learning_rate": 1.7609313379635003e-05, + "loss": 0.4329, "step": 6160 }, { - "epoch": 0.9032967032967033, - "grad_norm": 0.47210235359387503, - "learning_rate": 1.7636158949467007e-05, - "loss": 0.4442, + "epoch": 0.9075518916531724, + "grad_norm": 0.36070664454033097, + "learning_rate": 1.760375373031087e-05, + "loss": 0.4334, "step": 6165 }, { - "epoch": 0.904029304029304, - "grad_norm": 0.3572689800679001, - "learning_rate": 1.763065220163166e-05, - "loss": 0.4505, + "epoch": 0.9082879434712203, + "grad_norm": 0.36561843004243144, + "learning_rate": 1.759818850371572e-05, + "loss": 0.439, "step": 6170 }, { - "epoch": 0.9047619047619048, - "grad_norm": 0.3736991210791609, - "learning_rate": 1.762513990891678e-05, - "loss": 0.4316, + "epoch": 0.9090239952892684, + "grad_norm": 0.359896754316887, + "learning_rate": 1.759261770393159e-05, + "loss": 0.4495, "step": 6175 }, { - "epoch": 0.9054945054945055, - "grad_norm": 0.36974271768950734, - "learning_rate": 1.7619622075327917e-05, - "loss": 0.4536, + "epoch": 0.9097600471073164, + "grad_norm": 0.34522120477333906, + "learning_rate": 1.758704133504461e-05, + "loss": 0.4499, "step": 6180 }, { - "epoch": 0.9062271062271062, - "grad_norm": 0.336076224398836, - "learning_rate": 1.7614098704874658e-05, - "loss": 0.4858, + "epoch": 0.9104960989253643, + "grad_norm": 0.3552054415454232, + "learning_rate": 1.758145940114497e-05, + "loss": 0.4433, "step": 6185 }, { - "epoch": 0.906959706959707, - "grad_norm": 0.3711009134399822, - "learning_rate": 1.7608569801570607e-05, - "loss": 0.444, + "epoch": 0.9112321507434124, + "grad_norm": 0.372895555177369, + "learning_rate": 1.7575871906326974e-05, + "loss": 0.4366, "step": 6190 }, { - "epoch": 0.9076923076923077, - "grad_norm": 0.3657491601918828, - "learning_rate": 1.7603035369433386e-05, - "loss": 0.4394, + "epoch": 0.9119682025614603, + "grad_norm": 0.6292986406740444, + "learning_rate": 1.7570278854688983e-05, + "loss": 0.4475, "step": 6195 }, { - "epoch": 0.9084249084249084, - "grad_norm": 0.3514255523290082, - "learning_rate": 1.759749541248464e-05, - "loss": 0.432, + "epoch": 0.9127042543795083, + "grad_norm": 0.3782237374359399, + "learning_rate": 1.7564680250333435e-05, + "loss": 0.4481, "step": 6200 }, { - "epoch": 0.9091575091575091, - "grad_norm": 0.3508779729073267, - "learning_rate": 1.7591949934750033e-05, - "loss": 0.4447, + "epoch": 0.9134403061975563, + "grad_norm": 0.3645107698776769, + "learning_rate": 1.755907609736685e-05, + "loss": 0.4952, "step": 6205 }, { - "epoch": 0.9098901098901099, - "grad_norm": 0.3510627678095866, - "learning_rate": 1.7586398940259222e-05, - "loss": 0.426, + "epoch": 0.9141763580156043, + "grad_norm": 0.36528595341648235, + "learning_rate": 1.7553466399899817e-05, + "loss": 0.4499, "step": 6210 }, { - "epoch": 0.9106227106227106, - "grad_norm": 0.33746241125074666, - "learning_rate": 1.7580842433045897e-05, - "loss": 0.4478, + "epoch": 0.9149124098336523, + "grad_norm": 0.47759719786471405, + "learning_rate": 1.7547851162046982e-05, + "loss": 0.4303, "step": 6215 }, { - "epoch": 0.9113553113553113, - "grad_norm": 0.3604438714051711, - "learning_rate": 1.7575280417147734e-05, - "loss": 0.4372, + "epoch": 0.9156484616517003, + "grad_norm": 0.3346974109016065, + "learning_rate": 1.7542230387927065e-05, + "loss": 0.4355, "step": 6220 }, { - "epoch": 0.9120879120879121, - "grad_norm": 0.3519089567901582, - "learning_rate": 1.7569712896606426e-05, - "loss": 0.4426, + "epoch": 0.9163845134697483, + "grad_norm": 1.6476739983025133, + "learning_rate": 1.753660408166284e-05, + "loss": 0.4423, "step": 6225 }, { - "epoch": 0.9128205128205128, - "grad_norm": 0.3685181626075242, - "learning_rate": 1.7564139875467657e-05, - "loss": 0.437, + "epoch": 0.9171205652877963, + "grad_norm": 0.3536202967982217, + "learning_rate": 1.7530972247381145e-05, + "loss": 0.4266, "step": 6230 }, { - "epoch": 0.9135531135531135, - "grad_norm": 0.36047567895159466, - "learning_rate": 1.755856135778111e-05, - "loss": 0.4396, + "epoch": 0.9178566171058442, + "grad_norm": 0.35739307239263124, + "learning_rate": 1.752533488921287e-05, + "loss": 0.4424, "step": 6235 }, { - "epoch": 0.9142857142857143, - "grad_norm": 0.35269599269066276, - "learning_rate": 1.7552977347600465e-05, - "loss": 0.4579, + "epoch": 0.9185926689238922, + "grad_norm": 0.35407192958711053, + "learning_rate": 1.751969201129295e-05, + "loss": 0.4564, "step": 6240 }, { - "epoch": 0.915018315018315, - "grad_norm": 0.4721634093341208, - "learning_rate": 1.754738784898339e-05, - "loss": 0.4388, + "epoch": 0.9193287207419403, + "grad_norm": 0.3549391504865819, + "learning_rate": 1.7514043617760383e-05, + "loss": 0.4394, "step": 6245 }, { - "epoch": 0.9157509157509157, - "grad_norm": 0.385978071550493, - "learning_rate": 1.7541792865991544e-05, - "loss": 0.4478, + "epoch": 0.9200647725599882, + "grad_norm": 0.3613180296609404, + "learning_rate": 1.7508389712758194e-05, + "loss": 0.4514, "step": 6250 }, { - "epoch": 0.9164835164835164, - "grad_norm": 0.34983217522519433, - "learning_rate": 1.753619240269057e-05, - "loss": 0.4222, + "epoch": 0.9208008243780362, + "grad_norm": 0.36808098019046603, + "learning_rate": 1.750273030043347e-05, + "loss": 0.4476, "step": 6255 }, { - "epoch": 0.9172161172161172, - "grad_norm": 0.36751855200567896, - "learning_rate": 1.7530586463150093e-05, - "loss": 0.4456, + "epoch": 0.9215368761960842, + "grad_norm": 0.36013451859199064, + "learning_rate": 1.749706538493733e-05, + "loss": 0.447, "step": 6260 }, { - "epoch": 0.9179487179487179, - "grad_norm": 0.37216248620528397, - "learning_rate": 1.7524975051443717e-05, - "loss": 0.4409, + "epoch": 0.9222729280141322, + "grad_norm": 0.4846557052431126, + "learning_rate": 1.749139497042492e-05, + "loss": 0.455, "step": 6265 }, { - "epoch": 0.9186813186813186, - "grad_norm": 0.36424445832211716, - "learning_rate": 1.751935817164902e-05, - "loss": 0.4348, + "epoch": 0.9230089798321802, + "grad_norm": 0.3824131604475871, + "learning_rate": 1.7485719061055432e-05, + "loss": 0.4624, "step": 6270 }, { - "epoch": 0.9194139194139194, - "grad_norm": 0.36065981663403024, - "learning_rate": 1.7513735827847563e-05, - "loss": 0.4386, + "epoch": 0.9237450316502281, + "grad_norm": 0.35124039668306284, + "learning_rate": 1.7480037660992088e-05, + "loss": 0.436, "step": 6275 }, { - "epoch": 0.9201465201465201, - "grad_norm": 0.3625708534458232, - "learning_rate": 1.750810802412487e-05, - "loss": 0.4654, + "epoch": 0.9244810834682762, + "grad_norm": 0.346051781465662, + "learning_rate": 1.7474350774402125e-05, + "loss": 0.4325, "step": 6280 }, { - "epoch": 0.9208791208791208, - "grad_norm": 0.3599274400943971, - "learning_rate": 1.7502474764570423e-05, - "loss": 0.4391, + "epoch": 0.9252171352863242, + "grad_norm": 0.3634607060672718, + "learning_rate": 1.746865840545682e-05, + "loss": 0.4764, "step": 6285 }, { - "epoch": 0.9216117216117216, - "grad_norm": 0.364927090667394, - "learning_rate": 1.749683605327769e-05, - "loss": 0.4437, + "epoch": 0.9259531871043721, + "grad_norm": 0.3484249956967804, + "learning_rate": 1.746296055833146e-05, + "loss": 0.439, "step": 6290 }, { - "epoch": 0.9223443223443224, - "grad_norm": 0.37376849612086793, - "learning_rate": 1.7491191894344086e-05, - "loss": 0.4497, + "epoch": 0.9266892389224202, + "grad_norm": 0.34103080377541267, + "learning_rate": 1.7457257237205355e-05, + "loss": 0.4325, "step": 6295 }, { - "epoch": 0.9230769230769231, - "grad_norm": 0.36168022937777394, - "learning_rate": 1.7485542291870988e-05, - "loss": 0.4409, + "epoch": 0.9274252907404681, + "grad_norm": 0.3728186219181222, + "learning_rate": 1.7451548446261828e-05, + "loss": 0.4516, "step": 6300 }, { - "epoch": 0.9238095238095239, - "grad_norm": 0.36368160179195114, - "learning_rate": 1.747988724996373e-05, - "loss": 0.4426, + "epoch": 0.9281613425585161, + "grad_norm": 0.3749731536453726, + "learning_rate": 1.744583418968822e-05, + "loss": 0.4212, "step": 6305 }, { - "epoch": 0.9245421245421246, - "grad_norm": 0.3729270678007294, - "learning_rate": 1.7474226772731594e-05, - "loss": 0.4612, + "epoch": 0.9288973943765642, + "grad_norm": 0.3508576954304995, + "learning_rate": 1.744011447167587e-05, + "loss": 0.4329, "step": 6310 }, { - "epoch": 0.9252747252747253, - "grad_norm": 0.35789516182360515, - "learning_rate": 1.746856086428782e-05, - "loss": 0.4604, + "epoch": 0.9296334461946121, + "grad_norm": 0.5724180355430332, + "learning_rate": 1.7434389296420132e-05, + "loss": 0.4329, "step": 6315 }, { - "epoch": 0.926007326007326, - "grad_norm": 0.35201255632556694, - "learning_rate": 1.746288952874958e-05, - "loss": 0.4607, + "epoch": 0.9303694980126601, + "grad_norm": 0.3665694394257063, + "learning_rate": 1.742865866812036e-05, + "loss": 0.4531, "step": 6320 }, { - "epoch": 0.9267399267399268, - "grad_norm": 0.38693859829795285, - "learning_rate": 1.7457212770238005e-05, - "loss": 0.4535, + "epoch": 0.931105549830708, + "grad_norm": 0.36417954650458134, + "learning_rate": 1.742292259097991e-05, + "loss": 0.4351, "step": 6325 }, { - "epoch": 0.9274725274725275, - "grad_norm": 0.34439330211437136, - "learning_rate": 1.7451530592878166e-05, - "loss": 0.4412, + "epoch": 0.9318416016487561, + "grad_norm": 0.3624096794766889, + "learning_rate": 1.7417181069206134e-05, + "loss": 0.4214, "step": 6330 }, { - "epoch": 0.9282051282051282, - "grad_norm": 0.35655682713389775, - "learning_rate": 1.7445843000799056e-05, - "loss": 0.4414, + "epoch": 0.9325776534668041, + "grad_norm": 0.36998020696000744, + "learning_rate": 1.741143410701037e-05, + "loss": 0.4451, "step": 6335 }, { - "epoch": 0.928937728937729, - "grad_norm": 0.3901791934309999, - "learning_rate": 1.7440149998133623e-05, - "loss": 0.4534, + "epoch": 0.933313705284852, + "grad_norm": 0.3668824985527801, + "learning_rate": 1.7405681708607956e-05, + "loss": 0.4415, "step": 6340 }, { - "epoch": 0.9296703296703297, - "grad_norm": 0.3410383184097131, - "learning_rate": 1.743445158901873e-05, - "loss": 0.437, + "epoch": 0.9340497571029001, + "grad_norm": 0.3519589134807377, + "learning_rate": 1.739992387821821e-05, + "loss": 0.4305, "step": 6345 }, { - "epoch": 0.9304029304029304, - "grad_norm": 0.3735459297517335, - "learning_rate": 1.7428747777595175e-05, - "loss": 0.433, + "epoch": 0.934785808920948, + "grad_norm": 0.3662117555408821, + "learning_rate": 1.7394160620064446e-05, + "loss": 0.4366, "step": 6350 }, { - "epoch": 0.9311355311355312, - "grad_norm": 0.37303056296747494, - "learning_rate": 1.742303856800769e-05, - "loss": 0.4388, + "epoch": 0.935521860738996, + "grad_norm": 0.3644631455233325, + "learning_rate": 1.738839193837394e-05, + "loss": 0.4658, "step": 6355 }, { - "epoch": 0.9318681318681319, - "grad_norm": 0.38304861292937553, - "learning_rate": 1.741732396440491e-05, - "loss": 0.4468, + "epoch": 0.936257912557044, + "grad_norm": 0.3771542118502433, + "learning_rate": 1.738261783737797e-05, + "loss": 0.4504, "step": 6360 }, { - "epoch": 0.9326007326007326, - "grad_norm": 0.3710683430964102, - "learning_rate": 1.741160397093941e-05, - "loss": 0.4478, + "epoch": 0.936993964375092, + "grad_norm": 0.35699812366864364, + "learning_rate": 1.7376838321311767e-05, + "loss": 0.442, "step": 6365 }, { - "epoch": 0.9333333333333333, - "grad_norm": 0.34469952745375604, - "learning_rate": 1.7405878591767668e-05, - "loss": 0.4127, + "epoch": 0.93773001619314, + "grad_norm": 0.3365994856786233, + "learning_rate": 1.737105339441455e-05, + "loss": 0.4323, "step": 6370 }, { - "epoch": 0.9340659340659341, - "grad_norm": 0.3608003661132213, - "learning_rate": 1.7400147831050084e-05, - "loss": 0.4265, + "epoch": 0.938466068011188, + "grad_norm": 0.35267502505215387, + "learning_rate": 1.73652630609295e-05, + "loss": 0.4276, "step": 6375 }, { - "epoch": 0.9347985347985348, - "grad_norm": 0.3591532950471118, - "learning_rate": 1.7394411692950967e-05, - "loss": 0.4352, + "epoch": 0.939202119829236, + "grad_norm": 0.39998013176522534, + "learning_rate": 1.735946732510376e-05, + "loss": 0.4396, "step": 6380 }, { - "epoch": 0.9355311355311355, - "grad_norm": 0.3815730058848865, - "learning_rate": 1.738867018163852e-05, - "loss": 0.4379, + "epoch": 0.939938171647284, + "grad_norm": 0.35133524270179606, + "learning_rate": 1.7353666191188438e-05, + "loss": 0.4415, "step": 6385 }, { - "epoch": 0.9362637362637363, - "grad_norm": 0.3781376412513171, - "learning_rate": 1.7382923301284875e-05, - "loss": 0.4255, + "epoch": 0.9406742234653319, + "grad_norm": 0.35417661046495785, + "learning_rate": 1.7347859663438605e-05, + "loss": 0.4385, "step": 6390 }, { - "epoch": 0.936996336996337, - "grad_norm": 0.36822030608402645, - "learning_rate": 1.7377171056066043e-05, - "loss": 0.439, + "epoch": 0.94141027528338, + "grad_norm": 0.34602419575372995, + "learning_rate": 1.734204774611329e-05, + "loss": 0.4552, "step": 6395 }, { - "epoch": 0.9377289377289377, - "grad_norm": 0.387825877238993, - "learning_rate": 1.737141345016195e-05, - "loss": 0.444, + "epoch": 0.942146327101428, + "grad_norm": 0.3525285571832158, + "learning_rate": 1.7336230443475467e-05, + "loss": 0.4278, "step": 6400 }, { - "epoch": 0.9384615384615385, - "grad_norm": 0.3695779787399637, - "learning_rate": 1.7365650487756406e-05, - "loss": 0.4453, + "epoch": 0.9428823789194759, + "grad_norm": 0.366003333949412, + "learning_rate": 1.733040775979207e-05, + "loss": 0.4381, "step": 6405 }, { - "epoch": 0.9391941391941392, - "grad_norm": 0.34677697199272484, - "learning_rate": 1.735988217303712e-05, - "loss": 0.4258, + "epoch": 0.9436184307375239, + "grad_norm": 0.3655463281507159, + "learning_rate": 1.732457969933397e-05, + "loss": 0.4405, "step": 6410 }, { - "epoch": 0.9399267399267399, - "grad_norm": 0.33535546429344404, - "learning_rate": 1.7354108510195685e-05, - "loss": 0.4327, + "epoch": 0.9443544825555719, + "grad_norm": 0.35214648952212846, + "learning_rate": 1.731874626637598e-05, + "loss": 0.4543, "step": 6415 }, { - "epoch": 0.9406593406593406, - "grad_norm": 0.359277392298737, - "learning_rate": 1.734832950342759e-05, - "loss": 0.4441, + "epoch": 0.9450905343736199, + "grad_norm": 0.36696287785978987, + "learning_rate": 1.7312907465196872e-05, + "loss": 0.4315, "step": 6420 }, { - "epoch": 0.9413919413919414, - "grad_norm": 0.3491643509615073, - "learning_rate": 1.7342545156932188e-05, - "loss": 0.4391, + "epoch": 0.9458265861916679, + "grad_norm": 0.38460952863517545, + "learning_rate": 1.7307063300079336e-05, + "loss": 0.4474, "step": 6425 }, { - "epoch": 0.9421245421245421, - "grad_norm": 0.34818401780870933, - "learning_rate": 1.7336755474912732e-05, - "loss": 0.4348, + "epoch": 0.9465626380097159, + "grad_norm": 0.35064441815279995, + "learning_rate": 1.7301213775310007e-05, + "loss": 0.4446, "step": 6430 }, { - "epoch": 0.9428571428571428, - "grad_norm": 0.3431591990728147, - "learning_rate": 1.7330960461576345e-05, - "loss": 0.4241, + "epoch": 0.9472986898277639, + "grad_norm": 0.3799412772308449, + "learning_rate": 1.7295358895179447e-05, + "loss": 0.46, "step": 6435 }, { - "epoch": 0.9435897435897436, - "grad_norm": 0.3560754743461474, - "learning_rate": 1.7325160121134025e-05, - "loss": 0.4398, + "epoch": 0.9480347416458119, + "grad_norm": 0.34400248191431165, + "learning_rate": 1.7289498663982143e-05, + "loss": 0.4343, "step": 6440 }, { - "epoch": 0.9443223443223443, - "grad_norm": 0.36686489644856285, - "learning_rate": 1.7319354457800637e-05, - "loss": 0.4458, + "epoch": 0.9487707934638598, + "grad_norm": 0.3567780544127945, + "learning_rate": 1.728363308601652e-05, + "loss": 0.4529, "step": 6445 }, { - "epoch": 0.945054945054945, - "grad_norm": 0.36003566234090917, - "learning_rate": 1.7313543475794914e-05, - "loss": 0.4561, + "epoch": 0.9495068452819079, + "grad_norm": 0.387213551748438, + "learning_rate": 1.7277762165584907e-05, + "loss": 0.4467, "step": 6450 }, { - "epoch": 0.9457875457875458, - "grad_norm": 0.34456401606560105, - "learning_rate": 1.730772717933946e-05, - "loss": 0.4685, + "epoch": 0.9502428970999558, + "grad_norm": 0.37209564706427456, + "learning_rate": 1.7271885906993565e-05, + "loss": 0.4399, "step": 6455 }, { - "epoch": 0.9465201465201465, - "grad_norm": 0.46881033387419574, - "learning_rate": 1.730190557266074e-05, - "loss": 0.4587, + "epoch": 0.9509789489180038, + "grad_norm": 0.35848848144937395, + "learning_rate": 1.7266004314552673e-05, + "loss": 0.4631, "step": 6460 }, { - "epoch": 0.9472527472527472, - "grad_norm": 0.3399485977148579, - "learning_rate": 1.7296078659989067e-05, - "loss": 0.4359, + "epoch": 0.9517150007360519, + "grad_norm": 0.3489687393519528, + "learning_rate": 1.72601173925763e-05, + "loss": 0.4417, "step": 6465 }, { - "epoch": 0.947985347985348, - "grad_norm": 0.37908412933430996, - "learning_rate": 1.7290246445558624e-05, - "loss": 0.4463, + "epoch": 0.9524510525540998, + "grad_norm": 0.37541684163596567, + "learning_rate": 1.7254225145382448e-05, + "loss": 0.4514, "step": 6470 }, { - "epoch": 0.9487179487179487, - "grad_norm": 0.3679538773079892, - "learning_rate": 1.728440893360744e-05, - "loss": 0.4367, + "epoch": 0.9531871043721478, + "grad_norm": 0.3472589809228936, + "learning_rate": 1.7248327577293018e-05, + "loss": 0.4271, "step": 6475 }, { - "epoch": 0.9494505494505494, - "grad_norm": 0.35266979897054596, - "learning_rate": 1.727856612837739e-05, - "loss": 0.4351, + "epoch": 0.9539231561901957, + "grad_norm": 0.3550590943995679, + "learning_rate": 1.724242469263381e-05, + "loss": 0.4226, "step": 6480 }, { - "epoch": 0.9501831501831501, - "grad_norm": 0.35482267331253475, - "learning_rate": 1.7272718034114202e-05, - "loss": 0.4299, + "epoch": 0.9546592080082438, + "grad_norm": 0.362827565511125, + "learning_rate": 1.7236516495734527e-05, + "loss": 0.4293, "step": 6485 }, { - "epoch": 0.9509157509157509, - "grad_norm": 0.3552781405115571, - "learning_rate": 1.7266864655067445e-05, - "loss": 0.4537, + "epoch": 0.9553952598262918, + "grad_norm": 0.3540006386075476, + "learning_rate": 1.7230602990928767e-05, + "loss": 0.4231, "step": 6490 }, { - "epoch": 0.9516483516483516, - "grad_norm": 0.3801275450482461, - "learning_rate": 1.726100599549052e-05, - "loss": 0.4445, + "epoch": 0.9561313116443397, + "grad_norm": 0.3569997769418889, + "learning_rate": 1.722468418255402e-05, + "loss": 0.425, "step": 6495 }, { - "epoch": 0.9523809523809523, - "grad_norm": 0.3671191261228335, - "learning_rate": 1.7255142059640683e-05, - "loss": 0.4416, + "epoch": 0.9568673634623878, + "grad_norm": 0.3691091286255934, + "learning_rate": 1.7218760074951668e-05, + "loss": 0.4494, "step": 6500 }, { - "epoch": 0.9531135531135531, - "grad_norm": 0.35384501680106467, - "learning_rate": 1.724927285177901e-05, - "loss": 0.4361, + "epoch": 0.9576034152804357, + "grad_norm": 0.3606401671759627, + "learning_rate": 1.7212830672466983e-05, + "loss": 0.4432, "step": 6505 }, { - "epoch": 0.9538461538461539, - "grad_norm": 0.3931706899709501, - "learning_rate": 1.7243398376170408e-05, - "loss": 0.4518, + "epoch": 0.9583394670984837, + "grad_norm": 0.35153856178393444, + "learning_rate": 1.7206895979449116e-05, + "loss": 0.4439, "step": 6510 }, { - "epoch": 0.9545787545787546, - "grad_norm": 0.3691599143921214, - "learning_rate": 1.7237518637083622e-05, - "loss": 0.4317, + "epoch": 0.9590755189165318, + "grad_norm": 0.3481970728848475, + "learning_rate": 1.7200956000251098e-05, + "loss": 0.4328, "step": 6515 }, { - "epoch": 0.9553113553113554, - "grad_norm": 0.3758014577879604, - "learning_rate": 1.7231633638791208e-05, - "loss": 0.4507, + "epoch": 0.9598115707345797, + "grad_norm": 0.36545719654945297, + "learning_rate": 1.719501073922984e-05, + "loss": 0.4199, "step": 6520 }, { - "epoch": 0.9560439560439561, - "grad_norm": 0.35354209070515097, - "learning_rate": 1.722574338556956e-05, - "loss": 0.4442, + "epoch": 0.9605476225526277, + "grad_norm": 0.3585760396318208, + "learning_rate": 1.718906020074613e-05, + "loss": 0.4445, "step": 6525 }, { - "epoch": 0.9567765567765568, - "grad_norm": 0.35157129831484946, - "learning_rate": 1.721984788169887e-05, - "loss": 0.4575, + "epoch": 0.9612836743706757, + "grad_norm": 0.3516536066245614, + "learning_rate": 1.718310438916462e-05, + "loss": 0.4289, "step": 6530 }, { - "epoch": 0.9575091575091575, - "grad_norm": 0.3846250090546446, - "learning_rate": 1.7213947131463172e-05, - "loss": 0.4451, + "epoch": 0.9620197261887237, + "grad_norm": 0.35861427654949146, + "learning_rate": 1.7177143308853837e-05, + "loss": 0.4423, "step": 6535 }, { - "epoch": 0.9582417582417583, - "grad_norm": 1.284228046597185, - "learning_rate": 1.7208041139150285e-05, - "loss": 0.4341, + "epoch": 0.9627557780067717, + "grad_norm": 0.36958938982697254, + "learning_rate": 1.7171176964186162e-05, + "loss": 0.4219, "step": 6540 }, { - "epoch": 0.958974358974359, - "grad_norm": 0.3788015557626036, - "learning_rate": 1.720212990905185e-05, - "loss": 0.4212, + "epoch": 0.9634918298248196, + "grad_norm": 0.337681353132239, + "learning_rate": 1.7165205359537852e-05, + "loss": 0.423, "step": 6545 }, { - "epoch": 0.9597069597069597, - "grad_norm": 0.3779066676468523, - "learning_rate": 1.719621344546332e-05, - "loss": 0.4436, + "epoch": 0.9642278816428677, + "grad_norm": 0.3681033444413669, + "learning_rate": 1.715922849928901e-05, + "loss": 0.4416, "step": 6550 }, { - "epoch": 0.9604395604395605, - "grad_norm": 0.35572791641424206, - "learning_rate": 1.7190291752683942e-05, - "loss": 0.4473, + "epoch": 0.9649639334609157, + "grad_norm": 0.35824920695173623, + "learning_rate": 1.71532463878236e-05, + "loss": 0.4514, "step": 6555 }, { - "epoch": 0.9611721611721612, - "grad_norm": 0.37632881645512, - "learning_rate": 1.7184364835016768e-05, - "loss": 0.4437, + "epoch": 0.9656999852789636, + "grad_norm": 0.35027880844473847, + "learning_rate": 1.7147259029529434e-05, + "loss": 0.4348, "step": 6560 }, { - "epoch": 0.9619047619047619, - "grad_norm": 0.3463323921208554, - "learning_rate": 1.7178432696768634e-05, - "loss": 0.4343, + "epoch": 0.9664360370970116, + "grad_norm": 0.35007600047694154, + "learning_rate": 1.7141266428798173e-05, + "loss": 0.4628, "step": 6565 }, { - "epoch": 0.9626373626373627, - "grad_norm": 0.34930030130427925, - "learning_rate": 1.717249534225019e-05, - "loss": 0.4431, + "epoch": 0.9671720889150596, + "grad_norm": 0.3424031971108177, + "learning_rate": 1.7135268590025327e-05, + "loss": 0.4378, "step": 6570 }, { - "epoch": 0.9633699633699634, - "grad_norm": 0.3579662908578417, - "learning_rate": 1.716655277577586e-05, - "loss": 0.4344, + "epoch": 0.9679081407331076, + "grad_norm": 0.3480667011906856, + "learning_rate": 1.712926551761025e-05, + "loss": 0.4362, "step": 6575 }, { - "epoch": 0.9641025641025641, - "grad_norm": 0.34469614850564645, - "learning_rate": 1.7160605001663867e-05, - "loss": 0.4349, + "epoch": 0.9686441925511556, + "grad_norm": 0.3492989838682148, + "learning_rate": 1.7123257215956127e-05, + "loss": 0.4317, "step": 6580 }, { - "epoch": 0.9648351648351648, - "grad_norm": 0.3358036188767999, - "learning_rate": 1.715465202423621e-05, - "loss": 0.4476, + "epoch": 0.9693802443692036, + "grad_norm": 0.3647848112736977, + "learning_rate": 1.711724368946998e-05, + "loss": 0.4379, "step": 6585 }, { - "epoch": 0.9655677655677656, - "grad_norm": 0.37389608845541517, - "learning_rate": 1.714869384781867e-05, - "loss": 0.4569, + "epoch": 0.9701162961872516, + "grad_norm": 0.3647649308439903, + "learning_rate": 1.7111224942562663e-05, + "loss": 0.4493, "step": 6590 }, { - "epoch": 0.9663003663003663, - "grad_norm": 0.3532112957351536, - "learning_rate": 1.714273047674081e-05, - "loss": 0.4298, + "epoch": 0.9708523480052996, + "grad_norm": 0.3753953789277182, + "learning_rate": 1.7105200979648874e-05, + "loss": 0.4381, "step": 6595 }, { - "epoch": 0.967032967032967, - "grad_norm": 0.3615175520371604, - "learning_rate": 1.713676191533596e-05, - "loss": 0.438, + "epoch": 0.9715883998233475, + "grad_norm": 0.37606826388414516, + "learning_rate": 1.709917180514711e-05, + "loss": 0.4624, "step": 6600 }, { - "epoch": 0.9677655677655678, - "grad_norm": 0.45637238587490664, - "learning_rate": 1.7130788167941235e-05, - "loss": 0.4735, + "epoch": 0.9723244516413956, + "grad_norm": 0.3737809109027871, + "learning_rate": 1.7093137423479714e-05, + "loss": 0.4232, "step": 6605 }, { - "epoch": 0.9684981684981685, - "grad_norm": 0.37272041568938485, - "learning_rate": 1.7124809238897508e-05, - "loss": 0.4399, + "epoch": 0.9730605034594435, + "grad_norm": 0.3780741713516723, + "learning_rate": 1.708709783907284e-05, + "loss": 0.4386, "step": 6610 }, { - "epoch": 0.9692307692307692, - "grad_norm": 0.37066324869804335, - "learning_rate": 1.7118825132549416e-05, - "loss": 0.448, + "epoch": 0.9737965552774915, + "grad_norm": 0.3686313080726544, + "learning_rate": 1.7081053056356452e-05, + "loss": 0.4561, "step": 6615 }, { - "epoch": 0.96996336996337, - "grad_norm": 0.38415151064760394, - "learning_rate": 1.711283585324536e-05, - "loss": 0.4382, + "epoch": 0.9745326070955396, + "grad_norm": 0.3557694193587092, + "learning_rate": 1.7075003079764337e-05, + "loss": 0.4234, "step": 6620 }, { - "epoch": 0.9706959706959707, - "grad_norm": 0.38204200328668275, - "learning_rate": 1.710684140533751e-05, - "loss": 0.455, + "epoch": 0.9752686589135875, + "grad_norm": 0.4071086693129615, + "learning_rate": 1.7068947913734085e-05, + "loss": 0.4395, "step": 6625 }, { - "epoch": 0.9714285714285714, - "grad_norm": 0.3676552434938297, - "learning_rate": 1.7100841793181773e-05, - "loss": 0.4404, + "epoch": 0.9760047107316355, + "grad_norm": 0.3453854466174774, + "learning_rate": 1.7062887562707097e-05, + "loss": 0.4232, "step": 6630 }, { - "epoch": 0.9721611721611721, - "grad_norm": 0.36719276204679707, - "learning_rate": 1.709483702113783e-05, - "loss": 0.4375, + "epoch": 0.9767407625496835, + "grad_norm": 0.3697484920484151, + "learning_rate": 1.7056822031128572e-05, + "loss": 0.4353, "step": 6635 }, { - "epoch": 0.9728937728937729, - "grad_norm": 0.5109971002491437, - "learning_rate": 1.7088827093569092e-05, - "loss": 0.481, + "epoch": 0.9774768143677315, + "grad_norm": 0.3654491247280484, + "learning_rate": 1.7050751323447515e-05, + "loss": 0.4331, "step": 6640 }, { - "epoch": 0.9736263736263736, - "grad_norm": 0.38455542337125864, - "learning_rate": 1.7082812014842732e-05, + "epoch": 0.9782128661857795, + "grad_norm": 0.36223079918316325, + "learning_rate": 1.7044675444116726e-05, "loss": 0.441, "step": 6645 }, { - "epoch": 0.9743589743589743, - "grad_norm": 0.35342114488002463, - "learning_rate": 1.7076791789329655e-05, - "loss": 0.4305, + "epoch": 0.9789489180038274, + "grad_norm": 0.36604867602552976, + "learning_rate": 1.703859439759279e-05, + "loss": 0.4442, "step": 6650 }, { - "epoch": 0.9750915750915751, - "grad_norm": 0.3581437314772134, - "learning_rate": 1.707076642140451e-05, - "loss": 0.4464, + "epoch": 0.9796849698218755, + "grad_norm": 0.4501107912031965, + "learning_rate": 1.703250818833609e-05, + "loss": 0.4406, "step": 6655 }, { - "epoch": 0.9758241758241758, - "grad_norm": 0.3615948884476211, - "learning_rate": 1.7064735915445683e-05, - "loss": 0.4365, + "epoch": 0.9804210216399234, + "grad_norm": 0.3657745148199479, + "learning_rate": 1.7026416820810797e-05, + "loss": 0.4385, "step": 6660 }, { - "epoch": 0.9765567765567765, - "grad_norm": 0.3411342605948674, - "learning_rate": 1.7058700275835293e-05, - "loss": 0.431, + "epoch": 0.9811570734579714, + "grad_norm": 0.37544557601913253, + "learning_rate": 1.7020320299484868e-05, + "loss": 0.4357, "step": 6665 }, { - "epoch": 0.9772893772893773, - "grad_norm": 0.37749787772603005, - "learning_rate": 1.705265950695919e-05, - "loss": 0.4436, + "epoch": 0.9818931252760195, + "grad_norm": 0.3729086931283695, + "learning_rate": 1.7014218628830026e-05, + "loss": 0.4457, "step": 6670 }, { - "epoch": 0.978021978021978, - "grad_norm": 0.3569827914060955, - "learning_rate": 1.7046613613206954e-05, - "loss": 0.4497, + "epoch": 0.9826291770940674, + "grad_norm": 0.3547966965289486, + "learning_rate": 1.7008111813321786e-05, + "loss": 0.4493, "step": 6675 }, { - "epoch": 0.9787545787545787, - "grad_norm": 0.3563775667772839, - "learning_rate": 1.7040562598971885e-05, - "loss": 0.4228, + "epoch": 0.9833652289121154, + "grad_norm": 0.35360612224713783, + "learning_rate": 1.700199985743943e-05, + "loss": 0.4332, "step": 6680 }, { - "epoch": 0.9794871794871794, - "grad_norm": 0.36042392380094623, - "learning_rate": 1.7034506468651e-05, - "loss": 0.4409, + "epoch": 0.9841012807301635, + "grad_norm": 0.34229050450837223, + "learning_rate": 1.699588276566601e-05, + "loss": 0.4352, "step": 6685 }, { - "epoch": 0.9802197802197802, - "grad_norm": 0.33415031185831234, - "learning_rate": 1.702844522664504e-05, - "loss": 0.4275, + "epoch": 0.9848373325482114, + "grad_norm": 0.36038163944276935, + "learning_rate": 1.6989760542488345e-05, + "loss": 0.4547, "step": 6690 }, { - "epoch": 0.9809523809523809, - "grad_norm": 0.33092743255070933, - "learning_rate": 1.7022378877358466e-05, - "loss": 0.4433, + "epoch": 0.9855733843662594, + "grad_norm": 0.37582589633438, + "learning_rate": 1.6983633192397023e-05, + "loss": 0.4421, "step": 6695 }, { - "epoch": 0.9816849816849816, - "grad_norm": 0.3751336919353944, - "learning_rate": 1.7016307425199437e-05, - "loss": 0.4477, + "epoch": 0.9863094361843073, + "grad_norm": 0.3752879091825568, + "learning_rate": 1.697750071988639e-05, + "loss": 0.4251, "step": 6700 }, { - "epoch": 0.9824175824175824, - "grad_norm": 0.35752258164803286, - "learning_rate": 1.7010230874579828e-05, - "loss": 0.4206, + "epoch": 0.9870454880023554, + "grad_norm": 0.3629269641916705, + "learning_rate": 1.697136312945455e-05, + "loss": 0.4491, "step": 6705 }, { - "epoch": 0.9831501831501831, - "grad_norm": 0.37406972717380194, - "learning_rate": 1.7004149229915218e-05, - "loss": 0.4414, + "epoch": 0.9877815398204034, + "grad_norm": 0.3601063193255718, + "learning_rate": 1.696522042560335e-05, + "loss": 0.4332, "step": 6710 }, { - "epoch": 0.9838827838827838, - "grad_norm": 0.35195564329231965, - "learning_rate": 1.699806249562488e-05, - "loss": 0.4389, + "epoch": 0.9885175916384513, + "grad_norm": 0.35379971824952605, + "learning_rate": 1.6959072612838402e-05, + "loss": 0.444, "step": 6715 }, { - "epoch": 0.9846153846153847, - "grad_norm": 0.3386954263403885, - "learning_rate": 1.6991970676131803e-05, - "loss": 0.4472, + "epoch": 0.9892536434564994, + "grad_norm": 0.35869165263922875, + "learning_rate": 1.695291969566906e-05, + "loss": 0.4257, "step": 6720 }, { - "epoch": 0.9853479853479854, - "grad_norm": 0.34771883733053655, - "learning_rate": 1.698587377586266e-05, - "loss": 0.4538, + "epoch": 0.9899896952745473, + "grad_norm": 0.43804822187478154, + "learning_rate": 1.6946761678608427e-05, + "loss": 0.4496, "step": 6725 }, { - "epoch": 0.9860805860805861, - "grad_norm": 0.3721375026399225, - "learning_rate": 1.6979771799247804e-05, - "loss": 0.4472, + "epoch": 0.9907257470925953, + "grad_norm": 0.34786445151583756, + "learning_rate": 1.6940598566173332e-05, + "loss": 0.4334, "step": 6730 }, { - "epoch": 0.9868131868131869, - "grad_norm": 0.36602925797704333, - "learning_rate": 1.6973664750721298e-05, - "loss": 0.4414, + "epoch": 0.9914617989106433, + "grad_norm": 0.3821109055666706, + "learning_rate": 1.6934430362884358e-05, + "loss": 0.4436, "step": 6735 }, { - "epoch": 0.9875457875457876, - "grad_norm": 0.35450346411272515, - "learning_rate": 1.696755263472088e-05, - "loss": 0.4396, + "epoch": 0.9921978507286913, + "grad_norm": 0.3760672110821931, + "learning_rate": 1.6928257073265813e-05, + "loss": 0.444, "step": 6740 }, { - "epoch": 0.9882783882783883, - "grad_norm": 0.36493321114968236, - "learning_rate": 1.6961435455687977e-05, - "loss": 0.4343, + "epoch": 0.9929339025467393, + "grad_norm": 0.3590919223385004, + "learning_rate": 1.6922078701845735e-05, + "loss": 0.4385, "step": 6745 }, { - "epoch": 0.989010989010989, - "grad_norm": 0.34588020475657766, - "learning_rate": 1.6955313218067683e-05, - "loss": 0.4273, + "epoch": 0.9936699543647873, + "grad_norm": 0.36928170979574315, + "learning_rate": 1.6915895253155903e-05, + "loss": 0.4352, "step": 6750 }, { - "epoch": 0.9897435897435898, - "grad_norm": 0.37991788360791556, - "learning_rate": 1.694918592630878e-05, - "loss": 0.445, + "epoch": 0.9944060061828353, + "grad_norm": 0.35372962548149794, + "learning_rate": 1.69097067317318e-05, + "loss": 0.4386, "step": 6755 }, { - "epoch": 0.9904761904761905, - "grad_norm": 0.3396032374471012, - "learning_rate": 1.6943053584863713e-05, - "loss": 0.4228, + "epoch": 0.9951420580008833, + "grad_norm": 0.36209291653208947, + "learning_rate": 1.690351314211264e-05, + "loss": 0.4344, "step": 6760 }, { - "epoch": 0.9912087912087912, - "grad_norm": 0.36592387012699884, - "learning_rate": 1.6936916198188607e-05, - "loss": 0.4428, + "epoch": 0.9958781098189312, + "grad_norm": 0.34743601508889976, + "learning_rate": 1.689731448884136e-05, + "loss": 0.4347, "step": 6765 }, { - "epoch": 0.991941391941392, - "grad_norm": 0.34982183026324387, - "learning_rate": 1.693077377074324e-05, - "loss": 0.4442, + "epoch": 0.9966141616369792, + "grad_norm": 0.34687773366384644, + "learning_rate": 1.6891110776464595e-05, + "loss": 0.4233, "step": 6770 }, { - "epoch": 0.9926739926739927, - "grad_norm": 0.3522221002127369, - "learning_rate": 1.6924626306991064e-05, - "loss": 0.4311, + "epoch": 0.9973502134550273, + "grad_norm": 0.40739649568714137, + "learning_rate": 1.6884902009532712e-05, + "loss": 0.4392, "step": 6775 }, { - "epoch": 0.9934065934065934, - "grad_norm": 0.35422036214466257, - "learning_rate": 1.691847381139919e-05, - "loss": 0.4357, + "epoch": 0.9980862652730752, + "grad_norm": 0.36589283999767597, + "learning_rate": 1.6878688192599766e-05, + "loss": 0.4301, "step": 6780 }, { - "epoch": 0.9941391941391942, - "grad_norm": 0.35280747117561606, - "learning_rate": 1.6912316288438376e-05, - "loss": 0.43, + "epoch": 0.9988223170911232, + "grad_norm": 0.3461679332252419, + "learning_rate": 1.6872469330223533e-05, + "loss": 0.441, "step": 6785 }, { - "epoch": 0.9948717948717949, - "grad_norm": 0.3421279801067773, - "learning_rate": 1.6906153742583043e-05, - "loss": 0.4276, + "epoch": 0.9995583689091712, + "grad_norm": 0.3767409034985814, + "learning_rate": 1.6866245426965474e-05, + "loss": 0.4304, "step": 6790 }, { - "epoch": 0.9956043956043956, - "grad_norm": 0.33916936148692983, - "learning_rate": 1.689998617831126e-05, - "loss": 0.4316, + "epoch": 1.0, + "eval_loss": 0.43871477246284485, + "eval_runtime": 1.5515, + "eval_samples_per_second": 15.469, + "eval_steps_per_second": 0.645, + "step": 6793 + }, + { + "epoch": 1.0002944207272193, + "grad_norm": 0.3866995374176673, + "learning_rate": 1.6860016487390755e-05, + "loss": 0.393, "step": 6795 }, { - "epoch": 0.9963369963369964, - "grad_norm": 0.3739503000291955, - "learning_rate": 1.6893813600104733e-05, - "loss": 0.4415, + "epoch": 1.0010304725452672, + "grad_norm": 0.3999949768145471, + "learning_rate": 1.685378251606824e-05, + "loss": 0.3513, "step": 6800 }, { - "epoch": 0.9970695970695971, - "grad_norm": 0.3674210126216178, - "learning_rate": 1.688763601244883e-05, - "loss": 0.4369, + "epoch": 1.0017665243633151, + "grad_norm": 0.39338192031228963, + "learning_rate": 1.6847543517570472e-05, + "loss": 0.3479, "step": 6805 }, { - "epoch": 0.9978021978021978, - "grad_norm": 0.3474202293916676, - "learning_rate": 1.688145341983254e-05, - "loss": 0.456, + "epoch": 1.002502576181363, + "grad_norm": 0.383222857954703, + "learning_rate": 1.6841299496473696e-05, + "loss": 0.3409, "step": 6810 }, { - "epoch": 0.9985347985347985, - "grad_norm": 0.36861289532532876, - "learning_rate": 1.6875265826748506e-05, - "loss": 0.4436, + "epoch": 1.0032386279994112, + "grad_norm": 0.38779269601567395, + "learning_rate": 1.6835050457357833e-05, + "loss": 0.3432, "step": 6815 }, { - "epoch": 0.9992673992673993, - "grad_norm": 0.3574161164208663, - "learning_rate": 1.6869073237692987e-05, - "loss": 0.451, + "epoch": 1.0039746798174591, + "grad_norm": 0.37810640687408825, + "learning_rate": 1.682879640480648e-05, + "loss": 0.3231, "step": 6820 }, { - "epoch": 1.0, - "grad_norm": 0.3408889832195101, - "learning_rate": 1.686287565716589e-05, - "loss": 0.4316, - "step": 6825 - }, - { - "epoch": 1.0, - "eval_loss": 0.40975621342658997, - "eval_runtime": 1.5969, - "eval_samples_per_second": 15.03, - "eval_steps_per_second": 0.626, + "epoch": 1.004710731635507, + "grad_norm": 0.3651737100583659, + "learning_rate": 1.6822537343406925e-05, + "loss": 0.3216, "step": 6825 }, { - "epoch": 1.0007326007326007, - "grad_norm": 0.36201863726007955, - "learning_rate": 1.685667308967073e-05, - "loss": 0.3413, + "epoch": 1.0054467834535552, + "grad_norm": 0.36337384098568465, + "learning_rate": 1.6816273277750116e-05, + "loss": 0.3303, "step": 6830 }, { - "epoch": 1.0014652014652015, - "grad_norm": 0.3902424360566943, - "learning_rate": 1.6850465539714667e-05, - "loss": 0.3322, + "epoch": 1.0061828352716031, + "grad_norm": 0.4087666983005035, + "learning_rate": 1.6810004212430677e-05, + "loss": 0.3304, "step": 6835 }, { - "epoch": 1.0021978021978022, - "grad_norm": 0.3975415065836676, - "learning_rate": 1.6844253011808468e-05, - "loss": 0.3273, + "epoch": 1.006918887089651, + "grad_norm": 0.3807588186952967, + "learning_rate": 1.6803730152046905e-05, + "loss": 0.3217, "step": 6840 }, { - "epoch": 1.002930402930403, - "grad_norm": 0.4061898685848133, - "learning_rate": 1.6838035510466516e-05, - "loss": 0.3322, + "epoch": 1.0076549389076992, + "grad_norm": 0.3759058072774958, + "learning_rate": 1.6797451101200748e-05, + "loss": 0.3243, "step": 6845 }, { - "epoch": 1.0036630036630036, - "grad_norm": 0.3593019021745724, - "learning_rate": 1.6831813040206817e-05, - "loss": 0.3217, + "epoch": 1.0083909907257471, + "grad_norm": 0.3861754610930233, + "learning_rate": 1.6791167064497828e-05, + "loss": 0.3207, "step": 6850 }, { - "epoch": 1.0043956043956044, - "grad_norm": 0.36837764560120567, - "learning_rate": 1.6825585605550982e-05, - "loss": 0.339, + "epoch": 1.009127042543795, + "grad_norm": 0.3622151306663367, + "learning_rate": 1.6784878046547414e-05, + "loss": 0.3311, "step": 6855 }, { - "epoch": 1.005128205128205, - "grad_norm": 0.3867234105785456, - "learning_rate": 1.6819353211024227e-05, - "loss": 0.3418, + "epoch": 1.0098630943618432, + "grad_norm": 0.3649932755248596, + "learning_rate": 1.6778584051962434e-05, + "loss": 0.3389, "step": 6860 }, { - "epoch": 1.0058608058608058, - "grad_norm": 0.3812144593341558, - "learning_rate": 1.681311586115538e-05, - "loss": 0.3458, + "epoch": 1.010599146179891, + "grad_norm": 0.3766699822581901, + "learning_rate": 1.6772285085359465e-05, + "loss": 0.3313, "step": 6865 }, { - "epoch": 1.0065934065934066, - "grad_norm": 0.37054904889141077, - "learning_rate": 1.680687356047686e-05, - "loss": 0.3429, + "epoch": 1.011335197997939, + "grad_norm": 0.3562643210167549, + "learning_rate": 1.676598115135873e-05, + "loss": 0.3243, "step": 6870 }, { - "epoch": 1.0073260073260073, - "grad_norm": 0.3598095442970735, - "learning_rate": 1.6800626313524696e-05, - "loss": 0.3561, + "epoch": 1.012071249815987, + "grad_norm": 0.3625978112414053, + "learning_rate": 1.67596722545841e-05, + "loss": 0.3311, "step": 6875 }, { - "epoch": 1.008058608058608, - "grad_norm": 0.3675041920360404, - "learning_rate": 1.67943741248385e-05, - "loss": 0.3415, + "epoch": 1.012807301634035, + "grad_norm": 0.36347877354101427, + "learning_rate": 1.6753358399663073e-05, + "loss": 0.3365, "step": 6880 }, { - "epoch": 1.0087912087912088, - "grad_norm": 0.38062489277406575, - "learning_rate": 1.6788116998961476e-05, - "loss": 0.3247, + "epoch": 1.013543353452083, + "grad_norm": 0.3837021753325475, + "learning_rate": 1.6747039591226802e-05, + "loss": 0.3441, "step": 6885 }, { - "epoch": 1.0095238095238095, - "grad_norm": 0.3605291982310506, - "learning_rate": 1.6781854940440424e-05, - "loss": 0.3345, + "epoch": 1.014279405270131, + "grad_norm": 0.3695457828638176, + "learning_rate": 1.6740715833910064e-05, + "loss": 0.3215, "step": 6890 }, { - "epoch": 1.0102564102564102, - "grad_norm": 0.36236834595661455, - "learning_rate": 1.677558795382572e-05, - "loss": 0.3682, + "epoch": 1.015015457088179, + "grad_norm": 0.3702978633218317, + "learning_rate": 1.6734387132351268e-05, + "loss": 0.3375, "step": 6895 }, { - "epoch": 1.010989010989011, - "grad_norm": 0.3690521144299775, - "learning_rate": 1.6769316043671327e-05, - "loss": 0.3386, + "epoch": 1.015751508906227, + "grad_norm": 0.3845715172792146, + "learning_rate": 1.672805349119244e-05, + "loss": 0.3265, "step": 6900 }, { - "epoch": 1.0117216117216117, - "grad_norm": 0.3760777588079089, - "learning_rate": 1.6763039214534782e-05, - "loss": 0.3295, + "epoch": 1.016487560724275, + "grad_norm": 0.37701638039133323, + "learning_rate": 1.6721714915079242e-05, + "loss": 0.3315, "step": 6905 }, { - "epoch": 1.0124542124542124, - "grad_norm": 0.3726828659631818, - "learning_rate": 1.6756757470977194e-05, - "loss": 0.3262, + "epoch": 1.017223612542323, + "grad_norm": 0.3533591923899028, + "learning_rate": 1.671537140866095e-05, + "loss": 0.3297, "step": 6910 }, { - "epoch": 1.0131868131868131, - "grad_norm": 0.3905666821718478, - "learning_rate": 1.675047081756325e-05, - "loss": 0.3227, + "epoch": 1.017959664360371, + "grad_norm": 0.3531464949026519, + "learning_rate": 1.6709022976590457e-05, + "loss": 0.3259, "step": 6915 }, { - "epoch": 1.0139194139194139, - "grad_norm": 0.3830327834647385, - "learning_rate": 1.67441792588612e-05, - "loss": 0.3326, + "epoch": 1.018695716178419, + "grad_norm": 0.3625200138845915, + "learning_rate": 1.6702669623524273e-05, + "loss": 0.3274, "step": 6920 }, { - "epoch": 1.0146520146520146, - "grad_norm": 0.3571195382115666, - "learning_rate": 1.673788279944286e-05, - "loss": 0.3364, + "epoch": 1.0194317679964668, + "grad_norm": 0.36846966065311787, + "learning_rate": 1.669631135412251e-05, + "loss": 0.3248, "step": 6925 }, { - "epoch": 1.0153846153846153, - "grad_norm": 0.3576188937698423, - "learning_rate": 1.6731581443883603e-05, - "loss": 0.3409, + "epoch": 1.020167819814515, + "grad_norm": 0.37776175075594376, + "learning_rate": 1.668994817304889e-05, + "loss": 0.3457, "step": 6930 }, { - "epoch": 1.016117216117216, - "grad_norm": 0.3643609147540888, - "learning_rate": 1.672527519676237e-05, - "loss": 0.3341, + "epoch": 1.020903871632563, + "grad_norm": 0.36513036919289815, + "learning_rate": 1.668358008497074e-05, + "loss": 0.3293, "step": 6935 }, { - "epoch": 1.0168498168498168, - "grad_norm": 0.3801775662889062, - "learning_rate": 1.6718964062661646e-05, - "loss": 0.3394, + "epoch": 1.0216399234506108, + "grad_norm": 0.35371926801796266, + "learning_rate": 1.667720709455898e-05, + "loss": 0.3136, "step": 6940 }, { - "epoch": 1.0175824175824175, - "grad_norm": 0.3639872213399938, - "learning_rate": 1.6712648046167478e-05, - "loss": 0.3377, + "epoch": 1.022375975268659, + "grad_norm": 0.3837532723126793, + "learning_rate": 1.667082920648813e-05, + "loss": 0.326, "step": 6945 }, { - "epoch": 1.0183150183150182, - "grad_norm": 0.3782266895702019, - "learning_rate": 1.670632715186944e-05, - "loss": 0.3282, + "epoch": 1.023112027086707, + "grad_norm": 0.3716385640915241, + "learning_rate": 1.6664446425436306e-05, + "loss": 0.3298, "step": 6950 }, { - "epoch": 1.019047619047619, - "grad_norm": 0.8201464850800466, - "learning_rate": 1.6700001384360677e-05, - "loss": 0.3354, + "epoch": 1.0238480789047548, + "grad_norm": 0.3503289333931796, + "learning_rate": 1.66580587560852e-05, + "loss": 0.3159, "step": 6955 }, { - "epoch": 1.0197802197802197, - "grad_norm": 0.37112401011560336, - "learning_rate": 1.669367074823786e-05, - "loss": 0.3224, + "epoch": 1.024584130722803, + "grad_norm": 0.36948330992076095, + "learning_rate": 1.6651666203120112e-05, + "loss": 0.3372, "step": 6960 }, { - "epoch": 1.0205128205128204, - "grad_norm": 0.3846686023445629, - "learning_rate": 1.66873352481012e-05, - "loss": 0.3255, + "epoch": 1.0253201825408509, + "grad_norm": 0.3954984454681354, + "learning_rate": 1.66452687712299e-05, + "loss": 0.327, "step": 6965 }, { - "epoch": 1.0212454212454212, - "grad_norm": 0.37024088061424254, - "learning_rate": 1.6680994888554437e-05, - "loss": 0.3265, + "epoch": 1.0260562343588988, + "grad_norm": 0.37768644504929283, + "learning_rate": 1.663886646510701e-05, + "loss": 0.3237, "step": 6970 }, { - "epoch": 1.021978021978022, - "grad_norm": 0.35699115192148506, - "learning_rate": 1.6674649674204858e-05, - "loss": 0.3403, + "epoch": 1.026792286176947, + "grad_norm": 0.366095485562135, + "learning_rate": 1.663245928944747e-05, + "loss": 0.3032, "step": 6975 }, { - "epoch": 1.0227106227106226, - "grad_norm": 0.3658745623537328, - "learning_rate": 1.6668299609663265e-05, - "loss": 0.317, + "epoch": 1.0275283379949949, + "grad_norm": 0.363572513558161, + "learning_rate": 1.6626047248950866e-05, + "loss": 0.3252, "step": 6980 }, { - "epoch": 1.0234432234432234, - "grad_norm": 0.37798107793091024, - "learning_rate": 1.666194469954398e-05, - "loss": 0.3222, + "epoch": 1.0282643898130428, + "grad_norm": 0.3844984292096295, + "learning_rate": 1.6619630348320376e-05, + "loss": 0.3348, "step": 6985 }, { - "epoch": 1.024175824175824, - "grad_norm": 0.36710683342586753, - "learning_rate": 1.6655584948464868e-05, - "loss": 0.3384, + "epoch": 1.0290004416310907, + "grad_norm": 0.3632802823565491, + "learning_rate": 1.661320859226271e-05, + "loss": 0.3207, "step": 6990 }, { - "epoch": 1.0249084249084248, - "grad_norm": 0.35108828497233013, - "learning_rate": 1.6649220361047283e-05, - "loss": 0.3262, + "epoch": 1.0297364934491389, + "grad_norm": 0.3704916233754107, + "learning_rate": 1.6606781985488163e-05, + "loss": 0.3228, "step": 6995 }, { - "epoch": 1.0256410256410255, - "grad_norm": 0.38319064202144587, - "learning_rate": 1.6642850941916117e-05, - "loss": 0.3264, + "epoch": 1.0304725452671868, + "grad_norm": 0.3540067165419581, + "learning_rate": 1.6600350532710585e-05, + "loss": 0.3215, "step": 7000 }, { - "epoch": 1.0263736263736263, - "grad_norm": 0.36976692043937126, - "learning_rate": 1.6636476695699757e-05, - "loss": 0.3469, + "epoch": 1.0312085970852347, + "grad_norm": 0.3809778134546721, + "learning_rate": 1.659391423864737e-05, + "loss": 0.3188, "step": 7005 }, { - "epoch": 1.027106227106227, - "grad_norm": 0.3863933852480613, - "learning_rate": 1.663009762703011e-05, - "loss": 0.3195, + "epoch": 1.0319446489032829, + "grad_norm": 0.3583524428586189, + "learning_rate": 1.658747310801948e-05, + "loss": 0.3106, "step": 7010 }, { - "epoch": 1.0278388278388277, - "grad_norm": 0.381641946588195, - "learning_rate": 1.6623713740542576e-05, - "loss": 0.3285, + "epoch": 1.0326807007213308, + "grad_norm": 0.37977996018229804, + "learning_rate": 1.65810271455514e-05, + "loss": 0.3244, "step": 7015 }, { - "epoch": 1.0285714285714285, - "grad_norm": 0.39398956094879073, - "learning_rate": 1.661732504087606e-05, - "loss": 0.3258, + "epoch": 1.0334167525393787, + "grad_norm": 0.37847548505124745, + "learning_rate": 1.657457635597119e-05, + "loss": 0.3304, "step": 7020 }, { - "epoch": 1.0293040293040292, - "grad_norm": 0.39078417256772674, - "learning_rate": 1.6610931532672978e-05, - "loss": 0.3478, + "epoch": 1.0341528043574268, + "grad_norm": 0.37455421483361456, + "learning_rate": 1.6568120744010423e-05, + "loss": 0.3354, "step": 7025 }, { - "epoch": 1.03003663003663, - "grad_norm": 0.3625107322517052, - "learning_rate": 1.660453322057922e-05, - "loss": 0.317, + "epoch": 1.0348888561754748, + "grad_norm": 0.3739794280581571, + "learning_rate": 1.6561660314404222e-05, + "loss": 0.3361, "step": 7030 }, { - "epoch": 1.0307692307692307, - "grad_norm": 0.36369646810472994, - "learning_rate": 1.6598130109244172e-05, - "loss": 0.3274, + "epoch": 1.0356249079935227, + "grad_norm": 0.3540033001276769, + "learning_rate": 1.6555195071891247e-05, + "loss": 0.3195, "step": 7035 }, { - "epoch": 1.0315018315018314, - "grad_norm": 0.37324379752665476, - "learning_rate": 1.6591722203320714e-05, - "loss": 0.3222, + "epoch": 1.0363609598115708, + "grad_norm": 0.3815730252997327, + "learning_rate": 1.6548725021213677e-05, + "loss": 0.3355, "step": 7040 }, { - "epoch": 1.0322344322344323, - "grad_norm": 0.36774576270196524, - "learning_rate": 1.6585309507465208e-05, - "loss": 0.3396, + "epoch": 1.0370970116296188, + "grad_norm": 0.36803983074933216, + "learning_rate": 1.654225016711723e-05, + "loss": 0.3457, "step": 7045 }, { - "epoch": 1.032967032967033, - "grad_norm": 0.37855913620930015, - "learning_rate": 1.657889202633749e-05, - "loss": 0.3272, + "epoch": 1.0378330634476667, + "grad_norm": 0.37669783307078764, + "learning_rate": 1.6535770514351138e-05, + "loss": 0.332, "step": 7050 }, { - "epoch": 1.0336996336996338, - "grad_norm": 0.3949421463343061, - "learning_rate": 1.6572469764600883e-05, - "loss": 0.3334, + "epoch": 1.0385691152657146, + "grad_norm": 0.3572094760337152, + "learning_rate": 1.6529286067668157e-05, + "loss": 0.3194, "step": 7055 }, { - "epoch": 1.0344322344322345, - "grad_norm": 0.3815264146033247, - "learning_rate": 1.6566042726922176e-05, - "loss": 0.3176, + "epoch": 1.0393051670837627, + "grad_norm": 0.366098538014259, + "learning_rate": 1.6522796831824558e-05, + "loss": 0.3228, "step": 7060 }, { - "epoch": 1.0351648351648353, - "grad_norm": 0.37446850601118586, - "learning_rate": 1.6559610917971635e-05, - "loss": 0.3204, + "epoch": 1.0400412189018107, + "grad_norm": 0.38345492926038116, + "learning_rate": 1.6516302811580128e-05, + "loss": 0.3327, "step": 7065 }, { - "epoch": 1.035897435897436, - "grad_norm": 0.3564938395181809, - "learning_rate": 1.6553174342422984e-05, - "loss": 0.3261, + "epoch": 1.0407772707198586, + "grad_norm": 0.3623147985855647, + "learning_rate": 1.6509804011698155e-05, + "loss": 0.3186, "step": 7070 }, { - "epoch": 1.0366300366300367, - "grad_norm": 0.38035681846336156, - "learning_rate": 1.6546733004953424e-05, - "loss": 0.3347, + "epoch": 1.0415133225379067, + "grad_norm": 0.3731387729661672, + "learning_rate": 1.6503300436945444e-05, + "loss": 0.3291, "step": 7075 }, { - "epoch": 1.0373626373626375, - "grad_norm": 0.3708597688900623, - "learning_rate": 1.6540286910243604e-05, - "loss": 0.3287, + "epoch": 1.0422493743559547, + "grad_norm": 0.3664054074993125, + "learning_rate": 1.649679209209229e-05, + "loss": 0.33, "step": 7080 }, { - "epoch": 1.0380952380952382, - "grad_norm": 0.36445235144132193, - "learning_rate": 1.6533836062977634e-05, - "loss": 0.3145, + "epoch": 1.0429854261740026, + "grad_norm": 0.3679294654290743, + "learning_rate": 1.64902789819125e-05, + "loss": 0.3302, "step": 7085 }, { - "epoch": 1.038827838827839, - "grad_norm": 0.3619995557101858, - "learning_rate": 1.6527380467843076e-05, - "loss": 0.3327, + "epoch": 1.0437214779920507, + "grad_norm": 0.36296276512830733, + "learning_rate": 1.6483761111183367e-05, + "loss": 0.325, "step": 7090 }, { - "epoch": 1.0395604395604396, - "grad_norm": 0.36870240909186675, - "learning_rate": 1.6520920129530953e-05, - "loss": 0.337, + "epoch": 1.0444575298100987, + "grad_norm": 0.3660734529889537, + "learning_rate": 1.647723848468568e-05, + "loss": 0.3199, "step": 7095 }, { - "epoch": 1.0402930402930404, - "grad_norm": 0.35909337971119526, - "learning_rate": 1.651445505273572e-05, - "loss": 0.3432, + "epoch": 1.0451935816281466, + "grad_norm": 0.3485985572971079, + "learning_rate": 1.6470711107203713e-05, + "loss": 0.3226, "step": 7100 }, { - "epoch": 1.041025641025641, - "grad_norm": 0.38235517642065997, - "learning_rate": 1.650798524215528e-05, - "loss": 0.3372, + "epoch": 1.0459296334461947, + "grad_norm": 0.37937783027479494, + "learning_rate": 1.6464178983525226e-05, + "loss": 0.339, "step": 7105 }, { - "epoch": 1.0417582417582418, - "grad_norm": 0.39971357321203715, - "learning_rate": 1.6501510702490975e-05, - "loss": 0.3263, + "epoch": 1.0466656852642426, + "grad_norm": 0.36644636641304684, + "learning_rate": 1.6457642118441462e-05, + "loss": 0.346, "step": 7110 }, { - "epoch": 1.0424908424908426, - "grad_norm": 0.38484932847901626, - "learning_rate": 1.649503143844759e-05, - "loss": 0.3226, + "epoch": 1.0474017370822906, + "grad_norm": 0.36447140404101613, + "learning_rate": 1.6451100516747143e-05, + "loss": 0.3236, "step": 7115 }, { - "epoch": 1.0432234432234433, - "grad_norm": 0.3875184534367614, - "learning_rate": 1.648854745473334e-05, - "loss": 0.3359, + "epoch": 1.0481377889003385, + "grad_norm": 0.34816755648439046, + "learning_rate": 1.644455418324046e-05, + "loss": 0.3168, "step": 7120 }, { - "epoch": 1.043956043956044, - "grad_norm": 0.38058217345183687, - "learning_rate": 1.6482058756059865e-05, - "loss": 0.3226, + "epoch": 1.0488738407183866, + "grad_norm": 0.37863055036846316, + "learning_rate": 1.643800312272308e-05, + "loss": 0.3404, "step": 7125 }, { - "epoch": 1.0446886446886448, - "grad_norm": 0.37616687124736103, - "learning_rate": 1.647556534714223e-05, - "loss": 0.3398, + "epoch": 1.0496098925364346, + "grad_norm": 0.37362205464081283, + "learning_rate": 1.643144734000013e-05, + "loss": 0.3126, "step": 7130 }, { - "epoch": 1.0454212454212455, - "grad_norm": 0.37108574407021766, - "learning_rate": 1.6469067232698935e-05, - "loss": 0.3367, + "epoch": 1.0503459443544825, + "grad_norm": 0.3546035403542768, + "learning_rate": 1.642488683988022e-05, + "loss": 0.329, "step": 7135 }, { - "epoch": 1.0461538461538462, - "grad_norm": 0.34837614061109273, - "learning_rate": 1.6462564417451882e-05, - "loss": 0.3222, + "epoch": 1.0510819961725306, + "grad_norm": 0.36941811259031215, + "learning_rate": 1.6418321627175387e-05, + "loss": 0.3359, "step": 7140 }, { - "epoch": 1.046886446886447, - "grad_norm": 0.3690043003938606, - "learning_rate": 1.6456056906126407e-05, - "loss": 0.3331, + "epoch": 1.0518180479905785, + "grad_norm": 0.3532587579319103, + "learning_rate": 1.641175170670115e-05, + "loss": 0.3203, "step": 7145 }, { - "epoch": 1.0476190476190477, - "grad_norm": 0.3717115798790444, - "learning_rate": 1.6449544703451246e-05, - "loss": 0.3234, + "epoch": 1.0525540998086265, + "grad_norm": 0.3822778060319189, + "learning_rate": 1.640517708327648e-05, + "loss": 0.3263, "step": 7150 }, { - "epoch": 1.0483516483516484, - "grad_norm": 0.3634923181962294, - "learning_rate": 1.6443027814158544e-05, - "loss": 0.3307, + "epoch": 1.0532901516266746, + "grad_norm": 0.3678203451952706, + "learning_rate": 1.6398597761723788e-05, + "loss": 0.3219, "step": 7155 }, { - "epoch": 1.0490842490842491, - "grad_norm": 0.34368299374778816, - "learning_rate": 1.643650624298386e-05, - "loss": 0.3214, + "epoch": 1.0540262034447225, + "grad_norm": 0.37407041493293564, + "learning_rate": 1.6392013746868933e-05, + "loss": 0.3239, "step": 7160 }, { - "epoch": 1.0498168498168499, - "grad_norm": 0.35863896650208915, - "learning_rate": 1.6429979994666148e-05, - "loss": 0.3292, + "epoch": 1.0547622552627705, + "grad_norm": 0.3559475388496358, + "learning_rate": 1.6385425043541223e-05, + "loss": 0.3397, "step": 7165 }, { - "epoch": 1.0505494505494506, - "grad_norm": 0.3671958170838681, - "learning_rate": 1.6423449073947765e-05, - "loss": 0.3101, + "epoch": 1.0554983070808186, + "grad_norm": 0.3681921421056088, + "learning_rate": 1.63788316565734e-05, + "loss": 0.3392, "step": 7170 }, { - "epoch": 1.0512820512820513, - "grad_norm": 0.3866998396533175, - "learning_rate": 1.6416913485574455e-05, - "loss": 0.3184, + "epoch": 1.0562343588988665, + "grad_norm": 0.3710832881752622, + "learning_rate": 1.6372233590801638e-05, + "loss": 0.3381, "step": 7175 }, { - "epoch": 1.052014652014652, - "grad_norm": 0.35278906085633155, - "learning_rate": 1.6410373234295365e-05, - "loss": 0.334, + "epoch": 1.0569704107169144, + "grad_norm": 0.3581905728893183, + "learning_rate": 1.636563085106555e-05, + "loss": 0.3349, "step": 7180 }, { - "epoch": 1.0527472527472528, - "grad_norm": 0.3608851491793124, - "learning_rate": 1.6403828324863028e-05, - "loss": 0.3328, + "epoch": 1.0577064625349624, + "grad_norm": 0.36005171786885787, + "learning_rate": 1.635902344220817e-05, + "loss": 0.3152, "step": 7185 }, { - "epoch": 1.0534798534798535, - "grad_norm": 0.3889648649155586, - "learning_rate": 1.6397278762033353e-05, - "loss": 0.3297, + "epoch": 1.0584425143530105, + "grad_norm": 0.36169934059746134, + "learning_rate": 1.6352411369075968e-05, + "loss": 0.3272, "step": 7190 }, { - "epoch": 1.0542124542124542, - "grad_norm": 0.3631199609889286, - "learning_rate": 1.639072455056564e-05, - "loss": 0.3256, + "epoch": 1.0591785661710584, + "grad_norm": 0.36241019282764586, + "learning_rate": 1.634579463651882e-05, + "loss": 0.3221, "step": 7195 }, { - "epoch": 1.054945054945055, - "grad_norm": 0.35555239975058406, - "learning_rate": 1.638416569522256e-05, - "loss": 0.3192, + "epoch": 1.0599146179891064, + "grad_norm": 0.3663937100330741, + "learning_rate": 1.6339173249390038e-05, + "loss": 0.3244, "step": 7200 }, { - "epoch": 1.0556776556776557, - "grad_norm": 0.38583850949236403, - "learning_rate": 1.637760220077017e-05, - "loss": 0.3284, + "epoch": 1.0606506698071545, + "grad_norm": 0.3684450024490567, + "learning_rate": 1.6332547212546328e-05, + "loss": 0.3291, "step": 7205 }, { - "epoch": 1.0564102564102564, - "grad_norm": 0.37213001830232645, - "learning_rate": 1.637103407197788e-05, - "loss": 0.3332, + "epoch": 1.0613867216252024, + "grad_norm": 0.3850360448314454, + "learning_rate": 1.632591653084782e-05, + "loss": 0.3135, "step": 7210 }, { - "epoch": 1.0571428571428572, - "grad_norm": 0.38617384644077335, - "learning_rate": 1.6364461313618484e-05, - "loss": 0.3161, + "epoch": 1.0621227734432503, + "grad_norm": 0.35248505793040924, + "learning_rate": 1.6319281209158045e-05, + "loss": 0.3238, "step": 7215 }, { - "epoch": 1.057875457875458, - "grad_norm": 0.37460027814008917, - "learning_rate": 1.6357883930468134e-05, - "loss": 0.3311, + "epoch": 1.0628588252612985, + "grad_norm": 0.38223933242711317, + "learning_rate": 1.6312641252343945e-05, + "loss": 0.3424, "step": 7220 }, { - "epoch": 1.0586080586080586, - "grad_norm": 0.37347735128227266, - "learning_rate": 1.635130192730634e-05, - "loss": 0.3198, + "epoch": 1.0635948770793464, + "grad_norm": 0.3658573144825204, + "learning_rate": 1.6305996665275854e-05, + "loss": 0.3288, "step": 7225 }, { - "epoch": 1.0593406593406594, - "grad_norm": 0.36952088656401244, - "learning_rate": 1.6344715308915976e-05, - "loss": 0.3307, + "epoch": 1.0643309288973943, + "grad_norm": 0.37273113745613967, + "learning_rate": 1.62993474528275e-05, + "loss": 0.3492, "step": 7230 }, { - "epoch": 1.06007326007326, - "grad_norm": 0.3555251241647386, - "learning_rate": 1.6338124080083256e-05, - "loss": 0.3226, + "epoch": 1.0650669807154425, + "grad_norm": 0.3602935228828251, + "learning_rate": 1.6292693619876016e-05, + "loss": 0.3241, "step": 7235 }, { - "epoch": 1.0608058608058608, - "grad_norm": 0.3916542016656616, - "learning_rate": 1.633152824559776e-05, - "loss": 0.3293, + "epoch": 1.0658030325334904, + "grad_norm": 0.3857924962744493, + "learning_rate": 1.628603517130191e-05, + "loss": 0.3372, "step": 7240 }, { - "epoch": 1.0615384615384615, - "grad_norm": 0.3698139759757373, - "learning_rate": 1.6324927810252412e-05, - "loss": 0.3266, + "epoch": 1.0665390843515383, + "grad_norm": 0.3573069234170001, + "learning_rate": 1.6279372111989084e-05, + "loss": 0.3128, "step": 7245 }, { - "epoch": 1.0622710622710623, - "grad_norm": 0.37141954324269505, - "learning_rate": 1.6318322778843467e-05, - "loss": 0.3335, + "epoch": 1.0672751361695862, + "grad_norm": 0.3751310313442755, + "learning_rate": 1.627270444682482e-05, + "loss": 0.3361, "step": 7250 }, { - "epoch": 1.063003663003663, - "grad_norm": 0.37461222790810195, - "learning_rate": 1.6311713156170535e-05, - "loss": 0.3145, + "epoch": 1.0680111879876344, + "grad_norm": 0.37863537501379835, + "learning_rate": 1.626603218069978e-05, + "loss": 0.3183, "step": 7255 }, { - "epoch": 1.0637362637362637, - "grad_norm": 0.3922862206834006, - "learning_rate": 1.6305098947036552e-05, - "loss": 0.3332, + "epoch": 1.0687472398056823, + "grad_norm": 0.3670003362762075, + "learning_rate": 1.6259355318507996e-05, + "loss": 0.318, "step": 7260 }, { - "epoch": 1.0644688644688645, - "grad_norm": 0.36960020871177257, - "learning_rate": 1.629848015624779e-05, - "loss": 0.3369, + "epoch": 1.0694832916237302, + "grad_norm": 0.3750423003028345, + "learning_rate": 1.6252673865146876e-05, + "loss": 0.3297, "step": 7265 }, { - "epoch": 1.0652014652014652, - "grad_norm": 0.37500279281500304, - "learning_rate": 1.6291856788613853e-05, - "loss": 0.3425, + "epoch": 1.0702193434417784, + "grad_norm": 0.37475485693836696, + "learning_rate": 1.6245987825517193e-05, + "loss": 0.3257, "step": 7270 }, { - "epoch": 1.065934065934066, - "grad_norm": 0.36412353671363584, - "learning_rate": 1.628522884894767e-05, - "loss": 0.3185, + "epoch": 1.0709553952598263, + "grad_norm": 0.3704294227241169, + "learning_rate": 1.6239297204523082e-05, + "loss": 0.3121, "step": 7275 }, { - "epoch": 1.0666666666666667, - "grad_norm": 0.38656051240449313, - "learning_rate": 1.6278596342065483e-05, - "loss": 0.3304, + "epoch": 1.0716914470778742, + "grad_norm": 0.3776907071057796, + "learning_rate": 1.623260200707205e-05, + "loss": 0.3252, "step": 7280 }, { - "epoch": 1.0673992673992674, - "grad_norm": 0.37517022440229425, - "learning_rate": 1.627195927278687e-05, - "loss": 0.3421, + "epoch": 1.0724274988959224, + "grad_norm": 0.37423229911933753, + "learning_rate": 1.622590223807494e-05, + "loss": 0.3276, "step": 7285 }, { - "epoch": 1.0681318681318681, - "grad_norm": 0.3536741231559894, - "learning_rate": 1.626531764593471e-05, - "loss": 0.333, + "epoch": 1.0731635507139703, + "grad_norm": 0.37166084869277755, + "learning_rate": 1.6219197902445965e-05, + "loss": 0.3284, "step": 7290 }, { - "epoch": 1.0688644688644688, - "grad_norm": 0.36764122405217287, - "learning_rate": 1.62586714663352e-05, - "loss": 0.3177, + "epoch": 1.0738996025320182, + "grad_norm": 0.3642307755417687, + "learning_rate": 1.6212489005102687e-05, + "loss": 0.3216, "step": 7295 }, { - "epoch": 1.0695970695970696, - "grad_norm": 0.3714716959836512, - "learning_rate": 1.6252020738817845e-05, - "loss": 0.3369, + "epoch": 1.0746356543500664, + "grad_norm": 0.37194292127247996, + "learning_rate": 1.6205775550966006e-05, + "loss": 0.3218, "step": 7300 }, { - "epoch": 1.0703296703296703, - "grad_norm": 0.38888598011485115, - "learning_rate": 1.624536546821545e-05, - "loss": 0.3207, + "epoch": 1.0753717061681143, + "grad_norm": 0.366829209386239, + "learning_rate": 1.6199057544960166e-05, + "loss": 0.3303, "step": 7305 }, { - "epoch": 1.071062271062271, - "grad_norm": 0.3785750277463407, - "learning_rate": 1.6238705659364125e-05, - "loss": 0.32, + "epoch": 1.0761077579861622, + "grad_norm": 0.35373260814593444, + "learning_rate": 1.6192334992012754e-05, + "loss": 0.3352, "step": 7310 }, { - "epoch": 1.0717948717948718, - "grad_norm": 0.3875034283018185, - "learning_rate": 1.6232041317103283e-05, - "loss": 0.3267, + "epoch": 1.0768438098042101, + "grad_norm": 0.36101873658612077, + "learning_rate": 1.618560789705469e-05, + "loss": 0.3468, "step": 7315 }, { - "epoch": 1.0725274725274725, - "grad_norm": 0.38196225424286717, - "learning_rate": 1.6225372446275624e-05, - "loss": 0.3351, + "epoch": 1.0775798616222583, + "grad_norm": 0.3730530720901144, + "learning_rate": 1.617887626502022e-05, + "loss": 0.326, "step": 7320 }, { - "epoch": 1.0732600732600732, - "grad_norm": 0.3624491647078481, - "learning_rate": 1.621869905172714e-05, - "loss": 0.3335, + "epoch": 1.0783159134403062, + "grad_norm": 0.35642291924912095, + "learning_rate": 1.6172140100846933e-05, + "loss": 0.3263, "step": 7325 }, { - "epoch": 1.073992673992674, - "grad_norm": 0.5550075430236053, - "learning_rate": 1.6212021138307108e-05, - "loss": 0.3257, + "epoch": 1.0790519652583541, + "grad_norm": 0.36746353036862045, + "learning_rate": 1.6165399409475723e-05, + "loss": 0.33, "step": 7330 }, { - "epoch": 1.0747252747252747, - "grad_norm": 0.37399371992603847, - "learning_rate": 1.6205338710868094e-05, - "loss": 0.3381, + "epoch": 1.0797880170764023, + "grad_norm": 0.3724282305050785, + "learning_rate": 1.615865419585082e-05, + "loss": 0.3176, "step": 7335 }, { - "epoch": 1.0754578754578754, - "grad_norm": 0.3548002458460286, - "learning_rate": 1.6198651774265947e-05, - "loss": 0.3186, + "epoch": 1.0805240688944502, + "grad_norm": 0.3696224933488861, + "learning_rate": 1.6151904464919764e-05, + "loss": 0.3151, "step": 7340 }, { - "epoch": 1.0761904761904761, - "grad_norm": 0.37323170647942955, - "learning_rate": 1.619196033335978e-05, - "loss": 0.3307, + "epoch": 1.081260120712498, + "grad_norm": 0.37514572425232984, + "learning_rate": 1.614515022163341e-05, + "loss": 0.3218, "step": 7345 }, { - "epoch": 1.0769230769230769, - "grad_norm": 0.36708288742710005, - "learning_rate": 1.618526439301199e-05, - "loss": 0.3248, + "epoch": 1.0819961725305463, + "grad_norm": 0.36528477402573506, + "learning_rate": 1.6138391470945917e-05, + "loss": 0.3247, "step": 7350 }, { - "epoch": 1.0776556776556776, - "grad_norm": 0.36098653159048294, - "learning_rate": 1.6178563958088237e-05, - "loss": 0.3236, + "epoch": 1.0827322243485942, + "grad_norm": 0.37119579299867844, + "learning_rate": 1.613162821781476e-05, + "loss": 0.319, "step": 7355 }, { - "epoch": 1.0783882783882783, - "grad_norm": 0.36894116539216226, - "learning_rate": 1.6171859033457457e-05, - "loss": 0.3333, + "epoch": 1.083468276166642, + "grad_norm": 0.3572730238206756, + "learning_rate": 1.6124860467200717e-05, + "loss": 0.3169, "step": 7360 }, { - "epoch": 1.079120879120879, - "grad_norm": 0.38834742229919167, - "learning_rate": 1.6165149623991835e-05, - "loss": 0.3166, + "epoch": 1.08420432798469, + "grad_norm": 0.3870278421078157, + "learning_rate": 1.611808822406785e-05, + "loss": 0.3217, "step": 7365 }, { - "epoch": 1.0798534798534798, - "grad_norm": 0.3587364240448985, - "learning_rate": 1.6158435734566823e-05, - "loss": 0.3339, + "epoch": 1.0849403798027382, + "grad_norm": 0.37792978078405554, + "learning_rate": 1.611131149338353e-05, + "loss": 0.3376, "step": 7370 }, { - "epoch": 1.0805860805860805, - "grad_norm": 0.3635888549564469, - "learning_rate": 1.6151717370061125e-05, - "loss": 0.3229, + "epoch": 1.085676431620786, + "grad_norm": 0.37105444025462786, + "learning_rate": 1.6104530280118417e-05, + "loss": 0.337, "step": 7375 }, { - "epoch": 1.0813186813186813, - "grad_norm": 0.3731882016211775, - "learning_rate": 1.6144994535356704e-05, - "loss": 0.3134, + "epoch": 1.086412483438834, + "grad_norm": 0.38318384864267696, + "learning_rate": 1.6097744589246453e-05, + "loss": 0.3382, "step": 7380 }, { - "epoch": 1.082051282051282, - "grad_norm": 0.35711735047435655, - "learning_rate": 1.6138267235338764e-05, - "loss": 0.3269, + "epoch": 1.0871485352568822, + "grad_norm": 0.3702880421771977, + "learning_rate": 1.6090954425744875e-05, + "loss": 0.3282, "step": 7385 }, { - "epoch": 1.0827838827838827, - "grad_norm": 0.383226470412408, - "learning_rate": 1.6131535474895753e-05, - "loss": 0.3268, + "epoch": 1.08788458707493, + "grad_norm": 0.36901695935820905, + "learning_rate": 1.6084159794594193e-05, + "loss": 0.3264, "step": 7390 }, { - "epoch": 1.0835164835164834, - "grad_norm": 0.37425974187830674, - "learning_rate": 1.6124799258919367e-05, - "loss": 0.3405, + "epoch": 1.088620638892978, + "grad_norm": 0.3558368550213881, + "learning_rate": 1.6077360700778194e-05, + "loss": 0.3275, "step": 7395 }, { - "epoch": 1.0842490842490842, - "grad_norm": 0.3647449028203382, - "learning_rate": 1.611805859230453e-05, - "loss": 0.3152, + "epoch": 1.0893566907110261, + "grad_norm": 0.3604908209621351, + "learning_rate": 1.6070557149283933e-05, + "loss": 0.3297, "step": 7400 }, { - "epoch": 1.084981684981685, - "grad_norm": 0.38305661847604566, - "learning_rate": 1.6111313479949415e-05, - "loss": 0.3234, + "epoch": 1.090092742529074, + "grad_norm": 0.37617018641738653, + "learning_rate": 1.6063749145101756e-05, + "loss": 0.3315, "step": 7405 }, { - "epoch": 1.0857142857142856, - "grad_norm": 0.3850155020309742, - "learning_rate": 1.6104563926755407e-05, - "loss": 0.3167, + "epoch": 1.090828794347122, + "grad_norm": 0.381591827635015, + "learning_rate": 1.6056936693225246e-05, + "loss": 0.3243, "step": 7410 }, { - "epoch": 1.0864468864468864, - "grad_norm": 0.3896911044662533, - "learning_rate": 1.6097809937627134e-05, - "loss": 0.3416, + "epoch": 1.09156484616517, + "grad_norm": 0.36013113925771034, + "learning_rate": 1.6050119798651275e-05, + "loss": 0.3293, "step": 7415 }, { - "epoch": 1.087179487179487, - "grad_norm": 0.3808117650518625, - "learning_rate": 1.609105151747244e-05, - "loss": 0.3407, + "epoch": 1.092300897983218, + "grad_norm": 0.3493409370534532, + "learning_rate": 1.604329846637995e-05, + "loss": 0.3337, "step": 7420 }, { - "epoch": 1.0879120879120878, - "grad_norm": 0.39129833508135087, - "learning_rate": 1.6084288671202384e-05, - "loss": 0.3279, + "epoch": 1.093036949801266, + "grad_norm": 0.3956493470643981, + "learning_rate": 1.603647270141465e-05, + "loss": 0.3411, "step": 7425 }, { - "epoch": 1.0886446886446886, - "grad_norm": 0.3619807548374353, - "learning_rate": 1.607752140373125e-05, - "loss": 0.3235, + "epoch": 1.093773001619314, + "grad_norm": 0.3656847275893169, + "learning_rate": 1.6029642508762e-05, + "loss": 0.3384, "step": 7430 }, { - "epoch": 1.0893772893772893, - "grad_norm": 0.3596232789121412, - "learning_rate": 1.6070749719976534e-05, - "loss": 0.3291, + "epoch": 1.094509053437362, + "grad_norm": 0.36577327273484317, + "learning_rate": 1.602280789343187e-05, + "loss": 0.3582, "step": 7435 }, { - "epoch": 1.09010989010989, - "grad_norm": 0.37294768245070653, - "learning_rate": 1.6063973624858936e-05, - "loss": 0.3357, + "epoch": 1.09524510525541, + "grad_norm": 0.353669292274733, + "learning_rate": 1.6015968860437382e-05, + "loss": 0.3335, "step": 7440 }, { - "epoch": 1.0908424908424907, - "grad_norm": 0.36847948298689304, - "learning_rate": 1.6057193123302363e-05, - "loss": 0.3393, + "epoch": 1.095981157073458, + "grad_norm": 0.3702265509160007, + "learning_rate": 1.600912541479489e-05, + "loss": 0.3298, "step": 7445 }, { - "epoch": 1.0915750915750915, - "grad_norm": 0.3692013692711026, - "learning_rate": 1.605040822023393e-05, - "loss": 0.3222, + "epoch": 1.096717208891506, + "grad_norm": 0.3573253785340228, + "learning_rate": 1.600227756152398e-05, + "loss": 0.3147, "step": 7450 }, { - "epoch": 1.0923076923076924, - "grad_norm": 0.365490580175772, - "learning_rate": 1.6043618920583945e-05, - "loss": 0.3219, + "epoch": 1.097453260709554, + "grad_norm": 0.3638891137918482, + "learning_rate": 1.5995425305647486e-05, + "loss": 0.3207, "step": 7455 }, { - "epoch": 1.0930402930402932, - "grad_norm": 0.37989741813217187, - "learning_rate": 1.60368252292859e-05, - "loss": 0.3158, + "epoch": 1.0981893125276019, + "grad_norm": 0.39218922642738785, + "learning_rate": 1.5988568652191463e-05, + "loss": 0.3235, "step": 7460 }, { - "epoch": 1.0937728937728939, - "grad_norm": 0.3857019049089393, - "learning_rate": 1.6030027151276504e-05, - "loss": 0.3491, + "epoch": 1.09892536434565, + "grad_norm": 0.3823963695714701, + "learning_rate": 1.598170760618519e-05, + "loss": 0.3283, "step": 7465 }, { - "epoch": 1.0945054945054946, - "grad_norm": 0.4380529333414578, - "learning_rate": 1.6023224691495636e-05, - "loss": 0.3324, + "epoch": 1.099661416163698, + "grad_norm": 0.3641134525190089, + "learning_rate": 1.5974842172661168e-05, + "loss": 0.3286, "step": 7470 }, { - "epoch": 1.0952380952380953, - "grad_norm": 0.3686451050298967, - "learning_rate": 1.601641785488636e-05, - "loss": 0.3419, + "epoch": 1.1003974679817459, + "grad_norm": 0.3701467819083142, + "learning_rate": 1.5967972356655125e-05, + "loss": 0.3274, "step": 7475 }, { - "epoch": 1.095970695970696, - "grad_norm": 0.3760684142547285, - "learning_rate": 1.600960664639492e-05, - "loss": 0.3384, + "epoch": 1.1011335197997938, + "grad_norm": 0.36222762668218655, + "learning_rate": 1.596109816320599e-05, + "loss": 0.3291, "step": 7480 }, { - "epoch": 1.0967032967032968, - "grad_norm": 0.3767875855720967, - "learning_rate": 1.6002791070970743e-05, - "loss": 0.3295, + "epoch": 1.101869571617842, + "grad_norm": 0.35521764335109396, + "learning_rate": 1.5954219597355907e-05, + "loss": 0.3258, "step": 7485 }, { - "epoch": 1.0974358974358975, - "grad_norm": 0.3845254569319136, - "learning_rate": 1.599597113356643e-05, - "loss": 0.336, + "epoch": 1.1026056234358899, + "grad_norm": 0.34725491118799207, + "learning_rate": 1.594733666415024e-05, + "loss": 0.3262, "step": 7490 }, { - "epoch": 1.0981684981684983, - "grad_norm": 0.3679990125041362, - "learning_rate": 1.5989146839137745e-05, - "loss": 0.3371, + "epoch": 1.1033416752539378, + "grad_norm": 0.3597769806477287, + "learning_rate": 1.594044936863754e-05, + "loss": 0.3198, "step": 7495 }, { - "epoch": 1.098901098901099, - "grad_norm": 0.36193018957769496, - "learning_rate": 1.598231819264361e-05, - "loss": 0.3274, + "epoch": 1.104077727071986, + "grad_norm": 0.3731792537772293, + "learning_rate": 1.5933557715869562e-05, + "loss": 0.3313, "step": 7500 }, { - "epoch": 1.0996336996336997, - "grad_norm": 0.36564077411332907, - "learning_rate": 1.5975485199046134e-05, - "loss": 0.3277, + "epoch": 1.1048137788900338, + "grad_norm": 0.37604908190706937, + "learning_rate": 1.5926661710901262e-05, + "loss": 0.3397, "step": 7505 }, { - "epoch": 1.1003663003663005, - "grad_norm": 0.3792791798482742, - "learning_rate": 1.596864786331057e-05, - "loss": 0.3357, + "epoch": 1.1055498307080818, + "grad_norm": 0.375418592273806, + "learning_rate": 1.5919761358790783e-05, + "loss": 0.318, "step": 7510 }, { - "epoch": 1.1010989010989012, - "grad_norm": 0.37858832146394067, - "learning_rate": 1.596180619040532e-05, - "loss": 0.3382, + "epoch": 1.10628588252613, + "grad_norm": 0.3731120352461751, + "learning_rate": 1.5912856664599456e-05, + "loss": 0.3307, "step": 7515 }, { - "epoch": 1.101831501831502, - "grad_norm": 0.38131450688598406, - "learning_rate": 1.5954960185301945e-05, - "loss": 0.3332, + "epoch": 1.1070219343441778, + "grad_norm": 0.3456206801012039, + "learning_rate": 1.590594763339181e-05, + "loss": 0.3273, "step": 7520 }, { - "epoch": 1.1025641025641026, - "grad_norm": 0.3795639108042152, - "learning_rate": 1.594810985297516e-05, - "loss": 0.3407, + "epoch": 1.1077579861622258, + "grad_norm": 0.38499107405021915, + "learning_rate": 1.5899034270235526e-05, + "loss": 0.338, "step": 7525 }, { - "epoch": 1.1032967032967034, - "grad_norm": 0.38294554175489987, - "learning_rate": 1.5941255198402817e-05, - "loss": 0.3409, + "epoch": 1.108494037980274, + "grad_norm": 0.3773814368821201, + "learning_rate": 1.5892116580201503e-05, + "loss": 0.336, "step": 7530 }, { - "epoch": 1.104029304029304, - "grad_norm": 0.36047873074214287, - "learning_rate": 1.5934396226565908e-05, - "loss": 0.3274, + "epoch": 1.1092300897983218, + "grad_norm": 0.3802879200646699, + "learning_rate": 1.588519456836377e-05, + "loss": 0.3422, "step": 7535 }, { - "epoch": 1.1047619047619048, - "grad_norm": 0.37183620442862136, - "learning_rate": 1.5927532942448563e-05, - "loss": 0.3346, + "epoch": 1.1099661416163698, + "grad_norm": 0.34333379544800796, + "learning_rate": 1.5878268239799567e-05, + "loss": 0.3341, "step": 7540 }, { - "epoch": 1.1054945054945056, - "grad_norm": 0.366742474177052, - "learning_rate": 1.592066535103805e-05, - "loss": 0.3349, + "epoch": 1.1107021934344177, + "grad_norm": 0.3826944545514652, + "learning_rate": 1.5871337599589273e-05, + "loss": 0.3371, "step": 7545 }, { - "epoch": 1.1062271062271063, - "grad_norm": 0.3825427448250812, - "learning_rate": 1.5913793457324765e-05, - "loss": 0.3382, + "epoch": 1.1114382452524658, + "grad_norm": 0.38749648865209124, + "learning_rate": 1.5864402652816433e-05, + "loss": 0.342, "step": 7550 }, { - "epoch": 1.106959706959707, - "grad_norm": 0.3630028811395133, - "learning_rate": 1.5906917266302224e-05, - "loss": 0.3194, + "epoch": 1.1121742970705137, + "grad_norm": 0.37327300355640614, + "learning_rate": 1.5857463404567758e-05, + "loss": 0.3208, "step": 7555 }, { - "epoch": 1.1076923076923078, - "grad_norm": 0.36450569088762086, - "learning_rate": 1.5900036782967074e-05, - "loss": 0.3388, + "epoch": 1.1129103488885617, + "grad_norm": 0.35851734984850764, + "learning_rate": 1.5850519859933115e-05, + "loss": 0.329, "step": 7560 }, { - "epoch": 1.1084249084249085, - "grad_norm": 0.36750525189012, - "learning_rate": 1.589315201231908e-05, - "loss": 0.3202, + "epoch": 1.1136464007066098, + "grad_norm": 0.370965625314894, + "learning_rate": 1.584357202400552e-05, + "loss": 0.3267, "step": 7565 }, { - "epoch": 1.1091575091575092, - "grad_norm": 0.36010730121886997, - "learning_rate": 1.5886262959361114e-05, - "loss": 0.3168, + "epoch": 1.1143824525246577, + "grad_norm": 0.3617068011157726, + "learning_rate": 1.5836619901881128e-05, + "loss": 0.3392, "step": 7570 }, { - "epoch": 1.10989010989011, - "grad_norm": 0.37408685972832806, - "learning_rate": 1.5879369629099167e-05, - "loss": 0.3288, + "epoch": 1.1151185043427057, + "grad_norm": 0.3536373085796306, + "learning_rate": 1.5829663498659247e-05, + "loss": 0.3321, "step": 7575 }, { - "epoch": 1.1106227106227107, - "grad_norm": 0.3458106728692704, - "learning_rate": 1.587247202654235e-05, - "loss": 0.3253, + "epoch": 1.1158545561607538, + "grad_norm": 0.35162096268803267, + "learning_rate": 1.582270281944233e-05, + "loss": 0.3308, "step": 7580 }, { - "epoch": 1.1113553113553114, - "grad_norm": 0.36639603817817384, - "learning_rate": 1.5865570156702848e-05, - "loss": 0.3323, + "epoch": 1.1165906079788017, + "grad_norm": 0.37021379314830466, + "learning_rate": 1.581573786933595e-05, + "loss": 0.3359, "step": 7585 }, { - "epoch": 1.1120879120879121, - "grad_norm": 0.3955687450980298, - "learning_rate": 1.5858664024595973e-05, - "loss": 0.3369, + "epoch": 1.1173266597968496, + "grad_norm": 0.3595300119663517, + "learning_rate": 1.5808768653448835e-05, + "loss": 0.339, "step": 7590 }, { - "epoch": 1.1128205128205129, - "grad_norm": 0.3611858582920874, - "learning_rate": 1.5851753635240122e-05, - "loss": 0.3192, + "epoch": 1.1180627116148978, + "grad_norm": 0.3571647240421222, + "learning_rate": 1.5801795176892817e-05, + "loss": 0.3296, "step": 7595 }, { - "epoch": 1.1135531135531136, - "grad_norm": 0.3765189228908552, - "learning_rate": 1.58448389936568e-05, - "loss": 0.3587, + "epoch": 1.1187987634329457, + "grad_norm": 0.3741042476940163, + "learning_rate": 1.5794817444782875e-05, + "loss": 0.3171, "step": 7600 }, { - "epoch": 1.1142857142857143, - "grad_norm": 0.3686849977742123, - "learning_rate": 1.5837920104870578e-05, - "loss": 0.3258, + "epoch": 1.1195348152509936, + "grad_norm": 0.3654862897816044, + "learning_rate": 1.578783546223709e-05, + "loss": 0.3317, "step": 7605 }, { - "epoch": 1.115018315018315, - "grad_norm": 0.3522453696751579, - "learning_rate": 1.583099697390913e-05, - "loss": 0.3221, + "epoch": 1.1202708670690416, + "grad_norm": 0.36602886343286756, + "learning_rate": 1.5780849234376683e-05, + "loss": 0.3293, "step": 7610 }, { - "epoch": 1.1157509157509158, - "grad_norm": 0.35684852493198504, - "learning_rate": 1.5824069605803214e-05, - "loss": 0.3275, + "epoch": 1.1210069188870897, + "grad_norm": 0.36170190489700843, + "learning_rate": 1.5773858766325968e-05, + "loss": 0.3318, "step": 7615 }, { - "epoch": 1.1164835164835165, - "grad_norm": 0.36542984023806907, - "learning_rate": 1.5817138005586655e-05, - "loss": 0.3294, + "epoch": 1.1217429707051376, + "grad_norm": 0.3660675851852243, + "learning_rate": 1.5766864063212376e-05, + "loss": 0.3309, "step": 7620 }, { - "epoch": 1.1172161172161172, - "grad_norm": 0.35939595810284075, - "learning_rate": 1.5810202178296368e-05, - "loss": 0.3209, + "epoch": 1.1224790225231855, + "grad_norm": 0.3702399812281134, + "learning_rate": 1.5759865130166456e-05, + "loss": 0.3277, "step": 7625 }, { - "epoch": 1.117948717948718, - "grad_norm": 0.37967256270515004, - "learning_rate": 1.5803262128972328e-05, - "loss": 0.3334, + "epoch": 1.1232150743412337, + "grad_norm": 0.3650387017992945, + "learning_rate": 1.575286197232184e-05, + "loss": 0.3318, "step": 7630 }, { - "epoch": 1.1186813186813187, - "grad_norm": 0.38638494038204896, - "learning_rate": 1.579631786265758e-05, - "loss": 0.3286, + "epoch": 1.1239511261592816, + "grad_norm": 0.3718314594470514, + "learning_rate": 1.5745854594815266e-05, + "loss": 0.3229, "step": 7635 }, { - "epoch": 1.1194139194139194, - "grad_norm": 0.3697407078462657, - "learning_rate": 1.5789369384398243e-05, - "loss": 0.3212, + "epoch": 1.1246871779773295, + "grad_norm": 0.39133302673669706, + "learning_rate": 1.5738843002786583e-05, + "loss": 0.3324, "step": 7640 }, { - "epoch": 1.1201465201465202, - "grad_norm": 0.38392638083705544, - "learning_rate": 1.578241669924348e-05, - "loss": 0.3331, + "epoch": 1.1254232297953777, + "grad_norm": 0.35355252114799085, + "learning_rate": 1.5731827201378703e-05, + "loss": 0.3319, "step": 7645 }, { - "epoch": 1.120879120879121, - "grad_norm": 0.3524484748280436, - "learning_rate": 1.577545981224552e-05, - "loss": 0.3266, + "epoch": 1.1261592816134256, + "grad_norm": 0.35336190013193636, + "learning_rate": 1.5724807195737654e-05, + "loss": 0.3182, "step": 7650 }, { - "epoch": 1.1216117216117216, - "grad_norm": 0.36401648760373023, - "learning_rate": 1.5768498728459652e-05, - "loss": 0.3368, + "epoch": 1.1268953334314735, + "grad_norm": 0.34575606792533403, + "learning_rate": 1.571778299101252e-05, + "loss": 0.3281, "step": 7655 }, { - "epoch": 1.1223443223443224, - "grad_norm": 0.37450744884160897, - "learning_rate": 1.5761533452944202e-05, - "loss": 0.3419, + "epoch": 1.1276313852495217, + "grad_norm": 0.37899990266269695, + "learning_rate": 1.571075459235549e-05, + "loss": 0.3371, "step": 7660 }, { - "epoch": 1.123076923076923, - "grad_norm": 0.3544280061502412, - "learning_rate": 1.5754563990760553e-05, - "loss": 0.33, + "epoch": 1.1283674370675696, + "grad_norm": 0.4079749309408199, + "learning_rate": 1.5703722004921814e-05, + "loss": 0.3258, "step": 7665 }, { - "epoch": 1.1238095238095238, - "grad_norm": 0.3717329727145123, - "learning_rate": 1.574759034697312e-05, - "loss": 0.3198, + "epoch": 1.1291034888856175, + "grad_norm": 0.3752359611414991, + "learning_rate": 1.5696685233869822e-05, + "loss": 0.3408, "step": 7670 }, { - "epoch": 1.1245421245421245, - "grad_norm": 0.36191640761951016, - "learning_rate": 1.5740612526649365e-05, - "loss": 0.3062, + "epoch": 1.1298395407036654, + "grad_norm": 0.3535172272002126, + "learning_rate": 1.5689644284360907e-05, + "loss": 0.3177, "step": 7675 }, { - "epoch": 1.1252747252747253, - "grad_norm": 0.3753666793046584, - "learning_rate": 1.573363053485978e-05, - "loss": 0.3243, + "epoch": 1.1305755925217136, + "grad_norm": 0.3578109901016605, + "learning_rate": 1.5682599161559526e-05, + "loss": 0.3284, "step": 7680 }, { - "epoch": 1.126007326007326, - "grad_norm": 0.3681268297340232, - "learning_rate": 1.572664437667789e-05, - "loss": 0.3201, + "epoch": 1.1313116443397615, + "grad_norm": 0.36881161064106277, + "learning_rate": 1.5675549870633203e-05, + "loss": 0.3294, "step": 7685 }, { - "epoch": 1.1267399267399267, - "grad_norm": 0.35574977395336677, - "learning_rate": 1.5719654057180252e-05, - "loss": 0.3249, + "epoch": 1.1320476961578094, + "grad_norm": 0.3769556399428797, + "learning_rate": 1.5668496416752516e-05, + "loss": 0.3231, "step": 7690 }, { - "epoch": 1.1274725274725275, - "grad_norm": 0.3605750556824243, - "learning_rate": 1.571265958144643e-05, - "loss": 0.3259, + "epoch": 1.1327837479758576, + "grad_norm": 0.37732308424714966, + "learning_rate": 1.56614388050911e-05, + "loss": 0.3397, "step": 7695 }, { - "epoch": 1.1282051282051282, - "grad_norm": 0.3794265246396994, - "learning_rate": 1.5705660954559035e-05, - "loss": 0.3415, + "epoch": 1.1335197997939055, + "grad_norm": 0.3786106463303893, + "learning_rate": 1.5654377040825634e-05, + "loss": 0.3448, "step": 7700 }, { - "epoch": 1.128937728937729, - "grad_norm": 0.3889151141065052, - "learning_rate": 1.569865818160367e-05, - "loss": 0.3201, + "epoch": 1.1342558516119534, + "grad_norm": 0.36074186899464955, + "learning_rate": 1.5647311129135846e-05, + "loss": 0.3256, "step": 7705 }, { - "epoch": 1.1296703296703297, - "grad_norm": 0.3520533857170965, - "learning_rate": 1.5691651267668968e-05, - "loss": 0.3272, + "epoch": 1.1349919034300016, + "grad_norm": 0.35720108673609746, + "learning_rate": 1.5640241075204507e-05, + "loss": 0.3234, "step": 7710 }, { - "epoch": 1.1304029304029304, - "grad_norm": 0.3754701024332263, - "learning_rate": 1.5684640217846555e-05, - "loss": 0.3368, + "epoch": 1.1357279552480495, + "grad_norm": 0.36684266672294585, + "learning_rate": 1.5633166884217424e-05, + "loss": 0.3346, "step": 7715 }, { - "epoch": 1.1311355311355311, - "grad_norm": 0.3470744278797411, - "learning_rate": 1.5677625037231075e-05, - "loss": 0.3256, + "epoch": 1.1364640070660974, + "grad_norm": 0.36578036204933856, + "learning_rate": 1.562608856136344e-05, + "loss": 0.3282, "step": 7720 }, { - "epoch": 1.1318681318681318, - "grad_norm": 0.3816451249663348, - "learning_rate": 1.5670605730920172e-05, + "epoch": 1.1372000588841455, + "grad_norm": 0.3664420736827633, + "learning_rate": 1.561900611183442e-05, "loss": 0.3202, "step": 7725 }, { - "epoch": 1.1326007326007326, - "grad_norm": 0.38362504353673, - "learning_rate": 1.5663582304014485e-05, - "loss": 0.3312, + "epoch": 1.1379361107021935, + "grad_norm": 0.36621600662848175, + "learning_rate": 1.561191954082528e-05, + "loss": 0.3218, "step": 7730 }, { - "epoch": 1.1333333333333333, - "grad_norm": 3.0992510017108925, - "learning_rate": 1.565655476161765e-05, - "loss": 0.3367, + "epoch": 1.1386721625202414, + "grad_norm": 0.3728211225397672, + "learning_rate": 1.5604828853533932e-05, + "loss": 0.3605, "step": 7735 }, { - "epoch": 1.134065934065934, - "grad_norm": 0.3783866557689988, - "learning_rate": 1.5649523108836283e-05, - "loss": 0.3281, + "epoch": 1.1394082143382893, + "grad_norm": 0.3607580410927885, + "learning_rate": 1.559773405516132e-05, + "loss": 0.3176, "step": 7740 }, { - "epoch": 1.1347985347985348, - "grad_norm": 0.37579886304900423, - "learning_rate": 1.5642487350780005e-05, - "loss": 0.3268, + "epoch": 1.1401442661563375, + "grad_norm": 0.3381911753649782, + "learning_rate": 1.5590635150911403e-05, + "loss": 0.3296, "step": 7745 }, { - "epoch": 1.1355311355311355, - "grad_norm": 0.38265023290453665, - "learning_rate": 1.5635447492561403e-05, - "loss": 0.3346, + "epoch": 1.1408803179743854, + "grad_norm": 0.36632901662190737, + "learning_rate": 1.5583532145991148e-05, + "loss": 0.326, "step": 7750 }, { - "epoch": 1.1362637362637362, - "grad_norm": 0.3737657873990553, - "learning_rate": 1.5628403539296058e-05, - "loss": 0.3476, + "epoch": 1.1416163697924333, + "grad_norm": 0.3710051357140936, + "learning_rate": 1.557642504561054e-05, + "loss": 0.335, "step": 7755 }, { - "epoch": 1.136996336996337, - "grad_norm": 0.38456256719679427, - "learning_rate": 1.5621355496102518e-05, - "loss": 0.3336, + "epoch": 1.1423524216104814, + "grad_norm": 0.3785712866752345, + "learning_rate": 1.5569313854982547e-05, + "loss": 0.3195, "step": 7760 }, { - "epoch": 1.1377289377289377, - "grad_norm": 0.3773294586268947, - "learning_rate": 1.5614303368102296e-05, - "loss": 0.3202, + "epoch": 1.1430884734285294, + "grad_norm": 0.3711928974743087, + "learning_rate": 1.5562198579323166e-05, + "loss": 0.3298, "step": 7765 }, { - "epoch": 1.1384615384615384, - "grad_norm": 0.3431922523735431, - "learning_rate": 1.5607247160419892e-05, - "loss": 0.3344, + "epoch": 1.1438245252465773, + "grad_norm": 0.3945063867288393, + "learning_rate": 1.555507922385136e-05, + "loss": 0.3303, "step": 7770 }, { - "epoch": 1.1391941391941391, - "grad_norm": 0.3641852418008754, - "learning_rate": 1.5600186878182765e-05, - "loss": 0.3271, + "epoch": 1.1445605770646254, + "grad_norm": 0.37004262858780046, + "learning_rate": 1.5547955793789108e-05, + "loss": 0.3285, "step": 7775 }, { - "epoch": 1.1399267399267399, - "grad_norm": 0.3589745389126589, - "learning_rate": 1.5593122526521316e-05, - "loss": 0.339, + "epoch": 1.1452966288826734, + "grad_norm": 0.367078837440859, + "learning_rate": 1.5540828294361366e-05, + "loss": 0.3154, "step": 7780 }, { - "epoch": 1.1406593406593406, - "grad_norm": 0.3837329528546989, - "learning_rate": 1.5586054110568924e-05, - "loss": 0.3295, + "epoch": 1.1460326807007213, + "grad_norm": 0.3805392937153245, + "learning_rate": 1.553369673079608e-05, + "loss": 0.3393, "step": 7785 }, { - "epoch": 1.1413919413919413, - "grad_norm": 0.35521255269951685, - "learning_rate": 1.557898163546191e-05, - "loss": 0.3275, + "epoch": 1.1467687325187694, + "grad_norm": 0.3869227223495453, + "learning_rate": 1.5526561108324165e-05, + "loss": 0.3256, "step": 7790 }, { - "epoch": 1.142124542124542, - "grad_norm": 0.3758897169810696, - "learning_rate": 1.557190510633956e-05, - "loss": 0.3445, + "epoch": 1.1475047843368174, + "grad_norm": 0.37063069123814935, + "learning_rate": 1.5519421432179537e-05, + "loss": 0.3546, "step": 7795 }, { - "epoch": 1.1428571428571428, - "grad_norm": 0.3668639865507922, - "learning_rate": 1.556482452834409e-05, - "loss": 0.3405, + "epoch": 1.1482408361548653, + "grad_norm": 0.397345443614491, + "learning_rate": 1.551227770759906e-05, + "loss": 0.322, "step": 7800 }, { - "epoch": 1.1435897435897435, - "grad_norm": 0.38211289546674637, - "learning_rate": 1.555773990662066e-05, - "loss": 0.3205, + "epoch": 1.1489768879729132, + "grad_norm": 0.3616619578212314, + "learning_rate": 1.550512993982259e-05, + "loss": 0.3245, "step": 7805 }, { - "epoch": 1.1443223443223443, - "grad_norm": 0.36502561275991563, - "learning_rate": 1.5550651246317377e-05, - "loss": 0.3152, + "epoch": 1.1497129397909613, + "grad_norm": 0.38807546453238934, + "learning_rate": 1.5497978134092925e-05, + "loss": 0.3321, "step": 7810 }, { - "epoch": 1.145054945054945, - "grad_norm": 0.3859358824241421, - "learning_rate": 1.554355855258527e-05, - "loss": 0.3328, + "epoch": 1.1504489916090093, + "grad_norm": 0.3802255631829449, + "learning_rate": 1.549082229565584e-05, + "loss": 0.3431, "step": 7815 }, { - "epoch": 1.1457875457875457, - "grad_norm": 0.3674888296797926, - "learning_rate": 1.5536461830578315e-05, - "loss": 0.3367, + "epoch": 1.1511850434270572, + "grad_norm": 0.3827544811135087, + "learning_rate": 1.5483662429760074e-05, + "loss": 0.3217, "step": 7820 }, { - "epoch": 1.1465201465201464, - "grad_norm": 0.38519406190838723, - "learning_rate": 1.5529361085453404e-05, - "loss": 0.335, + "epoch": 1.1519210952451053, + "grad_norm": 0.3763534138365317, + "learning_rate": 1.54764985416573e-05, + "loss": 0.3185, "step": 7825 }, { - "epoch": 1.1472527472527472, - "grad_norm": 0.37311672813530217, - "learning_rate": 1.5522256322370347e-05, - "loss": 0.3271, + "epoch": 1.1526571470631533, + "grad_norm": 0.3613715699731057, + "learning_rate": 1.546933063660216e-05, + "loss": 0.3546, "step": 7830 }, { - "epoch": 1.147985347985348, - "grad_norm": 0.3664044245248359, - "learning_rate": 1.5515147546491897e-05, - "loss": 0.3371, + "epoch": 1.1533931988812012, + "grad_norm": 0.39850937018007737, + "learning_rate": 1.546215871985223e-05, + "loss": 0.3424, "step": 7835 }, { - "epoch": 1.1487179487179486, - "grad_norm": 0.3558901582608226, - "learning_rate": 1.550803476298369e-05, - "loss": 0.3277, + "epoch": 1.1541292506992493, + "grad_norm": 0.36542223270984864, + "learning_rate": 1.5454982796668034e-05, + "loss": 0.3348, "step": 7840 }, { - "epoch": 1.1494505494505494, - "grad_norm": 0.3644128091998396, - "learning_rate": 1.5500917977014306e-05, - "loss": 0.3268, + "epoch": 1.1548653025172972, + "grad_norm": 0.3568104809348479, + "learning_rate": 1.5447802872313035e-05, + "loss": 0.3194, "step": 7845 }, { - "epoch": 1.15018315018315, - "grad_norm": 0.3701432111945302, - "learning_rate": 1.549379719375521e-05, - "loss": 0.3389, + "epoch": 1.1556013543353452, + "grad_norm": 0.36075462577489675, + "learning_rate": 1.544061895205363e-05, + "loss": 0.3203, "step": 7850 }, { - "epoch": 1.1509157509157508, - "grad_norm": 0.3794847352445503, - "learning_rate": 1.5486672418380786e-05, - "loss": 0.3449, + "epoch": 1.1563374061533933, + "grad_norm": 0.40442991289429586, + "learning_rate": 1.5433431041159145e-05, + "loss": 0.3348, "step": 7855 }, { - "epoch": 1.1516483516483516, - "grad_norm": 0.3492217553790412, - "learning_rate": 1.5479543656068313e-05, - "loss": 0.3208, + "epoch": 1.1570734579714412, + "grad_norm": 0.3779860773248329, + "learning_rate": 1.5426239144901836e-05, + "loss": 0.3187, "step": 7860 }, { - "epoch": 1.1523809523809523, - "grad_norm": 0.3736344818072641, - "learning_rate": 1.547241091199797e-05, - "loss": 0.3327, + "epoch": 1.1578095097894892, + "grad_norm": 0.38069957354854694, + "learning_rate": 1.5419043268556885e-05, + "loss": 0.341, "step": 7865 }, { - "epoch": 1.153113553113553, - "grad_norm": 0.37726343120982136, - "learning_rate": 1.546527419135282e-05, - "loss": 0.3342, + "epoch": 1.158545561607537, + "grad_norm": 0.3533837974365164, + "learning_rate": 1.5411843417402385e-05, + "loss": 0.3277, "step": 7870 }, { - "epoch": 1.1538461538461537, - "grad_norm": 0.3827996368281623, - "learning_rate": 1.545813349931883e-05, - "loss": 0.3351, + "epoch": 1.1592816134255852, + "grad_norm": 0.3555697016806076, + "learning_rate": 1.5404639596719344e-05, + "loss": 0.3273, "step": 7875 }, { - "epoch": 1.1545787545787545, - "grad_norm": 0.37145135784737665, - "learning_rate": 1.5450988841084844e-05, - "loss": 0.3035, + "epoch": 1.1600176652436331, + "grad_norm": 0.39214145093818986, + "learning_rate": 1.5397431811791695e-05, + "loss": 0.3322, "step": 7880 }, { - "epoch": 1.1553113553113552, - "grad_norm": 0.37420784345115, - "learning_rate": 1.5443840221842586e-05, - "loss": 0.3376, + "epoch": 1.160753717061681, + "grad_norm": 0.382712644714872, + "learning_rate": 1.5390220067906267e-05, + "loss": 0.3311, "step": 7885 }, { - "epoch": 1.156043956043956, - "grad_norm": 0.36574848734418397, - "learning_rate": 1.543668764678667e-05, - "loss": 0.3274, + "epoch": 1.1614897688797292, + "grad_norm": 0.38411978704744937, + "learning_rate": 1.5383004370352792e-05, + "loss": 0.3228, "step": 7890 }, { - "epoch": 1.1567765567765567, - "grad_norm": 0.3632006021916965, - "learning_rate": 1.5429531121114565e-05, - "loss": 0.3402, + "epoch": 1.1622258206977771, + "grad_norm": 0.36607839507668566, + "learning_rate": 1.5375784724423914e-05, + "loss": 0.3179, "step": 7895 }, { - "epoch": 1.1575091575091574, - "grad_norm": 0.38938668112252456, - "learning_rate": 1.542237065002663e-05, - "loss": 0.3325, + "epoch": 1.162961872515825, + "grad_norm": 0.35317476761041544, + "learning_rate": 1.536856113541516e-05, + "loss": 0.3258, "step": 7900 }, { - "epoch": 1.1582417582417581, - "grad_norm": 0.3938272738348802, - "learning_rate": 1.5415206238726086e-05, - "loss": 0.3357, + "epoch": 1.1636979243338732, + "grad_norm": 0.37769960094645616, + "learning_rate": 1.5361333608624955e-05, + "loss": 0.322, "step": 7905 }, { - "epoch": 1.1589743589743589, - "grad_norm": 0.3702130333111147, - "learning_rate": 1.5408037892419012e-05, - "loss": 0.3448, + "epoch": 1.1644339761519211, + "grad_norm": 0.6377854025940484, + "learning_rate": 1.5354102149354614e-05, + "loss": 0.3388, "step": 7910 }, { - "epoch": 1.1597069597069596, - "grad_norm": 0.37480011471437447, - "learning_rate": 1.5400865616314343e-05, - "loss": 0.3325, + "epoch": 1.165170027969969, + "grad_norm": 0.3842292521328781, + "learning_rate": 1.534686676290833e-05, + "loss": 0.339, "step": 7915 }, { - "epoch": 1.1604395604395605, - "grad_norm": 0.36020313134221427, - "learning_rate": 1.5393689415623883e-05, - "loss": 0.3229, + "epoch": 1.1659060797880172, + "grad_norm": 0.3792690103737219, + "learning_rate": 1.5339627454593186e-05, + "loss": 0.3266, "step": 7920 }, { - "epoch": 1.1611721611721613, - "grad_norm": 0.3621293229694593, - "learning_rate": 1.5386509295562278e-05, - "loss": 0.3431, + "epoch": 1.1666421316060651, + "grad_norm": 0.3676587886070995, + "learning_rate": 1.533238422971913e-05, + "loss": 0.3233, "step": 7925 }, { - "epoch": 1.161904761904762, - "grad_norm": 0.35748855267964647, - "learning_rate": 1.5379325261347024e-05, - "loss": 0.3254, + "epoch": 1.167378183424113, + "grad_norm": 0.36727558731704385, + "learning_rate": 1.532513709359899e-05, + "loss": 0.315, "step": 7930 }, { - "epoch": 1.1626373626373627, - "grad_norm": 0.3442282617139529, - "learning_rate": 1.5372137318198463e-05, - "loss": 0.3017, + "epoch": 1.168114235242161, + "grad_norm": 0.3506297472158483, + "learning_rate": 1.5317886051548466e-05, + "loss": 0.3274, "step": 7935 }, { - "epoch": 1.1633699633699635, - "grad_norm": 0.3787677128934951, - "learning_rate": 1.5364945471339773e-05, - "loss": 0.3173, + "epoch": 1.168850287060209, + "grad_norm": 0.3664467108879879, + "learning_rate": 1.5310631108886116e-05, + "loss": 0.3302, "step": 7940 }, { - "epoch": 1.1641025641025642, - "grad_norm": 0.36004708114704104, - "learning_rate": 1.535774972599698e-05, - "loss": 0.3207, + "epoch": 1.169586338878257, + "grad_norm": 0.35742067793297144, + "learning_rate": 1.530337227093336e-05, + "loss": 0.3317, "step": 7945 }, { - "epoch": 1.164835164835165, - "grad_norm": 0.37144192704319284, - "learning_rate": 1.5350550087398924e-05, - "loss": 0.3221, + "epoch": 1.170322390696305, + "grad_norm": 0.3683776726669245, + "learning_rate": 1.5296109543014485e-05, + "loss": 0.3158, "step": 7950 }, { - "epoch": 1.1655677655677656, - "grad_norm": 0.36107154609209374, - "learning_rate": 1.5343346560777287e-05, - "loss": 0.3289, + "epoch": 1.171058442514353, + "grad_norm": 0.3518427229639874, + "learning_rate": 1.5288842930456612e-05, + "loss": 0.336, "step": 7955 }, { - "epoch": 1.1663003663003664, - "grad_norm": 0.35726675993109364, - "learning_rate": 1.533613915136658e-05, - "loss": 0.3127, + "epoch": 1.171794494332401, + "grad_norm": 0.3487845596887438, + "learning_rate": 1.5281572438589733e-05, + "loss": 0.3198, "step": 7960 }, { - "epoch": 1.167032967032967, - "grad_norm": 0.3451931270804777, - "learning_rate": 1.5328927864404125e-05, - "loss": 0.3194, + "epoch": 1.172530546150449, + "grad_norm": 0.379988722680662, + "learning_rate": 1.5274298072746667e-05, + "loss": 0.3363, "step": 7965 }, { - "epoch": 1.1677655677655678, - "grad_norm": 0.3539475521927589, - "learning_rate": 1.532171270513007e-05, - "loss": 0.3358, + "epoch": 1.1732665979684969, + "grad_norm": 0.3549929624633907, + "learning_rate": 1.5267019838263087e-05, + "loss": 0.3217, "step": 7970 }, { - "epoch": 1.1684981684981686, - "grad_norm": 0.37785181308734567, - "learning_rate": 1.5314493678787367e-05, - "loss": 0.3331, + "epoch": 1.174002649786545, + "grad_norm": 0.35987419358775735, + "learning_rate": 1.52597377404775e-05, + "loss": 0.3193, "step": 7975 }, { - "epoch": 1.1692307692307693, - "grad_norm": 0.35682878347893937, - "learning_rate": 1.5307270790621786e-05, - "loss": 0.3358, + "epoch": 1.174738701604593, + "grad_norm": 0.36268787035510847, + "learning_rate": 1.525245178473124e-05, + "loss": 0.332, "step": 7980 }, { - "epoch": 1.16996336996337, - "grad_norm": 0.3528224207274319, - "learning_rate": 1.5300044045881904e-05, - "loss": 0.3308, + "epoch": 1.175474753422641, + "grad_norm": 0.35312966399350015, + "learning_rate": 1.5245161976368483e-05, + "loss": 0.3245, "step": 7985 }, { - "epoch": 1.1706959706959708, - "grad_norm": 0.39431002749350935, - "learning_rate": 1.5292813449819096e-05, - "loss": 0.3289, + "epoch": 1.176210805240689, + "grad_norm": 0.3695684293336986, + "learning_rate": 1.523786832073622e-05, + "loss": 0.3304, "step": 7990 }, { - "epoch": 1.1714285714285715, - "grad_norm": 0.3814929945950606, - "learning_rate": 1.528557900768754e-05, - "loss": 0.3426, + "epoch": 1.176946857058737, + "grad_norm": 0.3627286953491536, + "learning_rate": 1.5230570823184275e-05, + "loss": 0.3201, "step": 7995 }, { - "epoch": 1.1721611721611722, - "grad_norm": 0.366680665409208, - "learning_rate": 1.5278340724744204e-05, - "loss": 0.3314, + "epoch": 1.1776829088767848, + "grad_norm": 0.40544249315424846, + "learning_rate": 1.5223269489065278e-05, + "loss": 0.3462, "step": 8000 }, { - "epoch": 1.172893772893773, - "grad_norm": 0.3770267801244452, - "learning_rate": 1.5271098606248848e-05, - "loss": 0.3462, + "epoch": 1.178418960694833, + "grad_norm": 0.36346570837263104, + "learning_rate": 1.5215964323734676e-05, + "loss": 0.3324, "step": 8005 }, { - "epoch": 1.1736263736263737, - "grad_norm": 0.3514381440374758, - "learning_rate": 1.5263852657464016e-05, - "loss": 0.3194, + "epoch": 1.179155012512881, + "grad_norm": 0.3838103972999597, + "learning_rate": 1.5208655332550733e-05, + "loss": 0.3408, "step": 8010 }, { - "epoch": 1.1743589743589744, - "grad_norm": 0.4070492328211306, - "learning_rate": 1.5256602883655048e-05, - "loss": 0.3406, + "epoch": 1.1798910643309288, + "grad_norm": 0.3649917618606179, + "learning_rate": 1.5201342520874516e-05, + "loss": 0.328, "step": 8015 }, { - "epoch": 1.1750915750915751, - "grad_norm": 0.37712251514160994, - "learning_rate": 1.524934929009005e-05, - "loss": 0.334, + "epoch": 1.180627116148977, + "grad_norm": 0.350483182694087, + "learning_rate": 1.5194025894069889e-05, + "loss": 0.3277, "step": 8020 }, { - "epoch": 1.1758241758241759, - "grad_norm": 0.36395694590387156, - "learning_rate": 1.524209188203991e-05, - "loss": 0.3306, + "epoch": 1.181363167967025, + "grad_norm": 0.3650230449083749, + "learning_rate": 1.5186705457503518e-05, + "loss": 0.3338, "step": 8025 }, { - "epoch": 1.1765567765567766, - "grad_norm": 0.34500068083160973, - "learning_rate": 1.5234830664778283e-05, - "loss": 0.3239, + "epoch": 1.1820992197850728, + "grad_norm": 0.3757414247876498, + "learning_rate": 1.5179381216544871e-05, + "loss": 0.3355, "step": 8030 }, { - "epoch": 1.1772893772893773, - "grad_norm": 0.38764441603246896, - "learning_rate": 1.5227565643581604e-05, - "loss": 0.3516, + "epoch": 1.1828352716031207, + "grad_norm": 0.3581694313948743, + "learning_rate": 1.5172053176566193e-05, + "loss": 0.3266, "step": 8035 }, { - "epoch": 1.178021978021978, - "grad_norm": 0.37146934837029344, - "learning_rate": 1.5220296823729058e-05, - "loss": 0.3289, + "epoch": 1.1835713234211689, + "grad_norm": 0.35127920224525677, + "learning_rate": 1.5164721342942525e-05, + "loss": 0.3195, "step": 8040 }, { - "epoch": 1.1787545787545788, - "grad_norm": 0.37252913742566135, - "learning_rate": 1.5213024210502592e-05, - "loss": 0.3312, + "epoch": 1.1843073752392168, + "grad_norm": 0.382706540935447, + "learning_rate": 1.5157385721051684e-05, + "loss": 0.3187, "step": 8045 }, { - "epoch": 1.1794871794871795, - "grad_norm": 0.3724879987768289, - "learning_rate": 1.520574780918692e-05, - "loss": 0.3237, + "epoch": 1.185043427057265, + "grad_norm": 0.3557674138377118, + "learning_rate": 1.5150046316274279e-05, + "loss": 0.3196, "step": 8050 }, { - "epoch": 1.1802197802197802, - "grad_norm": 0.36320171408168545, - "learning_rate": 1.5198467625069495e-05, - "loss": 0.3148, + "epoch": 1.1857794788753129, + "grad_norm": 0.4068462556776589, + "learning_rate": 1.514270313399367e-05, + "loss": 0.3488, "step": 8055 }, { - "epoch": 1.180952380952381, - "grad_norm": 0.36288113898737523, - "learning_rate": 1.5191183663440536e-05, - "loss": 0.3197, + "epoch": 1.1865155306933608, + "grad_norm": 0.3616144646434693, + "learning_rate": 1.5135356179596008e-05, + "loss": 0.3422, "step": 8060 }, { - "epoch": 1.1816849816849817, - "grad_norm": 0.379903173867347, - "learning_rate": 1.518389592959299e-05, - "loss": 0.3384, + "epoch": 1.1872515825114087, + "grad_norm": 0.36811862803805906, + "learning_rate": 1.5128005458470207e-05, + "loss": 0.3158, "step": 8065 }, { - "epoch": 1.1824175824175824, - "grad_norm": 0.3616805925974018, - "learning_rate": 1.5176604428822553e-05, - "loss": 0.3313, + "epoch": 1.1879876343294569, + "grad_norm": 0.3674274299321005, + "learning_rate": 1.5120650976007941e-05, + "loss": 0.3316, "step": 8070 }, { - "epoch": 1.1831501831501832, - "grad_norm": 0.36957783238072606, - "learning_rate": 1.5169309166427658e-05, - "loss": 0.311, + "epoch": 1.1887236861475048, + "grad_norm": 0.3802645780409709, + "learning_rate": 1.5113292737603637e-05, + "loss": 0.3447, "step": 8075 }, { - "epoch": 1.183882783882784, - "grad_norm": 0.3953556623600997, - "learning_rate": 1.5162010147709467e-05, - "loss": 0.3216, + "epoch": 1.1894597379655527, + "grad_norm": 0.37954697064242204, + "learning_rate": 1.510593074865449e-05, + "loss": 0.3324, "step": 8080 }, { - "epoch": 1.1846153846153846, - "grad_norm": 0.3656002263760393, - "learning_rate": 1.515470737797188e-05, - "loss": 0.3314, + "epoch": 1.1901957897836009, + "grad_norm": 0.35419114349413583, + "learning_rate": 1.5098565014560436e-05, + "loss": 0.3438, "step": 8085 }, { - "epoch": 1.1853479853479854, - "grad_norm": 0.3646806027598352, - "learning_rate": 1.5147400862521519e-05, - "loss": 0.3153, + "epoch": 1.1909318416016488, + "grad_norm": 0.3849631664576678, + "learning_rate": 1.5091195540724161e-05, + "loss": 0.3388, "step": 8090 }, { - "epoch": 1.186080586080586, - "grad_norm": 0.37511690500361244, - "learning_rate": 1.514009060666772e-05, - "loss": 0.3384, + "epoch": 1.1916678934196967, + "grad_norm": 0.36959082043307084, + "learning_rate": 1.5083822332551096e-05, + "loss": 0.336, "step": 8095 }, { - "epoch": 1.1868131868131868, - "grad_norm": 0.35905818732390354, - "learning_rate": 1.5132776615722548e-05, - "loss": 0.3175, + "epoch": 1.1924039452377446, + "grad_norm": 0.3747436998910247, + "learning_rate": 1.5076445395449408e-05, + "loss": 0.3541, "step": 8100 }, { - "epoch": 1.1875457875457875, - "grad_norm": 0.34736547131077833, - "learning_rate": 1.512545889500078e-05, - "loss": 0.3477, + "epoch": 1.1931399970557928, + "grad_norm": 0.3676246232971342, + "learning_rate": 1.5069064734830004e-05, + "loss": 0.3208, "step": 8105 }, { - "epoch": 1.1882783882783883, - "grad_norm": 0.38204286817696836, - "learning_rate": 1.5118137449819896e-05, - "loss": 0.3266, + "epoch": 1.1938760488738407, + "grad_norm": 0.3732760017534803, + "learning_rate": 1.5061680356106512e-05, + "loss": 0.3296, "step": 8110 }, { - "epoch": 1.189010989010989, - "grad_norm": 0.3695606595203843, - "learning_rate": 1.5110812285500092e-05, - "loss": 0.3444, + "epoch": 1.1946121006918886, + "grad_norm": 0.361151747364808, + "learning_rate": 1.5054292264695298e-05, + "loss": 0.312, "step": 8115 }, { - "epoch": 1.1897435897435897, - "grad_norm": 0.3662372991469289, - "learning_rate": 1.5103483407364259e-05, - "loss": 0.3438, + "epoch": 1.1953481525099368, + "grad_norm": 0.3525055638444933, + "learning_rate": 1.5046900466015449e-05, + "loss": 0.331, "step": 8120 }, { - "epoch": 1.1904761904761905, - "grad_norm": 0.3800380320948473, - "learning_rate": 1.5096150820737993e-05, - "loss": 0.3411, + "epoch": 1.1960842043279847, + "grad_norm": 0.34297585949643866, + "learning_rate": 1.5039504965488763e-05, + "loss": 0.3256, "step": 8125 }, { - "epoch": 1.1912087912087912, - "grad_norm": 0.3681599569688727, - "learning_rate": 1.5088814530949578e-05, - "loss": 0.3162, + "epoch": 1.1968202561460326, + "grad_norm": 0.36955829764220444, + "learning_rate": 1.5032105768539766e-05, + "loss": 0.3303, "step": 8130 }, { - "epoch": 1.191941391941392, - "grad_norm": 0.3690148512442379, - "learning_rate": 1.5081474543329994e-05, + "epoch": 1.1975563079640807, + "grad_norm": 0.367012429873709, + "learning_rate": 1.5024702880595685e-05, "loss": 0.3386, "step": 8135 }, { - "epoch": 1.1926739926739927, - "grad_norm": 0.36979493878805475, - "learning_rate": 1.507413086321291e-05, - "loss": 0.3222, + "epoch": 1.1982923597821287, + "grad_norm": 0.4441685962696162, + "learning_rate": 1.501729630708646e-05, + "loss": 0.3397, "step": 8140 }, { - "epoch": 1.1934065934065934, - "grad_norm": 0.37053537861558267, - "learning_rate": 1.5066783495934666e-05, - "loss": 0.3184, + "epoch": 1.1990284116001766, + "grad_norm": 0.3749987318191806, + "learning_rate": 1.5009886053444731e-05, + "loss": 0.3199, "step": 8145 }, { - "epoch": 1.1941391941391941, - "grad_norm": 0.3616924947670475, - "learning_rate": 1.5059432446834304e-05, - "loss": 0.3302, + "epoch": 1.1997644634182247, + "grad_norm": 0.3621790453109267, + "learning_rate": 1.5002472125105839e-05, + "loss": 0.3239, "step": 8150 }, { - "epoch": 1.1948717948717948, - "grad_norm": 0.3609144662763727, - "learning_rate": 1.5052077721253513e-05, - "loss": 0.3392, + "epoch": 1.2005005152362727, + "grad_norm": 0.3644631410689124, + "learning_rate": 1.4995054527507819e-05, + "loss": 0.3427, "step": 8155 }, { - "epoch": 1.1956043956043956, - "grad_norm": 0.3883153977065576, - "learning_rate": 1.5044719324536678e-05, - "loss": 0.3316, + "epoch": 1.2012365670543206, + "grad_norm": 0.3647555049194253, + "learning_rate": 1.4987633266091401e-05, + "loss": 0.3372, "step": 8160 }, { - "epoch": 1.1963369963369963, - "grad_norm": 0.3839491724986923, - "learning_rate": 1.5037357262030833e-05, - "loss": 0.3404, + "epoch": 1.2019726188723685, + "grad_norm": 0.37004960872914333, + "learning_rate": 1.4980208346300003e-05, + "loss": 0.3447, "step": 8165 }, { - "epoch": 1.197069597069597, - "grad_norm": 0.36233751768245326, - "learning_rate": 1.5029991539085695e-05, - "loss": 0.3174, + "epoch": 1.2027086706904166, + "grad_norm": 0.35489534411365686, + "learning_rate": 1.4972779773579715e-05, + "loss": 0.3406, "step": 8170 }, { - "epoch": 1.1978021978021978, - "grad_norm": 0.35458010898660675, - "learning_rate": 1.502262216105362e-05, - "loss": 0.3392, + "epoch": 1.2034447225084646, + "grad_norm": 0.3746861117742125, + "learning_rate": 1.496534755337932e-05, + "loss": 0.3282, "step": 8175 }, { - "epoch": 1.1985347985347985, - "grad_norm": 0.37002015116368614, - "learning_rate": 1.501524913328964e-05, - "loss": 0.3449, + "epoch": 1.2041807743265125, + "grad_norm": 0.3466432584153397, + "learning_rate": 1.495791169115027e-05, + "loss": 0.3171, "step": 8180 }, { - "epoch": 1.1992673992673992, - "grad_norm": 0.3872586755325121, - "learning_rate": 1.5007872461151422e-05, - "loss": 0.3414, + "epoch": 1.2049168261445606, + "grad_norm": 0.37221546959629925, + "learning_rate": 1.4950472192346692e-05, + "loss": 0.3356, "step": 8185 }, { - "epoch": 1.2, - "grad_norm": 0.37861235573923263, - "learning_rate": 1.5000492149999292e-05, - "loss": 0.3351, + "epoch": 1.2056528779626086, + "grad_norm": 0.35670248460846826, + "learning_rate": 1.4943029062425371e-05, + "loss": 0.3203, "step": 8190 }, { - "epoch": 1.2007326007326007, - "grad_norm": 0.367540897067357, - "learning_rate": 1.499310820519622e-05, - "loss": 0.3353, + "epoch": 1.2063889297806565, + "grad_norm": 0.3688758812161541, + "learning_rate": 1.4935582306845766e-05, + "loss": 0.3268, "step": 8195 }, { - "epoch": 1.2014652014652014, - "grad_norm": 0.3724045832054376, - "learning_rate": 1.4985720632107806e-05, - "loss": 0.3482, + "epoch": 1.2071249815987046, + "grad_norm": 0.3513532074539236, + "learning_rate": 1.4928131931069993e-05, + "loss": 0.3247, "step": 8200 }, { - "epoch": 1.2021978021978021, - "grad_norm": 0.35784985312179274, - "learning_rate": 1.4978329436102299e-05, - "loss": 0.311, + "epoch": 1.2078610334167525, + "grad_norm": 0.36681068074591217, + "learning_rate": 1.4920677940562821e-05, + "loss": 0.3264, "step": 8205 }, { - "epoch": 1.2029304029304029, - "grad_norm": 0.37299163526608586, - "learning_rate": 1.4970934622550575e-05, - "loss": 0.3265, + "epoch": 1.2085970852348005, + "grad_norm": 0.36728867213792643, + "learning_rate": 1.4913220340791675e-05, + "loss": 0.3354, "step": 8210 }, { - "epoch": 1.2036630036630036, - "grad_norm": 0.406549166369611, - "learning_rate": 1.4963536196826142e-05, - "loss": 0.3262, + "epoch": 1.2093331370528486, + "grad_norm": 0.3734185422872777, + "learning_rate": 1.4905759137226613e-05, + "loss": 0.3213, "step": 8215 }, { - "epoch": 1.2043956043956043, - "grad_norm": 0.37598436424901355, - "learning_rate": 1.4956134164305122e-05, - "loss": 0.3305, + "epoch": 1.2100691888708965, + "grad_norm": 0.34587592794620753, + "learning_rate": 1.4898294335340362e-05, + "loss": 0.3323, "step": 8220 }, { - "epoch": 1.205128205128205, - "grad_norm": 0.387205644932967, - "learning_rate": 1.4948728530366274e-05, - "loss": 0.3303, + "epoch": 1.2108052406889445, + "grad_norm": 0.391945239905069, + "learning_rate": 1.4890825940608258e-05, + "loss": 0.3435, "step": 8225 }, { - "epoch": 1.2058608058608058, - "grad_norm": 0.45070623988161446, - "learning_rate": 1.4941319300390964e-05, - "loss": 0.3428, + "epoch": 1.2115412925069924, + "grad_norm": 0.3769895340415957, + "learning_rate": 1.4883353958508297e-05, + "loss": 0.3243, "step": 8230 }, { - "epoch": 1.2065934065934065, - "grad_norm": 0.3812905313874793, - "learning_rate": 1.4933906479763174e-05, - "loss": 0.3374, + "epoch": 1.2122773443250405, + "grad_norm": 0.3656163340344307, + "learning_rate": 1.4875878394521095e-05, + "loss": 0.3246, "step": 8235 }, { - "epoch": 1.2073260073260073, - "grad_norm": 0.3578092009296961, - "learning_rate": 1.4926490073869489e-05, - "loss": 0.3223, + "epoch": 1.2130133961430885, + "grad_norm": 0.3833402863688335, + "learning_rate": 1.4868399254129897e-05, + "loss": 0.3331, "step": 8240 }, { - "epoch": 1.208058608058608, - "grad_norm": 0.3560719148641902, - "learning_rate": 1.491907008809911e-05, - "loss": 0.3478, + "epoch": 1.2137494479611364, + "grad_norm": 0.36664815475729085, + "learning_rate": 1.4860916542820572e-05, + "loss": 0.3264, "step": 8245 }, { - "epoch": 1.2087912087912087, - "grad_norm": 0.37157832798696866, - "learning_rate": 1.491164652784383e-05, - "loss": 0.335, + "epoch": 1.2144854997791845, + "grad_norm": 0.35949597475343587, + "learning_rate": 1.4853430266081605e-05, + "loss": 0.3183, "step": 8250 }, { - "epoch": 1.2095238095238094, - "grad_norm": 0.3739105088939359, - "learning_rate": 1.4904219398498048e-05, - "loss": 0.3167, + "epoch": 1.2152215515972324, + "grad_norm": 0.3660859424494218, + "learning_rate": 1.4845940429404102e-05, + "loss": 0.3263, "step": 8255 }, { - "epoch": 1.2102564102564102, - "grad_norm": 0.38130465027508265, - "learning_rate": 1.489678870545875e-05, - "loss": 0.3272, + "epoch": 1.2159576034152804, + "grad_norm": 0.38645814628202896, + "learning_rate": 1.4838447038281776e-05, + "loss": 0.3351, "step": 8260 }, { - "epoch": 1.210989010989011, - "grad_norm": 0.36724998187833974, - "learning_rate": 1.4889354454125514e-05, - "loss": 0.3222, + "epoch": 1.2166936552333285, + "grad_norm": 0.37364129020267395, + "learning_rate": 1.483095009821095e-05, + "loss": 0.3368, "step": 8265 }, { - "epoch": 1.2117216117216116, - "grad_norm": 0.35581485026719434, - "learning_rate": 1.48819166499005e-05, - "loss": 0.3272, + "epoch": 1.2174297070513764, + "grad_norm": 0.3795810400075801, + "learning_rate": 1.4823449614690546e-05, + "loss": 0.3402, "step": 8270 }, { - "epoch": 1.2124542124542124, - "grad_norm": 0.3613045999885797, - "learning_rate": 1.4874475298188459e-05, - "loss": 0.3314, + "epoch": 1.2181657588694244, + "grad_norm": 0.37596010996766704, + "learning_rate": 1.4815945593222087e-05, + "loss": 0.3269, "step": 8275 }, { - "epoch": 1.213186813186813, - "grad_norm": 0.3796933374568938, - "learning_rate": 1.4867030404396704e-05, - "loss": 0.3452, + "epoch": 1.2189018106874725, + "grad_norm": 0.36523560144358447, + "learning_rate": 1.4808438039309694e-05, + "loss": 0.3252, "step": 8280 }, { - "epoch": 1.2139194139194138, - "grad_norm": 0.37809382765289035, - "learning_rate": 1.4859581973935143e-05, - "loss": 0.3235, + "epoch": 1.2196378625055204, + "grad_norm": 0.34706175679498286, + "learning_rate": 1.4800926958460074e-05, + "loss": 0.3258, "step": 8285 }, { - "epoch": 1.2146520146520146, - "grad_norm": 0.375067447560607, - "learning_rate": 1.4852130012216233e-05, - "loss": 0.3282, + "epoch": 1.2203739143235683, + "grad_norm": 0.38196664520781104, + "learning_rate": 1.4793412356182524e-05, + "loss": 0.3464, "step": 8290 }, { - "epoch": 1.2153846153846155, - "grad_norm": 0.3677148038994275, - "learning_rate": 1.4844674524655012e-05, - "loss": 0.3162, + "epoch": 1.2211099661416163, + "grad_norm": 0.36135280891199084, + "learning_rate": 1.4785894237988925e-05, + "loss": 0.3427, "step": 8295 }, { - "epoch": 1.2161172161172162, - "grad_norm": 0.3544576137509881, - "learning_rate": 1.4837215516669075e-05, - "loss": 0.3264, + "epoch": 1.2218460179596644, + "grad_norm": 0.37205099054015695, + "learning_rate": 1.4778372609393736e-05, + "loss": 0.341, "step": 8300 }, { - "epoch": 1.216849816849817, - "grad_norm": 0.3652573674769994, - "learning_rate": 1.4829752993678567e-05, - "loss": 0.3168, + "epoch": 1.2225820697777123, + "grad_norm": 0.37013183154571094, + "learning_rate": 1.4770847475913987e-05, + "loss": 0.3389, "step": 8305 }, { - "epoch": 1.2175824175824177, - "grad_norm": 0.3630429672853273, - "learning_rate": 1.4822286961106207e-05, - "loss": 0.3436, + "epoch": 1.2233181215957603, + "grad_norm": 0.38064850562116975, + "learning_rate": 1.4763318843069283e-05, + "loss": 0.3395, "step": 8310 }, { - "epoch": 1.2183150183150184, - "grad_norm": 0.367403987412385, - "learning_rate": 1.4814817424377243e-05, - "loss": 0.3376, + "epoch": 1.2240541734138084, + "grad_norm": 0.35981229827963146, + "learning_rate": 1.4755786716381795e-05, + "loss": 0.3375, "step": 8315 }, { - "epoch": 1.2190476190476192, - "grad_norm": 0.3483654972378673, - "learning_rate": 1.4807344388919482e-05, - "loss": 0.3344, + "epoch": 1.2247902252318563, + "grad_norm": 0.37567356663031276, + "learning_rate": 1.4748251101376254e-05, + "loss": 0.329, "step": 8320 }, { - "epoch": 1.2197802197802199, - "grad_norm": 0.38261252546018976, - "learning_rate": 1.4799867860163271e-05, - "loss": 0.3254, + "epoch": 1.2255262770499042, + "grad_norm": 0.3524202293263575, + "learning_rate": 1.4740712003579958e-05, + "loss": 0.3238, "step": 8325 }, { - "epoch": 1.2205128205128206, - "grad_norm": 0.3474567263427877, - "learning_rate": 1.4792387843541493e-05, - "loss": 0.3348, + "epoch": 1.2262623288679524, + "grad_norm": 0.383927065818772, + "learning_rate": 1.4733169428522747e-05, + "loss": 0.3261, "step": 8330 }, { - "epoch": 1.2212454212454213, - "grad_norm": 0.37537770891416594, - "learning_rate": 1.4784904344489566e-05, - "loss": 0.3284, + "epoch": 1.2269983806860003, + "grad_norm": 0.3599752959297416, + "learning_rate": 1.472562338173702e-05, + "loss": 0.3303, "step": 8335 }, { - "epoch": 1.221978021978022, - "grad_norm": 0.3537658201505351, - "learning_rate": 1.477741736844544e-05, - "loss": 0.3231, + "epoch": 1.2277344325040482, + "grad_norm": 0.3628757359151891, + "learning_rate": 1.4718073868757725e-05, + "loss": 0.317, "step": 8340 }, { - "epoch": 1.2227106227106228, - "grad_norm": 0.36084436630424543, - "learning_rate": 1.4769926920849594e-05, - "loss": 0.3271, + "epoch": 1.2284704843220964, + "grad_norm": 0.3479433541346978, + "learning_rate": 1.4710520895122342e-05, + "loss": 0.3229, "step": 8345 }, { - "epoch": 1.2234432234432235, - "grad_norm": 0.3599157152830956, - "learning_rate": 1.4762433007145023e-05, - "loss": 0.3226, + "epoch": 1.2292065361401443, + "grad_norm": 0.37622766300668603, + "learning_rate": 1.47029644663709e-05, + "loss": 0.3229, "step": 8350 }, { - "epoch": 1.2241758241758243, - "grad_norm": 0.3672901470854127, - "learning_rate": 1.4754935632777243e-05, - "loss": 0.3362, + "epoch": 1.2299425879581922, + "grad_norm": 0.3723072755457436, + "learning_rate": 1.4695404588045957e-05, + "loss": 0.3324, "step": 8355 }, { - "epoch": 1.224908424908425, - "grad_norm": 0.3678116918328648, - "learning_rate": 1.474743480319429e-05, - "loss": 0.32, + "epoch": 1.2306786397762401, + "grad_norm": 0.36619369272822905, + "learning_rate": 1.4687841265692595e-05, + "loss": 0.3279, "step": 8360 }, { - "epoch": 1.2256410256410257, - "grad_norm": 0.37755682120382705, - "learning_rate": 1.4739930523846706e-05, - "loss": 0.3328, + "epoch": 1.2314146915942883, + "grad_norm": 0.3576517937588514, + "learning_rate": 1.4680274504858438e-05, + "loss": 0.3378, "step": 8365 }, { - "epoch": 1.2263736263736265, - "grad_norm": 0.37187743144942614, - "learning_rate": 1.4732422800187541e-05, - "loss": 0.3203, + "epoch": 1.2321507434123362, + "grad_norm": 0.35634771515086305, + "learning_rate": 1.467270431109362e-05, + "loss": 0.3325, "step": 8370 }, { - "epoch": 1.2271062271062272, - "grad_norm": 0.3634089522289552, - "learning_rate": 1.4724911637672341e-05, - "loss": 0.3267, + "epoch": 1.2328867952303841, + "grad_norm": 0.3718668057674085, + "learning_rate": 1.4665130689950796e-05, + "loss": 0.3389, "step": 8375 }, { - "epoch": 1.227838827838828, - "grad_norm": 0.36006484573969755, - "learning_rate": 1.4717397041759167e-05, - "loss": 0.355, + "epoch": 1.2336228470484323, + "grad_norm": 0.3720584699709386, + "learning_rate": 1.4657553646985134e-05, + "loss": 0.3298, "step": 8380 }, { - "epoch": 1.2285714285714286, - "grad_norm": 0.3758528148797375, - "learning_rate": 1.4709879017908559e-05, - "loss": 0.3235, + "epoch": 1.2343588988664802, + "grad_norm": 0.3600181242934194, + "learning_rate": 1.4649973187754314e-05, + "loss": 0.3231, "step": 8385 }, { - "epoch": 1.2293040293040294, - "grad_norm": 0.3640560499525934, - "learning_rate": 1.470235757158356e-05, - "loss": 0.3244, + "epoch": 1.2350949506845281, + "grad_norm": 0.3660149728251252, + "learning_rate": 1.464238931781852e-05, + "loss": 0.3259, "step": 8390 }, { - "epoch": 1.23003663003663, - "grad_norm": 0.3718633192806409, - "learning_rate": 1.4694832708249685e-05, - "loss": 0.3085, + "epoch": 1.2358310025025763, + "grad_norm": 0.35515569774446815, + "learning_rate": 1.4634802042740439e-05, + "loss": 0.3242, "step": 8395 }, { - "epoch": 1.2307692307692308, - "grad_norm": 0.3832121158376519, - "learning_rate": 1.4687304433374948e-05, - "loss": 0.3344, + "epoch": 1.2365670543206242, + "grad_norm": 0.3581377400151042, + "learning_rate": 1.4627211368085255e-05, + "loss": 0.3215, "step": 8400 }, { - "epoch": 1.2315018315018316, - "grad_norm": 0.3607369656228018, - "learning_rate": 1.4679772752429833e-05, - "loss": 0.3209, + "epoch": 1.2373031061386721, + "grad_norm": 0.366841625067584, + "learning_rate": 1.4619617299420646e-05, + "loss": 0.328, "step": 8405 }, { - "epoch": 1.2322344322344323, - "grad_norm": 0.3752129417665394, - "learning_rate": 1.4672237670887304e-05, - "loss": 0.332, + "epoch": 1.2380391579567203, + "grad_norm": 0.3639641969747141, + "learning_rate": 1.4612019842316778e-05, + "loss": 0.3361, "step": 8410 }, { - "epoch": 1.232967032967033, - "grad_norm": 0.3872267779943652, - "learning_rate": 1.4664699194222787e-05, - "loss": 0.3402, + "epoch": 1.2387752097747682, + "grad_norm": 0.35529413346033073, + "learning_rate": 1.4604419002346306e-05, + "loss": 0.3368, "step": 8415 }, { - "epoch": 1.2336996336996338, - "grad_norm": 0.3690844057598334, - "learning_rate": 1.4657157327914189e-05, - "loss": 0.3007, + "epoch": 1.239511261592816, + "grad_norm": 0.35635443827472735, + "learning_rate": 1.4596814785084361e-05, + "loss": 0.3309, "step": 8420 }, { - "epoch": 1.2344322344322345, - "grad_norm": 0.35874344873262004, - "learning_rate": 1.4649612077441867e-05, - "loss": 0.3219, + "epoch": 1.240247313410864, + "grad_norm": 0.35017175827485586, + "learning_rate": 1.4589207196108556e-05, + "loss": 0.3475, "step": 8425 }, { - "epoch": 1.2351648351648352, - "grad_norm": 0.36923537952671387, - "learning_rate": 1.4642063448288644e-05, - "loss": 0.3264, + "epoch": 1.2409833652289122, + "grad_norm": 0.3796049352713231, + "learning_rate": 1.4581596240998975e-05, + "loss": 0.3198, "step": 8430 }, { - "epoch": 1.235897435897436, - "grad_norm": 0.3652820014861731, - "learning_rate": 1.4634511445939796e-05, - "loss": 0.3326, + "epoch": 1.24171941704696, + "grad_norm": 0.3959085195621158, + "learning_rate": 1.457398192533817e-05, + "loss": 0.3236, "step": 8435 }, { - "epoch": 1.2366300366300367, - "grad_norm": 0.39221916060357304, - "learning_rate": 1.4626956075883056e-05, - "loss": 0.3139, + "epoch": 1.242455468865008, + "grad_norm": 0.36997520474040174, + "learning_rate": 1.4566364254711159e-05, + "loss": 0.3311, "step": 8440 }, { - "epoch": 1.2373626373626374, - "grad_norm": 0.3732286402178842, - "learning_rate": 1.4619397343608593e-05, - "loss": 0.3387, + "epoch": 1.2431915206830562, + "grad_norm": 0.36217574393869123, + "learning_rate": 1.4558743234705421e-05, + "loss": 0.3042, "step": 8445 }, { - "epoch": 1.2380952380952381, - "grad_norm": 0.37948437560162124, - "learning_rate": 1.4611835254609026e-05, - "loss": 0.332, + "epoch": 1.243927572501104, + "grad_norm": 0.36238317738922754, + "learning_rate": 1.4551118870910892e-05, + "loss": 0.324, "step": 8450 }, { - "epoch": 1.2388278388278389, - "grad_norm": 0.3759087075387545, - "learning_rate": 1.4604269814379417e-05, - "loss": 0.3246, + "epoch": 1.244663624319152, + "grad_norm": 0.3815961739715229, + "learning_rate": 1.4543491168919962e-05, + "loss": 0.3424, "step": 8455 }, { - "epoch": 1.2395604395604396, - "grad_norm": 0.3904251539355884, - "learning_rate": 1.4596701028417252e-05, - "loss": 0.347, + "epoch": 1.2453996761372002, + "grad_norm": 0.36245930207044025, + "learning_rate": 1.4535860134327463e-05, + "loss": 0.3287, "step": 8460 }, { - "epoch": 1.2402930402930403, - "grad_norm": 0.36065774214941765, - "learning_rate": 1.4589128902222462e-05, - "loss": 0.3233, + "epoch": 1.246135727955248, + "grad_norm": 0.3806379970593242, + "learning_rate": 1.452822577273068e-05, + "loss": 0.3449, "step": 8465 }, { - "epoch": 1.241025641025641, - "grad_norm": 0.407025265298805, - "learning_rate": 1.4581553441297395e-05, - "loss": 0.3241, + "epoch": 1.246871779773296, + "grad_norm": 0.38549050902527776, + "learning_rate": 1.4520588089729334e-05, + "loss": 0.3446, "step": 8470 }, { - "epoch": 1.2417582417582418, - "grad_norm": 0.3788864713097616, - "learning_rate": 1.4573974651146826e-05, - "loss": 0.3165, + "epoch": 1.2476078315913441, + "grad_norm": 0.35650246505780847, + "learning_rate": 1.4512947090925588e-05, + "loss": 0.3251, "step": 8475 }, { - "epoch": 1.2424908424908425, - "grad_norm": 0.36013901165566703, - "learning_rate": 1.4566392537277943e-05, - "loss": 0.316, + "epoch": 1.248343883409392, + "grad_norm": 0.37933112214965986, + "learning_rate": 1.4505302781924022e-05, + "loss": 0.3443, "step": 8480 }, { - "epoch": 1.2432234432234432, - "grad_norm": 0.36320193457939876, - "learning_rate": 1.4558807105200363e-05, - "loss": 0.3483, + "epoch": 1.24907993522744, + "grad_norm": 0.3660288041654388, + "learning_rate": 1.449765516833166e-05, + "loss": 0.3387, "step": 8485 }, { - "epoch": 1.243956043956044, - "grad_norm": 0.35449035301865955, - "learning_rate": 1.4551218360426098e-05, - "loss": 0.3229, + "epoch": 1.249815987045488, + "grad_norm": 0.3742166905347593, + "learning_rate": 1.4490004255757942e-05, + "loss": 0.3279, "step": 8490 }, { - "epoch": 1.2446886446886447, - "grad_norm": 0.36942036637958453, - "learning_rate": 1.4543626308469582e-05, - "loss": 0.3167, + "epoch": 1.250552038863536, + "grad_norm": 0.36730577795557384, + "learning_rate": 1.4482350049814726e-05, + "loss": 0.3226, "step": 8495 }, { - "epoch": 1.2454212454212454, - "grad_norm": 0.372517896904522, - "learning_rate": 1.4536030954847644e-05, - "loss": 0.3095, + "epoch": 1.251288090681584, + "grad_norm": 0.3781036384265569, + "learning_rate": 1.4474692556116294e-05, + "loss": 0.33, "step": 8500 }, { - "epoch": 1.2461538461538462, - "grad_norm": 0.38610477964467854, - "learning_rate": 1.4528432305079511e-05, - "loss": 0.3331, + "epoch": 1.252024142499632, + "grad_norm": 0.36531158977157197, + "learning_rate": 1.4467031780279336e-05, + "loss": 0.3295, "step": 8505 }, { - "epoch": 1.246886446886447, - "grad_norm": 0.3657326446656885, - "learning_rate": 1.4520830364686809e-05, - "loss": 0.3253, + "epoch": 1.25276019431768, + "grad_norm": 0.35942077111844317, + "learning_rate": 1.4459367727922944e-05, + "loss": 0.3392, "step": 8510 }, { - "epoch": 1.2476190476190476, - "grad_norm": 0.3709210974612211, - "learning_rate": 1.4513225139193555e-05, - "loss": 0.3195, + "epoch": 1.253496246135728, + "grad_norm": 0.36649771551562454, + "learning_rate": 1.4451700404668621e-05, + "loss": 0.3293, "step": 8515 }, { - "epoch": 1.2483516483516484, - "grad_norm": 0.3671791379170008, - "learning_rate": 1.450561663412615e-05, - "loss": 0.3223, + "epoch": 1.2542322979537759, + "grad_norm": 0.3763748103335829, + "learning_rate": 1.4444029816140263e-05, + "loss": 0.3243, "step": 8520 }, { - "epoch": 1.249084249084249, - "grad_norm": 0.38509386166245346, - "learning_rate": 1.4498004855013382e-05, - "loss": 0.3476, + "epoch": 1.2549683497718238, + "grad_norm": 0.36270232876998154, + "learning_rate": 1.4436355967964164e-05, + "loss": 0.3322, "step": 8525 }, { - "epoch": 1.2498168498168498, - "grad_norm": 0.367727118151911, - "learning_rate": 1.449038980738641e-05, - "loss": 0.3339, + "epoch": 1.255704401589872, + "grad_norm": 0.36816829320081507, + "learning_rate": 1.4428678865769005e-05, + "loss": 0.3274, "step": 8530 }, { - "epoch": 1.2505494505494505, - "grad_norm": 0.3544379472629194, - "learning_rate": 1.4482771496778786e-05, - "loss": 0.3196, + "epoch": 1.2564404534079199, + "grad_norm": 0.38531463244299674, + "learning_rate": 1.4420998515185866e-05, + "loss": 0.3325, "step": 8535 }, { - "epoch": 1.2512820512820513, - "grad_norm": 0.37169888332658535, - "learning_rate": 1.4475149928726412e-05, - "loss": 0.3275, + "epoch": 1.257176505225968, + "grad_norm": 0.3484703231115639, + "learning_rate": 1.4413314921848196e-05, + "loss": 0.3195, "step": 8540 }, { - "epoch": 1.252014652014652, - "grad_norm": 0.37627880630537924, - "learning_rate": 1.446752510876757e-05, - "loss": 0.3251, + "epoch": 1.257912557044016, + "grad_norm": 0.3592125180448874, + "learning_rate": 1.4405628091391826e-05, + "loss": 0.3305, "step": 8545 }, { - "epoch": 1.2527472527472527, - "grad_norm": 0.36691119104223485, - "learning_rate": 1.4459897042442899e-05, - "loss": 0.3282, + "epoch": 1.2586486088620639, + "grad_norm": 0.35839328417008115, + "learning_rate": 1.4397938029454963e-05, + "loss": 0.3451, "step": 8550 }, { - "epoch": 1.2534798534798535, - "grad_norm": 0.36354308528144275, - "learning_rate": 1.4452265735295403e-05, - "loss": 0.335, + "epoch": 1.2593846606801118, + "grad_norm": 0.351399175552356, + "learning_rate": 1.4390244741678188e-05, + "loss": 0.3192, "step": 8555 }, { - "epoch": 1.2542124542124542, - "grad_norm": 0.3615088687926003, - "learning_rate": 1.4444631192870432e-05, - "loss": 0.3288, + "epoch": 1.26012071249816, + "grad_norm": 0.36756762895463796, + "learning_rate": 1.4382548233704442e-05, + "loss": 0.3254, "step": 8560 }, { - "epoch": 1.254945054945055, - "grad_norm": 0.355044070495033, - "learning_rate": 1.4436993420715703e-05, - "loss": 0.3175, + "epoch": 1.2608567643162079, + "grad_norm": 0.3674226336001027, + "learning_rate": 1.437484851117903e-05, + "loss": 0.3282, "step": 8565 }, { - "epoch": 1.2556776556776557, - "grad_norm": 0.3607423844814643, - "learning_rate": 1.442935242438126e-05, - "loss": 0.3223, + "epoch": 1.2615928161342558, + "grad_norm": 0.3685558855603122, + "learning_rate": 1.4367145579749613e-05, + "loss": 0.3405, "step": 8570 }, { - "epoch": 1.2564102564102564, - "grad_norm": 0.3724667809650596, - "learning_rate": 1.44217082094195e-05, - "loss": 0.3366, + "epoch": 1.262328867952304, + "grad_norm": 0.3926982981546724, + "learning_rate": 1.435943944506621e-05, + "loss": 0.3424, "step": 8575 }, { - "epoch": 1.2571428571428571, - "grad_norm": 0.36369061081177334, - "learning_rate": 1.441406078138516e-05, - "loss": 0.3297, + "epoch": 1.2630649197703518, + "grad_norm": 0.3582756586857344, + "learning_rate": 1.4351730112781187e-05, + "loss": 0.32, "step": 8580 }, { - "epoch": 1.2578754578754578, - "grad_norm": 0.37023878295575663, - "learning_rate": 1.4406410145835308e-05, - "loss": 0.311, + "epoch": 1.2638009715883998, + "grad_norm": 0.35466591104612183, + "learning_rate": 1.434401758854926e-05, + "loss": 0.3194, "step": 8585 }, { - "epoch": 1.2586080586080586, - "grad_norm": 0.3628455438920992, - "learning_rate": 1.4398756308329349e-05, - "loss": 0.3337, + "epoch": 1.2645370234064477, + "grad_norm": 0.35817468850934986, + "learning_rate": 1.4336301878027477e-05, + "loss": 0.3099, "step": 8590 }, { - "epoch": 1.2593406593406593, - "grad_norm": 0.36990808978523027, - "learning_rate": 1.4391099274429006e-05, - "loss": 0.3281, + "epoch": 1.2652730752244958, + "grad_norm": 0.36091388941579955, + "learning_rate": 1.4328582986875225e-05, + "loss": 0.3295, "step": 8595 }, { - "epoch": 1.26007326007326, - "grad_norm": 0.35199942728347305, - "learning_rate": 1.4383439049698332e-05, - "loss": 0.3344, + "epoch": 1.2660091270425438, + "grad_norm": 0.36965641039069697, + "learning_rate": 1.4320860920754234e-05, + "loss": 0.3168, "step": 8600 }, { - "epoch": 1.2608058608058608, - "grad_norm": 0.35389556178404175, - "learning_rate": 1.4375775639703694e-05, - "loss": 0.3224, + "epoch": 1.266745178860592, + "grad_norm": 0.354656763532126, + "learning_rate": 1.4313135685328555e-05, + "loss": 0.3386, "step": 8605 }, { - "epoch": 1.2615384615384615, - "grad_norm": 0.36957955875618836, - "learning_rate": 1.4368109050013774e-05, - "loss": 0.33, + "epoch": 1.2674812306786398, + "grad_norm": 0.3554833547625228, + "learning_rate": 1.4305407286264559e-05, + "loss": 0.3286, "step": 8610 }, { - "epoch": 1.2622710622710622, - "grad_norm": 0.3596168698950371, - "learning_rate": 1.4360439286199567e-05, - "loss": 0.3256, + "epoch": 1.2682172824966877, + "grad_norm": 0.3745807752955344, + "learning_rate": 1.429767572923095e-05, + "loss": 0.3295, "step": 8615 }, { - "epoch": 1.263003663003663, - "grad_norm": 0.3519913059085024, - "learning_rate": 1.4352766353834376e-05, - "loss": 0.3305, + "epoch": 1.2689533343147357, + "grad_norm": 0.3640930985321038, + "learning_rate": 1.4289941019898734e-05, + "loss": 0.328, "step": 8620 }, { - "epoch": 1.2637362637362637, - "grad_norm": 0.36741257893638307, - "learning_rate": 1.4345090258493797e-05, - "loss": 0.33, + "epoch": 1.2696893861327838, + "grad_norm": 0.35928982508334845, + "learning_rate": 1.4282203163941243e-05, + "loss": 0.3262, "step": 8625 }, { - "epoch": 1.2644688644688644, - "grad_norm": 0.3677419887989739, - "learning_rate": 1.433741100575574e-05, - "loss": 0.3334, + "epoch": 1.2704254379508317, + "grad_norm": 0.3540992145513367, + "learning_rate": 1.4274462167034107e-05, + "loss": 0.318, "step": 8630 }, { - "epoch": 1.2652014652014651, - "grad_norm": 0.35636100129085274, - "learning_rate": 1.4329728601200394e-05, - "loss": 0.3172, + "epoch": 1.2711614897688797, + "grad_norm": 0.3780018794421239, + "learning_rate": 1.4266718034855263e-05, + "loss": 0.3442, "step": 8635 }, { - "epoch": 1.2659340659340659, - "grad_norm": 0.3539343371341077, - "learning_rate": 1.4322043050410248e-05, - "loss": 0.3064, + "epoch": 1.2718975415869278, + "grad_norm": 0.3837064881062433, + "learning_rate": 1.4258970773084953e-05, + "loss": 0.341, "step": 8640 }, { - "epoch": 1.2666666666666666, - "grad_norm": 0.37701181028549535, - "learning_rate": 1.431435435897007e-05, - "loss": 0.3214, + "epoch": 1.2726335934049757, + "grad_norm": 0.37181236065817, + "learning_rate": 1.4251220387405704e-05, + "loss": 0.3239, "step": 8645 }, { - "epoch": 1.2673992673992673, - "grad_norm": 0.36302656770740904, - "learning_rate": 1.4306662532466922e-05, - "loss": 0.3258, + "epoch": 1.2733696452230236, + "grad_norm": 0.37514772793321255, + "learning_rate": 1.4243466883502344e-05, + "loss": 0.3308, "step": 8650 }, { - "epoch": 1.268131868131868, - "grad_norm": 0.36850903779226113, - "learning_rate": 1.4298967576490129e-05, - "loss": 0.3242, + "epoch": 1.2741056970410716, + "grad_norm": 0.3888885055005383, + "learning_rate": 1.4235710267061982e-05, + "loss": 0.3354, "step": 8655 }, { - "epoch": 1.2688644688644688, - "grad_norm": 0.34623155368688263, - "learning_rate": 1.4291269496631302e-05, - "loss": 0.3181, + "epoch": 1.2748417488591197, + "grad_norm": 0.3608557831671145, + "learning_rate": 1.422795054377401e-05, + "loss": 0.3271, "step": 8660 }, { - "epoch": 1.2695970695970695, - "grad_norm": 0.35643796388726406, - "learning_rate": 1.4283568298484311e-05, - "loss": 0.3291, + "epoch": 1.2755778006771676, + "grad_norm": 0.3771696834669466, + "learning_rate": 1.42201877193301e-05, + "loss": 0.3255, "step": 8665 }, { - "epoch": 1.2703296703296703, - "grad_norm": 0.3596620954791388, - "learning_rate": 1.4275863987645306e-05, - "loss": 0.3382, + "epoch": 1.2763138524952158, + "grad_norm": 0.37443360147557536, + "learning_rate": 1.4212421799424209e-05, + "loss": 0.32, "step": 8670 }, { - "epoch": 1.271062271062271, - "grad_norm": 0.357891218900445, - "learning_rate": 1.4268156569712688e-05, - "loss": 0.3308, + "epoch": 1.2770499043132637, + "grad_norm": 0.3675708947898225, + "learning_rate": 1.420465278975255e-05, + "loss": 0.3441, "step": 8675 }, { - "epoch": 1.2717948717948717, - "grad_norm": 0.4040598027313928, - "learning_rate": 1.426044605028712e-05, - "loss": 0.2991, + "epoch": 1.2777859561313116, + "grad_norm": 0.3838957751072269, + "learning_rate": 1.4196880696013601e-05, + "loss": 0.3241, "step": 8680 }, { - "epoch": 1.2725274725274724, - "grad_norm": 0.38530342372185855, - "learning_rate": 1.4252732434971513e-05, - "loss": 0.3206, + "epoch": 1.2785220079493596, + "grad_norm": 0.3779740176842661, + "learning_rate": 1.4189105523908118e-05, + "loss": 0.314, "step": 8685 }, { - "epoch": 1.2732600732600732, - "grad_norm": 0.36613327828340003, - "learning_rate": 1.424501572937104e-05, - "loss": 0.3388, + "epoch": 1.2792580597674077, + "grad_norm": 0.3572228985114576, + "learning_rate": 1.4181327279139101e-05, + "loss": 0.3325, "step": 8690 }, { - "epoch": 1.273992673992674, - "grad_norm": 0.39254721011390975, - "learning_rate": 1.4237295939093107e-05, - "loss": 0.3247, + "epoch": 1.2799941115854556, + "grad_norm": 0.35543397823294043, + "learning_rate": 1.4173545967411813e-05, + "loss": 0.329, "step": 8695 }, { - "epoch": 1.2747252747252746, - "grad_norm": 0.39593210630206305, - "learning_rate": 1.4229573069747369e-05, - "loss": 0.322, + "epoch": 1.2807301634035035, + "grad_norm": 0.3520570139100687, + "learning_rate": 1.416576159443375e-05, + "loss": 0.3315, "step": 8700 }, { - "epoch": 1.2754578754578754, - "grad_norm": 0.387086930088111, - "learning_rate": 1.4221847126945712e-05, - "loss": 0.3408, + "epoch": 1.2814662152215517, + "grad_norm": 0.3646897986733931, + "learning_rate": 1.4157974165914679e-05, + "loss": 0.3412, "step": 8705 }, { - "epoch": 1.276190476190476, - "grad_norm": 0.3651049771678776, - "learning_rate": 1.4214118116302263e-05, - "loss": 0.3195, + "epoch": 1.2822022670395996, + "grad_norm": 0.38837148575650376, + "learning_rate": 1.4150183687566585e-05, + "loss": 0.3327, "step": 8710 }, { - "epoch": 1.2769230769230768, - "grad_norm": 0.35826044372401333, - "learning_rate": 1.4206386043433373e-05, - "loss": 0.3279, + "epoch": 1.2829383188576475, + "grad_norm": 0.3576885191598486, + "learning_rate": 1.4142390165103706e-05, + "loss": 0.3433, "step": 8715 }, { - "epoch": 1.2776556776556776, - "grad_norm": 0.388816510302223, - "learning_rate": 1.4198650913957618e-05, - "loss": 0.3298, + "epoch": 1.2836743706756955, + "grad_norm": 0.3650856510045297, + "learning_rate": 1.41345936042425e-05, + "loss": 0.3211, "step": 8720 }, { - "epoch": 1.2783882783882783, - "grad_norm": 0.37877951613726674, - "learning_rate": 1.4190912733495803e-05, - "loss": 0.3303, + "epoch": 1.2844104224937436, + "grad_norm": 0.36091953357015577, + "learning_rate": 1.4126794010701667e-05, + "loss": 0.3187, "step": 8725 }, { - "epoch": 1.279120879120879, - "grad_norm": 0.3533529400655342, - "learning_rate": 1.418317150767094e-05, - "loss": 0.3466, + "epoch": 1.2851464743117915, + "grad_norm": 0.38524623893423865, + "learning_rate": 1.4118991390202121e-05, + "loss": 0.3332, "step": 8730 }, { - "epoch": 1.2798534798534797, - "grad_norm": 0.36787801088700656, - "learning_rate": 1.4175427242108254e-05, - "loss": 0.3423, + "epoch": 1.2858825261298397, + "grad_norm": 0.36112224267826737, + "learning_rate": 1.4111185748466998e-05, + "loss": 0.3143, "step": 8735 }, { - "epoch": 1.2805860805860805, - "grad_norm": 0.36406972417139055, - "learning_rate": 1.416767994243519e-05, - "loss": 0.3317, + "epoch": 1.2866185779478876, + "grad_norm": 0.3662161681057587, + "learning_rate": 1.4103377091221652e-05, + "loss": 0.3367, "step": 8740 }, { - "epoch": 1.2813186813186812, - "grad_norm": 0.3792476507833185, - "learning_rate": 1.4159929614281388e-05, - "loss": 0.3274, + "epoch": 1.2873546297659355, + "grad_norm": 0.36892244558442167, + "learning_rate": 1.4095565424193653e-05, + "loss": 0.3219, "step": 8745 }, { - "epoch": 1.282051282051282, - "grad_norm": 0.37446305544770797, - "learning_rate": 1.4152176263278685e-05, - "loss": 0.3495, + "epoch": 1.2880906815839834, + "grad_norm": 0.36791046504615804, + "learning_rate": 1.4087750753112768e-05, + "loss": 0.3265, "step": 8750 }, { - "epoch": 1.2827838827838827, - "grad_norm": 0.35389577373111636, - "learning_rate": 1.4144419895061128e-05, - "loss": 0.3257, + "epoch": 1.2888267334020316, + "grad_norm": 0.3758473221858236, + "learning_rate": 1.4079933083710978e-05, + "loss": 0.3285, "step": 8755 }, { - "epoch": 1.2835164835164834, - "grad_norm": 0.372566504995886, - "learning_rate": 1.4136660515264948e-05, - "loss": 0.3322, + "epoch": 1.2895627852200795, + "grad_norm": 0.36744876180300123, + "learning_rate": 1.4072112421722458e-05, + "loss": 0.3338, "step": 8760 }, { - "epoch": 1.2842490842490841, - "grad_norm": 0.3671150331560627, - "learning_rate": 1.4128898129528564e-05, - "loss": 0.3229, + "epoch": 1.2902988370381274, + "grad_norm": 0.35555498644135036, + "learning_rate": 1.4064288772883572e-05, + "loss": 0.3304, "step": 8765 }, { - "epoch": 1.2849816849816849, - "grad_norm": 0.36495466749219146, - "learning_rate": 1.4121132743492579e-05, - "loss": 0.3362, + "epoch": 1.2910348888561756, + "grad_norm": 0.34417999944016525, + "learning_rate": 1.4056462142932894e-05, + "loss": 0.3268, "step": 8770 }, { - "epoch": 1.2857142857142856, - "grad_norm": 0.3757007917022563, - "learning_rate": 1.4113364362799779e-05, - "loss": 0.3197, + "epoch": 1.2917709406742235, + "grad_norm": 0.361373389659639, + "learning_rate": 1.4048632537611165e-05, + "loss": 0.3108, "step": 8775 }, { - "epoch": 1.2864468864468863, - "grad_norm": 0.35545541798657093, - "learning_rate": 1.410559299309513e-05, - "loss": 0.3198, + "epoch": 1.2925069924922714, + "grad_norm": 0.35328174056540423, + "learning_rate": 1.4040799962661318e-05, + "loss": 0.3265, "step": 8780 }, { - "epoch": 1.287179487179487, - "grad_norm": 0.3601142341210226, - "learning_rate": 1.4097818640025762e-05, - "loss": 0.3268, + "epoch": 1.2932430443103193, + "grad_norm": 0.35560284177991747, + "learning_rate": 1.4032964423828457e-05, + "loss": 0.3246, "step": 8785 }, { - "epoch": 1.2879120879120878, - "grad_norm": 0.3794549038591166, - "learning_rate": 1.4090041309240975e-05, - "loss": 0.3386, + "epoch": 1.2939790961283675, + "grad_norm": 0.3762197826941611, + "learning_rate": 1.4025125926859872e-05, + "loss": 0.3357, "step": 8790 }, { - "epoch": 1.2886446886446885, - "grad_norm": 0.38102768293395706, - "learning_rate": 1.408226100639223e-05, - "loss": 0.3239, + "epoch": 1.2947151479464154, + "grad_norm": 0.36908458606723793, + "learning_rate": 1.401728447750501e-05, + "loss": 0.3417, "step": 8795 }, { - "epoch": 1.2893772893772895, - "grad_norm": 0.3491164728930594, - "learning_rate": 1.407447773713316e-05, - "loss": 0.3404, + "epoch": 1.2954511997644635, + "grad_norm": 0.34380332096863414, + "learning_rate": 1.400944008151549e-05, + "loss": 0.3197, "step": 8800 }, { - "epoch": 1.2901098901098902, - "grad_norm": 0.3578550888810835, - "learning_rate": 1.4066691507119541e-05, - "loss": 0.3314, + "epoch": 1.2961872515825115, + "grad_norm": 0.377858377856986, + "learning_rate": 1.4001592744645097e-05, + "loss": 0.3259, "step": 8805 }, { - "epoch": 1.290842490842491, - "grad_norm": 0.3583668492046273, - "learning_rate": 1.40589023220093e-05, - "loss": 0.3368, + "epoch": 1.2969233034005594, + "grad_norm": 0.36625931009980567, + "learning_rate": 1.3993742472649761e-05, + "loss": 0.3387, "step": 8810 }, { - "epoch": 1.2915750915750916, - "grad_norm": 0.35734139851501734, - "learning_rate": 1.4051110187462524e-05, - "loss": 0.3167, + "epoch": 1.2976593552186073, + "grad_norm": 0.3673278872501545, + "learning_rate": 1.3985889271287574e-05, + "loss": 0.3286, "step": 8815 }, { - "epoch": 1.2923076923076924, - "grad_norm": 0.37017966348980635, - "learning_rate": 1.4043315109141431e-05, - "loss": 0.3323, + "epoch": 1.2983954070366555, + "grad_norm": 0.3724532284800494, + "learning_rate": 1.3978033146318776e-05, + "loss": 0.3301, "step": 8820 }, { - "epoch": 1.293040293040293, - "grad_norm": 0.3658193901198154, - "learning_rate": 1.4035517092710383e-05, - "loss": 0.3218, + "epoch": 1.2991314588547034, + "grad_norm": 0.3744687861341638, + "learning_rate": 1.3970174103505748e-05, + "loss": 0.3464, "step": 8825 }, { - "epoch": 1.2937728937728938, - "grad_norm": 0.40716340886974844, - "learning_rate": 1.4027716143835873e-05, - "loss": 0.347, + "epoch": 1.2998675106727513, + "grad_norm": 0.3519718326489585, + "learning_rate": 1.3962312148613011e-05, + "loss": 0.3018, "step": 8830 }, { - "epoch": 1.2945054945054946, - "grad_norm": 0.37169502716577363, - "learning_rate": 1.4019912268186532e-05, - "loss": 0.3336, + "epoch": 1.3006035624907994, + "grad_norm": 0.3830185797720154, + "learning_rate": 1.3954447287407225e-05, + "loss": 0.3217, "step": 8835 }, { - "epoch": 1.2952380952380953, - "grad_norm": 0.36309543152317264, - "learning_rate": 1.401210547143311e-05, - "loss": 0.3426, + "epoch": 1.3013396143088474, + "grad_norm": 0.3570941300178156, + "learning_rate": 1.394657952565718e-05, + "loss": 0.3144, "step": 8840 }, { - "epoch": 1.295970695970696, - "grad_norm": 0.35397933043600555, - "learning_rate": 1.4004295759248493e-05, - "loss": 0.3342, + "epoch": 1.3020756661268953, + "grad_norm": 0.36288790463875925, + "learning_rate": 1.3938708869133795e-05, + "loss": 0.3323, "step": 8845 }, { - "epoch": 1.2967032967032968, - "grad_norm": 0.3795439470130737, - "learning_rate": 1.3996483137307663e-05, - "loss": 0.3337, + "epoch": 1.3028117179449432, + "grad_norm": 0.3447747037380813, + "learning_rate": 1.3930835323610113e-05, + "loss": 0.3171, "step": 8850 }, { - "epoch": 1.2974358974358975, - "grad_norm": 0.36739244392710657, - "learning_rate": 1.3988667611287739e-05, - "loss": 0.3414, + "epoch": 1.3035477697629914, + "grad_norm": 0.36034095883531797, + "learning_rate": 1.3922958894861292e-05, + "loss": 0.3172, "step": 8855 }, { - "epoch": 1.2981684981684982, - "grad_norm": 0.3564963815512248, - "learning_rate": 1.3980849186867934e-05, - "loss": 0.3352, + "epoch": 1.3042838215810393, + "grad_norm": 0.36541817309380326, + "learning_rate": 1.391507958866461e-05, + "loss": 0.3348, "step": 8860 }, { - "epoch": 1.298901098901099, - "grad_norm": 0.35018651495652114, - "learning_rate": 1.3973027869729578e-05, - "loss": 0.3348, + "epoch": 1.3050198733990874, + "grad_norm": 0.375051385256162, + "learning_rate": 1.3907197410799444e-05, + "loss": 0.3251, "step": 8865 }, { - "epoch": 1.2996336996336997, - "grad_norm": 0.3586857206932244, - "learning_rate": 1.3965203665556099e-05, - "loss": 0.3436, + "epoch": 1.3057559252171353, + "grad_norm": 0.381012639470291, + "learning_rate": 1.38993123670473e-05, + "loss": 0.3333, "step": 8870 }, { - "epoch": 1.3003663003663004, - "grad_norm": 0.3744151077440032, - "learning_rate": 1.3957376580033021e-05, - "loss": 0.3396, + "epoch": 1.3064919770351833, + "grad_norm": 0.3640874685498063, + "learning_rate": 1.389142446319176e-05, + "loss": 0.3305, "step": 8875 }, { - "epoch": 1.3010989010989011, - "grad_norm": 1.0865154890091215, - "learning_rate": 1.3949546618847965e-05, - "loss": 0.3311, + "epoch": 1.3072280288532312, + "grad_norm": 0.367532895857759, + "learning_rate": 1.3883533705018521e-05, + "loss": 0.3279, "step": 8880 }, { - "epoch": 1.3018315018315019, - "grad_norm": 0.3602674332465463, - "learning_rate": 1.3941713787690638e-05, - "loss": 0.3229, + "epoch": 1.3079640806712793, + "grad_norm": 0.3626074299799139, + "learning_rate": 1.3875640098315371e-05, + "loss": 0.3171, "step": 8885 }, { - "epoch": 1.3025641025641026, - "grad_norm": 0.3680869993512229, - "learning_rate": 1.3933878092252839e-05, - "loss": 0.3399, + "epoch": 1.3087001324893273, + "grad_norm": 0.3616593543054651, + "learning_rate": 1.386774364887218e-05, + "loss": 0.3243, "step": 8890 }, { - "epoch": 1.3032967032967033, - "grad_norm": 0.37199644244728475, - "learning_rate": 1.3926039538228438e-05, - "loss": 0.345, + "epoch": 1.3094361843073752, + "grad_norm": 0.36257693911461103, + "learning_rate": 1.3859844362480912e-05, + "loss": 0.3309, "step": 8895 }, { - "epoch": 1.304029304029304, - "grad_norm": 0.35505154096683034, - "learning_rate": 1.3918198131313391e-05, - "loss": 0.3219, + "epoch": 1.3101722361254233, + "grad_norm": 0.39690486121273244, + "learning_rate": 1.3851942244935608e-05, + "loss": 0.3272, "step": 8900 }, { - "epoch": 1.3047619047619048, - "grad_norm": 0.3732293335189872, - "learning_rate": 1.3910353877205722e-05, - "loss": 0.3205, + "epoch": 1.3109082879434713, + "grad_norm": 0.3742390186875793, + "learning_rate": 1.3844037302032382e-05, + "loss": 0.3293, "step": 8905 }, { - "epoch": 1.3054945054945055, - "grad_norm": 0.38105851569610655, - "learning_rate": 1.3902506781605533e-05, - "loss": 0.3392, + "epoch": 1.3116443397615192, + "grad_norm": 0.35758003112894904, + "learning_rate": 1.3836129539569427e-05, + "loss": 0.3298, "step": 8910 }, { - "epoch": 1.3062271062271062, - "grad_norm": 0.3694012986495021, - "learning_rate": 1.389465685021498e-05, - "loss": 0.3426, + "epoch": 1.312380391579567, + "grad_norm": 0.36120571548511105, + "learning_rate": 1.3828218963347002e-05, + "loss": 0.3148, "step": 8915 }, { - "epoch": 1.306959706959707, - "grad_norm": 0.3661925729167942, - "learning_rate": 1.3886804088738282e-05, - "loss": 0.331, + "epoch": 1.3131164433976152, + "grad_norm": 0.35412896216423156, + "learning_rate": 1.3820305579167425e-05, + "loss": 0.3267, "step": 8920 }, { - "epoch": 1.3076923076923077, - "grad_norm": 0.3730823269174414, - "learning_rate": 1.3878948502881716e-05, - "loss": 0.35, + "epoch": 1.3138524952156632, + "grad_norm": 0.37285971521264993, + "learning_rate": 1.3812389392835082e-05, + "loss": 0.3541, "step": 8925 }, { - "epoch": 1.3084249084249084, - "grad_norm": 0.3772251825415492, - "learning_rate": 1.3871090098353615e-05, - "loss": 0.329, + "epoch": 1.3145885470337113, + "grad_norm": 0.3717384916161553, + "learning_rate": 1.3804470410156407e-05, + "loss": 0.3186, "step": 8930 }, { - "epoch": 1.3091575091575092, - "grad_norm": 0.3601992251208424, - "learning_rate": 1.3863228880864352e-05, - "loss": 0.3366, + "epoch": 1.3153245988517592, + "grad_norm": 0.37521720529271974, + "learning_rate": 1.3796548636939888e-05, + "loss": 0.3264, "step": 8935 }, { - "epoch": 1.30989010989011, - "grad_norm": 0.3840853889143167, - "learning_rate": 1.3855364856126354e-05, - "loss": 0.3307, + "epoch": 1.3160606506698072, + "grad_norm": 0.3606370679059983, + "learning_rate": 1.3788624078996062e-05, + "loss": 0.3194, "step": 8940 }, { - "epoch": 1.3106227106227106, - "grad_norm": 0.3634546535899789, - "learning_rate": 1.3847498029854078e-05, - "loss": 0.335, + "epoch": 1.316796702487855, + "grad_norm": 0.3663504972186256, + "learning_rate": 1.3780696742137508e-05, + "loss": 0.3326, "step": 8945 }, { - "epoch": 1.3113553113553114, - "grad_norm": 0.3605399864380127, - "learning_rate": 1.3839628407764026e-05, - "loss": 0.3364, + "epoch": 1.317532754305903, + "grad_norm": 0.3749521221694766, + "learning_rate": 1.3772766632178836e-05, + "loss": 0.3353, "step": 8950 }, { - "epoch": 1.312087912087912, - "grad_norm": 0.3850927201382173, - "learning_rate": 1.3831755995574722e-05, - "loss": 0.3273, + "epoch": 1.3182688061239511, + "grad_norm": 0.35160529477528407, + "learning_rate": 1.37648337549367e-05, + "loss": 0.3247, "step": 8955 }, { - "epoch": 1.3128205128205128, - "grad_norm": 0.3633776638654286, - "learning_rate": 1.382388079900673e-05, - "loss": 0.3249, + "epoch": 1.319004857941999, + "grad_norm": 0.36493455882005593, + "learning_rate": 1.3756898116229778e-05, + "loss": 0.329, "step": 8960 }, { - "epoch": 1.3135531135531135, - "grad_norm": 0.3630863466409669, - "learning_rate": 1.381600282378262e-05, - "loss": 0.3146, + "epoch": 1.3197409097600472, + "grad_norm": 0.3749110524136941, + "learning_rate": 1.3748959721878774e-05, + "loss": 0.3394, "step": 8965 }, { - "epoch": 1.3142857142857143, - "grad_norm": 0.355632113123853, - "learning_rate": 1.3808122075626999e-05, - "loss": 0.3364, + "epoch": 1.3204769615780951, + "grad_norm": 0.376136778529708, + "learning_rate": 1.3741018577706415e-05, + "loss": 0.3217, "step": 8970 }, { - "epoch": 1.315018315018315, - "grad_norm": 0.36949929872541576, - "learning_rate": 1.3800238560266475e-05, - "loss": 0.3282, + "epoch": 1.321213013396143, + "grad_norm": 0.38402482862451526, + "learning_rate": 1.3733074689537446e-05, + "loss": 0.3414, "step": 8975 }, { - "epoch": 1.3157509157509157, - "grad_norm": 0.38108448533492045, - "learning_rate": 1.3792352283429681e-05, - "loss": 0.3373, + "epoch": 1.321949065214191, + "grad_norm": 0.3700605631799661, + "learning_rate": 1.3725128063198622e-05, + "loss": 0.3171, "step": 8980 }, { - "epoch": 1.3164835164835165, - "grad_norm": 0.37695903994833746, - "learning_rate": 1.3784463250847239e-05, - "loss": 0.3311, + "epoch": 1.3226851170322391, + "grad_norm": 0.3678747227565391, + "learning_rate": 1.371717870451871e-05, + "loss": 0.3287, "step": 8985 }, { - "epoch": 1.3172161172161172, - "grad_norm": 0.36201406054495167, - "learning_rate": 1.3776571468251787e-05, - "loss": 0.3401, + "epoch": 1.323421168850287, + "grad_norm": 0.37230065112792066, + "learning_rate": 1.370922661932847e-05, + "loss": 0.3289, "step": 8990 }, { - "epoch": 1.317948717948718, - "grad_norm": 0.3643729233817631, - "learning_rate": 1.3768676941377957e-05, - "loss": 0.3294, + "epoch": 1.3241572206683352, + "grad_norm": 0.3529879764377384, + "learning_rate": 1.3701271813460684e-05, + "loss": 0.3388, "step": 8995 }, { - "epoch": 1.3186813186813187, - "grad_norm": 0.37217102276035735, - "learning_rate": 1.3760779675962376e-05, - "loss": 0.3316, + "epoch": 1.3248932724863831, + "grad_norm": 0.36540336257391304, + "learning_rate": 1.3693314292750102e-05, + "loss": 0.3178, "step": 9000 }, { - "epoch": 1.3194139194139194, - "grad_norm": 0.3791660413316379, - "learning_rate": 1.3752879677743663e-05, - "loss": 0.3131, + "epoch": 1.325629324304431, + "grad_norm": 0.376472909085647, + "learning_rate": 1.3685354063033485e-05, + "loss": 0.3236, "step": 9005 }, { - "epoch": 1.3201465201465201, - "grad_norm": 0.39211783715886, - "learning_rate": 1.3744976952462414e-05, - "loss": 0.3268, + "epoch": 1.326365376122479, + "grad_norm": 0.37850104947330887, + "learning_rate": 1.3677391130149578e-05, + "loss": 0.3212, "step": 9010 }, { - "epoch": 1.3208791208791208, - "grad_norm": 0.36582428340374257, - "learning_rate": 1.3737071505861215e-05, - "loss": 0.3477, + "epoch": 1.3271014279405269, + "grad_norm": 0.35916930303255235, + "learning_rate": 1.3669425499939105e-05, + "loss": 0.3203, "step": 9015 }, { - "epoch": 1.3216117216117216, - "grad_norm": 0.37117014585149033, - "learning_rate": 1.3729163343684629e-05, - "loss": 0.3313, + "epoch": 1.327837479758575, + "grad_norm": 0.3608480287386314, + "learning_rate": 1.3661457178244764e-05, + "loss": 0.3224, "step": 9020 }, { - "epoch": 1.3223443223443223, - "grad_norm": 0.36382474665096526, - "learning_rate": 1.3721252471679191e-05, - "loss": 0.3326, + "epoch": 1.328573531576623, + "grad_norm": 0.35597939890032604, + "learning_rate": 1.3653486170911239e-05, + "loss": 0.3335, "step": 9025 }, { - "epoch": 1.323076923076923, - "grad_norm": 0.3642539317171026, - "learning_rate": 1.37133388955934e-05, - "loss": 0.3302, + "epoch": 1.329309583394671, + "grad_norm": 0.3640202335939862, + "learning_rate": 1.3645512483785175e-05, + "loss": 0.3305, "step": 9030 }, { - "epoch": 1.3238095238095238, - "grad_norm": 0.3628206861452227, - "learning_rate": 1.370542262117773e-05, - "loss": 0.331, + "epoch": 1.330045635212719, + "grad_norm": 0.3826170418063535, + "learning_rate": 1.3637536122715187e-05, + "loss": 0.3212, "step": 9035 }, { - "epoch": 1.3245421245421245, - "grad_norm": 0.3880906668746477, - "learning_rate": 1.3697503654184608e-05, - "loss": 0.3444, + "epoch": 1.330781687030767, + "grad_norm": 0.38003943131416446, + "learning_rate": 1.3629557093551846e-05, + "loss": 0.3262, "step": 9040 }, { - "epoch": 1.3252747252747252, - "grad_norm": 0.3457486717511427, - "learning_rate": 1.3689582000368425e-05, - "loss": 0.3345, + "epoch": 1.3315177388488149, + "grad_norm": 0.3635462646251249, + "learning_rate": 1.3621575402147686e-05, + "loss": 0.3133, "step": 9045 }, { - "epoch": 1.326007326007326, - "grad_norm": 0.3657750441820086, - "learning_rate": 1.3681657665485508e-05, - "loss": 0.3305, + "epoch": 1.332253790666863, + "grad_norm": 0.3560170171295613, + "learning_rate": 1.361359105435719e-05, + "loss": 0.3324, "step": 9050 }, { - "epoch": 1.3267399267399267, - "grad_norm": 0.3505090638445965, - "learning_rate": 1.3673730655294152e-05, - "loss": 0.3203, + "epoch": 1.332989842484911, + "grad_norm": 0.5920590711538712, + "learning_rate": 1.3605604056036793e-05, + "loss": 0.3718, "step": 9055 }, { - "epoch": 1.3274725274725274, - "grad_norm": 0.375871260070138, - "learning_rate": 1.3665800975554587e-05, - "loss": 0.3115, + "epoch": 1.3337258943029588, + "grad_norm": 0.7817575272113128, + "learning_rate": 1.3597614413044868e-05, + "loss": 0.4103, "step": 9060 }, { - "epoch": 1.3282051282051281, - "grad_norm": 0.379502320322331, - "learning_rate": 1.3657868632028983e-05, - "loss": 0.3357, + "epoch": 1.334461946121007, + "grad_norm": 0.436224414420997, + "learning_rate": 1.3589622131241736e-05, + "loss": 0.3421, "step": 9065 }, { - "epoch": 1.3289377289377289, - "grad_norm": 0.3615803192854987, - "learning_rate": 1.3649933630481444e-05, - "loss": 0.3264, + "epoch": 1.335197997939055, + "grad_norm": 0.3691518001975837, + "learning_rate": 1.3581627216489644e-05, + "loss": 0.3356, "step": 9070 }, { - "epoch": 1.3296703296703296, - "grad_norm": 0.3769021122295167, - "learning_rate": 1.3641995976678012e-05, - "loss": 0.3378, + "epoch": 1.3359340497571028, + "grad_norm": 0.36678308623805267, + "learning_rate": 1.357362967465278e-05, + "loss": 0.3184, "step": 9075 }, { - "epoch": 1.3304029304029303, - "grad_norm": 0.3624634399641649, - "learning_rate": 1.3634055676386644e-05, - "loss": 0.3158, + "epoch": 1.3366701015751508, + "grad_norm": 0.45367278789041315, + "learning_rate": 1.3565629511597252e-05, + "loss": 0.3295, "step": 9080 }, { - "epoch": 1.331135531135531, - "grad_norm": 0.3597843061920691, - "learning_rate": 1.3626112735377235e-05, - "loss": 0.3404, + "epoch": 1.337406153393199, + "grad_norm": 0.38150781862087807, + "learning_rate": 1.3557626733191094e-05, + "loss": 0.3153, "step": 9085 }, { - "epoch": 1.3318681318681318, - "grad_norm": 0.3678244039216215, - "learning_rate": 1.3618167159421591e-05, - "loss": 0.3288, + "epoch": 1.3381422052112468, + "grad_norm": 0.37214536870557213, + "learning_rate": 1.3549621345304256e-05, + "loss": 0.3227, "step": 9090 }, { - "epoch": 1.3326007326007325, - "grad_norm": 0.40084734783958803, - "learning_rate": 1.3610218954293436e-05, - "loss": 0.3235, + "epoch": 1.338878257029295, + "grad_norm": 0.3493050932086383, + "learning_rate": 1.3541613353808604e-05, + "loss": 0.3348, "step": 9095 }, { - "epoch": 1.3333333333333333, - "grad_norm": 0.3724983949020907, - "learning_rate": 1.3602268125768392e-05, - "loss": 0.3367, + "epoch": 1.339614308847343, + "grad_norm": 0.3686246268789654, + "learning_rate": 1.3533602764577913e-05, + "loss": 0.3368, "step": 9100 }, { - "epoch": 1.334065934065934, - "grad_norm": 0.36524272902640703, - "learning_rate": 1.3594314679624008e-05, - "loss": 0.3149, + "epoch": 1.3403503606653908, + "grad_norm": 0.3778724437437195, + "learning_rate": 1.3525589583487861e-05, + "loss": 0.3357, "step": 9105 }, { - "epoch": 1.3347985347985347, - "grad_norm": 0.3933348954444407, - "learning_rate": 1.3586358621639722e-05, - "loss": 0.337, + "epoch": 1.3410864124834387, + "grad_norm": 0.37353179073977216, + "learning_rate": 1.3517573816416034e-05, + "loss": 0.3239, "step": 9110 }, { - "epoch": 1.3355311355311354, - "grad_norm": 0.3566922658589224, - "learning_rate": 1.3578399957596869e-05, - "loss": 0.3258, + "epoch": 1.3418224643014869, + "grad_norm": 0.36758473241192974, + "learning_rate": 1.3509555469241911e-05, + "loss": 0.3281, "step": 9115 }, { - "epoch": 1.3362637362637364, - "grad_norm": 0.3622582797251259, - "learning_rate": 1.3570438693278682e-05, - "loss": 0.3313, + "epoch": 1.3425585161195348, + "grad_norm": 0.3688353394560033, + "learning_rate": 1.3501534547846859e-05, + "loss": 0.3133, "step": 9120 }, { - "epoch": 1.3369963369963371, - "grad_norm": 0.35355792278152115, - "learning_rate": 1.3562474834470285e-05, - "loss": 0.3308, + "epoch": 1.3432945679375827, + "grad_norm": 0.35636360295775227, + "learning_rate": 1.349351105811414e-05, + "loss": 0.3236, "step": 9125 }, { - "epoch": 1.3377289377289379, - "grad_norm": 0.35095601290160816, - "learning_rate": 1.3554508386958685e-05, - "loss": 0.3187, + "epoch": 1.3440306197556309, + "grad_norm": 0.36193316658793956, + "learning_rate": 1.348548500592889e-05, + "loss": 0.331, "step": 9130 }, { - "epoch": 1.3384615384615386, - "grad_norm": 0.366020871166583, - "learning_rate": 1.354653935653277e-05, - "loss": 0.322, + "epoch": 1.3447666715736788, + "grad_norm": 0.3531798465100575, + "learning_rate": 1.3477456397178137e-05, + "loss": 0.3144, "step": 9135 }, { - "epoch": 1.3391941391941393, - "grad_norm": 0.3727293424181606, - "learning_rate": 1.3538567748983303e-05, - "loss": 0.3318, + "epoch": 1.3455027233917267, + "grad_norm": 0.37679106122190187, + "learning_rate": 1.346942523775078e-05, + "loss": 0.3257, "step": 9140 }, { - "epoch": 1.33992673992674, - "grad_norm": 0.36974071866429414, - "learning_rate": 1.3530593570102923e-05, - "loss": 0.3221, + "epoch": 1.3462387752097746, + "grad_norm": 0.3784726502374104, + "learning_rate": 1.3461391533537586e-05, + "loss": 0.3329, "step": 9145 }, { - "epoch": 1.3406593406593408, - "grad_norm": 0.35864610045404177, - "learning_rate": 1.3522616825686135e-05, - "loss": 0.3133, + "epoch": 1.3469748270278228, + "grad_norm": 0.3630903210951978, + "learning_rate": 1.345335529043119e-05, + "loss": 0.3258, "step": 9150 }, { - "epoch": 1.3413919413919415, - "grad_norm": 0.36740161583874237, - "learning_rate": 1.351463752152931e-05, - "loss": 0.3236, + "epoch": 1.3477108788458707, + "grad_norm": 0.3837233711679743, + "learning_rate": 1.3445316514326085e-05, + "loss": 0.315, "step": 9155 }, { - "epoch": 1.3421245421245422, - "grad_norm": 0.36983404972777706, - "learning_rate": 1.3506655663430681e-05, - "loss": 0.3278, + "epoch": 1.3484469306639189, + "grad_norm": 0.3803724064684297, + "learning_rate": 1.3437275211118635e-05, + "loss": 0.3371, "step": 9160 }, { - "epoch": 1.342857142857143, - "grad_norm": 0.379522902841167, - "learning_rate": 1.3498671257190332e-05, - "loss": 0.3387, + "epoch": 1.3491829824819668, + "grad_norm": 0.3671361885025908, + "learning_rate": 1.3429231386707042e-05, + "loss": 0.3256, "step": 9165 }, { - "epoch": 1.3435897435897437, - "grad_norm": 0.3636575507838711, - "learning_rate": 1.34906843086102e-05, - "loss": 0.3218, + "epoch": 1.3499190343000147, + "grad_norm": 0.36363935603638564, + "learning_rate": 1.3421185046991364e-05, + "loss": 0.3221, "step": 9170 }, { - "epoch": 1.3443223443223444, - "grad_norm": 0.35300522843109694, - "learning_rate": 1.3482694823494074e-05, - "loss": 0.3238, + "epoch": 1.3506550861180626, + "grad_norm": 0.36065150620607056, + "learning_rate": 1.3413136197873504e-05, + "loss": 0.3181, "step": 9175 }, { - "epoch": 1.3450549450549452, - "grad_norm": 0.3890464743415906, - "learning_rate": 1.347470280764758e-05, - "loss": 0.3361, + "epoch": 1.3513911379361108, + "grad_norm": 0.3693935412323531, + "learning_rate": 1.3405084845257213e-05, + "loss": 0.314, "step": 9180 }, { - "epoch": 1.345787545787546, - "grad_norm": 0.3838027933005435, - "learning_rate": 1.3466708266878182e-05, - "loss": 0.3332, + "epoch": 1.3521271897541587, + "grad_norm": 0.3626229080305777, + "learning_rate": 1.3397030995048061e-05, + "loss": 0.3297, "step": 9185 }, { - "epoch": 1.3465201465201466, - "grad_norm": 0.36039931461903624, - "learning_rate": 1.345871120699519e-05, - "loss": 0.3172, + "epoch": 1.3528632415722066, + "grad_norm": 0.3634563658225217, + "learning_rate": 1.3388974653153466e-05, + "loss": 0.3266, "step": 9190 }, { - "epoch": 1.3472527472527474, - "grad_norm": 0.3548207792166718, - "learning_rate": 1.345071163380973e-05, - "loss": 0.3329, + "epoch": 1.3535992933902548, + "grad_norm": 0.37342942136131024, + "learning_rate": 1.3380915825482666e-05, + "loss": 0.3391, "step": 9195 }, { - "epoch": 1.347985347985348, - "grad_norm": 0.3633084594988163, - "learning_rate": 1.3442709553134767e-05, - "loss": 0.3209, + "epoch": 1.3543353452083027, + "grad_norm": 0.3563786317240363, + "learning_rate": 1.3372854517946725e-05, + "loss": 0.32, "step": 9200 }, { - "epoch": 1.3487179487179488, - "grad_norm": 0.3650618479253569, - "learning_rate": 1.3434704970785077e-05, - "loss": 0.3217, + "epoch": 1.3550713970263506, + "grad_norm": 0.35602494064860596, + "learning_rate": 1.3364790736458522e-05, + "loss": 0.3241, "step": 9205 }, { - "epoch": 1.3494505494505495, - "grad_norm": 0.34552783167904033, - "learning_rate": 1.3426697892577263e-05, - "loss": 0.3036, + "epoch": 1.3558074488443985, + "grad_norm": 0.3567453104136102, + "learning_rate": 1.3356724486932756e-05, + "loss": 0.3354, "step": 9210 }, { - "epoch": 1.3501831501831503, - "grad_norm": 0.3620188951071061, - "learning_rate": 1.3418688324329737e-05, - "loss": 0.3231, + "epoch": 1.3565435006624467, + "grad_norm": 0.37979383603228695, + "learning_rate": 1.334865577528593e-05, + "loss": 0.3301, "step": 9215 }, { - "epoch": 1.350915750915751, - "grad_norm": 0.3722775578935979, - "learning_rate": 1.3410676271862722e-05, - "loss": 0.3295, + "epoch": 1.3572795524804946, + "grad_norm": 0.3766475839970793, + "learning_rate": 1.334058460743636e-05, + "loss": 0.3336, "step": 9220 }, { - "epoch": 1.3516483516483517, - "grad_norm": 0.3643391038118818, - "learning_rate": 1.3402661740998237e-05, - "loss": 0.3354, + "epoch": 1.3580156042985427, + "grad_norm": 0.3715167034687806, + "learning_rate": 1.333251098930416e-05, + "loss": 0.3346, "step": 9225 }, { - "epoch": 1.3523809523809525, - "grad_norm": 0.37717278139689026, - "learning_rate": 1.3394644737560123e-05, - "loss": 0.3183, + "epoch": 1.3587516561165907, + "grad_norm": 0.362141433485265, + "learning_rate": 1.3324434926811242e-05, + "loss": 0.3235, "step": 9230 }, { - "epoch": 1.3531135531135532, - "grad_norm": 0.3779918764191859, - "learning_rate": 1.3386625267373993e-05, - "loss": 0.3258, + "epoch": 1.3594877079346386, + "grad_norm": 0.3688076294470631, + "learning_rate": 1.3316356425881306e-05, + "loss": 0.3353, "step": 9235 }, { - "epoch": 1.353846153846154, - "grad_norm": 0.37789420055359735, - "learning_rate": 1.337860333626728e-05, - "loss": 0.3274, + "epoch": 1.3602237597526865, + "grad_norm": 0.36877605896245913, + "learning_rate": 1.330827549243985e-05, + "loss": 0.3141, "step": 9240 }, { - "epoch": 1.3545787545787547, - "grad_norm": 0.3549763157893029, - "learning_rate": 1.3370578950069173e-05, - "loss": 0.3239, + "epoch": 1.3609598115707346, + "grad_norm": 0.35566835653531914, + "learning_rate": 1.3300192132414153e-05, + "loss": 0.314, "step": 9245 }, { - "epoch": 1.3553113553113554, - "grad_norm": 0.36645175308764016, - "learning_rate": 1.3362552114610671e-05, - "loss": 0.3348, + "epoch": 1.3616958633887826, + "grad_norm": 0.37602890117093807, + "learning_rate": 1.3292106351733265e-05, + "loss": 0.3301, "step": 9250 }, { - "epoch": 1.3560439560439561, - "grad_norm": 0.3701155894863453, - "learning_rate": 1.3354522835724542e-05, - "loss": 0.3461, + "epoch": 1.3624319152068305, + "grad_norm": 0.3762513015610662, + "learning_rate": 1.3284018156328022e-05, + "loss": 0.3357, "step": 9255 }, { - "epoch": 1.3567765567765568, - "grad_norm": 0.3730913997528624, - "learning_rate": 1.3346491119245331e-05, - "loss": 0.3115, + "epoch": 1.3631679670248786, + "grad_norm": 0.34985999884758706, + "learning_rate": 1.3275927552131027e-05, + "loss": 0.3286, "step": 9260 }, { - "epoch": 1.3575091575091576, - "grad_norm": 0.36728438979467976, - "learning_rate": 1.3338456971009352e-05, - "loss": 0.3244, + "epoch": 1.3639040188429266, + "grad_norm": 0.37783002776214625, + "learning_rate": 1.3267834545076652e-05, + "loss": 0.3355, "step": 9265 }, { - "epoch": 1.3582417582417583, - "grad_norm": 0.3647202374966414, - "learning_rate": 1.3330420396854694e-05, - "loss": 0.3326, + "epoch": 1.3646400706609745, + "grad_norm": 0.33845063438904066, + "learning_rate": 1.3259739141101026e-05, + "loss": 0.3282, "step": 9270 }, { - "epoch": 1.358974358974359, - "grad_norm": 0.35244098745828395, - "learning_rate": 1.3322381402621197e-05, - "loss": 0.3205, + "epoch": 1.3653761224790224, + "grad_norm": 0.3678656537852835, + "learning_rate": 1.3251641346142048e-05, + "loss": 0.3401, "step": 9275 }, { - "epoch": 1.3597069597069598, - "grad_norm": 0.3810067078097531, - "learning_rate": 1.3314339994150467e-05, - "loss": 0.3205, + "epoch": 1.3661121742970705, + "grad_norm": 0.3587484044138878, + "learning_rate": 1.3243541166139355e-05, + "loss": 0.3289, "step": 9280 }, { - "epoch": 1.3604395604395605, - "grad_norm": 0.3678887243014611, - "learning_rate": 1.3306296177285867e-05, - "loss": 0.3361, + "epoch": 1.3668482261151185, + "grad_norm": 0.3658949912844781, + "learning_rate": 1.3235438607034348e-05, + "loss": 0.3249, "step": 9285 }, { - "epoch": 1.3611721611721612, - "grad_norm": 0.39245251166398054, - "learning_rate": 1.3298249957872502e-05, - "loss": 0.3306, + "epoch": 1.3675842779331666, + "grad_norm": 0.35026397400861287, + "learning_rate": 1.3227333674770163e-05, + "loss": 0.3175, "step": 9290 }, { - "epoch": 1.361904761904762, - "grad_norm": 0.36205216028276305, - "learning_rate": 1.3290201341757231e-05, - "loss": 0.3227, + "epoch": 1.3683203297512145, + "grad_norm": 0.34018425443285993, + "learning_rate": 1.3219226375291678e-05, + "loss": 0.3124, "step": 9295 }, { - "epoch": 1.3626373626373627, - "grad_norm": 0.39561693455023267, - "learning_rate": 1.3282150334788649e-05, - "loss": 0.3243, + "epoch": 1.3690563815692625, + "grad_norm": 0.37839391469181477, + "learning_rate": 1.321111671454551e-05, + "loss": 0.3414, "step": 9300 }, { - "epoch": 1.3633699633699634, - "grad_norm": 0.3678755847393884, - "learning_rate": 1.3274096942817086e-05, - "loss": 0.3194, + "epoch": 1.3697924333873104, + "grad_norm": 0.3619436665518319, + "learning_rate": 1.320300469848001e-05, + "loss": 0.3278, "step": 9305 }, { - "epoch": 1.3641025641025641, - "grad_norm": 0.358546203031558, - "learning_rate": 1.3266041171694615e-05, - "loss": 0.3163, + "epoch": 1.3705284852053585, + "grad_norm": 0.361850051949316, + "learning_rate": 1.3194890333045254e-05, + "loss": 0.3095, "step": 9310 }, { - "epoch": 1.3648351648351649, - "grad_norm": 0.3652618532577394, - "learning_rate": 1.325798302727503e-05, - "loss": 0.3113, + "epoch": 1.3712645370234064, + "grad_norm": 0.353165857016833, + "learning_rate": 1.3186773624193036e-05, + "loss": 0.3178, "step": 9315 }, { - "epoch": 1.3655677655677656, - "grad_norm": 0.35800691736373186, - "learning_rate": 1.324992251541385e-05, - "loss": 0.3257, + "epoch": 1.3720005888414544, + "grad_norm": 0.35284171647302653, + "learning_rate": 1.3178654577876882e-05, + "loss": 0.32, "step": 9320 }, { - "epoch": 1.3663003663003663, - "grad_norm": 0.36688013681817694, - "learning_rate": 1.3241859641968318e-05, - "loss": 0.3328, + "epoch": 1.3727366406595025, + "grad_norm": 0.3566799951767712, + "learning_rate": 1.3170533200052019e-05, + "loss": 0.326, "step": 9325 }, { - "epoch": 1.367032967032967, - "grad_norm": 0.354463599391648, - "learning_rate": 1.3233794412797387e-05, - "loss": 0.3205, + "epoch": 1.3734726924775504, + "grad_norm": 0.38476022515812874, + "learning_rate": 1.316240949667539e-05, + "loss": 0.3296, "step": 9330 }, { - "epoch": 1.3677655677655678, - "grad_norm": 0.37314536883933214, - "learning_rate": 1.3225726833761729e-05, - "loss": 0.3243, + "epoch": 1.3742087442955984, + "grad_norm": 0.3545994591146522, + "learning_rate": 1.3154283473705645e-05, + "loss": 0.3226, "step": 9335 }, { - "epoch": 1.3684981684981685, - "grad_norm": 0.34344786208460487, - "learning_rate": 1.321765691072372e-05, - "loss": 0.3095, + "epoch": 1.3749447961136463, + "grad_norm": 0.3630231695401911, + "learning_rate": 1.3146155137103135e-05, + "loss": 0.3332, "step": 9340 }, { - "epoch": 1.3692307692307693, - "grad_norm": 0.3672887626205064, - "learning_rate": 1.3209584649547438e-05, - "loss": 0.3353, + "epoch": 1.3756808479316944, + "grad_norm": 0.36868872546961123, + "learning_rate": 1.3138024492829902e-05, + "loss": 0.329, "step": 9345 }, { - "epoch": 1.36996336996337, - "grad_norm": 0.3999571631089137, - "learning_rate": 1.3201510056098665e-05, - "loss": 0.3308, + "epoch": 1.3764168997497424, + "grad_norm": 0.3751309059411541, + "learning_rate": 1.3129891546849688e-05, + "loss": 0.3432, "step": 9350 }, { - "epoch": 1.3706959706959707, - "grad_norm": 0.34257353112977285, - "learning_rate": 1.3193433136244868e-05, - "loss": 0.3123, + "epoch": 1.3771529515677905, + "grad_norm": 0.3615824589106786, + "learning_rate": 1.3121756305127925e-05, + "loss": 0.3221, "step": 9355 }, { - "epoch": 1.3714285714285714, - "grad_norm": 0.34737676485578445, - "learning_rate": 1.3185353895855215e-05, - "loss": 0.3301, + "epoch": 1.3778890033858384, + "grad_norm": 0.3516015293803255, + "learning_rate": 1.311361877363172e-05, + "loss": 0.3167, "step": 9360 }, { - "epoch": 1.3721611721611722, - "grad_norm": 0.36982798486097596, - "learning_rate": 1.317727234080056e-05, - "loss": 0.3308, + "epoch": 1.3786250552038863, + "grad_norm": 0.37834677729887595, + "learning_rate": 1.3105478958329865e-05, + "loss": 0.3322, "step": 9365 }, { - "epoch": 1.372893772893773, - "grad_norm": 0.3669270882321095, - "learning_rate": 1.3169188476953422e-05, - "loss": 0.3305, + "epoch": 1.3793611070219343, + "grad_norm": 0.3626664899177328, + "learning_rate": 1.3097336865192825e-05, + "loss": 0.3223, "step": 9370 }, { - "epoch": 1.3736263736263736, - "grad_norm": 0.3628003702591798, - "learning_rate": 1.316110231018803e-05, - "loss": 0.3301, + "epoch": 1.3800971588399824, + "grad_norm": 0.3518899413107858, + "learning_rate": 1.3089192500192742e-05, + "loss": 0.3163, "step": 9375 }, { - "epoch": 1.3743589743589744, - "grad_norm": 0.35373465603923576, - "learning_rate": 1.3153013846380249e-05, - "loss": 0.3159, + "epoch": 1.3808332106580303, + "grad_norm": 0.3674765109654592, + "learning_rate": 1.3081045869303417e-05, + "loss": 0.3383, "step": 9380 }, { - "epoch": 1.375091575091575, - "grad_norm": 0.3655259797331987, - "learning_rate": 1.314492309140764e-05, - "loss": 0.3282, + "epoch": 1.3815692624760783, + "grad_norm": 0.3416972537434523, + "learning_rate": 1.3072896978500312e-05, + "loss": 0.3086, "step": 9385 }, { - "epoch": 1.3758241758241758, - "grad_norm": 0.3539664614226634, - "learning_rate": 1.313683005114942e-05, - "loss": 0.3144, + "epoch": 1.3823053142941264, + "grad_norm": 0.3464971046445158, + "learning_rate": 1.3064745833760554e-05, + "loss": 0.3049, "step": 9390 }, { - "epoch": 1.3765567765567766, - "grad_norm": 0.3828454934600868, - "learning_rate": 1.3128734731486465e-05, - "loss": 0.3183, + "epoch": 1.3830413661121743, + "grad_norm": 0.3616307252212219, + "learning_rate": 1.3056592441062921e-05, + "loss": 0.3348, "step": 9395 }, { - "epoch": 1.3772893772893773, - "grad_norm": 0.3621509618726607, - "learning_rate": 1.3120637138301309e-05, - "loss": 0.3311, + "epoch": 1.3837774179302222, + "grad_norm": 0.3729590232939161, + "learning_rate": 1.3048436806387831e-05, + "loss": 0.3232, "step": 9400 }, { - "epoch": 1.378021978021978, - "grad_norm": 0.40709700114637165, - "learning_rate": 1.3112537277478139e-05, - "loss": 0.3392, + "epoch": 1.3845134697482702, + "grad_norm": 0.3755404182669986, + "learning_rate": 1.304027893571736e-05, + "loss": 0.3491, "step": 9405 }, { - "epoch": 1.3787545787545787, - "grad_norm": 0.35733461333741195, - "learning_rate": 1.3104435154902789e-05, - "loss": 0.3015, + "epoch": 1.3852495215663183, + "grad_norm": 0.35654169900498894, + "learning_rate": 1.3032118835035216e-05, + "loss": 0.3201, "step": 9410 }, { - "epoch": 1.3794871794871795, - "grad_norm": 0.3683668380476496, - "learning_rate": 1.3096330776462738e-05, - "loss": 0.3485, + "epoch": 1.3859855733843662, + "grad_norm": 0.3630148565795268, + "learning_rate": 1.3023956510326747e-05, + "loss": 0.3159, "step": 9415 }, { - "epoch": 1.3802197802197802, - "grad_norm": 0.37440441580436834, - "learning_rate": 1.3088224148047102e-05, - "loss": 0.3179, + "epoch": 1.3867216252024144, + "grad_norm": 0.3806107009949009, + "learning_rate": 1.301579196757893e-05, + "loss": 0.3427, "step": 9420 }, { - "epoch": 1.380952380952381, - "grad_norm": 0.3761069786909644, - "learning_rate": 1.3080115275546633e-05, - "loss": 0.3231, + "epoch": 1.3874576770204623, + "grad_norm": 0.3609297145972763, + "learning_rate": 1.3007625212780362e-05, + "loss": 0.3359, "step": 9425 }, { - "epoch": 1.3816849816849817, - "grad_norm": 0.36812148125041294, - "learning_rate": 1.3072004164853708e-05, - "loss": 0.3318, + "epoch": 1.3881937288385102, + "grad_norm": 0.3647762309908433, + "learning_rate": 1.2999456251921274e-05, + "loss": 0.328, "step": 9430 }, { - "epoch": 1.3824175824175824, - "grad_norm": 0.3680198040443312, - "learning_rate": 1.3063890821862343e-05, - "loss": 0.3173, + "epoch": 1.3889297806565581, + "grad_norm": 0.3635213304603404, + "learning_rate": 1.2991285090993508e-05, + "loss": 0.3469, "step": 9435 }, { - "epoch": 1.3831501831501831, - "grad_norm": 0.364173471028859, - "learning_rate": 1.3055775252468168e-05, - "loss": 0.3143, + "epoch": 1.3896658324746063, + "grad_norm": 0.35263350449891173, + "learning_rate": 1.2983111735990526e-05, + "loss": 0.3262, "step": 9440 }, { - "epoch": 1.3838827838827839, - "grad_norm": 0.35694349814381804, - "learning_rate": 1.3047657462568429e-05, - "loss": 0.3134, + "epoch": 1.3904018842926542, + "grad_norm": 0.3582007215649553, + "learning_rate": 1.297493619290739e-05, + "loss": 0.3223, "step": 9445 }, { - "epoch": 1.3846153846153846, - "grad_norm": 0.3857759577482839, - "learning_rate": 1.303953745806199e-05, - "loss": 0.3222, + "epoch": 1.3911379361107021, + "grad_norm": 0.3575063738974865, + "learning_rate": 1.2966758467740778e-05, + "loss": 0.3301, "step": 9450 }, { - "epoch": 1.3853479853479853, - "grad_norm": 0.3592364706404115, - "learning_rate": 1.3031415244849322e-05, - "loss": 0.3198, + "epoch": 1.3918739879287503, + "grad_norm": 0.3737456120836393, + "learning_rate": 1.2958578566488962e-05, + "loss": 0.3179, "step": 9455 }, { - "epoch": 1.386080586080586, - "grad_norm": 0.36463687509292364, - "learning_rate": 1.3023290828832505e-05, - "loss": 0.3349, + "epoch": 1.3926100397467982, + "grad_norm": 0.3627947568637503, + "learning_rate": 1.2950396495151806e-05, + "loss": 0.3371, "step": 9460 }, { - "epoch": 1.3868131868131868, - "grad_norm": 0.3523885736969607, - "learning_rate": 1.301516421591521e-05, - "loss": 0.3116, + "epoch": 1.3933460915648461, + "grad_norm": 0.34610506809506375, + "learning_rate": 1.294221225973078e-05, + "loss": 0.3178, "step": 9465 }, { - "epoch": 1.3875457875457875, - "grad_norm": 0.3672763338810888, - "learning_rate": 1.3007035412002716e-05, - "loss": 0.3394, + "epoch": 1.394082143382894, + "grad_norm": 0.3622671936332881, + "learning_rate": 1.2934025866228921e-05, + "loss": 0.3259, "step": 9470 }, { - "epoch": 1.3882783882783882, - "grad_norm": 0.3663609919844645, - "learning_rate": 1.2998904423001884e-05, - "loss": 0.3307, + "epoch": 1.3948181952009422, + "grad_norm": 0.3654711313494927, + "learning_rate": 1.2925837320650863e-05, + "loss": 0.332, "step": 9475 }, { - "epoch": 1.389010989010989, - "grad_norm": 0.3800146791680141, - "learning_rate": 1.2990771254821173e-05, - "loss": 0.3343, + "epoch": 1.3955542470189901, + "grad_norm": 0.37062897243648746, + "learning_rate": 1.291764662900282e-05, + "loss": 0.3256, "step": 9480 }, { - "epoch": 1.3897435897435897, - "grad_norm": 0.34233438244524395, - "learning_rate": 1.2982635913370615e-05, - "loss": 0.3184, + "epoch": 1.3962902988370383, + "grad_norm": 0.3616799422926142, + "learning_rate": 1.2909453797292573e-05, + "loss": 0.3303, "step": 9485 }, { - "epoch": 1.3904761904761904, - "grad_norm": 0.3650168981731367, - "learning_rate": 1.2974498404561833e-05, - "loss": 0.3288, + "epoch": 1.3970263506550862, + "grad_norm": 0.3625431047824287, + "learning_rate": 1.2901258831529476e-05, + "loss": 0.335, "step": 9490 }, { - "epoch": 1.3912087912087912, - "grad_norm": 0.3736883637980164, - "learning_rate": 1.2966358734308009e-05, - "loss": 0.3344, + "epoch": 1.397762402473134, + "grad_norm": 0.37848657053974294, + "learning_rate": 1.2893061737724443e-05, + "loss": 0.3237, "step": 9495 }, { - "epoch": 1.3919413919413919, - "grad_norm": 0.37266422942715227, - "learning_rate": 1.2958216908523913e-05, - "loss": 0.3287, + "epoch": 1.398498454291182, + "grad_norm": 0.360329755944578, + "learning_rate": 1.2884862521889962e-05, + "loss": 0.332, "step": 9500 }, { - "epoch": 1.3926739926739926, - "grad_norm": 0.3735090036989961, - "learning_rate": 1.295007293312587e-05, - "loss": 0.3412, + "epoch": 1.3992345061092302, + "grad_norm": 0.35846538165300607, + "learning_rate": 1.2876661190040065e-05, + "loss": 0.336, "step": 9505 }, { - "epoch": 1.3934065934065933, - "grad_norm": 0.3736261624860495, - "learning_rate": 1.2941926814031771e-05, - "loss": 0.3181, + "epoch": 1.399970557927278, + "grad_norm": 0.36634166720874467, + "learning_rate": 1.2868457748190338e-05, + "loss": 0.3153, "step": 9510 }, { - "epoch": 1.394139194139194, - "grad_norm": 0.36911044210076915, - "learning_rate": 1.293377855716106e-05, - "loss": 0.3184, + "epoch": 1.400706609745326, + "grad_norm": 0.346140825339715, + "learning_rate": 1.286025220235792e-05, + "loss": 0.3131, "step": 9515 }, { - "epoch": 1.3948717948717948, - "grad_norm": 0.371489730667187, - "learning_rate": 1.2925628168434745e-05, - "loss": 0.3283, + "epoch": 1.4014426615633742, + "grad_norm": 0.3649084286169258, + "learning_rate": 1.2852044558561485e-05, + "loss": 0.3186, "step": 9520 }, { - "epoch": 1.3956043956043955, - "grad_norm": 0.3572826907489989, - "learning_rate": 1.2917475653775375e-05, - "loss": 0.3316, + "epoch": 1.402178713381422, + "grad_norm": 0.3647161259847614, + "learning_rate": 1.2843834822821254e-05, + "loss": 0.3212, "step": 9525 }, { - "epoch": 1.3963369963369963, - "grad_norm": 0.38473967712910584, - "learning_rate": 1.2909321019107042e-05, - "loss": 0.3387, + "epoch": 1.40291476519947, + "grad_norm": 0.36547776557727774, + "learning_rate": 1.2835623001158982e-05, + "loss": 0.3197, "step": 9530 }, { - "epoch": 1.397069597069597, - "grad_norm": 0.3652353261820711, - "learning_rate": 1.2901164270355384e-05, - "loss": 0.3136, + "epoch": 1.403650817017518, + "grad_norm": 0.4406783824401941, + "learning_rate": 1.2827409099597952e-05, + "loss": 0.3346, "step": 9535 }, { - "epoch": 1.3978021978021977, - "grad_norm": 0.3686381795113925, - "learning_rate": 1.2893005413447575e-05, - "loss": 0.3273, + "epoch": 1.404386868835566, + "grad_norm": 0.3804708880600225, + "learning_rate": 1.2819193124162963e-05, + "loss": 0.332, "step": 9540 }, { - "epoch": 1.3985347985347985, - "grad_norm": 0.3600860568488306, - "learning_rate": 1.2884844454312312e-05, - "loss": 0.3263, + "epoch": 1.405122920653614, + "grad_norm": 0.4583885112627424, + "learning_rate": 1.2810975080880352e-05, + "loss": 0.3277, "step": 9545 }, { - "epoch": 1.3992673992673992, - "grad_norm": 0.3519233136105934, - "learning_rate": 1.2876681398879837e-05, - "loss": 0.3165, + "epoch": 1.4058589724716621, + "grad_norm": 0.35210293086921174, + "learning_rate": 1.2802754975777962e-05, + "loss": 0.3205, "step": 9550 }, { - "epoch": 1.4, - "grad_norm": 0.38029208605876735, - "learning_rate": 1.2868516253081897e-05, - "loss": 0.3349, + "epoch": 1.40659502428971, + "grad_norm": 0.36636420152330823, + "learning_rate": 1.2794532814885152e-05, + "loss": 0.3308, "step": 9555 }, { - "epoch": 1.4007326007326006, - "grad_norm": 0.35691788504252764, - "learning_rate": 1.2860349022851772e-05, - "loss": 0.3092, + "epoch": 1.407331076107758, + "grad_norm": 0.3594038505788453, + "learning_rate": 1.2786308604232786e-05, + "loss": 0.333, "step": 9560 }, { - "epoch": 1.4014652014652014, - "grad_norm": 0.35239929231196526, - "learning_rate": 1.2852179714124241e-05, - "loss": 0.3188, + "epoch": 1.408067127925806, + "grad_norm": 0.37265979886466455, + "learning_rate": 1.2778082349853238e-05, + "loss": 0.3215, "step": 9565 }, { - "epoch": 1.402197802197802, - "grad_norm": 0.37262611721225447, - "learning_rate": 1.2844008332835616e-05, - "loss": 0.3381, + "epoch": 1.408803179743854, + "grad_norm": 0.35821379029717326, + "learning_rate": 1.2769854057780371e-05, + "loss": 0.3161, "step": 9570 }, { - "epoch": 1.4029304029304028, - "grad_norm": 0.36208087825524055, - "learning_rate": 1.2835834884923695e-05, - "loss": 0.335, + "epoch": 1.409539231561902, + "grad_norm": 0.37051766835051625, + "learning_rate": 1.2761623734049548e-05, + "loss": 0.3221, "step": 9575 }, { - "epoch": 1.4036630036630036, - "grad_norm": 0.37367278329464243, - "learning_rate": 1.2827659376327779e-05, - "loss": 0.3214, + "epoch": 1.41027528337995, + "grad_norm": 0.36328167945139916, + "learning_rate": 1.2753391384697629e-05, + "loss": 0.3315, "step": 9580 }, { - "epoch": 1.4043956043956043, - "grad_norm": 0.3695960378064843, - "learning_rate": 1.2819481812988683e-05, - "loss": 0.3241, + "epoch": 1.411011335197998, + "grad_norm": 0.3736414650967738, + "learning_rate": 1.2745157015762946e-05, + "loss": 0.3418, "step": 9585 }, { - "epoch": 1.405128205128205, - "grad_norm": 0.38738496590910493, - "learning_rate": 1.2811302200848699e-05, - "loss": 0.337, + "epoch": 1.411747387016046, + "grad_norm": 0.36379868859809367, + "learning_rate": 1.2736920633285323e-05, + "loss": 0.341, "step": 9590 }, { - "epoch": 1.4058608058608058, - "grad_norm": 0.3791099520469478, - "learning_rate": 1.2803120545851613e-05, - "loss": 0.3412, + "epoch": 1.4124834388340939, + "grad_norm": 0.39212028191698084, + "learning_rate": 1.2728682243306062e-05, + "loss": 0.3246, "step": 9595 }, { - "epoch": 1.4065934065934065, - "grad_norm": 0.35992251660877783, - "learning_rate": 1.2794936853942695e-05, - "loss": 0.3317, + "epoch": 1.4132194906521418, + "grad_norm": 0.3675636161944304, + "learning_rate": 1.2720441851867921e-05, + "loss": 0.3236, "step": 9600 }, { - "epoch": 1.4073260073260072, - "grad_norm": 0.37344607639813765, - "learning_rate": 1.2786751131068698e-05, - "loss": 0.3252, + "epoch": 1.41395554247019, + "grad_norm": 0.35409789751463777, + "learning_rate": 1.2712199465015148e-05, + "loss": 0.324, "step": 9605 }, { - "epoch": 1.408058608058608, - "grad_norm": 0.36916880428194454, - "learning_rate": 1.2778563383177845e-05, - "loss": 0.3097, + "epoch": 1.4146915942882379, + "grad_norm": 0.3601135565256549, + "learning_rate": 1.2703955088793436e-05, + "loss": 0.3295, "step": 9610 }, { - "epoch": 1.4087912087912087, - "grad_norm": 0.3651252085756624, - "learning_rate": 1.277037361621984e-05, - "loss": 0.327, + "epoch": 1.415427646106286, + "grad_norm": 0.3532116865072582, + "learning_rate": 1.2695708729249954e-05, + "loss": 0.3299, "step": 9615 }, { - "epoch": 1.4095238095238094, - "grad_norm": 0.3686790573730101, - "learning_rate": 1.2762181836145839e-05, - "loss": 0.3319, + "epoch": 1.416163697924334, + "grad_norm": 0.3832317141267909, + "learning_rate": 1.2687460392433313e-05, + "loss": 0.3323, "step": 9620 }, { - "epoch": 1.4102564102564101, - "grad_norm": 0.388258250297523, - "learning_rate": 1.275398804890848e-05, - "loss": 0.3412, + "epoch": 1.4168997497423819, + "grad_norm": 0.37985665667603924, + "learning_rate": 1.267921008439358e-05, + "loss": 0.3252, "step": 9625 }, { - "epoch": 1.4109890109890109, - "grad_norm": 0.3849595586698791, - "learning_rate": 1.274579226046184e-05, - "loss": 0.3451, + "epoch": 1.4176358015604298, + "grad_norm": 0.36948719326893054, + "learning_rate": 1.2670957811182268e-05, + "loss": 0.3112, "step": 9630 }, { - "epoch": 1.4117216117216116, - "grad_norm": 0.3592011858806382, - "learning_rate": 1.273759447676147e-05, - "loss": 0.3392, + "epoch": 1.4183718533784777, + "grad_norm": 0.3546223816404936, + "learning_rate": 1.2662703578852326e-05, + "loss": 0.3249, "step": 9635 }, { - "epoch": 1.4124542124542123, - "grad_norm": 0.37786836832104254, - "learning_rate": 1.2729394703764349e-05, - "loss": 0.3272, + "epoch": 1.4191079051965259, + "grad_norm": 0.3673572397597473, + "learning_rate": 1.2654447393458146e-05, + "loss": 0.3419, "step": 9640 }, { - "epoch": 1.413186813186813, - "grad_norm": 0.3712268682424444, - "learning_rate": 1.2721192947428922e-05, - "loss": 0.335, + "epoch": 1.4198439570145738, + "grad_norm": 0.35982448182968724, + "learning_rate": 1.2646189261055552e-05, + "loss": 0.3336, "step": 9645 }, { - "epoch": 1.4139194139194138, - "grad_norm": 0.3694596181035189, - "learning_rate": 1.271298921371506e-05, - "loss": 0.3257, + "epoch": 1.420580008832622, + "grad_norm": 0.3479702237193538, + "learning_rate": 1.2637929187701794e-05, + "loss": 0.3279, "step": 9650 }, { - "epoch": 1.4146520146520147, - "grad_norm": 0.36434138856836823, - "learning_rate": 1.2704783508584085e-05, - "loss": 0.3319, + "epoch": 1.4213160606506698, + "grad_norm": 0.36333010166494983, + "learning_rate": 1.2629667179455545e-05, + "loss": 0.3303, "step": 9655 }, { - "epoch": 1.4153846153846155, - "grad_norm": 0.3620604578743319, - "learning_rate": 1.2696575837998734e-05, - "loss": 0.3287, + "epoch": 1.4220521124687178, + "grad_norm": 0.36443666861560564, + "learning_rate": 1.26214032423769e-05, + "loss": 0.336, "step": 9660 }, { - "epoch": 1.4161172161172162, - "grad_norm": 0.36846199251115214, - "learning_rate": 1.2688366207923192e-05, - "loss": 0.3161, + "epoch": 1.4227881642867657, + "grad_norm": 0.37994334126913654, + "learning_rate": 1.2613137382527368e-05, + "loss": 0.3308, "step": 9665 }, { - "epoch": 1.416849816849817, - "grad_norm": 0.3709853962372448, - "learning_rate": 1.2680154624323049e-05, - "loss": 0.3246, + "epoch": 1.4235242161048138, + "grad_norm": 0.3550709018491184, + "learning_rate": 1.2604869605969863e-05, + "loss": 0.3189, "step": 9670 }, { - "epoch": 1.4175824175824177, - "grad_norm": 0.35401331608077374, - "learning_rate": 1.267194109316533e-05, - "loss": 0.3162, + "epoch": 1.4242602679228618, + "grad_norm": 0.3807182018327525, + "learning_rate": 1.2596599918768714e-05, + "loss": 0.3164, "step": 9675 }, { - "epoch": 1.4183150183150184, - "grad_norm": 0.37944078709014817, - "learning_rate": 1.2663725620418466e-05, - "loss": 0.3289, + "epoch": 1.4249963197409097, + "grad_norm": 0.3535627540152235, + "learning_rate": 1.258832832698965e-05, + "loss": 0.3331, "step": 9680 }, { - "epoch": 1.4190476190476191, - "grad_norm": 0.36152080018956056, - "learning_rate": 1.2655508212052299e-05, - "loss": 0.328, + "epoch": 1.4257323715589578, + "grad_norm": 0.3597186046691988, + "learning_rate": 1.2580054836699787e-05, + "loss": 0.3357, "step": 9685 }, { - "epoch": 1.4197802197802198, - "grad_norm": 0.363601312531346, - "learning_rate": 1.2647288874038081e-05, - "loss": 0.3382, + "epoch": 1.4264684233770057, + "grad_norm": 0.3607898725088784, + "learning_rate": 1.2571779453967645e-05, + "loss": 0.3177, "step": 9690 }, { - "epoch": 1.4205128205128206, - "grad_norm": 0.3519597170541447, - "learning_rate": 1.263906761234847e-05, - "loss": 0.3103, + "epoch": 1.4272044751950537, + "grad_norm": 0.3685107178438591, + "learning_rate": 1.2563502184863123e-05, + "loss": 0.3111, "step": 9695 }, { - "epoch": 1.4212454212454213, - "grad_norm": 0.36322466129170256, - "learning_rate": 1.2630844432957511e-05, - "loss": 0.3382, + "epoch": 1.4279405270131016, + "grad_norm": 0.3685121524762989, + "learning_rate": 1.2555223035457513e-05, + "loss": 0.312, "step": 9700 }, { - "epoch": 1.421978021978022, - "grad_norm": 0.3533094718983132, - "learning_rate": 1.2622619341840651e-05, - "loss": 0.3299, + "epoch": 1.4286765788311497, + "grad_norm": 0.36777856634479505, + "learning_rate": 1.2546942011823482e-05, + "loss": 0.3203, "step": 9705 }, { - "epoch": 1.4227106227106228, - "grad_norm": 0.3634175150959428, - "learning_rate": 1.2614392344974726e-05, - "loss": 0.3213, + "epoch": 1.4294126306491977, + "grad_norm": 0.3763432254595332, + "learning_rate": 1.2538659120035065e-05, + "loss": 0.3351, "step": 9710 }, { - "epoch": 1.4234432234432235, - "grad_norm": 0.3777985192244422, - "learning_rate": 1.2606163448337951e-05, - "loss": 0.3427, + "epoch": 1.4301486824672458, + "grad_norm": 0.3560562401177806, + "learning_rate": 1.2530374366167681e-05, + "loss": 0.3154, "step": 9715 }, { - "epoch": 1.4241758241758242, - "grad_norm": 0.3678173360075112, - "learning_rate": 1.2597932657909925e-05, - "loss": 0.3216, + "epoch": 1.4308847342852937, + "grad_norm": 0.3808190872790811, + "learning_rate": 1.2522087756298105e-05, + "loss": 0.3383, "step": 9720 }, { - "epoch": 1.424908424908425, - "grad_norm": 0.3852857078850646, - "learning_rate": 1.2589699979671625e-05, - "loss": 0.3381, + "epoch": 1.4316207861033416, + "grad_norm": 0.352390935554495, + "learning_rate": 1.251379929650447e-05, + "loss": 0.3163, "step": 9725 }, { - "epoch": 1.4256410256410257, - "grad_norm": 0.4059792809134705, - "learning_rate": 1.2581465419605401e-05, - "loss": 0.3288, + "epoch": 1.4323568379213896, + "grad_norm": 0.34418188179796083, + "learning_rate": 1.2505508992866277e-05, + "loss": 0.3261, "step": 9730 }, { - "epoch": 1.4263736263736264, - "grad_norm": 0.3791798416926858, - "learning_rate": 1.2573228983694959e-05, - "loss": 0.326, + "epoch": 1.4330928897394377, + "grad_norm": 0.369413162101418, + "learning_rate": 1.2497216851464376e-05, + "loss": 0.3386, "step": 9735 }, { - "epoch": 1.4271062271062271, - "grad_norm": 0.36026587693992806, - "learning_rate": 1.2564990677925384e-05, - "loss": 0.3318, + "epoch": 1.4338289415574856, + "grad_norm": 0.35288828172615627, + "learning_rate": 1.2488922878380956e-05, + "loss": 0.3185, "step": 9740 }, { - "epoch": 1.4278388278388279, - "grad_norm": 0.38410791336784683, - "learning_rate": 1.255675050828311e-05, - "loss": 0.3293, + "epoch": 1.4345649933755336, + "grad_norm": 0.37710524998769407, + "learning_rate": 1.2480627079699559e-05, + "loss": 0.334, "step": 9745 }, { - "epoch": 1.4285714285714286, - "grad_norm": 0.36705079163522697, - "learning_rate": 1.2548508480755928e-05, - "loss": 0.3297, + "epoch": 1.4353010451935817, + "grad_norm": 0.3688631183905549, + "learning_rate": 1.2472329461505066e-05, + "loss": 0.3308, "step": 9750 }, { - "epoch": 1.4293040293040293, - "grad_norm": 0.3476239543435942, - "learning_rate": 1.2540264601332974e-05, - "loss": 0.3279, + "epoch": 1.4360370970116296, + "grad_norm": 0.34165518712974824, + "learning_rate": 1.246403002988369e-05, + "loss": 0.3364, "step": 9755 }, { - "epoch": 1.43003663003663, - "grad_norm": 0.37436542488898616, - "learning_rate": 1.253201887600474e-05, - "loss": 0.3279, + "epoch": 1.4367731488296775, + "grad_norm": 0.35624705422598835, + "learning_rate": 1.2455728790922969e-05, + "loss": 0.3209, "step": 9760 }, { - "epoch": 1.4307692307692308, - "grad_norm": 0.3742352733144454, - "learning_rate": 1.2523771310763055e-05, - "loss": 0.3277, + "epoch": 1.4375092006477255, + "grad_norm": 0.3843382314486735, + "learning_rate": 1.2447425750711781e-05, + "loss": 0.309, "step": 9765 }, { - "epoch": 1.4315018315018315, - "grad_norm": 0.35372661192560517, - "learning_rate": 1.2515521911601081e-05, - "loss": 0.3234, + "epoch": 1.4382452524657736, + "grad_norm": 0.3679391034259437, + "learning_rate": 1.243912091534031e-05, + "loss": 0.332, "step": 9770 }, { - "epoch": 1.4322344322344323, - "grad_norm": 0.38120300969208226, - "learning_rate": 1.2507270684513311e-05, - "loss": 0.3128, + "epoch": 1.4389813042838215, + "grad_norm": 0.3775044558879287, + "learning_rate": 1.243081429090006e-05, + "loss": 0.336, "step": 9775 }, { - "epoch": 1.432967032967033, - "grad_norm": 0.35550449926136707, - "learning_rate": 1.2499017635495578e-05, - "loss": 0.3324, + "epoch": 1.4397173561018697, + "grad_norm": 0.37351475069537915, + "learning_rate": 1.2422505883483855e-05, + "loss": 0.3163, "step": 9780 }, { - "epoch": 1.4336996336996337, - "grad_norm": 0.35052414795080994, - "learning_rate": 1.2490762770545027e-05, - "loss": 0.3276, + "epoch": 1.4404534079199176, + "grad_norm": 0.3722635180490597, + "learning_rate": 1.241419569918582e-05, + "loss": 0.3441, "step": 9785 }, { - "epoch": 1.4344322344322344, - "grad_norm": 0.3670233096104474, - "learning_rate": 1.2482506095660131e-05, - "loss": 0.3224, + "epoch": 1.4411894597379655, + "grad_norm": 0.37797484378951485, + "learning_rate": 1.2405883744101388e-05, + "loss": 0.3157, "step": 9790 }, { - "epoch": 1.4351648351648352, - "grad_norm": 0.35942624267497264, - "learning_rate": 1.2474247616840668e-05, - "loss": 0.3272, + "epoch": 1.4419255115560135, + "grad_norm": 0.9221811180744349, + "learning_rate": 1.2397570024327284e-05, + "loss": 0.3344, "step": 9795 }, { - "epoch": 1.435897435897436, - "grad_norm": 0.35216775529264543, - "learning_rate": 1.2465987340087742e-05, - "loss": 0.3138, + "epoch": 1.4426615633740616, + "grad_norm": 0.34544402258408435, + "learning_rate": 1.2389254545961533e-05, + "loss": 0.3256, "step": 9800 }, { - "epoch": 1.4366300366300366, - "grad_norm": 0.36405057285971654, - "learning_rate": 1.2457725271403747e-05, - "loss": 0.3329, + "epoch": 1.4433976151921095, + "grad_norm": 0.3550167591332494, + "learning_rate": 1.238093731510345e-05, + "loss": 0.3299, "step": 9805 }, { - "epoch": 1.4373626373626374, - "grad_norm": 0.3468316475716749, - "learning_rate": 1.2449461416792392e-05, - "loss": 0.3241, + "epoch": 1.4441336670101574, + "grad_norm": 0.36703674325087776, + "learning_rate": 1.237261833785363e-05, + "loss": 0.3194, "step": 9810 }, { - "epoch": 1.438095238095238, - "grad_norm": 0.36416323875613926, - "learning_rate": 1.2441195782258673e-05, - "loss": 0.311, + "epoch": 1.4448697188282056, + "grad_norm": 0.3558987265422059, + "learning_rate": 1.2364297620313957e-05, + "loss": 0.317, "step": 9815 }, { - "epoch": 1.4388278388278388, - "grad_norm": 0.35905622215077654, - "learning_rate": 1.243292837380889e-05, - "loss": 0.3239, + "epoch": 1.4456057706462535, + "grad_norm": 0.35348406119176673, + "learning_rate": 1.2355975168587585e-05, + "loss": 0.3191, "step": 9820 }, { - "epoch": 1.4395604395604396, - "grad_norm": 0.39314554037862787, - "learning_rate": 1.2424659197450623e-05, - "loss": 0.3261, + "epoch": 1.4463418224643014, + "grad_norm": 0.36008976029499745, + "learning_rate": 1.234765098877894e-05, + "loss": 0.3418, "step": 9825 }, { - "epoch": 1.4402930402930403, - "grad_norm": 0.3664159966632821, - "learning_rate": 1.2416388259192747e-05, - "loss": 0.3254, + "epoch": 1.4470778742823494, + "grad_norm": 0.36495364878620473, + "learning_rate": 1.2339325086993722e-05, + "loss": 0.3273, "step": 9830 }, { - "epoch": 1.441025641025641, - "grad_norm": 0.3671811945041504, - "learning_rate": 1.2408115565045405e-05, - "loss": 0.3306, + "epoch": 1.4478139261003975, + "grad_norm": 0.35149843081280185, + "learning_rate": 1.2330997469338887e-05, + "loss": 0.3056, "step": 9835 }, { - "epoch": 1.4417582417582417, - "grad_norm": 0.3703868523472143, - "learning_rate": 1.2399841121020022e-05, - "loss": 0.3276, + "epoch": 1.4485499779184454, + "grad_norm": 0.36363605789573816, + "learning_rate": 1.2322668141922653e-05, + "loss": 0.3259, "step": 9840 }, { - "epoch": 1.4424908424908425, - "grad_norm": 0.37727072105652976, - "learning_rate": 1.2391564933129296e-05, - "loss": 0.3271, + "epoch": 1.4492860297364936, + "grad_norm": 0.36226765876901146, + "learning_rate": 1.2314337110854493e-05, + "loss": 0.314, "step": 9845 }, { - "epoch": 1.4432234432234432, - "grad_norm": 0.3779886259459971, - "learning_rate": 1.2383287007387193e-05, - "loss": 0.3092, + "epoch": 1.4500220815545415, + "grad_norm": 0.37791775400535604, + "learning_rate": 1.2306004382245125e-05, + "loss": 0.3387, "step": 9850 }, { - "epoch": 1.443956043956044, - "grad_norm": 0.35423291980474786, - "learning_rate": 1.2375007349808935e-05, - "loss": 0.3211, + "epoch": 1.4507581333725894, + "grad_norm": 0.3644828069004087, + "learning_rate": 1.2297669962206517e-05, + "loss": 0.3324, "step": 9855 }, { - "epoch": 1.4446886446886447, - "grad_norm": 0.3750162854977784, - "learning_rate": 1.236672596641101e-05, - "loss": 0.3358, + "epoch": 1.4514941851906373, + "grad_norm": 0.35087905074213327, + "learning_rate": 1.228933385685188e-05, + "loss": 0.3092, "step": 9860 }, { - "epoch": 1.4454212454212454, - "grad_norm": 0.387055357114979, - "learning_rate": 1.2358442863211155e-05, - "loss": 0.3373, + "epoch": 1.4522302370086855, + "grad_norm": 0.36174562446762215, + "learning_rate": 1.2280996072295648e-05, + "loss": 0.3264, "step": 9865 }, { - "epoch": 1.4461538461538461, - "grad_norm": 0.35012916522582016, - "learning_rate": 1.235015804622836e-05, - "loss": 0.3273, + "epoch": 1.4529662888267334, + "grad_norm": 0.3826809189587954, + "learning_rate": 1.2272656614653502e-05, + "loss": 0.338, "step": 9870 }, { - "epoch": 1.4468864468864469, - "grad_norm": 0.3733919351729767, - "learning_rate": 1.2341871521482863e-05, - "loss": 0.3296, + "epoch": 1.4537023406447813, + "grad_norm": 0.5562019271309896, + "learning_rate": 1.226431549004234e-05, + "loss": 0.3317, "step": 9875 }, { - "epoch": 1.4476190476190476, - "grad_norm": 0.348525810007892, - "learning_rate": 1.2333583294996129e-05, - "loss": 0.3179, + "epoch": 1.4544383924628295, + "grad_norm": 0.38202217470226957, + "learning_rate": 1.225597270458029e-05, + "loss": 0.3298, "step": 9880 }, { - "epoch": 1.4483516483516483, - "grad_norm": 0.3438451533587944, - "learning_rate": 1.2325293372790879e-05, - "loss": 0.3176, + "epoch": 1.4551744442808774, + "grad_norm": 0.36762194453903313, + "learning_rate": 1.2247628264386695e-05, + "loss": 0.3194, "step": 9885 }, { - "epoch": 1.449084249084249, - "grad_norm": 0.3474758651582091, - "learning_rate": 1.231700176089105e-05, - "loss": 0.3203, + "epoch": 1.4559104960989253, + "grad_norm": 0.36538088919194556, + "learning_rate": 1.2239282175582112e-05, + "loss": 0.3241, "step": 9890 }, { - "epoch": 1.4498168498168498, - "grad_norm": 0.35908142035083745, - "learning_rate": 1.2308708465321818e-05, - "loss": 0.3265, + "epoch": 1.4566465479169732, + "grad_norm": 0.35099984200093015, + "learning_rate": 1.2230934444288303e-05, + "loss": 0.3361, "step": 9895 }, { - "epoch": 1.4505494505494505, - "grad_norm": 0.3529094470709392, - "learning_rate": 1.230041349210958e-05, - "loss": 0.3182, + "epoch": 1.4573825997350214, + "grad_norm": 0.34827385648775994, + "learning_rate": 1.222258507662824e-05, + "loss": 0.3122, "step": 9900 }, { - "epoch": 1.4512820512820512, - "grad_norm": 0.3507377986859153, - "learning_rate": 1.2292116847281945e-05, - "loss": 0.3296, + "epoch": 1.4581186515530693, + "grad_norm": 0.3771645798567479, + "learning_rate": 1.2214234078726095e-05, + "loss": 0.3314, "step": 9905 }, { - "epoch": 1.452014652014652, - "grad_norm": 0.3617787600137893, - "learning_rate": 1.2283818536867742e-05, - "loss": 0.3261, + "epoch": 1.4588547033711174, + "grad_norm": 0.35445021978032787, + "learning_rate": 1.2205881456707228e-05, + "loss": 0.3303, "step": 9910 }, { - "epoch": 1.4527472527472527, - "grad_norm": 0.36655083849655967, - "learning_rate": 1.2275518566897014e-05, - "loss": 0.3394, + "epoch": 1.4595907551891654, + "grad_norm": 0.3522443625084909, + "learning_rate": 1.2197527216698211e-05, + "loss": 0.3166, "step": 9915 }, { - "epoch": 1.4534798534798534, - "grad_norm": 0.37343934762016906, - "learning_rate": 1.2267216943401004e-05, - "loss": 0.3316, + "epoch": 1.4603268070072133, + "grad_norm": 0.35827516974628426, + "learning_rate": 1.2189171364826774e-05, + "loss": 0.3409, "step": 9920 }, { - "epoch": 1.4542124542124542, - "grad_norm": 0.359956083144425, - "learning_rate": 1.2258913672412161e-05, - "loss": 0.3188, + "epoch": 1.4610628588252612, + "grad_norm": 0.36352057787727604, + "learning_rate": 1.2180813907221854e-05, + "loss": 0.3325, "step": 9925 }, { - "epoch": 1.4549450549450549, - "grad_norm": 0.36959838363749536, - "learning_rate": 1.225060875996412e-05, - "loss": 0.3269, + "epoch": 1.4617989106433094, + "grad_norm": 0.3457932075927401, + "learning_rate": 1.217245485001355e-05, + "loss": 0.3122, "step": 9930 }, { - "epoch": 1.4556776556776556, - "grad_norm": 0.3547512039117925, - "learning_rate": 1.224230221209173e-05, - "loss": 0.3192, + "epoch": 1.4625349624613573, + "grad_norm": 0.35550973394070706, + "learning_rate": 1.216409419933314e-05, + "loss": 0.3322, "step": 9935 }, { - "epoch": 1.4564102564102563, - "grad_norm": 0.36457371437656955, - "learning_rate": 1.223399403483101e-05, - "loss": 0.3252, + "epoch": 1.4632710142794052, + "grad_norm": 0.3528983985628642, + "learning_rate": 1.2155731961313078e-05, + "loss": 0.325, "step": 9940 }, { - "epoch": 1.457142857142857, - "grad_norm": 0.36923966954367354, - "learning_rate": 1.2225684234219165e-05, - "loss": 0.3252, + "epoch": 1.4640070660974533, + "grad_norm": 0.3771517536740423, + "learning_rate": 1.2147368142086962e-05, + "loss": 0.3223, "step": 9945 }, { - "epoch": 1.4578754578754578, - "grad_norm": 0.348627234105607, - "learning_rate": 1.221737281629459e-05, - "loss": 0.3119, + "epoch": 1.4647431179155013, + "grad_norm": 0.3646297373366747, + "learning_rate": 1.2139002747789572e-05, + "loss": 0.321, "step": 9950 }, { - "epoch": 1.4586080586080585, - "grad_norm": 0.37792018293515667, - "learning_rate": 1.220905978709685e-05, - "loss": 0.3262, + "epoch": 1.4654791697335492, + "grad_norm": 0.37253843366455597, + "learning_rate": 1.2130635784556832e-05, + "loss": 0.3234, "step": 9955 }, { - "epoch": 1.4593406593406593, - "grad_norm": 0.35387054623983993, - "learning_rate": 1.220074515266667e-05, - "loss": 0.3206, + "epoch": 1.4662152215515971, + "grad_norm": 0.36076725034414125, + "learning_rate": 1.212226725852582e-05, + "loss": 0.3249, "step": 9960 }, { - "epoch": 1.46007326007326, - "grad_norm": 0.3686030934970665, - "learning_rate": 1.2192428919045964e-05, - "loss": 0.3249, + "epoch": 1.4669512733696453, + "grad_norm": 0.39489509622684243, + "learning_rate": 1.211389717583476e-05, + "loss": 0.3214, "step": 9965 }, { - "epoch": 1.4608058608058607, - "grad_norm": 0.3594201922264351, - "learning_rate": 1.2184111092277784e-05, - "loss": 0.3146, + "epoch": 1.4676873251876932, + "grad_norm": 0.3692945107321283, + "learning_rate": 1.210552554262301e-05, + "loss": 0.3184, "step": 9970 }, { - "epoch": 1.4615384615384617, - "grad_norm": 0.37403194478495244, - "learning_rate": 1.2175791678406356e-05, - "loss": 0.3334, + "epoch": 1.4684233770057413, + "grad_norm": 0.37403882553149215, + "learning_rate": 1.2097152365031083e-05, + "loss": 0.3147, "step": 9975 }, { - "epoch": 1.4622710622710624, - "grad_norm": 0.35752027957136523, - "learning_rate": 1.2167470683477054e-05, - "loss": 0.3306, + "epoch": 1.4691594288237892, + "grad_norm": 0.34774471877863267, + "learning_rate": 1.2088777649200607e-05, + "loss": 0.3232, "step": 9980 }, { - "epoch": 1.4630036630036631, - "grad_norm": 0.3860281228719006, - "learning_rate": 1.2159148113536401e-05, - "loss": 0.3259, + "epoch": 1.4698954806418372, + "grad_norm": 0.3561958651402395, + "learning_rate": 1.2080401401274347e-05, + "loss": 0.3158, "step": 9985 }, { - "epoch": 1.4637362637362639, - "grad_norm": 0.34715910320392346, - "learning_rate": 1.2150823974632066e-05, - "loss": 0.3316, + "epoch": 1.470631532459885, + "grad_norm": 0.37059627371950304, + "learning_rate": 1.207202362739619e-05, + "loss": 0.3231, "step": 9990 }, { - "epoch": 1.4644688644688646, - "grad_norm": 0.36121488522937034, - "learning_rate": 1.2142498272812853e-05, - "loss": 0.3297, + "epoch": 1.4713675842779332, + "grad_norm": 0.35692510403444117, + "learning_rate": 1.2063644333711142e-05, + "loss": 0.3245, "step": 9995 }, { - "epoch": 1.4652014652014653, - "grad_norm": 0.36155581049444635, - "learning_rate": 1.2134171014128708e-05, - "loss": 0.3389, + "epoch": 1.4721036360959812, + "grad_norm": 0.3485071439118818, + "learning_rate": 1.2055263526365327e-05, + "loss": 0.3224, "step": 10000 }, { - "epoch": 1.465934065934066, - "grad_norm": 0.3623554102698869, - "learning_rate": 1.2125842204630705e-05, - "loss": 0.3353, + "epoch": 1.472839687914029, + "grad_norm": 0.37337489308107585, + "learning_rate": 1.204688121150597e-05, + "loss": 0.3404, "step": 10005 }, { - "epoch": 1.4666666666666668, - "grad_norm": 0.36221237189765315, - "learning_rate": 1.2117511850371047e-05, - "loss": 0.3146, + "epoch": 1.4735757397320772, + "grad_norm": 0.35341948300371445, + "learning_rate": 1.2038497395281415e-05, + "loss": 0.3229, "step": 10010 }, { - "epoch": 1.4673992673992675, - "grad_norm": 0.3588595459686551, - "learning_rate": 1.2109179957403051e-05, - "loss": 0.3261, + "epoch": 1.4743117915501252, + "grad_norm": 0.3578055509928498, + "learning_rate": 1.2030112083841096e-05, + "loss": 0.3159, "step": 10015 }, { - "epoch": 1.4681318681318682, - "grad_norm": 0.3764291592050611, - "learning_rate": 1.2100846531781172e-05, - "loss": 0.3303, + "epoch": 1.475047843368173, + "grad_norm": 0.3465916189627656, + "learning_rate": 1.2021725283335552e-05, + "loss": 0.3123, "step": 10020 }, { - "epoch": 1.468864468864469, - "grad_norm": 0.3731595124575503, - "learning_rate": 1.2092511579560956e-05, - "loss": 0.3208, + "epoch": 1.475783895186221, + "grad_norm": 0.3630730241253402, + "learning_rate": 1.2013336999916407e-05, + "loss": 0.3246, "step": 10025 }, { - "epoch": 1.4695970695970697, - "grad_norm": 0.34902848189581964, - "learning_rate": 1.2084175106799073e-05, - "loss": 0.3145, + "epoch": 1.4765199470042691, + "grad_norm": 0.3507765218242383, + "learning_rate": 1.200494723973638e-05, + "loss": 0.3311, "step": 10030 }, { - "epoch": 1.4703296703296704, - "grad_norm": 0.3591207789465635, - "learning_rate": 1.2075837119553286e-05, - "loss": 0.3149, + "epoch": 1.477255998822317, + "grad_norm": 0.35638794461805806, + "learning_rate": 1.1996556008949263e-05, + "loss": 0.3264, "step": 10035 }, { - "epoch": 1.4710622710622712, - "grad_norm": 0.3774858195106769, - "learning_rate": 1.2067497623882476e-05, - "loss": 0.3227, + "epoch": 1.4779920506403652, + "grad_norm": 0.3621182798155676, + "learning_rate": 1.1988163313709938e-05, + "loss": 0.3483, "step": 10040 }, { - "epoch": 1.471794871794872, - "grad_norm": 0.35215313715073393, - "learning_rate": 1.20591566258466e-05, - "loss": 0.3077, + "epoch": 1.4787281024584131, + "grad_norm": 0.37484358727447487, + "learning_rate": 1.1979769160174352e-05, + "loss": 0.3208, "step": 10045 }, { - "epoch": 1.4725274725274726, - "grad_norm": 0.3768631505032113, - "learning_rate": 1.2050814131506727e-05, - "loss": 0.3262, + "epoch": 1.479464154276461, + "grad_norm": 0.3603445775138095, + "learning_rate": 1.197137355449953e-05, + "loss": 0.3139, "step": 10050 }, { - "epoch": 1.4732600732600734, - "grad_norm": 0.34814112261314933, - "learning_rate": 1.2042470146924993e-05, - "loss": 0.3226, + "epoch": 1.480200206094509, + "grad_norm": 0.3664768734757381, + "learning_rate": 1.1962976502843555e-05, + "loss": 0.3339, "step": 10055 }, { - "epoch": 1.473992673992674, - "grad_norm": 0.3863813275545428, - "learning_rate": 1.2034124678164631e-05, - "loss": 0.3213, + "epoch": 1.4809362579125571, + "grad_norm": 0.3638454991526735, + "learning_rate": 1.1954578011365577e-05, + "loss": 0.3257, "step": 10060 }, { - "epoch": 1.4747252747252748, - "grad_norm": 0.3892196789186328, - "learning_rate": 1.2025777731289947e-05, - "loss": 0.3129, + "epoch": 1.481672309730605, + "grad_norm": 0.36466330838741023, + "learning_rate": 1.1946178086225797e-05, + "loss": 0.3286, "step": 10065 }, { - "epoch": 1.4754578754578755, - "grad_norm": 0.36067816661970625, - "learning_rate": 1.2017429312366327e-05, - "loss": 0.3261, + "epoch": 1.482408361548653, + "grad_norm": 0.3703936862028979, + "learning_rate": 1.1937776733585465e-05, + "loss": 0.325, "step": 10070 }, { - "epoch": 1.4761904761904763, - "grad_norm": 0.3706845896472646, - "learning_rate": 1.2009079427460216e-05, - "loss": 0.3119, + "epoch": 1.483144413366701, + "grad_norm": 0.3667536628158024, + "learning_rate": 1.1929373959606884e-05, + "loss": 0.319, "step": 10075 }, { - "epoch": 1.476923076923077, - "grad_norm": 0.357211144171938, - "learning_rate": 1.2000728082639133e-05, - "loss": 0.3283, + "epoch": 1.483880465184749, + "grad_norm": 0.36572450136518264, + "learning_rate": 1.1920969770453402e-05, + "loss": 0.3151, "step": 10080 }, { - "epoch": 1.4776556776556777, - "grad_norm": 0.36128512918836525, - "learning_rate": 1.1992375283971652e-05, - "loss": 0.3201, + "epoch": 1.484616517002797, + "grad_norm": 0.3759356836534542, + "learning_rate": 1.1912564172289397e-05, + "loss": 0.3436, "step": 10085 }, { - "epoch": 1.4783882783882785, - "grad_norm": 0.3527024476733989, - "learning_rate": 1.1984021037527408e-05, - "loss": 0.321, + "epoch": 1.4853525688208449, + "grad_norm": 0.35598313718531005, + "learning_rate": 1.1904157171280282e-05, + "loss": 0.3398, "step": 10090 }, { - "epoch": 1.4791208791208792, - "grad_norm": 0.36190414974375074, - "learning_rate": 1.197566534937709e-05, - "loss": 0.3294, + "epoch": 1.486088620638893, + "grad_norm": 0.3669169951482803, + "learning_rate": 1.1895748773592506e-05, + "loss": 0.3231, "step": 10095 }, { - "epoch": 1.47985347985348, - "grad_norm": 0.3527905520818129, - "learning_rate": 1.196730822559243e-05, - "loss": 0.331, + "epoch": 1.486824672456941, + "grad_norm": 0.3693018739850583, + "learning_rate": 1.1887338985393537e-05, + "loss": 0.3247, "step": 10100 }, { - "epoch": 1.4805860805860807, - "grad_norm": 0.35543633549908826, - "learning_rate": 1.1958949672246198e-05, - "loss": 0.3281, + "epoch": 1.487560724274989, + "grad_norm": 0.3610580507913168, + "learning_rate": 1.1878927812851862e-05, + "loss": 0.315, "step": 10105 }, { - "epoch": 1.4813186813186814, - "grad_norm": 0.3602609678076822, - "learning_rate": 1.195058969541222e-05, - "loss": 0.3135, + "epoch": 1.488296776093037, + "grad_norm": 0.43074367823109083, + "learning_rate": 1.1870515262136989e-05, + "loss": 0.3277, "step": 10110 }, { - "epoch": 1.4820512820512821, - "grad_norm": 0.37628993017748585, - "learning_rate": 1.1942228301165335e-05, - "loss": 0.323, + "epoch": 1.489032827911085, + "grad_norm": 0.3617920059705095, + "learning_rate": 1.1862101339419421e-05, + "loss": 0.3224, "step": 10115 }, { - "epoch": 1.4827838827838828, - "grad_norm": 0.3746875216036666, - "learning_rate": 1.1933865495581429e-05, - "loss": 0.3159, + "epoch": 1.4897688797291329, + "grad_norm": 0.34565184093631734, + "learning_rate": 1.1853686050870691e-05, + "loss": 0.3241, "step": 10120 }, { - "epoch": 1.4835164835164836, - "grad_norm": 0.3644924663906216, - "learning_rate": 1.1925501284737409e-05, - "loss": 0.3191, + "epoch": 1.490504931547181, + "grad_norm": 0.38547787369437103, + "learning_rate": 1.184526940266332e-05, + "loss": 0.3419, "step": 10125 }, { - "epoch": 1.4842490842490843, - "grad_norm": 0.3549440442907127, - "learning_rate": 1.1917135674711193e-05, - "loss": 0.3124, + "epoch": 1.491240983365229, + "grad_norm": 0.36804240650848136, + "learning_rate": 1.1836851400970824e-05, + "loss": 0.3168, "step": 10130 }, { - "epoch": 1.484981684981685, - "grad_norm": 0.37470622439881085, - "learning_rate": 1.190876867158173e-05, - "loss": 0.3266, + "epoch": 1.4919770351832768, + "grad_norm": 0.36688440785220466, + "learning_rate": 1.182843205196772e-05, + "loss": 0.3271, "step": 10135 }, { - "epoch": 1.4857142857142858, - "grad_norm": 0.39389565715663216, - "learning_rate": 1.1900400281428974e-05, - "loss": 0.3133, + "epoch": 1.492713087001325, + "grad_norm": 0.3526309047769882, + "learning_rate": 1.1820011361829509e-05, + "loss": 0.3174, "step": 10140 }, { - "epoch": 1.4864468864468865, - "grad_norm": 0.3668611786885184, - "learning_rate": 1.1892030510333893e-05, - "loss": 0.3131, + "epoch": 1.493449138819373, + "grad_norm": 0.36672637373054146, + "learning_rate": 1.1811589336732673e-05, + "loss": 0.3179, "step": 10145 }, { - "epoch": 1.4871794871794872, - "grad_norm": 0.3626705673304234, - "learning_rate": 1.1883659364378441e-05, - "loss": 0.3241, + "epoch": 1.4941851906374208, + "grad_norm": 0.3623402155215344, + "learning_rate": 1.180316598285468e-05, + "loss": 0.3374, "step": 10150 }, { - "epoch": 1.487912087912088, - "grad_norm": 0.34833211232219496, - "learning_rate": 1.1875286849645598e-05, - "loss": 0.3223, + "epoch": 1.4949212424554688, + "grad_norm": 0.3630440450286294, + "learning_rate": 1.1794741306373966e-05, + "loss": 0.3381, "step": 10155 }, { - "epoch": 1.4886446886446887, - "grad_norm": 0.3657456508591803, - "learning_rate": 1.1866912972219315e-05, - "loss": 0.3333, + "epoch": 1.495657294273517, + "grad_norm": 0.37018899419751444, + "learning_rate": 1.1786315313469943e-05, + "loss": 0.3314, "step": 10160 }, { - "epoch": 1.4893772893772894, - "grad_norm": 0.387084111689358, - "learning_rate": 1.1858537738184548e-05, - "loss": 0.3275, + "epoch": 1.4963933460915648, + "grad_norm": 0.36149997093325387, + "learning_rate": 1.177788801032298e-05, + "loss": 0.317, "step": 10165 }, { - "epoch": 1.4901098901098901, - "grad_norm": 0.3740119251070165, - "learning_rate": 1.1850161153627227e-05, - "loss": 0.3221, + "epoch": 1.497129397909613, + "grad_norm": 0.38896520393181283, + "learning_rate": 1.1769459403114419e-05, + "loss": 0.331, "step": 10170 }, { - "epoch": 1.4908424908424909, - "grad_norm": 0.36085846264377625, - "learning_rate": 1.1841783224634279e-05, - "loss": 0.3286, + "epoch": 1.497865449727661, + "grad_norm": 0.3735773166816534, + "learning_rate": 1.1761029498026547e-05, + "loss": 0.3357, "step": 10175 }, { - "epoch": 1.4915750915750916, - "grad_norm": 0.36478035016967936, - "learning_rate": 1.1833403957293588e-05, - "loss": 0.3178, + "epoch": 1.4986015015457088, + "grad_norm": 0.3682398591528469, + "learning_rate": 1.175259830124261e-05, + "loss": 0.3322, "step": 10180 }, { - "epoch": 1.4923076923076923, - "grad_norm": 0.3668196694426494, - "learning_rate": 1.1825023357694032e-05, - "loss": 0.31, + "epoch": 1.4993375533637567, + "grad_norm": 0.34970333168068707, + "learning_rate": 1.1744165818946803e-05, + "loss": 0.3387, "step": 10185 }, { - "epoch": 1.493040293040293, - "grad_norm": 0.3793473590675217, - "learning_rate": 1.1816641431925437e-05, - "loss": 0.3271, + "epoch": 1.5000736051818047, + "grad_norm": 0.37456208444503025, + "learning_rate": 1.1735732057324258e-05, + "loss": 0.3443, "step": 10190 }, { - "epoch": 1.4937728937728938, - "grad_norm": 0.34993742755389273, - "learning_rate": 1.1808258186078609e-05, - "loss": 0.3182, + "epoch": 1.5008096569998528, + "grad_norm": 0.37699410156485463, + "learning_rate": 1.1727297022561043e-05, + "loss": 0.3228, "step": 10195 }, { - "epoch": 1.4945054945054945, - "grad_norm": 0.35281709590537846, - "learning_rate": 1.17998736262453e-05, - "loss": 0.3214, + "epoch": 1.5015457088179007, + "grad_norm": 0.37620292003651296, + "learning_rate": 1.1718860720844174e-05, + "loss": 0.3341, "step": 10200 }, { - "epoch": 1.4952380952380953, - "grad_norm": 0.3625611653357865, - "learning_rate": 1.1791487758518231e-05, - "loss": 0.3265, + "epoch": 1.5022817606359489, + "grad_norm": 0.37180888214117386, + "learning_rate": 1.1710423158361576e-05, + "loss": 0.3216, "step": 10205 }, { - "epoch": 1.495970695970696, - "grad_norm": 0.3756459611364907, - "learning_rate": 1.1783100588991057e-05, - "loss": 0.3397, + "epoch": 1.5030178124539968, + "grad_norm": 0.38653617755091274, + "learning_rate": 1.170198434130212e-05, + "loss": 0.3271, "step": 10210 }, { - "epoch": 1.4967032967032967, - "grad_norm": 0.35708722996847425, - "learning_rate": 1.1774712123758392e-05, - "loss": 0.324, + "epoch": 1.5037538642720447, + "grad_norm": 0.3794914965002413, + "learning_rate": 1.1693544275855578e-05, + "loss": 0.3245, "step": 10215 }, { - "epoch": 1.4974358974358974, - "grad_norm": 0.3573300867905406, - "learning_rate": 1.1766322368915786e-05, - "loss": 0.3216, + "epoch": 1.5044899160900926, + "grad_norm": 0.37085292438323103, + "learning_rate": 1.1685102968212653e-05, + "loss": 0.3232, "step": 10220 }, { - "epoch": 1.4981684981684982, - "grad_norm": 0.3592396734716313, - "learning_rate": 1.1757931330559728e-05, - "loss": 0.3183, + "epoch": 1.5052259679081408, + "grad_norm": 0.3499641864552821, + "learning_rate": 1.1676660424564951e-05, + "loss": 0.3121, "step": 10225 }, { - "epoch": 1.498901098901099, - "grad_norm": 0.34595230332393084, - "learning_rate": 1.1749539014787637e-05, - "loss": 0.3145, + "epoch": 1.5059620197261887, + "grad_norm": 0.3788203274765936, + "learning_rate": 1.1668216651104987e-05, + "loss": 0.3285, "step": 10230 }, { - "epoch": 1.4996336996336996, - "grad_norm": 0.36411036031952054, - "learning_rate": 1.1741145427697862e-05, - "loss": 0.332, + "epoch": 1.5066980715442368, + "grad_norm": 0.3806845182429137, + "learning_rate": 1.1659771654026175e-05, + "loss": 0.3333, "step": 10235 }, { - "epoch": 1.5003663003663004, - "grad_norm": 0.35389093482467654, - "learning_rate": 1.1732750575389678e-05, - "loss": 0.3181, + "epoch": 1.5074341233622848, + "grad_norm": 0.3728816337005067, + "learning_rate": 1.1651325439522831e-05, + "loss": 0.3367, "step": 10240 }, { - "epoch": 1.501098901098901, - "grad_norm": 0.35385316323976235, - "learning_rate": 1.1724354463963274e-05, - "loss": 0.3214, + "epoch": 1.5081701751803327, + "grad_norm": 0.35231974719445514, + "learning_rate": 1.1642878013790162e-05, + "loss": 0.3085, "step": 10245 }, { - "epoch": 1.5018315018315018, - "grad_norm": 0.36749115264217064, - "learning_rate": 1.1715957099519761e-05, - "loss": 0.3498, + "epoch": 1.5089062269983806, + "grad_norm": 0.3686547819457083, + "learning_rate": 1.163442938302426e-05, + "loss": 0.335, "step": 10250 }, { - "epoch": 1.5025641025641026, - "grad_norm": 0.3638081390632744, - "learning_rate": 1.1707558488161155e-05, - "loss": 0.3298, + "epoch": 1.5096422788164285, + "grad_norm": 0.37844065365899404, + "learning_rate": 1.1625979553422106e-05, + "loss": 0.3256, "step": 10255 }, { - "epoch": 1.5032967032967033, - "grad_norm": 0.3783219086875028, - "learning_rate": 1.1699158635990375e-05, - "loss": 0.3369, + "epoch": 1.5103783306344767, + "grad_norm": 0.3582203092302988, + "learning_rate": 1.1617528531181563e-05, + "loss": 0.3275, "step": 10260 }, { - "epoch": 1.504029304029304, - "grad_norm": 0.3793939099463113, - "learning_rate": 1.1690757549111254e-05, - "loss": 0.3296, + "epoch": 1.5111143824525246, + "grad_norm": 0.3607991359308337, + "learning_rate": 1.1609076322501359e-05, + "loss": 0.3444, "step": 10265 }, { - "epoch": 1.5047619047619047, - "grad_norm": 0.34984401356687794, - "learning_rate": 1.1682355233628511e-05, - "loss": 0.3137, + "epoch": 1.5118504342705728, + "grad_norm": 0.37645404960000367, + "learning_rate": 1.1600622933581099e-05, + "loss": 0.3276, "step": 10270 }, { - "epoch": 1.5054945054945055, - "grad_norm": 0.34412254714811025, - "learning_rate": 1.1673951695647761e-05, - "loss": 0.3172, + "epoch": 1.5125864860886207, + "grad_norm": 0.34736576685366, + "learning_rate": 1.1592168370621251e-05, + "loss": 0.3297, "step": 10275 }, { - "epoch": 1.5062271062271062, - "grad_norm": 0.37108875475539005, - "learning_rate": 1.1665546941275508e-05, - "loss": 0.3163, + "epoch": 1.5133225379066686, + "grad_norm": 0.3648936267082656, + "learning_rate": 1.1583712639823145e-05, + "loss": 0.327, "step": 10280 }, { - "epoch": 1.506959706959707, - "grad_norm": 0.33443059117591795, - "learning_rate": 1.1657140976619139e-05, - "loss": 0.3133, + "epoch": 1.5140585897247165, + "grad_norm": 0.36467424229661827, + "learning_rate": 1.1575255747388973e-05, + "loss": 0.3132, "step": 10285 }, { - "epoch": 1.5076923076923077, - "grad_norm": 0.3696466136196227, - "learning_rate": 1.1648733807786924e-05, - "loss": 0.3106, + "epoch": 1.5147946415427647, + "grad_norm": 0.35051553540759905, + "learning_rate": 1.1566797699521767e-05, + "loss": 0.3119, "step": 10290 }, { - "epoch": 1.5084249084249084, - "grad_norm": 0.36664578097064954, - "learning_rate": 1.1640325440888e-05, - "loss": 0.3304, + "epoch": 1.5155306933608126, + "grad_norm": 0.36720314239688623, + "learning_rate": 1.155833850242542e-05, + "loss": 0.3301, "step": 10295 }, { - "epoch": 1.5091575091575091, - "grad_norm": 0.36115273815475946, - "learning_rate": 1.1631915882032385e-05, - "loss": 0.3339, + "epoch": 1.5162667451788607, + "grad_norm": 0.35964944120373893, + "learning_rate": 1.1549878162304658e-05, + "loss": 0.3122, "step": 10300 }, { - "epoch": 1.5098901098901099, - "grad_norm": 0.3505883530659083, - "learning_rate": 1.1623505137330953e-05, - "loss": 0.316, + "epoch": 1.5170027969969087, + "grad_norm": 0.36040990796822525, + "learning_rate": 1.154141668536505e-05, + "loss": 0.3341, "step": 10305 }, { - "epoch": 1.5106227106227106, - "grad_norm": 0.34948126102936344, - "learning_rate": 1.1615093212895447e-05, - "loss": 0.327, + "epoch": 1.5177388488149566, + "grad_norm": 0.37975850258227956, + "learning_rate": 1.1532954077812994e-05, + "loss": 0.3231, "step": 10310 }, { - "epoch": 1.5113553113553113, - "grad_norm": 0.35469293234832766, - "learning_rate": 1.1606680114838466e-05, - "loss": 0.313, + "epoch": 1.5184749006330045, + "grad_norm": 0.34035083714004977, + "learning_rate": 1.1524490345855725e-05, + "loss": 0.3199, "step": 10315 }, { - "epoch": 1.512087912087912, - "grad_norm": 0.3609675556063852, - "learning_rate": 1.1598265849273461e-05, - "loss": 0.323, + "epoch": 1.5192109524510524, + "grad_norm": 0.3499332403239346, + "learning_rate": 1.15160254957013e-05, + "loss": 0.3138, "step": 10320 }, { - "epoch": 1.5128205128205128, - "grad_norm": 0.36927530181563994, - "learning_rate": 1.1589850422314724e-05, - "loss": 0.3216, + "epoch": 1.5199470042691006, + "grad_norm": 0.3477176687159296, + "learning_rate": 1.1507559533558592e-05, + "loss": 0.3071, "step": 10325 }, { - "epoch": 1.5135531135531135, - "grad_norm": 0.36973297374522507, - "learning_rate": 1.1581433840077407e-05, - "loss": 0.3197, + "epoch": 1.5206830560871485, + "grad_norm": 0.3490031242582119, + "learning_rate": 1.1499092465637292e-05, + "loss": 0.3187, "step": 10330 }, { - "epoch": 1.5142857142857142, - "grad_norm": 0.3539413717678107, - "learning_rate": 1.1573016108677488e-05, - "loss": 0.3254, + "epoch": 1.5214191079051966, + "grad_norm": 0.371079624204397, + "learning_rate": 1.1490624298147903e-05, + "loss": 0.3224, "step": 10335 }, { - "epoch": 1.515018315018315, - "grad_norm": 0.3629618588684632, - "learning_rate": 1.1564597234231784e-05, - "loss": 0.3162, + "epoch": 1.5221551597232446, + "grad_norm": 0.35838804760470716, + "learning_rate": 1.1482155037301737e-05, + "loss": 0.3389, "step": 10340 }, { - "epoch": 1.5157509157509157, - "grad_norm": 0.36103904069581366, - "learning_rate": 1.1556177222857943e-05, - "loss": 0.3421, + "epoch": 1.5228912115412925, + "grad_norm": 0.3681521204329716, + "learning_rate": 1.1473684689310902e-05, + "loss": 0.3233, "step": 10345 }, { - "epoch": 1.5164835164835164, - "grad_norm": 0.36482620207020305, - "learning_rate": 1.1547756080674443e-05, - "loss": 0.3322, + "epoch": 1.5236272633593404, + "grad_norm": 0.3662058095887824, + "learning_rate": 1.1465213260388306e-05, + "loss": 0.3177, "step": 10350 }, { - "epoch": 1.5172161172161172, - "grad_norm": 0.37832423909240065, - "learning_rate": 1.1539333813800574e-05, - "loss": 0.3312, + "epoch": 1.5243633151773883, + "grad_norm": 0.3589300711209559, + "learning_rate": 1.1456740756747652e-05, + "loss": 0.3107, "step": 10355 }, { - "epoch": 1.5179487179487179, - "grad_norm": 0.363637168839375, - "learning_rate": 1.1530910428356456e-05, - "loss": 0.3137, + "epoch": 1.5250993669954365, + "grad_norm": 0.363259271248277, + "learning_rate": 1.1448267184603434e-05, + "loss": 0.3197, "step": 10360 }, { - "epoch": 1.5186813186813186, - "grad_norm": 0.36076878236286214, - "learning_rate": 1.1522485930463008e-05, - "loss": 0.3249, + "epoch": 1.5258354188134846, + "grad_norm": 0.3616125954800502, + "learning_rate": 1.143979255017092e-05, + "loss": 0.2999, "step": 10365 }, { - "epoch": 1.5194139194139193, - "grad_norm": 0.3530507898066571, - "learning_rate": 1.151406032624197e-05, - "loss": 0.3205, + "epoch": 1.5265714706315325, + "grad_norm": 0.3831938753206401, + "learning_rate": 1.1431316859666161e-05, + "loss": 0.3276, "step": 10370 }, { - "epoch": 1.52014652014652, - "grad_norm": 0.3627256562251993, - "learning_rate": 1.1505633621815874e-05, - "loss": 0.3267, + "epoch": 1.5273075224495805, + "grad_norm": 0.3828513312969688, + "learning_rate": 1.142284011930599e-05, + "loss": 0.3434, "step": 10375 }, { - "epoch": 1.5208791208791208, - "grad_norm": 0.3826960538358999, - "learning_rate": 1.149720582330807e-05, - "loss": 0.3289, + "epoch": 1.5280435742676284, + "grad_norm": 0.36639232431420626, + "learning_rate": 1.1414362335307999e-05, + "loss": 0.3253, "step": 10380 }, { - "epoch": 1.5216117216117215, - "grad_norm": 0.363760880261881, - "learning_rate": 1.1488776936842677e-05, - "loss": 0.3188, + "epoch": 1.5287796260856763, + "grad_norm": 0.34079896688551425, + "learning_rate": 1.1405883513890548e-05, + "loss": 0.3182, "step": 10385 }, { - "epoch": 1.5223443223443223, - "grad_norm": 0.3441238001593689, - "learning_rate": 1.1480346968544625e-05, - "loss": 0.3149, + "epoch": 1.5295156779037244, + "grad_norm": 0.368570821282134, + "learning_rate": 1.139740366127277e-05, + "loss": 0.33, "step": 10390 }, { - "epoch": 1.523076923076923, - "grad_norm": 0.36243279548096724, - "learning_rate": 1.1471915924539618e-05, - "loss": 0.3367, + "epoch": 1.5302517297217724, + "grad_norm": 0.552368359972773, + "learning_rate": 1.1388922783674541e-05, + "loss": 0.3125, "step": 10395 }, { - "epoch": 1.5238095238095237, - "grad_norm": 0.35259506480546055, - "learning_rate": 1.1463483810954156e-05, - "loss": 0.3104, + "epoch": 1.5309877815398205, + "grad_norm": 0.3678382956172097, + "learning_rate": 1.138044088731649e-05, + "loss": 0.3217, "step": 10400 }, { - "epoch": 1.5245421245421245, - "grad_norm": 0.3586045539694791, - "learning_rate": 1.1455050633915505e-05, - "loss": 0.3142, + "epoch": 1.5317238333578684, + "grad_norm": 0.3755751095061701, + "learning_rate": 1.137195797842e-05, + "loss": 0.3378, "step": 10405 }, { - "epoch": 1.5252747252747252, - "grad_norm": 0.35840004554520566, - "learning_rate": 1.14466163995517e-05, - "loss": 0.3004, + "epoch": 1.5324598851759164, + "grad_norm": 0.36753548836349775, + "learning_rate": 1.1363474063207195e-05, + "loss": 0.3196, "step": 10410 }, { - "epoch": 1.526007326007326, - "grad_norm": 0.36577144977124004, - "learning_rate": 1.1438181113991558e-05, - "loss": 0.3227, + "epoch": 1.5331959369939643, + "grad_norm": 0.36975794388840644, + "learning_rate": 1.1354989147900925e-05, + "loss": 0.3343, "step": 10415 }, { - "epoch": 1.5267399267399266, - "grad_norm": 0.3639942415883003, - "learning_rate": 1.1429744783364649e-05, - "loss": 0.3328, + "epoch": 1.5339319888120122, + "grad_norm": 0.3505498342913042, + "learning_rate": 1.1346503238724793e-05, + "loss": 0.3246, "step": 10420 }, { - "epoch": 1.5274725274725274, - "grad_norm": 0.3847428994647183, - "learning_rate": 1.1421307413801303e-05, - "loss": 0.3347, + "epoch": 1.5346680406300603, + "grad_norm": 0.3589513304964758, + "learning_rate": 1.1338016341903117e-05, + "loss": 0.325, "step": 10425 }, { - "epoch": 1.528205128205128, - "grad_norm": 0.3785174667722064, - "learning_rate": 1.1412869011432613e-05, - "loss": 0.3242, + "epoch": 1.5354040924481085, + "grad_norm": 1.4329025323902576, + "learning_rate": 1.1329528463660947e-05, + "loss": 0.3158, "step": 10430 }, { - "epoch": 1.5289377289377288, - "grad_norm": 0.3585718814838943, - "learning_rate": 1.1404429582390413e-05, - "loss": 0.3083, + "epoch": 1.5361401442661564, + "grad_norm": 0.365909611883646, + "learning_rate": 1.1321039610224047e-05, + "loss": 0.3306, "step": 10435 }, { - "epoch": 1.5296703296703296, - "grad_norm": 0.3543393918113152, - "learning_rate": 1.1395989132807287e-05, - "loss": 0.3113, + "epoch": 1.5368761960842043, + "grad_norm": 0.36666338483023514, + "learning_rate": 1.1312549787818898e-05, + "loss": 0.3283, "step": 10440 }, { - "epoch": 1.5304029304029303, - "grad_norm": 0.3679860017169941, - "learning_rate": 1.138754766881657e-05, - "loss": 0.3252, + "epoch": 1.5376122479022523, + "grad_norm": 0.3634710371258079, + "learning_rate": 1.1304059002672696e-05, + "loss": 0.3163, "step": 10445 }, { - "epoch": 1.531135531135531, - "grad_norm": 0.3493482479821301, - "learning_rate": 1.1379105196552311e-05, - "loss": 0.3156, + "epoch": 1.5383482997203002, + "grad_norm": 0.365589652938516, + "learning_rate": 1.129556726101334e-05, + "loss": 0.325, "step": 10450 }, { - "epoch": 1.5318681318681318, - "grad_norm": 0.36896222763653935, - "learning_rate": 1.1370661722149314e-05, - "loss": 0.3365, + "epoch": 1.5390843515383483, + "grad_norm": 0.3835416876507009, + "learning_rate": 1.1287074569069427e-05, + "loss": 0.3265, "step": 10455 }, { - "epoch": 1.5326007326007325, - "grad_norm": 0.36606842295285735, - "learning_rate": 1.1362217251743099e-05, - "loss": 0.3247, + "epoch": 1.5398204033563963, + "grad_norm": 0.3592191576539692, + "learning_rate": 1.1278580933070257e-05, + "loss": 0.3203, "step": 10460 }, { - "epoch": 1.5333333333333332, - "grad_norm": 0.3634722621767758, - "learning_rate": 1.135377179146992e-05, - "loss": 0.3263, + "epoch": 1.5405564551744444, + "grad_norm": 0.35570709486871005, + "learning_rate": 1.1270086359245822e-05, + "loss": 0.3083, "step": 10465 }, { - "epoch": 1.534065934065934, - "grad_norm": 0.36243529766864707, - "learning_rate": 1.1345325347466738e-05, - "loss": 0.3228, + "epoch": 1.5412925069924923, + "grad_norm": 0.3834732048865809, + "learning_rate": 1.1261590853826795e-05, + "loss": 0.34, "step": 10470 }, { - "epoch": 1.5347985347985347, - "grad_norm": 0.3510096037048702, - "learning_rate": 1.1336877925871237e-05, - "loss": 0.3231, + "epoch": 1.5420285588105402, + "grad_norm": 0.36468683586721007, + "learning_rate": 1.1253094423044542e-05, + "loss": 0.3217, "step": 10475 }, { - "epoch": 1.5355311355311354, - "grad_norm": 0.3569480778437148, - "learning_rate": 1.1328429532821806e-05, - "loss": 0.325, + "epoch": 1.5427646106285882, + "grad_norm": 0.3717550741983341, + "learning_rate": 1.1244597073131101e-05, + "loss": 0.3334, "step": 10480 }, { - "epoch": 1.5362637362637361, - "grad_norm": 0.37322684386375793, - "learning_rate": 1.1319980174457546e-05, - "loss": 0.3202, + "epoch": 1.543500662446636, + "grad_norm": 0.3607937943081953, + "learning_rate": 1.1236098810319182e-05, + "loss": 0.3187, "step": 10485 }, { - "epoch": 1.5369963369963369, - "grad_norm": 0.36709767538456534, - "learning_rate": 1.1311529856918258e-05, - "loss": 0.3284, + "epoch": 1.5442367142646842, + "grad_norm": 0.3638012987883055, + "learning_rate": 1.1227599640842178e-05, + "loss": 0.3322, "step": 10490 }, { - "epoch": 1.5377289377289376, - "grad_norm": 0.36067118105211804, - "learning_rate": 1.1303078586344434e-05, - "loss": 0.3175, + "epoch": 1.5449727660827324, + "grad_norm": 0.3513757646645374, + "learning_rate": 1.1219099570934131e-05, + "loss": 0.3109, "step": 10495 }, { - "epoch": 1.5384615384615383, - "grad_norm": 0.3632107295175718, - "learning_rate": 1.1294626368877266e-05, - "loss": 0.3204, + "epoch": 1.5457088179007803, + "grad_norm": 0.37690237543820104, + "learning_rate": 1.121059860682975e-05, + "loss": 0.3328, "step": 10500 }, { - "epoch": 1.539194139194139, - "grad_norm": 0.3652132794095445, - "learning_rate": 1.128617321065863e-05, - "loss": 0.319, + "epoch": 1.5464448697188282, + "grad_norm": 0.36143736312535396, + "learning_rate": 1.1202096754764403e-05, + "loss": 0.3239, "step": 10505 }, { - "epoch": 1.5399267399267398, - "grad_norm": 0.38779027451907705, - "learning_rate": 1.1277719117831088e-05, - "loss": 0.3406, + "epoch": 1.5471809215368761, + "grad_norm": 0.36181670331350985, + "learning_rate": 1.1193594020974105e-05, + "loss": 0.3115, "step": 10510 }, { - "epoch": 1.5406593406593405, - "grad_norm": 0.3884935873103975, - "learning_rate": 1.126926409653788e-05, - "loss": 0.3294, + "epoch": 1.547916973354924, + "grad_norm": 0.3702153858630142, + "learning_rate": 1.1185090411695519e-05, + "loss": 0.3188, "step": 10515 }, { - "epoch": 1.5413919413919412, - "grad_norm": 0.3455480192963863, - "learning_rate": 1.1260808152922916e-05, - "loss": 0.3074, + "epoch": 1.5486530251729722, + "grad_norm": 0.3581307229029183, + "learning_rate": 1.1176585933165949e-05, + "loss": 0.3245, "step": 10520 }, { - "epoch": 1.542124542124542, - "grad_norm": 0.3502506088120491, - "learning_rate": 1.1252351293130785e-05, - "loss": 0.3221, + "epoch": 1.5493890769910201, + "grad_norm": 0.3682623206651115, + "learning_rate": 1.1168080591623343e-05, + "loss": 0.3251, "step": 10525 }, { - "epoch": 1.5428571428571427, - "grad_norm": 0.3527800077691515, - "learning_rate": 1.1243893523306735e-05, - "loss": 0.327, + "epoch": 1.5501251288090683, + "grad_norm": 0.35429114443960585, + "learning_rate": 1.1159574393306272e-05, + "loss": 0.3269, "step": 10530 }, { - "epoch": 1.5435897435897434, - "grad_norm": 0.34779222936948845, - "learning_rate": 1.1235434849596683e-05, - "loss": 0.3221, + "epoch": 1.5508611806271162, + "grad_norm": 0.3661458881367873, + "learning_rate": 1.1151067344453941e-05, + "loss": 0.3362, "step": 10535 }, { - "epoch": 1.5443223443223442, - "grad_norm": 0.3581405926693141, - "learning_rate": 1.1226975278147191e-05, - "loss": 0.3249, + "epoch": 1.5515972324451641, + "grad_norm": 0.3401509268807909, + "learning_rate": 1.1142559451306183e-05, + "loss": 0.3248, "step": 10540 }, { - "epoch": 1.545054945054945, - "grad_norm": 0.3652384063913515, - "learning_rate": 1.1218514815105483e-05, - "loss": 0.3362, + "epoch": 1.552333284263212, + "grad_norm": 0.3473392948542666, + "learning_rate": 1.1134050720103438e-05, + "loss": 0.3285, "step": 10545 }, { - "epoch": 1.5457875457875456, - "grad_norm": 0.3718363796824984, - "learning_rate": 1.1210053466619427e-05, - "loss": 0.3251, + "epoch": 1.55306933608126, + "grad_norm": 0.36994117748891175, + "learning_rate": 1.112554115708677e-05, + "loss": 0.3128, "step": 10550 }, { - "epoch": 1.5465201465201464, - "grad_norm": 0.3617586012853975, - "learning_rate": 1.1201591238837536e-05, - "loss": 0.321, + "epoch": 1.553805387899308, + "grad_norm": 0.33668187505090125, + "learning_rate": 1.1117030768497854e-05, + "loss": 0.3254, "step": 10555 }, { - "epoch": 1.5472527472527473, - "grad_norm": 0.35963645658759585, - "learning_rate": 1.1193128137908962e-05, - "loss": 0.3317, + "epoch": 1.5545414397173563, + "grad_norm": 0.3618387095405319, + "learning_rate": 1.110851956057897e-05, + "loss": 0.3316, "step": 10560 }, { - "epoch": 1.547985347985348, - "grad_norm": 0.37752854864033075, - "learning_rate": 1.1184664169983486e-05, - "loss": 0.3276, + "epoch": 1.5552774915354042, + "grad_norm": 0.3491498712906266, + "learning_rate": 1.110000753957299e-05, + "loss": 0.3369, "step": 10565 }, { - "epoch": 1.5487179487179488, - "grad_norm": 0.38661437404420046, - "learning_rate": 1.117619934121153e-05, - "loss": 0.3261, + "epoch": 1.556013543353452, + "grad_norm": 0.3495879911555467, + "learning_rate": 1.1091494711723395e-05, + "loss": 0.3171, "step": 10570 }, { - "epoch": 1.5494505494505495, - "grad_norm": 0.36685370857951194, - "learning_rate": 1.1167733657744131e-05, - "loss": 0.3209, + "epoch": 1.5567495951715, + "grad_norm": 0.3907826414201101, + "learning_rate": 1.1082981083274252e-05, + "loss": 0.3196, "step": 10575 }, { - "epoch": 1.5501831501831502, - "grad_norm": 0.3686733422250125, - "learning_rate": 1.1159267125732954e-05, - "loss": 0.3192, + "epoch": 1.557485646989548, + "grad_norm": 0.36974060030759315, + "learning_rate": 1.107446666047021e-05, + "loss": 0.322, "step": 10580 }, { - "epoch": 1.550915750915751, - "grad_norm": 0.35997746733424546, - "learning_rate": 1.1150799751330273e-05, - "loss": 0.304, + "epoch": 1.558221698807596, + "grad_norm": 0.364638380907563, + "learning_rate": 1.1065951449556509e-05, + "loss": 0.3177, "step": 10585 }, { - "epoch": 1.5516483516483517, - "grad_norm": 0.35912020225512303, - "learning_rate": 1.1142331540688982e-05, - "loss": 0.3182, + "epoch": 1.558957750625644, + "grad_norm": 0.36830616107237507, + "learning_rate": 1.1057435456778967e-05, + "loss": 0.3169, "step": 10590 }, { - "epoch": 1.5523809523809524, - "grad_norm": 0.3704574449036329, - "learning_rate": 1.113386249996258e-05, - "loss": 0.3055, + "epoch": 1.5596938024436922, + "grad_norm": 0.36250697632861917, + "learning_rate": 1.1048918688383967e-05, + "loss": 0.3305, "step": 10595 }, { - "epoch": 1.5531135531135531, - "grad_norm": 0.37596344315036007, - "learning_rate": 1.1125392635305171e-05, - "loss": 0.325, + "epoch": 1.56042985426174, + "grad_norm": 0.35473255475644067, + "learning_rate": 1.104040115061847e-05, + "loss": 0.3196, "step": 10600 }, { - "epoch": 1.5538461538461539, - "grad_norm": 0.37007140282096485, - "learning_rate": 1.1116921952871451e-05, - "loss": 0.3304, + "epoch": 1.561165906079788, + "grad_norm": 0.3697377467377593, + "learning_rate": 1.1031882849729996e-05, + "loss": 0.3196, "step": 10605 }, { - "epoch": 1.5545787545787546, - "grad_norm": 0.3542567239055139, - "learning_rate": 1.110845045881672e-05, - "loss": 0.3483, + "epoch": 1.561901957897836, + "grad_norm": 0.35367404218811516, + "learning_rate": 1.1023363791966628e-05, + "loss": 0.3191, "step": 10610 }, { - "epoch": 1.5553113553113553, - "grad_norm": 0.3704260386299412, - "learning_rate": 1.1099978159296853e-05, - "loss": 0.3354, + "epoch": 1.5626380097158838, + "grad_norm": 0.37424352447772774, + "learning_rate": 1.1014843983577002e-05, + "loss": 0.3441, "step": 10615 }, { - "epoch": 1.556043956043956, - "grad_norm": 0.35617120318620293, - "learning_rate": 1.1091505060468334e-05, - "loss": 0.3145, + "epoch": 1.563374061533932, + "grad_norm": 0.3507694301126513, + "learning_rate": 1.1006323430810304e-05, + "loss": 0.3193, "step": 10620 }, { - "epoch": 1.5567765567765568, - "grad_norm": 0.3630933646411497, - "learning_rate": 1.1083031168488201e-05, - "loss": 0.3126, + "epoch": 1.5641101133519801, + "grad_norm": 0.3507120881822479, + "learning_rate": 1.0997802139916267e-05, + "loss": 0.3262, "step": 10625 }, { - "epoch": 1.5575091575091575, - "grad_norm": 0.3635654024668279, - "learning_rate": 1.1074556489514088e-05, - "loss": 0.3287, + "epoch": 1.564846165170028, + "grad_norm": 0.360651879708966, + "learning_rate": 1.0989280117145168e-05, + "loss": 0.3273, "step": 10630 }, { - "epoch": 1.5582417582417583, - "grad_norm": 0.36926283154091916, - "learning_rate": 1.106608102970419e-05, - "loss": 0.3311, + "epoch": 1.565582216988076, + "grad_norm": 0.35962526178526283, + "learning_rate": 1.0980757368747819e-05, + "loss": 0.3247, "step": 10635 }, { - "epoch": 1.558974358974359, - "grad_norm": 0.38017762750849377, - "learning_rate": 1.1057604795217275e-05, - "loss": 0.3226, + "epoch": 1.566318268806124, + "grad_norm": 0.3567122421354603, + "learning_rate": 1.097223390097556e-05, + "loss": 0.3214, "step": 10640 }, { - "epoch": 1.5597069597069597, - "grad_norm": 0.37296679726598, - "learning_rate": 1.1049127792212678e-05, - "loss": 0.3332, + "epoch": 1.5670543206241718, + "grad_norm": 0.3707989706920427, + "learning_rate": 1.0963709720080262e-05, + "loss": 0.3221, "step": 10645 }, { - "epoch": 1.5604395604395604, - "grad_norm": 0.37025087485170594, - "learning_rate": 1.1040650026850274e-05, - "loss": 0.325, + "epoch": 1.56779037244222, + "grad_norm": 0.3639122505804451, + "learning_rate": 1.0955184832314323e-05, + "loss": 0.3276, "step": 10650 }, { - "epoch": 1.5611721611721612, - "grad_norm": 0.3599046109588884, - "learning_rate": 1.1032171505290515e-05, - "loss": 0.3284, + "epoch": 1.568526424260268, + "grad_norm": 0.3603234990700586, + "learning_rate": 1.0946659243930653e-05, + "loss": 0.3027, "step": 10655 }, { - "epoch": 1.561904761904762, - "grad_norm": 0.3713491859565942, - "learning_rate": 1.102369223369439e-05, - "loss": 0.3191, + "epoch": 1.569262476078316, + "grad_norm": 0.36826062208996563, + "learning_rate": 1.0938132961182682e-05, + "loss": 0.3132, "step": 10660 }, { - "epoch": 1.5626373626373626, - "grad_norm": 0.3977929765942424, - "learning_rate": 1.1015212218223433e-05, - "loss": 0.3326, + "epoch": 1.569998527896364, + "grad_norm": 0.3550575067788097, + "learning_rate": 1.0929605990324345e-05, + "loss": 0.3158, "step": 10665 }, { - "epoch": 1.5633699633699634, - "grad_norm": 0.38423993709303933, - "learning_rate": 1.100673146503972e-05, - "loss": 0.3239, + "epoch": 1.5707345797144119, + "grad_norm": 0.3482416489950832, + "learning_rate": 1.0921078337610083e-05, + "loss": 0.3088, "step": 10670 }, { - "epoch": 1.564102564102564, - "grad_norm": 0.36288425856840323, - "learning_rate": 1.0998249980305867e-05, - "loss": 0.3256, + "epoch": 1.5714706315324598, + "grad_norm": 0.35761848210115393, + "learning_rate": 1.0912550009294834e-05, + "loss": 0.3368, "step": 10675 }, { - "epoch": 1.5648351648351648, - "grad_norm": 0.350555719189037, - "learning_rate": 1.0989767770185017e-05, - "loss": 0.3096, + "epoch": 1.5722066833505077, + "grad_norm": 0.3763713827683073, + "learning_rate": 1.0904021011634033e-05, + "loss": 0.3423, "step": 10680 }, { - "epoch": 1.5655677655677656, - "grad_norm": 0.38318092351307165, - "learning_rate": 1.098128484084084e-05, - "loss": 0.3324, + "epoch": 1.5729427351685559, + "grad_norm": 0.37829590035732397, + "learning_rate": 1.0895491350883612e-05, + "loss": 0.3208, "step": 10685 }, { - "epoch": 1.5663003663003663, - "grad_norm": 0.3618784359736923, - "learning_rate": 1.0972801198437533e-05, - "loss": 0.3229, + "epoch": 1.573678786986604, + "grad_norm": 0.36432735184879916, + "learning_rate": 1.0886961033299985e-05, + "loss": 0.3178, "step": 10690 }, { - "epoch": 1.567032967032967, - "grad_norm": 0.37990979190549173, - "learning_rate": 1.0964316849139806e-05, - "loss": 0.3229, + "epoch": 1.574414838804652, + "grad_norm": 0.37211183953152666, + "learning_rate": 1.0878430065140045e-05, + "loss": 0.3202, "step": 10695 }, { - "epoch": 1.5677655677655677, - "grad_norm": 0.3640985661929933, - "learning_rate": 1.0955831799112886e-05, - "loss": 0.3176, + "epoch": 1.5751508906226999, + "grad_norm": 0.3667957151162588, + "learning_rate": 1.0869898452661167e-05, + "loss": 0.3222, "step": 10700 }, { - "epoch": 1.5684981684981685, - "grad_norm": 0.37439906801966494, - "learning_rate": 1.0947346054522508e-05, - "loss": 0.3175, + "epoch": 1.5758869424407478, + "grad_norm": 0.35726128301731563, + "learning_rate": 1.0861366202121193e-05, + "loss": 0.3261, "step": 10705 }, { - "epoch": 1.5692307692307692, - "grad_norm": 0.3669987106800802, - "learning_rate": 1.093885962153491e-05, - "loss": 0.3235, + "epoch": 1.5766229942587957, + "grad_norm": 0.35733711708395866, + "learning_rate": 1.0852833319778442e-05, + "loss": 0.3171, "step": 10710 }, { - "epoch": 1.56996336996337, - "grad_norm": 0.3559820665989296, - "learning_rate": 1.0930372506316837e-05, - "loss": 0.33, + "epoch": 1.5773590460768439, + "grad_norm": 0.363055551154169, + "learning_rate": 1.0844299811891686e-05, + "loss": 0.3165, "step": 10715 }, { - "epoch": 1.5706959706959707, - "grad_norm": 0.35190310727973406, - "learning_rate": 1.0921884715035518e-05, - "loss": 0.3228, + "epoch": 1.5780950978948918, + "grad_norm": 0.35831292835177575, + "learning_rate": 1.0835765684720159e-05, + "loss": 0.322, "step": 10720 }, { - "epoch": 1.5714285714285714, - "grad_norm": 0.36359573484072494, - "learning_rate": 1.0913396253858685e-05, - "loss": 0.3341, + "epoch": 1.57883114971294, + "grad_norm": 0.36235266345268446, + "learning_rate": 1.0827230944523555e-05, + "loss": 0.3174, "step": 10725 }, { - "epoch": 1.5721611721611721, - "grad_norm": 0.355697692121416, - "learning_rate": 1.0904907128954551e-05, - "loss": 0.3383, + "epoch": 1.5795672015309878, + "grad_norm": 0.35646086170906893, + "learning_rate": 1.0818695597562012e-05, + "loss": 0.3119, "step": 10730 }, { - "epoch": 1.5728937728937729, - "grad_norm": 0.3461701669379614, - "learning_rate": 1.0896417346491811e-05, - "loss": 0.3188, + "epoch": 1.5803032533490358, + "grad_norm": 0.3698149547592337, + "learning_rate": 1.0810159650096116e-05, + "loss": 0.3219, "step": 10735 }, { - "epoch": 1.5736263736263736, - "grad_norm": 0.366987635423063, - "learning_rate": 1.088792691263964e-05, - "loss": 0.3222, + "epoch": 1.5810393051670837, + "grad_norm": 0.3475767326719413, + "learning_rate": 1.080162310838689e-05, + "loss": 0.3173, "step": 10740 }, { - "epoch": 1.5743589743589743, - "grad_norm": 0.37928410154460174, - "learning_rate": 1.0879435833567683e-05, - "loss": 0.3234, + "epoch": 1.5817753569851316, + "grad_norm": 0.3468615818674762, + "learning_rate": 1.0793085978695797e-05, + "loss": 0.307, "step": 10745 }, { - "epoch": 1.575091575091575, - "grad_norm": 0.37405645256523873, - "learning_rate": 1.0870944115446062e-05, - "loss": 0.3238, + "epoch": 1.5825114088031798, + "grad_norm": 0.35105289247848803, + "learning_rate": 1.0784548267284727e-05, + "loss": 0.3435, "step": 10750 }, { - "epoch": 1.5758241758241758, - "grad_norm": 0.3683261076337646, - "learning_rate": 1.0862451764445352e-05, - "loss": 0.3336, + "epoch": 1.583247460621228, + "grad_norm": 0.3506312110225094, + "learning_rate": 1.0776009980415997e-05, + "loss": 0.3219, "step": 10755 }, { - "epoch": 1.5765567765567765, - "grad_norm": 0.3634826374067524, - "learning_rate": 1.0853958786736592e-05, - "loss": 0.3251, + "epoch": 1.5839835124392758, + "grad_norm": 0.3785318098001804, + "learning_rate": 1.0767471124352355e-05, + "loss": 0.3143, "step": 10760 }, { - "epoch": 1.5772893772893772, - "grad_norm": 0.3500778650743875, - "learning_rate": 1.0845465188491284e-05, - "loss": 0.3223, + "epoch": 1.5847195642573237, + "grad_norm": 0.3814559123042075, + "learning_rate": 1.0758931705356952e-05, + "loss": 0.3272, "step": 10765 }, { - "epoch": 1.578021978021978, - "grad_norm": 0.35777802621337773, - "learning_rate": 1.0836970975881369e-05, - "loss": 0.3079, + "epoch": 1.5854556160753717, + "grad_norm": 0.3698078447907398, + "learning_rate": 1.0750391729693363e-05, + "loss": 0.3219, "step": 10770 }, { - "epoch": 1.578754578754579, - "grad_norm": 0.35417650772038833, - "learning_rate": 1.0828476155079248e-05, - "loss": 0.3199, + "epoch": 1.5861916678934196, + "grad_norm": 0.36454633834510386, + "learning_rate": 1.0741851203625569e-05, + "loss": 0.328, "step": 10775 }, { - "epoch": 1.5794871794871796, - "grad_norm": 0.36711234633272344, - "learning_rate": 1.0819980732257747e-05, - "loss": 0.3236, + "epoch": 1.5869277197114677, + "grad_norm": 0.37238642644164754, + "learning_rate": 1.0733310133417952e-05, + "loss": 0.3151, "step": 10780 }, { - "epoch": 1.5802197802197804, - "grad_norm": 0.36720371790919215, - "learning_rate": 1.0811484713590143e-05, - "loss": 0.3178, + "epoch": 1.5876637715295157, + "grad_norm": 0.3681230149728814, + "learning_rate": 1.0724768525335287e-05, + "loss": 0.3211, "step": 10785 }, { - "epoch": 1.580952380952381, - "grad_norm": 0.3586111875426069, - "learning_rate": 1.0802988105250139e-05, - "loss": 0.3243, + "epoch": 1.5883998233475638, + "grad_norm": 0.3672618834917921, + "learning_rate": 1.0716226385642768e-05, + "loss": 0.321, "step": 10790 }, { - "epoch": 1.5816849816849818, - "grad_norm": 0.34797918189175675, - "learning_rate": 1.0794490913411878e-05, - "loss": 0.3262, + "epoch": 1.5891358751656117, + "grad_norm": 0.34443770378247834, + "learning_rate": 1.0707683720605946e-05, + "loss": 0.3365, "step": 10795 }, { - "epoch": 1.5824175824175826, - "grad_norm": 0.35506683705733666, - "learning_rate": 1.0785993144249903e-05, - "loss": 0.3164, + "epoch": 1.5898719269836596, + "grad_norm": 0.3385417936764532, + "learning_rate": 1.069914053649078e-05, + "loss": 0.3161, "step": 10800 }, { - "epoch": 1.5831501831501833, - "grad_norm": 0.35529140738617393, - "learning_rate": 1.0777494803939204e-05, - "loss": 0.3323, + "epoch": 1.5906079788017076, + "grad_norm": 0.35463070768443467, + "learning_rate": 1.0690596839563602e-05, + "loss": 0.3255, "step": 10805 }, { - "epoch": 1.583882783882784, - "grad_norm": 0.3655902249344232, - "learning_rate": 1.0768995898655168e-05, - "loss": 0.3194, + "epoch": 1.5913440306197555, + "grad_norm": 0.3784813860449901, + "learning_rate": 1.0682052636091122e-05, + "loss": 0.3339, "step": 10810 }, { - "epoch": 1.5846153846153848, - "grad_norm": 0.3427492588722873, - "learning_rate": 1.0760496434573602e-05, - "loss": 0.3196, + "epoch": 1.5920800824378036, + "grad_norm": 0.3684324877590342, + "learning_rate": 1.0673507932340422e-05, + "loss": 0.3293, "step": 10815 }, { - "epoch": 1.5853479853479855, - "grad_norm": 0.3714957174420384, - "learning_rate": 1.0751996417870711e-05, - "loss": 0.3213, + "epoch": 1.5928161342558516, + "grad_norm": 0.35754642686646815, + "learning_rate": 1.0664962734578944e-05, + "loss": 0.3191, "step": 10820 }, { - "epoch": 1.5860805860805862, - "grad_norm": 0.35428875402177135, - "learning_rate": 1.074349585472311e-05, - "loss": 0.3092, + "epoch": 1.5935521860738997, + "grad_norm": 0.3633164982949058, + "learning_rate": 1.0656417049074504e-05, + "loss": 0.317, "step": 10825 }, { - "epoch": 1.586813186813187, - "grad_norm": 0.36841388820423904, - "learning_rate": 1.0734994751307799e-05, - "loss": 0.325, + "epoch": 1.5942882378919476, + "grad_norm": 0.36734435701073914, + "learning_rate": 1.0647870882095266e-05, + "loss": 0.3268, "step": 10830 }, { - "epoch": 1.5875457875457877, - "grad_norm": 0.3614957303732827, - "learning_rate": 1.0726493113802187e-05, - "loss": 0.3294, + "epoch": 1.5950242897099955, + "grad_norm": 0.3591129856402927, + "learning_rate": 1.0639324239909753e-05, + "loss": 0.3177, "step": 10835 }, { - "epoch": 1.5882783882783884, - "grad_norm": 0.3386499493874705, - "learning_rate": 1.0717990948384062e-05, - "loss": 0.3201, + "epoch": 1.5957603415280435, + "grad_norm": 0.3540197079602139, + "learning_rate": 1.0630777128786839e-05, + "loss": 0.3265, "step": 10840 }, { - "epoch": 1.5890109890109891, - "grad_norm": 0.3520618352837802, - "learning_rate": 1.0709488261231592e-05, - "loss": 0.3342, + "epoch": 1.5964963933460916, + "grad_norm": 0.35862077364823264, + "learning_rate": 1.0622229554995727e-05, + "loss": 0.3252, "step": 10845 }, { - "epoch": 1.5897435897435899, - "grad_norm": 0.36867953482519555, - "learning_rate": 1.0700985058523328e-05, - "loss": 0.3228, + "epoch": 1.5972324451641395, + "grad_norm": 0.3712442507726749, + "learning_rate": 1.0613681524805977e-05, + "loss": 0.3315, "step": 10850 }, { - "epoch": 1.5904761904761906, - "grad_norm": 0.35508710260508825, - "learning_rate": 1.0692481346438198e-05, - "loss": 0.3119, + "epoch": 1.5979684969821877, + "grad_norm": 0.36313447074643235, + "learning_rate": 1.0605133044487473e-05, + "loss": 0.3316, "step": 10855 }, { - "epoch": 1.5912087912087913, - "grad_norm": 0.35303838750743477, - "learning_rate": 1.0683977131155503e-05, - "loss": 0.3146, + "epoch": 1.5987045488002356, + "grad_norm": 0.3685281765703767, + "learning_rate": 1.0596584120310436e-05, + "loss": 0.3098, "step": 10860 }, { - "epoch": 1.591941391941392, - "grad_norm": 0.3618356902258964, - "learning_rate": 1.0675472418854894e-05, - "loss": 0.3088, + "epoch": 1.5994406006182835, + "grad_norm": 0.37036564665213156, + "learning_rate": 1.0588034758545406e-05, + "loss": 0.3189, "step": 10865 }, { - "epoch": 1.5926739926739928, - "grad_norm": 0.36667750846484176, - "learning_rate": 1.0666967215716403e-05, - "loss": 0.3309, + "epoch": 1.6001766524363314, + "grad_norm": 0.3455809354941265, + "learning_rate": 1.0579484965463251e-05, + "loss": 0.3185, "step": 10870 }, { - "epoch": 1.5934065934065935, - "grad_norm": 0.3592496842622154, - "learning_rate": 1.06584615279204e-05, - "loss": 0.3298, + "epoch": 1.6009127042543794, + "grad_norm": 0.3689604249317098, + "learning_rate": 1.057093474733515e-05, + "loss": 0.3385, "step": 10875 }, { - "epoch": 1.5941391941391942, - "grad_norm": 0.3710415634025953, - "learning_rate": 1.0649955361647629e-05, - "loss": 0.3237, + "epoch": 1.6016487560724275, + "grad_norm": 0.34771801563235544, + "learning_rate": 1.0562384110432595e-05, + "loss": 0.3111, "step": 10880 }, { - "epoch": 1.594871794871795, - "grad_norm": 0.3735755916958751, - "learning_rate": 1.0641448723079161e-05, - "loss": 0.3398, + "epoch": 1.6023848078904754, + "grad_norm": 0.3631979360473968, + "learning_rate": 1.0553833061027388e-05, + "loss": 0.318, "step": 10885 }, { - "epoch": 1.5956043956043957, - "grad_norm": 0.35845605896563393, - "learning_rate": 1.0632941618396422e-05, - "loss": 0.3265, + "epoch": 1.6031208597085236, + "grad_norm": 0.36583268761609733, + "learning_rate": 1.0545281605391624e-05, + "loss": 0.3399, "step": 10890 }, { - "epoch": 1.5963369963369964, - "grad_norm": 0.36511863236781306, - "learning_rate": 1.062443405378117e-05, - "loss": 0.3239, + "epoch": 1.6038569115265715, + "grad_norm": 0.35643244914326294, + "learning_rate": 1.053672974979771e-05, + "loss": 0.3158, "step": 10895 }, { - "epoch": 1.5970695970695972, - "grad_norm": 0.38137540806176407, - "learning_rate": 1.0615926035415505e-05, - "loss": 0.3323, + "epoch": 1.6045929633446194, + "grad_norm": 0.3802049703572021, + "learning_rate": 1.0528177500518337e-05, + "loss": 0.3368, "step": 10900 }, { - "epoch": 1.597802197802198, - "grad_norm": 0.35783473945763244, - "learning_rate": 1.060741756948185e-05, - "loss": 0.3308, + "epoch": 1.6053290151626673, + "grad_norm": 0.3465421836301589, + "learning_rate": 1.0519624863826487e-05, + "loss": 0.3178, "step": 10905 }, { - "epoch": 1.5985347985347986, - "grad_norm": 0.35899576905804875, - "learning_rate": 1.0598908662162954e-05, - "loss": 0.3167, + "epoch": 1.6060650669807155, + "grad_norm": 0.3566612836576884, + "learning_rate": 1.051107184599543e-05, + "loss": 0.3159, "step": 10910 }, { - "epoch": 1.5992673992673994, - "grad_norm": 0.36913210827949633, - "learning_rate": 1.059039931964189e-05, - "loss": 0.3191, + "epoch": 1.6068011187987634, + "grad_norm": 0.3684066675373982, + "learning_rate": 1.0502518453298705e-05, + "loss": 0.3256, "step": 10915 }, { - "epoch": 1.6, - "grad_norm": 0.3673374725819731, - "learning_rate": 1.0581889548102045e-05, - "loss": 0.3283, + "epoch": 1.6075371706168116, + "grad_norm": 0.3537753053942633, + "learning_rate": 1.0493964692010139e-05, + "loss": 0.3221, "step": 10920 }, { - "epoch": 1.6007326007326008, - "grad_norm": 0.37097335997045566, - "learning_rate": 1.057337935372712e-05, - "loss": 0.3326, + "epoch": 1.6082732224348595, + "grad_norm": 0.36413617741163884, + "learning_rate": 1.048541056840382e-05, + "loss": 0.3277, "step": 10925 }, { - "epoch": 1.6014652014652015, - "grad_norm": 0.3733652663620874, - "learning_rate": 1.0564868742701118e-05, - "loss": 0.3274, + "epoch": 1.6090092742529074, + "grad_norm": 0.36908699563989056, + "learning_rate": 1.0476856088754109e-05, + "loss": 0.3351, "step": 10930 }, { - "epoch": 1.6021978021978023, - "grad_norm": 0.3743462647789397, - "learning_rate": 1.055635772120835e-05, - "loss": 0.3409, + "epoch": 1.6097453260709553, + "grad_norm": 0.3591005393616249, + "learning_rate": 1.0468301259335619e-05, + "loss": 0.3285, "step": 10935 }, { - "epoch": 1.602930402930403, - "grad_norm": 0.36994655865314047, - "learning_rate": 1.0547846295433423e-05, - "loss": 0.3234, + "epoch": 1.6104813778890033, + "grad_norm": 0.3699031485739882, + "learning_rate": 1.0459746086423226e-05, + "loss": 0.3164, "step": 10940 }, { - "epoch": 1.6036630036630037, - "grad_norm": 0.3535248128927632, - "learning_rate": 1.053933447156124e-05, - "loss": 0.3093, + "epoch": 1.6112174297070514, + "grad_norm": 0.36334337911899667, + "learning_rate": 1.045119057629206e-05, + "loss": 0.3154, "step": 10945 }, { - "epoch": 1.6043956043956045, - "grad_norm": 0.35583013158139354, - "learning_rate": 1.0530822255776992e-05, - "loss": 0.3262, + "epoch": 1.6119534815250993, + "grad_norm": 0.36368224707182034, + "learning_rate": 1.0442634735217492e-05, + "loss": 0.3204, "step": 10950 }, { - "epoch": 1.6051282051282052, - "grad_norm": 0.35764098107248543, - "learning_rate": 1.052230965426615e-05, - "loss": 0.3192, + "epoch": 1.6126895333431475, + "grad_norm": 0.35331692575170015, + "learning_rate": 1.0434078569475137e-05, + "loss": 0.3268, "step": 10955 }, { - "epoch": 1.605860805860806, - "grad_norm": 0.3542715998233082, - "learning_rate": 1.0513796673214473e-05, - "loss": 0.3235, + "epoch": 1.6134255851611954, + "grad_norm": 0.3697883544764186, + "learning_rate": 1.0425522085340854e-05, + "loss": 0.3255, "step": 10960 }, { - "epoch": 1.6065934065934067, - "grad_norm": 0.35516004274014784, - "learning_rate": 1.0505283318807993e-05, - "loss": 0.3112, + "epoch": 1.6141616369792433, + "grad_norm": 0.3391213438029729, + "learning_rate": 1.0416965289090732e-05, + "loss": 0.3195, "step": 10965 }, { - "epoch": 1.6073260073260074, - "grad_norm": 0.3525396292954829, - "learning_rate": 1.049676959723301e-05, - "loss": 0.3214, + "epoch": 1.6148976887972912, + "grad_norm": 0.3717901742788422, + "learning_rate": 1.0408408187001084e-05, + "loss": 0.3165, "step": 10970 }, { - "epoch": 1.6080586080586081, - "grad_norm": 0.3612216554197001, - "learning_rate": 1.0488255514676099e-05, - "loss": 0.3283, + "epoch": 1.6156337406153392, + "grad_norm": 0.370841723274499, + "learning_rate": 1.0399850785348456e-05, + "loss": 0.337, "step": 10975 }, { - "epoch": 1.6087912087912088, - "grad_norm": 0.3504714435860718, - "learning_rate": 1.0479741077324082e-05, - "loss": 0.3266, + "epoch": 1.6163697924333873, + "grad_norm": 0.3506437728714445, + "learning_rate": 1.0391293090409606e-05, + "loss": 0.3102, "step": 10980 }, { - "epoch": 1.6095238095238096, - "grad_norm": 0.3771640077062345, - "learning_rate": 1.0471226291364059e-05, - "loss": 0.3344, + "epoch": 1.6171058442514354, + "grad_norm": 0.3857407088614722, + "learning_rate": 1.0382735108461512e-05, + "loss": 0.3198, "step": 10985 }, { - "epoch": 1.6102564102564103, - "grad_norm": 0.3400328640071071, - "learning_rate": 1.0462711162983368e-05, - "loss": 0.3151, + "epoch": 1.6178418960694834, + "grad_norm": 0.357091301270163, + "learning_rate": 1.0374176845781359e-05, + "loss": 0.3204, "step": 10990 }, { - "epoch": 1.610989010989011, - "grad_norm": 0.3829867285536634, - "learning_rate": 1.04541956983696e-05, - "loss": 0.325, + "epoch": 1.6185779478875313, + "grad_norm": 0.36391568727062185, + "learning_rate": 1.0365618308646543e-05, + "loss": 0.3136, "step": 10995 }, { - "epoch": 1.6117216117216118, - "grad_norm": 0.3672879257109648, - "learning_rate": 1.0445679903710593e-05, - "loss": 0.3189, + "epoch": 1.6193139997055792, + "grad_norm": 0.35090557524644195, + "learning_rate": 1.0357059503334659e-05, + "loss": 0.3043, "step": 11000 }, { - "epoch": 1.6124542124542125, - "grad_norm": 0.35467707509195023, - "learning_rate": 1.0437163785194422e-05, - "loss": 0.3244, + "epoch": 1.6200500515236271, + "grad_norm": 0.3617778129695343, + "learning_rate": 1.0348500436123496e-05, + "loss": 0.3351, "step": 11005 }, { - "epoch": 1.6131868131868132, - "grad_norm": 0.38129661853960295, - "learning_rate": 1.04286473490094e-05, - "loss": 0.3303, + "epoch": 1.6207861033416753, + "grad_norm": 0.3707477885752319, + "learning_rate": 1.0339941113291039e-05, + "loss": 0.3114, "step": 11010 }, { - "epoch": 1.613919413919414, - "grad_norm": 0.3733812647570801, - "learning_rate": 1.0420130601344069e-05, - "loss": 0.3224, + "epoch": 1.6215221551597232, + "grad_norm": 0.3677822612090754, + "learning_rate": 1.0331381541115455e-05, + "loss": 0.3162, "step": 11015 }, { - "epoch": 1.6146520146520147, - "grad_norm": 0.36123883288541436, - "learning_rate": 1.0411613548387193e-05, - "loss": 0.319, + "epoch": 1.6222582069777713, + "grad_norm": 0.38415185428291143, + "learning_rate": 1.0322821725875099e-05, + "loss": 0.3334, "step": 11020 }, { - "epoch": 1.6153846153846154, - "grad_norm": 0.351822350363969, - "learning_rate": 1.0403096196327771e-05, - "loss": 0.321, + "epoch": 1.6229942587958193, + "grad_norm": 0.3525281549471282, + "learning_rate": 1.0314261673848503e-05, + "loss": 0.3184, "step": 11025 }, { - "epoch": 1.6161172161172161, - "grad_norm": 0.3526604466203164, - "learning_rate": 1.0394578551355001e-05, - "loss": 0.3244, + "epoch": 1.6237303106138672, + "grad_norm": 0.3595350913903759, + "learning_rate": 1.0305701391314373e-05, + "loss": 0.3199, "step": 11030 }, { - "epoch": 1.6168498168498169, - "grad_norm": 0.3832048490662678, - "learning_rate": 1.038606061965831e-05, - "loss": 0.3294, + "epoch": 1.6244663624319151, + "grad_norm": 0.3758754292921578, + "learning_rate": 1.0297140884551583e-05, + "loss": 0.3135, "step": 11035 }, { - "epoch": 1.6175824175824176, - "grad_norm": 0.35259499703016156, - "learning_rate": 1.0377542407427325e-05, - "loss": 0.2995, + "epoch": 1.625202414249963, + "grad_norm": 0.35343584921681986, + "learning_rate": 1.0288580159839175e-05, + "loss": 0.3146, "step": 11040 }, { - "epoch": 1.6183150183150183, - "grad_norm": 0.3586698360311384, - "learning_rate": 1.036902392085188e-05, - "loss": 0.3324, + "epoch": 1.6259384660680112, + "grad_norm": 0.36261730003499415, + "learning_rate": 1.0280019223456343e-05, + "loss": 0.3325, "step": 11045 }, { - "epoch": 1.619047619047619, - "grad_norm": 0.35723597592106465, - "learning_rate": 1.0360505166122001e-05, - "loss": 0.3226, + "epoch": 1.6266745178860593, + "grad_norm": 0.3518738009250834, + "learning_rate": 1.0271458081682441e-05, + "loss": 0.3217, "step": 11050 }, { - "epoch": 1.6197802197802198, - "grad_norm": 0.350480501809775, - "learning_rate": 1.0351986149427923e-05, - "loss": 0.305, + "epoch": 1.6274105697041072, + "grad_norm": 0.3515017849940598, + "learning_rate": 1.026289674079698e-05, + "loss": 0.3209, "step": 11055 }, { - "epoch": 1.6205128205128205, - "grad_norm": 0.34894633948528614, - "learning_rate": 1.034346687696006e-05, - "loss": 0.3261, + "epoch": 1.6281466215221552, + "grad_norm": 0.3836613497178749, + "learning_rate": 1.0254335207079605e-05, + "loss": 0.3406, "step": 11060 }, { - "epoch": 1.6212454212454213, - "grad_norm": 0.35853179807194685, - "learning_rate": 1.0334947354909016e-05, - "loss": 0.3166, + "epoch": 1.628882673340203, + "grad_norm": 0.3532132535375887, + "learning_rate": 1.0245773486810112e-05, + "loss": 0.3071, "step": 11065 }, { - "epoch": 1.621978021978022, - "grad_norm": 0.35003878951060013, - "learning_rate": 1.0326427589465572e-05, - "loss": 0.3309, + "epoch": 1.629618725158251, + "grad_norm": 0.3797202524827566, + "learning_rate": 1.0237211586268426e-05, + "loss": 0.3177, "step": 11070 }, { - "epoch": 1.6227106227106227, - "grad_norm": 0.3544484958483273, - "learning_rate": 1.0317907586820693e-05, - "loss": 0.324, + "epoch": 1.6303547769762992, + "grad_norm": 0.36629482001705427, + "learning_rate": 1.0228649511734609e-05, + "loss": 0.3172, "step": 11075 }, { - "epoch": 1.6234432234432234, - "grad_norm": 0.36531716273820747, - "learning_rate": 1.030938735316551e-05, - "loss": 0.332, + "epoch": 1.631090828794347, + "grad_norm": 0.37781312306936443, + "learning_rate": 1.0220087269488855e-05, + "loss": 0.328, "step": 11080 }, { - "epoch": 1.6241758241758242, - "grad_norm": 0.35930407251291396, - "learning_rate": 1.0300866894691327e-05, - "loss": 0.3254, + "epoch": 1.6318268806123952, + "grad_norm": 0.35776610699652955, + "learning_rate": 1.021152486581147e-05, + "loss": 0.3179, "step": 11085 }, { - "epoch": 1.624908424908425, - "grad_norm": 0.38384908214237906, - "learning_rate": 1.029234621758961e-05, - "loss": 0.3275, + "epoch": 1.6325629324304431, + "grad_norm": 0.3763015931361535, + "learning_rate": 1.0202962306982888e-05, + "loss": 0.3226, "step": 11090 }, { - "epoch": 1.6256410256410256, - "grad_norm": 0.38532367580929405, - "learning_rate": 1.0283825328051979e-05, - "loss": 0.3287, + "epoch": 1.633298984248491, + "grad_norm": 0.38514429091010766, + "learning_rate": 1.0194399599283651e-05, + "loss": 0.3371, "step": 11095 }, { - "epoch": 1.6263736263736264, - "grad_norm": 0.3773210361204794, - "learning_rate": 1.0275304232270218e-05, - "loss": 0.3135, + "epoch": 1.634035036066539, + "grad_norm": 0.35793241978316737, + "learning_rate": 1.0185836748994415e-05, + "loss": 0.3277, "step": 11100 }, { - "epoch": 1.627106227106227, - "grad_norm": 0.3657496818091057, - "learning_rate": 1.0266782936436245e-05, - "loss": 0.324, + "epoch": 1.634771087884587, + "grad_norm": 0.37497368524438673, + "learning_rate": 1.0177273762395934e-05, + "loss": 0.3288, "step": 11105 }, { - "epoch": 1.6278388278388278, - "grad_norm": 0.36895175386064044, - "learning_rate": 1.0258261446742146e-05, - "loss": 0.3255, + "epoch": 1.635507139702635, + "grad_norm": 0.35607809608183405, + "learning_rate": 1.0168710645769068e-05, + "loss": 0.3252, "step": 11110 }, { - "epoch": 1.6285714285714286, - "grad_norm": 0.3531853261695217, - "learning_rate": 1.0249739769380127e-05, - "loss": 0.3182, + "epoch": 1.6362431915206832, + "grad_norm": 0.35851208555279285, + "learning_rate": 1.0160147405394773e-05, + "loss": 0.3214, "step": 11115 }, { - "epoch": 1.6293040293040293, - "grad_norm": 0.3513094472672362, - "learning_rate": 1.0241217910542538e-05, - "loss": 0.3184, + "epoch": 1.6369792433387311, + "grad_norm": 0.3658319036685307, + "learning_rate": 1.015158404755409e-05, + "loss": 0.3244, "step": 11120 }, { - "epoch": 1.63003663003663, - "grad_norm": 0.37444752388476754, - "learning_rate": 1.0232695876421867e-05, - "loss": 0.3297, + "epoch": 1.637715295156779, + "grad_norm": 0.3558989585624267, + "learning_rate": 1.0143020578528147e-05, + "loss": 0.3233, "step": 11125 }, { - "epoch": 1.6307692307692307, - "grad_norm": 0.3627686960119489, - "learning_rate": 1.0224173673210725e-05, - "loss": 0.3254, + "epoch": 1.638451346974827, + "grad_norm": 0.36495324263581885, + "learning_rate": 1.0134457004598159e-05, + "loss": 0.3301, "step": 11130 }, { - "epoch": 1.6315018315018315, - "grad_norm": 0.36297662351495313, - "learning_rate": 1.0215651307101834e-05, - "loss": 0.3248, + "epoch": 1.639187398792875, + "grad_norm": 0.35825485258982076, + "learning_rate": 1.012589333204542e-05, + "loss": 0.3179, "step": 11135 }, { - "epoch": 1.6322344322344322, - "grad_norm": 0.3851208137308293, - "learning_rate": 1.0207128784288056e-05, - "loss": 0.3271, + "epoch": 1.639923450610923, + "grad_norm": 0.3660277705088314, + "learning_rate": 1.0117329567151283e-05, + "loss": 0.3117, "step": 11140 }, { - "epoch": 1.632967032967033, - "grad_norm": 0.4672639910485386, - "learning_rate": 1.0198606110962352e-05, - "loss": 0.3215, + "epoch": 1.640659502428971, + "grad_norm": 0.3729825922231736, + "learning_rate": 1.0108765716197175e-05, + "loss": 0.3321, "step": 11145 }, { - "epoch": 1.6336996336996337, - "grad_norm": 0.3567162738256589, - "learning_rate": 1.0190083293317799e-05, - "loss": 0.3106, + "epoch": 1.641395554247019, + "grad_norm": 0.36488120158917253, + "learning_rate": 1.0100201785464595e-05, + "loss": 0.3306, "step": 11150 }, { - "epoch": 1.6344322344322344, - "grad_norm": 0.3577431612902422, - "learning_rate": 1.018156033754757e-05, - "loss": 0.3283, + "epoch": 1.642131606065067, + "grad_norm": 0.3659224948954743, + "learning_rate": 1.0091637781235089e-05, + "loss": 0.3207, "step": 11155 }, { - "epoch": 1.6351648351648351, - "grad_norm": 0.35993573230953435, - "learning_rate": 1.017303724984495e-05, - "loss": 0.3169, + "epoch": 1.642867657883115, + "grad_norm": 0.3560749614077936, + "learning_rate": 1.0083073709790263e-05, + "loss": 0.3166, "step": 11160 }, { - "epoch": 1.6358974358974359, - "grad_norm": 0.3653056370504911, - "learning_rate": 1.0164514036403312e-05, - "loss": 0.3401, + "epoch": 1.6436037097011629, + "grad_norm": 0.3676288149442803, + "learning_rate": 1.0074509577411773e-05, + "loss": 0.3174, "step": 11165 }, { - "epoch": 1.6366300366300366, - "grad_norm": 0.3643747070588311, - "learning_rate": 1.015599070341613e-05, - "loss": 0.3242, + "epoch": 1.6443397615192108, + "grad_norm": 0.3643760202709763, + "learning_rate": 1.0065945390381313e-05, + "loss": 0.3288, "step": 11170 }, { - "epoch": 1.6373626373626373, - "grad_norm": 0.34649245210939217, - "learning_rate": 1.0147467257076944e-05, - "loss": 0.3183, + "epoch": 1.645075813337259, + "grad_norm": 0.35903035116306165, + "learning_rate": 1.0057381154980623e-05, + "loss": 0.3159, "step": 11175 }, { - "epoch": 1.638095238095238, - "grad_norm": 0.3457145517754876, - "learning_rate": 1.0138943703579403e-05, - "loss": 0.3211, + "epoch": 1.645811865155307, + "grad_norm": 0.3590465882106527, + "learning_rate": 1.0048816877491478e-05, + "loss": 0.3139, "step": 11180 }, { - "epoch": 1.6388278388278388, - "grad_norm": 0.3605086861180557, - "learning_rate": 1.0130420049117213e-05, - "loss": 0.3178, + "epoch": 1.646547916973355, + "grad_norm": 0.36736710168489134, + "learning_rate": 1.0040252564195685e-05, + "loss": 0.3206, "step": 11185 }, { - "epoch": 1.6395604395604395, - "grad_norm": 0.3679034058115278, - "learning_rate": 1.012189629988417e-05, - "loss": 0.3193, + "epoch": 1.647283968791403, + "grad_norm": 0.33976068327768527, + "learning_rate": 1.0031688221375067e-05, + "loss": 0.32, "step": 11190 }, { - "epoch": 1.6402930402930402, - "grad_norm": 0.3634000193943487, - "learning_rate": 1.0113372462074119e-05, - "loss": 0.3269, + "epoch": 1.6480200206094509, + "grad_norm": 0.3484346612302931, + "learning_rate": 1.0023123855311485e-05, + "loss": 0.3116, "step": 11195 }, { - "epoch": 1.641025641025641, - "grad_norm": 0.3445737908775674, - "learning_rate": 1.0104848541880988e-05, - "loss": 0.3223, + "epoch": 1.6487560724274988, + "grad_norm": 0.3669113171294673, + "learning_rate": 1.0014559472286805e-05, + "loss": 0.3225, "step": 11200 }, { - "epoch": 1.6417582417582417, - "grad_norm": 0.35776440734834947, - "learning_rate": 1.0096324545498753e-05, - "loss": 0.3097, + "epoch": 1.649492124245547, + "grad_norm": 0.3733938014122494, + "learning_rate": 1.000599507858291e-05, + "loss": 0.3289, "step": 11205 }, { - "epoch": 1.6424908424908424, - "grad_norm": 0.3642975159317143, - "learning_rate": 1.0087800479121456e-05, - "loss": 0.3107, + "epoch": 1.6502281760635948, + "grad_norm": 0.3550753000278096, + "learning_rate": 9.997430680481686e-06, + "loss": 0.3129, "step": 11210 }, { - "epoch": 1.6432234432234432, - "grad_norm": 0.38435340333841345, - "learning_rate": 1.0079276348943176e-05, - "loss": 0.3457, + "epoch": 1.650964227881643, + "grad_norm": 0.37822252297886577, + "learning_rate": 9.988866284265037e-06, + "loss": 0.3245, "step": 11215 }, { - "epoch": 1.6439560439560439, - "grad_norm": 0.3575008550302539, - "learning_rate": 1.0070752161158054e-05, - "loss": 0.3337, + "epoch": 1.651700279699691, + "grad_norm": 0.3598989594453636, + "learning_rate": 9.980301896214844e-06, + "loss": 0.3312, "step": 11220 }, { - "epoch": 1.6446886446886446, - "grad_norm": 0.35737103483565313, - "learning_rate": 1.006222792196026e-05, - "loss": 0.338, + "epoch": 1.6524363315177388, + "grad_norm": 0.34117953852475663, + "learning_rate": 9.971737522612997e-06, + "loss": 0.3218, "step": 11225 }, { - "epoch": 1.6454212454212453, - "grad_norm": 0.3607550861529369, - "learning_rate": 1.0053703637544008e-05, - "loss": 0.3266, + "epoch": 1.6531723833357868, + "grad_norm": 0.37219646669441314, + "learning_rate": 9.96317316974137e-06, + "loss": 0.3175, "step": 11230 }, { - "epoch": 1.646153846153846, - "grad_norm": 0.37471399109586223, - "learning_rate": 1.0045179314103541e-05, - "loss": 0.3145, + "epoch": 1.6539084351538347, + "grad_norm": 0.3597108993691857, + "learning_rate": 9.954608843881829e-06, + "loss": 0.3149, "step": 11235 }, { - "epoch": 1.6468864468864468, - "grad_norm": 0.36592659676528055, - "learning_rate": 1.0036654957833136e-05, - "loss": 0.3399, + "epoch": 1.6546444869718828, + "grad_norm": 0.37151719154104074, + "learning_rate": 9.946044551316201e-06, + "loss": 0.3215, "step": 11240 }, { - "epoch": 1.6476190476190475, - "grad_norm": 0.3733831180052479, - "learning_rate": 1.0028130574927086e-05, - "loss": 0.321, + "epoch": 1.655380538789931, + "grad_norm": 0.35416901973604353, + "learning_rate": 9.937480298326315e-06, + "loss": 0.3143, "step": 11245 }, { - "epoch": 1.6483516483516483, - "grad_norm": 0.37609573288460046, - "learning_rate": 1.0019606171579713e-05, - "loss": 0.3293, + "epoch": 1.656116590607979, + "grad_norm": 0.35593571254728973, + "learning_rate": 9.928916091193954e-06, + "loss": 0.3294, "step": 11250 }, { - "epoch": 1.649084249084249, - "grad_norm": 0.38631610438090525, - "learning_rate": 1.0011081753985344e-05, - "loss": 0.3228, + "epoch": 1.6568526424260268, + "grad_norm": 0.3555883291894213, + "learning_rate": 9.920351936200866e-06, + "loss": 0.3098, "step": 11255 }, { - "epoch": 1.6498168498168497, - "grad_norm": 0.3571573245803005, - "learning_rate": 1.0002557328338321e-05, - "loss": 0.3079, + "epoch": 1.6575886942440747, + "grad_norm": 0.3698675521263941, + "learning_rate": 9.911787839628776e-06, + "loss": 0.3161, "step": 11260 }, { - "epoch": 1.6505494505494505, - "grad_norm": 0.36012963207083964, - "learning_rate": 9.994032900832996e-06, - "loss": 0.3235, + "epoch": 1.6583247460621227, + "grad_norm": 0.364093115448152, + "learning_rate": 9.90322380775935e-06, + "loss": 0.3279, "step": 11265 }, { - "epoch": 1.6512820512820512, - "grad_norm": 0.37324023344599405, - "learning_rate": 9.98550847766371e-06, - "loss": 0.3291, + "epoch": 1.6590607978801708, + "grad_norm": 0.3577712807114756, + "learning_rate": 9.89465984687421e-06, + "loss": 0.3234, "step": 11270 }, { - "epoch": 1.652014652014652, - "grad_norm": 0.3582050090756073, - "learning_rate": 9.976984065024821e-06, - "loss": 0.3362, + "epoch": 1.6597968496982187, + "grad_norm": 0.35390192193669356, + "learning_rate": 9.886095963254932e-06, + "loss": 0.3301, "step": 11275 }, { - "epoch": 1.6527472527472526, - "grad_norm": 0.35320476631232933, - "learning_rate": 9.968459669110655e-06, - "loss": 0.3302, + "epoch": 1.6605329015162669, + "grad_norm": 0.36574374597043474, + "learning_rate": 9.87753216318303e-06, + "loss": 0.3271, "step": 11280 }, { - "epoch": 1.6534798534798534, - "grad_norm": 0.3689275784664794, - "learning_rate": 9.959935296115547e-06, - "loss": 0.3321, + "epoch": 1.6612689533343148, + "grad_norm": 0.36397686094226955, + "learning_rate": 9.868968452939963e-06, + "loss": 0.3117, "step": 11285 }, { - "epoch": 1.654212454212454, - "grad_norm": 0.37880888788076, - "learning_rate": 9.951410952233802e-06, - "loss": 0.3334, + "epoch": 1.6620050051523627, + "grad_norm": 0.35990205667438896, + "learning_rate": 9.860404838807113e-06, + "loss": 0.3092, "step": 11290 }, { - "epoch": 1.6549450549450548, - "grad_norm": 0.3660165873618415, - "learning_rate": 9.942886643659709e-06, - "loss": 0.3048, + "epoch": 1.6627410569704106, + "grad_norm": 0.36417756157379905, + "learning_rate": 9.851841327065806e-06, + "loss": 0.32, "step": 11295 }, { - "epoch": 1.6556776556776556, - "grad_norm": 0.36704002452407064, - "learning_rate": 9.934362376587533e-06, - "loss": 0.3183, + "epoch": 1.6634771087884586, + "grad_norm": 0.3614972664039947, + "learning_rate": 9.843277923997278e-06, + "loss": 0.3246, "step": 11300 }, { - "epoch": 1.6564102564102563, - "grad_norm": 0.3660284088454685, - "learning_rate": 9.925838157211502e-06, - "loss": 0.3083, + "epoch": 1.6642131606065067, + "grad_norm": 0.35576121965472, + "learning_rate": 9.834714635882698e-06, + "loss": 0.2991, "step": 11305 }, { - "epoch": 1.657142857142857, - "grad_norm": 0.3471567412573336, - "learning_rate": 9.91731399172582e-06, - "loss": 0.3153, + "epoch": 1.6649492124245548, + "grad_norm": 0.367200520412062, + "learning_rate": 9.826151469003141e-06, + "loss": 0.323, "step": 11310 }, { - "epoch": 1.6578754578754578, - "grad_norm": 0.35843231218545296, - "learning_rate": 9.908789886324643e-06, - "loss": 0.3179, + "epoch": 1.6656852642426028, + "grad_norm": 0.3612485358302765, + "learning_rate": 9.817588429639596e-06, + "loss": 0.3166, "step": 11315 }, { - "epoch": 1.6586080586080585, - "grad_norm": 0.3597872512866094, - "learning_rate": 9.90026584720208e-06, - "loss": 0.3227, + "epoch": 1.6664213160606507, + "grad_norm": 0.35158091722786156, + "learning_rate": 9.809025524072968e-06, + "loss": 0.3067, "step": 11320 }, { - "epoch": 1.6593406593406592, - "grad_norm": 0.379192260668644, - "learning_rate": 9.891741880552209e-06, - "loss": 0.3258, + "epoch": 1.6671573678786986, + "grad_norm": 0.3592537087503226, + "learning_rate": 9.800462758584046e-06, + "loss": 0.3319, "step": 11325 }, { - "epoch": 1.66007326007326, - "grad_norm": 0.36784732716595225, - "learning_rate": 9.883217992569036e-06, - "loss": 0.3348, + "epoch": 1.6678934196967465, + "grad_norm": 0.3697049960418154, + "learning_rate": 9.791900139453531e-06, + "loss": 0.3181, "step": 11330 }, { - "epoch": 1.6608058608058607, - "grad_norm": 0.36793894286584244, - "learning_rate": 9.874694189446519e-06, - "loss": 0.3231, + "epoch": 1.6686294715147947, + "grad_norm": 0.359087187359359, + "learning_rate": 9.783337672962008e-06, + "loss": 0.3198, "step": 11335 }, { - "epoch": 1.6615384615384614, - "grad_norm": 0.35319494338805274, - "learning_rate": 9.866170477378557e-06, - "loss": 0.3331, + "epoch": 1.6693655233328426, + "grad_norm": 0.3534372970274228, + "learning_rate": 9.774775365389954e-06, + "loss": 0.3156, "step": 11340 }, { - "epoch": 1.6622710622710621, - "grad_norm": 0.37349245702611267, - "learning_rate": 9.857646862558974e-06, - "loss": 0.3294, + "epoch": 1.6701015751508907, + "grad_norm": 0.3692204619726876, + "learning_rate": 9.766213223017736e-06, + "loss": 0.3211, "step": 11345 }, { - "epoch": 1.6630036630036629, - "grad_norm": 0.34602637357840227, - "learning_rate": 9.849123351181535e-06, - "loss": 0.3079, + "epoch": 1.6708376269689387, + "grad_norm": 0.37216382490969285, + "learning_rate": 9.757651252125583e-06, + "loss": 0.3217, "step": 11350 }, { - "epoch": 1.6637362637362636, - "grad_norm": 0.37056173134429615, - "learning_rate": 9.840599949439921e-06, - "loss": 0.3333, + "epoch": 1.6715736787869866, + "grad_norm": 0.3561463345746021, + "learning_rate": 9.749089458993616e-06, + "loss": 0.3233, "step": 11355 }, { - "epoch": 1.6644688644688643, - "grad_norm": 0.3509466617996749, - "learning_rate": 9.832076663527731e-06, - "loss": 0.319, + "epoch": 1.6723097306050345, + "grad_norm": 0.4247699534265859, + "learning_rate": 9.740527849901809e-06, + "loss": 0.3292, "step": 11360 }, { - "epoch": 1.665201465201465, - "grad_norm": 0.35369028529684854, - "learning_rate": 9.823553499638493e-06, - "loss": 0.3227, + "epoch": 1.6730457824230824, + "grad_norm": 0.35419797220640364, + "learning_rate": 9.731966431130017e-06, + "loss": 0.3118, "step": 11365 }, { - "epoch": 1.6659340659340658, - "grad_norm": 0.37531209868540477, - "learning_rate": 9.815030463965626e-06, - "loss": 0.3276, + "epoch": 1.6737818342411306, + "grad_norm": 0.3577280841595848, + "learning_rate": 9.723405208957942e-06, + "loss": 0.329, "step": 11370 }, { - "epoch": 1.6666666666666665, - "grad_norm": 0.37552137751925635, - "learning_rate": 9.806507562702484e-06, - "loss": 0.3317, + "epoch": 1.6745178860591787, + "grad_norm": 0.3760532483194485, + "learning_rate": 9.714844189665149e-06, + "loss": 0.3161, "step": 11375 }, { - "epoch": 1.6673992673992672, - "grad_norm": 0.36847104061062425, - "learning_rate": 9.797984802042294e-06, - "loss": 0.3159, + "epoch": 1.6752539378772267, + "grad_norm": 0.3707340009696498, + "learning_rate": 9.70628337953106e-06, + "loss": 0.3023, "step": 11380 }, { - "epoch": 1.668131868131868, - "grad_norm": 0.36028399439737113, - "learning_rate": 9.789462188178194e-06, - "loss": 0.3226, + "epoch": 1.6759899896952746, + "grad_norm": 0.3692362425405184, + "learning_rate": 9.697722784834927e-06, + "loss": 0.3222, "step": 11385 }, { - "epoch": 1.6688644688644687, - "grad_norm": 0.34575748084355296, - "learning_rate": 9.780939727303224e-06, - "loss": 0.313, + "epoch": 1.6767260415133225, + "grad_norm": 0.34301558419759154, + "learning_rate": 9.68916241185586e-06, + "loss": 0.3168, "step": 11390 }, { - "epoch": 1.6695970695970694, - "grad_norm": 0.3810415750955356, - "learning_rate": 9.772417425610298e-06, - "loss": 0.3335, + "epoch": 1.6774620933313704, + "grad_norm": 0.3836449066934659, + "learning_rate": 9.680602266872798e-06, + "loss": 0.3241, "step": 11395 }, { - "epoch": 1.6703296703296702, - "grad_norm": 0.35729218845572247, - "learning_rate": 9.763895289292219e-06, - "loss": 0.3223, + "epoch": 1.6781981451494186, + "grad_norm": 0.37376075700344913, + "learning_rate": 9.672042356164513e-06, + "loss": 0.3178, "step": 11400 }, { - "epoch": 1.671062271062271, - "grad_norm": 0.3572872776478421, - "learning_rate": 9.755373324541676e-06, - "loss": 0.3153, + "epoch": 1.6789341969674665, + "grad_norm": 0.3480720853755703, + "learning_rate": 9.663482686009608e-06, + "loss": 0.313, "step": 11405 }, { - "epoch": 1.6717948717948716, - "grad_norm": 0.3520264191162397, - "learning_rate": 9.746851537551226e-06, - "loss": 0.3242, + "epoch": 1.6796702487855146, + "grad_norm": 0.3760967779127571, + "learning_rate": 9.654923262686505e-06, + "loss": 0.3235, "step": 11410 }, { - "epoch": 1.6725274725274726, - "grad_norm": 0.3658144165650994, - "learning_rate": 9.7383299345133e-06, - "loss": 0.3229, + "epoch": 1.6804063006035626, + "grad_norm": 0.3630609784506012, + "learning_rate": 9.646364092473456e-06, + "loss": 0.321, "step": 11415 }, { - "epoch": 1.6732600732600733, - "grad_norm": 0.36332179684457677, - "learning_rate": 9.729808521620195e-06, - "loss": 0.3305, + "epoch": 1.6811423524216105, + "grad_norm": 0.3430999427290298, + "learning_rate": 9.637805181648512e-06, + "loss": 0.3109, "step": 11420 }, { - "epoch": 1.673992673992674, - "grad_norm": 0.3533756945313288, - "learning_rate": 9.721287305064063e-06, - "loss": 0.3139, + "epoch": 1.6818784042396584, + "grad_norm": 0.3676861284324845, + "learning_rate": 9.62924653648955e-06, + "loss": 0.326, "step": 11425 }, { - "epoch": 1.6747252747252748, - "grad_norm": 0.36557240811520275, - "learning_rate": 9.712766291036934e-06, - "loss": 0.3201, + "epoch": 1.6826144560577063, + "grad_norm": 0.3723415131808058, + "learning_rate": 9.620688163274233e-06, + "loss": 0.3223, "step": 11430 }, { - "epoch": 1.6754578754578755, - "grad_norm": 0.38135701407153655, - "learning_rate": 9.704245485730662e-06, - "loss": 0.3178, + "epoch": 1.6833505078757545, + "grad_norm": 0.35744480513891475, + "learning_rate": 9.612130068280045e-06, + "loss": 0.3028, "step": 11435 }, { - "epoch": 1.6761904761904762, - "grad_norm": 0.3528386651605164, - "learning_rate": 9.695724895336971e-06, - "loss": 0.3067, + "epoch": 1.6840865596938026, + "grad_norm": 0.3729092247273533, + "learning_rate": 9.603572257784248e-06, + "loss": 0.3317, "step": 11440 }, { - "epoch": 1.676923076923077, - "grad_norm": 0.35714768527223584, - "learning_rate": 9.687204526047425e-06, - "loss": 0.3179, + "epoch": 1.6848226115118505, + "grad_norm": 0.37357385427756795, + "learning_rate": 9.595014738063904e-06, + "loss": 0.3238, "step": 11445 }, { - "epoch": 1.6776556776556777, - "grad_norm": 0.3691697304427781, - "learning_rate": 9.678684384053416e-06, - "loss": 0.3324, + "epoch": 1.6855586633298985, + "grad_norm": 0.3558762064092378, + "learning_rate": 9.58645751539587e-06, + "loss": 0.3176, "step": 11450 }, { - "epoch": 1.6783882783882784, - "grad_norm": 0.3557419470671424, - "learning_rate": 9.670164475546185e-06, - "loss": 0.3157, + "epoch": 1.6862947151479464, + "grad_norm": 0.37981691762110004, + "learning_rate": 9.577900596056763e-06, + "loss": 0.3126, "step": 11455 }, { - "epoch": 1.6791208791208792, - "grad_norm": 0.37132815959014553, - "learning_rate": 9.661644806716794e-06, - "loss": 0.3169, + "epoch": 1.6870307669659943, + "grad_norm": 0.3601599487447567, + "learning_rate": 9.569343986323003e-06, + "loss": 0.3193, "step": 11460 }, { - "epoch": 1.6798534798534799, - "grad_norm": 0.37700426652777763, - "learning_rate": 9.653125383756136e-06, - "loss": 0.3254, + "epoch": 1.6877668187840424, + "grad_norm": 0.36503126469877706, + "learning_rate": 9.56078769247076e-06, + "loss": 0.3191, "step": 11465 }, { - "epoch": 1.6805860805860806, - "grad_norm": 0.3802221839669021, - "learning_rate": 9.644606212854928e-06, - "loss": 0.3231, + "epoch": 1.6885028706020904, + "grad_norm": 0.3614226010658769, + "learning_rate": 9.552231720775993e-06, + "loss": 0.3183, "step": 11470 }, { - "epoch": 1.6813186813186813, - "grad_norm": 0.3492871322651001, - "learning_rate": 9.636087300203688e-06, - "loss": 0.3193, + "epoch": 1.6892389224201385, + "grad_norm": 0.3640655776601726, + "learning_rate": 9.543676077514405e-06, + "loss": 0.3237, "step": 11475 }, { - "epoch": 1.682051282051282, - "grad_norm": 0.3491380835719865, - "learning_rate": 9.627568651992769e-06, - "loss": 0.3227, + "epoch": 1.6899749742381864, + "grad_norm": 0.3737650398752546, + "learning_rate": 9.535120768961474e-06, + "loss": 0.323, "step": 11480 }, { - "epoch": 1.6827838827838828, - "grad_norm": 0.35610491273786476, - "learning_rate": 9.619050274412314e-06, - "loss": 0.3249, + "epoch": 1.6907110260562344, + "grad_norm": 0.3737683092309896, + "learning_rate": 9.526565801392427e-06, + "loss": 0.3286, "step": 11485 }, { - "epoch": 1.6835164835164835, - "grad_norm": 0.3630010097767952, - "learning_rate": 9.610532173652277e-06, - "loss": 0.334, + "epoch": 1.6914470778742823, + "grad_norm": 0.36946542307505453, + "learning_rate": 9.518011181082232e-06, + "loss": 0.3197, "step": 11490 }, { - "epoch": 1.6842490842490843, - "grad_norm": 0.35617526499782054, - "learning_rate": 9.602014355902411e-06, - "loss": 0.3163, + "epoch": 1.6921831296923302, + "grad_norm": 0.3662432745684035, + "learning_rate": 9.509456914305621e-06, + "loss": 0.3277, "step": 11495 }, { - "epoch": 1.684981684981685, - "grad_norm": 0.33270492209004626, - "learning_rate": 9.59349682735226e-06, - "loss": 0.3038, + "epoch": 1.6929191815103783, + "grad_norm": 0.3539749203414942, + "learning_rate": 9.500903007337046e-06, + "loss": 0.3221, "step": 11500 }, { - "epoch": 1.6857142857142857, - "grad_norm": 0.36685906208659197, - "learning_rate": 9.584979594191164e-06, - "loss": 0.3267, + "epoch": 1.6936552333284263, + "grad_norm": 0.3700640257217791, + "learning_rate": 9.492349466450713e-06, + "loss": 0.3112, "step": 11505 }, { - "epoch": 1.6864468864468865, - "grad_norm": 0.37653572563578436, - "learning_rate": 9.576462662608242e-06, - "loss": 0.331, + "epoch": 1.6943912851464744, + "grad_norm": 0.35620461331761233, + "learning_rate": 9.483796297920545e-06, + "loss": 0.3243, "step": 11510 }, { - "epoch": 1.6871794871794872, - "grad_norm": 0.35231959569304594, - "learning_rate": 9.567946038792392e-06, - "loss": 0.3092, + "epoch": 1.6951273369645223, + "grad_norm": 0.3786066512503005, + "learning_rate": 9.475243508020205e-06, + "loss": 0.3219, "step": 11515 }, { - "epoch": 1.687912087912088, - "grad_norm": 0.3654106238710355, - "learning_rate": 9.559429728932298e-06, - "loss": 0.3281, + "epoch": 1.6958633887825703, + "grad_norm": 0.3624362374642801, + "learning_rate": 9.466691103023068e-06, + "loss": 0.3197, "step": 11520 }, { - "epoch": 1.6886446886446886, - "grad_norm": 0.3808746833599318, - "learning_rate": 9.550913739216408e-06, - "loss": 0.3379, + "epoch": 1.6965994406006182, + "grad_norm": 0.3764621400962587, + "learning_rate": 9.45813908920223e-06, + "loss": 0.3162, "step": 11525 }, { - "epoch": 1.6893772893772894, - "grad_norm": 0.339177410019425, - "learning_rate": 9.542398075832944e-06, - "loss": 0.3176, + "epoch": 1.6973354924186663, + "grad_norm": 0.36089901406092384, + "learning_rate": 9.449587472830506e-06, + "loss": 0.3214, "step": 11530 }, { - "epoch": 1.69010989010989, - "grad_norm": 0.35545930670267695, - "learning_rate": 9.533882744969881e-06, - "loss": 0.3143, + "epoch": 1.6980715442367142, + "grad_norm": 0.3597727779102053, + "learning_rate": 9.441036260180403e-06, + "loss": 0.3209, "step": 11535 }, { - "epoch": 1.6908424908424908, - "grad_norm": 0.37729381669541545, - "learning_rate": 9.525367752814959e-06, - "loss": 0.3224, + "epoch": 1.6988075960547624, + "grad_norm": 0.3668632424412183, + "learning_rate": 9.432485457524156e-06, + "loss": 0.3002, "step": 11540 }, { - "epoch": 1.6915750915750916, - "grad_norm": 0.36357090958989985, - "learning_rate": 9.51685310555567e-06, - "loss": 0.3332, + "epoch": 1.6995436478728103, + "grad_norm": 0.3534486717975531, + "learning_rate": 9.423935071133675e-06, + "loss": 0.3119, "step": 11545 }, { - "epoch": 1.6923076923076923, - "grad_norm": 0.3669470447992764, - "learning_rate": 9.508338809379262e-06, - "loss": 0.3251, + "epoch": 1.7002796996908582, + "grad_norm": 0.3607190146159087, + "learning_rate": 9.415385107280579e-06, + "loss": 0.314, "step": 11550 }, { - "epoch": 1.693040293040293, - "grad_norm": 0.3537300132509414, - "learning_rate": 9.499824870472713e-06, - "loss": 0.3171, + "epoch": 1.7010157515089062, + "grad_norm": 0.3801212229524694, + "learning_rate": 9.406835572236181e-06, + "loss": 0.3162, "step": 11555 }, { - "epoch": 1.6937728937728938, - "grad_norm": 0.3485674140211883, - "learning_rate": 9.49131129502276e-06, - "loss": 0.3083, + "epoch": 1.701751803326954, + "grad_norm": 0.3582045818399246, + "learning_rate": 9.398286472271463e-06, + "loss": 0.3376, "step": 11560 }, { - "epoch": 1.6945054945054945, - "grad_norm": 0.36353765013873424, - "learning_rate": 9.482798089215857e-06, - "loss": 0.3313, + "epoch": 1.7024878551450022, + "grad_norm": 0.36865534925799104, + "learning_rate": 9.389737813657104e-06, + "loss": 0.3104, "step": 11565 }, { - "epoch": 1.6952380952380952, - "grad_norm": 0.3638997172794509, - "learning_rate": 9.474285259238207e-06, - "loss": 0.332, + "epoch": 1.7032239069630501, + "grad_norm": 0.37979748480494235, + "learning_rate": 9.381189602663446e-06, + "loss": 0.3168, "step": 11570 }, { - "epoch": 1.695970695970696, - "grad_norm": 0.3671892924647035, - "learning_rate": 9.465772811275731e-06, - "loss": 0.3239, + "epoch": 1.7039599587810983, + "grad_norm": 0.35547574158266904, + "learning_rate": 9.372641845560517e-06, + "loss": 0.3324, "step": 11575 }, { - "epoch": 1.6967032967032967, - "grad_norm": 0.3664783726017456, - "learning_rate": 9.457260751514062e-06, - "loss": 0.3307, + "epoch": 1.7046960105991462, + "grad_norm": 0.3735632819775801, + "learning_rate": 9.364094548617994e-06, + "loss": 0.3366, "step": 11580 }, { - "epoch": 1.6974358974358974, - "grad_norm": 0.3614158901243449, - "learning_rate": 9.448749086138577e-06, - "loss": 0.3066, + "epoch": 1.7054320624171941, + "grad_norm": 0.3633804896754463, + "learning_rate": 9.355547718105235e-06, + "loss": 0.3151, "step": 11585 }, { - "epoch": 1.6981684981684981, - "grad_norm": 0.36630132335180743, - "learning_rate": 9.44023782133434e-06, - "loss": 0.3172, + "epoch": 1.706168114235242, + "grad_norm": 0.36624531828629564, + "learning_rate": 9.347001360291248e-06, + "loss": 0.3193, "step": 11590 }, { - "epoch": 1.6989010989010989, - "grad_norm": 0.37453361569912347, - "learning_rate": 9.431726963286145e-06, - "loss": 0.3434, + "epoch": 1.7069041660532902, + "grad_norm": 0.3642397698832851, + "learning_rate": 9.338455481444687e-06, + "loss": 0.316, "step": 11595 }, { - "epoch": 1.6996336996336996, - "grad_norm": 0.3910068476298356, - "learning_rate": 9.423216518178474e-06, - "loss": 0.3227, + "epoch": 1.7076402178713381, + "grad_norm": 0.3544364277571668, + "learning_rate": 9.32991008783387e-06, + "loss": 0.3257, "step": 11600 }, { - "epoch": 1.7003663003663003, - "grad_norm": 0.355405058117633, - "learning_rate": 9.414706492195516e-06, - "loss": 0.3099, + "epoch": 1.7083762696893863, + "grad_norm": 0.360528483261268, + "learning_rate": 9.321365185726742e-06, + "loss": 0.3219, "step": 11605 }, { - "epoch": 1.701098901098901, - "grad_norm": 0.36483327355080997, - "learning_rate": 9.406196891521156e-06, - "loss": 0.324, + "epoch": 1.7091123215074342, + "grad_norm": 0.3602159048395578, + "learning_rate": 9.312820781390904e-06, + "loss": 0.3211, "step": 11610 }, { - "epoch": 1.7018315018315018, - "grad_norm": 0.3502827754750937, - "learning_rate": 9.397687722338972e-06, - "loss": 0.3161, + "epoch": 1.7098483733254821, + "grad_norm": 0.3634016704012471, + "learning_rate": 9.304276881093581e-06, + "loss": 0.3243, "step": 11615 }, { - "epoch": 1.7025641025641025, - "grad_norm": 0.3652283535458358, - "learning_rate": 9.38917899083222e-06, - "loss": 0.3171, + "epoch": 1.71058442514353, + "grad_norm": 0.3721367208511192, + "learning_rate": 9.295733491101627e-06, + "loss": 0.3302, "step": 11620 }, { - "epoch": 1.7032967032967035, - "grad_norm": 0.3592422843651675, - "learning_rate": 9.38067070318385e-06, - "loss": 0.3255, + "epoch": 1.711320476961578, + "grad_norm": 0.37734953264996884, + "learning_rate": 9.287190617681535e-06, + "loss": 0.3252, "step": 11625 }, { - "epoch": 1.7040293040293042, - "grad_norm": 0.36443238950693096, - "learning_rate": 9.372162865576475e-06, - "loss": 0.3262, + "epoch": 1.712056528779626, + "grad_norm": 0.3745177210233044, + "learning_rate": 9.278648267099401e-06, + "loss": 0.326, "step": 11630 }, { - "epoch": 1.704761904761905, - "grad_norm": 0.3560247837436095, - "learning_rate": 9.363655484192398e-06, - "loss": 0.3309, + "epoch": 1.712792580597674, + "grad_norm": 0.344599327114937, + "learning_rate": 9.270106445620955e-06, + "loss": 0.3032, "step": 11635 }, { - "epoch": 1.7054945054945057, - "grad_norm": 0.3625416445524746, - "learning_rate": 9.355148565213575e-06, - "loss": 0.3143, + "epoch": 1.7135286324157222, + "grad_norm": 0.36728269631926297, + "learning_rate": 9.26156515951152e-06, + "loss": 0.3106, "step": 11640 }, { - "epoch": 1.7062271062271064, - "grad_norm": 0.35517830591439836, - "learning_rate": 9.346642114821633e-06, - "loss": 0.3113, + "epoch": 1.71426468423377, + "grad_norm": 0.38568932462723304, + "learning_rate": 9.253024415036048e-06, + "loss": 0.3314, "step": 11645 }, { - "epoch": 1.7069597069597071, - "grad_norm": 0.3638641870666348, - "learning_rate": 9.33813613919786e-06, - "loss": 0.3084, + "epoch": 1.715000736051818, + "grad_norm": 0.35816029310772535, + "learning_rate": 9.244484218459076e-06, + "loss": 0.3153, "step": 11650 }, { - "epoch": 1.7076923076923078, - "grad_norm": 0.37051615835261886, - "learning_rate": 9.329630644523194e-06, - "loss": 0.3158, + "epoch": 1.715736787869866, + "grad_norm": 0.3675921115941209, + "learning_rate": 9.235944576044749e-06, + "loss": 0.3292, "step": 11655 }, { - "epoch": 1.7084249084249086, - "grad_norm": 0.34965271769287465, - "learning_rate": 9.32112563697823e-06, - "loss": 0.3383, + "epoch": 1.7164728396879139, + "grad_norm": 0.3681446675015252, + "learning_rate": 9.227405494056801e-06, + "loss": 0.3168, "step": 11660 }, { - "epoch": 1.7091575091575093, - "grad_norm": 0.38475360882844284, - "learning_rate": 9.312621122743204e-06, - "loss": 0.3254, + "epoch": 1.717208891505962, + "grad_norm": 0.37302842671111597, + "learning_rate": 9.218866978758552e-06, + "loss": 0.306, "step": 11665 }, { - "epoch": 1.70989010989011, - "grad_norm": 0.387561531309359, - "learning_rate": 9.304117107997993e-06, - "loss": 0.3261, + "epoch": 1.7179449433240102, + "grad_norm": 0.36980108376078813, + "learning_rate": 9.21032903641292e-06, + "loss": 0.3295, "step": 11670 }, { - "epoch": 1.7106227106227108, - "grad_norm": 0.3603233156524001, - "learning_rate": 9.295613598922118e-06, - "loss": 0.3316, + "epoch": 1.718680995142058, + "grad_norm": 0.38557337432235433, + "learning_rate": 9.201791673282382e-06, + "loss": 0.3162, "step": 11675 }, { - "epoch": 1.7113553113553115, - "grad_norm": 0.36202320390050996, - "learning_rate": 9.287110601694725e-06, - "loss": 0.3071, + "epoch": 1.719417046960106, + "grad_norm": 0.3481759959979618, + "learning_rate": 9.193254895629008e-06, + "loss": 0.3244, "step": 11680 }, { - "epoch": 1.7120879120879122, - "grad_norm": 0.37107975876343996, - "learning_rate": 9.278608122494583e-06, - "loss": 0.3062, + "epoch": 1.720153098778154, + "grad_norm": 0.3866550958650616, + "learning_rate": 9.184718709714434e-06, + "loss": 0.3291, "step": 11685 }, { - "epoch": 1.712820512820513, - "grad_norm": 0.3485488826316779, - "learning_rate": 9.270106167500104e-06, - "loss": 0.321, + "epoch": 1.7208891505962018, + "grad_norm": 0.3419138818206259, + "learning_rate": 9.176183121799855e-06, + "loss": 0.3125, "step": 11690 }, { - "epoch": 1.7135531135531137, - "grad_norm": 0.3438649627021723, - "learning_rate": 9.2616047428893e-06, - "loss": 0.334, + "epoch": 1.72162520241425, + "grad_norm": 0.37414788069335114, + "learning_rate": 9.167648138146034e-06, + "loss": 0.3473, "step": 11695 }, { - "epoch": 1.7142857142857144, - "grad_norm": 0.3601423798069909, - "learning_rate": 9.253103854839809e-06, - "loss": 0.3275, + "epoch": 1.722361254232298, + "grad_norm": 0.3630243483220065, + "learning_rate": 9.15911376501329e-06, + "loss": 0.3323, "step": 11700 }, { - "epoch": 1.7150183150183151, - "grad_norm": 0.4273422718384637, - "learning_rate": 9.244603509528872e-06, - "loss": 0.309, + "epoch": 1.723097306050346, + "grad_norm": 0.37503200070205966, + "learning_rate": 9.150580008661493e-06, + "loss": 0.3326, "step": 11705 }, { - "epoch": 1.7157509157509159, - "grad_norm": 0.35416457301507553, - "learning_rate": 9.236103713133338e-06, - "loss": 0.3335, + "epoch": 1.723833357868394, + "grad_norm": 0.34626640175721035, + "learning_rate": 9.142046875350057e-06, + "loss": 0.3097, "step": 11710 }, { - "epoch": 1.7164835164835166, - "grad_norm": 0.36121807433007413, - "learning_rate": 9.227604471829661e-06, - "loss": 0.3182, + "epoch": 1.724569409686442, + "grad_norm": 0.36312723593020796, + "learning_rate": 9.133514371337946e-06, + "loss": 0.317, "step": 11715 }, { - "epoch": 1.7172161172161173, - "grad_norm": 0.37011667956356065, - "learning_rate": 9.219105791793887e-06, - "loss": 0.3089, + "epoch": 1.7253054615044898, + "grad_norm": 0.3606522624501329, + "learning_rate": 9.124982502883665e-06, + "loss": 0.3124, "step": 11720 }, { - "epoch": 1.717948717948718, - "grad_norm": 0.35744838489747943, - "learning_rate": 9.210607679201655e-06, - "loss": 0.3202, + "epoch": 1.7260415133225377, + "grad_norm": 0.3533629263205742, + "learning_rate": 9.116451276245236e-06, + "loss": 0.3093, "step": 11725 }, { - "epoch": 1.7186813186813188, - "grad_norm": 0.3618705083407859, - "learning_rate": 9.202110140228195e-06, - "loss": 0.3204, + "epoch": 1.726777565140586, + "grad_norm": 0.4016205710576624, + "learning_rate": 9.107920697680233e-06, + "loss": 0.3313, "step": 11730 }, { - "epoch": 1.7194139194139195, - "grad_norm": 0.3767219311852911, - "learning_rate": 9.193613181048307e-06, - "loss": 0.3156, + "epoch": 1.727513616958634, + "grad_norm": 0.3519027384206317, + "learning_rate": 9.099390773445732e-06, + "loss": 0.3138, "step": 11735 }, { - "epoch": 1.7201465201465203, - "grad_norm": 0.36440152463634456, - "learning_rate": 9.185116807836396e-06, - "loss": 0.3236, + "epoch": 1.728249668776682, + "grad_norm": 0.3380942173230528, + "learning_rate": 9.090861509798346e-06, + "loss": 0.3183, "step": 11740 }, { - "epoch": 1.720879120879121, - "grad_norm": 0.34897844104693576, - "learning_rate": 9.17662102676641e-06, - "loss": 0.325, + "epoch": 1.7289857205947299, + "grad_norm": 0.3718209014692894, + "learning_rate": 9.082332912994197e-06, + "loss": 0.344, "step": 11745 }, { - "epoch": 1.7216117216117217, - "grad_norm": 0.3859538342940784, - "learning_rate": 9.168125844011894e-06, - "loss": 0.3365, + "epoch": 1.7297217724127778, + "grad_norm": 0.36591863526661805, + "learning_rate": 9.073804989288916e-06, + "loss": 0.3246, "step": 11750 }, { - "epoch": 1.7223443223443224, - "grad_norm": 0.38341833728345387, - "learning_rate": 9.15963126574594e-06, - "loss": 0.3243, + "epoch": 1.7304578242308257, + "grad_norm": 0.36798432127102254, + "learning_rate": 9.065277744937649e-06, + "loss": 0.3235, "step": 11755 }, { - "epoch": 1.7230769230769232, - "grad_norm": 0.3816267087520552, - "learning_rate": 9.151137298141202e-06, - "loss": 0.3296, + "epoch": 1.7311938760488739, + "grad_norm": 0.3667674320692904, + "learning_rate": 9.056751186195027e-06, + "loss": 0.3178, "step": 11760 }, { - "epoch": 1.723809523809524, - "grad_norm": 0.36212006407993647, - "learning_rate": 9.142643947369908e-06, - "loss": 0.3214, + "epoch": 1.7319299278669218, + "grad_norm": 0.3576490276811153, + "learning_rate": 9.0482253193152e-06, + "loss": 0.3211, "step": 11765 }, { - "epoch": 1.7245421245421246, - "grad_norm": 0.3664742867450757, - "learning_rate": 9.134151219603814e-06, - "loss": 0.3064, + "epoch": 1.73266597968497, + "grad_norm": 0.3536654300568951, + "learning_rate": 9.03970015055179e-06, + "loss": 0.3158, "step": 11770 }, { - "epoch": 1.7252747252747254, - "grad_norm": 0.3692896138491992, - "learning_rate": 9.125659121014238e-06, - "loss": 0.3225, + "epoch": 1.7334020315030179, + "grad_norm": 0.37498095373415735, + "learning_rate": 9.031175686157922e-06, + "loss": 0.325, "step": 11775 }, { - "epoch": 1.726007326007326, - "grad_norm": 0.34267349535358754, - "learning_rate": 9.11716765777204e-06, - "loss": 0.3242, + "epoch": 1.7341380833210658, + "grad_norm": 0.3617610370844727, + "learning_rate": 9.02265193238619e-06, + "loss": 0.3227, "step": 11780 }, { - "epoch": 1.7267399267399268, - "grad_norm": 0.3637841308342026, - "learning_rate": 9.108676836047607e-06, - "loss": 0.3089, + "epoch": 1.7348741351391137, + "grad_norm": 0.37322089530807573, + "learning_rate": 9.014128895488687e-06, + "loss": 0.3258, "step": 11785 }, { - "epoch": 1.7274725274725276, - "grad_norm": 0.3642777765178664, - "learning_rate": 9.10018666201088e-06, - "loss": 0.3132, + "epoch": 1.7356101869571616, + "grad_norm": 0.38040694565479116, + "learning_rate": 9.005606581716963e-06, + "loss": 0.3224, "step": 11790 }, { - "epoch": 1.7282051282051283, - "grad_norm": 0.35229523365361093, - "learning_rate": 9.09169714183131e-06, - "loss": 0.3117, + "epoch": 1.7363462387752098, + "grad_norm": 0.3656320697872988, + "learning_rate": 8.997084997322037e-06, + "loss": 0.2979, "step": 11795 }, { - "epoch": 1.728937728937729, - "grad_norm": 0.3826853747273903, - "learning_rate": 9.083208281677876e-06, - "loss": 0.3255, + "epoch": 1.737082290593258, + "grad_norm": 0.37893430632244535, + "learning_rate": 8.988564148554407e-06, + "loss": 0.3419, "step": 11800 }, { - "epoch": 1.7296703296703297, - "grad_norm": 0.3543829606636349, - "learning_rate": 9.074720087719092e-06, - "loss": 0.3141, + "epoch": 1.7378183424113058, + "grad_norm": 0.3341585420064362, + "learning_rate": 8.980044041664017e-06, + "loss": 0.3145, "step": 11805 }, { - "epoch": 1.7304029304029305, - "grad_norm": 0.3430715485759192, - "learning_rate": 9.06623256612297e-06, - "loss": 0.3104, + "epoch": 1.7385543942293538, + "grad_norm": 0.3738781275149642, + "learning_rate": 8.97152468290028e-06, + "loss": 0.3229, "step": 11810 }, { - "epoch": 1.7311355311355312, - "grad_norm": 0.3593795927418556, - "learning_rate": 9.057745723057047e-06, - "loss": 0.3098, + "epoch": 1.7392904460474017, + "grad_norm": 0.3634285542240195, + "learning_rate": 8.963006078512045e-06, + "loss": 0.3193, "step": 11815 }, { - "epoch": 1.731868131868132, - "grad_norm": 0.3613407364550061, - "learning_rate": 9.049259564688358e-06, - "loss": 0.3202, + "epoch": 1.7400264978654496, + "grad_norm": 0.3602021798762396, + "learning_rate": 8.954488234747623e-06, + "loss": 0.3019, "step": 11820 }, { - "epoch": 1.7326007326007327, - "grad_norm": 0.37682799720557586, - "learning_rate": 9.04077409718344e-06, - "loss": 0.3301, + "epoch": 1.7407625496834978, + "grad_norm": 0.3588648373572842, + "learning_rate": 8.945971157854759e-06, + "loss": 0.3155, "step": 11825 }, { - "epoch": 1.7333333333333334, - "grad_norm": 0.3620679460262053, - "learning_rate": 9.03228932670834e-06, - "loss": 0.3247, + "epoch": 1.7414986015015457, + "grad_norm": 0.3709253063204643, + "learning_rate": 8.937454854080634e-06, + "loss": 0.3135, "step": 11830 }, { - "epoch": 1.7340659340659341, - "grad_norm": 0.35903382088234326, - "learning_rate": 9.023805259428584e-06, - "loss": 0.3267, + "epoch": 1.7422346533195938, + "grad_norm": 0.3418584837920466, + "learning_rate": 8.928939329671869e-06, + "loss": 0.314, "step": 11835 }, { - "epoch": 1.7347985347985349, - "grad_norm": 0.3696872323845864, - "learning_rate": 9.01532190150919e-06, - "loss": 0.3238, + "epoch": 1.7429707051376417, + "grad_norm": 0.35383193781994043, + "learning_rate": 8.920424590874501e-06, + "loss": 0.323, "step": 11840 }, { - "epoch": 1.7355311355311356, - "grad_norm": 0.3625054588114784, - "learning_rate": 9.006839259114671e-06, - "loss": 0.3114, + "epoch": 1.7437067569556897, + "grad_norm": 0.3814977669249137, + "learning_rate": 8.911910643934009e-06, + "loss": 0.3269, "step": 11845 }, { - "epoch": 1.7362637362637363, - "grad_norm": 0.3959261330186957, - "learning_rate": 8.998357338409008e-06, - "loss": 0.3218, + "epoch": 1.7444428087737376, + "grad_norm": 0.3562890684910219, + "learning_rate": 8.90339749509527e-06, + "loss": 0.3197, "step": 11850 }, { - "epoch": 1.736996336996337, - "grad_norm": 0.3848786684433087, - "learning_rate": 8.989876145555664e-06, - "loss": 0.3185, + "epoch": 1.7451788605917855, + "grad_norm": 0.35410266884530084, + "learning_rate": 8.894885150602593e-06, + "loss": 0.3293, "step": 11855 }, { - "epoch": 1.7377289377289378, - "grad_norm": 0.36761605390162744, - "learning_rate": 8.98139568671757e-06, - "loss": 0.3149, + "epoch": 1.7459149124098337, + "grad_norm": 0.3562446742834736, + "learning_rate": 8.886373616699693e-06, + "loss": 0.3191, "step": 11860 }, { - "epoch": 1.7384615384615385, - "grad_norm": 0.3669698126387999, - "learning_rate": 8.972915968057126e-06, - "loss": 0.3141, + "epoch": 1.7466509642278818, + "grad_norm": 0.3520153963443219, + "learning_rate": 8.877862899629683e-06, + "loss": 0.3114, "step": 11865 }, { - "epoch": 1.7391941391941392, - "grad_norm": 0.3589977908981079, - "learning_rate": 8.964436995736192e-06, - "loss": 0.3278, + "epoch": 1.7473870160459297, + "grad_norm": 0.34841949956418056, + "learning_rate": 8.869353005635083e-06, + "loss": 0.3235, "step": 11870 }, { - "epoch": 1.73992673992674, - "grad_norm": 0.36174481925983926, - "learning_rate": 8.955958775916085e-06, - "loss": 0.3191, + "epoch": 1.7481230678639776, + "grad_norm": 0.3717679710206202, + "learning_rate": 8.860843940957808e-06, + "loss": 0.3228, "step": 11875 }, { - "epoch": 1.7406593406593407, - "grad_norm": 0.3637791217098147, - "learning_rate": 8.947481314757584e-06, - "loss": 0.3245, + "epoch": 1.7488591196820256, + "grad_norm": 0.36003777702495904, + "learning_rate": 8.852335711839163e-06, + "loss": 0.3264, "step": 11880 }, { - "epoch": 1.7413919413919414, - "grad_norm": 0.35930403580444054, - "learning_rate": 8.939004618420904e-06, - "loss": 0.324, + "epoch": 1.7495951715000735, + "grad_norm": 0.37197546550778643, + "learning_rate": 8.843828324519848e-06, + "loss": 0.3265, "step": 11885 }, { - "epoch": 1.7421245421245422, - "grad_norm": 0.3613932801966608, - "learning_rate": 8.930528693065705e-06, - "loss": 0.3003, + "epoch": 1.7503312233181216, + "grad_norm": 0.3618537335555118, + "learning_rate": 8.835321785239933e-06, + "loss": 0.318, "step": 11890 }, { - "epoch": 1.7428571428571429, - "grad_norm": 0.35673925437381887, - "learning_rate": 8.922053544851104e-06, - "loss": 0.3261, + "epoch": 1.7510672751361696, + "grad_norm": 0.3588009092977853, + "learning_rate": 8.826816100238877e-06, + "loss": 0.3242, "step": 11895 }, { - "epoch": 1.7435897435897436, - "grad_norm": 0.3696486051991411, - "learning_rate": 8.913579179935632e-06, - "loss": 0.3016, + "epoch": 1.7518033269542177, + "grad_norm": 0.3517401747717276, + "learning_rate": 8.818311275755503e-06, + "loss": 0.327, "step": 11900 }, { - "epoch": 1.7443223443223443, - "grad_norm": 0.369575499643484, - "learning_rate": 8.905105604477259e-06, - "loss": 0.3193, + "epoch": 1.7525393787722656, + "grad_norm": 0.35204195451593884, + "learning_rate": 8.809807318028014e-06, + "loss": 0.3181, "step": 11905 }, { - "epoch": 1.745054945054945, - "grad_norm": 0.35873594091365585, - "learning_rate": 8.896632824633388e-06, - "loss": 0.3131, + "epoch": 1.7532754305903135, + "grad_norm": 0.3741922261189321, + "learning_rate": 8.801304233293966e-06, + "loss": 0.3193, "step": 11910 }, { - "epoch": 1.7457875457875458, - "grad_norm": 0.36175872599773284, - "learning_rate": 8.888160846560831e-06, - "loss": 0.3375, + "epoch": 1.7540114824083615, + "grad_norm": 0.3420225028684218, + "learning_rate": 8.792802027790277e-06, + "loss": 0.3098, "step": 11915 }, { - "epoch": 1.7465201465201465, - "grad_norm": 0.3535551741500669, - "learning_rate": 8.879689676415831e-06, - "loss": 0.3223, + "epoch": 1.7547475342264094, + "grad_norm": 0.3584657902915247, + "learning_rate": 8.784300707753233e-06, + "loss": 0.3108, "step": 11920 }, { - "epoch": 1.7472527472527473, - "grad_norm": 0.3587953450284838, - "learning_rate": 8.871219320354035e-06, - "loss": 0.3126, + "epoch": 1.7554835860444575, + "grad_norm": 0.3626945700051613, + "learning_rate": 8.775800279418448e-06, + "loss": 0.3323, "step": 11925 }, { - "epoch": 1.747985347985348, - "grad_norm": 0.3728671882691343, - "learning_rate": 8.862749784530496e-06, - "loss": 0.336, + "epoch": 1.7562196378625057, + "grad_norm": 0.359525665805917, + "learning_rate": 8.767300749020903e-06, + "loss": 0.3166, "step": 11930 }, { - "epoch": 1.7487179487179487, - "grad_norm": 0.352639816244091, - "learning_rate": 8.854281075099682e-06, - "loss": 0.3137, + "epoch": 1.7569556896805536, + "grad_norm": 0.3824743319272481, + "learning_rate": 8.758802122794905e-06, + "loss": 0.3222, "step": 11935 }, { - "epoch": 1.7494505494505495, - "grad_norm": 0.3629683606087605, - "learning_rate": 8.845813198215447e-06, - "loss": 0.3089, + "epoch": 1.7576917414986015, + "grad_norm": 0.36741441504023997, + "learning_rate": 8.750304406974112e-06, + "loss": 0.3238, "step": 11940 }, { - "epoch": 1.7501831501831502, - "grad_norm": 0.3629504487027256, - "learning_rate": 8.837346160031058e-06, - "loss": 0.3218, + "epoch": 1.7584277933166494, + "grad_norm": 0.41256846537416036, + "learning_rate": 8.741807607791499e-06, + "loss": 0.323, "step": 11945 }, { - "epoch": 1.750915750915751, - "grad_norm": 0.3700814012190313, - "learning_rate": 8.828879966699154e-06, - "loss": 0.3123, + "epoch": 1.7591638451346974, + "grad_norm": 0.3730241749912984, + "learning_rate": 8.733311731479381e-06, + "loss": 0.3208, "step": 11950 }, { - "epoch": 1.7516483516483516, - "grad_norm": 0.3498093037693907, - "learning_rate": 8.820414624371764e-06, - "loss": 0.3057, + "epoch": 1.7598998969527455, + "grad_norm": 0.36336133028175893, + "learning_rate": 8.72481678426939e-06, + "loss": 0.3129, "step": 11955 }, { - "epoch": 1.7523809523809524, - "grad_norm": 0.3419278360394549, - "learning_rate": 8.811950139200312e-06, - "loss": 0.3249, + "epoch": 1.7606359487707934, + "grad_norm": 0.36831929539521435, + "learning_rate": 8.716322772392475e-06, + "loss": 0.3295, "step": 11960 }, { - "epoch": 1.753113553113553, - "grad_norm": 0.36547061947732895, - "learning_rate": 8.803486517335582e-06, - "loss": 0.3092, + "epoch": 1.7613720005888416, + "grad_norm": 0.34778430647164144, + "learning_rate": 8.707829702078909e-06, + "loss": 0.3094, "step": 11965 }, { - "epoch": 1.7538461538461538, - "grad_norm": 0.3796888728609283, - "learning_rate": 8.79502376492774e-06, - "loss": 0.3167, + "epoch": 1.7621080524068895, + "grad_norm": 0.3512160561283258, + "learning_rate": 8.699337579558256e-06, + "loss": 0.3168, "step": 11970 }, { - "epoch": 1.7545787545787546, - "grad_norm": 0.35939786416676966, - "learning_rate": 8.786561888126322e-06, - "loss": 0.3181, + "epoch": 1.7628441042249374, + "grad_norm": 0.36281175964620516, + "learning_rate": 8.690846411059406e-06, + "loss": 0.3068, "step": 11975 }, { - "epoch": 1.7553113553113553, - "grad_norm": 0.3746500651417673, - "learning_rate": 8.778100893080218e-06, - "loss": 0.3123, + "epoch": 1.7635801560429853, + "grad_norm": 0.3557135443955918, + "learning_rate": 8.682356202810533e-06, + "loss": 0.322, "step": 11980 }, { - "epoch": 1.756043956043956, - "grad_norm": 0.3988165668389479, - "learning_rate": 8.76964078593769e-06, - "loss": 0.3289, + "epoch": 1.7643162078610333, + "grad_norm": 0.3862860265518121, + "learning_rate": 8.673866961039113e-06, + "loss": 0.326, "step": 11985 }, { - "epoch": 1.7567765567765568, - "grad_norm": 0.378046991149681, - "learning_rate": 8.761181572846346e-06, - "loss": 0.3226, + "epoch": 1.7650522596790814, + "grad_norm": 0.36122095809919136, + "learning_rate": 8.665378691971921e-06, + "loss": 0.3188, "step": 11990 }, { - "epoch": 1.7575091575091575, - "grad_norm": 0.3573243416450657, - "learning_rate": 8.752723259953139e-06, - "loss": 0.2911, + "epoch": 1.7657883114971296, + "grad_norm": 0.38796255344158376, + "learning_rate": 8.656891401835e-06, + "loss": 0.31, "step": 11995 }, { - "epoch": 1.7582417582417582, - "grad_norm": 0.3500972735399683, - "learning_rate": 8.744265853404388e-06, - "loss": 0.3132, + "epoch": 1.7665243633151775, + "grad_norm": 0.3826508403570477, + "learning_rate": 8.648405096853693e-06, + "loss": 0.3252, "step": 12000 }, { - "epoch": 1.758974358974359, - "grad_norm": 0.3707317801737707, - "learning_rate": 8.735809359345728e-06, - "loss": 0.3241, + "epoch": 1.7672604151332254, + "grad_norm": 0.3579974076454023, + "learning_rate": 8.639919783252607e-06, + "loss": 0.3112, "step": 12005 }, { - "epoch": 1.7597069597069597, - "grad_norm": 0.38081215733081497, - "learning_rate": 8.727353783922157e-06, - "loss": 0.3282, + "epoch": 1.7679964669512733, + "grad_norm": 0.3760365883238129, + "learning_rate": 8.631435467255633e-06, + "loss": 0.3143, "step": 12010 }, { - "epoch": 1.7604395604395604, - "grad_norm": 0.3799190748276812, - "learning_rate": 8.718899133277982e-06, - "loss": 0.3206, + "epoch": 1.7687325187693212, + "grad_norm": 0.36629558996385087, + "learning_rate": 8.622952155085916e-06, + "loss": 0.3162, "step": 12015 }, { - "epoch": 1.7611721611721611, - "grad_norm": 0.3898396143724759, - "learning_rate": 8.710445413556849e-06, - "loss": 0.3107, + "epoch": 1.7694685705873694, + "grad_norm": 0.3675236452603656, + "learning_rate": 8.614469852965882e-06, + "loss": 0.3236, "step": 12020 }, { - "epoch": 1.7619047619047619, - "grad_norm": 0.35235777614661096, - "learning_rate": 8.701992630901734e-06, - "loss": 0.3001, + "epoch": 1.7702046224054173, + "grad_norm": 0.38324532629745844, + "learning_rate": 8.605988567117209e-06, + "loss": 0.3252, "step": 12025 }, { - "epoch": 1.7626373626373626, - "grad_norm": 0.36150924715042293, - "learning_rate": 8.693540791454918e-06, - "loss": 0.3128, + "epoch": 1.7709406742234655, + "grad_norm": 0.35252116350935897, + "learning_rate": 8.59750830376082e-06, + "loss": 0.314, "step": 12030 }, { - "epoch": 1.7633699633699633, - "grad_norm": 0.36502636344229555, - "learning_rate": 8.685089901358e-06, - "loss": 0.3133, + "epoch": 1.7716767260415134, + "grad_norm": 0.36930180379288086, + "learning_rate": 8.589029069116906e-06, + "loss": 0.3265, "step": 12035 }, { - "epoch": 1.764102564102564, - "grad_norm": 0.3763387916193856, - "learning_rate": 8.676639966751906e-06, - "loss": 0.3213, + "epoch": 1.7724127778595613, + "grad_norm": 0.35890515558951985, + "learning_rate": 8.580550869404885e-06, + "loss": 0.3079, "step": 12040 }, { - "epoch": 1.7648351648351648, - "grad_norm": 0.3657571774478381, - "learning_rate": 8.668190993776839e-06, - "loss": 0.3071, + "epoch": 1.7731488296776092, + "grad_norm": 0.3532222003117065, + "learning_rate": 8.57207371084343e-06, + "loss": 0.3176, "step": 12045 }, { - "epoch": 1.7655677655677655, - "grad_norm": 0.36736547172165485, - "learning_rate": 8.65974298857233e-06, - "loss": 0.3133, + "epoch": 1.7738848814956572, + "grad_norm": 0.38119816410804086, + "learning_rate": 8.563597599650443e-06, + "loss": 0.3038, "step": 12050 }, { - "epoch": 1.7663003663003662, - "grad_norm": 0.35536369325449246, - "learning_rate": 8.65129595727719e-06, - "loss": 0.3114, + "epoch": 1.7746209333137053, + "grad_norm": 0.37005561562251305, + "learning_rate": 8.55512254204306e-06, + "loss": 0.3213, "step": 12055 }, { - "epoch": 1.767032967032967, - "grad_norm": 0.353251646692839, - "learning_rate": 8.642849906029524e-06, - "loss": 0.3229, + "epoch": 1.7753569851317534, + "grad_norm": 0.3584610558736543, + "learning_rate": 8.546648544237646e-06, + "loss": 0.3338, "step": 12060 }, { - "epoch": 1.7677655677655677, - "grad_norm": 0.35091542948003573, - "learning_rate": 8.634404840966735e-06, - "loss": 0.316, + "epoch": 1.7760930369498014, + "grad_norm": 0.3626234503128022, + "learning_rate": 8.538175612449782e-06, + "loss": 0.315, "step": 12065 }, { - "epoch": 1.7684981684981684, - "grad_norm": 0.3632197737658276, - "learning_rate": 8.625960768225497e-06, - "loss": 0.3146, + "epoch": 1.7768290887678493, + "grad_norm": 0.3488058930300539, + "learning_rate": 8.529703752894279e-06, + "loss": 0.3115, "step": 12070 }, { - "epoch": 1.7692307692307692, - "grad_norm": 0.3711999092792661, - "learning_rate": 8.617517693941774e-06, - "loss": 0.3076, + "epoch": 1.7775651405858972, + "grad_norm": 0.37069769076875203, + "learning_rate": 8.521232971785144e-06, + "loss": 0.3269, "step": 12075 }, { - "epoch": 1.76996336996337, - "grad_norm": 0.3717352113676794, - "learning_rate": 8.609075624250797e-06, - "loss": 0.3061, + "epoch": 1.7783011924039451, + "grad_norm": 0.3370641432737643, + "learning_rate": 8.512763275335613e-06, + "loss": 0.32, "step": 12080 }, { - "epoch": 1.7706959706959706, - "grad_norm": 0.3466586631315521, - "learning_rate": 8.600634565287068e-06, - "loss": 0.3139, + "epoch": 1.7790372442219933, + "grad_norm": 0.36673057430849193, + "learning_rate": 8.504294669758112e-06, + "loss": 0.3164, "step": 12085 }, { - "epoch": 1.7714285714285714, - "grad_norm": 0.3707281491997957, - "learning_rate": 8.592194523184358e-06, - "loss": 0.3212, + "epoch": 1.7797732960400412, + "grad_norm": 0.35165694757146687, + "learning_rate": 8.49582716126427e-06, + "loss": 0.3221, "step": 12090 }, { - "epoch": 1.772161172161172, - "grad_norm": 0.378197663128552, - "learning_rate": 8.583755504075691e-06, - "loss": 0.3215, + "epoch": 1.7805093478580893, + "grad_norm": 0.3538281398166754, + "learning_rate": 8.487360756064919e-06, + "loss": 0.3084, "step": 12095 }, { - "epoch": 1.7728937728937728, - "grad_norm": 0.362223401095478, - "learning_rate": 8.575317514093361e-06, - "loss": 0.3154, + "epoch": 1.7812453996761373, + "grad_norm": 0.36598785212209156, + "learning_rate": 8.478895460370066e-06, + "loss": 0.3116, "step": 12100 }, { - "epoch": 1.7736263736263735, - "grad_norm": 0.37039390221107055, - "learning_rate": 8.566880559368904e-06, - "loss": 0.3177, + "epoch": 1.7819814514941852, + "grad_norm": 0.3924230807528141, + "learning_rate": 8.470431280388923e-06, + "loss": 0.3189, "step": 12105 }, { - "epoch": 1.7743589743589743, - "grad_norm": 0.3493911690566796, - "learning_rate": 8.5584446460331e-06, - "loss": 0.3005, + "epoch": 1.782717503312233, + "grad_norm": 0.3708815885384361, + "learning_rate": 8.461968222329867e-06, + "loss": 0.3193, "step": 12110 }, { - "epoch": 1.775091575091575, - "grad_norm": 0.3624882466553709, - "learning_rate": 8.550009780215989e-06, - "loss": 0.3241, + "epoch": 1.783453555130281, + "grad_norm": 0.362426670862962, + "learning_rate": 8.453506292400467e-06, + "loss": 0.3257, "step": 12115 }, { - "epoch": 1.7758241758241757, - "grad_norm": 0.3483831840108606, - "learning_rate": 8.541575968046833e-06, - "loss": 0.3129, + "epoch": 1.7841896069483292, + "grad_norm": 0.378441819659184, + "learning_rate": 8.445045496807449e-06, + "loss": 0.3245, "step": 12120 }, { - "epoch": 1.7765567765567765, - "grad_norm": 0.35044635499766985, - "learning_rate": 8.533143215654133e-06, - "loss": 0.3081, + "epoch": 1.784925658766377, + "grad_norm": 0.36617807113448303, + "learning_rate": 8.436585841756722e-06, + "loss": 0.3237, "step": 12125 }, { - "epoch": 1.7772893772893772, - "grad_norm": 0.35353472094648314, - "learning_rate": 8.524711529165626e-06, - "loss": 0.3267, + "epoch": 1.7856617105844252, + "grad_norm": 0.34036036676664333, + "learning_rate": 8.428127333453348e-06, + "loss": 0.3131, "step": 12130 }, { - "epoch": 1.778021978021978, - "grad_norm": 0.342653777236269, - "learning_rate": 8.516280914708266e-06, - "loss": 0.3096, + "epoch": 1.7863977624024732, + "grad_norm": 0.3542528845990886, + "learning_rate": 8.419669978101548e-06, + "loss": 0.3072, "step": 12135 }, { - "epoch": 1.7787545787545787, - "grad_norm": 0.37301413986706805, - "learning_rate": 8.507851378408237e-06, - "loss": 0.3192, + "epoch": 1.787133814220521, + "grad_norm": 0.37214659955252083, + "learning_rate": 8.411213781904708e-06, + "loss": 0.3093, "step": 12140 }, { - "epoch": 1.7794871794871794, - "grad_norm": 0.377140992048411, - "learning_rate": 8.499422926390934e-06, - "loss": 0.2989, + "epoch": 1.787869866038569, + "grad_norm": 0.3636326726321218, + "learning_rate": 8.402758751065348e-06, + "loss": 0.3148, "step": 12145 }, { - "epoch": 1.7802197802197801, - "grad_norm": 0.36740624146539635, - "learning_rate": 8.490995564780956e-06, - "loss": 0.32, + "epoch": 1.7886059178566172, + "grad_norm": 0.36761872027165726, + "learning_rate": 8.394304891785146e-06, + "loss": 0.3099, "step": 12150 }, { - "epoch": 1.7809523809523808, - "grad_norm": 0.37427727435317387, - "learning_rate": 8.482569299702133e-06, - "loss": 0.3299, + "epoch": 1.789341969674665, + "grad_norm": 0.36991402909033944, + "learning_rate": 8.38585221026492e-06, + "loss": 0.3007, "step": 12155 }, { - "epoch": 1.7816849816849816, - "grad_norm": 0.34890657708463696, - "learning_rate": 8.474144137277467e-06, - "loss": 0.3224, + "epoch": 1.7900780214927132, + "grad_norm": 0.35490913152173836, + "learning_rate": 8.377400712704615e-06, + "loss": 0.303, "step": 12160 }, { - "epoch": 1.7824175824175823, - "grad_norm": 0.3627739903636609, - "learning_rate": 8.46572008362919e-06, - "loss": 0.3171, + "epoch": 1.7908140733107611, + "grad_norm": 0.37416797974475163, + "learning_rate": 8.368950405303313e-06, + "loss": 0.3185, "step": 12165 }, { - "epoch": 1.783150183150183, - "grad_norm": 0.3520214269699448, - "learning_rate": 8.457297144878707e-06, - "loss": 0.2979, + "epoch": 1.791550125128809, + "grad_norm": 0.3690968491397019, + "learning_rate": 8.360501294259226e-06, + "loss": 0.3245, "step": 12170 }, { - "epoch": 1.7838827838827838, - "grad_norm": 0.3284681340561394, - "learning_rate": 8.448875327146616e-06, - "loss": 0.3259, + "epoch": 1.792286176946857, + "grad_norm": 0.3777750945068548, + "learning_rate": 8.352053385769684e-06, + "loss": 0.322, "step": 12175 }, { - "epoch": 1.7846153846153845, - "grad_norm": 0.37039478931971476, - "learning_rate": 8.440454636552711e-06, - "loss": 0.3213, + "epoch": 1.793022228764905, + "grad_norm": 0.3522470081324157, + "learning_rate": 8.343606686031134e-06, + "loss": 0.3299, "step": 12180 }, { - "epoch": 1.7853479853479852, - "grad_norm": 0.35992801895165194, - "learning_rate": 8.432035079215955e-06, - "loss": 0.3109, + "epoch": 1.793758280582953, + "grad_norm": 0.34710180852617534, + "learning_rate": 8.335161201239139e-06, + "loss": 0.3135, "step": 12185 }, { - "epoch": 1.786080586080586, - "grad_norm": 0.3678153677791269, - "learning_rate": 8.423616661254492e-06, - "loss": 0.3083, + "epoch": 1.794494332401001, + "grad_norm": 0.38013564054672294, + "learning_rate": 8.326716937588377e-06, + "loss": 0.3006, "step": 12190 }, { - "epoch": 1.7868131868131867, - "grad_norm": 0.36959737300887124, - "learning_rate": 8.415199388785644e-06, - "loss": 0.3233, + "epoch": 1.7952303842190491, + "grad_norm": 0.36667626878510645, + "learning_rate": 8.318273901272615e-06, + "loss": 0.317, "step": 12195 }, { - "epoch": 1.7875457875457874, - "grad_norm": 0.33852252301682484, - "learning_rate": 8.406783267925885e-06, - "loss": 0.3119, + "epoch": 1.795966436037097, + "grad_norm": 0.34718088134615965, + "learning_rate": 8.309832098484738e-06, + "loss": 0.3026, "step": 12200 }, { - "epoch": 1.7882783882783881, - "grad_norm": 0.35094676778190215, - "learning_rate": 8.398368304790876e-06, - "loss": 0.3237, + "epoch": 1.796702487855145, + "grad_norm": 0.3598725298892135, + "learning_rate": 8.301391535416707e-06, + "loss": 0.3124, "step": 12205 }, { - "epoch": 1.7890109890109889, - "grad_norm": 0.35305394428075426, - "learning_rate": 8.38995450549541e-06, - "loss": 0.3293, + "epoch": 1.797438539673193, + "grad_norm": 0.3589121979620242, + "learning_rate": 8.292952218259593e-06, + "loss": 0.3137, "step": 12210 }, { - "epoch": 1.7897435897435896, - "grad_norm": 0.3681227248765676, - "learning_rate": 8.381541876153452e-06, - "loss": 0.3181, + "epoch": 1.798174591491241, + "grad_norm": 0.356684416404285, + "learning_rate": 8.284514153203539e-06, + "loss": 0.3191, "step": 12215 }, { - "epoch": 1.7904761904761903, - "grad_norm": 0.3733700036678804, - "learning_rate": 8.373130422878112e-06, - "loss": 0.3153, + "epoch": 1.798910643309289, + "grad_norm": 0.3739630741271687, + "learning_rate": 8.27607734643777e-06, + "loss": 0.3224, "step": 12220 }, { - "epoch": 1.791208791208791, - "grad_norm": 0.36975361792702527, - "learning_rate": 8.364720151781646e-06, - "loss": 0.3324, + "epoch": 1.799646695127337, + "grad_norm": 0.3618864468446184, + "learning_rate": 8.267641804150604e-06, + "loss": 0.325, "step": 12225 - }, - { - "epoch": 1.7919413919413918, - "grad_norm": 0.3763464818047654, - "learning_rate": 8.35631106897545e-06, - "loss": 0.3307, - "step": 12230 - }, - { - "epoch": 1.7926739926739925, - "grad_norm": 0.36270037639646124, - "learning_rate": 8.347903180570059e-06, - "loss": 0.3166, - "step": 12235 - }, - { - "epoch": 1.7934065934065933, - "grad_norm": 0.37520757617123063, - "learning_rate": 8.339496492675131e-06, - "loss": 0.3126, - "step": 12240 - }, - { - "epoch": 1.794139194139194, - "grad_norm": 0.371521615358303, - "learning_rate": 8.331091011399465e-06, - "loss": 0.3183, - "step": 12245 - }, - { - "epoch": 1.7948717948717947, - "grad_norm": 0.37233301877276737, - "learning_rate": 8.32268674285098e-06, - "loss": 0.3308, - "step": 12250 - }, - { - "epoch": 1.7956043956043954, - "grad_norm": 0.3527802399491386, - "learning_rate": 8.314283693136698e-06, - "loss": 0.2959, - "step": 12255 - }, - { - "epoch": 1.7963369963369962, - "grad_norm": 0.36589424525701286, - "learning_rate": 8.305881868362781e-06, - "loss": 0.3092, - "step": 12260 - }, - { - "epoch": 1.7970695970695971, - "grad_norm": 0.37079766823485905, - "learning_rate": 8.29748127463448e-06, - "loss": 0.3257, - "step": 12265 - }, - { - "epoch": 1.7978021978021979, - "grad_norm": 0.36814513089002854, - "learning_rate": 8.289081918056163e-06, - "loss": 0.3201, - "step": 12270 - }, - { - "epoch": 1.7985347985347986, - "grad_norm": 0.39298767808091684, - "learning_rate": 8.280683804731294e-06, - "loss": 0.3186, - "step": 12275 - }, - { - "epoch": 1.7992673992673993, - "grad_norm": 0.36487639427802976, - "learning_rate": 8.27228694076243e-06, - "loss": 0.3142, - "step": 12280 - }, - { - "epoch": 1.8, - "grad_norm": 0.35845251340553863, - "learning_rate": 8.263891332251234e-06, - "loss": 0.3308, - "step": 12285 } ], "logging_steps": 5, - "max_steps": 20475, + "max_steps": 20379, "num_input_tokens_seen": 0, "num_train_epochs": 3, - "save_steps": 2048, + "save_steps": 2038, "stateful_callbacks": { "TrainerControl": { "args": { @@ -17240,7 +17156,7 @@ "attributes": {} } }, - "total_flos": 1.0331011254583296e+16, + "total_flos": 1.0280566863691776e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null