{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0353619669145546, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030353619669145547, "grad_norm": 9.667811393737793, "learning_rate": 1e-05, "loss": 5.0202, "step": 1 }, { "epoch": 0.0006070723933829109, "grad_norm": 10.303421974182129, "learning_rate": 2e-05, "loss": 4.7469, "step": 2 }, { "epoch": 0.0009106085900743664, "grad_norm": 7.488056182861328, "learning_rate": 3e-05, "loss": 5.0105, "step": 3 }, { "epoch": 0.0012141447867658219, "grad_norm": 4.885837078094482, "learning_rate": 4e-05, "loss": 4.3945, "step": 4 }, { "epoch": 0.0015176809834572772, "grad_norm": 3.793656587600708, "learning_rate": 5e-05, "loss": 4.0574, "step": 5 }, { "epoch": 0.0018212171801487327, "grad_norm": 3.9249916076660156, "learning_rate": 6e-05, "loss": 3.8179, "step": 6 }, { "epoch": 0.002124753376840188, "grad_norm": 3.4937145709991455, "learning_rate": 7e-05, "loss": 3.5297, "step": 7 }, { "epoch": 0.0024282895735316438, "grad_norm": 2.499041795730591, "learning_rate": 8e-05, "loss": 3.15, "step": 8 }, { "epoch": 0.002731825770223099, "grad_norm": 2.0781290531158447, "learning_rate": 9e-05, "loss": 2.8658, "step": 9 }, { "epoch": 0.0030353619669145544, "grad_norm": 2.0124764442443848, "learning_rate": 0.0001, "loss": 2.6826, "step": 10 }, { "epoch": 0.00333889816360601, "grad_norm": 1.4209256172180176, "learning_rate": 9.99949377341298e-05, "loss": 2.5608, "step": 11 }, { "epoch": 0.0036424343602974654, "grad_norm": 3.176084041595459, "learning_rate": 9.99898754682596e-05, "loss": 2.2416, "step": 12 }, { "epoch": 0.003945970556988921, "grad_norm": 1.4457614421844482, "learning_rate": 9.998481320238939e-05, "loss": 2.1925, "step": 13 }, { "epoch": 0.004249506753680376, "grad_norm": 1.3989348411560059, "learning_rate": 9.997975093651918e-05, "loss": 2.2165, "step": 14 }, { "epoch": 0.004553042950371832, "grad_norm": 1.0647027492523193, "learning_rate": 9.997468867064899e-05, "loss": 2.3486, "step": 15 }, { "epoch": 0.0048565791470632875, "grad_norm": 1.0246940851211548, "learning_rate": 9.996962640477879e-05, "loss": 2.19, "step": 16 }, { "epoch": 0.005160115343754742, "grad_norm": 1.029646873474121, "learning_rate": 9.996456413890858e-05, "loss": 2.4052, "step": 17 }, { "epoch": 0.005463651540446198, "grad_norm": 1.322654128074646, "learning_rate": 9.995950187303838e-05, "loss": 2.1927, "step": 18 }, { "epoch": 0.005767187737137654, "grad_norm": 2.061326026916504, "learning_rate": 9.995443960716817e-05, "loss": 2.4574, "step": 19 }, { "epoch": 0.006070723933829109, "grad_norm": 1.1343607902526855, "learning_rate": 9.994937734129797e-05, "loss": 1.9598, "step": 20 }, { "epoch": 0.0063742601305205645, "grad_norm": 1.13712477684021, "learning_rate": 9.994431507542776e-05, "loss": 2.8643, "step": 21 }, { "epoch": 0.00667779632721202, "grad_norm": 0.8220421671867371, "learning_rate": 9.993925280955756e-05, "loss": 2.0474, "step": 22 }, { "epoch": 0.006981332523903475, "grad_norm": 0.8233473300933838, "learning_rate": 9.993419054368735e-05, "loss": 2.3597, "step": 23 }, { "epoch": 0.007284868720594931, "grad_norm": 0.8661925196647644, "learning_rate": 9.992912827781716e-05, "loss": 2.2163, "step": 24 }, { "epoch": 0.007588404917286387, "grad_norm": 0.7995729446411133, "learning_rate": 9.992406601194695e-05, "loss": 1.8051, "step": 25 }, { "epoch": 0.007891941113977842, "grad_norm": 0.810165286064148, "learning_rate": 9.991900374607675e-05, "loss": 1.9189, "step": 26 }, { "epoch": 0.008195477310669297, "grad_norm": 0.8240752220153809, "learning_rate": 9.991394148020654e-05, "loss": 1.7, "step": 27 }, { "epoch": 0.008499013507360752, "grad_norm": 1.0160635709762573, "learning_rate": 9.990887921433634e-05, "loss": 2.2964, "step": 28 }, { "epoch": 0.008802549704052209, "grad_norm": 0.794966995716095, "learning_rate": 9.990381694846613e-05, "loss": 1.7333, "step": 29 }, { "epoch": 0.009106085900743664, "grad_norm": 0.5594797134399414, "learning_rate": 9.989875468259593e-05, "loss": 2.0925, "step": 30 }, { "epoch": 0.009409622097435118, "grad_norm": 0.8100740909576416, "learning_rate": 9.989369241672572e-05, "loss": 2.1218, "step": 31 }, { "epoch": 0.009713158294126575, "grad_norm": 0.7057996392250061, "learning_rate": 9.988863015085552e-05, "loss": 2.005, "step": 32 }, { "epoch": 0.01001669449081803, "grad_norm": 0.8970999121665955, "learning_rate": 9.988356788498533e-05, "loss": 2.2414, "step": 33 }, { "epoch": 0.010320230687509485, "grad_norm": 0.6290627717971802, "learning_rate": 9.987850561911512e-05, "loss": 2.2422, "step": 34 }, { "epoch": 0.010623766884200941, "grad_norm": 0.5665722489356995, "learning_rate": 9.987344335324492e-05, "loss": 1.9342, "step": 35 }, { "epoch": 0.010927303080892396, "grad_norm": 0.5792561173439026, "learning_rate": 9.986838108737472e-05, "loss": 1.8733, "step": 36 }, { "epoch": 0.011230839277583851, "grad_norm": 0.5264159440994263, "learning_rate": 9.986331882150452e-05, "loss": 2.1739, "step": 37 }, { "epoch": 0.011534375474275308, "grad_norm": 0.5069584250450134, "learning_rate": 9.985825655563431e-05, "loss": 1.6235, "step": 38 }, { "epoch": 0.011837911670966763, "grad_norm": 0.7689110636711121, "learning_rate": 9.985319428976411e-05, "loss": 1.711, "step": 39 }, { "epoch": 0.012141447867658217, "grad_norm": 0.7001574635505676, "learning_rate": 9.98481320238939e-05, "loss": 1.651, "step": 40 }, { "epoch": 0.012444984064349674, "grad_norm": 0.5615801811218262, "learning_rate": 9.98430697580237e-05, "loss": 2.128, "step": 41 }, { "epoch": 0.012748520261041129, "grad_norm": 0.8766308426856995, "learning_rate": 9.983800749215349e-05, "loss": 2.4421, "step": 42 }, { "epoch": 0.013052056457732584, "grad_norm": 0.704547107219696, "learning_rate": 9.983294522628329e-05, "loss": 1.6921, "step": 43 }, { "epoch": 0.01335559265442404, "grad_norm": 0.5749143362045288, "learning_rate": 9.982788296041308e-05, "loss": 2.0173, "step": 44 }, { "epoch": 0.013659128851115495, "grad_norm": 0.7929263710975647, "learning_rate": 9.982282069454289e-05, "loss": 2.1755, "step": 45 }, { "epoch": 0.01396266504780695, "grad_norm": 1.6391934156417847, "learning_rate": 9.981775842867269e-05, "loss": 2.4995, "step": 46 }, { "epoch": 0.014266201244498407, "grad_norm": 0.49616461992263794, "learning_rate": 9.981269616280248e-05, "loss": 2.3363, "step": 47 }, { "epoch": 0.014569737441189862, "grad_norm": 0.614272952079773, "learning_rate": 9.980763389693227e-05, "loss": 2.0277, "step": 48 }, { "epoch": 0.014873273637881317, "grad_norm": 0.6181132197380066, "learning_rate": 9.980257163106207e-05, "loss": 2.2867, "step": 49 }, { "epoch": 0.015176809834572773, "grad_norm": 0.5342630743980408, "learning_rate": 9.979750936519186e-05, "loss": 1.7314, "step": 50 }, { "epoch": 0.015480346031264228, "grad_norm": 0.4582519233226776, "learning_rate": 9.979244709932166e-05, "loss": 1.9893, "step": 51 }, { "epoch": 0.015783882227955685, "grad_norm": 0.5448606014251709, "learning_rate": 9.978738483345145e-05, "loss": 2.3266, "step": 52 }, { "epoch": 0.01608741842464714, "grad_norm": 1.0823545455932617, "learning_rate": 9.978232256758125e-05, "loss": 2.1919, "step": 53 }, { "epoch": 0.016390954621338594, "grad_norm": 0.5506464838981628, "learning_rate": 9.977726030171106e-05, "loss": 2.0735, "step": 54 }, { "epoch": 0.01669449081803005, "grad_norm": 0.568626344203949, "learning_rate": 9.977219803584085e-05, "loss": 2.051, "step": 55 }, { "epoch": 0.016998027014721504, "grad_norm": 0.512907087802887, "learning_rate": 9.976713576997065e-05, "loss": 1.6473, "step": 56 }, { "epoch": 0.017301563211412962, "grad_norm": 0.5541898012161255, "learning_rate": 9.976207350410044e-05, "loss": 1.8184, "step": 57 }, { "epoch": 0.017605099408104417, "grad_norm": 0.5083638429641724, "learning_rate": 9.975701123823024e-05, "loss": 1.7573, "step": 58 }, { "epoch": 0.017908635604795872, "grad_norm": 0.4722895920276642, "learning_rate": 9.975194897236003e-05, "loss": 2.0311, "step": 59 }, { "epoch": 0.018212171801487327, "grad_norm": 0.5068002343177795, "learning_rate": 9.974688670648983e-05, "loss": 2.1245, "step": 60 }, { "epoch": 0.018515707998178782, "grad_norm": 0.5726852416992188, "learning_rate": 9.974182444061962e-05, "loss": 2.1017, "step": 61 }, { "epoch": 0.018819244194870237, "grad_norm": 0.5240160226821899, "learning_rate": 9.973676217474942e-05, "loss": 2.2665, "step": 62 }, { "epoch": 0.019122780391561695, "grad_norm": 0.4728144705295563, "learning_rate": 9.973169990887921e-05, "loss": 2.0537, "step": 63 }, { "epoch": 0.01942631658825315, "grad_norm": 0.47115418314933777, "learning_rate": 9.972663764300902e-05, "loss": 1.2815, "step": 64 }, { "epoch": 0.019729852784944605, "grad_norm": 0.7070208191871643, "learning_rate": 9.972157537713881e-05, "loss": 1.8514, "step": 65 }, { "epoch": 0.02003338898163606, "grad_norm": 0.529069185256958, "learning_rate": 9.971651311126861e-05, "loss": 1.7602, "step": 66 }, { "epoch": 0.020336925178327515, "grad_norm": 0.7532087564468384, "learning_rate": 9.97114508453984e-05, "loss": 2.2168, "step": 67 }, { "epoch": 0.02064046137501897, "grad_norm": 0.5654622912406921, "learning_rate": 9.97063885795282e-05, "loss": 1.9634, "step": 68 }, { "epoch": 0.020943997571710428, "grad_norm": 0.701452910900116, "learning_rate": 9.970132631365799e-05, "loss": 2.044, "step": 69 }, { "epoch": 0.021247533768401883, "grad_norm": 0.5750812888145447, "learning_rate": 9.969626404778779e-05, "loss": 1.8015, "step": 70 }, { "epoch": 0.021551069965093338, "grad_norm": 0.49930402636528015, "learning_rate": 9.969120178191758e-05, "loss": 1.7998, "step": 71 }, { "epoch": 0.021854606161784793, "grad_norm": 0.4348014295101166, "learning_rate": 9.968613951604738e-05, "loss": 1.9959, "step": 72 }, { "epoch": 0.022158142358476247, "grad_norm": 0.5268503427505493, "learning_rate": 9.968107725017719e-05, "loss": 1.8497, "step": 73 }, { "epoch": 0.022461678555167702, "grad_norm": 0.578822135925293, "learning_rate": 9.967601498430698e-05, "loss": 2.3277, "step": 74 }, { "epoch": 0.02276521475185916, "grad_norm": 0.52215975522995, "learning_rate": 9.967095271843677e-05, "loss": 2.1179, "step": 75 }, { "epoch": 0.023068750948550616, "grad_norm": 0.4557477533817291, "learning_rate": 9.966589045256657e-05, "loss": 2.0132, "step": 76 }, { "epoch": 0.02337228714524207, "grad_norm": 0.5032123327255249, "learning_rate": 9.966082818669638e-05, "loss": 1.8608, "step": 77 }, { "epoch": 0.023675823341933525, "grad_norm": 0.42689865827560425, "learning_rate": 9.965576592082617e-05, "loss": 2.0437, "step": 78 }, { "epoch": 0.02397935953862498, "grad_norm": 0.44310206174850464, "learning_rate": 9.965070365495597e-05, "loss": 2.1222, "step": 79 }, { "epoch": 0.024282895735316435, "grad_norm": 0.4377008378505707, "learning_rate": 9.964564138908576e-05, "loss": 2.0418, "step": 80 }, { "epoch": 0.024586431932007893, "grad_norm": 0.35174912214279175, "learning_rate": 9.964057912321556e-05, "loss": 1.6931, "step": 81 }, { "epoch": 0.024889968128699348, "grad_norm": 0.47877687215805054, "learning_rate": 9.963551685734535e-05, "loss": 1.7049, "step": 82 }, { "epoch": 0.025193504325390803, "grad_norm": 0.4063829183578491, "learning_rate": 9.963045459147515e-05, "loss": 1.8611, "step": 83 }, { "epoch": 0.025497040522082258, "grad_norm": 0.4149170219898224, "learning_rate": 9.962539232560496e-05, "loss": 1.9439, "step": 84 }, { "epoch": 0.025800576718773713, "grad_norm": 0.4882602393627167, "learning_rate": 9.962033005973475e-05, "loss": 1.5723, "step": 85 }, { "epoch": 0.026104112915465168, "grad_norm": 0.4600992202758789, "learning_rate": 9.961526779386454e-05, "loss": 2.0142, "step": 86 }, { "epoch": 0.026407649112156626, "grad_norm": 0.43366697430610657, "learning_rate": 9.961020552799434e-05, "loss": 1.9175, "step": 87 }, { "epoch": 0.02671118530884808, "grad_norm": 0.501487135887146, "learning_rate": 9.960514326212413e-05, "loss": 1.5043, "step": 88 }, { "epoch": 0.027014721505539536, "grad_norm": 0.43821993470191956, "learning_rate": 9.960008099625393e-05, "loss": 1.8622, "step": 89 }, { "epoch": 0.02731825770223099, "grad_norm": 0.4433805048465729, "learning_rate": 9.959501873038372e-05, "loss": 1.9459, "step": 90 }, { "epoch": 0.027621793898922446, "grad_norm": 0.4686216115951538, "learning_rate": 9.958995646451352e-05, "loss": 1.7405, "step": 91 }, { "epoch": 0.0279253300956139, "grad_norm": 0.48586198687553406, "learning_rate": 9.958489419864331e-05, "loss": 2.2233, "step": 92 }, { "epoch": 0.02822886629230536, "grad_norm": 0.4018734097480774, "learning_rate": 9.957983193277312e-05, "loss": 2.0027, "step": 93 }, { "epoch": 0.028532402488996814, "grad_norm": 0.4996435344219208, "learning_rate": 9.957476966690292e-05, "loss": 1.5949, "step": 94 }, { "epoch": 0.02883593868568827, "grad_norm": 0.45447826385498047, "learning_rate": 9.956970740103271e-05, "loss": 1.7636, "step": 95 }, { "epoch": 0.029139474882379723, "grad_norm": 0.4209904372692108, "learning_rate": 9.95646451351625e-05, "loss": 1.7523, "step": 96 }, { "epoch": 0.029443011079071178, "grad_norm": 0.3740164637565613, "learning_rate": 9.95595828692923e-05, "loss": 1.9136, "step": 97 }, { "epoch": 0.029746547275762633, "grad_norm": 0.4169963598251343, "learning_rate": 9.95545206034221e-05, "loss": 1.9136, "step": 98 }, { "epoch": 0.03005008347245409, "grad_norm": 0.4683006703853607, "learning_rate": 9.954945833755189e-05, "loss": 2.0657, "step": 99 }, { "epoch": 0.030353619669145546, "grad_norm": 0.4508633017539978, "learning_rate": 9.954439607168169e-05, "loss": 2.1099, "step": 100 }, { "epoch": 0.030657155865837, "grad_norm": 0.4136218726634979, "learning_rate": 9.953933380581148e-05, "loss": 2.0183, "step": 101 }, { "epoch": 0.030960692062528456, "grad_norm": 0.44510790705680847, "learning_rate": 9.953427153994127e-05, "loss": 1.9307, "step": 102 }, { "epoch": 0.031264228259219914, "grad_norm": 0.3713892698287964, "learning_rate": 9.952920927407108e-05, "loss": 1.7017, "step": 103 }, { "epoch": 0.03156776445591137, "grad_norm": 0.47902294993400574, "learning_rate": 9.952414700820088e-05, "loss": 2.1172, "step": 104 }, { "epoch": 0.031871300652602824, "grad_norm": 0.4492317736148834, "learning_rate": 9.951908474233067e-05, "loss": 1.9752, "step": 105 }, { "epoch": 0.03217483684929428, "grad_norm": 0.4096255302429199, "learning_rate": 9.951402247646047e-05, "loss": 1.5511, "step": 106 }, { "epoch": 0.032478373045985734, "grad_norm": 0.39630818367004395, "learning_rate": 9.950896021059026e-05, "loss": 2.11, "step": 107 }, { "epoch": 0.03278190924267719, "grad_norm": 0.42648032307624817, "learning_rate": 9.950389794472006e-05, "loss": 2.1784, "step": 108 }, { "epoch": 0.033085445439368644, "grad_norm": 0.4814178943634033, "learning_rate": 9.949883567884985e-05, "loss": 1.955, "step": 109 }, { "epoch": 0.0333889816360601, "grad_norm": 0.41600191593170166, "learning_rate": 9.949377341297965e-05, "loss": 1.9163, "step": 110 }, { "epoch": 0.03369251783275155, "grad_norm": 0.4610773026943207, "learning_rate": 9.948871114710944e-05, "loss": 1.7934, "step": 111 }, { "epoch": 0.03399605402944301, "grad_norm": 0.43061718344688416, "learning_rate": 9.948364888123925e-05, "loss": 1.9278, "step": 112 }, { "epoch": 0.03429959022613446, "grad_norm": 0.3907497227191925, "learning_rate": 9.947858661536904e-05, "loss": 1.996, "step": 113 }, { "epoch": 0.034603126422825925, "grad_norm": 0.3984166383743286, "learning_rate": 9.947352434949884e-05, "loss": 1.5936, "step": 114 }, { "epoch": 0.03490666261951738, "grad_norm": 0.43406423926353455, "learning_rate": 9.946846208362863e-05, "loss": 1.8866, "step": 115 }, { "epoch": 0.035210198816208835, "grad_norm": 0.45913639664649963, "learning_rate": 9.946339981775843e-05, "loss": 1.972, "step": 116 }, { "epoch": 0.03551373501290029, "grad_norm": 0.42077311873435974, "learning_rate": 9.945833755188822e-05, "loss": 2.0081, "step": 117 }, { "epoch": 0.035817271209591744, "grad_norm": 0.41479435563087463, "learning_rate": 9.945327528601802e-05, "loss": 2.0096, "step": 118 }, { "epoch": 0.0361208074062832, "grad_norm": 0.35669025778770447, "learning_rate": 9.944821302014781e-05, "loss": 2.0074, "step": 119 }, { "epoch": 0.036424343602974654, "grad_norm": 0.4088069796562195, "learning_rate": 9.944315075427761e-05, "loss": 1.817, "step": 120 }, { "epoch": 0.03672787979966611, "grad_norm": 0.49982163310050964, "learning_rate": 9.943808848840742e-05, "loss": 1.9218, "step": 121 }, { "epoch": 0.037031415996357564, "grad_norm": 0.39924055337905884, "learning_rate": 9.943302622253721e-05, "loss": 2.2463, "step": 122 }, { "epoch": 0.03733495219304902, "grad_norm": 0.40462177991867065, "learning_rate": 9.942796395666702e-05, "loss": 2.0844, "step": 123 }, { "epoch": 0.037638488389740474, "grad_norm": 0.43440741300582886, "learning_rate": 9.942290169079681e-05, "loss": 1.8808, "step": 124 }, { "epoch": 0.03794202458643193, "grad_norm": 0.4029730260372162, "learning_rate": 9.941783942492661e-05, "loss": 1.9427, "step": 125 }, { "epoch": 0.03824556078312339, "grad_norm": 0.7807103395462036, "learning_rate": 9.94127771590564e-05, "loss": 1.9072, "step": 126 }, { "epoch": 0.038549096979814845, "grad_norm": 0.5021561980247498, "learning_rate": 9.94077148931862e-05, "loss": 2.0582, "step": 127 }, { "epoch": 0.0388526331765063, "grad_norm": 0.5161197781562805, "learning_rate": 9.9402652627316e-05, "loss": 1.9861, "step": 128 }, { "epoch": 0.039156169373197755, "grad_norm": 0.5553935766220093, "learning_rate": 9.939759036144579e-05, "loss": 2.1893, "step": 129 }, { "epoch": 0.03945970556988921, "grad_norm": 0.4241655170917511, "learning_rate": 9.939252809557558e-05, "loss": 1.9722, "step": 130 }, { "epoch": 0.039763241766580665, "grad_norm": 0.43290001153945923, "learning_rate": 9.938746582970538e-05, "loss": 1.5364, "step": 131 }, { "epoch": 0.04006677796327212, "grad_norm": 0.40089091658592224, "learning_rate": 9.938240356383519e-05, "loss": 1.9686, "step": 132 }, { "epoch": 0.040370314159963575, "grad_norm": 0.4152032434940338, "learning_rate": 9.937734129796498e-05, "loss": 1.913, "step": 133 }, { "epoch": 0.04067385035665503, "grad_norm": 0.4443211555480957, "learning_rate": 9.937227903209478e-05, "loss": 2.2354, "step": 134 }, { "epoch": 0.040977386553346484, "grad_norm": 0.41355323791503906, "learning_rate": 9.936721676622457e-05, "loss": 2.1055, "step": 135 }, { "epoch": 0.04128092275003794, "grad_norm": 0.5837479829788208, "learning_rate": 9.936215450035437e-05, "loss": 1.9085, "step": 136 }, { "epoch": 0.041584458946729394, "grad_norm": 0.40269389748573303, "learning_rate": 9.935709223448416e-05, "loss": 2.0368, "step": 137 }, { "epoch": 0.041887995143420856, "grad_norm": 0.5898969769477844, "learning_rate": 9.935202996861396e-05, "loss": 1.7933, "step": 138 }, { "epoch": 0.04219153134011231, "grad_norm": 0.41117680072784424, "learning_rate": 9.934696770274375e-05, "loss": 1.7452, "step": 139 }, { "epoch": 0.042495067536803766, "grad_norm": 0.5090368390083313, "learning_rate": 9.934190543687354e-05, "loss": 2.0141, "step": 140 }, { "epoch": 0.04279860373349522, "grad_norm": 0.4821307957172394, "learning_rate": 9.933684317100334e-05, "loss": 1.9443, "step": 141 }, { "epoch": 0.043102139930186675, "grad_norm": 0.41939428448677063, "learning_rate": 9.933178090513315e-05, "loss": 1.7401, "step": 142 }, { "epoch": 0.04340567612687813, "grad_norm": 0.4531096816062927, "learning_rate": 9.932671863926294e-05, "loss": 1.9944, "step": 143 }, { "epoch": 0.043709212323569585, "grad_norm": 0.44440799951553345, "learning_rate": 9.932165637339274e-05, "loss": 1.9648, "step": 144 }, { "epoch": 0.04401274852026104, "grad_norm": 0.36847150325775146, "learning_rate": 9.931659410752253e-05, "loss": 2.0638, "step": 145 }, { "epoch": 0.044316284716952495, "grad_norm": 0.6394171118736267, "learning_rate": 9.931153184165233e-05, "loss": 1.9476, "step": 146 }, { "epoch": 0.04461982091364395, "grad_norm": 0.41597506403923035, "learning_rate": 9.930646957578212e-05, "loss": 1.535, "step": 147 }, { "epoch": 0.044923357110335405, "grad_norm": 0.5597077012062073, "learning_rate": 9.930140730991192e-05, "loss": 1.6826, "step": 148 }, { "epoch": 0.045226893307026866, "grad_norm": 0.5532084703445435, "learning_rate": 9.929634504404171e-05, "loss": 1.8063, "step": 149 }, { "epoch": 0.04553042950371832, "grad_norm": 0.467339426279068, "learning_rate": 9.92912827781715e-05, "loss": 2.017, "step": 150 }, { "epoch": 0.045833965700409776, "grad_norm": 0.4054040312767029, "learning_rate": 9.928622051230131e-05, "loss": 1.7582, "step": 151 }, { "epoch": 0.04613750189710123, "grad_norm": 1.2743823528289795, "learning_rate": 9.928115824643111e-05, "loss": 2.0202, "step": 152 }, { "epoch": 0.046441038093792686, "grad_norm": 0.4357397258281708, "learning_rate": 9.92760959805609e-05, "loss": 1.8788, "step": 153 }, { "epoch": 0.04674457429048414, "grad_norm": 2.8793208599090576, "learning_rate": 9.92710337146907e-05, "loss": 2.1204, "step": 154 }, { "epoch": 0.047048110487175596, "grad_norm": 0.9585952162742615, "learning_rate": 9.92659714488205e-05, "loss": 1.9356, "step": 155 }, { "epoch": 0.04735164668386705, "grad_norm": 0.7857603430747986, "learning_rate": 9.926090918295029e-05, "loss": 1.9097, "step": 156 }, { "epoch": 0.047655182880558505, "grad_norm": 0.5259221792221069, "learning_rate": 9.925584691708008e-05, "loss": 2.1589, "step": 157 }, { "epoch": 0.04795871907724996, "grad_norm": 2.793253183364868, "learning_rate": 9.925078465120988e-05, "loss": 1.7202, "step": 158 }, { "epoch": 0.048262255273941415, "grad_norm": 0.4432888627052307, "learning_rate": 9.924572238533967e-05, "loss": 1.9898, "step": 159 }, { "epoch": 0.04856579147063287, "grad_norm": 0.4347291588783264, "learning_rate": 9.924066011946948e-05, "loss": 1.8142, "step": 160 }, { "epoch": 0.04886932766732433, "grad_norm": 5.273514747619629, "learning_rate": 9.923559785359928e-05, "loss": 1.8665, "step": 161 }, { "epoch": 0.04917286386401579, "grad_norm": 0.47988301515579224, "learning_rate": 9.923053558772907e-05, "loss": 1.9439, "step": 162 }, { "epoch": 0.04947640006070724, "grad_norm": 0.3584117293357849, "learning_rate": 9.922547332185887e-05, "loss": 1.8109, "step": 163 }, { "epoch": 0.049779936257398696, "grad_norm": 0.4074074923992157, "learning_rate": 9.922041105598866e-05, "loss": 2.1056, "step": 164 }, { "epoch": 0.05008347245409015, "grad_norm": 3.159336566925049, "learning_rate": 9.921534879011846e-05, "loss": 1.8672, "step": 165 }, { "epoch": 0.050387008650781606, "grad_norm": 0.38132309913635254, "learning_rate": 9.921028652424826e-05, "loss": 1.8423, "step": 166 }, { "epoch": 0.05069054484747306, "grad_norm": 0.39241936802864075, "learning_rate": 9.920522425837806e-05, "loss": 1.5949, "step": 167 }, { "epoch": 0.050994081044164516, "grad_norm": 0.38212037086486816, "learning_rate": 9.920016199250785e-05, "loss": 1.9669, "step": 168 }, { "epoch": 0.05129761724085597, "grad_norm": 0.5353955030441284, "learning_rate": 9.919509972663765e-05, "loss": 2.1806, "step": 169 }, { "epoch": 0.051601153437547426, "grad_norm": 0.4129483699798584, "learning_rate": 9.919003746076744e-05, "loss": 1.8858, "step": 170 }, { "epoch": 0.05190468963423888, "grad_norm": 0.3832380771636963, "learning_rate": 9.918497519489725e-05, "loss": 2.0321, "step": 171 }, { "epoch": 0.052208225830930335, "grad_norm": 0.4078863859176636, "learning_rate": 9.917991292902705e-05, "loss": 1.6213, "step": 172 }, { "epoch": 0.0525117620276218, "grad_norm": 0.38865014910697937, "learning_rate": 9.917485066315684e-05, "loss": 2.0052, "step": 173 }, { "epoch": 0.05281529822431325, "grad_norm": 0.4339440166950226, "learning_rate": 9.916978839728664e-05, "loss": 2.2405, "step": 174 }, { "epoch": 0.05311883442100471, "grad_norm": 0.42063045501708984, "learning_rate": 9.916472613141643e-05, "loss": 1.6529, "step": 175 }, { "epoch": 0.05342237061769616, "grad_norm": 0.4765849709510803, "learning_rate": 9.915966386554623e-05, "loss": 1.9645, "step": 176 }, { "epoch": 0.05372590681438762, "grad_norm": 0.41431936621665955, "learning_rate": 9.915460159967602e-05, "loss": 1.9709, "step": 177 }, { "epoch": 0.05402944301107907, "grad_norm": 0.3591434359550476, "learning_rate": 9.914953933380581e-05, "loss": 1.685, "step": 178 }, { "epoch": 0.054332979207770526, "grad_norm": 0.45483240485191345, "learning_rate": 9.914447706793561e-05, "loss": 1.9362, "step": 179 }, { "epoch": 0.05463651540446198, "grad_norm": 0.5468000173568726, "learning_rate": 9.91394148020654e-05, "loss": 1.6984, "step": 180 }, { "epoch": 0.054940051601153436, "grad_norm": 0.4057190716266632, "learning_rate": 9.913435253619521e-05, "loss": 1.9887, "step": 181 }, { "epoch": 0.05524358779784489, "grad_norm": 0.383211612701416, "learning_rate": 9.912929027032501e-05, "loss": 1.7825, "step": 182 }, { "epoch": 0.055547123994536346, "grad_norm": 0.3480004668235779, "learning_rate": 9.91242280044548e-05, "loss": 1.8721, "step": 183 }, { "epoch": 0.0558506601912278, "grad_norm": 0.47680413722991943, "learning_rate": 9.91191657385846e-05, "loss": 1.8113, "step": 184 }, { "epoch": 0.05615419638791926, "grad_norm": 0.37727096676826477, "learning_rate": 9.911410347271439e-05, "loss": 1.7398, "step": 185 }, { "epoch": 0.05645773258461072, "grad_norm": 0.47738176584243774, "learning_rate": 9.910904120684419e-05, "loss": 1.4651, "step": 186 }, { "epoch": 0.05676126878130217, "grad_norm": 0.44533729553222656, "learning_rate": 9.910397894097398e-05, "loss": 1.5697, "step": 187 }, { "epoch": 0.05706480497799363, "grad_norm": 0.45051974058151245, "learning_rate": 9.909891667510378e-05, "loss": 2.1577, "step": 188 }, { "epoch": 0.05736834117468508, "grad_norm": 0.4709470272064209, "learning_rate": 9.909385440923357e-05, "loss": 2.0486, "step": 189 }, { "epoch": 0.05767187737137654, "grad_norm": 0.4063846170902252, "learning_rate": 9.908879214336338e-05, "loss": 1.5453, "step": 190 }, { "epoch": 0.05797541356806799, "grad_norm": 0.374362587928772, "learning_rate": 9.908372987749317e-05, "loss": 1.5611, "step": 191 }, { "epoch": 0.05827894976475945, "grad_norm": 0.4852111041545868, "learning_rate": 9.907866761162297e-05, "loss": 1.6234, "step": 192 }, { "epoch": 0.0585824859614509, "grad_norm": 0.6863122582435608, "learning_rate": 9.907360534575276e-05, "loss": 2.1612, "step": 193 }, { "epoch": 0.058886022158142357, "grad_norm": 0.6040588021278381, "learning_rate": 9.906854307988256e-05, "loss": 2.1092, "step": 194 }, { "epoch": 0.05918955835483381, "grad_norm": 0.4148467779159546, "learning_rate": 9.906348081401235e-05, "loss": 2.1108, "step": 195 }, { "epoch": 0.059493094551525266, "grad_norm": 0.36098209023475647, "learning_rate": 9.905841854814215e-05, "loss": 2.0002, "step": 196 }, { "epoch": 0.05979663074821673, "grad_norm": 0.42360183596611023, "learning_rate": 9.905335628227194e-05, "loss": 2.3124, "step": 197 }, { "epoch": 0.06010016694490818, "grad_norm": 0.3650914430618286, "learning_rate": 9.904829401640174e-05, "loss": 1.8778, "step": 198 }, { "epoch": 0.06040370314159964, "grad_norm": 0.392995148897171, "learning_rate": 9.904323175053155e-05, "loss": 2.16, "step": 199 }, { "epoch": 0.06070723933829109, "grad_norm": 0.46390387415885925, "learning_rate": 9.903816948466134e-05, "loss": 1.8695, "step": 200 }, { "epoch": 0.06101077553498255, "grad_norm": 0.3954870402812958, "learning_rate": 9.903310721879114e-05, "loss": 1.9233, "step": 201 }, { "epoch": 0.061314311731674, "grad_norm": 0.3650193214416504, "learning_rate": 9.902804495292093e-05, "loss": 2.2504, "step": 202 }, { "epoch": 0.06161784792836546, "grad_norm": 0.3582104742527008, "learning_rate": 9.902298268705073e-05, "loss": 1.9303, "step": 203 }, { "epoch": 0.06192138412505691, "grad_norm": 0.35688868165016174, "learning_rate": 9.901792042118052e-05, "loss": 1.7078, "step": 204 }, { "epoch": 0.06222492032174837, "grad_norm": 0.3666802942752838, "learning_rate": 9.901285815531031e-05, "loss": 1.941, "step": 205 }, { "epoch": 0.06252845651843983, "grad_norm": 0.42375093698501587, "learning_rate": 9.900779588944011e-05, "loss": 2.0858, "step": 206 }, { "epoch": 0.06283199271513128, "grad_norm": 0.3913770318031311, "learning_rate": 9.90027336235699e-05, "loss": 2.1423, "step": 207 }, { "epoch": 0.06313552891182274, "grad_norm": 0.4101809859275818, "learning_rate": 9.89976713576997e-05, "loss": 2.0497, "step": 208 }, { "epoch": 0.06343906510851419, "grad_norm": 0.3696439564228058, "learning_rate": 9.899260909182951e-05, "loss": 1.9692, "step": 209 }, { "epoch": 0.06374260130520565, "grad_norm": 0.3725574016571045, "learning_rate": 9.89875468259593e-05, "loss": 2.2053, "step": 210 }, { "epoch": 0.0640461375018971, "grad_norm": 0.4886903166770935, "learning_rate": 9.898248456008911e-05, "loss": 1.8981, "step": 211 }, { "epoch": 0.06434967369858856, "grad_norm": 0.4423249661922455, "learning_rate": 9.89774222942189e-05, "loss": 1.9058, "step": 212 }, { "epoch": 0.06465320989528, "grad_norm": 0.4045765697956085, "learning_rate": 9.89723600283487e-05, "loss": 1.8056, "step": 213 }, { "epoch": 0.06495674609197147, "grad_norm": 0.43866047263145447, "learning_rate": 9.89672977624785e-05, "loss": 1.6315, "step": 214 }, { "epoch": 0.06526028228866293, "grad_norm": 0.524714469909668, "learning_rate": 9.896223549660829e-05, "loss": 2.0156, "step": 215 }, { "epoch": 0.06556381848535438, "grad_norm": 0.3752996325492859, "learning_rate": 9.895717323073808e-05, "loss": 2.2768, "step": 216 }, { "epoch": 0.06586735468204584, "grad_norm": 0.4371670186519623, "learning_rate": 9.895211096486788e-05, "loss": 2.0755, "step": 217 }, { "epoch": 0.06617089087873729, "grad_norm": 0.3751063644886017, "learning_rate": 9.894704869899767e-05, "loss": 2.2451, "step": 218 }, { "epoch": 0.06647442707542875, "grad_norm": 0.6649600267410278, "learning_rate": 9.894198643312747e-05, "loss": 1.9835, "step": 219 }, { "epoch": 0.0667779632721202, "grad_norm": 0.3941735625267029, "learning_rate": 9.893692416725728e-05, "loss": 2.0203, "step": 220 }, { "epoch": 0.06708149946881166, "grad_norm": 0.41888293623924255, "learning_rate": 9.893186190138707e-05, "loss": 1.7572, "step": 221 }, { "epoch": 0.0673850356655031, "grad_norm": 0.4820149838924408, "learning_rate": 9.892679963551687e-05, "loss": 2.0591, "step": 222 }, { "epoch": 0.06768857186219457, "grad_norm": 0.3516736626625061, "learning_rate": 9.892173736964666e-05, "loss": 1.9398, "step": 223 }, { "epoch": 0.06799210805888602, "grad_norm": 0.3873218894004822, "learning_rate": 9.891667510377646e-05, "loss": 1.6389, "step": 224 }, { "epoch": 0.06829564425557748, "grad_norm": 0.3793487846851349, "learning_rate": 9.891161283790625e-05, "loss": 2.0075, "step": 225 }, { "epoch": 0.06859918045226893, "grad_norm": 0.38987675309181213, "learning_rate": 9.890655057203605e-05, "loss": 2.0903, "step": 226 }, { "epoch": 0.06890271664896039, "grad_norm": 0.4293549358844757, "learning_rate": 9.890148830616584e-05, "loss": 2.2099, "step": 227 }, { "epoch": 0.06920625284565185, "grad_norm": 0.39895692467689514, "learning_rate": 9.889642604029564e-05, "loss": 1.8615, "step": 228 }, { "epoch": 0.0695097890423433, "grad_norm": 0.4543936252593994, "learning_rate": 9.889136377442544e-05, "loss": 2.0828, "step": 229 }, { "epoch": 0.06981332523903476, "grad_norm": 0.448477566242218, "learning_rate": 9.888630150855524e-05, "loss": 1.5524, "step": 230 }, { "epoch": 0.07011686143572621, "grad_norm": 0.428975373506546, "learning_rate": 9.888123924268503e-05, "loss": 1.3828, "step": 231 }, { "epoch": 0.07042039763241767, "grad_norm": 0.42287349700927734, "learning_rate": 9.887617697681483e-05, "loss": 2.096, "step": 232 }, { "epoch": 0.07072393382910912, "grad_norm": 0.43614649772644043, "learning_rate": 9.887111471094462e-05, "loss": 1.8238, "step": 233 }, { "epoch": 0.07102747002580058, "grad_norm": 0.47309553623199463, "learning_rate": 9.886605244507442e-05, "loss": 2.3526, "step": 234 }, { "epoch": 0.07133100622249203, "grad_norm": 0.9558483362197876, "learning_rate": 9.886099017920421e-05, "loss": 1.9816, "step": 235 }, { "epoch": 0.07163454241918349, "grad_norm": 0.3529858887195587, "learning_rate": 9.885592791333401e-05, "loss": 2.0314, "step": 236 }, { "epoch": 0.07193807861587494, "grad_norm": 0.37652599811553955, "learning_rate": 9.88508656474638e-05, "loss": 1.9381, "step": 237 }, { "epoch": 0.0722416148125664, "grad_norm": 0.40783143043518066, "learning_rate": 9.884580338159361e-05, "loss": 1.966, "step": 238 }, { "epoch": 0.07254515100925786, "grad_norm": 0.4160328805446625, "learning_rate": 9.88407411157234e-05, "loss": 1.8176, "step": 239 }, { "epoch": 0.07284868720594931, "grad_norm": 0.4397304952144623, "learning_rate": 9.88356788498532e-05, "loss": 1.6766, "step": 240 }, { "epoch": 0.07315222340264077, "grad_norm": 0.42549702525138855, "learning_rate": 9.8830616583983e-05, "loss": 2.1176, "step": 241 }, { "epoch": 0.07345575959933222, "grad_norm": 0.3747939169406891, "learning_rate": 9.882555431811279e-05, "loss": 1.5494, "step": 242 }, { "epoch": 0.07375929579602368, "grad_norm": 3.4551990032196045, "learning_rate": 9.882049205224258e-05, "loss": 2.0336, "step": 243 }, { "epoch": 0.07406283199271513, "grad_norm": 1.5632964372634888, "learning_rate": 9.881542978637238e-05, "loss": 1.7452, "step": 244 }, { "epoch": 0.07436636818940659, "grad_norm": 0.41575855016708374, "learning_rate": 9.881036752050217e-05, "loss": 2.0243, "step": 245 }, { "epoch": 0.07466990438609804, "grad_norm": 0.44168713688850403, "learning_rate": 9.880530525463197e-05, "loss": 2.0022, "step": 246 }, { "epoch": 0.0749734405827895, "grad_norm": 0.46640321612358093, "learning_rate": 9.880024298876176e-05, "loss": 1.555, "step": 247 }, { "epoch": 0.07527697677948095, "grad_norm": 0.3622835576534271, "learning_rate": 9.879518072289157e-05, "loss": 1.876, "step": 248 }, { "epoch": 0.07558051297617241, "grad_norm": 0.6277987957000732, "learning_rate": 9.879011845702137e-05, "loss": 2.2753, "step": 249 }, { "epoch": 0.07588404917286386, "grad_norm": 0.40246644616127014, "learning_rate": 9.878505619115116e-05, "loss": 1.5991, "step": 250 }, { "epoch": 0.07618758536955532, "grad_norm": 0.38388529419898987, "learning_rate": 9.877999392528096e-05, "loss": 1.9226, "step": 251 }, { "epoch": 0.07649112156624678, "grad_norm": 0.39985090494155884, "learning_rate": 9.877493165941075e-05, "loss": 2.0722, "step": 252 }, { "epoch": 0.07679465776293823, "grad_norm": 0.3872128427028656, "learning_rate": 9.876986939354055e-05, "loss": 1.9132, "step": 253 }, { "epoch": 0.07709819395962969, "grad_norm": 0.3665171265602112, "learning_rate": 9.876480712767034e-05, "loss": 1.6244, "step": 254 }, { "epoch": 0.07740173015632114, "grad_norm": 0.4011310040950775, "learning_rate": 9.875974486180015e-05, "loss": 2.1289, "step": 255 }, { "epoch": 0.0777052663530126, "grad_norm": 0.35013166069984436, "learning_rate": 9.875468259592994e-05, "loss": 1.9738, "step": 256 }, { "epoch": 0.07800880254970405, "grad_norm": 0.48468607664108276, "learning_rate": 9.874962033005974e-05, "loss": 2.1368, "step": 257 }, { "epoch": 0.07831233874639551, "grad_norm": 0.5015551447868347, "learning_rate": 9.874455806418953e-05, "loss": 2.1218, "step": 258 }, { "epoch": 0.07861587494308696, "grad_norm": 0.41915133595466614, "learning_rate": 9.873949579831934e-05, "loss": 2.0052, "step": 259 }, { "epoch": 0.07891941113977842, "grad_norm": 0.4414760172367096, "learning_rate": 9.873443353244914e-05, "loss": 1.7249, "step": 260 }, { "epoch": 0.07922294733646987, "grad_norm": 0.47259169816970825, "learning_rate": 9.872937126657893e-05, "loss": 2.1041, "step": 261 }, { "epoch": 0.07952648353316133, "grad_norm": 0.3689124882221222, "learning_rate": 9.872430900070873e-05, "loss": 1.8956, "step": 262 }, { "epoch": 0.07983001972985279, "grad_norm": 0.3948320150375366, "learning_rate": 9.871924673483852e-05, "loss": 1.9211, "step": 263 }, { "epoch": 0.08013355592654424, "grad_norm": 0.4235248267650604, "learning_rate": 9.871418446896832e-05, "loss": 1.7115, "step": 264 }, { "epoch": 0.0804370921232357, "grad_norm": 0.48399198055267334, "learning_rate": 9.870912220309811e-05, "loss": 1.77, "step": 265 }, { "epoch": 0.08074062831992715, "grad_norm": 0.34047526121139526, "learning_rate": 9.87040599372279e-05, "loss": 1.7189, "step": 266 }, { "epoch": 0.08104416451661861, "grad_norm": 0.47203269600868225, "learning_rate": 9.86989976713577e-05, "loss": 1.7674, "step": 267 }, { "epoch": 0.08134770071331006, "grad_norm": 0.3752756118774414, "learning_rate": 9.869393540548751e-05, "loss": 1.8716, "step": 268 }, { "epoch": 0.08165123691000152, "grad_norm": 0.3437153697013855, "learning_rate": 9.86888731396173e-05, "loss": 1.9824, "step": 269 }, { "epoch": 0.08195477310669297, "grad_norm": 0.4854094088077545, "learning_rate": 9.86838108737471e-05, "loss": 1.4385, "step": 270 }, { "epoch": 0.08225830930338443, "grad_norm": 0.37674829363822937, "learning_rate": 9.86787486078769e-05, "loss": 1.7877, "step": 271 }, { "epoch": 0.08256184550007588, "grad_norm": 0.4215140640735626, "learning_rate": 9.867368634200669e-05, "loss": 2.1854, "step": 272 }, { "epoch": 0.08286538169676734, "grad_norm": 0.3680359423160553, "learning_rate": 9.866862407613648e-05, "loss": 2.104, "step": 273 }, { "epoch": 0.08316891789345879, "grad_norm": 0.4195649325847626, "learning_rate": 9.866356181026628e-05, "loss": 1.469, "step": 274 }, { "epoch": 0.08347245409015025, "grad_norm": 0.480640709400177, "learning_rate": 9.865849954439607e-05, "loss": 1.8329, "step": 275 }, { "epoch": 0.08377599028684171, "grad_norm": 0.34760695695877075, "learning_rate": 9.865343727852587e-05, "loss": 1.9495, "step": 276 }, { "epoch": 0.08407952648353316, "grad_norm": 0.3803161680698395, "learning_rate": 9.864837501265568e-05, "loss": 1.9294, "step": 277 }, { "epoch": 0.08438306268022462, "grad_norm": 0.41739675402641296, "learning_rate": 9.864331274678547e-05, "loss": 2.059, "step": 278 }, { "epoch": 0.08468659887691607, "grad_norm": 0.3807448744773865, "learning_rate": 9.863825048091527e-05, "loss": 1.9741, "step": 279 }, { "epoch": 0.08499013507360753, "grad_norm": 0.3610997200012207, "learning_rate": 9.863318821504506e-05, "loss": 1.9815, "step": 280 }, { "epoch": 0.08529367127029898, "grad_norm": 0.3797460198402405, "learning_rate": 9.862812594917485e-05, "loss": 2.1394, "step": 281 }, { "epoch": 0.08559720746699044, "grad_norm": 0.3922887444496155, "learning_rate": 9.862306368330465e-05, "loss": 2.184, "step": 282 }, { "epoch": 0.08590074366368189, "grad_norm": 0.38251930475234985, "learning_rate": 9.861800141743444e-05, "loss": 2.0186, "step": 283 }, { "epoch": 0.08620427986037335, "grad_norm": 0.35968562960624695, "learning_rate": 9.861293915156424e-05, "loss": 2.0, "step": 284 }, { "epoch": 0.0865078160570648, "grad_norm": 0.37149590253829956, "learning_rate": 9.860787688569403e-05, "loss": 1.7941, "step": 285 }, { "epoch": 0.08681135225375626, "grad_norm": 0.36890628933906555, "learning_rate": 9.860281461982383e-05, "loss": 1.906, "step": 286 }, { "epoch": 0.08711488845044772, "grad_norm": 0.36025917530059814, "learning_rate": 9.859775235395364e-05, "loss": 1.9655, "step": 287 }, { "epoch": 0.08741842464713917, "grad_norm": 0.3704364001750946, "learning_rate": 9.859269008808343e-05, "loss": 1.8657, "step": 288 }, { "epoch": 0.08772196084383063, "grad_norm": 0.5996513962745667, "learning_rate": 9.858762782221323e-05, "loss": 1.7448, "step": 289 }, { "epoch": 0.08802549704052208, "grad_norm": 0.3615630269050598, "learning_rate": 9.858256555634302e-05, "loss": 1.9007, "step": 290 }, { "epoch": 0.08832903323721354, "grad_norm": 0.36014246940612793, "learning_rate": 9.857750329047282e-05, "loss": 1.927, "step": 291 }, { "epoch": 0.08863256943390499, "grad_norm": 0.5038754940032959, "learning_rate": 9.857244102460261e-05, "loss": 1.6613, "step": 292 }, { "epoch": 0.08893610563059645, "grad_norm": 0.3880213797092438, "learning_rate": 9.85673787587324e-05, "loss": 1.5563, "step": 293 }, { "epoch": 0.0892396418272879, "grad_norm": 0.43225082755088806, "learning_rate": 9.85623164928622e-05, "loss": 1.5534, "step": 294 }, { "epoch": 0.08954317802397936, "grad_norm": 0.44342055916786194, "learning_rate": 9.8557254226992e-05, "loss": 1.6211, "step": 295 }, { "epoch": 0.08984671422067081, "grad_norm": 0.42114123702049255, "learning_rate": 9.85521919611218e-05, "loss": 1.9731, "step": 296 }, { "epoch": 0.09015025041736227, "grad_norm": 0.43151113390922546, "learning_rate": 9.85471296952516e-05, "loss": 1.9519, "step": 297 }, { "epoch": 0.09045378661405373, "grad_norm": 0.38092517852783203, "learning_rate": 9.85420674293814e-05, "loss": 2.0973, "step": 298 }, { "epoch": 0.09075732281074518, "grad_norm": 0.40729570388793945, "learning_rate": 9.853700516351119e-05, "loss": 1.4395, "step": 299 }, { "epoch": 0.09106085900743664, "grad_norm": 0.3631846308708191, "learning_rate": 9.8531942897641e-05, "loss": 1.2255, "step": 300 }, { "epoch": 0.09136439520412809, "grad_norm": 0.37764397263526917, "learning_rate": 9.852688063177079e-05, "loss": 1.9941, "step": 301 }, { "epoch": 0.09166793140081955, "grad_norm": 0.3755379319190979, "learning_rate": 9.852181836590059e-05, "loss": 1.7154, "step": 302 }, { "epoch": 0.091971467597511, "grad_norm": 0.39003854990005493, "learning_rate": 9.851675610003038e-05, "loss": 1.928, "step": 303 }, { "epoch": 0.09227500379420246, "grad_norm": 0.39592432975769043, "learning_rate": 9.851169383416018e-05, "loss": 2.1913, "step": 304 }, { "epoch": 0.09257853999089391, "grad_norm": 0.4315894842147827, "learning_rate": 9.850663156828997e-05, "loss": 1.6432, "step": 305 }, { "epoch": 0.09288207618758537, "grad_norm": 0.4103511571884155, "learning_rate": 9.850156930241977e-05, "loss": 1.9944, "step": 306 }, { "epoch": 0.09318561238427682, "grad_norm": 0.4236547350883484, "learning_rate": 9.849650703654957e-05, "loss": 1.875, "step": 307 }, { "epoch": 0.09348914858096828, "grad_norm": 0.41012468934059143, "learning_rate": 9.849144477067937e-05, "loss": 2.008, "step": 308 }, { "epoch": 0.09379268477765973, "grad_norm": 0.35538622736930847, "learning_rate": 9.848638250480916e-05, "loss": 1.7322, "step": 309 }, { "epoch": 0.09409622097435119, "grad_norm": 0.3874755799770355, "learning_rate": 9.848132023893896e-05, "loss": 1.9818, "step": 310 }, { "epoch": 0.09439975717104265, "grad_norm": 0.42444977164268494, "learning_rate": 9.847625797306875e-05, "loss": 2.1606, "step": 311 }, { "epoch": 0.0947032933677341, "grad_norm": 0.5855305194854736, "learning_rate": 9.847119570719855e-05, "loss": 1.4887, "step": 312 }, { "epoch": 0.09500682956442556, "grad_norm": 0.35223227739334106, "learning_rate": 9.846613344132834e-05, "loss": 2.0025, "step": 313 }, { "epoch": 0.09531036576111701, "grad_norm": 0.4013148844242096, "learning_rate": 9.846107117545814e-05, "loss": 1.9702, "step": 314 }, { "epoch": 0.09561390195780847, "grad_norm": 0.5038349032402039, "learning_rate": 9.845600890958793e-05, "loss": 2.1532, "step": 315 }, { "epoch": 0.09591743815449992, "grad_norm": 0.4826093018054962, "learning_rate": 9.845094664371774e-05, "loss": 2.0118, "step": 316 }, { "epoch": 0.09622097435119138, "grad_norm": 0.41135913133621216, "learning_rate": 9.844588437784754e-05, "loss": 2.0707, "step": 317 }, { "epoch": 0.09652451054788283, "grad_norm": 0.4353053569793701, "learning_rate": 9.844082211197733e-05, "loss": 2.104, "step": 318 }, { "epoch": 0.09682804674457429, "grad_norm": 0.4192908704280853, "learning_rate": 9.843575984610712e-05, "loss": 1.9489, "step": 319 }, { "epoch": 0.09713158294126574, "grad_norm": 0.380562424659729, "learning_rate": 9.843069758023692e-05, "loss": 1.3602, "step": 320 }, { "epoch": 0.0974351191379572, "grad_norm": 0.3394995331764221, "learning_rate": 9.842563531436671e-05, "loss": 2.2161, "step": 321 }, { "epoch": 0.09773865533464866, "grad_norm": 0.3419237434864044, "learning_rate": 9.842057304849651e-05, "loss": 1.7146, "step": 322 }, { "epoch": 0.09804219153134011, "grad_norm": 0.3590264618396759, "learning_rate": 9.84155107826263e-05, "loss": 1.8654, "step": 323 }, { "epoch": 0.09834572772803157, "grad_norm": 0.40006300806999207, "learning_rate": 9.84104485167561e-05, "loss": 1.5787, "step": 324 }, { "epoch": 0.09864926392472302, "grad_norm": 0.33313074707984924, "learning_rate": 9.84053862508859e-05, "loss": 1.8653, "step": 325 }, { "epoch": 0.09895280012141448, "grad_norm": 0.39681655168533325, "learning_rate": 9.84003239850157e-05, "loss": 2.178, "step": 326 }, { "epoch": 0.09925633631810593, "grad_norm": 0.41945868730545044, "learning_rate": 9.83952617191455e-05, "loss": 1.8324, "step": 327 }, { "epoch": 0.09955987251479739, "grad_norm": 0.3957304060459137, "learning_rate": 9.839019945327529e-05, "loss": 1.6468, "step": 328 }, { "epoch": 0.09986340871148884, "grad_norm": 0.35814937949180603, "learning_rate": 9.838513718740509e-05, "loss": 1.6492, "step": 329 }, { "epoch": 0.1001669449081803, "grad_norm": 0.38410916924476624, "learning_rate": 9.838007492153488e-05, "loss": 1.7223, "step": 330 }, { "epoch": 0.10047048110487175, "grad_norm": 0.38490885496139526, "learning_rate": 9.837501265566468e-05, "loss": 2.0166, "step": 331 }, { "epoch": 0.10077401730156321, "grad_norm": 0.38943415880203247, "learning_rate": 9.836995038979447e-05, "loss": 1.371, "step": 332 }, { "epoch": 0.10107755349825466, "grad_norm": 0.39741018414497375, "learning_rate": 9.836488812392427e-05, "loss": 1.6233, "step": 333 }, { "epoch": 0.10138108969494612, "grad_norm": 0.4663957357406616, "learning_rate": 9.835982585805406e-05, "loss": 1.746, "step": 334 }, { "epoch": 0.10168462589163758, "grad_norm": 0.37118905782699585, "learning_rate": 9.835476359218387e-05, "loss": 1.9684, "step": 335 }, { "epoch": 0.10198816208832903, "grad_norm": 0.40275588631629944, "learning_rate": 9.834970132631366e-05, "loss": 1.9551, "step": 336 }, { "epoch": 0.1022916982850205, "grad_norm": 0.4336283206939697, "learning_rate": 9.834463906044346e-05, "loss": 2.0711, "step": 337 }, { "epoch": 0.10259523448171194, "grad_norm": 0.35735735297203064, "learning_rate": 9.833957679457325e-05, "loss": 2.1397, "step": 338 }, { "epoch": 0.1028987706784034, "grad_norm": 0.37825390696525574, "learning_rate": 9.833451452870305e-05, "loss": 1.7494, "step": 339 }, { "epoch": 0.10320230687509485, "grad_norm": 0.3384961783885956, "learning_rate": 9.832945226283284e-05, "loss": 2.0197, "step": 340 }, { "epoch": 0.10350584307178631, "grad_norm": 0.46276888251304626, "learning_rate": 9.832438999696264e-05, "loss": 1.797, "step": 341 }, { "epoch": 0.10380937926847776, "grad_norm": 0.3685421347618103, "learning_rate": 9.831932773109243e-05, "loss": 1.9301, "step": 342 }, { "epoch": 0.10411291546516922, "grad_norm": 0.38931936025619507, "learning_rate": 9.831426546522223e-05, "loss": 1.9623, "step": 343 }, { "epoch": 0.10441645166186067, "grad_norm": 0.46678805351257324, "learning_rate": 9.830920319935204e-05, "loss": 1.6708, "step": 344 }, { "epoch": 0.10471998785855213, "grad_norm": 0.4199204444885254, "learning_rate": 9.830414093348183e-05, "loss": 1.8014, "step": 345 }, { "epoch": 0.1050235240552436, "grad_norm": 0.41024506092071533, "learning_rate": 9.829907866761164e-05, "loss": 1.8829, "step": 346 }, { "epoch": 0.10532706025193504, "grad_norm": 0.5271286368370056, "learning_rate": 9.829401640174143e-05, "loss": 1.7796, "step": 347 }, { "epoch": 0.1056305964486265, "grad_norm": 0.3593878448009491, "learning_rate": 9.828895413587123e-05, "loss": 2.0697, "step": 348 }, { "epoch": 0.10593413264531795, "grad_norm": 0.44404372572898865, "learning_rate": 9.828389187000102e-05, "loss": 2.3235, "step": 349 }, { "epoch": 0.10623766884200941, "grad_norm": 0.4072231650352478, "learning_rate": 9.827882960413082e-05, "loss": 1.5391, "step": 350 }, { "epoch": 0.10654120503870086, "grad_norm": 0.3924303352832794, "learning_rate": 9.827376733826061e-05, "loss": 2.0649, "step": 351 }, { "epoch": 0.10684474123539232, "grad_norm": 0.3815264105796814, "learning_rate": 9.826870507239041e-05, "loss": 1.5821, "step": 352 }, { "epoch": 0.10714827743208377, "grad_norm": 0.40832409262657166, "learning_rate": 9.82636428065202e-05, "loss": 2.1135, "step": 353 }, { "epoch": 0.10745181362877523, "grad_norm": 0.40270155668258667, "learning_rate": 9.825858054065e-05, "loss": 1.6561, "step": 354 }, { "epoch": 0.10775534982546668, "grad_norm": 0.38295283913612366, "learning_rate": 9.82535182747798e-05, "loss": 1.8938, "step": 355 }, { "epoch": 0.10805888602215814, "grad_norm": 0.41975417733192444, "learning_rate": 9.82484560089096e-05, "loss": 1.8605, "step": 356 }, { "epoch": 0.10836242221884959, "grad_norm": 0.41388946771621704, "learning_rate": 9.82433937430394e-05, "loss": 1.812, "step": 357 }, { "epoch": 0.10866595841554105, "grad_norm": 0.3470607101917267, "learning_rate": 9.823833147716919e-05, "loss": 2.1914, "step": 358 }, { "epoch": 0.10896949461223251, "grad_norm": 0.4417155385017395, "learning_rate": 9.823326921129898e-05, "loss": 1.7644, "step": 359 }, { "epoch": 0.10927303080892396, "grad_norm": 0.33910539746284485, "learning_rate": 9.822820694542878e-05, "loss": 1.8821, "step": 360 }, { "epoch": 0.10957656700561542, "grad_norm": 0.36742356419563293, "learning_rate": 9.822314467955857e-05, "loss": 1.9684, "step": 361 }, { "epoch": 0.10988010320230687, "grad_norm": 0.407844603061676, "learning_rate": 9.821808241368837e-05, "loss": 1.8797, "step": 362 }, { "epoch": 0.11018363939899833, "grad_norm": 0.4090898036956787, "learning_rate": 9.821302014781816e-05, "loss": 1.8401, "step": 363 }, { "epoch": 0.11048717559568978, "grad_norm": 0.3852720260620117, "learning_rate": 9.820795788194796e-05, "loss": 1.6887, "step": 364 }, { "epoch": 0.11079071179238124, "grad_norm": 0.4147186875343323, "learning_rate": 9.820289561607777e-05, "loss": 1.7263, "step": 365 }, { "epoch": 0.11109424798907269, "grad_norm": 0.7032086849212646, "learning_rate": 9.819783335020756e-05, "loss": 1.5382, "step": 366 }, { "epoch": 0.11139778418576415, "grad_norm": 0.3547534644603729, "learning_rate": 9.819277108433736e-05, "loss": 1.5988, "step": 367 }, { "epoch": 0.1117013203824556, "grad_norm": 0.45878785848617554, "learning_rate": 9.818770881846715e-05, "loss": 2.2467, "step": 368 }, { "epoch": 0.11200485657914706, "grad_norm": 0.39183077216148376, "learning_rate": 9.818264655259695e-05, "loss": 1.848, "step": 369 }, { "epoch": 0.11230839277583853, "grad_norm": 0.3735283315181732, "learning_rate": 9.817758428672674e-05, "loss": 1.6925, "step": 370 }, { "epoch": 0.11261192897252997, "grad_norm": 0.3878265917301178, "learning_rate": 9.817252202085654e-05, "loss": 2.04, "step": 371 }, { "epoch": 0.11291546516922144, "grad_norm": 0.38978812098503113, "learning_rate": 9.816745975498633e-05, "loss": 1.869, "step": 372 }, { "epoch": 0.11321900136591288, "grad_norm": 0.39212337136268616, "learning_rate": 9.816239748911613e-05, "loss": 2.0549, "step": 373 }, { "epoch": 0.11352253756260434, "grad_norm": 0.39528506994247437, "learning_rate": 9.815733522324593e-05, "loss": 1.5653, "step": 374 }, { "epoch": 0.11382607375929579, "grad_norm": 0.4226018786430359, "learning_rate": 9.815227295737573e-05, "loss": 1.6231, "step": 375 }, { "epoch": 0.11412960995598725, "grad_norm": 0.3577810823917389, "learning_rate": 9.814721069150552e-05, "loss": 1.9599, "step": 376 }, { "epoch": 0.1144331461526787, "grad_norm": 0.33580708503723145, "learning_rate": 9.814214842563532e-05, "loss": 2.0419, "step": 377 }, { "epoch": 0.11473668234937016, "grad_norm": 0.38860392570495605, "learning_rate": 9.813708615976511e-05, "loss": 1.7186, "step": 378 }, { "epoch": 0.11504021854606161, "grad_norm": 0.38994479179382324, "learning_rate": 9.813202389389491e-05, "loss": 2.1848, "step": 379 }, { "epoch": 0.11534375474275307, "grad_norm": 0.3947262763977051, "learning_rate": 9.81269616280247e-05, "loss": 2.1868, "step": 380 }, { "epoch": 0.11564729093944452, "grad_norm": 0.3112877607345581, "learning_rate": 9.81218993621545e-05, "loss": 1.8604, "step": 381 }, { "epoch": 0.11595082713613598, "grad_norm": 0.375689834356308, "learning_rate": 9.811683709628429e-05, "loss": 2.0418, "step": 382 }, { "epoch": 0.11625436333282745, "grad_norm": 0.34537243843078613, "learning_rate": 9.81117748304141e-05, "loss": 1.8874, "step": 383 }, { "epoch": 0.1165578995295189, "grad_norm": 0.5077370405197144, "learning_rate": 9.81067125645439e-05, "loss": 1.7497, "step": 384 }, { "epoch": 0.11686143572621036, "grad_norm": 0.3703441023826599, "learning_rate": 9.810165029867369e-05, "loss": 1.781, "step": 385 }, { "epoch": 0.1171649719229018, "grad_norm": 0.4386610984802246, "learning_rate": 9.809658803280348e-05, "loss": 1.8428, "step": 386 }, { "epoch": 0.11746850811959327, "grad_norm": 0.37781745195388794, "learning_rate": 9.809152576693328e-05, "loss": 2.0384, "step": 387 }, { "epoch": 0.11777204431628471, "grad_norm": 0.38956716656684875, "learning_rate": 9.808646350106307e-05, "loss": 2.3534, "step": 388 }, { "epoch": 0.11807558051297617, "grad_norm": 0.3444838523864746, "learning_rate": 9.808140123519288e-05, "loss": 1.921, "step": 389 }, { "epoch": 0.11837911670966762, "grad_norm": 0.39881742000579834, "learning_rate": 9.807633896932268e-05, "loss": 2.1758, "step": 390 }, { "epoch": 0.11868265290635908, "grad_norm": 0.384226530790329, "learning_rate": 9.807127670345247e-05, "loss": 1.7651, "step": 391 }, { "epoch": 0.11898618910305053, "grad_norm": 0.36255109310150146, "learning_rate": 9.806621443758227e-05, "loss": 1.8122, "step": 392 }, { "epoch": 0.119289725299742, "grad_norm": 0.3627421259880066, "learning_rate": 9.806115217171206e-05, "loss": 1.6304, "step": 393 }, { "epoch": 0.11959326149643346, "grad_norm": 0.8936781883239746, "learning_rate": 9.805608990584187e-05, "loss": 1.8827, "step": 394 }, { "epoch": 0.1198967976931249, "grad_norm": 0.5008642673492432, "learning_rate": 9.805102763997166e-05, "loss": 1.3597, "step": 395 }, { "epoch": 0.12020033388981637, "grad_norm": 0.4444289207458496, "learning_rate": 9.804596537410146e-05, "loss": 2.1768, "step": 396 }, { "epoch": 0.12050387008650781, "grad_norm": 0.3963356912136078, "learning_rate": 9.804090310823125e-05, "loss": 1.8373, "step": 397 }, { "epoch": 0.12080740628319928, "grad_norm": 0.44095271825790405, "learning_rate": 9.803584084236105e-05, "loss": 1.7893, "step": 398 }, { "epoch": 0.12111094247989072, "grad_norm": 0.4162418246269226, "learning_rate": 9.803077857649084e-05, "loss": 1.7482, "step": 399 }, { "epoch": 0.12141447867658219, "grad_norm": 0.3853035271167755, "learning_rate": 9.802571631062064e-05, "loss": 1.6274, "step": 400 }, { "epoch": 0.12171801487327363, "grad_norm": 1.1697463989257812, "learning_rate": 9.802065404475043e-05, "loss": 2.2254, "step": 401 }, { "epoch": 0.1220215510699651, "grad_norm": 0.3899803161621094, "learning_rate": 9.801559177888023e-05, "loss": 1.9754, "step": 402 }, { "epoch": 0.12232508726665654, "grad_norm": 0.43946412205696106, "learning_rate": 9.801052951301002e-05, "loss": 2.1184, "step": 403 }, { "epoch": 0.122628623463348, "grad_norm": 0.46882718801498413, "learning_rate": 9.800546724713983e-05, "loss": 1.4423, "step": 404 }, { "epoch": 0.12293215966003945, "grad_norm": 0.4379485547542572, "learning_rate": 9.800040498126963e-05, "loss": 2.0614, "step": 405 }, { "epoch": 0.12323569585673091, "grad_norm": 0.3837740123271942, "learning_rate": 9.799534271539942e-05, "loss": 1.9974, "step": 406 }, { "epoch": 0.12353923205342238, "grad_norm": 0.35403695702552795, "learning_rate": 9.799028044952922e-05, "loss": 1.5693, "step": 407 }, { "epoch": 0.12384276825011382, "grad_norm": 0.4070426821708679, "learning_rate": 9.798521818365901e-05, "loss": 1.8704, "step": 408 }, { "epoch": 0.12414630444680529, "grad_norm": 0.4301077425479889, "learning_rate": 9.79801559177888e-05, "loss": 1.077, "step": 409 }, { "epoch": 0.12444984064349673, "grad_norm": 0.37687429785728455, "learning_rate": 9.79750936519186e-05, "loss": 1.7323, "step": 410 }, { "epoch": 0.1247533768401882, "grad_norm": 0.37393873929977417, "learning_rate": 9.79700313860484e-05, "loss": 1.9532, "step": 411 }, { "epoch": 0.12505691303687966, "grad_norm": 0.4518846869468689, "learning_rate": 9.796496912017819e-05, "loss": 2.0123, "step": 412 }, { "epoch": 0.1253604492335711, "grad_norm": 0.39417609572410583, "learning_rate": 9.7959906854308e-05, "loss": 2.2669, "step": 413 }, { "epoch": 0.12566398543026255, "grad_norm": 0.3802976608276367, "learning_rate": 9.795484458843779e-05, "loss": 2.0506, "step": 414 }, { "epoch": 0.12596752162695402, "grad_norm": 1.3118431568145752, "learning_rate": 9.794978232256759e-05, "loss": 2.2551, "step": 415 }, { "epoch": 0.12627105782364548, "grad_norm": 0.9459638595581055, "learning_rate": 9.794472005669738e-05, "loss": 1.7829, "step": 416 }, { "epoch": 0.1265745940203369, "grad_norm": 0.571232795715332, "learning_rate": 9.793965779082718e-05, "loss": 1.7768, "step": 417 }, { "epoch": 0.12687813021702837, "grad_norm": 0.3973385989665985, "learning_rate": 9.793459552495697e-05, "loss": 1.88, "step": 418 }, { "epoch": 0.12718166641371983, "grad_norm": 0.3883122503757477, "learning_rate": 9.792953325908677e-05, "loss": 1.9592, "step": 419 }, { "epoch": 0.1274852026104113, "grad_norm": 0.40379586815834045, "learning_rate": 9.792447099321656e-05, "loss": 1.9697, "step": 420 }, { "epoch": 0.12778873880710276, "grad_norm": 0.3288556635379791, "learning_rate": 9.791940872734636e-05, "loss": 1.7282, "step": 421 }, { "epoch": 0.1280922750037942, "grad_norm": 0.3872746527194977, "learning_rate": 9.791434646147616e-05, "loss": 1.9348, "step": 422 }, { "epoch": 0.12839581120048565, "grad_norm": 0.37058207392692566, "learning_rate": 9.790928419560596e-05, "loss": 1.5684, "step": 423 }, { "epoch": 0.12869934739717712, "grad_norm": 0.37466561794281006, "learning_rate": 9.790422192973575e-05, "loss": 1.9535, "step": 424 }, { "epoch": 0.12900288359386858, "grad_norm": 0.32176846265792847, "learning_rate": 9.789915966386555e-05, "loss": 1.8537, "step": 425 }, { "epoch": 0.12930641979056, "grad_norm": 0.37653467059135437, "learning_rate": 9.789409739799534e-05, "loss": 2.0701, "step": 426 }, { "epoch": 0.12960995598725147, "grad_norm": 0.38768434524536133, "learning_rate": 9.788903513212514e-05, "loss": 1.731, "step": 427 }, { "epoch": 0.12991349218394294, "grad_norm": 0.5139635801315308, "learning_rate": 9.788397286625493e-05, "loss": 2.4437, "step": 428 }, { "epoch": 0.1302170283806344, "grad_norm": 0.3759630024433136, "learning_rate": 9.787891060038473e-05, "loss": 2.0918, "step": 429 }, { "epoch": 0.13052056457732586, "grad_norm": 0.3718818426132202, "learning_rate": 9.787384833451452e-05, "loss": 1.5854, "step": 430 }, { "epoch": 0.1308241007740173, "grad_norm": 0.6460405588150024, "learning_rate": 9.786878606864432e-05, "loss": 2.2442, "step": 431 }, { "epoch": 0.13112763697070876, "grad_norm": 0.40393388271331787, "learning_rate": 9.786372380277413e-05, "loss": 1.728, "step": 432 }, { "epoch": 0.13143117316740022, "grad_norm": 0.3772658407688141, "learning_rate": 9.785866153690393e-05, "loss": 1.668, "step": 433 }, { "epoch": 0.13173470936409168, "grad_norm": 2.5252649784088135, "learning_rate": 9.785359927103373e-05, "loss": 1.8864, "step": 434 }, { "epoch": 0.1320382455607831, "grad_norm": 0.42327219247817993, "learning_rate": 9.784853700516352e-05, "loss": 2.3174, "step": 435 }, { "epoch": 0.13234178175747457, "grad_norm": 0.3689473867416382, "learning_rate": 9.784347473929332e-05, "loss": 1.9671, "step": 436 }, { "epoch": 0.13264531795416604, "grad_norm": 0.37554243206977844, "learning_rate": 9.783841247342311e-05, "loss": 1.783, "step": 437 }, { "epoch": 0.1329488541508575, "grad_norm": 0.409587025642395, "learning_rate": 9.783335020755291e-05, "loss": 2.0385, "step": 438 }, { "epoch": 0.13325239034754893, "grad_norm": 0.349252849817276, "learning_rate": 9.78282879416827e-05, "loss": 1.8785, "step": 439 }, { "epoch": 0.1335559265442404, "grad_norm": 0.36687588691711426, "learning_rate": 9.78232256758125e-05, "loss": 2.1174, "step": 440 }, { "epoch": 0.13385946274093186, "grad_norm": 0.40221846103668213, "learning_rate": 9.781816340994229e-05, "loss": 1.8385, "step": 441 }, { "epoch": 0.13416299893762332, "grad_norm": 0.5634617805480957, "learning_rate": 9.781310114407209e-05, "loss": 1.9316, "step": 442 }, { "epoch": 0.13446653513431478, "grad_norm": 0.37704020738601685, "learning_rate": 9.78080388782019e-05, "loss": 1.8865, "step": 443 }, { "epoch": 0.1347700713310062, "grad_norm": 0.36043843626976013, "learning_rate": 9.780297661233169e-05, "loss": 1.585, "step": 444 }, { "epoch": 0.13507360752769768, "grad_norm": 0.33643844723701477, "learning_rate": 9.779791434646149e-05, "loss": 1.8098, "step": 445 }, { "epoch": 0.13537714372438914, "grad_norm": 0.6782101988792419, "learning_rate": 9.779285208059128e-05, "loss": 2.0468, "step": 446 }, { "epoch": 0.1356806799210806, "grad_norm": 0.38101980090141296, "learning_rate": 9.778778981472108e-05, "loss": 2.0624, "step": 447 }, { "epoch": 0.13598421611777203, "grad_norm": 0.399311900138855, "learning_rate": 9.778272754885087e-05, "loss": 2.1652, "step": 448 }, { "epoch": 0.1362877523144635, "grad_norm": 0.3491426706314087, "learning_rate": 9.777766528298066e-05, "loss": 1.9092, "step": 449 }, { "epoch": 0.13659128851115496, "grad_norm": 0.3654717803001404, "learning_rate": 9.777260301711046e-05, "loss": 1.9773, "step": 450 }, { "epoch": 0.13689482470784642, "grad_norm": 0.394699364900589, "learning_rate": 9.776754075124025e-05, "loss": 2.1568, "step": 451 }, { "epoch": 0.13719836090453785, "grad_norm": 0.3601212203502655, "learning_rate": 9.776247848537006e-05, "loss": 1.8744, "step": 452 }, { "epoch": 0.13750189710122931, "grad_norm": 0.40716952085494995, "learning_rate": 9.775741621949986e-05, "loss": 2.1052, "step": 453 }, { "epoch": 0.13780543329792078, "grad_norm": 0.37777504324913025, "learning_rate": 9.775235395362965e-05, "loss": 1.8896, "step": 454 }, { "epoch": 0.13810896949461224, "grad_norm": 0.368600994348526, "learning_rate": 9.774729168775945e-05, "loss": 1.8285, "step": 455 }, { "epoch": 0.1384125056913037, "grad_norm": 0.41742029786109924, "learning_rate": 9.774222942188924e-05, "loss": 1.8286, "step": 456 }, { "epoch": 0.13871604188799513, "grad_norm": 0.40132156014442444, "learning_rate": 9.773716715601904e-05, "loss": 1.9515, "step": 457 }, { "epoch": 0.1390195780846866, "grad_norm": 0.44473376870155334, "learning_rate": 9.773210489014883e-05, "loss": 1.8715, "step": 458 }, { "epoch": 0.13932311428137806, "grad_norm": 0.40146371722221375, "learning_rate": 9.772704262427863e-05, "loss": 2.1469, "step": 459 }, { "epoch": 0.13962665047806952, "grad_norm": 0.3863317370414734, "learning_rate": 9.772198035840842e-05, "loss": 1.9215, "step": 460 }, { "epoch": 0.13993018667476095, "grad_norm": 0.40235334634780884, "learning_rate": 9.771691809253823e-05, "loss": 2.1276, "step": 461 }, { "epoch": 0.14023372287145242, "grad_norm": 0.46011632680892944, "learning_rate": 9.771185582666802e-05, "loss": 1.244, "step": 462 }, { "epoch": 0.14053725906814388, "grad_norm": 0.3428272008895874, "learning_rate": 9.770679356079782e-05, "loss": 1.7991, "step": 463 }, { "epoch": 0.14084079526483534, "grad_norm": 0.39976757764816284, "learning_rate": 9.770173129492761e-05, "loss": 1.7166, "step": 464 }, { "epoch": 0.1411443314615268, "grad_norm": 0.3258446753025055, "learning_rate": 9.769666902905741e-05, "loss": 1.677, "step": 465 }, { "epoch": 0.14144786765821823, "grad_norm": 0.3950905501842499, "learning_rate": 9.76916067631872e-05, "loss": 2.0122, "step": 466 }, { "epoch": 0.1417514038549097, "grad_norm": 0.39712047576904297, "learning_rate": 9.7686544497317e-05, "loss": 1.7262, "step": 467 }, { "epoch": 0.14205494005160116, "grad_norm": 0.8331599235534668, "learning_rate": 9.768148223144679e-05, "loss": 1.9852, "step": 468 }, { "epoch": 0.14235847624829262, "grad_norm": 0.3578427731990814, "learning_rate": 9.767641996557659e-05, "loss": 1.8249, "step": 469 }, { "epoch": 0.14266201244498405, "grad_norm": 0.3736058473587036, "learning_rate": 9.767135769970638e-05, "loss": 1.43, "step": 470 }, { "epoch": 0.14296554864167552, "grad_norm": 0.48153185844421387, "learning_rate": 9.766629543383619e-05, "loss": 1.8667, "step": 471 }, { "epoch": 0.14326908483836698, "grad_norm": 0.3924524188041687, "learning_rate": 9.766123316796599e-05, "loss": 2.0385, "step": 472 }, { "epoch": 0.14357262103505844, "grad_norm": 0.38956940174102783, "learning_rate": 9.765617090209578e-05, "loss": 1.3157, "step": 473 }, { "epoch": 0.14387615723174987, "grad_norm": 0.4032903015613556, "learning_rate": 9.765110863622558e-05, "loss": 1.8793, "step": 474 }, { "epoch": 0.14417969342844134, "grad_norm": 0.5116568207740784, "learning_rate": 9.764604637035537e-05, "loss": 1.7658, "step": 475 }, { "epoch": 0.1444832296251328, "grad_norm": 0.3981756269931793, "learning_rate": 9.764098410448517e-05, "loss": 1.8087, "step": 476 }, { "epoch": 0.14478676582182426, "grad_norm": 0.43181854486465454, "learning_rate": 9.763592183861496e-05, "loss": 1.5241, "step": 477 }, { "epoch": 0.14509030201851572, "grad_norm": 0.4172961413860321, "learning_rate": 9.763085957274477e-05, "loss": 1.8318, "step": 478 }, { "epoch": 0.14539383821520716, "grad_norm": 0.4135033190250397, "learning_rate": 9.762579730687456e-05, "loss": 2.0783, "step": 479 }, { "epoch": 0.14569737441189862, "grad_norm": 0.36482739448547363, "learning_rate": 9.762073504100436e-05, "loss": 2.2524, "step": 480 }, { "epoch": 0.14600091060859008, "grad_norm": 0.3704656958580017, "learning_rate": 9.761567277513415e-05, "loss": 2.0369, "step": 481 }, { "epoch": 0.14630444680528154, "grad_norm": 1.588393211364746, "learning_rate": 9.761061050926396e-05, "loss": 1.8041, "step": 482 }, { "epoch": 0.14660798300197297, "grad_norm": 0.3309743404388428, "learning_rate": 9.760554824339376e-05, "loss": 1.8373, "step": 483 }, { "epoch": 0.14691151919866444, "grad_norm": 0.34598830342292786, "learning_rate": 9.760048597752355e-05, "loss": 1.6249, "step": 484 }, { "epoch": 0.1472150553953559, "grad_norm": 0.3433639109134674, "learning_rate": 9.759542371165335e-05, "loss": 1.9454, "step": 485 }, { "epoch": 0.14751859159204736, "grad_norm": 0.3801734149456024, "learning_rate": 9.759036144578314e-05, "loss": 2.1067, "step": 486 }, { "epoch": 0.1478221277887388, "grad_norm": 0.36811041831970215, "learning_rate": 9.758529917991293e-05, "loss": 1.8642, "step": 487 }, { "epoch": 0.14812566398543026, "grad_norm": 0.3999156355857849, "learning_rate": 9.758023691404273e-05, "loss": 2.1482, "step": 488 }, { "epoch": 0.14842920018212172, "grad_norm": 0.7651489973068237, "learning_rate": 9.757517464817252e-05, "loss": 1.8213, "step": 489 }, { "epoch": 0.14873273637881318, "grad_norm": 0.3491712808609009, "learning_rate": 9.757011238230232e-05, "loss": 2.1047, "step": 490 }, { "epoch": 0.14903627257550464, "grad_norm": 1.028256893157959, "learning_rate": 9.756505011643213e-05, "loss": 2.0519, "step": 491 }, { "epoch": 0.14933980877219608, "grad_norm": 0.5957101583480835, "learning_rate": 9.755998785056192e-05, "loss": 2.1236, "step": 492 }, { "epoch": 0.14964334496888754, "grad_norm": 0.40934717655181885, "learning_rate": 9.755492558469172e-05, "loss": 1.5391, "step": 493 }, { "epoch": 0.149946881165579, "grad_norm": 0.4403507709503174, "learning_rate": 9.754986331882151e-05, "loss": 1.8388, "step": 494 }, { "epoch": 0.15025041736227046, "grad_norm": 0.4258563220500946, "learning_rate": 9.754480105295131e-05, "loss": 1.8092, "step": 495 }, { "epoch": 0.1505539535589619, "grad_norm": 0.3594823181629181, "learning_rate": 9.75397387870811e-05, "loss": 1.7195, "step": 496 }, { "epoch": 0.15085748975565336, "grad_norm": 0.30373120307922363, "learning_rate": 9.75346765212109e-05, "loss": 1.9267, "step": 497 }, { "epoch": 0.15116102595234482, "grad_norm": 0.423096626996994, "learning_rate": 9.752961425534069e-05, "loss": 2.1559, "step": 498 }, { "epoch": 0.15146456214903628, "grad_norm": 0.36935552954673767, "learning_rate": 9.752455198947049e-05, "loss": 2.0357, "step": 499 }, { "epoch": 0.15176809834572771, "grad_norm": 0.7172725200653076, "learning_rate": 9.75194897236003e-05, "loss": 2.0973, "step": 500 }, { "epoch": 0.15207163454241918, "grad_norm": 0.36897605657577515, "learning_rate": 9.751442745773009e-05, "loss": 2.1672, "step": 501 }, { "epoch": 0.15237517073911064, "grad_norm": 0.35079488158226013, "learning_rate": 9.750936519185988e-05, "loss": 2.0808, "step": 502 }, { "epoch": 0.1526787069358021, "grad_norm": 0.37833186984062195, "learning_rate": 9.750430292598968e-05, "loss": 1.8393, "step": 503 }, { "epoch": 0.15298224313249356, "grad_norm": 0.3969264328479767, "learning_rate": 9.749924066011947e-05, "loss": 2.1213, "step": 504 }, { "epoch": 0.153285779329185, "grad_norm": 0.30432841181755066, "learning_rate": 9.749417839424927e-05, "loss": 1.6397, "step": 505 }, { "epoch": 0.15358931552587646, "grad_norm": 0.30847886204719543, "learning_rate": 9.748911612837906e-05, "loss": 1.6455, "step": 506 }, { "epoch": 0.15389285172256792, "grad_norm": 0.38480496406555176, "learning_rate": 9.748405386250886e-05, "loss": 1.803, "step": 507 }, { "epoch": 0.15419638791925938, "grad_norm": 0.48439183831214905, "learning_rate": 9.747899159663865e-05, "loss": 1.6892, "step": 508 }, { "epoch": 0.15449992411595082, "grad_norm": 0.5124354362487793, "learning_rate": 9.747392933076845e-05, "loss": 2.24, "step": 509 }, { "epoch": 0.15480346031264228, "grad_norm": 0.4051717221736908, "learning_rate": 9.746886706489826e-05, "loss": 1.8621, "step": 510 }, { "epoch": 0.15510699650933374, "grad_norm": 0.6452261209487915, "learning_rate": 9.746380479902805e-05, "loss": 1.7043, "step": 511 }, { "epoch": 0.1554105327060252, "grad_norm": 0.5453522801399231, "learning_rate": 9.745874253315785e-05, "loss": 1.7325, "step": 512 }, { "epoch": 0.15571406890271666, "grad_norm": 1.0983595848083496, "learning_rate": 9.745368026728764e-05, "loss": 2.169, "step": 513 }, { "epoch": 0.1560176050994081, "grad_norm": 0.3821035623550415, "learning_rate": 9.744861800141744e-05, "loss": 2.3305, "step": 514 }, { "epoch": 0.15632114129609956, "grad_norm": 0.3694508969783783, "learning_rate": 9.744355573554723e-05, "loss": 1.8453, "step": 515 }, { "epoch": 0.15662467749279102, "grad_norm": 0.3837510943412781, "learning_rate": 9.743849346967702e-05, "loss": 1.9679, "step": 516 }, { "epoch": 0.15692821368948248, "grad_norm": 0.41427966952323914, "learning_rate": 9.743343120380682e-05, "loss": 1.9331, "step": 517 }, { "epoch": 0.15723174988617392, "grad_norm": 0.34252259135246277, "learning_rate": 9.742836893793661e-05, "loss": 1.7938, "step": 518 }, { "epoch": 0.15753528608286538, "grad_norm": 0.4043283462524414, "learning_rate": 9.742330667206642e-05, "loss": 1.4037, "step": 519 }, { "epoch": 0.15783882227955684, "grad_norm": 0.4225389361381531, "learning_rate": 9.741824440619622e-05, "loss": 1.6224, "step": 520 }, { "epoch": 0.1581423584762483, "grad_norm": 0.377590537071228, "learning_rate": 9.741318214032601e-05, "loss": 2.0567, "step": 521 }, { "epoch": 0.15844589467293974, "grad_norm": 0.46170124411582947, "learning_rate": 9.740811987445582e-05, "loss": 2.0449, "step": 522 }, { "epoch": 0.1587494308696312, "grad_norm": 0.3752427399158478, "learning_rate": 9.740305760858562e-05, "loss": 1.8207, "step": 523 }, { "epoch": 0.15905296706632266, "grad_norm": 0.390803724527359, "learning_rate": 9.739799534271541e-05, "loss": 2.0781, "step": 524 }, { "epoch": 0.15935650326301412, "grad_norm": 0.38587453961372375, "learning_rate": 9.73929330768452e-05, "loss": 1.9932, "step": 525 }, { "epoch": 0.15966003945970558, "grad_norm": 0.4154350459575653, "learning_rate": 9.7387870810975e-05, "loss": 1.7649, "step": 526 }, { "epoch": 0.15996357565639702, "grad_norm": 0.3698589503765106, "learning_rate": 9.73828085451048e-05, "loss": 1.6921, "step": 527 }, { "epoch": 0.16026711185308848, "grad_norm": 0.4110312759876251, "learning_rate": 9.737774627923459e-05, "loss": 1.1834, "step": 528 }, { "epoch": 0.16057064804977994, "grad_norm": 0.4140758812427521, "learning_rate": 9.737268401336438e-05, "loss": 1.8354, "step": 529 }, { "epoch": 0.1608741842464714, "grad_norm": 0.38738423585891724, "learning_rate": 9.736762174749419e-05, "loss": 1.9223, "step": 530 }, { "epoch": 0.16117772044316284, "grad_norm": 0.4055260717868805, "learning_rate": 9.736255948162399e-05, "loss": 1.7802, "step": 531 }, { "epoch": 0.1614812566398543, "grad_norm": 0.44946524500846863, "learning_rate": 9.735749721575378e-05, "loss": 1.8654, "step": 532 }, { "epoch": 0.16178479283654576, "grad_norm": 0.43206432461738586, "learning_rate": 9.735243494988358e-05, "loss": 1.7607, "step": 533 }, { "epoch": 0.16208832903323722, "grad_norm": 0.5007991194725037, "learning_rate": 9.734737268401337e-05, "loss": 1.9378, "step": 534 }, { "epoch": 0.16239186522992866, "grad_norm": 0.48757919669151306, "learning_rate": 9.734231041814317e-05, "loss": 2.1829, "step": 535 }, { "epoch": 0.16269540142662012, "grad_norm": 0.4159701466560364, "learning_rate": 9.733724815227296e-05, "loss": 1.8847, "step": 536 }, { "epoch": 0.16299893762331158, "grad_norm": 0.40922749042510986, "learning_rate": 9.733218588640276e-05, "loss": 1.4376, "step": 537 }, { "epoch": 0.16330247382000304, "grad_norm": 0.33677083253860474, "learning_rate": 9.732712362053255e-05, "loss": 1.9568, "step": 538 }, { "epoch": 0.1636060100166945, "grad_norm": 0.3255022168159485, "learning_rate": 9.732206135466236e-05, "loss": 1.9949, "step": 539 }, { "epoch": 0.16390954621338594, "grad_norm": 0.3848338723182678, "learning_rate": 9.731699908879215e-05, "loss": 2.042, "step": 540 }, { "epoch": 0.1642130824100774, "grad_norm": 0.3888263404369354, "learning_rate": 9.731193682292195e-05, "loss": 1.885, "step": 541 }, { "epoch": 0.16451661860676886, "grad_norm": 0.40090805292129517, "learning_rate": 9.730687455705174e-05, "loss": 1.9093, "step": 542 }, { "epoch": 0.16482015480346032, "grad_norm": 0.4106220602989197, "learning_rate": 9.730181229118154e-05, "loss": 1.8392, "step": 543 }, { "epoch": 0.16512369100015176, "grad_norm": 0.3483395278453827, "learning_rate": 9.729675002531133e-05, "loss": 2.0235, "step": 544 }, { "epoch": 0.16542722719684322, "grad_norm": 0.3686208128929138, "learning_rate": 9.729168775944113e-05, "loss": 1.9218, "step": 545 }, { "epoch": 0.16573076339353468, "grad_norm": 0.36063849925994873, "learning_rate": 9.728662549357092e-05, "loss": 1.9334, "step": 546 }, { "epoch": 0.16603429959022614, "grad_norm": 0.39365142583847046, "learning_rate": 9.728156322770072e-05, "loss": 1.9825, "step": 547 }, { "epoch": 0.16633783578691758, "grad_norm": 0.4062787592411041, "learning_rate": 9.727650096183051e-05, "loss": 1.521, "step": 548 }, { "epoch": 0.16664137198360904, "grad_norm": 0.37347134947776794, "learning_rate": 9.727143869596032e-05, "loss": 1.9356, "step": 549 }, { "epoch": 0.1669449081803005, "grad_norm": 0.3538997173309326, "learning_rate": 9.726637643009012e-05, "loss": 1.845, "step": 550 }, { "epoch": 0.16724844437699196, "grad_norm": 0.3868335783481598, "learning_rate": 9.726131416421991e-05, "loss": 1.9803, "step": 551 }, { "epoch": 0.16755198057368342, "grad_norm": 0.34705451130867004, "learning_rate": 9.72562518983497e-05, "loss": 2.0866, "step": 552 }, { "epoch": 0.16785551677037486, "grad_norm": 0.3794872462749481, "learning_rate": 9.72511896324795e-05, "loss": 2.094, "step": 553 }, { "epoch": 0.16815905296706632, "grad_norm": 0.5801231861114502, "learning_rate": 9.72461273666093e-05, "loss": 1.7851, "step": 554 }, { "epoch": 0.16846258916375778, "grad_norm": 0.3076344132423401, "learning_rate": 9.724106510073909e-05, "loss": 1.5188, "step": 555 }, { "epoch": 0.16876612536044924, "grad_norm": 0.3552989363670349, "learning_rate": 9.723600283486888e-05, "loss": 2.1063, "step": 556 }, { "epoch": 0.16906966155714068, "grad_norm": 0.36939847469329834, "learning_rate": 9.723094056899868e-05, "loss": 1.7648, "step": 557 }, { "epoch": 0.16937319775383214, "grad_norm": 0.358634889125824, "learning_rate": 9.722587830312849e-05, "loss": 1.8007, "step": 558 }, { "epoch": 0.1696767339505236, "grad_norm": 0.39962029457092285, "learning_rate": 9.722081603725828e-05, "loss": 1.8845, "step": 559 }, { "epoch": 0.16998027014721506, "grad_norm": 0.4099076986312866, "learning_rate": 9.721575377138808e-05, "loss": 1.8894, "step": 560 }, { "epoch": 0.17028380634390652, "grad_norm": 0.3610551655292511, "learning_rate": 9.721069150551787e-05, "loss": 1.8089, "step": 561 }, { "epoch": 0.17058734254059796, "grad_norm": 0.5951200723648071, "learning_rate": 9.720562923964767e-05, "loss": 1.6966, "step": 562 }, { "epoch": 0.17089087873728942, "grad_norm": 0.562522292137146, "learning_rate": 9.720056697377746e-05, "loss": 1.7704, "step": 563 }, { "epoch": 0.17119441493398088, "grad_norm": 0.6662526726722717, "learning_rate": 9.719550470790726e-05, "loss": 1.7714, "step": 564 }, { "epoch": 0.17149795113067234, "grad_norm": 0.44034865498542786, "learning_rate": 9.719044244203705e-05, "loss": 2.1042, "step": 565 }, { "epoch": 0.17180148732736378, "grad_norm": 0.39868202805519104, "learning_rate": 9.718538017616685e-05, "loss": 1.952, "step": 566 }, { "epoch": 0.17210502352405524, "grad_norm": 0.3427380621433258, "learning_rate": 9.718031791029665e-05, "loss": 2.037, "step": 567 }, { "epoch": 0.1724085597207467, "grad_norm": 0.37980929017066956, "learning_rate": 9.717525564442645e-05, "loss": 1.5378, "step": 568 }, { "epoch": 0.17271209591743816, "grad_norm": 0.32314518094062805, "learning_rate": 9.717019337855626e-05, "loss": 1.6191, "step": 569 }, { "epoch": 0.1730156321141296, "grad_norm": 0.40600740909576416, "learning_rate": 9.716513111268605e-05, "loss": 1.6055, "step": 570 }, { "epoch": 0.17331916831082106, "grad_norm": 0.37318041920661926, "learning_rate": 9.716006884681585e-05, "loss": 1.8666, "step": 571 }, { "epoch": 0.17362270450751252, "grad_norm": 0.3656068444252014, "learning_rate": 9.715500658094564e-05, "loss": 1.5983, "step": 572 }, { "epoch": 0.17392624070420398, "grad_norm": 0.3546827733516693, "learning_rate": 9.714994431507544e-05, "loss": 2.2088, "step": 573 }, { "epoch": 0.17422977690089544, "grad_norm": 0.4293152689933777, "learning_rate": 9.714488204920523e-05, "loss": 1.803, "step": 574 }, { "epoch": 0.17453331309758688, "grad_norm": 0.3790314495563507, "learning_rate": 9.713981978333503e-05, "loss": 1.9874, "step": 575 }, { "epoch": 0.17483684929427834, "grad_norm": 0.37619829177856445, "learning_rate": 9.713475751746482e-05, "loss": 1.9061, "step": 576 }, { "epoch": 0.1751403854909698, "grad_norm": 0.36988991498947144, "learning_rate": 9.712969525159462e-05, "loss": 1.5463, "step": 577 }, { "epoch": 0.17544392168766126, "grad_norm": 0.367721825838089, "learning_rate": 9.712463298572442e-05, "loss": 1.6526, "step": 578 }, { "epoch": 0.1757474578843527, "grad_norm": 0.39620110392570496, "learning_rate": 9.711957071985422e-05, "loss": 2.056, "step": 579 }, { "epoch": 0.17605099408104416, "grad_norm": 0.41518276929855347, "learning_rate": 9.711450845398401e-05, "loss": 1.6847, "step": 580 }, { "epoch": 0.17635453027773562, "grad_norm": 0.3925170302391052, "learning_rate": 9.710944618811381e-05, "loss": 1.8476, "step": 581 }, { "epoch": 0.17665806647442708, "grad_norm": 0.36658090353012085, "learning_rate": 9.71043839222436e-05, "loss": 2.0699, "step": 582 }, { "epoch": 0.17696160267111852, "grad_norm": 0.3741433620452881, "learning_rate": 9.70993216563734e-05, "loss": 1.9645, "step": 583 }, { "epoch": 0.17726513886780998, "grad_norm": 0.3742316663265228, "learning_rate": 9.709425939050319e-05, "loss": 2.3717, "step": 584 }, { "epoch": 0.17756867506450144, "grad_norm": 0.3796440660953522, "learning_rate": 9.708919712463299e-05, "loss": 1.9356, "step": 585 }, { "epoch": 0.1778722112611929, "grad_norm": 0.3976511061191559, "learning_rate": 9.708413485876278e-05, "loss": 2.1889, "step": 586 }, { "epoch": 0.17817574745788436, "grad_norm": 0.34445542097091675, "learning_rate": 9.707907259289258e-05, "loss": 1.6535, "step": 587 }, { "epoch": 0.1784792836545758, "grad_norm": 0.3982098698616028, "learning_rate": 9.707401032702239e-05, "loss": 2.0542, "step": 588 }, { "epoch": 0.17878281985126726, "grad_norm": 0.42155295610427856, "learning_rate": 9.706894806115218e-05, "loss": 1.4605, "step": 589 }, { "epoch": 0.17908635604795872, "grad_norm": 0.36341744661331177, "learning_rate": 9.706388579528197e-05, "loss": 1.8069, "step": 590 }, { "epoch": 0.17938989224465018, "grad_norm": 0.3715178668498993, "learning_rate": 9.705882352941177e-05, "loss": 1.5512, "step": 591 }, { "epoch": 0.17969342844134162, "grad_norm": 0.376767635345459, "learning_rate": 9.705376126354156e-05, "loss": 1.6027, "step": 592 }, { "epoch": 0.17999696463803308, "grad_norm": 0.4033347964286804, "learning_rate": 9.704869899767136e-05, "loss": 1.5071, "step": 593 }, { "epoch": 0.18030050083472454, "grad_norm": 0.8200478553771973, "learning_rate": 9.704363673180115e-05, "loss": 1.924, "step": 594 }, { "epoch": 0.180604037031416, "grad_norm": 0.6224507093429565, "learning_rate": 9.703857446593095e-05, "loss": 1.9684, "step": 595 }, { "epoch": 0.18090757322810747, "grad_norm": 0.32032859325408936, "learning_rate": 9.703351220006074e-05, "loss": 1.9478, "step": 596 }, { "epoch": 0.1812111094247989, "grad_norm": 0.33331337571144104, "learning_rate": 9.702844993419055e-05, "loss": 1.8177, "step": 597 }, { "epoch": 0.18151464562149036, "grad_norm": 0.47399207949638367, "learning_rate": 9.702338766832035e-05, "loss": 2.07, "step": 598 }, { "epoch": 0.18181818181818182, "grad_norm": 0.30480411648750305, "learning_rate": 9.701832540245014e-05, "loss": 2.0407, "step": 599 }, { "epoch": 0.18212171801487329, "grad_norm": 0.40148988366127014, "learning_rate": 9.701326313657994e-05, "loss": 1.8774, "step": 600 }, { "epoch": 0.18242525421156472, "grad_norm": 0.3958423137664795, "learning_rate": 9.700820087070973e-05, "loss": 1.8462, "step": 601 }, { "epoch": 0.18272879040825618, "grad_norm": 0.34824639558792114, "learning_rate": 9.700313860483953e-05, "loss": 1.7839, "step": 602 }, { "epoch": 0.18303232660494764, "grad_norm": 0.38002872467041016, "learning_rate": 9.699807633896932e-05, "loss": 2.3237, "step": 603 }, { "epoch": 0.1833358628016391, "grad_norm": 0.37800419330596924, "learning_rate": 9.699301407309912e-05, "loss": 1.9375, "step": 604 }, { "epoch": 0.18363939899833054, "grad_norm": 0.4041115939617157, "learning_rate": 9.698795180722891e-05, "loss": 2.029, "step": 605 }, { "epoch": 0.183942935195022, "grad_norm": 0.3697315454483032, "learning_rate": 9.698288954135872e-05, "loss": 1.894, "step": 606 }, { "epoch": 0.18424647139171346, "grad_norm": 0.3809906542301178, "learning_rate": 9.697782727548851e-05, "loss": 1.8242, "step": 607 }, { "epoch": 0.18455000758840492, "grad_norm": 0.3997717499732971, "learning_rate": 9.697276500961831e-05, "loss": 2.0522, "step": 608 }, { "epoch": 0.18485354378509639, "grad_norm": 0.391699880361557, "learning_rate": 9.69677027437481e-05, "loss": 1.8521, "step": 609 }, { "epoch": 0.18515707998178782, "grad_norm": 0.3667858839035034, "learning_rate": 9.69626404778779e-05, "loss": 1.7613, "step": 610 }, { "epoch": 0.18546061617847928, "grad_norm": 0.3905411958694458, "learning_rate": 9.69575782120077e-05, "loss": 1.8285, "step": 611 }, { "epoch": 0.18576415237517074, "grad_norm": 0.4121951758861542, "learning_rate": 9.69525159461375e-05, "loss": 1.8104, "step": 612 }, { "epoch": 0.1860676885718622, "grad_norm": 0.34977591037750244, "learning_rate": 9.69474536802673e-05, "loss": 1.7737, "step": 613 }, { "epoch": 0.18637122476855364, "grad_norm": 0.34084367752075195, "learning_rate": 9.694239141439709e-05, "loss": 2.0407, "step": 614 }, { "epoch": 0.1866747609652451, "grad_norm": 0.35442525148391724, "learning_rate": 9.693732914852689e-05, "loss": 1.9152, "step": 615 }, { "epoch": 0.18697829716193656, "grad_norm": 0.34404149651527405, "learning_rate": 9.693226688265668e-05, "loss": 1.7621, "step": 616 }, { "epoch": 0.18728183335862802, "grad_norm": 0.4516477882862091, "learning_rate": 9.692720461678649e-05, "loss": 1.7624, "step": 617 }, { "epoch": 0.18758536955531946, "grad_norm": 0.3506614565849304, "learning_rate": 9.692214235091628e-05, "loss": 1.6627, "step": 618 }, { "epoch": 0.18788890575201092, "grad_norm": 0.9165719151496887, "learning_rate": 9.691708008504608e-05, "loss": 2.1926, "step": 619 }, { "epoch": 0.18819244194870238, "grad_norm": 0.3361871838569641, "learning_rate": 9.691201781917587e-05, "loss": 1.5229, "step": 620 }, { "epoch": 0.18849597814539384, "grad_norm": 0.32639381289482117, "learning_rate": 9.690695555330567e-05, "loss": 1.8778, "step": 621 }, { "epoch": 0.1887995143420853, "grad_norm": 0.44261273741722107, "learning_rate": 9.690189328743546e-05, "loss": 2.0903, "step": 622 }, { "epoch": 0.18910305053877674, "grad_norm": 0.4438890516757965, "learning_rate": 9.689683102156526e-05, "loss": 1.772, "step": 623 }, { "epoch": 0.1894065867354682, "grad_norm": 0.40160682797431946, "learning_rate": 9.689176875569505e-05, "loss": 2.0964, "step": 624 }, { "epoch": 0.18971012293215966, "grad_norm": 0.4022195637226105, "learning_rate": 9.688670648982485e-05, "loss": 1.7818, "step": 625 }, { "epoch": 0.19001365912885113, "grad_norm": 0.4233214855194092, "learning_rate": 9.688164422395464e-05, "loss": 1.922, "step": 626 }, { "epoch": 0.19031719532554256, "grad_norm": 0.3864254057407379, "learning_rate": 9.687658195808445e-05, "loss": 2.0279, "step": 627 }, { "epoch": 0.19062073152223402, "grad_norm": 0.36527585983276367, "learning_rate": 9.687151969221424e-05, "loss": 2.0732, "step": 628 }, { "epoch": 0.19092426771892548, "grad_norm": 0.399237722158432, "learning_rate": 9.686645742634404e-05, "loss": 1.8889, "step": 629 }, { "epoch": 0.19122780391561695, "grad_norm": 0.3860459625720978, "learning_rate": 9.686139516047383e-05, "loss": 1.968, "step": 630 }, { "epoch": 0.19153134011230838, "grad_norm": 0.32555973529815674, "learning_rate": 9.685633289460363e-05, "loss": 2.0722, "step": 631 }, { "epoch": 0.19183487630899984, "grad_norm": 0.6093998551368713, "learning_rate": 9.685127062873342e-05, "loss": 1.8553, "step": 632 }, { "epoch": 0.1921384125056913, "grad_norm": 0.4218057692050934, "learning_rate": 9.684620836286322e-05, "loss": 1.9647, "step": 633 }, { "epoch": 0.19244194870238276, "grad_norm": 0.3779148757457733, "learning_rate": 9.684114609699301e-05, "loss": 2.0681, "step": 634 }, { "epoch": 0.19274548489907423, "grad_norm": 0.3820381760597229, "learning_rate": 9.683608383112281e-05, "loss": 2.0603, "step": 635 }, { "epoch": 0.19304902109576566, "grad_norm": 0.29337063431739807, "learning_rate": 9.683102156525262e-05, "loss": 1.7516, "step": 636 }, { "epoch": 0.19335255729245712, "grad_norm": 0.4369249939918518, "learning_rate": 9.682595929938241e-05, "loss": 1.9822, "step": 637 }, { "epoch": 0.19365609348914858, "grad_norm": 0.3766214847564697, "learning_rate": 9.68208970335122e-05, "loss": 1.7229, "step": 638 }, { "epoch": 0.19395962968584005, "grad_norm": 0.4765011668205261, "learning_rate": 9.6815834767642e-05, "loss": 1.2865, "step": 639 }, { "epoch": 0.19426316588253148, "grad_norm": 0.34236472845077515, "learning_rate": 9.68107725017718e-05, "loss": 2.1024, "step": 640 }, { "epoch": 0.19456670207922294, "grad_norm": 0.398076593875885, "learning_rate": 9.680571023590159e-05, "loss": 1.8628, "step": 641 }, { "epoch": 0.1948702382759144, "grad_norm": 0.357099711894989, "learning_rate": 9.680064797003139e-05, "loss": 2.2163, "step": 642 }, { "epoch": 0.19517377447260587, "grad_norm": 0.3296545445919037, "learning_rate": 9.679558570416118e-05, "loss": 1.8227, "step": 643 }, { "epoch": 0.19547731066929733, "grad_norm": 0.36754927039146423, "learning_rate": 9.679052343829098e-05, "loss": 1.7179, "step": 644 }, { "epoch": 0.19578084686598876, "grad_norm": 0.37275364995002747, "learning_rate": 9.678546117242078e-05, "loss": 1.6782, "step": 645 }, { "epoch": 0.19608438306268022, "grad_norm": 0.3951006531715393, "learning_rate": 9.678039890655058e-05, "loss": 2.0756, "step": 646 }, { "epoch": 0.19638791925937168, "grad_norm": 0.3560970425605774, "learning_rate": 9.677533664068037e-05, "loss": 1.8093, "step": 647 }, { "epoch": 0.19669145545606315, "grad_norm": 0.31553730368614197, "learning_rate": 9.677027437481017e-05, "loss": 1.9174, "step": 648 }, { "epoch": 0.19699499165275458, "grad_norm": 0.39949625730514526, "learning_rate": 9.676521210893996e-05, "loss": 1.6687, "step": 649 }, { "epoch": 0.19729852784944604, "grad_norm": 0.37323635816574097, "learning_rate": 9.676014984306976e-05, "loss": 1.8149, "step": 650 }, { "epoch": 0.1976020640461375, "grad_norm": 0.43527746200561523, "learning_rate": 9.675508757719955e-05, "loss": 1.8744, "step": 651 }, { "epoch": 0.19790560024282897, "grad_norm": 0.39380425214767456, "learning_rate": 9.675002531132935e-05, "loss": 1.9721, "step": 652 }, { "epoch": 0.1982091364395204, "grad_norm": 0.3384545147418976, "learning_rate": 9.674496304545914e-05, "loss": 2.0122, "step": 653 }, { "epoch": 0.19851267263621186, "grad_norm": 0.39647915959358215, "learning_rate": 9.673990077958894e-05, "loss": 2.2419, "step": 654 }, { "epoch": 0.19881620883290332, "grad_norm": 0.3358941674232483, "learning_rate": 9.673483851371875e-05, "loss": 1.8758, "step": 655 }, { "epoch": 0.19911974502959479, "grad_norm": 0.3486049771308899, "learning_rate": 9.672977624784855e-05, "loss": 1.5762, "step": 656 }, { "epoch": 0.19942328122628625, "grad_norm": 2.3050696849823, "learning_rate": 9.672471398197835e-05, "loss": 2.0056, "step": 657 }, { "epoch": 0.19972681742297768, "grad_norm": 0.35023945569992065, "learning_rate": 9.671965171610814e-05, "loss": 1.619, "step": 658 }, { "epoch": 0.20003035361966914, "grad_norm": 0.513656735420227, "learning_rate": 9.671458945023794e-05, "loss": 1.5269, "step": 659 }, { "epoch": 0.2003338898163606, "grad_norm": 0.37498149275779724, "learning_rate": 9.670952718436773e-05, "loss": 1.8553, "step": 660 }, { "epoch": 0.20063742601305207, "grad_norm": 0.4101942479610443, "learning_rate": 9.670446491849753e-05, "loss": 2.1121, "step": 661 }, { "epoch": 0.2009409622097435, "grad_norm": 0.4265679717063904, "learning_rate": 9.669940265262732e-05, "loss": 2.1863, "step": 662 }, { "epoch": 0.20124449840643496, "grad_norm": 4.817168712615967, "learning_rate": 9.669434038675712e-05, "loss": 2.0906, "step": 663 }, { "epoch": 0.20154803460312642, "grad_norm": 7.518252849578857, "learning_rate": 9.668927812088691e-05, "loss": 1.8889, "step": 664 }, { "epoch": 0.2018515707998179, "grad_norm": 0.5480749011039734, "learning_rate": 9.66842158550167e-05, "loss": 1.8439, "step": 665 }, { "epoch": 0.20215510699650932, "grad_norm": 0.3578292429447174, "learning_rate": 9.667915358914651e-05, "loss": 1.8742, "step": 666 }, { "epoch": 0.20245864319320078, "grad_norm": 0.3799275755882263, "learning_rate": 9.667409132327631e-05, "loss": 1.994, "step": 667 }, { "epoch": 0.20276217938989224, "grad_norm": 0.3736335039138794, "learning_rate": 9.66690290574061e-05, "loss": 1.7933, "step": 668 }, { "epoch": 0.2030657155865837, "grad_norm": 0.3145211637020111, "learning_rate": 9.66639667915359e-05, "loss": 1.8193, "step": 669 }, { "epoch": 0.20336925178327517, "grad_norm": 0.4940774142742157, "learning_rate": 9.66589045256657e-05, "loss": 1.9238, "step": 670 }, { "epoch": 0.2036727879799666, "grad_norm": 0.431134968996048, "learning_rate": 9.665384225979549e-05, "loss": 1.5493, "step": 671 }, { "epoch": 0.20397632417665806, "grad_norm": 0.41438859701156616, "learning_rate": 9.664877999392528e-05, "loss": 1.2076, "step": 672 }, { "epoch": 0.20427986037334953, "grad_norm": 0.38191312551498413, "learning_rate": 9.664371772805508e-05, "loss": 1.8201, "step": 673 }, { "epoch": 0.204583396570041, "grad_norm": 0.3938577175140381, "learning_rate": 9.663865546218487e-05, "loss": 1.5166, "step": 674 }, { "epoch": 0.20488693276673242, "grad_norm": 0.46312233805656433, "learning_rate": 9.663359319631468e-05, "loss": 1.4652, "step": 675 }, { "epoch": 0.20519046896342388, "grad_norm": 0.4087234139442444, "learning_rate": 9.662853093044448e-05, "loss": 1.8288, "step": 676 }, { "epoch": 0.20549400516011535, "grad_norm": 0.37329304218292236, "learning_rate": 9.662346866457427e-05, "loss": 1.9084, "step": 677 }, { "epoch": 0.2057975413568068, "grad_norm": 0.37109607458114624, "learning_rate": 9.661840639870407e-05, "loss": 1.9674, "step": 678 }, { "epoch": 0.20610107755349824, "grad_norm": 0.3936561942100525, "learning_rate": 9.661334413283386e-05, "loss": 2.0342, "step": 679 }, { "epoch": 0.2064046137501897, "grad_norm": 0.4621008634567261, "learning_rate": 9.660828186696366e-05, "loss": 1.5157, "step": 680 }, { "epoch": 0.20670814994688116, "grad_norm": 0.3849358558654785, "learning_rate": 9.660321960109345e-05, "loss": 2.1513, "step": 681 }, { "epoch": 0.20701168614357263, "grad_norm": 0.4873330295085907, "learning_rate": 9.659815733522325e-05, "loss": 1.9116, "step": 682 }, { "epoch": 0.2073152223402641, "grad_norm": 0.4687885642051697, "learning_rate": 9.659309506935304e-05, "loss": 2.278, "step": 683 }, { "epoch": 0.20761875853695552, "grad_norm": 0.3966952860355377, "learning_rate": 9.658803280348285e-05, "loss": 1.4625, "step": 684 }, { "epoch": 0.20792229473364698, "grad_norm": 0.5782402157783508, "learning_rate": 9.658297053761264e-05, "loss": 2.2779, "step": 685 }, { "epoch": 0.20822583093033845, "grad_norm": 0.37465688586235046, "learning_rate": 9.657790827174244e-05, "loss": 1.8462, "step": 686 }, { "epoch": 0.2085293671270299, "grad_norm": 0.34408631920814514, "learning_rate": 9.657284600587223e-05, "loss": 1.9881, "step": 687 }, { "epoch": 0.20883290332372134, "grad_norm": 0.6892307996749878, "learning_rate": 9.656778374000203e-05, "loss": 1.9835, "step": 688 }, { "epoch": 0.2091364395204128, "grad_norm": 0.3698042631149292, "learning_rate": 9.656272147413182e-05, "loss": 2.0665, "step": 689 }, { "epoch": 0.20943997571710427, "grad_norm": 0.41265738010406494, "learning_rate": 9.655765920826162e-05, "loss": 2.0231, "step": 690 }, { "epoch": 0.20974351191379573, "grad_norm": 0.38251030445098877, "learning_rate": 9.655259694239141e-05, "loss": 1.7058, "step": 691 }, { "epoch": 0.2100470481104872, "grad_norm": 0.468905508518219, "learning_rate": 9.65475346765212e-05, "loss": 1.6182, "step": 692 }, { "epoch": 0.21035058430717862, "grad_norm": 1.0570484399795532, "learning_rate": 9.6542472410651e-05, "loss": 2.0165, "step": 693 }, { "epoch": 0.21065412050387008, "grad_norm": 0.3978007435798645, "learning_rate": 9.653741014478081e-05, "loss": 1.7859, "step": 694 }, { "epoch": 0.21095765670056155, "grad_norm": 0.42616939544677734, "learning_rate": 9.65323478789106e-05, "loss": 1.5197, "step": 695 }, { "epoch": 0.211261192897253, "grad_norm": 0.39380377531051636, "learning_rate": 9.65272856130404e-05, "loss": 1.3796, "step": 696 }, { "epoch": 0.21156472909394444, "grad_norm": 0.38581010699272156, "learning_rate": 9.65222233471702e-05, "loss": 1.8214, "step": 697 }, { "epoch": 0.2118682652906359, "grad_norm": 0.3610150218009949, "learning_rate": 9.651716108129999e-05, "loss": 1.897, "step": 698 }, { "epoch": 0.21217180148732737, "grad_norm": 0.44913700222969055, "learning_rate": 9.651209881542978e-05, "loss": 1.8873, "step": 699 }, { "epoch": 0.21247533768401883, "grad_norm": 1.9599745273590088, "learning_rate": 9.650703654955959e-05, "loss": 1.946, "step": 700 }, { "epoch": 0.21277887388071026, "grad_norm": 1.195716381072998, "learning_rate": 9.650197428368939e-05, "loss": 1.8749, "step": 701 }, { "epoch": 0.21308241007740172, "grad_norm": 0.3154665231704712, "learning_rate": 9.649691201781918e-05, "loss": 1.5924, "step": 702 }, { "epoch": 0.21338594627409319, "grad_norm": 0.3550672233104706, "learning_rate": 9.649184975194898e-05, "loss": 1.6094, "step": 703 }, { "epoch": 0.21368948247078465, "grad_norm": 0.33744126558303833, "learning_rate": 9.648678748607877e-05, "loss": 1.3399, "step": 704 }, { "epoch": 0.2139930186674761, "grad_norm": 0.33931079506874084, "learning_rate": 9.648172522020858e-05, "loss": 2.0096, "step": 705 }, { "epoch": 0.21429655486416754, "grad_norm": 0.38951364159584045, "learning_rate": 9.647666295433837e-05, "loss": 1.7676, "step": 706 }, { "epoch": 0.214600091060859, "grad_norm": 0.408087819814682, "learning_rate": 9.647160068846817e-05, "loss": 1.7948, "step": 707 }, { "epoch": 0.21490362725755047, "grad_norm": 0.37058812379837036, "learning_rate": 9.646653842259796e-05, "loss": 1.9891, "step": 708 }, { "epoch": 0.21520716345424193, "grad_norm": 0.4003254473209381, "learning_rate": 9.646147615672776e-05, "loss": 1.8895, "step": 709 }, { "epoch": 0.21551069965093336, "grad_norm": 0.38838204741477966, "learning_rate": 9.645641389085755e-05, "loss": 2.0121, "step": 710 }, { "epoch": 0.21581423584762482, "grad_norm": 0.41912707686424255, "learning_rate": 9.645135162498735e-05, "loss": 1.9804, "step": 711 }, { "epoch": 0.2161177720443163, "grad_norm": 0.353454053401947, "learning_rate": 9.644628935911714e-05, "loss": 2.0478, "step": 712 }, { "epoch": 0.21642130824100775, "grad_norm": 0.3825720548629761, "learning_rate": 9.644122709324694e-05, "loss": 1.6676, "step": 713 }, { "epoch": 0.21672484443769918, "grad_norm": 0.4197389781475067, "learning_rate": 9.643616482737675e-05, "loss": 1.9732, "step": 714 }, { "epoch": 0.21702838063439064, "grad_norm": 0.4452435076236725, "learning_rate": 9.643110256150654e-05, "loss": 2.0918, "step": 715 }, { "epoch": 0.2173319168310821, "grad_norm": 0.3366299271583557, "learning_rate": 9.642604029563634e-05, "loss": 1.7469, "step": 716 }, { "epoch": 0.21763545302777357, "grad_norm": 0.31280553340911865, "learning_rate": 9.642097802976613e-05, "loss": 2.0348, "step": 717 }, { "epoch": 0.21793898922446503, "grad_norm": 0.425503671169281, "learning_rate": 9.641591576389593e-05, "loss": 1.3629, "step": 718 }, { "epoch": 0.21824252542115646, "grad_norm": 0.3986441493034363, "learning_rate": 9.641085349802572e-05, "loss": 1.4703, "step": 719 }, { "epoch": 0.21854606161784793, "grad_norm": 0.34377026557922363, "learning_rate": 9.640579123215552e-05, "loss": 1.9788, "step": 720 }, { "epoch": 0.2188495978145394, "grad_norm": 0.3445621430873871, "learning_rate": 9.640072896628531e-05, "loss": 1.9137, "step": 721 }, { "epoch": 0.21915313401123085, "grad_norm": 0.40363574028015137, "learning_rate": 9.63956667004151e-05, "loss": 1.8911, "step": 722 }, { "epoch": 0.21945667020792228, "grad_norm": 0.36166059970855713, "learning_rate": 9.639060443454491e-05, "loss": 1.9176, "step": 723 }, { "epoch": 0.21976020640461374, "grad_norm": 0.7732321619987488, "learning_rate": 9.638554216867471e-05, "loss": 2.1942, "step": 724 }, { "epoch": 0.2200637426013052, "grad_norm": 0.4042604863643646, "learning_rate": 9.63804799028045e-05, "loss": 1.8964, "step": 725 }, { "epoch": 0.22036727879799667, "grad_norm": 0.3888862133026123, "learning_rate": 9.63754176369343e-05, "loss": 1.716, "step": 726 }, { "epoch": 0.22067081499468813, "grad_norm": 0.32185250520706177, "learning_rate": 9.637035537106409e-05, "loss": 2.1227, "step": 727 }, { "epoch": 0.22097435119137956, "grad_norm": 0.36421746015548706, "learning_rate": 9.636529310519389e-05, "loss": 1.3262, "step": 728 }, { "epoch": 0.22127788738807103, "grad_norm": 0.42780765891075134, "learning_rate": 9.636023083932368e-05, "loss": 1.806, "step": 729 }, { "epoch": 0.2215814235847625, "grad_norm": 0.3754510283470154, "learning_rate": 9.635516857345348e-05, "loss": 1.9286, "step": 730 }, { "epoch": 0.22188495978145395, "grad_norm": 0.35199174284935, "learning_rate": 9.635010630758327e-05, "loss": 1.9703, "step": 731 }, { "epoch": 0.22218849597814538, "grad_norm": 0.36272746324539185, "learning_rate": 9.634504404171307e-05, "loss": 1.7773, "step": 732 }, { "epoch": 0.22249203217483685, "grad_norm": 0.4233802556991577, "learning_rate": 9.633998177584287e-05, "loss": 2.0016, "step": 733 }, { "epoch": 0.2227955683715283, "grad_norm": 0.46138089895248413, "learning_rate": 9.633491950997267e-05, "loss": 1.764, "step": 734 }, { "epoch": 0.22309910456821977, "grad_norm": 0.37863031029701233, "learning_rate": 9.632985724410246e-05, "loss": 1.6493, "step": 735 }, { "epoch": 0.2234026407649112, "grad_norm": 0.4493837356567383, "learning_rate": 9.632479497823226e-05, "loss": 2.04, "step": 736 }, { "epoch": 0.22370617696160267, "grad_norm": 0.581119179725647, "learning_rate": 9.631973271236205e-05, "loss": 1.777, "step": 737 }, { "epoch": 0.22400971315829413, "grad_norm": 0.3730584979057312, "learning_rate": 9.631467044649185e-05, "loss": 1.8932, "step": 738 }, { "epoch": 0.2243132493549856, "grad_norm": 0.351421594619751, "learning_rate": 9.630960818062164e-05, "loss": 2.3182, "step": 739 }, { "epoch": 0.22461678555167705, "grad_norm": 0.4237976670265198, "learning_rate": 9.630454591475144e-05, "loss": 2.1315, "step": 740 }, { "epoch": 0.22492032174836848, "grad_norm": 0.38544562458992004, "learning_rate": 9.629948364888123e-05, "loss": 1.9596, "step": 741 }, { "epoch": 0.22522385794505995, "grad_norm": 0.407672256231308, "learning_rate": 9.629442138301104e-05, "loss": 1.8694, "step": 742 }, { "epoch": 0.2255273941417514, "grad_norm": 0.4415782690048218, "learning_rate": 9.628935911714084e-05, "loss": 1.8658, "step": 743 }, { "epoch": 0.22583093033844287, "grad_norm": 0.41300657391548157, "learning_rate": 9.628429685127063e-05, "loss": 2.0477, "step": 744 }, { "epoch": 0.2261344665351343, "grad_norm": 0.36000654101371765, "learning_rate": 9.627923458540044e-05, "loss": 1.9045, "step": 745 }, { "epoch": 0.22643800273182577, "grad_norm": 0.42653003334999084, "learning_rate": 9.627417231953023e-05, "loss": 1.2151, "step": 746 }, { "epoch": 0.22674153892851723, "grad_norm": 0.4157649874687195, "learning_rate": 9.626911005366003e-05, "loss": 1.9335, "step": 747 }, { "epoch": 0.2270450751252087, "grad_norm": 0.3805077373981476, "learning_rate": 9.626404778778982e-05, "loss": 2.0803, "step": 748 }, { "epoch": 0.22734861132190012, "grad_norm": 0.39710867404937744, "learning_rate": 9.625898552191962e-05, "loss": 2.2628, "step": 749 }, { "epoch": 0.22765214751859159, "grad_norm": 0.4012609124183655, "learning_rate": 9.625392325604941e-05, "loss": 1.9586, "step": 750 }, { "epoch": 0.22795568371528305, "grad_norm": 0.9281008243560791, "learning_rate": 9.624886099017921e-05, "loss": 1.168, "step": 751 }, { "epoch": 0.2282592199119745, "grad_norm": 0.36847764253616333, "learning_rate": 9.6243798724309e-05, "loss": 1.8907, "step": 752 }, { "epoch": 0.22856275610866597, "grad_norm": 0.4531751573085785, "learning_rate": 9.623873645843881e-05, "loss": 1.4511, "step": 753 }, { "epoch": 0.2288662923053574, "grad_norm": 0.36623820662498474, "learning_rate": 9.62336741925686e-05, "loss": 1.6707, "step": 754 }, { "epoch": 0.22916982850204887, "grad_norm": 0.3104342222213745, "learning_rate": 9.62286119266984e-05, "loss": 1.988, "step": 755 }, { "epoch": 0.22947336469874033, "grad_norm": 0.3790084421634674, "learning_rate": 9.62235496608282e-05, "loss": 1.979, "step": 756 }, { "epoch": 0.2297769008954318, "grad_norm": 0.3642970323562622, "learning_rate": 9.621848739495799e-05, "loss": 1.9998, "step": 757 }, { "epoch": 0.23008043709212322, "grad_norm": 0.34588292241096497, "learning_rate": 9.621342512908779e-05, "loss": 2.0511, "step": 758 }, { "epoch": 0.2303839732888147, "grad_norm": 0.3556496798992157, "learning_rate": 9.620836286321758e-05, "loss": 1.8785, "step": 759 }, { "epoch": 0.23068750948550615, "grad_norm": 0.4669034779071808, "learning_rate": 9.620330059734737e-05, "loss": 1.5027, "step": 760 }, { "epoch": 0.2309910456821976, "grad_norm": 0.39685994386672974, "learning_rate": 9.619823833147717e-05, "loss": 2.1644, "step": 761 }, { "epoch": 0.23129458187888904, "grad_norm": 0.39183005690574646, "learning_rate": 9.619317606560698e-05, "loss": 1.9615, "step": 762 }, { "epoch": 0.2315981180755805, "grad_norm": 0.36401331424713135, "learning_rate": 9.618811379973677e-05, "loss": 1.7535, "step": 763 }, { "epoch": 0.23190165427227197, "grad_norm": 0.43118295073509216, "learning_rate": 9.618305153386657e-05, "loss": 1.884, "step": 764 }, { "epoch": 0.23220519046896343, "grad_norm": 0.5061665177345276, "learning_rate": 9.617798926799636e-05, "loss": 2.0051, "step": 765 }, { "epoch": 0.2325087266656549, "grad_norm": 0.4487472474575043, "learning_rate": 9.617292700212616e-05, "loss": 1.6831, "step": 766 }, { "epoch": 0.23281226286234633, "grad_norm": 0.3660997450351715, "learning_rate": 9.616786473625595e-05, "loss": 1.9276, "step": 767 }, { "epoch": 0.2331157990590378, "grad_norm": 0.3823026716709137, "learning_rate": 9.616280247038575e-05, "loss": 1.9817, "step": 768 }, { "epoch": 0.23341933525572925, "grad_norm": 0.32568395137786865, "learning_rate": 9.615774020451554e-05, "loss": 1.508, "step": 769 }, { "epoch": 0.2337228714524207, "grad_norm": 0.34985265135765076, "learning_rate": 9.615267793864534e-05, "loss": 1.6793, "step": 770 }, { "epoch": 0.23402640764911214, "grad_norm": 0.38563957810401917, "learning_rate": 9.614761567277513e-05, "loss": 1.588, "step": 771 }, { "epoch": 0.2343299438458036, "grad_norm": 0.33572301268577576, "learning_rate": 9.614255340690494e-05, "loss": 1.9541, "step": 772 }, { "epoch": 0.23463348004249507, "grad_norm": 0.33936449885368347, "learning_rate": 9.613749114103473e-05, "loss": 1.9311, "step": 773 }, { "epoch": 0.23493701623918653, "grad_norm": 0.34984657168388367, "learning_rate": 9.613242887516453e-05, "loss": 1.9532, "step": 774 }, { "epoch": 0.235240552435878, "grad_norm": 0.3651373088359833, "learning_rate": 9.612736660929432e-05, "loss": 1.8815, "step": 775 }, { "epoch": 0.23554408863256943, "grad_norm": 0.4317852854728699, "learning_rate": 9.612230434342412e-05, "loss": 2.0262, "step": 776 }, { "epoch": 0.2358476248292609, "grad_norm": 0.375522255897522, "learning_rate": 9.611724207755391e-05, "loss": 1.9964, "step": 777 }, { "epoch": 0.23615116102595235, "grad_norm": 0.37290844321250916, "learning_rate": 9.611217981168371e-05, "loss": 1.7456, "step": 778 }, { "epoch": 0.2364546972226438, "grad_norm": 0.3768545985221863, "learning_rate": 9.61071175458135e-05, "loss": 1.9591, "step": 779 }, { "epoch": 0.23675823341933525, "grad_norm": 0.3147246837615967, "learning_rate": 9.61020552799433e-05, "loss": 1.4033, "step": 780 }, { "epoch": 0.2370617696160267, "grad_norm": 0.4480874240398407, "learning_rate": 9.60969930140731e-05, "loss": 1.9598, "step": 781 }, { "epoch": 0.23736530581271817, "grad_norm": 0.7287562489509583, "learning_rate": 9.60919307482029e-05, "loss": 2.0097, "step": 782 }, { "epoch": 0.23766884200940963, "grad_norm": 0.36199334263801575, "learning_rate": 9.60868684823327e-05, "loss": 1.8089, "step": 783 }, { "epoch": 0.23797237820610107, "grad_norm": 0.32855263352394104, "learning_rate": 9.608180621646249e-05, "loss": 2.0199, "step": 784 }, { "epoch": 0.23827591440279253, "grad_norm": 0.37182894349098206, "learning_rate": 9.607674395059229e-05, "loss": 1.7253, "step": 785 }, { "epoch": 0.238579450599484, "grad_norm": 0.3365595042705536, "learning_rate": 9.607168168472208e-05, "loss": 1.9308, "step": 786 }, { "epoch": 0.23888298679617545, "grad_norm": 0.400685578584671, "learning_rate": 9.606661941885187e-05, "loss": 1.8939, "step": 787 }, { "epoch": 0.2391865229928669, "grad_norm": 0.6354159116744995, "learning_rate": 9.606155715298167e-05, "loss": 2.1476, "step": 788 }, { "epoch": 0.23949005918955835, "grad_norm": 0.4196738600730896, "learning_rate": 9.605649488711148e-05, "loss": 1.8457, "step": 789 }, { "epoch": 0.2397935953862498, "grad_norm": 0.35839545726776123, "learning_rate": 9.605143262124127e-05, "loss": 1.824, "step": 790 }, { "epoch": 0.24009713158294127, "grad_norm": 0.3597940504550934, "learning_rate": 9.604637035537107e-05, "loss": 1.9583, "step": 791 }, { "epoch": 0.24040066777963273, "grad_norm": 0.5783160924911499, "learning_rate": 9.604130808950088e-05, "loss": 2.2, "step": 792 }, { "epoch": 0.24070420397632417, "grad_norm": 0.3544808030128479, "learning_rate": 9.603624582363067e-05, "loss": 2.1092, "step": 793 }, { "epoch": 0.24100774017301563, "grad_norm": 0.41170623898506165, "learning_rate": 9.603118355776047e-05, "loss": 1.6004, "step": 794 }, { "epoch": 0.2413112763697071, "grad_norm": 0.3832992613315582, "learning_rate": 9.602612129189026e-05, "loss": 1.4981, "step": 795 }, { "epoch": 0.24161481256639855, "grad_norm": 0.5239993333816528, "learning_rate": 9.602105902602006e-05, "loss": 1.6026, "step": 796 }, { "epoch": 0.24191834876308999, "grad_norm": 0.38445138931274414, "learning_rate": 9.601599676014985e-05, "loss": 1.5765, "step": 797 }, { "epoch": 0.24222188495978145, "grad_norm": 0.38520511984825134, "learning_rate": 9.601093449427964e-05, "loss": 2.1069, "step": 798 }, { "epoch": 0.2425254211564729, "grad_norm": 0.3519560694694519, "learning_rate": 9.600587222840944e-05, "loss": 1.8896, "step": 799 }, { "epoch": 0.24282895735316437, "grad_norm": 0.5392457246780396, "learning_rate": 9.600080996253923e-05, "loss": 1.6273, "step": 800 }, { "epoch": 0.24313249354985583, "grad_norm": 0.4213111996650696, "learning_rate": 9.599574769666904e-05, "loss": 1.489, "step": 801 }, { "epoch": 0.24343602974654727, "grad_norm": 0.4006531834602356, "learning_rate": 9.599068543079884e-05, "loss": 1.9842, "step": 802 }, { "epoch": 0.24373956594323873, "grad_norm": 0.3792324364185333, "learning_rate": 9.598562316492863e-05, "loss": 1.727, "step": 803 }, { "epoch": 0.2440431021399302, "grad_norm": 0.3555270731449127, "learning_rate": 9.598056089905843e-05, "loss": 1.68, "step": 804 }, { "epoch": 0.24434663833662165, "grad_norm": 0.33837342262268066, "learning_rate": 9.597549863318822e-05, "loss": 2.0709, "step": 805 }, { "epoch": 0.2446501745333131, "grad_norm": 0.3812510371208191, "learning_rate": 9.597043636731802e-05, "loss": 2.1211, "step": 806 }, { "epoch": 0.24495371073000455, "grad_norm": 0.33870792388916016, "learning_rate": 9.596537410144781e-05, "loss": 2.1047, "step": 807 }, { "epoch": 0.245257246926696, "grad_norm": 0.3948252201080322, "learning_rate": 9.59603118355776e-05, "loss": 1.7553, "step": 808 }, { "epoch": 0.24556078312338747, "grad_norm": 0.39410725235939026, "learning_rate": 9.59552495697074e-05, "loss": 1.9383, "step": 809 }, { "epoch": 0.2458643193200789, "grad_norm": 0.37794989347457886, "learning_rate": 9.59501873038372e-05, "loss": 1.9115, "step": 810 }, { "epoch": 0.24616785551677037, "grad_norm": 1.6270610094070435, "learning_rate": 9.5945125037967e-05, "loss": 1.8472, "step": 811 }, { "epoch": 0.24647139171346183, "grad_norm": 0.3724587559700012, "learning_rate": 9.59400627720968e-05, "loss": 1.9087, "step": 812 }, { "epoch": 0.2467749279101533, "grad_norm": 0.4097403585910797, "learning_rate": 9.59350005062266e-05, "loss": 1.8325, "step": 813 }, { "epoch": 0.24707846410684475, "grad_norm": 0.4052940905094147, "learning_rate": 9.592993824035639e-05, "loss": 2.0241, "step": 814 }, { "epoch": 0.2473820003035362, "grad_norm": 0.3887682557106018, "learning_rate": 9.592487597448618e-05, "loss": 1.6114, "step": 815 }, { "epoch": 0.24768553650022765, "grad_norm": 0.404450386762619, "learning_rate": 9.591981370861598e-05, "loss": 1.8384, "step": 816 }, { "epoch": 0.2479890726969191, "grad_norm": 0.7955893874168396, "learning_rate": 9.591475144274577e-05, "loss": 2.2149, "step": 817 }, { "epoch": 0.24829260889361057, "grad_norm": 4.355859279632568, "learning_rate": 9.590968917687557e-05, "loss": 2.3753, "step": 818 }, { "epoch": 0.248596145090302, "grad_norm": 0.3698444962501526, "learning_rate": 9.590462691100536e-05, "loss": 1.7354, "step": 819 }, { "epoch": 0.24889968128699347, "grad_norm": 0.3658899962902069, "learning_rate": 9.589956464513517e-05, "loss": 1.7803, "step": 820 }, { "epoch": 0.24920321748368493, "grad_norm": 0.405072957277298, "learning_rate": 9.589450237926497e-05, "loss": 1.7684, "step": 821 }, { "epoch": 0.2495067536803764, "grad_norm": 0.7590973973274231, "learning_rate": 9.588944011339476e-05, "loss": 1.9466, "step": 822 }, { "epoch": 0.24981028987706785, "grad_norm": 0.5217581987380981, "learning_rate": 9.588437784752456e-05, "loss": 2.1281, "step": 823 }, { "epoch": 0.2501138260737593, "grad_norm": 0.3716435134410858, "learning_rate": 9.587931558165435e-05, "loss": 2.114, "step": 824 }, { "epoch": 0.25041736227045075, "grad_norm": 0.44017624855041504, "learning_rate": 9.587425331578414e-05, "loss": 2.0445, "step": 825 }, { "epoch": 0.2507208984671422, "grad_norm": 0.370370090007782, "learning_rate": 9.586919104991394e-05, "loss": 1.8674, "step": 826 }, { "epoch": 0.2510244346638337, "grad_norm": 0.32125499844551086, "learning_rate": 9.586412878404373e-05, "loss": 1.4129, "step": 827 }, { "epoch": 0.2513279708605251, "grad_norm": 0.4143073856830597, "learning_rate": 9.585906651817353e-05, "loss": 1.9895, "step": 828 }, { "epoch": 0.2516315070572166, "grad_norm": 0.3492576777935028, "learning_rate": 9.585400425230334e-05, "loss": 2.0669, "step": 829 }, { "epoch": 0.25193504325390803, "grad_norm": 0.4044751524925232, "learning_rate": 9.584894198643313e-05, "loss": 1.5909, "step": 830 }, { "epoch": 0.25223857945059946, "grad_norm": 0.3410158157348633, "learning_rate": 9.584387972056293e-05, "loss": 1.7485, "step": 831 }, { "epoch": 0.25254211564729095, "grad_norm": 0.340320348739624, "learning_rate": 9.583881745469272e-05, "loss": 1.8897, "step": 832 }, { "epoch": 0.2528456518439824, "grad_norm": 0.35516276955604553, "learning_rate": 9.583375518882252e-05, "loss": 1.6332, "step": 833 }, { "epoch": 0.2531491880406738, "grad_norm": 0.4099842309951782, "learning_rate": 9.582869292295232e-05, "loss": 1.5617, "step": 834 }, { "epoch": 0.2534527242373653, "grad_norm": 0.38086098432540894, "learning_rate": 9.582363065708212e-05, "loss": 2.0837, "step": 835 }, { "epoch": 0.25375626043405675, "grad_norm": 0.8040663003921509, "learning_rate": 9.581856839121191e-05, "loss": 1.8587, "step": 836 }, { "epoch": 0.25405979663074824, "grad_norm": 0.41297683119773865, "learning_rate": 9.581350612534171e-05, "loss": 1.9602, "step": 837 }, { "epoch": 0.25436333282743967, "grad_norm": 0.38155442476272583, "learning_rate": 9.58084438594715e-05, "loss": 1.375, "step": 838 }, { "epoch": 0.2546668690241311, "grad_norm": 0.3956829905509949, "learning_rate": 9.58033815936013e-05, "loss": 1.9617, "step": 839 }, { "epoch": 0.2549704052208226, "grad_norm": 0.38675928115844727, "learning_rate": 9.579831932773111e-05, "loss": 1.8186, "step": 840 }, { "epoch": 0.255273941417514, "grad_norm": 0.33989018201828003, "learning_rate": 9.57932570618609e-05, "loss": 2.1734, "step": 841 }, { "epoch": 0.2555774776142055, "grad_norm": 0.3240448534488678, "learning_rate": 9.57881947959907e-05, "loss": 1.6238, "step": 842 }, { "epoch": 0.25588101381089695, "grad_norm": 0.6117075681686401, "learning_rate": 9.578313253012049e-05, "loss": 1.986, "step": 843 }, { "epoch": 0.2561845500075884, "grad_norm": 0.3781290650367737, "learning_rate": 9.577807026425029e-05, "loss": 2.0021, "step": 844 }, { "epoch": 0.2564880862042799, "grad_norm": 0.4373374879360199, "learning_rate": 9.577300799838008e-05, "loss": 2.0195, "step": 845 }, { "epoch": 0.2567916224009713, "grad_norm": 0.4125923216342926, "learning_rate": 9.576794573250988e-05, "loss": 1.9412, "step": 846 }, { "epoch": 0.2570951585976628, "grad_norm": 0.3557007908821106, "learning_rate": 9.576288346663967e-05, "loss": 1.8098, "step": 847 }, { "epoch": 0.25739869479435423, "grad_norm": 0.49475541710853577, "learning_rate": 9.575782120076947e-05, "loss": 1.5756, "step": 848 }, { "epoch": 0.25770223099104567, "grad_norm": 0.3507518768310547, "learning_rate": 9.575275893489926e-05, "loss": 1.6413, "step": 849 }, { "epoch": 0.25800576718773716, "grad_norm": 0.39508333802223206, "learning_rate": 9.574769666902907e-05, "loss": 1.9777, "step": 850 }, { "epoch": 0.2583093033844286, "grad_norm": 0.328807532787323, "learning_rate": 9.574263440315886e-05, "loss": 1.4948, "step": 851 }, { "epoch": 0.25861283958112, "grad_norm": 0.3154551386833191, "learning_rate": 9.573757213728866e-05, "loss": 1.7809, "step": 852 }, { "epoch": 0.2589163757778115, "grad_norm": 0.502554178237915, "learning_rate": 9.573250987141845e-05, "loss": 1.4369, "step": 853 }, { "epoch": 0.25921991197450295, "grad_norm": 0.4416670799255371, "learning_rate": 9.572744760554825e-05, "loss": 1.7364, "step": 854 }, { "epoch": 0.25952344817119444, "grad_norm": 0.43228060007095337, "learning_rate": 9.572238533967804e-05, "loss": 1.3281, "step": 855 }, { "epoch": 0.25982698436788587, "grad_norm": 0.3714723289012909, "learning_rate": 9.571732307380784e-05, "loss": 2.0893, "step": 856 }, { "epoch": 0.2601305205645773, "grad_norm": 0.3309679925441742, "learning_rate": 9.571226080793763e-05, "loss": 1.7982, "step": 857 }, { "epoch": 0.2604340567612688, "grad_norm": 0.3709767460823059, "learning_rate": 9.570719854206743e-05, "loss": 1.8628, "step": 858 }, { "epoch": 0.26073759295796023, "grad_norm": 0.6020816564559937, "learning_rate": 9.570213627619724e-05, "loss": 2.0077, "step": 859 }, { "epoch": 0.2610411291546517, "grad_norm": 0.30620431900024414, "learning_rate": 9.569707401032703e-05, "loss": 1.8834, "step": 860 }, { "epoch": 0.26134466535134315, "grad_norm": 0.41518962383270264, "learning_rate": 9.569201174445683e-05, "loss": 1.8025, "step": 861 }, { "epoch": 0.2616482015480346, "grad_norm": 0.3919786512851715, "learning_rate": 9.568694947858662e-05, "loss": 1.995, "step": 862 }, { "epoch": 0.2619517377447261, "grad_norm": 0.47429168224334717, "learning_rate": 9.568188721271641e-05, "loss": 1.9423, "step": 863 }, { "epoch": 0.2622552739414175, "grad_norm": 0.8941421508789062, "learning_rate": 9.567682494684621e-05, "loss": 1.5046, "step": 864 }, { "epoch": 0.26255881013810894, "grad_norm": 0.4357859194278717, "learning_rate": 9.5671762680976e-05, "loss": 2.0023, "step": 865 }, { "epoch": 0.26286234633480043, "grad_norm": 0.3873944878578186, "learning_rate": 9.56667004151058e-05, "loss": 2.0607, "step": 866 }, { "epoch": 0.26316588253149187, "grad_norm": 0.4355853497982025, "learning_rate": 9.56616381492356e-05, "loss": 1.8254, "step": 867 }, { "epoch": 0.26346941872818336, "grad_norm": 0.3882213234901428, "learning_rate": 9.56565758833654e-05, "loss": 1.7809, "step": 868 }, { "epoch": 0.2637729549248748, "grad_norm": 0.4021656811237335, "learning_rate": 9.56515136174952e-05, "loss": 2.1321, "step": 869 }, { "epoch": 0.2640764911215662, "grad_norm": 0.43587526679039, "learning_rate": 9.564645135162499e-05, "loss": 1.7865, "step": 870 }, { "epoch": 0.2643800273182577, "grad_norm": 0.364045649766922, "learning_rate": 9.564138908575479e-05, "loss": 1.8173, "step": 871 }, { "epoch": 0.26468356351494915, "grad_norm": 0.3956625461578369, "learning_rate": 9.563632681988458e-05, "loss": 1.4822, "step": 872 }, { "epoch": 0.26498709971164064, "grad_norm": 0.40755051374435425, "learning_rate": 9.563126455401438e-05, "loss": 1.9418, "step": 873 }, { "epoch": 0.2652906359083321, "grad_norm": 0.39405086636543274, "learning_rate": 9.562620228814417e-05, "loss": 1.4529, "step": 874 }, { "epoch": 0.2655941721050235, "grad_norm": 0.4400351047515869, "learning_rate": 9.562114002227397e-05, "loss": 2.1095, "step": 875 }, { "epoch": 0.265897708301715, "grad_norm": 0.40135496854782104, "learning_rate": 9.561607775640376e-05, "loss": 1.9462, "step": 876 }, { "epoch": 0.26620124449840643, "grad_norm": 0.5949604511260986, "learning_rate": 9.561101549053356e-05, "loss": 1.8797, "step": 877 }, { "epoch": 0.26650478069509786, "grad_norm": 0.38301005959510803, "learning_rate": 9.560595322466336e-05, "loss": 2.0887, "step": 878 }, { "epoch": 0.26680831689178935, "grad_norm": 0.6215627789497375, "learning_rate": 9.560089095879317e-05, "loss": 1.7846, "step": 879 }, { "epoch": 0.2671118530884808, "grad_norm": 0.4041058123111725, "learning_rate": 9.559582869292297e-05, "loss": 1.5127, "step": 880 }, { "epoch": 0.2674153892851723, "grad_norm": 0.30281975865364075, "learning_rate": 9.559076642705276e-05, "loss": 1.8487, "step": 881 }, { "epoch": 0.2677189254818637, "grad_norm": 0.34536200761795044, "learning_rate": 9.558570416118256e-05, "loss": 1.8976, "step": 882 }, { "epoch": 0.26802246167855515, "grad_norm": 0.367245614528656, "learning_rate": 9.558064189531235e-05, "loss": 1.9804, "step": 883 }, { "epoch": 0.26832599787524664, "grad_norm": 0.41750359535217285, "learning_rate": 9.557557962944215e-05, "loss": 1.5932, "step": 884 }, { "epoch": 0.26862953407193807, "grad_norm": 0.7777047157287598, "learning_rate": 9.557051736357194e-05, "loss": 1.8513, "step": 885 }, { "epoch": 0.26893307026862956, "grad_norm": 0.3720252215862274, "learning_rate": 9.556545509770174e-05, "loss": 2.1819, "step": 886 }, { "epoch": 0.269236606465321, "grad_norm": 0.7321712970733643, "learning_rate": 9.556039283183153e-05, "loss": 1.4653, "step": 887 }, { "epoch": 0.2695401426620124, "grad_norm": 0.4140429198741913, "learning_rate": 9.555533056596133e-05, "loss": 1.9816, "step": 888 }, { "epoch": 0.2698436788587039, "grad_norm": 0.40684935450553894, "learning_rate": 9.555026830009113e-05, "loss": 1.5866, "step": 889 }, { "epoch": 0.27014721505539535, "grad_norm": 0.4067225754261017, "learning_rate": 9.554520603422093e-05, "loss": 1.5951, "step": 890 }, { "epoch": 0.2704507512520868, "grad_norm": 0.34240391850471497, "learning_rate": 9.554014376835072e-05, "loss": 1.9076, "step": 891 }, { "epoch": 0.2707542874487783, "grad_norm": 0.4634522795677185, "learning_rate": 9.553508150248052e-05, "loss": 1.9856, "step": 892 }, { "epoch": 0.2710578236454697, "grad_norm": 0.408015638589859, "learning_rate": 9.553001923661031e-05, "loss": 1.7997, "step": 893 }, { "epoch": 0.2713613598421612, "grad_norm": 0.3894648253917694, "learning_rate": 9.552495697074011e-05, "loss": 1.8381, "step": 894 }, { "epoch": 0.27166489603885263, "grad_norm": 0.37494730949401855, "learning_rate": 9.55198947048699e-05, "loss": 2.0548, "step": 895 }, { "epoch": 0.27196843223554407, "grad_norm": 0.39796411991119385, "learning_rate": 9.55148324389997e-05, "loss": 1.9272, "step": 896 }, { "epoch": 0.27227196843223556, "grad_norm": 0.40153494477272034, "learning_rate": 9.550977017312949e-05, "loss": 1.7136, "step": 897 }, { "epoch": 0.272575504628927, "grad_norm": 0.39771386981010437, "learning_rate": 9.55047079072593e-05, "loss": 2.1017, "step": 898 }, { "epoch": 0.2728790408256185, "grad_norm": 0.4085974097251892, "learning_rate": 9.54996456413891e-05, "loss": 1.3951, "step": 899 }, { "epoch": 0.2731825770223099, "grad_norm": 0.39849239587783813, "learning_rate": 9.549458337551889e-05, "loss": 1.9988, "step": 900 }, { "epoch": 0.27348611321900135, "grad_norm": 0.38662001490592957, "learning_rate": 9.548952110964868e-05, "loss": 1.8491, "step": 901 }, { "epoch": 0.27378964941569284, "grad_norm": 0.38078710436820984, "learning_rate": 9.548445884377848e-05, "loss": 1.9, "step": 902 }, { "epoch": 0.27409318561238427, "grad_norm": 0.3548724949359894, "learning_rate": 9.547939657790827e-05, "loss": 1.8754, "step": 903 }, { "epoch": 0.2743967218090757, "grad_norm": 0.37712323665618896, "learning_rate": 9.547433431203807e-05, "loss": 1.5497, "step": 904 }, { "epoch": 0.2747002580057672, "grad_norm": 0.4060449004173279, "learning_rate": 9.546927204616786e-05, "loss": 1.7231, "step": 905 }, { "epoch": 0.27500379420245863, "grad_norm": 0.42080479860305786, "learning_rate": 9.546420978029766e-05, "loss": 2.1538, "step": 906 }, { "epoch": 0.2753073303991501, "grad_norm": 0.4034046232700348, "learning_rate": 9.545914751442747e-05, "loss": 1.7335, "step": 907 }, { "epoch": 0.27561086659584155, "grad_norm": 0.3676345646381378, "learning_rate": 9.545408524855726e-05, "loss": 1.6193, "step": 908 }, { "epoch": 0.275914402792533, "grad_norm": 0.3349851965904236, "learning_rate": 9.544902298268706e-05, "loss": 1.8997, "step": 909 }, { "epoch": 0.2762179389892245, "grad_norm": 0.3676302134990692, "learning_rate": 9.544396071681685e-05, "loss": 1.4031, "step": 910 }, { "epoch": 0.2765214751859159, "grad_norm": 0.36593666672706604, "learning_rate": 9.543889845094665e-05, "loss": 1.8838, "step": 911 }, { "epoch": 0.2768250113826074, "grad_norm": 0.3793712258338928, "learning_rate": 9.543383618507644e-05, "loss": 1.5949, "step": 912 }, { "epoch": 0.27712854757929883, "grad_norm": 0.47586631774902344, "learning_rate": 9.542877391920624e-05, "loss": 1.5687, "step": 913 }, { "epoch": 0.27743208377599027, "grad_norm": 0.38850024342536926, "learning_rate": 9.542371165333603e-05, "loss": 1.7336, "step": 914 }, { "epoch": 0.27773561997268176, "grad_norm": 0.4039680063724518, "learning_rate": 9.541864938746583e-05, "loss": 2.0476, "step": 915 }, { "epoch": 0.2780391561693732, "grad_norm": 0.40498992800712585, "learning_rate": 9.541358712159562e-05, "loss": 1.6699, "step": 916 }, { "epoch": 0.2783426923660646, "grad_norm": 0.39011168479919434, "learning_rate": 9.540852485572543e-05, "loss": 1.9935, "step": 917 }, { "epoch": 0.2786462285627561, "grad_norm": 0.3864549696445465, "learning_rate": 9.540346258985522e-05, "loss": 1.8271, "step": 918 }, { "epoch": 0.27894976475944755, "grad_norm": 0.33493247628211975, "learning_rate": 9.539840032398502e-05, "loss": 1.856, "step": 919 }, { "epoch": 0.27925330095613904, "grad_norm": 0.34132060408592224, "learning_rate": 9.539333805811481e-05, "loss": 1.8836, "step": 920 }, { "epoch": 0.2795568371528305, "grad_norm": 1.5312176942825317, "learning_rate": 9.538827579224461e-05, "loss": 2.0207, "step": 921 }, { "epoch": 0.2798603733495219, "grad_norm": 0.333932489156723, "learning_rate": 9.53832135263744e-05, "loss": 2.0908, "step": 922 }, { "epoch": 0.2801639095462134, "grad_norm": 0.3688269555568695, "learning_rate": 9.537815126050421e-05, "loss": 1.8464, "step": 923 }, { "epoch": 0.28046744574290483, "grad_norm": 0.4097294211387634, "learning_rate": 9.5373088994634e-05, "loss": 1.6891, "step": 924 }, { "epoch": 0.2807709819395963, "grad_norm": 0.3737453818321228, "learning_rate": 9.53680267287638e-05, "loss": 2.0549, "step": 925 }, { "epoch": 0.28107451813628775, "grad_norm": 0.6109428405761719, "learning_rate": 9.53629644628936e-05, "loss": 1.9437, "step": 926 }, { "epoch": 0.2813780543329792, "grad_norm": 0.46215322613716125, "learning_rate": 9.535790219702339e-05, "loss": 1.5133, "step": 927 }, { "epoch": 0.2816815905296707, "grad_norm": 0.8070108294487, "learning_rate": 9.53528399311532e-05, "loss": 1.8843, "step": 928 }, { "epoch": 0.2819851267263621, "grad_norm": 0.40304142236709595, "learning_rate": 9.534777766528299e-05, "loss": 1.9742, "step": 929 }, { "epoch": 0.2822886629230536, "grad_norm": 0.35046708583831787, "learning_rate": 9.534271539941279e-05, "loss": 1.8969, "step": 930 }, { "epoch": 0.28259219911974504, "grad_norm": 0.37241777777671814, "learning_rate": 9.533765313354258e-05, "loss": 1.8138, "step": 931 }, { "epoch": 0.28289573531643647, "grad_norm": 0.38689473271369934, "learning_rate": 9.533259086767238e-05, "loss": 1.669, "step": 932 }, { "epoch": 0.28319927151312796, "grad_norm": 0.3672066926956177, "learning_rate": 9.532752860180217e-05, "loss": 1.9093, "step": 933 }, { "epoch": 0.2835028077098194, "grad_norm": 0.4022217392921448, "learning_rate": 9.532246633593197e-05, "loss": 1.6959, "step": 934 }, { "epoch": 0.2838063439065108, "grad_norm": 0.3894721269607544, "learning_rate": 9.531740407006176e-05, "loss": 1.9898, "step": 935 }, { "epoch": 0.2841098801032023, "grad_norm": 0.4395015835762024, "learning_rate": 9.531234180419156e-05, "loss": 1.5538, "step": 936 }, { "epoch": 0.28441341629989375, "grad_norm": 0.8121886849403381, "learning_rate": 9.530727953832136e-05, "loss": 1.7403, "step": 937 }, { "epoch": 0.28471695249658524, "grad_norm": 0.40073227882385254, "learning_rate": 9.530221727245116e-05, "loss": 2.0544, "step": 938 }, { "epoch": 0.2850204886932767, "grad_norm": 0.3571331202983856, "learning_rate": 9.529715500658095e-05, "loss": 1.7157, "step": 939 }, { "epoch": 0.2853240248899681, "grad_norm": 0.485147625207901, "learning_rate": 9.529209274071075e-05, "loss": 2.1489, "step": 940 }, { "epoch": 0.2856275610866596, "grad_norm": 0.6882160305976868, "learning_rate": 9.528703047484054e-05, "loss": 1.8458, "step": 941 }, { "epoch": 0.28593109728335103, "grad_norm": 0.7156968116760254, "learning_rate": 9.528196820897034e-05, "loss": 1.9529, "step": 942 }, { "epoch": 0.2862346334800425, "grad_norm": 0.4198112487792969, "learning_rate": 9.527690594310013e-05, "loss": 2.0355, "step": 943 }, { "epoch": 0.28653816967673396, "grad_norm": 0.4178343117237091, "learning_rate": 9.527184367722993e-05, "loss": 1.5801, "step": 944 }, { "epoch": 0.2868417058734254, "grad_norm": 0.3721866011619568, "learning_rate": 9.526678141135972e-05, "loss": 2.1657, "step": 945 }, { "epoch": 0.2871452420701169, "grad_norm": 0.38586944341659546, "learning_rate": 9.526171914548953e-05, "loss": 1.4879, "step": 946 }, { "epoch": 0.2874487782668083, "grad_norm": 0.42727598547935486, "learning_rate": 9.525665687961933e-05, "loss": 1.8434, "step": 947 }, { "epoch": 0.28775231446349975, "grad_norm": 0.3686284124851227, "learning_rate": 9.525159461374912e-05, "loss": 1.9346, "step": 948 }, { "epoch": 0.28805585066019124, "grad_norm": 0.41984260082244873, "learning_rate": 9.524653234787892e-05, "loss": 1.4474, "step": 949 }, { "epoch": 0.28835938685688267, "grad_norm": 0.4530123174190521, "learning_rate": 9.524147008200871e-05, "loss": 1.6863, "step": 950 }, { "epoch": 0.28866292305357416, "grad_norm": 0.40047594904899597, "learning_rate": 9.52364078161385e-05, "loss": 1.908, "step": 951 }, { "epoch": 0.2889664592502656, "grad_norm": 0.3757762610912323, "learning_rate": 9.52313455502683e-05, "loss": 1.6235, "step": 952 }, { "epoch": 0.28926999544695703, "grad_norm": 0.4337126612663269, "learning_rate": 9.52262832843981e-05, "loss": 1.6229, "step": 953 }, { "epoch": 0.2895735316436485, "grad_norm": 0.4407886564731598, "learning_rate": 9.522122101852789e-05, "loss": 1.875, "step": 954 }, { "epoch": 0.28987706784033995, "grad_norm": 0.5278657674789429, "learning_rate": 9.521615875265768e-05, "loss": 1.7199, "step": 955 }, { "epoch": 0.29018060403703144, "grad_norm": 0.4441334307193756, "learning_rate": 9.521109648678749e-05, "loss": 1.1319, "step": 956 }, { "epoch": 0.2904841402337229, "grad_norm": 0.3992663025856018, "learning_rate": 9.520603422091729e-05, "loss": 1.6948, "step": 957 }, { "epoch": 0.2907876764304143, "grad_norm": 0.3979544937610626, "learning_rate": 9.520097195504708e-05, "loss": 1.8689, "step": 958 }, { "epoch": 0.2910912126271058, "grad_norm": 0.4011298418045044, "learning_rate": 9.519590968917688e-05, "loss": 1.9491, "step": 959 }, { "epoch": 0.29139474882379723, "grad_norm": 0.4377354383468628, "learning_rate": 9.519084742330667e-05, "loss": 1.7274, "step": 960 }, { "epoch": 0.29169828502048867, "grad_norm": 0.5056617856025696, "learning_rate": 9.518578515743647e-05, "loss": 2.006, "step": 961 }, { "epoch": 0.29200182121718016, "grad_norm": 0.36736002564430237, "learning_rate": 9.518072289156626e-05, "loss": 1.6558, "step": 962 }, { "epoch": 0.2923053574138716, "grad_norm": 0.37966540455818176, "learning_rate": 9.517566062569606e-05, "loss": 2.0098, "step": 963 }, { "epoch": 0.2926088936105631, "grad_norm": 0.4026505947113037, "learning_rate": 9.517059835982585e-05, "loss": 1.868, "step": 964 }, { "epoch": 0.2929124298072545, "grad_norm": 0.461910218000412, "learning_rate": 9.516553609395566e-05, "loss": 2.1131, "step": 965 }, { "epoch": 0.29321596600394595, "grad_norm": 0.4329175651073456, "learning_rate": 9.516047382808545e-05, "loss": 2.0068, "step": 966 }, { "epoch": 0.29351950220063744, "grad_norm": 0.7611956000328064, "learning_rate": 9.515541156221526e-05, "loss": 1.9177, "step": 967 }, { "epoch": 0.2938230383973289, "grad_norm": 0.6180218458175659, "learning_rate": 9.515034929634506e-05, "loss": 1.5603, "step": 968 }, { "epoch": 0.29412657459402036, "grad_norm": 0.6556726694107056, "learning_rate": 9.514528703047485e-05, "loss": 2.1081, "step": 969 }, { "epoch": 0.2944301107907118, "grad_norm": 0.3379404842853546, "learning_rate": 9.514022476460465e-05, "loss": 1.9701, "step": 970 }, { "epoch": 0.29473364698740323, "grad_norm": 0.42676112055778503, "learning_rate": 9.513516249873444e-05, "loss": 1.6116, "step": 971 }, { "epoch": 0.2950371831840947, "grad_norm": 0.35374894738197327, "learning_rate": 9.513010023286424e-05, "loss": 2.0621, "step": 972 }, { "epoch": 0.29534071938078615, "grad_norm": 0.33012476563453674, "learning_rate": 9.512503796699403e-05, "loss": 1.4534, "step": 973 }, { "epoch": 0.2956442555774776, "grad_norm": 0.37993383407592773, "learning_rate": 9.511997570112383e-05, "loss": 1.6306, "step": 974 }, { "epoch": 0.2959477917741691, "grad_norm": 0.47140204906463623, "learning_rate": 9.511491343525362e-05, "loss": 2.0465, "step": 975 }, { "epoch": 0.2962513279708605, "grad_norm": 0.40235936641693115, "learning_rate": 9.510985116938343e-05, "loss": 1.8247, "step": 976 }, { "epoch": 0.296554864167552, "grad_norm": 0.3992665112018585, "learning_rate": 9.510478890351322e-05, "loss": 1.5702, "step": 977 }, { "epoch": 0.29685840036424344, "grad_norm": 0.4469521641731262, "learning_rate": 9.509972663764302e-05, "loss": 1.8811, "step": 978 }, { "epoch": 0.29716193656093487, "grad_norm": 0.41400644183158875, "learning_rate": 9.509466437177281e-05, "loss": 1.5374, "step": 979 }, { "epoch": 0.29746547275762636, "grad_norm": 0.36348387598991394, "learning_rate": 9.508960210590261e-05, "loss": 1.9022, "step": 980 }, { "epoch": 0.2977690089543178, "grad_norm": 0.4069242477416992, "learning_rate": 9.50845398400324e-05, "loss": 2.0066, "step": 981 }, { "epoch": 0.2980725451510093, "grad_norm": 0.3684113323688507, "learning_rate": 9.50794775741622e-05, "loss": 1.8972, "step": 982 }, { "epoch": 0.2983760813477007, "grad_norm": 0.40827688574790955, "learning_rate": 9.5074415308292e-05, "loss": 2.0659, "step": 983 }, { "epoch": 0.29867961754439215, "grad_norm": 0.32065409421920776, "learning_rate": 9.506935304242179e-05, "loss": 2.0008, "step": 984 }, { "epoch": 0.29898315374108364, "grad_norm": 0.38805294036865234, "learning_rate": 9.50642907765516e-05, "loss": 1.5027, "step": 985 }, { "epoch": 0.2992866899377751, "grad_norm": 0.3656708896160126, "learning_rate": 9.505922851068139e-05, "loss": 1.7931, "step": 986 }, { "epoch": 0.2995902261344665, "grad_norm": 0.4354289770126343, "learning_rate": 9.505416624481119e-05, "loss": 2.1183, "step": 987 }, { "epoch": 0.299893762331158, "grad_norm": 0.3970641493797302, "learning_rate": 9.504910397894098e-05, "loss": 1.8188, "step": 988 }, { "epoch": 0.30019729852784943, "grad_norm": 0.35527995228767395, "learning_rate": 9.504404171307078e-05, "loss": 1.6329, "step": 989 }, { "epoch": 0.3005008347245409, "grad_norm": 0.4018630385398865, "learning_rate": 9.503897944720057e-05, "loss": 1.993, "step": 990 }, { "epoch": 0.30080437092123236, "grad_norm": 0.36514052748680115, "learning_rate": 9.503391718133037e-05, "loss": 2.0482, "step": 991 }, { "epoch": 0.3011079071179238, "grad_norm": 0.3790993094444275, "learning_rate": 9.502885491546016e-05, "loss": 2.0286, "step": 992 }, { "epoch": 0.3014114433146153, "grad_norm": 0.314779669046402, "learning_rate": 9.502379264958995e-05, "loss": 1.8135, "step": 993 }, { "epoch": 0.3017149795113067, "grad_norm": 0.42383378744125366, "learning_rate": 9.501873038371975e-05, "loss": 1.8783, "step": 994 }, { "epoch": 0.3020185157079982, "grad_norm": 0.4036683738231659, "learning_rate": 9.501366811784956e-05, "loss": 1.6091, "step": 995 }, { "epoch": 0.30232205190468964, "grad_norm": 0.3611324429512024, "learning_rate": 9.500860585197935e-05, "loss": 1.3388, "step": 996 }, { "epoch": 0.30262558810138107, "grad_norm": 0.44210389256477356, "learning_rate": 9.500354358610915e-05, "loss": 1.6133, "step": 997 }, { "epoch": 0.30292912429807256, "grad_norm": 0.37780526280403137, "learning_rate": 9.499848132023894e-05, "loss": 1.9993, "step": 998 }, { "epoch": 0.303232660494764, "grad_norm": 0.469959557056427, "learning_rate": 9.499341905436874e-05, "loss": 1.8094, "step": 999 }, { "epoch": 0.30353619669145543, "grad_norm": 0.38992664217948914, "learning_rate": 9.498835678849853e-05, "loss": 1.8975, "step": 1000 }, { "epoch": 0.3038397328881469, "grad_norm": 0.44024091958999634, "learning_rate": 9.498329452262833e-05, "loss": 1.7081, "step": 1001 }, { "epoch": 0.30414326908483835, "grad_norm": 0.32488685846328735, "learning_rate": 9.497823225675812e-05, "loss": 1.4921, "step": 1002 }, { "epoch": 0.30444680528152984, "grad_norm": 0.7046712636947632, "learning_rate": 9.497316999088792e-05, "loss": 1.9693, "step": 1003 }, { "epoch": 0.3047503414782213, "grad_norm": 0.39591220021247864, "learning_rate": 9.496810772501772e-05, "loss": 2.0266, "step": 1004 }, { "epoch": 0.3050538776749127, "grad_norm": 0.371804416179657, "learning_rate": 9.496304545914752e-05, "loss": 1.9906, "step": 1005 }, { "epoch": 0.3053574138716042, "grad_norm": 0.32893630862236023, "learning_rate": 9.495798319327731e-05, "loss": 1.9469, "step": 1006 }, { "epoch": 0.30566095006829563, "grad_norm": 0.406531423330307, "learning_rate": 9.495292092740711e-05, "loss": 1.7575, "step": 1007 }, { "epoch": 0.3059644862649871, "grad_norm": 0.3299405872821808, "learning_rate": 9.49478586615369e-05, "loss": 1.6457, "step": 1008 }, { "epoch": 0.30626802246167856, "grad_norm": 0.40007394552230835, "learning_rate": 9.49427963956667e-05, "loss": 1.9291, "step": 1009 }, { "epoch": 0.30657155865837, "grad_norm": 0.41286107897758484, "learning_rate": 9.49377341297965e-05, "loss": 1.9869, "step": 1010 }, { "epoch": 0.3068750948550615, "grad_norm": 0.6297092437744141, "learning_rate": 9.493267186392629e-05, "loss": 2.1354, "step": 1011 }, { "epoch": 0.3071786310517529, "grad_norm": 0.4763343334197998, "learning_rate": 9.49276095980561e-05, "loss": 1.6641, "step": 1012 }, { "epoch": 0.3074821672484444, "grad_norm": 0.343218058347702, "learning_rate": 9.492254733218589e-05, "loss": 1.9556, "step": 1013 }, { "epoch": 0.30778570344513584, "grad_norm": 0.4180206060409546, "learning_rate": 9.491748506631569e-05, "loss": 2.064, "step": 1014 }, { "epoch": 0.3080892396418273, "grad_norm": 0.3307478725910187, "learning_rate": 9.49124228004455e-05, "loss": 1.9579, "step": 1015 }, { "epoch": 0.30839277583851876, "grad_norm": 0.31935417652130127, "learning_rate": 9.490736053457529e-05, "loss": 2.0038, "step": 1016 }, { "epoch": 0.3086963120352102, "grad_norm": 0.4078797399997711, "learning_rate": 9.490229826870508e-05, "loss": 1.6727, "step": 1017 }, { "epoch": 0.30899984823190163, "grad_norm": 0.4393940269947052, "learning_rate": 9.489723600283488e-05, "loss": 1.9709, "step": 1018 }, { "epoch": 0.3093033844285931, "grad_norm": 0.41586485505104065, "learning_rate": 9.489217373696467e-05, "loss": 1.9976, "step": 1019 }, { "epoch": 0.30960692062528455, "grad_norm": 0.32988855242729187, "learning_rate": 9.488711147109447e-05, "loss": 2.1278, "step": 1020 }, { "epoch": 0.30991045682197604, "grad_norm": 0.47184863686561584, "learning_rate": 9.488204920522426e-05, "loss": 1.8132, "step": 1021 }, { "epoch": 0.3102139930186675, "grad_norm": 0.32716313004493713, "learning_rate": 9.487698693935406e-05, "loss": 1.6124, "step": 1022 }, { "epoch": 0.3105175292153589, "grad_norm": 0.46906420588493347, "learning_rate": 9.487192467348385e-05, "loss": 1.9718, "step": 1023 }, { "epoch": 0.3108210654120504, "grad_norm": 0.3436840772628784, "learning_rate": 9.486686240761366e-05, "loss": 1.809, "step": 1024 }, { "epoch": 0.31112460160874184, "grad_norm": 0.39674249291419983, "learning_rate": 9.486180014174346e-05, "loss": 1.5307, "step": 1025 }, { "epoch": 0.3114281378054333, "grad_norm": 0.40978574752807617, "learning_rate": 9.485673787587325e-05, "loss": 2.0251, "step": 1026 }, { "epoch": 0.31173167400212476, "grad_norm": 0.39651399850845337, "learning_rate": 9.485167561000305e-05, "loss": 1.8872, "step": 1027 }, { "epoch": 0.3120352101988162, "grad_norm": 0.7730064988136292, "learning_rate": 9.484661334413284e-05, "loss": 1.7339, "step": 1028 }, { "epoch": 0.3123387463955077, "grad_norm": 0.36178770661354065, "learning_rate": 9.484155107826264e-05, "loss": 1.8594, "step": 1029 }, { "epoch": 0.3126422825921991, "grad_norm": 0.4153605103492737, "learning_rate": 9.483648881239243e-05, "loss": 1.8687, "step": 1030 }, { "epoch": 0.31294581878889055, "grad_norm": 0.41472381353378296, "learning_rate": 9.483142654652222e-05, "loss": 1.9665, "step": 1031 }, { "epoch": 0.31324935498558204, "grad_norm": 0.3871115744113922, "learning_rate": 9.482636428065202e-05, "loss": 2.1858, "step": 1032 }, { "epoch": 0.3135528911822735, "grad_norm": 0.33978626132011414, "learning_rate": 9.482130201478181e-05, "loss": 1.5615, "step": 1033 }, { "epoch": 0.31385642737896496, "grad_norm": 0.33726009726524353, "learning_rate": 9.481623974891162e-05, "loss": 2.1119, "step": 1034 }, { "epoch": 0.3141599635756564, "grad_norm": 0.35080355405807495, "learning_rate": 9.481117748304142e-05, "loss": 1.9497, "step": 1035 }, { "epoch": 0.31446349977234783, "grad_norm": 0.37655749917030334, "learning_rate": 9.480611521717121e-05, "loss": 1.6486, "step": 1036 }, { "epoch": 0.3147670359690393, "grad_norm": 0.3838097155094147, "learning_rate": 9.480105295130101e-05, "loss": 2.0504, "step": 1037 }, { "epoch": 0.31507057216573076, "grad_norm": 0.3412497341632843, "learning_rate": 9.47959906854308e-05, "loss": 1.8417, "step": 1038 }, { "epoch": 0.31537410836242225, "grad_norm": 0.3633384108543396, "learning_rate": 9.47909284195606e-05, "loss": 1.9713, "step": 1039 }, { "epoch": 0.3156776445591137, "grad_norm": 0.332861989736557, "learning_rate": 9.478586615369039e-05, "loss": 1.8967, "step": 1040 }, { "epoch": 0.3159811807558051, "grad_norm": 0.5054538249969482, "learning_rate": 9.478080388782019e-05, "loss": 1.8217, "step": 1041 }, { "epoch": 0.3162847169524966, "grad_norm": 0.30825376510620117, "learning_rate": 9.477574162194998e-05, "loss": 1.8026, "step": 1042 }, { "epoch": 0.31658825314918804, "grad_norm": 0.3759863078594208, "learning_rate": 9.477067935607979e-05, "loss": 1.6662, "step": 1043 }, { "epoch": 0.31689178934587947, "grad_norm": 0.36408594250679016, "learning_rate": 9.476561709020958e-05, "loss": 2.3524, "step": 1044 }, { "epoch": 0.31719532554257096, "grad_norm": 0.38226181268692017, "learning_rate": 9.476055482433938e-05, "loss": 1.8966, "step": 1045 }, { "epoch": 0.3174988617392624, "grad_norm": 0.35480546951293945, "learning_rate": 9.475549255846917e-05, "loss": 1.9114, "step": 1046 }, { "epoch": 0.3178023979359539, "grad_norm": 0.378701776266098, "learning_rate": 9.475043029259897e-05, "loss": 1.9151, "step": 1047 }, { "epoch": 0.3181059341326453, "grad_norm": 1.0800230503082275, "learning_rate": 9.474536802672876e-05, "loss": 1.3396, "step": 1048 }, { "epoch": 0.31840947032933675, "grad_norm": 0.4015067219734192, "learning_rate": 9.474030576085856e-05, "loss": 1.6889, "step": 1049 }, { "epoch": 0.31871300652602824, "grad_norm": 0.35431405901908875, "learning_rate": 9.473524349498835e-05, "loss": 1.4716, "step": 1050 }, { "epoch": 0.3190165427227197, "grad_norm": 0.4030434787273407, "learning_rate": 9.473018122911815e-05, "loss": 1.6192, "step": 1051 }, { "epoch": 0.31932007891941117, "grad_norm": 0.4005342423915863, "learning_rate": 9.472511896324796e-05, "loss": 1.5092, "step": 1052 }, { "epoch": 0.3196236151161026, "grad_norm": 1.130418062210083, "learning_rate": 9.472005669737775e-05, "loss": 1.5802, "step": 1053 }, { "epoch": 0.31992715131279403, "grad_norm": 0.41232943534851074, "learning_rate": 9.471499443150755e-05, "loss": 2.0205, "step": 1054 }, { "epoch": 0.3202306875094855, "grad_norm": 0.4155721366405487, "learning_rate": 9.470993216563734e-05, "loss": 1.88, "step": 1055 }, { "epoch": 0.32053422370617696, "grad_norm": 0.36597010493278503, "learning_rate": 9.470486989976715e-05, "loss": 1.9922, "step": 1056 }, { "epoch": 0.3208377599028684, "grad_norm": 0.8094148635864258, "learning_rate": 9.469980763389694e-05, "loss": 1.8267, "step": 1057 }, { "epoch": 0.3211412960995599, "grad_norm": 0.36358359456062317, "learning_rate": 9.469474536802674e-05, "loss": 1.5307, "step": 1058 }, { "epoch": 0.3214448322962513, "grad_norm": 0.400796502828598, "learning_rate": 9.468968310215653e-05, "loss": 1.9742, "step": 1059 }, { "epoch": 0.3217483684929428, "grad_norm": 0.3251611888408661, "learning_rate": 9.468462083628633e-05, "loss": 1.7736, "step": 1060 }, { "epoch": 0.32205190468963424, "grad_norm": 0.4060586988925934, "learning_rate": 9.467955857041612e-05, "loss": 1.7211, "step": 1061 }, { "epoch": 0.3223554408863257, "grad_norm": 0.4181293547153473, "learning_rate": 9.467449630454592e-05, "loss": 1.5085, "step": 1062 }, { "epoch": 0.32265897708301716, "grad_norm": 0.3514660894870758, "learning_rate": 9.466943403867573e-05, "loss": 1.8939, "step": 1063 }, { "epoch": 0.3229625132797086, "grad_norm": 0.3337076008319855, "learning_rate": 9.466437177280552e-05, "loss": 1.6281, "step": 1064 }, { "epoch": 0.3232660494764001, "grad_norm": 0.39011150598526, "learning_rate": 9.465930950693532e-05, "loss": 2.3316, "step": 1065 }, { "epoch": 0.3235695856730915, "grad_norm": 0.42054951190948486, "learning_rate": 9.465424724106511e-05, "loss": 1.9249, "step": 1066 }, { "epoch": 0.32387312186978295, "grad_norm": 0.37516888976097107, "learning_rate": 9.46491849751949e-05, "loss": 1.9643, "step": 1067 }, { "epoch": 0.32417665806647444, "grad_norm": 0.3549358546733856, "learning_rate": 9.46441227093247e-05, "loss": 1.9069, "step": 1068 }, { "epoch": 0.3244801942631659, "grad_norm": 1.4541680812835693, "learning_rate": 9.46390604434545e-05, "loss": 1.4868, "step": 1069 }, { "epoch": 0.3247837304598573, "grad_norm": 0.31561896204948425, "learning_rate": 9.463399817758429e-05, "loss": 1.8972, "step": 1070 }, { "epoch": 0.3250872666565488, "grad_norm": 0.35816720128059387, "learning_rate": 9.462893591171408e-05, "loss": 2.0286, "step": 1071 }, { "epoch": 0.32539080285324024, "grad_norm": 0.38618069887161255, "learning_rate": 9.462387364584388e-05, "loss": 1.7604, "step": 1072 }, { "epoch": 0.3256943390499317, "grad_norm": 0.42617419362068176, "learning_rate": 9.461881137997369e-05, "loss": 1.2767, "step": 1073 }, { "epoch": 0.32599787524662316, "grad_norm": 0.3996577262878418, "learning_rate": 9.461374911410348e-05, "loss": 2.0023, "step": 1074 }, { "epoch": 0.3263014114433146, "grad_norm": 0.6627565026283264, "learning_rate": 9.460868684823328e-05, "loss": 2.2386, "step": 1075 }, { "epoch": 0.3266049476400061, "grad_norm": 0.3753213882446289, "learning_rate": 9.460362458236307e-05, "loss": 1.6935, "step": 1076 }, { "epoch": 0.3269084838366975, "grad_norm": 0.4097970724105835, "learning_rate": 9.459856231649287e-05, "loss": 1.59, "step": 1077 }, { "epoch": 0.327212020033389, "grad_norm": 0.39637240767478943, "learning_rate": 9.459350005062266e-05, "loss": 1.5338, "step": 1078 }, { "epoch": 0.32751555623008044, "grad_norm": 0.38365036249160767, "learning_rate": 9.458843778475246e-05, "loss": 2.0128, "step": 1079 }, { "epoch": 0.3278190924267719, "grad_norm": 0.42568036913871765, "learning_rate": 9.458337551888225e-05, "loss": 1.3282, "step": 1080 }, { "epoch": 0.32812262862346336, "grad_norm": 0.4248203933238983, "learning_rate": 9.457831325301205e-05, "loss": 1.9059, "step": 1081 }, { "epoch": 0.3284261648201548, "grad_norm": 0.37200963497161865, "learning_rate": 9.457325098714185e-05, "loss": 2.085, "step": 1082 }, { "epoch": 0.32872970101684623, "grad_norm": 0.44390764832496643, "learning_rate": 9.456818872127165e-05, "loss": 1.9431, "step": 1083 }, { "epoch": 0.3290332372135377, "grad_norm": 0.44483283162117004, "learning_rate": 9.456312645540144e-05, "loss": 2.088, "step": 1084 }, { "epoch": 0.32933677341022916, "grad_norm": 0.3765670955181122, "learning_rate": 9.455806418953124e-05, "loss": 2.0446, "step": 1085 }, { "epoch": 0.32964030960692065, "grad_norm": 0.428964763879776, "learning_rate": 9.455300192366103e-05, "loss": 1.9831, "step": 1086 }, { "epoch": 0.3299438458036121, "grad_norm": 0.3957151770591736, "learning_rate": 9.454793965779083e-05, "loss": 1.7817, "step": 1087 }, { "epoch": 0.3302473820003035, "grad_norm": 0.3726184368133545, "learning_rate": 9.454287739192062e-05, "loss": 1.9929, "step": 1088 }, { "epoch": 0.330550918196995, "grad_norm": 0.41574302315711975, "learning_rate": 9.453781512605042e-05, "loss": 2.0094, "step": 1089 }, { "epoch": 0.33085445439368644, "grad_norm": 0.36284613609313965, "learning_rate": 9.453275286018021e-05, "loss": 2.0273, "step": 1090 }, { "epoch": 0.3311579905903779, "grad_norm": 0.48810014128685, "learning_rate": 9.452769059431002e-05, "loss": 1.4371, "step": 1091 }, { "epoch": 0.33146152678706936, "grad_norm": 0.3929893672466278, "learning_rate": 9.452262832843982e-05, "loss": 2.0663, "step": 1092 }, { "epoch": 0.3317650629837608, "grad_norm": 0.401722252368927, "learning_rate": 9.451756606256961e-05, "loss": 1.6119, "step": 1093 }, { "epoch": 0.3320685991804523, "grad_norm": 0.42032745480537415, "learning_rate": 9.45125037966994e-05, "loss": 1.7541, "step": 1094 }, { "epoch": 0.3323721353771437, "grad_norm": 0.3663571774959564, "learning_rate": 9.45074415308292e-05, "loss": 1.4438, "step": 1095 }, { "epoch": 0.33267567157383515, "grad_norm": 0.397624671459198, "learning_rate": 9.4502379264959e-05, "loss": 2.0996, "step": 1096 }, { "epoch": 0.33297920777052664, "grad_norm": 0.3914051651954651, "learning_rate": 9.449731699908879e-05, "loss": 1.5906, "step": 1097 }, { "epoch": 0.3332827439672181, "grad_norm": 0.3951834440231323, "learning_rate": 9.449225473321858e-05, "loss": 1.9128, "step": 1098 }, { "epoch": 0.33358628016390957, "grad_norm": 0.363696426153183, "learning_rate": 9.448719246734838e-05, "loss": 1.3447, "step": 1099 }, { "epoch": 0.333889816360601, "grad_norm": 0.3522724211215973, "learning_rate": 9.448213020147817e-05, "loss": 1.6755, "step": 1100 }, { "epoch": 0.33419335255729243, "grad_norm": 1.1290934085845947, "learning_rate": 9.447706793560798e-05, "loss": 1.6796, "step": 1101 }, { "epoch": 0.3344968887539839, "grad_norm": 0.3527061939239502, "learning_rate": 9.447200566973779e-05, "loss": 1.4917, "step": 1102 }, { "epoch": 0.33480042495067536, "grad_norm": 0.3770875930786133, "learning_rate": 9.446694340386759e-05, "loss": 1.2733, "step": 1103 }, { "epoch": 0.33510396114736685, "grad_norm": 0.3742992579936981, "learning_rate": 9.446188113799738e-05, "loss": 1.8584, "step": 1104 }, { "epoch": 0.3354074973440583, "grad_norm": 0.7284528017044067, "learning_rate": 9.445681887212718e-05, "loss": 2.0183, "step": 1105 }, { "epoch": 0.3357110335407497, "grad_norm": 0.37331897020339966, "learning_rate": 9.445175660625697e-05, "loss": 1.9604, "step": 1106 }, { "epoch": 0.3360145697374412, "grad_norm": 0.3642507791519165, "learning_rate": 9.444669434038676e-05, "loss": 1.8661, "step": 1107 }, { "epoch": 0.33631810593413264, "grad_norm": 0.4249272346496582, "learning_rate": 9.444163207451656e-05, "loss": 2.039, "step": 1108 }, { "epoch": 0.33662164213082413, "grad_norm": 0.5299102067947388, "learning_rate": 9.443656980864635e-05, "loss": 1.618, "step": 1109 }, { "epoch": 0.33692517832751556, "grad_norm": 0.37671583890914917, "learning_rate": 9.443150754277615e-05, "loss": 1.5634, "step": 1110 }, { "epoch": 0.337228714524207, "grad_norm": 0.9504343867301941, "learning_rate": 9.442644527690594e-05, "loss": 1.7275, "step": 1111 }, { "epoch": 0.3375322507208985, "grad_norm": 0.37230974435806274, "learning_rate": 9.442138301103575e-05, "loss": 1.9971, "step": 1112 }, { "epoch": 0.3378357869175899, "grad_norm": 0.4015982449054718, "learning_rate": 9.441632074516555e-05, "loss": 2.0012, "step": 1113 }, { "epoch": 0.33813932311428135, "grad_norm": 0.42521438002586365, "learning_rate": 9.441125847929534e-05, "loss": 2.1657, "step": 1114 }, { "epoch": 0.33844285931097284, "grad_norm": 0.3954319953918457, "learning_rate": 9.440619621342514e-05, "loss": 1.7999, "step": 1115 }, { "epoch": 0.3387463955076643, "grad_norm": 0.5241403579711914, "learning_rate": 9.440113394755493e-05, "loss": 2.0102, "step": 1116 }, { "epoch": 0.33904993170435577, "grad_norm": 0.4186641275882721, "learning_rate": 9.439607168168473e-05, "loss": 1.854, "step": 1117 }, { "epoch": 0.3393534679010472, "grad_norm": 0.4375157654285431, "learning_rate": 9.439100941581452e-05, "loss": 1.7774, "step": 1118 }, { "epoch": 0.33965700409773864, "grad_norm": 0.43266987800598145, "learning_rate": 9.438594714994432e-05, "loss": 1.9328, "step": 1119 }, { "epoch": 0.3399605402944301, "grad_norm": 0.5544857382774353, "learning_rate": 9.438088488407411e-05, "loss": 1.7284, "step": 1120 }, { "epoch": 0.34026407649112156, "grad_norm": 0.39998582005500793, "learning_rate": 9.437582261820392e-05, "loss": 1.8724, "step": 1121 }, { "epoch": 0.34056761268781305, "grad_norm": 0.41390395164489746, "learning_rate": 9.437076035233371e-05, "loss": 2.1188, "step": 1122 }, { "epoch": 0.3408711488845045, "grad_norm": 0.4374658167362213, "learning_rate": 9.436569808646351e-05, "loss": 1.9946, "step": 1123 }, { "epoch": 0.3411746850811959, "grad_norm": 0.3902375102043152, "learning_rate": 9.43606358205933e-05, "loss": 1.9352, "step": 1124 }, { "epoch": 0.3414782212778874, "grad_norm": 0.4049385190010071, "learning_rate": 9.43555735547231e-05, "loss": 1.7674, "step": 1125 }, { "epoch": 0.34178175747457884, "grad_norm": 0.42752334475517273, "learning_rate": 9.435051128885289e-05, "loss": 2.1489, "step": 1126 }, { "epoch": 0.3420852936712703, "grad_norm": 0.3927367925643921, "learning_rate": 9.434544902298269e-05, "loss": 1.9454, "step": 1127 }, { "epoch": 0.34238882986796176, "grad_norm": 1.4001588821411133, "learning_rate": 9.434038675711248e-05, "loss": 1.8791, "step": 1128 }, { "epoch": 0.3426923660646532, "grad_norm": 0.3640120327472687, "learning_rate": 9.433532449124228e-05, "loss": 1.9714, "step": 1129 }, { "epoch": 0.3429959022613447, "grad_norm": 0.3569428026676178, "learning_rate": 9.433026222537209e-05, "loss": 1.715, "step": 1130 }, { "epoch": 0.3432994384580361, "grad_norm": 0.3593400716781616, "learning_rate": 9.432519995950188e-05, "loss": 1.9454, "step": 1131 }, { "epoch": 0.34360297465472756, "grad_norm": 0.38255101442337036, "learning_rate": 9.432013769363168e-05, "loss": 1.9629, "step": 1132 }, { "epoch": 0.34390651085141904, "grad_norm": 0.4099471867084503, "learning_rate": 9.431507542776147e-05, "loss": 1.9556, "step": 1133 }, { "epoch": 0.3442100470481105, "grad_norm": 0.36562618613243103, "learning_rate": 9.431001316189126e-05, "loss": 1.3671, "step": 1134 }, { "epoch": 0.34451358324480197, "grad_norm": 0.49943339824676514, "learning_rate": 9.430495089602106e-05, "loss": 1.8581, "step": 1135 }, { "epoch": 0.3448171194414934, "grad_norm": 0.3707871437072754, "learning_rate": 9.429988863015085e-05, "loss": 2.0911, "step": 1136 }, { "epoch": 0.34512065563818484, "grad_norm": 0.3699527382850647, "learning_rate": 9.429482636428065e-05, "loss": 2.0198, "step": 1137 }, { "epoch": 0.3454241918348763, "grad_norm": 0.4300304055213928, "learning_rate": 9.428976409841044e-05, "loss": 2.2398, "step": 1138 }, { "epoch": 0.34572772803156776, "grad_norm": 0.38733771443367004, "learning_rate": 9.428470183254024e-05, "loss": 1.9505, "step": 1139 }, { "epoch": 0.3460312642282592, "grad_norm": 0.38434740900993347, "learning_rate": 9.427963956667005e-05, "loss": 2.0853, "step": 1140 }, { "epoch": 0.3463348004249507, "grad_norm": 0.3448013961315155, "learning_rate": 9.427457730079984e-05, "loss": 1.816, "step": 1141 }, { "epoch": 0.3466383366216421, "grad_norm": 2.17158842086792, "learning_rate": 9.426951503492964e-05, "loss": 1.9041, "step": 1142 }, { "epoch": 0.3469418728183336, "grad_norm": 0.39879223704338074, "learning_rate": 9.426445276905943e-05, "loss": 1.8991, "step": 1143 }, { "epoch": 0.34724540901502504, "grad_norm": 0.517691433429718, "learning_rate": 9.425939050318923e-05, "loss": 1.4864, "step": 1144 }, { "epoch": 0.3475489452117165, "grad_norm": 0.4679596722126007, "learning_rate": 9.425432823731903e-05, "loss": 2.127, "step": 1145 }, { "epoch": 0.34785248140840797, "grad_norm": 0.47220855951309204, "learning_rate": 9.424926597144883e-05, "loss": 1.1827, "step": 1146 }, { "epoch": 0.3481560176050994, "grad_norm": 0.4707253575325012, "learning_rate": 9.424420370557862e-05, "loss": 1.6538, "step": 1147 }, { "epoch": 0.3484595538017909, "grad_norm": 0.5610188245773315, "learning_rate": 9.423914143970842e-05, "loss": 2.0097, "step": 1148 }, { "epoch": 0.3487630899984823, "grad_norm": 0.6568597555160522, "learning_rate": 9.423407917383821e-05, "loss": 1.8777, "step": 1149 }, { "epoch": 0.34906662619517376, "grad_norm": 0.38883280754089355, "learning_rate": 9.422901690796801e-05, "loss": 1.8524, "step": 1150 }, { "epoch": 0.34937016239186525, "grad_norm": 0.34381693601608276, "learning_rate": 9.422395464209782e-05, "loss": 1.7728, "step": 1151 }, { "epoch": 0.3496736985885567, "grad_norm": 0.4320678412914276, "learning_rate": 9.421889237622761e-05, "loss": 1.6346, "step": 1152 }, { "epoch": 0.3499772347852481, "grad_norm": 0.4651411771774292, "learning_rate": 9.42138301103574e-05, "loss": 2.008, "step": 1153 }, { "epoch": 0.3502807709819396, "grad_norm": 0.5340977907180786, "learning_rate": 9.42087678444872e-05, "loss": 1.6537, "step": 1154 }, { "epoch": 0.35058430717863104, "grad_norm": 0.3686065971851349, "learning_rate": 9.4203705578617e-05, "loss": 1.6585, "step": 1155 }, { "epoch": 0.35088784337532253, "grad_norm": 0.4016922116279602, "learning_rate": 9.419864331274679e-05, "loss": 1.4874, "step": 1156 }, { "epoch": 0.35119137957201396, "grad_norm": 0.4304169714450836, "learning_rate": 9.419358104687659e-05, "loss": 2.0125, "step": 1157 }, { "epoch": 0.3514949157687054, "grad_norm": 0.3944842517375946, "learning_rate": 9.418851878100638e-05, "loss": 2.0397, "step": 1158 }, { "epoch": 0.3517984519653969, "grad_norm": 0.3778032064437866, "learning_rate": 9.418345651513618e-05, "loss": 2.0629, "step": 1159 }, { "epoch": 0.3521019881620883, "grad_norm": 0.4209291636943817, "learning_rate": 9.417839424926598e-05, "loss": 1.845, "step": 1160 }, { "epoch": 0.3524055243587798, "grad_norm": 0.3948676586151123, "learning_rate": 9.417333198339578e-05, "loss": 1.9479, "step": 1161 }, { "epoch": 0.35270906055547124, "grad_norm": 0.4018319547176361, "learning_rate": 9.416826971752557e-05, "loss": 1.9, "step": 1162 }, { "epoch": 0.3530125967521627, "grad_norm": 0.42170947790145874, "learning_rate": 9.416320745165537e-05, "loss": 1.8034, "step": 1163 }, { "epoch": 0.35331613294885417, "grad_norm": 0.3817223310470581, "learning_rate": 9.415814518578516e-05, "loss": 1.4884, "step": 1164 }, { "epoch": 0.3536196691455456, "grad_norm": 0.35511648654937744, "learning_rate": 9.415308291991496e-05, "loss": 1.7336, "step": 1165 }, { "epoch": 0.35392320534223703, "grad_norm": 0.45333489775657654, "learning_rate": 9.414802065404475e-05, "loss": 1.6451, "step": 1166 }, { "epoch": 0.3542267415389285, "grad_norm": 0.42814895510673523, "learning_rate": 9.414295838817455e-05, "loss": 1.8122, "step": 1167 }, { "epoch": 0.35453027773561996, "grad_norm": 0.39475324749946594, "learning_rate": 9.413789612230434e-05, "loss": 2.2534, "step": 1168 }, { "epoch": 0.35483381393231145, "grad_norm": 0.41115859150886536, "learning_rate": 9.413283385643415e-05, "loss": 1.8317, "step": 1169 }, { "epoch": 0.3551373501290029, "grad_norm": 0.44518032670021057, "learning_rate": 9.412777159056395e-05, "loss": 1.8648, "step": 1170 }, { "epoch": 0.3554408863256943, "grad_norm": 0.3964219391345978, "learning_rate": 9.412270932469374e-05, "loss": 1.4205, "step": 1171 }, { "epoch": 0.3557444225223858, "grad_norm": 0.3874772787094116, "learning_rate": 9.411764705882353e-05, "loss": 1.7562, "step": 1172 }, { "epoch": 0.35604795871907724, "grad_norm": 0.35493049025535583, "learning_rate": 9.411258479295333e-05, "loss": 1.1856, "step": 1173 }, { "epoch": 0.35635149491576873, "grad_norm": 0.3838149905204773, "learning_rate": 9.410752252708312e-05, "loss": 1.955, "step": 1174 }, { "epoch": 0.35665503111246016, "grad_norm": 0.46874240040779114, "learning_rate": 9.410246026121292e-05, "loss": 1.7283, "step": 1175 }, { "epoch": 0.3569585673091516, "grad_norm": 0.3673109710216522, "learning_rate": 9.409739799534271e-05, "loss": 1.8228, "step": 1176 }, { "epoch": 0.3572621035058431, "grad_norm": 0.4494078457355499, "learning_rate": 9.409233572947251e-05, "loss": 1.6355, "step": 1177 }, { "epoch": 0.3575656397025345, "grad_norm": 0.4009113609790802, "learning_rate": 9.40872734636023e-05, "loss": 1.7594, "step": 1178 }, { "epoch": 0.35786917589922596, "grad_norm": 0.4051864445209503, "learning_rate": 9.408221119773211e-05, "loss": 1.7057, "step": 1179 }, { "epoch": 0.35817271209591744, "grad_norm": 0.33628928661346436, "learning_rate": 9.40771489318619e-05, "loss": 1.8971, "step": 1180 }, { "epoch": 0.3584762482926089, "grad_norm": 0.3441104590892792, "learning_rate": 9.40720866659917e-05, "loss": 1.8399, "step": 1181 }, { "epoch": 0.35877978448930037, "grad_norm": 0.38719773292541504, "learning_rate": 9.40670244001215e-05, "loss": 2.0484, "step": 1182 }, { "epoch": 0.3590833206859918, "grad_norm": 0.4182259142398834, "learning_rate": 9.406196213425129e-05, "loss": 1.525, "step": 1183 }, { "epoch": 0.35938685688268324, "grad_norm": 0.42075198888778687, "learning_rate": 9.405689986838109e-05, "loss": 2.1262, "step": 1184 }, { "epoch": 0.3596903930793747, "grad_norm": 0.3604430556297302, "learning_rate": 9.405183760251088e-05, "loss": 1.9715, "step": 1185 }, { "epoch": 0.35999392927606616, "grad_norm": 0.46226024627685547, "learning_rate": 9.404677533664068e-05, "loss": 1.7088, "step": 1186 }, { "epoch": 0.36029746547275765, "grad_norm": 0.3673461377620697, "learning_rate": 9.404171307077047e-05, "loss": 1.7057, "step": 1187 }, { "epoch": 0.3606010016694491, "grad_norm": 0.40370312333106995, "learning_rate": 9.403665080490028e-05, "loss": 1.9058, "step": 1188 }, { "epoch": 0.3609045378661405, "grad_norm": 0.39149123430252075, "learning_rate": 9.403158853903007e-05, "loss": 2.0148, "step": 1189 }, { "epoch": 0.361208074062832, "grad_norm": 0.6711376309394836, "learning_rate": 9.402652627315988e-05, "loss": 1.69, "step": 1190 }, { "epoch": 0.36151161025952344, "grad_norm": 0.3052380084991455, "learning_rate": 9.402146400728968e-05, "loss": 1.4772, "step": 1191 }, { "epoch": 0.36181514645621493, "grad_norm": 0.37661212682724, "learning_rate": 9.401640174141947e-05, "loss": 1.7378, "step": 1192 }, { "epoch": 0.36211868265290637, "grad_norm": 0.39574167132377625, "learning_rate": 9.401133947554927e-05, "loss": 1.801, "step": 1193 }, { "epoch": 0.3624222188495978, "grad_norm": 0.44611817598342896, "learning_rate": 9.400627720967906e-05, "loss": 1.5995, "step": 1194 }, { "epoch": 0.3627257550462893, "grad_norm": 0.40026605129241943, "learning_rate": 9.400121494380886e-05, "loss": 1.6517, "step": 1195 }, { "epoch": 0.3630292912429807, "grad_norm": 0.36110207438468933, "learning_rate": 9.399615267793865e-05, "loss": 1.9764, "step": 1196 }, { "epoch": 0.36333282743967216, "grad_norm": 0.38339897990226746, "learning_rate": 9.399109041206845e-05, "loss": 2.085, "step": 1197 }, { "epoch": 0.36363636363636365, "grad_norm": 0.36159849166870117, "learning_rate": 9.398602814619824e-05, "loss": 1.7839, "step": 1198 }, { "epoch": 0.3639398998330551, "grad_norm": 0.3263375461101532, "learning_rate": 9.398096588032805e-05, "loss": 1.8143, "step": 1199 }, { "epoch": 0.36424343602974657, "grad_norm": 0.3886968195438385, "learning_rate": 9.397590361445784e-05, "loss": 2.0904, "step": 1200 }, { "epoch": 0.364546972226438, "grad_norm": 0.41123297810554504, "learning_rate": 9.397084134858764e-05, "loss": 1.8622, "step": 1201 }, { "epoch": 0.36485050842312944, "grad_norm": 0.580788791179657, "learning_rate": 9.396577908271743e-05, "loss": 1.9017, "step": 1202 }, { "epoch": 0.36515404461982093, "grad_norm": 0.3737773895263672, "learning_rate": 9.396071681684723e-05, "loss": 1.5775, "step": 1203 }, { "epoch": 0.36545758081651236, "grad_norm": 0.38713717460632324, "learning_rate": 9.395565455097702e-05, "loss": 1.967, "step": 1204 }, { "epoch": 0.36576111701320385, "grad_norm": 0.7311956882476807, "learning_rate": 9.395059228510682e-05, "loss": 1.7088, "step": 1205 }, { "epoch": 0.3660646532098953, "grad_norm": 1.4061527252197266, "learning_rate": 9.394553001923661e-05, "loss": 1.9163, "step": 1206 }, { "epoch": 0.3663681894065867, "grad_norm": 0.3753696382045746, "learning_rate": 9.39404677533664e-05, "loss": 2.1954, "step": 1207 }, { "epoch": 0.3666717256032782, "grad_norm": 0.38732466101646423, "learning_rate": 9.393540548749622e-05, "loss": 1.9101, "step": 1208 }, { "epoch": 0.36697526179996964, "grad_norm": 0.41291502118110657, "learning_rate": 9.393034322162601e-05, "loss": 1.9483, "step": 1209 }, { "epoch": 0.3672787979966611, "grad_norm": 0.44216427206993103, "learning_rate": 9.39252809557558e-05, "loss": 2.043, "step": 1210 }, { "epoch": 0.36758233419335257, "grad_norm": 0.798313319683075, "learning_rate": 9.39202186898856e-05, "loss": 1.9988, "step": 1211 }, { "epoch": 0.367885870390044, "grad_norm": 0.5483587384223938, "learning_rate": 9.39151564240154e-05, "loss": 2.0979, "step": 1212 }, { "epoch": 0.3681894065867355, "grad_norm": 0.44406580924987793, "learning_rate": 9.391009415814519e-05, "loss": 1.5858, "step": 1213 }, { "epoch": 0.3684929427834269, "grad_norm": 0.3883718252182007, "learning_rate": 9.390503189227498e-05, "loss": 1.9014, "step": 1214 }, { "epoch": 0.36879647898011836, "grad_norm": 0.7284543514251709, "learning_rate": 9.389996962640478e-05, "loss": 1.9709, "step": 1215 }, { "epoch": 0.36910001517680985, "grad_norm": 0.38549402356147766, "learning_rate": 9.389490736053457e-05, "loss": 1.9513, "step": 1216 }, { "epoch": 0.3694035513735013, "grad_norm": 0.39417389035224915, "learning_rate": 9.388984509466437e-05, "loss": 2.0409, "step": 1217 }, { "epoch": 0.36970708757019277, "grad_norm": 0.40816301107406616, "learning_rate": 9.388478282879418e-05, "loss": 2.0136, "step": 1218 }, { "epoch": 0.3700106237668842, "grad_norm": 0.5700183510780334, "learning_rate": 9.387972056292397e-05, "loss": 1.8478, "step": 1219 }, { "epoch": 0.37031415996357564, "grad_norm": 0.35159793496131897, "learning_rate": 9.387465829705377e-05, "loss": 1.6004, "step": 1220 }, { "epoch": 0.37061769616026713, "grad_norm": 0.41622206568717957, "learning_rate": 9.386959603118356e-05, "loss": 1.9104, "step": 1221 }, { "epoch": 0.37092123235695856, "grad_norm": 0.4205602705478668, "learning_rate": 9.386453376531336e-05, "loss": 2.1058, "step": 1222 }, { "epoch": 0.37122476855365, "grad_norm": 0.38390764594078064, "learning_rate": 9.385947149944315e-05, "loss": 1.8972, "step": 1223 }, { "epoch": 0.3715283047503415, "grad_norm": 0.3790401816368103, "learning_rate": 9.385440923357295e-05, "loss": 1.8975, "step": 1224 }, { "epoch": 0.3718318409470329, "grad_norm": 0.5210400223731995, "learning_rate": 9.384934696770274e-05, "loss": 1.7181, "step": 1225 }, { "epoch": 0.3721353771437244, "grad_norm": 0.4098454415798187, "learning_rate": 9.384428470183253e-05, "loss": 2.1405, "step": 1226 }, { "epoch": 0.37243891334041584, "grad_norm": 0.40917104482650757, "learning_rate": 9.383922243596234e-05, "loss": 1.8696, "step": 1227 }, { "epoch": 0.3727424495371073, "grad_norm": 0.3712831139564514, "learning_rate": 9.383416017009214e-05, "loss": 1.8792, "step": 1228 }, { "epoch": 0.37304598573379877, "grad_norm": 0.3110792934894562, "learning_rate": 9.382909790422193e-05, "loss": 1.5782, "step": 1229 }, { "epoch": 0.3733495219304902, "grad_norm": 0.3657875061035156, "learning_rate": 9.382403563835173e-05, "loss": 2.0311, "step": 1230 }, { "epoch": 0.3736530581271817, "grad_norm": 0.37432965636253357, "learning_rate": 9.381897337248152e-05, "loss": 1.8505, "step": 1231 }, { "epoch": 0.3739565943238731, "grad_norm": 0.3771384656429291, "learning_rate": 9.381391110661132e-05, "loss": 1.5715, "step": 1232 }, { "epoch": 0.37426013052056456, "grad_norm": 1.2401721477508545, "learning_rate": 9.380884884074111e-05, "loss": 1.4436, "step": 1233 }, { "epoch": 0.37456366671725605, "grad_norm": 0.36102503538131714, "learning_rate": 9.380378657487092e-05, "loss": 1.8907, "step": 1234 }, { "epoch": 0.3748672029139475, "grad_norm": 0.46541303396224976, "learning_rate": 9.379872430900072e-05, "loss": 2.0067, "step": 1235 }, { "epoch": 0.3751707391106389, "grad_norm": 0.46490392088890076, "learning_rate": 9.379366204313051e-05, "loss": 1.379, "step": 1236 }, { "epoch": 0.3754742753073304, "grad_norm": 0.40038684010505676, "learning_rate": 9.37885997772603e-05, "loss": 1.9102, "step": 1237 }, { "epoch": 0.37577781150402184, "grad_norm": 0.401563435792923, "learning_rate": 9.378353751139011e-05, "loss": 2.0087, "step": 1238 }, { "epoch": 0.37608134770071333, "grad_norm": 0.38930457830429077, "learning_rate": 9.377847524551991e-05, "loss": 1.7222, "step": 1239 }, { "epoch": 0.37638488389740477, "grad_norm": 0.4146344065666199, "learning_rate": 9.37734129796497e-05, "loss": 1.9965, "step": 1240 }, { "epoch": 0.3766884200940962, "grad_norm": 0.7829983830451965, "learning_rate": 9.37683507137795e-05, "loss": 1.7133, "step": 1241 }, { "epoch": 0.3769919562907877, "grad_norm": 0.3819306492805481, "learning_rate": 9.376328844790929e-05, "loss": 1.7532, "step": 1242 }, { "epoch": 0.3772954924874791, "grad_norm": 0.35361188650131226, "learning_rate": 9.375822618203909e-05, "loss": 1.8794, "step": 1243 }, { "epoch": 0.3775990286841706, "grad_norm": 0.37844938039779663, "learning_rate": 9.375316391616888e-05, "loss": 2.0991, "step": 1244 }, { "epoch": 0.37790256488086205, "grad_norm": 0.49530112743377686, "learning_rate": 9.374810165029868e-05, "loss": 1.5838, "step": 1245 }, { "epoch": 0.3782061010775535, "grad_norm": 0.36716628074645996, "learning_rate": 9.374303938442847e-05, "loss": 1.812, "step": 1246 }, { "epoch": 0.37850963727424497, "grad_norm": 0.3772716522216797, "learning_rate": 9.373797711855828e-05, "loss": 1.8649, "step": 1247 }, { "epoch": 0.3788131734709364, "grad_norm": 0.42215248942375183, "learning_rate": 9.373291485268807e-05, "loss": 1.8589, "step": 1248 }, { "epoch": 0.37911670966762784, "grad_norm": 0.4086074233055115, "learning_rate": 9.372785258681787e-05, "loss": 2.0305, "step": 1249 }, { "epoch": 0.3794202458643193, "grad_norm": 0.5096133947372437, "learning_rate": 9.372279032094766e-05, "loss": 2.0852, "step": 1250 }, { "epoch": 0.37972378206101076, "grad_norm": 0.41633352637290955, "learning_rate": 9.371772805507746e-05, "loss": 1.8879, "step": 1251 }, { "epoch": 0.38002731825770225, "grad_norm": 0.4787557125091553, "learning_rate": 9.371266578920725e-05, "loss": 1.9307, "step": 1252 }, { "epoch": 0.3803308544543937, "grad_norm": 0.4313805103302002, "learning_rate": 9.370760352333705e-05, "loss": 1.097, "step": 1253 }, { "epoch": 0.3806343906510851, "grad_norm": 0.3604517877101898, "learning_rate": 9.370254125746684e-05, "loss": 1.9466, "step": 1254 }, { "epoch": 0.3809379268477766, "grad_norm": 0.35350343585014343, "learning_rate": 9.369747899159664e-05, "loss": 2.1093, "step": 1255 }, { "epoch": 0.38124146304446804, "grad_norm": 0.43002399802207947, "learning_rate": 9.369241672572643e-05, "loss": 1.9016, "step": 1256 }, { "epoch": 0.38154499924115953, "grad_norm": 0.46702131628990173, "learning_rate": 9.368735445985624e-05, "loss": 1.9909, "step": 1257 }, { "epoch": 0.38184853543785097, "grad_norm": 0.42195767164230347, "learning_rate": 9.368229219398604e-05, "loss": 1.9486, "step": 1258 }, { "epoch": 0.3821520716345424, "grad_norm": 0.4160800874233246, "learning_rate": 9.367722992811583e-05, "loss": 1.2547, "step": 1259 }, { "epoch": 0.3824556078312339, "grad_norm": 0.398027628660202, "learning_rate": 9.367216766224563e-05, "loss": 1.7109, "step": 1260 }, { "epoch": 0.3827591440279253, "grad_norm": 0.35801073908805847, "learning_rate": 9.366710539637542e-05, "loss": 1.7718, "step": 1261 }, { "epoch": 0.38306268022461676, "grad_norm": 0.3769727647304535, "learning_rate": 9.366204313050522e-05, "loss": 1.7201, "step": 1262 }, { "epoch": 0.38336621642130825, "grad_norm": 0.4340580105781555, "learning_rate": 9.365698086463501e-05, "loss": 1.5747, "step": 1263 }, { "epoch": 0.3836697526179997, "grad_norm": 0.48839592933654785, "learning_rate": 9.36519185987648e-05, "loss": 2.0381, "step": 1264 }, { "epoch": 0.38397328881469117, "grad_norm": 0.3686861991882324, "learning_rate": 9.36468563328946e-05, "loss": 1.8126, "step": 1265 }, { "epoch": 0.3842768250113826, "grad_norm": 0.45264241099357605, "learning_rate": 9.364179406702441e-05, "loss": 1.7206, "step": 1266 }, { "epoch": 0.38458036120807404, "grad_norm": 0.45419684052467346, "learning_rate": 9.36367318011542e-05, "loss": 2.028, "step": 1267 }, { "epoch": 0.38488389740476553, "grad_norm": 0.38863110542297363, "learning_rate": 9.3631669535284e-05, "loss": 1.8139, "step": 1268 }, { "epoch": 0.38518743360145696, "grad_norm": 0.41982683539390564, "learning_rate": 9.362660726941379e-05, "loss": 1.9528, "step": 1269 }, { "epoch": 0.38549096979814845, "grad_norm": 0.3579862415790558, "learning_rate": 9.362154500354359e-05, "loss": 2.1949, "step": 1270 }, { "epoch": 0.3857945059948399, "grad_norm": 0.39172133803367615, "learning_rate": 9.361648273767338e-05, "loss": 1.8242, "step": 1271 }, { "epoch": 0.3860980421915313, "grad_norm": 0.36367735266685486, "learning_rate": 9.361142047180318e-05, "loss": 1.9508, "step": 1272 }, { "epoch": 0.3864015783882228, "grad_norm": 0.3536215126514435, "learning_rate": 9.360635820593297e-05, "loss": 1.7761, "step": 1273 }, { "epoch": 0.38670511458491424, "grad_norm": 0.44467857480049133, "learning_rate": 9.360129594006277e-05, "loss": 2.2006, "step": 1274 }, { "epoch": 0.38700865078160573, "grad_norm": 0.41492581367492676, "learning_rate": 9.359623367419257e-05, "loss": 1.7899, "step": 1275 }, { "epoch": 0.38731218697829717, "grad_norm": 0.4128611087799072, "learning_rate": 9.359117140832237e-05, "loss": 1.9787, "step": 1276 }, { "epoch": 0.3876157231749886, "grad_norm": 0.36134451627731323, "learning_rate": 9.358610914245216e-05, "loss": 1.941, "step": 1277 }, { "epoch": 0.3879192593716801, "grad_norm": 0.36279958486557007, "learning_rate": 9.358104687658197e-05, "loss": 2.1067, "step": 1278 }, { "epoch": 0.3882227955683715, "grad_norm": 0.432478666305542, "learning_rate": 9.357598461071177e-05, "loss": 2.1264, "step": 1279 }, { "epoch": 0.38852633176506296, "grad_norm": 0.3920331299304962, "learning_rate": 9.357092234484156e-05, "loss": 1.5522, "step": 1280 }, { "epoch": 0.38882986796175445, "grad_norm": 0.3537754416465759, "learning_rate": 9.356586007897136e-05, "loss": 1.8354, "step": 1281 }, { "epoch": 0.3891334041584459, "grad_norm": 0.40271031856536865, "learning_rate": 9.356079781310115e-05, "loss": 1.7567, "step": 1282 }, { "epoch": 0.3894369403551374, "grad_norm": 0.47157374024391174, "learning_rate": 9.355573554723095e-05, "loss": 1.8542, "step": 1283 }, { "epoch": 0.3897404765518288, "grad_norm": 0.3331926167011261, "learning_rate": 9.355067328136074e-05, "loss": 1.8651, "step": 1284 }, { "epoch": 0.39004401274852024, "grad_norm": 0.884087860584259, "learning_rate": 9.354561101549054e-05, "loss": 1.4765, "step": 1285 }, { "epoch": 0.39034754894521173, "grad_norm": 0.3618917167186737, "learning_rate": 9.354054874962034e-05, "loss": 2.0908, "step": 1286 }, { "epoch": 0.39065108514190316, "grad_norm": 0.3494134843349457, "learning_rate": 9.353548648375014e-05, "loss": 1.1975, "step": 1287 }, { "epoch": 0.39095462133859465, "grad_norm": 0.40450137853622437, "learning_rate": 9.353042421787993e-05, "loss": 1.5567, "step": 1288 }, { "epoch": 0.3912581575352861, "grad_norm": 0.3893278241157532, "learning_rate": 9.352536195200973e-05, "loss": 1.6993, "step": 1289 }, { "epoch": 0.3915616937319775, "grad_norm": 0.6020291447639465, "learning_rate": 9.352029968613952e-05, "loss": 1.7073, "step": 1290 }, { "epoch": 0.391865229928669, "grad_norm": 0.43949219584465027, "learning_rate": 9.351523742026932e-05, "loss": 1.8407, "step": 1291 }, { "epoch": 0.39216876612536045, "grad_norm": 0.41567811369895935, "learning_rate": 9.351017515439911e-05, "loss": 2.0354, "step": 1292 }, { "epoch": 0.3924723023220519, "grad_norm": 0.41198036074638367, "learning_rate": 9.350511288852891e-05, "loss": 1.964, "step": 1293 }, { "epoch": 0.39277583851874337, "grad_norm": 0.3735191524028778, "learning_rate": 9.35000506226587e-05, "loss": 1.8359, "step": 1294 }, { "epoch": 0.3930793747154348, "grad_norm": 0.4426116347312927, "learning_rate": 9.34949883567885e-05, "loss": 1.8876, "step": 1295 }, { "epoch": 0.3933829109121263, "grad_norm": 0.3956250548362732, "learning_rate": 9.34899260909183e-05, "loss": 1.5177, "step": 1296 }, { "epoch": 0.3936864471088177, "grad_norm": 0.3534790575504303, "learning_rate": 9.34848638250481e-05, "loss": 2.1419, "step": 1297 }, { "epoch": 0.39398998330550916, "grad_norm": 0.4134576916694641, "learning_rate": 9.34798015591779e-05, "loss": 1.5873, "step": 1298 }, { "epoch": 0.39429351950220065, "grad_norm": 0.4386560916900635, "learning_rate": 9.347473929330769e-05, "loss": 1.4547, "step": 1299 }, { "epoch": 0.3945970556988921, "grad_norm": 0.41839587688446045, "learning_rate": 9.346967702743749e-05, "loss": 1.5251, "step": 1300 }, { "epoch": 0.3949005918955836, "grad_norm": 0.333609938621521, "learning_rate": 9.346461476156728e-05, "loss": 1.5575, "step": 1301 }, { "epoch": 0.395204128092275, "grad_norm": 0.4706360101699829, "learning_rate": 9.345955249569707e-05, "loss": 1.8686, "step": 1302 }, { "epoch": 0.39550766428896644, "grad_norm": 0.3555939495563507, "learning_rate": 9.345449022982687e-05, "loss": 1.5205, "step": 1303 }, { "epoch": 0.39581120048565793, "grad_norm": 0.47611120343208313, "learning_rate": 9.344942796395666e-05, "loss": 1.5114, "step": 1304 }, { "epoch": 0.39611473668234937, "grad_norm": 0.570785641670227, "learning_rate": 9.344436569808647e-05, "loss": 1.9987, "step": 1305 }, { "epoch": 0.3964182728790408, "grad_norm": 0.3685778081417084, "learning_rate": 9.343930343221627e-05, "loss": 1.9471, "step": 1306 }, { "epoch": 0.3967218090757323, "grad_norm": 0.4187014698982239, "learning_rate": 9.343424116634606e-05, "loss": 2.1728, "step": 1307 }, { "epoch": 0.3970253452724237, "grad_norm": 0.35904020071029663, "learning_rate": 9.342917890047586e-05, "loss": 1.9576, "step": 1308 }, { "epoch": 0.3973288814691152, "grad_norm": 0.48214206099510193, "learning_rate": 9.342411663460565e-05, "loss": 1.7529, "step": 1309 }, { "epoch": 0.39763241766580665, "grad_norm": 0.3852714002132416, "learning_rate": 9.341905436873545e-05, "loss": 2.256, "step": 1310 }, { "epoch": 0.3979359538624981, "grad_norm": 0.44712984561920166, "learning_rate": 9.341399210286524e-05, "loss": 1.8981, "step": 1311 }, { "epoch": 0.39823949005918957, "grad_norm": 0.42379963397979736, "learning_rate": 9.340892983699504e-05, "loss": 2.0528, "step": 1312 }, { "epoch": 0.398543026255881, "grad_norm": 0.3936759829521179, "learning_rate": 9.340386757112483e-05, "loss": 1.592, "step": 1313 }, { "epoch": 0.3988465624525725, "grad_norm": 0.4035021662712097, "learning_rate": 9.339880530525464e-05, "loss": 2.0751, "step": 1314 }, { "epoch": 0.39915009864926393, "grad_norm": 0.3658972382545471, "learning_rate": 9.339374303938443e-05, "loss": 1.7568, "step": 1315 }, { "epoch": 0.39945363484595536, "grad_norm": 0.4271409511566162, "learning_rate": 9.338868077351423e-05, "loss": 2.1243, "step": 1316 }, { "epoch": 0.39975717104264685, "grad_norm": 0.3799911439418793, "learning_rate": 9.338361850764402e-05, "loss": 1.9763, "step": 1317 }, { "epoch": 0.4000607072393383, "grad_norm": 0.3878629803657532, "learning_rate": 9.337855624177382e-05, "loss": 1.6328, "step": 1318 }, { "epoch": 0.4003642434360297, "grad_norm": 0.3611898124217987, "learning_rate": 9.337349397590361e-05, "loss": 1.8017, "step": 1319 }, { "epoch": 0.4006677796327212, "grad_norm": 0.4010205864906311, "learning_rate": 9.336843171003341e-05, "loss": 2.1213, "step": 1320 }, { "epoch": 0.40097131582941264, "grad_norm": 0.4076247811317444, "learning_rate": 9.33633694441632e-05, "loss": 1.8366, "step": 1321 }, { "epoch": 0.40127485202610413, "grad_norm": 0.4172746241092682, "learning_rate": 9.3358307178293e-05, "loss": 1.7759, "step": 1322 }, { "epoch": 0.40157838822279557, "grad_norm": 0.6179870367050171, "learning_rate": 9.33532449124228e-05, "loss": 1.7803, "step": 1323 }, { "epoch": 0.401881924419487, "grad_norm": 0.38707882165908813, "learning_rate": 9.33481826465526e-05, "loss": 1.2795, "step": 1324 }, { "epoch": 0.4021854606161785, "grad_norm": 0.35764575004577637, "learning_rate": 9.334312038068241e-05, "loss": 1.3463, "step": 1325 }, { "epoch": 0.4024889968128699, "grad_norm": 0.40050292015075684, "learning_rate": 9.33380581148122e-05, "loss": 1.8487, "step": 1326 }, { "epoch": 0.4027925330095614, "grad_norm": 0.5421705842018127, "learning_rate": 9.3332995848942e-05, "loss": 2.0407, "step": 1327 }, { "epoch": 0.40309606920625285, "grad_norm": 0.5423186421394348, "learning_rate": 9.33279335830718e-05, "loss": 1.6743, "step": 1328 }, { "epoch": 0.4033996054029443, "grad_norm": 0.41429242491722107, "learning_rate": 9.332287131720159e-05, "loss": 2.1614, "step": 1329 }, { "epoch": 0.4037031415996358, "grad_norm": 0.41197100281715393, "learning_rate": 9.331780905133138e-05, "loss": 1.9414, "step": 1330 }, { "epoch": 0.4040066777963272, "grad_norm": 0.3833538293838501, "learning_rate": 9.331274678546118e-05, "loss": 1.4602, "step": 1331 }, { "epoch": 0.40431021399301864, "grad_norm": 0.4118226170539856, "learning_rate": 9.330768451959097e-05, "loss": 2.0595, "step": 1332 }, { "epoch": 0.40461375018971013, "grad_norm": 0.3417702317237854, "learning_rate": 9.330262225372077e-05, "loss": 1.5938, "step": 1333 }, { "epoch": 0.40491728638640156, "grad_norm": 0.3822105824947357, "learning_rate": 9.329755998785056e-05, "loss": 1.9013, "step": 1334 }, { "epoch": 0.40522082258309305, "grad_norm": 0.7788810133934021, "learning_rate": 9.329249772198037e-05, "loss": 1.6752, "step": 1335 }, { "epoch": 0.4055243587797845, "grad_norm": 0.4163956940174103, "learning_rate": 9.328743545611017e-05, "loss": 1.7016, "step": 1336 }, { "epoch": 0.4058278949764759, "grad_norm": 0.42450758814811707, "learning_rate": 9.328237319023996e-05, "loss": 1.831, "step": 1337 }, { "epoch": 0.4061314311731674, "grad_norm": 0.4169425666332245, "learning_rate": 9.327731092436976e-05, "loss": 1.7361, "step": 1338 }, { "epoch": 0.40643496736985885, "grad_norm": 0.3413407802581787, "learning_rate": 9.327224865849955e-05, "loss": 2.0173, "step": 1339 }, { "epoch": 0.40673850356655034, "grad_norm": 0.3989046812057495, "learning_rate": 9.326718639262934e-05, "loss": 1.9196, "step": 1340 }, { "epoch": 0.40704203976324177, "grad_norm": 0.47707435488700867, "learning_rate": 9.326212412675914e-05, "loss": 1.9119, "step": 1341 }, { "epoch": 0.4073455759599332, "grad_norm": 0.3998529314994812, "learning_rate": 9.325706186088893e-05, "loss": 1.8923, "step": 1342 }, { "epoch": 0.4076491121566247, "grad_norm": 0.3560973107814789, "learning_rate": 9.325199959501873e-05, "loss": 1.8033, "step": 1343 }, { "epoch": 0.4079526483533161, "grad_norm": 0.42655158042907715, "learning_rate": 9.324693732914854e-05, "loss": 1.5517, "step": 1344 }, { "epoch": 0.40825618455000756, "grad_norm": 0.4044337272644043, "learning_rate": 9.324187506327833e-05, "loss": 1.8332, "step": 1345 }, { "epoch": 0.40855972074669905, "grad_norm": 0.382467120885849, "learning_rate": 9.323681279740813e-05, "loss": 2.0407, "step": 1346 }, { "epoch": 0.4088632569433905, "grad_norm": 0.46734219789505005, "learning_rate": 9.323175053153792e-05, "loss": 1.5662, "step": 1347 }, { "epoch": 0.409166793140082, "grad_norm": 0.45105868577957153, "learning_rate": 9.322668826566772e-05, "loss": 2.0506, "step": 1348 }, { "epoch": 0.4094703293367734, "grad_norm": 0.3531922399997711, "learning_rate": 9.322162599979751e-05, "loss": 1.7494, "step": 1349 }, { "epoch": 0.40977386553346484, "grad_norm": 0.3707609474658966, "learning_rate": 9.32165637339273e-05, "loss": 2.2804, "step": 1350 }, { "epoch": 0.41007740173015633, "grad_norm": 0.38254693150520325, "learning_rate": 9.32115014680571e-05, "loss": 1.9183, "step": 1351 }, { "epoch": 0.41038093792684777, "grad_norm": 0.41418614983558655, "learning_rate": 9.32064392021869e-05, "loss": 1.8523, "step": 1352 }, { "epoch": 0.41068447412353926, "grad_norm": 0.42098134756088257, "learning_rate": 9.32013769363167e-05, "loss": 1.712, "step": 1353 }, { "epoch": 0.4109880103202307, "grad_norm": 0.3387204706668854, "learning_rate": 9.31963146704465e-05, "loss": 1.7652, "step": 1354 }, { "epoch": 0.4112915465169221, "grad_norm": 0.4330706000328064, "learning_rate": 9.31912524045763e-05, "loss": 1.478, "step": 1355 }, { "epoch": 0.4115950827136136, "grad_norm": 0.36673831939697266, "learning_rate": 9.318619013870609e-05, "loss": 1.7798, "step": 1356 }, { "epoch": 0.41189861891030505, "grad_norm": 0.40374481678009033, "learning_rate": 9.318112787283588e-05, "loss": 1.6161, "step": 1357 }, { "epoch": 0.4122021551069965, "grad_norm": 0.38840124011039734, "learning_rate": 9.317606560696568e-05, "loss": 1.3431, "step": 1358 }, { "epoch": 0.41250569130368797, "grad_norm": 0.4768214225769043, "learning_rate": 9.317100334109547e-05, "loss": 1.6115, "step": 1359 }, { "epoch": 0.4128092275003794, "grad_norm": 0.43069908022880554, "learning_rate": 9.316594107522527e-05, "loss": 2.0131, "step": 1360 }, { "epoch": 0.4131127636970709, "grad_norm": 0.36959967017173767, "learning_rate": 9.316087880935506e-05, "loss": 1.982, "step": 1361 }, { "epoch": 0.41341629989376233, "grad_norm": 0.3068915009498596, "learning_rate": 9.315581654348486e-05, "loss": 1.8105, "step": 1362 }, { "epoch": 0.41371983609045376, "grad_norm": 0.33738118410110474, "learning_rate": 9.315075427761467e-05, "loss": 1.6039, "step": 1363 }, { "epoch": 0.41402337228714525, "grad_norm": 0.38889479637145996, "learning_rate": 9.314569201174446e-05, "loss": 1.7897, "step": 1364 }, { "epoch": 0.4143269084838367, "grad_norm": 0.35099512338638306, "learning_rate": 9.314062974587426e-05, "loss": 1.5897, "step": 1365 }, { "epoch": 0.4146304446805282, "grad_norm": 0.3819596767425537, "learning_rate": 9.313556748000405e-05, "loss": 1.4781, "step": 1366 }, { "epoch": 0.4149339808772196, "grad_norm": 0.392493337392807, "learning_rate": 9.313050521413386e-05, "loss": 1.8577, "step": 1367 }, { "epoch": 0.41523751707391104, "grad_norm": 0.34424975514411926, "learning_rate": 9.312544294826365e-05, "loss": 1.4452, "step": 1368 }, { "epoch": 0.41554105327060253, "grad_norm": 0.44334256649017334, "learning_rate": 9.312038068239345e-05, "loss": 1.5221, "step": 1369 }, { "epoch": 0.41584458946729397, "grad_norm": 0.4194605350494385, "learning_rate": 9.311531841652324e-05, "loss": 2.3915, "step": 1370 }, { "epoch": 0.41614812566398546, "grad_norm": 0.33700132369995117, "learning_rate": 9.311025615065304e-05, "loss": 1.9193, "step": 1371 }, { "epoch": 0.4164516618606769, "grad_norm": 0.4527650773525238, "learning_rate": 9.310519388478283e-05, "loss": 1.766, "step": 1372 }, { "epoch": 0.4167551980573683, "grad_norm": 0.3435012996196747, "learning_rate": 9.310013161891263e-05, "loss": 1.8662, "step": 1373 }, { "epoch": 0.4170587342540598, "grad_norm": 0.3468983471393585, "learning_rate": 9.309506935304244e-05, "loss": 1.5261, "step": 1374 }, { "epoch": 0.41736227045075125, "grad_norm": 0.38368481397628784, "learning_rate": 9.309000708717223e-05, "loss": 1.6389, "step": 1375 }, { "epoch": 0.4176658066474427, "grad_norm": 0.38153746724128723, "learning_rate": 9.308494482130203e-05, "loss": 1.7431, "step": 1376 }, { "epoch": 0.4179693428441342, "grad_norm": 0.4192492961883545, "learning_rate": 9.307988255543182e-05, "loss": 1.8582, "step": 1377 }, { "epoch": 0.4182728790408256, "grad_norm": 0.42689767479896545, "learning_rate": 9.307482028956161e-05, "loss": 1.9049, "step": 1378 }, { "epoch": 0.4185764152375171, "grad_norm": 0.38545602560043335, "learning_rate": 9.306975802369141e-05, "loss": 1.2598, "step": 1379 }, { "epoch": 0.41887995143420853, "grad_norm": 0.4117288887500763, "learning_rate": 9.30646957578212e-05, "loss": 1.9808, "step": 1380 }, { "epoch": 0.41918348763089996, "grad_norm": 0.38102084398269653, "learning_rate": 9.3059633491951e-05, "loss": 1.9734, "step": 1381 }, { "epoch": 0.41948702382759145, "grad_norm": 0.3788716495037079, "learning_rate": 9.30545712260808e-05, "loss": 1.9655, "step": 1382 }, { "epoch": 0.4197905600242829, "grad_norm": 1.5338399410247803, "learning_rate": 9.30495089602106e-05, "loss": 2.1111, "step": 1383 }, { "epoch": 0.4200940962209744, "grad_norm": 0.40994685888290405, "learning_rate": 9.30444466943404e-05, "loss": 1.9063, "step": 1384 }, { "epoch": 0.4203976324176658, "grad_norm": 0.3389085829257965, "learning_rate": 9.303938442847019e-05, "loss": 1.8592, "step": 1385 }, { "epoch": 0.42070116861435725, "grad_norm": 1.0117053985595703, "learning_rate": 9.303432216259999e-05, "loss": 1.4198, "step": 1386 }, { "epoch": 0.42100470481104874, "grad_norm": 0.37429583072662354, "learning_rate": 9.302925989672978e-05, "loss": 1.6439, "step": 1387 }, { "epoch": 0.42130824100774017, "grad_norm": 0.397991806268692, "learning_rate": 9.302419763085958e-05, "loss": 1.9744, "step": 1388 }, { "epoch": 0.4216117772044316, "grad_norm": 0.39546629786491394, "learning_rate": 9.301913536498937e-05, "loss": 1.993, "step": 1389 }, { "epoch": 0.4219153134011231, "grad_norm": 0.3465210497379303, "learning_rate": 9.301407309911917e-05, "loss": 1.8254, "step": 1390 }, { "epoch": 0.4222188495978145, "grad_norm": 0.36281952261924744, "learning_rate": 9.300901083324896e-05, "loss": 1.9205, "step": 1391 }, { "epoch": 0.422522385794506, "grad_norm": 0.37978988885879517, "learning_rate": 9.300394856737877e-05, "loss": 1.8021, "step": 1392 }, { "epoch": 0.42282592199119745, "grad_norm": 0.3463260531425476, "learning_rate": 9.299888630150856e-05, "loss": 2.1022, "step": 1393 }, { "epoch": 0.4231294581878889, "grad_norm": 0.3449305593967438, "learning_rate": 9.299382403563836e-05, "loss": 1.808, "step": 1394 }, { "epoch": 0.4234329943845804, "grad_norm": 0.3900066018104553, "learning_rate": 9.298876176976815e-05, "loss": 1.8926, "step": 1395 }, { "epoch": 0.4237365305812718, "grad_norm": 0.3958972692489624, "learning_rate": 9.298369950389795e-05, "loss": 1.7716, "step": 1396 }, { "epoch": 0.4240400667779633, "grad_norm": 0.41263818740844727, "learning_rate": 9.297863723802774e-05, "loss": 1.9745, "step": 1397 }, { "epoch": 0.42434360297465473, "grad_norm": 0.44245028495788574, "learning_rate": 9.297357497215754e-05, "loss": 1.6498, "step": 1398 }, { "epoch": 0.42464713917134617, "grad_norm": 0.36662882566452026, "learning_rate": 9.296851270628733e-05, "loss": 1.9321, "step": 1399 }, { "epoch": 0.42495067536803766, "grad_norm": 0.38561105728149414, "learning_rate": 9.296345044041713e-05, "loss": 1.8661, "step": 1400 }, { "epoch": 0.4252542115647291, "grad_norm": 0.3688740134239197, "learning_rate": 9.295838817454692e-05, "loss": 2.1375, "step": 1401 }, { "epoch": 0.4255577477614205, "grad_norm": 0.3883054256439209, "learning_rate": 9.295332590867673e-05, "loss": 1.486, "step": 1402 }, { "epoch": 0.425861283958112, "grad_norm": 0.4107448160648346, "learning_rate": 9.294826364280653e-05, "loss": 1.9075, "step": 1403 }, { "epoch": 0.42616482015480345, "grad_norm": 0.4174923896789551, "learning_rate": 9.294320137693632e-05, "loss": 2.0668, "step": 1404 }, { "epoch": 0.42646835635149494, "grad_norm": 0.4573984444141388, "learning_rate": 9.293813911106611e-05, "loss": 1.8517, "step": 1405 }, { "epoch": 0.42677189254818637, "grad_norm": 0.3820217251777649, "learning_rate": 9.293307684519591e-05, "loss": 1.7841, "step": 1406 }, { "epoch": 0.4270754287448778, "grad_norm": 0.34213465452194214, "learning_rate": 9.29280145793257e-05, "loss": 1.9139, "step": 1407 }, { "epoch": 0.4273789649415693, "grad_norm": 0.3995790481567383, "learning_rate": 9.29229523134555e-05, "loss": 1.6883, "step": 1408 }, { "epoch": 0.42768250113826073, "grad_norm": 0.4142625331878662, "learning_rate": 9.29178900475853e-05, "loss": 2.0771, "step": 1409 }, { "epoch": 0.4279860373349522, "grad_norm": 0.3818739354610443, "learning_rate": 9.291282778171509e-05, "loss": 1.6682, "step": 1410 }, { "epoch": 0.42828957353164365, "grad_norm": 0.36996081471443176, "learning_rate": 9.29077655158449e-05, "loss": 2.1084, "step": 1411 }, { "epoch": 0.4285931097283351, "grad_norm": 0.4592280983924866, "learning_rate": 9.290270324997469e-05, "loss": 1.4502, "step": 1412 }, { "epoch": 0.4288966459250266, "grad_norm": 0.4243657886981964, "learning_rate": 9.28976409841045e-05, "loss": 1.8459, "step": 1413 }, { "epoch": 0.429200182121718, "grad_norm": 0.4068589508533478, "learning_rate": 9.28925787182343e-05, "loss": 1.8392, "step": 1414 }, { "epoch": 0.42950371831840944, "grad_norm": 0.3421384394168854, "learning_rate": 9.288751645236409e-05, "loss": 1.9204, "step": 1415 }, { "epoch": 0.42980725451510093, "grad_norm": 0.36633387207984924, "learning_rate": 9.288245418649388e-05, "loss": 2.1934, "step": 1416 }, { "epoch": 0.43011079071179237, "grad_norm": 0.6671120524406433, "learning_rate": 9.287739192062368e-05, "loss": 1.7614, "step": 1417 }, { "epoch": 0.43041432690848386, "grad_norm": 0.3610883057117462, "learning_rate": 9.287232965475347e-05, "loss": 1.9075, "step": 1418 }, { "epoch": 0.4307178631051753, "grad_norm": 0.42165407538414, "learning_rate": 9.286726738888327e-05, "loss": 1.4474, "step": 1419 }, { "epoch": 0.4310213993018667, "grad_norm": 0.38051116466522217, "learning_rate": 9.286220512301306e-05, "loss": 1.7629, "step": 1420 }, { "epoch": 0.4313249354985582, "grad_norm": 0.38990986347198486, "learning_rate": 9.285714285714286e-05, "loss": 1.7111, "step": 1421 }, { "epoch": 0.43162847169524965, "grad_norm": 0.3510812222957611, "learning_rate": 9.285208059127267e-05, "loss": 1.7695, "step": 1422 }, { "epoch": 0.43193200789194114, "grad_norm": 0.34757426381111145, "learning_rate": 9.284701832540246e-05, "loss": 2.173, "step": 1423 }, { "epoch": 0.4322355440886326, "grad_norm": 0.3806573152542114, "learning_rate": 9.284195605953226e-05, "loss": 1.8029, "step": 1424 }, { "epoch": 0.432539080285324, "grad_norm": 0.3845151662826538, "learning_rate": 9.283689379366205e-05, "loss": 1.902, "step": 1425 }, { "epoch": 0.4328426164820155, "grad_norm": 0.40006932616233826, "learning_rate": 9.283183152779185e-05, "loss": 1.6436, "step": 1426 }, { "epoch": 0.43314615267870693, "grad_norm": 0.5392235517501831, "learning_rate": 9.282676926192164e-05, "loss": 1.921, "step": 1427 }, { "epoch": 0.43344968887539836, "grad_norm": 0.4523599147796631, "learning_rate": 9.282170699605144e-05, "loss": 1.7473, "step": 1428 }, { "epoch": 0.43375322507208985, "grad_norm": 0.3809603154659271, "learning_rate": 9.281664473018123e-05, "loss": 1.5461, "step": 1429 }, { "epoch": 0.4340567612687813, "grad_norm": 0.4202471375465393, "learning_rate": 9.281158246431103e-05, "loss": 1.995, "step": 1430 }, { "epoch": 0.4343602974654728, "grad_norm": 0.42767444252967834, "learning_rate": 9.280652019844083e-05, "loss": 1.9536, "step": 1431 }, { "epoch": 0.4346638336621642, "grad_norm": 0.4050025939941406, "learning_rate": 9.280145793257063e-05, "loss": 1.6169, "step": 1432 }, { "epoch": 0.43496736985885565, "grad_norm": 0.4572995901107788, "learning_rate": 9.279639566670042e-05, "loss": 1.5711, "step": 1433 }, { "epoch": 0.43527090605554714, "grad_norm": 0.4102776050567627, "learning_rate": 9.279133340083022e-05, "loss": 1.9844, "step": 1434 }, { "epoch": 0.43557444225223857, "grad_norm": 0.4309599995613098, "learning_rate": 9.278627113496001e-05, "loss": 1.8742, "step": 1435 }, { "epoch": 0.43587797844893006, "grad_norm": 0.34951043128967285, "learning_rate": 9.278120886908981e-05, "loss": 1.9262, "step": 1436 }, { "epoch": 0.4361815146456215, "grad_norm": 0.47722557187080383, "learning_rate": 9.27761466032196e-05, "loss": 1.5605, "step": 1437 }, { "epoch": 0.4364850508423129, "grad_norm": 0.37698620557785034, "learning_rate": 9.27710843373494e-05, "loss": 2.3081, "step": 1438 }, { "epoch": 0.4367885870390044, "grad_norm": 0.40668490529060364, "learning_rate": 9.276602207147919e-05, "loss": 1.4524, "step": 1439 }, { "epoch": 0.43709212323569585, "grad_norm": 0.4384947121143341, "learning_rate": 9.276095980560899e-05, "loss": 1.7878, "step": 1440 }, { "epoch": 0.4373956594323873, "grad_norm": 3.140451192855835, "learning_rate": 9.27558975397388e-05, "loss": 1.7084, "step": 1441 }, { "epoch": 0.4376991956290788, "grad_norm": 0.43369218707084656, "learning_rate": 9.275083527386859e-05, "loss": 2.0439, "step": 1442 }, { "epoch": 0.4380027318257702, "grad_norm": 0.4725881516933441, "learning_rate": 9.274577300799838e-05, "loss": 2.0507, "step": 1443 }, { "epoch": 0.4383062680224617, "grad_norm": 0.4496382474899292, "learning_rate": 9.274071074212818e-05, "loss": 2.1349, "step": 1444 }, { "epoch": 0.43860980421915313, "grad_norm": 1.6437734365463257, "learning_rate": 9.273564847625797e-05, "loss": 1.9441, "step": 1445 }, { "epoch": 0.43891334041584457, "grad_norm": 0.4106156527996063, "learning_rate": 9.273058621038777e-05, "loss": 1.4615, "step": 1446 }, { "epoch": 0.43921687661253606, "grad_norm": 0.4387066960334778, "learning_rate": 9.272552394451756e-05, "loss": 1.8679, "step": 1447 }, { "epoch": 0.4395204128092275, "grad_norm": 0.44515758752822876, "learning_rate": 9.272046167864736e-05, "loss": 1.9675, "step": 1448 }, { "epoch": 0.439823949005919, "grad_norm": 0.43665841221809387, "learning_rate": 9.271539941277715e-05, "loss": 2.2782, "step": 1449 }, { "epoch": 0.4401274852026104, "grad_norm": 0.3593182861804962, "learning_rate": 9.271033714690696e-05, "loss": 1.6537, "step": 1450 }, { "epoch": 0.44043102139930185, "grad_norm": 0.38529497385025024, "learning_rate": 9.270527488103676e-05, "loss": 1.9399, "step": 1451 }, { "epoch": 0.44073455759599334, "grad_norm": 0.42474156618118286, "learning_rate": 9.270021261516655e-05, "loss": 1.849, "step": 1452 }, { "epoch": 0.44103809379268477, "grad_norm": 0.4505622684955597, "learning_rate": 9.269515034929635e-05, "loss": 1.9889, "step": 1453 }, { "epoch": 0.44134162998937626, "grad_norm": 1.8219722509384155, "learning_rate": 9.269008808342614e-05, "loss": 2.1467, "step": 1454 }, { "epoch": 0.4416451661860677, "grad_norm": 0.6941187381744385, "learning_rate": 9.268502581755594e-05, "loss": 2.1441, "step": 1455 }, { "epoch": 0.44194870238275913, "grad_norm": 0.6262606978416443, "learning_rate": 9.267996355168574e-05, "loss": 1.9937, "step": 1456 }, { "epoch": 0.4422522385794506, "grad_norm": 0.3790215253829956, "learning_rate": 9.267490128581554e-05, "loss": 1.7468, "step": 1457 }, { "epoch": 0.44255577477614205, "grad_norm": 0.42074668407440186, "learning_rate": 9.266983901994533e-05, "loss": 2.1245, "step": 1458 }, { "epoch": 0.4428593109728335, "grad_norm": 0.464870810508728, "learning_rate": 9.266477675407513e-05, "loss": 1.8672, "step": 1459 }, { "epoch": 0.443162847169525, "grad_norm": 0.4551111161708832, "learning_rate": 9.265971448820492e-05, "loss": 2.054, "step": 1460 }, { "epoch": 0.4434663833662164, "grad_norm": 0.3874572813510895, "learning_rate": 9.265465222233473e-05, "loss": 1.8281, "step": 1461 }, { "epoch": 0.4437699195629079, "grad_norm": 0.44287312030792236, "learning_rate": 9.264958995646453e-05, "loss": 1.6435, "step": 1462 }, { "epoch": 0.44407345575959933, "grad_norm": 0.41155338287353516, "learning_rate": 9.264452769059432e-05, "loss": 1.9611, "step": 1463 }, { "epoch": 0.44437699195629077, "grad_norm": 0.480648398399353, "learning_rate": 9.263946542472412e-05, "loss": 1.7771, "step": 1464 }, { "epoch": 0.44468052815298226, "grad_norm": 0.4704960286617279, "learning_rate": 9.263440315885391e-05, "loss": 0.6294, "step": 1465 }, { "epoch": 0.4449840643496737, "grad_norm": 0.4150315821170807, "learning_rate": 9.26293408929837e-05, "loss": 1.7698, "step": 1466 }, { "epoch": 0.4452876005463652, "grad_norm": 0.5981085300445557, "learning_rate": 9.26242786271135e-05, "loss": 1.7192, "step": 1467 }, { "epoch": 0.4455911367430566, "grad_norm": 0.43365392088890076, "learning_rate": 9.26192163612433e-05, "loss": 1.8843, "step": 1468 }, { "epoch": 0.44589467293974805, "grad_norm": 0.7336254715919495, "learning_rate": 9.261415409537309e-05, "loss": 2.0101, "step": 1469 }, { "epoch": 0.44619820913643954, "grad_norm": 0.4002796411514282, "learning_rate": 9.26090918295029e-05, "loss": 1.9817, "step": 1470 }, { "epoch": 0.446501745333131, "grad_norm": 0.4379813075065613, "learning_rate": 9.26040295636327e-05, "loss": 2.1091, "step": 1471 }, { "epoch": 0.4468052815298224, "grad_norm": 0.4577115774154663, "learning_rate": 9.259896729776249e-05, "loss": 1.6132, "step": 1472 }, { "epoch": 0.4471088177265139, "grad_norm": 0.40199458599090576, "learning_rate": 9.259390503189228e-05, "loss": 1.9815, "step": 1473 }, { "epoch": 0.44741235392320533, "grad_norm": 0.4442947506904602, "learning_rate": 9.258884276602208e-05, "loss": 1.8425, "step": 1474 }, { "epoch": 0.4477158901198968, "grad_norm": 0.3720739781856537, "learning_rate": 9.258378050015187e-05, "loss": 2.1161, "step": 1475 }, { "epoch": 0.44801942631658825, "grad_norm": 0.39746803045272827, "learning_rate": 9.257871823428167e-05, "loss": 2.0404, "step": 1476 }, { "epoch": 0.4483229625132797, "grad_norm": 0.4376835525035858, "learning_rate": 9.257365596841146e-05, "loss": 1.7201, "step": 1477 }, { "epoch": 0.4486264987099712, "grad_norm": 0.35988250374794006, "learning_rate": 9.256859370254126e-05, "loss": 1.1999, "step": 1478 }, { "epoch": 0.4489300349066626, "grad_norm": 0.41253864765167236, "learning_rate": 9.256353143667105e-05, "loss": 1.9916, "step": 1479 }, { "epoch": 0.4492335711033541, "grad_norm": 0.34956973791122437, "learning_rate": 9.255846917080086e-05, "loss": 1.7406, "step": 1480 }, { "epoch": 0.44953710730004554, "grad_norm": 0.452239453792572, "learning_rate": 9.255340690493065e-05, "loss": 2.0101, "step": 1481 }, { "epoch": 0.44984064349673697, "grad_norm": 0.36039796471595764, "learning_rate": 9.254834463906045e-05, "loss": 1.9181, "step": 1482 }, { "epoch": 0.45014417969342846, "grad_norm": 0.34030023217201233, "learning_rate": 9.254328237319024e-05, "loss": 1.6803, "step": 1483 }, { "epoch": 0.4504477158901199, "grad_norm": 0.3585798144340515, "learning_rate": 9.253822010732004e-05, "loss": 1.8983, "step": 1484 }, { "epoch": 0.4507512520868113, "grad_norm": 0.4554307758808136, "learning_rate": 9.253315784144983e-05, "loss": 1.741, "step": 1485 }, { "epoch": 0.4510547882835028, "grad_norm": 0.36281803250312805, "learning_rate": 9.252809557557963e-05, "loss": 2.0279, "step": 1486 }, { "epoch": 0.45135832448019425, "grad_norm": 0.4025228023529053, "learning_rate": 9.252303330970942e-05, "loss": 1.8517, "step": 1487 }, { "epoch": 0.45166186067688574, "grad_norm": 0.3962991535663605, "learning_rate": 9.251797104383922e-05, "loss": 1.9199, "step": 1488 }, { "epoch": 0.4519653968735772, "grad_norm": 0.4201490879058838, "learning_rate": 9.251290877796903e-05, "loss": 2.0137, "step": 1489 }, { "epoch": 0.4522689330702686, "grad_norm": 0.4605710804462433, "learning_rate": 9.250784651209882e-05, "loss": 1.975, "step": 1490 }, { "epoch": 0.4525724692669601, "grad_norm": 0.3571039140224457, "learning_rate": 9.250278424622862e-05, "loss": 1.8478, "step": 1491 }, { "epoch": 0.45287600546365153, "grad_norm": 0.406676709651947, "learning_rate": 9.249772198035841e-05, "loss": 1.965, "step": 1492 }, { "epoch": 0.453179541660343, "grad_norm": 0.6116447448730469, "learning_rate": 9.24926597144882e-05, "loss": 1.6192, "step": 1493 }, { "epoch": 0.45348307785703446, "grad_norm": 0.4193543493747711, "learning_rate": 9.2487597448618e-05, "loss": 1.8085, "step": 1494 }, { "epoch": 0.4537866140537259, "grad_norm": 0.4082903563976288, "learning_rate": 9.24825351827478e-05, "loss": 1.8924, "step": 1495 }, { "epoch": 0.4540901502504174, "grad_norm": 0.4163326919078827, "learning_rate": 9.247747291687759e-05, "loss": 1.9238, "step": 1496 }, { "epoch": 0.4543936864471088, "grad_norm": 0.4481281340122223, "learning_rate": 9.247241065100739e-05, "loss": 1.7663, "step": 1497 }, { "epoch": 0.45469722264380025, "grad_norm": 0.3282391428947449, "learning_rate": 9.24673483851372e-05, "loss": 2.0332, "step": 1498 }, { "epoch": 0.45500075884049174, "grad_norm": 0.43553873896598816, "learning_rate": 9.246228611926699e-05, "loss": 2.0712, "step": 1499 }, { "epoch": 0.45530429503718317, "grad_norm": 0.40410909056663513, "learning_rate": 9.245722385339678e-05, "loss": 1.9479, "step": 1500 }, { "epoch": 0.45560783123387466, "grad_norm": 0.36232396960258484, "learning_rate": 9.245216158752659e-05, "loss": 1.9359, "step": 1501 }, { "epoch": 0.4559113674305661, "grad_norm": 0.44860419631004333, "learning_rate": 9.244709932165639e-05, "loss": 1.263, "step": 1502 }, { "epoch": 0.45621490362725753, "grad_norm": 0.5308701395988464, "learning_rate": 9.244203705578618e-05, "loss": 2.2914, "step": 1503 }, { "epoch": 0.456518439823949, "grad_norm": 0.4460773468017578, "learning_rate": 9.243697478991598e-05, "loss": 1.8063, "step": 1504 }, { "epoch": 0.45682197602064045, "grad_norm": 0.4147963523864746, "learning_rate": 9.243191252404577e-05, "loss": 2.045, "step": 1505 }, { "epoch": 0.45712551221733194, "grad_norm": 0.34958329796791077, "learning_rate": 9.242685025817557e-05, "loss": 1.8712, "step": 1506 }, { "epoch": 0.4574290484140234, "grad_norm": 0.36072060465812683, "learning_rate": 9.242178799230536e-05, "loss": 1.7198, "step": 1507 }, { "epoch": 0.4577325846107148, "grad_norm": 0.4608067274093628, "learning_rate": 9.241672572643515e-05, "loss": 1.7165, "step": 1508 }, { "epoch": 0.4580361208074063, "grad_norm": 0.39580467343330383, "learning_rate": 9.241166346056496e-05, "loss": 1.665, "step": 1509 }, { "epoch": 0.45833965700409773, "grad_norm": 0.4920599162578583, "learning_rate": 9.240660119469476e-05, "loss": 1.872, "step": 1510 }, { "epoch": 0.45864319320078917, "grad_norm": 0.4332992136478424, "learning_rate": 9.240153892882455e-05, "loss": 1.8972, "step": 1511 }, { "epoch": 0.45894672939748066, "grad_norm": 0.39618152379989624, "learning_rate": 9.239647666295435e-05, "loss": 2.0167, "step": 1512 }, { "epoch": 0.4592502655941721, "grad_norm": 0.6713082790374756, "learning_rate": 9.239141439708414e-05, "loss": 2.0, "step": 1513 }, { "epoch": 0.4595538017908636, "grad_norm": 0.34422579407691956, "learning_rate": 9.238635213121394e-05, "loss": 1.6438, "step": 1514 }, { "epoch": 0.459857337987555, "grad_norm": 0.43874865770339966, "learning_rate": 9.238128986534373e-05, "loss": 1.6388, "step": 1515 }, { "epoch": 0.46016087418424645, "grad_norm": 0.5863097906112671, "learning_rate": 9.237622759947353e-05, "loss": 1.6764, "step": 1516 }, { "epoch": 0.46046441038093794, "grad_norm": 0.3312426805496216, "learning_rate": 9.237116533360332e-05, "loss": 1.8491, "step": 1517 }, { "epoch": 0.4607679465776294, "grad_norm": 0.3111588656902313, "learning_rate": 9.236610306773312e-05, "loss": 2.0298, "step": 1518 }, { "epoch": 0.46107148277432086, "grad_norm": 0.38705703616142273, "learning_rate": 9.236104080186292e-05, "loss": 2.0584, "step": 1519 }, { "epoch": 0.4613750189710123, "grad_norm": 0.32613542675971985, "learning_rate": 9.235597853599272e-05, "loss": 1.8722, "step": 1520 }, { "epoch": 0.46167855516770373, "grad_norm": 0.9304127097129822, "learning_rate": 9.235091627012251e-05, "loss": 1.978, "step": 1521 }, { "epoch": 0.4619820913643952, "grad_norm": 0.3754931688308716, "learning_rate": 9.234585400425231e-05, "loss": 1.8724, "step": 1522 }, { "epoch": 0.46228562756108665, "grad_norm": 0.4033370912075043, "learning_rate": 9.23407917383821e-05, "loss": 1.3349, "step": 1523 }, { "epoch": 0.4625891637577781, "grad_norm": 0.35285013914108276, "learning_rate": 9.23357294725119e-05, "loss": 1.442, "step": 1524 }, { "epoch": 0.4628926999544696, "grad_norm": 0.4044554531574249, "learning_rate": 9.23306672066417e-05, "loss": 2.0633, "step": 1525 }, { "epoch": 0.463196236151161, "grad_norm": 0.46915552020072937, "learning_rate": 9.232560494077149e-05, "loss": 1.3861, "step": 1526 }, { "epoch": 0.4634997723478525, "grad_norm": 0.4107852280139923, "learning_rate": 9.232054267490128e-05, "loss": 1.9011, "step": 1527 }, { "epoch": 0.46380330854454394, "grad_norm": 0.4018856883049011, "learning_rate": 9.231548040903109e-05, "loss": 1.843, "step": 1528 }, { "epoch": 0.46410684474123537, "grad_norm": 0.36814266443252563, "learning_rate": 9.231041814316089e-05, "loss": 1.897, "step": 1529 }, { "epoch": 0.46441038093792686, "grad_norm": 0.42271214723587036, "learning_rate": 9.230535587729068e-05, "loss": 1.9761, "step": 1530 }, { "epoch": 0.4647139171346183, "grad_norm": 0.4548446238040924, "learning_rate": 9.230029361142048e-05, "loss": 1.9313, "step": 1531 }, { "epoch": 0.4650174533313098, "grad_norm": 0.4320158064365387, "learning_rate": 9.229523134555027e-05, "loss": 1.5687, "step": 1532 }, { "epoch": 0.4653209895280012, "grad_norm": 0.3909349739551544, "learning_rate": 9.229016907968007e-05, "loss": 1.337, "step": 1533 }, { "epoch": 0.46562452572469265, "grad_norm": 0.40204015374183655, "learning_rate": 9.228510681380986e-05, "loss": 1.9838, "step": 1534 }, { "epoch": 0.46592806192138414, "grad_norm": 0.3997584879398346, "learning_rate": 9.228004454793966e-05, "loss": 1.8321, "step": 1535 }, { "epoch": 0.4662315981180756, "grad_norm": 0.43689507246017456, "learning_rate": 9.227498228206945e-05, "loss": 2.0649, "step": 1536 }, { "epoch": 0.466535134314767, "grad_norm": 0.3970150649547577, "learning_rate": 9.226992001619926e-05, "loss": 2.0077, "step": 1537 }, { "epoch": 0.4668386705114585, "grad_norm": 0.3847435414791107, "learning_rate": 9.226485775032905e-05, "loss": 2.0168, "step": 1538 }, { "epoch": 0.46714220670814993, "grad_norm": 0.40491220355033875, "learning_rate": 9.225979548445885e-05, "loss": 1.7831, "step": 1539 }, { "epoch": 0.4674457429048414, "grad_norm": 0.401903361082077, "learning_rate": 9.225473321858864e-05, "loss": 2.007, "step": 1540 }, { "epoch": 0.46774927910153286, "grad_norm": 0.6656989455223083, "learning_rate": 9.224967095271844e-05, "loss": 2.008, "step": 1541 }, { "epoch": 0.4680528152982243, "grad_norm": 0.36222347617149353, "learning_rate": 9.224460868684823e-05, "loss": 1.8659, "step": 1542 }, { "epoch": 0.4683563514949158, "grad_norm": 0.4396745562553406, "learning_rate": 9.223954642097803e-05, "loss": 1.9881, "step": 1543 }, { "epoch": 0.4686598876916072, "grad_norm": 0.5994194149971008, "learning_rate": 9.223448415510782e-05, "loss": 2.1998, "step": 1544 }, { "epoch": 0.4689634238882987, "grad_norm": 0.45558032393455505, "learning_rate": 9.222942188923763e-05, "loss": 1.6082, "step": 1545 }, { "epoch": 0.46926696008499014, "grad_norm": 0.38417017459869385, "learning_rate": 9.222435962336742e-05, "loss": 1.7655, "step": 1546 }, { "epoch": 0.46957049628168157, "grad_norm": 0.41946941614151, "learning_rate": 9.221929735749722e-05, "loss": 1.7472, "step": 1547 }, { "epoch": 0.46987403247837306, "grad_norm": 0.39455467462539673, "learning_rate": 9.221423509162703e-05, "loss": 1.8377, "step": 1548 }, { "epoch": 0.4701775686750645, "grad_norm": 0.3967253565788269, "learning_rate": 9.220917282575682e-05, "loss": 1.7838, "step": 1549 }, { "epoch": 0.470481104871756, "grad_norm": 0.42535534501075745, "learning_rate": 9.220411055988662e-05, "loss": 2.1495, "step": 1550 }, { "epoch": 0.4707846410684474, "grad_norm": 0.36706385016441345, "learning_rate": 9.219904829401641e-05, "loss": 1.9875, "step": 1551 }, { "epoch": 0.47108817726513885, "grad_norm": 0.3747560977935791, "learning_rate": 9.219398602814621e-05, "loss": 1.5342, "step": 1552 }, { "epoch": 0.47139171346183034, "grad_norm": 0.34010231494903564, "learning_rate": 9.2188923762276e-05, "loss": 2.0999, "step": 1553 }, { "epoch": 0.4716952496585218, "grad_norm": 0.40051451325416565, "learning_rate": 9.21838614964058e-05, "loss": 1.8481, "step": 1554 }, { "epoch": 0.4719987858552132, "grad_norm": 0.5217362642288208, "learning_rate": 9.217879923053559e-05, "loss": 1.4746, "step": 1555 }, { "epoch": 0.4723023220519047, "grad_norm": 0.42339226603507996, "learning_rate": 9.217373696466539e-05, "loss": 1.947, "step": 1556 }, { "epoch": 0.47260585824859613, "grad_norm": 0.3780953586101532, "learning_rate": 9.216867469879518e-05, "loss": 2.2093, "step": 1557 }, { "epoch": 0.4729093944452876, "grad_norm": 0.38509401679039, "learning_rate": 9.216361243292499e-05, "loss": 1.6966, "step": 1558 }, { "epoch": 0.47321293064197906, "grad_norm": 0.501438319683075, "learning_rate": 9.215855016705478e-05, "loss": 2.0505, "step": 1559 }, { "epoch": 0.4735164668386705, "grad_norm": 0.42260050773620605, "learning_rate": 9.215348790118458e-05, "loss": 1.9439, "step": 1560 }, { "epoch": 0.473820003035362, "grad_norm": 0.6031399965286255, "learning_rate": 9.214842563531437e-05, "loss": 1.9674, "step": 1561 }, { "epoch": 0.4741235392320534, "grad_norm": 0.3809618055820465, "learning_rate": 9.214336336944417e-05, "loss": 1.9882, "step": 1562 }, { "epoch": 0.4744270754287449, "grad_norm": 0.4074794352054596, "learning_rate": 9.213830110357396e-05, "loss": 1.6648, "step": 1563 }, { "epoch": 0.47473061162543634, "grad_norm": 0.4380822479724884, "learning_rate": 9.213323883770376e-05, "loss": 2.1327, "step": 1564 }, { "epoch": 0.4750341478221278, "grad_norm": 0.6130182147026062, "learning_rate": 9.212817657183355e-05, "loss": 1.957, "step": 1565 }, { "epoch": 0.47533768401881926, "grad_norm": 0.359451025724411, "learning_rate": 9.212311430596335e-05, "loss": 1.5301, "step": 1566 }, { "epoch": 0.4756412202155107, "grad_norm": 0.508237898349762, "learning_rate": 9.211805204009316e-05, "loss": 2.1409, "step": 1567 }, { "epoch": 0.47594475641220213, "grad_norm": 0.5652433037757874, "learning_rate": 9.211298977422295e-05, "loss": 2.2032, "step": 1568 }, { "epoch": 0.4762482926088936, "grad_norm": 0.36153456568717957, "learning_rate": 9.210792750835275e-05, "loss": 2.0994, "step": 1569 }, { "epoch": 0.47655182880558505, "grad_norm": 0.4140501320362091, "learning_rate": 9.210286524248254e-05, "loss": 1.6165, "step": 1570 }, { "epoch": 0.47685536500227654, "grad_norm": 0.36080101132392883, "learning_rate": 9.209780297661234e-05, "loss": 2.0203, "step": 1571 }, { "epoch": 0.477158901198968, "grad_norm": 0.3501390218734741, "learning_rate": 9.209274071074213e-05, "loss": 1.9692, "step": 1572 }, { "epoch": 0.4774624373956594, "grad_norm": 0.3753308653831482, "learning_rate": 9.208767844487192e-05, "loss": 1.799, "step": 1573 }, { "epoch": 0.4777659735923509, "grad_norm": 0.3621695935726166, "learning_rate": 9.208261617900172e-05, "loss": 1.8412, "step": 1574 }, { "epoch": 0.47806950978904234, "grad_norm": 0.4215545952320099, "learning_rate": 9.207755391313151e-05, "loss": 1.8227, "step": 1575 }, { "epoch": 0.4783730459857338, "grad_norm": 0.32205232977867126, "learning_rate": 9.207249164726132e-05, "loss": 1.3949, "step": 1576 }, { "epoch": 0.47867658218242526, "grad_norm": 0.34510162472724915, "learning_rate": 9.206742938139112e-05, "loss": 1.8627, "step": 1577 }, { "epoch": 0.4789801183791167, "grad_norm": 0.41916847229003906, "learning_rate": 9.206236711552091e-05, "loss": 1.6164, "step": 1578 }, { "epoch": 0.4792836545758082, "grad_norm": 0.323519229888916, "learning_rate": 9.205730484965071e-05, "loss": 1.5688, "step": 1579 }, { "epoch": 0.4795871907724996, "grad_norm": 0.4150819778442383, "learning_rate": 9.20522425837805e-05, "loss": 1.8097, "step": 1580 }, { "epoch": 0.47989072696919105, "grad_norm": 0.4045346975326538, "learning_rate": 9.20471803179103e-05, "loss": 1.91, "step": 1581 }, { "epoch": 0.48019426316588254, "grad_norm": 0.3251115083694458, "learning_rate": 9.204211805204009e-05, "loss": 1.9278, "step": 1582 }, { "epoch": 0.480497799362574, "grad_norm": 0.37068256735801697, "learning_rate": 9.203705578616989e-05, "loss": 1.8667, "step": 1583 }, { "epoch": 0.48080133555926546, "grad_norm": 0.4208294749259949, "learning_rate": 9.203199352029968e-05, "loss": 1.9405, "step": 1584 }, { "epoch": 0.4811048717559569, "grad_norm": 0.3996240794658661, "learning_rate": 9.202693125442948e-05, "loss": 1.6466, "step": 1585 }, { "epoch": 0.48140840795264833, "grad_norm": 0.44182920455932617, "learning_rate": 9.202186898855928e-05, "loss": 2.0223, "step": 1586 }, { "epoch": 0.4817119441493398, "grad_norm": 0.43203607201576233, "learning_rate": 9.201680672268908e-05, "loss": 1.7969, "step": 1587 }, { "epoch": 0.48201548034603126, "grad_norm": 0.3604522943496704, "learning_rate": 9.201174445681887e-05, "loss": 2.0201, "step": 1588 }, { "epoch": 0.48231901654272274, "grad_norm": 0.4073752760887146, "learning_rate": 9.200668219094867e-05, "loss": 1.993, "step": 1589 }, { "epoch": 0.4826225527394142, "grad_norm": 0.39307650923728943, "learning_rate": 9.200161992507848e-05, "loss": 2.3445, "step": 1590 }, { "epoch": 0.4829260889361056, "grad_norm": 0.355831503868103, "learning_rate": 9.199655765920827e-05, "loss": 2.0101, "step": 1591 }, { "epoch": 0.4832296251327971, "grad_norm": 0.5814805030822754, "learning_rate": 9.199149539333807e-05, "loss": 2.2421, "step": 1592 }, { "epoch": 0.48353316132948854, "grad_norm": 0.4290510416030884, "learning_rate": 9.198643312746786e-05, "loss": 2.1818, "step": 1593 }, { "epoch": 0.48383669752617997, "grad_norm": 7.360002040863037, "learning_rate": 9.198137086159766e-05, "loss": 2.0011, "step": 1594 }, { "epoch": 0.48414023372287146, "grad_norm": 0.5217785835266113, "learning_rate": 9.197630859572745e-05, "loss": 1.817, "step": 1595 }, { "epoch": 0.4844437699195629, "grad_norm": 0.4188072383403778, "learning_rate": 9.197124632985725e-05, "loss": 1.5588, "step": 1596 }, { "epoch": 0.4847473061162544, "grad_norm": 0.4220346212387085, "learning_rate": 9.196618406398705e-05, "loss": 1.9217, "step": 1597 }, { "epoch": 0.4850508423129458, "grad_norm": 0.5502439141273499, "learning_rate": 9.196112179811685e-05, "loss": 1.841, "step": 1598 }, { "epoch": 0.48535437850963725, "grad_norm": 0.4167909622192383, "learning_rate": 9.195605953224664e-05, "loss": 1.7868, "step": 1599 }, { "epoch": 0.48565791470632874, "grad_norm": 0.45999062061309814, "learning_rate": 9.195099726637644e-05, "loss": 1.8886, "step": 1600 }, { "epoch": 0.4859614509030202, "grad_norm": 0.3937031626701355, "learning_rate": 9.194593500050623e-05, "loss": 1.7736, "step": 1601 }, { "epoch": 0.48626498709971167, "grad_norm": 0.44424453377723694, "learning_rate": 9.194087273463603e-05, "loss": 1.6786, "step": 1602 }, { "epoch": 0.4865685232964031, "grad_norm": 0.35432350635528564, "learning_rate": 9.193581046876582e-05, "loss": 1.8425, "step": 1603 }, { "epoch": 0.48687205949309453, "grad_norm": 0.41191428899765015, "learning_rate": 9.193074820289562e-05, "loss": 1.342, "step": 1604 }, { "epoch": 0.487175595689786, "grad_norm": 0.4410790503025055, "learning_rate": 9.192568593702541e-05, "loss": 1.3158, "step": 1605 }, { "epoch": 0.48747913188647746, "grad_norm": 0.4214244782924652, "learning_rate": 9.192062367115522e-05, "loss": 2.1983, "step": 1606 }, { "epoch": 0.4877826680831689, "grad_norm": 0.4066750109195709, "learning_rate": 9.191556140528502e-05, "loss": 1.5839, "step": 1607 }, { "epoch": 0.4880862042798604, "grad_norm": 0.5248275995254517, "learning_rate": 9.191049913941481e-05, "loss": 1.6415, "step": 1608 }, { "epoch": 0.4883897404765518, "grad_norm": 0.3945814073085785, "learning_rate": 9.19054368735446e-05, "loss": 1.6788, "step": 1609 }, { "epoch": 0.4886932766732433, "grad_norm": 0.42285215854644775, "learning_rate": 9.19003746076744e-05, "loss": 1.7365, "step": 1610 }, { "epoch": 0.48899681286993474, "grad_norm": 0.43051236867904663, "learning_rate": 9.18953123418042e-05, "loss": 1.8906, "step": 1611 }, { "epoch": 0.4893003490666262, "grad_norm": 0.4336687922477722, "learning_rate": 9.189025007593399e-05, "loss": 1.6145, "step": 1612 }, { "epoch": 0.48960388526331766, "grad_norm": 0.34237489104270935, "learning_rate": 9.188518781006378e-05, "loss": 1.9992, "step": 1613 }, { "epoch": 0.4899074214600091, "grad_norm": 0.4344857931137085, "learning_rate": 9.188012554419358e-05, "loss": 1.9943, "step": 1614 }, { "epoch": 0.4902109576567006, "grad_norm": 0.3851914703845978, "learning_rate": 9.187506327832339e-05, "loss": 1.8428, "step": 1615 }, { "epoch": 0.490514493853392, "grad_norm": 0.39165550470352173, "learning_rate": 9.187000101245318e-05, "loss": 1.7958, "step": 1616 }, { "epoch": 0.49081803005008345, "grad_norm": 0.34605157375335693, "learning_rate": 9.186493874658298e-05, "loss": 1.9257, "step": 1617 }, { "epoch": 0.49112156624677494, "grad_norm": 0.422831654548645, "learning_rate": 9.185987648071277e-05, "loss": 2.1828, "step": 1618 }, { "epoch": 0.4914251024434664, "grad_norm": 0.7868388891220093, "learning_rate": 9.185481421484257e-05, "loss": 1.4172, "step": 1619 }, { "epoch": 0.4917286386401578, "grad_norm": 0.3971206247806549, "learning_rate": 9.184975194897236e-05, "loss": 1.8442, "step": 1620 }, { "epoch": 0.4920321748368493, "grad_norm": 0.39479488134384155, "learning_rate": 9.184468968310216e-05, "loss": 1.6141, "step": 1621 }, { "epoch": 0.49233571103354073, "grad_norm": 2.7340400218963623, "learning_rate": 9.183962741723195e-05, "loss": 1.7567, "step": 1622 }, { "epoch": 0.4926392472302322, "grad_norm": 0.7024746537208557, "learning_rate": 9.183456515136175e-05, "loss": 2.3221, "step": 1623 }, { "epoch": 0.49294278342692366, "grad_norm": 0.3881623148918152, "learning_rate": 9.182950288549154e-05, "loss": 2.0143, "step": 1624 }, { "epoch": 0.4932463196236151, "grad_norm": 0.35226500034332275, "learning_rate": 9.182444061962135e-05, "loss": 1.8097, "step": 1625 }, { "epoch": 0.4935498558203066, "grad_norm": 0.9839766621589661, "learning_rate": 9.181937835375114e-05, "loss": 1.9594, "step": 1626 }, { "epoch": 0.493853392016998, "grad_norm": 0.333279013633728, "learning_rate": 9.181431608788094e-05, "loss": 1.8533, "step": 1627 }, { "epoch": 0.4941569282136895, "grad_norm": 0.6945008039474487, "learning_rate": 9.180925382201073e-05, "loss": 1.3658, "step": 1628 }, { "epoch": 0.49446046441038094, "grad_norm": 0.4481600224971771, "learning_rate": 9.180419155614053e-05, "loss": 1.9189, "step": 1629 }, { "epoch": 0.4947640006070724, "grad_norm": 0.35472220182418823, "learning_rate": 9.179912929027032e-05, "loss": 1.3206, "step": 1630 }, { "epoch": 0.49506753680376386, "grad_norm": 0.5124238729476929, "learning_rate": 9.179406702440012e-05, "loss": 2.0371, "step": 1631 }, { "epoch": 0.4953710730004553, "grad_norm": 0.3843775987625122, "learning_rate": 9.178900475852991e-05, "loss": 1.5858, "step": 1632 }, { "epoch": 0.4956746091971468, "grad_norm": 0.41060924530029297, "learning_rate": 9.178394249265971e-05, "loss": 1.4591, "step": 1633 }, { "epoch": 0.4959781453938382, "grad_norm": 0.5426920056343079, "learning_rate": 9.177888022678952e-05, "loss": 2.2744, "step": 1634 }, { "epoch": 0.49628168159052966, "grad_norm": 0.4275033175945282, "learning_rate": 9.177381796091931e-05, "loss": 1.9274, "step": 1635 }, { "epoch": 0.49658521778722114, "grad_norm": 0.4715273976325989, "learning_rate": 9.176875569504912e-05, "loss": 1.5788, "step": 1636 }, { "epoch": 0.4968887539839126, "grad_norm": 0.41464027762413025, "learning_rate": 9.176369342917891e-05, "loss": 1.8147, "step": 1637 }, { "epoch": 0.497192290180604, "grad_norm": 0.4175771176815033, "learning_rate": 9.175863116330871e-05, "loss": 2.02, "step": 1638 }, { "epoch": 0.4974958263772955, "grad_norm": 0.42781904339790344, "learning_rate": 9.17535688974385e-05, "loss": 1.8772, "step": 1639 }, { "epoch": 0.49779936257398694, "grad_norm": 0.381352961063385, "learning_rate": 9.17485066315683e-05, "loss": 1.9982, "step": 1640 }, { "epoch": 0.4981028987706784, "grad_norm": 0.44887885451316833, "learning_rate": 9.174344436569809e-05, "loss": 1.6724, "step": 1641 }, { "epoch": 0.49840643496736986, "grad_norm": 0.3764267563819885, "learning_rate": 9.173838209982789e-05, "loss": 1.7327, "step": 1642 }, { "epoch": 0.4987099711640613, "grad_norm": 0.6911460161209106, "learning_rate": 9.173331983395768e-05, "loss": 2.1353, "step": 1643 }, { "epoch": 0.4990135073607528, "grad_norm": 0.39581048488616943, "learning_rate": 9.172825756808748e-05, "loss": 2.1394, "step": 1644 }, { "epoch": 0.4993170435574442, "grad_norm": 0.420389860868454, "learning_rate": 9.172319530221729e-05, "loss": 2.0948, "step": 1645 }, { "epoch": 0.4996205797541357, "grad_norm": 0.3843049108982086, "learning_rate": 9.171813303634708e-05, "loss": 2.0618, "step": 1646 }, { "epoch": 0.49992411595082714, "grad_norm": 0.3946545422077179, "learning_rate": 9.171307077047688e-05, "loss": 1.7997, "step": 1647 }, { "epoch": 0.5002276521475186, "grad_norm": 0.3740834593772888, "learning_rate": 9.170800850460667e-05, "loss": 1.8436, "step": 1648 }, { "epoch": 0.5005311883442101, "grad_norm": 0.42691826820373535, "learning_rate": 9.170294623873646e-05, "loss": 1.8915, "step": 1649 }, { "epoch": 0.5008347245409015, "grad_norm": 0.41487646102905273, "learning_rate": 9.169788397286626e-05, "loss": 1.6712, "step": 1650 }, { "epoch": 0.5011382607375929, "grad_norm": 0.44870665669441223, "learning_rate": 9.169282170699605e-05, "loss": 1.3704, "step": 1651 }, { "epoch": 0.5014417969342844, "grad_norm": 0.5584750771522522, "learning_rate": 9.168775944112585e-05, "loss": 2.2258, "step": 1652 }, { "epoch": 0.5017453331309759, "grad_norm": 0.4336828291416168, "learning_rate": 9.168269717525564e-05, "loss": 2.0302, "step": 1653 }, { "epoch": 0.5020488693276673, "grad_norm": 0.3990234434604645, "learning_rate": 9.167763490938545e-05, "loss": 1.9813, "step": 1654 }, { "epoch": 0.5023524055243588, "grad_norm": 0.42252814769744873, "learning_rate": 9.167257264351525e-05, "loss": 1.7596, "step": 1655 }, { "epoch": 0.5026559417210502, "grad_norm": 0.42766478657722473, "learning_rate": 9.166751037764504e-05, "loss": 1.4622, "step": 1656 }, { "epoch": 0.5029594779177416, "grad_norm": 0.4347383975982666, "learning_rate": 9.166244811177484e-05, "loss": 1.4987, "step": 1657 }, { "epoch": 0.5032630141144332, "grad_norm": 0.3660615384578705, "learning_rate": 9.165738584590463e-05, "loss": 1.3758, "step": 1658 }, { "epoch": 0.5035665503111246, "grad_norm": 0.3933682441711426, "learning_rate": 9.165232358003443e-05, "loss": 1.9097, "step": 1659 }, { "epoch": 0.5038700865078161, "grad_norm": 0.39718765020370483, "learning_rate": 9.164726131416422e-05, "loss": 1.9825, "step": 1660 }, { "epoch": 0.5041736227045075, "grad_norm": 0.4161352515220642, "learning_rate": 9.164219904829402e-05, "loss": 1.6346, "step": 1661 }, { "epoch": 0.5044771589011989, "grad_norm": 0.413492888212204, "learning_rate": 9.163713678242381e-05, "loss": 1.9286, "step": 1662 }, { "epoch": 0.5047806950978905, "grad_norm": 0.4211573600769043, "learning_rate": 9.16320745165536e-05, "loss": 1.5557, "step": 1663 }, { "epoch": 0.5050842312945819, "grad_norm": 0.3247505724430084, "learning_rate": 9.162701225068341e-05, "loss": 1.8372, "step": 1664 }, { "epoch": 0.5053877674912733, "grad_norm": 0.699242889881134, "learning_rate": 9.162194998481321e-05, "loss": 1.3331, "step": 1665 }, { "epoch": 0.5056913036879648, "grad_norm": 0.45382243394851685, "learning_rate": 9.1616887718943e-05, "loss": 1.5317, "step": 1666 }, { "epoch": 0.5059948398846562, "grad_norm": 0.37562644481658936, "learning_rate": 9.16118254530728e-05, "loss": 1.4138, "step": 1667 }, { "epoch": 0.5062983760813476, "grad_norm": 0.41830095648765564, "learning_rate": 9.160676318720259e-05, "loss": 2.0788, "step": 1668 }, { "epoch": 0.5066019122780392, "grad_norm": 0.4154708981513977, "learning_rate": 9.160170092133239e-05, "loss": 2.0095, "step": 1669 }, { "epoch": 0.5069054484747306, "grad_norm": 0.3693794906139374, "learning_rate": 9.159663865546218e-05, "loss": 1.8871, "step": 1670 }, { "epoch": 0.5072089846714221, "grad_norm": 0.42712700366973877, "learning_rate": 9.159157638959198e-05, "loss": 1.9114, "step": 1671 }, { "epoch": 0.5075125208681135, "grad_norm": 0.406843900680542, "learning_rate": 9.158651412372177e-05, "loss": 1.7887, "step": 1672 }, { "epoch": 0.5078160570648049, "grad_norm": 0.3689083456993103, "learning_rate": 9.158145185785158e-05, "loss": 1.8421, "step": 1673 }, { "epoch": 0.5081195932614965, "grad_norm": 0.40796002745628357, "learning_rate": 9.157638959198138e-05, "loss": 1.5014, "step": 1674 }, { "epoch": 0.5084231294581879, "grad_norm": 0.44102364778518677, "learning_rate": 9.157132732611117e-05, "loss": 1.5184, "step": 1675 }, { "epoch": 0.5087266656548793, "grad_norm": 0.4265199899673462, "learning_rate": 9.156626506024096e-05, "loss": 2.017, "step": 1676 }, { "epoch": 0.5090302018515708, "grad_norm": 0.4618091285228729, "learning_rate": 9.156120279437076e-05, "loss": 2.056, "step": 1677 }, { "epoch": 0.5093337380482622, "grad_norm": 0.4058600068092346, "learning_rate": 9.155614052850055e-05, "loss": 1.9897, "step": 1678 }, { "epoch": 0.5096372742449538, "grad_norm": 0.46722692251205444, "learning_rate": 9.155107826263036e-05, "loss": 1.9713, "step": 1679 }, { "epoch": 0.5099408104416452, "grad_norm": 0.36259156465530396, "learning_rate": 9.154601599676016e-05, "loss": 1.9321, "step": 1680 }, { "epoch": 0.5102443466383366, "grad_norm": 0.366148442029953, "learning_rate": 9.154095373088995e-05, "loss": 1.9573, "step": 1681 }, { "epoch": 0.510547882835028, "grad_norm": 0.3328361213207245, "learning_rate": 9.153589146501975e-05, "loss": 1.8222, "step": 1682 }, { "epoch": 0.5108514190317195, "grad_norm": 0.45891711115837097, "learning_rate": 9.153082919914954e-05, "loss": 1.7177, "step": 1683 }, { "epoch": 0.511154955228411, "grad_norm": 0.4405977427959442, "learning_rate": 9.152576693327935e-05, "loss": 1.8499, "step": 1684 }, { "epoch": 0.5114584914251025, "grad_norm": 0.7388264536857605, "learning_rate": 9.152070466740915e-05, "loss": 1.9884, "step": 1685 }, { "epoch": 0.5117620276217939, "grad_norm": 0.43892955780029297, "learning_rate": 9.151564240153894e-05, "loss": 2.0027, "step": 1686 }, { "epoch": 0.5120655638184853, "grad_norm": 0.42659783363342285, "learning_rate": 9.151058013566873e-05, "loss": 1.8386, "step": 1687 }, { "epoch": 0.5123691000151768, "grad_norm": 0.4364768862724304, "learning_rate": 9.150551786979853e-05, "loss": 1.6248, "step": 1688 }, { "epoch": 0.5126726362118683, "grad_norm": 0.35849112272262573, "learning_rate": 9.150045560392832e-05, "loss": 2.0983, "step": 1689 }, { "epoch": 0.5129761724085597, "grad_norm": 0.38595572113990784, "learning_rate": 9.149539333805812e-05, "loss": 1.956, "step": 1690 }, { "epoch": 0.5132797086052512, "grad_norm": 0.4161504805088043, "learning_rate": 9.149033107218791e-05, "loss": 1.8132, "step": 1691 }, { "epoch": 0.5135832448019426, "grad_norm": 0.6614299416542053, "learning_rate": 9.148526880631771e-05, "loss": 1.4403, "step": 1692 }, { "epoch": 0.513886780998634, "grad_norm": 0.4609692692756653, "learning_rate": 9.148020654044752e-05, "loss": 1.9215, "step": 1693 }, { "epoch": 0.5141903171953256, "grad_norm": 0.4489036202430725, "learning_rate": 9.147514427457731e-05, "loss": 1.7922, "step": 1694 }, { "epoch": 0.514493853392017, "grad_norm": 0.46497032046318054, "learning_rate": 9.14700820087071e-05, "loss": 1.6058, "step": 1695 }, { "epoch": 0.5147973895887085, "grad_norm": 0.39706695079803467, "learning_rate": 9.14650197428369e-05, "loss": 1.7717, "step": 1696 }, { "epoch": 0.5151009257853999, "grad_norm": 0.3839566111564636, "learning_rate": 9.14599574769667e-05, "loss": 1.8218, "step": 1697 }, { "epoch": 0.5154044619820913, "grad_norm": 0.7339301109313965, "learning_rate": 9.145489521109649e-05, "loss": 1.9836, "step": 1698 }, { "epoch": 0.5157079981787828, "grad_norm": 0.4512780010700226, "learning_rate": 9.144983294522629e-05, "loss": 2.0034, "step": 1699 }, { "epoch": 0.5160115343754743, "grad_norm": 1.845346212387085, "learning_rate": 9.144477067935608e-05, "loss": 1.6995, "step": 1700 }, { "epoch": 0.5163150705721657, "grad_norm": 0.42541632056236267, "learning_rate": 9.143970841348588e-05, "loss": 2.0264, "step": 1701 }, { "epoch": 0.5166186067688572, "grad_norm": 0.404821515083313, "learning_rate": 9.143464614761567e-05, "loss": 1.9064, "step": 1702 }, { "epoch": 0.5169221429655486, "grad_norm": 0.4223015606403351, "learning_rate": 9.142958388174548e-05, "loss": 1.8442, "step": 1703 }, { "epoch": 0.51722567916224, "grad_norm": 0.38094672560691833, "learning_rate": 9.142452161587527e-05, "loss": 1.7625, "step": 1704 }, { "epoch": 0.5175292153589316, "grad_norm": 0.3759573698043823, "learning_rate": 9.141945935000507e-05, "loss": 2.0585, "step": 1705 }, { "epoch": 0.517832751555623, "grad_norm": 0.3938165307044983, "learning_rate": 9.141439708413486e-05, "loss": 1.9594, "step": 1706 }, { "epoch": 0.5181362877523145, "grad_norm": 0.4222012758255005, "learning_rate": 9.140933481826466e-05, "loss": 1.1698, "step": 1707 }, { "epoch": 0.5184398239490059, "grad_norm": 0.419763445854187, "learning_rate": 9.140427255239445e-05, "loss": 1.9484, "step": 1708 }, { "epoch": 0.5187433601456973, "grad_norm": 0.4546319544315338, "learning_rate": 9.139921028652425e-05, "loss": 1.9924, "step": 1709 }, { "epoch": 0.5190468963423889, "grad_norm": 0.5007880330085754, "learning_rate": 9.139414802065404e-05, "loss": 2.0619, "step": 1710 }, { "epoch": 0.5193504325390803, "grad_norm": 0.3647090494632721, "learning_rate": 9.138908575478384e-05, "loss": 1.9504, "step": 1711 }, { "epoch": 0.5196539687357717, "grad_norm": 0.4546000063419342, "learning_rate": 9.138402348891365e-05, "loss": 2.0943, "step": 1712 }, { "epoch": 0.5199575049324632, "grad_norm": 0.36992448568344116, "learning_rate": 9.137896122304344e-05, "loss": 1.8111, "step": 1713 }, { "epoch": 0.5202610411291546, "grad_norm": 0.40882760286331177, "learning_rate": 9.137389895717323e-05, "loss": 1.8935, "step": 1714 }, { "epoch": 0.5205645773258462, "grad_norm": 0.39158037304878235, "learning_rate": 9.136883669130303e-05, "loss": 1.4147, "step": 1715 }, { "epoch": 0.5208681135225376, "grad_norm": 0.42174550890922546, "learning_rate": 9.136377442543282e-05, "loss": 1.5931, "step": 1716 }, { "epoch": 0.521171649719229, "grad_norm": 0.4003652036190033, "learning_rate": 9.135871215956262e-05, "loss": 1.4119, "step": 1717 }, { "epoch": 0.5214751859159205, "grad_norm": 0.42328763008117676, "learning_rate": 9.135364989369241e-05, "loss": 1.6943, "step": 1718 }, { "epoch": 0.5217787221126119, "grad_norm": 0.3831746578216553, "learning_rate": 9.134858762782221e-05, "loss": 1.8067, "step": 1719 }, { "epoch": 0.5220822583093034, "grad_norm": 0.4160243272781372, "learning_rate": 9.1343525361952e-05, "loss": 2.0725, "step": 1720 }, { "epoch": 0.5223857945059949, "grad_norm": 0.47441422939300537, "learning_rate": 9.133846309608181e-05, "loss": 2.2569, "step": 1721 }, { "epoch": 0.5226893307026863, "grad_norm": 0.34522169828414917, "learning_rate": 9.133340083021161e-05, "loss": 1.6977, "step": 1722 }, { "epoch": 0.5229928668993777, "grad_norm": 0.6760712265968323, "learning_rate": 9.132833856434142e-05, "loss": 1.7252, "step": 1723 }, { "epoch": 0.5232964030960692, "grad_norm": 0.42016392946243286, "learning_rate": 9.132327629847121e-05, "loss": 1.9835, "step": 1724 }, { "epoch": 0.5235999392927606, "grad_norm": 0.4062696099281311, "learning_rate": 9.1318214032601e-05, "loss": 1.8181, "step": 1725 }, { "epoch": 0.5239034754894522, "grad_norm": 0.37092477083206177, "learning_rate": 9.13131517667308e-05, "loss": 1.9989, "step": 1726 }, { "epoch": 0.5242070116861436, "grad_norm": 0.30382564663887024, "learning_rate": 9.13080895008606e-05, "loss": 1.5613, "step": 1727 }, { "epoch": 0.524510547882835, "grad_norm": 0.39715448021888733, "learning_rate": 9.130302723499039e-05, "loss": 1.8396, "step": 1728 }, { "epoch": 0.5248140840795265, "grad_norm": 0.698819637298584, "learning_rate": 9.129796496912018e-05, "loss": 1.6617, "step": 1729 }, { "epoch": 0.5251176202762179, "grad_norm": 0.37083616852760315, "learning_rate": 9.129290270324998e-05, "loss": 1.0619, "step": 1730 }, { "epoch": 0.5254211564729094, "grad_norm": 0.37196993827819824, "learning_rate": 9.128784043737977e-05, "loss": 1.4654, "step": 1731 }, { "epoch": 0.5257246926696009, "grad_norm": 0.38970932364463806, "learning_rate": 9.128277817150958e-05, "loss": 1.9632, "step": 1732 }, { "epoch": 0.5260282288662923, "grad_norm": 0.4937323033809662, "learning_rate": 9.127771590563938e-05, "loss": 1.5989, "step": 1733 }, { "epoch": 0.5263317650629837, "grad_norm": 0.37157008051872253, "learning_rate": 9.127265363976917e-05, "loss": 1.8486, "step": 1734 }, { "epoch": 0.5266353012596752, "grad_norm": 0.3973872661590576, "learning_rate": 9.126759137389897e-05, "loss": 1.5195, "step": 1735 }, { "epoch": 0.5269388374563667, "grad_norm": 0.3511494994163513, "learning_rate": 9.126252910802876e-05, "loss": 1.9055, "step": 1736 }, { "epoch": 0.5272423736530581, "grad_norm": 0.36223629117012024, "learning_rate": 9.125746684215856e-05, "loss": 1.5545, "step": 1737 }, { "epoch": 0.5275459098497496, "grad_norm": 0.4978778660297394, "learning_rate": 9.125240457628835e-05, "loss": 1.9145, "step": 1738 }, { "epoch": 0.527849446046441, "grad_norm": 0.3191153407096863, "learning_rate": 9.124734231041815e-05, "loss": 1.754, "step": 1739 }, { "epoch": 0.5281529822431325, "grad_norm": 0.39094769954681396, "learning_rate": 9.124228004454794e-05, "loss": 1.9462, "step": 1740 }, { "epoch": 0.528456518439824, "grad_norm": 0.6246857047080994, "learning_rate": 9.123721777867774e-05, "loss": 2.0239, "step": 1741 }, { "epoch": 0.5287600546365154, "grad_norm": 0.41962483525276184, "learning_rate": 9.123215551280754e-05, "loss": 1.9372, "step": 1742 }, { "epoch": 0.5290635908332069, "grad_norm": 0.3055092394351959, "learning_rate": 9.122709324693734e-05, "loss": 0.9516, "step": 1743 }, { "epoch": 0.5293671270298983, "grad_norm": 0.4911038875579834, "learning_rate": 9.122203098106713e-05, "loss": 1.7127, "step": 1744 }, { "epoch": 0.5296706632265897, "grad_norm": 0.7481783032417297, "learning_rate": 9.121696871519693e-05, "loss": 2.1368, "step": 1745 }, { "epoch": 0.5299741994232813, "grad_norm": 0.4397221803665161, "learning_rate": 9.121190644932672e-05, "loss": 1.978, "step": 1746 }, { "epoch": 0.5302777356199727, "grad_norm": 0.3751915991306305, "learning_rate": 9.120684418345652e-05, "loss": 2.0627, "step": 1747 }, { "epoch": 0.5305812718166641, "grad_norm": 0.474575400352478, "learning_rate": 9.120178191758631e-05, "loss": 2.018, "step": 1748 }, { "epoch": 0.5308848080133556, "grad_norm": 0.3762502372264862, "learning_rate": 9.119671965171611e-05, "loss": 1.7076, "step": 1749 }, { "epoch": 0.531188344210047, "grad_norm": 0.4058527946472168, "learning_rate": 9.11916573858459e-05, "loss": 1.768, "step": 1750 }, { "epoch": 0.5314918804067384, "grad_norm": 0.3765137791633606, "learning_rate": 9.118659511997571e-05, "loss": 1.8357, "step": 1751 }, { "epoch": 0.53179541660343, "grad_norm": 0.459602415561676, "learning_rate": 9.11815328541055e-05, "loss": 0.918, "step": 1752 }, { "epoch": 0.5320989528001214, "grad_norm": 0.4160063564777374, "learning_rate": 9.11764705882353e-05, "loss": 1.8438, "step": 1753 }, { "epoch": 0.5324024889968129, "grad_norm": 0.44720131158828735, "learning_rate": 9.11714083223651e-05, "loss": 1.6503, "step": 1754 }, { "epoch": 0.5327060251935043, "grad_norm": 0.35455620288848877, "learning_rate": 9.116634605649489e-05, "loss": 2.0683, "step": 1755 }, { "epoch": 0.5330095613901957, "grad_norm": 0.3938636779785156, "learning_rate": 9.116128379062468e-05, "loss": 1.8191, "step": 1756 }, { "epoch": 0.5333130975868873, "grad_norm": 0.38144779205322266, "learning_rate": 9.115622152475448e-05, "loss": 1.4855, "step": 1757 }, { "epoch": 0.5336166337835787, "grad_norm": 0.3418583571910858, "learning_rate": 9.115115925888427e-05, "loss": 1.8684, "step": 1758 }, { "epoch": 0.5339201699802701, "grad_norm": 0.3342360854148865, "learning_rate": 9.114609699301407e-05, "loss": 1.7817, "step": 1759 }, { "epoch": 0.5342237061769616, "grad_norm": 0.4178410768508911, "learning_rate": 9.114103472714388e-05, "loss": 1.848, "step": 1760 }, { "epoch": 0.534527242373653, "grad_norm": 0.37378084659576416, "learning_rate": 9.113597246127367e-05, "loss": 2.1967, "step": 1761 }, { "epoch": 0.5348307785703446, "grad_norm": 0.33370524644851685, "learning_rate": 9.113091019540347e-05, "loss": 1.9943, "step": 1762 }, { "epoch": 0.535134314767036, "grad_norm": 0.3402559757232666, "learning_rate": 9.112584792953326e-05, "loss": 1.7164, "step": 1763 }, { "epoch": 0.5354378509637274, "grad_norm": 0.3962159752845764, "learning_rate": 9.112078566366306e-05, "loss": 1.8821, "step": 1764 }, { "epoch": 0.5357413871604189, "grad_norm": 0.4659918546676636, "learning_rate": 9.111572339779285e-05, "loss": 1.7065, "step": 1765 }, { "epoch": 0.5360449233571103, "grad_norm": 0.38673698902130127, "learning_rate": 9.111066113192265e-05, "loss": 1.8969, "step": 1766 }, { "epoch": 0.5363484595538018, "grad_norm": 0.3595302999019623, "learning_rate": 9.110559886605244e-05, "loss": 1.881, "step": 1767 }, { "epoch": 0.5366519957504933, "grad_norm": 0.4756614565849304, "learning_rate": 9.110053660018225e-05, "loss": 1.9395, "step": 1768 }, { "epoch": 0.5369555319471847, "grad_norm": 0.36729127168655396, "learning_rate": 9.109547433431204e-05, "loss": 2.0762, "step": 1769 }, { "epoch": 0.5372590681438761, "grad_norm": 0.5436307191848755, "learning_rate": 9.109041206844184e-05, "loss": 1.9971, "step": 1770 }, { "epoch": 0.5375626043405676, "grad_norm": 0.42176029086112976, "learning_rate": 9.108534980257165e-05, "loss": 1.5326, "step": 1771 }, { "epoch": 0.5378661405372591, "grad_norm": 0.6235511302947998, "learning_rate": 9.108028753670144e-05, "loss": 1.8906, "step": 1772 }, { "epoch": 0.5381696767339506, "grad_norm": 0.42510315775871277, "learning_rate": 9.107522527083124e-05, "loss": 1.7236, "step": 1773 }, { "epoch": 0.538473212930642, "grad_norm": 0.4418346583843231, "learning_rate": 9.107016300496103e-05, "loss": 1.9227, "step": 1774 }, { "epoch": 0.5387767491273334, "grad_norm": 0.9422191977500916, "learning_rate": 9.106510073909083e-05, "loss": 1.7426, "step": 1775 }, { "epoch": 0.5390802853240249, "grad_norm": 0.44353923201560974, "learning_rate": 9.106003847322062e-05, "loss": 2.0377, "step": 1776 }, { "epoch": 0.5393838215207164, "grad_norm": 0.457926481962204, "learning_rate": 9.105497620735042e-05, "loss": 1.9583, "step": 1777 }, { "epoch": 0.5396873577174078, "grad_norm": 0.3857896327972412, "learning_rate": 9.104991394148021e-05, "loss": 1.4618, "step": 1778 }, { "epoch": 0.5399908939140993, "grad_norm": 0.4202859401702881, "learning_rate": 9.104485167561e-05, "loss": 1.7507, "step": 1779 }, { "epoch": 0.5402944301107907, "grad_norm": 0.3665039837360382, "learning_rate": 9.10397894097398e-05, "loss": 1.8576, "step": 1780 }, { "epoch": 0.5405979663074821, "grad_norm": 0.39893728494644165, "learning_rate": 9.103472714386961e-05, "loss": 1.81, "step": 1781 }, { "epoch": 0.5409015025041736, "grad_norm": 2.199347972869873, "learning_rate": 9.10296648779994e-05, "loss": 1.4915, "step": 1782 }, { "epoch": 0.5412050387008651, "grad_norm": 0.4976440966129303, "learning_rate": 9.10246026121292e-05, "loss": 1.6961, "step": 1783 }, { "epoch": 0.5415085748975565, "grad_norm": 0.4084802269935608, "learning_rate": 9.101954034625899e-05, "loss": 1.4498, "step": 1784 }, { "epoch": 0.541812111094248, "grad_norm": 0.37160369753837585, "learning_rate": 9.101447808038879e-05, "loss": 1.8383, "step": 1785 }, { "epoch": 0.5421156472909394, "grad_norm": 0.4095883071422577, "learning_rate": 9.100941581451858e-05, "loss": 1.7035, "step": 1786 }, { "epoch": 0.5424191834876309, "grad_norm": 0.3713209927082062, "learning_rate": 9.100435354864838e-05, "loss": 1.8924, "step": 1787 }, { "epoch": 0.5427227196843224, "grad_norm": 0.465432733297348, "learning_rate": 9.099929128277817e-05, "loss": 2.1334, "step": 1788 }, { "epoch": 0.5430262558810138, "grad_norm": 0.4591209590435028, "learning_rate": 9.099422901690797e-05, "loss": 1.9845, "step": 1789 }, { "epoch": 0.5433297920777053, "grad_norm": 0.45076972246170044, "learning_rate": 9.098916675103777e-05, "loss": 1.7297, "step": 1790 }, { "epoch": 0.5436333282743967, "grad_norm": 0.44921204447746277, "learning_rate": 9.098410448516757e-05, "loss": 1.9707, "step": 1791 }, { "epoch": 0.5439368644710881, "grad_norm": 0.3970228135585785, "learning_rate": 9.097904221929736e-05, "loss": 2.024, "step": 1792 }, { "epoch": 0.5442404006677797, "grad_norm": 0.4587130546569824, "learning_rate": 9.097397995342716e-05, "loss": 1.5426, "step": 1793 }, { "epoch": 0.5445439368644711, "grad_norm": 0.4152527153491974, "learning_rate": 9.096891768755695e-05, "loss": 1.9575, "step": 1794 }, { "epoch": 0.5448474730611625, "grad_norm": 0.3973013758659363, "learning_rate": 9.096385542168675e-05, "loss": 2.0246, "step": 1795 }, { "epoch": 0.545151009257854, "grad_norm": 0.3950592875480652, "learning_rate": 9.095879315581654e-05, "loss": 1.9213, "step": 1796 }, { "epoch": 0.5454545454545454, "grad_norm": 0.4187184274196625, "learning_rate": 9.095373088994634e-05, "loss": 1.8801, "step": 1797 }, { "epoch": 0.545758081651237, "grad_norm": 0.43511682748794556, "learning_rate": 9.094866862407613e-05, "loss": 1.4022, "step": 1798 }, { "epoch": 0.5460616178479284, "grad_norm": 0.40607360005378723, "learning_rate": 9.094360635820594e-05, "loss": 1.9416, "step": 1799 }, { "epoch": 0.5463651540446198, "grad_norm": 0.3357563018798828, "learning_rate": 9.093854409233574e-05, "loss": 1.8906, "step": 1800 }, { "epoch": 0.5466686902413113, "grad_norm": 0.3839071989059448, "learning_rate": 9.093348182646553e-05, "loss": 1.7918, "step": 1801 }, { "epoch": 0.5469722264380027, "grad_norm": 0.3882817029953003, "learning_rate": 9.092841956059533e-05, "loss": 1.6346, "step": 1802 }, { "epoch": 0.5472757626346942, "grad_norm": 0.4323276877403259, "learning_rate": 9.092335729472512e-05, "loss": 1.7346, "step": 1803 }, { "epoch": 0.5475792988313857, "grad_norm": 0.39711809158325195, "learning_rate": 9.091829502885492e-05, "loss": 1.8855, "step": 1804 }, { "epoch": 0.5478828350280771, "grad_norm": 0.4660872519016266, "learning_rate": 9.091323276298471e-05, "loss": 1.1871, "step": 1805 }, { "epoch": 0.5481863712247685, "grad_norm": 0.45804888010025024, "learning_rate": 9.09081704971145e-05, "loss": 2.1685, "step": 1806 }, { "epoch": 0.54848990742146, "grad_norm": 0.5922791361808777, "learning_rate": 9.09031082312443e-05, "loss": 1.8701, "step": 1807 }, { "epoch": 0.5487934436181514, "grad_norm": 0.43038979172706604, "learning_rate": 9.08980459653741e-05, "loss": 2.1007, "step": 1808 }, { "epoch": 0.549096979814843, "grad_norm": 0.3624688684940338, "learning_rate": 9.08929836995039e-05, "loss": 1.7411, "step": 1809 }, { "epoch": 0.5494005160115344, "grad_norm": 0.40898412466049194, "learning_rate": 9.08879214336337e-05, "loss": 1.9381, "step": 1810 }, { "epoch": 0.5497040522082258, "grad_norm": 0.45767003297805786, "learning_rate": 9.088285916776349e-05, "loss": 1.8295, "step": 1811 }, { "epoch": 0.5500075884049173, "grad_norm": 0.41230660676956177, "learning_rate": 9.08777969018933e-05, "loss": 1.7066, "step": 1812 }, { "epoch": 0.5503111246016087, "grad_norm": 0.6730133891105652, "learning_rate": 9.08727346360231e-05, "loss": 2.2395, "step": 1813 }, { "epoch": 0.5506146607983002, "grad_norm": 0.39757731556892395, "learning_rate": 9.086767237015289e-05, "loss": 1.9659, "step": 1814 }, { "epoch": 0.5509181969949917, "grad_norm": 0.48182088136672974, "learning_rate": 9.086261010428269e-05, "loss": 1.7596, "step": 1815 }, { "epoch": 0.5512217331916831, "grad_norm": 0.4225050210952759, "learning_rate": 9.085754783841248e-05, "loss": 1.6582, "step": 1816 }, { "epoch": 0.5515252693883745, "grad_norm": 0.40362295508384705, "learning_rate": 9.085248557254227e-05, "loss": 1.8002, "step": 1817 }, { "epoch": 0.551828805585066, "grad_norm": 0.4283868968486786, "learning_rate": 9.084742330667207e-05, "loss": 2.0566, "step": 1818 }, { "epoch": 0.5521323417817575, "grad_norm": 0.3864719569683075, "learning_rate": 9.084236104080186e-05, "loss": 1.7037, "step": 1819 }, { "epoch": 0.552435877978449, "grad_norm": 0.45380616188049316, "learning_rate": 9.083729877493167e-05, "loss": 1.7841, "step": 1820 }, { "epoch": 0.5527394141751404, "grad_norm": 0.42916885018348694, "learning_rate": 9.083223650906147e-05, "loss": 1.8914, "step": 1821 }, { "epoch": 0.5530429503718318, "grad_norm": 0.4037598669528961, "learning_rate": 9.082717424319126e-05, "loss": 1.9326, "step": 1822 }, { "epoch": 0.5533464865685233, "grad_norm": 0.37888360023498535, "learning_rate": 9.082211197732106e-05, "loss": 1.9225, "step": 1823 }, { "epoch": 0.5536500227652148, "grad_norm": 0.44082072377204895, "learning_rate": 9.081704971145085e-05, "loss": 2.0965, "step": 1824 }, { "epoch": 0.5539535589619062, "grad_norm": 0.40458253026008606, "learning_rate": 9.081198744558065e-05, "loss": 1.9156, "step": 1825 }, { "epoch": 0.5542570951585977, "grad_norm": 0.648024320602417, "learning_rate": 9.080692517971044e-05, "loss": 1.6612, "step": 1826 }, { "epoch": 0.5545606313552891, "grad_norm": 0.38878655433654785, "learning_rate": 9.080186291384024e-05, "loss": 1.9156, "step": 1827 }, { "epoch": 0.5548641675519805, "grad_norm": 0.3615175187587738, "learning_rate": 9.079680064797003e-05, "loss": 1.9214, "step": 1828 }, { "epoch": 0.5551677037486721, "grad_norm": 0.34867003560066223, "learning_rate": 9.079173838209984e-05, "loss": 1.906, "step": 1829 }, { "epoch": 0.5554712399453635, "grad_norm": 0.6473682522773743, "learning_rate": 9.078667611622963e-05, "loss": 1.6777, "step": 1830 }, { "epoch": 0.555774776142055, "grad_norm": 0.4099821150302887, "learning_rate": 9.078161385035943e-05, "loss": 2.1986, "step": 1831 }, { "epoch": 0.5560783123387464, "grad_norm": 0.3992425799369812, "learning_rate": 9.077655158448922e-05, "loss": 2.0874, "step": 1832 }, { "epoch": 0.5563818485354378, "grad_norm": 0.3562420904636383, "learning_rate": 9.077148931861902e-05, "loss": 1.8174, "step": 1833 }, { "epoch": 0.5566853847321293, "grad_norm": 0.45232492685317993, "learning_rate": 9.076642705274881e-05, "loss": 2.0863, "step": 1834 }, { "epoch": 0.5569889209288208, "grad_norm": 0.39387455582618713, "learning_rate": 9.076136478687861e-05, "loss": 1.8224, "step": 1835 }, { "epoch": 0.5572924571255122, "grad_norm": 0.35372141003608704, "learning_rate": 9.07563025210084e-05, "loss": 1.5546, "step": 1836 }, { "epoch": 0.5575959933222037, "grad_norm": 0.4068455100059509, "learning_rate": 9.07512402551382e-05, "loss": 1.6266, "step": 1837 }, { "epoch": 0.5578995295188951, "grad_norm": 0.39574089646339417, "learning_rate": 9.0746177989268e-05, "loss": 2.0823, "step": 1838 }, { "epoch": 0.5582030657155865, "grad_norm": 1.1845453977584839, "learning_rate": 9.07411157233978e-05, "loss": 1.6966, "step": 1839 }, { "epoch": 0.5585066019122781, "grad_norm": 0.39268460869789124, "learning_rate": 9.07360534575276e-05, "loss": 1.1071, "step": 1840 }, { "epoch": 0.5588101381089695, "grad_norm": 0.4749743640422821, "learning_rate": 9.073099119165739e-05, "loss": 1.9787, "step": 1841 }, { "epoch": 0.559113674305661, "grad_norm": 0.4099438786506653, "learning_rate": 9.072592892578719e-05, "loss": 1.952, "step": 1842 }, { "epoch": 0.5594172105023524, "grad_norm": 0.4282529354095459, "learning_rate": 9.072086665991698e-05, "loss": 1.9985, "step": 1843 }, { "epoch": 0.5597207466990438, "grad_norm": 0.41518470644950867, "learning_rate": 9.071580439404678e-05, "loss": 1.6999, "step": 1844 }, { "epoch": 0.5600242828957354, "grad_norm": 0.4059050381183624, "learning_rate": 9.071074212817657e-05, "loss": 1.9737, "step": 1845 }, { "epoch": 0.5603278190924268, "grad_norm": 0.3274436295032501, "learning_rate": 9.070567986230636e-05, "loss": 1.1522, "step": 1846 }, { "epoch": 0.5606313552891182, "grad_norm": 0.4117715656757355, "learning_rate": 9.070061759643616e-05, "loss": 2.2128, "step": 1847 }, { "epoch": 0.5609348914858097, "grad_norm": 1.530457854270935, "learning_rate": 9.069555533056597e-05, "loss": 2.2275, "step": 1848 }, { "epoch": 0.5612384276825011, "grad_norm": 1.6292579174041748, "learning_rate": 9.069049306469576e-05, "loss": 1.6642, "step": 1849 }, { "epoch": 0.5615419638791926, "grad_norm": 0.4147336184978485, "learning_rate": 9.068543079882556e-05, "loss": 1.7811, "step": 1850 }, { "epoch": 0.5618455000758841, "grad_norm": 0.4253292679786682, "learning_rate": 9.068036853295535e-05, "loss": 2.1084, "step": 1851 }, { "epoch": 0.5621490362725755, "grad_norm": 0.3340885043144226, "learning_rate": 9.067530626708515e-05, "loss": 1.0015, "step": 1852 }, { "epoch": 0.5624525724692669, "grad_norm": 0.34140780568122864, "learning_rate": 9.067024400121494e-05, "loss": 1.9773, "step": 1853 }, { "epoch": 0.5627561086659584, "grad_norm": 0.48916199803352356, "learning_rate": 9.066518173534474e-05, "loss": 1.7743, "step": 1854 }, { "epoch": 0.5630596448626499, "grad_norm": 0.43407005071640015, "learning_rate": 9.066011946947453e-05, "loss": 1.8134, "step": 1855 }, { "epoch": 0.5633631810593414, "grad_norm": 1.257241129875183, "learning_rate": 9.065505720360433e-05, "loss": 1.9903, "step": 1856 }, { "epoch": 0.5636667172560328, "grad_norm": 0.4004335105419159, "learning_rate": 9.064999493773413e-05, "loss": 1.9988, "step": 1857 }, { "epoch": 0.5639702534527242, "grad_norm": 0.41307345032691956, "learning_rate": 9.064493267186393e-05, "loss": 1.9789, "step": 1858 }, { "epoch": 0.5642737896494157, "grad_norm": 0.41875752806663513, "learning_rate": 9.063987040599374e-05, "loss": 1.8535, "step": 1859 }, { "epoch": 0.5645773258461072, "grad_norm": 0.4912898540496826, "learning_rate": 9.063480814012353e-05, "loss": 2.3529, "step": 1860 }, { "epoch": 0.5648808620427986, "grad_norm": 0.4265078604221344, "learning_rate": 9.062974587425333e-05, "loss": 2.0698, "step": 1861 }, { "epoch": 0.5651843982394901, "grad_norm": 0.3786260187625885, "learning_rate": 9.062468360838312e-05, "loss": 1.818, "step": 1862 }, { "epoch": 0.5654879344361815, "grad_norm": 0.3665534257888794, "learning_rate": 9.061962134251292e-05, "loss": 1.9464, "step": 1863 }, { "epoch": 0.5657914706328729, "grad_norm": 0.4516305923461914, "learning_rate": 9.061455907664271e-05, "loss": 1.7718, "step": 1864 }, { "epoch": 0.5660950068295644, "grad_norm": 1.0637644529342651, "learning_rate": 9.06094968107725e-05, "loss": 1.8881, "step": 1865 }, { "epoch": 0.5663985430262559, "grad_norm": 0.41039812564849854, "learning_rate": 9.06044345449023e-05, "loss": 2.0346, "step": 1866 }, { "epoch": 0.5667020792229474, "grad_norm": 0.40830013155937195, "learning_rate": 9.05993722790321e-05, "loss": 1.6864, "step": 1867 }, { "epoch": 0.5670056154196388, "grad_norm": 0.37757718563079834, "learning_rate": 9.05943100131619e-05, "loss": 1.7925, "step": 1868 }, { "epoch": 0.5673091516163302, "grad_norm": 0.45366227626800537, "learning_rate": 9.05892477472917e-05, "loss": 1.7869, "step": 1869 }, { "epoch": 0.5676126878130217, "grad_norm": 0.4220414459705353, "learning_rate": 9.05841854814215e-05, "loss": 1.932, "step": 1870 }, { "epoch": 0.5679162240097132, "grad_norm": 0.4413476884365082, "learning_rate": 9.057912321555129e-05, "loss": 1.5842, "step": 1871 }, { "epoch": 0.5682197602064046, "grad_norm": 0.40240782499313354, "learning_rate": 9.057406094968108e-05, "loss": 1.9477, "step": 1872 }, { "epoch": 0.5685232964030961, "grad_norm": 0.4332951605319977, "learning_rate": 9.056899868381088e-05, "loss": 1.8664, "step": 1873 }, { "epoch": 0.5688268325997875, "grad_norm": 0.3841226398944855, "learning_rate": 9.056393641794067e-05, "loss": 2.3058, "step": 1874 }, { "epoch": 0.5691303687964789, "grad_norm": 0.3937263488769531, "learning_rate": 9.055887415207047e-05, "loss": 1.683, "step": 1875 }, { "epoch": 0.5694339049931705, "grad_norm": 0.33709925413131714, "learning_rate": 9.055381188620026e-05, "loss": 1.8616, "step": 1876 }, { "epoch": 0.5697374411898619, "grad_norm": 0.3934507668018341, "learning_rate": 9.054874962033007e-05, "loss": 1.6839, "step": 1877 }, { "epoch": 0.5700409773865533, "grad_norm": 0.4386683702468872, "learning_rate": 9.054368735445987e-05, "loss": 1.8068, "step": 1878 }, { "epoch": 0.5703445135832448, "grad_norm": 0.4416390657424927, "learning_rate": 9.053862508858966e-05, "loss": 1.6858, "step": 1879 }, { "epoch": 0.5706480497799362, "grad_norm": 0.4287014901638031, "learning_rate": 9.053356282271946e-05, "loss": 1.8889, "step": 1880 }, { "epoch": 0.5709515859766278, "grad_norm": 0.4297000765800476, "learning_rate": 9.052850055684925e-05, "loss": 1.8281, "step": 1881 }, { "epoch": 0.5712551221733192, "grad_norm": 0.48270586133003235, "learning_rate": 9.052343829097905e-05, "loss": 1.6692, "step": 1882 }, { "epoch": 0.5715586583700106, "grad_norm": 0.44133251905441284, "learning_rate": 9.051837602510884e-05, "loss": 1.8359, "step": 1883 }, { "epoch": 0.5718621945667021, "grad_norm": 0.5127750039100647, "learning_rate": 9.051331375923863e-05, "loss": 1.9437, "step": 1884 }, { "epoch": 0.5721657307633935, "grad_norm": 0.4890953600406647, "learning_rate": 9.050825149336843e-05, "loss": 1.6396, "step": 1885 }, { "epoch": 0.572469266960085, "grad_norm": 0.36201316118240356, "learning_rate": 9.050318922749822e-05, "loss": 1.6985, "step": 1886 }, { "epoch": 0.5727728031567765, "grad_norm": 0.3880859911441803, "learning_rate": 9.049812696162803e-05, "loss": 1.7916, "step": 1887 }, { "epoch": 0.5730763393534679, "grad_norm": 0.500619649887085, "learning_rate": 9.049306469575783e-05, "loss": 1.826, "step": 1888 }, { "epoch": 0.5733798755501593, "grad_norm": 0.764751672744751, "learning_rate": 9.048800242988762e-05, "loss": 1.5406, "step": 1889 }, { "epoch": 0.5736834117468508, "grad_norm": 0.4573342502117157, "learning_rate": 9.048294016401742e-05, "loss": 1.6461, "step": 1890 }, { "epoch": 0.5739869479435422, "grad_norm": 0.5972601175308228, "learning_rate": 9.047787789814721e-05, "loss": 2.3081, "step": 1891 }, { "epoch": 0.5742904841402338, "grad_norm": 0.4419214129447937, "learning_rate": 9.0472815632277e-05, "loss": 1.8907, "step": 1892 }, { "epoch": 0.5745940203369252, "grad_norm": 0.3364506959915161, "learning_rate": 9.04677533664068e-05, "loss": 1.598, "step": 1893 }, { "epoch": 0.5748975565336166, "grad_norm": 0.41443008184432983, "learning_rate": 9.04626911005366e-05, "loss": 1.91, "step": 1894 }, { "epoch": 0.5752010927303081, "grad_norm": 0.3931877315044403, "learning_rate": 9.045762883466639e-05, "loss": 2.0265, "step": 1895 }, { "epoch": 0.5755046289269995, "grad_norm": 0.3768281042575836, "learning_rate": 9.04525665687962e-05, "loss": 2.0458, "step": 1896 }, { "epoch": 0.575808165123691, "grad_norm": 0.726582407951355, "learning_rate": 9.0447504302926e-05, "loss": 1.5708, "step": 1897 }, { "epoch": 0.5761117013203825, "grad_norm": 0.4031538665294647, "learning_rate": 9.044244203705579e-05, "loss": 1.6248, "step": 1898 }, { "epoch": 0.5764152375170739, "grad_norm": 0.3605407476425171, "learning_rate": 9.043737977118558e-05, "loss": 1.9504, "step": 1899 }, { "epoch": 0.5767187737137653, "grad_norm": 0.3802354633808136, "learning_rate": 9.043231750531538e-05, "loss": 2.0487, "step": 1900 }, { "epoch": 0.5770223099104568, "grad_norm": 0.41240641474723816, "learning_rate": 9.042725523944519e-05, "loss": 1.9202, "step": 1901 }, { "epoch": 0.5773258461071483, "grad_norm": 0.36771708726882935, "learning_rate": 9.042219297357498e-05, "loss": 2.2273, "step": 1902 }, { "epoch": 0.5776293823038398, "grad_norm": 0.4182611405849457, "learning_rate": 9.041713070770478e-05, "loss": 1.8689, "step": 1903 }, { "epoch": 0.5779329185005312, "grad_norm": 0.39633724093437195, "learning_rate": 9.041206844183457e-05, "loss": 1.9744, "step": 1904 }, { "epoch": 0.5782364546972226, "grad_norm": 0.3978392481803894, "learning_rate": 9.040700617596437e-05, "loss": 1.6662, "step": 1905 }, { "epoch": 0.5785399908939141, "grad_norm": 0.3734360635280609, "learning_rate": 9.040194391009416e-05, "loss": 1.0948, "step": 1906 }, { "epoch": 0.5788435270906056, "grad_norm": 0.403392493724823, "learning_rate": 9.039688164422397e-05, "loss": 2.0251, "step": 1907 }, { "epoch": 0.579147063287297, "grad_norm": 0.350067138671875, "learning_rate": 9.039181937835376e-05, "loss": 1.543, "step": 1908 }, { "epoch": 0.5794505994839885, "grad_norm": 0.4273326098918915, "learning_rate": 9.038675711248356e-05, "loss": 1.8694, "step": 1909 }, { "epoch": 0.5797541356806799, "grad_norm": 0.4815780222415924, "learning_rate": 9.038169484661335e-05, "loss": 1.9565, "step": 1910 }, { "epoch": 0.5800576718773713, "grad_norm": 0.5379179120063782, "learning_rate": 9.037663258074315e-05, "loss": 1.9631, "step": 1911 }, { "epoch": 0.5803612080740629, "grad_norm": 0.47738704085350037, "learning_rate": 9.037157031487294e-05, "loss": 1.878, "step": 1912 }, { "epoch": 0.5806647442707543, "grad_norm": 0.426543653011322, "learning_rate": 9.036650804900274e-05, "loss": 2.0392, "step": 1913 }, { "epoch": 0.5809682804674458, "grad_norm": 0.38239404559135437, "learning_rate": 9.036144578313253e-05, "loss": 1.9695, "step": 1914 }, { "epoch": 0.5812718166641372, "grad_norm": 0.40093934535980225, "learning_rate": 9.035638351726233e-05, "loss": 1.9695, "step": 1915 }, { "epoch": 0.5815753528608286, "grad_norm": 0.3865903317928314, "learning_rate": 9.035132125139214e-05, "loss": 1.7925, "step": 1916 }, { "epoch": 0.58187888905752, "grad_norm": 0.6183242201805115, "learning_rate": 9.034625898552193e-05, "loss": 1.853, "step": 1917 }, { "epoch": 0.5821824252542116, "grad_norm": 0.4869506061077118, "learning_rate": 9.034119671965173e-05, "loss": 1.9418, "step": 1918 }, { "epoch": 0.582485961450903, "grad_norm": 0.40212881565093994, "learning_rate": 9.033613445378152e-05, "loss": 2.0259, "step": 1919 }, { "epoch": 0.5827894976475945, "grad_norm": 0.7224326729774475, "learning_rate": 9.033107218791131e-05, "loss": 1.933, "step": 1920 }, { "epoch": 0.5830930338442859, "grad_norm": 0.4369768500328064, "learning_rate": 9.032600992204111e-05, "loss": 1.7931, "step": 1921 }, { "epoch": 0.5833965700409773, "grad_norm": 0.3920018672943115, "learning_rate": 9.03209476561709e-05, "loss": 1.913, "step": 1922 }, { "epoch": 0.5837001062376689, "grad_norm": 0.5076978206634521, "learning_rate": 9.03158853903007e-05, "loss": 2.0081, "step": 1923 }, { "epoch": 0.5840036424343603, "grad_norm": 0.38379955291748047, "learning_rate": 9.03108231244305e-05, "loss": 2.0153, "step": 1924 }, { "epoch": 0.5843071786310517, "grad_norm": 0.4367254376411438, "learning_rate": 9.030576085856029e-05, "loss": 2.1004, "step": 1925 }, { "epoch": 0.5846107148277432, "grad_norm": 0.37425291538238525, "learning_rate": 9.03006985926901e-05, "loss": 1.6777, "step": 1926 }, { "epoch": 0.5849142510244346, "grad_norm": 0.37925392389297485, "learning_rate": 9.029563632681989e-05, "loss": 2.1164, "step": 1927 }, { "epoch": 0.5852177872211262, "grad_norm": 0.41369903087615967, "learning_rate": 9.029057406094969e-05, "loss": 1.7252, "step": 1928 }, { "epoch": 0.5855213234178176, "grad_norm": 0.3528081476688385, "learning_rate": 9.028551179507948e-05, "loss": 1.9181, "step": 1929 }, { "epoch": 0.585824859614509, "grad_norm": 0.38274556398391724, "learning_rate": 9.028044952920928e-05, "loss": 1.8585, "step": 1930 }, { "epoch": 0.5861283958112005, "grad_norm": 0.4036407768726349, "learning_rate": 9.027538726333907e-05, "loss": 1.9679, "step": 1931 }, { "epoch": 0.5864319320078919, "grad_norm": 0.34841248393058777, "learning_rate": 9.027032499746887e-05, "loss": 1.8571, "step": 1932 }, { "epoch": 0.5867354682045834, "grad_norm": 0.3821954131126404, "learning_rate": 9.026526273159866e-05, "loss": 1.8175, "step": 1933 }, { "epoch": 0.5870390044012749, "grad_norm": 0.3724253475666046, "learning_rate": 9.026020046572846e-05, "loss": 1.9549, "step": 1934 }, { "epoch": 0.5873425405979663, "grad_norm": 0.40494081377983093, "learning_rate": 9.025513819985826e-05, "loss": 2.0013, "step": 1935 }, { "epoch": 0.5876460767946577, "grad_norm": 0.7746275663375854, "learning_rate": 9.025007593398806e-05, "loss": 1.6739, "step": 1936 }, { "epoch": 0.5879496129913492, "grad_norm": 0.34239932894706726, "learning_rate": 9.024501366811785e-05, "loss": 1.1999, "step": 1937 }, { "epoch": 0.5882531491880407, "grad_norm": 0.40239185094833374, "learning_rate": 9.023995140224765e-05, "loss": 1.956, "step": 1938 }, { "epoch": 0.5885566853847322, "grad_norm": 0.4756642282009125, "learning_rate": 9.023488913637744e-05, "loss": 1.5705, "step": 1939 }, { "epoch": 0.5888602215814236, "grad_norm": 0.412263959646225, "learning_rate": 9.022982687050724e-05, "loss": 1.8187, "step": 1940 }, { "epoch": 0.589163757778115, "grad_norm": 0.4178502857685089, "learning_rate": 9.022476460463703e-05, "loss": 1.8955, "step": 1941 }, { "epoch": 0.5894672939748065, "grad_norm": 0.4619811475276947, "learning_rate": 9.021970233876683e-05, "loss": 1.9968, "step": 1942 }, { "epoch": 0.589770830171498, "grad_norm": 0.42839181423187256, "learning_rate": 9.021464007289662e-05, "loss": 1.5708, "step": 1943 }, { "epoch": 0.5900743663681894, "grad_norm": 0.4423038363456726, "learning_rate": 9.020957780702643e-05, "loss": 1.9684, "step": 1944 }, { "epoch": 0.5903779025648809, "grad_norm": 0.3898191452026367, "learning_rate": 9.020451554115623e-05, "loss": 1.9497, "step": 1945 }, { "epoch": 0.5906814387615723, "grad_norm": 0.701366662979126, "learning_rate": 9.019945327528603e-05, "loss": 1.7624, "step": 1946 }, { "epoch": 0.5909849749582637, "grad_norm": 0.32581913471221924, "learning_rate": 9.019439100941583e-05, "loss": 2.1484, "step": 1947 }, { "epoch": 0.5912885111549552, "grad_norm": 0.4372369050979614, "learning_rate": 9.018932874354562e-05, "loss": 1.6075, "step": 1948 }, { "epoch": 0.5915920473516467, "grad_norm": 0.39428946375846863, "learning_rate": 9.018426647767542e-05, "loss": 1.6004, "step": 1949 }, { "epoch": 0.5918955835483382, "grad_norm": 0.3934183120727539, "learning_rate": 9.017920421180521e-05, "loss": 1.9358, "step": 1950 }, { "epoch": 0.5921991197450296, "grad_norm": 0.42696380615234375, "learning_rate": 9.017414194593501e-05, "loss": 1.9646, "step": 1951 }, { "epoch": 0.592502655941721, "grad_norm": 0.38243913650512695, "learning_rate": 9.01690796800648e-05, "loss": 1.9946, "step": 1952 }, { "epoch": 0.5928061921384125, "grad_norm": 0.4068431556224823, "learning_rate": 9.01640174141946e-05, "loss": 2.0786, "step": 1953 }, { "epoch": 0.593109728335104, "grad_norm": 0.44560736417770386, "learning_rate": 9.015895514832439e-05, "loss": 1.7514, "step": 1954 }, { "epoch": 0.5934132645317954, "grad_norm": 0.4143114686012268, "learning_rate": 9.01538928824542e-05, "loss": 2.083, "step": 1955 }, { "epoch": 0.5937168007284869, "grad_norm": 0.45947229862213135, "learning_rate": 9.0148830616584e-05, "loss": 1.9313, "step": 1956 }, { "epoch": 0.5940203369251783, "grad_norm": 2.7487032413482666, "learning_rate": 9.014376835071379e-05, "loss": 1.7648, "step": 1957 }, { "epoch": 0.5943238731218697, "grad_norm": 0.3856576979160309, "learning_rate": 9.013870608484358e-05, "loss": 2.1141, "step": 1958 }, { "epoch": 0.5946274093185613, "grad_norm": 0.3741602897644043, "learning_rate": 9.013364381897338e-05, "loss": 1.6458, "step": 1959 }, { "epoch": 0.5949309455152527, "grad_norm": 0.3791872262954712, "learning_rate": 9.012858155310317e-05, "loss": 1.433, "step": 1960 }, { "epoch": 0.5952344817119442, "grad_norm": 0.32848575711250305, "learning_rate": 9.012351928723297e-05, "loss": 1.6748, "step": 1961 }, { "epoch": 0.5955380179086356, "grad_norm": 0.4328818917274475, "learning_rate": 9.011845702136276e-05, "loss": 1.8309, "step": 1962 }, { "epoch": 0.595841554105327, "grad_norm": 0.40931710600852966, "learning_rate": 9.011339475549256e-05, "loss": 2.0837, "step": 1963 }, { "epoch": 0.5961450903020186, "grad_norm": 0.3625456690788269, "learning_rate": 9.010833248962235e-05, "loss": 1.8895, "step": 1964 }, { "epoch": 0.59644862649871, "grad_norm": 0.33840253949165344, "learning_rate": 9.010327022375216e-05, "loss": 1.8706, "step": 1965 }, { "epoch": 0.5967521626954014, "grad_norm": 0.38374340534210205, "learning_rate": 9.009820795788196e-05, "loss": 1.8782, "step": 1966 }, { "epoch": 0.5970556988920929, "grad_norm": 0.41515031456947327, "learning_rate": 9.009314569201175e-05, "loss": 1.7455, "step": 1967 }, { "epoch": 0.5973592350887843, "grad_norm": 0.35676872730255127, "learning_rate": 9.008808342614155e-05, "loss": 1.7706, "step": 1968 }, { "epoch": 0.5976627712854758, "grad_norm": 0.4770854711532593, "learning_rate": 9.008302116027134e-05, "loss": 2.0954, "step": 1969 }, { "epoch": 0.5979663074821673, "grad_norm": 0.3612794876098633, "learning_rate": 9.007795889440114e-05, "loss": 2.1938, "step": 1970 }, { "epoch": 0.5982698436788587, "grad_norm": 0.5067920684814453, "learning_rate": 9.007289662853093e-05, "loss": 1.2096, "step": 1971 }, { "epoch": 0.5985733798755501, "grad_norm": 0.4193328022956848, "learning_rate": 9.006783436266073e-05, "loss": 1.6632, "step": 1972 }, { "epoch": 0.5988769160722416, "grad_norm": 0.41445595026016235, "learning_rate": 9.006277209679052e-05, "loss": 2.0237, "step": 1973 }, { "epoch": 0.599180452268933, "grad_norm": 0.4083717167377472, "learning_rate": 9.005770983092033e-05, "loss": 2.1022, "step": 1974 }, { "epoch": 0.5994839884656246, "grad_norm": 0.4897996485233307, "learning_rate": 9.005264756505012e-05, "loss": 1.6074, "step": 1975 }, { "epoch": 0.599787524662316, "grad_norm": 0.46923205256462097, "learning_rate": 9.004758529917992e-05, "loss": 2.0915, "step": 1976 }, { "epoch": 0.6000910608590074, "grad_norm": 0.37507691979408264, "learning_rate": 9.004252303330971e-05, "loss": 1.6793, "step": 1977 }, { "epoch": 0.6003945970556989, "grad_norm": 0.3973737061023712, "learning_rate": 9.003746076743951e-05, "loss": 2.0935, "step": 1978 }, { "epoch": 0.6006981332523903, "grad_norm": 0.40313783288002014, "learning_rate": 9.00323985015693e-05, "loss": 1.7405, "step": 1979 }, { "epoch": 0.6010016694490818, "grad_norm": 0.36169835925102234, "learning_rate": 9.00273362356991e-05, "loss": 2.1843, "step": 1980 }, { "epoch": 0.6013052056457733, "grad_norm": 0.41355371475219727, "learning_rate": 9.002227396982889e-05, "loss": 2.0969, "step": 1981 }, { "epoch": 0.6016087418424647, "grad_norm": 0.42378634214401245, "learning_rate": 9.001721170395869e-05, "loss": 1.9652, "step": 1982 }, { "epoch": 0.6019122780391561, "grad_norm": 0.42945531010627747, "learning_rate": 9.00121494380885e-05, "loss": 2.0625, "step": 1983 }, { "epoch": 0.6022158142358476, "grad_norm": 0.5348070859909058, "learning_rate": 9.000708717221829e-05, "loss": 1.3785, "step": 1984 }, { "epoch": 0.6025193504325391, "grad_norm": 0.35933446884155273, "learning_rate": 9.000202490634809e-05, "loss": 1.5254, "step": 1985 }, { "epoch": 0.6028228866292306, "grad_norm": 0.42495015263557434, "learning_rate": 8.999696264047788e-05, "loss": 2.1669, "step": 1986 }, { "epoch": 0.603126422825922, "grad_norm": 0.43792733550071716, "learning_rate": 8.999190037460767e-05, "loss": 1.8057, "step": 1987 }, { "epoch": 0.6034299590226134, "grad_norm": 0.39334404468536377, "learning_rate": 8.998683810873747e-05, "loss": 1.8157, "step": 1988 }, { "epoch": 0.6037334952193049, "grad_norm": 0.38974860310554504, "learning_rate": 8.998177584286726e-05, "loss": 1.8293, "step": 1989 }, { "epoch": 0.6040370314159964, "grad_norm": 0.44241687655448914, "learning_rate": 8.997671357699707e-05, "loss": 2.2078, "step": 1990 }, { "epoch": 0.6043405676126878, "grad_norm": 0.40700820088386536, "learning_rate": 8.997165131112687e-05, "loss": 1.9656, "step": 1991 }, { "epoch": 0.6046441038093793, "grad_norm": 0.3992595076560974, "learning_rate": 8.996658904525666e-05, "loss": 1.9423, "step": 1992 }, { "epoch": 0.6049476400060707, "grad_norm": 0.3922860622406006, "learning_rate": 8.996152677938646e-05, "loss": 2.1253, "step": 1993 }, { "epoch": 0.6052511762027621, "grad_norm": 0.3843866288661957, "learning_rate": 8.995646451351627e-05, "loss": 2.0756, "step": 1994 }, { "epoch": 0.6055547123994537, "grad_norm": 0.3822995722293854, "learning_rate": 8.995140224764606e-05, "loss": 1.9475, "step": 1995 }, { "epoch": 0.6058582485961451, "grad_norm": 0.4001995325088501, "learning_rate": 8.994633998177585e-05, "loss": 1.9781, "step": 1996 }, { "epoch": 0.6061617847928366, "grad_norm": 0.3775820732116699, "learning_rate": 8.994127771590565e-05, "loss": 1.7857, "step": 1997 }, { "epoch": 0.606465320989528, "grad_norm": 0.4260796308517456, "learning_rate": 8.993621545003544e-05, "loss": 1.6416, "step": 1998 }, { "epoch": 0.6067688571862194, "grad_norm": 0.39824166893959045, "learning_rate": 8.993115318416524e-05, "loss": 1.7657, "step": 1999 }, { "epoch": 0.6070723933829109, "grad_norm": 0.46430447697639465, "learning_rate": 8.992609091829503e-05, "loss": 1.8802, "step": 2000 }, { "epoch": 0.6073759295796024, "grad_norm": 0.4773789048194885, "learning_rate": 8.992102865242483e-05, "loss": 1.8786, "step": 2001 }, { "epoch": 0.6076794657762938, "grad_norm": 0.4296311140060425, "learning_rate": 8.991596638655462e-05, "loss": 1.9457, "step": 2002 }, { "epoch": 0.6079830019729853, "grad_norm": 0.41193845868110657, "learning_rate": 8.991090412068442e-05, "loss": 1.5856, "step": 2003 }, { "epoch": 0.6082865381696767, "grad_norm": 0.43040478229522705, "learning_rate": 8.990584185481423e-05, "loss": 2.1432, "step": 2004 }, { "epoch": 0.6085900743663681, "grad_norm": 0.5215789079666138, "learning_rate": 8.990077958894402e-05, "loss": 1.9584, "step": 2005 }, { "epoch": 0.6088936105630597, "grad_norm": 0.4370077848434448, "learning_rate": 8.989571732307382e-05, "loss": 1.5612, "step": 2006 }, { "epoch": 0.6091971467597511, "grad_norm": 0.4200492203235626, "learning_rate": 8.989065505720361e-05, "loss": 1.9493, "step": 2007 }, { "epoch": 0.6095006829564426, "grad_norm": 0.39453452825546265, "learning_rate": 8.98855927913334e-05, "loss": 1.9529, "step": 2008 }, { "epoch": 0.609804219153134, "grad_norm": 0.4478731155395508, "learning_rate": 8.98805305254632e-05, "loss": 2.0098, "step": 2009 }, { "epoch": 0.6101077553498254, "grad_norm": 0.39515209197998047, "learning_rate": 8.9875468259593e-05, "loss": 1.958, "step": 2010 }, { "epoch": 0.610411291546517, "grad_norm": 0.3660414516925812, "learning_rate": 8.987040599372279e-05, "loss": 1.9538, "step": 2011 }, { "epoch": 0.6107148277432084, "grad_norm": 0.3517032861709595, "learning_rate": 8.986534372785259e-05, "loss": 1.8833, "step": 2012 }, { "epoch": 0.6110183639398998, "grad_norm": 0.6502123475074768, "learning_rate": 8.98602814619824e-05, "loss": 2.1106, "step": 2013 }, { "epoch": 0.6113219001365913, "grad_norm": 0.4674864709377289, "learning_rate": 8.985521919611219e-05, "loss": 1.8986, "step": 2014 }, { "epoch": 0.6116254363332827, "grad_norm": 0.4143102467060089, "learning_rate": 8.985015693024198e-05, "loss": 1.7635, "step": 2015 }, { "epoch": 0.6119289725299742, "grad_norm": 0.4329308867454529, "learning_rate": 8.984509466437178e-05, "loss": 1.648, "step": 2016 }, { "epoch": 0.6122325087266657, "grad_norm": 0.34939324855804443, "learning_rate": 8.984003239850157e-05, "loss": 1.7641, "step": 2017 }, { "epoch": 0.6125360449233571, "grad_norm": 0.4234546720981598, "learning_rate": 8.983497013263137e-05, "loss": 1.8691, "step": 2018 }, { "epoch": 0.6128395811200485, "grad_norm": 0.7465669512748718, "learning_rate": 8.982990786676116e-05, "loss": 2.0573, "step": 2019 }, { "epoch": 0.61314311731674, "grad_norm": 0.36259400844573975, "learning_rate": 8.982484560089096e-05, "loss": 2.0654, "step": 2020 }, { "epoch": 0.6134466535134315, "grad_norm": 0.3918156623840332, "learning_rate": 8.981978333502075e-05, "loss": 2.1658, "step": 2021 }, { "epoch": 0.613750189710123, "grad_norm": 0.3924868404865265, "learning_rate": 8.981472106915056e-05, "loss": 1.9306, "step": 2022 }, { "epoch": 0.6140537259068144, "grad_norm": 0.7729107141494751, "learning_rate": 8.980965880328035e-05, "loss": 1.5911, "step": 2023 }, { "epoch": 0.6143572621035058, "grad_norm": 0.4199913442134857, "learning_rate": 8.980459653741015e-05, "loss": 1.9833, "step": 2024 }, { "epoch": 0.6146607983001973, "grad_norm": 0.40258511900901794, "learning_rate": 8.979953427153994e-05, "loss": 1.9178, "step": 2025 }, { "epoch": 0.6149643344968888, "grad_norm": 0.3859613239765167, "learning_rate": 8.979447200566974e-05, "loss": 1.7585, "step": 2026 }, { "epoch": 0.6152678706935802, "grad_norm": 0.42048898339271545, "learning_rate": 8.978940973979953e-05, "loss": 1.953, "step": 2027 }, { "epoch": 0.6155714068902717, "grad_norm": 0.39669451117515564, "learning_rate": 8.978434747392933e-05, "loss": 1.6132, "step": 2028 }, { "epoch": 0.6158749430869631, "grad_norm": 0.6679760217666626, "learning_rate": 8.977928520805912e-05, "loss": 1.9793, "step": 2029 }, { "epoch": 0.6161784792836545, "grad_norm": 0.4262414276599884, "learning_rate": 8.977422294218892e-05, "loss": 1.8002, "step": 2030 }, { "epoch": 0.616482015480346, "grad_norm": 0.3899317681789398, "learning_rate": 8.976916067631871e-05, "loss": 2.0585, "step": 2031 }, { "epoch": 0.6167855516770375, "grad_norm": 0.5402538776397705, "learning_rate": 8.976409841044852e-05, "loss": 1.6196, "step": 2032 }, { "epoch": 0.617089087873729, "grad_norm": 0.40976065397262573, "learning_rate": 8.975903614457832e-05, "loss": 1.6395, "step": 2033 }, { "epoch": 0.6173926240704204, "grad_norm": 0.5633681416511536, "learning_rate": 8.975397387870811e-05, "loss": 2.117, "step": 2034 }, { "epoch": 0.6176961602671118, "grad_norm": 0.4393365681171417, "learning_rate": 8.974891161283792e-05, "loss": 2.0793, "step": 2035 }, { "epoch": 0.6179996964638033, "grad_norm": 0.3982914388179779, "learning_rate": 8.974384934696771e-05, "loss": 1.9608, "step": 2036 }, { "epoch": 0.6183032326604948, "grad_norm": 0.41689884662628174, "learning_rate": 8.973878708109751e-05, "loss": 2.0265, "step": 2037 }, { "epoch": 0.6186067688571862, "grad_norm": 0.46085304021835327, "learning_rate": 8.97337248152273e-05, "loss": 1.8147, "step": 2038 }, { "epoch": 0.6189103050538777, "grad_norm": 0.4536703824996948, "learning_rate": 8.97286625493571e-05, "loss": 1.861, "step": 2039 }, { "epoch": 0.6192138412505691, "grad_norm": 0.4332161843776703, "learning_rate": 8.97236002834869e-05, "loss": 1.5579, "step": 2040 }, { "epoch": 0.6195173774472605, "grad_norm": 0.3992736041545868, "learning_rate": 8.971853801761669e-05, "loss": 1.9451, "step": 2041 }, { "epoch": 0.6198209136439521, "grad_norm": 0.39501848816871643, "learning_rate": 8.971347575174648e-05, "loss": 1.9228, "step": 2042 }, { "epoch": 0.6201244498406435, "grad_norm": 0.44429096579551697, "learning_rate": 8.970841348587629e-05, "loss": 1.6277, "step": 2043 }, { "epoch": 0.620427986037335, "grad_norm": 0.5381520390510559, "learning_rate": 8.970335122000609e-05, "loss": 1.4842, "step": 2044 }, { "epoch": 0.6207315222340264, "grad_norm": 0.3807857036590576, "learning_rate": 8.969828895413588e-05, "loss": 1.9271, "step": 2045 }, { "epoch": 0.6210350584307178, "grad_norm": 0.4522213041782379, "learning_rate": 8.969322668826568e-05, "loss": 1.747, "step": 2046 }, { "epoch": 0.6213385946274094, "grad_norm": 0.37111926078796387, "learning_rate": 8.968816442239547e-05, "loss": 1.9039, "step": 2047 }, { "epoch": 0.6216421308241008, "grad_norm": 0.7616074681282043, "learning_rate": 8.968310215652527e-05, "loss": 2.0966, "step": 2048 }, { "epoch": 0.6219456670207922, "grad_norm": 0.42669475078582764, "learning_rate": 8.967803989065506e-05, "loss": 2.0545, "step": 2049 }, { "epoch": 0.6222492032174837, "grad_norm": 0.3741990625858307, "learning_rate": 8.967297762478486e-05, "loss": 1.9964, "step": 2050 }, { "epoch": 0.6225527394141751, "grad_norm": 0.38759157061576843, "learning_rate": 8.966791535891465e-05, "loss": 1.8108, "step": 2051 }, { "epoch": 0.6228562756108666, "grad_norm": 0.424344003200531, "learning_rate": 8.966285309304446e-05, "loss": 1.8411, "step": 2052 }, { "epoch": 0.6231598118075581, "grad_norm": 0.3969878852367401, "learning_rate": 8.965779082717425e-05, "loss": 2.1543, "step": 2053 }, { "epoch": 0.6234633480042495, "grad_norm": 0.4188143312931061, "learning_rate": 8.965272856130405e-05, "loss": 1.8696, "step": 2054 }, { "epoch": 0.623766884200941, "grad_norm": 0.42061781883239746, "learning_rate": 8.964766629543384e-05, "loss": 2.0641, "step": 2055 }, { "epoch": 0.6240704203976324, "grad_norm": 0.3898957371711731, "learning_rate": 8.964260402956364e-05, "loss": 1.9002, "step": 2056 }, { "epoch": 0.6243739565943238, "grad_norm": 0.4503360688686371, "learning_rate": 8.963754176369343e-05, "loss": 1.8302, "step": 2057 }, { "epoch": 0.6246774927910154, "grad_norm": 0.44356441497802734, "learning_rate": 8.963247949782323e-05, "loss": 1.589, "step": 2058 }, { "epoch": 0.6249810289877068, "grad_norm": 0.3989812433719635, "learning_rate": 8.962741723195302e-05, "loss": 2.1592, "step": 2059 }, { "epoch": 0.6252845651843982, "grad_norm": 0.3959946632385254, "learning_rate": 8.962235496608282e-05, "loss": 2.0769, "step": 2060 }, { "epoch": 0.6255881013810897, "grad_norm": 0.37260061502456665, "learning_rate": 8.961729270021262e-05, "loss": 1.2118, "step": 2061 }, { "epoch": 0.6258916375777811, "grad_norm": 0.5840566754341125, "learning_rate": 8.961223043434242e-05, "loss": 2.1389, "step": 2062 }, { "epoch": 0.6261951737744726, "grad_norm": 0.44715970754623413, "learning_rate": 8.960716816847221e-05, "loss": 1.6985, "step": 2063 }, { "epoch": 0.6264987099711641, "grad_norm": 0.40047672390937805, "learning_rate": 8.960210590260201e-05, "loss": 1.9951, "step": 2064 }, { "epoch": 0.6268022461678555, "grad_norm": 0.4090017080307007, "learning_rate": 8.95970436367318e-05, "loss": 1.8258, "step": 2065 }, { "epoch": 0.627105782364547, "grad_norm": 0.39617207646369934, "learning_rate": 8.95919813708616e-05, "loss": 1.6203, "step": 2066 }, { "epoch": 0.6274093185612384, "grad_norm": 0.4236812889575958, "learning_rate": 8.95869191049914e-05, "loss": 1.9766, "step": 2067 }, { "epoch": 0.6277128547579299, "grad_norm": 0.560946524143219, "learning_rate": 8.958185683912119e-05, "loss": 2.3476, "step": 2068 }, { "epoch": 0.6280163909546214, "grad_norm": 0.4474948048591614, "learning_rate": 8.957679457325098e-05, "loss": 1.9045, "step": 2069 }, { "epoch": 0.6283199271513128, "grad_norm": 0.47307664155960083, "learning_rate": 8.957173230738078e-05, "loss": 1.9436, "step": 2070 }, { "epoch": 0.6286234633480042, "grad_norm": 0.4518156945705414, "learning_rate": 8.956667004151059e-05, "loss": 1.6307, "step": 2071 }, { "epoch": 0.6289269995446957, "grad_norm": 0.3907441794872284, "learning_rate": 8.956160777564038e-05, "loss": 1.9797, "step": 2072 }, { "epoch": 0.6292305357413872, "grad_norm": 0.7602722644805908, "learning_rate": 8.955654550977018e-05, "loss": 1.467, "step": 2073 }, { "epoch": 0.6295340719380786, "grad_norm": 0.4778296947479248, "learning_rate": 8.955148324389997e-05, "loss": 1.592, "step": 2074 }, { "epoch": 0.6298376081347701, "grad_norm": 0.5303634405136108, "learning_rate": 8.954642097802977e-05, "loss": 1.9301, "step": 2075 }, { "epoch": 0.6301411443314615, "grad_norm": 0.37609922885894775, "learning_rate": 8.954135871215956e-05, "loss": 2.0062, "step": 2076 }, { "epoch": 0.630444680528153, "grad_norm": 0.3961854875087738, "learning_rate": 8.953629644628936e-05, "loss": 2.0677, "step": 2077 }, { "epoch": 0.6307482167248445, "grad_norm": 0.43167874217033386, "learning_rate": 8.953123418041915e-05, "loss": 1.7997, "step": 2078 }, { "epoch": 0.6310517529215359, "grad_norm": 0.4458840489387512, "learning_rate": 8.952617191454896e-05, "loss": 1.6458, "step": 2079 }, { "epoch": 0.6313552891182274, "grad_norm": 0.8174628615379333, "learning_rate": 8.952110964867875e-05, "loss": 1.3436, "step": 2080 }, { "epoch": 0.6316588253149188, "grad_norm": 0.40314528346061707, "learning_rate": 8.951604738280855e-05, "loss": 1.648, "step": 2081 }, { "epoch": 0.6319623615116102, "grad_norm": 2.845505952835083, "learning_rate": 8.951098511693836e-05, "loss": 2.0645, "step": 2082 }, { "epoch": 0.6322658977083017, "grad_norm": 0.41686686873435974, "learning_rate": 8.950592285106815e-05, "loss": 1.9434, "step": 2083 }, { "epoch": 0.6325694339049932, "grad_norm": 5.579742431640625, "learning_rate": 8.950086058519795e-05, "loss": 2.1942, "step": 2084 }, { "epoch": 0.6328729701016846, "grad_norm": 0.40614521503448486, "learning_rate": 8.949579831932774e-05, "loss": 1.7526, "step": 2085 }, { "epoch": 0.6331765062983761, "grad_norm": 0.8609543442726135, "learning_rate": 8.949073605345754e-05, "loss": 1.4042, "step": 2086 }, { "epoch": 0.6334800424950675, "grad_norm": 0.451594740152359, "learning_rate": 8.948567378758733e-05, "loss": 2.085, "step": 2087 }, { "epoch": 0.6337835786917589, "grad_norm": 0.48546943068504333, "learning_rate": 8.948061152171713e-05, "loss": 1.8299, "step": 2088 }, { "epoch": 0.6340871148884505, "grad_norm": 0.4435253143310547, "learning_rate": 8.947554925584692e-05, "loss": 1.9175, "step": 2089 }, { "epoch": 0.6343906510851419, "grad_norm": 0.4109974801540375, "learning_rate": 8.947048698997671e-05, "loss": 1.6901, "step": 2090 }, { "epoch": 0.6346941872818334, "grad_norm": 0.4205876290798187, "learning_rate": 8.946542472410652e-05, "loss": 1.6726, "step": 2091 }, { "epoch": 0.6349977234785248, "grad_norm": 0.4449016749858856, "learning_rate": 8.946036245823632e-05, "loss": 2.2222, "step": 2092 }, { "epoch": 0.6353012596752162, "grad_norm": 0.44236990809440613, "learning_rate": 8.945530019236611e-05, "loss": 2.2247, "step": 2093 }, { "epoch": 0.6356047958719078, "grad_norm": 0.4700889587402344, "learning_rate": 8.945023792649591e-05, "loss": 1.6692, "step": 2094 }, { "epoch": 0.6359083320685992, "grad_norm": 0.42525413632392883, "learning_rate": 8.94451756606257e-05, "loss": 1.5726, "step": 2095 }, { "epoch": 0.6362118682652906, "grad_norm": 0.3753025233745575, "learning_rate": 8.94401133947555e-05, "loss": 1.8565, "step": 2096 }, { "epoch": 0.6365154044619821, "grad_norm": 0.3908928632736206, "learning_rate": 8.943505112888529e-05, "loss": 1.9776, "step": 2097 }, { "epoch": 0.6368189406586735, "grad_norm": 0.4409022927284241, "learning_rate": 8.942998886301509e-05, "loss": 1.8076, "step": 2098 }, { "epoch": 0.637122476855365, "grad_norm": 0.4187740981578827, "learning_rate": 8.942492659714488e-05, "loss": 2.0177, "step": 2099 }, { "epoch": 0.6374260130520565, "grad_norm": 0.4491542875766754, "learning_rate": 8.941986433127469e-05, "loss": 1.817, "step": 2100 }, { "epoch": 0.6377295492487479, "grad_norm": 0.4964027404785156, "learning_rate": 8.941480206540448e-05, "loss": 1.618, "step": 2101 }, { "epoch": 0.6380330854454394, "grad_norm": 0.4044201672077179, "learning_rate": 8.940973979953428e-05, "loss": 1.4983, "step": 2102 }, { "epoch": 0.6383366216421308, "grad_norm": 0.4115463197231293, "learning_rate": 8.940467753366407e-05, "loss": 1.9043, "step": 2103 }, { "epoch": 0.6386401578388223, "grad_norm": 0.39303481578826904, "learning_rate": 8.939961526779387e-05, "loss": 1.434, "step": 2104 }, { "epoch": 0.6389436940355138, "grad_norm": 0.3657127618789673, "learning_rate": 8.939455300192366e-05, "loss": 1.9805, "step": 2105 }, { "epoch": 0.6392472302322052, "grad_norm": 0.414969265460968, "learning_rate": 8.938949073605346e-05, "loss": 1.745, "step": 2106 }, { "epoch": 0.6395507664288966, "grad_norm": 0.4560664892196655, "learning_rate": 8.938442847018325e-05, "loss": 1.6991, "step": 2107 }, { "epoch": 0.6398543026255881, "grad_norm": 0.4387153387069702, "learning_rate": 8.937936620431305e-05, "loss": 1.7517, "step": 2108 }, { "epoch": 0.6401578388222796, "grad_norm": 0.39767786860466003, "learning_rate": 8.937430393844284e-05, "loss": 1.4668, "step": 2109 }, { "epoch": 0.640461375018971, "grad_norm": 0.4568266272544861, "learning_rate": 8.936924167257265e-05, "loss": 1.7829, "step": 2110 }, { "epoch": 0.6407649112156625, "grad_norm": 0.3790264427661896, "learning_rate": 8.936417940670245e-05, "loss": 1.8335, "step": 2111 }, { "epoch": 0.6410684474123539, "grad_norm": 0.39457952976226807, "learning_rate": 8.935911714083224e-05, "loss": 1.687, "step": 2112 }, { "epoch": 0.6413719836090453, "grad_norm": 0.32461151480674744, "learning_rate": 8.935405487496204e-05, "loss": 1.4628, "step": 2113 }, { "epoch": 0.6416755198057368, "grad_norm": 0.36477747559547424, "learning_rate": 8.934899260909183e-05, "loss": 1.9856, "step": 2114 }, { "epoch": 0.6419790560024283, "grad_norm": 0.4230240285396576, "learning_rate": 8.934393034322163e-05, "loss": 2.037, "step": 2115 }, { "epoch": 0.6422825921991198, "grad_norm": 0.3885568380355835, "learning_rate": 8.933886807735142e-05, "loss": 1.7229, "step": 2116 }, { "epoch": 0.6425861283958112, "grad_norm": 0.46484097838401794, "learning_rate": 8.933380581148121e-05, "loss": 1.9656, "step": 2117 }, { "epoch": 0.6428896645925026, "grad_norm": 0.3922126591205597, "learning_rate": 8.932874354561101e-05, "loss": 1.652, "step": 2118 }, { "epoch": 0.6431932007891941, "grad_norm": 0.4676629602909088, "learning_rate": 8.932368127974082e-05, "loss": 2.1305, "step": 2119 }, { "epoch": 0.6434967369858856, "grad_norm": 0.3731312155723572, "learning_rate": 8.931861901387061e-05, "loss": 2.0093, "step": 2120 }, { "epoch": 0.643800273182577, "grad_norm": 0.44040486216545105, "learning_rate": 8.931355674800041e-05, "loss": 1.9446, "step": 2121 }, { "epoch": 0.6441038093792685, "grad_norm": 0.3713996112346649, "learning_rate": 8.93084944821302e-05, "loss": 2.1773, "step": 2122 }, { "epoch": 0.6444073455759599, "grad_norm": 0.3798523247241974, "learning_rate": 8.930343221626e-05, "loss": 1.7056, "step": 2123 }, { "epoch": 0.6447108817726513, "grad_norm": 0.4175238013267517, "learning_rate": 8.92983699503898e-05, "loss": 1.966, "step": 2124 }, { "epoch": 0.6450144179693429, "grad_norm": 0.40957748889923096, "learning_rate": 8.92933076845196e-05, "loss": 1.6715, "step": 2125 }, { "epoch": 0.6453179541660343, "grad_norm": 0.46979820728302, "learning_rate": 8.92882454186494e-05, "loss": 1.8604, "step": 2126 }, { "epoch": 0.6456214903627258, "grad_norm": 0.3671952188014984, "learning_rate": 8.928318315277919e-05, "loss": 1.1596, "step": 2127 }, { "epoch": 0.6459250265594172, "grad_norm": 0.363288551568985, "learning_rate": 8.927812088690898e-05, "loss": 2.14, "step": 2128 }, { "epoch": 0.6462285627561086, "grad_norm": 0.3632570505142212, "learning_rate": 8.927305862103878e-05, "loss": 2.0746, "step": 2129 }, { "epoch": 0.6465320989528002, "grad_norm": 0.5912741422653198, "learning_rate": 8.926799635516859e-05, "loss": 2.1828, "step": 2130 }, { "epoch": 0.6468356351494916, "grad_norm": 0.3740077018737793, "learning_rate": 8.926293408929838e-05, "loss": 1.9439, "step": 2131 }, { "epoch": 0.647139171346183, "grad_norm": 0.5042386651039124, "learning_rate": 8.925787182342818e-05, "loss": 1.1905, "step": 2132 }, { "epoch": 0.6474427075428745, "grad_norm": 0.39761942625045776, "learning_rate": 8.925280955755797e-05, "loss": 1.3763, "step": 2133 }, { "epoch": 0.6477462437395659, "grad_norm": 0.6671484112739563, "learning_rate": 8.924774729168777e-05, "loss": 2.2412, "step": 2134 }, { "epoch": 0.6480497799362575, "grad_norm": 0.40470197796821594, "learning_rate": 8.924268502581756e-05, "loss": 2.0007, "step": 2135 }, { "epoch": 0.6483533161329489, "grad_norm": 1.5381660461425781, "learning_rate": 8.923762275994736e-05, "loss": 2.101, "step": 2136 }, { "epoch": 0.6486568523296403, "grad_norm": 0.39186039566993713, "learning_rate": 8.923256049407715e-05, "loss": 1.8134, "step": 2137 }, { "epoch": 0.6489603885263318, "grad_norm": 0.351701021194458, "learning_rate": 8.922749822820695e-05, "loss": 1.6034, "step": 2138 }, { "epoch": 0.6492639247230232, "grad_norm": 1.6814361810684204, "learning_rate": 8.922243596233675e-05, "loss": 1.6059, "step": 2139 }, { "epoch": 0.6495674609197146, "grad_norm": 0.4578597843647003, "learning_rate": 8.921737369646655e-05, "loss": 1.5098, "step": 2140 }, { "epoch": 0.6498709971164062, "grad_norm": 0.44496893882751465, "learning_rate": 8.921231143059634e-05, "loss": 2.0494, "step": 2141 }, { "epoch": 0.6501745333130976, "grad_norm": 0.3577191233634949, "learning_rate": 8.920724916472614e-05, "loss": 1.8182, "step": 2142 }, { "epoch": 0.650478069509789, "grad_norm": 0.42032182216644287, "learning_rate": 8.920218689885593e-05, "loss": 2.0543, "step": 2143 }, { "epoch": 0.6507816057064805, "grad_norm": 0.3442663550376892, "learning_rate": 8.919712463298573e-05, "loss": 1.699, "step": 2144 }, { "epoch": 0.6510851419031719, "grad_norm": 0.479557067155838, "learning_rate": 8.919206236711552e-05, "loss": 2.3661, "step": 2145 }, { "epoch": 0.6513886780998634, "grad_norm": 0.4386119246482849, "learning_rate": 8.918700010124532e-05, "loss": 1.9253, "step": 2146 }, { "epoch": 0.6516922142965549, "grad_norm": 0.38390249013900757, "learning_rate": 8.918193783537511e-05, "loss": 1.7774, "step": 2147 }, { "epoch": 0.6519957504932463, "grad_norm": 0.3760508596897125, "learning_rate": 8.917687556950491e-05, "loss": 2.0236, "step": 2148 }, { "epoch": 0.6522992866899378, "grad_norm": 0.41757336258888245, "learning_rate": 8.917181330363472e-05, "loss": 1.8861, "step": 2149 }, { "epoch": 0.6526028228866292, "grad_norm": 0.40840038657188416, "learning_rate": 8.916675103776451e-05, "loss": 1.405, "step": 2150 }, { "epoch": 0.6529063590833207, "grad_norm": 0.3661898672580719, "learning_rate": 8.91616887718943e-05, "loss": 1.6111, "step": 2151 }, { "epoch": 0.6532098952800122, "grad_norm": 0.42466968297958374, "learning_rate": 8.91566265060241e-05, "loss": 1.6713, "step": 2152 }, { "epoch": 0.6535134314767036, "grad_norm": 0.5033214092254639, "learning_rate": 8.91515642401539e-05, "loss": 2.0999, "step": 2153 }, { "epoch": 0.653816967673395, "grad_norm": 0.3836124837398529, "learning_rate": 8.914650197428369e-05, "loss": 2.2854, "step": 2154 }, { "epoch": 0.6541205038700865, "grad_norm": 0.42189982533454895, "learning_rate": 8.914143970841348e-05, "loss": 2.154, "step": 2155 }, { "epoch": 0.654424040066778, "grad_norm": 0.3981611430644989, "learning_rate": 8.913637744254328e-05, "loss": 1.6845, "step": 2156 }, { "epoch": 0.6547275762634694, "grad_norm": 0.4584210515022278, "learning_rate": 8.913131517667307e-05, "loss": 1.8831, "step": 2157 }, { "epoch": 0.6550311124601609, "grad_norm": 0.42922207713127136, "learning_rate": 8.912625291080288e-05, "loss": 1.8187, "step": 2158 }, { "epoch": 0.6553346486568523, "grad_norm": 0.4891490638256073, "learning_rate": 8.912119064493268e-05, "loss": 2.0393, "step": 2159 }, { "epoch": 0.6556381848535437, "grad_norm": 0.44946572184562683, "learning_rate": 8.911612837906247e-05, "loss": 2.0362, "step": 2160 }, { "epoch": 0.6559417210502353, "grad_norm": 0.5170040726661682, "learning_rate": 8.911106611319227e-05, "loss": 1.0148, "step": 2161 }, { "epoch": 0.6562452572469267, "grad_norm": 0.45176056027412415, "learning_rate": 8.910600384732206e-05, "loss": 1.9087, "step": 2162 }, { "epoch": 0.6565487934436182, "grad_norm": 0.3974052965641022, "learning_rate": 8.910094158145186e-05, "loss": 1.7759, "step": 2163 }, { "epoch": 0.6568523296403096, "grad_norm": 0.4142087399959564, "learning_rate": 8.909587931558165e-05, "loss": 1.8639, "step": 2164 }, { "epoch": 0.657155865837001, "grad_norm": 0.4220983386039734, "learning_rate": 8.909081704971145e-05, "loss": 1.4122, "step": 2165 }, { "epoch": 0.6574594020336925, "grad_norm": 0.37949880957603455, "learning_rate": 8.908575478384124e-05, "loss": 1.9989, "step": 2166 }, { "epoch": 0.657762938230384, "grad_norm": 0.35547998547554016, "learning_rate": 8.908069251797105e-05, "loss": 1.9514, "step": 2167 }, { "epoch": 0.6580664744270754, "grad_norm": 0.4009557366371155, "learning_rate": 8.907563025210084e-05, "loss": 1.7043, "step": 2168 }, { "epoch": 0.6583700106237669, "grad_norm": 0.38969942927360535, "learning_rate": 8.907056798623065e-05, "loss": 1.8512, "step": 2169 }, { "epoch": 0.6586735468204583, "grad_norm": 0.4015234708786011, "learning_rate": 8.906550572036045e-05, "loss": 1.9016, "step": 2170 }, { "epoch": 0.6589770830171497, "grad_norm": 0.45555707812309265, "learning_rate": 8.906044345449024e-05, "loss": 2.1088, "step": 2171 }, { "epoch": 0.6592806192138413, "grad_norm": 0.3557066321372986, "learning_rate": 8.905538118862004e-05, "loss": 1.6273, "step": 2172 }, { "epoch": 0.6595841554105327, "grad_norm": 0.44995880126953125, "learning_rate": 8.905031892274983e-05, "loss": 1.7946, "step": 2173 }, { "epoch": 0.6598876916072242, "grad_norm": 0.40973517298698425, "learning_rate": 8.904525665687963e-05, "loss": 1.7571, "step": 2174 }, { "epoch": 0.6601912278039156, "grad_norm": 0.3300071656703949, "learning_rate": 8.904019439100942e-05, "loss": 1.2977, "step": 2175 }, { "epoch": 0.660494764000607, "grad_norm": 0.4011610746383667, "learning_rate": 8.903513212513922e-05, "loss": 2.0934, "step": 2176 }, { "epoch": 0.6607983001972986, "grad_norm": 0.35637664794921875, "learning_rate": 8.903006985926901e-05, "loss": 1.9632, "step": 2177 }, { "epoch": 0.66110183639399, "grad_norm": 0.45524492859840393, "learning_rate": 8.902500759339882e-05, "loss": 1.8951, "step": 2178 }, { "epoch": 0.6614053725906814, "grad_norm": 0.45453348755836487, "learning_rate": 8.901994532752861e-05, "loss": 1.84, "step": 2179 }, { "epoch": 0.6617089087873729, "grad_norm": 0.4106372892856598, "learning_rate": 8.901488306165841e-05, "loss": 2.171, "step": 2180 }, { "epoch": 0.6620124449840643, "grad_norm": 0.6188797950744629, "learning_rate": 8.90098207957882e-05, "loss": 1.3866, "step": 2181 }, { "epoch": 0.6623159811807559, "grad_norm": 0.3466598093509674, "learning_rate": 8.9004758529918e-05, "loss": 1.7782, "step": 2182 }, { "epoch": 0.6626195173774473, "grad_norm": 0.4912582337856293, "learning_rate": 8.899969626404779e-05, "loss": 1.2761, "step": 2183 }, { "epoch": 0.6629230535741387, "grad_norm": 0.46108344197273254, "learning_rate": 8.899463399817759e-05, "loss": 1.4373, "step": 2184 }, { "epoch": 0.6632265897708302, "grad_norm": 0.5269731879234314, "learning_rate": 8.898957173230738e-05, "loss": 1.4146, "step": 2185 }, { "epoch": 0.6635301259675216, "grad_norm": 0.4078417122364044, "learning_rate": 8.898450946643718e-05, "loss": 2.2392, "step": 2186 }, { "epoch": 0.6638336621642131, "grad_norm": 0.36829376220703125, "learning_rate": 8.897944720056697e-05, "loss": 2.1447, "step": 2187 }, { "epoch": 0.6641371983609046, "grad_norm": 0.38769134879112244, "learning_rate": 8.897438493469678e-05, "loss": 1.5877, "step": 2188 }, { "epoch": 0.664440734557596, "grad_norm": 0.3847033381462097, "learning_rate": 8.896932266882658e-05, "loss": 1.9341, "step": 2189 }, { "epoch": 0.6647442707542874, "grad_norm": 0.47725987434387207, "learning_rate": 8.896426040295637e-05, "loss": 2.122, "step": 2190 }, { "epoch": 0.6650478069509789, "grad_norm": 0.4192405045032501, "learning_rate": 8.895919813708617e-05, "loss": 2.1324, "step": 2191 }, { "epoch": 0.6653513431476703, "grad_norm": 0.5160967707633972, "learning_rate": 8.895413587121596e-05, "loss": 1.4909, "step": 2192 }, { "epoch": 0.6656548793443618, "grad_norm": 0.437773734331131, "learning_rate": 8.894907360534575e-05, "loss": 1.7785, "step": 2193 }, { "epoch": 0.6659584155410533, "grad_norm": 0.36092495918273926, "learning_rate": 8.894401133947555e-05, "loss": 1.9376, "step": 2194 }, { "epoch": 0.6662619517377447, "grad_norm": 0.4263538122177124, "learning_rate": 8.893894907360534e-05, "loss": 2.1226, "step": 2195 }, { "epoch": 0.6665654879344362, "grad_norm": 0.41431042551994324, "learning_rate": 8.893388680773514e-05, "loss": 2.1941, "step": 2196 }, { "epoch": 0.6668690241311276, "grad_norm": 0.4508149027824402, "learning_rate": 8.892882454186495e-05, "loss": 1.8846, "step": 2197 }, { "epoch": 0.6671725603278191, "grad_norm": 0.3481595516204834, "learning_rate": 8.892376227599474e-05, "loss": 1.9913, "step": 2198 }, { "epoch": 0.6674760965245106, "grad_norm": 0.420114129781723, "learning_rate": 8.891870001012454e-05, "loss": 1.5997, "step": 2199 }, { "epoch": 0.667779632721202, "grad_norm": 0.44123902916908264, "learning_rate": 8.891363774425433e-05, "loss": 2.112, "step": 2200 }, { "epoch": 0.6680831689178934, "grad_norm": 0.4254309833049774, "learning_rate": 8.890857547838413e-05, "loss": 1.8562, "step": 2201 }, { "epoch": 0.6683867051145849, "grad_norm": 0.3660505414009094, "learning_rate": 8.890351321251392e-05, "loss": 1.8612, "step": 2202 }, { "epoch": 0.6686902413112764, "grad_norm": 0.3969692587852478, "learning_rate": 8.889845094664372e-05, "loss": 2.2214, "step": 2203 }, { "epoch": 0.6689937775079678, "grad_norm": 0.36976855993270874, "learning_rate": 8.889338868077351e-05, "loss": 1.8667, "step": 2204 }, { "epoch": 0.6692973137046593, "grad_norm": 0.44680026173591614, "learning_rate": 8.88883264149033e-05, "loss": 2.0648, "step": 2205 }, { "epoch": 0.6696008499013507, "grad_norm": 0.40876418352127075, "learning_rate": 8.888326414903311e-05, "loss": 2.027, "step": 2206 }, { "epoch": 0.6699043860980421, "grad_norm": 0.35822147130966187, "learning_rate": 8.887820188316291e-05, "loss": 1.8987, "step": 2207 }, { "epoch": 0.6702079222947337, "grad_norm": 0.41419175267219543, "learning_rate": 8.88731396172927e-05, "loss": 2.114, "step": 2208 }, { "epoch": 0.6705114584914251, "grad_norm": 0.3790142834186554, "learning_rate": 8.88680773514225e-05, "loss": 1.8878, "step": 2209 }, { "epoch": 0.6708149946881166, "grad_norm": 0.42493680119514465, "learning_rate": 8.88630150855523e-05, "loss": 1.8914, "step": 2210 }, { "epoch": 0.671118530884808, "grad_norm": 0.34427767992019653, "learning_rate": 8.885795281968209e-05, "loss": 2.1252, "step": 2211 }, { "epoch": 0.6714220670814994, "grad_norm": 0.43361228704452515, "learning_rate": 8.885289055381188e-05, "loss": 1.677, "step": 2212 }, { "epoch": 0.671725603278191, "grad_norm": 0.3793098032474518, "learning_rate": 8.884782828794169e-05, "loss": 1.9914, "step": 2213 }, { "epoch": 0.6720291394748824, "grad_norm": 0.4355357587337494, "learning_rate": 8.884276602207149e-05, "loss": 2.0324, "step": 2214 }, { "epoch": 0.6723326756715738, "grad_norm": 0.41514819860458374, "learning_rate": 8.883770375620128e-05, "loss": 1.6874, "step": 2215 }, { "epoch": 0.6726362118682653, "grad_norm": 0.4044744074344635, "learning_rate": 8.883264149033108e-05, "loss": 1.8879, "step": 2216 }, { "epoch": 0.6729397480649567, "grad_norm": 0.38892972469329834, "learning_rate": 8.882757922446088e-05, "loss": 1.8009, "step": 2217 }, { "epoch": 0.6732432842616483, "grad_norm": 0.41450080275535583, "learning_rate": 8.882251695859068e-05, "loss": 1.7411, "step": 2218 }, { "epoch": 0.6735468204583397, "grad_norm": 0.41548603773117065, "learning_rate": 8.881745469272047e-05, "loss": 2.158, "step": 2219 }, { "epoch": 0.6738503566550311, "grad_norm": 0.4178054928779602, "learning_rate": 8.881239242685027e-05, "loss": 1.7454, "step": 2220 }, { "epoch": 0.6741538928517226, "grad_norm": 1.661353588104248, "learning_rate": 8.880733016098006e-05, "loss": 1.6626, "step": 2221 }, { "epoch": 0.674457429048414, "grad_norm": 0.40055370330810547, "learning_rate": 8.880226789510986e-05, "loss": 1.9827, "step": 2222 }, { "epoch": 0.6747609652451054, "grad_norm": 0.41323450207710266, "learning_rate": 8.879720562923965e-05, "loss": 1.6335, "step": 2223 }, { "epoch": 0.675064501441797, "grad_norm": 0.4238756597042084, "learning_rate": 8.879214336336945e-05, "loss": 1.7076, "step": 2224 }, { "epoch": 0.6753680376384884, "grad_norm": 0.39978405833244324, "learning_rate": 8.878708109749924e-05, "loss": 1.6837, "step": 2225 }, { "epoch": 0.6756715738351798, "grad_norm": 0.4585546851158142, "learning_rate": 8.878201883162904e-05, "loss": 2.0821, "step": 2226 }, { "epoch": 0.6759751100318713, "grad_norm": 0.40500447154045105, "learning_rate": 8.877695656575885e-05, "loss": 1.7139, "step": 2227 }, { "epoch": 0.6762786462285627, "grad_norm": 0.4829038381576538, "learning_rate": 8.877189429988864e-05, "loss": 1.9029, "step": 2228 }, { "epoch": 0.6765821824252543, "grad_norm": 0.4088328182697296, "learning_rate": 8.876683203401844e-05, "loss": 2.0233, "step": 2229 }, { "epoch": 0.6768857186219457, "grad_norm": 0.4438897967338562, "learning_rate": 8.876176976814823e-05, "loss": 1.824, "step": 2230 }, { "epoch": 0.6771892548186371, "grad_norm": 0.4573661684989929, "learning_rate": 8.875670750227802e-05, "loss": 2.0605, "step": 2231 }, { "epoch": 0.6774927910153286, "grad_norm": 0.5133582949638367, "learning_rate": 8.875164523640782e-05, "loss": 1.7161, "step": 2232 }, { "epoch": 0.67779632721202, "grad_norm": 0.3775865137577057, "learning_rate": 8.874658297053761e-05, "loss": 1.513, "step": 2233 }, { "epoch": 0.6780998634087115, "grad_norm": 0.4122471809387207, "learning_rate": 8.874152070466741e-05, "loss": 1.8155, "step": 2234 }, { "epoch": 0.678403399605403, "grad_norm": 0.6338900327682495, "learning_rate": 8.87364584387972e-05, "loss": 1.5857, "step": 2235 }, { "epoch": 0.6787069358020944, "grad_norm": 0.41020557284355164, "learning_rate": 8.873139617292701e-05, "loss": 1.7888, "step": 2236 }, { "epoch": 0.6790104719987858, "grad_norm": 0.3700268268585205, "learning_rate": 8.872633390705681e-05, "loss": 1.5622, "step": 2237 }, { "epoch": 0.6793140081954773, "grad_norm": 0.4358409345149994, "learning_rate": 8.87212716411866e-05, "loss": 2.0885, "step": 2238 }, { "epoch": 0.6796175443921688, "grad_norm": 0.4212052822113037, "learning_rate": 8.87162093753164e-05, "loss": 2.0268, "step": 2239 }, { "epoch": 0.6799210805888602, "grad_norm": 0.7132793068885803, "learning_rate": 8.871114710944619e-05, "loss": 2.0234, "step": 2240 }, { "epoch": 0.6802246167855517, "grad_norm": 0.38493213057518005, "learning_rate": 8.870608484357599e-05, "loss": 1.9204, "step": 2241 }, { "epoch": 0.6805281529822431, "grad_norm": 0.3852492570877075, "learning_rate": 8.870102257770578e-05, "loss": 1.8373, "step": 2242 }, { "epoch": 0.6808316891789346, "grad_norm": 0.5180450081825256, "learning_rate": 8.869596031183558e-05, "loss": 1.3947, "step": 2243 }, { "epoch": 0.6811352253756261, "grad_norm": 0.46512570977211, "learning_rate": 8.869089804596537e-05, "loss": 2.2241, "step": 2244 }, { "epoch": 0.6814387615723175, "grad_norm": 0.387101411819458, "learning_rate": 8.868583578009518e-05, "loss": 1.7226, "step": 2245 }, { "epoch": 0.681742297769009, "grad_norm": 0.40807807445526123, "learning_rate": 8.868077351422497e-05, "loss": 1.709, "step": 2246 }, { "epoch": 0.6820458339657004, "grad_norm": 0.4039689600467682, "learning_rate": 8.867571124835477e-05, "loss": 1.6902, "step": 2247 }, { "epoch": 0.6823493701623918, "grad_norm": 0.7101170420646667, "learning_rate": 8.867064898248456e-05, "loss": 1.6965, "step": 2248 }, { "epoch": 0.6826529063590833, "grad_norm": 0.42346715927124023, "learning_rate": 8.866558671661436e-05, "loss": 1.4751, "step": 2249 }, { "epoch": 0.6829564425557748, "grad_norm": 0.42237260937690735, "learning_rate": 8.866052445074415e-05, "loss": 1.8187, "step": 2250 }, { "epoch": 0.6832599787524662, "grad_norm": 0.4752514958381653, "learning_rate": 8.865546218487395e-05, "loss": 1.7068, "step": 2251 }, { "epoch": 0.6835635149491577, "grad_norm": 0.4356227219104767, "learning_rate": 8.865039991900374e-05, "loss": 2.0188, "step": 2252 }, { "epoch": 0.6838670511458491, "grad_norm": 0.5964135527610779, "learning_rate": 8.864533765313354e-05, "loss": 1.5663, "step": 2253 }, { "epoch": 0.6841705873425405, "grad_norm": 0.40307527780532837, "learning_rate": 8.864027538726333e-05, "loss": 1.8429, "step": 2254 }, { "epoch": 0.6844741235392321, "grad_norm": 0.4318184554576874, "learning_rate": 8.863521312139314e-05, "loss": 1.7906, "step": 2255 }, { "epoch": 0.6847776597359235, "grad_norm": 0.4366863965988159, "learning_rate": 8.863015085552294e-05, "loss": 1.9137, "step": 2256 }, { "epoch": 0.685081195932615, "grad_norm": 0.4154497981071472, "learning_rate": 8.862508858965274e-05, "loss": 2.0196, "step": 2257 }, { "epoch": 0.6853847321293064, "grad_norm": 0.39866191148757935, "learning_rate": 8.862002632378254e-05, "loss": 1.9397, "step": 2258 }, { "epoch": 0.6856882683259978, "grad_norm": 0.42318910360336304, "learning_rate": 8.861496405791233e-05, "loss": 2.1399, "step": 2259 }, { "epoch": 0.6859918045226894, "grad_norm": 0.4558073878288269, "learning_rate": 8.860990179204213e-05, "loss": 1.9519, "step": 2260 }, { "epoch": 0.6862953407193808, "grad_norm": 0.45745977759361267, "learning_rate": 8.860483952617192e-05, "loss": 1.3459, "step": 2261 }, { "epoch": 0.6865988769160722, "grad_norm": 0.4418570399284363, "learning_rate": 8.859977726030172e-05, "loss": 1.9207, "step": 2262 }, { "epoch": 0.6869024131127637, "grad_norm": 0.3995778560638428, "learning_rate": 8.859471499443151e-05, "loss": 1.8226, "step": 2263 }, { "epoch": 0.6872059493094551, "grad_norm": 0.5238348841667175, "learning_rate": 8.858965272856131e-05, "loss": 1.7849, "step": 2264 }, { "epoch": 0.6875094855061467, "grad_norm": 0.3357613682746887, "learning_rate": 8.85845904626911e-05, "loss": 1.4962, "step": 2265 }, { "epoch": 0.6878130217028381, "grad_norm": 0.45454543828964233, "learning_rate": 8.857952819682091e-05, "loss": 1.8111, "step": 2266 }, { "epoch": 0.6881165578995295, "grad_norm": 0.4192381501197815, "learning_rate": 8.85744659309507e-05, "loss": 1.7505, "step": 2267 }, { "epoch": 0.688420094096221, "grad_norm": 0.4213124215602875, "learning_rate": 8.85694036650805e-05, "loss": 1.9307, "step": 2268 }, { "epoch": 0.6887236302929124, "grad_norm": 0.42022505402565, "learning_rate": 8.85643413992103e-05, "loss": 1.9889, "step": 2269 }, { "epoch": 0.6890271664896039, "grad_norm": 0.42116105556488037, "learning_rate": 8.855927913334009e-05, "loss": 1.7128, "step": 2270 }, { "epoch": 0.6893307026862954, "grad_norm": 1.0270413160324097, "learning_rate": 8.855421686746988e-05, "loss": 1.3238, "step": 2271 }, { "epoch": 0.6896342388829868, "grad_norm": 0.42168179154396057, "learning_rate": 8.854915460159968e-05, "loss": 1.6624, "step": 2272 }, { "epoch": 0.6899377750796782, "grad_norm": 0.3498344421386719, "learning_rate": 8.854409233572947e-05, "loss": 1.7382, "step": 2273 }, { "epoch": 0.6902413112763697, "grad_norm": 0.42905229330062866, "learning_rate": 8.853903006985927e-05, "loss": 2.0058, "step": 2274 }, { "epoch": 0.6905448474730611, "grad_norm": 0.41980302333831787, "learning_rate": 8.853396780398908e-05, "loss": 1.4661, "step": 2275 }, { "epoch": 0.6908483836697527, "grad_norm": 0.5022958517074585, "learning_rate": 8.852890553811887e-05, "loss": 1.6155, "step": 2276 }, { "epoch": 0.6911519198664441, "grad_norm": 0.4186939597129822, "learning_rate": 8.852384327224867e-05, "loss": 1.7008, "step": 2277 }, { "epoch": 0.6914554560631355, "grad_norm": 0.39082199335098267, "learning_rate": 8.851878100637846e-05, "loss": 2.1537, "step": 2278 }, { "epoch": 0.691758992259827, "grad_norm": 0.42378294467926025, "learning_rate": 8.851371874050826e-05, "loss": 2.0288, "step": 2279 }, { "epoch": 0.6920625284565184, "grad_norm": 0.36108916997909546, "learning_rate": 8.850865647463805e-05, "loss": 2.0387, "step": 2280 }, { "epoch": 0.6923660646532099, "grad_norm": 0.4613724648952484, "learning_rate": 8.850359420876785e-05, "loss": 1.7304, "step": 2281 }, { "epoch": 0.6926696008499014, "grad_norm": 0.4140026867389679, "learning_rate": 8.849853194289764e-05, "loss": 1.9746, "step": 2282 }, { "epoch": 0.6929731370465928, "grad_norm": 0.43233832716941833, "learning_rate": 8.849346967702744e-05, "loss": 1.9922, "step": 2283 }, { "epoch": 0.6932766732432842, "grad_norm": 0.8021528124809265, "learning_rate": 8.848840741115724e-05, "loss": 2.1604, "step": 2284 }, { "epoch": 0.6935802094399757, "grad_norm": 0.4009002447128296, "learning_rate": 8.848334514528704e-05, "loss": 1.4224, "step": 2285 }, { "epoch": 0.6938837456366672, "grad_norm": 0.3914124369621277, "learning_rate": 8.847828287941683e-05, "loss": 1.8222, "step": 2286 }, { "epoch": 0.6941872818333586, "grad_norm": 0.41309481859207153, "learning_rate": 8.847322061354663e-05, "loss": 1.9296, "step": 2287 }, { "epoch": 0.6944908180300501, "grad_norm": 0.5561639666557312, "learning_rate": 8.846815834767642e-05, "loss": 1.3577, "step": 2288 }, { "epoch": 0.6947943542267415, "grad_norm": 0.41699445247650146, "learning_rate": 8.846309608180622e-05, "loss": 1.8751, "step": 2289 }, { "epoch": 0.695097890423433, "grad_norm": 0.3643866181373596, "learning_rate": 8.845803381593601e-05, "loss": 1.5099, "step": 2290 }, { "epoch": 0.6954014266201245, "grad_norm": 0.44212576746940613, "learning_rate": 8.845297155006581e-05, "loss": 1.8293, "step": 2291 }, { "epoch": 0.6957049628168159, "grad_norm": 0.36881545186042786, "learning_rate": 8.84479092841956e-05, "loss": 1.6359, "step": 2292 }, { "epoch": 0.6960084990135074, "grad_norm": 0.3785519003868103, "learning_rate": 8.84428470183254e-05, "loss": 1.63, "step": 2293 }, { "epoch": 0.6963120352101988, "grad_norm": 0.6767301559448242, "learning_rate": 8.84377847524552e-05, "loss": 1.0786, "step": 2294 }, { "epoch": 0.6966155714068902, "grad_norm": 0.38619041442871094, "learning_rate": 8.8432722486585e-05, "loss": 1.4952, "step": 2295 }, { "epoch": 0.6969191076035818, "grad_norm": 0.6698863506317139, "learning_rate": 8.84276602207148e-05, "loss": 2.0425, "step": 2296 }, { "epoch": 0.6972226438002732, "grad_norm": 0.4446139931678772, "learning_rate": 8.842259795484459e-05, "loss": 1.7511, "step": 2297 }, { "epoch": 0.6975261799969646, "grad_norm": 0.6287797093391418, "learning_rate": 8.841753568897438e-05, "loss": 1.8198, "step": 2298 }, { "epoch": 0.6978297161936561, "grad_norm": 0.3704979717731476, "learning_rate": 8.841247342310418e-05, "loss": 1.44, "step": 2299 }, { "epoch": 0.6981332523903475, "grad_norm": 0.4163188636302948, "learning_rate": 8.840741115723397e-05, "loss": 1.623, "step": 2300 }, { "epoch": 0.6984367885870391, "grad_norm": 0.3959861993789673, "learning_rate": 8.840234889136377e-05, "loss": 1.9259, "step": 2301 }, { "epoch": 0.6987403247837305, "grad_norm": 0.5066584348678589, "learning_rate": 8.839728662549358e-05, "loss": 1.8163, "step": 2302 }, { "epoch": 0.6990438609804219, "grad_norm": 0.4553223252296448, "learning_rate": 8.839222435962337e-05, "loss": 1.4527, "step": 2303 }, { "epoch": 0.6993473971771134, "grad_norm": 0.47616320848464966, "learning_rate": 8.838716209375317e-05, "loss": 1.8694, "step": 2304 }, { "epoch": 0.6996509333738048, "grad_norm": 0.4735199511051178, "learning_rate": 8.838209982788297e-05, "loss": 1.7023, "step": 2305 }, { "epoch": 0.6999544695704962, "grad_norm": 0.427415668964386, "learning_rate": 8.837703756201277e-05, "loss": 2.0255, "step": 2306 }, { "epoch": 0.7002580057671878, "grad_norm": 0.35021111369132996, "learning_rate": 8.837197529614256e-05, "loss": 2.1431, "step": 2307 }, { "epoch": 0.7005615419638792, "grad_norm": 0.35905367136001587, "learning_rate": 8.836691303027236e-05, "loss": 1.2157, "step": 2308 }, { "epoch": 0.7008650781605706, "grad_norm": 0.39521825313568115, "learning_rate": 8.836185076440215e-05, "loss": 1.8051, "step": 2309 }, { "epoch": 0.7011686143572621, "grad_norm": 0.42580053210258484, "learning_rate": 8.835678849853195e-05, "loss": 1.9845, "step": 2310 }, { "epoch": 0.7014721505539535, "grad_norm": 0.6940016150474548, "learning_rate": 8.835172623266174e-05, "loss": 2.0824, "step": 2311 }, { "epoch": 0.7017756867506451, "grad_norm": 0.44518351554870605, "learning_rate": 8.834666396679154e-05, "loss": 2.0919, "step": 2312 }, { "epoch": 0.7020792229473365, "grad_norm": 0.4215528070926666, "learning_rate": 8.834160170092133e-05, "loss": 1.9665, "step": 2313 }, { "epoch": 0.7023827591440279, "grad_norm": 0.4305053651332855, "learning_rate": 8.833653943505114e-05, "loss": 2.0338, "step": 2314 }, { "epoch": 0.7026862953407194, "grad_norm": 0.7952874302864075, "learning_rate": 8.833147716918094e-05, "loss": 1.6609, "step": 2315 }, { "epoch": 0.7029898315374108, "grad_norm": 0.42054691910743713, "learning_rate": 8.832641490331073e-05, "loss": 1.3244, "step": 2316 }, { "epoch": 0.7032933677341023, "grad_norm": 0.3898642659187317, "learning_rate": 8.832135263744053e-05, "loss": 1.5618, "step": 2317 }, { "epoch": 0.7035969039307938, "grad_norm": 0.4959927201271057, "learning_rate": 8.831629037157032e-05, "loss": 1.999, "step": 2318 }, { "epoch": 0.7039004401274852, "grad_norm": 0.4488220810890198, "learning_rate": 8.831122810570012e-05, "loss": 1.946, "step": 2319 }, { "epoch": 0.7042039763241766, "grad_norm": 0.4661828577518463, "learning_rate": 8.830616583982991e-05, "loss": 1.9832, "step": 2320 }, { "epoch": 0.7045075125208681, "grad_norm": 0.3740446865558624, "learning_rate": 8.83011035739597e-05, "loss": 1.8934, "step": 2321 }, { "epoch": 0.7048110487175596, "grad_norm": 0.4690150022506714, "learning_rate": 8.82960413080895e-05, "loss": 1.9083, "step": 2322 }, { "epoch": 0.705114584914251, "grad_norm": 0.3680610954761505, "learning_rate": 8.829097904221931e-05, "loss": 1.9494, "step": 2323 }, { "epoch": 0.7054181211109425, "grad_norm": 0.40403270721435547, "learning_rate": 8.82859167763491e-05, "loss": 2.1369, "step": 2324 }, { "epoch": 0.7057216573076339, "grad_norm": 0.4465244710445404, "learning_rate": 8.82808545104789e-05, "loss": 1.8854, "step": 2325 }, { "epoch": 0.7060251935043254, "grad_norm": 0.45881539583206177, "learning_rate": 8.827579224460869e-05, "loss": 1.6319, "step": 2326 }, { "epoch": 0.7063287297010169, "grad_norm": 0.43863871693611145, "learning_rate": 8.827072997873849e-05, "loss": 1.8284, "step": 2327 }, { "epoch": 0.7066322658977083, "grad_norm": 0.3942803740501404, "learning_rate": 8.826566771286828e-05, "loss": 1.8663, "step": 2328 }, { "epoch": 0.7069358020943998, "grad_norm": 0.42162778973579407, "learning_rate": 8.826060544699808e-05, "loss": 1.8563, "step": 2329 }, { "epoch": 0.7072393382910912, "grad_norm": 0.4088474214076996, "learning_rate": 8.825554318112787e-05, "loss": 2.2026, "step": 2330 }, { "epoch": 0.7075428744877826, "grad_norm": 0.43421268463134766, "learning_rate": 8.825048091525767e-05, "loss": 2.2395, "step": 2331 }, { "epoch": 0.7078464106844741, "grad_norm": 0.4430371820926666, "learning_rate": 8.824541864938746e-05, "loss": 1.7794, "step": 2332 }, { "epoch": 0.7081499468811656, "grad_norm": 0.41605162620544434, "learning_rate": 8.824035638351727e-05, "loss": 2.1565, "step": 2333 }, { "epoch": 0.708453483077857, "grad_norm": 0.3622266352176666, "learning_rate": 8.823529411764706e-05, "loss": 1.4366, "step": 2334 }, { "epoch": 0.7087570192745485, "grad_norm": 0.4030252695083618, "learning_rate": 8.823023185177686e-05, "loss": 1.7127, "step": 2335 }, { "epoch": 0.7090605554712399, "grad_norm": 0.4723038375377655, "learning_rate": 8.822516958590665e-05, "loss": 1.6736, "step": 2336 }, { "epoch": 0.7093640916679314, "grad_norm": 0.39395782351493835, "learning_rate": 8.822010732003645e-05, "loss": 1.8631, "step": 2337 }, { "epoch": 0.7096676278646229, "grad_norm": 0.4566243290901184, "learning_rate": 8.821504505416624e-05, "loss": 1.8543, "step": 2338 }, { "epoch": 0.7099711640613143, "grad_norm": 0.3434160351753235, "learning_rate": 8.820998278829604e-05, "loss": 1.9953, "step": 2339 }, { "epoch": 0.7102747002580058, "grad_norm": 0.5802703499794006, "learning_rate": 8.820492052242583e-05, "loss": 1.7014, "step": 2340 }, { "epoch": 0.7105782364546972, "grad_norm": 1.1318562030792236, "learning_rate": 8.819985825655563e-05, "loss": 1.9866, "step": 2341 }, { "epoch": 0.7108817726513886, "grad_norm": 0.8500426411628723, "learning_rate": 8.819479599068544e-05, "loss": 1.2291, "step": 2342 }, { "epoch": 0.7111853088480802, "grad_norm": 0.4189402163028717, "learning_rate": 8.818973372481523e-05, "loss": 1.9396, "step": 2343 }, { "epoch": 0.7114888450447716, "grad_norm": 0.7509348392486572, "learning_rate": 8.818467145894503e-05, "loss": 2.168, "step": 2344 }, { "epoch": 0.711792381241463, "grad_norm": 0.42071589827537537, "learning_rate": 8.817960919307482e-05, "loss": 1.9493, "step": 2345 }, { "epoch": 0.7120959174381545, "grad_norm": 0.7269922494888306, "learning_rate": 8.817454692720463e-05, "loss": 1.6344, "step": 2346 }, { "epoch": 0.7123994536348459, "grad_norm": 0.5446398854255676, "learning_rate": 8.816948466133442e-05, "loss": 1.9525, "step": 2347 }, { "epoch": 0.7127029898315375, "grad_norm": 0.43752509355545044, "learning_rate": 8.816442239546422e-05, "loss": 1.9692, "step": 2348 }, { "epoch": 0.7130065260282289, "grad_norm": 0.4986307919025421, "learning_rate": 8.815936012959401e-05, "loss": 1.805, "step": 2349 }, { "epoch": 0.7133100622249203, "grad_norm": 0.47302186489105225, "learning_rate": 8.815429786372381e-05, "loss": 2.2322, "step": 2350 }, { "epoch": 0.7136135984216118, "grad_norm": 0.4359181523323059, "learning_rate": 8.81492355978536e-05, "loss": 1.7579, "step": 2351 }, { "epoch": 0.7139171346183032, "grad_norm": 0.9149986505508423, "learning_rate": 8.81441733319834e-05, "loss": 1.4888, "step": 2352 }, { "epoch": 0.7142206708149947, "grad_norm": 0.37777209281921387, "learning_rate": 8.81391110661132e-05, "loss": 2.0646, "step": 2353 }, { "epoch": 0.7145242070116862, "grad_norm": 0.527703046798706, "learning_rate": 8.8134048800243e-05, "loss": 1.9213, "step": 2354 }, { "epoch": 0.7148277432083776, "grad_norm": 0.41505396366119385, "learning_rate": 8.81289865343728e-05, "loss": 1.7469, "step": 2355 }, { "epoch": 0.715131279405069, "grad_norm": 0.44212964177131653, "learning_rate": 8.812392426850259e-05, "loss": 1.2739, "step": 2356 }, { "epoch": 0.7154348156017605, "grad_norm": 0.3863414525985718, "learning_rate": 8.811886200263239e-05, "loss": 2.2481, "step": 2357 }, { "epoch": 0.7157383517984519, "grad_norm": 0.45853668451309204, "learning_rate": 8.811379973676218e-05, "loss": 2.2813, "step": 2358 }, { "epoch": 0.7160418879951435, "grad_norm": 11.417152404785156, "learning_rate": 8.810873747089198e-05, "loss": 2.0079, "step": 2359 }, { "epoch": 0.7163454241918349, "grad_norm": 0.49986690282821655, "learning_rate": 8.810367520502177e-05, "loss": 1.5595, "step": 2360 }, { "epoch": 0.7166489603885263, "grad_norm": 0.4734189510345459, "learning_rate": 8.809861293915156e-05, "loss": 1.8213, "step": 2361 }, { "epoch": 0.7169524965852178, "grad_norm": 0.43908554315567017, "learning_rate": 8.809355067328137e-05, "loss": 1.889, "step": 2362 }, { "epoch": 0.7172560327819092, "grad_norm": 0.48986315727233887, "learning_rate": 8.808848840741117e-05, "loss": 1.9236, "step": 2363 }, { "epoch": 0.7175595689786007, "grad_norm": 0.42691266536712646, "learning_rate": 8.808342614154096e-05, "loss": 2.0663, "step": 2364 }, { "epoch": 0.7178631051752922, "grad_norm": 0.38523420691490173, "learning_rate": 8.807836387567076e-05, "loss": 1.9165, "step": 2365 }, { "epoch": 0.7181666413719836, "grad_norm": 0.29487428069114685, "learning_rate": 8.807330160980055e-05, "loss": 1.4763, "step": 2366 }, { "epoch": 0.718470177568675, "grad_norm": 0.9072676301002502, "learning_rate": 8.806823934393035e-05, "loss": 2.0882, "step": 2367 }, { "epoch": 0.7187737137653665, "grad_norm": 0.37307825684547424, "learning_rate": 8.806317707806014e-05, "loss": 1.4694, "step": 2368 }, { "epoch": 0.719077249962058, "grad_norm": 0.41390106081962585, "learning_rate": 8.805811481218994e-05, "loss": 1.6849, "step": 2369 }, { "epoch": 0.7193807861587495, "grad_norm": 0.4989478886127472, "learning_rate": 8.805305254631973e-05, "loss": 2.146, "step": 2370 }, { "epoch": 0.7196843223554409, "grad_norm": 0.4021719694137573, "learning_rate": 8.804799028044953e-05, "loss": 1.942, "step": 2371 }, { "epoch": 0.7199878585521323, "grad_norm": 0.4169461727142334, "learning_rate": 8.804292801457933e-05, "loss": 2.0278, "step": 2372 }, { "epoch": 0.7202913947488238, "grad_norm": 0.39091089367866516, "learning_rate": 8.803786574870913e-05, "loss": 1.9644, "step": 2373 }, { "epoch": 0.7205949309455153, "grad_norm": 0.45431414246559143, "learning_rate": 8.803280348283892e-05, "loss": 1.5611, "step": 2374 }, { "epoch": 0.7208984671422067, "grad_norm": 0.3896774351596832, "learning_rate": 8.802774121696872e-05, "loss": 1.7838, "step": 2375 }, { "epoch": 0.7212020033388982, "grad_norm": 0.4076644778251648, "learning_rate": 8.802267895109851e-05, "loss": 1.9717, "step": 2376 }, { "epoch": 0.7215055395355896, "grad_norm": 0.4065254032611847, "learning_rate": 8.801761668522831e-05, "loss": 1.6598, "step": 2377 }, { "epoch": 0.721809075732281, "grad_norm": 0.6506657004356384, "learning_rate": 8.80125544193581e-05, "loss": 1.9463, "step": 2378 }, { "epoch": 0.7221126119289726, "grad_norm": 0.46132421493530273, "learning_rate": 8.80074921534879e-05, "loss": 2.0071, "step": 2379 }, { "epoch": 0.722416148125664, "grad_norm": 0.3932840824127197, "learning_rate": 8.800242988761769e-05, "loss": 1.9956, "step": 2380 }, { "epoch": 0.7227196843223554, "grad_norm": 0.4919872581958771, "learning_rate": 8.79973676217475e-05, "loss": 1.658, "step": 2381 }, { "epoch": 0.7230232205190469, "grad_norm": 0.4147129952907562, "learning_rate": 8.79923053558773e-05, "loss": 1.9331, "step": 2382 }, { "epoch": 0.7233267567157383, "grad_norm": 0.4280264973640442, "learning_rate": 8.798724309000709e-05, "loss": 1.7016, "step": 2383 }, { "epoch": 0.7236302929124299, "grad_norm": 0.4554193913936615, "learning_rate": 8.798218082413689e-05, "loss": 2.08, "step": 2384 }, { "epoch": 0.7239338291091213, "grad_norm": 0.4477219581604004, "learning_rate": 8.797711855826668e-05, "loss": 1.6204, "step": 2385 }, { "epoch": 0.7242373653058127, "grad_norm": 0.32487139105796814, "learning_rate": 8.797205629239648e-05, "loss": 1.7271, "step": 2386 }, { "epoch": 0.7245409015025042, "grad_norm": 2.3734400272369385, "learning_rate": 8.796699402652627e-05, "loss": 2.3315, "step": 2387 }, { "epoch": 0.7248444376991956, "grad_norm": 0.3860095739364624, "learning_rate": 8.796193176065606e-05, "loss": 1.4387, "step": 2388 }, { "epoch": 0.725147973895887, "grad_norm": 0.3950817286968231, "learning_rate": 8.795686949478586e-05, "loss": 2.1907, "step": 2389 }, { "epoch": 0.7254515100925786, "grad_norm": 0.37350189685821533, "learning_rate": 8.795180722891567e-05, "loss": 1.5662, "step": 2390 }, { "epoch": 0.72575504628927, "grad_norm": 0.46780696511268616, "learning_rate": 8.794674496304546e-05, "loss": 1.4264, "step": 2391 }, { "epoch": 0.7260585824859614, "grad_norm": 0.3911786377429962, "learning_rate": 8.794168269717527e-05, "loss": 1.8752, "step": 2392 }, { "epoch": 0.7263621186826529, "grad_norm": 0.5619503855705261, "learning_rate": 8.793662043130507e-05, "loss": 2.0974, "step": 2393 }, { "epoch": 0.7266656548793443, "grad_norm": 0.44586917757987976, "learning_rate": 8.793155816543486e-05, "loss": 2.0871, "step": 2394 }, { "epoch": 0.7269691910760359, "grad_norm": 0.3445717990398407, "learning_rate": 8.792649589956466e-05, "loss": 1.3626, "step": 2395 }, { "epoch": 0.7272727272727273, "grad_norm": 0.410279244184494, "learning_rate": 8.792143363369445e-05, "loss": 1.5149, "step": 2396 }, { "epoch": 0.7275762634694187, "grad_norm": 0.3766261339187622, "learning_rate": 8.791637136782425e-05, "loss": 1.8248, "step": 2397 }, { "epoch": 0.7278797996661102, "grad_norm": 0.3978814482688904, "learning_rate": 8.791130910195404e-05, "loss": 1.9437, "step": 2398 }, { "epoch": 0.7281833358628016, "grad_norm": 0.36028674244880676, "learning_rate": 8.790624683608383e-05, "loss": 1.8169, "step": 2399 }, { "epoch": 0.7284868720594931, "grad_norm": 0.38613566756248474, "learning_rate": 8.790118457021363e-05, "loss": 1.3976, "step": 2400 }, { "epoch": 0.7287904082561846, "grad_norm": 0.4028817117214203, "learning_rate": 8.789612230434344e-05, "loss": 1.5574, "step": 2401 }, { "epoch": 0.729093944452876, "grad_norm": 0.4536430239677429, "learning_rate": 8.789106003847323e-05, "loss": 1.7994, "step": 2402 }, { "epoch": 0.7293974806495674, "grad_norm": 0.421176016330719, "learning_rate": 8.788599777260303e-05, "loss": 2.1671, "step": 2403 }, { "epoch": 0.7297010168462589, "grad_norm": 0.42854103446006775, "learning_rate": 8.788093550673282e-05, "loss": 1.5606, "step": 2404 }, { "epoch": 0.7300045530429504, "grad_norm": 0.38108232617378235, "learning_rate": 8.787587324086262e-05, "loss": 1.8415, "step": 2405 }, { "epoch": 0.7303080892396419, "grad_norm": 0.454464852809906, "learning_rate": 8.787081097499241e-05, "loss": 1.5911, "step": 2406 }, { "epoch": 0.7306116254363333, "grad_norm": 0.4082881808280945, "learning_rate": 8.78657487091222e-05, "loss": 2.022, "step": 2407 }, { "epoch": 0.7309151616330247, "grad_norm": 0.4951760470867157, "learning_rate": 8.7860686443252e-05, "loss": 2.12, "step": 2408 }, { "epoch": 0.7312186978297162, "grad_norm": 0.40377724170684814, "learning_rate": 8.78556241773818e-05, "loss": 2.0016, "step": 2409 }, { "epoch": 0.7315222340264077, "grad_norm": 0.403481662273407, "learning_rate": 8.785056191151159e-05, "loss": 1.9914, "step": 2410 }, { "epoch": 0.7318257702230991, "grad_norm": 0.4195014536380768, "learning_rate": 8.78454996456414e-05, "loss": 1.8749, "step": 2411 }, { "epoch": 0.7321293064197906, "grad_norm": 0.40575090050697327, "learning_rate": 8.78404373797712e-05, "loss": 1.8565, "step": 2412 }, { "epoch": 0.732432842616482, "grad_norm": 0.4025145173072815, "learning_rate": 8.783537511390099e-05, "loss": 1.7397, "step": 2413 }, { "epoch": 0.7327363788131734, "grad_norm": 0.35525646805763245, "learning_rate": 8.783031284803078e-05, "loss": 2.086, "step": 2414 }, { "epoch": 0.7330399150098649, "grad_norm": 0.4063604772090912, "learning_rate": 8.782525058216058e-05, "loss": 1.8643, "step": 2415 }, { "epoch": 0.7333434512065564, "grad_norm": 0.3689418137073517, "learning_rate": 8.782018831629037e-05, "loss": 1.9982, "step": 2416 }, { "epoch": 0.7336469874032479, "grad_norm": 0.4066859185695648, "learning_rate": 8.781512605042017e-05, "loss": 1.4861, "step": 2417 }, { "epoch": 0.7339505235999393, "grad_norm": 0.4118275046348572, "learning_rate": 8.781006378454996e-05, "loss": 1.923, "step": 2418 }, { "epoch": 0.7342540597966307, "grad_norm": 0.4238114058971405, "learning_rate": 8.780500151867976e-05, "loss": 1.7656, "step": 2419 }, { "epoch": 0.7345575959933222, "grad_norm": 0.4456924498081207, "learning_rate": 8.779993925280957e-05, "loss": 1.3612, "step": 2420 }, { "epoch": 0.7348611321900137, "grad_norm": 0.3890600800514221, "learning_rate": 8.779487698693936e-05, "loss": 1.7588, "step": 2421 }, { "epoch": 0.7351646683867051, "grad_norm": 0.35149431228637695, "learning_rate": 8.778981472106916e-05, "loss": 1.9639, "step": 2422 }, { "epoch": 0.7354682045833966, "grad_norm": 0.43158209323883057, "learning_rate": 8.778475245519895e-05, "loss": 2.008, "step": 2423 }, { "epoch": 0.735771740780088, "grad_norm": 0.4337812066078186, "learning_rate": 8.777969018932875e-05, "loss": 1.9703, "step": 2424 }, { "epoch": 0.7360752769767794, "grad_norm": 0.36363622546195984, "learning_rate": 8.777462792345854e-05, "loss": 1.712, "step": 2425 }, { "epoch": 0.736378813173471, "grad_norm": 0.3731040358543396, "learning_rate": 8.776956565758833e-05, "loss": 2.11, "step": 2426 }, { "epoch": 0.7366823493701624, "grad_norm": 0.37047722935676575, "learning_rate": 8.776450339171813e-05, "loss": 2.1481, "step": 2427 }, { "epoch": 0.7369858855668538, "grad_norm": 1.4641270637512207, "learning_rate": 8.775944112584792e-05, "loss": 1.9506, "step": 2428 }, { "epoch": 0.7372894217635453, "grad_norm": 0.3781220018863678, "learning_rate": 8.775437885997773e-05, "loss": 1.9651, "step": 2429 }, { "epoch": 0.7375929579602367, "grad_norm": 0.353572815656662, "learning_rate": 8.774931659410753e-05, "loss": 1.8171, "step": 2430 }, { "epoch": 0.7378964941569283, "grad_norm": 0.4101322889328003, "learning_rate": 8.774425432823732e-05, "loss": 1.1176, "step": 2431 }, { "epoch": 0.7382000303536197, "grad_norm": 0.3967879116535187, "learning_rate": 8.773919206236712e-05, "loss": 1.9563, "step": 2432 }, { "epoch": 0.7385035665503111, "grad_norm": 0.41269639134407043, "learning_rate": 8.773412979649691e-05, "loss": 1.8312, "step": 2433 }, { "epoch": 0.7388071027470026, "grad_norm": 0.4125451445579529, "learning_rate": 8.77290675306267e-05, "loss": 1.9907, "step": 2434 }, { "epoch": 0.739110638943694, "grad_norm": 0.447773814201355, "learning_rate": 8.772400526475652e-05, "loss": 2.0819, "step": 2435 }, { "epoch": 0.7394141751403855, "grad_norm": 0.3990137279033661, "learning_rate": 8.771894299888631e-05, "loss": 1.5107, "step": 2436 }, { "epoch": 0.739717711337077, "grad_norm": 0.56345534324646, "learning_rate": 8.77138807330161e-05, "loss": 1.6428, "step": 2437 }, { "epoch": 0.7400212475337684, "grad_norm": 0.42566823959350586, "learning_rate": 8.77088184671459e-05, "loss": 1.5812, "step": 2438 }, { "epoch": 0.7403247837304598, "grad_norm": 0.4182227551937103, "learning_rate": 8.77037562012757e-05, "loss": 1.8838, "step": 2439 }, { "epoch": 0.7406283199271513, "grad_norm": 0.3614279627799988, "learning_rate": 8.76986939354055e-05, "loss": 1.8822, "step": 2440 }, { "epoch": 0.7409318561238427, "grad_norm": 0.4376552104949951, "learning_rate": 8.76936316695353e-05, "loss": 2.0327, "step": 2441 }, { "epoch": 0.7412353923205343, "grad_norm": 0.3294520676136017, "learning_rate": 8.768856940366509e-05, "loss": 1.7045, "step": 2442 }, { "epoch": 0.7415389285172257, "grad_norm": 0.39772239327430725, "learning_rate": 8.768350713779489e-05, "loss": 1.7068, "step": 2443 }, { "epoch": 0.7418424647139171, "grad_norm": 0.4332139194011688, "learning_rate": 8.767844487192468e-05, "loss": 1.5432, "step": 2444 }, { "epoch": 0.7421460009106086, "grad_norm": 0.40865209698677063, "learning_rate": 8.767338260605448e-05, "loss": 2.0378, "step": 2445 }, { "epoch": 0.7424495371073, "grad_norm": 0.3608027696609497, "learning_rate": 8.766832034018427e-05, "loss": 1.4315, "step": 2446 }, { "epoch": 0.7427530733039915, "grad_norm": 0.38700732588768005, "learning_rate": 8.766325807431407e-05, "loss": 1.1194, "step": 2447 }, { "epoch": 0.743056609500683, "grad_norm": 0.4182412624359131, "learning_rate": 8.765819580844386e-05, "loss": 2.0976, "step": 2448 }, { "epoch": 0.7433601456973744, "grad_norm": 0.40817487239837646, "learning_rate": 8.765313354257366e-05, "loss": 1.9548, "step": 2449 }, { "epoch": 0.7436636818940658, "grad_norm": 0.4414690434932709, "learning_rate": 8.764807127670346e-05, "loss": 1.8551, "step": 2450 }, { "epoch": 0.7439672180907573, "grad_norm": 0.393435001373291, "learning_rate": 8.764300901083326e-05, "loss": 1.7419, "step": 2451 }, { "epoch": 0.7442707542874488, "grad_norm": 0.36712646484375, "learning_rate": 8.763794674496305e-05, "loss": 1.9663, "step": 2452 }, { "epoch": 0.7445742904841403, "grad_norm": 0.47254228591918945, "learning_rate": 8.763288447909285e-05, "loss": 1.6821, "step": 2453 }, { "epoch": 0.7448778266808317, "grad_norm": 0.6918731927871704, "learning_rate": 8.762782221322264e-05, "loss": 1.4488, "step": 2454 }, { "epoch": 0.7451813628775231, "grad_norm": 0.4374895393848419, "learning_rate": 8.762275994735244e-05, "loss": 1.8773, "step": 2455 }, { "epoch": 0.7454848990742146, "grad_norm": 0.3807445466518402, "learning_rate": 8.761769768148223e-05, "loss": 1.9019, "step": 2456 }, { "epoch": 0.7457884352709061, "grad_norm": 0.804283857345581, "learning_rate": 8.761263541561203e-05, "loss": 1.6503, "step": 2457 }, { "epoch": 0.7460919714675975, "grad_norm": 0.4001246690750122, "learning_rate": 8.760757314974182e-05, "loss": 1.8858, "step": 2458 }, { "epoch": 0.746395507664289, "grad_norm": 0.4548395574092865, "learning_rate": 8.760251088387163e-05, "loss": 1.4205, "step": 2459 }, { "epoch": 0.7466990438609804, "grad_norm": 0.4249577522277832, "learning_rate": 8.759744861800143e-05, "loss": 2.0217, "step": 2460 }, { "epoch": 0.7470025800576718, "grad_norm": 0.356995165348053, "learning_rate": 8.759238635213122e-05, "loss": 2.1138, "step": 2461 }, { "epoch": 0.7473061162543634, "grad_norm": 0.39245614409446716, "learning_rate": 8.758732408626102e-05, "loss": 1.9274, "step": 2462 }, { "epoch": 0.7476096524510548, "grad_norm": 0.5045961737632751, "learning_rate": 8.758226182039081e-05, "loss": 1.5911, "step": 2463 }, { "epoch": 0.7479131886477463, "grad_norm": 0.4416704773902893, "learning_rate": 8.75771995545206e-05, "loss": 1.7762, "step": 2464 }, { "epoch": 0.7482167248444377, "grad_norm": 0.6794231534004211, "learning_rate": 8.75721372886504e-05, "loss": 2.1296, "step": 2465 }, { "epoch": 0.7485202610411291, "grad_norm": 0.4514855444431305, "learning_rate": 8.75670750227802e-05, "loss": 1.9012, "step": 2466 }, { "epoch": 0.7488237972378207, "grad_norm": 0.3483482003211975, "learning_rate": 8.756201275690999e-05, "loss": 1.3373, "step": 2467 }, { "epoch": 0.7491273334345121, "grad_norm": 0.4310845136642456, "learning_rate": 8.75569504910398e-05, "loss": 1.7987, "step": 2468 }, { "epoch": 0.7494308696312035, "grad_norm": 0.39404624700546265, "learning_rate": 8.755188822516959e-05, "loss": 1.764, "step": 2469 }, { "epoch": 0.749734405827895, "grad_norm": 0.39560645818710327, "learning_rate": 8.754682595929939e-05, "loss": 1.7229, "step": 2470 }, { "epoch": 0.7500379420245864, "grad_norm": 0.4125354588031769, "learning_rate": 8.754176369342918e-05, "loss": 1.6562, "step": 2471 }, { "epoch": 0.7503414782212778, "grad_norm": 0.4781520366668701, "learning_rate": 8.753670142755898e-05, "loss": 2.0631, "step": 2472 }, { "epoch": 0.7506450144179694, "grad_norm": 0.4587598443031311, "learning_rate": 8.753163916168877e-05, "loss": 1.7579, "step": 2473 }, { "epoch": 0.7509485506146608, "grad_norm": 0.44834762811660767, "learning_rate": 8.752657689581857e-05, "loss": 1.8958, "step": 2474 }, { "epoch": 0.7512520868113522, "grad_norm": 0.3947749137878418, "learning_rate": 8.752151462994836e-05, "loss": 2.041, "step": 2475 }, { "epoch": 0.7515556230080437, "grad_norm": 0.38898783922195435, "learning_rate": 8.751645236407816e-05, "loss": 1.8326, "step": 2476 }, { "epoch": 0.7518591592047351, "grad_norm": 0.4535033404827118, "learning_rate": 8.751139009820795e-05, "loss": 1.7797, "step": 2477 }, { "epoch": 0.7521626954014267, "grad_norm": 0.34348368644714355, "learning_rate": 8.750632783233776e-05, "loss": 1.8936, "step": 2478 }, { "epoch": 0.7524662315981181, "grad_norm": 0.36187052726745605, "learning_rate": 8.750126556646755e-05, "loss": 1.4416, "step": 2479 }, { "epoch": 0.7527697677948095, "grad_norm": 0.4151141941547394, "learning_rate": 8.749620330059736e-05, "loss": 2.0148, "step": 2480 }, { "epoch": 0.753073303991501, "grad_norm": 0.39229243993759155, "learning_rate": 8.749114103472716e-05, "loss": 1.622, "step": 2481 }, { "epoch": 0.7533768401881924, "grad_norm": 0.44165119528770447, "learning_rate": 8.748607876885695e-05, "loss": 2.2657, "step": 2482 }, { "epoch": 0.7536803763848839, "grad_norm": 0.5234296917915344, "learning_rate": 8.748101650298675e-05, "loss": 1.6954, "step": 2483 }, { "epoch": 0.7539839125815754, "grad_norm": 0.4218185842037201, "learning_rate": 8.747595423711654e-05, "loss": 2.028, "step": 2484 }, { "epoch": 0.7542874487782668, "grad_norm": 0.6535462737083435, "learning_rate": 8.747089197124634e-05, "loss": 1.9042, "step": 2485 }, { "epoch": 0.7545909849749582, "grad_norm": 0.34253132343292236, "learning_rate": 8.746582970537613e-05, "loss": 1.5497, "step": 2486 }, { "epoch": 0.7548945211716497, "grad_norm": 0.4396836757659912, "learning_rate": 8.746076743950593e-05, "loss": 1.912, "step": 2487 }, { "epoch": 0.7551980573683412, "grad_norm": 0.44126465916633606, "learning_rate": 8.745570517363572e-05, "loss": 1.7907, "step": 2488 }, { "epoch": 0.7555015935650327, "grad_norm": 0.42292916774749756, "learning_rate": 8.745064290776553e-05, "loss": 1.9956, "step": 2489 }, { "epoch": 0.7558051297617241, "grad_norm": 0.4493507146835327, "learning_rate": 8.744558064189532e-05, "loss": 1.9819, "step": 2490 }, { "epoch": 0.7561086659584155, "grad_norm": 0.5793929100036621, "learning_rate": 8.744051837602512e-05, "loss": 2.1241, "step": 2491 }, { "epoch": 0.756412202155107, "grad_norm": 0.3927520513534546, "learning_rate": 8.743545611015491e-05, "loss": 1.6615, "step": 2492 }, { "epoch": 0.7567157383517985, "grad_norm": 0.4623410403728485, "learning_rate": 8.743039384428471e-05, "loss": 1.9174, "step": 2493 }, { "epoch": 0.7570192745484899, "grad_norm": 0.4135148823261261, "learning_rate": 8.74253315784145e-05, "loss": 1.9403, "step": 2494 }, { "epoch": 0.7573228107451814, "grad_norm": 0.4701920449733734, "learning_rate": 8.74202693125443e-05, "loss": 1.9479, "step": 2495 }, { "epoch": 0.7576263469418728, "grad_norm": 0.42168691754341125, "learning_rate": 8.741520704667409e-05, "loss": 1.6007, "step": 2496 }, { "epoch": 0.7579298831385642, "grad_norm": 1.0131754875183105, "learning_rate": 8.741014478080389e-05, "loss": 1.8092, "step": 2497 }, { "epoch": 0.7582334193352557, "grad_norm": 0.39457446336746216, "learning_rate": 8.74050825149337e-05, "loss": 2.0884, "step": 2498 }, { "epoch": 0.7585369555319472, "grad_norm": 0.9458907842636108, "learning_rate": 8.740002024906349e-05, "loss": 1.5426, "step": 2499 }, { "epoch": 0.7588404917286387, "grad_norm": 0.6375271677970886, "learning_rate": 8.739495798319329e-05, "loss": 1.9517, "step": 2500 }, { "epoch": 0.7591440279253301, "grad_norm": 0.4703015089035034, "learning_rate": 8.738989571732308e-05, "loss": 1.8815, "step": 2501 }, { "epoch": 0.7594475641220215, "grad_norm": 0.4868961572647095, "learning_rate": 8.738483345145287e-05, "loss": 2.1519, "step": 2502 }, { "epoch": 0.759751100318713, "grad_norm": 0.3808225691318512, "learning_rate": 8.737977118558267e-05, "loss": 1.9137, "step": 2503 }, { "epoch": 0.7600546365154045, "grad_norm": 0.4780614674091339, "learning_rate": 8.737470891971246e-05, "loss": 2.0755, "step": 2504 }, { "epoch": 0.7603581727120959, "grad_norm": 0.8534510135650635, "learning_rate": 8.736964665384226e-05, "loss": 2.2839, "step": 2505 }, { "epoch": 0.7606617089087874, "grad_norm": 0.3620678782463074, "learning_rate": 8.736458438797205e-05, "loss": 1.9497, "step": 2506 }, { "epoch": 0.7609652451054788, "grad_norm": 0.40993764996528625, "learning_rate": 8.735952212210186e-05, "loss": 1.7032, "step": 2507 }, { "epoch": 0.7612687813021702, "grad_norm": 0.43798285722732544, "learning_rate": 8.735445985623166e-05, "loss": 1.8932, "step": 2508 }, { "epoch": 0.7615723174988618, "grad_norm": 0.8272436857223511, "learning_rate": 8.734939759036145e-05, "loss": 1.9612, "step": 2509 }, { "epoch": 0.7618758536955532, "grad_norm": 0.3841719329357147, "learning_rate": 8.734433532449125e-05, "loss": 1.8698, "step": 2510 }, { "epoch": 0.7621793898922447, "grad_norm": 0.459075391292572, "learning_rate": 8.733927305862104e-05, "loss": 1.8878, "step": 2511 }, { "epoch": 0.7624829260889361, "grad_norm": 0.41815492510795593, "learning_rate": 8.733421079275084e-05, "loss": 1.8751, "step": 2512 }, { "epoch": 0.7627864622856275, "grad_norm": 0.41531050205230713, "learning_rate": 8.732914852688063e-05, "loss": 1.8247, "step": 2513 }, { "epoch": 0.7630899984823191, "grad_norm": 0.36942997574806213, "learning_rate": 8.732408626101043e-05, "loss": 2.0158, "step": 2514 }, { "epoch": 0.7633935346790105, "grad_norm": 0.3985773026943207, "learning_rate": 8.731902399514022e-05, "loss": 2.0955, "step": 2515 }, { "epoch": 0.7636970708757019, "grad_norm": 0.45657238364219666, "learning_rate": 8.731396172927002e-05, "loss": 1.2635, "step": 2516 }, { "epoch": 0.7640006070723934, "grad_norm": 0.35013964772224426, "learning_rate": 8.730889946339982e-05, "loss": 2.1223, "step": 2517 }, { "epoch": 0.7643041432690848, "grad_norm": 0.48166340589523315, "learning_rate": 8.730383719752962e-05, "loss": 1.9952, "step": 2518 }, { "epoch": 0.7646076794657763, "grad_norm": 0.3770373463630676, "learning_rate": 8.729877493165941e-05, "loss": 2.0698, "step": 2519 }, { "epoch": 0.7649112156624678, "grad_norm": 0.6299264430999756, "learning_rate": 8.729371266578921e-05, "loss": 2.1133, "step": 2520 }, { "epoch": 0.7652147518591592, "grad_norm": 0.3834339380264282, "learning_rate": 8.7288650399919e-05, "loss": 1.6254, "step": 2521 }, { "epoch": 0.7655182880558506, "grad_norm": 0.4225000739097595, "learning_rate": 8.72835881340488e-05, "loss": 1.5402, "step": 2522 }, { "epoch": 0.7658218242525421, "grad_norm": 0.3836756646633148, "learning_rate": 8.727852586817859e-05, "loss": 1.8559, "step": 2523 }, { "epoch": 0.7661253604492335, "grad_norm": 0.43883371353149414, "learning_rate": 8.72734636023084e-05, "loss": 1.8589, "step": 2524 }, { "epoch": 0.7664288966459251, "grad_norm": 0.3844871520996094, "learning_rate": 8.72684013364382e-05, "loss": 1.9853, "step": 2525 }, { "epoch": 0.7667324328426165, "grad_norm": 0.37431496381759644, "learning_rate": 8.726333907056799e-05, "loss": 1.9808, "step": 2526 }, { "epoch": 0.7670359690393079, "grad_norm": 0.35484790802001953, "learning_rate": 8.725827680469779e-05, "loss": 1.2544, "step": 2527 }, { "epoch": 0.7673395052359994, "grad_norm": 0.3555900752544403, "learning_rate": 8.72532145388276e-05, "loss": 1.3952, "step": 2528 }, { "epoch": 0.7676430414326908, "grad_norm": 0.4385487139225006, "learning_rate": 8.724815227295739e-05, "loss": 2.1207, "step": 2529 }, { "epoch": 0.7679465776293823, "grad_norm": 0.41865015029907227, "learning_rate": 8.724309000708718e-05, "loss": 1.2081, "step": 2530 }, { "epoch": 0.7682501138260738, "grad_norm": 0.44620874524116516, "learning_rate": 8.723802774121698e-05, "loss": 1.7552, "step": 2531 }, { "epoch": 0.7685536500227652, "grad_norm": 0.42884379625320435, "learning_rate": 8.723296547534677e-05, "loss": 1.8443, "step": 2532 }, { "epoch": 0.7688571862194566, "grad_norm": 0.41244685649871826, "learning_rate": 8.722790320947657e-05, "loss": 1.9597, "step": 2533 }, { "epoch": 0.7691607224161481, "grad_norm": 0.7400226593017578, "learning_rate": 8.722284094360636e-05, "loss": 1.9847, "step": 2534 }, { "epoch": 0.7694642586128396, "grad_norm": 0.4088320732116699, "learning_rate": 8.721777867773616e-05, "loss": 1.5025, "step": 2535 }, { "epoch": 0.7697677948095311, "grad_norm": 0.4008265435695648, "learning_rate": 8.721271641186595e-05, "loss": 1.879, "step": 2536 }, { "epoch": 0.7700713310062225, "grad_norm": 0.3870142996311188, "learning_rate": 8.720765414599576e-05, "loss": 1.9367, "step": 2537 }, { "epoch": 0.7703748672029139, "grad_norm": 0.4387873411178589, "learning_rate": 8.720259188012556e-05, "loss": 2.2303, "step": 2538 }, { "epoch": 0.7706784033996054, "grad_norm": 0.707614541053772, "learning_rate": 8.719752961425535e-05, "loss": 1.5612, "step": 2539 }, { "epoch": 0.7709819395962969, "grad_norm": 0.43096137046813965, "learning_rate": 8.719246734838514e-05, "loss": 1.544, "step": 2540 }, { "epoch": 0.7712854757929883, "grad_norm": 0.3771781027317047, "learning_rate": 8.718740508251494e-05, "loss": 1.79, "step": 2541 }, { "epoch": 0.7715890119896798, "grad_norm": 0.39454761147499084, "learning_rate": 8.718234281664473e-05, "loss": 1.4474, "step": 2542 }, { "epoch": 0.7718925481863712, "grad_norm": 0.421641081571579, "learning_rate": 8.717728055077453e-05, "loss": 1.8482, "step": 2543 }, { "epoch": 0.7721960843830626, "grad_norm": 0.38047879934310913, "learning_rate": 8.717221828490432e-05, "loss": 1.2413, "step": 2544 }, { "epoch": 0.7724996205797542, "grad_norm": 0.38516274094581604, "learning_rate": 8.716715601903412e-05, "loss": 1.9199, "step": 2545 }, { "epoch": 0.7728031567764456, "grad_norm": 0.38349801301956177, "learning_rate": 8.716209375316393e-05, "loss": 1.999, "step": 2546 }, { "epoch": 0.773106692973137, "grad_norm": 0.5327167510986328, "learning_rate": 8.715703148729372e-05, "loss": 1.5738, "step": 2547 }, { "epoch": 0.7734102291698285, "grad_norm": 0.3783544898033142, "learning_rate": 8.715196922142352e-05, "loss": 1.8474, "step": 2548 }, { "epoch": 0.7737137653665199, "grad_norm": 0.509729266166687, "learning_rate": 8.714690695555331e-05, "loss": 2.2824, "step": 2549 }, { "epoch": 0.7740173015632115, "grad_norm": 0.4439513087272644, "learning_rate": 8.71418446896831e-05, "loss": 1.8373, "step": 2550 }, { "epoch": 0.7743208377599029, "grad_norm": 0.4309268891811371, "learning_rate": 8.71367824238129e-05, "loss": 1.3441, "step": 2551 }, { "epoch": 0.7746243739565943, "grad_norm": 0.4033602178096771, "learning_rate": 8.71317201579427e-05, "loss": 2.0037, "step": 2552 }, { "epoch": 0.7749279101532858, "grad_norm": 0.42097219824790955, "learning_rate": 8.712665789207249e-05, "loss": 2.0319, "step": 2553 }, { "epoch": 0.7752314463499772, "grad_norm": 0.43752336502075195, "learning_rate": 8.712159562620229e-05, "loss": 1.6834, "step": 2554 }, { "epoch": 0.7755349825466686, "grad_norm": 0.4009190499782562, "learning_rate": 8.711653336033208e-05, "loss": 1.8764, "step": 2555 }, { "epoch": 0.7758385187433602, "grad_norm": 0.38049957156181335, "learning_rate": 8.711147109446189e-05, "loss": 1.5514, "step": 2556 }, { "epoch": 0.7761420549400516, "grad_norm": 0.7045227289199829, "learning_rate": 8.710640882859168e-05, "loss": 2.045, "step": 2557 }, { "epoch": 0.776445591136743, "grad_norm": 0.4141732454299927, "learning_rate": 8.710134656272148e-05, "loss": 1.9546, "step": 2558 }, { "epoch": 0.7767491273334345, "grad_norm": 0.36503976583480835, "learning_rate": 8.709628429685127e-05, "loss": 1.8611, "step": 2559 }, { "epoch": 0.7770526635301259, "grad_norm": 0.4061439335346222, "learning_rate": 8.709122203098107e-05, "loss": 1.5297, "step": 2560 }, { "epoch": 0.7773561997268175, "grad_norm": 0.39136406779289246, "learning_rate": 8.708615976511086e-05, "loss": 1.7483, "step": 2561 }, { "epoch": 0.7776597359235089, "grad_norm": 0.38786038756370544, "learning_rate": 8.708109749924066e-05, "loss": 1.6304, "step": 2562 }, { "epoch": 0.7779632721202003, "grad_norm": 0.44066160917282104, "learning_rate": 8.707603523337045e-05, "loss": 1.8819, "step": 2563 }, { "epoch": 0.7782668083168918, "grad_norm": 0.4141193628311157, "learning_rate": 8.707097296750025e-05, "loss": 1.5542, "step": 2564 }, { "epoch": 0.7785703445135832, "grad_norm": 0.3722589910030365, "learning_rate": 8.706591070163006e-05, "loss": 1.792, "step": 2565 }, { "epoch": 0.7788738807102747, "grad_norm": 0.4519922435283661, "learning_rate": 8.706084843575985e-05, "loss": 1.8324, "step": 2566 }, { "epoch": 0.7791774169069662, "grad_norm": 0.41349706053733826, "learning_rate": 8.705578616988964e-05, "loss": 1.841, "step": 2567 }, { "epoch": 0.7794809531036576, "grad_norm": 0.445417195558548, "learning_rate": 8.705072390401944e-05, "loss": 1.6788, "step": 2568 }, { "epoch": 0.779784489300349, "grad_norm": 0.35337746143341064, "learning_rate": 8.704566163814925e-05, "loss": 1.5129, "step": 2569 }, { "epoch": 0.7800880254970405, "grad_norm": 0.49805590510368347, "learning_rate": 8.704059937227904e-05, "loss": 1.7206, "step": 2570 }, { "epoch": 0.780391561693732, "grad_norm": 0.3580697774887085, "learning_rate": 8.703553710640884e-05, "loss": 1.8458, "step": 2571 }, { "epoch": 0.7806950978904235, "grad_norm": 0.557847797870636, "learning_rate": 8.703047484053863e-05, "loss": 1.824, "step": 2572 }, { "epoch": 0.7809986340871149, "grad_norm": 1.6153925657272339, "learning_rate": 8.702541257466843e-05, "loss": 2.0839, "step": 2573 }, { "epoch": 0.7813021702838063, "grad_norm": 0.44338542222976685, "learning_rate": 8.702035030879822e-05, "loss": 1.8798, "step": 2574 }, { "epoch": 0.7816057064804978, "grad_norm": 0.4379113018512726, "learning_rate": 8.701528804292802e-05, "loss": 1.8003, "step": 2575 }, { "epoch": 0.7819092426771893, "grad_norm": 0.42209142446517944, "learning_rate": 8.701022577705783e-05, "loss": 1.1716, "step": 2576 }, { "epoch": 0.7822127788738807, "grad_norm": 0.4423658549785614, "learning_rate": 8.700516351118762e-05, "loss": 1.9534, "step": 2577 }, { "epoch": 0.7825163150705722, "grad_norm": 0.4544404149055481, "learning_rate": 8.700010124531741e-05, "loss": 1.3336, "step": 2578 }, { "epoch": 0.7828198512672636, "grad_norm": 0.34568536281585693, "learning_rate": 8.699503897944721e-05, "loss": 1.9537, "step": 2579 }, { "epoch": 0.783123387463955, "grad_norm": 0.545414924621582, "learning_rate": 8.6989976713577e-05, "loss": 1.9132, "step": 2580 }, { "epoch": 0.7834269236606465, "grad_norm": 0.4345841705799103, "learning_rate": 8.69849144477068e-05, "loss": 1.7581, "step": 2581 }, { "epoch": 0.783730459857338, "grad_norm": 0.4052067995071411, "learning_rate": 8.69798521818366e-05, "loss": 1.7303, "step": 2582 }, { "epoch": 0.7840339960540295, "grad_norm": 0.34817397594451904, "learning_rate": 8.697478991596639e-05, "loss": 1.8363, "step": 2583 }, { "epoch": 0.7843375322507209, "grad_norm": 0.3445320725440979, "learning_rate": 8.696972765009618e-05, "loss": 1.9267, "step": 2584 }, { "epoch": 0.7846410684474123, "grad_norm": 0.43368205428123474, "learning_rate": 8.696466538422599e-05, "loss": 2.0271, "step": 2585 }, { "epoch": 0.7849446046441038, "grad_norm": 0.4825034439563751, "learning_rate": 8.695960311835579e-05, "loss": 2.0727, "step": 2586 }, { "epoch": 0.7852481408407953, "grad_norm": 0.6833105087280273, "learning_rate": 8.695454085248558e-05, "loss": 1.5662, "step": 2587 }, { "epoch": 0.7855516770374867, "grad_norm": 0.3476558029651642, "learning_rate": 8.694947858661538e-05, "loss": 1.9023, "step": 2588 }, { "epoch": 0.7858552132341782, "grad_norm": 0.49442049860954285, "learning_rate": 8.694441632074517e-05, "loss": 1.6733, "step": 2589 }, { "epoch": 0.7861587494308696, "grad_norm": 1.6963638067245483, "learning_rate": 8.693935405487497e-05, "loss": 2.0808, "step": 2590 }, { "epoch": 0.786462285627561, "grad_norm": 0.48170772194862366, "learning_rate": 8.693429178900476e-05, "loss": 1.6269, "step": 2591 }, { "epoch": 0.7867658218242526, "grad_norm": 0.427327036857605, "learning_rate": 8.692922952313456e-05, "loss": 2.0448, "step": 2592 }, { "epoch": 0.787069358020944, "grad_norm": 0.3641161322593689, "learning_rate": 8.692416725726435e-05, "loss": 2.0699, "step": 2593 }, { "epoch": 0.7873728942176355, "grad_norm": 0.4324423372745514, "learning_rate": 8.691910499139414e-05, "loss": 1.1989, "step": 2594 }, { "epoch": 0.7876764304143269, "grad_norm": 0.4303852617740631, "learning_rate": 8.691404272552395e-05, "loss": 1.866, "step": 2595 }, { "epoch": 0.7879799666110183, "grad_norm": 0.36840641498565674, "learning_rate": 8.690898045965375e-05, "loss": 2.0927, "step": 2596 }, { "epoch": 0.7882835028077099, "grad_norm": 0.43906763195991516, "learning_rate": 8.690391819378354e-05, "loss": 1.8755, "step": 2597 }, { "epoch": 0.7885870390044013, "grad_norm": 0.43337517976760864, "learning_rate": 8.689885592791334e-05, "loss": 1.7263, "step": 2598 }, { "epoch": 0.7888905752010927, "grad_norm": 0.35808295011520386, "learning_rate": 8.689379366204313e-05, "loss": 1.5395, "step": 2599 }, { "epoch": 0.7891941113977842, "grad_norm": 0.4063914120197296, "learning_rate": 8.688873139617293e-05, "loss": 1.7125, "step": 2600 }, { "epoch": 0.7894976475944756, "grad_norm": 0.35243427753448486, "learning_rate": 8.688366913030272e-05, "loss": 1.705, "step": 2601 }, { "epoch": 0.7898011837911671, "grad_norm": 0.4404586851596832, "learning_rate": 8.687860686443252e-05, "loss": 1.9805, "step": 2602 }, { "epoch": 0.7901047199878586, "grad_norm": 0.45531004667282104, "learning_rate": 8.687354459856231e-05, "loss": 2.1248, "step": 2603 }, { "epoch": 0.79040825618455, "grad_norm": 0.4575786292552948, "learning_rate": 8.686848233269212e-05, "loss": 2.0493, "step": 2604 }, { "epoch": 0.7907117923812415, "grad_norm": 0.4143056571483612, "learning_rate": 8.686342006682191e-05, "loss": 1.7757, "step": 2605 }, { "epoch": 0.7910153285779329, "grad_norm": 0.41257745027542114, "learning_rate": 8.685835780095171e-05, "loss": 2.1223, "step": 2606 }, { "epoch": 0.7913188647746243, "grad_norm": 0.4308036267757416, "learning_rate": 8.68532955350815e-05, "loss": 1.8967, "step": 2607 }, { "epoch": 0.7916224009713159, "grad_norm": 1.4339756965637207, "learning_rate": 8.68482332692113e-05, "loss": 2.0286, "step": 2608 }, { "epoch": 0.7919259371680073, "grad_norm": 0.39570608735084534, "learning_rate": 8.68431710033411e-05, "loss": 2.0292, "step": 2609 }, { "epoch": 0.7922294733646987, "grad_norm": 0.39638906717300415, "learning_rate": 8.683810873747089e-05, "loss": 1.9292, "step": 2610 }, { "epoch": 0.7925330095613902, "grad_norm": 0.40838631987571716, "learning_rate": 8.683304647160068e-05, "loss": 1.9439, "step": 2611 }, { "epoch": 0.7928365457580816, "grad_norm": 0.41017046570777893, "learning_rate": 8.682798420573048e-05, "loss": 1.7575, "step": 2612 }, { "epoch": 0.7931400819547731, "grad_norm": 0.38030532002449036, "learning_rate": 8.682292193986029e-05, "loss": 2.0398, "step": 2613 }, { "epoch": 0.7934436181514646, "grad_norm": 0.42547357082366943, "learning_rate": 8.681785967399008e-05, "loss": 1.9265, "step": 2614 }, { "epoch": 0.793747154348156, "grad_norm": 0.42651450634002686, "learning_rate": 8.681279740811989e-05, "loss": 1.8808, "step": 2615 }, { "epoch": 0.7940506905448474, "grad_norm": 0.4874178469181061, "learning_rate": 8.680773514224968e-05, "loss": 1.8267, "step": 2616 }, { "epoch": 0.7943542267415389, "grad_norm": 0.4573056101799011, "learning_rate": 8.680267287637948e-05, "loss": 2.0073, "step": 2617 }, { "epoch": 0.7946577629382304, "grad_norm": 0.4408004879951477, "learning_rate": 8.679761061050927e-05, "loss": 2.1719, "step": 2618 }, { "epoch": 0.7949612991349219, "grad_norm": 0.41363367438316345, "learning_rate": 8.679254834463907e-05, "loss": 2.0006, "step": 2619 }, { "epoch": 0.7952648353316133, "grad_norm": 0.3256136178970337, "learning_rate": 8.678748607876886e-05, "loss": 1.5214, "step": 2620 }, { "epoch": 0.7955683715283047, "grad_norm": 0.3597501516342163, "learning_rate": 8.678242381289866e-05, "loss": 1.9775, "step": 2621 }, { "epoch": 0.7958719077249962, "grad_norm": 0.43128228187561035, "learning_rate": 8.677736154702845e-05, "loss": 1.3915, "step": 2622 }, { "epoch": 0.7961754439216877, "grad_norm": 0.6114957332611084, "learning_rate": 8.677229928115825e-05, "loss": 2.0548, "step": 2623 }, { "epoch": 0.7964789801183791, "grad_norm": 0.6381771564483643, "learning_rate": 8.676723701528806e-05, "loss": 2.1353, "step": 2624 }, { "epoch": 0.7967825163150706, "grad_norm": 0.39409366250038147, "learning_rate": 8.676217474941785e-05, "loss": 0.9988, "step": 2625 }, { "epoch": 0.797086052511762, "grad_norm": 0.4145677387714386, "learning_rate": 8.675711248354765e-05, "loss": 2.1825, "step": 2626 }, { "epoch": 0.7973895887084534, "grad_norm": 0.40860435366630554, "learning_rate": 8.675205021767744e-05, "loss": 2.1545, "step": 2627 }, { "epoch": 0.797693124905145, "grad_norm": 0.42259758710861206, "learning_rate": 8.674698795180724e-05, "loss": 2.0872, "step": 2628 }, { "epoch": 0.7979966611018364, "grad_norm": 0.9106017351150513, "learning_rate": 8.674192568593703e-05, "loss": 1.8756, "step": 2629 }, { "epoch": 0.7983001972985279, "grad_norm": 0.4160531163215637, "learning_rate": 8.673686342006683e-05, "loss": 1.6454, "step": 2630 }, { "epoch": 0.7986037334952193, "grad_norm": 0.4564226269721985, "learning_rate": 8.673180115419662e-05, "loss": 1.6036, "step": 2631 }, { "epoch": 0.7989072696919107, "grad_norm": 0.5077611207962036, "learning_rate": 8.672673888832641e-05, "loss": 1.8217, "step": 2632 }, { "epoch": 0.7992108058886022, "grad_norm": 0.3732128143310547, "learning_rate": 8.672167662245621e-05, "loss": 1.6299, "step": 2633 }, { "epoch": 0.7995143420852937, "grad_norm": 0.4433646500110626, "learning_rate": 8.671661435658602e-05, "loss": 2.0876, "step": 2634 }, { "epoch": 0.7998178782819851, "grad_norm": 0.3869750201702118, "learning_rate": 8.671155209071581e-05, "loss": 1.9312, "step": 2635 }, { "epoch": 0.8001214144786766, "grad_norm": 0.3622623682022095, "learning_rate": 8.670648982484561e-05, "loss": 1.3211, "step": 2636 }, { "epoch": 0.800424950675368, "grad_norm": 0.38390904664993286, "learning_rate": 8.67014275589754e-05, "loss": 2.0186, "step": 2637 }, { "epoch": 0.8007284868720594, "grad_norm": 0.5641773343086243, "learning_rate": 8.66963652931052e-05, "loss": 1.5207, "step": 2638 }, { "epoch": 0.801032023068751, "grad_norm": 0.399679571390152, "learning_rate": 8.669130302723499e-05, "loss": 1.621, "step": 2639 }, { "epoch": 0.8013355592654424, "grad_norm": 0.38951337337493896, "learning_rate": 8.668624076136479e-05, "loss": 1.8445, "step": 2640 }, { "epoch": 0.8016390954621339, "grad_norm": 0.47294196486473083, "learning_rate": 8.668117849549458e-05, "loss": 1.4963, "step": 2641 }, { "epoch": 0.8019426316588253, "grad_norm": 0.4139798581600189, "learning_rate": 8.667611622962438e-05, "loss": 1.863, "step": 2642 }, { "epoch": 0.8022461678555167, "grad_norm": 0.42392146587371826, "learning_rate": 8.667105396375418e-05, "loss": 1.4778, "step": 2643 }, { "epoch": 0.8025497040522083, "grad_norm": 0.4208984673023224, "learning_rate": 8.666599169788398e-05, "loss": 1.8756, "step": 2644 }, { "epoch": 0.8028532402488997, "grad_norm": 0.3515019714832306, "learning_rate": 8.666092943201377e-05, "loss": 1.8822, "step": 2645 }, { "epoch": 0.8031567764455911, "grad_norm": 0.4780139923095703, "learning_rate": 8.665586716614357e-05, "loss": 1.7862, "step": 2646 }, { "epoch": 0.8034603126422826, "grad_norm": 0.4419991970062256, "learning_rate": 8.665080490027336e-05, "loss": 2.0666, "step": 2647 }, { "epoch": 0.803763848838974, "grad_norm": 0.43830469250679016, "learning_rate": 8.664574263440316e-05, "loss": 2.2725, "step": 2648 }, { "epoch": 0.8040673850356655, "grad_norm": 0.3989112377166748, "learning_rate": 8.664068036853295e-05, "loss": 1.9306, "step": 2649 }, { "epoch": 0.804370921232357, "grad_norm": 0.3216220438480377, "learning_rate": 8.663561810266275e-05, "loss": 1.862, "step": 2650 }, { "epoch": 0.8046744574290484, "grad_norm": 0.4546568989753723, "learning_rate": 8.663055583679254e-05, "loss": 2.0723, "step": 2651 }, { "epoch": 0.8049779936257399, "grad_norm": 0.39314141869544983, "learning_rate": 8.662549357092235e-05, "loss": 1.7299, "step": 2652 }, { "epoch": 0.8052815298224313, "grad_norm": 0.4112257957458496, "learning_rate": 8.662043130505215e-05, "loss": 1.2306, "step": 2653 }, { "epoch": 0.8055850660191228, "grad_norm": 0.8900299072265625, "learning_rate": 8.661536903918194e-05, "loss": 2.0176, "step": 2654 }, { "epoch": 0.8058886022158143, "grad_norm": 0.35671380162239075, "learning_rate": 8.661030677331174e-05, "loss": 1.521, "step": 2655 }, { "epoch": 0.8061921384125057, "grad_norm": 0.3438098430633545, "learning_rate": 8.660524450744153e-05, "loss": 1.5401, "step": 2656 }, { "epoch": 0.8064956746091971, "grad_norm": 0.4241732656955719, "learning_rate": 8.660018224157133e-05, "loss": 1.5139, "step": 2657 }, { "epoch": 0.8067992108058886, "grad_norm": 0.41691192984580994, "learning_rate": 8.659511997570113e-05, "loss": 1.9927, "step": 2658 }, { "epoch": 0.8071027470025801, "grad_norm": 0.36074796319007874, "learning_rate": 8.659005770983093e-05, "loss": 1.3125, "step": 2659 }, { "epoch": 0.8074062831992715, "grad_norm": 0.503271222114563, "learning_rate": 8.658499544396072e-05, "loss": 2.0934, "step": 2660 }, { "epoch": 0.807709819395963, "grad_norm": 0.47022250294685364, "learning_rate": 8.657993317809052e-05, "loss": 1.515, "step": 2661 }, { "epoch": 0.8080133555926544, "grad_norm": 0.5267159938812256, "learning_rate": 8.657487091222031e-05, "loss": 1.7448, "step": 2662 }, { "epoch": 0.8083168917893458, "grad_norm": 0.5382044315338135, "learning_rate": 8.656980864635012e-05, "loss": 1.6507, "step": 2663 }, { "epoch": 0.8086204279860373, "grad_norm": 0.5040610432624817, "learning_rate": 8.656474638047992e-05, "loss": 1.3789, "step": 2664 }, { "epoch": 0.8089239641827288, "grad_norm": 0.356317400932312, "learning_rate": 8.655968411460971e-05, "loss": 1.8226, "step": 2665 }, { "epoch": 0.8092275003794203, "grad_norm": 0.38693082332611084, "learning_rate": 8.65546218487395e-05, "loss": 1.8358, "step": 2666 }, { "epoch": 0.8095310365761117, "grad_norm": 0.42606496810913086, "learning_rate": 8.65495595828693e-05, "loss": 1.9002, "step": 2667 }, { "epoch": 0.8098345727728031, "grad_norm": 0.3855800926685333, "learning_rate": 8.65444973169991e-05, "loss": 1.8488, "step": 2668 }, { "epoch": 0.8101381089694946, "grad_norm": 0.46677157282829285, "learning_rate": 8.653943505112889e-05, "loss": 2.264, "step": 2669 }, { "epoch": 0.8104416451661861, "grad_norm": 0.3479576110839844, "learning_rate": 8.653437278525868e-05, "loss": 1.796, "step": 2670 }, { "epoch": 0.8107451813628775, "grad_norm": 0.4703936278820038, "learning_rate": 8.652931051938848e-05, "loss": 2.0457, "step": 2671 }, { "epoch": 0.811048717559569, "grad_norm": 0.3478047847747803, "learning_rate": 8.652424825351827e-05, "loss": 1.8033, "step": 2672 }, { "epoch": 0.8113522537562604, "grad_norm": 0.4196014702320099, "learning_rate": 8.651918598764808e-05, "loss": 1.9513, "step": 2673 }, { "epoch": 0.8116557899529518, "grad_norm": 0.36813899874687195, "learning_rate": 8.651412372177788e-05, "loss": 1.9776, "step": 2674 }, { "epoch": 0.8119593261496434, "grad_norm": 0.44413039088249207, "learning_rate": 8.650906145590767e-05, "loss": 1.9041, "step": 2675 }, { "epoch": 0.8122628623463348, "grad_norm": 0.4073639512062073, "learning_rate": 8.650399919003747e-05, "loss": 1.9749, "step": 2676 }, { "epoch": 0.8125663985430263, "grad_norm": 0.3961658775806427, "learning_rate": 8.649893692416726e-05, "loss": 1.8646, "step": 2677 }, { "epoch": 0.8128699347397177, "grad_norm": 0.536353349685669, "learning_rate": 8.649387465829706e-05, "loss": 1.7772, "step": 2678 }, { "epoch": 0.8131734709364091, "grad_norm": 0.4030105471611023, "learning_rate": 8.648881239242685e-05, "loss": 2.1668, "step": 2679 }, { "epoch": 0.8134770071331007, "grad_norm": 0.4185904264450073, "learning_rate": 8.648375012655665e-05, "loss": 1.877, "step": 2680 }, { "epoch": 0.8137805433297921, "grad_norm": 0.4530700445175171, "learning_rate": 8.647868786068644e-05, "loss": 1.7669, "step": 2681 }, { "epoch": 0.8140840795264835, "grad_norm": 0.7239555716514587, "learning_rate": 8.647362559481625e-05, "loss": 1.7399, "step": 2682 }, { "epoch": 0.814387615723175, "grad_norm": 0.5373411178588867, "learning_rate": 8.646856332894604e-05, "loss": 1.3157, "step": 2683 }, { "epoch": 0.8146911519198664, "grad_norm": 0.41730010509490967, "learning_rate": 8.646350106307584e-05, "loss": 1.9912, "step": 2684 }, { "epoch": 0.814994688116558, "grad_norm": 0.4486635625362396, "learning_rate": 8.645843879720563e-05, "loss": 1.9651, "step": 2685 }, { "epoch": 0.8152982243132494, "grad_norm": 0.5648373961448669, "learning_rate": 8.645337653133543e-05, "loss": 1.7917, "step": 2686 }, { "epoch": 0.8156017605099408, "grad_norm": 0.4445558488368988, "learning_rate": 8.644831426546522e-05, "loss": 1.71, "step": 2687 }, { "epoch": 0.8159052967066323, "grad_norm": 0.4051614999771118, "learning_rate": 8.644325199959502e-05, "loss": 2.0976, "step": 2688 }, { "epoch": 0.8162088329033237, "grad_norm": 0.41357868909835815, "learning_rate": 8.643818973372481e-05, "loss": 1.8658, "step": 2689 }, { "epoch": 0.8165123691000151, "grad_norm": 0.43194282054901123, "learning_rate": 8.643312746785461e-05, "loss": 1.3586, "step": 2690 }, { "epoch": 0.8168159052967067, "grad_norm": 0.4093270003795624, "learning_rate": 8.642806520198442e-05, "loss": 1.8946, "step": 2691 }, { "epoch": 0.8171194414933981, "grad_norm": 0.5622807741165161, "learning_rate": 8.642300293611421e-05, "loss": 1.9175, "step": 2692 }, { "epoch": 0.8174229776900895, "grad_norm": 0.41735681891441345, "learning_rate": 8.6417940670244e-05, "loss": 2.0407, "step": 2693 }, { "epoch": 0.817726513886781, "grad_norm": 0.4518575966358185, "learning_rate": 8.64128784043738e-05, "loss": 1.8137, "step": 2694 }, { "epoch": 0.8180300500834724, "grad_norm": 0.34537529945373535, "learning_rate": 8.64078161385036e-05, "loss": 1.7952, "step": 2695 }, { "epoch": 0.818333586280164, "grad_norm": 0.44123801589012146, "learning_rate": 8.640275387263339e-05, "loss": 1.74, "step": 2696 }, { "epoch": 0.8186371224768554, "grad_norm": 0.36231714487075806, "learning_rate": 8.639769160676318e-05, "loss": 1.9205, "step": 2697 }, { "epoch": 0.8189406586735468, "grad_norm": 0.4573259651660919, "learning_rate": 8.639262934089298e-05, "loss": 1.8026, "step": 2698 }, { "epoch": 0.8192441948702383, "grad_norm": 0.43909579515457153, "learning_rate": 8.638756707502277e-05, "loss": 1.9559, "step": 2699 }, { "epoch": 0.8195477310669297, "grad_norm": 0.4051404893398285, "learning_rate": 8.638250480915257e-05, "loss": 1.9962, "step": 2700 }, { "epoch": 0.8198512672636212, "grad_norm": 0.41793152689933777, "learning_rate": 8.637744254328238e-05, "loss": 1.7749, "step": 2701 }, { "epoch": 0.8201548034603127, "grad_norm": 0.5424450039863586, "learning_rate": 8.637238027741219e-05, "loss": 1.8725, "step": 2702 }, { "epoch": 0.8204583396570041, "grad_norm": 0.39918193221092224, "learning_rate": 8.636731801154198e-05, "loss": 2.0224, "step": 2703 }, { "epoch": 0.8207618758536955, "grad_norm": 0.40323561429977417, "learning_rate": 8.636225574567178e-05, "loss": 1.9338, "step": 2704 }, { "epoch": 0.821065412050387, "grad_norm": 0.40484192967414856, "learning_rate": 8.635719347980157e-05, "loss": 1.8665, "step": 2705 }, { "epoch": 0.8213689482470785, "grad_norm": 0.45353245735168457, "learning_rate": 8.635213121393137e-05, "loss": 2.0364, "step": 2706 }, { "epoch": 0.82167248444377, "grad_norm": 0.5369464755058289, "learning_rate": 8.634706894806116e-05, "loss": 1.3729, "step": 2707 }, { "epoch": 0.8219760206404614, "grad_norm": 0.3175603449344635, "learning_rate": 8.634200668219095e-05, "loss": 1.6742, "step": 2708 }, { "epoch": 0.8222795568371528, "grad_norm": 0.4314495623111725, "learning_rate": 8.633694441632075e-05, "loss": 1.9536, "step": 2709 }, { "epoch": 0.8225830930338442, "grad_norm": 0.4610050916671753, "learning_rate": 8.633188215045054e-05, "loss": 2.0608, "step": 2710 }, { "epoch": 0.8228866292305358, "grad_norm": 0.3542473018169403, "learning_rate": 8.632681988458034e-05, "loss": 1.5889, "step": 2711 }, { "epoch": 0.8231901654272272, "grad_norm": 0.4445483684539795, "learning_rate": 8.632175761871015e-05, "loss": 2.0438, "step": 2712 }, { "epoch": 0.8234937016239187, "grad_norm": 0.42590487003326416, "learning_rate": 8.631669535283994e-05, "loss": 1.6821, "step": 2713 }, { "epoch": 0.8237972378206101, "grad_norm": 0.3951219618320465, "learning_rate": 8.631163308696974e-05, "loss": 1.8549, "step": 2714 }, { "epoch": 0.8241007740173015, "grad_norm": 0.4422662556171417, "learning_rate": 8.630657082109953e-05, "loss": 1.0172, "step": 2715 }, { "epoch": 0.824404310213993, "grad_norm": 0.6093502640724182, "learning_rate": 8.630150855522933e-05, "loss": 1.469, "step": 2716 }, { "epoch": 0.8247078464106845, "grad_norm": 0.6702497005462646, "learning_rate": 8.629644628935912e-05, "loss": 1.7706, "step": 2717 }, { "epoch": 0.8250113826073759, "grad_norm": 0.4154108166694641, "learning_rate": 8.629138402348892e-05, "loss": 2.0392, "step": 2718 }, { "epoch": 0.8253149188040674, "grad_norm": 0.4183025062084198, "learning_rate": 8.628632175761871e-05, "loss": 1.648, "step": 2719 }, { "epoch": 0.8256184550007588, "grad_norm": 0.40831395983695984, "learning_rate": 8.62812594917485e-05, "loss": 1.8159, "step": 2720 }, { "epoch": 0.8259219911974502, "grad_norm": 0.3942376673221588, "learning_rate": 8.627619722587831e-05, "loss": 2.1595, "step": 2721 }, { "epoch": 0.8262255273941418, "grad_norm": 0.4016304314136505, "learning_rate": 8.627113496000811e-05, "loss": 1.9915, "step": 2722 }, { "epoch": 0.8265290635908332, "grad_norm": 0.43526315689086914, "learning_rate": 8.62660726941379e-05, "loss": 2.009, "step": 2723 }, { "epoch": 0.8268325997875247, "grad_norm": 0.4215218424797058, "learning_rate": 8.62610104282677e-05, "loss": 1.6511, "step": 2724 }, { "epoch": 0.8271361359842161, "grad_norm": 0.38574057817459106, "learning_rate": 8.62559481623975e-05, "loss": 1.6658, "step": 2725 }, { "epoch": 0.8274396721809075, "grad_norm": 0.4340943694114685, "learning_rate": 8.625088589652729e-05, "loss": 1.9324, "step": 2726 }, { "epoch": 0.8277432083775991, "grad_norm": 0.474386066198349, "learning_rate": 8.624582363065708e-05, "loss": 1.7866, "step": 2727 }, { "epoch": 0.8280467445742905, "grad_norm": 0.4177556335926056, "learning_rate": 8.624076136478688e-05, "loss": 1.4593, "step": 2728 }, { "epoch": 0.8283502807709819, "grad_norm": 0.412219375371933, "learning_rate": 8.623569909891667e-05, "loss": 1.592, "step": 2729 }, { "epoch": 0.8286538169676734, "grad_norm": 0.5068672895431519, "learning_rate": 8.623063683304648e-05, "loss": 2.0902, "step": 2730 }, { "epoch": 0.8289573531643648, "grad_norm": 0.43211403489112854, "learning_rate": 8.622557456717628e-05, "loss": 1.1154, "step": 2731 }, { "epoch": 0.8292608893610564, "grad_norm": 0.37955889105796814, "learning_rate": 8.622051230130607e-05, "loss": 1.9858, "step": 2732 }, { "epoch": 0.8295644255577478, "grad_norm": 0.4571453928947449, "learning_rate": 8.621545003543587e-05, "loss": 1.6966, "step": 2733 }, { "epoch": 0.8298679617544392, "grad_norm": 0.43323561549186707, "learning_rate": 8.621038776956566e-05, "loss": 2.0712, "step": 2734 }, { "epoch": 0.8301714979511307, "grad_norm": 0.4051443338394165, "learning_rate": 8.620532550369545e-05, "loss": 1.9132, "step": 2735 }, { "epoch": 0.8304750341478221, "grad_norm": 0.4727547764778137, "learning_rate": 8.620026323782525e-05, "loss": 2.0754, "step": 2736 }, { "epoch": 0.8307785703445136, "grad_norm": 0.8304808139801025, "learning_rate": 8.619520097195504e-05, "loss": 1.3452, "step": 2737 }, { "epoch": 0.8310821065412051, "grad_norm": 0.40300288796424866, "learning_rate": 8.619013870608484e-05, "loss": 1.903, "step": 2738 }, { "epoch": 0.8313856427378965, "grad_norm": 0.4302805960178375, "learning_rate": 8.618507644021463e-05, "loss": 1.9938, "step": 2739 }, { "epoch": 0.8316891789345879, "grad_norm": 0.41586950421333313, "learning_rate": 8.618001417434444e-05, "loss": 1.8044, "step": 2740 }, { "epoch": 0.8319927151312794, "grad_norm": 0.4185795187950134, "learning_rate": 8.617495190847424e-05, "loss": 2.0513, "step": 2741 }, { "epoch": 0.8322962513279709, "grad_norm": 0.4664061367511749, "learning_rate": 8.616988964260403e-05, "loss": 1.6634, "step": 2742 }, { "epoch": 0.8325997875246623, "grad_norm": 0.44080016016960144, "learning_rate": 8.616482737673383e-05, "loss": 1.854, "step": 2743 }, { "epoch": 0.8329033237213538, "grad_norm": 0.4284375011920929, "learning_rate": 8.615976511086362e-05, "loss": 1.9713, "step": 2744 }, { "epoch": 0.8332068599180452, "grad_norm": 0.42498892545700073, "learning_rate": 8.615470284499342e-05, "loss": 1.5883, "step": 2745 }, { "epoch": 0.8335103961147367, "grad_norm": 0.5301217436790466, "learning_rate": 8.614964057912321e-05, "loss": 0.8439, "step": 2746 }, { "epoch": 0.8338139323114281, "grad_norm": 0.5539612174034119, "learning_rate": 8.614457831325302e-05, "loss": 2.1118, "step": 2747 }, { "epoch": 0.8341174685081196, "grad_norm": 0.47817254066467285, "learning_rate": 8.613951604738281e-05, "loss": 2.1536, "step": 2748 }, { "epoch": 0.8344210047048111, "grad_norm": 0.3291810154914856, "learning_rate": 8.613445378151261e-05, "loss": 1.951, "step": 2749 }, { "epoch": 0.8347245409015025, "grad_norm": 0.365369975566864, "learning_rate": 8.61293915156424e-05, "loss": 1.4365, "step": 2750 }, { "epoch": 0.8350280770981939, "grad_norm": 0.4028696119785309, "learning_rate": 8.612432924977221e-05, "loss": 2.2443, "step": 2751 }, { "epoch": 0.8353316132948854, "grad_norm": 0.40695205330848694, "learning_rate": 8.611926698390201e-05, "loss": 2.0337, "step": 2752 }, { "epoch": 0.8356351494915769, "grad_norm": 1.1030051708221436, "learning_rate": 8.61142047180318e-05, "loss": 1.9415, "step": 2753 }, { "epoch": 0.8359386856882683, "grad_norm": 0.36230286955833435, "learning_rate": 8.61091424521616e-05, "loss": 1.7159, "step": 2754 }, { "epoch": 0.8362422218849598, "grad_norm": 0.36546850204467773, "learning_rate": 8.610408018629139e-05, "loss": 1.7412, "step": 2755 }, { "epoch": 0.8365457580816512, "grad_norm": 0.3433781564235687, "learning_rate": 8.609901792042119e-05, "loss": 1.6014, "step": 2756 }, { "epoch": 0.8368492942783426, "grad_norm": 0.4202607274055481, "learning_rate": 8.609395565455098e-05, "loss": 1.8733, "step": 2757 }, { "epoch": 0.8371528304750342, "grad_norm": 0.41112053394317627, "learning_rate": 8.608889338868078e-05, "loss": 1.8103, "step": 2758 }, { "epoch": 0.8374563666717256, "grad_norm": 0.44569066166877747, "learning_rate": 8.608383112281057e-05, "loss": 1.3989, "step": 2759 }, { "epoch": 0.8377599028684171, "grad_norm": 0.4139041602611542, "learning_rate": 8.607876885694038e-05, "loss": 1.8406, "step": 2760 }, { "epoch": 0.8380634390651085, "grad_norm": 0.40701228380203247, "learning_rate": 8.607370659107017e-05, "loss": 2.1573, "step": 2761 }, { "epoch": 0.8383669752617999, "grad_norm": 0.44878578186035156, "learning_rate": 8.606864432519997e-05, "loss": 1.5094, "step": 2762 }, { "epoch": 0.8386705114584915, "grad_norm": 0.5021698474884033, "learning_rate": 8.606358205932976e-05, "loss": 2.1396, "step": 2763 }, { "epoch": 0.8389740476551829, "grad_norm": 0.5416968464851379, "learning_rate": 8.605851979345956e-05, "loss": 1.974, "step": 2764 }, { "epoch": 0.8392775838518743, "grad_norm": 0.5953572988510132, "learning_rate": 8.605345752758935e-05, "loss": 1.7463, "step": 2765 }, { "epoch": 0.8395811200485658, "grad_norm": 0.43414852023124695, "learning_rate": 8.604839526171915e-05, "loss": 1.7121, "step": 2766 }, { "epoch": 0.8398846562452572, "grad_norm": 0.4000817537307739, "learning_rate": 8.604333299584894e-05, "loss": 2.1051, "step": 2767 }, { "epoch": 0.8401881924419488, "grad_norm": 0.6544987559318542, "learning_rate": 8.603827072997874e-05, "loss": 2.063, "step": 2768 }, { "epoch": 0.8404917286386402, "grad_norm": 0.7102285623550415, "learning_rate": 8.603320846410855e-05, "loss": 2.2053, "step": 2769 }, { "epoch": 0.8407952648353316, "grad_norm": 0.39218565821647644, "learning_rate": 8.602814619823834e-05, "loss": 1.2981, "step": 2770 }, { "epoch": 0.8410988010320231, "grad_norm": 0.42591944336891174, "learning_rate": 8.602308393236814e-05, "loss": 2.0123, "step": 2771 }, { "epoch": 0.8414023372287145, "grad_norm": 0.3958960175514221, "learning_rate": 8.601802166649793e-05, "loss": 2.1612, "step": 2772 }, { "epoch": 0.8417058734254059, "grad_norm": 0.4331991672515869, "learning_rate": 8.601295940062772e-05, "loss": 1.8874, "step": 2773 }, { "epoch": 0.8420094096220975, "grad_norm": 0.42542657256126404, "learning_rate": 8.600789713475752e-05, "loss": 1.3892, "step": 2774 }, { "epoch": 0.8423129458187889, "grad_norm": 0.3944099545478821, "learning_rate": 8.600283486888731e-05, "loss": 1.8069, "step": 2775 }, { "epoch": 0.8426164820154803, "grad_norm": 0.42040184140205383, "learning_rate": 8.599777260301711e-05, "loss": 1.8774, "step": 2776 }, { "epoch": 0.8429200182121718, "grad_norm": 0.4341401755809784, "learning_rate": 8.59927103371469e-05, "loss": 1.9773, "step": 2777 }, { "epoch": 0.8432235544088632, "grad_norm": 0.4372880458831787, "learning_rate": 8.59876480712767e-05, "loss": 2.0873, "step": 2778 }, { "epoch": 0.8435270906055548, "grad_norm": 0.3675346374511719, "learning_rate": 8.598258580540651e-05, "loss": 1.1921, "step": 2779 }, { "epoch": 0.8438306268022462, "grad_norm": 0.39204880595207214, "learning_rate": 8.59775235395363e-05, "loss": 1.9907, "step": 2780 }, { "epoch": 0.8441341629989376, "grad_norm": 0.4221871793270111, "learning_rate": 8.59724612736661e-05, "loss": 1.7039, "step": 2781 }, { "epoch": 0.844437699195629, "grad_norm": 0.4046156406402588, "learning_rate": 8.596739900779589e-05, "loss": 2.1068, "step": 2782 }, { "epoch": 0.8447412353923205, "grad_norm": 0.42431220412254333, "learning_rate": 8.596233674192569e-05, "loss": 1.7547, "step": 2783 }, { "epoch": 0.845044771589012, "grad_norm": 0.33057859539985657, "learning_rate": 8.595727447605548e-05, "loss": 1.6987, "step": 2784 }, { "epoch": 0.8453483077857035, "grad_norm": 0.40820497274398804, "learning_rate": 8.595221221018528e-05, "loss": 1.6101, "step": 2785 }, { "epoch": 0.8456518439823949, "grad_norm": 0.6457285284996033, "learning_rate": 8.594714994431507e-05, "loss": 1.927, "step": 2786 }, { "epoch": 0.8459553801790863, "grad_norm": 0.4055453836917877, "learning_rate": 8.594208767844487e-05, "loss": 1.9902, "step": 2787 }, { "epoch": 0.8462589163757778, "grad_norm": 0.45660969614982605, "learning_rate": 8.593702541257467e-05, "loss": 1.8487, "step": 2788 }, { "epoch": 0.8465624525724693, "grad_norm": 0.4082806408405304, "learning_rate": 8.593196314670447e-05, "loss": 1.7797, "step": 2789 }, { "epoch": 0.8468659887691607, "grad_norm": 0.39490821957588196, "learning_rate": 8.592690088083426e-05, "loss": 1.9501, "step": 2790 }, { "epoch": 0.8471695249658522, "grad_norm": 0.47634264826774597, "learning_rate": 8.592183861496407e-05, "loss": 1.5822, "step": 2791 }, { "epoch": 0.8474730611625436, "grad_norm": 0.4166494607925415, "learning_rate": 8.591677634909387e-05, "loss": 2.0393, "step": 2792 }, { "epoch": 0.847776597359235, "grad_norm": 0.3837972581386566, "learning_rate": 8.591171408322366e-05, "loss": 1.8317, "step": 2793 }, { "epoch": 0.8480801335559266, "grad_norm": 0.3955104947090149, "learning_rate": 8.590665181735346e-05, "loss": 2.0546, "step": 2794 }, { "epoch": 0.848383669752618, "grad_norm": 0.35945233702659607, "learning_rate": 8.590158955148325e-05, "loss": 2.0985, "step": 2795 }, { "epoch": 0.8486872059493095, "grad_norm": 0.5097954869270325, "learning_rate": 8.589652728561305e-05, "loss": 1.6378, "step": 2796 }, { "epoch": 0.8489907421460009, "grad_norm": 0.37827685475349426, "learning_rate": 8.589146501974284e-05, "loss": 1.9986, "step": 2797 }, { "epoch": 0.8492942783426923, "grad_norm": 0.39725548028945923, "learning_rate": 8.588640275387264e-05, "loss": 1.9275, "step": 2798 }, { "epoch": 0.8495978145393838, "grad_norm": 0.3660275936126709, "learning_rate": 8.588134048800244e-05, "loss": 1.7952, "step": 2799 }, { "epoch": 0.8499013507360753, "grad_norm": 0.7100840210914612, "learning_rate": 8.587627822213224e-05, "loss": 1.8986, "step": 2800 }, { "epoch": 0.8502048869327667, "grad_norm": 0.4502932131290436, "learning_rate": 8.587121595626203e-05, "loss": 1.8977, "step": 2801 }, { "epoch": 0.8505084231294582, "grad_norm": 0.3382154107093811, "learning_rate": 8.586615369039183e-05, "loss": 1.8838, "step": 2802 }, { "epoch": 0.8508119593261496, "grad_norm": 0.42528000473976135, "learning_rate": 8.586109142452162e-05, "loss": 2.4048, "step": 2803 }, { "epoch": 0.851115495522841, "grad_norm": 0.41296571493148804, "learning_rate": 8.585602915865142e-05, "loss": 1.5728, "step": 2804 }, { "epoch": 0.8514190317195326, "grad_norm": 0.4023008644580841, "learning_rate": 8.585096689278121e-05, "loss": 1.9834, "step": 2805 }, { "epoch": 0.851722567916224, "grad_norm": 0.34381431341171265, "learning_rate": 8.584590462691101e-05, "loss": 2.0097, "step": 2806 }, { "epoch": 0.8520261041129155, "grad_norm": 0.3973744809627533, "learning_rate": 8.58408423610408e-05, "loss": 2.008, "step": 2807 }, { "epoch": 0.8523296403096069, "grad_norm": 0.40327900648117065, "learning_rate": 8.583578009517061e-05, "loss": 1.9841, "step": 2808 }, { "epoch": 0.8526331765062983, "grad_norm": 0.3598316013813019, "learning_rate": 8.58307178293004e-05, "loss": 1.9542, "step": 2809 }, { "epoch": 0.8529367127029899, "grad_norm": 0.4372098743915558, "learning_rate": 8.58256555634302e-05, "loss": 1.7054, "step": 2810 }, { "epoch": 0.8532402488996813, "grad_norm": 0.39820167422294617, "learning_rate": 8.582059329756e-05, "loss": 1.6737, "step": 2811 }, { "epoch": 0.8535437850963727, "grad_norm": 0.45620962977409363, "learning_rate": 8.581553103168979e-05, "loss": 1.895, "step": 2812 }, { "epoch": 0.8538473212930642, "grad_norm": 0.4112420678138733, "learning_rate": 8.581046876581958e-05, "loss": 2.0469, "step": 2813 }, { "epoch": 0.8541508574897556, "grad_norm": 0.4265507161617279, "learning_rate": 8.580540649994938e-05, "loss": 1.8141, "step": 2814 }, { "epoch": 0.8544543936864472, "grad_norm": 0.9317876696586609, "learning_rate": 8.580034423407917e-05, "loss": 1.9912, "step": 2815 }, { "epoch": 0.8547579298831386, "grad_norm": 0.41293710470199585, "learning_rate": 8.579528196820897e-05, "loss": 2.0593, "step": 2816 }, { "epoch": 0.85506146607983, "grad_norm": 0.6074060201644897, "learning_rate": 8.579021970233876e-05, "loss": 1.7091, "step": 2817 }, { "epoch": 0.8553650022765215, "grad_norm": 0.39665672183036804, "learning_rate": 8.578515743646857e-05, "loss": 1.5974, "step": 2818 }, { "epoch": 0.8556685384732129, "grad_norm": 0.34235861897468567, "learning_rate": 8.578009517059837e-05, "loss": 1.7635, "step": 2819 }, { "epoch": 0.8559720746699044, "grad_norm": 0.416742742061615, "learning_rate": 8.577503290472816e-05, "loss": 1.6473, "step": 2820 }, { "epoch": 0.8562756108665959, "grad_norm": 0.41152289509773254, "learning_rate": 8.576997063885796e-05, "loss": 1.8142, "step": 2821 }, { "epoch": 0.8565791470632873, "grad_norm": 0.44638922810554504, "learning_rate": 8.576490837298775e-05, "loss": 1.5819, "step": 2822 }, { "epoch": 0.8568826832599787, "grad_norm": 0.38064852356910706, "learning_rate": 8.575984610711755e-05, "loss": 1.4263, "step": 2823 }, { "epoch": 0.8571862194566702, "grad_norm": 0.41755181550979614, "learning_rate": 8.575478384124734e-05, "loss": 1.5668, "step": 2824 }, { "epoch": 0.8574897556533617, "grad_norm": 0.45153340697288513, "learning_rate": 8.574972157537714e-05, "loss": 1.6601, "step": 2825 }, { "epoch": 0.8577932918500532, "grad_norm": 0.3700641989707947, "learning_rate": 8.574465930950693e-05, "loss": 1.3577, "step": 2826 }, { "epoch": 0.8580968280467446, "grad_norm": 0.44846633076667786, "learning_rate": 8.573959704363674e-05, "loss": 2.0195, "step": 2827 }, { "epoch": 0.858400364243436, "grad_norm": 0.4378660023212433, "learning_rate": 8.573453477776653e-05, "loss": 1.7914, "step": 2828 }, { "epoch": 0.8587039004401275, "grad_norm": 0.4284498691558838, "learning_rate": 8.572947251189633e-05, "loss": 1.8443, "step": 2829 }, { "epoch": 0.8590074366368189, "grad_norm": 0.4318150579929352, "learning_rate": 8.572441024602612e-05, "loss": 1.7671, "step": 2830 }, { "epoch": 0.8593109728335104, "grad_norm": 1.0121327638626099, "learning_rate": 8.571934798015592e-05, "loss": 1.9886, "step": 2831 }, { "epoch": 0.8596145090302019, "grad_norm": 0.4319281578063965, "learning_rate": 8.571428571428571e-05, "loss": 1.8299, "step": 2832 }, { "epoch": 0.8599180452268933, "grad_norm": 0.42897358536720276, "learning_rate": 8.570922344841551e-05, "loss": 2.0147, "step": 2833 }, { "epoch": 0.8602215814235847, "grad_norm": 0.39335522055625916, "learning_rate": 8.57041611825453e-05, "loss": 1.5326, "step": 2834 }, { "epoch": 0.8605251176202762, "grad_norm": 1.2661360502243042, "learning_rate": 8.56990989166751e-05, "loss": 2.1234, "step": 2835 }, { "epoch": 0.8608286538169677, "grad_norm": 0.7632877230644226, "learning_rate": 8.56940366508049e-05, "loss": 1.8358, "step": 2836 }, { "epoch": 0.8611321900136591, "grad_norm": 0.3894922733306885, "learning_rate": 8.56889743849347e-05, "loss": 1.9101, "step": 2837 }, { "epoch": 0.8614357262103506, "grad_norm": 0.3832629919052124, "learning_rate": 8.568391211906451e-05, "loss": 1.772, "step": 2838 }, { "epoch": 0.861739262407042, "grad_norm": 0.4298574924468994, "learning_rate": 8.56788498531943e-05, "loss": 1.4439, "step": 2839 }, { "epoch": 0.8620427986037335, "grad_norm": 0.44331902265548706, "learning_rate": 8.56737875873241e-05, "loss": 1.9716, "step": 2840 }, { "epoch": 0.862346334800425, "grad_norm": 0.43073487281799316, "learning_rate": 8.566872532145389e-05, "loss": 2.1116, "step": 2841 }, { "epoch": 0.8626498709971164, "grad_norm": 0.4528077244758606, "learning_rate": 8.566366305558369e-05, "loss": 1.8215, "step": 2842 }, { "epoch": 0.8629534071938079, "grad_norm": 0.43540868163108826, "learning_rate": 8.565860078971348e-05, "loss": 1.9536, "step": 2843 }, { "epoch": 0.8632569433904993, "grad_norm": 0.4424208998680115, "learning_rate": 8.565353852384328e-05, "loss": 2.0443, "step": 2844 }, { "epoch": 0.8635604795871907, "grad_norm": 0.42500391602516174, "learning_rate": 8.564847625797307e-05, "loss": 1.7454, "step": 2845 }, { "epoch": 0.8638640157838823, "grad_norm": 0.5110988020896912, "learning_rate": 8.564341399210287e-05, "loss": 1.9146, "step": 2846 }, { "epoch": 0.8641675519805737, "grad_norm": 0.44191688299179077, "learning_rate": 8.563835172623268e-05, "loss": 2.0113, "step": 2847 }, { "epoch": 0.8644710881772651, "grad_norm": 0.42467302083969116, "learning_rate": 8.563328946036247e-05, "loss": 1.8504, "step": 2848 }, { "epoch": 0.8647746243739566, "grad_norm": 0.48334258794784546, "learning_rate": 8.562822719449226e-05, "loss": 1.9385, "step": 2849 }, { "epoch": 0.865078160570648, "grad_norm": 0.42993229627609253, "learning_rate": 8.562316492862206e-05, "loss": 1.7958, "step": 2850 }, { "epoch": 0.8653816967673396, "grad_norm": 0.391629695892334, "learning_rate": 8.561810266275185e-05, "loss": 1.7413, "step": 2851 }, { "epoch": 0.865685232964031, "grad_norm": 0.46686479449272156, "learning_rate": 8.561304039688165e-05, "loss": 1.9155, "step": 2852 }, { "epoch": 0.8659887691607224, "grad_norm": 0.41826534271240234, "learning_rate": 8.560797813101144e-05, "loss": 1.7625, "step": 2853 }, { "epoch": 0.8662923053574139, "grad_norm": 0.42303943634033203, "learning_rate": 8.560291586514124e-05, "loss": 2.0604, "step": 2854 }, { "epoch": 0.8665958415541053, "grad_norm": 0.42215773463249207, "learning_rate": 8.559785359927103e-05, "loss": 2.0058, "step": 2855 }, { "epoch": 0.8668993777507967, "grad_norm": 0.45129135251045227, "learning_rate": 8.559279133340083e-05, "loss": 1.7881, "step": 2856 }, { "epoch": 0.8672029139474883, "grad_norm": 0.41676831245422363, "learning_rate": 8.558772906753064e-05, "loss": 1.8624, "step": 2857 }, { "epoch": 0.8675064501441797, "grad_norm": 0.4166240990161896, "learning_rate": 8.558266680166043e-05, "loss": 1.8828, "step": 2858 }, { "epoch": 0.8678099863408711, "grad_norm": 0.407652348279953, "learning_rate": 8.557760453579023e-05, "loss": 1.7462, "step": 2859 }, { "epoch": 0.8681135225375626, "grad_norm": 0.42764970660209656, "learning_rate": 8.557254226992002e-05, "loss": 1.607, "step": 2860 }, { "epoch": 0.868417058734254, "grad_norm": 0.4612043797969818, "learning_rate": 8.556748000404982e-05, "loss": 1.8854, "step": 2861 }, { "epoch": 0.8687205949309456, "grad_norm": 0.35503968596458435, "learning_rate": 8.556241773817961e-05, "loss": 1.6402, "step": 2862 }, { "epoch": 0.869024131127637, "grad_norm": 0.3938760459423065, "learning_rate": 8.55573554723094e-05, "loss": 1.7154, "step": 2863 }, { "epoch": 0.8693276673243284, "grad_norm": 0.6211029291152954, "learning_rate": 8.55522932064392e-05, "loss": 1.3675, "step": 2864 }, { "epoch": 0.8696312035210199, "grad_norm": 0.4015885293483734, "learning_rate": 8.5547230940569e-05, "loss": 1.1773, "step": 2865 }, { "epoch": 0.8699347397177113, "grad_norm": 0.39615508913993835, "learning_rate": 8.55421686746988e-05, "loss": 1.816, "step": 2866 }, { "epoch": 0.8702382759144028, "grad_norm": 0.4044322669506073, "learning_rate": 8.55371064088286e-05, "loss": 1.5465, "step": 2867 }, { "epoch": 0.8705418121110943, "grad_norm": 0.410137802362442, "learning_rate": 8.553204414295839e-05, "loss": 1.7645, "step": 2868 }, { "epoch": 0.8708453483077857, "grad_norm": 0.456717312335968, "learning_rate": 8.552698187708819e-05, "loss": 1.9905, "step": 2869 }, { "epoch": 0.8711488845044771, "grad_norm": 0.3661191761493683, "learning_rate": 8.552191961121798e-05, "loss": 2.1578, "step": 2870 }, { "epoch": 0.8714524207011686, "grad_norm": 0.3868817389011383, "learning_rate": 8.551685734534778e-05, "loss": 1.9996, "step": 2871 }, { "epoch": 0.8717559568978601, "grad_norm": 0.35258975625038147, "learning_rate": 8.551179507947757e-05, "loss": 1.5475, "step": 2872 }, { "epoch": 0.8720594930945516, "grad_norm": 0.4110967516899109, "learning_rate": 8.550673281360737e-05, "loss": 1.7818, "step": 2873 }, { "epoch": 0.872363029291243, "grad_norm": 0.39448168873786926, "learning_rate": 8.550167054773716e-05, "loss": 1.8685, "step": 2874 }, { "epoch": 0.8726665654879344, "grad_norm": 0.5225607752799988, "learning_rate": 8.549660828186697e-05, "loss": 2.056, "step": 2875 }, { "epoch": 0.8729701016846259, "grad_norm": 0.4417632818222046, "learning_rate": 8.549154601599676e-05, "loss": 1.8328, "step": 2876 }, { "epoch": 0.8732736378813174, "grad_norm": 0.3205631673336029, "learning_rate": 8.548648375012656e-05, "loss": 1.2795, "step": 2877 }, { "epoch": 0.8735771740780088, "grad_norm": 0.35961270332336426, "learning_rate": 8.548142148425635e-05, "loss": 1.9222, "step": 2878 }, { "epoch": 0.8738807102747003, "grad_norm": 0.4819619059562683, "learning_rate": 8.547635921838615e-05, "loss": 2.1051, "step": 2879 }, { "epoch": 0.8741842464713917, "grad_norm": 0.4361310601234436, "learning_rate": 8.547129695251596e-05, "loss": 1.3526, "step": 2880 }, { "epoch": 0.8744877826680831, "grad_norm": 0.41012874245643616, "learning_rate": 8.546623468664575e-05, "loss": 1.4308, "step": 2881 }, { "epoch": 0.8747913188647746, "grad_norm": 0.4581417441368103, "learning_rate": 8.546117242077555e-05, "loss": 2.0414, "step": 2882 }, { "epoch": 0.8750948550614661, "grad_norm": 0.5409611463546753, "learning_rate": 8.545611015490534e-05, "loss": 1.3438, "step": 2883 }, { "epoch": 0.8753983912581575, "grad_norm": 0.390472412109375, "learning_rate": 8.545104788903514e-05, "loss": 1.6817, "step": 2884 }, { "epoch": 0.875701927454849, "grad_norm": 0.5236276984214783, "learning_rate": 8.544598562316493e-05, "loss": 1.7992, "step": 2885 }, { "epoch": 0.8760054636515404, "grad_norm": 0.43483301997184753, "learning_rate": 8.544092335729474e-05, "loss": 1.8767, "step": 2886 }, { "epoch": 0.8763089998482319, "grad_norm": 0.5605120658874512, "learning_rate": 8.543586109142453e-05, "loss": 1.5323, "step": 2887 }, { "epoch": 0.8766125360449234, "grad_norm": 0.4484270215034485, "learning_rate": 8.543079882555433e-05, "loss": 1.9958, "step": 2888 }, { "epoch": 0.8769160722416148, "grad_norm": 0.40156564116477966, "learning_rate": 8.542573655968412e-05, "loss": 1.8555, "step": 2889 }, { "epoch": 0.8772196084383063, "grad_norm": 0.42205923795700073, "learning_rate": 8.542067429381392e-05, "loss": 1.5127, "step": 2890 }, { "epoch": 0.8775231446349977, "grad_norm": 0.40961888432502747, "learning_rate": 8.541561202794371e-05, "loss": 1.7508, "step": 2891 }, { "epoch": 0.8778266808316891, "grad_norm": 0.4366128742694855, "learning_rate": 8.541054976207351e-05, "loss": 1.9704, "step": 2892 }, { "epoch": 0.8781302170283807, "grad_norm": 0.4367973804473877, "learning_rate": 8.54054874962033e-05, "loss": 1.978, "step": 2893 }, { "epoch": 0.8784337532250721, "grad_norm": 0.4191198945045471, "learning_rate": 8.54004252303331e-05, "loss": 1.93, "step": 2894 }, { "epoch": 0.8787372894217635, "grad_norm": 0.40298399329185486, "learning_rate": 8.539536296446289e-05, "loss": 1.8862, "step": 2895 }, { "epoch": 0.879040825618455, "grad_norm": 0.4513075351715088, "learning_rate": 8.53903006985927e-05, "loss": 1.4634, "step": 2896 }, { "epoch": 0.8793443618151464, "grad_norm": 0.452395498752594, "learning_rate": 8.53852384327225e-05, "loss": 2.0012, "step": 2897 }, { "epoch": 0.879647898011838, "grad_norm": 0.4072858691215515, "learning_rate": 8.538017616685229e-05, "loss": 1.7729, "step": 2898 }, { "epoch": 0.8799514342085294, "grad_norm": 0.42640551924705505, "learning_rate": 8.537511390098209e-05, "loss": 1.4541, "step": 2899 }, { "epoch": 0.8802549704052208, "grad_norm": 0.37970346212387085, "learning_rate": 8.537005163511188e-05, "loss": 1.7628, "step": 2900 }, { "epoch": 0.8805585066019123, "grad_norm": 0.4421388804912567, "learning_rate": 8.536498936924168e-05, "loss": 1.7712, "step": 2901 }, { "epoch": 0.8808620427986037, "grad_norm": 0.42706549167633057, "learning_rate": 8.535992710337147e-05, "loss": 1.1601, "step": 2902 }, { "epoch": 0.8811655789952952, "grad_norm": 0.42218390107154846, "learning_rate": 8.535486483750126e-05, "loss": 2.081, "step": 2903 }, { "epoch": 0.8814691151919867, "grad_norm": 0.4469526410102844, "learning_rate": 8.534980257163106e-05, "loss": 1.9124, "step": 2904 }, { "epoch": 0.8817726513886781, "grad_norm": 0.42796406149864197, "learning_rate": 8.534474030576087e-05, "loss": 1.8129, "step": 2905 }, { "epoch": 0.8820761875853695, "grad_norm": 0.549192488193512, "learning_rate": 8.533967803989066e-05, "loss": 1.7816, "step": 2906 }, { "epoch": 0.882379723782061, "grad_norm": 0.3347112834453583, "learning_rate": 8.533461577402046e-05, "loss": 2.0233, "step": 2907 }, { "epoch": 0.8826832599787525, "grad_norm": 0.4557845890522003, "learning_rate": 8.532955350815025e-05, "loss": 1.8571, "step": 2908 }, { "epoch": 0.882986796175444, "grad_norm": 0.9646681547164917, "learning_rate": 8.532449124228005e-05, "loss": 1.9618, "step": 2909 }, { "epoch": 0.8832903323721354, "grad_norm": 0.43224748969078064, "learning_rate": 8.531942897640984e-05, "loss": 1.9805, "step": 2910 }, { "epoch": 0.8835938685688268, "grad_norm": 0.635966420173645, "learning_rate": 8.531436671053964e-05, "loss": 1.8572, "step": 2911 }, { "epoch": 0.8838974047655183, "grad_norm": 0.46912774443626404, "learning_rate": 8.530930444466943e-05, "loss": 1.9028, "step": 2912 }, { "epoch": 0.8842009409622097, "grad_norm": 0.37521597743034363, "learning_rate": 8.530424217879923e-05, "loss": 1.7295, "step": 2913 }, { "epoch": 0.8845044771589012, "grad_norm": 0.761882483959198, "learning_rate": 8.529917991292903e-05, "loss": 1.8966, "step": 2914 }, { "epoch": 0.8848080133555927, "grad_norm": 0.7302446365356445, "learning_rate": 8.529411764705883e-05, "loss": 1.9641, "step": 2915 }, { "epoch": 0.8851115495522841, "grad_norm": 0.47133728861808777, "learning_rate": 8.528905538118862e-05, "loss": 1.4893, "step": 2916 }, { "epoch": 0.8854150857489755, "grad_norm": 0.4192088842391968, "learning_rate": 8.528399311531842e-05, "loss": 1.9727, "step": 2917 }, { "epoch": 0.885718621945667, "grad_norm": 0.43679505586624146, "learning_rate": 8.527893084944821e-05, "loss": 1.7336, "step": 2918 }, { "epoch": 0.8860221581423585, "grad_norm": 0.4226585328578949, "learning_rate": 8.527386858357801e-05, "loss": 2.0413, "step": 2919 }, { "epoch": 0.88632569433905, "grad_norm": 0.5971319079399109, "learning_rate": 8.52688063177078e-05, "loss": 1.5096, "step": 2920 }, { "epoch": 0.8866292305357414, "grad_norm": 0.5016753673553467, "learning_rate": 8.52637440518376e-05, "loss": 1.9617, "step": 2921 }, { "epoch": 0.8869327667324328, "grad_norm": 0.4263944625854492, "learning_rate": 8.525868178596739e-05, "loss": 1.866, "step": 2922 }, { "epoch": 0.8872363029291243, "grad_norm": 0.4154115915298462, "learning_rate": 8.525361952009719e-05, "loss": 2.0773, "step": 2923 }, { "epoch": 0.8875398391258158, "grad_norm": 0.45908719301223755, "learning_rate": 8.5248557254227e-05, "loss": 1.9902, "step": 2924 }, { "epoch": 0.8878433753225072, "grad_norm": 0.43351608514785767, "learning_rate": 8.52434949883568e-05, "loss": 1.7658, "step": 2925 }, { "epoch": 0.8881469115191987, "grad_norm": 0.4773117005825043, "learning_rate": 8.52384327224866e-05, "loss": 1.8285, "step": 2926 }, { "epoch": 0.8884504477158901, "grad_norm": 0.604767382144928, "learning_rate": 8.52333704566164e-05, "loss": 1.9066, "step": 2927 }, { "epoch": 0.8887539839125815, "grad_norm": 1.4794889688491821, "learning_rate": 8.522830819074619e-05, "loss": 1.6015, "step": 2928 }, { "epoch": 0.8890575201092731, "grad_norm": 0.9518802165985107, "learning_rate": 8.522324592487598e-05, "loss": 2.0818, "step": 2929 }, { "epoch": 0.8893610563059645, "grad_norm": 0.9635084271430969, "learning_rate": 8.521818365900578e-05, "loss": 1.926, "step": 2930 }, { "epoch": 0.889664592502656, "grad_norm": 0.4159846007823944, "learning_rate": 8.521312139313557e-05, "loss": 1.8518, "step": 2931 }, { "epoch": 0.8899681286993474, "grad_norm": 0.42167580127716064, "learning_rate": 8.520805912726537e-05, "loss": 1.9682, "step": 2932 }, { "epoch": 0.8902716648960388, "grad_norm": 0.4509316384792328, "learning_rate": 8.520299686139516e-05, "loss": 1.9891, "step": 2933 }, { "epoch": 0.8905752010927304, "grad_norm": 0.3452865183353424, "learning_rate": 8.519793459552496e-05, "loss": 1.8003, "step": 2934 }, { "epoch": 0.8908787372894218, "grad_norm": 0.38899463415145874, "learning_rate": 8.519287232965477e-05, "loss": 1.8849, "step": 2935 }, { "epoch": 0.8911822734861132, "grad_norm": 0.4010523557662964, "learning_rate": 8.518781006378456e-05, "loss": 1.7905, "step": 2936 }, { "epoch": 0.8914858096828047, "grad_norm": 0.3848381042480469, "learning_rate": 8.518274779791436e-05, "loss": 1.8661, "step": 2937 }, { "epoch": 0.8917893458794961, "grad_norm": 0.41806578636169434, "learning_rate": 8.517768553204415e-05, "loss": 1.9617, "step": 2938 }, { "epoch": 0.8920928820761875, "grad_norm": 0.4648883640766144, "learning_rate": 8.517262326617395e-05, "loss": 1.1984, "step": 2939 }, { "epoch": 0.8923964182728791, "grad_norm": 0.43756723403930664, "learning_rate": 8.516756100030374e-05, "loss": 1.5523, "step": 2940 }, { "epoch": 0.8926999544695705, "grad_norm": 0.393741637468338, "learning_rate": 8.516249873443353e-05, "loss": 1.8876, "step": 2941 }, { "epoch": 0.893003490666262, "grad_norm": 0.41412442922592163, "learning_rate": 8.515743646856333e-05, "loss": 1.4302, "step": 2942 }, { "epoch": 0.8933070268629534, "grad_norm": 0.4743058681488037, "learning_rate": 8.515237420269312e-05, "loss": 1.9108, "step": 2943 }, { "epoch": 0.8936105630596448, "grad_norm": 0.40074145793914795, "learning_rate": 8.514731193682293e-05, "loss": 2.0512, "step": 2944 }, { "epoch": 0.8939140992563364, "grad_norm": 0.39886727929115295, "learning_rate": 8.514224967095273e-05, "loss": 1.8853, "step": 2945 }, { "epoch": 0.8942176354530278, "grad_norm": 0.9438028335571289, "learning_rate": 8.513718740508252e-05, "loss": 2.1271, "step": 2946 }, { "epoch": 0.8945211716497192, "grad_norm": 0.38940876722335815, "learning_rate": 8.513212513921232e-05, "loss": 1.9008, "step": 2947 }, { "epoch": 0.8948247078464107, "grad_norm": 0.3668425381183624, "learning_rate": 8.512706287334211e-05, "loss": 1.8479, "step": 2948 }, { "epoch": 0.8951282440431021, "grad_norm": 0.41969189047813416, "learning_rate": 8.512200060747191e-05, "loss": 2.0179, "step": 2949 }, { "epoch": 0.8954317802397936, "grad_norm": 0.377257376909256, "learning_rate": 8.51169383416017e-05, "loss": 1.8602, "step": 2950 }, { "epoch": 0.8957353164364851, "grad_norm": 0.47926634550094604, "learning_rate": 8.51118760757315e-05, "loss": 1.9323, "step": 2951 }, { "epoch": 0.8960388526331765, "grad_norm": 0.4736182689666748, "learning_rate": 8.510681380986129e-05, "loss": 1.7645, "step": 2952 }, { "epoch": 0.8963423888298679, "grad_norm": 0.45783525705337524, "learning_rate": 8.51017515439911e-05, "loss": 2.0585, "step": 2953 }, { "epoch": 0.8966459250265594, "grad_norm": 0.4085424840450287, "learning_rate": 8.50966892781209e-05, "loss": 1.8516, "step": 2954 }, { "epoch": 0.8969494612232509, "grad_norm": 0.4012138545513153, "learning_rate": 8.509162701225069e-05, "loss": 1.9907, "step": 2955 }, { "epoch": 0.8972529974199424, "grad_norm": 0.4017476737499237, "learning_rate": 8.508656474638048e-05, "loss": 1.9477, "step": 2956 }, { "epoch": 0.8975565336166338, "grad_norm": 0.3720763325691223, "learning_rate": 8.508150248051028e-05, "loss": 1.9121, "step": 2957 }, { "epoch": 0.8978600698133252, "grad_norm": 0.3642348349094391, "learning_rate": 8.507644021464007e-05, "loss": 1.8507, "step": 2958 }, { "epoch": 0.8981636060100167, "grad_norm": 0.46299463510513306, "learning_rate": 8.507137794876987e-05, "loss": 2.0811, "step": 2959 }, { "epoch": 0.8984671422067082, "grad_norm": 0.3806562125682831, "learning_rate": 8.506631568289966e-05, "loss": 1.6783, "step": 2960 }, { "epoch": 0.8987706784033996, "grad_norm": 0.4003051221370697, "learning_rate": 8.506125341702946e-05, "loss": 1.7978, "step": 2961 }, { "epoch": 0.8990742146000911, "grad_norm": 0.42008984088897705, "learning_rate": 8.505619115115925e-05, "loss": 1.9064, "step": 2962 }, { "epoch": 0.8993777507967825, "grad_norm": 0.4423260986804962, "learning_rate": 8.505112888528906e-05, "loss": 1.9743, "step": 2963 }, { "epoch": 0.8996812869934739, "grad_norm": 0.4516521990299225, "learning_rate": 8.504606661941886e-05, "loss": 1.6559, "step": 2964 }, { "epoch": 0.8999848231901654, "grad_norm": 0.4269407093524933, "learning_rate": 8.504100435354865e-05, "loss": 1.677, "step": 2965 }, { "epoch": 0.9002883593868569, "grad_norm": 0.4931739568710327, "learning_rate": 8.503594208767845e-05, "loss": 1.7786, "step": 2966 }, { "epoch": 0.9005918955835484, "grad_norm": 0.4014637768268585, "learning_rate": 8.503087982180824e-05, "loss": 2.0737, "step": 2967 }, { "epoch": 0.9008954317802398, "grad_norm": 0.4077427387237549, "learning_rate": 8.502581755593804e-05, "loss": 1.9301, "step": 2968 }, { "epoch": 0.9011989679769312, "grad_norm": 0.40187394618988037, "learning_rate": 8.502075529006784e-05, "loss": 2.0045, "step": 2969 }, { "epoch": 0.9015025041736227, "grad_norm": 0.6499014496803284, "learning_rate": 8.501569302419764e-05, "loss": 2.066, "step": 2970 }, { "epoch": 0.9018060403703142, "grad_norm": 0.4545782804489136, "learning_rate": 8.501063075832743e-05, "loss": 1.7235, "step": 2971 }, { "epoch": 0.9021095765670056, "grad_norm": 0.4248214066028595, "learning_rate": 8.500556849245723e-05, "loss": 2.0612, "step": 2972 }, { "epoch": 0.9024131127636971, "grad_norm": 0.49832749366760254, "learning_rate": 8.500050622658702e-05, "loss": 1.9235, "step": 2973 }, { "epoch": 0.9027166489603885, "grad_norm": 0.35065793991088867, "learning_rate": 8.499544396071683e-05, "loss": 1.7691, "step": 2974 }, { "epoch": 0.9030201851570799, "grad_norm": 0.40507805347442627, "learning_rate": 8.499038169484663e-05, "loss": 2.055, "step": 2975 }, { "epoch": 0.9033237213537715, "grad_norm": 0.44182345271110535, "learning_rate": 8.498531942897642e-05, "loss": 1.5353, "step": 2976 }, { "epoch": 0.9036272575504629, "grad_norm": 0.4512852430343628, "learning_rate": 8.498025716310622e-05, "loss": 1.9116, "step": 2977 }, { "epoch": 0.9039307937471543, "grad_norm": 0.44310954213142395, "learning_rate": 8.497519489723601e-05, "loss": 1.6875, "step": 2978 }, { "epoch": 0.9042343299438458, "grad_norm": 0.4079609215259552, "learning_rate": 8.49701326313658e-05, "loss": 1.0174, "step": 2979 }, { "epoch": 0.9045378661405372, "grad_norm": 0.3950175940990448, "learning_rate": 8.49650703654956e-05, "loss": 1.933, "step": 2980 }, { "epoch": 0.9048414023372288, "grad_norm": 0.3858761787414551, "learning_rate": 8.49600080996254e-05, "loss": 1.6877, "step": 2981 }, { "epoch": 0.9051449385339202, "grad_norm": 0.41248536109924316, "learning_rate": 8.495494583375519e-05, "loss": 1.1987, "step": 2982 }, { "epoch": 0.9054484747306116, "grad_norm": 0.3943655490875244, "learning_rate": 8.4949883567885e-05, "loss": 1.5532, "step": 2983 }, { "epoch": 0.9057520109273031, "grad_norm": 0.37889233231544495, "learning_rate": 8.494482130201479e-05, "loss": 1.8119, "step": 2984 }, { "epoch": 0.9060555471239945, "grad_norm": 0.3723227381706238, "learning_rate": 8.493975903614459e-05, "loss": 1.8415, "step": 2985 }, { "epoch": 0.906359083320686, "grad_norm": 0.4503065347671509, "learning_rate": 8.493469677027438e-05, "loss": 1.6841, "step": 2986 }, { "epoch": 0.9066626195173775, "grad_norm": 0.41649529337882996, "learning_rate": 8.492963450440418e-05, "loss": 1.9298, "step": 2987 }, { "epoch": 0.9069661557140689, "grad_norm": 0.3602710962295532, "learning_rate": 8.492457223853397e-05, "loss": 2.167, "step": 2988 }, { "epoch": 0.9072696919107603, "grad_norm": 0.39875030517578125, "learning_rate": 8.491950997266377e-05, "loss": 2.1081, "step": 2989 }, { "epoch": 0.9075732281074518, "grad_norm": 0.42908263206481934, "learning_rate": 8.491444770679356e-05, "loss": 1.7717, "step": 2990 }, { "epoch": 0.9078767643041433, "grad_norm": 0.4125417470932007, "learning_rate": 8.490938544092336e-05, "loss": 2.2085, "step": 2991 }, { "epoch": 0.9081803005008348, "grad_norm": 0.4204493463039398, "learning_rate": 8.490432317505316e-05, "loss": 1.7223, "step": 2992 }, { "epoch": 0.9084838366975262, "grad_norm": 0.48912370204925537, "learning_rate": 8.489926090918296e-05, "loss": 1.7614, "step": 2993 }, { "epoch": 0.9087873728942176, "grad_norm": 0.44855475425720215, "learning_rate": 8.489419864331275e-05, "loss": 1.9345, "step": 2994 }, { "epoch": 0.9090909090909091, "grad_norm": 0.35656431317329407, "learning_rate": 8.488913637744255e-05, "loss": 1.3761, "step": 2995 }, { "epoch": 0.9093944452876005, "grad_norm": 0.431643545627594, "learning_rate": 8.488407411157234e-05, "loss": 2.0976, "step": 2996 }, { "epoch": 0.909697981484292, "grad_norm": 0.3734411299228668, "learning_rate": 8.487901184570214e-05, "loss": 1.5943, "step": 2997 }, { "epoch": 0.9100015176809835, "grad_norm": 0.4734323024749756, "learning_rate": 8.487394957983193e-05, "loss": 1.9213, "step": 2998 }, { "epoch": 0.9103050538776749, "grad_norm": 0.39105552434921265, "learning_rate": 8.486888731396173e-05, "loss": 1.7569, "step": 2999 }, { "epoch": 0.9106085900743663, "grad_norm": 0.35980144143104553, "learning_rate": 8.486382504809152e-05, "loss": 1.4274, "step": 3000 }, { "epoch": 0.9109121262710578, "grad_norm": 0.6007620692253113, "learning_rate": 8.485876278222132e-05, "loss": 1.7682, "step": 3001 }, { "epoch": 0.9112156624677493, "grad_norm": 0.493499755859375, "learning_rate": 8.485370051635113e-05, "loss": 2.0509, "step": 3002 }, { "epoch": 0.9115191986644408, "grad_norm": 1.5949187278747559, "learning_rate": 8.484863825048092e-05, "loss": 1.3541, "step": 3003 }, { "epoch": 0.9118227348611322, "grad_norm": 0.4298441410064697, "learning_rate": 8.484357598461072e-05, "loss": 1.9893, "step": 3004 }, { "epoch": 0.9121262710578236, "grad_norm": 0.4736660420894623, "learning_rate": 8.483851371874051e-05, "loss": 2.0483, "step": 3005 }, { "epoch": 0.9124298072545151, "grad_norm": 0.39900222420692444, "learning_rate": 8.48334514528703e-05, "loss": 1.9998, "step": 3006 }, { "epoch": 0.9127333434512066, "grad_norm": 0.43523257970809937, "learning_rate": 8.48283891870001e-05, "loss": 2.0205, "step": 3007 }, { "epoch": 0.913036879647898, "grad_norm": 0.43466445803642273, "learning_rate": 8.48233269211299e-05, "loss": 2.0233, "step": 3008 }, { "epoch": 0.9133404158445895, "grad_norm": 0.4107099175453186, "learning_rate": 8.481826465525969e-05, "loss": 1.8499, "step": 3009 }, { "epoch": 0.9136439520412809, "grad_norm": 0.3902339041233063, "learning_rate": 8.481320238938948e-05, "loss": 1.6908, "step": 3010 }, { "epoch": 0.9139474882379723, "grad_norm": 0.42095887660980225, "learning_rate": 8.480814012351929e-05, "loss": 1.8919, "step": 3011 }, { "epoch": 0.9142510244346639, "grad_norm": 0.4260912537574768, "learning_rate": 8.480307785764909e-05, "loss": 1.8475, "step": 3012 }, { "epoch": 0.9145545606313553, "grad_norm": 0.49090254306793213, "learning_rate": 8.479801559177888e-05, "loss": 1.284, "step": 3013 }, { "epoch": 0.9148580968280468, "grad_norm": 0.5418572425842285, "learning_rate": 8.479295332590869e-05, "loss": 1.4133, "step": 3014 }, { "epoch": 0.9151616330247382, "grad_norm": 0.46666690707206726, "learning_rate": 8.478789106003849e-05, "loss": 1.6623, "step": 3015 }, { "epoch": 0.9154651692214296, "grad_norm": 0.3846968710422516, "learning_rate": 8.478282879416828e-05, "loss": 2.0276, "step": 3016 }, { "epoch": 0.9157687054181212, "grad_norm": 0.3893693685531616, "learning_rate": 8.477776652829807e-05, "loss": 1.77, "step": 3017 }, { "epoch": 0.9160722416148126, "grad_norm": 0.46749499440193176, "learning_rate": 8.477270426242787e-05, "loss": 1.9687, "step": 3018 }, { "epoch": 0.916375777811504, "grad_norm": 0.5093361735343933, "learning_rate": 8.476764199655766e-05, "loss": 1.5275, "step": 3019 }, { "epoch": 0.9166793140081955, "grad_norm": 0.415081650018692, "learning_rate": 8.476257973068746e-05, "loss": 2.1493, "step": 3020 }, { "epoch": 0.9169828502048869, "grad_norm": 0.43510860204696655, "learning_rate": 8.475751746481725e-05, "loss": 2.1311, "step": 3021 }, { "epoch": 0.9172863864015783, "grad_norm": 0.6513949036598206, "learning_rate": 8.475245519894706e-05, "loss": 1.6994, "step": 3022 }, { "epoch": 0.9175899225982699, "grad_norm": 0.3848845362663269, "learning_rate": 8.474739293307686e-05, "loss": 1.9801, "step": 3023 }, { "epoch": 0.9178934587949613, "grad_norm": 0.41188767552375793, "learning_rate": 8.474233066720665e-05, "loss": 1.8483, "step": 3024 }, { "epoch": 0.9181969949916527, "grad_norm": 0.4070264399051666, "learning_rate": 8.473726840133645e-05, "loss": 1.7551, "step": 3025 }, { "epoch": 0.9185005311883442, "grad_norm": 0.3943238854408264, "learning_rate": 8.473220613546624e-05, "loss": 1.9301, "step": 3026 }, { "epoch": 0.9188040673850356, "grad_norm": 0.39565154910087585, "learning_rate": 8.472714386959604e-05, "loss": 1.9999, "step": 3027 }, { "epoch": 0.9191076035817272, "grad_norm": 0.4589279592037201, "learning_rate": 8.472208160372583e-05, "loss": 1.6646, "step": 3028 }, { "epoch": 0.9194111397784186, "grad_norm": 0.41473063826560974, "learning_rate": 8.471701933785563e-05, "loss": 1.611, "step": 3029 }, { "epoch": 0.91971467597511, "grad_norm": 0.42082396149635315, "learning_rate": 8.471195707198542e-05, "loss": 1.923, "step": 3030 }, { "epoch": 0.9200182121718015, "grad_norm": 0.5210884809494019, "learning_rate": 8.470689480611523e-05, "loss": 2.0562, "step": 3031 }, { "epoch": 0.9203217483684929, "grad_norm": 0.462028831243515, "learning_rate": 8.470183254024502e-05, "loss": 2.1234, "step": 3032 }, { "epoch": 0.9206252845651844, "grad_norm": 0.452634334564209, "learning_rate": 8.469677027437482e-05, "loss": 1.8217, "step": 3033 }, { "epoch": 0.9209288207618759, "grad_norm": 0.4052957594394684, "learning_rate": 8.469170800850461e-05, "loss": 1.782, "step": 3034 }, { "epoch": 0.9212323569585673, "grad_norm": 0.4169760048389435, "learning_rate": 8.468664574263441e-05, "loss": 1.9965, "step": 3035 }, { "epoch": 0.9215358931552587, "grad_norm": 0.41070201992988586, "learning_rate": 8.46815834767642e-05, "loss": 1.4293, "step": 3036 }, { "epoch": 0.9218394293519502, "grad_norm": 0.4272339940071106, "learning_rate": 8.4676521210894e-05, "loss": 1.7909, "step": 3037 }, { "epoch": 0.9221429655486417, "grad_norm": 0.3842034935951233, "learning_rate": 8.467145894502379e-05, "loss": 1.8883, "step": 3038 }, { "epoch": 0.9224465017453332, "grad_norm": 0.4226452112197876, "learning_rate": 8.466639667915359e-05, "loss": 1.8218, "step": 3039 }, { "epoch": 0.9227500379420246, "grad_norm": 0.4224850833415985, "learning_rate": 8.466133441328338e-05, "loss": 1.833, "step": 3040 }, { "epoch": 0.923053574138716, "grad_norm": 0.41073185205459595, "learning_rate": 8.465627214741319e-05, "loss": 2.0014, "step": 3041 }, { "epoch": 0.9233571103354075, "grad_norm": 0.44303256273269653, "learning_rate": 8.465120988154299e-05, "loss": 2.058, "step": 3042 }, { "epoch": 0.923660646532099, "grad_norm": 0.4708426594734192, "learning_rate": 8.464614761567278e-05, "loss": 1.7387, "step": 3043 }, { "epoch": 0.9239641827287904, "grad_norm": 0.36340072751045227, "learning_rate": 8.464108534980257e-05, "loss": 1.7583, "step": 3044 }, { "epoch": 0.9242677189254819, "grad_norm": 0.45171916484832764, "learning_rate": 8.463602308393237e-05, "loss": 1.5946, "step": 3045 }, { "epoch": 0.9245712551221733, "grad_norm": 0.39633792638778687, "learning_rate": 8.463096081806216e-05, "loss": 1.9707, "step": 3046 }, { "epoch": 0.9248747913188647, "grad_norm": 0.43228423595428467, "learning_rate": 8.462589855219196e-05, "loss": 1.9343, "step": 3047 }, { "epoch": 0.9251783275155562, "grad_norm": 0.37613019347190857, "learning_rate": 8.462083628632175e-05, "loss": 1.9903, "step": 3048 }, { "epoch": 0.9254818637122477, "grad_norm": 0.4591895341873169, "learning_rate": 8.461577402045155e-05, "loss": 1.6375, "step": 3049 }, { "epoch": 0.9257853999089392, "grad_norm": 0.419057697057724, "learning_rate": 8.461071175458136e-05, "loss": 1.5849, "step": 3050 }, { "epoch": 0.9260889361056306, "grad_norm": 0.4567117393016815, "learning_rate": 8.460564948871115e-05, "loss": 1.7519, "step": 3051 }, { "epoch": 0.926392472302322, "grad_norm": 0.49061158299446106, "learning_rate": 8.460058722284095e-05, "loss": 2.0161, "step": 3052 }, { "epoch": 0.9266960084990135, "grad_norm": 0.4232182502746582, "learning_rate": 8.459552495697074e-05, "loss": 1.8288, "step": 3053 }, { "epoch": 0.926999544695705, "grad_norm": 0.4958473742008209, "learning_rate": 8.459046269110054e-05, "loss": 1.5602, "step": 3054 }, { "epoch": 0.9273030808923964, "grad_norm": 0.4351857602596283, "learning_rate": 8.458540042523033e-05, "loss": 1.7898, "step": 3055 }, { "epoch": 0.9276066170890879, "grad_norm": 0.3431285619735718, "learning_rate": 8.458033815936013e-05, "loss": 1.8609, "step": 3056 }, { "epoch": 0.9279101532857793, "grad_norm": 0.40975016355514526, "learning_rate": 8.457527589348992e-05, "loss": 1.9818, "step": 3057 }, { "epoch": 0.9282136894824707, "grad_norm": 0.42433828115463257, "learning_rate": 8.457021362761973e-05, "loss": 1.4293, "step": 3058 }, { "epoch": 0.9285172256791623, "grad_norm": 0.37647899985313416, "learning_rate": 8.456515136174952e-05, "loss": 1.641, "step": 3059 }, { "epoch": 0.9288207618758537, "grad_norm": 0.39693647623062134, "learning_rate": 8.456008909587932e-05, "loss": 1.6236, "step": 3060 }, { "epoch": 0.9291242980725452, "grad_norm": 0.4057548940181732, "learning_rate": 8.455502683000913e-05, "loss": 1.7059, "step": 3061 }, { "epoch": 0.9294278342692366, "grad_norm": 0.4264809787273407, "learning_rate": 8.454996456413892e-05, "loss": 1.9343, "step": 3062 }, { "epoch": 0.929731370465928, "grad_norm": 0.4178743362426758, "learning_rate": 8.454490229826872e-05, "loss": 1.8086, "step": 3063 }, { "epoch": 0.9300349066626196, "grad_norm": 0.4191538095474243, "learning_rate": 8.453984003239851e-05, "loss": 1.7379, "step": 3064 }, { "epoch": 0.930338442859311, "grad_norm": 0.3979043960571289, "learning_rate": 8.45347777665283e-05, "loss": 1.193, "step": 3065 }, { "epoch": 0.9306419790560024, "grad_norm": 0.49591994285583496, "learning_rate": 8.45297155006581e-05, "loss": 2.0942, "step": 3066 }, { "epoch": 0.9309455152526939, "grad_norm": 0.37868285179138184, "learning_rate": 8.45246532347879e-05, "loss": 1.9563, "step": 3067 }, { "epoch": 0.9312490514493853, "grad_norm": 0.6344965696334839, "learning_rate": 8.451959096891769e-05, "loss": 2.0734, "step": 3068 }, { "epoch": 0.9315525876460768, "grad_norm": 0.3740929067134857, "learning_rate": 8.451452870304749e-05, "loss": 1.507, "step": 3069 }, { "epoch": 0.9318561238427683, "grad_norm": 0.4502614736557007, "learning_rate": 8.45094664371773e-05, "loss": 2.2908, "step": 3070 }, { "epoch": 0.9321596600394597, "grad_norm": 0.4272226393222809, "learning_rate": 8.450440417130709e-05, "loss": 2.0146, "step": 3071 }, { "epoch": 0.9324631962361511, "grad_norm": 0.4830857217311859, "learning_rate": 8.449934190543688e-05, "loss": 1.9573, "step": 3072 }, { "epoch": 0.9327667324328426, "grad_norm": 0.4085777997970581, "learning_rate": 8.449427963956668e-05, "loss": 1.7846, "step": 3073 }, { "epoch": 0.933070268629534, "grad_norm": 0.41114342212677, "learning_rate": 8.448921737369647e-05, "loss": 1.8357, "step": 3074 }, { "epoch": 0.9333738048262256, "grad_norm": 0.4729441702365875, "learning_rate": 8.448415510782627e-05, "loss": 1.7578, "step": 3075 }, { "epoch": 0.933677341022917, "grad_norm": 0.5033143758773804, "learning_rate": 8.447909284195606e-05, "loss": 1.5804, "step": 3076 }, { "epoch": 0.9339808772196084, "grad_norm": 0.39915481209754944, "learning_rate": 8.447403057608586e-05, "loss": 1.9723, "step": 3077 }, { "epoch": 0.9342844134162999, "grad_norm": 0.48224571347236633, "learning_rate": 8.446896831021565e-05, "loss": 2.0484, "step": 3078 }, { "epoch": 0.9345879496129913, "grad_norm": 0.4606854319572449, "learning_rate": 8.446390604434545e-05, "loss": 1.687, "step": 3079 }, { "epoch": 0.9348914858096828, "grad_norm": 0.5270693898200989, "learning_rate": 8.445884377847526e-05, "loss": 1.9201, "step": 3080 }, { "epoch": 0.9351950220063743, "grad_norm": 0.39141446352005005, "learning_rate": 8.445378151260505e-05, "loss": 1.8601, "step": 3081 }, { "epoch": 0.9354985582030657, "grad_norm": 0.3756524920463562, "learning_rate": 8.444871924673484e-05, "loss": 1.5993, "step": 3082 }, { "epoch": 0.9358020943997571, "grad_norm": 1.0641098022460938, "learning_rate": 8.444365698086464e-05, "loss": 1.7314, "step": 3083 }, { "epoch": 0.9361056305964486, "grad_norm": 0.4183383584022522, "learning_rate": 8.443859471499443e-05, "loss": 1.5999, "step": 3084 }, { "epoch": 0.9364091667931401, "grad_norm": 0.43228405714035034, "learning_rate": 8.443353244912423e-05, "loss": 1.7582, "step": 3085 }, { "epoch": 0.9367127029898316, "grad_norm": 0.38039523363113403, "learning_rate": 8.442847018325402e-05, "loss": 1.9152, "step": 3086 }, { "epoch": 0.937016239186523, "grad_norm": 0.3952110707759857, "learning_rate": 8.442340791738382e-05, "loss": 1.4944, "step": 3087 }, { "epoch": 0.9373197753832144, "grad_norm": 0.40946584939956665, "learning_rate": 8.441834565151361e-05, "loss": 1.8466, "step": 3088 }, { "epoch": 0.9376233115799059, "grad_norm": 0.3978438675403595, "learning_rate": 8.441328338564342e-05, "loss": 1.6802, "step": 3089 }, { "epoch": 0.9379268477765974, "grad_norm": 0.4061533808708191, "learning_rate": 8.440822111977322e-05, "loss": 1.744, "step": 3090 }, { "epoch": 0.9382303839732888, "grad_norm": 0.4017331004142761, "learning_rate": 8.440315885390301e-05, "loss": 1.6486, "step": 3091 }, { "epoch": 0.9385339201699803, "grad_norm": 0.4937646687030792, "learning_rate": 8.43980965880328e-05, "loss": 1.9909, "step": 3092 }, { "epoch": 0.9388374563666717, "grad_norm": 0.5023379921913147, "learning_rate": 8.43930343221626e-05, "loss": 1.5965, "step": 3093 }, { "epoch": 0.9391409925633631, "grad_norm": 0.47406286001205444, "learning_rate": 8.43879720562924e-05, "loss": 1.7296, "step": 3094 }, { "epoch": 0.9394445287600547, "grad_norm": 0.4132237136363983, "learning_rate": 8.438290979042219e-05, "loss": 2.0174, "step": 3095 }, { "epoch": 0.9397480649567461, "grad_norm": 0.39927050471305847, "learning_rate": 8.437784752455199e-05, "loss": 1.9771, "step": 3096 }, { "epoch": 0.9400516011534376, "grad_norm": 0.4007806181907654, "learning_rate": 8.437278525868178e-05, "loss": 2.0763, "step": 3097 }, { "epoch": 0.940355137350129, "grad_norm": 0.45542803406715393, "learning_rate": 8.436772299281159e-05, "loss": 1.6179, "step": 3098 }, { "epoch": 0.9406586735468204, "grad_norm": 0.4620111286640167, "learning_rate": 8.436266072694138e-05, "loss": 1.5975, "step": 3099 }, { "epoch": 0.940962209743512, "grad_norm": 0.41753649711608887, "learning_rate": 8.435759846107118e-05, "loss": 1.8776, "step": 3100 }, { "epoch": 0.9412657459402034, "grad_norm": 0.4777916371822357, "learning_rate": 8.435253619520097e-05, "loss": 1.503, "step": 3101 }, { "epoch": 0.9415692821368948, "grad_norm": 0.3927490711212158, "learning_rate": 8.434747392933077e-05, "loss": 2.0409, "step": 3102 }, { "epoch": 0.9418728183335863, "grad_norm": 0.4261821210384369, "learning_rate": 8.434241166346058e-05, "loss": 1.9552, "step": 3103 }, { "epoch": 0.9421763545302777, "grad_norm": 0.4125726521015167, "learning_rate": 8.433734939759037e-05, "loss": 2.0691, "step": 3104 }, { "epoch": 0.9424798907269691, "grad_norm": 0.46787095069885254, "learning_rate": 8.433228713172017e-05, "loss": 2.1579, "step": 3105 }, { "epoch": 0.9427834269236607, "grad_norm": 0.47438356280326843, "learning_rate": 8.432722486584996e-05, "loss": 1.9795, "step": 3106 }, { "epoch": 0.9430869631203521, "grad_norm": 0.39783594012260437, "learning_rate": 8.432216259997976e-05, "loss": 1.8703, "step": 3107 }, { "epoch": 0.9433904993170436, "grad_norm": 0.3802180588245392, "learning_rate": 8.431710033410955e-05, "loss": 1.8415, "step": 3108 }, { "epoch": 0.943694035513735, "grad_norm": 0.3957035541534424, "learning_rate": 8.431203806823936e-05, "loss": 1.8566, "step": 3109 }, { "epoch": 0.9439975717104264, "grad_norm": 0.4278394281864166, "learning_rate": 8.430697580236915e-05, "loss": 1.5011, "step": 3110 }, { "epoch": 0.944301107907118, "grad_norm": 0.4553667902946472, "learning_rate": 8.430191353649895e-05, "loss": 1.6098, "step": 3111 }, { "epoch": 0.9446046441038094, "grad_norm": 0.47047415375709534, "learning_rate": 8.429685127062874e-05, "loss": 1.4019, "step": 3112 }, { "epoch": 0.9449081803005008, "grad_norm": 0.4949280619621277, "learning_rate": 8.429178900475854e-05, "loss": 2.0078, "step": 3113 }, { "epoch": 0.9452117164971923, "grad_norm": 0.43883949518203735, "learning_rate": 8.428672673888833e-05, "loss": 2.1833, "step": 3114 }, { "epoch": 0.9455152526938837, "grad_norm": 0.44522538781166077, "learning_rate": 8.428166447301813e-05, "loss": 1.8481, "step": 3115 }, { "epoch": 0.9458187888905752, "grad_norm": 0.4013282060623169, "learning_rate": 8.427660220714792e-05, "loss": 2.224, "step": 3116 }, { "epoch": 0.9461223250872667, "grad_norm": 0.6805521249771118, "learning_rate": 8.427153994127772e-05, "loss": 1.3343, "step": 3117 }, { "epoch": 0.9464258612839581, "grad_norm": 0.3767577111721039, "learning_rate": 8.426647767540751e-05, "loss": 1.9997, "step": 3118 }, { "epoch": 0.9467293974806495, "grad_norm": 0.43915873765945435, "learning_rate": 8.426141540953732e-05, "loss": 1.7449, "step": 3119 }, { "epoch": 0.947032933677341, "grad_norm": 0.4347434639930725, "learning_rate": 8.425635314366711e-05, "loss": 1.8674, "step": 3120 }, { "epoch": 0.9473364698740325, "grad_norm": 0.39332154393196106, "learning_rate": 8.425129087779691e-05, "loss": 1.7705, "step": 3121 }, { "epoch": 0.947640006070724, "grad_norm": 0.3791426122188568, "learning_rate": 8.42462286119267e-05, "loss": 2.0012, "step": 3122 }, { "epoch": 0.9479435422674154, "grad_norm": 0.4299080967903137, "learning_rate": 8.42411663460565e-05, "loss": 2.0032, "step": 3123 }, { "epoch": 0.9482470784641068, "grad_norm": 0.4709744155406952, "learning_rate": 8.42361040801863e-05, "loss": 1.6847, "step": 3124 }, { "epoch": 0.9485506146607983, "grad_norm": 0.4098724126815796, "learning_rate": 8.423104181431609e-05, "loss": 1.6806, "step": 3125 }, { "epoch": 0.9488541508574898, "grad_norm": 0.42105549573898315, "learning_rate": 8.422597954844588e-05, "loss": 2.0726, "step": 3126 }, { "epoch": 0.9491576870541812, "grad_norm": 0.43502575159072876, "learning_rate": 8.422091728257568e-05, "loss": 2.081, "step": 3127 }, { "epoch": 0.9494612232508727, "grad_norm": 2.2358832359313965, "learning_rate": 8.421585501670549e-05, "loss": 1.7987, "step": 3128 }, { "epoch": 0.9497647594475641, "grad_norm": 0.4136318564414978, "learning_rate": 8.421079275083528e-05, "loss": 1.8534, "step": 3129 }, { "epoch": 0.9500682956442555, "grad_norm": 0.4346698820590973, "learning_rate": 8.420573048496508e-05, "loss": 1.736, "step": 3130 }, { "epoch": 0.950371831840947, "grad_norm": 0.4190434217453003, "learning_rate": 8.420066821909487e-05, "loss": 1.9535, "step": 3131 }, { "epoch": 0.9506753680376385, "grad_norm": 0.41435617208480835, "learning_rate": 8.419560595322467e-05, "loss": 1.7954, "step": 3132 }, { "epoch": 0.95097890423433, "grad_norm": 0.48440879583358765, "learning_rate": 8.419054368735446e-05, "loss": 1.8961, "step": 3133 }, { "epoch": 0.9512824404310214, "grad_norm": 0.43945470452308655, "learning_rate": 8.418548142148426e-05, "loss": 1.6069, "step": 3134 }, { "epoch": 0.9515859766277128, "grad_norm": 0.4803379774093628, "learning_rate": 8.418041915561405e-05, "loss": 1.763, "step": 3135 }, { "epoch": 0.9518895128244043, "grad_norm": 0.6266827583312988, "learning_rate": 8.417535688974385e-05, "loss": 2.0856, "step": 3136 }, { "epoch": 0.9521930490210958, "grad_norm": 0.39886969327926636, "learning_rate": 8.417029462387365e-05, "loss": 2.0025, "step": 3137 }, { "epoch": 0.9524965852177872, "grad_norm": 0.4129003882408142, "learning_rate": 8.416523235800345e-05, "loss": 1.8249, "step": 3138 }, { "epoch": 0.9528001214144787, "grad_norm": 0.501766562461853, "learning_rate": 8.416017009213324e-05, "loss": 1.5152, "step": 3139 }, { "epoch": 0.9531036576111701, "grad_norm": 0.36676540970802307, "learning_rate": 8.415510782626304e-05, "loss": 1.714, "step": 3140 }, { "epoch": 0.9534071938078615, "grad_norm": 0.3785346746444702, "learning_rate": 8.415004556039283e-05, "loss": 1.8275, "step": 3141 }, { "epoch": 0.9537107300045531, "grad_norm": 0.495661199092865, "learning_rate": 8.414498329452263e-05, "loss": 2.2824, "step": 3142 }, { "epoch": 0.9540142662012445, "grad_norm": 0.361187607049942, "learning_rate": 8.413992102865242e-05, "loss": 2.1639, "step": 3143 }, { "epoch": 0.954317802397936, "grad_norm": 0.4101172983646393, "learning_rate": 8.413485876278222e-05, "loss": 1.8902, "step": 3144 }, { "epoch": 0.9546213385946274, "grad_norm": 0.4362664222717285, "learning_rate": 8.412979649691201e-05, "loss": 1.5794, "step": 3145 }, { "epoch": 0.9549248747913188, "grad_norm": 0.3917141258716583, "learning_rate": 8.41247342310418e-05, "loss": 1.9078, "step": 3146 }, { "epoch": 0.9552284109880104, "grad_norm": 0.4394182562828064, "learning_rate": 8.411967196517161e-05, "loss": 1.7582, "step": 3147 }, { "epoch": 0.9555319471847018, "grad_norm": 0.3726690709590912, "learning_rate": 8.411460969930142e-05, "loss": 2.0366, "step": 3148 }, { "epoch": 0.9558354833813932, "grad_norm": 0.40689945220947266, "learning_rate": 8.410954743343122e-05, "loss": 1.9526, "step": 3149 }, { "epoch": 0.9561390195780847, "grad_norm": 0.3834896385669708, "learning_rate": 8.410448516756101e-05, "loss": 1.8866, "step": 3150 }, { "epoch": 0.9564425557747761, "grad_norm": 0.3632187247276306, "learning_rate": 8.409942290169081e-05, "loss": 1.8138, "step": 3151 }, { "epoch": 0.9567460919714676, "grad_norm": 0.42645806074142456, "learning_rate": 8.40943606358206e-05, "loss": 1.8987, "step": 3152 }, { "epoch": 0.9570496281681591, "grad_norm": 0.3693891763687134, "learning_rate": 8.40892983699504e-05, "loss": 1.8998, "step": 3153 }, { "epoch": 0.9573531643648505, "grad_norm": 0.35439208149909973, "learning_rate": 8.408423610408019e-05, "loss": 1.5383, "step": 3154 }, { "epoch": 0.957656700561542, "grad_norm": 0.4941239356994629, "learning_rate": 8.407917383820999e-05, "loss": 1.6191, "step": 3155 }, { "epoch": 0.9579602367582334, "grad_norm": 0.4424782693386078, "learning_rate": 8.407411157233978e-05, "loss": 1.8852, "step": 3156 }, { "epoch": 0.9582637729549248, "grad_norm": 0.42269936203956604, "learning_rate": 8.406904930646958e-05, "loss": 1.9435, "step": 3157 }, { "epoch": 0.9585673091516164, "grad_norm": 0.48583322763442993, "learning_rate": 8.406398704059938e-05, "loss": 2.0429, "step": 3158 }, { "epoch": 0.9588708453483078, "grad_norm": 0.3931976854801178, "learning_rate": 8.405892477472918e-05, "loss": 1.8182, "step": 3159 }, { "epoch": 0.9591743815449992, "grad_norm": 0.4035438001155853, "learning_rate": 8.405386250885897e-05, "loss": 2.0072, "step": 3160 }, { "epoch": 0.9594779177416907, "grad_norm": 0.4093266725540161, "learning_rate": 8.404880024298877e-05, "loss": 1.7673, "step": 3161 }, { "epoch": 0.9597814539383821, "grad_norm": 0.455600380897522, "learning_rate": 8.404373797711856e-05, "loss": 2.065, "step": 3162 }, { "epoch": 0.9600849901350736, "grad_norm": 0.3882180154323578, "learning_rate": 8.403867571124836e-05, "loss": 2.3081, "step": 3163 }, { "epoch": 0.9603885263317651, "grad_norm": 0.4267047941684723, "learning_rate": 8.403361344537815e-05, "loss": 1.1691, "step": 3164 }, { "epoch": 0.9606920625284565, "grad_norm": 0.3406231701374054, "learning_rate": 8.402855117950795e-05, "loss": 1.1152, "step": 3165 }, { "epoch": 0.960995598725148, "grad_norm": 0.34307190775871277, "learning_rate": 8.402348891363774e-05, "loss": 1.4004, "step": 3166 }, { "epoch": 0.9612991349218394, "grad_norm": 0.42091912031173706, "learning_rate": 8.401842664776755e-05, "loss": 1.736, "step": 3167 }, { "epoch": 0.9616026711185309, "grad_norm": 0.7770476341247559, "learning_rate": 8.401336438189735e-05, "loss": 1.7832, "step": 3168 }, { "epoch": 0.9619062073152224, "grad_norm": 0.428165078163147, "learning_rate": 8.400830211602714e-05, "loss": 1.1608, "step": 3169 }, { "epoch": 0.9622097435119138, "grad_norm": 1.1989792585372925, "learning_rate": 8.400323985015694e-05, "loss": 1.8082, "step": 3170 }, { "epoch": 0.9625132797086052, "grad_norm": 0.41019129753112793, "learning_rate": 8.399817758428673e-05, "loss": 1.9886, "step": 3171 }, { "epoch": 0.9628168159052967, "grad_norm": 0.394325315952301, "learning_rate": 8.399311531841653e-05, "loss": 2.0017, "step": 3172 }, { "epoch": 0.9631203521019882, "grad_norm": 0.42622506618499756, "learning_rate": 8.398805305254632e-05, "loss": 1.9955, "step": 3173 }, { "epoch": 0.9634238882986796, "grad_norm": 0.6596471071243286, "learning_rate": 8.398299078667612e-05, "loss": 1.7009, "step": 3174 }, { "epoch": 0.9637274244953711, "grad_norm": 0.4476582407951355, "learning_rate": 8.397792852080591e-05, "loss": 1.514, "step": 3175 }, { "epoch": 0.9640309606920625, "grad_norm": 0.4172927737236023, "learning_rate": 8.397286625493572e-05, "loss": 1.8323, "step": 3176 }, { "epoch": 0.964334496888754, "grad_norm": 0.46736040711402893, "learning_rate": 8.396780398906551e-05, "loss": 1.6481, "step": 3177 }, { "epoch": 0.9646380330854455, "grad_norm": 0.39226916432380676, "learning_rate": 8.396274172319531e-05, "loss": 2.1195, "step": 3178 }, { "epoch": 0.9649415692821369, "grad_norm": 0.44442611932754517, "learning_rate": 8.39576794573251e-05, "loss": 1.9057, "step": 3179 }, { "epoch": 0.9652451054788284, "grad_norm": 0.46118879318237305, "learning_rate": 8.39526171914549e-05, "loss": 1.7676, "step": 3180 }, { "epoch": 0.9655486416755198, "grad_norm": 0.38712286949157715, "learning_rate": 8.394755492558469e-05, "loss": 1.9661, "step": 3181 }, { "epoch": 0.9658521778722112, "grad_norm": 0.3752710223197937, "learning_rate": 8.394249265971449e-05, "loss": 2.014, "step": 3182 }, { "epoch": 0.9661557140689028, "grad_norm": 0.4489542245864868, "learning_rate": 8.393743039384428e-05, "loss": 1.374, "step": 3183 }, { "epoch": 0.9664592502655942, "grad_norm": 0.7780880928039551, "learning_rate": 8.393236812797408e-05, "loss": 1.7041, "step": 3184 }, { "epoch": 0.9667627864622856, "grad_norm": 0.3980183005332947, "learning_rate": 8.392730586210387e-05, "loss": 2.193, "step": 3185 }, { "epoch": 0.9670663226589771, "grad_norm": 0.6425886750221252, "learning_rate": 8.392224359623368e-05, "loss": 2.0175, "step": 3186 }, { "epoch": 0.9673698588556685, "grad_norm": 0.5367327928543091, "learning_rate": 8.391718133036347e-05, "loss": 1.6297, "step": 3187 }, { "epoch": 0.9676733950523599, "grad_norm": 0.4846515357494354, "learning_rate": 8.391211906449327e-05, "loss": 2.1914, "step": 3188 }, { "epoch": 0.9679769312490515, "grad_norm": 0.4136618971824646, "learning_rate": 8.390705679862306e-05, "loss": 1.7868, "step": 3189 }, { "epoch": 0.9682804674457429, "grad_norm": 0.49537962675094604, "learning_rate": 8.390199453275286e-05, "loss": 1.5581, "step": 3190 }, { "epoch": 0.9685840036424344, "grad_norm": 0.40194493532180786, "learning_rate": 8.389693226688265e-05, "loss": 1.8225, "step": 3191 }, { "epoch": 0.9688875398391258, "grad_norm": 0.4222577214241028, "learning_rate": 8.389187000101246e-05, "loss": 2.1477, "step": 3192 }, { "epoch": 0.9691910760358172, "grad_norm": 0.44375449419021606, "learning_rate": 8.388680773514226e-05, "loss": 1.8249, "step": 3193 }, { "epoch": 0.9694946122325088, "grad_norm": 0.47520217299461365, "learning_rate": 8.388174546927205e-05, "loss": 1.6632, "step": 3194 }, { "epoch": 0.9697981484292002, "grad_norm": 0.4534616470336914, "learning_rate": 8.387668320340185e-05, "loss": 1.6831, "step": 3195 }, { "epoch": 0.9701016846258916, "grad_norm": 0.38130199909210205, "learning_rate": 8.387162093753164e-05, "loss": 1.4674, "step": 3196 }, { "epoch": 0.9704052208225831, "grad_norm": 0.4485642611980438, "learning_rate": 8.386655867166145e-05, "loss": 1.6484, "step": 3197 }, { "epoch": 0.9707087570192745, "grad_norm": 0.4354068636894226, "learning_rate": 8.386149640579124e-05, "loss": 2.1642, "step": 3198 }, { "epoch": 0.971012293215966, "grad_norm": 0.635586678981781, "learning_rate": 8.385643413992104e-05, "loss": 2.143, "step": 3199 }, { "epoch": 0.9713158294126575, "grad_norm": 0.411639004945755, "learning_rate": 8.385137187405083e-05, "loss": 1.6845, "step": 3200 }, { "epoch": 0.9716193656093489, "grad_norm": 0.8328726887702942, "learning_rate": 8.384630960818063e-05, "loss": 1.9091, "step": 3201 }, { "epoch": 0.9719229018060404, "grad_norm": 0.3916926383972168, "learning_rate": 8.384124734231042e-05, "loss": 1.921, "step": 3202 }, { "epoch": 0.9722264380027318, "grad_norm": 0.5521138906478882, "learning_rate": 8.383618507644022e-05, "loss": 1.4916, "step": 3203 }, { "epoch": 0.9725299741994233, "grad_norm": 0.4344598948955536, "learning_rate": 8.383112281057001e-05, "loss": 1.8373, "step": 3204 }, { "epoch": 0.9728335103961148, "grad_norm": 0.5206196904182434, "learning_rate": 8.382606054469981e-05, "loss": 1.02, "step": 3205 }, { "epoch": 0.9731370465928062, "grad_norm": 0.4448585510253906, "learning_rate": 8.382099827882962e-05, "loss": 1.9117, "step": 3206 }, { "epoch": 0.9734405827894976, "grad_norm": 0.39624467492103577, "learning_rate": 8.381593601295941e-05, "loss": 1.8143, "step": 3207 }, { "epoch": 0.9737441189861891, "grad_norm": 0.43617355823516846, "learning_rate": 8.38108737470892e-05, "loss": 2.0802, "step": 3208 }, { "epoch": 0.9740476551828806, "grad_norm": 0.33979347348213196, "learning_rate": 8.3805811481219e-05, "loss": 1.801, "step": 3209 }, { "epoch": 0.974351191379572, "grad_norm": 0.4414675831794739, "learning_rate": 8.38007492153488e-05, "loss": 1.7201, "step": 3210 }, { "epoch": 0.9746547275762635, "grad_norm": 0.4153429865837097, "learning_rate": 8.379568694947859e-05, "loss": 1.9318, "step": 3211 }, { "epoch": 0.9749582637729549, "grad_norm": 0.5069173574447632, "learning_rate": 8.379062468360839e-05, "loss": 1.6647, "step": 3212 }, { "epoch": 0.9752617999696463, "grad_norm": 0.4137086868286133, "learning_rate": 8.378556241773818e-05, "loss": 1.7239, "step": 3213 }, { "epoch": 0.9755653361663378, "grad_norm": 0.4533393383026123, "learning_rate": 8.378050015186797e-05, "loss": 1.5102, "step": 3214 }, { "epoch": 0.9758688723630293, "grad_norm": 0.5293918251991272, "learning_rate": 8.377543788599778e-05, "loss": 1.7053, "step": 3215 }, { "epoch": 0.9761724085597208, "grad_norm": 0.43977779150009155, "learning_rate": 8.377037562012758e-05, "loss": 1.7897, "step": 3216 }, { "epoch": 0.9764759447564122, "grad_norm": 0.32830044627189636, "learning_rate": 8.376531335425737e-05, "loss": 1.4395, "step": 3217 }, { "epoch": 0.9767794809531036, "grad_norm": 0.4043295085430145, "learning_rate": 8.376025108838717e-05, "loss": 1.8202, "step": 3218 }, { "epoch": 0.9770830171497951, "grad_norm": 0.392102986574173, "learning_rate": 8.375518882251696e-05, "loss": 1.8039, "step": 3219 }, { "epoch": 0.9773865533464866, "grad_norm": 0.3810761868953705, "learning_rate": 8.375012655664676e-05, "loss": 1.7554, "step": 3220 }, { "epoch": 0.977690089543178, "grad_norm": 0.43835926055908203, "learning_rate": 8.374506429077655e-05, "loss": 1.3123, "step": 3221 }, { "epoch": 0.9779936257398695, "grad_norm": 0.40104183554649353, "learning_rate": 8.374000202490635e-05, "loss": 1.9954, "step": 3222 }, { "epoch": 0.9782971619365609, "grad_norm": 0.423921138048172, "learning_rate": 8.373493975903614e-05, "loss": 1.8166, "step": 3223 }, { "epoch": 0.9786006981332523, "grad_norm": 0.3636658191680908, "learning_rate": 8.372987749316594e-05, "loss": 1.7822, "step": 3224 }, { "epoch": 0.9789042343299439, "grad_norm": 0.5033218860626221, "learning_rate": 8.372481522729574e-05, "loss": 1.9422, "step": 3225 }, { "epoch": 0.9792077705266353, "grad_norm": 0.43753013014793396, "learning_rate": 8.371975296142554e-05, "loss": 1.8121, "step": 3226 }, { "epoch": 0.9795113067233268, "grad_norm": 0.40286412835121155, "learning_rate": 8.371469069555533e-05, "loss": 1.3284, "step": 3227 }, { "epoch": 0.9798148429200182, "grad_norm": 0.4499688148498535, "learning_rate": 8.370962842968513e-05, "loss": 2.001, "step": 3228 }, { "epoch": 0.9801183791167096, "grad_norm": 0.4191727042198181, "learning_rate": 8.370456616381492e-05, "loss": 1.822, "step": 3229 }, { "epoch": 0.9804219153134012, "grad_norm": 0.5225554704666138, "learning_rate": 8.369950389794472e-05, "loss": 1.7851, "step": 3230 }, { "epoch": 0.9807254515100926, "grad_norm": 0.48582664132118225, "learning_rate": 8.369444163207451e-05, "loss": 1.7945, "step": 3231 }, { "epoch": 0.981028987706784, "grad_norm": 0.39768776297569275, "learning_rate": 8.368937936620431e-05, "loss": 1.4223, "step": 3232 }, { "epoch": 0.9813325239034755, "grad_norm": 0.38653451204299927, "learning_rate": 8.36843171003341e-05, "loss": 1.913, "step": 3233 }, { "epoch": 0.9816360601001669, "grad_norm": 0.42827606201171875, "learning_rate": 8.367925483446391e-05, "loss": 1.8324, "step": 3234 }, { "epoch": 0.9819395962968585, "grad_norm": 0.5108979344367981, "learning_rate": 8.36741925685937e-05, "loss": 1.8669, "step": 3235 }, { "epoch": 0.9822431324935499, "grad_norm": 0.49551811814308167, "learning_rate": 8.366913030272351e-05, "loss": 1.8174, "step": 3236 }, { "epoch": 0.9825466686902413, "grad_norm": 0.3723476529121399, "learning_rate": 8.366406803685331e-05, "loss": 2.2496, "step": 3237 }, { "epoch": 0.9828502048869328, "grad_norm": 0.4076946973800659, "learning_rate": 8.36590057709831e-05, "loss": 2.0584, "step": 3238 }, { "epoch": 0.9831537410836242, "grad_norm": 0.3642696440219879, "learning_rate": 8.36539435051129e-05, "loss": 1.9793, "step": 3239 }, { "epoch": 0.9834572772803156, "grad_norm": 0.3629249632358551, "learning_rate": 8.36488812392427e-05, "loss": 1.3264, "step": 3240 }, { "epoch": 0.9837608134770072, "grad_norm": 0.3478296399116516, "learning_rate": 8.364381897337249e-05, "loss": 1.7251, "step": 3241 }, { "epoch": 0.9840643496736986, "grad_norm": 0.3987084627151489, "learning_rate": 8.363875670750228e-05, "loss": 1.8442, "step": 3242 }, { "epoch": 0.98436788587039, "grad_norm": 0.4380822777748108, "learning_rate": 8.363369444163208e-05, "loss": 1.9684, "step": 3243 }, { "epoch": 0.9846714220670815, "grad_norm": 0.3827231824398041, "learning_rate": 8.362863217576187e-05, "loss": 1.3639, "step": 3244 }, { "epoch": 0.9849749582637729, "grad_norm": 0.39212536811828613, "learning_rate": 8.362356990989168e-05, "loss": 1.8493, "step": 3245 }, { "epoch": 0.9852784944604644, "grad_norm": 0.40932169556617737, "learning_rate": 8.361850764402148e-05, "loss": 1.9505, "step": 3246 }, { "epoch": 0.9855820306571559, "grad_norm": 0.40743735432624817, "learning_rate": 8.361344537815127e-05, "loss": 1.6057, "step": 3247 }, { "epoch": 0.9858855668538473, "grad_norm": 0.44995588064193726, "learning_rate": 8.360838311228107e-05, "loss": 2.0585, "step": 3248 }, { "epoch": 0.9861891030505388, "grad_norm": 0.40385690331459045, "learning_rate": 8.360332084641086e-05, "loss": 1.9227, "step": 3249 }, { "epoch": 0.9864926392472302, "grad_norm": 0.3181687891483307, "learning_rate": 8.359825858054065e-05, "loss": 1.8171, "step": 3250 }, { "epoch": 0.9867961754439217, "grad_norm": 0.5163337588310242, "learning_rate": 8.359319631467045e-05, "loss": 2.0483, "step": 3251 }, { "epoch": 0.9870997116406132, "grad_norm": 0.3739945888519287, "learning_rate": 8.358813404880024e-05, "loss": 1.8311, "step": 3252 }, { "epoch": 0.9874032478373046, "grad_norm": 0.38173356652259827, "learning_rate": 8.358307178293004e-05, "loss": 2.2227, "step": 3253 }, { "epoch": 0.987706784033996, "grad_norm": 0.5024820566177368, "learning_rate": 8.357800951705985e-05, "loss": 1.4709, "step": 3254 }, { "epoch": 0.9880103202306875, "grad_norm": 0.467106431722641, "learning_rate": 8.357294725118964e-05, "loss": 1.9105, "step": 3255 }, { "epoch": 0.988313856427379, "grad_norm": 0.34657251834869385, "learning_rate": 8.356788498531944e-05, "loss": 1.7206, "step": 3256 }, { "epoch": 0.9886173926240704, "grad_norm": 0.44770774245262146, "learning_rate": 8.356282271944923e-05, "loss": 1.8675, "step": 3257 }, { "epoch": 0.9889209288207619, "grad_norm": 0.45685702562332153, "learning_rate": 8.355776045357903e-05, "loss": 2.0606, "step": 3258 }, { "epoch": 0.9892244650174533, "grad_norm": 0.464417964220047, "learning_rate": 8.355269818770882e-05, "loss": 1.7514, "step": 3259 }, { "epoch": 0.9895280012141447, "grad_norm": 0.3830156624317169, "learning_rate": 8.354763592183862e-05, "loss": 1.9846, "step": 3260 }, { "epoch": 0.9898315374108363, "grad_norm": 0.9585968255996704, "learning_rate": 8.354257365596841e-05, "loss": 1.5536, "step": 3261 }, { "epoch": 0.9901350736075277, "grad_norm": 0.5059092044830322, "learning_rate": 8.35375113900982e-05, "loss": 1.6083, "step": 3262 }, { "epoch": 0.9904386098042192, "grad_norm": 0.4165644943714142, "learning_rate": 8.3532449124228e-05, "loss": 1.8939, "step": 3263 }, { "epoch": 0.9907421460009106, "grad_norm": 0.4935527443885803, "learning_rate": 8.352738685835781e-05, "loss": 1.4373, "step": 3264 }, { "epoch": 0.991045682197602, "grad_norm": 0.37208595871925354, "learning_rate": 8.35223245924876e-05, "loss": 1.9555, "step": 3265 }, { "epoch": 0.9913492183942936, "grad_norm": 0.4575968384742737, "learning_rate": 8.35172623266174e-05, "loss": 1.5079, "step": 3266 }, { "epoch": 0.991652754590985, "grad_norm": 0.7772040963172913, "learning_rate": 8.35122000607472e-05, "loss": 1.5094, "step": 3267 }, { "epoch": 0.9919562907876764, "grad_norm": 0.3907686173915863, "learning_rate": 8.350713779487699e-05, "loss": 1.8795, "step": 3268 }, { "epoch": 0.9922598269843679, "grad_norm": 0.40200579166412354, "learning_rate": 8.350207552900678e-05, "loss": 1.9728, "step": 3269 }, { "epoch": 0.9925633631810593, "grad_norm": 0.8831415772438049, "learning_rate": 8.349701326313658e-05, "loss": 1.9289, "step": 3270 }, { "epoch": 0.9928668993777507, "grad_norm": 0.4205533266067505, "learning_rate": 8.349195099726637e-05, "loss": 1.6273, "step": 3271 }, { "epoch": 0.9931704355744423, "grad_norm": 0.3926026225090027, "learning_rate": 8.348688873139617e-05, "loss": 1.9261, "step": 3272 }, { "epoch": 0.9934739717711337, "grad_norm": 0.4113319516181946, "learning_rate": 8.348182646552598e-05, "loss": 1.8568, "step": 3273 }, { "epoch": 0.9937775079678252, "grad_norm": 0.4487648904323578, "learning_rate": 8.347676419965577e-05, "loss": 1.5191, "step": 3274 }, { "epoch": 0.9940810441645166, "grad_norm": 0.5642136335372925, "learning_rate": 8.347170193378557e-05, "loss": 1.5365, "step": 3275 }, { "epoch": 0.994384580361208, "grad_norm": 0.3658483624458313, "learning_rate": 8.346663966791536e-05, "loss": 2.0491, "step": 3276 }, { "epoch": 0.9946881165578996, "grad_norm": 0.397892564535141, "learning_rate": 8.346157740204516e-05, "loss": 2.0732, "step": 3277 }, { "epoch": 0.994991652754591, "grad_norm": 0.39073920249938965, "learning_rate": 8.345651513617495e-05, "loss": 1.4625, "step": 3278 }, { "epoch": 0.9952951889512824, "grad_norm": 0.46809303760528564, "learning_rate": 8.345145287030474e-05, "loss": 1.6965, "step": 3279 }, { "epoch": 0.9955987251479739, "grad_norm": 0.5772141814231873, "learning_rate": 8.344639060443454e-05, "loss": 2.0566, "step": 3280 }, { "epoch": 0.9959022613446653, "grad_norm": 0.43104979395866394, "learning_rate": 8.344132833856435e-05, "loss": 1.8229, "step": 3281 }, { "epoch": 0.9962057975413569, "grad_norm": 0.4449026882648468, "learning_rate": 8.343626607269414e-05, "loss": 2.1477, "step": 3282 }, { "epoch": 0.9965093337380483, "grad_norm": 0.5293501019477844, "learning_rate": 8.343120380682394e-05, "loss": 1.6738, "step": 3283 }, { "epoch": 0.9968128699347397, "grad_norm": 0.33359965682029724, "learning_rate": 8.342614154095375e-05, "loss": 1.0495, "step": 3284 }, { "epoch": 0.9971164061314312, "grad_norm": 0.3978114128112793, "learning_rate": 8.342107927508354e-05, "loss": 1.9208, "step": 3285 }, { "epoch": 0.9974199423281226, "grad_norm": 0.8589026927947998, "learning_rate": 8.341601700921334e-05, "loss": 2.0542, "step": 3286 }, { "epoch": 0.9977234785248141, "grad_norm": 0.44943809509277344, "learning_rate": 8.341095474334313e-05, "loss": 1.5192, "step": 3287 }, { "epoch": 0.9980270147215056, "grad_norm": 0.4497203826904297, "learning_rate": 8.340589247747292e-05, "loss": 1.9164, "step": 3288 }, { "epoch": 0.998330550918197, "grad_norm": 0.4665476977825165, "learning_rate": 8.340083021160272e-05, "loss": 1.7622, "step": 3289 }, { "epoch": 0.9986340871148884, "grad_norm": 0.43909046053886414, "learning_rate": 8.339576794573251e-05, "loss": 1.9663, "step": 3290 }, { "epoch": 0.9989376233115799, "grad_norm": 0.401471883058548, "learning_rate": 8.339070567986231e-05, "loss": 1.9976, "step": 3291 }, { "epoch": 0.9992411595082714, "grad_norm": 0.41323089599609375, "learning_rate": 8.33856434139921e-05, "loss": 1.9335, "step": 3292 }, { "epoch": 0.9995446957049628, "grad_norm": 0.5027084350585938, "learning_rate": 8.33805811481219e-05, "loss": 1.9357, "step": 3293 }, { "epoch": 0.9998482319016543, "grad_norm": 0.4065913259983063, "learning_rate": 8.337551888225171e-05, "loss": 1.7295, "step": 3294 }, { "epoch": 1.0001517680983458, "grad_norm": 0.9244177341461182, "learning_rate": 8.33704566163815e-05, "loss": 1.8995, "step": 3295 }, { "epoch": 1.0004553042950373, "grad_norm": 0.4463343024253845, "learning_rate": 8.33653943505113e-05, "loss": 1.4706, "step": 3296 }, { "epoch": 1.0007588404917287, "grad_norm": 0.38846856355667114, "learning_rate": 8.336033208464109e-05, "loss": 1.5425, "step": 3297 }, { "epoch": 1.0010623766884201, "grad_norm": 0.4786315858364105, "learning_rate": 8.335526981877089e-05, "loss": 1.4659, "step": 3298 }, { "epoch": 1.0013659128851116, "grad_norm": 0.42400819063186646, "learning_rate": 8.335020755290068e-05, "loss": 1.8604, "step": 3299 }, { "epoch": 1.001669449081803, "grad_norm": 0.5442892909049988, "learning_rate": 8.334514528703048e-05, "loss": 1.1405, "step": 3300 }, { "epoch": 1.0019729852784944, "grad_norm": 0.43166840076446533, "learning_rate": 8.334008302116027e-05, "loss": 1.6397, "step": 3301 }, { "epoch": 1.0022765214751859, "grad_norm": 0.5023279786109924, "learning_rate": 8.333502075529007e-05, "loss": 1.8333, "step": 3302 }, { "epoch": 1.0025800576718773, "grad_norm": 0.9390707015991211, "learning_rate": 8.332995848941987e-05, "loss": 1.0042, "step": 3303 }, { "epoch": 1.0028835938685687, "grad_norm": 0.4091005027294159, "learning_rate": 8.332489622354967e-05, "loss": 1.6559, "step": 3304 }, { "epoch": 1.0031871300652604, "grad_norm": 1.1399965286254883, "learning_rate": 8.331983395767946e-05, "loss": 1.2274, "step": 3305 }, { "epoch": 1.0034906662619518, "grad_norm": 0.4626876711845398, "learning_rate": 8.331477169180926e-05, "loss": 1.7368, "step": 3306 }, { "epoch": 1.0037942024586433, "grad_norm": 0.6136215329170227, "learning_rate": 8.330970942593905e-05, "loss": 1.442, "step": 3307 }, { "epoch": 1.0040977386553347, "grad_norm": 0.7071730494499207, "learning_rate": 8.330464716006885e-05, "loss": 1.2879, "step": 3308 }, { "epoch": 1.0044012748520261, "grad_norm": 0.7924762964248657, "learning_rate": 8.329958489419864e-05, "loss": 0.8643, "step": 3309 }, { "epoch": 1.0047048110487176, "grad_norm": 0.42483755946159363, "learning_rate": 8.329452262832844e-05, "loss": 1.2481, "step": 3310 }, { "epoch": 1.005008347245409, "grad_norm": 0.472868412733078, "learning_rate": 8.328946036245823e-05, "loss": 1.5925, "step": 3311 }, { "epoch": 1.0053118834421004, "grad_norm": 0.43884944915771484, "learning_rate": 8.328439809658804e-05, "loss": 1.6212, "step": 3312 }, { "epoch": 1.0056154196387919, "grad_norm": 0.8129292130470276, "learning_rate": 8.327933583071784e-05, "loss": 1.1515, "step": 3313 }, { "epoch": 1.0059189558354833, "grad_norm": 0.5763627886772156, "learning_rate": 8.327427356484763e-05, "loss": 1.8529, "step": 3314 }, { "epoch": 1.0062224920321747, "grad_norm": 0.7095117568969727, "learning_rate": 8.326921129897743e-05, "loss": 1.5395, "step": 3315 }, { "epoch": 1.0065260282288664, "grad_norm": 0.4236385226249695, "learning_rate": 8.326414903310722e-05, "loss": 1.8515, "step": 3316 }, { "epoch": 1.0068295644255578, "grad_norm": 0.49490535259246826, "learning_rate": 8.325908676723701e-05, "loss": 1.3917, "step": 3317 }, { "epoch": 1.0071331006222493, "grad_norm": 0.4824042022228241, "learning_rate": 8.325402450136681e-05, "loss": 1.4685, "step": 3318 }, { "epoch": 1.0074366368189407, "grad_norm": 0.3797103464603424, "learning_rate": 8.32489622354966e-05, "loss": 1.2106, "step": 3319 }, { "epoch": 1.0077401730156321, "grad_norm": 0.3965649902820587, "learning_rate": 8.32438999696264e-05, "loss": 1.9664, "step": 3320 }, { "epoch": 1.0080437092123236, "grad_norm": 0.45846912264823914, "learning_rate": 8.323883770375621e-05, "loss": 1.6475, "step": 3321 }, { "epoch": 1.008347245409015, "grad_norm": 0.4603506326675415, "learning_rate": 8.3233775437886e-05, "loss": 1.6987, "step": 3322 }, { "epoch": 1.0086507816057064, "grad_norm": 0.4500599801540375, "learning_rate": 8.32287131720158e-05, "loss": 1.5708, "step": 3323 }, { "epoch": 1.0089543178023979, "grad_norm": 0.3444702923297882, "learning_rate": 8.322365090614559e-05, "loss": 1.0653, "step": 3324 }, { "epoch": 1.0092578539990893, "grad_norm": 0.6663349270820618, "learning_rate": 8.32185886402754e-05, "loss": 1.6385, "step": 3325 }, { "epoch": 1.009561390195781, "grad_norm": 0.41944435238838196, "learning_rate": 8.32135263744052e-05, "loss": 1.6023, "step": 3326 }, { "epoch": 1.0098649263924724, "grad_norm": 0.45441389083862305, "learning_rate": 8.320846410853499e-05, "loss": 1.4627, "step": 3327 }, { "epoch": 1.0101684625891638, "grad_norm": 0.4097443222999573, "learning_rate": 8.320340184266478e-05, "loss": 1.7927, "step": 3328 }, { "epoch": 1.0104719987858553, "grad_norm": 0.4613991677761078, "learning_rate": 8.319833957679458e-05, "loss": 1.8251, "step": 3329 }, { "epoch": 1.0107755349825467, "grad_norm": 0.43065598607063293, "learning_rate": 8.319327731092437e-05, "loss": 1.5799, "step": 3330 }, { "epoch": 1.0110790711792381, "grad_norm": 0.44765231013298035, "learning_rate": 8.318821504505417e-05, "loss": 1.2663, "step": 3331 }, { "epoch": 1.0113826073759296, "grad_norm": 0.4792670011520386, "learning_rate": 8.318315277918396e-05, "loss": 1.6675, "step": 3332 }, { "epoch": 1.011686143572621, "grad_norm": 0.46904540061950684, "learning_rate": 8.317809051331377e-05, "loss": 1.6204, "step": 3333 }, { "epoch": 1.0119896797693124, "grad_norm": 0.43783873319625854, "learning_rate": 8.317302824744357e-05, "loss": 1.8069, "step": 3334 }, { "epoch": 1.0122932159660039, "grad_norm": 0.4738599359989166, "learning_rate": 8.316796598157336e-05, "loss": 1.8303, "step": 3335 }, { "epoch": 1.0125967521626955, "grad_norm": 0.46483105421066284, "learning_rate": 8.316290371570316e-05, "loss": 1.3623, "step": 3336 }, { "epoch": 1.012900288359387, "grad_norm": 0.3990911543369293, "learning_rate": 8.315784144983295e-05, "loss": 1.4985, "step": 3337 }, { "epoch": 1.0132038245560784, "grad_norm": 0.887096107006073, "learning_rate": 8.315277918396275e-05, "loss": 1.422, "step": 3338 }, { "epoch": 1.0135073607527698, "grad_norm": 0.4891083836555481, "learning_rate": 8.314771691809254e-05, "loss": 1.7041, "step": 3339 }, { "epoch": 1.0138108969494612, "grad_norm": 0.5917662382125854, "learning_rate": 8.314265465222234e-05, "loss": 1.519, "step": 3340 }, { "epoch": 1.0141144331461527, "grad_norm": 0.4926755726337433, "learning_rate": 8.313759238635213e-05, "loss": 1.4773, "step": 3341 }, { "epoch": 1.0144179693428441, "grad_norm": 0.42069876194000244, "learning_rate": 8.313253012048194e-05, "loss": 1.39, "step": 3342 }, { "epoch": 1.0147215055395356, "grad_norm": 0.5121155977249146, "learning_rate": 8.312746785461173e-05, "loss": 1.4662, "step": 3343 }, { "epoch": 1.015025041736227, "grad_norm": 0.4447721838951111, "learning_rate": 8.312240558874153e-05, "loss": 1.5363, "step": 3344 }, { "epoch": 1.0153285779329184, "grad_norm": 0.5080471634864807, "learning_rate": 8.311734332287132e-05, "loss": 1.6594, "step": 3345 }, { "epoch": 1.0156321141296099, "grad_norm": 0.6592669486999512, "learning_rate": 8.311228105700112e-05, "loss": 1.9014, "step": 3346 }, { "epoch": 1.0159356503263015, "grad_norm": 0.48114287853240967, "learning_rate": 8.310721879113091e-05, "loss": 1.4714, "step": 3347 }, { "epoch": 1.016239186522993, "grad_norm": 0.4233185350894928, "learning_rate": 8.310215652526071e-05, "loss": 1.5716, "step": 3348 }, { "epoch": 1.0165427227196844, "grad_norm": 0.41045501828193665, "learning_rate": 8.30970942593905e-05, "loss": 1.6364, "step": 3349 }, { "epoch": 1.0168462589163758, "grad_norm": 0.48139652609825134, "learning_rate": 8.30920319935203e-05, "loss": 1.3519, "step": 3350 }, { "epoch": 1.0171497951130672, "grad_norm": 0.908079981803894, "learning_rate": 8.30869697276501e-05, "loss": 1.5322, "step": 3351 }, { "epoch": 1.0174533313097587, "grad_norm": 0.6833621859550476, "learning_rate": 8.30819074617799e-05, "loss": 1.8607, "step": 3352 }, { "epoch": 1.0177568675064501, "grad_norm": 0.40126875042915344, "learning_rate": 8.30768451959097e-05, "loss": 1.7857, "step": 3353 }, { "epoch": 1.0180604037031415, "grad_norm": 0.5128709077835083, "learning_rate": 8.307178293003949e-05, "loss": 1.5926, "step": 3354 }, { "epoch": 1.018363939899833, "grad_norm": 0.37055882811546326, "learning_rate": 8.306672066416928e-05, "loss": 1.3967, "step": 3355 }, { "epoch": 1.0186674760965244, "grad_norm": 0.4289558529853821, "learning_rate": 8.306165839829908e-05, "loss": 1.5605, "step": 3356 }, { "epoch": 1.018971012293216, "grad_norm": 0.42714962363243103, "learning_rate": 8.305659613242887e-05, "loss": 1.4025, "step": 3357 }, { "epoch": 1.0192745484899075, "grad_norm": 0.5124452114105225, "learning_rate": 8.305153386655867e-05, "loss": 1.6768, "step": 3358 }, { "epoch": 1.019578084686599, "grad_norm": 0.5419396758079529, "learning_rate": 8.304647160068846e-05, "loss": 1.918, "step": 3359 }, { "epoch": 1.0198816208832904, "grad_norm": 0.4802889823913574, "learning_rate": 8.304140933481827e-05, "loss": 1.6741, "step": 3360 }, { "epoch": 1.0201851570799818, "grad_norm": 0.4368225038051605, "learning_rate": 8.303634706894807e-05, "loss": 1.7055, "step": 3361 }, { "epoch": 1.0204886932766732, "grad_norm": 0.42357340455055237, "learning_rate": 8.303128480307786e-05, "loss": 1.5149, "step": 3362 }, { "epoch": 1.0207922294733647, "grad_norm": 0.49789273738861084, "learning_rate": 8.302622253720766e-05, "loss": 1.0468, "step": 3363 }, { "epoch": 1.021095765670056, "grad_norm": 0.4446631073951721, "learning_rate": 8.302116027133745e-05, "loss": 1.9386, "step": 3364 }, { "epoch": 1.0213993018667475, "grad_norm": 0.47823619842529297, "learning_rate": 8.301609800546725e-05, "loss": 1.6925, "step": 3365 }, { "epoch": 1.021702838063439, "grad_norm": 0.44988545775413513, "learning_rate": 8.301103573959704e-05, "loss": 1.7536, "step": 3366 }, { "epoch": 1.0220063742601304, "grad_norm": 0.5718437433242798, "learning_rate": 8.300597347372684e-05, "loss": 1.9711, "step": 3367 }, { "epoch": 1.022309910456822, "grad_norm": 0.6239888072013855, "learning_rate": 8.300091120785663e-05, "loss": 1.2131, "step": 3368 }, { "epoch": 1.0226134466535135, "grad_norm": 0.3814062774181366, "learning_rate": 8.299584894198643e-05, "loss": 1.7703, "step": 3369 }, { "epoch": 1.022916982850205, "grad_norm": 0.5032787322998047, "learning_rate": 8.299078667611623e-05, "loss": 1.5713, "step": 3370 }, { "epoch": 1.0232205190468964, "grad_norm": 0.4844599664211273, "learning_rate": 8.298572441024603e-05, "loss": 2.0, "step": 3371 }, { "epoch": 1.0235240552435878, "grad_norm": 0.4193565249443054, "learning_rate": 8.298066214437584e-05, "loss": 1.7529, "step": 3372 }, { "epoch": 1.0238275914402792, "grad_norm": 0.46902111172676086, "learning_rate": 8.297559987850563e-05, "loss": 1.6534, "step": 3373 }, { "epoch": 1.0241311276369707, "grad_norm": 0.3767467141151428, "learning_rate": 8.297053761263543e-05, "loss": 1.2065, "step": 3374 }, { "epoch": 1.024434663833662, "grad_norm": 0.4703841209411621, "learning_rate": 8.296547534676522e-05, "loss": 1.3887, "step": 3375 }, { "epoch": 1.0247382000303535, "grad_norm": 0.4988289177417755, "learning_rate": 8.296041308089502e-05, "loss": 1.7307, "step": 3376 }, { "epoch": 1.025041736227045, "grad_norm": 0.5288352370262146, "learning_rate": 8.295535081502481e-05, "loss": 1.1543, "step": 3377 }, { "epoch": 1.0253452724237366, "grad_norm": 0.44995805621147156, "learning_rate": 8.29502885491546e-05, "loss": 1.7502, "step": 3378 }, { "epoch": 1.025648808620428, "grad_norm": 0.5098589062690735, "learning_rate": 8.29452262832844e-05, "loss": 1.3828, "step": 3379 }, { "epoch": 1.0259523448171195, "grad_norm": 0.44407930970191956, "learning_rate": 8.29401640174142e-05, "loss": 1.6914, "step": 3380 }, { "epoch": 1.026255881013811, "grad_norm": 0.5067889094352722, "learning_rate": 8.2935101751544e-05, "loss": 1.5762, "step": 3381 }, { "epoch": 1.0265594172105024, "grad_norm": 0.554493248462677, "learning_rate": 8.29300394856738e-05, "loss": 1.5306, "step": 3382 }, { "epoch": 1.0268629534071938, "grad_norm": 0.4484996497631073, "learning_rate": 8.292497721980359e-05, "loss": 1.8593, "step": 3383 }, { "epoch": 1.0271664896038852, "grad_norm": 0.44968825578689575, "learning_rate": 8.291991495393339e-05, "loss": 1.5937, "step": 3384 }, { "epoch": 1.0274700258005767, "grad_norm": 0.5322727560997009, "learning_rate": 8.291485268806318e-05, "loss": 1.6408, "step": 3385 }, { "epoch": 1.027773561997268, "grad_norm": 0.47786515951156616, "learning_rate": 8.290979042219298e-05, "loss": 1.7686, "step": 3386 }, { "epoch": 1.0280770981939595, "grad_norm": 0.5193067193031311, "learning_rate": 8.290472815632277e-05, "loss": 1.7331, "step": 3387 }, { "epoch": 1.0283806343906512, "grad_norm": 0.4540434181690216, "learning_rate": 8.289966589045257e-05, "loss": 1.9388, "step": 3388 }, { "epoch": 1.0286841705873426, "grad_norm": 0.46607738733291626, "learning_rate": 8.289460362458236e-05, "loss": 1.3198, "step": 3389 }, { "epoch": 1.028987706784034, "grad_norm": 0.45862218737602234, "learning_rate": 8.288954135871217e-05, "loss": 1.8702, "step": 3390 }, { "epoch": 1.0292912429807255, "grad_norm": 0.427997887134552, "learning_rate": 8.288447909284196e-05, "loss": 1.6277, "step": 3391 }, { "epoch": 1.029594779177417, "grad_norm": 0.5143392086029053, "learning_rate": 8.287941682697176e-05, "loss": 1.6736, "step": 3392 }, { "epoch": 1.0298983153741084, "grad_norm": 0.42483991384506226, "learning_rate": 8.287435456110155e-05, "loss": 1.7603, "step": 3393 }, { "epoch": 1.0302018515707998, "grad_norm": 0.5088010430335999, "learning_rate": 8.286929229523135e-05, "loss": 1.7789, "step": 3394 }, { "epoch": 1.0305053877674912, "grad_norm": 0.4327448904514313, "learning_rate": 8.286423002936114e-05, "loss": 1.8521, "step": 3395 }, { "epoch": 1.0308089239641827, "grad_norm": 0.5049906969070435, "learning_rate": 8.285916776349094e-05, "loss": 1.0102, "step": 3396 }, { "epoch": 1.031112460160874, "grad_norm": 0.5136983394622803, "learning_rate": 8.285410549762073e-05, "loss": 1.7249, "step": 3397 }, { "epoch": 1.0314159963575655, "grad_norm": 0.6552875638008118, "learning_rate": 8.284904323175053e-05, "loss": 1.9925, "step": 3398 }, { "epoch": 1.0317195325542572, "grad_norm": 0.5211325883865356, "learning_rate": 8.284398096588032e-05, "loss": 1.4414, "step": 3399 }, { "epoch": 1.0320230687509486, "grad_norm": 0.5152223110198975, "learning_rate": 8.283891870001013e-05, "loss": 1.6676, "step": 3400 }, { "epoch": 1.03232660494764, "grad_norm": 0.5396007299423218, "learning_rate": 8.283385643413993e-05, "loss": 1.5183, "step": 3401 }, { "epoch": 1.0326301411443315, "grad_norm": 0.40341633558273315, "learning_rate": 8.282879416826972e-05, "loss": 1.5223, "step": 3402 }, { "epoch": 1.032933677341023, "grad_norm": 1.1678493022918701, "learning_rate": 8.282373190239952e-05, "loss": 1.5059, "step": 3403 }, { "epoch": 1.0332372135377144, "grad_norm": 0.48273664712905884, "learning_rate": 8.281866963652931e-05, "loss": 1.7899, "step": 3404 }, { "epoch": 1.0335407497344058, "grad_norm": 0.5220130085945129, "learning_rate": 8.28136073706591e-05, "loss": 1.7364, "step": 3405 }, { "epoch": 1.0338442859310972, "grad_norm": 0.6939902901649475, "learning_rate": 8.28085451047889e-05, "loss": 1.4888, "step": 3406 }, { "epoch": 1.0341478221277887, "grad_norm": 0.48666536808013916, "learning_rate": 8.28034828389187e-05, "loss": 1.7026, "step": 3407 }, { "epoch": 1.03445135832448, "grad_norm": 0.44669362902641296, "learning_rate": 8.279842057304849e-05, "loss": 1.8834, "step": 3408 }, { "epoch": 1.0347548945211718, "grad_norm": 0.49153947830200195, "learning_rate": 8.27933583071783e-05, "loss": 1.8004, "step": 3409 }, { "epoch": 1.0350584307178632, "grad_norm": 0.512179970741272, "learning_rate": 8.278829604130809e-05, "loss": 1.456, "step": 3410 }, { "epoch": 1.0353619669145546, "grad_norm": 0.4923495054244995, "learning_rate": 8.278323377543789e-05, "loss": 1.862, "step": 3411 }, { "epoch": 1.035665503111246, "grad_norm": 0.4329081177711487, "learning_rate": 8.277817150956768e-05, "loss": 1.567, "step": 3412 }, { "epoch": 1.0359690393079375, "grad_norm": 0.43733224272727966, "learning_rate": 8.277310924369748e-05, "loss": 1.1695, "step": 3413 }, { "epoch": 1.036272575504629, "grad_norm": 0.5006493330001831, "learning_rate": 8.276804697782729e-05, "loss": 1.7505, "step": 3414 }, { "epoch": 1.0365761117013204, "grad_norm": 0.5372819304466248, "learning_rate": 8.276298471195708e-05, "loss": 1.4789, "step": 3415 }, { "epoch": 1.0368796478980118, "grad_norm": 0.5383666753768921, "learning_rate": 8.275792244608688e-05, "loss": 2.0097, "step": 3416 }, { "epoch": 1.0371831840947032, "grad_norm": 0.4749530851840973, "learning_rate": 8.275286018021667e-05, "loss": 1.9549, "step": 3417 }, { "epoch": 1.0374867202913947, "grad_norm": 0.4715706408023834, "learning_rate": 8.274779791434647e-05, "loss": 1.4, "step": 3418 }, { "epoch": 1.037790256488086, "grad_norm": 0.5556216239929199, "learning_rate": 8.274273564847626e-05, "loss": 1.4848, "step": 3419 }, { "epoch": 1.0380937926847777, "grad_norm": 0.46504640579223633, "learning_rate": 8.273767338260607e-05, "loss": 1.4732, "step": 3420 }, { "epoch": 1.0383973288814692, "grad_norm": 0.5332744121551514, "learning_rate": 8.273261111673586e-05, "loss": 1.8816, "step": 3421 }, { "epoch": 1.0387008650781606, "grad_norm": 0.5160506963729858, "learning_rate": 8.272754885086566e-05, "loss": 1.5866, "step": 3422 }, { "epoch": 1.039004401274852, "grad_norm": 0.47710439562797546, "learning_rate": 8.272248658499545e-05, "loss": 1.8447, "step": 3423 }, { "epoch": 1.0393079374715435, "grad_norm": 0.5172427892684937, "learning_rate": 8.271742431912525e-05, "loss": 1.7375, "step": 3424 }, { "epoch": 1.039611473668235, "grad_norm": 0.47947341203689575, "learning_rate": 8.271236205325504e-05, "loss": 1.7389, "step": 3425 }, { "epoch": 1.0399150098649264, "grad_norm": 0.49502432346343994, "learning_rate": 8.270729978738484e-05, "loss": 1.2841, "step": 3426 }, { "epoch": 1.0402185460616178, "grad_norm": 0.3829929828643799, "learning_rate": 8.270223752151463e-05, "loss": 1.3311, "step": 3427 }, { "epoch": 1.0405220822583092, "grad_norm": 0.4519781470298767, "learning_rate": 8.269717525564443e-05, "loss": 1.6033, "step": 3428 }, { "epoch": 1.0408256184550007, "grad_norm": 0.5434120893478394, "learning_rate": 8.269211298977423e-05, "loss": 1.7751, "step": 3429 }, { "epoch": 1.0411291546516923, "grad_norm": 0.47064414620399475, "learning_rate": 8.268705072390403e-05, "loss": 1.5697, "step": 3430 }, { "epoch": 1.0414326908483837, "grad_norm": 0.4496542811393738, "learning_rate": 8.268198845803382e-05, "loss": 1.8201, "step": 3431 }, { "epoch": 1.0417362270450752, "grad_norm": 0.5103307962417603, "learning_rate": 8.267692619216362e-05, "loss": 1.6933, "step": 3432 }, { "epoch": 1.0420397632417666, "grad_norm": 0.46422064304351807, "learning_rate": 8.267186392629341e-05, "loss": 1.7127, "step": 3433 }, { "epoch": 1.042343299438458, "grad_norm": 0.4085420072078705, "learning_rate": 8.266680166042321e-05, "loss": 1.9025, "step": 3434 }, { "epoch": 1.0426468356351495, "grad_norm": 0.5440680980682373, "learning_rate": 8.2661739394553e-05, "loss": 1.7786, "step": 3435 }, { "epoch": 1.042950371831841, "grad_norm": 0.4835658669471741, "learning_rate": 8.26566771286828e-05, "loss": 1.8355, "step": 3436 }, { "epoch": 1.0432539080285324, "grad_norm": 0.45734959840774536, "learning_rate": 8.26516148628126e-05, "loss": 1.225, "step": 3437 }, { "epoch": 1.0435574442252238, "grad_norm": 0.49322158098220825, "learning_rate": 8.264655259694239e-05, "loss": 1.7176, "step": 3438 }, { "epoch": 1.0438609804219152, "grad_norm": 0.44236719608306885, "learning_rate": 8.26414903310722e-05, "loss": 1.8368, "step": 3439 }, { "epoch": 1.0441645166186069, "grad_norm": 0.5147215127944946, "learning_rate": 8.263642806520199e-05, "loss": 1.7734, "step": 3440 }, { "epoch": 1.0444680528152983, "grad_norm": 0.4684102535247803, "learning_rate": 8.263136579933179e-05, "loss": 1.6995, "step": 3441 }, { "epoch": 1.0447715890119897, "grad_norm": 0.4718666076660156, "learning_rate": 8.262630353346158e-05, "loss": 1.8043, "step": 3442 }, { "epoch": 1.0450751252086812, "grad_norm": 0.5067316293716431, "learning_rate": 8.262124126759138e-05, "loss": 1.5676, "step": 3443 }, { "epoch": 1.0453786614053726, "grad_norm": 0.5173615217208862, "learning_rate": 8.261617900172117e-05, "loss": 1.5209, "step": 3444 }, { "epoch": 1.045682197602064, "grad_norm": 0.505668044090271, "learning_rate": 8.261111673585097e-05, "loss": 1.7613, "step": 3445 }, { "epoch": 1.0459857337987555, "grad_norm": 0.39871707558631897, "learning_rate": 8.260605446998076e-05, "loss": 1.2263, "step": 3446 }, { "epoch": 1.046289269995447, "grad_norm": 0.46265268325805664, "learning_rate": 8.260099220411055e-05, "loss": 1.5503, "step": 3447 }, { "epoch": 1.0465928061921383, "grad_norm": 0.4678381681442261, "learning_rate": 8.259592993824036e-05, "loss": 1.4896, "step": 3448 }, { "epoch": 1.0468963423888298, "grad_norm": 0.4106181263923645, "learning_rate": 8.259086767237016e-05, "loss": 1.6483, "step": 3449 }, { "epoch": 1.0471998785855212, "grad_norm": 0.5662809014320374, "learning_rate": 8.258580540649995e-05, "loss": 1.7384, "step": 3450 }, { "epoch": 1.0475034147822129, "grad_norm": 0.5098707675933838, "learning_rate": 8.258074314062975e-05, "loss": 1.6576, "step": 3451 }, { "epoch": 1.0478069509789043, "grad_norm": 0.5266808867454529, "learning_rate": 8.257568087475954e-05, "loss": 1.7297, "step": 3452 }, { "epoch": 1.0481104871755957, "grad_norm": 0.4747079014778137, "learning_rate": 8.257061860888934e-05, "loss": 1.9322, "step": 3453 }, { "epoch": 1.0484140233722872, "grad_norm": 0.5059680938720703, "learning_rate": 8.256555634301913e-05, "loss": 1.3994, "step": 3454 }, { "epoch": 1.0487175595689786, "grad_norm": 0.42003318667411804, "learning_rate": 8.256049407714893e-05, "loss": 1.9084, "step": 3455 }, { "epoch": 1.04902109576567, "grad_norm": 0.5502803325653076, "learning_rate": 8.255543181127872e-05, "loss": 1.7448, "step": 3456 }, { "epoch": 1.0493246319623615, "grad_norm": 9.822343826293945, "learning_rate": 8.255036954540853e-05, "loss": 1.7656, "step": 3457 }, { "epoch": 1.049628168159053, "grad_norm": 0.45939022302627563, "learning_rate": 8.254530727953832e-05, "loss": 1.5728, "step": 3458 }, { "epoch": 1.0499317043557443, "grad_norm": 0.45364564657211304, "learning_rate": 8.254024501366813e-05, "loss": 1.6829, "step": 3459 }, { "epoch": 1.0502352405524358, "grad_norm": 0.5563533902168274, "learning_rate": 8.253518274779793e-05, "loss": 1.4829, "step": 3460 }, { "epoch": 1.0505387767491274, "grad_norm": 0.5119985342025757, "learning_rate": 8.253012048192772e-05, "loss": 1.4512, "step": 3461 }, { "epoch": 1.0508423129458189, "grad_norm": 0.4572368562221527, "learning_rate": 8.252505821605752e-05, "loss": 1.0341, "step": 3462 }, { "epoch": 1.0511458491425103, "grad_norm": 0.5076282024383545, "learning_rate": 8.251999595018731e-05, "loss": 1.6044, "step": 3463 }, { "epoch": 1.0514493853392017, "grad_norm": 0.6191340088844299, "learning_rate": 8.251493368431711e-05, "loss": 1.1313, "step": 3464 }, { "epoch": 1.0517529215358932, "grad_norm": 0.5099151730537415, "learning_rate": 8.25098714184469e-05, "loss": 1.8197, "step": 3465 }, { "epoch": 1.0520564577325846, "grad_norm": 0.5666268467903137, "learning_rate": 8.25048091525767e-05, "loss": 1.5068, "step": 3466 }, { "epoch": 1.052359993929276, "grad_norm": 0.6376385688781738, "learning_rate": 8.249974688670649e-05, "loss": 1.9853, "step": 3467 }, { "epoch": 1.0526635301259675, "grad_norm": 0.47871848940849304, "learning_rate": 8.24946846208363e-05, "loss": 1.6678, "step": 3468 }, { "epoch": 1.052967066322659, "grad_norm": 0.4443090856075287, "learning_rate": 8.24896223549661e-05, "loss": 1.7854, "step": 3469 }, { "epoch": 1.0532706025193503, "grad_norm": 0.4864484667778015, "learning_rate": 8.248456008909589e-05, "loss": 1.7076, "step": 3470 }, { "epoch": 1.053574138716042, "grad_norm": 0.5868439078330994, "learning_rate": 8.247949782322568e-05, "loss": 1.4256, "step": 3471 }, { "epoch": 1.0538776749127334, "grad_norm": 0.47932010889053345, "learning_rate": 8.247443555735548e-05, "loss": 1.7303, "step": 3472 }, { "epoch": 1.0541812111094249, "grad_norm": 0.44566673040390015, "learning_rate": 8.246937329148527e-05, "loss": 1.5097, "step": 3473 }, { "epoch": 1.0544847473061163, "grad_norm": 0.43415167927742004, "learning_rate": 8.246431102561507e-05, "loss": 1.5726, "step": 3474 }, { "epoch": 1.0547882835028077, "grad_norm": 0.5519189834594727, "learning_rate": 8.245924875974486e-05, "loss": 1.9481, "step": 3475 }, { "epoch": 1.0550918196994992, "grad_norm": 0.5307170152664185, "learning_rate": 8.245418649387466e-05, "loss": 1.1548, "step": 3476 }, { "epoch": 1.0553953558961906, "grad_norm": 0.46552446484565735, "learning_rate": 8.244912422800445e-05, "loss": 1.5465, "step": 3477 }, { "epoch": 1.055698892092882, "grad_norm": 0.45253750681877136, "learning_rate": 8.244406196213426e-05, "loss": 1.4566, "step": 3478 }, { "epoch": 1.0560024282895735, "grad_norm": 0.49360036849975586, "learning_rate": 8.243899969626406e-05, "loss": 1.7727, "step": 3479 }, { "epoch": 1.056305964486265, "grad_norm": 0.5780231356620789, "learning_rate": 8.243393743039385e-05, "loss": 1.7624, "step": 3480 }, { "epoch": 1.0566095006829563, "grad_norm": 0.5073447227478027, "learning_rate": 8.242887516452365e-05, "loss": 1.087, "step": 3481 }, { "epoch": 1.056913036879648, "grad_norm": 0.48128485679626465, "learning_rate": 8.242381289865344e-05, "loss": 1.5136, "step": 3482 }, { "epoch": 1.0572165730763394, "grad_norm": 0.4653654992580414, "learning_rate": 8.241875063278324e-05, "loss": 1.757, "step": 3483 }, { "epoch": 1.0575201092730309, "grad_norm": 0.6654927730560303, "learning_rate": 8.241368836691303e-05, "loss": 1.3386, "step": 3484 }, { "epoch": 1.0578236454697223, "grad_norm": 4.631044387817383, "learning_rate": 8.240862610104282e-05, "loss": 1.4554, "step": 3485 }, { "epoch": 1.0581271816664137, "grad_norm": 0.39119642972946167, "learning_rate": 8.240356383517262e-05, "loss": 2.0105, "step": 3486 }, { "epoch": 1.0584307178631052, "grad_norm": 0.4815531075000763, "learning_rate": 8.239850156930243e-05, "loss": 1.4448, "step": 3487 }, { "epoch": 1.0587342540597966, "grad_norm": 0.6337845921516418, "learning_rate": 8.239343930343222e-05, "loss": 1.1668, "step": 3488 }, { "epoch": 1.059037790256488, "grad_norm": 0.49312466382980347, "learning_rate": 8.238837703756202e-05, "loss": 1.0456, "step": 3489 }, { "epoch": 1.0593413264531795, "grad_norm": 0.7493519186973572, "learning_rate": 8.238331477169181e-05, "loss": 1.1351, "step": 3490 }, { "epoch": 1.059644862649871, "grad_norm": 0.46115273237228394, "learning_rate": 8.237825250582161e-05, "loss": 1.662, "step": 3491 }, { "epoch": 1.0599483988465626, "grad_norm": 0.5314909815788269, "learning_rate": 8.23731902399514e-05, "loss": 1.93, "step": 3492 }, { "epoch": 1.060251935043254, "grad_norm": 0.4482044279575348, "learning_rate": 8.23681279740812e-05, "loss": 1.5613, "step": 3493 }, { "epoch": 1.0605554712399454, "grad_norm": 0.4885779321193695, "learning_rate": 8.236306570821099e-05, "loss": 0.8249, "step": 3494 }, { "epoch": 1.0608590074366369, "grad_norm": 0.49470406770706177, "learning_rate": 8.235800344234079e-05, "loss": 1.2612, "step": 3495 }, { "epoch": 1.0611625436333283, "grad_norm": 0.5256443023681641, "learning_rate": 8.23529411764706e-05, "loss": 1.726, "step": 3496 }, { "epoch": 1.0614660798300197, "grad_norm": 0.45549553632736206, "learning_rate": 8.234787891060039e-05, "loss": 1.6308, "step": 3497 }, { "epoch": 1.0617696160267112, "grad_norm": 0.45309826731681824, "learning_rate": 8.234281664473018e-05, "loss": 1.6844, "step": 3498 }, { "epoch": 1.0620731522234026, "grad_norm": 3.0413029193878174, "learning_rate": 8.233775437885998e-05, "loss": 1.5278, "step": 3499 }, { "epoch": 1.062376688420094, "grad_norm": 0.5270426869392395, "learning_rate": 8.233269211298977e-05, "loss": 1.9033, "step": 3500 }, { "epoch": 1.0626802246167855, "grad_norm": 0.5388810634613037, "learning_rate": 8.232762984711957e-05, "loss": 1.4571, "step": 3501 }, { "epoch": 1.0629837608134771, "grad_norm": 0.5315312743186951, "learning_rate": 8.232256758124936e-05, "loss": 1.7535, "step": 3502 }, { "epoch": 1.0632872970101686, "grad_norm": 0.530817985534668, "learning_rate": 8.231750531537917e-05, "loss": 1.3184, "step": 3503 }, { "epoch": 1.06359083320686, "grad_norm": 0.4437430500984192, "learning_rate": 8.231244304950897e-05, "loss": 1.6532, "step": 3504 }, { "epoch": 1.0638943694035514, "grad_norm": 0.5160355567932129, "learning_rate": 8.230738078363876e-05, "loss": 1.6429, "step": 3505 }, { "epoch": 1.0641979056002429, "grad_norm": 0.5177229046821594, "learning_rate": 8.230231851776856e-05, "loss": 1.3577, "step": 3506 }, { "epoch": 1.0645014417969343, "grad_norm": 0.5815109014511108, "learning_rate": 8.229725625189836e-05, "loss": 1.3545, "step": 3507 }, { "epoch": 1.0648049779936257, "grad_norm": 0.7150763273239136, "learning_rate": 8.229219398602816e-05, "loss": 1.5478, "step": 3508 }, { "epoch": 1.0651085141903172, "grad_norm": 0.5490028262138367, "learning_rate": 8.228713172015795e-05, "loss": 1.8155, "step": 3509 }, { "epoch": 1.0654120503870086, "grad_norm": 0.455064982175827, "learning_rate": 8.228206945428775e-05, "loss": 1.4917, "step": 3510 }, { "epoch": 1.0657155865837, "grad_norm": 1.305898666381836, "learning_rate": 8.227700718841754e-05, "loss": 1.6654, "step": 3511 }, { "epoch": 1.0660191227803915, "grad_norm": 0.4300096929073334, "learning_rate": 8.227194492254734e-05, "loss": 1.8274, "step": 3512 }, { "epoch": 1.0663226589770831, "grad_norm": 0.3954041600227356, "learning_rate": 8.226688265667713e-05, "loss": 1.537, "step": 3513 }, { "epoch": 1.0666261951737745, "grad_norm": 0.48388218879699707, "learning_rate": 8.226182039080693e-05, "loss": 1.8136, "step": 3514 }, { "epoch": 1.066929731370466, "grad_norm": 0.5675212740898132, "learning_rate": 8.225675812493672e-05, "loss": 1.7286, "step": 3515 }, { "epoch": 1.0672332675671574, "grad_norm": 0.4250033497810364, "learning_rate": 8.225169585906652e-05, "loss": 1.6428, "step": 3516 }, { "epoch": 1.0675368037638489, "grad_norm": 0.41837701201438904, "learning_rate": 8.224663359319633e-05, "loss": 1.0005, "step": 3517 }, { "epoch": 1.0678403399605403, "grad_norm": 0.6654266119003296, "learning_rate": 8.224157132732612e-05, "loss": 1.5319, "step": 3518 }, { "epoch": 1.0681438761572317, "grad_norm": 0.5207446813583374, "learning_rate": 8.223650906145592e-05, "loss": 1.7826, "step": 3519 }, { "epoch": 1.0684474123539232, "grad_norm": 0.5787805914878845, "learning_rate": 8.223144679558571e-05, "loss": 1.6859, "step": 3520 }, { "epoch": 1.0687509485506146, "grad_norm": 0.5311471819877625, "learning_rate": 8.22263845297155e-05, "loss": 1.9176, "step": 3521 }, { "epoch": 1.069054484747306, "grad_norm": 0.5248377323150635, "learning_rate": 8.22213222638453e-05, "loss": 1.8143, "step": 3522 }, { "epoch": 1.0693580209439975, "grad_norm": 0.48425552248954773, "learning_rate": 8.22162599979751e-05, "loss": 1.8119, "step": 3523 }, { "epoch": 1.0696615571406891, "grad_norm": 0.49413445591926575, "learning_rate": 8.221119773210489e-05, "loss": 1.2554, "step": 3524 }, { "epoch": 1.0699650933373805, "grad_norm": 0.48792800307273865, "learning_rate": 8.220613546623468e-05, "loss": 1.7597, "step": 3525 }, { "epoch": 1.070268629534072, "grad_norm": 0.4943905174732208, "learning_rate": 8.220107320036449e-05, "loss": 0.9761, "step": 3526 }, { "epoch": 1.0705721657307634, "grad_norm": 0.6244659423828125, "learning_rate": 8.219601093449429e-05, "loss": 0.7937, "step": 3527 }, { "epoch": 1.0708757019274548, "grad_norm": 0.5777722001075745, "learning_rate": 8.219094866862408e-05, "loss": 1.2079, "step": 3528 }, { "epoch": 1.0711792381241463, "grad_norm": 0.4799225926399231, "learning_rate": 8.218588640275388e-05, "loss": 1.3656, "step": 3529 }, { "epoch": 1.0714827743208377, "grad_norm": 0.48858943581581116, "learning_rate": 8.218082413688367e-05, "loss": 1.6026, "step": 3530 }, { "epoch": 1.0717863105175292, "grad_norm": 0.532616376876831, "learning_rate": 8.217576187101347e-05, "loss": 1.3968, "step": 3531 }, { "epoch": 1.0720898467142206, "grad_norm": 0.4278903603553772, "learning_rate": 8.217069960514326e-05, "loss": 1.1862, "step": 3532 }, { "epoch": 1.0723933829109122, "grad_norm": 0.47585421800613403, "learning_rate": 8.216563733927306e-05, "loss": 1.4731, "step": 3533 }, { "epoch": 1.0726969191076037, "grad_norm": 0.5400151014328003, "learning_rate": 8.216057507340285e-05, "loss": 1.9027, "step": 3534 }, { "epoch": 1.073000455304295, "grad_norm": 0.4817771911621094, "learning_rate": 8.215551280753266e-05, "loss": 1.7373, "step": 3535 }, { "epoch": 1.0733039915009865, "grad_norm": 0.46320927143096924, "learning_rate": 8.215045054166245e-05, "loss": 1.691, "step": 3536 }, { "epoch": 1.073607527697678, "grad_norm": 0.5020293593406677, "learning_rate": 8.214538827579225e-05, "loss": 1.8651, "step": 3537 }, { "epoch": 1.0739110638943694, "grad_norm": 0.5326499342918396, "learning_rate": 8.214032600992204e-05, "loss": 1.5852, "step": 3538 }, { "epoch": 1.0742146000910608, "grad_norm": 0.44219890236854553, "learning_rate": 8.213526374405184e-05, "loss": 1.7222, "step": 3539 }, { "epoch": 1.0745181362877523, "grad_norm": 0.5475308895111084, "learning_rate": 8.213020147818163e-05, "loss": 1.3513, "step": 3540 }, { "epoch": 1.0748216724844437, "grad_norm": 0.4063558876514435, "learning_rate": 8.212513921231143e-05, "loss": 1.5747, "step": 3541 }, { "epoch": 1.0751252086811351, "grad_norm": 0.49913567304611206, "learning_rate": 8.212007694644122e-05, "loss": 1.8541, "step": 3542 }, { "epoch": 1.0754287448778266, "grad_norm": 0.5605202913284302, "learning_rate": 8.211501468057102e-05, "loss": 1.5137, "step": 3543 }, { "epoch": 1.0757322810745182, "grad_norm": 0.8763415813446045, "learning_rate": 8.210995241470081e-05, "loss": 1.3439, "step": 3544 }, { "epoch": 1.0760358172712097, "grad_norm": 0.5468719601631165, "learning_rate": 8.210489014883062e-05, "loss": 1.8425, "step": 3545 }, { "epoch": 1.076339353467901, "grad_norm": 0.6741669774055481, "learning_rate": 8.209982788296042e-05, "loss": 1.5244, "step": 3546 }, { "epoch": 1.0766428896645925, "grad_norm": 0.4905114769935608, "learning_rate": 8.209476561709021e-05, "loss": 1.3453, "step": 3547 }, { "epoch": 1.076946425861284, "grad_norm": 0.4014042615890503, "learning_rate": 8.208970335122002e-05, "loss": 1.7543, "step": 3548 }, { "epoch": 1.0772499620579754, "grad_norm": 0.41040587425231934, "learning_rate": 8.208464108534981e-05, "loss": 1.4491, "step": 3549 }, { "epoch": 1.0775534982546668, "grad_norm": 0.558845043182373, "learning_rate": 8.207957881947961e-05, "loss": 1.6391, "step": 3550 }, { "epoch": 1.0778570344513583, "grad_norm": 0.5031947493553162, "learning_rate": 8.20745165536094e-05, "loss": 1.8803, "step": 3551 }, { "epoch": 1.0781605706480497, "grad_norm": 0.5727876424789429, "learning_rate": 8.20694542877392e-05, "loss": 1.7184, "step": 3552 }, { "epoch": 1.0784641068447411, "grad_norm": 0.506584107875824, "learning_rate": 8.206439202186899e-05, "loss": 1.6442, "step": 3553 }, { "epoch": 1.0787676430414326, "grad_norm": 0.41179177165031433, "learning_rate": 8.205932975599879e-05, "loss": 1.8083, "step": 3554 }, { "epoch": 1.0790711792381242, "grad_norm": 0.4515683054924011, "learning_rate": 8.205426749012858e-05, "loss": 1.675, "step": 3555 }, { "epoch": 1.0793747154348157, "grad_norm": 0.7169219851493835, "learning_rate": 8.204920522425839e-05, "loss": 1.81, "step": 3556 }, { "epoch": 1.079678251631507, "grad_norm": 0.4774874150753021, "learning_rate": 8.204414295838819e-05, "loss": 1.8261, "step": 3557 }, { "epoch": 1.0799817878281985, "grad_norm": 0.5474801659584045, "learning_rate": 8.203908069251798e-05, "loss": 1.7529, "step": 3558 }, { "epoch": 1.08028532402489, "grad_norm": 0.49499598145484924, "learning_rate": 8.203401842664778e-05, "loss": 1.6019, "step": 3559 }, { "epoch": 1.0805888602215814, "grad_norm": 0.5839375257492065, "learning_rate": 8.202895616077757e-05, "loss": 1.1219, "step": 3560 }, { "epoch": 1.0808923964182728, "grad_norm": 0.5204687714576721, "learning_rate": 8.202389389490736e-05, "loss": 2.0522, "step": 3561 }, { "epoch": 1.0811959326149643, "grad_norm": 0.5370623469352722, "learning_rate": 8.201883162903716e-05, "loss": 1.5759, "step": 3562 }, { "epoch": 1.0814994688116557, "grad_norm": 0.49342766404151917, "learning_rate": 8.201376936316695e-05, "loss": 1.4948, "step": 3563 }, { "epoch": 1.0818030050083474, "grad_norm": 0.4796147644519806, "learning_rate": 8.200870709729675e-05, "loss": 2.0453, "step": 3564 }, { "epoch": 1.0821065412050388, "grad_norm": 0.41939178109169006, "learning_rate": 8.200364483142656e-05, "loss": 1.2251, "step": 3565 }, { "epoch": 1.0824100774017302, "grad_norm": 0.5219609141349792, "learning_rate": 8.199858256555635e-05, "loss": 1.3768, "step": 3566 }, { "epoch": 1.0827136135984217, "grad_norm": 0.45907774567604065, "learning_rate": 8.199352029968615e-05, "loss": 1.4058, "step": 3567 }, { "epoch": 1.083017149795113, "grad_norm": 0.6365559101104736, "learning_rate": 8.198845803381594e-05, "loss": 1.6417, "step": 3568 }, { "epoch": 1.0833206859918045, "grad_norm": 0.5077771544456482, "learning_rate": 8.198339576794574e-05, "loss": 1.8395, "step": 3569 }, { "epoch": 1.083624222188496, "grad_norm": 0.4947681128978729, "learning_rate": 8.197833350207553e-05, "loss": 1.6237, "step": 3570 }, { "epoch": 1.0839277583851874, "grad_norm": 0.770715057849884, "learning_rate": 8.197327123620533e-05, "loss": 0.9829, "step": 3571 }, { "epoch": 1.0842312945818788, "grad_norm": 0.5345532298088074, "learning_rate": 8.196820897033512e-05, "loss": 1.7476, "step": 3572 }, { "epoch": 1.0845348307785703, "grad_norm": 0.5688590407371521, "learning_rate": 8.196314670446492e-05, "loss": 1.8579, "step": 3573 }, { "epoch": 1.0848383669752617, "grad_norm": 0.5151784420013428, "learning_rate": 8.195808443859472e-05, "loss": 1.508, "step": 3574 }, { "epoch": 1.0851419031719534, "grad_norm": 0.4951719045639038, "learning_rate": 8.195302217272452e-05, "loss": 1.776, "step": 3575 }, { "epoch": 1.0854454393686448, "grad_norm": 0.4807814359664917, "learning_rate": 8.194795990685431e-05, "loss": 1.7716, "step": 3576 }, { "epoch": 1.0857489755653362, "grad_norm": 0.4597873091697693, "learning_rate": 8.194289764098411e-05, "loss": 1.7043, "step": 3577 }, { "epoch": 1.0860525117620277, "grad_norm": 0.45082420110702515, "learning_rate": 8.19378353751139e-05, "loss": 1.8152, "step": 3578 }, { "epoch": 1.086356047958719, "grad_norm": 0.5934682488441467, "learning_rate": 8.19327731092437e-05, "loss": 1.5072, "step": 3579 }, { "epoch": 1.0866595841554105, "grad_norm": 0.496003657579422, "learning_rate": 8.192771084337349e-05, "loss": 1.5421, "step": 3580 }, { "epoch": 1.086963120352102, "grad_norm": 0.5661733746528625, "learning_rate": 8.192264857750329e-05, "loss": 1.3126, "step": 3581 }, { "epoch": 1.0872666565487934, "grad_norm": 0.6730456352233887, "learning_rate": 8.191758631163308e-05, "loss": 1.5007, "step": 3582 }, { "epoch": 1.0875701927454848, "grad_norm": 0.6048880815505981, "learning_rate": 8.191252404576288e-05, "loss": 1.7904, "step": 3583 }, { "epoch": 1.0878737289421763, "grad_norm": 0.5846467614173889, "learning_rate": 8.190746177989269e-05, "loss": 1.824, "step": 3584 }, { "epoch": 1.0881772651388677, "grad_norm": 0.47012564539909363, "learning_rate": 8.190239951402248e-05, "loss": 1.4203, "step": 3585 }, { "epoch": 1.0884808013355594, "grad_norm": 0.5919317007064819, "learning_rate": 8.189733724815228e-05, "loss": 1.6285, "step": 3586 }, { "epoch": 1.0887843375322508, "grad_norm": 0.4845069944858551, "learning_rate": 8.189227498228207e-05, "loss": 1.8242, "step": 3587 }, { "epoch": 1.0890878737289422, "grad_norm": 0.6998928785324097, "learning_rate": 8.188721271641186e-05, "loss": 1.7961, "step": 3588 }, { "epoch": 1.0893914099256337, "grad_norm": 0.5783026814460754, "learning_rate": 8.188215045054166e-05, "loss": 1.7251, "step": 3589 }, { "epoch": 1.089694946122325, "grad_norm": 0.48840558528900146, "learning_rate": 8.187708818467145e-05, "loss": 1.5038, "step": 3590 }, { "epoch": 1.0899984823190165, "grad_norm": 0.47411298751831055, "learning_rate": 8.187202591880125e-05, "loss": 1.2775, "step": 3591 }, { "epoch": 1.090302018515708, "grad_norm": 0.39426228404045105, "learning_rate": 8.186696365293106e-05, "loss": 1.6157, "step": 3592 }, { "epoch": 1.0906055547123994, "grad_norm": 0.4736963212490082, "learning_rate": 8.186190138706085e-05, "loss": 1.7205, "step": 3593 }, { "epoch": 1.0909090909090908, "grad_norm": 0.4530097544193268, "learning_rate": 8.185683912119065e-05, "loss": 1.4091, "step": 3594 }, { "epoch": 1.0912126271057823, "grad_norm": 0.6029506921768188, "learning_rate": 8.185177685532046e-05, "loss": 1.5175, "step": 3595 }, { "epoch": 1.091516163302474, "grad_norm": 0.5318018198013306, "learning_rate": 8.184671458945025e-05, "loss": 1.9868, "step": 3596 }, { "epoch": 1.0918196994991654, "grad_norm": 0.515565812587738, "learning_rate": 8.184165232358005e-05, "loss": 1.7638, "step": 3597 }, { "epoch": 1.0921232356958568, "grad_norm": 0.54291832447052, "learning_rate": 8.183659005770984e-05, "loss": 1.5345, "step": 3598 }, { "epoch": 1.0924267718925482, "grad_norm": 0.45856529474258423, "learning_rate": 8.183152779183963e-05, "loss": 1.6028, "step": 3599 }, { "epoch": 1.0927303080892397, "grad_norm": 0.5494308471679688, "learning_rate": 8.182646552596943e-05, "loss": 1.743, "step": 3600 }, { "epoch": 1.093033844285931, "grad_norm": 0.6626517176628113, "learning_rate": 8.182140326009922e-05, "loss": 1.2859, "step": 3601 }, { "epoch": 1.0933373804826225, "grad_norm": 0.5031691193580627, "learning_rate": 8.181634099422902e-05, "loss": 1.742, "step": 3602 }, { "epoch": 1.093640916679314, "grad_norm": 0.4413023889064789, "learning_rate": 8.181127872835881e-05, "loss": 1.7242, "step": 3603 }, { "epoch": 1.0939444528760054, "grad_norm": 1.5234955549240112, "learning_rate": 8.180621646248862e-05, "loss": 1.9161, "step": 3604 }, { "epoch": 1.0942479890726968, "grad_norm": 0.4144064784049988, "learning_rate": 8.180115419661842e-05, "loss": 1.7456, "step": 3605 }, { "epoch": 1.0945515252693885, "grad_norm": 0.39565610885620117, "learning_rate": 8.179609193074821e-05, "loss": 1.9448, "step": 3606 }, { "epoch": 1.09485506146608, "grad_norm": 0.484910786151886, "learning_rate": 8.1791029664878e-05, "loss": 1.8062, "step": 3607 }, { "epoch": 1.0951585976627713, "grad_norm": 0.4055248200893402, "learning_rate": 8.17859673990078e-05, "loss": 1.7592, "step": 3608 }, { "epoch": 1.0954621338594628, "grad_norm": 0.48900482058525085, "learning_rate": 8.17809051331376e-05, "loss": 1.7554, "step": 3609 }, { "epoch": 1.0957656700561542, "grad_norm": 0.4918522834777832, "learning_rate": 8.177584286726739e-05, "loss": 1.3203, "step": 3610 }, { "epoch": 1.0960692062528457, "grad_norm": 0.4044332206249237, "learning_rate": 8.177078060139719e-05, "loss": 1.1163, "step": 3611 }, { "epoch": 1.096372742449537, "grad_norm": 0.5623982548713684, "learning_rate": 8.176571833552698e-05, "loss": 1.782, "step": 3612 }, { "epoch": 1.0966762786462285, "grad_norm": 0.49447813630104065, "learning_rate": 8.176065606965679e-05, "loss": 1.8644, "step": 3613 }, { "epoch": 1.09697981484292, "grad_norm": 0.48916736245155334, "learning_rate": 8.175559380378658e-05, "loss": 1.8049, "step": 3614 }, { "epoch": 1.0972833510396114, "grad_norm": 0.4916020333766937, "learning_rate": 8.175053153791638e-05, "loss": 1.5267, "step": 3615 }, { "epoch": 1.0975868872363028, "grad_norm": 0.5263971090316772, "learning_rate": 8.174546927204617e-05, "loss": 1.8126, "step": 3616 }, { "epoch": 1.0978904234329945, "grad_norm": 0.5741640329360962, "learning_rate": 8.174040700617597e-05, "loss": 1.2607, "step": 3617 }, { "epoch": 1.098193959629686, "grad_norm": 1.0414916276931763, "learning_rate": 8.173534474030576e-05, "loss": 1.6205, "step": 3618 }, { "epoch": 1.0984974958263773, "grad_norm": 0.494989275932312, "learning_rate": 8.173028247443556e-05, "loss": 1.9782, "step": 3619 }, { "epoch": 1.0988010320230688, "grad_norm": 0.4781859815120697, "learning_rate": 8.172522020856535e-05, "loss": 1.9079, "step": 3620 }, { "epoch": 1.0991045682197602, "grad_norm": 0.5632685422897339, "learning_rate": 8.172015794269515e-05, "loss": 1.9584, "step": 3621 }, { "epoch": 1.0994081044164516, "grad_norm": 0.6196724772453308, "learning_rate": 8.171509567682494e-05, "loss": 1.5911, "step": 3622 }, { "epoch": 1.099711640613143, "grad_norm": 0.9034321904182434, "learning_rate": 8.171003341095475e-05, "loss": 1.5019, "step": 3623 }, { "epoch": 1.1000151768098345, "grad_norm": 0.370377779006958, "learning_rate": 8.170497114508455e-05, "loss": 1.2399, "step": 3624 }, { "epoch": 1.100318713006526, "grad_norm": 0.5086367130279541, "learning_rate": 8.169990887921434e-05, "loss": 1.3943, "step": 3625 }, { "epoch": 1.1006222492032174, "grad_norm": 0.5467544198036194, "learning_rate": 8.169484661334413e-05, "loss": 1.9572, "step": 3626 }, { "epoch": 1.100925785399909, "grad_norm": 0.5355213284492493, "learning_rate": 8.168978434747393e-05, "loss": 1.5189, "step": 3627 }, { "epoch": 1.1012293215966005, "grad_norm": 0.48003602027893066, "learning_rate": 8.168472208160372e-05, "loss": 1.7356, "step": 3628 }, { "epoch": 1.101532857793292, "grad_norm": 0.5566051006317139, "learning_rate": 8.167965981573352e-05, "loss": 1.4201, "step": 3629 }, { "epoch": 1.1018363939899833, "grad_norm": 0.504786491394043, "learning_rate": 8.167459754986331e-05, "loss": 1.6986, "step": 3630 }, { "epoch": 1.1021399301866748, "grad_norm": 0.5683818459510803, "learning_rate": 8.166953528399311e-05, "loss": 1.633, "step": 3631 }, { "epoch": 1.1024434663833662, "grad_norm": 0.4428302049636841, "learning_rate": 8.166447301812292e-05, "loss": 1.715, "step": 3632 }, { "epoch": 1.1027470025800576, "grad_norm": 0.5696395635604858, "learning_rate": 8.165941075225271e-05, "loss": 1.0826, "step": 3633 }, { "epoch": 1.103050538776749, "grad_norm": 0.5010141134262085, "learning_rate": 8.16543484863825e-05, "loss": 1.3363, "step": 3634 }, { "epoch": 1.1033540749734405, "grad_norm": 0.5403134822845459, "learning_rate": 8.16492862205123e-05, "loss": 1.6684, "step": 3635 }, { "epoch": 1.103657611170132, "grad_norm": 2.0790061950683594, "learning_rate": 8.16442239546421e-05, "loss": 1.7737, "step": 3636 }, { "epoch": 1.1039611473668236, "grad_norm": 0.5232675671577454, "learning_rate": 8.16391616887719e-05, "loss": 1.8925, "step": 3637 }, { "epoch": 1.104264683563515, "grad_norm": 0.5211415886878967, "learning_rate": 8.16340994229017e-05, "loss": 1.5844, "step": 3638 }, { "epoch": 1.1045682197602065, "grad_norm": 0.4988497495651245, "learning_rate": 8.16290371570315e-05, "loss": 1.4171, "step": 3639 }, { "epoch": 1.104871755956898, "grad_norm": 0.5389904975891113, "learning_rate": 8.162397489116129e-05, "loss": 1.9304, "step": 3640 }, { "epoch": 1.1051752921535893, "grad_norm": 0.5663283467292786, "learning_rate": 8.161891262529108e-05, "loss": 1.7161, "step": 3641 }, { "epoch": 1.1054788283502808, "grad_norm": 0.5647397637367249, "learning_rate": 8.161385035942088e-05, "loss": 1.4674, "step": 3642 }, { "epoch": 1.1057823645469722, "grad_norm": 0.5939072966575623, "learning_rate": 8.160878809355069e-05, "loss": 1.0629, "step": 3643 }, { "epoch": 1.1060859007436636, "grad_norm": 0.5135029554367065, "learning_rate": 8.160372582768048e-05, "loss": 1.705, "step": 3644 }, { "epoch": 1.106389436940355, "grad_norm": 0.45538634061813354, "learning_rate": 8.159866356181028e-05, "loss": 1.6446, "step": 3645 }, { "epoch": 1.1066929731370465, "grad_norm": 0.5306661128997803, "learning_rate": 8.159360129594007e-05, "loss": 1.2614, "step": 3646 }, { "epoch": 1.106996509333738, "grad_norm": 0.5330759882926941, "learning_rate": 8.158853903006987e-05, "loss": 1.5618, "step": 3647 }, { "epoch": 1.1073000455304296, "grad_norm": 0.4016754627227783, "learning_rate": 8.158347676419966e-05, "loss": 1.6048, "step": 3648 }, { "epoch": 1.107603581727121, "grad_norm": 0.4580400288105011, "learning_rate": 8.157841449832946e-05, "loss": 2.0648, "step": 3649 }, { "epoch": 1.1079071179238125, "grad_norm": 0.6977163553237915, "learning_rate": 8.157335223245925e-05, "loss": 1.2671, "step": 3650 }, { "epoch": 1.108210654120504, "grad_norm": 0.5107961297035217, "learning_rate": 8.156828996658905e-05, "loss": 1.8615, "step": 3651 }, { "epoch": 1.1085141903171953, "grad_norm": 0.5081155300140381, "learning_rate": 8.156322770071885e-05, "loss": 1.4985, "step": 3652 }, { "epoch": 1.1088177265138868, "grad_norm": 0.5033918619155884, "learning_rate": 8.155816543484865e-05, "loss": 1.3806, "step": 3653 }, { "epoch": 1.1091212627105782, "grad_norm": 0.4880038797855377, "learning_rate": 8.155310316897844e-05, "loss": 1.4382, "step": 3654 }, { "epoch": 1.1094247989072696, "grad_norm": 0.5263247489929199, "learning_rate": 8.154804090310824e-05, "loss": 1.5557, "step": 3655 }, { "epoch": 1.109728335103961, "grad_norm": 0.5132997632026672, "learning_rate": 8.154297863723803e-05, "loss": 1.6921, "step": 3656 }, { "epoch": 1.1100318713006525, "grad_norm": 0.4751412570476532, "learning_rate": 8.153791637136783e-05, "loss": 1.4542, "step": 3657 }, { "epoch": 1.1103354074973442, "grad_norm": 0.47098231315612793, "learning_rate": 8.153285410549762e-05, "loss": 1.7529, "step": 3658 }, { "epoch": 1.1106389436940356, "grad_norm": 0.5147930979728699, "learning_rate": 8.152779183962742e-05, "loss": 1.6664, "step": 3659 }, { "epoch": 1.110942479890727, "grad_norm": 0.557507336139679, "learning_rate": 8.152272957375721e-05, "loss": 1.4994, "step": 3660 }, { "epoch": 1.1112460160874185, "grad_norm": 0.47173547744750977, "learning_rate": 8.1517667307887e-05, "loss": 1.7713, "step": 3661 }, { "epoch": 1.11154955228411, "grad_norm": 0.42760956287384033, "learning_rate": 8.151260504201682e-05, "loss": 1.9084, "step": 3662 }, { "epoch": 1.1118530884808013, "grad_norm": 0.6474758982658386, "learning_rate": 8.150754277614661e-05, "loss": 1.9066, "step": 3663 }, { "epoch": 1.1121566246774928, "grad_norm": 0.4683403968811035, "learning_rate": 8.15024805102764e-05, "loss": 1.9612, "step": 3664 }, { "epoch": 1.1124601608741842, "grad_norm": 0.4510393440723419, "learning_rate": 8.14974182444062e-05, "loss": 1.8173, "step": 3665 }, { "epoch": 1.1127636970708756, "grad_norm": 0.4649883210659027, "learning_rate": 8.1492355978536e-05, "loss": 1.7189, "step": 3666 }, { "epoch": 1.113067233267567, "grad_norm": 0.4617062509059906, "learning_rate": 8.148729371266579e-05, "loss": 1.5588, "step": 3667 }, { "epoch": 1.1133707694642587, "grad_norm": 0.38514718413352966, "learning_rate": 8.148223144679558e-05, "loss": 1.2109, "step": 3668 }, { "epoch": 1.1136743056609502, "grad_norm": 0.5455525517463684, "learning_rate": 8.147716918092538e-05, "loss": 1.3428, "step": 3669 }, { "epoch": 1.1139778418576416, "grad_norm": 0.47001487016677856, "learning_rate": 8.147210691505517e-05, "loss": 1.8286, "step": 3670 }, { "epoch": 1.114281378054333, "grad_norm": 0.9772850871086121, "learning_rate": 8.146704464918498e-05, "loss": 1.5406, "step": 3671 }, { "epoch": 1.1145849142510245, "grad_norm": 0.5333663821220398, "learning_rate": 8.146198238331478e-05, "loss": 1.5502, "step": 3672 }, { "epoch": 1.114888450447716, "grad_norm": 0.5091346502304077, "learning_rate": 8.145692011744457e-05, "loss": 1.6799, "step": 3673 }, { "epoch": 1.1151919866444073, "grad_norm": 0.6163585186004639, "learning_rate": 8.145185785157437e-05, "loss": 1.4412, "step": 3674 }, { "epoch": 1.1154955228410988, "grad_norm": 0.5357707142829895, "learning_rate": 8.144679558570416e-05, "loss": 1.6769, "step": 3675 }, { "epoch": 1.1157990590377902, "grad_norm": 0.45370087027549744, "learning_rate": 8.144173331983396e-05, "loss": 1.5989, "step": 3676 }, { "epoch": 1.1161025952344816, "grad_norm": 0.4834679067134857, "learning_rate": 8.143667105396375e-05, "loss": 1.4882, "step": 3677 }, { "epoch": 1.116406131431173, "grad_norm": 0.5106636881828308, "learning_rate": 8.143160878809355e-05, "loss": 1.414, "step": 3678 }, { "epoch": 1.1167096676278647, "grad_norm": 0.4484593868255615, "learning_rate": 8.142654652222334e-05, "loss": 1.7469, "step": 3679 }, { "epoch": 1.1170132038245562, "grad_norm": 0.43629521131515503, "learning_rate": 8.142148425635315e-05, "loss": 1.6167, "step": 3680 }, { "epoch": 1.1173167400212476, "grad_norm": 0.46599555015563965, "learning_rate": 8.141642199048294e-05, "loss": 1.6053, "step": 3681 }, { "epoch": 1.117620276217939, "grad_norm": 0.46938660740852356, "learning_rate": 8.141135972461275e-05, "loss": 1.4003, "step": 3682 }, { "epoch": 1.1179238124146305, "grad_norm": 0.5234637260437012, "learning_rate": 8.140629745874255e-05, "loss": 1.8717, "step": 3683 }, { "epoch": 1.118227348611322, "grad_norm": 0.5148733854293823, "learning_rate": 8.140123519287234e-05, "loss": 1.8073, "step": 3684 }, { "epoch": 1.1185308848080133, "grad_norm": 0.4216020405292511, "learning_rate": 8.139617292700214e-05, "loss": 1.3997, "step": 3685 }, { "epoch": 1.1188344210047048, "grad_norm": 0.44757702946662903, "learning_rate": 8.139111066113193e-05, "loss": 1.7372, "step": 3686 }, { "epoch": 1.1191379572013962, "grad_norm": 0.4530711770057678, "learning_rate": 8.138604839526173e-05, "loss": 1.6412, "step": 3687 }, { "epoch": 1.1194414933980876, "grad_norm": 0.45930105447769165, "learning_rate": 8.138098612939152e-05, "loss": 1.8236, "step": 3688 }, { "epoch": 1.119745029594779, "grad_norm": 0.4823213219642639, "learning_rate": 8.137592386352132e-05, "loss": 1.4396, "step": 3689 }, { "epoch": 1.1200485657914707, "grad_norm": 0.46218788623809814, "learning_rate": 8.137086159765111e-05, "loss": 1.7306, "step": 3690 }, { "epoch": 1.1203521019881622, "grad_norm": 0.47568848729133606, "learning_rate": 8.136579933178092e-05, "loss": 1.7071, "step": 3691 }, { "epoch": 1.1206556381848536, "grad_norm": 0.491416335105896, "learning_rate": 8.136073706591071e-05, "loss": 1.4723, "step": 3692 }, { "epoch": 1.120959174381545, "grad_norm": 0.5410199761390686, "learning_rate": 8.135567480004051e-05, "loss": 1.325, "step": 3693 }, { "epoch": 1.1212627105782365, "grad_norm": 0.49941226840019226, "learning_rate": 8.13506125341703e-05, "loss": 1.717, "step": 3694 }, { "epoch": 1.1215662467749279, "grad_norm": 0.5728047490119934, "learning_rate": 8.13455502683001e-05, "loss": 1.7745, "step": 3695 }, { "epoch": 1.1218697829716193, "grad_norm": 0.5127580165863037, "learning_rate": 8.134048800242989e-05, "loss": 1.9077, "step": 3696 }, { "epoch": 1.1221733191683108, "grad_norm": 0.7718825340270996, "learning_rate": 8.133542573655969e-05, "loss": 1.6438, "step": 3697 }, { "epoch": 1.1224768553650022, "grad_norm": 0.48605823516845703, "learning_rate": 8.133036347068948e-05, "loss": 1.7385, "step": 3698 }, { "epoch": 1.1227803915616938, "grad_norm": 0.7080191969871521, "learning_rate": 8.132530120481928e-05, "loss": 1.6901, "step": 3699 }, { "epoch": 1.1230839277583853, "grad_norm": 0.5506136417388916, "learning_rate": 8.132023893894907e-05, "loss": 1.451, "step": 3700 }, { "epoch": 1.1233874639550767, "grad_norm": 0.40935492515563965, "learning_rate": 8.131517667307888e-05, "loss": 1.1997, "step": 3701 }, { "epoch": 1.1236910001517681, "grad_norm": 0.695495069026947, "learning_rate": 8.131011440720867e-05, "loss": 1.5494, "step": 3702 }, { "epoch": 1.1239945363484596, "grad_norm": 0.5104714035987854, "learning_rate": 8.130505214133847e-05, "loss": 1.2269, "step": 3703 }, { "epoch": 1.124298072545151, "grad_norm": 0.5129423141479492, "learning_rate": 8.129998987546826e-05, "loss": 1.5293, "step": 3704 }, { "epoch": 1.1246016087418425, "grad_norm": 0.4727494716644287, "learning_rate": 8.129492760959806e-05, "loss": 1.6413, "step": 3705 }, { "epoch": 1.1249051449385339, "grad_norm": 0.4820862114429474, "learning_rate": 8.128986534372785e-05, "loss": 1.1424, "step": 3706 }, { "epoch": 1.1252086811352253, "grad_norm": 0.37725722789764404, "learning_rate": 8.128480307785765e-05, "loss": 1.7004, "step": 3707 }, { "epoch": 1.1255122173319168, "grad_norm": 0.4898647964000702, "learning_rate": 8.127974081198744e-05, "loss": 1.8321, "step": 3708 }, { "epoch": 1.1258157535286082, "grad_norm": 0.5332454442977905, "learning_rate": 8.127467854611724e-05, "loss": 1.4906, "step": 3709 }, { "epoch": 1.1261192897252998, "grad_norm": 1.1369999647140503, "learning_rate": 8.126961628024705e-05, "loss": 1.8496, "step": 3710 }, { "epoch": 1.1264228259219913, "grad_norm": 0.4872293472290039, "learning_rate": 8.126455401437684e-05, "loss": 1.6577, "step": 3711 }, { "epoch": 1.1267263621186827, "grad_norm": 0.5259954333305359, "learning_rate": 8.125949174850664e-05, "loss": 1.3823, "step": 3712 }, { "epoch": 1.1270298983153741, "grad_norm": 0.44852215051651, "learning_rate": 8.125442948263643e-05, "loss": 1.7758, "step": 3713 }, { "epoch": 1.1273334345120656, "grad_norm": 0.49278607964515686, "learning_rate": 8.124936721676623e-05, "loss": 1.5611, "step": 3714 }, { "epoch": 1.127636970708757, "grad_norm": 0.5423696041107178, "learning_rate": 8.124430495089602e-05, "loss": 1.5364, "step": 3715 }, { "epoch": 1.1279405069054484, "grad_norm": 0.8286866545677185, "learning_rate": 8.123924268502582e-05, "loss": 1.538, "step": 3716 }, { "epoch": 1.1282440431021399, "grad_norm": 0.4774687886238098, "learning_rate": 8.123418041915561e-05, "loss": 1.9607, "step": 3717 }, { "epoch": 1.1285475792988313, "grad_norm": 0.5174298286437988, "learning_rate": 8.12291181532854e-05, "loss": 1.4305, "step": 3718 }, { "epoch": 1.1288511154955228, "grad_norm": 0.4205905795097351, "learning_rate": 8.122405588741521e-05, "loss": 1.6605, "step": 3719 }, { "epoch": 1.1291546516922142, "grad_norm": 0.49820128083229065, "learning_rate": 8.121899362154501e-05, "loss": 1.6201, "step": 3720 }, { "epoch": 1.1294581878889058, "grad_norm": 0.6406720280647278, "learning_rate": 8.12139313556748e-05, "loss": 1.645, "step": 3721 }, { "epoch": 1.1297617240855973, "grad_norm": 0.5179185271263123, "learning_rate": 8.12088690898046e-05, "loss": 1.2224, "step": 3722 }, { "epoch": 1.1300652602822887, "grad_norm": 0.6311380863189697, "learning_rate": 8.120380682393439e-05, "loss": 1.8095, "step": 3723 }, { "epoch": 1.1303687964789801, "grad_norm": 0.6742674112319946, "learning_rate": 8.119874455806419e-05, "loss": 1.2631, "step": 3724 }, { "epoch": 1.1306723326756716, "grad_norm": 0.857227623462677, "learning_rate": 8.1193682292194e-05, "loss": 1.6925, "step": 3725 }, { "epoch": 1.130975868872363, "grad_norm": 0.46885544061660767, "learning_rate": 8.118862002632379e-05, "loss": 1.601, "step": 3726 }, { "epoch": 1.1312794050690544, "grad_norm": 0.4162534773349762, "learning_rate": 8.118355776045359e-05, "loss": 1.8378, "step": 3727 }, { "epoch": 1.1315829412657459, "grad_norm": 0.42384031414985657, "learning_rate": 8.117849549458338e-05, "loss": 1.9497, "step": 3728 }, { "epoch": 1.1318864774624373, "grad_norm": 0.5691695213317871, "learning_rate": 8.117343322871317e-05, "loss": 1.6685, "step": 3729 }, { "epoch": 1.132190013659129, "grad_norm": 0.5050380229949951, "learning_rate": 8.116837096284298e-05, "loss": 1.6185, "step": 3730 }, { "epoch": 1.1324935498558204, "grad_norm": 0.5230247974395752, "learning_rate": 8.116330869697278e-05, "loss": 1.5512, "step": 3731 }, { "epoch": 1.1327970860525118, "grad_norm": 0.5464157462120056, "learning_rate": 8.115824643110257e-05, "loss": 1.6746, "step": 3732 }, { "epoch": 1.1331006222492033, "grad_norm": 0.5749616622924805, "learning_rate": 8.115318416523237e-05, "loss": 1.572, "step": 3733 }, { "epoch": 1.1334041584458947, "grad_norm": 0.6497030854225159, "learning_rate": 8.114812189936216e-05, "loss": 1.1994, "step": 3734 }, { "epoch": 1.1337076946425861, "grad_norm": 0.4995471239089966, "learning_rate": 8.114305963349196e-05, "loss": 1.7605, "step": 3735 }, { "epoch": 1.1340112308392776, "grad_norm": 0.4408343732357025, "learning_rate": 8.113799736762175e-05, "loss": 1.164, "step": 3736 }, { "epoch": 1.134314767035969, "grad_norm": 0.49405989050865173, "learning_rate": 8.113293510175155e-05, "loss": 1.2219, "step": 3737 }, { "epoch": 1.1346183032326604, "grad_norm": 0.5476012825965881, "learning_rate": 8.112787283588134e-05, "loss": 1.7411, "step": 3738 }, { "epoch": 1.1349218394293519, "grad_norm": 0.4971032738685608, "learning_rate": 8.112281057001114e-05, "loss": 1.9983, "step": 3739 }, { "epoch": 1.1352253756260433, "grad_norm": 0.5163978338241577, "learning_rate": 8.111774830414094e-05, "loss": 1.5051, "step": 3740 }, { "epoch": 1.135528911822735, "grad_norm": 0.41611355543136597, "learning_rate": 8.111268603827074e-05, "loss": 1.3488, "step": 3741 }, { "epoch": 1.1358324480194264, "grad_norm": 0.595660924911499, "learning_rate": 8.110762377240053e-05, "loss": 1.1249, "step": 3742 }, { "epoch": 1.1361359842161178, "grad_norm": 0.5125541090965271, "learning_rate": 8.110256150653033e-05, "loss": 1.3889, "step": 3743 }, { "epoch": 1.1364395204128093, "grad_norm": 0.5227445960044861, "learning_rate": 8.109749924066012e-05, "loss": 1.8178, "step": 3744 }, { "epoch": 1.1367430566095007, "grad_norm": 0.551460325717926, "learning_rate": 8.109243697478992e-05, "loss": 1.5691, "step": 3745 }, { "epoch": 1.1370465928061921, "grad_norm": 0.5809578895568848, "learning_rate": 8.108737470891971e-05, "loss": 1.6748, "step": 3746 }, { "epoch": 1.1373501290028836, "grad_norm": 0.5961951017379761, "learning_rate": 8.108231244304951e-05, "loss": 1.7098, "step": 3747 }, { "epoch": 1.137653665199575, "grad_norm": 0.5831936597824097, "learning_rate": 8.10772501771793e-05, "loss": 1.7794, "step": 3748 }, { "epoch": 1.1379572013962664, "grad_norm": 0.5263493657112122, "learning_rate": 8.107218791130911e-05, "loss": 1.5674, "step": 3749 }, { "epoch": 1.1382607375929579, "grad_norm": 0.7186266183853149, "learning_rate": 8.10671256454389e-05, "loss": 1.4561, "step": 3750 }, { "epoch": 1.1385642737896493, "grad_norm": 0.4602220952510834, "learning_rate": 8.10620633795687e-05, "loss": 1.9415, "step": 3751 }, { "epoch": 1.138867809986341, "grad_norm": 0.6327370405197144, "learning_rate": 8.10570011136985e-05, "loss": 1.4777, "step": 3752 }, { "epoch": 1.1391713461830324, "grad_norm": 0.5156751275062561, "learning_rate": 8.105193884782829e-05, "loss": 1.6268, "step": 3753 }, { "epoch": 1.1394748823797238, "grad_norm": 0.5427886247634888, "learning_rate": 8.104687658195809e-05, "loss": 1.7466, "step": 3754 }, { "epoch": 1.1397784185764153, "grad_norm": 0.4821757972240448, "learning_rate": 8.104181431608788e-05, "loss": 1.8944, "step": 3755 }, { "epoch": 1.1400819547731067, "grad_norm": 0.5586848258972168, "learning_rate": 8.103675205021767e-05, "loss": 1.4175, "step": 3756 }, { "epoch": 1.1403854909697981, "grad_norm": 0.4749287962913513, "learning_rate": 8.103168978434747e-05, "loss": 1.71, "step": 3757 }, { "epoch": 1.1406890271664896, "grad_norm": 0.5775233507156372, "learning_rate": 8.102662751847728e-05, "loss": 1.2132, "step": 3758 }, { "epoch": 1.140992563363181, "grad_norm": 0.8754368424415588, "learning_rate": 8.102156525260707e-05, "loss": 1.6928, "step": 3759 }, { "epoch": 1.1412960995598724, "grad_norm": 0.512400209903717, "learning_rate": 8.101650298673687e-05, "loss": 1.7876, "step": 3760 }, { "epoch": 1.141599635756564, "grad_norm": 0.5088605284690857, "learning_rate": 8.101144072086666e-05, "loss": 1.5775, "step": 3761 }, { "epoch": 1.1419031719532553, "grad_norm": 0.5294544100761414, "learning_rate": 8.100637845499646e-05, "loss": 1.7209, "step": 3762 }, { "epoch": 1.142206708149947, "grad_norm": 0.534830629825592, "learning_rate": 8.100131618912625e-05, "loss": 1.59, "step": 3763 }, { "epoch": 1.1425102443466384, "grad_norm": 0.5802815556526184, "learning_rate": 8.099625392325605e-05, "loss": 1.6018, "step": 3764 }, { "epoch": 1.1428137805433298, "grad_norm": 0.5445258021354675, "learning_rate": 8.099119165738584e-05, "loss": 1.1691, "step": 3765 }, { "epoch": 1.1431173167400213, "grad_norm": 0.7283796072006226, "learning_rate": 8.098612939151564e-05, "loss": 1.592, "step": 3766 }, { "epoch": 1.1434208529367127, "grad_norm": 0.5029062032699585, "learning_rate": 8.098106712564543e-05, "loss": 1.6991, "step": 3767 }, { "epoch": 1.1437243891334041, "grad_norm": 0.5014142394065857, "learning_rate": 8.097600485977524e-05, "loss": 1.4275, "step": 3768 }, { "epoch": 1.1440279253300956, "grad_norm": 0.5292351245880127, "learning_rate": 8.097094259390503e-05, "loss": 1.4919, "step": 3769 }, { "epoch": 1.144331461526787, "grad_norm": 0.47075581550598145, "learning_rate": 8.096588032803484e-05, "loss": 1.1845, "step": 3770 }, { "epoch": 1.1446349977234784, "grad_norm": 1.0443851947784424, "learning_rate": 8.096081806216464e-05, "loss": 1.7547, "step": 3771 }, { "epoch": 1.14493853392017, "grad_norm": 0.4734131991863251, "learning_rate": 8.095575579629443e-05, "loss": 1.7828, "step": 3772 }, { "epoch": 1.1452420701168615, "grad_norm": 0.5368636250495911, "learning_rate": 8.095069353042423e-05, "loss": 1.7424, "step": 3773 }, { "epoch": 1.145545606313553, "grad_norm": 0.42162859439849854, "learning_rate": 8.094563126455402e-05, "loss": 1.6908, "step": 3774 }, { "epoch": 1.1458491425102444, "grad_norm": 0.5565983057022095, "learning_rate": 8.094056899868382e-05, "loss": 1.7771, "step": 3775 }, { "epoch": 1.1461526787069358, "grad_norm": 1.02000892162323, "learning_rate": 8.093550673281361e-05, "loss": 1.416, "step": 3776 }, { "epoch": 1.1464562149036273, "grad_norm": 0.5681176781654358, "learning_rate": 8.09304444669434e-05, "loss": 1.6256, "step": 3777 }, { "epoch": 1.1467597511003187, "grad_norm": 0.47247472405433655, "learning_rate": 8.09253822010732e-05, "loss": 1.1948, "step": 3778 }, { "epoch": 1.1470632872970101, "grad_norm": 0.5024043321609497, "learning_rate": 8.092031993520301e-05, "loss": 1.749, "step": 3779 }, { "epoch": 1.1473668234937016, "grad_norm": 0.45547524094581604, "learning_rate": 8.09152576693328e-05, "loss": 1.9116, "step": 3780 }, { "epoch": 1.147670359690393, "grad_norm": 0.5225228071212769, "learning_rate": 8.09101954034626e-05, "loss": 1.5611, "step": 3781 }, { "epoch": 1.1479738958870844, "grad_norm": 0.5099766850471497, "learning_rate": 8.09051331375924e-05, "loss": 1.6613, "step": 3782 }, { "epoch": 1.148277432083776, "grad_norm": 0.5814756751060486, "learning_rate": 8.090007087172219e-05, "loss": 1.7684, "step": 3783 }, { "epoch": 1.1485809682804675, "grad_norm": 0.5852969884872437, "learning_rate": 8.089500860585198e-05, "loss": 1.8952, "step": 3784 }, { "epoch": 1.148884504477159, "grad_norm": 1.107011318206787, "learning_rate": 8.088994633998178e-05, "loss": 0.8599, "step": 3785 }, { "epoch": 1.1491880406738504, "grad_norm": 0.570236086845398, "learning_rate": 8.088488407411157e-05, "loss": 1.7961, "step": 3786 }, { "epoch": 1.1494915768705418, "grad_norm": 0.6179838180541992, "learning_rate": 8.087982180824137e-05, "loss": 1.3554, "step": 3787 }, { "epoch": 1.1497951130672333, "grad_norm": 0.4929959177970886, "learning_rate": 8.087475954237118e-05, "loss": 1.7513, "step": 3788 }, { "epoch": 1.1500986492639247, "grad_norm": 0.48527392745018005, "learning_rate": 8.086969727650097e-05, "loss": 1.7123, "step": 3789 }, { "epoch": 1.1504021854606161, "grad_norm": 0.4290766417980194, "learning_rate": 8.086463501063077e-05, "loss": 1.8191, "step": 3790 }, { "epoch": 1.1507057216573076, "grad_norm": 0.448541522026062, "learning_rate": 8.085957274476056e-05, "loss": 0.9604, "step": 3791 }, { "epoch": 1.1510092578539992, "grad_norm": 0.41005972027778625, "learning_rate": 8.085451047889036e-05, "loss": 1.3889, "step": 3792 }, { "epoch": 1.1513127940506904, "grad_norm": 0.5160661935806274, "learning_rate": 8.084944821302015e-05, "loss": 1.7646, "step": 3793 }, { "epoch": 1.151616330247382, "grad_norm": 0.5291827321052551, "learning_rate": 8.084438594714994e-05, "loss": 1.7524, "step": 3794 }, { "epoch": 1.1519198664440735, "grad_norm": 0.47271502017974854, "learning_rate": 8.083932368127974e-05, "loss": 1.8938, "step": 3795 }, { "epoch": 1.152223402640765, "grad_norm": 0.42168867588043213, "learning_rate": 8.083426141540953e-05, "loss": 1.5912, "step": 3796 }, { "epoch": 1.1525269388374564, "grad_norm": 0.687078058719635, "learning_rate": 8.082919914953934e-05, "loss": 1.7467, "step": 3797 }, { "epoch": 1.1528304750341478, "grad_norm": 0.49269938468933105, "learning_rate": 8.082413688366914e-05, "loss": 1.4631, "step": 3798 }, { "epoch": 1.1531340112308393, "grad_norm": 0.49438297748565674, "learning_rate": 8.081907461779893e-05, "loss": 1.5982, "step": 3799 }, { "epoch": 1.1534375474275307, "grad_norm": 0.5099816918373108, "learning_rate": 8.081401235192873e-05, "loss": 1.6595, "step": 3800 }, { "epoch": 1.1537410836242221, "grad_norm": 0.5655848979949951, "learning_rate": 8.080895008605852e-05, "loss": 1.3832, "step": 3801 }, { "epoch": 1.1540446198209136, "grad_norm": 0.5156016945838928, "learning_rate": 8.080388782018832e-05, "loss": 1.3481, "step": 3802 }, { "epoch": 1.1543481560176052, "grad_norm": 0.3963063657283783, "learning_rate": 8.079882555431811e-05, "loss": 1.1331, "step": 3803 }, { "epoch": 1.1546516922142966, "grad_norm": 0.5562106370925903, "learning_rate": 8.07937632884479e-05, "loss": 1.7705, "step": 3804 }, { "epoch": 1.154955228410988, "grad_norm": 0.5723055601119995, "learning_rate": 8.07887010225777e-05, "loss": 1.6301, "step": 3805 }, { "epoch": 1.1552587646076795, "grad_norm": 0.598197877407074, "learning_rate": 8.07836387567075e-05, "loss": 1.1021, "step": 3806 }, { "epoch": 1.155562300804371, "grad_norm": 0.47882279753685, "learning_rate": 8.07785764908373e-05, "loss": 1.7258, "step": 3807 }, { "epoch": 1.1558658370010624, "grad_norm": 0.777834951877594, "learning_rate": 8.07735142249671e-05, "loss": 2.1178, "step": 3808 }, { "epoch": 1.1561693731977538, "grad_norm": 0.5457454919815063, "learning_rate": 8.07684519590969e-05, "loss": 1.6053, "step": 3809 }, { "epoch": 1.1564729093944452, "grad_norm": 0.6933251023292542, "learning_rate": 8.076338969322669e-05, "loss": 1.2324, "step": 3810 }, { "epoch": 1.1567764455911367, "grad_norm": 0.7639228701591492, "learning_rate": 8.075832742735648e-05, "loss": 1.5564, "step": 3811 }, { "epoch": 1.1570799817878281, "grad_norm": 0.5342271327972412, "learning_rate": 8.075326516148628e-05, "loss": 1.6796, "step": 3812 }, { "epoch": 1.1573835179845195, "grad_norm": 0.49576303362846375, "learning_rate": 8.074820289561607e-05, "loss": 1.1671, "step": 3813 }, { "epoch": 1.1576870541812112, "grad_norm": 0.45418110489845276, "learning_rate": 8.074314062974588e-05, "loss": 1.821, "step": 3814 }, { "epoch": 1.1579905903779026, "grad_norm": 0.49441277980804443, "learning_rate": 8.073807836387568e-05, "loss": 1.6599, "step": 3815 }, { "epoch": 1.158294126574594, "grad_norm": 0.5793106555938721, "learning_rate": 8.073301609800547e-05, "loss": 1.7381, "step": 3816 }, { "epoch": 1.1585976627712855, "grad_norm": 0.6442940831184387, "learning_rate": 8.072795383213527e-05, "loss": 2.0495, "step": 3817 }, { "epoch": 1.158901198967977, "grad_norm": 0.5308768153190613, "learning_rate": 8.072289156626507e-05, "loss": 1.6042, "step": 3818 }, { "epoch": 1.1592047351646684, "grad_norm": 0.4919055104255676, "learning_rate": 8.071782930039487e-05, "loss": 1.4157, "step": 3819 }, { "epoch": 1.1595082713613598, "grad_norm": 0.506922721862793, "learning_rate": 8.071276703452466e-05, "loss": 1.5703, "step": 3820 }, { "epoch": 1.1598118075580512, "grad_norm": 0.5807430148124695, "learning_rate": 8.070770476865446e-05, "loss": 1.4976, "step": 3821 }, { "epoch": 1.1601153437547427, "grad_norm": 0.489700049161911, "learning_rate": 8.070264250278425e-05, "loss": 1.0653, "step": 3822 }, { "epoch": 1.1604188799514341, "grad_norm": 0.4885237514972687, "learning_rate": 8.069758023691405e-05, "loss": 1.4299, "step": 3823 }, { "epoch": 1.1607224161481255, "grad_norm": 0.6799513697624207, "learning_rate": 8.069251797104384e-05, "loss": 1.2705, "step": 3824 }, { "epoch": 1.1610259523448172, "grad_norm": 0.42571133375167847, "learning_rate": 8.068745570517364e-05, "loss": 1.2678, "step": 3825 }, { "epoch": 1.1613294885415086, "grad_norm": 0.4387352466583252, "learning_rate": 8.068239343930343e-05, "loss": 1.7297, "step": 3826 }, { "epoch": 1.1616330247382, "grad_norm": 0.4486967921257019, "learning_rate": 8.067733117343324e-05, "loss": 1.424, "step": 3827 }, { "epoch": 1.1619365609348915, "grad_norm": 0.5393472909927368, "learning_rate": 8.067226890756304e-05, "loss": 1.4029, "step": 3828 }, { "epoch": 1.162240097131583, "grad_norm": 0.5873778462409973, "learning_rate": 8.066720664169283e-05, "loss": 1.713, "step": 3829 }, { "epoch": 1.1625436333282744, "grad_norm": 0.5121496915817261, "learning_rate": 8.066214437582263e-05, "loss": 1.6434, "step": 3830 }, { "epoch": 1.1628471695249658, "grad_norm": 0.5076146125793457, "learning_rate": 8.065708210995242e-05, "loss": 1.4398, "step": 3831 }, { "epoch": 1.1631507057216572, "grad_norm": 0.5533864498138428, "learning_rate": 8.065201984408221e-05, "loss": 1.4061, "step": 3832 }, { "epoch": 1.1634542419183487, "grad_norm": 0.5335582494735718, "learning_rate": 8.064695757821201e-05, "loss": 1.8173, "step": 3833 }, { "epoch": 1.1637577781150403, "grad_norm": 0.45890846848487854, "learning_rate": 8.06418953123418e-05, "loss": 1.7913, "step": 3834 }, { "epoch": 1.1640613143117318, "grad_norm": 0.5167157053947449, "learning_rate": 8.06368330464716e-05, "loss": 1.7257, "step": 3835 }, { "epoch": 1.1643648505084232, "grad_norm": 0.5924651622772217, "learning_rate": 8.063177078060141e-05, "loss": 1.7063, "step": 3836 }, { "epoch": 1.1646683867051146, "grad_norm": 0.48032063245773315, "learning_rate": 8.06267085147312e-05, "loss": 1.6812, "step": 3837 }, { "epoch": 1.164971922901806, "grad_norm": 0.4973551034927368, "learning_rate": 8.0621646248861e-05, "loss": 1.5737, "step": 3838 }, { "epoch": 1.1652754590984975, "grad_norm": 0.5250328183174133, "learning_rate": 8.061658398299079e-05, "loss": 1.2556, "step": 3839 }, { "epoch": 1.165578995295189, "grad_norm": 0.4585050642490387, "learning_rate": 8.061152171712059e-05, "loss": 1.0296, "step": 3840 }, { "epoch": 1.1658825314918804, "grad_norm": 0.5470108389854431, "learning_rate": 8.060645945125038e-05, "loss": 1.9698, "step": 3841 }, { "epoch": 1.1661860676885718, "grad_norm": 0.512869119644165, "learning_rate": 8.060139718538018e-05, "loss": 1.6972, "step": 3842 }, { "epoch": 1.1664896038852632, "grad_norm": 0.5125742554664612, "learning_rate": 8.059633491950997e-05, "loss": 1.8175, "step": 3843 }, { "epoch": 1.1667931400819547, "grad_norm": 0.4400027394294739, "learning_rate": 8.059127265363977e-05, "loss": 1.559, "step": 3844 }, { "epoch": 1.1670966762786463, "grad_norm": 0.695661187171936, "learning_rate": 8.058621038776956e-05, "loss": 1.7339, "step": 3845 }, { "epoch": 1.1674002124753378, "grad_norm": 0.7078722715377808, "learning_rate": 8.058114812189937e-05, "loss": 1.8629, "step": 3846 }, { "epoch": 1.1677037486720292, "grad_norm": 0.6370052099227905, "learning_rate": 8.057608585602916e-05, "loss": 1.3688, "step": 3847 }, { "epoch": 1.1680072848687206, "grad_norm": 0.5090733766555786, "learning_rate": 8.057102359015896e-05, "loss": 1.5642, "step": 3848 }, { "epoch": 1.168310821065412, "grad_norm": 0.6681460738182068, "learning_rate": 8.056596132428875e-05, "loss": 1.6192, "step": 3849 }, { "epoch": 1.1686143572621035, "grad_norm": 0.507787823677063, "learning_rate": 8.056089905841855e-05, "loss": 1.789, "step": 3850 }, { "epoch": 1.168917893458795, "grad_norm": 0.5228447318077087, "learning_rate": 8.055583679254834e-05, "loss": 1.4613, "step": 3851 }, { "epoch": 1.1692214296554864, "grad_norm": 0.43099793791770935, "learning_rate": 8.055077452667814e-05, "loss": 1.4872, "step": 3852 }, { "epoch": 1.1695249658521778, "grad_norm": 0.5265125036239624, "learning_rate": 8.054571226080793e-05, "loss": 1.4256, "step": 3853 }, { "epoch": 1.1698285020488692, "grad_norm": 0.4951777458190918, "learning_rate": 8.054064999493773e-05, "loss": 1.5817, "step": 3854 }, { "epoch": 1.1701320382455607, "grad_norm": 0.6004377007484436, "learning_rate": 8.053558772906754e-05, "loss": 1.6111, "step": 3855 }, { "epoch": 1.1704355744422523, "grad_norm": 0.43849846720695496, "learning_rate": 8.053052546319733e-05, "loss": 1.8428, "step": 3856 }, { "epoch": 1.1707391106389438, "grad_norm": 0.5169385075569153, "learning_rate": 8.052546319732713e-05, "loss": 1.9268, "step": 3857 }, { "epoch": 1.1710426468356352, "grad_norm": 0.5246443152427673, "learning_rate": 8.052040093145692e-05, "loss": 1.5416, "step": 3858 }, { "epoch": 1.1713461830323266, "grad_norm": 0.6965652704238892, "learning_rate": 8.051533866558673e-05, "loss": 1.9555, "step": 3859 }, { "epoch": 1.171649719229018, "grad_norm": 0.5366235971450806, "learning_rate": 8.051027639971652e-05, "loss": 1.5321, "step": 3860 }, { "epoch": 1.1719532554257095, "grad_norm": 0.4586010277271271, "learning_rate": 8.050521413384632e-05, "loss": 1.7923, "step": 3861 }, { "epoch": 1.172256791622401, "grad_norm": 0.467289000749588, "learning_rate": 8.050015186797611e-05, "loss": 1.5372, "step": 3862 }, { "epoch": 1.1725603278190924, "grad_norm": 0.47332337498664856, "learning_rate": 8.049508960210591e-05, "loss": 1.8386, "step": 3863 }, { "epoch": 1.1728638640157838, "grad_norm": 0.4423415958881378, "learning_rate": 8.04900273362357e-05, "loss": 1.6493, "step": 3864 }, { "epoch": 1.1731674002124755, "grad_norm": 0.5270376801490784, "learning_rate": 8.04849650703655e-05, "loss": 1.6646, "step": 3865 }, { "epoch": 1.1734709364091669, "grad_norm": 0.45497021079063416, "learning_rate": 8.04799028044953e-05, "loss": 1.8518, "step": 3866 }, { "epoch": 1.1737744726058583, "grad_norm": 0.4662345349788666, "learning_rate": 8.04748405386251e-05, "loss": 1.1371, "step": 3867 }, { "epoch": 1.1740780088025498, "grad_norm": 0.6664243340492249, "learning_rate": 8.04697782727549e-05, "loss": 1.4377, "step": 3868 }, { "epoch": 1.1743815449992412, "grad_norm": 0.5250559449195862, "learning_rate": 8.046471600688469e-05, "loss": 1.5832, "step": 3869 }, { "epoch": 1.1746850811959326, "grad_norm": 0.6499760746955872, "learning_rate": 8.045965374101448e-05, "loss": 2.0563, "step": 3870 }, { "epoch": 1.174988617392624, "grad_norm": 0.5033279061317444, "learning_rate": 8.045459147514428e-05, "loss": 1.6149, "step": 3871 }, { "epoch": 1.1752921535893155, "grad_norm": 0.5690430402755737, "learning_rate": 8.044952920927407e-05, "loss": 1.6062, "step": 3872 }, { "epoch": 1.175595689786007, "grad_norm": 0.5902601480484009, "learning_rate": 8.044446694340387e-05, "loss": 1.6706, "step": 3873 }, { "epoch": 1.1758992259826984, "grad_norm": 0.4814835786819458, "learning_rate": 8.043940467753366e-05, "loss": 1.7218, "step": 3874 }, { "epoch": 1.1762027621793898, "grad_norm": 0.49932724237442017, "learning_rate": 8.043434241166347e-05, "loss": 1.7839, "step": 3875 }, { "epoch": 1.1765062983760814, "grad_norm": 0.48431655764579773, "learning_rate": 8.042928014579327e-05, "loss": 1.5341, "step": 3876 }, { "epoch": 1.1768098345727729, "grad_norm": 0.5197260975837708, "learning_rate": 8.042421787992306e-05, "loss": 1.0566, "step": 3877 }, { "epoch": 1.1771133707694643, "grad_norm": 0.578517735004425, "learning_rate": 8.041915561405286e-05, "loss": 1.4678, "step": 3878 }, { "epoch": 1.1774169069661558, "grad_norm": 1.7475563287734985, "learning_rate": 8.041409334818265e-05, "loss": 1.3523, "step": 3879 }, { "epoch": 1.1777204431628472, "grad_norm": 0.4969330132007599, "learning_rate": 8.040903108231245e-05, "loss": 1.7174, "step": 3880 }, { "epoch": 1.1780239793595386, "grad_norm": 0.5173845291137695, "learning_rate": 8.040396881644224e-05, "loss": 1.7304, "step": 3881 }, { "epoch": 1.17832751555623, "grad_norm": 0.5299842357635498, "learning_rate": 8.039890655057204e-05, "loss": 1.903, "step": 3882 }, { "epoch": 1.1786310517529215, "grad_norm": 0.4468022286891937, "learning_rate": 8.039384428470183e-05, "loss": 1.8697, "step": 3883 }, { "epoch": 1.178934587949613, "grad_norm": 0.5375956892967224, "learning_rate": 8.038878201883163e-05, "loss": 1.4277, "step": 3884 }, { "epoch": 1.1792381241463044, "grad_norm": 0.5312685370445251, "learning_rate": 8.038371975296143e-05, "loss": 1.5446, "step": 3885 }, { "epoch": 1.1795416603429958, "grad_norm": 0.4160996377468109, "learning_rate": 8.037865748709123e-05, "loss": 1.439, "step": 3886 }, { "epoch": 1.1798451965396874, "grad_norm": 0.5316253900527954, "learning_rate": 8.037359522122102e-05, "loss": 1.6263, "step": 3887 }, { "epoch": 1.1801487327363789, "grad_norm": 0.5926960706710815, "learning_rate": 8.036853295535082e-05, "loss": 1.2918, "step": 3888 }, { "epoch": 1.1804522689330703, "grad_norm": 0.557610034942627, "learning_rate": 8.036347068948061e-05, "loss": 1.7301, "step": 3889 }, { "epoch": 1.1807558051297617, "grad_norm": 0.5832757949829102, "learning_rate": 8.035840842361041e-05, "loss": 1.7146, "step": 3890 }, { "epoch": 1.1810593413264532, "grad_norm": 0.6123656034469604, "learning_rate": 8.03533461577402e-05, "loss": 1.7001, "step": 3891 }, { "epoch": 1.1813628775231446, "grad_norm": 0.912534773349762, "learning_rate": 8.034828389187e-05, "loss": 1.6857, "step": 3892 }, { "epoch": 1.181666413719836, "grad_norm": 0.8999262452125549, "learning_rate": 8.034322162599979e-05, "loss": 1.6986, "step": 3893 }, { "epoch": 1.1819699499165275, "grad_norm": 0.5878473520278931, "learning_rate": 8.03381593601296e-05, "loss": 1.596, "step": 3894 }, { "epoch": 1.182273486113219, "grad_norm": 0.48312613368034363, "learning_rate": 8.03330970942594e-05, "loss": 1.6136, "step": 3895 }, { "epoch": 1.1825770223099106, "grad_norm": 0.4754876494407654, "learning_rate": 8.032803482838919e-05, "loss": 1.5353, "step": 3896 }, { "epoch": 1.182880558506602, "grad_norm": 0.5699150562286377, "learning_rate": 8.032297256251898e-05, "loss": 1.4489, "step": 3897 }, { "epoch": 1.1831840947032934, "grad_norm": 0.407832533121109, "learning_rate": 8.031791029664878e-05, "loss": 1.7367, "step": 3898 }, { "epoch": 1.1834876308999849, "grad_norm": 0.5520400404930115, "learning_rate": 8.031284803077857e-05, "loss": 1.9028, "step": 3899 }, { "epoch": 1.1837911670966763, "grad_norm": 1.5105438232421875, "learning_rate": 8.030778576490837e-05, "loss": 1.6191, "step": 3900 }, { "epoch": 1.1840947032933677, "grad_norm": 0.5375813245773315, "learning_rate": 8.030272349903816e-05, "loss": 1.7874, "step": 3901 }, { "epoch": 1.1843982394900592, "grad_norm": 0.6227301955223083, "learning_rate": 8.029766123316796e-05, "loss": 1.6987, "step": 3902 }, { "epoch": 1.1847017756867506, "grad_norm": 0.5316741466522217, "learning_rate": 8.029259896729777e-05, "loss": 1.4782, "step": 3903 }, { "epoch": 1.185005311883442, "grad_norm": 0.5515217185020447, "learning_rate": 8.028753670142756e-05, "loss": 1.7657, "step": 3904 }, { "epoch": 1.1853088480801335, "grad_norm": 0.5551007390022278, "learning_rate": 8.028247443555737e-05, "loss": 1.7397, "step": 3905 }, { "epoch": 1.185612384276825, "grad_norm": 0.5252280831336975, "learning_rate": 8.027741216968717e-05, "loss": 1.5638, "step": 3906 }, { "epoch": 1.1859159204735166, "grad_norm": 0.5458955764770508, "learning_rate": 8.027234990381696e-05, "loss": 1.1987, "step": 3907 }, { "epoch": 1.186219456670208, "grad_norm": 0.4248373210430145, "learning_rate": 8.026728763794675e-05, "loss": 1.3968, "step": 3908 }, { "epoch": 1.1865229928668994, "grad_norm": 0.5304285287857056, "learning_rate": 8.026222537207655e-05, "loss": 1.8042, "step": 3909 }, { "epoch": 1.1868265290635909, "grad_norm": 0.5563675165176392, "learning_rate": 8.025716310620634e-05, "loss": 1.8037, "step": 3910 }, { "epoch": 1.1871300652602823, "grad_norm": 0.5306220650672913, "learning_rate": 8.025210084033614e-05, "loss": 1.0585, "step": 3911 }, { "epoch": 1.1874336014569737, "grad_norm": 0.8807549476623535, "learning_rate": 8.024703857446593e-05, "loss": 1.3612, "step": 3912 }, { "epoch": 1.1877371376536652, "grad_norm": 0.8862515687942505, "learning_rate": 8.024197630859573e-05, "loss": 1.4855, "step": 3913 }, { "epoch": 1.1880406738503566, "grad_norm": 0.4896228611469269, "learning_rate": 8.023691404272554e-05, "loss": 1.7299, "step": 3914 }, { "epoch": 1.188344210047048, "grad_norm": 0.4608948230743408, "learning_rate": 8.023185177685533e-05, "loss": 1.7385, "step": 3915 }, { "epoch": 1.1886477462437395, "grad_norm": 0.48187950253486633, "learning_rate": 8.022678951098513e-05, "loss": 1.8916, "step": 3916 }, { "epoch": 1.188951282440431, "grad_norm": 0.679067075252533, "learning_rate": 8.022172724511492e-05, "loss": 1.6252, "step": 3917 }, { "epoch": 1.1892548186371226, "grad_norm": 0.48127803206443787, "learning_rate": 8.021666497924472e-05, "loss": 1.5121, "step": 3918 }, { "epoch": 1.189558354833814, "grad_norm": 0.5188263654708862, "learning_rate": 8.021160271337451e-05, "loss": 1.8135, "step": 3919 }, { "epoch": 1.1898618910305054, "grad_norm": 0.49956321716308594, "learning_rate": 8.02065404475043e-05, "loss": 1.4235, "step": 3920 }, { "epoch": 1.1901654272271969, "grad_norm": 0.5067212581634521, "learning_rate": 8.02014781816341e-05, "loss": 1.4532, "step": 3921 }, { "epoch": 1.1904689634238883, "grad_norm": 0.4681207239627838, "learning_rate": 8.01964159157639e-05, "loss": 1.6127, "step": 3922 }, { "epoch": 1.1907724996205797, "grad_norm": 3.2469677925109863, "learning_rate": 8.019135364989369e-05, "loss": 1.2453, "step": 3923 }, { "epoch": 1.1910760358172712, "grad_norm": 0.4977457821369171, "learning_rate": 8.01862913840235e-05, "loss": 1.6088, "step": 3924 }, { "epoch": 1.1913795720139626, "grad_norm": 0.4594128727912903, "learning_rate": 8.01812291181533e-05, "loss": 1.6691, "step": 3925 }, { "epoch": 1.191683108210654, "grad_norm": 0.49090635776519775, "learning_rate": 8.017616685228309e-05, "loss": 1.6259, "step": 3926 }, { "epoch": 1.1919866444073457, "grad_norm": 0.5628901124000549, "learning_rate": 8.017110458641288e-05, "loss": 1.4469, "step": 3927 }, { "epoch": 1.192290180604037, "grad_norm": 0.5099067091941833, "learning_rate": 8.016604232054268e-05, "loss": 1.7828, "step": 3928 }, { "epoch": 1.1925937168007286, "grad_norm": 0.546001672744751, "learning_rate": 8.016098005467247e-05, "loss": 1.9593, "step": 3929 }, { "epoch": 1.19289725299742, "grad_norm": 0.5143636465072632, "learning_rate": 8.015591778880227e-05, "loss": 1.6765, "step": 3930 }, { "epoch": 1.1932007891941114, "grad_norm": 0.5303293466567993, "learning_rate": 8.015085552293206e-05, "loss": 1.6472, "step": 3931 }, { "epoch": 1.1935043253908029, "grad_norm": 0.5036451816558838, "learning_rate": 8.014579325706186e-05, "loss": 1.6347, "step": 3932 }, { "epoch": 1.1938078615874943, "grad_norm": 0.5039635896682739, "learning_rate": 8.014073099119167e-05, "loss": 1.6958, "step": 3933 }, { "epoch": 1.1941113977841857, "grad_norm": 1.2352403402328491, "learning_rate": 8.013566872532146e-05, "loss": 1.2393, "step": 3934 }, { "epoch": 1.1944149339808772, "grad_norm": 0.4501217007637024, "learning_rate": 8.013060645945125e-05, "loss": 1.7043, "step": 3935 }, { "epoch": 1.1947184701775686, "grad_norm": 0.47026047110557556, "learning_rate": 8.012554419358105e-05, "loss": 1.8155, "step": 3936 }, { "epoch": 1.19502200637426, "grad_norm": 1.4304115772247314, "learning_rate": 8.012048192771084e-05, "loss": 1.4581, "step": 3937 }, { "epoch": 1.1953255425709517, "grad_norm": 0.5405710339546204, "learning_rate": 8.011541966184064e-05, "loss": 1.1641, "step": 3938 }, { "epoch": 1.1956290787676431, "grad_norm": 0.4712076187133789, "learning_rate": 8.011035739597043e-05, "loss": 1.7641, "step": 3939 }, { "epoch": 1.1959326149643346, "grad_norm": 0.5050527453422546, "learning_rate": 8.010529513010023e-05, "loss": 1.6604, "step": 3940 }, { "epoch": 1.196236151161026, "grad_norm": 0.5808800458908081, "learning_rate": 8.010023286423002e-05, "loss": 1.6771, "step": 3941 }, { "epoch": 1.1965396873577174, "grad_norm": 0.5032142996788025, "learning_rate": 8.009517059835983e-05, "loss": 1.7387, "step": 3942 }, { "epoch": 1.1968432235544089, "grad_norm": 0.5121293067932129, "learning_rate": 8.009010833248963e-05, "loss": 1.7244, "step": 3943 }, { "epoch": 1.1971467597511003, "grad_norm": 0.5486367344856262, "learning_rate": 8.008504606661942e-05, "loss": 1.2352, "step": 3944 }, { "epoch": 1.1974502959477917, "grad_norm": 0.4894542396068573, "learning_rate": 8.007998380074922e-05, "loss": 1.6186, "step": 3945 }, { "epoch": 1.1977538321444832, "grad_norm": 0.5042988657951355, "learning_rate": 8.007492153487901e-05, "loss": 1.946, "step": 3946 }, { "epoch": 1.1980573683411746, "grad_norm": 0.495233416557312, "learning_rate": 8.00698592690088e-05, "loss": 1.8769, "step": 3947 }, { "epoch": 1.198360904537866, "grad_norm": 1.2553257942199707, "learning_rate": 8.006479700313861e-05, "loss": 1.2075, "step": 3948 }, { "epoch": 1.1986644407345577, "grad_norm": 0.5711445808410645, "learning_rate": 8.005973473726841e-05, "loss": 1.5606, "step": 3949 }, { "epoch": 1.1989679769312491, "grad_norm": 0.40884625911712646, "learning_rate": 8.00546724713982e-05, "loss": 1.6262, "step": 3950 }, { "epoch": 1.1992715131279406, "grad_norm": 0.46294069290161133, "learning_rate": 8.0049610205528e-05, "loss": 1.9437, "step": 3951 }, { "epoch": 1.199575049324632, "grad_norm": 0.43358656764030457, "learning_rate": 8.00445479396578e-05, "loss": 1.708, "step": 3952 }, { "epoch": 1.1998785855213234, "grad_norm": 0.48382043838500977, "learning_rate": 8.00394856737876e-05, "loss": 1.3979, "step": 3953 }, { "epoch": 1.2001821217180149, "grad_norm": 0.5665555596351624, "learning_rate": 8.00344234079174e-05, "loss": 1.6572, "step": 3954 }, { "epoch": 1.2004856579147063, "grad_norm": 0.49451395869255066, "learning_rate": 8.002936114204719e-05, "loss": 1.781, "step": 3955 }, { "epoch": 1.2007891941113977, "grad_norm": 0.47142118215560913, "learning_rate": 8.002429887617699e-05, "loss": 1.9088, "step": 3956 }, { "epoch": 1.2010927303080892, "grad_norm": 0.470790833234787, "learning_rate": 8.001923661030678e-05, "loss": 1.0126, "step": 3957 }, { "epoch": 1.2013962665047808, "grad_norm": 0.5409683585166931, "learning_rate": 8.001417434443658e-05, "loss": 1.6517, "step": 3958 }, { "epoch": 1.201699802701472, "grad_norm": 0.5949118137359619, "learning_rate": 8.000911207856637e-05, "loss": 1.4266, "step": 3959 }, { "epoch": 1.2020033388981637, "grad_norm": 0.48740145564079285, "learning_rate": 8.000404981269617e-05, "loss": 1.7951, "step": 3960 }, { "epoch": 1.2023068750948551, "grad_norm": 0.5187662839889526, "learning_rate": 7.999898754682596e-05, "loss": 1.5534, "step": 3961 }, { "epoch": 1.2026104112915466, "grad_norm": 0.48459750413894653, "learning_rate": 7.999392528095575e-05, "loss": 1.3518, "step": 3962 }, { "epoch": 1.202913947488238, "grad_norm": 0.9907912015914917, "learning_rate": 7.998886301508556e-05, "loss": 1.3871, "step": 3963 }, { "epoch": 1.2032174836849294, "grad_norm": 0.460400253534317, "learning_rate": 7.998380074921536e-05, "loss": 1.8961, "step": 3964 }, { "epoch": 1.2035210198816209, "grad_norm": 0.480882853269577, "learning_rate": 7.997873848334515e-05, "loss": 1.5589, "step": 3965 }, { "epoch": 1.2038245560783123, "grad_norm": 0.5009549260139465, "learning_rate": 7.997367621747495e-05, "loss": 1.6373, "step": 3966 }, { "epoch": 1.2041280922750037, "grad_norm": 0.5043389797210693, "learning_rate": 7.996861395160474e-05, "loss": 1.4056, "step": 3967 }, { "epoch": 1.2044316284716952, "grad_norm": 0.5639089941978455, "learning_rate": 7.996355168573454e-05, "loss": 1.7575, "step": 3968 }, { "epoch": 1.2047351646683868, "grad_norm": 0.43601855635643005, "learning_rate": 7.995848941986433e-05, "loss": 0.9839, "step": 3969 }, { "epoch": 1.2050387008650782, "grad_norm": 0.4941728711128235, "learning_rate": 7.995342715399413e-05, "loss": 1.7745, "step": 3970 }, { "epoch": 1.2053422370617697, "grad_norm": 0.5130265355110168, "learning_rate": 7.994836488812392e-05, "loss": 1.6205, "step": 3971 }, { "epoch": 1.2056457732584611, "grad_norm": 0.4736790060997009, "learning_rate": 7.994330262225373e-05, "loss": 1.728, "step": 3972 }, { "epoch": 1.2059493094551526, "grad_norm": 0.4924636781215668, "learning_rate": 7.993824035638352e-05, "loss": 1.6651, "step": 3973 }, { "epoch": 1.206252845651844, "grad_norm": 0.4965018332004547, "learning_rate": 7.993317809051332e-05, "loss": 1.7473, "step": 3974 }, { "epoch": 1.2065563818485354, "grad_norm": 0.5033332705497742, "learning_rate": 7.992811582464311e-05, "loss": 1.8963, "step": 3975 }, { "epoch": 1.2068599180452269, "grad_norm": 0.5527442097663879, "learning_rate": 7.992305355877291e-05, "loss": 1.4068, "step": 3976 }, { "epoch": 1.2071634542419183, "grad_norm": 0.6062281131744385, "learning_rate": 7.99179912929027e-05, "loss": 1.8115, "step": 3977 }, { "epoch": 1.2074669904386097, "grad_norm": 0.6154857873916626, "learning_rate": 7.99129290270325e-05, "loss": 1.4765, "step": 3978 }, { "epoch": 1.2077705266353012, "grad_norm": 0.49860256910324097, "learning_rate": 7.99078667611623e-05, "loss": 1.6141, "step": 3979 }, { "epoch": 1.2080740628319928, "grad_norm": 0.5555553436279297, "learning_rate": 7.990280449529209e-05, "loss": 1.4785, "step": 3980 }, { "epoch": 1.2083775990286842, "grad_norm": 1.548555612564087, "learning_rate": 7.98977422294219e-05, "loss": 1.6227, "step": 3981 }, { "epoch": 1.2086811352253757, "grad_norm": 0.42255523800849915, "learning_rate": 7.989267996355169e-05, "loss": 1.128, "step": 3982 }, { "epoch": 1.2089846714220671, "grad_norm": 0.4955081343650818, "learning_rate": 7.988761769768149e-05, "loss": 1.742, "step": 3983 }, { "epoch": 1.2092882076187585, "grad_norm": 0.4081752598285675, "learning_rate": 7.988255543181128e-05, "loss": 1.3424, "step": 3984 }, { "epoch": 1.20959174381545, "grad_norm": 0.6145724654197693, "learning_rate": 7.987749316594108e-05, "loss": 1.7208, "step": 3985 }, { "epoch": 1.2098952800121414, "grad_norm": 0.5161190629005432, "learning_rate": 7.987243090007087e-05, "loss": 1.2054, "step": 3986 }, { "epoch": 1.2101988162088329, "grad_norm": 0.4951912462711334, "learning_rate": 7.986736863420067e-05, "loss": 1.6643, "step": 3987 }, { "epoch": 1.2105023524055243, "grad_norm": 0.4547387957572937, "learning_rate": 7.986230636833046e-05, "loss": 1.6728, "step": 3988 }, { "epoch": 1.2108058886022157, "grad_norm": 0.5978291630744934, "learning_rate": 7.985724410246025e-05, "loss": 1.5965, "step": 3989 }, { "epoch": 1.2111094247989072, "grad_norm": 0.48494064807891846, "learning_rate": 7.985218183659005e-05, "loss": 1.7527, "step": 3990 }, { "epoch": 1.2114129609955988, "grad_norm": 0.5768218636512756, "learning_rate": 7.984711957071986e-05, "loss": 1.5072, "step": 3991 }, { "epoch": 1.2117164971922902, "grad_norm": 0.4511902928352356, "learning_rate": 7.984205730484967e-05, "loss": 1.8114, "step": 3992 }, { "epoch": 1.2120200333889817, "grad_norm": 0.47839030623435974, "learning_rate": 7.983699503897946e-05, "loss": 1.682, "step": 3993 }, { "epoch": 1.212323569585673, "grad_norm": 0.5301209688186646, "learning_rate": 7.983193277310926e-05, "loss": 1.9935, "step": 3994 }, { "epoch": 1.2126271057823645, "grad_norm": 0.4306343197822571, "learning_rate": 7.982687050723905e-05, "loss": 1.2576, "step": 3995 }, { "epoch": 1.212930641979056, "grad_norm": 0.7651235461235046, "learning_rate": 7.982180824136885e-05, "loss": 1.7594, "step": 3996 }, { "epoch": 1.2132341781757474, "grad_norm": 1.4882827997207642, "learning_rate": 7.981674597549864e-05, "loss": 1.565, "step": 3997 }, { "epoch": 1.2135377143724388, "grad_norm": 0.6777023673057556, "learning_rate": 7.981168370962844e-05, "loss": 1.3189, "step": 3998 }, { "epoch": 1.2138412505691303, "grad_norm": 0.8454605937004089, "learning_rate": 7.980662144375823e-05, "loss": 1.1823, "step": 3999 }, { "epoch": 1.214144786765822, "grad_norm": 0.5341261625289917, "learning_rate": 7.980155917788802e-05, "loss": 1.6943, "step": 4000 }, { "epoch": 1.2144483229625134, "grad_norm": 0.44602447748184204, "learning_rate": 7.979649691201782e-05, "loss": 1.658, "step": 4001 }, { "epoch": 1.2147518591592048, "grad_norm": 0.4305332899093628, "learning_rate": 7.979143464614763e-05, "loss": 1.9482, "step": 4002 }, { "epoch": 1.2150553953558962, "grad_norm": 0.8739070892333984, "learning_rate": 7.978637238027742e-05, "loss": 0.7679, "step": 4003 }, { "epoch": 1.2153589315525877, "grad_norm": 1.0856356620788574, "learning_rate": 7.978131011440722e-05, "loss": 1.9586, "step": 4004 }, { "epoch": 1.215662467749279, "grad_norm": 0.5733173489570618, "learning_rate": 7.977624784853701e-05, "loss": 1.8098, "step": 4005 }, { "epoch": 1.2159660039459705, "grad_norm": 0.5946094989776611, "learning_rate": 7.977118558266681e-05, "loss": 1.7604, "step": 4006 }, { "epoch": 1.216269540142662, "grad_norm": 0.4725922644138336, "learning_rate": 7.97661233167966e-05, "loss": 2.0565, "step": 4007 }, { "epoch": 1.2165730763393534, "grad_norm": 0.49736130237579346, "learning_rate": 7.97610610509264e-05, "loss": 1.8367, "step": 4008 }, { "epoch": 1.2168766125360448, "grad_norm": 0.4991004467010498, "learning_rate": 7.975599878505619e-05, "loss": 1.6324, "step": 4009 }, { "epoch": 1.2171801487327363, "grad_norm": 0.4400973618030548, "learning_rate": 7.975093651918599e-05, "loss": 1.2879, "step": 4010 }, { "epoch": 1.217483684929428, "grad_norm": 0.5288736820220947, "learning_rate": 7.97458742533158e-05, "loss": 1.7851, "step": 4011 }, { "epoch": 1.2177872211261194, "grad_norm": 0.5546556115150452, "learning_rate": 7.974081198744559e-05, "loss": 1.5529, "step": 4012 }, { "epoch": 1.2180907573228108, "grad_norm": 0.6062618494033813, "learning_rate": 7.973574972157538e-05, "loss": 1.8191, "step": 4013 }, { "epoch": 1.2183942935195022, "grad_norm": 0.5226564407348633, "learning_rate": 7.973068745570518e-05, "loss": 1.8121, "step": 4014 }, { "epoch": 1.2186978297161937, "grad_norm": 0.5904191732406616, "learning_rate": 7.972562518983497e-05, "loss": 1.7177, "step": 4015 }, { "epoch": 1.219001365912885, "grad_norm": 0.7053862810134888, "learning_rate": 7.972056292396477e-05, "loss": 1.2042, "step": 4016 }, { "epoch": 1.2193049021095765, "grad_norm": 0.4357145130634308, "learning_rate": 7.971550065809456e-05, "loss": 1.3318, "step": 4017 }, { "epoch": 1.219608438306268, "grad_norm": 0.5167221426963806, "learning_rate": 7.971043839222436e-05, "loss": 1.3981, "step": 4018 }, { "epoch": 1.2199119745029594, "grad_norm": 0.5844317078590393, "learning_rate": 7.970537612635415e-05, "loss": 1.5698, "step": 4019 }, { "epoch": 1.2202155106996508, "grad_norm": 0.47601571679115295, "learning_rate": 7.970031386048396e-05, "loss": 1.6672, "step": 4020 }, { "epoch": 1.2205190468963423, "grad_norm": 0.5375113487243652, "learning_rate": 7.969525159461376e-05, "loss": 1.8034, "step": 4021 }, { "epoch": 1.220822583093034, "grad_norm": 0.5123186111450195, "learning_rate": 7.969018932874355e-05, "loss": 1.9045, "step": 4022 }, { "epoch": 1.2211261192897254, "grad_norm": 0.5284671187400818, "learning_rate": 7.968512706287335e-05, "loss": 1.5642, "step": 4023 }, { "epoch": 1.2214296554864168, "grad_norm": 0.47770121693611145, "learning_rate": 7.968006479700314e-05, "loss": 1.4129, "step": 4024 }, { "epoch": 1.2217331916831082, "grad_norm": 0.5404722094535828, "learning_rate": 7.967500253113294e-05, "loss": 1.8306, "step": 4025 }, { "epoch": 1.2220367278797997, "grad_norm": 0.4636070430278778, "learning_rate": 7.966994026526273e-05, "loss": 1.9329, "step": 4026 }, { "epoch": 1.222340264076491, "grad_norm": 0.4419996440410614, "learning_rate": 7.966487799939252e-05, "loss": 1.2901, "step": 4027 }, { "epoch": 1.2226438002731825, "grad_norm": 0.9507476091384888, "learning_rate": 7.965981573352232e-05, "loss": 1.5751, "step": 4028 }, { "epoch": 1.222947336469874, "grad_norm": 0.6683091521263123, "learning_rate": 7.965475346765211e-05, "loss": 1.9538, "step": 4029 }, { "epoch": 1.2232508726665654, "grad_norm": 0.5125116109848022, "learning_rate": 7.964969120178192e-05, "loss": 1.7736, "step": 4030 }, { "epoch": 1.223554408863257, "grad_norm": 0.5630882978439331, "learning_rate": 7.964462893591172e-05, "loss": 1.4199, "step": 4031 }, { "epoch": 1.2238579450599485, "grad_norm": 0.5021055340766907, "learning_rate": 7.963956667004151e-05, "loss": 1.7814, "step": 4032 }, { "epoch": 1.22416148125664, "grad_norm": 0.6553727984428406, "learning_rate": 7.963450440417131e-05, "loss": 1.8245, "step": 4033 }, { "epoch": 1.2244650174533314, "grad_norm": 0.359862357378006, "learning_rate": 7.96294421383011e-05, "loss": 1.4599, "step": 4034 }, { "epoch": 1.2247685536500228, "grad_norm": 0.541898787021637, "learning_rate": 7.96243798724309e-05, "loss": 1.7938, "step": 4035 }, { "epoch": 1.2250720898467142, "grad_norm": 0.5998459458351135, "learning_rate": 7.961931760656069e-05, "loss": 2.0052, "step": 4036 }, { "epoch": 1.2253756260434057, "grad_norm": 0.5199744701385498, "learning_rate": 7.96142553406905e-05, "loss": 1.6126, "step": 4037 }, { "epoch": 1.225679162240097, "grad_norm": 0.6697866916656494, "learning_rate": 7.96091930748203e-05, "loss": 1.4301, "step": 4038 }, { "epoch": 1.2259826984367885, "grad_norm": 0.5745710730552673, "learning_rate": 7.960413080895009e-05, "loss": 1.7098, "step": 4039 }, { "epoch": 1.22628623463348, "grad_norm": 0.5066909193992615, "learning_rate": 7.959906854307988e-05, "loss": 2.1108, "step": 4040 }, { "epoch": 1.2265897708301714, "grad_norm": 0.526587963104248, "learning_rate": 7.959400627720969e-05, "loss": 1.555, "step": 4041 }, { "epoch": 1.226893307026863, "grad_norm": 0.5360167622566223, "learning_rate": 7.958894401133949e-05, "loss": 1.7745, "step": 4042 }, { "epoch": 1.2271968432235545, "grad_norm": 0.5217559337615967, "learning_rate": 7.958388174546928e-05, "loss": 1.7375, "step": 4043 }, { "epoch": 1.227500379420246, "grad_norm": 0.48961344361305237, "learning_rate": 7.957881947959908e-05, "loss": 1.7585, "step": 4044 }, { "epoch": 1.2278039156169374, "grad_norm": 0.469046413898468, "learning_rate": 7.957375721372887e-05, "loss": 1.7162, "step": 4045 }, { "epoch": 1.2281074518136288, "grad_norm": 0.5398122072219849, "learning_rate": 7.956869494785867e-05, "loss": 1.4512, "step": 4046 }, { "epoch": 1.2284109880103202, "grad_norm": 0.615802526473999, "learning_rate": 7.956363268198846e-05, "loss": 1.9028, "step": 4047 }, { "epoch": 1.2287145242070117, "grad_norm": 0.8024721741676331, "learning_rate": 7.955857041611826e-05, "loss": 1.6869, "step": 4048 }, { "epoch": 1.229018060403703, "grad_norm": 0.4279814064502716, "learning_rate": 7.955350815024805e-05, "loss": 1.6522, "step": 4049 }, { "epoch": 1.2293215966003945, "grad_norm": 0.5428003668785095, "learning_rate": 7.954844588437786e-05, "loss": 0.5135, "step": 4050 }, { "epoch": 1.229625132797086, "grad_norm": 0.5030322670936584, "learning_rate": 7.954338361850765e-05, "loss": 1.6959, "step": 4051 }, { "epoch": 1.2299286689937774, "grad_norm": 0.5142816305160522, "learning_rate": 7.953832135263745e-05, "loss": 0.9787, "step": 4052 }, { "epoch": 1.230232205190469, "grad_norm": 0.476615309715271, "learning_rate": 7.953325908676724e-05, "loss": 1.5626, "step": 4053 }, { "epoch": 1.2305357413871605, "grad_norm": 0.6307333111763, "learning_rate": 7.952819682089704e-05, "loss": 1.1121, "step": 4054 }, { "epoch": 1.230839277583852, "grad_norm": 0.6847572922706604, "learning_rate": 7.952313455502683e-05, "loss": 1.2124, "step": 4055 }, { "epoch": 1.2311428137805434, "grad_norm": 0.6222221851348877, "learning_rate": 7.951807228915663e-05, "loss": 1.7183, "step": 4056 }, { "epoch": 1.2314463499772348, "grad_norm": 0.5753226280212402, "learning_rate": 7.951301002328642e-05, "loss": 1.1929, "step": 4057 }, { "epoch": 1.2317498861739262, "grad_norm": 0.5416280031204224, "learning_rate": 7.950794775741622e-05, "loss": 1.7334, "step": 4058 }, { "epoch": 1.2320534223706177, "grad_norm": 0.555026113986969, "learning_rate": 7.950288549154603e-05, "loss": 1.308, "step": 4059 }, { "epoch": 1.232356958567309, "grad_norm": 0.5982434153556824, "learning_rate": 7.949782322567582e-05, "loss": 1.5865, "step": 4060 }, { "epoch": 1.2326604947640005, "grad_norm": 0.5335420370101929, "learning_rate": 7.949276095980562e-05, "loss": 1.2918, "step": 4061 }, { "epoch": 1.2329640309606922, "grad_norm": 0.4593915343284607, "learning_rate": 7.948769869393541e-05, "loss": 1.4323, "step": 4062 }, { "epoch": 1.2332675671573834, "grad_norm": 0.4725258946418762, "learning_rate": 7.94826364280652e-05, "loss": 1.4033, "step": 4063 }, { "epoch": 1.233571103354075, "grad_norm": 0.4811660945415497, "learning_rate": 7.9477574162195e-05, "loss": 1.9863, "step": 4064 }, { "epoch": 1.2338746395507665, "grad_norm": 0.6393853425979614, "learning_rate": 7.94725118963248e-05, "loss": 1.7669, "step": 4065 }, { "epoch": 1.234178175747458, "grad_norm": 0.5385059118270874, "learning_rate": 7.946744963045459e-05, "loss": 1.7176, "step": 4066 }, { "epoch": 1.2344817119441494, "grad_norm": 0.5656344890594482, "learning_rate": 7.946238736458438e-05, "loss": 1.1107, "step": 4067 }, { "epoch": 1.2347852481408408, "grad_norm": 0.5419704914093018, "learning_rate": 7.945732509871418e-05, "loss": 1.609, "step": 4068 }, { "epoch": 1.2350887843375322, "grad_norm": 0.5997708439826965, "learning_rate": 7.945226283284399e-05, "loss": 1.4907, "step": 4069 }, { "epoch": 1.2353923205342237, "grad_norm": 0.6861248016357422, "learning_rate": 7.944720056697378e-05, "loss": 1.2756, "step": 4070 }, { "epoch": 1.235695856730915, "grad_norm": 0.6528524160385132, "learning_rate": 7.944213830110358e-05, "loss": 1.3925, "step": 4071 }, { "epoch": 1.2359993929276065, "grad_norm": 0.5758741497993469, "learning_rate": 7.943707603523337e-05, "loss": 1.6849, "step": 4072 }, { "epoch": 1.2363029291242982, "grad_norm": 0.4997986853122711, "learning_rate": 7.943201376936317e-05, "loss": 1.698, "step": 4073 }, { "epoch": 1.2366064653209896, "grad_norm": 0.48319289088249207, "learning_rate": 7.942695150349296e-05, "loss": 1.7681, "step": 4074 }, { "epoch": 1.236910001517681, "grad_norm": 0.6239420771598816, "learning_rate": 7.942188923762276e-05, "loss": 1.417, "step": 4075 }, { "epoch": 1.2372135377143725, "grad_norm": 0.33963650465011597, "learning_rate": 7.941682697175255e-05, "loss": 1.2974, "step": 4076 }, { "epoch": 1.237517073911064, "grad_norm": 0.9823358654975891, "learning_rate": 7.941176470588235e-05, "loss": 1.3348, "step": 4077 }, { "epoch": 1.2378206101077553, "grad_norm": 0.5259244441986084, "learning_rate": 7.940670244001215e-05, "loss": 1.735, "step": 4078 }, { "epoch": 1.2381241463044468, "grad_norm": 0.48676925897598267, "learning_rate": 7.940164017414195e-05, "loss": 1.8874, "step": 4079 }, { "epoch": 1.2384276825011382, "grad_norm": 0.4340936243534088, "learning_rate": 7.939657790827174e-05, "loss": 1.8046, "step": 4080 }, { "epoch": 1.2387312186978297, "grad_norm": 0.5821318030357361, "learning_rate": 7.939151564240155e-05, "loss": 1.5047, "step": 4081 }, { "epoch": 1.239034754894521, "grad_norm": 0.5104279518127441, "learning_rate": 7.938645337653135e-05, "loss": 1.5912, "step": 4082 }, { "epoch": 1.2393382910912125, "grad_norm": 0.511009693145752, "learning_rate": 7.938139111066114e-05, "loss": 1.7989, "step": 4083 }, { "epoch": 1.2396418272879042, "grad_norm": 0.5314620137214661, "learning_rate": 7.937632884479094e-05, "loss": 1.7351, "step": 4084 }, { "epoch": 1.2399453634845956, "grad_norm": 0.5641986131668091, "learning_rate": 7.937126657892073e-05, "loss": 1.2645, "step": 4085 }, { "epoch": 1.240248899681287, "grad_norm": 0.4240927994251251, "learning_rate": 7.936620431305053e-05, "loss": 1.9579, "step": 4086 }, { "epoch": 1.2405524358779785, "grad_norm": 0.5414063334465027, "learning_rate": 7.936114204718032e-05, "loss": 1.7812, "step": 4087 }, { "epoch": 1.24085597207467, "grad_norm": 0.5069623589515686, "learning_rate": 7.935607978131012e-05, "loss": 1.3929, "step": 4088 }, { "epoch": 1.2411595082713613, "grad_norm": 0.5139201879501343, "learning_rate": 7.935101751543992e-05, "loss": 1.757, "step": 4089 }, { "epoch": 1.2414630444680528, "grad_norm": 0.6100741624832153, "learning_rate": 7.934595524956972e-05, "loss": 1.328, "step": 4090 }, { "epoch": 1.2417665806647442, "grad_norm": 0.54306960105896, "learning_rate": 7.934089298369951e-05, "loss": 1.3427, "step": 4091 }, { "epoch": 1.2420701168614356, "grad_norm": 0.6219137907028198, "learning_rate": 7.933583071782931e-05, "loss": 1.9352, "step": 4092 }, { "epoch": 1.2423736530581273, "grad_norm": 0.5613607168197632, "learning_rate": 7.93307684519591e-05, "loss": 1.775, "step": 4093 }, { "epoch": 1.2426771892548185, "grad_norm": 0.4179461896419525, "learning_rate": 7.93257061860889e-05, "loss": 1.8111, "step": 4094 }, { "epoch": 1.2429807254515102, "grad_norm": 0.469662606716156, "learning_rate": 7.932064392021869e-05, "loss": 1.6524, "step": 4095 }, { "epoch": 1.2432842616482016, "grad_norm": 0.5118122696876526, "learning_rate": 7.931558165434849e-05, "loss": 1.6265, "step": 4096 }, { "epoch": 1.243587797844893, "grad_norm": 0.5353027582168579, "learning_rate": 7.931051938847828e-05, "loss": 1.7069, "step": 4097 }, { "epoch": 1.2438913340415845, "grad_norm": 0.4309144914150238, "learning_rate": 7.930545712260809e-05, "loss": 1.9461, "step": 4098 }, { "epoch": 1.244194870238276, "grad_norm": 0.5440289378166199, "learning_rate": 7.930039485673789e-05, "loss": 1.7768, "step": 4099 }, { "epoch": 1.2444984064349673, "grad_norm": 0.8079060912132263, "learning_rate": 7.929533259086768e-05, "loss": 1.764, "step": 4100 }, { "epoch": 1.2448019426316588, "grad_norm": 0.48742911219596863, "learning_rate": 7.929027032499748e-05, "loss": 1.7685, "step": 4101 }, { "epoch": 1.2451054788283502, "grad_norm": 0.4479953944683075, "learning_rate": 7.928520805912727e-05, "loss": 1.3186, "step": 4102 }, { "epoch": 1.2454090150250416, "grad_norm": 0.730973482131958, "learning_rate": 7.928014579325706e-05, "loss": 1.5752, "step": 4103 }, { "epoch": 1.2457125512217333, "grad_norm": 0.5573110580444336, "learning_rate": 7.927508352738686e-05, "loss": 1.7444, "step": 4104 }, { "epoch": 1.2460160874184247, "grad_norm": 0.5198193192481995, "learning_rate": 7.927002126151665e-05, "loss": 1.6983, "step": 4105 }, { "epoch": 1.2463196236151162, "grad_norm": 0.7842777967453003, "learning_rate": 7.926495899564645e-05, "loss": 1.4516, "step": 4106 }, { "epoch": 1.2466231598118076, "grad_norm": 0.7621930837631226, "learning_rate": 7.925989672977624e-05, "loss": 1.8484, "step": 4107 }, { "epoch": 1.246926696008499, "grad_norm": 0.5683858394622803, "learning_rate": 7.925483446390605e-05, "loss": 1.881, "step": 4108 }, { "epoch": 1.2472302322051905, "grad_norm": 0.5511360764503479, "learning_rate": 7.924977219803585e-05, "loss": 1.7022, "step": 4109 }, { "epoch": 1.247533768401882, "grad_norm": 0.5645896196365356, "learning_rate": 7.924470993216564e-05, "loss": 1.8527, "step": 4110 }, { "epoch": 1.2478373045985733, "grad_norm": 0.5634721517562866, "learning_rate": 7.923964766629544e-05, "loss": 1.7056, "step": 4111 }, { "epoch": 1.2481408407952648, "grad_norm": 0.525572657585144, "learning_rate": 7.923458540042523e-05, "loss": 1.6768, "step": 4112 }, { "epoch": 1.2484443769919562, "grad_norm": 0.49522659182548523, "learning_rate": 7.922952313455503e-05, "loss": 1.689, "step": 4113 }, { "epoch": 1.2487479131886476, "grad_norm": 0.48527786135673523, "learning_rate": 7.922446086868482e-05, "loss": 1.4007, "step": 4114 }, { "epoch": 1.2490514493853393, "grad_norm": 0.7304596900939941, "learning_rate": 7.921939860281462e-05, "loss": 1.4368, "step": 4115 }, { "epoch": 1.2493549855820307, "grad_norm": 0.5543166995048523, "learning_rate": 7.921433633694441e-05, "loss": 1.5934, "step": 4116 }, { "epoch": 1.2496585217787222, "grad_norm": 0.5324878692626953, "learning_rate": 7.920927407107422e-05, "loss": 1.6429, "step": 4117 }, { "epoch": 1.2499620579754136, "grad_norm": 0.48488008975982666, "learning_rate": 7.920421180520401e-05, "loss": 1.281, "step": 4118 }, { "epoch": 1.250265594172105, "grad_norm": 0.5011441111564636, "learning_rate": 7.919914953933381e-05, "loss": 1.4652, "step": 4119 }, { "epoch": 1.2505691303687965, "grad_norm": 0.45932215452194214, "learning_rate": 7.91940872734636e-05, "loss": 1.8158, "step": 4120 }, { "epoch": 1.250872666565488, "grad_norm": 0.5459942817687988, "learning_rate": 7.91890250075934e-05, "loss": 1.4303, "step": 4121 }, { "epoch": 1.2511762027621793, "grad_norm": 0.5746182799339294, "learning_rate": 7.918396274172319e-05, "loss": 0.8716, "step": 4122 }, { "epoch": 1.2514797389588708, "grad_norm": 0.5529314875602722, "learning_rate": 7.917890047585299e-05, "loss": 1.7586, "step": 4123 }, { "epoch": 1.2517832751555624, "grad_norm": 0.5376235842704773, "learning_rate": 7.917383820998278e-05, "loss": 1.3224, "step": 4124 }, { "epoch": 1.2520868113522536, "grad_norm": 0.5033860206604004, "learning_rate": 7.916877594411258e-05, "loss": 1.612, "step": 4125 }, { "epoch": 1.2523903475489453, "grad_norm": 0.8470841646194458, "learning_rate": 7.916371367824239e-05, "loss": 0.9943, "step": 4126 }, { "epoch": 1.2526938837456367, "grad_norm": 0.5008292198181152, "learning_rate": 7.915865141237218e-05, "loss": 1.6873, "step": 4127 }, { "epoch": 1.2529974199423282, "grad_norm": 0.47267603874206543, "learning_rate": 7.915358914650199e-05, "loss": 1.7586, "step": 4128 }, { "epoch": 1.2533009561390196, "grad_norm": 0.43899428844451904, "learning_rate": 7.914852688063178e-05, "loss": 1.1282, "step": 4129 }, { "epoch": 1.253604492335711, "grad_norm": 0.4466108977794647, "learning_rate": 7.914346461476158e-05, "loss": 1.9044, "step": 4130 }, { "epoch": 1.2539080285324025, "grad_norm": 0.4737585783004761, "learning_rate": 7.913840234889137e-05, "loss": 1.6987, "step": 4131 }, { "epoch": 1.254211564729094, "grad_norm": 0.4812822937965393, "learning_rate": 7.913334008302117e-05, "loss": 0.8285, "step": 4132 }, { "epoch": 1.2545151009257853, "grad_norm": 0.5936858654022217, "learning_rate": 7.912827781715096e-05, "loss": 1.5008, "step": 4133 }, { "epoch": 1.2548186371224768, "grad_norm": 0.6072853207588196, "learning_rate": 7.912321555128076e-05, "loss": 1.2776, "step": 4134 }, { "epoch": 1.2551221733191684, "grad_norm": 0.6064707636833191, "learning_rate": 7.911815328541055e-05, "loss": 2.0275, "step": 4135 }, { "epoch": 1.2554257095158596, "grad_norm": 0.6108651757240295, "learning_rate": 7.911309101954035e-05, "loss": 1.5007, "step": 4136 }, { "epoch": 1.2557292457125513, "grad_norm": 0.47926777601242065, "learning_rate": 7.910802875367016e-05, "loss": 1.5429, "step": 4137 }, { "epoch": 1.2560327819092427, "grad_norm": 0.4983449876308441, "learning_rate": 7.910296648779995e-05, "loss": 1.7559, "step": 4138 }, { "epoch": 1.2563363181059342, "grad_norm": 0.5694484710693359, "learning_rate": 7.909790422192975e-05, "loss": 1.9243, "step": 4139 }, { "epoch": 1.2566398543026256, "grad_norm": 1.9593089818954468, "learning_rate": 7.909284195605954e-05, "loss": 1.1921, "step": 4140 }, { "epoch": 1.256943390499317, "grad_norm": 0.4247440993785858, "learning_rate": 7.908777969018933e-05, "loss": 1.1906, "step": 4141 }, { "epoch": 1.2572469266960085, "grad_norm": 0.4611380398273468, "learning_rate": 7.908271742431913e-05, "loss": 1.7775, "step": 4142 }, { "epoch": 1.2575504628927, "grad_norm": 0.5350640416145325, "learning_rate": 7.907765515844892e-05, "loss": 1.656, "step": 4143 }, { "epoch": 1.2578539990893913, "grad_norm": 0.5389347076416016, "learning_rate": 7.907259289257872e-05, "loss": 1.9316, "step": 4144 }, { "epoch": 1.2581575352860828, "grad_norm": 0.7473251819610596, "learning_rate": 7.906753062670851e-05, "loss": 1.0278, "step": 4145 }, { "epoch": 1.2584610714827744, "grad_norm": 0.6106333136558533, "learning_rate": 7.906246836083831e-05, "loss": 1.4104, "step": 4146 }, { "epoch": 1.2587646076794659, "grad_norm": 0.5932298898696899, "learning_rate": 7.905740609496812e-05, "loss": 1.5366, "step": 4147 }, { "epoch": 1.2590681438761573, "grad_norm": 0.6722679138183594, "learning_rate": 7.905234382909791e-05, "loss": 1.6094, "step": 4148 }, { "epoch": 1.2593716800728487, "grad_norm": 0.5468285083770752, "learning_rate": 7.90472815632277e-05, "loss": 1.8556, "step": 4149 }, { "epoch": 1.2596752162695402, "grad_norm": 0.6329994201660156, "learning_rate": 7.90422192973575e-05, "loss": 1.7896, "step": 4150 }, { "epoch": 1.2599787524662316, "grad_norm": 0.4546709358692169, "learning_rate": 7.90371570314873e-05, "loss": 1.7365, "step": 4151 }, { "epoch": 1.260282288662923, "grad_norm": 1.1800211668014526, "learning_rate": 7.903209476561709e-05, "loss": 1.6131, "step": 4152 }, { "epoch": 1.2605858248596145, "grad_norm": 0.6181531548500061, "learning_rate": 7.902703249974689e-05, "loss": 1.9795, "step": 4153 }, { "epoch": 1.260889361056306, "grad_norm": 0.4418911039829254, "learning_rate": 7.902197023387668e-05, "loss": 1.7098, "step": 4154 }, { "epoch": 1.2611928972529975, "grad_norm": 0.49086809158325195, "learning_rate": 7.901690796800648e-05, "loss": 1.8146, "step": 4155 }, { "epoch": 1.2614964334496888, "grad_norm": 0.5296843647956848, "learning_rate": 7.901184570213628e-05, "loss": 1.8665, "step": 4156 }, { "epoch": 1.2617999696463804, "grad_norm": 0.58831787109375, "learning_rate": 7.900678343626608e-05, "loss": 1.17, "step": 4157 }, { "epoch": 1.2621035058430718, "grad_norm": 0.6061310768127441, "learning_rate": 7.900172117039587e-05, "loss": 1.2425, "step": 4158 }, { "epoch": 1.2624070420397633, "grad_norm": 0.5422036647796631, "learning_rate": 7.899665890452567e-05, "loss": 1.8006, "step": 4159 }, { "epoch": 1.2627105782364547, "grad_norm": 0.5529537796974182, "learning_rate": 7.899159663865546e-05, "loss": 1.4331, "step": 4160 }, { "epoch": 1.2630141144331462, "grad_norm": 0.5384377241134644, "learning_rate": 7.898653437278526e-05, "loss": 1.7835, "step": 4161 }, { "epoch": 1.2633176506298376, "grad_norm": 0.47016119956970215, "learning_rate": 7.898147210691505e-05, "loss": 1.2891, "step": 4162 }, { "epoch": 1.263621186826529, "grad_norm": 0.5672861337661743, "learning_rate": 7.897640984104485e-05, "loss": 1.8462, "step": 4163 }, { "epoch": 1.2639247230232205, "grad_norm": 0.5559744834899902, "learning_rate": 7.897134757517464e-05, "loss": 1.6961, "step": 4164 }, { "epoch": 1.2642282592199119, "grad_norm": 0.5468736886978149, "learning_rate": 7.896628530930445e-05, "loss": 1.9711, "step": 4165 }, { "epoch": 1.2645317954166035, "grad_norm": 0.5326992869377136, "learning_rate": 7.896122304343425e-05, "loss": 1.8224, "step": 4166 }, { "epoch": 1.2648353316132948, "grad_norm": 0.5643085837364197, "learning_rate": 7.895616077756404e-05, "loss": 1.4387, "step": 4167 }, { "epoch": 1.2651388678099864, "grad_norm": 0.5054870247840881, "learning_rate": 7.895109851169383e-05, "loss": 1.615, "step": 4168 }, { "epoch": 1.2654424040066778, "grad_norm": 0.47247734665870667, "learning_rate": 7.894603624582363e-05, "loss": 1.3798, "step": 4169 }, { "epoch": 1.2657459402033693, "grad_norm": 1.0349562168121338, "learning_rate": 7.894097397995344e-05, "loss": 1.2852, "step": 4170 }, { "epoch": 1.2660494764000607, "grad_norm": 0.4410361647605896, "learning_rate": 7.893591171408323e-05, "loss": 1.1724, "step": 4171 }, { "epoch": 1.2663530125967521, "grad_norm": 0.4972288906574249, "learning_rate": 7.893084944821303e-05, "loss": 1.8946, "step": 4172 }, { "epoch": 1.2666565487934436, "grad_norm": 0.5441880822181702, "learning_rate": 7.892578718234282e-05, "loss": 1.5582, "step": 4173 }, { "epoch": 1.266960084990135, "grad_norm": 0.6143850088119507, "learning_rate": 7.892072491647262e-05, "loss": 1.5693, "step": 4174 }, { "epoch": 1.2672636211868264, "grad_norm": 0.49787527322769165, "learning_rate": 7.891566265060241e-05, "loss": 1.7123, "step": 4175 }, { "epoch": 1.2675671573835179, "grad_norm": 0.5437433123588562, "learning_rate": 7.891060038473222e-05, "loss": 1.4882, "step": 4176 }, { "epoch": 1.2678706935802095, "grad_norm": 0.4889003038406372, "learning_rate": 7.890553811886202e-05, "loss": 1.6611, "step": 4177 }, { "epoch": 1.268174229776901, "grad_norm": 0.5215193629264832, "learning_rate": 7.890047585299181e-05, "loss": 1.7556, "step": 4178 }, { "epoch": 1.2684777659735924, "grad_norm": 0.5660579204559326, "learning_rate": 7.88954135871216e-05, "loss": 1.6121, "step": 4179 }, { "epoch": 1.2687813021702838, "grad_norm": 0.4969503879547119, "learning_rate": 7.88903513212514e-05, "loss": 1.6405, "step": 4180 }, { "epoch": 1.2690848383669753, "grad_norm": 0.528002142906189, "learning_rate": 7.88852890553812e-05, "loss": 1.4562, "step": 4181 }, { "epoch": 1.2693883745636667, "grad_norm": 0.5257166028022766, "learning_rate": 7.888022678951099e-05, "loss": 0.9778, "step": 4182 }, { "epoch": 1.2696919107603581, "grad_norm": 0.6283448934555054, "learning_rate": 7.887516452364078e-05, "loss": 1.6004, "step": 4183 }, { "epoch": 1.2699954469570496, "grad_norm": 0.5746490359306335, "learning_rate": 7.887010225777058e-05, "loss": 2.0969, "step": 4184 }, { "epoch": 1.270298983153741, "grad_norm": 0.5110850930213928, "learning_rate": 7.886503999190037e-05, "loss": 1.608, "step": 4185 }, { "epoch": 1.2706025193504327, "grad_norm": 0.48699095845222473, "learning_rate": 7.885997772603018e-05, "loss": 1.8913, "step": 4186 }, { "epoch": 1.2709060555471239, "grad_norm": 0.6163387894630432, "learning_rate": 7.885491546015998e-05, "loss": 1.9176, "step": 4187 }, { "epoch": 1.2712095917438155, "grad_norm": 0.5177997350692749, "learning_rate": 7.884985319428977e-05, "loss": 1.343, "step": 4188 }, { "epoch": 1.271513127940507, "grad_norm": 0.5938069820404053, "learning_rate": 7.884479092841957e-05, "loss": 1.4215, "step": 4189 }, { "epoch": 1.2718166641371984, "grad_norm": 0.5924432277679443, "learning_rate": 7.883972866254936e-05, "loss": 1.6975, "step": 4190 }, { "epoch": 1.2721202003338898, "grad_norm": 0.4476606845855713, "learning_rate": 7.883466639667916e-05, "loss": 1.5248, "step": 4191 }, { "epoch": 1.2724237365305813, "grad_norm": 0.5014340281486511, "learning_rate": 7.882960413080895e-05, "loss": 1.8972, "step": 4192 }, { "epoch": 1.2727272727272727, "grad_norm": 0.5956346988677979, "learning_rate": 7.882454186493875e-05, "loss": 1.634, "step": 4193 }, { "epoch": 1.2730308089239641, "grad_norm": 0.4889664947986603, "learning_rate": 7.881947959906854e-05, "loss": 1.6317, "step": 4194 }, { "epoch": 1.2733343451206556, "grad_norm": 0.6264504194259644, "learning_rate": 7.881441733319835e-05, "loss": 1.1283, "step": 4195 }, { "epoch": 1.273637881317347, "grad_norm": 0.5363176465034485, "learning_rate": 7.880935506732814e-05, "loss": 1.8875, "step": 4196 }, { "epoch": 1.2739414175140387, "grad_norm": 0.6158793568611145, "learning_rate": 7.880429280145794e-05, "loss": 1.6791, "step": 4197 }, { "epoch": 1.2742449537107299, "grad_norm": 0.500588059425354, "learning_rate": 7.879923053558773e-05, "loss": 1.8019, "step": 4198 }, { "epoch": 1.2745484899074215, "grad_norm": 0.40965986251831055, "learning_rate": 7.879416826971753e-05, "loss": 1.1229, "step": 4199 }, { "epoch": 1.274852026104113, "grad_norm": 0.5436606407165527, "learning_rate": 7.878910600384732e-05, "loss": 1.3439, "step": 4200 }, { "epoch": 1.2751555623008044, "grad_norm": 0.4682038724422455, "learning_rate": 7.878404373797712e-05, "loss": 1.3136, "step": 4201 }, { "epoch": 1.2754590984974958, "grad_norm": 0.5896442532539368, "learning_rate": 7.877898147210691e-05, "loss": 1.8639, "step": 4202 }, { "epoch": 1.2757626346941873, "grad_norm": 0.47574663162231445, "learning_rate": 7.877391920623671e-05, "loss": 1.8716, "step": 4203 }, { "epoch": 1.2760661708908787, "grad_norm": 0.48415321111679077, "learning_rate": 7.876885694036652e-05, "loss": 1.8162, "step": 4204 }, { "epoch": 1.2763697070875701, "grad_norm": 0.45066776871681213, "learning_rate": 7.876379467449631e-05, "loss": 1.8577, "step": 4205 }, { "epoch": 1.2766732432842616, "grad_norm": 0.719373345375061, "learning_rate": 7.87587324086261e-05, "loss": 1.3966, "step": 4206 }, { "epoch": 1.276976779480953, "grad_norm": 0.5312978029251099, "learning_rate": 7.87536701427559e-05, "loss": 1.7253, "step": 4207 }, { "epoch": 1.2772803156776447, "grad_norm": 0.46591347455978394, "learning_rate": 7.87486078768857e-05, "loss": 1.5623, "step": 4208 }, { "epoch": 1.277583851874336, "grad_norm": 0.524597704410553, "learning_rate": 7.874354561101549e-05, "loss": 1.5096, "step": 4209 }, { "epoch": 1.2778873880710275, "grad_norm": 0.5499995350837708, "learning_rate": 7.873848334514528e-05, "loss": 1.077, "step": 4210 }, { "epoch": 1.278190924267719, "grad_norm": 0.5146958231925964, "learning_rate": 7.873342107927508e-05, "loss": 1.3829, "step": 4211 }, { "epoch": 1.2784944604644104, "grad_norm": 0.4905032217502594, "learning_rate": 7.872835881340487e-05, "loss": 2.0092, "step": 4212 }, { "epoch": 1.2787979966611018, "grad_norm": 0.5487980246543884, "learning_rate": 7.872329654753467e-05, "loss": 1.7831, "step": 4213 }, { "epoch": 1.2791015328577933, "grad_norm": 0.6084244251251221, "learning_rate": 7.871823428166448e-05, "loss": 1.6648, "step": 4214 }, { "epoch": 1.2794050690544847, "grad_norm": 0.4461166560649872, "learning_rate": 7.871317201579429e-05, "loss": 1.8962, "step": 4215 }, { "epoch": 1.2797086052511761, "grad_norm": 0.5374088287353516, "learning_rate": 7.870810974992408e-05, "loss": 2.0232, "step": 4216 }, { "epoch": 1.2800121414478678, "grad_norm": 0.75820392370224, "learning_rate": 7.870304748405387e-05, "loss": 1.816, "step": 4217 }, { "epoch": 1.280315677644559, "grad_norm": 0.5131720900535583, "learning_rate": 7.869798521818367e-05, "loss": 1.6794, "step": 4218 }, { "epoch": 1.2806192138412507, "grad_norm": 0.5353872179985046, "learning_rate": 7.869292295231346e-05, "loss": 1.6256, "step": 4219 }, { "epoch": 1.280922750037942, "grad_norm": 0.5746525526046753, "learning_rate": 7.868786068644326e-05, "loss": 1.4688, "step": 4220 }, { "epoch": 1.2812262862346335, "grad_norm": 0.4623584747314453, "learning_rate": 7.868279842057305e-05, "loss": 1.8531, "step": 4221 }, { "epoch": 1.281529822431325, "grad_norm": 0.45438817143440247, "learning_rate": 7.867773615470285e-05, "loss": 2.0002, "step": 4222 }, { "epoch": 1.2818333586280164, "grad_norm": 0.5203740000724792, "learning_rate": 7.867267388883264e-05, "loss": 1.5445, "step": 4223 }, { "epoch": 1.2821368948247078, "grad_norm": 0.5143687129020691, "learning_rate": 7.866761162296244e-05, "loss": 1.6431, "step": 4224 }, { "epoch": 1.2824404310213993, "grad_norm": 0.42706185579299927, "learning_rate": 7.866254935709225e-05, "loss": 1.6228, "step": 4225 }, { "epoch": 1.2827439672180907, "grad_norm": 0.49767574667930603, "learning_rate": 7.865748709122204e-05, "loss": 1.8411, "step": 4226 }, { "epoch": 1.2830475034147821, "grad_norm": 1.3535252809524536, "learning_rate": 7.865242482535184e-05, "loss": 1.4473, "step": 4227 }, { "epoch": 1.2833510396114738, "grad_norm": 0.8082306981086731, "learning_rate": 7.864736255948163e-05, "loss": 1.8145, "step": 4228 }, { "epoch": 1.283654575808165, "grad_norm": 0.5417949557304382, "learning_rate": 7.864230029361143e-05, "loss": 1.8166, "step": 4229 }, { "epoch": 1.2839581120048567, "grad_norm": 0.45955365896224976, "learning_rate": 7.863723802774122e-05, "loss": 1.8228, "step": 4230 }, { "epoch": 1.284261648201548, "grad_norm": 0.512393593788147, "learning_rate": 7.863217576187102e-05, "loss": 1.7234, "step": 4231 }, { "epoch": 1.2845651843982395, "grad_norm": 0.6876609325408936, "learning_rate": 7.862711349600081e-05, "loss": 1.6925, "step": 4232 }, { "epoch": 1.284868720594931, "grad_norm": 0.4111330509185791, "learning_rate": 7.86220512301306e-05, "loss": 1.5009, "step": 4233 }, { "epoch": 1.2851722567916224, "grad_norm": 1.1861661672592163, "learning_rate": 7.861698896426041e-05, "loss": 1.8235, "step": 4234 }, { "epoch": 1.2854757929883138, "grad_norm": 0.47272002696990967, "learning_rate": 7.861192669839021e-05, "loss": 1.8286, "step": 4235 }, { "epoch": 1.2857793291850053, "grad_norm": 0.5344218015670776, "learning_rate": 7.860686443252e-05, "loss": 1.72, "step": 4236 }, { "epoch": 1.2860828653816967, "grad_norm": 0.6022644639015198, "learning_rate": 7.86018021666498e-05, "loss": 1.6321, "step": 4237 }, { "epoch": 1.2863864015783881, "grad_norm": 0.47625505924224854, "learning_rate": 7.859673990077959e-05, "loss": 1.5977, "step": 4238 }, { "epoch": 1.2866899377750798, "grad_norm": 0.5062893033027649, "learning_rate": 7.859167763490939e-05, "loss": 1.7365, "step": 4239 }, { "epoch": 1.286993473971771, "grad_norm": 0.6051474213600159, "learning_rate": 7.858661536903918e-05, "loss": 1.141, "step": 4240 }, { "epoch": 1.2872970101684627, "grad_norm": 0.47197285294532776, "learning_rate": 7.858155310316898e-05, "loss": 1.4489, "step": 4241 }, { "epoch": 1.287600546365154, "grad_norm": 0.942813515663147, "learning_rate": 7.857649083729877e-05, "loss": 1.5708, "step": 4242 }, { "epoch": 1.2879040825618455, "grad_norm": 0.574316143989563, "learning_rate": 7.857142857142858e-05, "loss": 1.5814, "step": 4243 }, { "epoch": 1.288207618758537, "grad_norm": 0.491734117269516, "learning_rate": 7.856636630555837e-05, "loss": 2.1318, "step": 4244 }, { "epoch": 1.2885111549552284, "grad_norm": 0.5177463889122009, "learning_rate": 7.856130403968817e-05, "loss": 1.1797, "step": 4245 }, { "epoch": 1.2888146911519198, "grad_norm": 0.5329721570014954, "learning_rate": 7.855624177381796e-05, "loss": 1.8383, "step": 4246 }, { "epoch": 1.2891182273486113, "grad_norm": 0.5926728248596191, "learning_rate": 7.855117950794776e-05, "loss": 1.5035, "step": 4247 }, { "epoch": 1.289421763545303, "grad_norm": 0.8679110407829285, "learning_rate": 7.854611724207755e-05, "loss": 1.39, "step": 4248 }, { "epoch": 1.2897252997419941, "grad_norm": 0.5424472689628601, "learning_rate": 7.854105497620735e-05, "loss": 1.4622, "step": 4249 }, { "epoch": 1.2900288359386858, "grad_norm": 0.556882917881012, "learning_rate": 7.853599271033714e-05, "loss": 2.0715, "step": 4250 }, { "epoch": 1.2903323721353772, "grad_norm": 0.4874313175678253, "learning_rate": 7.853093044446694e-05, "loss": 1.3539, "step": 4251 }, { "epoch": 1.2906359083320686, "grad_norm": 0.5125330090522766, "learning_rate": 7.852586817859673e-05, "loss": 1.2546, "step": 4252 }, { "epoch": 1.29093944452876, "grad_norm": 0.5772746205329895, "learning_rate": 7.852080591272654e-05, "loss": 1.8095, "step": 4253 }, { "epoch": 1.2912429807254515, "grad_norm": 0.5157619118690491, "learning_rate": 7.851574364685634e-05, "loss": 1.4485, "step": 4254 }, { "epoch": 1.291546516922143, "grad_norm": 0.5483890771865845, "learning_rate": 7.851068138098613e-05, "loss": 1.4703, "step": 4255 }, { "epoch": 1.2918500531188344, "grad_norm": 0.5271161794662476, "learning_rate": 7.850561911511593e-05, "loss": 1.6138, "step": 4256 }, { "epoch": 1.2921535893155258, "grad_norm": 0.5618970990180969, "learning_rate": 7.850055684924572e-05, "loss": 1.5364, "step": 4257 }, { "epoch": 1.2924571255122173, "grad_norm": 0.5047764182090759, "learning_rate": 7.849549458337552e-05, "loss": 1.7535, "step": 4258 }, { "epoch": 1.292760661708909, "grad_norm": 0.5401104688644409, "learning_rate": 7.849043231750532e-05, "loss": 1.6222, "step": 4259 }, { "epoch": 1.2930641979056001, "grad_norm": 0.5193315148353577, "learning_rate": 7.848537005163512e-05, "loss": 1.5011, "step": 4260 }, { "epoch": 1.2933677341022918, "grad_norm": 0.5561128258705139, "learning_rate": 7.848030778576491e-05, "loss": 1.474, "step": 4261 }, { "epoch": 1.2936712702989832, "grad_norm": 0.4799818992614746, "learning_rate": 7.847524551989471e-05, "loss": 1.1537, "step": 4262 }, { "epoch": 1.2939748064956746, "grad_norm": 0.47398802638053894, "learning_rate": 7.84701832540245e-05, "loss": 1.3131, "step": 4263 }, { "epoch": 1.294278342692366, "grad_norm": 0.5396584272384644, "learning_rate": 7.846512098815431e-05, "loss": 1.8291, "step": 4264 }, { "epoch": 1.2945818788890575, "grad_norm": 0.5745276808738708, "learning_rate": 7.84600587222841e-05, "loss": 1.4679, "step": 4265 }, { "epoch": 1.294885415085749, "grad_norm": 0.4571831226348877, "learning_rate": 7.84549964564139e-05, "loss": 1.2732, "step": 4266 }, { "epoch": 1.2951889512824404, "grad_norm": 0.5755758285522461, "learning_rate": 7.84499341905437e-05, "loss": 1.5643, "step": 4267 }, { "epoch": 1.2954924874791318, "grad_norm": 0.4864906966686249, "learning_rate": 7.844487192467349e-05, "loss": 1.7432, "step": 4268 }, { "epoch": 1.2957960236758232, "grad_norm": 0.5033892393112183, "learning_rate": 7.843980965880329e-05, "loss": 1.8336, "step": 4269 }, { "epoch": 1.296099559872515, "grad_norm": 0.5319492220878601, "learning_rate": 7.843474739293308e-05, "loss": 1.0529, "step": 4270 }, { "epoch": 1.2964030960692061, "grad_norm": 0.5821359157562256, "learning_rate": 7.842968512706287e-05, "loss": 1.513, "step": 4271 }, { "epoch": 1.2967066322658978, "grad_norm": 0.5181142687797546, "learning_rate": 7.842462286119267e-05, "loss": 1.759, "step": 4272 }, { "epoch": 1.2970101684625892, "grad_norm": 0.614640474319458, "learning_rate": 7.841956059532248e-05, "loss": 2.1172, "step": 4273 }, { "epoch": 1.2973137046592806, "grad_norm": 0.5049278736114502, "learning_rate": 7.841449832945227e-05, "loss": 1.7877, "step": 4274 }, { "epoch": 1.297617240855972, "grad_norm": 0.9571356177330017, "learning_rate": 7.840943606358207e-05, "loss": 1.4665, "step": 4275 }, { "epoch": 1.2979207770526635, "grad_norm": 0.9249529242515564, "learning_rate": 7.840437379771186e-05, "loss": 1.066, "step": 4276 }, { "epoch": 1.298224313249355, "grad_norm": 0.4696667492389679, "learning_rate": 7.839931153184166e-05, "loss": 1.5052, "step": 4277 }, { "epoch": 1.2985278494460464, "grad_norm": 0.44224581122398376, "learning_rate": 7.839424926597145e-05, "loss": 1.3647, "step": 4278 }, { "epoch": 1.298831385642738, "grad_norm": 0.4960525631904602, "learning_rate": 7.838918700010125e-05, "loss": 1.6771, "step": 4279 }, { "epoch": 1.2991349218394292, "grad_norm": 0.5027126669883728, "learning_rate": 7.838412473423104e-05, "loss": 1.4619, "step": 4280 }, { "epoch": 1.299438458036121, "grad_norm": 0.547900378704071, "learning_rate": 7.837906246836084e-05, "loss": 1.768, "step": 4281 }, { "epoch": 1.2997419942328123, "grad_norm": 0.6043573021888733, "learning_rate": 7.837400020249064e-05, "loss": 1.4069, "step": 4282 }, { "epoch": 1.3000455304295038, "grad_norm": 0.6143855452537537, "learning_rate": 7.836893793662044e-05, "loss": 1.7363, "step": 4283 }, { "epoch": 1.3003490666261952, "grad_norm": 0.37396860122680664, "learning_rate": 7.836387567075023e-05, "loss": 1.2622, "step": 4284 }, { "epoch": 1.3006526028228866, "grad_norm": 0.5041464567184448, "learning_rate": 7.835881340488003e-05, "loss": 1.8698, "step": 4285 }, { "epoch": 1.300956139019578, "grad_norm": 0.5512543320655823, "learning_rate": 7.835375113900982e-05, "loss": 1.7105, "step": 4286 }, { "epoch": 1.3012596752162695, "grad_norm": 0.5649422407150269, "learning_rate": 7.834868887313962e-05, "loss": 1.859, "step": 4287 }, { "epoch": 1.301563211412961, "grad_norm": 0.5447474718093872, "learning_rate": 7.834362660726941e-05, "loss": 1.5593, "step": 4288 }, { "epoch": 1.3018667476096524, "grad_norm": 0.601941704750061, "learning_rate": 7.833856434139921e-05, "loss": 1.0568, "step": 4289 }, { "epoch": 1.302170283806344, "grad_norm": 0.527382493019104, "learning_rate": 7.8333502075529e-05, "loss": 1.5618, "step": 4290 }, { "epoch": 1.3024738200030352, "grad_norm": 0.6012628078460693, "learning_rate": 7.83284398096588e-05, "loss": 1.6444, "step": 4291 }, { "epoch": 1.302777356199727, "grad_norm": 0.552227795124054, "learning_rate": 7.83233775437886e-05, "loss": 1.5268, "step": 4292 }, { "epoch": 1.3030808923964183, "grad_norm": 0.6038103699684143, "learning_rate": 7.83183152779184e-05, "loss": 1.6447, "step": 4293 }, { "epoch": 1.3033844285931098, "grad_norm": 0.6079698801040649, "learning_rate": 7.83132530120482e-05, "loss": 1.6474, "step": 4294 }, { "epoch": 1.3036879647898012, "grad_norm": 0.5434030890464783, "learning_rate": 7.830819074617799e-05, "loss": 1.7963, "step": 4295 }, { "epoch": 1.3039915009864926, "grad_norm": 0.4274088144302368, "learning_rate": 7.830312848030779e-05, "loss": 1.5818, "step": 4296 }, { "epoch": 1.304295037183184, "grad_norm": 0.5364022254943848, "learning_rate": 7.829806621443758e-05, "loss": 1.6226, "step": 4297 }, { "epoch": 1.3045985733798755, "grad_norm": 0.47048020362854004, "learning_rate": 7.829300394856738e-05, "loss": 1.7852, "step": 4298 }, { "epoch": 1.304902109576567, "grad_norm": 0.9063312411308289, "learning_rate": 7.828794168269717e-05, "loss": 1.5717, "step": 4299 }, { "epoch": 1.3052056457732584, "grad_norm": 0.5323641300201416, "learning_rate": 7.828287941682696e-05, "loss": 1.6635, "step": 4300 }, { "epoch": 1.30550918196995, "grad_norm": 0.4425245523452759, "learning_rate": 7.827781715095677e-05, "loss": 1.9034, "step": 4301 }, { "epoch": 1.3058127181666412, "grad_norm": 0.553707480430603, "learning_rate": 7.827275488508657e-05, "loss": 1.7733, "step": 4302 }, { "epoch": 1.306116254363333, "grad_norm": 0.5723200440406799, "learning_rate": 7.826769261921636e-05, "loss": 1.6769, "step": 4303 }, { "epoch": 1.3064197905600243, "grad_norm": 0.9885048866271973, "learning_rate": 7.826263035334617e-05, "loss": 0.931, "step": 4304 }, { "epoch": 1.3067233267567158, "grad_norm": 0.506885826587677, "learning_rate": 7.825756808747597e-05, "loss": 1.795, "step": 4305 }, { "epoch": 1.3070268629534072, "grad_norm": 0.5606210231781006, "learning_rate": 7.825250582160576e-05, "loss": 1.2253, "step": 4306 }, { "epoch": 1.3073303991500986, "grad_norm": 0.7443845868110657, "learning_rate": 7.824744355573556e-05, "loss": 1.9322, "step": 4307 }, { "epoch": 1.30763393534679, "grad_norm": 0.5564523935317993, "learning_rate": 7.824238128986535e-05, "loss": 1.6893, "step": 4308 }, { "epoch": 1.3079374715434815, "grad_norm": 0.5279399752616882, "learning_rate": 7.823731902399514e-05, "loss": 1.8677, "step": 4309 }, { "epoch": 1.308241007740173, "grad_norm": 0.43700218200683594, "learning_rate": 7.823225675812494e-05, "loss": 1.9146, "step": 4310 }, { "epoch": 1.3085445439368644, "grad_norm": 0.49414920806884766, "learning_rate": 7.822719449225473e-05, "loss": 1.5188, "step": 4311 }, { "epoch": 1.308848080133556, "grad_norm": 0.5282542109489441, "learning_rate": 7.822213222638454e-05, "loss": 1.869, "step": 4312 }, { "epoch": 1.3091516163302475, "grad_norm": 0.5799091458320618, "learning_rate": 7.821706996051434e-05, "loss": 1.4757, "step": 4313 }, { "epoch": 1.309455152526939, "grad_norm": 0.5351842045783997, "learning_rate": 7.821200769464413e-05, "loss": 1.9567, "step": 4314 }, { "epoch": 1.3097586887236303, "grad_norm": 0.7459053993225098, "learning_rate": 7.820694542877393e-05, "loss": 1.3684, "step": 4315 }, { "epoch": 1.3100622249203218, "grad_norm": 0.5702859163284302, "learning_rate": 7.820188316290372e-05, "loss": 1.9038, "step": 4316 }, { "epoch": 1.3103657611170132, "grad_norm": 0.5957212448120117, "learning_rate": 7.819682089703352e-05, "loss": 1.6239, "step": 4317 }, { "epoch": 1.3106692973137046, "grad_norm": 0.5540712475776672, "learning_rate": 7.819175863116331e-05, "loss": 1.7692, "step": 4318 }, { "epoch": 1.310972833510396, "grad_norm": 0.5562117099761963, "learning_rate": 7.81866963652931e-05, "loss": 1.5726, "step": 4319 }, { "epoch": 1.3112763697070875, "grad_norm": 0.597484290599823, "learning_rate": 7.81816340994229e-05, "loss": 1.2881, "step": 4320 }, { "epoch": 1.3115799059037792, "grad_norm": 0.4560807943344116, "learning_rate": 7.817657183355271e-05, "loss": 1.6298, "step": 4321 }, { "epoch": 1.3118834421004704, "grad_norm": 0.5292729139328003, "learning_rate": 7.81715095676825e-05, "loss": 1.427, "step": 4322 }, { "epoch": 1.312186978297162, "grad_norm": 0.5305735468864441, "learning_rate": 7.81664473018123e-05, "loss": 1.457, "step": 4323 }, { "epoch": 1.3124905144938535, "grad_norm": 0.4915447533130646, "learning_rate": 7.81613850359421e-05, "loss": 1.523, "step": 4324 }, { "epoch": 1.312794050690545, "grad_norm": 0.5826139450073242, "learning_rate": 7.815632277007189e-05, "loss": 1.562, "step": 4325 }, { "epoch": 1.3130975868872363, "grad_norm": 0.39976975321769714, "learning_rate": 7.815126050420168e-05, "loss": 1.4048, "step": 4326 }, { "epoch": 1.3134011230839278, "grad_norm": 0.4750303030014038, "learning_rate": 7.814619823833148e-05, "loss": 1.9087, "step": 4327 }, { "epoch": 1.3137046592806192, "grad_norm": 0.6270216703414917, "learning_rate": 7.814113597246127e-05, "loss": 1.6378, "step": 4328 }, { "epoch": 1.3140081954773106, "grad_norm": 0.553433895111084, "learning_rate": 7.813607370659107e-05, "loss": 1.6217, "step": 4329 }, { "epoch": 1.314311731674002, "grad_norm": 0.568313717842102, "learning_rate": 7.813101144072086e-05, "loss": 1.875, "step": 4330 }, { "epoch": 1.3146152678706935, "grad_norm": 0.47426313161849976, "learning_rate": 7.812594917485067e-05, "loss": 1.8395, "step": 4331 }, { "epoch": 1.3149188040673851, "grad_norm": 0.5144320726394653, "learning_rate": 7.812088690898047e-05, "loss": 1.6856, "step": 4332 }, { "epoch": 1.3152223402640764, "grad_norm": 0.4862743616104126, "learning_rate": 7.811582464311026e-05, "loss": 1.6403, "step": 4333 }, { "epoch": 1.315525876460768, "grad_norm": 0.5968044996261597, "learning_rate": 7.811076237724006e-05, "loss": 1.5937, "step": 4334 }, { "epoch": 1.3158294126574595, "grad_norm": 0.8257614374160767, "learning_rate": 7.810570011136985e-05, "loss": 1.6954, "step": 4335 }, { "epoch": 1.3161329488541509, "grad_norm": 0.8569319844245911, "learning_rate": 7.810063784549965e-05, "loss": 1.1517, "step": 4336 }, { "epoch": 1.3164364850508423, "grad_norm": 0.5136444568634033, "learning_rate": 7.809557557962944e-05, "loss": 1.4734, "step": 4337 }, { "epoch": 1.3167400212475338, "grad_norm": 0.4905669093132019, "learning_rate": 7.809051331375923e-05, "loss": 1.6479, "step": 4338 }, { "epoch": 1.3170435574442252, "grad_norm": 0.5407041311264038, "learning_rate": 7.808545104788903e-05, "loss": 1.7631, "step": 4339 }, { "epoch": 1.3173470936409166, "grad_norm": 0.5710767507553101, "learning_rate": 7.808038878201884e-05, "loss": 1.3063, "step": 4340 }, { "epoch": 1.317650629837608, "grad_norm": 0.7482556104660034, "learning_rate": 7.807532651614863e-05, "loss": 1.4685, "step": 4341 }, { "epoch": 1.3179541660342995, "grad_norm": 0.5573562979698181, "learning_rate": 7.807026425027843e-05, "loss": 1.0559, "step": 4342 }, { "epoch": 1.3182577022309911, "grad_norm": 0.5664482712745667, "learning_rate": 7.806520198440822e-05, "loss": 1.4078, "step": 4343 }, { "epoch": 1.3185612384276826, "grad_norm": 0.5078407526016235, "learning_rate": 7.806013971853802e-05, "loss": 1.8284, "step": 4344 }, { "epoch": 1.318864774624374, "grad_norm": 0.5389295220375061, "learning_rate": 7.805507745266781e-05, "loss": 2.0116, "step": 4345 }, { "epoch": 1.3191683108210654, "grad_norm": 0.5661474466323853, "learning_rate": 7.80500151867976e-05, "loss": 1.4659, "step": 4346 }, { "epoch": 1.3194718470177569, "grad_norm": 0.5620774626731873, "learning_rate": 7.80449529209274e-05, "loss": 1.8105, "step": 4347 }, { "epoch": 1.3197753832144483, "grad_norm": 0.7904017567634583, "learning_rate": 7.803989065505721e-05, "loss": 1.7561, "step": 4348 }, { "epoch": 1.3200789194111398, "grad_norm": 0.5164950489997864, "learning_rate": 7.8034828389187e-05, "loss": 1.7777, "step": 4349 }, { "epoch": 1.3203824556078312, "grad_norm": 0.4666251838207245, "learning_rate": 7.80297661233168e-05, "loss": 1.8709, "step": 4350 }, { "epoch": 1.3206859918045226, "grad_norm": 1.2652373313903809, "learning_rate": 7.802470385744661e-05, "loss": 1.7007, "step": 4351 }, { "epoch": 1.3209895280012143, "grad_norm": 0.527914822101593, "learning_rate": 7.80196415915764e-05, "loss": 1.7681, "step": 4352 }, { "epoch": 1.3212930641979055, "grad_norm": 0.49614670872688293, "learning_rate": 7.80145793257062e-05, "loss": 1.8928, "step": 4353 }, { "epoch": 1.3215966003945971, "grad_norm": 0.5666537284851074, "learning_rate": 7.800951705983599e-05, "loss": 1.645, "step": 4354 }, { "epoch": 1.3219001365912886, "grad_norm": 0.479524701833725, "learning_rate": 7.800445479396579e-05, "loss": 1.6011, "step": 4355 }, { "epoch": 1.32220367278798, "grad_norm": 0.6410484313964844, "learning_rate": 7.799939252809558e-05, "loss": 1.4003, "step": 4356 }, { "epoch": 1.3225072089846714, "grad_norm": 0.5031597018241882, "learning_rate": 7.799433026222538e-05, "loss": 1.618, "step": 4357 }, { "epoch": 1.3228107451813629, "grad_norm": 0.5946468710899353, "learning_rate": 7.798926799635517e-05, "loss": 1.8065, "step": 4358 }, { "epoch": 1.3231142813780543, "grad_norm": 0.5310636758804321, "learning_rate": 7.798420573048497e-05, "loss": 1.6345, "step": 4359 }, { "epoch": 1.3234178175747457, "grad_norm": 0.4639948904514313, "learning_rate": 7.797914346461477e-05, "loss": 1.4134, "step": 4360 }, { "epoch": 1.3237213537714372, "grad_norm": 0.5224514007568359, "learning_rate": 7.797408119874457e-05, "loss": 1.6937, "step": 4361 }, { "epoch": 1.3240248899681286, "grad_norm": 0.5320509672164917, "learning_rate": 7.796901893287436e-05, "loss": 1.6988, "step": 4362 }, { "epoch": 1.3243284261648203, "grad_norm": 0.46581387519836426, "learning_rate": 7.796395666700416e-05, "loss": 1.2369, "step": 4363 }, { "epoch": 1.3246319623615115, "grad_norm": 0.5986335277557373, "learning_rate": 7.795889440113395e-05, "loss": 1.3976, "step": 4364 }, { "epoch": 1.3249354985582031, "grad_norm": 0.46746957302093506, "learning_rate": 7.795383213526375e-05, "loss": 1.8816, "step": 4365 }, { "epoch": 1.3252390347548946, "grad_norm": 0.505970299243927, "learning_rate": 7.794876986939354e-05, "loss": 1.825, "step": 4366 }, { "epoch": 1.325542570951586, "grad_norm": 0.4795224666595459, "learning_rate": 7.794370760352334e-05, "loss": 1.9911, "step": 4367 }, { "epoch": 1.3258461071482774, "grad_norm": 0.5055125951766968, "learning_rate": 7.793864533765313e-05, "loss": 0.8959, "step": 4368 }, { "epoch": 1.3261496433449689, "grad_norm": 0.5013410449028015, "learning_rate": 7.793358307178293e-05, "loss": 1.7976, "step": 4369 }, { "epoch": 1.3264531795416603, "grad_norm": 0.6693021655082703, "learning_rate": 7.792852080591274e-05, "loss": 1.3238, "step": 4370 }, { "epoch": 1.3267567157383517, "grad_norm": 0.5832652449607849, "learning_rate": 7.792345854004253e-05, "loss": 1.1272, "step": 4371 }, { "epoch": 1.3270602519350432, "grad_norm": 0.6789683103561401, "learning_rate": 7.791839627417233e-05, "loss": 1.6659, "step": 4372 }, { "epoch": 1.3273637881317346, "grad_norm": 0.5950153470039368, "learning_rate": 7.791333400830212e-05, "loss": 1.3312, "step": 4373 }, { "epoch": 1.3276673243284263, "grad_norm": 0.508441686630249, "learning_rate": 7.790827174243191e-05, "loss": 2.0006, "step": 4374 }, { "epoch": 1.3279708605251177, "grad_norm": 0.5696821212768555, "learning_rate": 7.790320947656171e-05, "loss": 1.5527, "step": 4375 }, { "epoch": 1.3282743967218091, "grad_norm": 0.5755532383918762, "learning_rate": 7.78981472106915e-05, "loss": 1.6013, "step": 4376 }, { "epoch": 1.3285779329185006, "grad_norm": 0.573461651802063, "learning_rate": 7.78930849448213e-05, "loss": 1.5225, "step": 4377 }, { "epoch": 1.328881469115192, "grad_norm": 0.5905348062515259, "learning_rate": 7.78880226789511e-05, "loss": 1.6171, "step": 4378 }, { "epoch": 1.3291850053118834, "grad_norm": 0.5171772241592407, "learning_rate": 7.78829604130809e-05, "loss": 1.403, "step": 4379 }, { "epoch": 1.3294885415085749, "grad_norm": 0.596920907497406, "learning_rate": 7.78778981472107e-05, "loss": 1.8381, "step": 4380 }, { "epoch": 1.3297920777052663, "grad_norm": 0.5608156323432922, "learning_rate": 7.787283588134049e-05, "loss": 1.7, "step": 4381 }, { "epoch": 1.3300956139019577, "grad_norm": 0.5419397950172424, "learning_rate": 7.786777361547029e-05, "loss": 1.952, "step": 4382 }, { "epoch": 1.3303991500986494, "grad_norm": 0.5569834113121033, "learning_rate": 7.786271134960008e-05, "loss": 1.3201, "step": 4383 }, { "epoch": 1.3307026862953406, "grad_norm": 0.5397844910621643, "learning_rate": 7.785764908372988e-05, "loss": 1.6868, "step": 4384 }, { "epoch": 1.3310062224920323, "grad_norm": 0.5021274089813232, "learning_rate": 7.785258681785967e-05, "loss": 1.2401, "step": 4385 }, { "epoch": 1.3313097586887237, "grad_norm": 0.5976943373680115, "learning_rate": 7.784752455198947e-05, "loss": 1.6123, "step": 4386 }, { "epoch": 1.3316132948854151, "grad_norm": 0.5162238478660583, "learning_rate": 7.784246228611926e-05, "loss": 1.8259, "step": 4387 }, { "epoch": 1.3319168310821066, "grad_norm": 0.5028786659240723, "learning_rate": 7.783740002024907e-05, "loss": 1.08, "step": 4388 }, { "epoch": 1.332220367278798, "grad_norm": 0.4934920072555542, "learning_rate": 7.783233775437886e-05, "loss": 1.7243, "step": 4389 }, { "epoch": 1.3325239034754894, "grad_norm": 0.40703603625297546, "learning_rate": 7.782727548850866e-05, "loss": 1.4138, "step": 4390 }, { "epoch": 1.3328274396721809, "grad_norm": 0.5625724792480469, "learning_rate": 7.782221322263845e-05, "loss": 1.775, "step": 4391 }, { "epoch": 1.3331309758688723, "grad_norm": 0.5374658703804016, "learning_rate": 7.781715095676825e-05, "loss": 1.4939, "step": 4392 }, { "epoch": 1.3334345120655637, "grad_norm": 0.5174360275268555, "learning_rate": 7.781208869089806e-05, "loss": 1.6717, "step": 4393 }, { "epoch": 1.3337380482622554, "grad_norm": 0.42424100637435913, "learning_rate": 7.780702642502785e-05, "loss": 1.1542, "step": 4394 }, { "epoch": 1.3340415844589466, "grad_norm": 0.5028985142707825, "learning_rate": 7.780196415915765e-05, "loss": 1.8072, "step": 4395 }, { "epoch": 1.3343451206556383, "grad_norm": 0.5722070336341858, "learning_rate": 7.779690189328744e-05, "loss": 1.7323, "step": 4396 }, { "epoch": 1.3346486568523297, "grad_norm": 0.5366278290748596, "learning_rate": 7.779183962741724e-05, "loss": 1.7701, "step": 4397 }, { "epoch": 1.3349521930490211, "grad_norm": 0.38516730070114136, "learning_rate": 7.778677736154703e-05, "loss": 1.4055, "step": 4398 }, { "epoch": 1.3352557292457126, "grad_norm": 0.4074214696884155, "learning_rate": 7.778171509567684e-05, "loss": 1.8972, "step": 4399 }, { "epoch": 1.335559265442404, "grad_norm": 0.519972562789917, "learning_rate": 7.777665282980663e-05, "loss": 1.9597, "step": 4400 }, { "epoch": 1.3358628016390954, "grad_norm": 0.46331217885017395, "learning_rate": 7.777159056393643e-05, "loss": 1.9177, "step": 4401 }, { "epoch": 1.3361663378357869, "grad_norm": 0.5337703824043274, "learning_rate": 7.776652829806622e-05, "loss": 1.4516, "step": 4402 }, { "epoch": 1.3364698740324783, "grad_norm": 0.5979052186012268, "learning_rate": 7.776146603219602e-05, "loss": 1.78, "step": 4403 }, { "epoch": 1.3367734102291697, "grad_norm": 0.5446959733963013, "learning_rate": 7.775640376632581e-05, "loss": 1.7176, "step": 4404 }, { "epoch": 1.3370769464258614, "grad_norm": 0.45639851689338684, "learning_rate": 7.775134150045561e-05, "loss": 1.1853, "step": 4405 }, { "epoch": 1.3373804826225526, "grad_norm": 0.5213142037391663, "learning_rate": 7.77462792345854e-05, "loss": 1.7366, "step": 4406 }, { "epoch": 1.3376840188192443, "grad_norm": 0.5132396221160889, "learning_rate": 7.77412169687152e-05, "loss": 1.5986, "step": 4407 }, { "epoch": 1.3379875550159357, "grad_norm": 0.5052165389060974, "learning_rate": 7.773615470284499e-05, "loss": 1.7698, "step": 4408 }, { "epoch": 1.3382910912126271, "grad_norm": 0.5907325148582458, "learning_rate": 7.77310924369748e-05, "loss": 1.6297, "step": 4409 }, { "epoch": 1.3385946274093186, "grad_norm": 0.5656864643096924, "learning_rate": 7.77260301711046e-05, "loss": 1.6696, "step": 4410 }, { "epoch": 1.33889816360601, "grad_norm": 0.6518059968948364, "learning_rate": 7.772096790523439e-05, "loss": 1.3919, "step": 4411 }, { "epoch": 1.3392016998027014, "grad_norm": 0.5692440271377563, "learning_rate": 7.771590563936418e-05, "loss": 1.5376, "step": 4412 }, { "epoch": 1.3395052359993929, "grad_norm": 0.5003619194030762, "learning_rate": 7.771084337349398e-05, "loss": 1.5989, "step": 4413 }, { "epoch": 1.3398087721960845, "grad_norm": 0.5197275280952454, "learning_rate": 7.770578110762377e-05, "loss": 1.7232, "step": 4414 }, { "epoch": 1.3401123083927757, "grad_norm": 0.498151570558548, "learning_rate": 7.770071884175357e-05, "loss": 1.8019, "step": 4415 }, { "epoch": 1.3404158445894674, "grad_norm": 0.5612940788269043, "learning_rate": 7.769565657588336e-05, "loss": 2.1291, "step": 4416 }, { "epoch": 1.3407193807861588, "grad_norm": 0.6868269443511963, "learning_rate": 7.769059431001316e-05, "loss": 1.6882, "step": 4417 }, { "epoch": 1.3410229169828503, "grad_norm": 0.48497381806373596, "learning_rate": 7.768553204414297e-05, "loss": 1.6664, "step": 4418 }, { "epoch": 1.3413264531795417, "grad_norm": 0.5606340169906616, "learning_rate": 7.768046977827276e-05, "loss": 1.8263, "step": 4419 }, { "epoch": 1.3416299893762331, "grad_norm": 0.4336944818496704, "learning_rate": 7.767540751240256e-05, "loss": 1.8313, "step": 4420 }, { "epoch": 1.3419335255729246, "grad_norm": 0.8053896427154541, "learning_rate": 7.767034524653235e-05, "loss": 1.5858, "step": 4421 }, { "epoch": 1.342237061769616, "grad_norm": 0.4444625973701477, "learning_rate": 7.766528298066215e-05, "loss": 1.5288, "step": 4422 }, { "epoch": 1.3425405979663074, "grad_norm": 0.5923985242843628, "learning_rate": 7.766022071479194e-05, "loss": 1.7149, "step": 4423 }, { "epoch": 1.3428441341629989, "grad_norm": 0.5716237425804138, "learning_rate": 7.765515844892174e-05, "loss": 1.8926, "step": 4424 }, { "epoch": 1.3431476703596905, "grad_norm": 0.5298818349838257, "learning_rate": 7.765009618305153e-05, "loss": 1.616, "step": 4425 }, { "epoch": 1.3434512065563817, "grad_norm": 0.5272706151008606, "learning_rate": 7.764503391718133e-05, "loss": 2.0209, "step": 4426 }, { "epoch": 1.3437547427530734, "grad_norm": 0.6190195083618164, "learning_rate": 7.763997165131113e-05, "loss": 1.4569, "step": 4427 }, { "epoch": 1.3440582789497648, "grad_norm": 0.6583245992660522, "learning_rate": 7.763490938544093e-05, "loss": 1.5768, "step": 4428 }, { "epoch": 1.3443618151464563, "grad_norm": 0.5891104936599731, "learning_rate": 7.762984711957072e-05, "loss": 1.7829, "step": 4429 }, { "epoch": 1.3446653513431477, "grad_norm": 0.47343286871910095, "learning_rate": 7.762478485370052e-05, "loss": 1.6713, "step": 4430 }, { "epoch": 1.3449688875398391, "grad_norm": 0.4860784113407135, "learning_rate": 7.761972258783031e-05, "loss": 1.4912, "step": 4431 }, { "epoch": 1.3452724237365306, "grad_norm": 0.5806290507316589, "learning_rate": 7.761466032196011e-05, "loss": 1.9675, "step": 4432 }, { "epoch": 1.345575959933222, "grad_norm": 0.7708868384361267, "learning_rate": 7.76095980560899e-05, "loss": 1.8951, "step": 4433 }, { "epoch": 1.3458794961299134, "grad_norm": 0.4970077574253082, "learning_rate": 7.76045357902197e-05, "loss": 1.706, "step": 4434 }, { "epoch": 1.3461830323266049, "grad_norm": 0.5021582841873169, "learning_rate": 7.759947352434949e-05, "loss": 1.693, "step": 4435 }, { "epoch": 1.3464865685232965, "grad_norm": 0.511876106262207, "learning_rate": 7.759441125847929e-05, "loss": 1.5194, "step": 4436 }, { "epoch": 1.3467901047199877, "grad_norm": 0.5340248942375183, "learning_rate": 7.75893489926091e-05, "loss": 1.5881, "step": 4437 }, { "epoch": 1.3470936409166794, "grad_norm": 0.4671190083026886, "learning_rate": 7.75842867267389e-05, "loss": 1.6339, "step": 4438 }, { "epoch": 1.3473971771133708, "grad_norm": 0.500554084777832, "learning_rate": 7.75792244608687e-05, "loss": 1.7419, "step": 4439 }, { "epoch": 1.3477007133100622, "grad_norm": 0.5325165390968323, "learning_rate": 7.75741621949985e-05, "loss": 1.4884, "step": 4440 }, { "epoch": 1.3480042495067537, "grad_norm": 0.5758363604545593, "learning_rate": 7.756909992912829e-05, "loss": 1.7129, "step": 4441 }, { "epoch": 1.3483077857034451, "grad_norm": 0.44963330030441284, "learning_rate": 7.756403766325808e-05, "loss": 1.7194, "step": 4442 }, { "epoch": 1.3486113219001366, "grad_norm": 0.49702900648117065, "learning_rate": 7.755897539738788e-05, "loss": 1.3038, "step": 4443 }, { "epoch": 1.348914858096828, "grad_norm": 0.5857210755348206, "learning_rate": 7.755391313151767e-05, "loss": 1.282, "step": 4444 }, { "epoch": 1.3492183942935196, "grad_norm": 0.47209036350250244, "learning_rate": 7.754885086564747e-05, "loss": 1.7731, "step": 4445 }, { "epoch": 1.3495219304902109, "grad_norm": 0.6250527501106262, "learning_rate": 7.754378859977726e-05, "loss": 1.6755, "step": 4446 }, { "epoch": 1.3498254666869025, "grad_norm": 0.5777345299720764, "learning_rate": 7.753872633390706e-05, "loss": 1.587, "step": 4447 }, { "epoch": 1.350129002883594, "grad_norm": 0.5663403868675232, "learning_rate": 7.753366406803687e-05, "loss": 1.8542, "step": 4448 }, { "epoch": 1.3504325390802854, "grad_norm": 0.4112309515476227, "learning_rate": 7.752860180216666e-05, "loss": 0.5226, "step": 4449 }, { "epoch": 1.3507360752769768, "grad_norm": 0.5185359716415405, "learning_rate": 7.752353953629645e-05, "loss": 1.7451, "step": 4450 }, { "epoch": 1.3510396114736682, "grad_norm": 0.4213547706604004, "learning_rate": 7.751847727042625e-05, "loss": 1.8753, "step": 4451 }, { "epoch": 1.3513431476703597, "grad_norm": 0.563086211681366, "learning_rate": 7.751341500455604e-05, "loss": 1.6842, "step": 4452 }, { "epoch": 1.3516466838670511, "grad_norm": 0.4909687042236328, "learning_rate": 7.750835273868584e-05, "loss": 1.252, "step": 4453 }, { "epoch": 1.3519502200637425, "grad_norm": 0.3422612249851227, "learning_rate": 7.750329047281563e-05, "loss": 1.4584, "step": 4454 }, { "epoch": 1.352253756260434, "grad_norm": 0.5087230205535889, "learning_rate": 7.749822820694543e-05, "loss": 1.7198, "step": 4455 }, { "epoch": 1.3525572924571256, "grad_norm": 0.5094039440155029, "learning_rate": 7.749316594107522e-05, "loss": 1.8156, "step": 4456 }, { "epoch": 1.3528608286538168, "grad_norm": 0.5707986354827881, "learning_rate": 7.748810367520503e-05, "loss": 1.4866, "step": 4457 }, { "epoch": 1.3531643648505085, "grad_norm": 0.5794532895088196, "learning_rate": 7.748304140933483e-05, "loss": 1.7428, "step": 4458 }, { "epoch": 1.3534679010472, "grad_norm": 0.38065409660339355, "learning_rate": 7.747797914346462e-05, "loss": 1.2621, "step": 4459 }, { "epoch": 1.3537714372438914, "grad_norm": 0.5278635025024414, "learning_rate": 7.747291687759442e-05, "loss": 1.7026, "step": 4460 }, { "epoch": 1.3540749734405828, "grad_norm": 0.584745466709137, "learning_rate": 7.746785461172421e-05, "loss": 1.9313, "step": 4461 }, { "epoch": 1.3543785096372742, "grad_norm": 0.5232474207878113, "learning_rate": 7.7462792345854e-05, "loss": 1.8206, "step": 4462 }, { "epoch": 1.3546820458339657, "grad_norm": 0.647459089756012, "learning_rate": 7.74577300799838e-05, "loss": 1.3045, "step": 4463 }, { "epoch": 1.354985582030657, "grad_norm": 0.5332021117210388, "learning_rate": 7.74526678141136e-05, "loss": 1.7948, "step": 4464 }, { "epoch": 1.3552891182273485, "grad_norm": 0.5136265754699707, "learning_rate": 7.744760554824339e-05, "loss": 1.8639, "step": 4465 }, { "epoch": 1.35559265442404, "grad_norm": 0.5178390741348267, "learning_rate": 7.74425432823732e-05, "loss": 1.3601, "step": 4466 }, { "epoch": 1.3558961906207316, "grad_norm": 0.4487190246582031, "learning_rate": 7.7437481016503e-05, "loss": 1.7373, "step": 4467 }, { "epoch": 1.3561997268174228, "grad_norm": 0.6040553450584412, "learning_rate": 7.743241875063279e-05, "loss": 1.1153, "step": 4468 }, { "epoch": 1.3565032630141145, "grad_norm": 0.533661961555481, "learning_rate": 7.742735648476258e-05, "loss": 1.6861, "step": 4469 }, { "epoch": 1.356806799210806, "grad_norm": 0.5596243739128113, "learning_rate": 7.742229421889238e-05, "loss": 1.6324, "step": 4470 }, { "epoch": 1.3571103354074974, "grad_norm": 0.5296964645385742, "learning_rate": 7.741723195302217e-05, "loss": 1.7621, "step": 4471 }, { "epoch": 1.3574138716041888, "grad_norm": 0.4589049518108368, "learning_rate": 7.741216968715197e-05, "loss": 1.7543, "step": 4472 }, { "epoch": 1.3577174078008802, "grad_norm": 0.5302414298057556, "learning_rate": 7.740710742128176e-05, "loss": 1.5546, "step": 4473 }, { "epoch": 1.3580209439975717, "grad_norm": 0.6762099862098694, "learning_rate": 7.740204515541156e-05, "loss": 1.3986, "step": 4474 }, { "epoch": 1.358324480194263, "grad_norm": 0.4986850321292877, "learning_rate": 7.739698288954135e-05, "loss": 1.9552, "step": 4475 }, { "epoch": 1.3586280163909545, "grad_norm": 0.602545976638794, "learning_rate": 7.739192062367116e-05, "loss": 1.1948, "step": 4476 }, { "epoch": 1.358931552587646, "grad_norm": 0.5418416857719421, "learning_rate": 7.738685835780095e-05, "loss": 1.8975, "step": 4477 }, { "epoch": 1.3592350887843376, "grad_norm": 0.5278483033180237, "learning_rate": 7.738179609193075e-05, "loss": 1.8907, "step": 4478 }, { "epoch": 1.359538624981029, "grad_norm": 0.5483014583587646, "learning_rate": 7.737673382606054e-05, "loss": 1.6767, "step": 4479 }, { "epoch": 1.3598421611777205, "grad_norm": 0.5430691838264465, "learning_rate": 7.737167156019034e-05, "loss": 1.7736, "step": 4480 }, { "epoch": 1.360145697374412, "grad_norm": 0.5248533487319946, "learning_rate": 7.736660929432013e-05, "loss": 1.6338, "step": 4481 }, { "epoch": 1.3604492335711034, "grad_norm": 0.49445077776908875, "learning_rate": 7.736154702844994e-05, "loss": 1.7626, "step": 4482 }, { "epoch": 1.3607527697677948, "grad_norm": 0.91696697473526, "learning_rate": 7.735648476257974e-05, "loss": 1.5865, "step": 4483 }, { "epoch": 1.3610563059644862, "grad_norm": 0.5618857145309448, "learning_rate": 7.735142249670953e-05, "loss": 1.6746, "step": 4484 }, { "epoch": 1.3613598421611777, "grad_norm": 0.497670441865921, "learning_rate": 7.734636023083933e-05, "loss": 1.7111, "step": 4485 }, { "epoch": 1.361663378357869, "grad_norm": 0.5272151827812195, "learning_rate": 7.734129796496912e-05, "loss": 1.7878, "step": 4486 }, { "epoch": 1.3619669145545608, "grad_norm": 0.5168973803520203, "learning_rate": 7.733623569909893e-05, "loss": 1.8668, "step": 4487 }, { "epoch": 1.362270450751252, "grad_norm": 0.7518470883369446, "learning_rate": 7.733117343322872e-05, "loss": 1.9678, "step": 4488 }, { "epoch": 1.3625739869479436, "grad_norm": 0.5416634678840637, "learning_rate": 7.732611116735852e-05, "loss": 1.8942, "step": 4489 }, { "epoch": 1.362877523144635, "grad_norm": 0.5445654988288879, "learning_rate": 7.732104890148831e-05, "loss": 1.1588, "step": 4490 }, { "epoch": 1.3631810593413265, "grad_norm": 0.4440891146659851, "learning_rate": 7.731598663561811e-05, "loss": 1.0499, "step": 4491 }, { "epoch": 1.363484595538018, "grad_norm": 0.4910653233528137, "learning_rate": 7.73109243697479e-05, "loss": 1.5553, "step": 4492 }, { "epoch": 1.3637881317347094, "grad_norm": 0.438759446144104, "learning_rate": 7.73058621038777e-05, "loss": 1.7959, "step": 4493 }, { "epoch": 1.3640916679314008, "grad_norm": 0.527281641960144, "learning_rate": 7.73007998380075e-05, "loss": 1.9108, "step": 4494 }, { "epoch": 1.3643952041280922, "grad_norm": 0.5466772317886353, "learning_rate": 7.729573757213729e-05, "loss": 1.3945, "step": 4495 }, { "epoch": 1.3646987403247837, "grad_norm": 0.47780725359916687, "learning_rate": 7.72906753062671e-05, "loss": 1.685, "step": 4496 }, { "epoch": 1.365002276521475, "grad_norm": 0.5176924467086792, "learning_rate": 7.728561304039689e-05, "loss": 1.7529, "step": 4497 }, { "epoch": 1.3653058127181668, "grad_norm": 0.572093665599823, "learning_rate": 7.728055077452669e-05, "loss": 1.8185, "step": 4498 }, { "epoch": 1.365609348914858, "grad_norm": 0.6627562046051025, "learning_rate": 7.727548850865648e-05, "loss": 1.6821, "step": 4499 }, { "epoch": 1.3659128851115496, "grad_norm": 0.4149744212627411, "learning_rate": 7.727042624278628e-05, "loss": 1.8283, "step": 4500 }, { "epoch": 1.366216421308241, "grad_norm": 0.590709388256073, "learning_rate": 7.726536397691607e-05, "loss": 1.6729, "step": 4501 }, { "epoch": 1.3665199575049325, "grad_norm": 0.5867372751235962, "learning_rate": 7.726030171104587e-05, "loss": 1.0463, "step": 4502 }, { "epoch": 1.366823493701624, "grad_norm": 0.4657502770423889, "learning_rate": 7.725523944517566e-05, "loss": 1.6178, "step": 4503 }, { "epoch": 1.3671270298983154, "grad_norm": 0.43361958861351013, "learning_rate": 7.725017717930546e-05, "loss": 1.7674, "step": 4504 }, { "epoch": 1.3674305660950068, "grad_norm": 0.44244110584259033, "learning_rate": 7.724511491343526e-05, "loss": 1.7252, "step": 4505 }, { "epoch": 1.3677341022916982, "grad_norm": 0.4858807921409607, "learning_rate": 7.724005264756506e-05, "loss": 1.698, "step": 4506 }, { "epoch": 1.3680376384883897, "grad_norm": 0.46887320280075073, "learning_rate": 7.723499038169485e-05, "loss": 1.8538, "step": 4507 }, { "epoch": 1.368341174685081, "grad_norm": 0.6876465678215027, "learning_rate": 7.722992811582465e-05, "loss": 1.3666, "step": 4508 }, { "epoch": 1.3686447108817728, "grad_norm": 0.6491440534591675, "learning_rate": 7.722486584995444e-05, "loss": 1.4743, "step": 4509 }, { "epoch": 1.3689482470784642, "grad_norm": 0.6107078790664673, "learning_rate": 7.721980358408424e-05, "loss": 1.8493, "step": 4510 }, { "epoch": 1.3692517832751556, "grad_norm": 0.9721749424934387, "learning_rate": 7.721474131821403e-05, "loss": 1.2488, "step": 4511 }, { "epoch": 1.369555319471847, "grad_norm": 0.4856562614440918, "learning_rate": 7.720967905234383e-05, "loss": 1.6012, "step": 4512 }, { "epoch": 1.3698588556685385, "grad_norm": 0.5122790336608887, "learning_rate": 7.720461678647362e-05, "loss": 1.9067, "step": 4513 }, { "epoch": 1.37016239186523, "grad_norm": 0.45854032039642334, "learning_rate": 7.719955452060342e-05, "loss": 1.6468, "step": 4514 }, { "epoch": 1.3704659280619214, "grad_norm": 0.5521238446235657, "learning_rate": 7.719449225473322e-05, "loss": 1.6377, "step": 4515 }, { "epoch": 1.3707694642586128, "grad_norm": 0.4813542068004608, "learning_rate": 7.718942998886302e-05, "loss": 1.704, "step": 4516 }, { "epoch": 1.3710730004553042, "grad_norm": 0.604897141456604, "learning_rate": 7.718436772299281e-05, "loss": 1.7302, "step": 4517 }, { "epoch": 1.3713765366519959, "grad_norm": 0.5965009927749634, "learning_rate": 7.717930545712261e-05, "loss": 1.7354, "step": 4518 }, { "epoch": 1.371680072848687, "grad_norm": 0.4838474988937378, "learning_rate": 7.71742431912524e-05, "loss": 1.2562, "step": 4519 }, { "epoch": 1.3719836090453787, "grad_norm": 0.5375049114227295, "learning_rate": 7.71691809253822e-05, "loss": 1.8191, "step": 4520 }, { "epoch": 1.3722871452420702, "grad_norm": 0.47960102558135986, "learning_rate": 7.7164118659512e-05, "loss": 1.8247, "step": 4521 }, { "epoch": 1.3725906814387616, "grad_norm": 0.42799845337867737, "learning_rate": 7.715905639364179e-05, "loss": 1.6597, "step": 4522 }, { "epoch": 1.372894217635453, "grad_norm": 1.6170735359191895, "learning_rate": 7.715399412777158e-05, "loss": 1.4322, "step": 4523 }, { "epoch": 1.3731977538321445, "grad_norm": 0.5388947129249573, "learning_rate": 7.714893186190139e-05, "loss": 1.2625, "step": 4524 }, { "epoch": 1.373501290028836, "grad_norm": 1.5445969104766846, "learning_rate": 7.714386959603119e-05, "loss": 1.091, "step": 4525 }, { "epoch": 1.3738048262255274, "grad_norm": 0.5684570074081421, "learning_rate": 7.7138807330161e-05, "loss": 1.4048, "step": 4526 }, { "epoch": 1.3741083624222188, "grad_norm": 0.565920889377594, "learning_rate": 7.713374506429079e-05, "loss": 1.5052, "step": 4527 }, { "epoch": 1.3744118986189102, "grad_norm": 0.5732594728469849, "learning_rate": 7.712868279842058e-05, "loss": 1.3838, "step": 4528 }, { "epoch": 1.3747154348156019, "grad_norm": 0.6106382608413696, "learning_rate": 7.712362053255038e-05, "loss": 1.2531, "step": 4529 }, { "epoch": 1.375018971012293, "grad_norm": 0.6688647270202637, "learning_rate": 7.711855826668017e-05, "loss": 1.7071, "step": 4530 }, { "epoch": 1.3753225072089847, "grad_norm": 0.534449577331543, "learning_rate": 7.711349600080997e-05, "loss": 1.7455, "step": 4531 }, { "epoch": 1.3756260434056762, "grad_norm": 0.5120007991790771, "learning_rate": 7.710843373493976e-05, "loss": 1.8533, "step": 4532 }, { "epoch": 1.3759295796023676, "grad_norm": 0.5665999054908752, "learning_rate": 7.710337146906956e-05, "loss": 1.8525, "step": 4533 }, { "epoch": 1.376233115799059, "grad_norm": 0.5839645266532898, "learning_rate": 7.709830920319935e-05, "loss": 1.5777, "step": 4534 }, { "epoch": 1.3765366519957505, "grad_norm": 0.5277237296104431, "learning_rate": 7.709324693732916e-05, "loss": 1.8254, "step": 4535 }, { "epoch": 1.376840188192442, "grad_norm": 0.53985196352005, "learning_rate": 7.708818467145896e-05, "loss": 1.3575, "step": 4536 }, { "epoch": 1.3771437243891334, "grad_norm": 0.5370942950248718, "learning_rate": 7.708312240558875e-05, "loss": 1.6435, "step": 4537 }, { "epoch": 1.3774472605858248, "grad_norm": 0.4175781011581421, "learning_rate": 7.707806013971855e-05, "loss": 1.4873, "step": 4538 }, { "epoch": 1.3777507967825162, "grad_norm": 0.5643919110298157, "learning_rate": 7.707299787384834e-05, "loss": 1.8618, "step": 4539 }, { "epoch": 1.3780543329792079, "grad_norm": 0.5539401769638062, "learning_rate": 7.706793560797814e-05, "loss": 1.837, "step": 4540 }, { "epoch": 1.3783578691758993, "grad_norm": 0.4684971570968628, "learning_rate": 7.706287334210793e-05, "loss": 1.7192, "step": 4541 }, { "epoch": 1.3786614053725907, "grad_norm": 0.5881449580192566, "learning_rate": 7.705781107623773e-05, "loss": 0.9102, "step": 4542 }, { "epoch": 1.3789649415692822, "grad_norm": 0.515137255191803, "learning_rate": 7.705274881036752e-05, "loss": 1.3175, "step": 4543 }, { "epoch": 1.3792684777659736, "grad_norm": 0.5105268955230713, "learning_rate": 7.704768654449733e-05, "loss": 1.7881, "step": 4544 }, { "epoch": 1.379572013962665, "grad_norm": 0.5810350179672241, "learning_rate": 7.704262427862712e-05, "loss": 1.1397, "step": 4545 }, { "epoch": 1.3798755501593565, "grad_norm": 0.5091977715492249, "learning_rate": 7.703756201275692e-05, "loss": 2.0762, "step": 4546 }, { "epoch": 1.380179086356048, "grad_norm": 0.5842112302780151, "learning_rate": 7.703249974688671e-05, "loss": 1.8471, "step": 4547 }, { "epoch": 1.3804826225527393, "grad_norm": 0.890228807926178, "learning_rate": 7.702743748101651e-05, "loss": 1.3657, "step": 4548 }, { "epoch": 1.380786158749431, "grad_norm": 0.6264392733573914, "learning_rate": 7.70223752151463e-05, "loss": 1.7772, "step": 4549 }, { "epoch": 1.3810896949461222, "grad_norm": 0.5175667405128479, "learning_rate": 7.70173129492761e-05, "loss": 1.598, "step": 4550 }, { "epoch": 1.3813932311428139, "grad_norm": 0.5696103572845459, "learning_rate": 7.701225068340589e-05, "loss": 1.8323, "step": 4551 }, { "epoch": 1.3816967673395053, "grad_norm": 0.6493420600891113, "learning_rate": 7.700718841753569e-05, "loss": 1.1722, "step": 4552 }, { "epoch": 1.3820003035361967, "grad_norm": 0.5873674154281616, "learning_rate": 7.700212615166548e-05, "loss": 1.7187, "step": 4553 }, { "epoch": 1.3823038397328882, "grad_norm": 0.5197107791900635, "learning_rate": 7.699706388579529e-05, "loss": 1.3359, "step": 4554 }, { "epoch": 1.3826073759295796, "grad_norm": 0.7124143838882446, "learning_rate": 7.699200161992508e-05, "loss": 1.4417, "step": 4555 }, { "epoch": 1.382910912126271, "grad_norm": 0.49033257365226746, "learning_rate": 7.698693935405488e-05, "loss": 1.7348, "step": 4556 }, { "epoch": 1.3832144483229625, "grad_norm": 0.5684939622879028, "learning_rate": 7.698187708818467e-05, "loss": 1.4596, "step": 4557 }, { "epoch": 1.383517984519654, "grad_norm": 0.5147912502288818, "learning_rate": 7.697681482231447e-05, "loss": 2.0594, "step": 4558 }, { "epoch": 1.3838215207163453, "grad_norm": 0.5846849679946899, "learning_rate": 7.697175255644426e-05, "loss": 1.6377, "step": 4559 }, { "epoch": 1.384125056913037, "grad_norm": 0.4074210822582245, "learning_rate": 7.696669029057406e-05, "loss": 0.914, "step": 4560 }, { "epoch": 1.3844285931097282, "grad_norm": 0.5171768069267273, "learning_rate": 7.696162802470385e-05, "loss": 1.5171, "step": 4561 }, { "epoch": 1.3847321293064199, "grad_norm": 0.4810327887535095, "learning_rate": 7.695656575883365e-05, "loss": 1.8348, "step": 4562 }, { "epoch": 1.3850356655031113, "grad_norm": 0.49266764521598816, "learning_rate": 7.695150349296346e-05, "loss": 1.8055, "step": 4563 }, { "epoch": 1.3853392016998027, "grad_norm": 0.7078859210014343, "learning_rate": 7.694644122709325e-05, "loss": 2.0686, "step": 4564 }, { "epoch": 1.3856427378964942, "grad_norm": 0.5220522880554199, "learning_rate": 7.694137896122305e-05, "loss": 1.7709, "step": 4565 }, { "epoch": 1.3859462740931856, "grad_norm": 0.5483274459838867, "learning_rate": 7.693631669535284e-05, "loss": 1.3725, "step": 4566 }, { "epoch": 1.386249810289877, "grad_norm": 0.5607746243476868, "learning_rate": 7.693125442948264e-05, "loss": 1.5361, "step": 4567 }, { "epoch": 1.3865533464865685, "grad_norm": 0.5287503004074097, "learning_rate": 7.692619216361243e-05, "loss": 1.4119, "step": 4568 }, { "epoch": 1.38685688268326, "grad_norm": 0.7318875193595886, "learning_rate": 7.692112989774223e-05, "loss": 1.7289, "step": 4569 }, { "epoch": 1.3871604188799513, "grad_norm": 1.9280884265899658, "learning_rate": 7.691606763187202e-05, "loss": 2.1525, "step": 4570 }, { "epoch": 1.387463955076643, "grad_norm": 0.456725537776947, "learning_rate": 7.691100536600183e-05, "loss": 1.2377, "step": 4571 }, { "epoch": 1.3877674912733342, "grad_norm": 0.541840672492981, "learning_rate": 7.690594310013162e-05, "loss": 1.3657, "step": 4572 }, { "epoch": 1.3880710274700259, "grad_norm": 0.47133737802505493, "learning_rate": 7.690088083426142e-05, "loss": 1.8308, "step": 4573 }, { "epoch": 1.3883745636667173, "grad_norm": 0.517042875289917, "learning_rate": 7.689581856839123e-05, "loss": 1.8143, "step": 4574 }, { "epoch": 1.3886780998634087, "grad_norm": 0.6014066338539124, "learning_rate": 7.689075630252102e-05, "loss": 1.6925, "step": 4575 }, { "epoch": 1.3889816360601002, "grad_norm": 0.4809291958808899, "learning_rate": 7.688569403665082e-05, "loss": 1.9902, "step": 4576 }, { "epoch": 1.3892851722567916, "grad_norm": 0.6222968101501465, "learning_rate": 7.688063177078061e-05, "loss": 1.701, "step": 4577 }, { "epoch": 1.389588708453483, "grad_norm": 0.4806516468524933, "learning_rate": 7.68755695049104e-05, "loss": 2.0006, "step": 4578 }, { "epoch": 1.3898922446501745, "grad_norm": 0.6643034815788269, "learning_rate": 7.68705072390402e-05, "loss": 1.7205, "step": 4579 }, { "epoch": 1.3901957808468661, "grad_norm": 0.6698145270347595, "learning_rate": 7.686544497317e-05, "loss": 1.7128, "step": 4580 }, { "epoch": 1.3904993170435573, "grad_norm": 0.5213164687156677, "learning_rate": 7.686038270729979e-05, "loss": 1.5694, "step": 4581 }, { "epoch": 1.390802853240249, "grad_norm": 0.5646512508392334, "learning_rate": 7.685532044142958e-05, "loss": 1.6217, "step": 4582 }, { "epoch": 1.3911063894369404, "grad_norm": 0.6456514000892639, "learning_rate": 7.685025817555939e-05, "loss": 1.6194, "step": 4583 }, { "epoch": 1.3914099256336319, "grad_norm": 0.6872454881668091, "learning_rate": 7.684519590968919e-05, "loss": 1.5893, "step": 4584 }, { "epoch": 1.3917134618303233, "grad_norm": 0.36732757091522217, "learning_rate": 7.684013364381898e-05, "loss": 1.3996, "step": 4585 }, { "epoch": 1.3920169980270147, "grad_norm": 0.4791436791419983, "learning_rate": 7.683507137794878e-05, "loss": 1.386, "step": 4586 }, { "epoch": 1.3923205342237062, "grad_norm": 0.4628514051437378, "learning_rate": 7.683000911207857e-05, "loss": 1.483, "step": 4587 }, { "epoch": 1.3926240704203976, "grad_norm": 0.5104525089263916, "learning_rate": 7.682494684620837e-05, "loss": 1.8155, "step": 4588 }, { "epoch": 1.392927606617089, "grad_norm": 0.49548450112342834, "learning_rate": 7.681988458033816e-05, "loss": 1.6658, "step": 4589 }, { "epoch": 1.3932311428137805, "grad_norm": 0.6759939193725586, "learning_rate": 7.681482231446796e-05, "loss": 1.798, "step": 4590 }, { "epoch": 1.3935346790104721, "grad_norm": 0.5013139843940735, "learning_rate": 7.680976004859775e-05, "loss": 1.4767, "step": 4591 }, { "epoch": 1.3938382152071633, "grad_norm": 0.5529112219810486, "learning_rate": 7.680469778272755e-05, "loss": 2.0458, "step": 4592 }, { "epoch": 1.394141751403855, "grad_norm": 0.4460000991821289, "learning_rate": 7.679963551685735e-05, "loss": 1.4609, "step": 4593 }, { "epoch": 1.3944452876005464, "grad_norm": 0.5933369994163513, "learning_rate": 7.679457325098715e-05, "loss": 1.4755, "step": 4594 }, { "epoch": 1.3947488237972379, "grad_norm": 0.5485585927963257, "learning_rate": 7.678951098511694e-05, "loss": 1.5591, "step": 4595 }, { "epoch": 1.3950523599939293, "grad_norm": 0.5463452339172363, "learning_rate": 7.678444871924674e-05, "loss": 1.4816, "step": 4596 }, { "epoch": 1.3953558961906207, "grad_norm": 0.5605781078338623, "learning_rate": 7.677938645337653e-05, "loss": 1.4213, "step": 4597 }, { "epoch": 1.3956594323873122, "grad_norm": 0.5562348961830139, "learning_rate": 7.677432418750633e-05, "loss": 1.6317, "step": 4598 }, { "epoch": 1.3959629685840036, "grad_norm": 0.5632421970367432, "learning_rate": 7.676926192163612e-05, "loss": 1.6231, "step": 4599 }, { "epoch": 1.396266504780695, "grad_norm": 0.5117442011833191, "learning_rate": 7.676419965576592e-05, "loss": 1.535, "step": 4600 }, { "epoch": 1.3965700409773865, "grad_norm": 0.576318085193634, "learning_rate": 7.675913738989571e-05, "loss": 1.6773, "step": 4601 }, { "epoch": 1.3968735771740781, "grad_norm": 0.5794771909713745, "learning_rate": 7.675407512402552e-05, "loss": 1.3376, "step": 4602 }, { "epoch": 1.3971771133707693, "grad_norm": 0.519234299659729, "learning_rate": 7.674901285815532e-05, "loss": 1.4872, "step": 4603 }, { "epoch": 1.397480649567461, "grad_norm": 0.48116299510002136, "learning_rate": 7.674395059228511e-05, "loss": 1.7884, "step": 4604 }, { "epoch": 1.3977841857641524, "grad_norm": 0.4485664367675781, "learning_rate": 7.67388883264149e-05, "loss": 1.7424, "step": 4605 }, { "epoch": 1.3980877219608439, "grad_norm": 0.5528675317764282, "learning_rate": 7.67338260605447e-05, "loss": 1.5698, "step": 4606 }, { "epoch": 1.3983912581575353, "grad_norm": 0.6204590797424316, "learning_rate": 7.67287637946745e-05, "loss": 1.5903, "step": 4607 }, { "epoch": 1.3986947943542267, "grad_norm": 0.5104626417160034, "learning_rate": 7.672370152880429e-05, "loss": 1.7593, "step": 4608 }, { "epoch": 1.3989983305509182, "grad_norm": 0.5637308359146118, "learning_rate": 7.671863926293408e-05, "loss": 1.459, "step": 4609 }, { "epoch": 1.3993018667476096, "grad_norm": 0.4793213903903961, "learning_rate": 7.671357699706388e-05, "loss": 1.5054, "step": 4610 }, { "epoch": 1.399605402944301, "grad_norm": 0.5412987470626831, "learning_rate": 7.670851473119369e-05, "loss": 1.5059, "step": 4611 }, { "epoch": 1.3999089391409925, "grad_norm": 0.5955209136009216, "learning_rate": 7.670345246532348e-05, "loss": 1.5, "step": 4612 }, { "epoch": 1.4002124753376841, "grad_norm": 2.214019536972046, "learning_rate": 7.669839019945328e-05, "loss": 0.8095, "step": 4613 }, { "epoch": 1.4005160115343755, "grad_norm": 0.5414422154426575, "learning_rate": 7.669332793358307e-05, "loss": 1.5825, "step": 4614 }, { "epoch": 1.400819547731067, "grad_norm": 0.564838707447052, "learning_rate": 7.668826566771288e-05, "loss": 1.645, "step": 4615 }, { "epoch": 1.4011230839277584, "grad_norm": 0.5650699734687805, "learning_rate": 7.668320340184268e-05, "loss": 1.3399, "step": 4616 }, { "epoch": 1.4014266201244499, "grad_norm": 0.6401330232620239, "learning_rate": 7.667814113597247e-05, "loss": 1.9951, "step": 4617 }, { "epoch": 1.4017301563211413, "grad_norm": 0.586685836315155, "learning_rate": 7.667307887010226e-05, "loss": 1.4413, "step": 4618 }, { "epoch": 1.4020336925178327, "grad_norm": 0.551403284072876, "learning_rate": 7.666801660423206e-05, "loss": 1.693, "step": 4619 }, { "epoch": 1.4023372287145242, "grad_norm": 0.4873546361923218, "learning_rate": 7.666295433836185e-05, "loss": 1.8064, "step": 4620 }, { "epoch": 1.4026407649112156, "grad_norm": 0.6161354780197144, "learning_rate": 7.665789207249165e-05, "loss": 1.5373, "step": 4621 }, { "epoch": 1.4029443011079072, "grad_norm": 0.556962788105011, "learning_rate": 7.665282980662146e-05, "loss": 1.5957, "step": 4622 }, { "epoch": 1.4032478373045985, "grad_norm": 0.566043496131897, "learning_rate": 7.664776754075125e-05, "loss": 1.5312, "step": 4623 }, { "epoch": 1.4035513735012901, "grad_norm": 0.5295618772506714, "learning_rate": 7.664270527488105e-05, "loss": 1.7293, "step": 4624 }, { "epoch": 1.4038549096979815, "grad_norm": 0.47450530529022217, "learning_rate": 7.663764300901084e-05, "loss": 1.6276, "step": 4625 }, { "epoch": 1.404158445894673, "grad_norm": 0.5055714249610901, "learning_rate": 7.663258074314064e-05, "loss": 1.8185, "step": 4626 }, { "epoch": 1.4044619820913644, "grad_norm": 0.43849778175354004, "learning_rate": 7.662751847727043e-05, "loss": 1.2328, "step": 4627 }, { "epoch": 1.4047655182880558, "grad_norm": 0.49399152398109436, "learning_rate": 7.662245621140023e-05, "loss": 1.4324, "step": 4628 }, { "epoch": 1.4050690544847473, "grad_norm": 2.6655149459838867, "learning_rate": 7.661739394553002e-05, "loss": 1.0911, "step": 4629 }, { "epoch": 1.4053725906814387, "grad_norm": 0.5486051440238953, "learning_rate": 7.661233167965982e-05, "loss": 1.6358, "step": 4630 }, { "epoch": 1.4056761268781301, "grad_norm": 0.4878422021865845, "learning_rate": 7.660726941378961e-05, "loss": 1.3881, "step": 4631 }, { "epoch": 1.4059796630748216, "grad_norm": 0.5612585544586182, "learning_rate": 7.660220714791942e-05, "loss": 2.0723, "step": 4632 }, { "epoch": 1.4062831992715132, "grad_norm": 0.579932689666748, "learning_rate": 7.659714488204921e-05, "loss": 1.3426, "step": 4633 }, { "epoch": 1.4065867354682045, "grad_norm": 0.4245831370353699, "learning_rate": 7.659208261617901e-05, "loss": 1.4925, "step": 4634 }, { "epoch": 1.406890271664896, "grad_norm": 0.7544228434562683, "learning_rate": 7.65870203503088e-05, "loss": 1.5973, "step": 4635 }, { "epoch": 1.4071938078615875, "grad_norm": 0.4973390996456146, "learning_rate": 7.65819580844386e-05, "loss": 1.6748, "step": 4636 }, { "epoch": 1.407497344058279, "grad_norm": 0.5662533044815063, "learning_rate": 7.657689581856839e-05, "loss": 2.0457, "step": 4637 }, { "epoch": 1.4078008802549704, "grad_norm": 0.5135043859481812, "learning_rate": 7.657183355269819e-05, "loss": 1.8762, "step": 4638 }, { "epoch": 1.4081044164516618, "grad_norm": 0.5957035422325134, "learning_rate": 7.656677128682798e-05, "loss": 1.6636, "step": 4639 }, { "epoch": 1.4084079526483533, "grad_norm": 0.5083897113800049, "learning_rate": 7.656170902095778e-05, "loss": 1.8611, "step": 4640 }, { "epoch": 1.4087114888450447, "grad_norm": 0.5058372020721436, "learning_rate": 7.655664675508759e-05, "loss": 1.8053, "step": 4641 }, { "epoch": 1.4090150250417361, "grad_norm": 0.5841237306594849, "learning_rate": 7.655158448921738e-05, "loss": 1.1644, "step": 4642 }, { "epoch": 1.4093185612384276, "grad_norm": 0.5634949803352356, "learning_rate": 7.654652222334718e-05, "loss": 1.4692, "step": 4643 }, { "epoch": 1.4096220974351192, "grad_norm": 0.9778512716293335, "learning_rate": 7.654145995747697e-05, "loss": 1.4193, "step": 4644 }, { "epoch": 1.4099256336318107, "grad_norm": 0.5774378776550293, "learning_rate": 7.653639769160677e-05, "loss": 1.9765, "step": 4645 }, { "epoch": 1.410229169828502, "grad_norm": 0.552975594997406, "learning_rate": 7.653133542573656e-05, "loss": 1.4232, "step": 4646 }, { "epoch": 1.4105327060251935, "grad_norm": 0.4814278483390808, "learning_rate": 7.652627315986635e-05, "loss": 1.6995, "step": 4647 }, { "epoch": 1.410836242221885, "grad_norm": 0.5448489189147949, "learning_rate": 7.652121089399615e-05, "loss": 1.6537, "step": 4648 }, { "epoch": 1.4111397784185764, "grad_norm": 0.5354534387588501, "learning_rate": 7.651614862812594e-05, "loss": 1.5464, "step": 4649 }, { "epoch": 1.4114433146152678, "grad_norm": 0.507394015789032, "learning_rate": 7.651108636225575e-05, "loss": 1.955, "step": 4650 }, { "epoch": 1.4117468508119593, "grad_norm": 0.49544399976730347, "learning_rate": 7.650602409638555e-05, "loss": 1.8493, "step": 4651 }, { "epoch": 1.4120503870086507, "grad_norm": 0.5341572165489197, "learning_rate": 7.650096183051534e-05, "loss": 1.8316, "step": 4652 }, { "epoch": 1.4123539232053424, "grad_norm": 0.5258657932281494, "learning_rate": 7.649589956464514e-05, "loss": 1.8353, "step": 4653 }, { "epoch": 1.4126574594020336, "grad_norm": 0.4699995517730713, "learning_rate": 7.649083729877493e-05, "loss": 1.6508, "step": 4654 }, { "epoch": 1.4129609955987252, "grad_norm": 0.5846216082572937, "learning_rate": 7.648577503290473e-05, "loss": 1.3771, "step": 4655 }, { "epoch": 1.4132645317954167, "grad_norm": 0.5015813112258911, "learning_rate": 7.648071276703452e-05, "loss": 1.8213, "step": 4656 }, { "epoch": 1.413568067992108, "grad_norm": 0.5818765759468079, "learning_rate": 7.647565050116432e-05, "loss": 1.5861, "step": 4657 }, { "epoch": 1.4138716041887995, "grad_norm": 0.5784850716590881, "learning_rate": 7.647058823529411e-05, "loss": 1.0852, "step": 4658 }, { "epoch": 1.414175140385491, "grad_norm": 0.5787924528121948, "learning_rate": 7.64655259694239e-05, "loss": 1.8358, "step": 4659 }, { "epoch": 1.4144786765821824, "grad_norm": 0.5465903282165527, "learning_rate": 7.646046370355371e-05, "loss": 1.7018, "step": 4660 }, { "epoch": 1.4147822127788738, "grad_norm": 0.5020646452903748, "learning_rate": 7.645540143768352e-05, "loss": 1.8004, "step": 4661 }, { "epoch": 1.4150857489755653, "grad_norm": 0.5645866990089417, "learning_rate": 7.645033917181332e-05, "loss": 1.255, "step": 4662 }, { "epoch": 1.4153892851722567, "grad_norm": 0.4355092942714691, "learning_rate": 7.644527690594311e-05, "loss": 1.1957, "step": 4663 }, { "epoch": 1.4156928213689484, "grad_norm": 0.6315711140632629, "learning_rate": 7.644021464007291e-05, "loss": 1.2085, "step": 4664 }, { "epoch": 1.4159963575656396, "grad_norm": 0.47501039505004883, "learning_rate": 7.64351523742027e-05, "loss": 2.0933, "step": 4665 }, { "epoch": 1.4162998937623312, "grad_norm": 0.45434871315956116, "learning_rate": 7.64300901083325e-05, "loss": 1.2583, "step": 4666 }, { "epoch": 1.4166034299590227, "grad_norm": 0.6555678844451904, "learning_rate": 7.642502784246229e-05, "loss": 1.5034, "step": 4667 }, { "epoch": 1.416906966155714, "grad_norm": 0.5302774310112, "learning_rate": 7.641996557659209e-05, "loss": 2.0505, "step": 4668 }, { "epoch": 1.4172105023524055, "grad_norm": 0.4992988109588623, "learning_rate": 7.641490331072188e-05, "loss": 1.492, "step": 4669 }, { "epoch": 1.417514038549097, "grad_norm": 0.5464704036712646, "learning_rate": 7.640984104485168e-05, "loss": 1.8207, "step": 4670 }, { "epoch": 1.4178175747457884, "grad_norm": 0.60384601354599, "learning_rate": 7.640477877898148e-05, "loss": 1.6035, "step": 4671 }, { "epoch": 1.4181211109424798, "grad_norm": 0.5650917887687683, "learning_rate": 7.639971651311128e-05, "loss": 1.6211, "step": 4672 }, { "epoch": 1.4184246471391713, "grad_norm": 0.6552462577819824, "learning_rate": 7.639465424724107e-05, "loss": 1.5755, "step": 4673 }, { "epoch": 1.4187281833358627, "grad_norm": 0.5759985446929932, "learning_rate": 7.638959198137087e-05, "loss": 1.4893, "step": 4674 }, { "epoch": 1.4190317195325544, "grad_norm": 0.5508556365966797, "learning_rate": 7.638452971550066e-05, "loss": 1.6477, "step": 4675 }, { "epoch": 1.4193352557292458, "grad_norm": 0.48576992750167847, "learning_rate": 7.637946744963046e-05, "loss": 1.8028, "step": 4676 }, { "epoch": 1.4196387919259372, "grad_norm": 1.5687899589538574, "learning_rate": 7.637440518376025e-05, "loss": 1.4341, "step": 4677 }, { "epoch": 1.4199423281226287, "grad_norm": 0.4605062007904053, "learning_rate": 7.636934291789005e-05, "loss": 1.6236, "step": 4678 }, { "epoch": 1.42024586431932, "grad_norm": 0.5490358471870422, "learning_rate": 7.636428065201984e-05, "loss": 1.4707, "step": 4679 }, { "epoch": 1.4205494005160115, "grad_norm": 0.5529788732528687, "learning_rate": 7.635921838614965e-05, "loss": 1.5166, "step": 4680 }, { "epoch": 1.420852936712703, "grad_norm": 0.5166842341423035, "learning_rate": 7.635415612027945e-05, "loss": 1.7152, "step": 4681 }, { "epoch": 1.4211564729093944, "grad_norm": 0.5218782424926758, "learning_rate": 7.634909385440924e-05, "loss": 1.4112, "step": 4682 }, { "epoch": 1.4214600091060858, "grad_norm": 0.5360171794891357, "learning_rate": 7.634403158853904e-05, "loss": 1.9449, "step": 4683 }, { "epoch": 1.4217635453027775, "grad_norm": 0.45122429728507996, "learning_rate": 7.633896932266883e-05, "loss": 1.5348, "step": 4684 }, { "epoch": 1.4220670814994687, "grad_norm": 0.541969895362854, "learning_rate": 7.633390705679862e-05, "loss": 1.549, "step": 4685 }, { "epoch": 1.4223706176961604, "grad_norm": 0.45322978496551514, "learning_rate": 7.632884479092842e-05, "loss": 1.6838, "step": 4686 }, { "epoch": 1.4226741538928518, "grad_norm": 0.5295718908309937, "learning_rate": 7.632378252505821e-05, "loss": 1.539, "step": 4687 }, { "epoch": 1.4229776900895432, "grad_norm": 0.49040156602859497, "learning_rate": 7.631872025918801e-05, "loss": 1.8898, "step": 4688 }, { "epoch": 1.4232812262862347, "grad_norm": 0.5323116183280945, "learning_rate": 7.631365799331782e-05, "loss": 1.818, "step": 4689 }, { "epoch": 1.423584762482926, "grad_norm": 0.5131807327270508, "learning_rate": 7.630859572744761e-05, "loss": 1.4028, "step": 4690 }, { "epoch": 1.4238882986796175, "grad_norm": 0.5579325556755066, "learning_rate": 7.630353346157741e-05, "loss": 1.4887, "step": 4691 }, { "epoch": 1.424191834876309, "grad_norm": 0.5273114442825317, "learning_rate": 7.62984711957072e-05, "loss": 1.9394, "step": 4692 }, { "epoch": 1.4244953710730004, "grad_norm": 0.5281380414962769, "learning_rate": 7.6293408929837e-05, "loss": 1.359, "step": 4693 }, { "epoch": 1.4247989072696918, "grad_norm": 0.5331006050109863, "learning_rate": 7.628834666396679e-05, "loss": 1.6283, "step": 4694 }, { "epoch": 1.4251024434663835, "grad_norm": 0.6110636591911316, "learning_rate": 7.628328439809659e-05, "loss": 1.4206, "step": 4695 }, { "epoch": 1.4254059796630747, "grad_norm": 0.5301719903945923, "learning_rate": 7.627822213222638e-05, "loss": 1.9281, "step": 4696 }, { "epoch": 1.4257095158597664, "grad_norm": 0.5527634620666504, "learning_rate": 7.627315986635618e-05, "loss": 1.5625, "step": 4697 }, { "epoch": 1.4260130520564578, "grad_norm": 0.5319656729698181, "learning_rate": 7.626809760048597e-05, "loss": 1.67, "step": 4698 }, { "epoch": 1.4263165882531492, "grad_norm": 0.5614206790924072, "learning_rate": 7.626303533461578e-05, "loss": 1.7777, "step": 4699 }, { "epoch": 1.4266201244498407, "grad_norm": 0.5032866597175598, "learning_rate": 7.625797306874557e-05, "loss": 1.9566, "step": 4700 }, { "epoch": 1.426923660646532, "grad_norm": 0.5314932465553284, "learning_rate": 7.625291080287537e-05, "loss": 1.6655, "step": 4701 }, { "epoch": 1.4272271968432235, "grad_norm": 0.47711166739463806, "learning_rate": 7.624784853700516e-05, "loss": 1.6008, "step": 4702 }, { "epoch": 1.427530733039915, "grad_norm": 0.5699756741523743, "learning_rate": 7.624278627113496e-05, "loss": 1.5925, "step": 4703 }, { "epoch": 1.4278342692366064, "grad_norm": 0.5623230338096619, "learning_rate": 7.623772400526477e-05, "loss": 1.7858, "step": 4704 }, { "epoch": 1.4281378054332978, "grad_norm": 0.5966967344284058, "learning_rate": 7.623266173939456e-05, "loss": 1.0612, "step": 4705 }, { "epoch": 1.4284413416299895, "grad_norm": 0.492683082818985, "learning_rate": 7.622759947352436e-05, "loss": 1.895, "step": 4706 }, { "epoch": 1.428744877826681, "grad_norm": 0.5114320516586304, "learning_rate": 7.622253720765415e-05, "loss": 1.9176, "step": 4707 }, { "epoch": 1.4290484140233723, "grad_norm": 0.47429636120796204, "learning_rate": 7.621747494178395e-05, "loss": 2.1511, "step": 4708 }, { "epoch": 1.4293519502200638, "grad_norm": 0.5218935012817383, "learning_rate": 7.621241267591374e-05, "loss": 1.5853, "step": 4709 }, { "epoch": 1.4296554864167552, "grad_norm": 0.5426785349845886, "learning_rate": 7.620735041004355e-05, "loss": 1.8157, "step": 4710 }, { "epoch": 1.4299590226134467, "grad_norm": 0.5341148972511292, "learning_rate": 7.620228814417334e-05, "loss": 1.697, "step": 4711 }, { "epoch": 1.430262558810138, "grad_norm": 0.7054175734519958, "learning_rate": 7.619722587830314e-05, "loss": 1.5556, "step": 4712 }, { "epoch": 1.4305660950068295, "grad_norm": 0.903923749923706, "learning_rate": 7.619216361243293e-05, "loss": 1.6436, "step": 4713 }, { "epoch": 1.430869631203521, "grad_norm": 0.48794859647750854, "learning_rate": 7.618710134656273e-05, "loss": 1.809, "step": 4714 }, { "epoch": 1.4311731674002126, "grad_norm": 0.47096773982048035, "learning_rate": 7.618203908069252e-05, "loss": 1.6649, "step": 4715 }, { "epoch": 1.4314767035969038, "grad_norm": 0.6093799471855164, "learning_rate": 7.617697681482232e-05, "loss": 1.8031, "step": 4716 }, { "epoch": 1.4317802397935955, "grad_norm": 0.5231300592422485, "learning_rate": 7.617191454895211e-05, "loss": 1.7465, "step": 4717 }, { "epoch": 1.432083775990287, "grad_norm": 0.545767605304718, "learning_rate": 7.616685228308191e-05, "loss": 0.9108, "step": 4718 }, { "epoch": 1.4323873121869783, "grad_norm": 0.4710002839565277, "learning_rate": 7.616179001721172e-05, "loss": 1.8432, "step": 4719 }, { "epoch": 1.4326908483836698, "grad_norm": 0.6048409938812256, "learning_rate": 7.615672775134151e-05, "loss": 1.8028, "step": 4720 }, { "epoch": 1.4329943845803612, "grad_norm": 0.4102180600166321, "learning_rate": 7.61516654854713e-05, "loss": 1.3209, "step": 4721 }, { "epoch": 1.4332979207770526, "grad_norm": 0.5769391655921936, "learning_rate": 7.61466032196011e-05, "loss": 1.4543, "step": 4722 }, { "epoch": 1.433601456973744, "grad_norm": 0.5256406664848328, "learning_rate": 7.61415409537309e-05, "loss": 1.572, "step": 4723 }, { "epoch": 1.4339049931704355, "grad_norm": 0.5865902900695801, "learning_rate": 7.613647868786069e-05, "loss": 1.3323, "step": 4724 }, { "epoch": 1.434208529367127, "grad_norm": 0.5765762329101562, "learning_rate": 7.613141642199048e-05, "loss": 1.8572, "step": 4725 }, { "epoch": 1.4345120655638186, "grad_norm": 0.6536111235618591, "learning_rate": 7.612635415612028e-05, "loss": 1.1194, "step": 4726 }, { "epoch": 1.4348156017605098, "grad_norm": 0.559202253818512, "learning_rate": 7.612129189025007e-05, "loss": 1.9195, "step": 4727 }, { "epoch": 1.4351191379572015, "grad_norm": 0.5304152369499207, "learning_rate": 7.611622962437988e-05, "loss": 1.4013, "step": 4728 }, { "epoch": 1.435422674153893, "grad_norm": 0.5185491442680359, "learning_rate": 7.611116735850968e-05, "loss": 1.7297, "step": 4729 }, { "epoch": 1.4357262103505843, "grad_norm": 0.5597679018974304, "learning_rate": 7.610610509263947e-05, "loss": 1.4894, "step": 4730 }, { "epoch": 1.4360297465472758, "grad_norm": 0.47206321358680725, "learning_rate": 7.610104282676927e-05, "loss": 2.1203, "step": 4731 }, { "epoch": 1.4363332827439672, "grad_norm": 0.536067545413971, "learning_rate": 7.609598056089906e-05, "loss": 1.4863, "step": 4732 }, { "epoch": 1.4366368189406586, "grad_norm": 0.510307788848877, "learning_rate": 7.609091829502886e-05, "loss": 1.6554, "step": 4733 }, { "epoch": 1.43694035513735, "grad_norm": 0.4985925853252411, "learning_rate": 7.608585602915865e-05, "loss": 1.5833, "step": 4734 }, { "epoch": 1.4372438913340415, "grad_norm": 0.4628017544746399, "learning_rate": 7.608079376328845e-05, "loss": 1.7996, "step": 4735 }, { "epoch": 1.437547427530733, "grad_norm": 0.5663459300994873, "learning_rate": 7.607573149741824e-05, "loss": 1.5257, "step": 4736 }, { "epoch": 1.4378509637274246, "grad_norm": 0.4896049201488495, "learning_rate": 7.607066923154804e-05, "loss": 1.8763, "step": 4737 }, { "epoch": 1.4381544999241158, "grad_norm": 0.4533321261405945, "learning_rate": 7.606560696567784e-05, "loss": 0.8658, "step": 4738 }, { "epoch": 1.4384580361208075, "grad_norm": 0.5471289157867432, "learning_rate": 7.606054469980764e-05, "loss": 2.0191, "step": 4739 }, { "epoch": 1.438761572317499, "grad_norm": 0.45516133308410645, "learning_rate": 7.605548243393743e-05, "loss": 1.3933, "step": 4740 }, { "epoch": 1.4390651085141903, "grad_norm": 0.7363737225532532, "learning_rate": 7.605042016806723e-05, "loss": 1.7563, "step": 4741 }, { "epoch": 1.4393686447108818, "grad_norm": 0.403385192155838, "learning_rate": 7.604535790219702e-05, "loss": 1.9145, "step": 4742 }, { "epoch": 1.4396721809075732, "grad_norm": 0.502412736415863, "learning_rate": 7.604029563632682e-05, "loss": 1.6981, "step": 4743 }, { "epoch": 1.4399757171042646, "grad_norm": 0.5202810168266296, "learning_rate": 7.603523337045661e-05, "loss": 1.7792, "step": 4744 }, { "epoch": 1.440279253300956, "grad_norm": 0.5282444357872009, "learning_rate": 7.603017110458641e-05, "loss": 1.7323, "step": 4745 }, { "epoch": 1.4405827894976477, "grad_norm": 0.4394991099834442, "learning_rate": 7.60251088387162e-05, "loss": 1.7267, "step": 4746 }, { "epoch": 1.440886325694339, "grad_norm": 0.5699142217636108, "learning_rate": 7.602004657284601e-05, "loss": 1.7191, "step": 4747 }, { "epoch": 1.4411898618910306, "grad_norm": 0.5591686964035034, "learning_rate": 7.60149843069758e-05, "loss": 1.6067, "step": 4748 }, { "epoch": 1.441493398087722, "grad_norm": 0.4762207269668579, "learning_rate": 7.600992204110561e-05, "loss": 1.7094, "step": 4749 }, { "epoch": 1.4417969342844135, "grad_norm": 0.5511293411254883, "learning_rate": 7.600485977523541e-05, "loss": 1.4719, "step": 4750 }, { "epoch": 1.442100470481105, "grad_norm": 0.5466549396514893, "learning_rate": 7.59997975093652e-05, "loss": 1.8903, "step": 4751 }, { "epoch": 1.4424040066777963, "grad_norm": 0.7153205871582031, "learning_rate": 7.5994735243495e-05, "loss": 1.6593, "step": 4752 }, { "epoch": 1.4427075428744878, "grad_norm": 0.49872803688049316, "learning_rate": 7.598967297762479e-05, "loss": 1.3233, "step": 4753 }, { "epoch": 1.4430110790711792, "grad_norm": 0.558476984500885, "learning_rate": 7.598461071175459e-05, "loss": 1.5951, "step": 4754 }, { "epoch": 1.4433146152678706, "grad_norm": 0.4782353639602661, "learning_rate": 7.597954844588438e-05, "loss": 1.5372, "step": 4755 }, { "epoch": 1.443618151464562, "grad_norm": 0.5572071075439453, "learning_rate": 7.597448618001418e-05, "loss": 1.4866, "step": 4756 }, { "epoch": 1.4439216876612537, "grad_norm": 0.61551433801651, "learning_rate": 7.596942391414397e-05, "loss": 1.6909, "step": 4757 }, { "epoch": 1.444225223857945, "grad_norm": 0.5780778527259827, "learning_rate": 7.596436164827378e-05, "loss": 1.4837, "step": 4758 }, { "epoch": 1.4445287600546366, "grad_norm": 0.6437112092971802, "learning_rate": 7.595929938240357e-05, "loss": 2.1734, "step": 4759 }, { "epoch": 1.444832296251328, "grad_norm": 0.5654916167259216, "learning_rate": 7.595423711653337e-05, "loss": 1.407, "step": 4760 }, { "epoch": 1.4451358324480195, "grad_norm": 0.6159085631370544, "learning_rate": 7.594917485066316e-05, "loss": 1.6069, "step": 4761 }, { "epoch": 1.445439368644711, "grad_norm": 0.5013946294784546, "learning_rate": 7.594411258479296e-05, "loss": 1.8218, "step": 4762 }, { "epoch": 1.4457429048414023, "grad_norm": 0.49084192514419556, "learning_rate": 7.593905031892275e-05, "loss": 1.8749, "step": 4763 }, { "epoch": 1.4460464410380938, "grad_norm": 0.4393550455570221, "learning_rate": 7.593398805305255e-05, "loss": 1.4319, "step": 4764 }, { "epoch": 1.4463499772347852, "grad_norm": 0.5603815317153931, "learning_rate": 7.592892578718234e-05, "loss": 1.6359, "step": 4765 }, { "epoch": 1.4466535134314766, "grad_norm": 0.5094273686408997, "learning_rate": 7.592386352131214e-05, "loss": 1.9181, "step": 4766 }, { "epoch": 1.446957049628168, "grad_norm": 0.5389642119407654, "learning_rate": 7.591880125544195e-05, "loss": 1.888, "step": 4767 }, { "epoch": 1.4472605858248597, "grad_norm": 0.5263598561286926, "learning_rate": 7.591373898957174e-05, "loss": 1.7503, "step": 4768 }, { "epoch": 1.447564122021551, "grad_norm": 0.4412989318370819, "learning_rate": 7.590867672370154e-05, "loss": 1.2045, "step": 4769 }, { "epoch": 1.4478676582182426, "grad_norm": 0.5222303867340088, "learning_rate": 7.590361445783133e-05, "loss": 1.8976, "step": 4770 }, { "epoch": 1.448171194414934, "grad_norm": 0.49793916940689087, "learning_rate": 7.589855219196113e-05, "loss": 1.3931, "step": 4771 }, { "epoch": 1.4484747306116255, "grad_norm": 0.5002750754356384, "learning_rate": 7.589348992609092e-05, "loss": 1.6249, "step": 4772 }, { "epoch": 1.448778266808317, "grad_norm": 0.4378749132156372, "learning_rate": 7.588842766022072e-05, "loss": 1.6208, "step": 4773 }, { "epoch": 1.4490818030050083, "grad_norm": 0.5322429537773132, "learning_rate": 7.588336539435051e-05, "loss": 1.6444, "step": 4774 }, { "epoch": 1.4493853392016998, "grad_norm": 0.5761924982070923, "learning_rate": 7.58783031284803e-05, "loss": 1.6111, "step": 4775 }, { "epoch": 1.4496888753983912, "grad_norm": 0.5562173128128052, "learning_rate": 7.58732408626101e-05, "loss": 1.9395, "step": 4776 }, { "epoch": 1.4499924115950826, "grad_norm": 0.5333907008171082, "learning_rate": 7.586817859673991e-05, "loss": 1.8048, "step": 4777 }, { "epoch": 1.450295947791774, "grad_norm": 0.5698195099830627, "learning_rate": 7.58631163308697e-05, "loss": 1.4219, "step": 4778 }, { "epoch": 1.4505994839884657, "grad_norm": 0.4817031919956207, "learning_rate": 7.58580540649995e-05, "loss": 0.9775, "step": 4779 }, { "epoch": 1.4509030201851572, "grad_norm": 0.527871310710907, "learning_rate": 7.585299179912929e-05, "loss": 1.8112, "step": 4780 }, { "epoch": 1.4512065563818486, "grad_norm": 0.5405674576759338, "learning_rate": 7.584792953325909e-05, "loss": 1.8271, "step": 4781 }, { "epoch": 1.45151009257854, "grad_norm": 0.5090677738189697, "learning_rate": 7.584286726738888e-05, "loss": 1.851, "step": 4782 }, { "epoch": 1.4518136287752315, "grad_norm": 0.5070275068283081, "learning_rate": 7.583780500151868e-05, "loss": 1.4614, "step": 4783 }, { "epoch": 1.452117164971923, "grad_norm": 0.5664345622062683, "learning_rate": 7.583274273564847e-05, "loss": 1.4379, "step": 4784 }, { "epoch": 1.4524207011686143, "grad_norm": 0.7471550107002258, "learning_rate": 7.582768046977827e-05, "loss": 1.3397, "step": 4785 }, { "epoch": 1.4527242373653058, "grad_norm": 0.4601489007472992, "learning_rate": 7.582261820390808e-05, "loss": 1.8286, "step": 4786 }, { "epoch": 1.4530277735619972, "grad_norm": 0.5429388880729675, "learning_rate": 7.581755593803787e-05, "loss": 1.5062, "step": 4787 }, { "epoch": 1.4533313097586888, "grad_norm": 0.5463858842849731, "learning_rate": 7.581249367216766e-05, "loss": 1.8726, "step": 4788 }, { "epoch": 1.45363484595538, "grad_norm": 0.4973449110984802, "learning_rate": 7.580743140629746e-05, "loss": 1.7763, "step": 4789 }, { "epoch": 1.4539383821520717, "grad_norm": 0.7041242122650146, "learning_rate": 7.580236914042725e-05, "loss": 1.91, "step": 4790 }, { "epoch": 1.4542419183487632, "grad_norm": 0.7030282020568848, "learning_rate": 7.579730687455705e-05, "loss": 1.5485, "step": 4791 }, { "epoch": 1.4545454545454546, "grad_norm": 0.52986079454422, "learning_rate": 7.579224460868684e-05, "loss": 1.9781, "step": 4792 }, { "epoch": 1.454848990742146, "grad_norm": 0.4283183515071869, "learning_rate": 7.578718234281665e-05, "loss": 1.8996, "step": 4793 }, { "epoch": 1.4551525269388375, "grad_norm": 0.5609734654426575, "learning_rate": 7.578212007694645e-05, "loss": 1.5946, "step": 4794 }, { "epoch": 1.4554560631355289, "grad_norm": 0.46615251898765564, "learning_rate": 7.577705781107624e-05, "loss": 1.7604, "step": 4795 }, { "epoch": 1.4557595993322203, "grad_norm": 0.5443726778030396, "learning_rate": 7.577199554520604e-05, "loss": 1.5563, "step": 4796 }, { "epoch": 1.4560631355289118, "grad_norm": 0.5708737969398499, "learning_rate": 7.576693327933584e-05, "loss": 1.4693, "step": 4797 }, { "epoch": 1.4563666717256032, "grad_norm": 0.5923335552215576, "learning_rate": 7.576187101346564e-05, "loss": 1.915, "step": 4798 }, { "epoch": 1.4566702079222948, "grad_norm": 0.5595468282699585, "learning_rate": 7.575680874759543e-05, "loss": 1.4823, "step": 4799 }, { "epoch": 1.456973744118986, "grad_norm": 0.583549439907074, "learning_rate": 7.575174648172523e-05, "loss": 1.9422, "step": 4800 }, { "epoch": 1.4572772803156777, "grad_norm": 0.46514013409614563, "learning_rate": 7.574668421585502e-05, "loss": 2.0324, "step": 4801 }, { "epoch": 1.4575808165123691, "grad_norm": 0.46036651730537415, "learning_rate": 7.574162194998482e-05, "loss": 1.0407, "step": 4802 }, { "epoch": 1.4578843527090606, "grad_norm": 0.5273803472518921, "learning_rate": 7.573655968411461e-05, "loss": 1.4721, "step": 4803 }, { "epoch": 1.458187888905752, "grad_norm": 0.39598920941352844, "learning_rate": 7.573149741824441e-05, "loss": 1.2043, "step": 4804 }, { "epoch": 1.4584914251024435, "grad_norm": 0.5395318865776062, "learning_rate": 7.57264351523742e-05, "loss": 1.5629, "step": 4805 }, { "epoch": 1.4587949612991349, "grad_norm": 0.42741745710372925, "learning_rate": 7.572137288650401e-05, "loss": 1.7217, "step": 4806 }, { "epoch": 1.4590984974958263, "grad_norm": 0.5320115685462952, "learning_rate": 7.57163106206338e-05, "loss": 1.7133, "step": 4807 }, { "epoch": 1.4594020336925178, "grad_norm": 0.5228841304779053, "learning_rate": 7.57112483547636e-05, "loss": 1.3938, "step": 4808 }, { "epoch": 1.4597055698892092, "grad_norm": 0.4937410354614258, "learning_rate": 7.57061860888934e-05, "loss": 1.4786, "step": 4809 }, { "epoch": 1.4600091060859008, "grad_norm": 0.46726706624031067, "learning_rate": 7.570112382302319e-05, "loss": 1.8383, "step": 4810 }, { "epoch": 1.4603126422825923, "grad_norm": 0.5124707221984863, "learning_rate": 7.569606155715299e-05, "loss": 1.5768, "step": 4811 }, { "epoch": 1.4606161784792837, "grad_norm": 0.5231637954711914, "learning_rate": 7.569099929128278e-05, "loss": 1.8708, "step": 4812 }, { "epoch": 1.4609197146759751, "grad_norm": 0.5755016803741455, "learning_rate": 7.568593702541258e-05, "loss": 1.2787, "step": 4813 }, { "epoch": 1.4612232508726666, "grad_norm": 0.550441563129425, "learning_rate": 7.568087475954237e-05, "loss": 1.6205, "step": 4814 }, { "epoch": 1.461526787069358, "grad_norm": 0.5654580593109131, "learning_rate": 7.567581249367216e-05, "loss": 1.4837, "step": 4815 }, { "epoch": 1.4618303232660494, "grad_norm": 0.4842512905597687, "learning_rate": 7.567075022780197e-05, "loss": 1.9908, "step": 4816 }, { "epoch": 1.4621338594627409, "grad_norm": 0.5777815580368042, "learning_rate": 7.566568796193177e-05, "loss": 1.7202, "step": 4817 }, { "epoch": 1.4624373956594323, "grad_norm": 0.5063660144805908, "learning_rate": 7.566062569606156e-05, "loss": 1.5393, "step": 4818 }, { "epoch": 1.462740931856124, "grad_norm": 0.4989129602909088, "learning_rate": 7.565556343019136e-05, "loss": 1.6849, "step": 4819 }, { "epoch": 1.4630444680528152, "grad_norm": 0.42997097969055176, "learning_rate": 7.565050116432115e-05, "loss": 1.6785, "step": 4820 }, { "epoch": 1.4633480042495068, "grad_norm": 0.4580230414867401, "learning_rate": 7.564543889845095e-05, "loss": 1.9124, "step": 4821 }, { "epoch": 1.4636515404461983, "grad_norm": 0.44534167647361755, "learning_rate": 7.564037663258074e-05, "loss": 1.6729, "step": 4822 }, { "epoch": 1.4639550766428897, "grad_norm": 0.5121662616729736, "learning_rate": 7.563531436671054e-05, "loss": 1.8268, "step": 4823 }, { "epoch": 1.4642586128395811, "grad_norm": 0.5929915904998779, "learning_rate": 7.563025210084033e-05, "loss": 1.4041, "step": 4824 }, { "epoch": 1.4645621490362726, "grad_norm": 0.6041303277015686, "learning_rate": 7.562518983497014e-05, "loss": 1.5088, "step": 4825 }, { "epoch": 1.464865685232964, "grad_norm": 0.6024086475372314, "learning_rate": 7.562012756909993e-05, "loss": 1.7524, "step": 4826 }, { "epoch": 1.4651692214296554, "grad_norm": 0.5736476182937622, "learning_rate": 7.561506530322973e-05, "loss": 1.6131, "step": 4827 }, { "epoch": 1.4654727576263469, "grad_norm": 0.5395902991294861, "learning_rate": 7.561000303735952e-05, "loss": 1.4278, "step": 4828 }, { "epoch": 1.4657762938230383, "grad_norm": 0.7780386209487915, "learning_rate": 7.560494077148932e-05, "loss": 1.3143, "step": 4829 }, { "epoch": 1.46607983001973, "grad_norm": 0.46117058396339417, "learning_rate": 7.559987850561911e-05, "loss": 1.6481, "step": 4830 }, { "epoch": 1.4663833662164212, "grad_norm": 0.7055519223213196, "learning_rate": 7.559481623974891e-05, "loss": 1.7452, "step": 4831 }, { "epoch": 1.4666869024131128, "grad_norm": 0.5338720679283142, "learning_rate": 7.55897539738787e-05, "loss": 1.7455, "step": 4832 }, { "epoch": 1.4669904386098043, "grad_norm": 0.6645461916923523, "learning_rate": 7.55846917080085e-05, "loss": 1.6314, "step": 4833 }, { "epoch": 1.4672939748064957, "grad_norm": 0.561160683631897, "learning_rate": 7.55796294421383e-05, "loss": 1.3721, "step": 4834 }, { "epoch": 1.4675975110031871, "grad_norm": 0.4907180964946747, "learning_rate": 7.55745671762681e-05, "loss": 1.6024, "step": 4835 }, { "epoch": 1.4679010471998786, "grad_norm": 0.6963015794754028, "learning_rate": 7.55695049103979e-05, "loss": 1.8332, "step": 4836 }, { "epoch": 1.46820458339657, "grad_norm": 0.5751698017120361, "learning_rate": 7.556444264452769e-05, "loss": 1.7317, "step": 4837 }, { "epoch": 1.4685081195932614, "grad_norm": 0.5866538882255554, "learning_rate": 7.55593803786575e-05, "loss": 1.6873, "step": 4838 }, { "epoch": 1.4688116557899529, "grad_norm": 0.5480346083641052, "learning_rate": 7.55543181127873e-05, "loss": 1.8819, "step": 4839 }, { "epoch": 1.4691151919866443, "grad_norm": 0.5058147311210632, "learning_rate": 7.554925584691709e-05, "loss": 1.4353, "step": 4840 }, { "epoch": 1.469418728183336, "grad_norm": 0.5135140419006348, "learning_rate": 7.554419358104688e-05, "loss": 1.5989, "step": 4841 }, { "epoch": 1.4697222643800274, "grad_norm": 0.5217494964599609, "learning_rate": 7.553913131517668e-05, "loss": 1.7296, "step": 4842 }, { "epoch": 1.4700258005767188, "grad_norm": 0.5667610168457031, "learning_rate": 7.553406904930647e-05, "loss": 1.5823, "step": 4843 }, { "epoch": 1.4703293367734103, "grad_norm": 0.9399384260177612, "learning_rate": 7.552900678343627e-05, "loss": 1.3248, "step": 4844 }, { "epoch": 1.4706328729701017, "grad_norm": 0.7394365668296814, "learning_rate": 7.552394451756608e-05, "loss": 1.6822, "step": 4845 }, { "epoch": 1.4709364091667931, "grad_norm": 0.532162070274353, "learning_rate": 7.551888225169587e-05, "loss": 1.4122, "step": 4846 }, { "epoch": 1.4712399453634846, "grad_norm": 0.5215944647789001, "learning_rate": 7.551381998582567e-05, "loss": 1.6496, "step": 4847 }, { "epoch": 1.471543481560176, "grad_norm": 0.6880224943161011, "learning_rate": 7.550875771995546e-05, "loss": 1.4341, "step": 4848 }, { "epoch": 1.4718470177568674, "grad_norm": 0.5888542532920837, "learning_rate": 7.550369545408526e-05, "loss": 1.2608, "step": 4849 }, { "epoch": 1.472150553953559, "grad_norm": 0.5835822224617004, "learning_rate": 7.549863318821505e-05, "loss": 1.7639, "step": 4850 }, { "epoch": 1.4724540901502503, "grad_norm": 0.5503977537155151, "learning_rate": 7.549357092234485e-05, "loss": 1.5515, "step": 4851 }, { "epoch": 1.472757626346942, "grad_norm": 0.548978865146637, "learning_rate": 7.548850865647464e-05, "loss": 1.8194, "step": 4852 }, { "epoch": 1.4730611625436334, "grad_norm": 0.5089396238327026, "learning_rate": 7.548344639060443e-05, "loss": 1.4529, "step": 4853 }, { "epoch": 1.4733646987403248, "grad_norm": 0.5043666362762451, "learning_rate": 7.547838412473423e-05, "loss": 1.2321, "step": 4854 }, { "epoch": 1.4736682349370163, "grad_norm": 0.5701262354850769, "learning_rate": 7.547332185886404e-05, "loss": 1.4486, "step": 4855 }, { "epoch": 1.4739717711337077, "grad_norm": 0.6709611415863037, "learning_rate": 7.546825959299383e-05, "loss": 1.8892, "step": 4856 }, { "epoch": 1.4742753073303991, "grad_norm": 0.6108323931694031, "learning_rate": 7.546319732712363e-05, "loss": 2.0002, "step": 4857 }, { "epoch": 1.4745788435270906, "grad_norm": 0.5647789835929871, "learning_rate": 7.545813506125342e-05, "loss": 1.6422, "step": 4858 }, { "epoch": 1.474882379723782, "grad_norm": 1.221641182899475, "learning_rate": 7.545307279538322e-05, "loss": 1.7342, "step": 4859 }, { "epoch": 1.4751859159204734, "grad_norm": 0.6236290335655212, "learning_rate": 7.544801052951301e-05, "loss": 1.5418, "step": 4860 }, { "epoch": 1.475489452117165, "grad_norm": 0.617218017578125, "learning_rate": 7.54429482636428e-05, "loss": 1.1776, "step": 4861 }, { "epoch": 1.4757929883138563, "grad_norm": 0.559785783290863, "learning_rate": 7.54378859977726e-05, "loss": 1.3574, "step": 4862 }, { "epoch": 1.476096524510548, "grad_norm": 0.5048946738243103, "learning_rate": 7.54328237319024e-05, "loss": 1.3648, "step": 4863 }, { "epoch": 1.4764000607072394, "grad_norm": 0.5944718718528748, "learning_rate": 7.54277614660322e-05, "loss": 1.8113, "step": 4864 }, { "epoch": 1.4767035969039308, "grad_norm": 0.4631864130496979, "learning_rate": 7.5422699200162e-05, "loss": 1.2327, "step": 4865 }, { "epoch": 1.4770071331006223, "grad_norm": 0.5952193737030029, "learning_rate": 7.54176369342918e-05, "loss": 1.5327, "step": 4866 }, { "epoch": 1.4773106692973137, "grad_norm": 0.46949222683906555, "learning_rate": 7.541257466842159e-05, "loss": 1.2225, "step": 4867 }, { "epoch": 1.4776142054940051, "grad_norm": 0.6023997664451599, "learning_rate": 7.540751240255138e-05, "loss": 1.8067, "step": 4868 }, { "epoch": 1.4779177416906966, "grad_norm": 0.5851592421531677, "learning_rate": 7.540245013668118e-05, "loss": 1.0784, "step": 4869 }, { "epoch": 1.478221277887388, "grad_norm": 0.5134581923484802, "learning_rate": 7.539738787081097e-05, "loss": 1.9496, "step": 4870 }, { "epoch": 1.4785248140840794, "grad_norm": 0.7432266473770142, "learning_rate": 7.539232560494077e-05, "loss": 1.6646, "step": 4871 }, { "epoch": 1.478828350280771, "grad_norm": 0.47093117237091064, "learning_rate": 7.538726333907056e-05, "loss": 1.4715, "step": 4872 }, { "epoch": 1.4791318864774623, "grad_norm": 0.5831784605979919, "learning_rate": 7.538220107320037e-05, "loss": 1.4022, "step": 4873 }, { "epoch": 1.479435422674154, "grad_norm": 0.4904673397541046, "learning_rate": 7.537713880733017e-05, "loss": 1.4029, "step": 4874 }, { "epoch": 1.4797389588708454, "grad_norm": 0.7261788249015808, "learning_rate": 7.537207654145996e-05, "loss": 1.5217, "step": 4875 }, { "epoch": 1.4800424950675368, "grad_norm": 0.46538203954696655, "learning_rate": 7.536701427558976e-05, "loss": 1.8431, "step": 4876 }, { "epoch": 1.4803460312642283, "grad_norm": 0.6086418628692627, "learning_rate": 7.536195200971955e-05, "loss": 2.0019, "step": 4877 }, { "epoch": 1.4806495674609197, "grad_norm": 0.5402958393096924, "learning_rate": 7.535688974384935e-05, "loss": 1.6871, "step": 4878 }, { "epoch": 1.4809531036576111, "grad_norm": 0.5592092871665955, "learning_rate": 7.535182747797914e-05, "loss": 1.958, "step": 4879 }, { "epoch": 1.4812566398543026, "grad_norm": 0.6036184430122375, "learning_rate": 7.534676521210893e-05, "loss": 1.6384, "step": 4880 }, { "epoch": 1.4815601760509942, "grad_norm": 0.4480501115322113, "learning_rate": 7.534170294623873e-05, "loss": 1.7879, "step": 4881 }, { "epoch": 1.4818637122476854, "grad_norm": 0.5329246520996094, "learning_rate": 7.533664068036854e-05, "loss": 1.4749, "step": 4882 }, { "epoch": 1.482167248444377, "grad_norm": 0.5111972689628601, "learning_rate": 7.533157841449833e-05, "loss": 2.0178, "step": 4883 }, { "epoch": 1.4824707846410685, "grad_norm": 0.47190091013908386, "learning_rate": 7.532651614862814e-05, "loss": 1.4551, "step": 4884 }, { "epoch": 1.48277432083776, "grad_norm": 0.4564230442047119, "learning_rate": 7.532145388275794e-05, "loss": 1.4727, "step": 4885 }, { "epoch": 1.4830778570344514, "grad_norm": 0.550284743309021, "learning_rate": 7.531639161688773e-05, "loss": 1.7774, "step": 4886 }, { "epoch": 1.4833813932311428, "grad_norm": 0.9376974701881409, "learning_rate": 7.531132935101753e-05, "loss": 1.6029, "step": 4887 }, { "epoch": 1.4836849294278343, "grad_norm": 0.48898518085479736, "learning_rate": 7.530626708514732e-05, "loss": 1.7904, "step": 4888 }, { "epoch": 1.4839884656245257, "grad_norm": 0.5602661967277527, "learning_rate": 7.530120481927712e-05, "loss": 1.6557, "step": 4889 }, { "epoch": 1.4842920018212171, "grad_norm": 0.5517251491546631, "learning_rate": 7.529614255340691e-05, "loss": 1.7286, "step": 4890 }, { "epoch": 1.4845955380179086, "grad_norm": 0.6002934575080872, "learning_rate": 7.52910802875367e-05, "loss": 1.2734, "step": 4891 }, { "epoch": 1.4848990742146002, "grad_norm": 0.502790629863739, "learning_rate": 7.52860180216665e-05, "loss": 1.6792, "step": 4892 }, { "epoch": 1.4852026104112914, "grad_norm": 0.5758108496665955, "learning_rate": 7.52809557557963e-05, "loss": 1.6734, "step": 4893 }, { "epoch": 1.485506146607983, "grad_norm": 0.5325357913970947, "learning_rate": 7.52758934899261e-05, "loss": 1.7765, "step": 4894 }, { "epoch": 1.4858096828046745, "grad_norm": 0.6031767725944519, "learning_rate": 7.52708312240559e-05, "loss": 0.881, "step": 4895 }, { "epoch": 1.486113219001366, "grad_norm": 0.5476002097129822, "learning_rate": 7.526576895818569e-05, "loss": 1.7584, "step": 4896 }, { "epoch": 1.4864167551980574, "grad_norm": 0.6910513043403625, "learning_rate": 7.526070669231549e-05, "loss": 1.9068, "step": 4897 }, { "epoch": 1.4867202913947488, "grad_norm": 0.5156431198120117, "learning_rate": 7.525564442644528e-05, "loss": 1.7052, "step": 4898 }, { "epoch": 1.4870238275914403, "grad_norm": 0.9591718316078186, "learning_rate": 7.525058216057508e-05, "loss": 1.5279, "step": 4899 }, { "epoch": 1.4873273637881317, "grad_norm": 0.4468207061290741, "learning_rate": 7.524551989470487e-05, "loss": 1.7925, "step": 4900 }, { "epoch": 1.4876308999848231, "grad_norm": 0.5419300198554993, "learning_rate": 7.524045762883467e-05, "loss": 1.306, "step": 4901 }, { "epoch": 1.4879344361815146, "grad_norm": 0.47516146302223206, "learning_rate": 7.523539536296446e-05, "loss": 1.7452, "step": 4902 }, { "epoch": 1.4882379723782062, "grad_norm": 0.4809563457965851, "learning_rate": 7.523033309709427e-05, "loss": 1.6979, "step": 4903 }, { "epoch": 1.4885415085748974, "grad_norm": 0.5383793711662292, "learning_rate": 7.522527083122406e-05, "loss": 1.7181, "step": 4904 }, { "epoch": 1.488845044771589, "grad_norm": 0.5974624156951904, "learning_rate": 7.522020856535386e-05, "loss": 1.3829, "step": 4905 }, { "epoch": 1.4891485809682805, "grad_norm": 0.5320436358451843, "learning_rate": 7.521514629948365e-05, "loss": 1.6578, "step": 4906 }, { "epoch": 1.489452117164972, "grad_norm": 0.49754762649536133, "learning_rate": 7.521008403361345e-05, "loss": 1.7021, "step": 4907 }, { "epoch": 1.4897556533616634, "grad_norm": 0.559622585773468, "learning_rate": 7.520502176774324e-05, "loss": 1.7037, "step": 4908 }, { "epoch": 1.4900591895583548, "grad_norm": 0.5277832746505737, "learning_rate": 7.519995950187304e-05, "loss": 0.8649, "step": 4909 }, { "epoch": 1.4903627257550462, "grad_norm": 0.751785159111023, "learning_rate": 7.519489723600283e-05, "loss": 0.7861, "step": 4910 }, { "epoch": 1.4906662619517377, "grad_norm": 0.5476072430610657, "learning_rate": 7.518983497013263e-05, "loss": 1.7936, "step": 4911 }, { "epoch": 1.4909697981484293, "grad_norm": 0.5985202789306641, "learning_rate": 7.518477270426244e-05, "loss": 1.6177, "step": 4912 }, { "epoch": 1.4912733343451205, "grad_norm": 0.5717979073524475, "learning_rate": 7.517971043839223e-05, "loss": 1.7372, "step": 4913 }, { "epoch": 1.4915768705418122, "grad_norm": 0.620720386505127, "learning_rate": 7.517464817252203e-05, "loss": 2.0156, "step": 4914 }, { "epoch": 1.4918804067385036, "grad_norm": 0.5804673433303833, "learning_rate": 7.516958590665182e-05, "loss": 1.521, "step": 4915 }, { "epoch": 1.492183942935195, "grad_norm": 0.7272177338600159, "learning_rate": 7.516452364078162e-05, "loss": 1.5384, "step": 4916 }, { "epoch": 1.4924874791318865, "grad_norm": 0.6218146085739136, "learning_rate": 7.515946137491141e-05, "loss": 1.6263, "step": 4917 }, { "epoch": 1.492791015328578, "grad_norm": 0.6515426635742188, "learning_rate": 7.51543991090412e-05, "loss": 1.1875, "step": 4918 }, { "epoch": 1.4930945515252694, "grad_norm": 0.6261817812919617, "learning_rate": 7.5149336843171e-05, "loss": 1.7554, "step": 4919 }, { "epoch": 1.4933980877219608, "grad_norm": 0.5548028945922852, "learning_rate": 7.51442745773008e-05, "loss": 1.5579, "step": 4920 }, { "epoch": 1.4937016239186522, "grad_norm": 0.4987451136112213, "learning_rate": 7.513921231143059e-05, "loss": 1.7415, "step": 4921 }, { "epoch": 1.4940051601153437, "grad_norm": 0.3654840290546417, "learning_rate": 7.51341500455604e-05, "loss": 0.965, "step": 4922 }, { "epoch": 1.4943086963120353, "grad_norm": 0.6118152141571045, "learning_rate": 7.512908777969019e-05, "loss": 1.4371, "step": 4923 }, { "epoch": 1.4946122325087265, "grad_norm": 0.5052013993263245, "learning_rate": 7.512402551381999e-05, "loss": 1.5476, "step": 4924 }, { "epoch": 1.4949157687054182, "grad_norm": 0.5931279063224792, "learning_rate": 7.511896324794978e-05, "loss": 1.8175, "step": 4925 }, { "epoch": 1.4952193049021096, "grad_norm": 1.926950454711914, "learning_rate": 7.511390098207958e-05, "loss": 1.6452, "step": 4926 }, { "epoch": 1.495522841098801, "grad_norm": 0.5131598711013794, "learning_rate": 7.510883871620939e-05, "loss": 1.7048, "step": 4927 }, { "epoch": 1.4958263772954925, "grad_norm": 0.6365157961845398, "learning_rate": 7.510377645033918e-05, "loss": 1.4151, "step": 4928 }, { "epoch": 1.496129913492184, "grad_norm": 0.48523157835006714, "learning_rate": 7.509871418446897e-05, "loss": 1.9676, "step": 4929 }, { "epoch": 1.4964334496888754, "grad_norm": 0.4741292893886566, "learning_rate": 7.509365191859877e-05, "loss": 1.9867, "step": 4930 }, { "epoch": 1.4967369858855668, "grad_norm": 0.6503263115882874, "learning_rate": 7.508858965272856e-05, "loss": 1.6746, "step": 4931 }, { "epoch": 1.4970405220822582, "grad_norm": 0.5478717684745789, "learning_rate": 7.508352738685836e-05, "loss": 1.711, "step": 4932 }, { "epoch": 1.4973440582789497, "grad_norm": 0.5373908877372742, "learning_rate": 7.507846512098817e-05, "loss": 1.7798, "step": 4933 }, { "epoch": 1.4976475944756413, "grad_norm": 0.559581995010376, "learning_rate": 7.507340285511796e-05, "loss": 1.351, "step": 4934 }, { "epoch": 1.4979511306723325, "grad_norm": 0.5667752027511597, "learning_rate": 7.506834058924776e-05, "loss": 1.3402, "step": 4935 }, { "epoch": 1.4982546668690242, "grad_norm": 0.4814712703227997, "learning_rate": 7.506327832337755e-05, "loss": 1.7732, "step": 4936 }, { "epoch": 1.4985582030657156, "grad_norm": 0.5498847365379333, "learning_rate": 7.505821605750735e-05, "loss": 1.7493, "step": 4937 }, { "epoch": 1.498861739262407, "grad_norm": 0.4908760190010071, "learning_rate": 7.505315379163714e-05, "loss": 1.8062, "step": 4938 }, { "epoch": 1.4991652754590985, "grad_norm": 0.4848566949367523, "learning_rate": 7.504809152576694e-05, "loss": 1.9671, "step": 4939 }, { "epoch": 1.49946881165579, "grad_norm": 0.49739202857017517, "learning_rate": 7.504302925989673e-05, "loss": 1.3136, "step": 4940 }, { "epoch": 1.4997723478524814, "grad_norm": 0.5910068154335022, "learning_rate": 7.503796699402653e-05, "loss": 1.7624, "step": 4941 }, { "epoch": 1.5000758840491728, "grad_norm": 0.4773714542388916, "learning_rate": 7.503290472815633e-05, "loss": 1.6358, "step": 4942 }, { "epoch": 1.5003794202458645, "grad_norm": 0.5713791847229004, "learning_rate": 7.502784246228613e-05, "loss": 1.8797, "step": 4943 }, { "epoch": 1.5006829564425557, "grad_norm": 0.5687196850776672, "learning_rate": 7.502278019641592e-05, "loss": 1.8505, "step": 4944 }, { "epoch": 1.5009864926392473, "grad_norm": 0.5714086294174194, "learning_rate": 7.501771793054572e-05, "loss": 1.0782, "step": 4945 }, { "epoch": 1.5012900288359385, "grad_norm": 0.5151203870773315, "learning_rate": 7.501265566467551e-05, "loss": 1.8271, "step": 4946 }, { "epoch": 1.5015935650326302, "grad_norm": 0.7299343943595886, "learning_rate": 7.500759339880531e-05, "loss": 1.5812, "step": 4947 }, { "epoch": 1.5018971012293216, "grad_norm": 1.2121248245239258, "learning_rate": 7.50025311329351e-05, "loss": 1.4983, "step": 4948 }, { "epoch": 1.502200637426013, "grad_norm": 0.5982694029808044, "learning_rate": 7.49974688670649e-05, "loss": 1.2174, "step": 4949 }, { "epoch": 1.5025041736227045, "grad_norm": 0.5536935329437256, "learning_rate": 7.499240660119469e-05, "loss": 1.496, "step": 4950 }, { "epoch": 1.502807709819396, "grad_norm": 0.579058825969696, "learning_rate": 7.49873443353245e-05, "loss": 1.6991, "step": 4951 }, { "epoch": 1.5031112460160874, "grad_norm": 0.5158204436302185, "learning_rate": 7.49822820694543e-05, "loss": 1.6041, "step": 4952 }, { "epoch": 1.5034147822127788, "grad_norm": 0.5574657917022705, "learning_rate": 7.497721980358409e-05, "loss": 0.996, "step": 4953 }, { "epoch": 1.5037183184094705, "grad_norm": 0.5834664106369019, "learning_rate": 7.497215753771389e-05, "loss": 1.675, "step": 4954 }, { "epoch": 1.5040218546061617, "grad_norm": 0.5058130025863647, "learning_rate": 7.496709527184368e-05, "loss": 1.8119, "step": 4955 }, { "epoch": 1.5043253908028533, "grad_norm": 0.6237099170684814, "learning_rate": 7.496203300597347e-05, "loss": 1.7406, "step": 4956 }, { "epoch": 1.5046289269995445, "grad_norm": 0.579137921333313, "learning_rate": 7.495697074010327e-05, "loss": 1.7724, "step": 4957 }, { "epoch": 1.5049324631962362, "grad_norm": 0.6171398162841797, "learning_rate": 7.495190847423306e-05, "loss": 1.6596, "step": 4958 }, { "epoch": 1.5052359993929276, "grad_norm": 0.5880246162414551, "learning_rate": 7.494684620836286e-05, "loss": 1.5784, "step": 4959 }, { "epoch": 1.505539535589619, "grad_norm": 0.48399999737739563, "learning_rate": 7.494178394249265e-05, "loss": 0.9879, "step": 4960 }, { "epoch": 1.5058430717863105, "grad_norm": 0.6185703277587891, "learning_rate": 7.493672167662246e-05, "loss": 1.5767, "step": 4961 }, { "epoch": 1.506146607983002, "grad_norm": 0.5096151232719421, "learning_rate": 7.493165941075226e-05, "loss": 1.5414, "step": 4962 }, { "epoch": 1.5064501441796936, "grad_norm": 0.5900314450263977, "learning_rate": 7.492659714488205e-05, "loss": 1.9153, "step": 4963 }, { "epoch": 1.5067536803763848, "grad_norm": 0.6663991808891296, "learning_rate": 7.492153487901185e-05, "loss": 1.6301, "step": 4964 }, { "epoch": 1.5070572165730765, "grad_norm": 0.6082780957221985, "learning_rate": 7.491647261314164e-05, "loss": 1.9522, "step": 4965 }, { "epoch": 1.5073607527697677, "grad_norm": 0.5442984700202942, "learning_rate": 7.491141034727144e-05, "loss": 0.9322, "step": 4966 }, { "epoch": 1.5076642889664593, "grad_norm": 0.5286507606506348, "learning_rate": 7.490634808140123e-05, "loss": 1.6876, "step": 4967 }, { "epoch": 1.5079678251631508, "grad_norm": 0.5359693169593811, "learning_rate": 7.490128581553103e-05, "loss": 1.4959, "step": 4968 }, { "epoch": 1.5082713613598422, "grad_norm": 0.5242936611175537, "learning_rate": 7.489622354966082e-05, "loss": 1.9794, "step": 4969 }, { "epoch": 1.5085748975565336, "grad_norm": 0.42484426498413086, "learning_rate": 7.489116128379063e-05, "loss": 1.7186, "step": 4970 }, { "epoch": 1.508878433753225, "grad_norm": 0.8689149618148804, "learning_rate": 7.488609901792042e-05, "loss": 1.6143, "step": 4971 }, { "epoch": 1.5091819699499165, "grad_norm": 0.49162110686302185, "learning_rate": 7.488103675205023e-05, "loss": 1.7344, "step": 4972 }, { "epoch": 1.509485506146608, "grad_norm": 0.541732132434845, "learning_rate": 7.487597448618003e-05, "loss": 1.5149, "step": 4973 }, { "epoch": 1.5097890423432996, "grad_norm": 0.542533278465271, "learning_rate": 7.487091222030982e-05, "loss": 1.8941, "step": 4974 }, { "epoch": 1.5100925785399908, "grad_norm": 0.500696063041687, "learning_rate": 7.486584995443962e-05, "loss": 1.7141, "step": 4975 }, { "epoch": 1.5103961147366824, "grad_norm": 0.5934485197067261, "learning_rate": 7.486078768856941e-05, "loss": 1.4603, "step": 4976 }, { "epoch": 1.5106996509333737, "grad_norm": 1.424613356590271, "learning_rate": 7.48557254226992e-05, "loss": 1.6062, "step": 4977 }, { "epoch": 1.5110031871300653, "grad_norm": 0.48682644963264465, "learning_rate": 7.4850663156829e-05, "loss": 1.7172, "step": 4978 }, { "epoch": 1.5113067233267568, "grad_norm": 0.4934718608856201, "learning_rate": 7.48456008909588e-05, "loss": 1.7463, "step": 4979 }, { "epoch": 1.5116102595234482, "grad_norm": 0.5626336336135864, "learning_rate": 7.484053862508859e-05, "loss": 1.5496, "step": 4980 }, { "epoch": 1.5119137957201396, "grad_norm": 0.4722979962825775, "learning_rate": 7.48354763592184e-05, "loss": 1.6378, "step": 4981 }, { "epoch": 1.512217331916831, "grad_norm": 0.5361006855964661, "learning_rate": 7.48304140933482e-05, "loss": 1.5269, "step": 4982 }, { "epoch": 1.5125208681135225, "grad_norm": 0.5464332103729248, "learning_rate": 7.482535182747799e-05, "loss": 1.8989, "step": 4983 }, { "epoch": 1.512824404310214, "grad_norm": 0.4606230556964874, "learning_rate": 7.482028956160778e-05, "loss": 1.8319, "step": 4984 }, { "epoch": 1.5131279405069056, "grad_norm": 0.5384346842765808, "learning_rate": 7.481522729573758e-05, "loss": 1.8504, "step": 4985 }, { "epoch": 1.5134314767035968, "grad_norm": 0.49810662865638733, "learning_rate": 7.481016502986737e-05, "loss": 1.7573, "step": 4986 }, { "epoch": 1.5137350129002884, "grad_norm": 0.5364110469818115, "learning_rate": 7.480510276399717e-05, "loss": 1.8853, "step": 4987 }, { "epoch": 1.5140385490969797, "grad_norm": 0.5262428522109985, "learning_rate": 7.480004049812696e-05, "loss": 1.9199, "step": 4988 }, { "epoch": 1.5143420852936713, "grad_norm": 0.48900026082992554, "learning_rate": 7.479497823225676e-05, "loss": 1.8181, "step": 4989 }, { "epoch": 1.5146456214903627, "grad_norm": 0.5142418742179871, "learning_rate": 7.478991596638657e-05, "loss": 1.801, "step": 4990 }, { "epoch": 1.5149491576870542, "grad_norm": 0.5188509225845337, "learning_rate": 7.478485370051636e-05, "loss": 1.7511, "step": 4991 }, { "epoch": 1.5152526938837456, "grad_norm": 0.5165709853172302, "learning_rate": 7.477979143464616e-05, "loss": 1.7038, "step": 4992 }, { "epoch": 1.515556230080437, "grad_norm": 0.5722384452819824, "learning_rate": 7.477472916877595e-05, "loss": 1.7524, "step": 4993 }, { "epoch": 1.5158597662771287, "grad_norm": 0.6268503665924072, "learning_rate": 7.476966690290574e-05, "loss": 1.7185, "step": 4994 }, { "epoch": 1.51616330247382, "grad_norm": 0.8802040815353394, "learning_rate": 7.476460463703554e-05, "loss": 1.4555, "step": 4995 }, { "epoch": 1.5164668386705116, "grad_norm": 0.5494096875190735, "learning_rate": 7.475954237116533e-05, "loss": 2.1006, "step": 4996 }, { "epoch": 1.5167703748672028, "grad_norm": 0.4991436004638672, "learning_rate": 7.475448010529513e-05, "loss": 1.905, "step": 4997 }, { "epoch": 1.5170739110638944, "grad_norm": 0.5484010577201843, "learning_rate": 7.474941783942492e-05, "loss": 1.7698, "step": 4998 }, { "epoch": 1.5173774472605859, "grad_norm": 0.5888267755508423, "learning_rate": 7.474435557355472e-05, "loss": 1.591, "step": 4999 }, { "epoch": 1.5176809834572773, "grad_norm": 0.6114574670791626, "learning_rate": 7.473929330768453e-05, "loss": 1.474, "step": 5000 }, { "epoch": 1.5179845196539687, "grad_norm": 0.48462623357772827, "learning_rate": 7.473423104181432e-05, "loss": 1.3337, "step": 5001 }, { "epoch": 1.5182880558506602, "grad_norm": 0.5276066660881042, "learning_rate": 7.472916877594412e-05, "loss": 1.7273, "step": 5002 }, { "epoch": 1.5185915920473516, "grad_norm": 0.520941972732544, "learning_rate": 7.472410651007391e-05, "loss": 1.3722, "step": 5003 }, { "epoch": 1.518895128244043, "grad_norm": 1.0179864168167114, "learning_rate": 7.47190442442037e-05, "loss": 1.4482, "step": 5004 }, { "epoch": 1.5191986644407347, "grad_norm": 0.5575673580169678, "learning_rate": 7.47139819783335e-05, "loss": 1.6712, "step": 5005 }, { "epoch": 1.519502200637426, "grad_norm": 0.6005712747573853, "learning_rate": 7.47089197124633e-05, "loss": 1.3345, "step": 5006 }, { "epoch": 1.5198057368341176, "grad_norm": 0.8138278722763062, "learning_rate": 7.470385744659309e-05, "loss": 1.6787, "step": 5007 }, { "epoch": 1.5201092730308088, "grad_norm": 0.5643385052680969, "learning_rate": 7.469879518072289e-05, "loss": 1.9712, "step": 5008 }, { "epoch": 1.5204128092275004, "grad_norm": 0.6395078301429749, "learning_rate": 7.46937329148527e-05, "loss": 1.2771, "step": 5009 }, { "epoch": 1.5207163454241919, "grad_norm": 0.5270484089851379, "learning_rate": 7.468867064898249e-05, "loss": 1.7923, "step": 5010 }, { "epoch": 1.5210198816208833, "grad_norm": 0.7293537259101868, "learning_rate": 7.468360838311228e-05, "loss": 1.8557, "step": 5011 }, { "epoch": 1.5213234178175747, "grad_norm": 0.4917014539241791, "learning_rate": 7.467854611724208e-05, "loss": 1.8161, "step": 5012 }, { "epoch": 1.5216269540142662, "grad_norm": 0.4320582449436188, "learning_rate": 7.467348385137187e-05, "loss": 1.7172, "step": 5013 }, { "epoch": 1.5219304902109576, "grad_norm": 0.4874110817909241, "learning_rate": 7.466842158550167e-05, "loss": 1.7386, "step": 5014 }, { "epoch": 1.522234026407649, "grad_norm": 0.45594799518585205, "learning_rate": 7.466335931963146e-05, "loss": 1.8101, "step": 5015 }, { "epoch": 1.5225375626043407, "grad_norm": 0.5213348269462585, "learning_rate": 7.465829705376127e-05, "loss": 1.5338, "step": 5016 }, { "epoch": 1.522841098801032, "grad_norm": 0.5447767376899719, "learning_rate": 7.465323478789107e-05, "loss": 1.7583, "step": 5017 }, { "epoch": 1.5231446349977236, "grad_norm": 0.45312610268592834, "learning_rate": 7.464817252202086e-05, "loss": 1.2033, "step": 5018 }, { "epoch": 1.5234481711944148, "grad_norm": 0.8900678157806396, "learning_rate": 7.464311025615066e-05, "loss": 1.3666, "step": 5019 }, { "epoch": 1.5237517073911064, "grad_norm": 0.6067972183227539, "learning_rate": 7.463804799028046e-05, "loss": 1.7647, "step": 5020 }, { "epoch": 1.5240552435877979, "grad_norm": 0.5241597890853882, "learning_rate": 7.463298572441026e-05, "loss": 1.7492, "step": 5021 }, { "epoch": 1.5243587797844893, "grad_norm": 0.5625531673431396, "learning_rate": 7.462792345854005e-05, "loss": 2.0467, "step": 5022 }, { "epoch": 1.5246623159811807, "grad_norm": 0.4781433939933777, "learning_rate": 7.462286119266985e-05, "loss": 1.3643, "step": 5023 }, { "epoch": 1.5249658521778722, "grad_norm": 0.6785547137260437, "learning_rate": 7.461779892679964e-05, "loss": 1.9067, "step": 5024 }, { "epoch": 1.5252693883745638, "grad_norm": 0.5150321125984192, "learning_rate": 7.461273666092944e-05, "loss": 1.8381, "step": 5025 }, { "epoch": 1.525572924571255, "grad_norm": 0.49465686082839966, "learning_rate": 7.460767439505923e-05, "loss": 1.5487, "step": 5026 }, { "epoch": 1.5258764607679467, "grad_norm": 0.6422455906867981, "learning_rate": 7.460261212918903e-05, "loss": 1.8731, "step": 5027 }, { "epoch": 1.526179996964638, "grad_norm": 0.5934743881225586, "learning_rate": 7.459754986331882e-05, "loss": 1.6849, "step": 5028 }, { "epoch": 1.5264835331613296, "grad_norm": 0.4186806380748749, "learning_rate": 7.459248759744863e-05, "loss": 1.8836, "step": 5029 }, { "epoch": 1.526787069358021, "grad_norm": 0.3892343044281006, "learning_rate": 7.458742533157843e-05, "loss": 1.5684, "step": 5030 }, { "epoch": 1.5270906055547124, "grad_norm": 0.5003823041915894, "learning_rate": 7.458236306570822e-05, "loss": 1.2924, "step": 5031 }, { "epoch": 1.5273941417514039, "grad_norm": 0.6574296355247498, "learning_rate": 7.457730079983801e-05, "loss": 1.8827, "step": 5032 }, { "epoch": 1.5276976779480953, "grad_norm": 0.5175241231918335, "learning_rate": 7.457223853396781e-05, "loss": 1.5016, "step": 5033 }, { "epoch": 1.5280012141447867, "grad_norm": 0.5171802043914795, "learning_rate": 7.45671762680976e-05, "loss": 1.7959, "step": 5034 }, { "epoch": 1.5283047503414782, "grad_norm": 0.48254334926605225, "learning_rate": 7.45621140022274e-05, "loss": 1.8864, "step": 5035 }, { "epoch": 1.5286082865381698, "grad_norm": 0.6125036478042603, "learning_rate": 7.45570517363572e-05, "loss": 1.6523, "step": 5036 }, { "epoch": 1.528911822734861, "grad_norm": 0.6008560657501221, "learning_rate": 7.455198947048699e-05, "loss": 1.2628, "step": 5037 }, { "epoch": 1.5292153589315527, "grad_norm": 0.4975941777229309, "learning_rate": 7.454692720461678e-05, "loss": 1.8079, "step": 5038 }, { "epoch": 1.529518895128244, "grad_norm": 0.5716906785964966, "learning_rate": 7.454186493874659e-05, "loss": 1.1925, "step": 5039 }, { "epoch": 1.5298224313249356, "grad_norm": 0.8717072606086731, "learning_rate": 7.453680267287639e-05, "loss": 1.8971, "step": 5040 }, { "epoch": 1.530125967521627, "grad_norm": 0.533805251121521, "learning_rate": 7.453174040700618e-05, "loss": 1.5106, "step": 5041 }, { "epoch": 1.5304295037183184, "grad_norm": 1.7133526802062988, "learning_rate": 7.452667814113598e-05, "loss": 1.8119, "step": 5042 }, { "epoch": 1.5307330399150099, "grad_norm": 0.5104836225509644, "learning_rate": 7.452161587526577e-05, "loss": 1.7062, "step": 5043 }, { "epoch": 1.5310365761117013, "grad_norm": 0.4728488028049469, "learning_rate": 7.451655360939557e-05, "loss": 1.494, "step": 5044 }, { "epoch": 1.5313401123083927, "grad_norm": 0.511103093624115, "learning_rate": 7.451149134352536e-05, "loss": 1.2143, "step": 5045 }, { "epoch": 1.5316436485050842, "grad_norm": 0.5534620881080627, "learning_rate": 7.450642907765516e-05, "loss": 1.6128, "step": 5046 }, { "epoch": 1.5319471847017758, "grad_norm": 0.5195817351341248, "learning_rate": 7.450136681178495e-05, "loss": 1.7958, "step": 5047 }, { "epoch": 1.532250720898467, "grad_norm": 0.6067714095115662, "learning_rate": 7.449630454591476e-05, "loss": 1.8131, "step": 5048 }, { "epoch": 1.5325542570951587, "grad_norm": 0.62360018491745, "learning_rate": 7.449124228004455e-05, "loss": 2.1314, "step": 5049 }, { "epoch": 1.53285779329185, "grad_norm": 0.5383068323135376, "learning_rate": 7.448618001417435e-05, "loss": 1.8666, "step": 5050 }, { "epoch": 1.5331613294885416, "grad_norm": 0.5535653829574585, "learning_rate": 7.448111774830414e-05, "loss": 1.5019, "step": 5051 }, { "epoch": 1.533464865685233, "grad_norm": 0.5406197309494019, "learning_rate": 7.447605548243394e-05, "loss": 1.6894, "step": 5052 }, { "epoch": 1.5337684018819244, "grad_norm": 0.5757835507392883, "learning_rate": 7.447099321656373e-05, "loss": 1.1037, "step": 5053 }, { "epoch": 1.5340719380786159, "grad_norm": 0.6058068871498108, "learning_rate": 7.446593095069353e-05, "loss": 1.4349, "step": 5054 }, { "epoch": 1.5343754742753073, "grad_norm": 0.5739513039588928, "learning_rate": 7.446086868482332e-05, "loss": 1.8045, "step": 5055 }, { "epoch": 1.534679010471999, "grad_norm": 0.5705426931381226, "learning_rate": 7.445580641895312e-05, "loss": 1.8432, "step": 5056 }, { "epoch": 1.5349825466686902, "grad_norm": 0.46640723943710327, "learning_rate": 7.445074415308293e-05, "loss": 1.7681, "step": 5057 }, { "epoch": 1.5352860828653818, "grad_norm": 0.5512862801551819, "learning_rate": 7.444568188721272e-05, "loss": 1.8849, "step": 5058 }, { "epoch": 1.535589619062073, "grad_norm": 0.5038082003593445, "learning_rate": 7.444061962134251e-05, "loss": 1.3138, "step": 5059 }, { "epoch": 1.5358931552587647, "grad_norm": 0.5421281456947327, "learning_rate": 7.443555735547232e-05, "loss": 1.4232, "step": 5060 }, { "epoch": 1.536196691455456, "grad_norm": 0.5326946973800659, "learning_rate": 7.443049508960212e-05, "loss": 1.7202, "step": 5061 }, { "epoch": 1.5365002276521476, "grad_norm": 0.4917674660682678, "learning_rate": 7.442543282373191e-05, "loss": 1.8367, "step": 5062 }, { "epoch": 1.536803763848839, "grad_norm": 0.49285170435905457, "learning_rate": 7.442037055786171e-05, "loss": 1.6482, "step": 5063 }, { "epoch": 1.5371073000455304, "grad_norm": 0.5591158270835876, "learning_rate": 7.44153082919915e-05, "loss": 1.943, "step": 5064 }, { "epoch": 1.5374108362422219, "grad_norm": 0.531862199306488, "learning_rate": 7.44102460261213e-05, "loss": 1.7295, "step": 5065 }, { "epoch": 1.5377143724389133, "grad_norm": 0.7069116234779358, "learning_rate": 7.440518376025109e-05, "loss": 1.5248, "step": 5066 }, { "epoch": 1.538017908635605, "grad_norm": 0.5532852411270142, "learning_rate": 7.440012149438089e-05, "loss": 1.5866, "step": 5067 }, { "epoch": 1.5383214448322962, "grad_norm": 0.4988435208797455, "learning_rate": 7.43950592285107e-05, "loss": 1.711, "step": 5068 }, { "epoch": 1.5386249810289878, "grad_norm": 0.490222305059433, "learning_rate": 7.438999696264049e-05, "loss": 1.8174, "step": 5069 }, { "epoch": 1.538928517225679, "grad_norm": 0.5370107889175415, "learning_rate": 7.438493469677028e-05, "loss": 1.8036, "step": 5070 }, { "epoch": 1.5392320534223707, "grad_norm": 0.5577163100242615, "learning_rate": 7.437987243090008e-05, "loss": 1.4909, "step": 5071 }, { "epoch": 1.5395355896190621, "grad_norm": 0.49091118574142456, "learning_rate": 7.437481016502987e-05, "loss": 1.1968, "step": 5072 }, { "epoch": 1.5398391258157536, "grad_norm": 0.5760853290557861, "learning_rate": 7.436974789915967e-05, "loss": 1.7479, "step": 5073 }, { "epoch": 1.540142662012445, "grad_norm": 0.4731634855270386, "learning_rate": 7.436468563328946e-05, "loss": 1.7511, "step": 5074 }, { "epoch": 1.5404461982091364, "grad_norm": 0.5668302774429321, "learning_rate": 7.435962336741926e-05, "loss": 1.5773, "step": 5075 }, { "epoch": 1.5407497344058279, "grad_norm": 0.477153480052948, "learning_rate": 7.435456110154905e-05, "loss": 1.783, "step": 5076 }, { "epoch": 1.5410532706025193, "grad_norm": 0.532961368560791, "learning_rate": 7.434949883567885e-05, "loss": 1.815, "step": 5077 }, { "epoch": 1.541356806799211, "grad_norm": 0.584200382232666, "learning_rate": 7.434443656980866e-05, "loss": 1.5517, "step": 5078 }, { "epoch": 1.5416603429959022, "grad_norm": 0.6001595258712769, "learning_rate": 7.433937430393845e-05, "loss": 1.5844, "step": 5079 }, { "epoch": 1.5419638791925938, "grad_norm": 0.5196036100387573, "learning_rate": 7.433431203806825e-05, "loss": 1.5434, "step": 5080 }, { "epoch": 1.542267415389285, "grad_norm": 0.5187863111495972, "learning_rate": 7.432924977219804e-05, "loss": 1.5561, "step": 5081 }, { "epoch": 1.5425709515859767, "grad_norm": 0.6012201309204102, "learning_rate": 7.432418750632784e-05, "loss": 1.31, "step": 5082 }, { "epoch": 1.5428744877826681, "grad_norm": 0.5970558524131775, "learning_rate": 7.431912524045763e-05, "loss": 1.8189, "step": 5083 }, { "epoch": 1.5431780239793595, "grad_norm": 0.5802868008613586, "learning_rate": 7.431406297458743e-05, "loss": 1.6781, "step": 5084 }, { "epoch": 1.543481560176051, "grad_norm": 0.48958060145378113, "learning_rate": 7.430900070871722e-05, "loss": 1.2931, "step": 5085 }, { "epoch": 1.5437850963727424, "grad_norm": 0.47764867544174194, "learning_rate": 7.430393844284701e-05, "loss": 1.8627, "step": 5086 }, { "epoch": 1.5440886325694338, "grad_norm": 0.6315385103225708, "learning_rate": 7.429887617697682e-05, "loss": 1.1492, "step": 5087 }, { "epoch": 1.5443921687661253, "grad_norm": 0.6129793524742126, "learning_rate": 7.429381391110662e-05, "loss": 1.6304, "step": 5088 }, { "epoch": 1.544695704962817, "grad_norm": 1.4443278312683105, "learning_rate": 7.428875164523641e-05, "loss": 1.9673, "step": 5089 }, { "epoch": 1.5449992411595082, "grad_norm": 0.596580445766449, "learning_rate": 7.428368937936621e-05, "loss": 1.7126, "step": 5090 }, { "epoch": 1.5453027773561998, "grad_norm": 0.5361682772636414, "learning_rate": 7.4278627113496e-05, "loss": 1.8962, "step": 5091 }, { "epoch": 1.545606313552891, "grad_norm": 0.5299656987190247, "learning_rate": 7.42735648476258e-05, "loss": 1.7639, "step": 5092 }, { "epoch": 1.5459098497495827, "grad_norm": 0.4721045196056366, "learning_rate": 7.426850258175559e-05, "loss": 1.7856, "step": 5093 }, { "epoch": 1.546213385946274, "grad_norm": 0.5393961668014526, "learning_rate": 7.426344031588539e-05, "loss": 1.7939, "step": 5094 }, { "epoch": 1.5465169221429655, "grad_norm": 0.48364582657814026, "learning_rate": 7.425837805001518e-05, "loss": 1.7527, "step": 5095 }, { "epoch": 1.546820458339657, "grad_norm": 0.5363742113113403, "learning_rate": 7.425331578414499e-05, "loss": 1.445, "step": 5096 }, { "epoch": 1.5471239945363484, "grad_norm": 0.676005482673645, "learning_rate": 7.424825351827478e-05, "loss": 1.8364, "step": 5097 }, { "epoch": 1.54742753073304, "grad_norm": 0.5144119262695312, "learning_rate": 7.424319125240458e-05, "loss": 1.2264, "step": 5098 }, { "epoch": 1.5477310669297313, "grad_norm": 0.6479708552360535, "learning_rate": 7.423812898653437e-05, "loss": 1.4466, "step": 5099 }, { "epoch": 1.548034603126423, "grad_norm": 0.6273570656776428, "learning_rate": 7.423306672066417e-05, "loss": 1.7581, "step": 5100 }, { "epoch": 1.5483381393231141, "grad_norm": 0.4922904372215271, "learning_rate": 7.422800445479396e-05, "loss": 1.6592, "step": 5101 }, { "epoch": 1.5486416755198058, "grad_norm": 0.5469740033149719, "learning_rate": 7.422294218892376e-05, "loss": 1.8006, "step": 5102 }, { "epoch": 1.5489452117164972, "grad_norm": 0.5148813128471375, "learning_rate": 7.421787992305355e-05, "loss": 1.4116, "step": 5103 }, { "epoch": 1.5492487479131887, "grad_norm": 0.5936328768730164, "learning_rate": 7.421281765718335e-05, "loss": 1.7438, "step": 5104 }, { "epoch": 1.54955228410988, "grad_norm": 0.6445397138595581, "learning_rate": 7.420775539131316e-05, "loss": 1.5208, "step": 5105 }, { "epoch": 1.5498558203065715, "grad_norm": 0.431699275970459, "learning_rate": 7.420269312544295e-05, "loss": 0.9626, "step": 5106 }, { "epoch": 1.550159356503263, "grad_norm": 0.5394765734672546, "learning_rate": 7.419763085957276e-05, "loss": 1.8403, "step": 5107 }, { "epoch": 1.5504628926999544, "grad_norm": 0.5958315134048462, "learning_rate": 7.419256859370255e-05, "loss": 1.7594, "step": 5108 }, { "epoch": 1.550766428896646, "grad_norm": 0.5397548675537109, "learning_rate": 7.418750632783235e-05, "loss": 1.4339, "step": 5109 }, { "epoch": 1.5510699650933373, "grad_norm": 0.6198435425758362, "learning_rate": 7.418244406196214e-05, "loss": 1.5588, "step": 5110 }, { "epoch": 1.551373501290029, "grad_norm": 0.5071767568588257, "learning_rate": 7.417738179609194e-05, "loss": 1.7752, "step": 5111 }, { "epoch": 1.5516770374867201, "grad_norm": 0.5771663784980774, "learning_rate": 7.417231953022173e-05, "loss": 1.8022, "step": 5112 }, { "epoch": 1.5519805736834118, "grad_norm": 0.4798254668712616, "learning_rate": 7.416725726435153e-05, "loss": 1.6264, "step": 5113 }, { "epoch": 1.5522841098801032, "grad_norm": 0.5879905819892883, "learning_rate": 7.416219499848132e-05, "loss": 1.7573, "step": 5114 }, { "epoch": 1.5525876460767947, "grad_norm": 0.5705510973930359, "learning_rate": 7.415713273261112e-05, "loss": 1.7869, "step": 5115 }, { "epoch": 1.552891182273486, "grad_norm": 0.5407357215881348, "learning_rate": 7.415207046674091e-05, "loss": 1.7814, "step": 5116 }, { "epoch": 1.5531947184701775, "grad_norm": 0.5827559232711792, "learning_rate": 7.414700820087072e-05, "loss": 1.6038, "step": 5117 }, { "epoch": 1.553498254666869, "grad_norm": 0.49504661560058594, "learning_rate": 7.414194593500052e-05, "loss": 1.3291, "step": 5118 }, { "epoch": 1.5538017908635604, "grad_norm": 0.6688182353973389, "learning_rate": 7.413688366913031e-05, "loss": 1.8172, "step": 5119 }, { "epoch": 1.554105327060252, "grad_norm": 0.5043573379516602, "learning_rate": 7.41318214032601e-05, "loss": 1.7996, "step": 5120 }, { "epoch": 1.5544088632569433, "grad_norm": 0.508563220500946, "learning_rate": 7.41267591373899e-05, "loss": 1.6502, "step": 5121 }, { "epoch": 1.554712399453635, "grad_norm": 0.5409261584281921, "learning_rate": 7.41216968715197e-05, "loss": 1.8077, "step": 5122 }, { "epoch": 1.5550159356503261, "grad_norm": 0.6074355244636536, "learning_rate": 7.411663460564949e-05, "loss": 2.22, "step": 5123 }, { "epoch": 1.5553194718470178, "grad_norm": 0.5736792087554932, "learning_rate": 7.411157233977928e-05, "loss": 1.5849, "step": 5124 }, { "epoch": 1.5556230080437092, "grad_norm": 0.5450262427330017, "learning_rate": 7.410651007390908e-05, "loss": 1.3948, "step": 5125 }, { "epoch": 1.5559265442404007, "grad_norm": 0.3917832672595978, "learning_rate": 7.410144780803889e-05, "loss": 1.2642, "step": 5126 }, { "epoch": 1.556230080437092, "grad_norm": 0.6708034873008728, "learning_rate": 7.409638554216868e-05, "loss": 1.4424, "step": 5127 }, { "epoch": 1.5565336166337835, "grad_norm": 0.442088782787323, "learning_rate": 7.409132327629848e-05, "loss": 1.663, "step": 5128 }, { "epoch": 1.5568371528304752, "grad_norm": 0.5082998871803284, "learning_rate": 7.408626101042827e-05, "loss": 1.5504, "step": 5129 }, { "epoch": 1.5571406890271664, "grad_norm": 0.5985785722732544, "learning_rate": 7.408119874455807e-05, "loss": 1.5082, "step": 5130 }, { "epoch": 1.557444225223858, "grad_norm": 0.5707488656044006, "learning_rate": 7.407613647868786e-05, "loss": 1.7492, "step": 5131 }, { "epoch": 1.5577477614205493, "grad_norm": 0.5827080011367798, "learning_rate": 7.407107421281766e-05, "loss": 1.5818, "step": 5132 }, { "epoch": 1.558051297617241, "grad_norm": 0.5113789439201355, "learning_rate": 7.406601194694745e-05, "loss": 1.7846, "step": 5133 }, { "epoch": 1.5583548338139324, "grad_norm": 1.9733306169509888, "learning_rate": 7.406094968107725e-05, "loss": 1.667, "step": 5134 }, { "epoch": 1.5586583700106238, "grad_norm": 0.5422725081443787, "learning_rate": 7.405588741520705e-05, "loss": 1.5454, "step": 5135 }, { "epoch": 1.5589619062073152, "grad_norm": 0.5780110955238342, "learning_rate": 7.405082514933685e-05, "loss": 1.8285, "step": 5136 }, { "epoch": 1.5592654424040067, "grad_norm": 0.5252494215965271, "learning_rate": 7.404576288346664e-05, "loss": 2.0586, "step": 5137 }, { "epoch": 1.559568978600698, "grad_norm": 0.560908854007721, "learning_rate": 7.404070061759644e-05, "loss": 1.8195, "step": 5138 }, { "epoch": 1.5598725147973895, "grad_norm": 0.46185895800590515, "learning_rate": 7.403563835172623e-05, "loss": 1.244, "step": 5139 }, { "epoch": 1.5601760509940812, "grad_norm": 0.5307225584983826, "learning_rate": 7.403057608585603e-05, "loss": 1.9262, "step": 5140 }, { "epoch": 1.5604795871907724, "grad_norm": 0.49698513746261597, "learning_rate": 7.402551381998582e-05, "loss": 1.7835, "step": 5141 }, { "epoch": 1.560783123387464, "grad_norm": 0.5683524012565613, "learning_rate": 7.402045155411562e-05, "loss": 1.7943, "step": 5142 }, { "epoch": 1.5610866595841553, "grad_norm": 1.0103782415390015, "learning_rate": 7.401538928824541e-05, "loss": 1.7038, "step": 5143 }, { "epoch": 1.561390195780847, "grad_norm": 0.5294140577316284, "learning_rate": 7.401032702237521e-05, "loss": 1.5177, "step": 5144 }, { "epoch": 1.5616937319775384, "grad_norm": 0.5453392267227173, "learning_rate": 7.400526475650502e-05, "loss": 1.6897, "step": 5145 }, { "epoch": 1.5619972681742298, "grad_norm": 0.5333344340324402, "learning_rate": 7.400020249063481e-05, "loss": 1.8362, "step": 5146 }, { "epoch": 1.5623008043709212, "grad_norm": 0.5554139614105225, "learning_rate": 7.39951402247646e-05, "loss": 1.7446, "step": 5147 }, { "epoch": 1.5626043405676127, "grad_norm": 0.5534631013870239, "learning_rate": 7.39900779588944e-05, "loss": 1.8738, "step": 5148 }, { "epoch": 1.562907876764304, "grad_norm": 0.5668753981590271, "learning_rate": 7.398501569302421e-05, "loss": 1.7437, "step": 5149 }, { "epoch": 1.5632114129609955, "grad_norm": 0.5681511163711548, "learning_rate": 7.3979953427154e-05, "loss": 1.6496, "step": 5150 }, { "epoch": 1.5635149491576872, "grad_norm": 0.6148278713226318, "learning_rate": 7.39748911612838e-05, "loss": 1.6409, "step": 5151 }, { "epoch": 1.5638184853543784, "grad_norm": 0.529887318611145, "learning_rate": 7.39698288954136e-05, "loss": 1.7175, "step": 5152 }, { "epoch": 1.56412202155107, "grad_norm": 0.541483461856842, "learning_rate": 7.396476662954339e-05, "loss": 2.0849, "step": 5153 }, { "epoch": 1.5644255577477613, "grad_norm": 0.4464404582977295, "learning_rate": 7.395970436367318e-05, "loss": 1.376, "step": 5154 }, { "epoch": 1.564729093944453, "grad_norm": 0.8220679759979248, "learning_rate": 7.395464209780298e-05, "loss": 1.1533, "step": 5155 }, { "epoch": 1.5650326301411444, "grad_norm": 0.5027211308479309, "learning_rate": 7.394957983193279e-05, "loss": 1.648, "step": 5156 }, { "epoch": 1.5653361663378358, "grad_norm": 0.5189235210418701, "learning_rate": 7.394451756606258e-05, "loss": 1.4195, "step": 5157 }, { "epoch": 1.5656397025345272, "grad_norm": 0.5146520137786865, "learning_rate": 7.393945530019238e-05, "loss": 1.2644, "step": 5158 }, { "epoch": 1.5659432387312187, "grad_norm": 0.6035975217819214, "learning_rate": 7.393439303432217e-05, "loss": 1.6141, "step": 5159 }, { "epoch": 1.5662467749279103, "grad_norm": 0.5004559755325317, "learning_rate": 7.392933076845197e-05, "loss": 1.7301, "step": 5160 }, { "epoch": 1.5665503111246015, "grad_norm": 0.5110512375831604, "learning_rate": 7.392426850258176e-05, "loss": 1.7182, "step": 5161 }, { "epoch": 1.5668538473212932, "grad_norm": 0.6292902827262878, "learning_rate": 7.391920623671155e-05, "loss": 1.3006, "step": 5162 }, { "epoch": 1.5671573835179844, "grad_norm": 0.6202772259712219, "learning_rate": 7.391414397084135e-05, "loss": 1.8302, "step": 5163 }, { "epoch": 1.567460919714676, "grad_norm": 0.5574930310249329, "learning_rate": 7.390908170497114e-05, "loss": 1.587, "step": 5164 }, { "epoch": 1.5677644559113675, "grad_norm": 0.6311307549476624, "learning_rate": 7.390401943910095e-05, "loss": 1.8448, "step": 5165 }, { "epoch": 1.568067992108059, "grad_norm": 0.5117753148078918, "learning_rate": 7.389895717323075e-05, "loss": 1.7946, "step": 5166 }, { "epoch": 1.5683715283047504, "grad_norm": 1.1032054424285889, "learning_rate": 7.389389490736054e-05, "loss": 1.3926, "step": 5167 }, { "epoch": 1.5686750645014418, "grad_norm": 0.5589320659637451, "learning_rate": 7.388883264149034e-05, "loss": 1.8236, "step": 5168 }, { "epoch": 1.5689786006981332, "grad_norm": 0.5848040580749512, "learning_rate": 7.388377037562013e-05, "loss": 1.6766, "step": 5169 }, { "epoch": 1.5692821368948247, "grad_norm": 0.45275813341140747, "learning_rate": 7.387870810974993e-05, "loss": 1.7592, "step": 5170 }, { "epoch": 1.5695856730915163, "grad_norm": 0.5285073518753052, "learning_rate": 7.387364584387972e-05, "loss": 1.7066, "step": 5171 }, { "epoch": 1.5698892092882075, "grad_norm": 0.46561524271965027, "learning_rate": 7.386858357800952e-05, "loss": 1.7283, "step": 5172 }, { "epoch": 1.5701927454848992, "grad_norm": 0.570890486240387, "learning_rate": 7.386352131213931e-05, "loss": 1.7499, "step": 5173 }, { "epoch": 1.5704962816815904, "grad_norm": 0.6721476316452026, "learning_rate": 7.385845904626912e-05, "loss": 1.4815, "step": 5174 }, { "epoch": 1.570799817878282, "grad_norm": 0.4744941294193268, "learning_rate": 7.385339678039891e-05, "loss": 1.7145, "step": 5175 }, { "epoch": 1.5711033540749735, "grad_norm": 0.560144305229187, "learning_rate": 7.384833451452871e-05, "loss": 1.9931, "step": 5176 }, { "epoch": 1.571406890271665, "grad_norm": 0.5556525588035583, "learning_rate": 7.38432722486585e-05, "loss": 1.7716, "step": 5177 }, { "epoch": 1.5717104264683563, "grad_norm": 0.5719964504241943, "learning_rate": 7.38382099827883e-05, "loss": 2.0032, "step": 5178 }, { "epoch": 1.5720139626650478, "grad_norm": 0.4774825870990753, "learning_rate": 7.38331477169181e-05, "loss": 1.4463, "step": 5179 }, { "epoch": 1.5723174988617392, "grad_norm": 0.5442168712615967, "learning_rate": 7.382808545104789e-05, "loss": 1.6563, "step": 5180 }, { "epoch": 1.5726210350584306, "grad_norm": 0.5183798670768738, "learning_rate": 7.382302318517768e-05, "loss": 1.2414, "step": 5181 }, { "epoch": 1.5729245712551223, "grad_norm": 0.574775755405426, "learning_rate": 7.381796091930748e-05, "loss": 1.7268, "step": 5182 }, { "epoch": 1.5732281074518135, "grad_norm": 0.47192198038101196, "learning_rate": 7.381289865343727e-05, "loss": 1.2211, "step": 5183 }, { "epoch": 1.5735316436485052, "grad_norm": 0.5779873132705688, "learning_rate": 7.380783638756708e-05, "loss": 1.5045, "step": 5184 }, { "epoch": 1.5738351798451964, "grad_norm": 0.5421679019927979, "learning_rate": 7.380277412169688e-05, "loss": 1.6357, "step": 5185 }, { "epoch": 1.574138716041888, "grad_norm": 0.8882037401199341, "learning_rate": 7.379771185582667e-05, "loss": 1.7545, "step": 5186 }, { "epoch": 1.5744422522385795, "grad_norm": 0.5748177170753479, "learning_rate": 7.379264958995647e-05, "loss": 1.5829, "step": 5187 }, { "epoch": 1.574745788435271, "grad_norm": 0.5343853831291199, "learning_rate": 7.378758732408626e-05, "loss": 1.7749, "step": 5188 }, { "epoch": 1.5750493246319623, "grad_norm": 0.4772493243217468, "learning_rate": 7.378252505821605e-05, "loss": 1.6533, "step": 5189 }, { "epoch": 1.5753528608286538, "grad_norm": 0.5794268250465393, "learning_rate": 7.377746279234585e-05, "loss": 1.6251, "step": 5190 }, { "epoch": 1.5756563970253454, "grad_norm": 0.6607987880706787, "learning_rate": 7.377240052647564e-05, "loss": 1.3856, "step": 5191 }, { "epoch": 1.5759599332220366, "grad_norm": 0.5267585515975952, "learning_rate": 7.376733826060544e-05, "loss": 1.7208, "step": 5192 }, { "epoch": 1.5762634694187283, "grad_norm": 0.7312984466552734, "learning_rate": 7.376227599473525e-05, "loss": 1.8771, "step": 5193 }, { "epoch": 1.5765670056154195, "grad_norm": 0.533445417881012, "learning_rate": 7.375721372886504e-05, "loss": 1.7044, "step": 5194 }, { "epoch": 1.5768705418121112, "grad_norm": 0.5625982880592346, "learning_rate": 7.375215146299485e-05, "loss": 1.7064, "step": 5195 }, { "epoch": 1.5771740780088026, "grad_norm": 0.5268564820289612, "learning_rate": 7.374708919712465e-05, "loss": 1.7857, "step": 5196 }, { "epoch": 1.577477614205494, "grad_norm": 0.6782357096672058, "learning_rate": 7.374202693125444e-05, "loss": 1.7049, "step": 5197 }, { "epoch": 1.5777811504021855, "grad_norm": 0.5990774035453796, "learning_rate": 7.373696466538424e-05, "loss": 1.806, "step": 5198 }, { "epoch": 1.578084686598877, "grad_norm": 0.6708577275276184, "learning_rate": 7.373190239951403e-05, "loss": 1.4618, "step": 5199 }, { "epoch": 1.5783882227955683, "grad_norm": 0.547925591468811, "learning_rate": 7.372684013364382e-05, "loss": 1.6713, "step": 5200 }, { "epoch": 1.5786917589922598, "grad_norm": 0.5250493288040161, "learning_rate": 7.372177786777362e-05, "loss": 1.897, "step": 5201 }, { "epoch": 1.5789952951889514, "grad_norm": 0.5222801566123962, "learning_rate": 7.371671560190341e-05, "loss": 1.8989, "step": 5202 }, { "epoch": 1.5792988313856426, "grad_norm": 0.581599771976471, "learning_rate": 7.371165333603321e-05, "loss": 1.3431, "step": 5203 }, { "epoch": 1.5796023675823343, "grad_norm": 0.5837922692298889, "learning_rate": 7.370659107016302e-05, "loss": 1.2763, "step": 5204 }, { "epoch": 1.5799059037790255, "grad_norm": 0.5853214859962463, "learning_rate": 7.370152880429281e-05, "loss": 1.5066, "step": 5205 }, { "epoch": 1.5802094399757172, "grad_norm": 0.5608053803443909, "learning_rate": 7.369646653842261e-05, "loss": 1.8226, "step": 5206 }, { "epoch": 1.5805129761724086, "grad_norm": 0.5073212385177612, "learning_rate": 7.36914042725524e-05, "loss": 1.1189, "step": 5207 }, { "epoch": 1.5808165123691, "grad_norm": 0.5363342761993408, "learning_rate": 7.36863420066822e-05, "loss": 1.5296, "step": 5208 }, { "epoch": 1.5811200485657915, "grad_norm": 0.4550332725048065, "learning_rate": 7.368127974081199e-05, "loss": 1.8264, "step": 5209 }, { "epoch": 1.581423584762483, "grad_norm": 0.5468671917915344, "learning_rate": 7.367621747494179e-05, "loss": 1.7837, "step": 5210 }, { "epoch": 1.5817271209591743, "grad_norm": 0.5812280774116516, "learning_rate": 7.367115520907158e-05, "loss": 1.6072, "step": 5211 }, { "epoch": 1.5820306571558658, "grad_norm": 0.6074302196502686, "learning_rate": 7.366609294320138e-05, "loss": 1.3217, "step": 5212 }, { "epoch": 1.5823341933525574, "grad_norm": 0.8310289978981018, "learning_rate": 7.366103067733118e-05, "loss": 1.6822, "step": 5213 }, { "epoch": 1.5826377295492486, "grad_norm": 0.5651637315750122, "learning_rate": 7.365596841146098e-05, "loss": 1.9405, "step": 5214 }, { "epoch": 1.5829412657459403, "grad_norm": 0.5646255612373352, "learning_rate": 7.365090614559077e-05, "loss": 1.7228, "step": 5215 }, { "epoch": 1.5832448019426315, "grad_norm": 0.621033787727356, "learning_rate": 7.364584387972057e-05, "loss": 1.2852, "step": 5216 }, { "epoch": 1.5835483381393232, "grad_norm": 0.5037727952003479, "learning_rate": 7.364078161385036e-05, "loss": 1.3287, "step": 5217 }, { "epoch": 1.5838518743360146, "grad_norm": 0.5539587736129761, "learning_rate": 7.363571934798016e-05, "loss": 1.2148, "step": 5218 }, { "epoch": 1.584155410532706, "grad_norm": 0.48859071731567383, "learning_rate": 7.363065708210995e-05, "loss": 1.8287, "step": 5219 }, { "epoch": 1.5844589467293975, "grad_norm": 0.964847981929779, "learning_rate": 7.362559481623975e-05, "loss": 1.7749, "step": 5220 }, { "epoch": 1.584762482926089, "grad_norm": 0.5616121292114258, "learning_rate": 7.362053255036954e-05, "loss": 1.6908, "step": 5221 }, { "epoch": 1.5850660191227806, "grad_norm": 0.5466740727424622, "learning_rate": 7.361547028449934e-05, "loss": 1.4226, "step": 5222 }, { "epoch": 1.5853695553194718, "grad_norm": 0.4894787073135376, "learning_rate": 7.361040801862915e-05, "loss": 1.3254, "step": 5223 }, { "epoch": 1.5856730915161634, "grad_norm": 0.4261757433414459, "learning_rate": 7.360534575275894e-05, "loss": 0.9082, "step": 5224 }, { "epoch": 1.5859766277128546, "grad_norm": 0.5084283947944641, "learning_rate": 7.360028348688874e-05, "loss": 1.2136, "step": 5225 }, { "epoch": 1.5862801639095463, "grad_norm": 0.41630545258522034, "learning_rate": 7.359522122101853e-05, "loss": 1.8932, "step": 5226 }, { "epoch": 1.5865837001062375, "grad_norm": 0.5659027695655823, "learning_rate": 7.359015895514832e-05, "loss": 1.36, "step": 5227 }, { "epoch": 1.5868872363029292, "grad_norm": 0.5066294074058533, "learning_rate": 7.358509668927812e-05, "loss": 1.6067, "step": 5228 }, { "epoch": 1.5871907724996206, "grad_norm": 0.44448360800743103, "learning_rate": 7.358003442340791e-05, "loss": 1.1769, "step": 5229 }, { "epoch": 1.587494308696312, "grad_norm": 0.5426762104034424, "learning_rate": 7.357497215753771e-05, "loss": 1.6513, "step": 5230 }, { "epoch": 1.5877978448930035, "grad_norm": 0.5724853277206421, "learning_rate": 7.35699098916675e-05, "loss": 1.6179, "step": 5231 }, { "epoch": 1.588101381089695, "grad_norm": 0.5723243951797485, "learning_rate": 7.356484762579731e-05, "loss": 2.1125, "step": 5232 }, { "epoch": 1.5884049172863866, "grad_norm": 0.6052478551864624, "learning_rate": 7.355978535992711e-05, "loss": 1.6911, "step": 5233 }, { "epoch": 1.5887084534830778, "grad_norm": 0.5020635724067688, "learning_rate": 7.35547230940569e-05, "loss": 1.8538, "step": 5234 }, { "epoch": 1.5890119896797694, "grad_norm": 0.5227568745613098, "learning_rate": 7.35496608281867e-05, "loss": 1.906, "step": 5235 }, { "epoch": 1.5893155258764606, "grad_norm": 0.549309492111206, "learning_rate": 7.354459856231649e-05, "loss": 1.363, "step": 5236 }, { "epoch": 1.5896190620731523, "grad_norm": 0.4708143472671509, "learning_rate": 7.353953629644629e-05, "loss": 2.0159, "step": 5237 }, { "epoch": 1.5899225982698437, "grad_norm": 0.5588541030883789, "learning_rate": 7.35344740305761e-05, "loss": 1.6558, "step": 5238 }, { "epoch": 1.5902261344665352, "grad_norm": 0.4781373143196106, "learning_rate": 7.352941176470589e-05, "loss": 1.7517, "step": 5239 }, { "epoch": 1.5905296706632266, "grad_norm": 0.5474836230278015, "learning_rate": 7.352434949883568e-05, "loss": 1.4177, "step": 5240 }, { "epoch": 1.590833206859918, "grad_norm": 0.6164267659187317, "learning_rate": 7.351928723296548e-05, "loss": 1.5234, "step": 5241 }, { "epoch": 1.5911367430566095, "grad_norm": 0.5702620148658752, "learning_rate": 7.351422496709527e-05, "loss": 1.5461, "step": 5242 }, { "epoch": 1.591440279253301, "grad_norm": 0.609255313873291, "learning_rate": 7.350916270122508e-05, "loss": 1.9137, "step": 5243 }, { "epoch": 1.5917438154499925, "grad_norm": 0.5648717880249023, "learning_rate": 7.350410043535488e-05, "loss": 1.7468, "step": 5244 }, { "epoch": 1.5920473516466838, "grad_norm": 0.5698893070220947, "learning_rate": 7.349903816948467e-05, "loss": 1.4076, "step": 5245 }, { "epoch": 1.5923508878433754, "grad_norm": 0.5639516711235046, "learning_rate": 7.349397590361447e-05, "loss": 1.9586, "step": 5246 }, { "epoch": 1.5926544240400666, "grad_norm": 0.6253385543823242, "learning_rate": 7.348891363774426e-05, "loss": 1.5954, "step": 5247 }, { "epoch": 1.5929579602367583, "grad_norm": 0.47530102729797363, "learning_rate": 7.348385137187406e-05, "loss": 1.371, "step": 5248 }, { "epoch": 1.5932614964334497, "grad_norm": 0.7411981225013733, "learning_rate": 7.347878910600385e-05, "loss": 1.9518, "step": 5249 }, { "epoch": 1.5935650326301412, "grad_norm": 1.0231763124465942, "learning_rate": 7.347372684013365e-05, "loss": 1.1718, "step": 5250 }, { "epoch": 1.5938685688268326, "grad_norm": 0.6356399059295654, "learning_rate": 7.346866457426344e-05, "loss": 1.8131, "step": 5251 }, { "epoch": 1.594172105023524, "grad_norm": 0.577565610408783, "learning_rate": 7.346360230839325e-05, "loss": 1.7185, "step": 5252 }, { "epoch": 1.5944756412202155, "grad_norm": 0.6096293330192566, "learning_rate": 7.345854004252304e-05, "loss": 1.534, "step": 5253 }, { "epoch": 1.594779177416907, "grad_norm": 0.5408341288566589, "learning_rate": 7.345347777665284e-05, "loss": 1.7644, "step": 5254 }, { "epoch": 1.5950827136135985, "grad_norm": 0.5350682139396667, "learning_rate": 7.344841551078263e-05, "loss": 1.723, "step": 5255 }, { "epoch": 1.5953862498102898, "grad_norm": 0.8048761487007141, "learning_rate": 7.344335324491243e-05, "loss": 1.5371, "step": 5256 }, { "epoch": 1.5956897860069814, "grad_norm": 0.427385538816452, "learning_rate": 7.343829097904222e-05, "loss": 1.7227, "step": 5257 }, { "epoch": 1.5959933222036726, "grad_norm": 0.5769528150558472, "learning_rate": 7.343322871317202e-05, "loss": 1.8178, "step": 5258 }, { "epoch": 1.5962968584003643, "grad_norm": 0.6781782507896423, "learning_rate": 7.342816644730181e-05, "loss": 1.7434, "step": 5259 }, { "epoch": 1.5966003945970557, "grad_norm": 0.5181839466094971, "learning_rate": 7.342310418143161e-05, "loss": 1.6126, "step": 5260 }, { "epoch": 1.5969039307937472, "grad_norm": 0.5487415790557861, "learning_rate": 7.34180419155614e-05, "loss": 1.6194, "step": 5261 }, { "epoch": 1.5972074669904386, "grad_norm": 0.5643243789672852, "learning_rate": 7.341297964969121e-05, "loss": 1.5133, "step": 5262 }, { "epoch": 1.59751100318713, "grad_norm": 0.6052178144454956, "learning_rate": 7.3407917383821e-05, "loss": 1.7253, "step": 5263 }, { "epoch": 1.5978145393838217, "grad_norm": 0.5676378607749939, "learning_rate": 7.34028551179508e-05, "loss": 1.7509, "step": 5264 }, { "epoch": 1.5981180755805129, "grad_norm": 0.6112813949584961, "learning_rate": 7.33977928520806e-05, "loss": 1.7657, "step": 5265 }, { "epoch": 1.5984216117772045, "grad_norm": 0.4967426657676697, "learning_rate": 7.339273058621039e-05, "loss": 1.7637, "step": 5266 }, { "epoch": 1.5987251479738958, "grad_norm": 0.5924389958381653, "learning_rate": 7.338766832034018e-05, "loss": 1.7697, "step": 5267 }, { "epoch": 1.5990286841705874, "grad_norm": 0.547675371170044, "learning_rate": 7.338260605446998e-05, "loss": 1.5096, "step": 5268 }, { "epoch": 1.5993322203672788, "grad_norm": 0.5636522769927979, "learning_rate": 7.337754378859977e-05, "loss": 1.5292, "step": 5269 }, { "epoch": 1.5996357565639703, "grad_norm": 0.5873835682868958, "learning_rate": 7.337248152272957e-05, "loss": 1.5638, "step": 5270 }, { "epoch": 1.5999392927606617, "grad_norm": 0.548093855381012, "learning_rate": 7.336741925685938e-05, "loss": 0.982, "step": 5271 }, { "epoch": 1.6002428289573531, "grad_norm": 0.4551286995410919, "learning_rate": 7.336235699098917e-05, "loss": 0.8414, "step": 5272 }, { "epoch": 1.6005463651540446, "grad_norm": 0.4967573881149292, "learning_rate": 7.335729472511897e-05, "loss": 1.8692, "step": 5273 }, { "epoch": 1.600849901350736, "grad_norm": 0.5112420916557312, "learning_rate": 7.335223245924876e-05, "loss": 1.7884, "step": 5274 }, { "epoch": 1.6011534375474277, "grad_norm": 0.6654168367385864, "learning_rate": 7.334717019337856e-05, "loss": 1.8305, "step": 5275 }, { "epoch": 1.6014569737441189, "grad_norm": 0.5480862855911255, "learning_rate": 7.334210792750835e-05, "loss": 1.5863, "step": 5276 }, { "epoch": 1.6017605099408105, "grad_norm": 0.5990899205207825, "learning_rate": 7.333704566163815e-05, "loss": 1.2868, "step": 5277 }, { "epoch": 1.6020640461375018, "grad_norm": 0.5188322067260742, "learning_rate": 7.333198339576794e-05, "loss": 1.8025, "step": 5278 }, { "epoch": 1.6023675823341934, "grad_norm": 0.5856629014015198, "learning_rate": 7.332692112989774e-05, "loss": 1.3265, "step": 5279 }, { "epoch": 1.6026711185308848, "grad_norm": 0.545465350151062, "learning_rate": 7.332185886402754e-05, "loss": 1.862, "step": 5280 }, { "epoch": 1.6029746547275763, "grad_norm": 0.4535203278064728, "learning_rate": 7.331679659815734e-05, "loss": 1.9352, "step": 5281 }, { "epoch": 1.6032781909242677, "grad_norm": 0.6021257638931274, "learning_rate": 7.331173433228713e-05, "loss": 1.7702, "step": 5282 }, { "epoch": 1.6035817271209591, "grad_norm": 0.6483513712882996, "learning_rate": 7.330667206641694e-05, "loss": 1.3199, "step": 5283 }, { "epoch": 1.6038852633176506, "grad_norm": 0.46683990955352783, "learning_rate": 7.330160980054674e-05, "loss": 1.8755, "step": 5284 }, { "epoch": 1.604188799514342, "grad_norm": 0.55056232213974, "learning_rate": 7.329654753467653e-05, "loss": 1.1878, "step": 5285 }, { "epoch": 1.6044923357110337, "grad_norm": 0.572959303855896, "learning_rate": 7.329148526880633e-05, "loss": 1.3519, "step": 5286 }, { "epoch": 1.6047958719077249, "grad_norm": 0.6803028583526611, "learning_rate": 7.328642300293612e-05, "loss": 1.3468, "step": 5287 }, { "epoch": 1.6050994081044165, "grad_norm": 0.6406558156013489, "learning_rate": 7.328136073706592e-05, "loss": 1.3617, "step": 5288 }, { "epoch": 1.6054029443011077, "grad_norm": 0.8463157415390015, "learning_rate": 7.327629847119571e-05, "loss": 1.4584, "step": 5289 }, { "epoch": 1.6057064804977994, "grad_norm": 0.6122679114341736, "learning_rate": 7.32712362053255e-05, "loss": 1.5421, "step": 5290 }, { "epoch": 1.6060100166944908, "grad_norm": 0.4659373164176941, "learning_rate": 7.326617393945531e-05, "loss": 1.6206, "step": 5291 }, { "epoch": 1.6063135528911823, "grad_norm": 0.5055060386657715, "learning_rate": 7.326111167358511e-05, "loss": 2.0784, "step": 5292 }, { "epoch": 1.6066170890878737, "grad_norm": 0.5983397960662842, "learning_rate": 7.32560494077149e-05, "loss": 1.7021, "step": 5293 }, { "epoch": 1.6069206252845651, "grad_norm": 0.5488964915275574, "learning_rate": 7.32509871418447e-05, "loss": 1.8662, "step": 5294 }, { "epoch": 1.6072241614812568, "grad_norm": 0.48650479316711426, "learning_rate": 7.324592487597449e-05, "loss": 1.6154, "step": 5295 }, { "epoch": 1.607527697677948, "grad_norm": 0.5552559494972229, "learning_rate": 7.324086261010429e-05, "loss": 1.1789, "step": 5296 }, { "epoch": 1.6078312338746397, "grad_norm": 0.595207691192627, "learning_rate": 7.323580034423408e-05, "loss": 1.2947, "step": 5297 }, { "epoch": 1.6081347700713309, "grad_norm": 0.5485917925834656, "learning_rate": 7.323073807836388e-05, "loss": 1.7643, "step": 5298 }, { "epoch": 1.6084383062680225, "grad_norm": 0.5586134791374207, "learning_rate": 7.322567581249367e-05, "loss": 1.7608, "step": 5299 }, { "epoch": 1.608741842464714, "grad_norm": 0.5535512566566467, "learning_rate": 7.322061354662347e-05, "loss": 1.7434, "step": 5300 }, { "epoch": 1.6090453786614054, "grad_norm": 0.540846586227417, "learning_rate": 7.321555128075328e-05, "loss": 1.7094, "step": 5301 }, { "epoch": 1.6093489148580968, "grad_norm": 0.5523681044578552, "learning_rate": 7.321048901488307e-05, "loss": 1.6085, "step": 5302 }, { "epoch": 1.6096524510547883, "grad_norm": 0.4929633140563965, "learning_rate": 7.320542674901286e-05, "loss": 1.148, "step": 5303 }, { "epoch": 1.6099559872514797, "grad_norm": 0.8295103311538696, "learning_rate": 7.320036448314266e-05, "loss": 1.7371, "step": 5304 }, { "epoch": 1.6102595234481711, "grad_norm": 0.5253877639770508, "learning_rate": 7.319530221727245e-05, "loss": 1.7138, "step": 5305 }, { "epoch": 1.6105630596448628, "grad_norm": 0.5233611464500427, "learning_rate": 7.319023995140225e-05, "loss": 1.8088, "step": 5306 }, { "epoch": 1.610866595841554, "grad_norm": 1.7711957693099976, "learning_rate": 7.318517768553204e-05, "loss": 1.4473, "step": 5307 }, { "epoch": 1.6111701320382457, "grad_norm": 0.6009371876716614, "learning_rate": 7.318011541966184e-05, "loss": 1.8576, "step": 5308 }, { "epoch": 1.6114736682349369, "grad_norm": 0.4875546395778656, "learning_rate": 7.317505315379163e-05, "loss": 1.5732, "step": 5309 }, { "epoch": 1.6117772044316285, "grad_norm": 0.5272681713104248, "learning_rate": 7.316999088792144e-05, "loss": 1.8052, "step": 5310 }, { "epoch": 1.61208074062832, "grad_norm": 0.5559819340705872, "learning_rate": 7.316492862205124e-05, "loss": 1.3846, "step": 5311 }, { "epoch": 1.6123842768250114, "grad_norm": 0.5686014890670776, "learning_rate": 7.315986635618103e-05, "loss": 1.8608, "step": 5312 }, { "epoch": 1.6126878130217028, "grad_norm": 0.533433198928833, "learning_rate": 7.315480409031083e-05, "loss": 1.516, "step": 5313 }, { "epoch": 1.6129913492183943, "grad_norm": 0.5107302665710449, "learning_rate": 7.314974182444062e-05, "loss": 1.9804, "step": 5314 }, { "epoch": 1.6132948854150857, "grad_norm": 0.48063787817955017, "learning_rate": 7.314467955857042e-05, "loss": 1.5903, "step": 5315 }, { "epoch": 1.6135984216117771, "grad_norm": 0.5146118998527527, "learning_rate": 7.313961729270021e-05, "loss": 1.5592, "step": 5316 }, { "epoch": 1.6139019578084688, "grad_norm": 0.5074766874313354, "learning_rate": 7.313455502683e-05, "loss": 1.7058, "step": 5317 }, { "epoch": 1.61420549400516, "grad_norm": 0.49822354316711426, "learning_rate": 7.31294927609598e-05, "loss": 1.7331, "step": 5318 }, { "epoch": 1.6145090302018517, "grad_norm": 0.5845142006874084, "learning_rate": 7.312443049508961e-05, "loss": 1.9431, "step": 5319 }, { "epoch": 1.6148125663985429, "grad_norm": 0.4772799611091614, "learning_rate": 7.31193682292194e-05, "loss": 2.0023, "step": 5320 }, { "epoch": 1.6151161025952345, "grad_norm": 0.6034853458404541, "learning_rate": 7.31143059633492e-05, "loss": 1.7094, "step": 5321 }, { "epoch": 1.615419638791926, "grad_norm": 0.5286986231803894, "learning_rate": 7.310924369747899e-05, "loss": 1.7875, "step": 5322 }, { "epoch": 1.6157231749886174, "grad_norm": 0.6037927269935608, "learning_rate": 7.310418143160879e-05, "loss": 1.7475, "step": 5323 }, { "epoch": 1.6160267111853088, "grad_norm": 7.824846267700195, "learning_rate": 7.309911916573858e-05, "loss": 1.6713, "step": 5324 }, { "epoch": 1.6163302473820003, "grad_norm": 0.6112735271453857, "learning_rate": 7.309405689986838e-05, "loss": 1.7414, "step": 5325 }, { "epoch": 1.616633783578692, "grad_norm": 0.5998720526695251, "learning_rate": 7.308899463399817e-05, "loss": 1.1521, "step": 5326 }, { "epoch": 1.6169373197753831, "grad_norm": 0.5878032445907593, "learning_rate": 7.308393236812798e-05, "loss": 1.4747, "step": 5327 }, { "epoch": 1.6172408559720748, "grad_norm": 0.5835018157958984, "learning_rate": 7.307887010225778e-05, "loss": 1.6374, "step": 5328 }, { "epoch": 1.617544392168766, "grad_norm": 0.5299321413040161, "learning_rate": 7.307380783638757e-05, "loss": 1.8917, "step": 5329 }, { "epoch": 1.6178479283654577, "grad_norm": 0.5514640808105469, "learning_rate": 7.306874557051738e-05, "loss": 1.6584, "step": 5330 }, { "epoch": 1.618151464562149, "grad_norm": 0.44423699378967285, "learning_rate": 7.306368330464717e-05, "loss": 1.3953, "step": 5331 }, { "epoch": 1.6184550007588405, "grad_norm": 0.45627906918525696, "learning_rate": 7.305862103877697e-05, "loss": 1.4091, "step": 5332 }, { "epoch": 1.618758536955532, "grad_norm": 0.500055193901062, "learning_rate": 7.305355877290676e-05, "loss": 1.7118, "step": 5333 }, { "epoch": 1.6190620731522234, "grad_norm": 0.5136251449584961, "learning_rate": 7.304849650703656e-05, "loss": 1.6012, "step": 5334 }, { "epoch": 1.6193656093489148, "grad_norm": 0.48394328355789185, "learning_rate": 7.304343424116635e-05, "loss": 1.7937, "step": 5335 }, { "epoch": 1.6196691455456063, "grad_norm": 0.4821557104587555, "learning_rate": 7.303837197529615e-05, "loss": 1.6532, "step": 5336 }, { "epoch": 1.619972681742298, "grad_norm": 1.043676495552063, "learning_rate": 7.303330970942594e-05, "loss": 1.5878, "step": 5337 }, { "epoch": 1.6202762179389891, "grad_norm": 0.4502454996109009, "learning_rate": 7.302824744355574e-05, "loss": 1.7892, "step": 5338 }, { "epoch": 1.6205797541356808, "grad_norm": 0.605509340763092, "learning_rate": 7.302318517768553e-05, "loss": 2.0461, "step": 5339 }, { "epoch": 1.620883290332372, "grad_norm": 0.5283710956573486, "learning_rate": 7.301812291181534e-05, "loss": 1.7673, "step": 5340 }, { "epoch": 1.6211868265290637, "grad_norm": 0.5143932700157166, "learning_rate": 7.301306064594513e-05, "loss": 1.7829, "step": 5341 }, { "epoch": 1.621490362725755, "grad_norm": 0.5089682936668396, "learning_rate": 7.300799838007493e-05, "loss": 1.8419, "step": 5342 }, { "epoch": 1.6217938989224465, "grad_norm": 0.5280531644821167, "learning_rate": 7.300293611420472e-05, "loss": 1.1959, "step": 5343 }, { "epoch": 1.622097435119138, "grad_norm": 0.45322325825691223, "learning_rate": 7.299787384833452e-05, "loss": 1.3736, "step": 5344 }, { "epoch": 1.6224009713158294, "grad_norm": 0.39491701126098633, "learning_rate": 7.299281158246431e-05, "loss": 0.8163, "step": 5345 }, { "epoch": 1.6227045075125208, "grad_norm": 0.6200827956199646, "learning_rate": 7.298774931659411e-05, "loss": 1.6614, "step": 5346 }, { "epoch": 1.6230080437092123, "grad_norm": 0.4712357223033905, "learning_rate": 7.29826870507239e-05, "loss": 1.7717, "step": 5347 }, { "epoch": 1.623311579905904, "grad_norm": 0.5763128995895386, "learning_rate": 7.29776247848537e-05, "loss": 1.7881, "step": 5348 }, { "epoch": 1.6236151161025951, "grad_norm": 0.6178731322288513, "learning_rate": 7.29725625189835e-05, "loss": 1.5837, "step": 5349 }, { "epoch": 1.6239186522992868, "grad_norm": 0.5563769936561584, "learning_rate": 7.29675002531133e-05, "loss": 1.54, "step": 5350 }, { "epoch": 1.624222188495978, "grad_norm": 0.6363424062728882, "learning_rate": 7.29624379872431e-05, "loss": 1.4934, "step": 5351 }, { "epoch": 1.6245257246926696, "grad_norm": 0.5647356510162354, "learning_rate": 7.295737572137289e-05, "loss": 1.3051, "step": 5352 }, { "epoch": 1.624829260889361, "grad_norm": 3.164123296737671, "learning_rate": 7.295231345550269e-05, "loss": 2.0574, "step": 5353 }, { "epoch": 1.6251327970860525, "grad_norm": 0.5882038474082947, "learning_rate": 7.294725118963248e-05, "loss": 1.8514, "step": 5354 }, { "epoch": 1.625436333282744, "grad_norm": 0.4425477683544159, "learning_rate": 7.294218892376228e-05, "loss": 0.847, "step": 5355 }, { "epoch": 1.6257398694794354, "grad_norm": 0.5342074036598206, "learning_rate": 7.293712665789207e-05, "loss": 1.3687, "step": 5356 }, { "epoch": 1.626043405676127, "grad_norm": 0.5602349042892456, "learning_rate": 7.293206439202186e-05, "loss": 1.7487, "step": 5357 }, { "epoch": 1.6263469418728183, "grad_norm": 0.5369912385940552, "learning_rate": 7.292700212615167e-05, "loss": 1.7015, "step": 5358 }, { "epoch": 1.62665047806951, "grad_norm": 0.563209593296051, "learning_rate": 7.292193986028147e-05, "loss": 1.756, "step": 5359 }, { "epoch": 1.6269540142662011, "grad_norm": 0.6118689179420471, "learning_rate": 7.291687759441126e-05, "loss": 1.6451, "step": 5360 }, { "epoch": 1.6272575504628928, "grad_norm": 0.5531105399131775, "learning_rate": 7.291181532854106e-05, "loss": 0.894, "step": 5361 }, { "epoch": 1.6275610866595842, "grad_norm": 0.5832155346870422, "learning_rate": 7.290675306267085e-05, "loss": 1.3583, "step": 5362 }, { "epoch": 1.6278646228562756, "grad_norm": 0.7351096272468567, "learning_rate": 7.290169079680065e-05, "loss": 1.6225, "step": 5363 }, { "epoch": 1.628168159052967, "grad_norm": 0.5954158902168274, "learning_rate": 7.289662853093044e-05, "loss": 1.711, "step": 5364 }, { "epoch": 1.6284716952496585, "grad_norm": 0.5454484820365906, "learning_rate": 7.289156626506024e-05, "loss": 1.5543, "step": 5365 }, { "epoch": 1.62877523144635, "grad_norm": 0.5965268015861511, "learning_rate": 7.288650399919003e-05, "loss": 1.706, "step": 5366 }, { "epoch": 1.6290787676430414, "grad_norm": 0.6655979752540588, "learning_rate": 7.288144173331983e-05, "loss": 1.7892, "step": 5367 }, { "epoch": 1.629382303839733, "grad_norm": 0.6440061926841736, "learning_rate": 7.287637946744963e-05, "loss": 1.8773, "step": 5368 }, { "epoch": 1.6296858400364242, "grad_norm": 0.5195211172103882, "learning_rate": 7.287131720157943e-05, "loss": 1.7512, "step": 5369 }, { "epoch": 1.629989376233116, "grad_norm": 0.5398062467575073, "learning_rate": 7.286625493570922e-05, "loss": 1.1974, "step": 5370 }, { "epoch": 1.6302929124298071, "grad_norm": 0.5433503985404968, "learning_rate": 7.286119266983902e-05, "loss": 1.8253, "step": 5371 }, { "epoch": 1.6305964486264988, "grad_norm": 0.4917634129524231, "learning_rate": 7.285613040396883e-05, "loss": 1.8575, "step": 5372 }, { "epoch": 1.6308999848231902, "grad_norm": 0.6070329546928406, "learning_rate": 7.285106813809862e-05, "loss": 1.9349, "step": 5373 }, { "epoch": 1.6312035210198816, "grad_norm": 0.5169711709022522, "learning_rate": 7.284600587222842e-05, "loss": 1.8008, "step": 5374 }, { "epoch": 1.631507057216573, "grad_norm": 0.6040024757385254, "learning_rate": 7.284094360635821e-05, "loss": 1.7703, "step": 5375 }, { "epoch": 1.6318105934132645, "grad_norm": 0.5740352272987366, "learning_rate": 7.2835881340488e-05, "loss": 1.8159, "step": 5376 }, { "epoch": 1.632114129609956, "grad_norm": 0.5424453616142273, "learning_rate": 7.28308190746178e-05, "loss": 1.5593, "step": 5377 }, { "epoch": 1.6324176658066474, "grad_norm": 0.5914655923843384, "learning_rate": 7.28257568087476e-05, "loss": 1.6786, "step": 5378 }, { "epoch": 1.632721202003339, "grad_norm": 0.5969242453575134, "learning_rate": 7.28206945428774e-05, "loss": 1.7036, "step": 5379 }, { "epoch": 1.6330247382000302, "grad_norm": 0.9573061466217041, "learning_rate": 7.28156322770072e-05, "loss": 1.2625, "step": 5380 }, { "epoch": 1.633328274396722, "grad_norm": 0.4953905940055847, "learning_rate": 7.2810570011137e-05, "loss": 1.7095, "step": 5381 }, { "epoch": 1.6336318105934131, "grad_norm": 0.48235177993774414, "learning_rate": 7.280550774526679e-05, "loss": 1.4157, "step": 5382 }, { "epoch": 1.6339353467901048, "grad_norm": 0.5806698799133301, "learning_rate": 7.280044547939658e-05, "loss": 1.5457, "step": 5383 }, { "epoch": 1.6342388829867962, "grad_norm": 0.48974600434303284, "learning_rate": 7.279538321352638e-05, "loss": 1.6838, "step": 5384 }, { "epoch": 1.6345424191834876, "grad_norm": 0.5429823398590088, "learning_rate": 7.279032094765617e-05, "loss": 1.7311, "step": 5385 }, { "epoch": 1.634845955380179, "grad_norm": 0.5275961756706238, "learning_rate": 7.278525868178597e-05, "loss": 1.6541, "step": 5386 }, { "epoch": 1.6351494915768705, "grad_norm": 0.6194995045661926, "learning_rate": 7.278019641591576e-05, "loss": 1.5361, "step": 5387 }, { "epoch": 1.6354530277735622, "grad_norm": 0.5362678170204163, "learning_rate": 7.277513415004557e-05, "loss": 1.8779, "step": 5388 }, { "epoch": 1.6357565639702534, "grad_norm": 0.8009393811225891, "learning_rate": 7.277007188417537e-05, "loss": 1.7224, "step": 5389 }, { "epoch": 1.636060100166945, "grad_norm": 0.5585671067237854, "learning_rate": 7.276500961830516e-05, "loss": 1.8676, "step": 5390 }, { "epoch": 1.6363636363636362, "grad_norm": 0.5586265921592712, "learning_rate": 7.275994735243496e-05, "loss": 1.6578, "step": 5391 }, { "epoch": 1.636667172560328, "grad_norm": 0.5737308859825134, "learning_rate": 7.275488508656475e-05, "loss": 1.8705, "step": 5392 }, { "epoch": 1.636970708757019, "grad_norm": 0.6445639729499817, "learning_rate": 7.274982282069455e-05, "loss": 1.65, "step": 5393 }, { "epoch": 1.6372742449537108, "grad_norm": 0.5315992832183838, "learning_rate": 7.274476055482434e-05, "loss": 1.4896, "step": 5394 }, { "epoch": 1.6375777811504022, "grad_norm": 0.5430747270584106, "learning_rate": 7.273969828895413e-05, "loss": 1.7276, "step": 5395 }, { "epoch": 1.6378813173470936, "grad_norm": 0.4536675214767456, "learning_rate": 7.273463602308393e-05, "loss": 1.7787, "step": 5396 }, { "epoch": 1.638184853543785, "grad_norm": 0.9887977242469788, "learning_rate": 7.272957375721374e-05, "loss": 1.2947, "step": 5397 }, { "epoch": 1.6384883897404765, "grad_norm": 0.5427131056785583, "learning_rate": 7.272451149134353e-05, "loss": 2.032, "step": 5398 }, { "epoch": 1.6387919259371682, "grad_norm": 0.5042452812194824, "learning_rate": 7.271944922547333e-05, "loss": 1.7509, "step": 5399 }, { "epoch": 1.6390954621338594, "grad_norm": 0.5531298518180847, "learning_rate": 7.271438695960312e-05, "loss": 1.4943, "step": 5400 }, { "epoch": 1.639398998330551, "grad_norm": 0.514651894569397, "learning_rate": 7.270932469373292e-05, "loss": 1.2271, "step": 5401 }, { "epoch": 1.6397025345272422, "grad_norm": 0.5557711124420166, "learning_rate": 7.270426242786271e-05, "loss": 1.6918, "step": 5402 }, { "epoch": 1.640006070723934, "grad_norm": 0.6158317923545837, "learning_rate": 7.269920016199251e-05, "loss": 1.7352, "step": 5403 }, { "epoch": 1.6403096069206253, "grad_norm": 0.5777170658111572, "learning_rate": 7.26941378961223e-05, "loss": 1.8337, "step": 5404 }, { "epoch": 1.6406131431173168, "grad_norm": 0.5811499357223511, "learning_rate": 7.26890756302521e-05, "loss": 1.668, "step": 5405 }, { "epoch": 1.6409166793140082, "grad_norm": 0.5484623908996582, "learning_rate": 7.268401336438189e-05, "loss": 1.6774, "step": 5406 }, { "epoch": 1.6412202155106996, "grad_norm": 0.6455094218254089, "learning_rate": 7.26789510985117e-05, "loss": 1.7618, "step": 5407 }, { "epoch": 1.641523751707391, "grad_norm": 0.5346998572349548, "learning_rate": 7.26738888326415e-05, "loss": 1.4618, "step": 5408 }, { "epoch": 1.6418272879040825, "grad_norm": 0.5332134962081909, "learning_rate": 7.266882656677129e-05, "loss": 1.8189, "step": 5409 }, { "epoch": 1.6421308241007742, "grad_norm": 0.46270933747291565, "learning_rate": 7.266376430090108e-05, "loss": 1.7698, "step": 5410 }, { "epoch": 1.6424343602974654, "grad_norm": 0.5155644416809082, "learning_rate": 7.265870203503088e-05, "loss": 1.8359, "step": 5411 }, { "epoch": 1.642737896494157, "grad_norm": 0.539168119430542, "learning_rate": 7.265363976916067e-05, "loss": 1.7293, "step": 5412 }, { "epoch": 1.6430414326908482, "grad_norm": 0.7102829813957214, "learning_rate": 7.264857750329047e-05, "loss": 1.3327, "step": 5413 }, { "epoch": 1.64334496888754, "grad_norm": 0.5670261979103088, "learning_rate": 7.264351523742026e-05, "loss": 1.7286, "step": 5414 }, { "epoch": 1.6436485050842313, "grad_norm": 0.5489922165870667, "learning_rate": 7.263845297155006e-05, "loss": 1.9019, "step": 5415 }, { "epoch": 1.6439520412809228, "grad_norm": 0.7275190949440002, "learning_rate": 7.263339070567987e-05, "loss": 1.6061, "step": 5416 }, { "epoch": 1.6442555774776142, "grad_norm": 0.6558725833892822, "learning_rate": 7.262832843980966e-05, "loss": 1.639, "step": 5417 }, { "epoch": 1.6445591136743056, "grad_norm": 0.5907195806503296, "learning_rate": 7.262326617393947e-05, "loss": 1.935, "step": 5418 }, { "epoch": 1.644862649870997, "grad_norm": 0.5562440156936646, "learning_rate": 7.261820390806926e-05, "loss": 1.6597, "step": 5419 }, { "epoch": 1.6451661860676885, "grad_norm": 0.5652980804443359, "learning_rate": 7.261314164219906e-05, "loss": 1.7975, "step": 5420 }, { "epoch": 1.6454697222643802, "grad_norm": 0.4403027296066284, "learning_rate": 7.260807937632885e-05, "loss": 1.2075, "step": 5421 }, { "epoch": 1.6457732584610714, "grad_norm": 0.5183008909225464, "learning_rate": 7.260301711045865e-05, "loss": 1.669, "step": 5422 }, { "epoch": 1.646076794657763, "grad_norm": 0.5930132865905762, "learning_rate": 7.259795484458844e-05, "loss": 1.9552, "step": 5423 }, { "epoch": 1.6463803308544542, "grad_norm": 0.43935516476631165, "learning_rate": 7.259289257871824e-05, "loss": 1.7208, "step": 5424 }, { "epoch": 1.6466838670511459, "grad_norm": 0.5676711797714233, "learning_rate": 7.258783031284803e-05, "loss": 1.397, "step": 5425 }, { "epoch": 1.6469874032478373, "grad_norm": 0.5350246429443359, "learning_rate": 7.258276804697783e-05, "loss": 1.8004, "step": 5426 }, { "epoch": 1.6472909394445288, "grad_norm": 0.4808463156223297, "learning_rate": 7.257770578110764e-05, "loss": 1.61, "step": 5427 }, { "epoch": 1.6475944756412202, "grad_norm": 0.49013078212738037, "learning_rate": 7.257264351523743e-05, "loss": 1.2447, "step": 5428 }, { "epoch": 1.6478980118379116, "grad_norm": 0.5129517912864685, "learning_rate": 7.256758124936723e-05, "loss": 1.5529, "step": 5429 }, { "epoch": 1.6482015480346033, "grad_norm": 0.5757639408111572, "learning_rate": 7.256251898349702e-05, "loss": 1.2341, "step": 5430 }, { "epoch": 1.6485050842312945, "grad_norm": 0.5084811449050903, "learning_rate": 7.255745671762682e-05, "loss": 1.8156, "step": 5431 }, { "epoch": 1.6488086204279861, "grad_norm": 0.5663341283798218, "learning_rate": 7.255239445175661e-05, "loss": 1.431, "step": 5432 }, { "epoch": 1.6491121566246774, "grad_norm": 0.6115935444831848, "learning_rate": 7.25473321858864e-05, "loss": 1.1595, "step": 5433 }, { "epoch": 1.649415692821369, "grad_norm": 0.4670819342136383, "learning_rate": 7.25422699200162e-05, "loss": 1.658, "step": 5434 }, { "epoch": 1.6497192290180605, "grad_norm": 0.5507335066795349, "learning_rate": 7.2537207654146e-05, "loss": 1.7194, "step": 5435 }, { "epoch": 1.6500227652147519, "grad_norm": 0.5227102637290955, "learning_rate": 7.25321453882758e-05, "loss": 1.7898, "step": 5436 }, { "epoch": 1.6503263014114433, "grad_norm": 0.5725428462028503, "learning_rate": 7.25270831224056e-05, "loss": 1.3801, "step": 5437 }, { "epoch": 1.6506298376081348, "grad_norm": 0.6102313995361328, "learning_rate": 7.252202085653539e-05, "loss": 1.6622, "step": 5438 }, { "epoch": 1.6509333738048262, "grad_norm": 0.5922355651855469, "learning_rate": 7.251695859066519e-05, "loss": 1.6763, "step": 5439 }, { "epoch": 1.6512369100015176, "grad_norm": 0.8734421133995056, "learning_rate": 7.251189632479498e-05, "loss": 1.4523, "step": 5440 }, { "epoch": 1.6515404461982093, "grad_norm": 0.6614634394645691, "learning_rate": 7.250683405892478e-05, "loss": 1.7902, "step": 5441 }, { "epoch": 1.6518439823949005, "grad_norm": 0.5623548030853271, "learning_rate": 7.250177179305457e-05, "loss": 1.7837, "step": 5442 }, { "epoch": 1.6521475185915921, "grad_norm": 0.5432645082473755, "learning_rate": 7.249670952718437e-05, "loss": 1.4503, "step": 5443 }, { "epoch": 1.6524510547882834, "grad_norm": 0.5051206350326538, "learning_rate": 7.249164726131416e-05, "loss": 1.9885, "step": 5444 }, { "epoch": 1.652754590984975, "grad_norm": 0.5985874533653259, "learning_rate": 7.248658499544396e-05, "loss": 1.8564, "step": 5445 }, { "epoch": 1.6530581271816664, "grad_norm": 0.722251832485199, "learning_rate": 7.248152272957376e-05, "loss": 1.4029, "step": 5446 }, { "epoch": 1.6533616633783579, "grad_norm": 0.5419872999191284, "learning_rate": 7.247646046370356e-05, "loss": 2.0266, "step": 5447 }, { "epoch": 1.6536651995750493, "grad_norm": 0.5634152889251709, "learning_rate": 7.247139819783335e-05, "loss": 1.4273, "step": 5448 }, { "epoch": 1.6539687357717408, "grad_norm": 0.6193149089813232, "learning_rate": 7.246633593196315e-05, "loss": 1.437, "step": 5449 }, { "epoch": 1.6542722719684322, "grad_norm": 0.701909065246582, "learning_rate": 7.246127366609294e-05, "loss": 1.484, "step": 5450 }, { "epoch": 1.6545758081651236, "grad_norm": 0.5948097109794617, "learning_rate": 7.245621140022274e-05, "loss": 1.1156, "step": 5451 }, { "epoch": 1.6548793443618153, "grad_norm": 0.6138877272605896, "learning_rate": 7.245114913435253e-05, "loss": 1.2544, "step": 5452 }, { "epoch": 1.6551828805585065, "grad_norm": 0.5474926829338074, "learning_rate": 7.244608686848233e-05, "loss": 1.6751, "step": 5453 }, { "epoch": 1.6554864167551981, "grad_norm": 0.5998562574386597, "learning_rate": 7.244102460261212e-05, "loss": 1.8192, "step": 5454 }, { "epoch": 1.6557899529518894, "grad_norm": 0.6224400997161865, "learning_rate": 7.243596233674193e-05, "loss": 1.819, "step": 5455 }, { "epoch": 1.656093489148581, "grad_norm": 0.6007357239723206, "learning_rate": 7.243090007087173e-05, "loss": 1.7359, "step": 5456 }, { "epoch": 1.6563970253452724, "grad_norm": 0.5641934871673584, "learning_rate": 7.242583780500152e-05, "loss": 1.2001, "step": 5457 }, { "epoch": 1.6567005615419639, "grad_norm": 0.5024848580360413, "learning_rate": 7.242077553913132e-05, "loss": 1.8584, "step": 5458 }, { "epoch": 1.6570040977386553, "grad_norm": 0.3787592649459839, "learning_rate": 7.241571327326111e-05, "loss": 1.5685, "step": 5459 }, { "epoch": 1.6573076339353467, "grad_norm": 0.3638264536857605, "learning_rate": 7.24106510073909e-05, "loss": 1.8184, "step": 5460 }, { "epoch": 1.6576111701320384, "grad_norm": 0.5221641659736633, "learning_rate": 7.240558874152071e-05, "loss": 1.7377, "step": 5461 }, { "epoch": 1.6579147063287296, "grad_norm": 0.4744473993778229, "learning_rate": 7.240052647565051e-05, "loss": 1.9189, "step": 5462 }, { "epoch": 1.6582182425254213, "grad_norm": 0.48213768005371094, "learning_rate": 7.23954642097803e-05, "loss": 1.8864, "step": 5463 }, { "epoch": 1.6585217787221125, "grad_norm": 0.5562633872032166, "learning_rate": 7.23904019439101e-05, "loss": 1.3022, "step": 5464 }, { "epoch": 1.6588253149188041, "grad_norm": 0.5425136685371399, "learning_rate": 7.238533967803989e-05, "loss": 1.9652, "step": 5465 }, { "epoch": 1.6591288511154956, "grad_norm": 0.6003391146659851, "learning_rate": 7.23802774121697e-05, "loss": 2.0183, "step": 5466 }, { "epoch": 1.659432387312187, "grad_norm": 0.6612154841423035, "learning_rate": 7.23752151462995e-05, "loss": 1.437, "step": 5467 }, { "epoch": 1.6597359235088784, "grad_norm": 0.6031103730201721, "learning_rate": 7.237015288042929e-05, "loss": 1.543, "step": 5468 }, { "epoch": 1.6600394597055699, "grad_norm": 0.5282215476036072, "learning_rate": 7.236509061455909e-05, "loss": 1.7141, "step": 5469 }, { "epoch": 1.6603429959022613, "grad_norm": 1.0144010782241821, "learning_rate": 7.236002834868888e-05, "loss": 1.4512, "step": 5470 }, { "epoch": 1.6606465320989527, "grad_norm": 0.5801889896392822, "learning_rate": 7.235496608281867e-05, "loss": 1.5097, "step": 5471 }, { "epoch": 1.6609500682956444, "grad_norm": 1.2839747667312622, "learning_rate": 7.234990381694847e-05, "loss": 1.4121, "step": 5472 }, { "epoch": 1.6612536044923356, "grad_norm": 0.6611791253089905, "learning_rate": 7.234484155107826e-05, "loss": 1.1927, "step": 5473 }, { "epoch": 1.6615571406890273, "grad_norm": 0.6028724908828735, "learning_rate": 7.233977928520806e-05, "loss": 1.615, "step": 5474 }, { "epoch": 1.6618606768857185, "grad_norm": 0.5648754835128784, "learning_rate": 7.233471701933787e-05, "loss": 1.2785, "step": 5475 }, { "epoch": 1.6621642130824101, "grad_norm": 0.6196275353431702, "learning_rate": 7.232965475346766e-05, "loss": 1.6909, "step": 5476 }, { "epoch": 1.6624677492791016, "grad_norm": 0.6454710960388184, "learning_rate": 7.232459248759746e-05, "loss": 1.6158, "step": 5477 }, { "epoch": 1.662771285475793, "grad_norm": 0.6051695942878723, "learning_rate": 7.231953022172725e-05, "loss": 1.4259, "step": 5478 }, { "epoch": 1.6630748216724844, "grad_norm": 0.9160197377204895, "learning_rate": 7.231446795585705e-05, "loss": 1.3115, "step": 5479 }, { "epoch": 1.6633783578691759, "grad_norm": 0.5905101299285889, "learning_rate": 7.230940568998684e-05, "loss": 1.5463, "step": 5480 }, { "epoch": 1.6636818940658673, "grad_norm": 0.48909255862236023, "learning_rate": 7.230434342411664e-05, "loss": 1.7501, "step": 5481 }, { "epoch": 1.6639854302625587, "grad_norm": 0.4441916048526764, "learning_rate": 7.229928115824643e-05, "loss": 1.8818, "step": 5482 }, { "epoch": 1.6642889664592504, "grad_norm": 0.45585280656814575, "learning_rate": 7.229421889237623e-05, "loss": 1.8129, "step": 5483 }, { "epoch": 1.6645925026559416, "grad_norm": 0.5730534195899963, "learning_rate": 7.228915662650602e-05, "loss": 1.2908, "step": 5484 }, { "epoch": 1.6648960388526333, "grad_norm": 0.48153162002563477, "learning_rate": 7.228409436063583e-05, "loss": 1.6484, "step": 5485 }, { "epoch": 1.6651995750493245, "grad_norm": 0.5049116015434265, "learning_rate": 7.227903209476562e-05, "loss": 1.7223, "step": 5486 }, { "epoch": 1.6655031112460161, "grad_norm": 0.43153953552246094, "learning_rate": 7.227396982889542e-05, "loss": 1.6567, "step": 5487 }, { "epoch": 1.6658066474427076, "grad_norm": 0.4768941104412079, "learning_rate": 7.226890756302521e-05, "loss": 1.7213, "step": 5488 }, { "epoch": 1.666110183639399, "grad_norm": 0.39153575897216797, "learning_rate": 7.226384529715501e-05, "loss": 1.2501, "step": 5489 }, { "epoch": 1.6664137198360904, "grad_norm": 0.4094899594783783, "learning_rate": 7.22587830312848e-05, "loss": 1.0887, "step": 5490 }, { "epoch": 1.6667172560327819, "grad_norm": 0.5269747972488403, "learning_rate": 7.22537207654146e-05, "loss": 1.3502, "step": 5491 }, { "epoch": 1.6670207922294735, "grad_norm": 0.4670669138431549, "learning_rate": 7.224865849954439e-05, "loss": 1.6927, "step": 5492 }, { "epoch": 1.6673243284261647, "grad_norm": 0.4470636546611786, "learning_rate": 7.224359623367419e-05, "loss": 1.6558, "step": 5493 }, { "epoch": 1.6676278646228564, "grad_norm": 0.5684782266616821, "learning_rate": 7.2238533967804e-05, "loss": 2.0172, "step": 5494 }, { "epoch": 1.6679314008195476, "grad_norm": 0.5105867981910706, "learning_rate": 7.223347170193379e-05, "loss": 1.4969, "step": 5495 }, { "epoch": 1.6682349370162393, "grad_norm": 0.4778842628002167, "learning_rate": 7.222840943606359e-05, "loss": 1.9251, "step": 5496 }, { "epoch": 1.6685384732129307, "grad_norm": 0.5652633905410767, "learning_rate": 7.222334717019338e-05, "loss": 2.0057, "step": 5497 }, { "epoch": 1.6688420094096221, "grad_norm": 0.5117583870887756, "learning_rate": 7.221828490432317e-05, "loss": 1.7944, "step": 5498 }, { "epoch": 1.6691455456063136, "grad_norm": 0.5227757096290588, "learning_rate": 7.221322263845297e-05, "loss": 1.8122, "step": 5499 }, { "epoch": 1.669449081803005, "grad_norm": 1.003037691116333, "learning_rate": 7.220816037258276e-05, "loss": 1.8023, "step": 5500 }, { "epoch": 1.6697526179996964, "grad_norm": 0.5822034478187561, "learning_rate": 7.220309810671256e-05, "loss": 1.8406, "step": 5501 }, { "epoch": 1.6700561541963879, "grad_norm": 0.5243530869483948, "learning_rate": 7.219803584084235e-05, "loss": 1.3146, "step": 5502 }, { "epoch": 1.6703596903930795, "grad_norm": 0.6547448635101318, "learning_rate": 7.219297357497216e-05, "loss": 1.9529, "step": 5503 }, { "epoch": 1.6706632265897707, "grad_norm": 0.5289431810379028, "learning_rate": 7.218791130910196e-05, "loss": 1.8209, "step": 5504 }, { "epoch": 1.6709667627864624, "grad_norm": 0.47597306966781616, "learning_rate": 7.218284904323177e-05, "loss": 1.7394, "step": 5505 }, { "epoch": 1.6712702989831536, "grad_norm": 0.6972471475601196, "learning_rate": 7.217778677736156e-05, "loss": 1.3056, "step": 5506 }, { "epoch": 1.6715738351798453, "grad_norm": 0.6936922669410706, "learning_rate": 7.217272451149136e-05, "loss": 1.311, "step": 5507 }, { "epoch": 1.6718773713765367, "grad_norm": 0.613391637802124, "learning_rate": 7.216766224562115e-05, "loss": 1.6611, "step": 5508 }, { "epoch": 1.6721809075732281, "grad_norm": 0.5067817568778992, "learning_rate": 7.216259997975094e-05, "loss": 1.534, "step": 5509 }, { "epoch": 1.6724844437699196, "grad_norm": 0.8583042025566101, "learning_rate": 7.215753771388074e-05, "loss": 1.2529, "step": 5510 }, { "epoch": 1.672787979966611, "grad_norm": 0.6113730072975159, "learning_rate": 7.215247544801053e-05, "loss": 1.3955, "step": 5511 }, { "epoch": 1.6730915161633024, "grad_norm": 0.5480425357818604, "learning_rate": 7.214741318214033e-05, "loss": 1.6759, "step": 5512 }, { "epoch": 1.6733950523599939, "grad_norm": 0.5304676294326782, "learning_rate": 7.214235091627012e-05, "loss": 1.7908, "step": 5513 }, { "epoch": 1.6736985885566855, "grad_norm": 0.5603669285774231, "learning_rate": 7.213728865039993e-05, "loss": 1.6092, "step": 5514 }, { "epoch": 1.6740021247533767, "grad_norm": 0.5751498937606812, "learning_rate": 7.213222638452973e-05, "loss": 1.3722, "step": 5515 }, { "epoch": 1.6743056609500684, "grad_norm": 0.5078593492507935, "learning_rate": 7.212716411865952e-05, "loss": 1.206, "step": 5516 }, { "epoch": 1.6746091971467596, "grad_norm": 0.5524204969406128, "learning_rate": 7.212210185278932e-05, "loss": 1.8564, "step": 5517 }, { "epoch": 1.6749127333434513, "grad_norm": 0.5641534328460693, "learning_rate": 7.211703958691911e-05, "loss": 1.6019, "step": 5518 }, { "epoch": 1.6752162695401427, "grad_norm": 0.4548323452472687, "learning_rate": 7.21119773210489e-05, "loss": 2.1299, "step": 5519 }, { "epoch": 1.6755198057368341, "grad_norm": 0.4769175052642822, "learning_rate": 7.21069150551787e-05, "loss": 1.5924, "step": 5520 }, { "epoch": 1.6758233419335256, "grad_norm": 0.4972521960735321, "learning_rate": 7.21018527893085e-05, "loss": 1.5578, "step": 5521 }, { "epoch": 1.676126878130217, "grad_norm": 0.5828529000282288, "learning_rate": 7.209679052343829e-05, "loss": 1.5206, "step": 5522 }, { "epoch": 1.6764304143269086, "grad_norm": 0.535059928894043, "learning_rate": 7.209172825756809e-05, "loss": 1.6381, "step": 5523 }, { "epoch": 1.6767339505235999, "grad_norm": 0.8154981136322021, "learning_rate": 7.20866659916979e-05, "loss": 1.498, "step": 5524 }, { "epoch": 1.6770374867202915, "grad_norm": 0.5557091236114502, "learning_rate": 7.208160372582769e-05, "loss": 1.7889, "step": 5525 }, { "epoch": 1.6773410229169827, "grad_norm": 0.5542075634002686, "learning_rate": 7.207654145995748e-05, "loss": 1.758, "step": 5526 }, { "epoch": 1.6776445591136744, "grad_norm": 0.46830353140830994, "learning_rate": 7.207147919408728e-05, "loss": 1.9262, "step": 5527 }, { "epoch": 1.6779480953103656, "grad_norm": 0.5317898988723755, "learning_rate": 7.206641692821707e-05, "loss": 1.4244, "step": 5528 }, { "epoch": 1.6782516315070573, "grad_norm": 0.4785618185997009, "learning_rate": 7.206135466234687e-05, "loss": 1.2493, "step": 5529 }, { "epoch": 1.6785551677037487, "grad_norm": 0.5754481554031372, "learning_rate": 7.205629239647666e-05, "loss": 1.9019, "step": 5530 }, { "epoch": 1.6788587039004401, "grad_norm": 0.5696677565574646, "learning_rate": 7.205123013060646e-05, "loss": 1.9587, "step": 5531 }, { "epoch": 1.6791622400971316, "grad_norm": 0.46303999423980713, "learning_rate": 7.204616786473625e-05, "loss": 1.352, "step": 5532 }, { "epoch": 1.679465776293823, "grad_norm": 0.5733750462532043, "learning_rate": 7.204110559886606e-05, "loss": 1.442, "step": 5533 }, { "epoch": 1.6797693124905146, "grad_norm": 0.5868303775787354, "learning_rate": 7.203604333299586e-05, "loss": 1.8534, "step": 5534 }, { "epoch": 1.6800728486872059, "grad_norm": 0.532902717590332, "learning_rate": 7.203098106712565e-05, "loss": 1.748, "step": 5535 }, { "epoch": 1.6803763848838975, "grad_norm": 0.6015176773071289, "learning_rate": 7.202591880125544e-05, "loss": 1.7901, "step": 5536 }, { "epoch": 1.6806799210805887, "grad_norm": 0.880872905254364, "learning_rate": 7.202085653538524e-05, "loss": 1.5535, "step": 5537 }, { "epoch": 1.6809834572772804, "grad_norm": 0.515289843082428, "learning_rate": 7.201579426951503e-05, "loss": 1.8207, "step": 5538 }, { "epoch": 1.6812869934739718, "grad_norm": 0.9315522313117981, "learning_rate": 7.201073200364483e-05, "loss": 1.9596, "step": 5539 }, { "epoch": 1.6815905296706632, "grad_norm": 0.700817883014679, "learning_rate": 7.200566973777462e-05, "loss": 1.8407, "step": 5540 }, { "epoch": 1.6818940658673547, "grad_norm": 0.5674588084220886, "learning_rate": 7.200060747190442e-05, "loss": 1.6263, "step": 5541 }, { "epoch": 1.6821976020640461, "grad_norm": 0.599992573261261, "learning_rate": 7.199554520603423e-05, "loss": 1.8701, "step": 5542 }, { "epoch": 1.6825011382607375, "grad_norm": 0.49618181586265564, "learning_rate": 7.199048294016402e-05, "loss": 1.6515, "step": 5543 }, { "epoch": 1.682804674457429, "grad_norm": 0.5886253714561462, "learning_rate": 7.198542067429382e-05, "loss": 1.5716, "step": 5544 }, { "epoch": 1.6831082106541206, "grad_norm": 0.5282119512557983, "learning_rate": 7.198035840842361e-05, "loss": 1.7686, "step": 5545 }, { "epoch": 1.6834117468508119, "grad_norm": 0.5557923913002014, "learning_rate": 7.19752961425534e-05, "loss": 1.7876, "step": 5546 }, { "epoch": 1.6837152830475035, "grad_norm": 0.5981353521347046, "learning_rate": 7.19702338766832e-05, "loss": 1.4325, "step": 5547 }, { "epoch": 1.6840188192441947, "grad_norm": 0.5523660182952881, "learning_rate": 7.1965171610813e-05, "loss": 1.6176, "step": 5548 }, { "epoch": 1.6843223554408864, "grad_norm": 0.886365532875061, "learning_rate": 7.196010934494279e-05, "loss": 1.5195, "step": 5549 }, { "epoch": 1.6846258916375778, "grad_norm": 0.5468156337738037, "learning_rate": 7.19550470790726e-05, "loss": 1.6558, "step": 5550 }, { "epoch": 1.6849294278342692, "grad_norm": 0.682375967502594, "learning_rate": 7.19499848132024e-05, "loss": 1.7567, "step": 5551 }, { "epoch": 1.6852329640309607, "grad_norm": 0.5643876791000366, "learning_rate": 7.194492254733219e-05, "loss": 1.9613, "step": 5552 }, { "epoch": 1.6855365002276521, "grad_norm": 0.5802308917045593, "learning_rate": 7.1939860281462e-05, "loss": 1.7519, "step": 5553 }, { "epoch": 1.6858400364243438, "grad_norm": 0.5967023968696594, "learning_rate": 7.193479801559179e-05, "loss": 1.024, "step": 5554 }, { "epoch": 1.686143572621035, "grad_norm": 0.5764415860176086, "learning_rate": 7.192973574972159e-05, "loss": 1.2554, "step": 5555 }, { "epoch": 1.6864471088177266, "grad_norm": 0.6134281754493713, "learning_rate": 7.192467348385138e-05, "loss": 1.884, "step": 5556 }, { "epoch": 1.6867506450144178, "grad_norm": 0.4594460427761078, "learning_rate": 7.191961121798118e-05, "loss": 1.9008, "step": 5557 }, { "epoch": 1.6870541812111095, "grad_norm": 0.5448347330093384, "learning_rate": 7.191454895211097e-05, "loss": 1.1015, "step": 5558 }, { "epoch": 1.6873577174078007, "grad_norm": 0.4324944317340851, "learning_rate": 7.190948668624077e-05, "loss": 0.9942, "step": 5559 }, { "epoch": 1.6876612536044924, "grad_norm": 0.5763707160949707, "learning_rate": 7.190442442037056e-05, "loss": 1.428, "step": 5560 }, { "epoch": 1.6879647898011838, "grad_norm": 0.5779836773872375, "learning_rate": 7.189936215450036e-05, "loss": 1.7867, "step": 5561 }, { "epoch": 1.6882683259978752, "grad_norm": 0.6972694993019104, "learning_rate": 7.189429988863015e-05, "loss": 1.7712, "step": 5562 }, { "epoch": 1.6885718621945667, "grad_norm": 1.026524305343628, "learning_rate": 7.188923762275996e-05, "loss": 1.4089, "step": 5563 }, { "epoch": 1.688875398391258, "grad_norm": 0.5667091608047485, "learning_rate": 7.188417535688975e-05, "loss": 2.1362, "step": 5564 }, { "epoch": 1.6891789345879498, "grad_norm": 0.48868703842163086, "learning_rate": 7.187911309101955e-05, "loss": 1.493, "step": 5565 }, { "epoch": 1.689482470784641, "grad_norm": 0.43740716576576233, "learning_rate": 7.187405082514934e-05, "loss": 1.3792, "step": 5566 }, { "epoch": 1.6897860069813326, "grad_norm": 0.49257686734199524, "learning_rate": 7.186898855927914e-05, "loss": 2.3111, "step": 5567 }, { "epoch": 1.6900895431780238, "grad_norm": 0.6309003233909607, "learning_rate": 7.186392629340893e-05, "loss": 1.7269, "step": 5568 }, { "epoch": 1.6903930793747155, "grad_norm": 0.5394817590713501, "learning_rate": 7.185886402753873e-05, "loss": 1.35, "step": 5569 }, { "epoch": 1.690696615571407, "grad_norm": 0.5133473873138428, "learning_rate": 7.185380176166852e-05, "loss": 1.1287, "step": 5570 }, { "epoch": 1.6910001517680984, "grad_norm": 0.5639081597328186, "learning_rate": 7.184873949579832e-05, "loss": 1.3801, "step": 5571 }, { "epoch": 1.6913036879647898, "grad_norm": 0.5387223362922668, "learning_rate": 7.184367722992813e-05, "loss": 1.5725, "step": 5572 }, { "epoch": 1.6916072241614812, "grad_norm": 0.4873654246330261, "learning_rate": 7.183861496405792e-05, "loss": 1.8034, "step": 5573 }, { "epoch": 1.6919107603581727, "grad_norm": 0.5473541617393494, "learning_rate": 7.183355269818771e-05, "loss": 1.6244, "step": 5574 }, { "epoch": 1.692214296554864, "grad_norm": 0.5153944492340088, "learning_rate": 7.182849043231751e-05, "loss": 1.8091, "step": 5575 }, { "epoch": 1.6925178327515558, "grad_norm": 0.5295174717903137, "learning_rate": 7.18234281664473e-05, "loss": 1.8437, "step": 5576 }, { "epoch": 1.692821368948247, "grad_norm": 0.46947070956230164, "learning_rate": 7.18183659005771e-05, "loss": 1.0215, "step": 5577 }, { "epoch": 1.6931249051449386, "grad_norm": 0.5614966154098511, "learning_rate": 7.18133036347069e-05, "loss": 1.645, "step": 5578 }, { "epoch": 1.6934284413416298, "grad_norm": 0.5633820295333862, "learning_rate": 7.180824136883669e-05, "loss": 1.8471, "step": 5579 }, { "epoch": 1.6937319775383215, "grad_norm": 0.5536201000213623, "learning_rate": 7.180317910296648e-05, "loss": 1.539, "step": 5580 }, { "epoch": 1.694035513735013, "grad_norm": 0.5490652322769165, "learning_rate": 7.179811683709629e-05, "loss": 1.2846, "step": 5581 }, { "epoch": 1.6943390499317044, "grad_norm": 0.47489118576049805, "learning_rate": 7.179305457122609e-05, "loss": 1.6611, "step": 5582 }, { "epoch": 1.6946425861283958, "grad_norm": 0.5121912956237793, "learning_rate": 7.178799230535588e-05, "loss": 1.8077, "step": 5583 }, { "epoch": 1.6949461223250872, "grad_norm": 0.5762444138526917, "learning_rate": 7.178293003948568e-05, "loss": 1.6236, "step": 5584 }, { "epoch": 1.6952496585217787, "grad_norm": 0.5737335085868835, "learning_rate": 7.177786777361547e-05, "loss": 1.6909, "step": 5585 }, { "epoch": 1.69555319471847, "grad_norm": 0.573403537273407, "learning_rate": 7.177280550774527e-05, "loss": 1.4276, "step": 5586 }, { "epoch": 1.6958567309151618, "grad_norm": 0.6170399188995361, "learning_rate": 7.176774324187506e-05, "loss": 1.5555, "step": 5587 }, { "epoch": 1.696160267111853, "grad_norm": 0.5213575959205627, "learning_rate": 7.176268097600486e-05, "loss": 1.7751, "step": 5588 }, { "epoch": 1.6964638033085446, "grad_norm": 0.501232385635376, "learning_rate": 7.175761871013465e-05, "loss": 1.5369, "step": 5589 }, { "epoch": 1.6967673395052358, "grad_norm": 0.6428576707839966, "learning_rate": 7.175255644426445e-05, "loss": 1.349, "step": 5590 }, { "epoch": 1.6970708757019275, "grad_norm": 0.5654997825622559, "learning_rate": 7.174749417839425e-05, "loss": 1.6381, "step": 5591 }, { "epoch": 1.697374411898619, "grad_norm": 0.5747986435890198, "learning_rate": 7.174243191252405e-05, "loss": 1.6836, "step": 5592 }, { "epoch": 1.6976779480953104, "grad_norm": 0.48611271381378174, "learning_rate": 7.173736964665384e-05, "loss": 1.8771, "step": 5593 }, { "epoch": 1.6979814842920018, "grad_norm": 0.6112745404243469, "learning_rate": 7.173230738078365e-05, "loss": 1.4543, "step": 5594 }, { "epoch": 1.6982850204886932, "grad_norm": 0.5568521022796631, "learning_rate": 7.172724511491345e-05, "loss": 1.2674, "step": 5595 }, { "epoch": 1.6985885566853849, "grad_norm": 0.9814030528068542, "learning_rate": 7.172218284904324e-05, "loss": 1.418, "step": 5596 }, { "epoch": 1.698892092882076, "grad_norm": 0.5787724852561951, "learning_rate": 7.171712058317304e-05, "loss": 1.8167, "step": 5597 }, { "epoch": 1.6991956290787678, "grad_norm": 0.527346670627594, "learning_rate": 7.171205831730283e-05, "loss": 1.9819, "step": 5598 }, { "epoch": 1.699499165275459, "grad_norm": 0.5274780988693237, "learning_rate": 7.170699605143263e-05, "loss": 2.0423, "step": 5599 }, { "epoch": 1.6998027014721506, "grad_norm": 0.6172202825546265, "learning_rate": 7.170193378556242e-05, "loss": 1.5017, "step": 5600 }, { "epoch": 1.700106237668842, "grad_norm": 0.6280341148376465, "learning_rate": 7.169687151969221e-05, "loss": 1.8298, "step": 5601 }, { "epoch": 1.7004097738655335, "grad_norm": 0.5946933627128601, "learning_rate": 7.169180925382202e-05, "loss": 1.5956, "step": 5602 }, { "epoch": 1.700713310062225, "grad_norm": 0.5152033567428589, "learning_rate": 7.168674698795182e-05, "loss": 1.9501, "step": 5603 }, { "epoch": 1.7010168462589164, "grad_norm": 0.7176896333694458, "learning_rate": 7.168168472208161e-05, "loss": 1.8614, "step": 5604 }, { "epoch": 1.7013203824556078, "grad_norm": 0.5222963690757751, "learning_rate": 7.167662245621141e-05, "loss": 1.8237, "step": 5605 }, { "epoch": 1.7016239186522992, "grad_norm": 0.7052047848701477, "learning_rate": 7.16715601903412e-05, "loss": 1.4014, "step": 5606 }, { "epoch": 1.7019274548489909, "grad_norm": 0.5208356380462646, "learning_rate": 7.1666497924471e-05, "loss": 1.4113, "step": 5607 }, { "epoch": 1.702230991045682, "grad_norm": 0.6521908044815063, "learning_rate": 7.166143565860079e-05, "loss": 1.6173, "step": 5608 }, { "epoch": 1.7025345272423738, "grad_norm": 0.5605899691581726, "learning_rate": 7.165637339273059e-05, "loss": 1.4772, "step": 5609 }, { "epoch": 1.702838063439065, "grad_norm": 0.45523953437805176, "learning_rate": 7.165131112686038e-05, "loss": 1.5805, "step": 5610 }, { "epoch": 1.7031415996357566, "grad_norm": 0.8355624675750732, "learning_rate": 7.164624886099019e-05, "loss": 1.7984, "step": 5611 }, { "epoch": 1.703445135832448, "grad_norm": 0.4628305733203888, "learning_rate": 7.164118659511998e-05, "loss": 1.4007, "step": 5612 }, { "epoch": 1.7037486720291395, "grad_norm": 0.47250911593437195, "learning_rate": 7.163612432924978e-05, "loss": 1.836, "step": 5613 }, { "epoch": 1.704052208225831, "grad_norm": 0.43420594930648804, "learning_rate": 7.163106206337957e-05, "loss": 1.3316, "step": 5614 }, { "epoch": 1.7043557444225224, "grad_norm": 0.5203584432601929, "learning_rate": 7.162599979750937e-05, "loss": 2.0178, "step": 5615 }, { "epoch": 1.7046592806192138, "grad_norm": 0.6097245216369629, "learning_rate": 7.162093753163916e-05, "loss": 1.3596, "step": 5616 }, { "epoch": 1.7049628168159052, "grad_norm": 0.6182841062545776, "learning_rate": 7.161587526576896e-05, "loss": 1.5702, "step": 5617 }, { "epoch": 1.7052663530125969, "grad_norm": 0.5049747228622437, "learning_rate": 7.161081299989875e-05, "loss": 1.7493, "step": 5618 }, { "epoch": 1.705569889209288, "grad_norm": 0.5657998323440552, "learning_rate": 7.160575073402855e-05, "loss": 1.8784, "step": 5619 }, { "epoch": 1.7058734254059797, "grad_norm": 0.6417815089225769, "learning_rate": 7.160068846815836e-05, "loss": 1.5989, "step": 5620 }, { "epoch": 1.706176961602671, "grad_norm": 0.5697042942047119, "learning_rate": 7.159562620228815e-05, "loss": 1.7344, "step": 5621 }, { "epoch": 1.7064804977993626, "grad_norm": 0.6542501449584961, "learning_rate": 7.159056393641795e-05, "loss": 1.7907, "step": 5622 }, { "epoch": 1.706784033996054, "grad_norm": 0.5756997466087341, "learning_rate": 7.158550167054774e-05, "loss": 1.7232, "step": 5623 }, { "epoch": 1.7070875701927455, "grad_norm": 0.43723130226135254, "learning_rate": 7.158043940467754e-05, "loss": 1.6653, "step": 5624 }, { "epoch": 1.707391106389437, "grad_norm": 0.5785560011863708, "learning_rate": 7.157537713880733e-05, "loss": 1.3398, "step": 5625 }, { "epoch": 1.7076946425861284, "grad_norm": 0.7036682367324829, "learning_rate": 7.157031487293713e-05, "loss": 1.7571, "step": 5626 }, { "epoch": 1.70799817878282, "grad_norm": 0.9839090704917908, "learning_rate": 7.156525260706692e-05, "loss": 1.5356, "step": 5627 }, { "epoch": 1.7083017149795112, "grad_norm": 0.4686150550842285, "learning_rate": 7.156019034119672e-05, "loss": 1.7265, "step": 5628 }, { "epoch": 1.7086052511762029, "grad_norm": 0.5854867100715637, "learning_rate": 7.155512807532651e-05, "loss": 1.9357, "step": 5629 }, { "epoch": 1.708908787372894, "grad_norm": 0.5611643195152283, "learning_rate": 7.155006580945632e-05, "loss": 1.416, "step": 5630 }, { "epoch": 1.7092123235695857, "grad_norm": 0.5986021161079407, "learning_rate": 7.154500354358611e-05, "loss": 1.5378, "step": 5631 }, { "epoch": 1.7095158597662772, "grad_norm": 0.5221402049064636, "learning_rate": 7.153994127771591e-05, "loss": 1.6811, "step": 5632 }, { "epoch": 1.7098193959629686, "grad_norm": 0.5451841950416565, "learning_rate": 7.15348790118457e-05, "loss": 1.7699, "step": 5633 }, { "epoch": 1.71012293215966, "grad_norm": 0.44219285249710083, "learning_rate": 7.15298167459755e-05, "loss": 1.3474, "step": 5634 }, { "epoch": 1.7104264683563515, "grad_norm": 0.5203813910484314, "learning_rate": 7.152475448010529e-05, "loss": 1.3492, "step": 5635 }, { "epoch": 1.710730004553043, "grad_norm": 0.538673460483551, "learning_rate": 7.151969221423509e-05, "loss": 1.7115, "step": 5636 }, { "epoch": 1.7110335407497343, "grad_norm": 0.4732709527015686, "learning_rate": 7.151462994836488e-05, "loss": 1.8847, "step": 5637 }, { "epoch": 1.711337076946426, "grad_norm": 0.619422435760498, "learning_rate": 7.150956768249468e-05, "loss": 1.7269, "step": 5638 }, { "epoch": 1.7116406131431172, "grad_norm": 0.38589712977409363, "learning_rate": 7.150450541662448e-05, "loss": 1.6737, "step": 5639 }, { "epoch": 1.7119441493398089, "grad_norm": 0.514140784740448, "learning_rate": 7.149944315075428e-05, "loss": 1.8193, "step": 5640 }, { "epoch": 1.7122476855365, "grad_norm": 4.0823493003845215, "learning_rate": 7.149438088488409e-05, "loss": 1.8681, "step": 5641 }, { "epoch": 1.7125512217331917, "grad_norm": 0.6078541874885559, "learning_rate": 7.148931861901388e-05, "loss": 1.7254, "step": 5642 }, { "epoch": 1.7128547579298832, "grad_norm": 0.5429568886756897, "learning_rate": 7.148425635314368e-05, "loss": 1.7894, "step": 5643 }, { "epoch": 1.7131582941265746, "grad_norm": 0.4650183320045471, "learning_rate": 7.147919408727347e-05, "loss": 1.4881, "step": 5644 }, { "epoch": 1.713461830323266, "grad_norm": 0.5098140835762024, "learning_rate": 7.147413182140327e-05, "loss": 1.7343, "step": 5645 }, { "epoch": 1.7137653665199575, "grad_norm": 0.5701392889022827, "learning_rate": 7.146906955553306e-05, "loss": 1.6419, "step": 5646 }, { "epoch": 1.714068902716649, "grad_norm": 0.6229302883148193, "learning_rate": 7.146400728966286e-05, "loss": 1.4583, "step": 5647 }, { "epoch": 1.7143724389133403, "grad_norm": 0.6150268912315369, "learning_rate": 7.145894502379265e-05, "loss": 1.4473, "step": 5648 }, { "epoch": 1.714675975110032, "grad_norm": 0.5583786964416504, "learning_rate": 7.145388275792245e-05, "loss": 1.7754, "step": 5649 }, { "epoch": 1.7149795113067232, "grad_norm": 0.4834759831428528, "learning_rate": 7.144882049205225e-05, "loss": 1.1987, "step": 5650 }, { "epoch": 1.7152830475034149, "grad_norm": 0.6216395497322083, "learning_rate": 7.144375822618205e-05, "loss": 2.0237, "step": 5651 }, { "epoch": 1.715586583700106, "grad_norm": 0.4688428044319153, "learning_rate": 7.143869596031184e-05, "loss": 0.7604, "step": 5652 }, { "epoch": 1.7158901198967977, "grad_norm": 0.45872947573661804, "learning_rate": 7.143363369444164e-05, "loss": 1.4403, "step": 5653 }, { "epoch": 1.7161936560934892, "grad_norm": 0.49328091740608215, "learning_rate": 7.142857142857143e-05, "loss": 1.4274, "step": 5654 }, { "epoch": 1.7164971922901806, "grad_norm": 0.5940247178077698, "learning_rate": 7.142350916270123e-05, "loss": 1.6925, "step": 5655 }, { "epoch": 1.716800728486872, "grad_norm": 0.4424358308315277, "learning_rate": 7.141844689683102e-05, "loss": 1.5618, "step": 5656 }, { "epoch": 1.7171042646835635, "grad_norm": 0.4610697329044342, "learning_rate": 7.141338463096082e-05, "loss": 1.2538, "step": 5657 }, { "epoch": 1.7174078008802551, "grad_norm": 0.5324227809906006, "learning_rate": 7.140832236509061e-05, "loss": 1.4877, "step": 5658 }, { "epoch": 1.7177113370769463, "grad_norm": 0.5017738938331604, "learning_rate": 7.140326009922042e-05, "loss": 1.7222, "step": 5659 }, { "epoch": 1.718014873273638, "grad_norm": 0.5225081443786621, "learning_rate": 7.139819783335022e-05, "loss": 1.6279, "step": 5660 }, { "epoch": 1.7183184094703292, "grad_norm": 0.49963754415512085, "learning_rate": 7.139313556748001e-05, "loss": 1.6897, "step": 5661 }, { "epoch": 1.7186219456670209, "grad_norm": 0.5684502124786377, "learning_rate": 7.13880733016098e-05, "loss": 1.9999, "step": 5662 }, { "epoch": 1.7189254818637123, "grad_norm": 0.560808002948761, "learning_rate": 7.13830110357396e-05, "loss": 1.7136, "step": 5663 }, { "epoch": 1.7192290180604037, "grad_norm": 0.6076765656471252, "learning_rate": 7.13779487698694e-05, "loss": 1.5934, "step": 5664 }, { "epoch": 1.7195325542570952, "grad_norm": 0.47294458746910095, "learning_rate": 7.137288650399919e-05, "loss": 1.3456, "step": 5665 }, { "epoch": 1.7198360904537866, "grad_norm": 0.7062551975250244, "learning_rate": 7.136782423812899e-05, "loss": 1.3726, "step": 5666 }, { "epoch": 1.720139626650478, "grad_norm": 0.6528676152229309, "learning_rate": 7.136276197225878e-05, "loss": 1.3429, "step": 5667 }, { "epoch": 1.7204431628471695, "grad_norm": 0.6872678995132446, "learning_rate": 7.135769970638857e-05, "loss": 1.1651, "step": 5668 }, { "epoch": 1.7207466990438611, "grad_norm": 0.8360339403152466, "learning_rate": 7.135263744051838e-05, "loss": 1.7624, "step": 5669 }, { "epoch": 1.7210502352405523, "grad_norm": 0.45598936080932617, "learning_rate": 7.134757517464818e-05, "loss": 1.818, "step": 5670 }, { "epoch": 1.721353771437244, "grad_norm": 0.5153748989105225, "learning_rate": 7.134251290877797e-05, "loss": 1.8247, "step": 5671 }, { "epoch": 1.7216573076339352, "grad_norm": 0.5611364245414734, "learning_rate": 7.133745064290777e-05, "loss": 1.4998, "step": 5672 }, { "epoch": 1.7219608438306269, "grad_norm": 0.6226168274879456, "learning_rate": 7.133238837703756e-05, "loss": 1.7166, "step": 5673 }, { "epoch": 1.7222643800273183, "grad_norm": 0.5680972933769226, "learning_rate": 7.132732611116736e-05, "loss": 0.956, "step": 5674 }, { "epoch": 1.7225679162240097, "grad_norm": 0.522735595703125, "learning_rate": 7.132226384529715e-05, "loss": 1.7789, "step": 5675 }, { "epoch": 1.7228714524207012, "grad_norm": 0.39815565943717957, "learning_rate": 7.131720157942695e-05, "loss": 1.639, "step": 5676 }, { "epoch": 1.7231749886173926, "grad_norm": 0.5538575053215027, "learning_rate": 7.131213931355674e-05, "loss": 1.7431, "step": 5677 }, { "epoch": 1.723478524814084, "grad_norm": 0.6126648783683777, "learning_rate": 7.130707704768655e-05, "loss": 1.571, "step": 5678 }, { "epoch": 1.7237820610107755, "grad_norm": 0.6345686316490173, "learning_rate": 7.130201478181634e-05, "loss": 1.6244, "step": 5679 }, { "epoch": 1.7240855972074671, "grad_norm": 0.5709595084190369, "learning_rate": 7.129695251594614e-05, "loss": 1.6958, "step": 5680 }, { "epoch": 1.7243891334041583, "grad_norm": 0.5866050124168396, "learning_rate": 7.129189025007593e-05, "loss": 1.5665, "step": 5681 }, { "epoch": 1.72469266960085, "grad_norm": 0.5642903447151184, "learning_rate": 7.128682798420573e-05, "loss": 1.6998, "step": 5682 }, { "epoch": 1.7249962057975412, "grad_norm": 0.4493815302848816, "learning_rate": 7.128176571833554e-05, "loss": 1.916, "step": 5683 }, { "epoch": 1.7252997419942329, "grad_norm": 0.5203521251678467, "learning_rate": 7.127670345246533e-05, "loss": 1.4566, "step": 5684 }, { "epoch": 1.7256032781909243, "grad_norm": 0.42749977111816406, "learning_rate": 7.127164118659513e-05, "loss": 1.138, "step": 5685 }, { "epoch": 1.7259068143876157, "grad_norm": 0.535605788230896, "learning_rate": 7.126657892072492e-05, "loss": 1.449, "step": 5686 }, { "epoch": 1.7262103505843072, "grad_norm": 0.5324421525001526, "learning_rate": 7.126151665485472e-05, "loss": 1.6838, "step": 5687 }, { "epoch": 1.7265138867809986, "grad_norm": 0.48239511251449585, "learning_rate": 7.125645438898451e-05, "loss": 1.656, "step": 5688 }, { "epoch": 1.7268174229776903, "grad_norm": 0.44394174218177795, "learning_rate": 7.125139212311432e-05, "loss": 1.2908, "step": 5689 }, { "epoch": 1.7271209591743815, "grad_norm": 0.6110196709632874, "learning_rate": 7.124632985724411e-05, "loss": 1.0364, "step": 5690 }, { "epoch": 1.7274244953710731, "grad_norm": 0.4668317139148712, "learning_rate": 7.124126759137391e-05, "loss": 2.1075, "step": 5691 }, { "epoch": 1.7277280315677643, "grad_norm": 0.79306960105896, "learning_rate": 7.12362053255037e-05, "loss": 1.1159, "step": 5692 }, { "epoch": 1.728031567764456, "grad_norm": 0.7084735631942749, "learning_rate": 7.12311430596335e-05, "loss": 1.9203, "step": 5693 }, { "epoch": 1.7283351039611472, "grad_norm": 0.5400936007499695, "learning_rate": 7.12260807937633e-05, "loss": 1.6333, "step": 5694 }, { "epoch": 1.7286386401578389, "grad_norm": 0.5740994215011597, "learning_rate": 7.122101852789309e-05, "loss": 1.7508, "step": 5695 }, { "epoch": 1.7289421763545303, "grad_norm": 0.7395409345626831, "learning_rate": 7.121595626202288e-05, "loss": 1.5679, "step": 5696 }, { "epoch": 1.7292457125512217, "grad_norm": 0.5085437893867493, "learning_rate": 7.121089399615268e-05, "loss": 1.6915, "step": 5697 }, { "epoch": 1.7295492487479132, "grad_norm": 0.5388838648796082, "learning_rate": 7.120583173028249e-05, "loss": 1.3938, "step": 5698 }, { "epoch": 1.7298527849446046, "grad_norm": 0.46243810653686523, "learning_rate": 7.120076946441228e-05, "loss": 1.3695, "step": 5699 }, { "epoch": 1.7301563211412962, "grad_norm": 0.5669201612472534, "learning_rate": 7.119570719854208e-05, "loss": 1.7704, "step": 5700 }, { "epoch": 1.7304598573379875, "grad_norm": 0.5119979977607727, "learning_rate": 7.119064493267187e-05, "loss": 1.6959, "step": 5701 }, { "epoch": 1.7307633935346791, "grad_norm": 0.6088730096817017, "learning_rate": 7.118558266680167e-05, "loss": 1.6063, "step": 5702 }, { "epoch": 1.7310669297313703, "grad_norm": 0.510960578918457, "learning_rate": 7.118052040093146e-05, "loss": 1.7251, "step": 5703 }, { "epoch": 1.731370465928062, "grad_norm": 0.6074891090393066, "learning_rate": 7.117545813506125e-05, "loss": 1.8498, "step": 5704 }, { "epoch": 1.7316740021247534, "grad_norm": 0.6876358389854431, "learning_rate": 7.117039586919105e-05, "loss": 1.3767, "step": 5705 }, { "epoch": 1.7319775383214449, "grad_norm": 0.4814869165420532, "learning_rate": 7.116533360332084e-05, "loss": 2.0438, "step": 5706 }, { "epoch": 1.7322810745181363, "grad_norm": 0.5223357081413269, "learning_rate": 7.116027133745064e-05, "loss": 1.434, "step": 5707 }, { "epoch": 1.7325846107148277, "grad_norm": 0.4965895116329193, "learning_rate": 7.115520907158045e-05, "loss": 1.5875, "step": 5708 }, { "epoch": 1.7328881469115192, "grad_norm": 0.5907561182975769, "learning_rate": 7.115014680571024e-05, "loss": 1.0798, "step": 5709 }, { "epoch": 1.7331916831082106, "grad_norm": 0.5148065090179443, "learning_rate": 7.114508453984004e-05, "loss": 1.3992, "step": 5710 }, { "epoch": 1.7334952193049022, "grad_norm": 0.5626041293144226, "learning_rate": 7.114002227396983e-05, "loss": 1.6685, "step": 5711 }, { "epoch": 1.7337987555015935, "grad_norm": 0.5806806087493896, "learning_rate": 7.113496000809963e-05, "loss": 1.3504, "step": 5712 }, { "epoch": 1.7341022916982851, "grad_norm": 0.5926372408866882, "learning_rate": 7.112989774222942e-05, "loss": 1.9902, "step": 5713 }, { "epoch": 1.7344058278949763, "grad_norm": 0.5233840346336365, "learning_rate": 7.112483547635922e-05, "loss": 1.7358, "step": 5714 }, { "epoch": 1.734709364091668, "grad_norm": 0.5744931101799011, "learning_rate": 7.111977321048901e-05, "loss": 1.7516, "step": 5715 }, { "epoch": 1.7350129002883594, "grad_norm": 0.9743430018424988, "learning_rate": 7.11147109446188e-05, "loss": 1.3766, "step": 5716 }, { "epoch": 1.7353164364850509, "grad_norm": 0.6813448667526245, "learning_rate": 7.110964867874861e-05, "loss": 1.5492, "step": 5717 }, { "epoch": 1.7356199726817423, "grad_norm": 0.5910537838935852, "learning_rate": 7.110458641287841e-05, "loss": 1.5894, "step": 5718 }, { "epoch": 1.7359235088784337, "grad_norm": 0.4935073256492615, "learning_rate": 7.10995241470082e-05, "loss": 1.3373, "step": 5719 }, { "epoch": 1.7362270450751254, "grad_norm": 0.596523642539978, "learning_rate": 7.1094461881138e-05, "loss": 1.6602, "step": 5720 }, { "epoch": 1.7365305812718166, "grad_norm": 0.5797901153564453, "learning_rate": 7.10893996152678e-05, "loss": 1.4616, "step": 5721 }, { "epoch": 1.7368341174685082, "grad_norm": 0.557640016078949, "learning_rate": 7.108433734939759e-05, "loss": 1.5423, "step": 5722 }, { "epoch": 1.7371376536651995, "grad_norm": 1.0686063766479492, "learning_rate": 7.107927508352738e-05, "loss": 1.7067, "step": 5723 }, { "epoch": 1.737441189861891, "grad_norm": 0.6175087690353394, "learning_rate": 7.107421281765718e-05, "loss": 1.2277, "step": 5724 }, { "epoch": 1.7377447260585823, "grad_norm": 0.5334296226501465, "learning_rate": 7.106915055178697e-05, "loss": 1.4583, "step": 5725 }, { "epoch": 1.738048262255274, "grad_norm": 0.5804897546768188, "learning_rate": 7.106408828591678e-05, "loss": 1.8278, "step": 5726 }, { "epoch": 1.7383517984519654, "grad_norm": 0.5825898051261902, "learning_rate": 7.105902602004658e-05, "loss": 1.3466, "step": 5727 }, { "epoch": 1.7386553346486568, "grad_norm": 0.5392926335334778, "learning_rate": 7.105396375417638e-05, "loss": 1.6765, "step": 5728 }, { "epoch": 1.7389588708453483, "grad_norm": 0.5440019965171814, "learning_rate": 7.104890148830618e-05, "loss": 1.5281, "step": 5729 }, { "epoch": 1.7392624070420397, "grad_norm": 0.4941781461238861, "learning_rate": 7.104383922243597e-05, "loss": 1.2593, "step": 5730 }, { "epoch": 1.7395659432387314, "grad_norm": 0.37533116340637207, "learning_rate": 7.103877695656577e-05, "loss": 1.8643, "step": 5731 }, { "epoch": 1.7398694794354226, "grad_norm": 1.0915099382400513, "learning_rate": 7.103371469069556e-05, "loss": 1.2421, "step": 5732 }, { "epoch": 1.7401730156321142, "grad_norm": 0.557999312877655, "learning_rate": 7.102865242482536e-05, "loss": 1.6621, "step": 5733 }, { "epoch": 1.7404765518288055, "grad_norm": 0.4855442941188812, "learning_rate": 7.102359015895515e-05, "loss": 1.1559, "step": 5734 }, { "epoch": 1.740780088025497, "grad_norm": 0.6112437844276428, "learning_rate": 7.101852789308495e-05, "loss": 1.8523, "step": 5735 }, { "epoch": 1.7410836242221885, "grad_norm": 0.5963307023048401, "learning_rate": 7.101346562721474e-05, "loss": 1.3682, "step": 5736 }, { "epoch": 1.74138716041888, "grad_norm": 0.5917012095451355, "learning_rate": 7.100840336134455e-05, "loss": 1.8957, "step": 5737 }, { "epoch": 1.7416906966155714, "grad_norm": 0.5097271800041199, "learning_rate": 7.100334109547435e-05, "loss": 1.658, "step": 5738 }, { "epoch": 1.7419942328122628, "grad_norm": 0.6804052591323853, "learning_rate": 7.099827882960414e-05, "loss": 0.9973, "step": 5739 }, { "epoch": 1.7422977690089543, "grad_norm": 0.700357973575592, "learning_rate": 7.099321656373394e-05, "loss": 1.5626, "step": 5740 }, { "epoch": 1.7426013052056457, "grad_norm": 0.7932068109512329, "learning_rate": 7.098815429786373e-05, "loss": 2.051, "step": 5741 }, { "epoch": 1.7429048414023374, "grad_norm": 0.5094744563102722, "learning_rate": 7.098309203199352e-05, "loss": 1.4574, "step": 5742 }, { "epoch": 1.7432083775990286, "grad_norm": 0.7267150282859802, "learning_rate": 7.097802976612332e-05, "loss": 1.262, "step": 5743 }, { "epoch": 1.7435119137957202, "grad_norm": 0.6010065674781799, "learning_rate": 7.097296750025311e-05, "loss": 1.4556, "step": 5744 }, { "epoch": 1.7438154499924114, "grad_norm": 0.6587292551994324, "learning_rate": 7.096790523438291e-05, "loss": 1.6217, "step": 5745 }, { "epoch": 1.744118986189103, "grad_norm": 0.7218916416168213, "learning_rate": 7.09628429685127e-05, "loss": 1.8298, "step": 5746 }, { "epoch": 1.7444225223857945, "grad_norm": 0.6167722344398499, "learning_rate": 7.095778070264251e-05, "loss": 1.5336, "step": 5747 }, { "epoch": 1.744726058582486, "grad_norm": 0.5541715025901794, "learning_rate": 7.095271843677231e-05, "loss": 1.7352, "step": 5748 }, { "epoch": 1.7450295947791774, "grad_norm": 0.5876067280769348, "learning_rate": 7.09476561709021e-05, "loss": 2.0744, "step": 5749 }, { "epoch": 1.7453331309758688, "grad_norm": 0.5202496647834778, "learning_rate": 7.09425939050319e-05, "loss": 1.6327, "step": 5750 }, { "epoch": 1.7456366671725603, "grad_norm": 0.5222525596618652, "learning_rate": 7.093753163916169e-05, "loss": 1.8234, "step": 5751 }, { "epoch": 1.7459402033692517, "grad_norm": 0.6135051846504211, "learning_rate": 7.093246937329149e-05, "loss": 1.5676, "step": 5752 }, { "epoch": 1.7462437395659434, "grad_norm": 0.6794110536575317, "learning_rate": 7.092740710742128e-05, "loss": 1.9735, "step": 5753 }, { "epoch": 1.7465472757626346, "grad_norm": 0.561379611492157, "learning_rate": 7.092234484155108e-05, "loss": 1.4065, "step": 5754 }, { "epoch": 1.7468508119593262, "grad_norm": 0.4962792992591858, "learning_rate": 7.091728257568087e-05, "loss": 1.3181, "step": 5755 }, { "epoch": 1.7471543481560174, "grad_norm": 0.5877513289451599, "learning_rate": 7.091222030981068e-05, "loss": 1.7103, "step": 5756 }, { "epoch": 1.747457884352709, "grad_norm": 0.5663416981697083, "learning_rate": 7.090715804394047e-05, "loss": 1.1936, "step": 5757 }, { "epoch": 1.7477614205494005, "grad_norm": 0.5573774576187134, "learning_rate": 7.090209577807027e-05, "loss": 1.7181, "step": 5758 }, { "epoch": 1.748064956746092, "grad_norm": 0.8682475686073303, "learning_rate": 7.089703351220006e-05, "loss": 1.8022, "step": 5759 }, { "epoch": 1.7483684929427834, "grad_norm": 0.444024920463562, "learning_rate": 7.089197124632986e-05, "loss": 1.9491, "step": 5760 }, { "epoch": 1.7486720291394748, "grad_norm": 0.5527752041816711, "learning_rate": 7.088690898045965e-05, "loss": 1.2612, "step": 5761 }, { "epoch": 1.7489755653361665, "grad_norm": 0.514806866645813, "learning_rate": 7.088184671458945e-05, "loss": 2.1253, "step": 5762 }, { "epoch": 1.7492791015328577, "grad_norm": 0.5180836319923401, "learning_rate": 7.087678444871924e-05, "loss": 1.4736, "step": 5763 }, { "epoch": 1.7495826377295494, "grad_norm": 0.7507419586181641, "learning_rate": 7.087172218284904e-05, "loss": 1.1092, "step": 5764 }, { "epoch": 1.7498861739262406, "grad_norm": 0.6647083163261414, "learning_rate": 7.086665991697885e-05, "loss": 1.6887, "step": 5765 }, { "epoch": 1.7501897101229322, "grad_norm": 0.5800061225891113, "learning_rate": 7.086159765110864e-05, "loss": 1.7121, "step": 5766 }, { "epoch": 1.7504932463196237, "grad_norm": 0.5175750255584717, "learning_rate": 7.085653538523844e-05, "loss": 1.373, "step": 5767 }, { "epoch": 1.750796782516315, "grad_norm": 0.4834209978580475, "learning_rate": 7.085147311936823e-05, "loss": 1.2436, "step": 5768 }, { "epoch": 1.7511003187130065, "grad_norm": 0.4929172396659851, "learning_rate": 7.084641085349803e-05, "loss": 1.1829, "step": 5769 }, { "epoch": 1.751403854909698, "grad_norm": 0.5788953900337219, "learning_rate": 7.084134858762782e-05, "loss": 1.7369, "step": 5770 }, { "epoch": 1.7517073911063894, "grad_norm": 0.65016108751297, "learning_rate": 7.083628632175761e-05, "loss": 1.3848, "step": 5771 }, { "epoch": 1.7520109273030808, "grad_norm": 0.6161928772926331, "learning_rate": 7.083122405588742e-05, "loss": 1.5332, "step": 5772 }, { "epoch": 1.7523144634997725, "grad_norm": 0.6327143907546997, "learning_rate": 7.082616179001722e-05, "loss": 1.742, "step": 5773 }, { "epoch": 1.7526179996964637, "grad_norm": 0.5929992198944092, "learning_rate": 7.082109952414701e-05, "loss": 1.6308, "step": 5774 }, { "epoch": 1.7529215358931554, "grad_norm": 0.5704889893531799, "learning_rate": 7.081603725827681e-05, "loss": 1.4623, "step": 5775 }, { "epoch": 1.7532250720898466, "grad_norm": 0.47019699215888977, "learning_rate": 7.081097499240662e-05, "loss": 1.4923, "step": 5776 }, { "epoch": 1.7535286082865382, "grad_norm": 0.5670586824417114, "learning_rate": 7.080591272653641e-05, "loss": 1.9408, "step": 5777 }, { "epoch": 1.7538321444832297, "grad_norm": 0.6063165664672852, "learning_rate": 7.08008504606662e-05, "loss": 0.6937, "step": 5778 }, { "epoch": 1.754135680679921, "grad_norm": 0.6302130222320557, "learning_rate": 7.0795788194796e-05, "loss": 1.7437, "step": 5779 }, { "epoch": 1.7544392168766125, "grad_norm": 0.5551527738571167, "learning_rate": 7.07907259289258e-05, "loss": 1.8204, "step": 5780 }, { "epoch": 1.754742753073304, "grad_norm": 0.5558610558509827, "learning_rate": 7.078566366305559e-05, "loss": 1.2973, "step": 5781 }, { "epoch": 1.7550462892699954, "grad_norm": 0.6446595788002014, "learning_rate": 7.078060139718538e-05, "loss": 1.2467, "step": 5782 }, { "epoch": 1.7553498254666868, "grad_norm": 0.5184798836708069, "learning_rate": 7.077553913131518e-05, "loss": 1.7002, "step": 5783 }, { "epoch": 1.7556533616633785, "grad_norm": 0.7394595742225647, "learning_rate": 7.077047686544497e-05, "loss": 1.8713, "step": 5784 }, { "epoch": 1.7559568978600697, "grad_norm": 0.5985365509986877, "learning_rate": 7.076541459957477e-05, "loss": 1.8934, "step": 5785 }, { "epoch": 1.7562604340567614, "grad_norm": 0.5682666897773743, "learning_rate": 7.076035233370458e-05, "loss": 1.9157, "step": 5786 }, { "epoch": 1.7565639702534526, "grad_norm": 0.6347185373306274, "learning_rate": 7.075529006783437e-05, "loss": 1.7947, "step": 5787 }, { "epoch": 1.7568675064501442, "grad_norm": 0.7159053683280945, "learning_rate": 7.075022780196417e-05, "loss": 1.6663, "step": 5788 }, { "epoch": 1.7571710426468357, "grad_norm": 0.5242490172386169, "learning_rate": 7.074516553609396e-05, "loss": 1.6238, "step": 5789 }, { "epoch": 1.757474578843527, "grad_norm": 0.9489251971244812, "learning_rate": 7.074010327022376e-05, "loss": 1.729, "step": 5790 }, { "epoch": 1.7577781150402185, "grad_norm": 0.5605430006980896, "learning_rate": 7.073504100435355e-05, "loss": 1.7129, "step": 5791 }, { "epoch": 1.75808165123691, "grad_norm": 0.512266993522644, "learning_rate": 7.072997873848335e-05, "loss": 1.9685, "step": 5792 }, { "epoch": 1.7583851874336016, "grad_norm": 0.5452455878257751, "learning_rate": 7.072491647261314e-05, "loss": 2.1181, "step": 5793 }, { "epoch": 1.7586887236302928, "grad_norm": 0.9478041529655457, "learning_rate": 7.071985420674294e-05, "loss": 1.327, "step": 5794 }, { "epoch": 1.7589922598269845, "grad_norm": 0.881410539150238, "learning_rate": 7.071479194087274e-05, "loss": 1.4846, "step": 5795 }, { "epoch": 1.7592957960236757, "grad_norm": 0.5429690480232239, "learning_rate": 7.070972967500254e-05, "loss": 1.5724, "step": 5796 }, { "epoch": 1.7595993322203674, "grad_norm": 0.48222050070762634, "learning_rate": 7.070466740913233e-05, "loss": 1.8012, "step": 5797 }, { "epoch": 1.7599028684170588, "grad_norm": 0.5965589880943298, "learning_rate": 7.069960514326213e-05, "loss": 1.5959, "step": 5798 }, { "epoch": 1.7602064046137502, "grad_norm": 1.3540230989456177, "learning_rate": 7.069454287739192e-05, "loss": 1.1057, "step": 5799 }, { "epoch": 1.7605099408104417, "grad_norm": 0.6451812982559204, "learning_rate": 7.068948061152172e-05, "loss": 1.8921, "step": 5800 }, { "epoch": 1.760813477007133, "grad_norm": 0.535523533821106, "learning_rate": 7.068441834565151e-05, "loss": 1.9626, "step": 5801 }, { "epoch": 1.7611170132038245, "grad_norm": 0.6170904636383057, "learning_rate": 7.067935607978131e-05, "loss": 1.7806, "step": 5802 }, { "epoch": 1.761420549400516, "grad_norm": 0.4982869327068329, "learning_rate": 7.06742938139111e-05, "loss": 1.9225, "step": 5803 }, { "epoch": 1.7617240855972076, "grad_norm": 0.6037775874137878, "learning_rate": 7.066923154804091e-05, "loss": 1.7682, "step": 5804 }, { "epoch": 1.7620276217938988, "grad_norm": 0.535892128944397, "learning_rate": 7.06641692821707e-05, "loss": 1.8156, "step": 5805 }, { "epoch": 1.7623311579905905, "grad_norm": 0.5253079533576965, "learning_rate": 7.06591070163005e-05, "loss": 1.7642, "step": 5806 }, { "epoch": 1.7626346941872817, "grad_norm": 0.5811476111412048, "learning_rate": 7.06540447504303e-05, "loss": 1.8639, "step": 5807 }, { "epoch": 1.7629382303839733, "grad_norm": 0.5927221179008484, "learning_rate": 7.064898248456009e-05, "loss": 1.5973, "step": 5808 }, { "epoch": 1.7632417665806648, "grad_norm": 0.6458206176757812, "learning_rate": 7.064392021868988e-05, "loss": 1.6549, "step": 5809 }, { "epoch": 1.7635453027773562, "grad_norm": 0.4807237684726715, "learning_rate": 7.063885795281968e-05, "loss": 1.5492, "step": 5810 }, { "epoch": 1.7638488389740477, "grad_norm": 0.4759950637817383, "learning_rate": 7.063379568694947e-05, "loss": 1.7667, "step": 5811 }, { "epoch": 1.764152375170739, "grad_norm": 0.6172373294830322, "learning_rate": 7.062873342107927e-05, "loss": 1.4779, "step": 5812 }, { "epoch": 1.7644559113674305, "grad_norm": 0.5831183791160583, "learning_rate": 7.062367115520906e-05, "loss": 1.6003, "step": 5813 }, { "epoch": 1.764759447564122, "grad_norm": 0.4175955057144165, "learning_rate": 7.061860888933887e-05, "loss": 1.9885, "step": 5814 }, { "epoch": 1.7650629837608136, "grad_norm": 0.5079640746116638, "learning_rate": 7.061354662346867e-05, "loss": 1.8727, "step": 5815 }, { "epoch": 1.7653665199575048, "grad_norm": 0.5410943627357483, "learning_rate": 7.060848435759846e-05, "loss": 1.1928, "step": 5816 }, { "epoch": 1.7656700561541965, "grad_norm": 0.504618227481842, "learning_rate": 7.060342209172827e-05, "loss": 2.2719, "step": 5817 }, { "epoch": 1.7659735923508877, "grad_norm": 0.602778971195221, "learning_rate": 7.059835982585806e-05, "loss": 1.6486, "step": 5818 }, { "epoch": 1.7662771285475793, "grad_norm": 0.5493549108505249, "learning_rate": 7.059329755998786e-05, "loss": 1.3315, "step": 5819 }, { "epoch": 1.7665806647442708, "grad_norm": 0.5952966213226318, "learning_rate": 7.058823529411765e-05, "loss": 1.5493, "step": 5820 }, { "epoch": 1.7668842009409622, "grad_norm": 0.616404116153717, "learning_rate": 7.058317302824745e-05, "loss": 1.1801, "step": 5821 }, { "epoch": 1.7671877371376536, "grad_norm": 0.8588623404502869, "learning_rate": 7.057811076237724e-05, "loss": 1.5727, "step": 5822 }, { "epoch": 1.767491273334345, "grad_norm": 0.40923357009887695, "learning_rate": 7.057304849650704e-05, "loss": 1.3718, "step": 5823 }, { "epoch": 1.7677948095310367, "grad_norm": 0.5931923389434814, "learning_rate": 7.056798623063683e-05, "loss": 1.5876, "step": 5824 }, { "epoch": 1.768098345727728, "grad_norm": 0.51994788646698, "learning_rate": 7.056292396476664e-05, "loss": 1.8799, "step": 5825 }, { "epoch": 1.7684018819244196, "grad_norm": 0.5930038690567017, "learning_rate": 7.055786169889644e-05, "loss": 1.8165, "step": 5826 }, { "epoch": 1.7687054181211108, "grad_norm": 0.5225912928581238, "learning_rate": 7.055279943302623e-05, "loss": 1.3906, "step": 5827 }, { "epoch": 1.7690089543178025, "grad_norm": 0.45339706540107727, "learning_rate": 7.054773716715603e-05, "loss": 2.0455, "step": 5828 }, { "epoch": 1.769312490514494, "grad_norm": 1.311185359954834, "learning_rate": 7.054267490128582e-05, "loss": 1.5411, "step": 5829 }, { "epoch": 1.7696160267111853, "grad_norm": 0.5847686529159546, "learning_rate": 7.053761263541562e-05, "loss": 1.4527, "step": 5830 }, { "epoch": 1.7699195629078768, "grad_norm": 0.6320043802261353, "learning_rate": 7.053255036954541e-05, "loss": 1.2047, "step": 5831 }, { "epoch": 1.7702230991045682, "grad_norm": 0.7766631841659546, "learning_rate": 7.05274881036752e-05, "loss": 1.7178, "step": 5832 }, { "epoch": 1.7705266353012596, "grad_norm": 0.49876946210861206, "learning_rate": 7.0522425837805e-05, "loss": 1.7734, "step": 5833 }, { "epoch": 1.770830171497951, "grad_norm": 0.6119621396064758, "learning_rate": 7.051736357193481e-05, "loss": 1.7981, "step": 5834 }, { "epoch": 1.7711337076946427, "grad_norm": 0.5587700605392456, "learning_rate": 7.05123013060646e-05, "loss": 1.9976, "step": 5835 }, { "epoch": 1.771437243891334, "grad_norm": 0.6698164939880371, "learning_rate": 7.05072390401944e-05, "loss": 1.563, "step": 5836 }, { "epoch": 1.7717407800880256, "grad_norm": 0.5238813161849976, "learning_rate": 7.050217677432419e-05, "loss": 1.7543, "step": 5837 }, { "epoch": 1.7720443162847168, "grad_norm": 0.7682486176490784, "learning_rate": 7.049711450845399e-05, "loss": 1.6731, "step": 5838 }, { "epoch": 1.7723478524814085, "grad_norm": 0.4635404944419861, "learning_rate": 7.049205224258378e-05, "loss": 1.321, "step": 5839 }, { "epoch": 1.7726513886781, "grad_norm": 0.579387903213501, "learning_rate": 7.048698997671358e-05, "loss": 1.6947, "step": 5840 }, { "epoch": 1.7729549248747913, "grad_norm": 0.5389410257339478, "learning_rate": 7.048192771084337e-05, "loss": 1.5536, "step": 5841 }, { "epoch": 1.7732584610714828, "grad_norm": 0.6099681258201599, "learning_rate": 7.047686544497317e-05, "loss": 1.6241, "step": 5842 }, { "epoch": 1.7735619972681742, "grad_norm": 0.5654680728912354, "learning_rate": 7.047180317910298e-05, "loss": 1.6327, "step": 5843 }, { "epoch": 1.7738655334648656, "grad_norm": 0.643012285232544, "learning_rate": 7.046674091323277e-05, "loss": 1.6656, "step": 5844 }, { "epoch": 1.774169069661557, "grad_norm": 0.5325130224227905, "learning_rate": 7.046167864736256e-05, "loss": 1.5048, "step": 5845 }, { "epoch": 1.7744726058582487, "grad_norm": 0.42735743522644043, "learning_rate": 7.045661638149236e-05, "loss": 1.0482, "step": 5846 }, { "epoch": 1.77477614205494, "grad_norm": 0.5796374678611755, "learning_rate": 7.045155411562215e-05, "loss": 1.5523, "step": 5847 }, { "epoch": 1.7750796782516316, "grad_norm": 0.5250108242034912, "learning_rate": 7.044649184975195e-05, "loss": 1.8127, "step": 5848 }, { "epoch": 1.7753832144483228, "grad_norm": 0.9125270247459412, "learning_rate": 7.044142958388174e-05, "loss": 1.751, "step": 5849 }, { "epoch": 1.7756867506450145, "grad_norm": 0.6005438566207886, "learning_rate": 7.043636731801154e-05, "loss": 1.609, "step": 5850 }, { "epoch": 1.775990286841706, "grad_norm": 0.5507426261901855, "learning_rate": 7.043130505214133e-05, "loss": 1.7003, "step": 5851 }, { "epoch": 1.7762938230383973, "grad_norm": 0.6349962949752808, "learning_rate": 7.042624278627113e-05, "loss": 1.5438, "step": 5852 }, { "epoch": 1.7765973592350888, "grad_norm": 1.0367538928985596, "learning_rate": 7.042118052040094e-05, "loss": 1.5797, "step": 5853 }, { "epoch": 1.7769008954317802, "grad_norm": 0.6401512026786804, "learning_rate": 7.041611825453073e-05, "loss": 1.5123, "step": 5854 }, { "epoch": 1.7772044316284719, "grad_norm": 0.5885390043258667, "learning_rate": 7.041105598866053e-05, "loss": 1.0058, "step": 5855 }, { "epoch": 1.777507967825163, "grad_norm": 0.4374138414859772, "learning_rate": 7.040599372279032e-05, "loss": 1.7388, "step": 5856 }, { "epoch": 1.7778115040218547, "grad_norm": 0.5692790150642395, "learning_rate": 7.040093145692012e-05, "loss": 1.3957, "step": 5857 }, { "epoch": 1.778115040218546, "grad_norm": 0.6237412691116333, "learning_rate": 7.039586919104991e-05, "loss": 1.6133, "step": 5858 }, { "epoch": 1.7784185764152376, "grad_norm": 0.5770452618598938, "learning_rate": 7.03908069251797e-05, "loss": 1.6974, "step": 5859 }, { "epoch": 1.7787221126119288, "grad_norm": 0.5058612823486328, "learning_rate": 7.03857446593095e-05, "loss": 1.9792, "step": 5860 }, { "epoch": 1.7790256488086205, "grad_norm": 0.545733630657196, "learning_rate": 7.038068239343931e-05, "loss": 1.5064, "step": 5861 }, { "epoch": 1.779329185005312, "grad_norm": 0.6026157140731812, "learning_rate": 7.03756201275691e-05, "loss": 2.1842, "step": 5862 }, { "epoch": 1.7796327212020033, "grad_norm": 0.5493044257164001, "learning_rate": 7.03705578616989e-05, "loss": 1.8828, "step": 5863 }, { "epoch": 1.7799362573986948, "grad_norm": 0.666919469833374, "learning_rate": 7.03654955958287e-05, "loss": 1.1003, "step": 5864 }, { "epoch": 1.7802397935953862, "grad_norm": 0.529765248298645, "learning_rate": 7.03604333299585e-05, "loss": 2.1024, "step": 5865 }, { "epoch": 1.7805433297920779, "grad_norm": 0.7152818441390991, "learning_rate": 7.03553710640883e-05, "loss": 1.2247, "step": 5866 }, { "epoch": 1.780846865988769, "grad_norm": 0.5502989888191223, "learning_rate": 7.035030879821809e-05, "loss": 1.592, "step": 5867 }, { "epoch": 1.7811504021854607, "grad_norm": 0.47319549322128296, "learning_rate": 7.034524653234789e-05, "loss": 1.7258, "step": 5868 }, { "epoch": 1.781453938382152, "grad_norm": 0.5717188119888306, "learning_rate": 7.034018426647768e-05, "loss": 1.4437, "step": 5869 }, { "epoch": 1.7817574745788436, "grad_norm": 0.5776780843734741, "learning_rate": 7.033512200060748e-05, "loss": 1.7443, "step": 5870 }, { "epoch": 1.782061010775535, "grad_norm": 0.47379469871520996, "learning_rate": 7.033005973473727e-05, "loss": 1.5137, "step": 5871 }, { "epoch": 1.7823645469722265, "grad_norm": 0.49418696761131287, "learning_rate": 7.032499746886707e-05, "loss": 1.591, "step": 5872 }, { "epoch": 1.782668083168918, "grad_norm": 0.5168330669403076, "learning_rate": 7.031993520299687e-05, "loss": 1.9866, "step": 5873 }, { "epoch": 1.7829716193656093, "grad_norm": 0.5431966781616211, "learning_rate": 7.031487293712667e-05, "loss": 1.6641, "step": 5874 }, { "epoch": 1.7832751555623008, "grad_norm": 0.5647885799407959, "learning_rate": 7.030981067125646e-05, "loss": 1.2665, "step": 5875 }, { "epoch": 1.7835786917589922, "grad_norm": 0.5556349754333496, "learning_rate": 7.030474840538626e-05, "loss": 1.9022, "step": 5876 }, { "epoch": 1.7838822279556839, "grad_norm": 0.6246395707130432, "learning_rate": 7.029968613951605e-05, "loss": 1.1401, "step": 5877 }, { "epoch": 1.784185764152375, "grad_norm": 0.47130244970321655, "learning_rate": 7.029462387364585e-05, "loss": 1.8312, "step": 5878 }, { "epoch": 1.7844893003490667, "grad_norm": 0.49159568548202515, "learning_rate": 7.028956160777564e-05, "loss": 1.2571, "step": 5879 }, { "epoch": 1.784792836545758, "grad_norm": 0.9010233879089355, "learning_rate": 7.028449934190544e-05, "loss": 1.2944, "step": 5880 }, { "epoch": 1.7850963727424496, "grad_norm": 1.0043572187423706, "learning_rate": 7.027943707603523e-05, "loss": 1.2684, "step": 5881 }, { "epoch": 1.785399908939141, "grad_norm": 0.5217756032943726, "learning_rate": 7.027437481016504e-05, "loss": 1.7476, "step": 5882 }, { "epoch": 1.7857034451358325, "grad_norm": 0.6179821491241455, "learning_rate": 7.026931254429483e-05, "loss": 1.4536, "step": 5883 }, { "epoch": 1.786006981332524, "grad_norm": 0.5888343453407288, "learning_rate": 7.026425027842463e-05, "loss": 1.3274, "step": 5884 }, { "epoch": 1.7863105175292153, "grad_norm": 0.5379050970077515, "learning_rate": 7.025918801255442e-05, "loss": 1.6149, "step": 5885 }, { "epoch": 1.7866140537259068, "grad_norm": 0.7219739556312561, "learning_rate": 7.025412574668422e-05, "loss": 1.459, "step": 5886 }, { "epoch": 1.7869175899225982, "grad_norm": 0.4842709004878998, "learning_rate": 7.024906348081401e-05, "loss": 1.684, "step": 5887 }, { "epoch": 1.7872211261192898, "grad_norm": 0.575980544090271, "learning_rate": 7.024400121494381e-05, "loss": 1.7599, "step": 5888 }, { "epoch": 1.787524662315981, "grad_norm": 0.483018696308136, "learning_rate": 7.02389389490736e-05, "loss": 1.355, "step": 5889 }, { "epoch": 1.7878281985126727, "grad_norm": 0.7090207934379578, "learning_rate": 7.02338766832034e-05, "loss": 1.8047, "step": 5890 }, { "epoch": 1.788131734709364, "grad_norm": 0.5750283002853394, "learning_rate": 7.02288144173332e-05, "loss": 1.4583, "step": 5891 }, { "epoch": 1.7884352709060556, "grad_norm": 0.5741564631462097, "learning_rate": 7.0223752151463e-05, "loss": 1.875, "step": 5892 }, { "epoch": 1.788738807102747, "grad_norm": 0.5058529376983643, "learning_rate": 7.02186898855928e-05, "loss": 1.2128, "step": 5893 }, { "epoch": 1.7890423432994385, "grad_norm": 0.6110987067222595, "learning_rate": 7.021362761972259e-05, "loss": 1.9625, "step": 5894 }, { "epoch": 1.7893458794961299, "grad_norm": 0.594071626663208, "learning_rate": 7.020856535385239e-05, "loss": 1.862, "step": 5895 }, { "epoch": 1.7896494156928213, "grad_norm": 0.818956732749939, "learning_rate": 7.020350308798218e-05, "loss": 1.6269, "step": 5896 }, { "epoch": 1.789952951889513, "grad_norm": 0.5960368514060974, "learning_rate": 7.019844082211198e-05, "loss": 1.7182, "step": 5897 }, { "epoch": 1.7902564880862042, "grad_norm": 0.5818991661071777, "learning_rate": 7.019337855624177e-05, "loss": 1.9811, "step": 5898 }, { "epoch": 1.7905600242828958, "grad_norm": 0.4691188931465149, "learning_rate": 7.018831629037157e-05, "loss": 1.7591, "step": 5899 }, { "epoch": 1.790863560479587, "grad_norm": 0.4774368703365326, "learning_rate": 7.018325402450136e-05, "loss": 1.6917, "step": 5900 }, { "epoch": 1.7911670966762787, "grad_norm": 0.5307349562644958, "learning_rate": 7.017819175863117e-05, "loss": 1.743, "step": 5901 }, { "epoch": 1.7914706328729701, "grad_norm": 0.572593092918396, "learning_rate": 7.017312949276096e-05, "loss": 1.5527, "step": 5902 }, { "epoch": 1.7917741690696616, "grad_norm": 0.8130362033843994, "learning_rate": 7.016806722689076e-05, "loss": 1.9604, "step": 5903 }, { "epoch": 1.792077705266353, "grad_norm": 0.637360155582428, "learning_rate": 7.016300496102055e-05, "loss": 1.3893, "step": 5904 }, { "epoch": 1.7923812414630445, "grad_norm": 0.5618795156478882, "learning_rate": 7.015794269515035e-05, "loss": 1.8735, "step": 5905 }, { "epoch": 1.7926847776597359, "grad_norm": 0.4983277916908264, "learning_rate": 7.015288042928016e-05, "loss": 1.6255, "step": 5906 }, { "epoch": 1.7929883138564273, "grad_norm": 0.6057456731796265, "learning_rate": 7.014781816340995e-05, "loss": 1.6916, "step": 5907 }, { "epoch": 1.793291850053119, "grad_norm": 0.5264350771903992, "learning_rate": 7.014275589753975e-05, "loss": 1.5999, "step": 5908 }, { "epoch": 1.7935953862498102, "grad_norm": 1.0011577606201172, "learning_rate": 7.013769363166954e-05, "loss": 1.3513, "step": 5909 }, { "epoch": 1.7938989224465018, "grad_norm": 0.6390252709388733, "learning_rate": 7.013263136579934e-05, "loss": 1.4275, "step": 5910 }, { "epoch": 1.794202458643193, "grad_norm": 0.5232053995132446, "learning_rate": 7.012756909992913e-05, "loss": 1.6905, "step": 5911 }, { "epoch": 1.7945059948398847, "grad_norm": 0.6496378183364868, "learning_rate": 7.012250683405894e-05, "loss": 1.4173, "step": 5912 }, { "epoch": 1.7948095310365761, "grad_norm": 0.825714111328125, "learning_rate": 7.011744456818873e-05, "loss": 1.9885, "step": 5913 }, { "epoch": 1.7951130672332676, "grad_norm": 0.6410032510757446, "learning_rate": 7.011238230231853e-05, "loss": 1.7687, "step": 5914 }, { "epoch": 1.795416603429959, "grad_norm": 0.7020335793495178, "learning_rate": 7.010732003644832e-05, "loss": 2.0146, "step": 5915 }, { "epoch": 1.7957201396266504, "grad_norm": 1.3581609725952148, "learning_rate": 7.010225777057812e-05, "loss": 1.1118, "step": 5916 }, { "epoch": 1.7960236758233419, "grad_norm": 0.6407718658447266, "learning_rate": 7.009719550470791e-05, "loss": 1.0059, "step": 5917 }, { "epoch": 1.7963272120200333, "grad_norm": 0.5095187425613403, "learning_rate": 7.009213323883771e-05, "loss": 1.7162, "step": 5918 }, { "epoch": 1.796630748216725, "grad_norm": 0.5506390333175659, "learning_rate": 7.00870709729675e-05, "loss": 1.8015, "step": 5919 }, { "epoch": 1.7969342844134162, "grad_norm": 0.4706830084323883, "learning_rate": 7.00820087070973e-05, "loss": 1.8148, "step": 5920 }, { "epoch": 1.7972378206101078, "grad_norm": 0.4986935555934906, "learning_rate": 7.00769464412271e-05, "loss": 1.1621, "step": 5921 }, { "epoch": 1.797541356806799, "grad_norm": 0.5431498885154724, "learning_rate": 7.00718841753569e-05, "loss": 1.8391, "step": 5922 }, { "epoch": 1.7978448930034907, "grad_norm": 0.7347373962402344, "learning_rate": 7.00668219094867e-05, "loss": 1.2171, "step": 5923 }, { "epoch": 1.7981484292001821, "grad_norm": 0.5176038146018982, "learning_rate": 7.006175964361649e-05, "loss": 1.299, "step": 5924 }, { "epoch": 1.7984519653968736, "grad_norm": 0.5122864246368408, "learning_rate": 7.005669737774628e-05, "loss": 1.7784, "step": 5925 }, { "epoch": 1.798755501593565, "grad_norm": 0.5709760785102844, "learning_rate": 7.005163511187608e-05, "loss": 1.0928, "step": 5926 }, { "epoch": 1.7990590377902564, "grad_norm": 0.5653720498085022, "learning_rate": 7.004657284600587e-05, "loss": 1.7408, "step": 5927 }, { "epoch": 1.799362573986948, "grad_norm": 0.610870897769928, "learning_rate": 7.004151058013567e-05, "loss": 1.9208, "step": 5928 }, { "epoch": 1.7996661101836393, "grad_norm": 0.5883700847625732, "learning_rate": 7.003644831426546e-05, "loss": 1.7159, "step": 5929 }, { "epoch": 1.799969646380331, "grad_norm": 0.5038648247718811, "learning_rate": 7.003138604839526e-05, "loss": 1.6974, "step": 5930 }, { "epoch": 1.8002731825770222, "grad_norm": 0.4701935052871704, "learning_rate": 7.002632378252507e-05, "loss": 1.6792, "step": 5931 }, { "epoch": 1.8005767187737138, "grad_norm": 0.5894878506660461, "learning_rate": 7.002126151665486e-05, "loss": 1.8782, "step": 5932 }, { "epoch": 1.8008802549704053, "grad_norm": 0.5552791953086853, "learning_rate": 7.001619925078466e-05, "loss": 1.7514, "step": 5933 }, { "epoch": 1.8011837911670967, "grad_norm": 0.5044195652008057, "learning_rate": 7.001113698491445e-05, "loss": 1.6754, "step": 5934 }, { "epoch": 1.8014873273637881, "grad_norm": 0.5654546618461609, "learning_rate": 7.000607471904425e-05, "loss": 1.6864, "step": 5935 }, { "epoch": 1.8017908635604796, "grad_norm": 0.5445656776428223, "learning_rate": 7.000101245317404e-05, "loss": 1.498, "step": 5936 }, { "epoch": 1.802094399757171, "grad_norm": 0.5514572262763977, "learning_rate": 6.999595018730384e-05, "loss": 1.327, "step": 5937 }, { "epoch": 1.8023979359538624, "grad_norm": 0.546735942363739, "learning_rate": 6.999088792143363e-05, "loss": 1.5668, "step": 5938 }, { "epoch": 1.802701472150554, "grad_norm": 0.516412615776062, "learning_rate": 6.998582565556342e-05, "loss": 1.8425, "step": 5939 }, { "epoch": 1.8030050083472453, "grad_norm": 0.5725192427635193, "learning_rate": 6.998076338969323e-05, "loss": 1.8569, "step": 5940 }, { "epoch": 1.803308544543937, "grad_norm": 0.5454580783843994, "learning_rate": 6.997570112382303e-05, "loss": 1.9791, "step": 5941 }, { "epoch": 1.8036120807406282, "grad_norm": 0.4446016848087311, "learning_rate": 6.997063885795282e-05, "loss": 1.5544, "step": 5942 }, { "epoch": 1.8039156169373198, "grad_norm": 0.5859190821647644, "learning_rate": 6.996557659208262e-05, "loss": 1.5727, "step": 5943 }, { "epoch": 1.8042191531340113, "grad_norm": 0.614378809928894, "learning_rate": 6.996051432621241e-05, "loss": 1.7846, "step": 5944 }, { "epoch": 1.8045226893307027, "grad_norm": 0.6583629846572876, "learning_rate": 6.995545206034221e-05, "loss": 1.7894, "step": 5945 }, { "epoch": 1.8048262255273941, "grad_norm": 0.6289352774620056, "learning_rate": 6.9950389794472e-05, "loss": 2.2178, "step": 5946 }, { "epoch": 1.8051297617240856, "grad_norm": 0.5429802536964417, "learning_rate": 6.99453275286018e-05, "loss": 1.5751, "step": 5947 }, { "epoch": 1.805433297920777, "grad_norm": 0.4763343036174774, "learning_rate": 6.994026526273159e-05, "loss": 1.7212, "step": 5948 }, { "epoch": 1.8057368341174684, "grad_norm": 0.5531217455863953, "learning_rate": 6.99352029968614e-05, "loss": 1.4387, "step": 5949 }, { "epoch": 1.80604037031416, "grad_norm": 0.4886355698108673, "learning_rate": 6.99301407309912e-05, "loss": 1.8291, "step": 5950 }, { "epoch": 1.8063439065108513, "grad_norm": 0.4299159646034241, "learning_rate": 6.9925078465121e-05, "loss": 1.406, "step": 5951 }, { "epoch": 1.806647442707543, "grad_norm": 0.5187563896179199, "learning_rate": 6.99200161992508e-05, "loss": 1.7927, "step": 5952 }, { "epoch": 1.8069509789042342, "grad_norm": 0.5152180790901184, "learning_rate": 6.991495393338059e-05, "loss": 1.6718, "step": 5953 }, { "epoch": 1.8072545151009258, "grad_norm": 0.5023542046546936, "learning_rate": 6.990989166751039e-05, "loss": 1.6979, "step": 5954 }, { "epoch": 1.8075580512976173, "grad_norm": 0.5216313600540161, "learning_rate": 6.990482940164018e-05, "loss": 1.5704, "step": 5955 }, { "epoch": 1.8078615874943087, "grad_norm": 0.5471825003623962, "learning_rate": 6.989976713576998e-05, "loss": 1.835, "step": 5956 }, { "epoch": 1.8081651236910001, "grad_norm": 0.5783551931381226, "learning_rate": 6.989470486989977e-05, "loss": 1.7769, "step": 5957 }, { "epoch": 1.8084686598876916, "grad_norm": 0.46548882126808167, "learning_rate": 6.988964260402957e-05, "loss": 1.7211, "step": 5958 }, { "epoch": 1.8087721960843832, "grad_norm": 0.6428399085998535, "learning_rate": 6.988458033815936e-05, "loss": 1.7598, "step": 5959 }, { "epoch": 1.8090757322810744, "grad_norm": 0.5859332084655762, "learning_rate": 6.987951807228917e-05, "loss": 1.6466, "step": 5960 }, { "epoch": 1.809379268477766, "grad_norm": 0.5805972218513489, "learning_rate": 6.987445580641896e-05, "loss": 1.3684, "step": 5961 }, { "epoch": 1.8096828046744573, "grad_norm": 0.49208566546440125, "learning_rate": 6.986939354054876e-05, "loss": 1.6773, "step": 5962 }, { "epoch": 1.809986340871149, "grad_norm": 0.5302795171737671, "learning_rate": 6.986433127467855e-05, "loss": 1.7629, "step": 5963 }, { "epoch": 1.8102898770678404, "grad_norm": 0.48644882440567017, "learning_rate": 6.985926900880835e-05, "loss": 1.8528, "step": 5964 }, { "epoch": 1.8105934132645318, "grad_norm": 0.6011199355125427, "learning_rate": 6.985420674293814e-05, "loss": 1.8147, "step": 5965 }, { "epoch": 1.8108969494612233, "grad_norm": 0.45972317457199097, "learning_rate": 6.984914447706794e-05, "loss": 1.6178, "step": 5966 }, { "epoch": 1.8112004856579147, "grad_norm": 0.5446584224700928, "learning_rate": 6.984408221119773e-05, "loss": 1.5146, "step": 5967 }, { "epoch": 1.8115040218546061, "grad_norm": 0.5274298787117004, "learning_rate": 6.983901994532753e-05, "loss": 1.464, "step": 5968 }, { "epoch": 1.8118075580512976, "grad_norm": 0.5610344409942627, "learning_rate": 6.983395767945732e-05, "loss": 2.077, "step": 5969 }, { "epoch": 1.8121110942479892, "grad_norm": 0.48236846923828125, "learning_rate": 6.982889541358713e-05, "loss": 1.5399, "step": 5970 }, { "epoch": 1.8124146304446804, "grad_norm": 0.7982462644577026, "learning_rate": 6.982383314771693e-05, "loss": 1.6216, "step": 5971 }, { "epoch": 1.812718166641372, "grad_norm": 0.48303601145744324, "learning_rate": 6.981877088184672e-05, "loss": 1.945, "step": 5972 }, { "epoch": 1.8130217028380633, "grad_norm": 0.45602136850357056, "learning_rate": 6.981370861597652e-05, "loss": 1.8444, "step": 5973 }, { "epoch": 1.813325239034755, "grad_norm": 0.5214483737945557, "learning_rate": 6.980864635010631e-05, "loss": 1.8304, "step": 5974 }, { "epoch": 1.8136287752314464, "grad_norm": 0.5384705662727356, "learning_rate": 6.98035840842361e-05, "loss": 1.7733, "step": 5975 }, { "epoch": 1.8139323114281378, "grad_norm": 0.6643903255462646, "learning_rate": 6.97985218183659e-05, "loss": 1.0964, "step": 5976 }, { "epoch": 1.8142358476248293, "grad_norm": 0.5094102621078491, "learning_rate": 6.97934595524957e-05, "loss": 1.9589, "step": 5977 }, { "epoch": 1.8145393838215207, "grad_norm": 0.5107044577598572, "learning_rate": 6.978839728662549e-05, "loss": 1.9909, "step": 5978 }, { "epoch": 1.8148429200182121, "grad_norm": 0.577189028263092, "learning_rate": 6.97833350207553e-05, "loss": 0.9188, "step": 5979 }, { "epoch": 1.8151464562149036, "grad_norm": 0.5533971786499023, "learning_rate": 6.977827275488509e-05, "loss": 1.5661, "step": 5980 }, { "epoch": 1.8154499924115952, "grad_norm": 0.5704061985015869, "learning_rate": 6.977321048901489e-05, "loss": 1.415, "step": 5981 }, { "epoch": 1.8157535286082864, "grad_norm": 0.5985412001609802, "learning_rate": 6.976814822314468e-05, "loss": 1.7265, "step": 5982 }, { "epoch": 1.816057064804978, "grad_norm": 0.48246708512306213, "learning_rate": 6.976308595727448e-05, "loss": 1.3756, "step": 5983 }, { "epoch": 1.8163606010016693, "grad_norm": 0.5040408372879028, "learning_rate": 6.975802369140427e-05, "loss": 1.8844, "step": 5984 }, { "epoch": 1.816664137198361, "grad_norm": 0.5499937534332275, "learning_rate": 6.975296142553407e-05, "loss": 1.3734, "step": 5985 }, { "epoch": 1.8169676733950524, "grad_norm": 0.6828827857971191, "learning_rate": 6.974789915966386e-05, "loss": 1.3821, "step": 5986 }, { "epoch": 1.8172712095917438, "grad_norm": 0.5618600845336914, "learning_rate": 6.974283689379366e-05, "loss": 1.5585, "step": 5987 }, { "epoch": 1.8175747457884353, "grad_norm": 0.6279012560844421, "learning_rate": 6.973777462792346e-05, "loss": 1.4342, "step": 5988 }, { "epoch": 1.8178782819851267, "grad_norm": 0.5523353815078735, "learning_rate": 6.973271236205326e-05, "loss": 1.8851, "step": 5989 }, { "epoch": 1.8181818181818183, "grad_norm": 0.5792304873466492, "learning_rate": 6.972765009618305e-05, "loss": 1.878, "step": 5990 }, { "epoch": 1.8184853543785096, "grad_norm": 0.4983103275299072, "learning_rate": 6.972258783031285e-05, "loss": 1.7997, "step": 5991 }, { "epoch": 1.8187888905752012, "grad_norm": 0.5694151520729065, "learning_rate": 6.971752556444264e-05, "loss": 1.7064, "step": 5992 }, { "epoch": 1.8190924267718924, "grad_norm": 0.5514373779296875, "learning_rate": 6.971246329857244e-05, "loss": 1.6783, "step": 5993 }, { "epoch": 1.819395962968584, "grad_norm": 0.5489677786827087, "learning_rate": 6.970740103270223e-05, "loss": 1.7817, "step": 5994 }, { "epoch": 1.8196994991652755, "grad_norm": 0.6167088150978088, "learning_rate": 6.970233876683204e-05, "loss": 1.577, "step": 5995 }, { "epoch": 1.820003035361967, "grad_norm": 0.6246216297149658, "learning_rate": 6.969727650096184e-05, "loss": 1.7529, "step": 5996 }, { "epoch": 1.8203065715586584, "grad_norm": 0.5288349986076355, "learning_rate": 6.969221423509163e-05, "loss": 1.4161, "step": 5997 }, { "epoch": 1.8206101077553498, "grad_norm": 0.5577011704444885, "learning_rate": 6.968715196922143e-05, "loss": 1.3679, "step": 5998 }, { "epoch": 1.8209136439520412, "grad_norm": 0.5881639719009399, "learning_rate": 6.968208970335123e-05, "loss": 1.3722, "step": 5999 }, { "epoch": 1.8212171801487327, "grad_norm": 0.5512107610702515, "learning_rate": 6.967702743748103e-05, "loss": 1.6681, "step": 6000 }, { "epoch": 1.8215207163454243, "grad_norm": 0.574345588684082, "learning_rate": 6.967196517161082e-05, "loss": 1.5918, "step": 6001 }, { "epoch": 1.8218242525421156, "grad_norm": 0.7422041893005371, "learning_rate": 6.966690290574062e-05, "loss": 1.6145, "step": 6002 }, { "epoch": 1.8221277887388072, "grad_norm": 0.5157979726791382, "learning_rate": 6.966184063987041e-05, "loss": 1.9659, "step": 6003 }, { "epoch": 1.8224313249354984, "grad_norm": 0.608417272567749, "learning_rate": 6.965677837400021e-05, "loss": 1.8913, "step": 6004 }, { "epoch": 1.82273486113219, "grad_norm": 0.5558058023452759, "learning_rate": 6.965171610813e-05, "loss": 1.4399, "step": 6005 }, { "epoch": 1.8230383973288815, "grad_norm": 0.5612147450447083, "learning_rate": 6.96466538422598e-05, "loss": 1.7211, "step": 6006 }, { "epoch": 1.823341933525573, "grad_norm": 0.5312124490737915, "learning_rate": 6.964159157638959e-05, "loss": 1.8771, "step": 6007 }, { "epoch": 1.8236454697222644, "grad_norm": 0.5921664237976074, "learning_rate": 6.963652931051939e-05, "loss": 1.8804, "step": 6008 }, { "epoch": 1.8239490059189558, "grad_norm": 0.4935623109340668, "learning_rate": 6.96314670446492e-05, "loss": 1.0369, "step": 6009 }, { "epoch": 1.8242525421156472, "grad_norm": 0.4994628429412842, "learning_rate": 6.962640477877899e-05, "loss": 1.9222, "step": 6010 }, { "epoch": 1.8245560783123387, "grad_norm": 0.5165558457374573, "learning_rate": 6.962134251290879e-05, "loss": 1.731, "step": 6011 }, { "epoch": 1.8248596145090303, "grad_norm": 0.5655200481414795, "learning_rate": 6.961628024703858e-05, "loss": 1.6356, "step": 6012 }, { "epoch": 1.8251631507057215, "grad_norm": 0.4943961203098297, "learning_rate": 6.961121798116838e-05, "loss": 1.9832, "step": 6013 }, { "epoch": 1.8254666869024132, "grad_norm": 0.7183212041854858, "learning_rate": 6.960615571529817e-05, "loss": 1.5268, "step": 6014 }, { "epoch": 1.8257702230991044, "grad_norm": 0.5787492394447327, "learning_rate": 6.960109344942796e-05, "loss": 1.615, "step": 6015 }, { "epoch": 1.826073759295796, "grad_norm": 0.5697316527366638, "learning_rate": 6.959603118355776e-05, "loss": 1.5962, "step": 6016 }, { "epoch": 1.8263772954924875, "grad_norm": 0.9180818200111389, "learning_rate": 6.959096891768755e-05, "loss": 1.2139, "step": 6017 }, { "epoch": 1.826680831689179, "grad_norm": 0.5522624850273132, "learning_rate": 6.958590665181736e-05, "loss": 1.7361, "step": 6018 }, { "epoch": 1.8269843678858704, "grad_norm": 0.544629693031311, "learning_rate": 6.958084438594716e-05, "loss": 1.7042, "step": 6019 }, { "epoch": 1.8272879040825618, "grad_norm": 0.5336179137229919, "learning_rate": 6.957578212007695e-05, "loss": 1.5085, "step": 6020 }, { "epoch": 1.8275914402792535, "grad_norm": 0.5903141498565674, "learning_rate": 6.957071985420675e-05, "loss": 1.801, "step": 6021 }, { "epoch": 1.8278949764759447, "grad_norm": 0.5685314536094666, "learning_rate": 6.956565758833654e-05, "loss": 1.79, "step": 6022 }, { "epoch": 1.8281985126726363, "grad_norm": 0.5545884966850281, "learning_rate": 6.956059532246634e-05, "loss": 1.5936, "step": 6023 }, { "epoch": 1.8285020488693275, "grad_norm": 0.5583124756813049, "learning_rate": 6.955553305659613e-05, "loss": 1.9654, "step": 6024 }, { "epoch": 1.8288055850660192, "grad_norm": 0.48344558477401733, "learning_rate": 6.955047079072593e-05, "loss": 2.088, "step": 6025 }, { "epoch": 1.8291091212627104, "grad_norm": 0.6977788209915161, "learning_rate": 6.954540852485572e-05, "loss": 1.6136, "step": 6026 }, { "epoch": 1.829412657459402, "grad_norm": 0.5369908213615417, "learning_rate": 6.954034625898553e-05, "loss": 1.8515, "step": 6027 }, { "epoch": 1.8297161936560935, "grad_norm": 0.6249637007713318, "learning_rate": 6.953528399311532e-05, "loss": 1.7288, "step": 6028 }, { "epoch": 1.830019729852785, "grad_norm": 0.574105978012085, "learning_rate": 6.953022172724512e-05, "loss": 1.933, "step": 6029 }, { "epoch": 1.8303232660494764, "grad_norm": 0.5597913861274719, "learning_rate": 6.952515946137491e-05, "loss": 1.5223, "step": 6030 }, { "epoch": 1.8306268022461678, "grad_norm": 0.5201643705368042, "learning_rate": 6.952009719550471e-05, "loss": 1.9055, "step": 6031 }, { "epoch": 1.8309303384428595, "grad_norm": 0.5336455702781677, "learning_rate": 6.95150349296345e-05, "loss": 1.7269, "step": 6032 }, { "epoch": 1.8312338746395507, "grad_norm": 0.5909528732299805, "learning_rate": 6.95099726637643e-05, "loss": 1.4887, "step": 6033 }, { "epoch": 1.8315374108362423, "grad_norm": 0.4900136888027191, "learning_rate": 6.950491039789409e-05, "loss": 0.9843, "step": 6034 }, { "epoch": 1.8318409470329335, "grad_norm": 0.5249224305152893, "learning_rate": 6.949984813202389e-05, "loss": 1.8408, "step": 6035 }, { "epoch": 1.8321444832296252, "grad_norm": 0.6888431310653687, "learning_rate": 6.949478586615368e-05, "loss": 1.1868, "step": 6036 }, { "epoch": 1.8324480194263166, "grad_norm": 0.5488059520721436, "learning_rate": 6.948972360028349e-05, "loss": 2.0866, "step": 6037 }, { "epoch": 1.832751555623008, "grad_norm": 0.5993854999542236, "learning_rate": 6.948466133441329e-05, "loss": 1.2229, "step": 6038 }, { "epoch": 1.8330550918196995, "grad_norm": 0.5585269331932068, "learning_rate": 6.94795990685431e-05, "loss": 1.4898, "step": 6039 }, { "epoch": 1.833358628016391, "grad_norm": 0.5782963633537292, "learning_rate": 6.947453680267289e-05, "loss": 1.7766, "step": 6040 }, { "epoch": 1.8336621642130824, "grad_norm": 0.46721434593200684, "learning_rate": 6.946947453680268e-05, "loss": 1.261, "step": 6041 }, { "epoch": 1.8339657004097738, "grad_norm": 0.682328462600708, "learning_rate": 6.946441227093248e-05, "loss": 1.3548, "step": 6042 }, { "epoch": 1.8342692366064655, "grad_norm": 0.546238362789154, "learning_rate": 6.945935000506227e-05, "loss": 1.3244, "step": 6043 }, { "epoch": 1.8345727728031567, "grad_norm": 0.6565511226654053, "learning_rate": 6.945428773919207e-05, "loss": 1.7417, "step": 6044 }, { "epoch": 1.8348763089998483, "grad_norm": 0.511644184589386, "learning_rate": 6.944922547332186e-05, "loss": 1.7742, "step": 6045 }, { "epoch": 1.8351798451965395, "grad_norm": 0.7202497124671936, "learning_rate": 6.944416320745166e-05, "loss": 1.2906, "step": 6046 }, { "epoch": 1.8354833813932312, "grad_norm": 0.4779212176799774, "learning_rate": 6.943910094158145e-05, "loss": 1.2606, "step": 6047 }, { "epoch": 1.8357869175899226, "grad_norm": 0.4823431670665741, "learning_rate": 6.943403867571126e-05, "loss": 1.7771, "step": 6048 }, { "epoch": 1.836090453786614, "grad_norm": 0.5160930156707764, "learning_rate": 6.942897640984106e-05, "loss": 1.8786, "step": 6049 }, { "epoch": 1.8363939899833055, "grad_norm": 0.5408679842948914, "learning_rate": 6.942391414397085e-05, "loss": 1.3319, "step": 6050 }, { "epoch": 1.836697526179997, "grad_norm": 0.5134124159812927, "learning_rate": 6.941885187810064e-05, "loss": 1.754, "step": 6051 }, { "epoch": 1.8370010623766884, "grad_norm": 0.48612844944000244, "learning_rate": 6.941378961223044e-05, "loss": 1.9355, "step": 6052 }, { "epoch": 1.8373045985733798, "grad_norm": 0.44790950417518616, "learning_rate": 6.940872734636023e-05, "loss": 1.9657, "step": 6053 }, { "epoch": 1.8376081347700715, "grad_norm": 0.4986133277416229, "learning_rate": 6.940366508049003e-05, "loss": 1.9317, "step": 6054 }, { "epoch": 1.8379116709667627, "grad_norm": 0.5185719132423401, "learning_rate": 6.939860281461982e-05, "loss": 1.8908, "step": 6055 }, { "epoch": 1.8382152071634543, "grad_norm": 0.5861765146255493, "learning_rate": 6.939354054874962e-05, "loss": 1.6715, "step": 6056 }, { "epoch": 1.8385187433601455, "grad_norm": 0.5141394138336182, "learning_rate": 6.938847828287943e-05, "loss": 1.1495, "step": 6057 }, { "epoch": 1.8388222795568372, "grad_norm": 0.4696032702922821, "learning_rate": 6.938341601700922e-05, "loss": 1.6885, "step": 6058 }, { "epoch": 1.8391258157535286, "grad_norm": 0.546829104423523, "learning_rate": 6.937835375113902e-05, "loss": 1.4341, "step": 6059 }, { "epoch": 1.83942935195022, "grad_norm": 0.5794239044189453, "learning_rate": 6.937329148526881e-05, "loss": 2.0196, "step": 6060 }, { "epoch": 1.8397328881469115, "grad_norm": 0.584017276763916, "learning_rate": 6.93682292193986e-05, "loss": 2.069, "step": 6061 }, { "epoch": 1.840036424343603, "grad_norm": 0.5827233195304871, "learning_rate": 6.93631669535284e-05, "loss": 1.7364, "step": 6062 }, { "epoch": 1.8403399605402946, "grad_norm": 0.6362305283546448, "learning_rate": 6.93581046876582e-05, "loss": 1.8902, "step": 6063 }, { "epoch": 1.8406434967369858, "grad_norm": 0.5154918432235718, "learning_rate": 6.935304242178799e-05, "loss": 0.9184, "step": 6064 }, { "epoch": 1.8409470329336775, "grad_norm": 0.5192784070968628, "learning_rate": 6.934798015591779e-05, "loss": 1.5757, "step": 6065 }, { "epoch": 1.8412505691303687, "grad_norm": 0.4856669008731842, "learning_rate": 6.93429178900476e-05, "loss": 1.2768, "step": 6066 }, { "epoch": 1.8415541053270603, "grad_norm": 0.5663694739341736, "learning_rate": 6.933785562417739e-05, "loss": 1.6493, "step": 6067 }, { "epoch": 1.8418576415237518, "grad_norm": 0.488754004240036, "learning_rate": 6.933279335830718e-05, "loss": 1.7893, "step": 6068 }, { "epoch": 1.8421611777204432, "grad_norm": 0.5024654269218445, "learning_rate": 6.932773109243698e-05, "loss": 1.5329, "step": 6069 }, { "epoch": 1.8424647139171346, "grad_norm": 0.6903789639472961, "learning_rate": 6.932266882656677e-05, "loss": 1.1917, "step": 6070 }, { "epoch": 1.842768250113826, "grad_norm": 0.6057137846946716, "learning_rate": 6.931760656069657e-05, "loss": 1.4035, "step": 6071 }, { "epoch": 1.8430717863105175, "grad_norm": 0.5684095025062561, "learning_rate": 6.931254429482636e-05, "loss": 1.8729, "step": 6072 }, { "epoch": 1.843375322507209, "grad_norm": 0.9758940935134888, "learning_rate": 6.930748202895616e-05, "loss": 1.3789, "step": 6073 }, { "epoch": 1.8436788587039006, "grad_norm": 0.5580344200134277, "learning_rate": 6.930241976308595e-05, "loss": 1.3577, "step": 6074 }, { "epoch": 1.8439823949005918, "grad_norm": 0.6532706618309021, "learning_rate": 6.929735749721575e-05, "loss": 1.7574, "step": 6075 }, { "epoch": 1.8442859310972834, "grad_norm": 0.5462925434112549, "learning_rate": 6.929229523134556e-05, "loss": 1.6758, "step": 6076 }, { "epoch": 1.8445894672939747, "grad_norm": 0.6207025647163391, "learning_rate": 6.928723296547535e-05, "loss": 1.4469, "step": 6077 }, { "epoch": 1.8448930034906663, "grad_norm": 0.5969544053077698, "learning_rate": 6.928217069960515e-05, "loss": 2.0165, "step": 6078 }, { "epoch": 1.8451965396873578, "grad_norm": 0.5352455973625183, "learning_rate": 6.927710843373494e-05, "loss": 1.9362, "step": 6079 }, { "epoch": 1.8455000758840492, "grad_norm": 0.5923967361450195, "learning_rate": 6.927204616786473e-05, "loss": 1.5255, "step": 6080 }, { "epoch": 1.8458036120807406, "grad_norm": 0.6089999079704285, "learning_rate": 6.926698390199453e-05, "loss": 1.5816, "step": 6081 }, { "epoch": 1.846107148277432, "grad_norm": 0.602755606174469, "learning_rate": 6.926192163612432e-05, "loss": 1.5102, "step": 6082 }, { "epoch": 1.8464106844741235, "grad_norm": 0.48048046231269836, "learning_rate": 6.925685937025412e-05, "loss": 1.8209, "step": 6083 }, { "epoch": 1.846714220670815, "grad_norm": 0.6715882420539856, "learning_rate": 6.925179710438393e-05, "loss": 1.3094, "step": 6084 }, { "epoch": 1.8470177568675066, "grad_norm": 0.5754781365394592, "learning_rate": 6.924673483851372e-05, "loss": 1.6519, "step": 6085 }, { "epoch": 1.8473212930641978, "grad_norm": 0.46512678265571594, "learning_rate": 6.924167257264352e-05, "loss": 1.461, "step": 6086 }, { "epoch": 1.8476248292608894, "grad_norm": 0.5386472940444946, "learning_rate": 6.923661030677333e-05, "loss": 1.8878, "step": 6087 }, { "epoch": 1.8479283654575807, "grad_norm": 0.6794739365577698, "learning_rate": 6.923154804090312e-05, "loss": 1.957, "step": 6088 }, { "epoch": 1.8482319016542723, "grad_norm": 0.635474681854248, "learning_rate": 6.922648577503291e-05, "loss": 1.138, "step": 6089 }, { "epoch": 1.8485354378509637, "grad_norm": 0.8729957938194275, "learning_rate": 6.922142350916271e-05, "loss": 1.5292, "step": 6090 }, { "epoch": 1.8488389740476552, "grad_norm": 0.5206477642059326, "learning_rate": 6.92163612432925e-05, "loss": 1.6186, "step": 6091 }, { "epoch": 1.8491425102443466, "grad_norm": 0.5747119784355164, "learning_rate": 6.92112989774223e-05, "loss": 1.7004, "step": 6092 }, { "epoch": 1.849446046441038, "grad_norm": 0.5725748538970947, "learning_rate": 6.92062367115521e-05, "loss": 1.3208, "step": 6093 }, { "epoch": 1.8497495826377297, "grad_norm": 0.4918226897716522, "learning_rate": 6.920117444568189e-05, "loss": 1.4527, "step": 6094 }, { "epoch": 1.850053118834421, "grad_norm": 0.5622021555900574, "learning_rate": 6.919611217981168e-05, "loss": 1.4738, "step": 6095 }, { "epoch": 1.8503566550311126, "grad_norm": 0.48067110776901245, "learning_rate": 6.919104991394149e-05, "loss": 1.7989, "step": 6096 }, { "epoch": 1.8506601912278038, "grad_norm": 0.5293039083480835, "learning_rate": 6.918598764807129e-05, "loss": 1.7867, "step": 6097 }, { "epoch": 1.8509637274244954, "grad_norm": 0.6090536117553711, "learning_rate": 6.918092538220108e-05, "loss": 1.3639, "step": 6098 }, { "epoch": 1.8512672636211869, "grad_norm": 0.5745795369148254, "learning_rate": 6.917586311633088e-05, "loss": 1.1913, "step": 6099 }, { "epoch": 1.8515707998178783, "grad_norm": 0.9985912442207336, "learning_rate": 6.917080085046067e-05, "loss": 1.5265, "step": 6100 }, { "epoch": 1.8518743360145697, "grad_norm": 0.7579218745231628, "learning_rate": 6.916573858459047e-05, "loss": 1.8763, "step": 6101 }, { "epoch": 1.8521778722112612, "grad_norm": 0.5257670283317566, "learning_rate": 6.916067631872026e-05, "loss": 1.5928, "step": 6102 }, { "epoch": 1.8524814084079526, "grad_norm": 0.46650373935699463, "learning_rate": 6.915561405285006e-05, "loss": 1.5768, "step": 6103 }, { "epoch": 1.852784944604644, "grad_norm": 0.4195265471935272, "learning_rate": 6.915055178697985e-05, "loss": 1.5363, "step": 6104 }, { "epoch": 1.8530884808013357, "grad_norm": 0.5275460481643677, "learning_rate": 6.914548952110966e-05, "loss": 1.5679, "step": 6105 }, { "epoch": 1.853392016998027, "grad_norm": 0.4654236435890198, "learning_rate": 6.914042725523945e-05, "loss": 1.8447, "step": 6106 }, { "epoch": 1.8536955531947186, "grad_norm": 0.5061540007591248, "learning_rate": 6.913536498936925e-05, "loss": 1.8073, "step": 6107 }, { "epoch": 1.8539990893914098, "grad_norm": 0.5491917729377747, "learning_rate": 6.913030272349904e-05, "loss": 1.4774, "step": 6108 }, { "epoch": 1.8543026255881014, "grad_norm": 0.7270756363868713, "learning_rate": 6.912524045762884e-05, "loss": 1.3623, "step": 6109 }, { "epoch": 1.8546061617847929, "grad_norm": 0.5843378305435181, "learning_rate": 6.912017819175863e-05, "loss": 1.6507, "step": 6110 }, { "epoch": 1.8549096979814843, "grad_norm": 0.5927285552024841, "learning_rate": 6.911511592588843e-05, "loss": 1.5258, "step": 6111 }, { "epoch": 1.8552132341781757, "grad_norm": 0.50548255443573, "learning_rate": 6.911005366001822e-05, "loss": 1.3183, "step": 6112 }, { "epoch": 1.8555167703748672, "grad_norm": 0.6694241762161255, "learning_rate": 6.910499139414802e-05, "loss": 1.4925, "step": 6113 }, { "epoch": 1.8558203065715586, "grad_norm": 0.628274142742157, "learning_rate": 6.909992912827781e-05, "loss": 1.697, "step": 6114 }, { "epoch": 1.85612384276825, "grad_norm": 0.6124897003173828, "learning_rate": 6.909486686240762e-05, "loss": 1.6125, "step": 6115 }, { "epoch": 1.8564273789649417, "grad_norm": 0.5437746644020081, "learning_rate": 6.908980459653742e-05, "loss": 1.4485, "step": 6116 }, { "epoch": 1.856730915161633, "grad_norm": 0.5141922831535339, "learning_rate": 6.908474233066721e-05, "loss": 1.8619, "step": 6117 }, { "epoch": 1.8570344513583246, "grad_norm": 0.5010159611701965, "learning_rate": 6.9079680064797e-05, "loss": 1.8498, "step": 6118 }, { "epoch": 1.8573379875550158, "grad_norm": 0.44890761375427246, "learning_rate": 6.90746177989268e-05, "loss": 1.7194, "step": 6119 }, { "epoch": 1.8576415237517074, "grad_norm": 0.5114756226539612, "learning_rate": 6.90695555330566e-05, "loss": 1.7011, "step": 6120 }, { "epoch": 1.8579450599483989, "grad_norm": 0.5562925934791565, "learning_rate": 6.906449326718639e-05, "loss": 1.6741, "step": 6121 }, { "epoch": 1.8582485961450903, "grad_norm": 0.631549060344696, "learning_rate": 6.905943100131618e-05, "loss": 1.7297, "step": 6122 }, { "epoch": 1.8585521323417817, "grad_norm": 0.8341001868247986, "learning_rate": 6.905436873544598e-05, "loss": 1.5796, "step": 6123 }, { "epoch": 1.8588556685384732, "grad_norm": 0.5715938210487366, "learning_rate": 6.904930646957579e-05, "loss": 1.4869, "step": 6124 }, { "epoch": 1.8591592047351648, "grad_norm": 0.5713958144187927, "learning_rate": 6.904424420370558e-05, "loss": 1.3331, "step": 6125 }, { "epoch": 1.859462740931856, "grad_norm": 0.6407903432846069, "learning_rate": 6.903918193783538e-05, "loss": 1.5363, "step": 6126 }, { "epoch": 1.8597662771285477, "grad_norm": 0.5533556938171387, "learning_rate": 6.903411967196517e-05, "loss": 1.8796, "step": 6127 }, { "epoch": 1.860069813325239, "grad_norm": 0.9236746430397034, "learning_rate": 6.902905740609498e-05, "loss": 1.5267, "step": 6128 }, { "epoch": 1.8603733495219306, "grad_norm": 0.6433690190315247, "learning_rate": 6.902399514022477e-05, "loss": 1.5664, "step": 6129 }, { "epoch": 1.860676885718622, "grad_norm": 0.47277143597602844, "learning_rate": 6.901893287435457e-05, "loss": 1.2047, "step": 6130 }, { "epoch": 1.8609804219153134, "grad_norm": 0.7604238986968994, "learning_rate": 6.901387060848436e-05, "loss": 1.927, "step": 6131 }, { "epoch": 1.8612839581120049, "grad_norm": 0.6293209791183472, "learning_rate": 6.900880834261416e-05, "loss": 1.9315, "step": 6132 }, { "epoch": 1.8615874943086963, "grad_norm": 0.5206697583198547, "learning_rate": 6.900374607674395e-05, "loss": 1.6956, "step": 6133 }, { "epoch": 1.8618910305053877, "grad_norm": 0.45433950424194336, "learning_rate": 6.899868381087375e-05, "loss": 1.5459, "step": 6134 }, { "epoch": 1.8621945667020792, "grad_norm": 0.4610427916049957, "learning_rate": 6.899362154500356e-05, "loss": 1.9453, "step": 6135 }, { "epoch": 1.8624981028987708, "grad_norm": 0.6815734505653381, "learning_rate": 6.898855927913335e-05, "loss": 1.4047, "step": 6136 }, { "epoch": 1.862801639095462, "grad_norm": 0.5030832886695862, "learning_rate": 6.898349701326315e-05, "loss": 1.7727, "step": 6137 }, { "epoch": 1.8631051752921537, "grad_norm": 0.4881182014942169, "learning_rate": 6.897843474739294e-05, "loss": 0.9128, "step": 6138 }, { "epoch": 1.863408711488845, "grad_norm": 0.5349050164222717, "learning_rate": 6.897337248152274e-05, "loss": 1.7836, "step": 6139 }, { "epoch": 1.8637122476855366, "grad_norm": 0.5377179384231567, "learning_rate": 6.896831021565253e-05, "loss": 1.705, "step": 6140 }, { "epoch": 1.864015783882228, "grad_norm": 0.5401241779327393, "learning_rate": 6.896324794978233e-05, "loss": 1.6809, "step": 6141 }, { "epoch": 1.8643193200789194, "grad_norm": 0.4488482177257538, "learning_rate": 6.895818568391212e-05, "loss": 1.7207, "step": 6142 }, { "epoch": 1.8646228562756109, "grad_norm": 0.878682553768158, "learning_rate": 6.895312341804192e-05, "loss": 1.7966, "step": 6143 }, { "epoch": 1.8649263924723023, "grad_norm": 0.5451293587684631, "learning_rate": 6.894806115217172e-05, "loss": 1.3851, "step": 6144 }, { "epoch": 1.8652299286689937, "grad_norm": 0.5849304795265198, "learning_rate": 6.894299888630152e-05, "loss": 1.7496, "step": 6145 }, { "epoch": 1.8655334648656852, "grad_norm": 0.500056803226471, "learning_rate": 6.893793662043131e-05, "loss": 1.9507, "step": 6146 }, { "epoch": 1.8658370010623768, "grad_norm": 0.5664394497871399, "learning_rate": 6.893287435456111e-05, "loss": 1.7785, "step": 6147 }, { "epoch": 1.866140537259068, "grad_norm": 0.5805900692939758, "learning_rate": 6.89278120886909e-05, "loss": 1.4488, "step": 6148 }, { "epoch": 1.8664440734557597, "grad_norm": 0.5849188566207886, "learning_rate": 6.89227498228207e-05, "loss": 1.5485, "step": 6149 }, { "epoch": 1.866747609652451, "grad_norm": 0.6293565034866333, "learning_rate": 6.891768755695049e-05, "loss": 1.3529, "step": 6150 }, { "epoch": 1.8670511458491426, "grad_norm": 0.5155745148658752, "learning_rate": 6.891262529108029e-05, "loss": 1.8168, "step": 6151 }, { "epoch": 1.867354682045834, "grad_norm": 0.5031374096870422, "learning_rate": 6.890756302521008e-05, "loss": 1.6329, "step": 6152 }, { "epoch": 1.8676582182425254, "grad_norm": 0.5657221674919128, "learning_rate": 6.890250075933988e-05, "loss": 1.9763, "step": 6153 }, { "epoch": 1.8679617544392169, "grad_norm": 0.6052532196044922, "learning_rate": 6.889743849346969e-05, "loss": 1.8486, "step": 6154 }, { "epoch": 1.8682652906359083, "grad_norm": 0.5807188749313354, "learning_rate": 6.889237622759948e-05, "loss": 1.7798, "step": 6155 }, { "epoch": 1.8685688268326, "grad_norm": 0.5780386328697205, "learning_rate": 6.888731396172927e-05, "loss": 1.6593, "step": 6156 }, { "epoch": 1.8688723630292912, "grad_norm": 0.5649313926696777, "learning_rate": 6.888225169585907e-05, "loss": 1.4904, "step": 6157 }, { "epoch": 1.8691758992259828, "grad_norm": 0.5888344645500183, "learning_rate": 6.887718942998886e-05, "loss": 1.6099, "step": 6158 }, { "epoch": 1.869479435422674, "grad_norm": 0.45079630613327026, "learning_rate": 6.887212716411866e-05, "loss": 1.1967, "step": 6159 }, { "epoch": 1.8697829716193657, "grad_norm": 0.6415480971336365, "learning_rate": 6.886706489824845e-05, "loss": 1.5475, "step": 6160 }, { "epoch": 1.8700865078160571, "grad_norm": 0.5611491203308105, "learning_rate": 6.886200263237825e-05, "loss": 1.6124, "step": 6161 }, { "epoch": 1.8703900440127486, "grad_norm": 0.47479820251464844, "learning_rate": 6.885694036650804e-05, "loss": 1.3631, "step": 6162 }, { "epoch": 1.87069358020944, "grad_norm": 0.6043630242347717, "learning_rate": 6.885187810063785e-05, "loss": 1.9075, "step": 6163 }, { "epoch": 1.8709971164061314, "grad_norm": 0.5722765922546387, "learning_rate": 6.884681583476765e-05, "loss": 1.8247, "step": 6164 }, { "epoch": 1.8713006526028229, "grad_norm": 0.6177046895027161, "learning_rate": 6.884175356889744e-05, "loss": 1.8107, "step": 6165 }, { "epoch": 1.8716041887995143, "grad_norm": 0.528279721736908, "learning_rate": 6.883669130302724e-05, "loss": 1.7669, "step": 6166 }, { "epoch": 1.871907724996206, "grad_norm": 0.5595422983169556, "learning_rate": 6.883162903715703e-05, "loss": 1.8459, "step": 6167 }, { "epoch": 1.8722112611928972, "grad_norm": 0.488857239484787, "learning_rate": 6.882656677128683e-05, "loss": 1.7021, "step": 6168 }, { "epoch": 1.8725147973895888, "grad_norm": 0.5302273631095886, "learning_rate": 6.882150450541662e-05, "loss": 1.2417, "step": 6169 }, { "epoch": 1.87281833358628, "grad_norm": 0.4488196074962616, "learning_rate": 6.881644223954642e-05, "loss": 1.7382, "step": 6170 }, { "epoch": 1.8731218697829717, "grad_norm": 0.5027474761009216, "learning_rate": 6.881137997367621e-05, "loss": 1.8507, "step": 6171 }, { "epoch": 1.8734254059796631, "grad_norm": 0.44171908497810364, "learning_rate": 6.880631770780602e-05, "loss": 0.5499, "step": 6172 }, { "epoch": 1.8737289421763546, "grad_norm": 0.6166810989379883, "learning_rate": 6.880125544193581e-05, "loss": 1.3108, "step": 6173 }, { "epoch": 1.874032478373046, "grad_norm": 0.6441341638565063, "learning_rate": 6.879619317606562e-05, "loss": 1.1281, "step": 6174 }, { "epoch": 1.8743360145697374, "grad_norm": 0.5491599440574646, "learning_rate": 6.879113091019542e-05, "loss": 1.5373, "step": 6175 }, { "epoch": 1.8746395507664289, "grad_norm": 0.4813689887523651, "learning_rate": 6.878606864432521e-05, "loss": 1.7681, "step": 6176 }, { "epoch": 1.8749430869631203, "grad_norm": 0.5879645347595215, "learning_rate": 6.8781006378455e-05, "loss": 1.7098, "step": 6177 }, { "epoch": 1.875246623159812, "grad_norm": 0.5867793560028076, "learning_rate": 6.87759441125848e-05, "loss": 1.4471, "step": 6178 }, { "epoch": 1.8755501593565032, "grad_norm": 0.37060195207595825, "learning_rate": 6.87708818467146e-05, "loss": 1.1236, "step": 6179 }, { "epoch": 1.8758536955531948, "grad_norm": 0.6691044569015503, "learning_rate": 6.876581958084439e-05, "loss": 1.0482, "step": 6180 }, { "epoch": 1.876157231749886, "grad_norm": 0.6610668897628784, "learning_rate": 6.876075731497419e-05, "loss": 1.7718, "step": 6181 }, { "epoch": 1.8764607679465777, "grad_norm": 0.6298898458480835, "learning_rate": 6.875569504910398e-05, "loss": 1.4496, "step": 6182 }, { "epoch": 1.8767643041432691, "grad_norm": 0.5803454518318176, "learning_rate": 6.875063278323379e-05, "loss": 1.0997, "step": 6183 }, { "epoch": 1.8770678403399605, "grad_norm": 0.6385704874992371, "learning_rate": 6.874557051736358e-05, "loss": 1.5478, "step": 6184 }, { "epoch": 1.877371376536652, "grad_norm": 0.6655149459838867, "learning_rate": 6.874050825149338e-05, "loss": 1.3631, "step": 6185 }, { "epoch": 1.8776749127333434, "grad_norm": 0.5710660815238953, "learning_rate": 6.873544598562317e-05, "loss": 1.7522, "step": 6186 }, { "epoch": 1.877978448930035, "grad_norm": 0.5770572423934937, "learning_rate": 6.873038371975297e-05, "loss": 1.4289, "step": 6187 }, { "epoch": 1.8782819851267263, "grad_norm": 0.6256627440452576, "learning_rate": 6.872532145388276e-05, "loss": 1.9416, "step": 6188 }, { "epoch": 1.878585521323418, "grad_norm": 0.6375580430030823, "learning_rate": 6.872025918801256e-05, "loss": 1.4679, "step": 6189 }, { "epoch": 1.8788890575201092, "grad_norm": 0.5342978835105896, "learning_rate": 6.871519692214235e-05, "loss": 1.6705, "step": 6190 }, { "epoch": 1.8791925937168008, "grad_norm": 0.6334168910980225, "learning_rate": 6.871013465627215e-05, "loss": 1.5592, "step": 6191 }, { "epoch": 1.879496129913492, "grad_norm": 0.48108720779418945, "learning_rate": 6.870507239040194e-05, "loss": 1.3368, "step": 6192 }, { "epoch": 1.8797996661101837, "grad_norm": 0.45311546325683594, "learning_rate": 6.870001012453175e-05, "loss": 1.7126, "step": 6193 }, { "epoch": 1.880103202306875, "grad_norm": 0.5576416850090027, "learning_rate": 6.869494785866154e-05, "loss": 1.7894, "step": 6194 }, { "epoch": 1.8804067385035665, "grad_norm": 0.6543675661087036, "learning_rate": 6.868988559279134e-05, "loss": 1.4302, "step": 6195 }, { "epoch": 1.880710274700258, "grad_norm": 0.592578113079071, "learning_rate": 6.868482332692113e-05, "loss": 1.9324, "step": 6196 }, { "epoch": 1.8810138108969494, "grad_norm": 0.4866163432598114, "learning_rate": 6.867976106105093e-05, "loss": 1.7496, "step": 6197 }, { "epoch": 1.881317347093641, "grad_norm": 0.6053670048713684, "learning_rate": 6.867469879518072e-05, "loss": 1.6693, "step": 6198 }, { "epoch": 1.8816208832903323, "grad_norm": 0.5006988048553467, "learning_rate": 6.866963652931052e-05, "loss": 1.5501, "step": 6199 }, { "epoch": 1.881924419487024, "grad_norm": 0.5762816667556763, "learning_rate": 6.866457426344031e-05, "loss": 1.7895, "step": 6200 }, { "epoch": 1.8822279556837151, "grad_norm": 0.6253973245620728, "learning_rate": 6.865951199757011e-05, "loss": 1.4188, "step": 6201 }, { "epoch": 1.8825314918804068, "grad_norm": 0.6388218998908997, "learning_rate": 6.865444973169992e-05, "loss": 2.0189, "step": 6202 }, { "epoch": 1.8828350280770982, "grad_norm": 0.45942676067352295, "learning_rate": 6.864938746582971e-05, "loss": 1.8114, "step": 6203 }, { "epoch": 1.8831385642737897, "grad_norm": 0.5745331645011902, "learning_rate": 6.86443251999595e-05, "loss": 1.6943, "step": 6204 }, { "epoch": 1.883442100470481, "grad_norm": 0.6693764925003052, "learning_rate": 6.86392629340893e-05, "loss": 1.8772, "step": 6205 }, { "epoch": 1.8837456366671725, "grad_norm": 0.5581064820289612, "learning_rate": 6.86342006682191e-05, "loss": 1.3722, "step": 6206 }, { "epoch": 1.884049172863864, "grad_norm": 0.8228082656860352, "learning_rate": 6.862913840234889e-05, "loss": 1.8403, "step": 6207 }, { "epoch": 1.8843527090605554, "grad_norm": 0.5933247804641724, "learning_rate": 6.862407613647869e-05, "loss": 2.0294, "step": 6208 }, { "epoch": 1.884656245257247, "grad_norm": 0.5767022967338562, "learning_rate": 6.861901387060848e-05, "loss": 1.4142, "step": 6209 }, { "epoch": 1.8849597814539383, "grad_norm": 0.4476047158241272, "learning_rate": 6.861395160473827e-05, "loss": 1.3916, "step": 6210 }, { "epoch": 1.88526331765063, "grad_norm": 0.5437102913856506, "learning_rate": 6.860888933886808e-05, "loss": 1.7854, "step": 6211 }, { "epoch": 1.8855668538473211, "grad_norm": 0.5073631405830383, "learning_rate": 6.860382707299788e-05, "loss": 1.5508, "step": 6212 }, { "epoch": 1.8858703900440128, "grad_norm": 0.4325421154499054, "learning_rate": 6.859876480712767e-05, "loss": 1.5213, "step": 6213 }, { "epoch": 1.8861739262407042, "grad_norm": 0.6551002264022827, "learning_rate": 6.859370254125747e-05, "loss": 1.7269, "step": 6214 }, { "epoch": 1.8864774624373957, "grad_norm": 0.5482550859451294, "learning_rate": 6.858864027538726e-05, "loss": 1.3621, "step": 6215 }, { "epoch": 1.886780998634087, "grad_norm": 0.5425487160682678, "learning_rate": 6.858357800951706e-05, "loss": 1.8515, "step": 6216 }, { "epoch": 1.8870845348307785, "grad_norm": 0.5646753311157227, "learning_rate": 6.857851574364687e-05, "loss": 1.8606, "step": 6217 }, { "epoch": 1.88738807102747, "grad_norm": 0.6172170639038086, "learning_rate": 6.857345347777666e-05, "loss": 1.7505, "step": 6218 }, { "epoch": 1.8876916072241614, "grad_norm": 0.555770754814148, "learning_rate": 6.856839121190646e-05, "loss": 1.4309, "step": 6219 }, { "epoch": 1.887995143420853, "grad_norm": 0.6966986656188965, "learning_rate": 6.856332894603625e-05, "loss": 1.5179, "step": 6220 }, { "epoch": 1.8882986796175443, "grad_norm": 0.5168276429176331, "learning_rate": 6.855826668016604e-05, "loss": 1.7389, "step": 6221 }, { "epoch": 1.888602215814236, "grad_norm": 0.5563384294509888, "learning_rate": 6.855320441429585e-05, "loss": 1.6291, "step": 6222 }, { "epoch": 1.8889057520109271, "grad_norm": 0.6013602614402771, "learning_rate": 6.854814214842565e-05, "loss": 1.61, "step": 6223 }, { "epoch": 1.8892092882076188, "grad_norm": 0.5379372835159302, "learning_rate": 6.854307988255544e-05, "loss": 1.6609, "step": 6224 }, { "epoch": 1.8895128244043102, "grad_norm": 0.5397657752037048, "learning_rate": 6.853801761668524e-05, "loss": 1.4849, "step": 6225 }, { "epoch": 1.8898163606010017, "grad_norm": 0.5665563941001892, "learning_rate": 6.853295535081503e-05, "loss": 1.8491, "step": 6226 }, { "epoch": 1.890119896797693, "grad_norm": 0.46644145250320435, "learning_rate": 6.852789308494483e-05, "loss": 1.875, "step": 6227 }, { "epoch": 1.8904234329943845, "grad_norm": 0.5429653525352478, "learning_rate": 6.852283081907462e-05, "loss": 1.9378, "step": 6228 }, { "epoch": 1.8907269691910762, "grad_norm": 0.5680977702140808, "learning_rate": 6.851776855320442e-05, "loss": 1.6695, "step": 6229 }, { "epoch": 1.8910305053877674, "grad_norm": 0.5593467354774475, "learning_rate": 6.851270628733421e-05, "loss": 1.6577, "step": 6230 }, { "epoch": 1.891334041584459, "grad_norm": 0.57649165391922, "learning_rate": 6.8507644021464e-05, "loss": 1.7832, "step": 6231 }, { "epoch": 1.8916375777811503, "grad_norm": 0.6322312355041504, "learning_rate": 6.850258175559381e-05, "loss": 1.7039, "step": 6232 }, { "epoch": 1.891941113977842, "grad_norm": 0.5910970568656921, "learning_rate": 6.849751948972361e-05, "loss": 1.4358, "step": 6233 }, { "epoch": 1.8922446501745334, "grad_norm": 0.5287604331970215, "learning_rate": 6.84924572238534e-05, "loss": 1.749, "step": 6234 }, { "epoch": 1.8925481863712248, "grad_norm": 0.6745063066482544, "learning_rate": 6.84873949579832e-05, "loss": 1.4466, "step": 6235 }, { "epoch": 1.8928517225679162, "grad_norm": 0.5758655071258545, "learning_rate": 6.8482332692113e-05, "loss": 1.6522, "step": 6236 }, { "epoch": 1.8931552587646077, "grad_norm": 0.5966370105743408, "learning_rate": 6.847727042624279e-05, "loss": 1.7655, "step": 6237 }, { "epoch": 1.893458794961299, "grad_norm": 0.6047906279563904, "learning_rate": 6.847220816037258e-05, "loss": 1.6902, "step": 6238 }, { "epoch": 1.8937623311579905, "grad_norm": 0.553486168384552, "learning_rate": 6.846714589450238e-05, "loss": 1.7621, "step": 6239 }, { "epoch": 1.8940658673546822, "grad_norm": 0.6047108769416809, "learning_rate": 6.846208362863217e-05, "loss": 1.8659, "step": 6240 }, { "epoch": 1.8943694035513734, "grad_norm": 0.7074782252311707, "learning_rate": 6.845702136276198e-05, "loss": 1.7276, "step": 6241 }, { "epoch": 1.894672939748065, "grad_norm": 0.5298722386360168, "learning_rate": 6.845195909689178e-05, "loss": 1.8257, "step": 6242 }, { "epoch": 1.8949764759447563, "grad_norm": 0.4784451723098755, "learning_rate": 6.844689683102157e-05, "loss": 1.8273, "step": 6243 }, { "epoch": 1.895280012141448, "grad_norm": 0.7199196219444275, "learning_rate": 6.844183456515137e-05, "loss": 1.9046, "step": 6244 }, { "epoch": 1.8955835483381394, "grad_norm": 0.5387197136878967, "learning_rate": 6.843677229928116e-05, "loss": 1.5792, "step": 6245 }, { "epoch": 1.8958870845348308, "grad_norm": 0.5584871172904968, "learning_rate": 6.843171003341096e-05, "loss": 1.5019, "step": 6246 }, { "epoch": 1.8961906207315222, "grad_norm": 0.5552901029586792, "learning_rate": 6.842664776754075e-05, "loss": 1.7851, "step": 6247 }, { "epoch": 1.8964941569282137, "grad_norm": 0.49852436780929565, "learning_rate": 6.842158550167054e-05, "loss": 1.2391, "step": 6248 }, { "epoch": 1.896797693124905, "grad_norm": 0.6677296161651611, "learning_rate": 6.841652323580034e-05, "loss": 1.8005, "step": 6249 }, { "epoch": 1.8971012293215965, "grad_norm": 0.5572288632392883, "learning_rate": 6.841146096993015e-05, "loss": 1.7112, "step": 6250 }, { "epoch": 1.8974047655182882, "grad_norm": 0.4943527579307556, "learning_rate": 6.840639870405994e-05, "loss": 1.6392, "step": 6251 }, { "epoch": 1.8977083017149794, "grad_norm": 0.4942340552806854, "learning_rate": 6.840133643818974e-05, "loss": 1.7768, "step": 6252 }, { "epoch": 1.898011837911671, "grad_norm": 0.42028114199638367, "learning_rate": 6.839627417231953e-05, "loss": 1.2282, "step": 6253 }, { "epoch": 1.8983153741083623, "grad_norm": 0.5391364693641663, "learning_rate": 6.839121190644933e-05, "loss": 1.3302, "step": 6254 }, { "epoch": 1.898618910305054, "grad_norm": 0.4264262616634369, "learning_rate": 6.838614964057912e-05, "loss": 2.3301, "step": 6255 }, { "epoch": 1.8989224465017454, "grad_norm": 0.39624980092048645, "learning_rate": 6.838108737470892e-05, "loss": 1.2059, "step": 6256 }, { "epoch": 1.8992259826984368, "grad_norm": 0.5087313055992126, "learning_rate": 6.837602510883871e-05, "loss": 1.4168, "step": 6257 }, { "epoch": 1.8995295188951282, "grad_norm": 0.5290665030479431, "learning_rate": 6.83709628429685e-05, "loss": 1.9055, "step": 6258 }, { "epoch": 1.8998330550918197, "grad_norm": 0.5578790903091431, "learning_rate": 6.83659005770983e-05, "loss": 1.6796, "step": 6259 }, { "epoch": 1.9001365912885113, "grad_norm": 0.47037312388420105, "learning_rate": 6.836083831122811e-05, "loss": 1.3823, "step": 6260 }, { "epoch": 1.9004401274852025, "grad_norm": 0.4832285940647125, "learning_rate": 6.835577604535792e-05, "loss": 1.87, "step": 6261 }, { "epoch": 1.9007436636818942, "grad_norm": 0.553207516670227, "learning_rate": 6.835071377948771e-05, "loss": 1.681, "step": 6262 }, { "epoch": 1.9010471998785854, "grad_norm": 0.5976235270500183, "learning_rate": 6.834565151361751e-05, "loss": 1.5391, "step": 6263 }, { "epoch": 1.901350736075277, "grad_norm": 0.6056948304176331, "learning_rate": 6.83405892477473e-05, "loss": 1.565, "step": 6264 }, { "epoch": 1.9016542722719685, "grad_norm": 0.4596242308616638, "learning_rate": 6.83355269818771e-05, "loss": 1.7108, "step": 6265 }, { "epoch": 1.90195780846866, "grad_norm": 0.7423454523086548, "learning_rate": 6.833046471600689e-05, "loss": 1.3533, "step": 6266 }, { "epoch": 1.9022613446653514, "grad_norm": 0.6196095943450928, "learning_rate": 6.832540245013669e-05, "loss": 2.036, "step": 6267 }, { "epoch": 1.9025648808620428, "grad_norm": 0.48350366950035095, "learning_rate": 6.832034018426648e-05, "loss": 1.2346, "step": 6268 }, { "epoch": 1.9028684170587342, "grad_norm": 0.4920559823513031, "learning_rate": 6.831527791839628e-05, "loss": 1.7805, "step": 6269 }, { "epoch": 1.9031719532554257, "grad_norm": 0.6937544345855713, "learning_rate": 6.831021565252607e-05, "loss": 1.9112, "step": 6270 }, { "epoch": 1.9034754894521173, "grad_norm": 0.5743443965911865, "learning_rate": 6.830515338665588e-05, "loss": 1.669, "step": 6271 }, { "epoch": 1.9037790256488085, "grad_norm": 0.5362838506698608, "learning_rate": 6.830009112078567e-05, "loss": 1.7985, "step": 6272 }, { "epoch": 1.9040825618455002, "grad_norm": 0.5269457101821899, "learning_rate": 6.829502885491547e-05, "loss": 1.1284, "step": 6273 }, { "epoch": 1.9043860980421914, "grad_norm": 0.6071358323097229, "learning_rate": 6.828996658904526e-05, "loss": 1.7948, "step": 6274 }, { "epoch": 1.904689634238883, "grad_norm": 0.5875253081321716, "learning_rate": 6.828490432317506e-05, "loss": 1.4865, "step": 6275 }, { "epoch": 1.9049931704355745, "grad_norm": 0.6144857406616211, "learning_rate": 6.827984205730485e-05, "loss": 1.2758, "step": 6276 }, { "epoch": 1.905296706632266, "grad_norm": 0.6165915727615356, "learning_rate": 6.827477979143465e-05, "loss": 1.857, "step": 6277 }, { "epoch": 1.9056002428289573, "grad_norm": 0.5990175604820251, "learning_rate": 6.826971752556444e-05, "loss": 1.67, "step": 6278 }, { "epoch": 1.9059037790256488, "grad_norm": 0.5738477110862732, "learning_rate": 6.826465525969424e-05, "loss": 1.6268, "step": 6279 }, { "epoch": 1.9062073152223402, "grad_norm": 0.4280742406845093, "learning_rate": 6.825959299382405e-05, "loss": 1.8544, "step": 6280 }, { "epoch": 1.9065108514190316, "grad_norm": 0.4357243776321411, "learning_rate": 6.825453072795384e-05, "loss": 1.2586, "step": 6281 }, { "epoch": 1.9068143876157233, "grad_norm": 0.5700424909591675, "learning_rate": 6.824946846208364e-05, "loss": 1.7586, "step": 6282 }, { "epoch": 1.9071179238124145, "grad_norm": 0.5744918584823608, "learning_rate": 6.824440619621343e-05, "loss": 1.8481, "step": 6283 }, { "epoch": 1.9074214600091062, "grad_norm": 0.646240770816803, "learning_rate": 6.823934393034323e-05, "loss": 1.6283, "step": 6284 }, { "epoch": 1.9077249962057974, "grad_norm": 0.5268108248710632, "learning_rate": 6.823428166447302e-05, "loss": 1.879, "step": 6285 }, { "epoch": 1.908028532402489, "grad_norm": 0.4985029995441437, "learning_rate": 6.822921939860281e-05, "loss": 1.8827, "step": 6286 }, { "epoch": 1.9083320685991805, "grad_norm": 0.38506224751472473, "learning_rate": 6.822415713273261e-05, "loss": 1.7014, "step": 6287 }, { "epoch": 1.908635604795872, "grad_norm": 0.5269253253936768, "learning_rate": 6.82190948668624e-05, "loss": 1.7312, "step": 6288 }, { "epoch": 1.9089391409925633, "grad_norm": 0.5004891753196716, "learning_rate": 6.821403260099221e-05, "loss": 1.6025, "step": 6289 }, { "epoch": 1.9092426771892548, "grad_norm": 0.5361594557762146, "learning_rate": 6.820897033512201e-05, "loss": 1.45, "step": 6290 }, { "epoch": 1.9095462133859464, "grad_norm": 0.5427488684654236, "learning_rate": 6.82039080692518e-05, "loss": 1.7416, "step": 6291 }, { "epoch": 1.9098497495826376, "grad_norm": 0.5532050132751465, "learning_rate": 6.81988458033816e-05, "loss": 1.589, "step": 6292 }, { "epoch": 1.9101532857793293, "grad_norm": 0.49807676672935486, "learning_rate": 6.819378353751139e-05, "loss": 1.2657, "step": 6293 }, { "epoch": 1.9104568219760205, "grad_norm": 0.587239146232605, "learning_rate": 6.818872127164119e-05, "loss": 1.7507, "step": 6294 }, { "epoch": 1.9107603581727122, "grad_norm": 0.5817277431488037, "learning_rate": 6.818365900577098e-05, "loss": 1.7011, "step": 6295 }, { "epoch": 1.9110638943694036, "grad_norm": 0.46033892035484314, "learning_rate": 6.817859673990078e-05, "loss": 1.1022, "step": 6296 }, { "epoch": 1.911367430566095, "grad_norm": 0.5857753753662109, "learning_rate": 6.817353447403057e-05, "loss": 1.7669, "step": 6297 }, { "epoch": 1.9116709667627865, "grad_norm": 0.6346994638442993, "learning_rate": 6.816847220816037e-05, "loss": 1.5637, "step": 6298 }, { "epoch": 1.911974502959478, "grad_norm": 0.605383038520813, "learning_rate": 6.816340994229017e-05, "loss": 1.529, "step": 6299 }, { "epoch": 1.9122780391561693, "grad_norm": 0.47823747992515564, "learning_rate": 6.815834767641997e-05, "loss": 1.284, "step": 6300 }, { "epoch": 1.9125815753528608, "grad_norm": 1.0050535202026367, "learning_rate": 6.815328541054976e-05, "loss": 1.7572, "step": 6301 }, { "epoch": 1.9128851115495524, "grad_norm": 0.5828871726989746, "learning_rate": 6.814822314467956e-05, "loss": 1.6989, "step": 6302 }, { "epoch": 1.9131886477462436, "grad_norm": 0.6212213039398193, "learning_rate": 6.814316087880935e-05, "loss": 1.7444, "step": 6303 }, { "epoch": 1.9134921839429353, "grad_norm": 0.49428045749664307, "learning_rate": 6.813809861293915e-05, "loss": 1.5384, "step": 6304 }, { "epoch": 1.9137957201396265, "grad_norm": 0.511870801448822, "learning_rate": 6.813303634706894e-05, "loss": 1.8753, "step": 6305 }, { "epoch": 1.9140992563363182, "grad_norm": 0.5578843951225281, "learning_rate": 6.812797408119875e-05, "loss": 1.7222, "step": 6306 }, { "epoch": 1.9144027925330096, "grad_norm": 0.4399265944957733, "learning_rate": 6.812291181532855e-05, "loss": 1.5536, "step": 6307 }, { "epoch": 1.914706328729701, "grad_norm": 0.49018776416778564, "learning_rate": 6.811784954945834e-05, "loss": 1.847, "step": 6308 }, { "epoch": 1.9150098649263925, "grad_norm": 0.514025866985321, "learning_rate": 6.811278728358814e-05, "loss": 2.0138, "step": 6309 }, { "epoch": 1.915313401123084, "grad_norm": 0.48464658856391907, "learning_rate": 6.810772501771794e-05, "loss": 1.8323, "step": 6310 }, { "epoch": 1.9156169373197753, "grad_norm": 0.4685630798339844, "learning_rate": 6.810266275184774e-05, "loss": 1.2155, "step": 6311 }, { "epoch": 1.9159204735164668, "grad_norm": 0.5250114798545837, "learning_rate": 6.809760048597753e-05, "loss": 1.7666, "step": 6312 }, { "epoch": 1.9162240097131584, "grad_norm": 0.5229688882827759, "learning_rate": 6.809253822010733e-05, "loss": 1.6904, "step": 6313 }, { "epoch": 1.9165275459098496, "grad_norm": 0.6486567854881287, "learning_rate": 6.808747595423712e-05, "loss": 1.7136, "step": 6314 }, { "epoch": 1.9168310821065413, "grad_norm": 0.8866993188858032, "learning_rate": 6.808241368836692e-05, "loss": 1.3946, "step": 6315 }, { "epoch": 1.9171346183032325, "grad_norm": 0.5037923455238342, "learning_rate": 6.807735142249671e-05, "loss": 1.8012, "step": 6316 }, { "epoch": 1.9174381544999242, "grad_norm": 0.5571567416191101, "learning_rate": 6.807228915662651e-05, "loss": 1.8662, "step": 6317 }, { "epoch": 1.9177416906966156, "grad_norm": 0.5871730446815491, "learning_rate": 6.80672268907563e-05, "loss": 1.7789, "step": 6318 }, { "epoch": 1.918045226893307, "grad_norm": 0.8165649175643921, "learning_rate": 6.806216462488611e-05, "loss": 1.7067, "step": 6319 }, { "epoch": 1.9183487630899985, "grad_norm": 0.5025912523269653, "learning_rate": 6.80571023590159e-05, "loss": 1.6136, "step": 6320 }, { "epoch": 1.91865229928669, "grad_norm": 0.5596361756324768, "learning_rate": 6.80520400931457e-05, "loss": 1.6783, "step": 6321 }, { "epoch": 1.9189558354833816, "grad_norm": 0.6053299903869629, "learning_rate": 6.80469778272755e-05, "loss": 1.3395, "step": 6322 }, { "epoch": 1.9192593716800728, "grad_norm": 0.5544418692588806, "learning_rate": 6.804191556140529e-05, "loss": 1.7102, "step": 6323 }, { "epoch": 1.9195629078767644, "grad_norm": 0.7214632034301758, "learning_rate": 6.803685329553508e-05, "loss": 1.324, "step": 6324 }, { "epoch": 1.9198664440734556, "grad_norm": 0.6079810857772827, "learning_rate": 6.803179102966488e-05, "loss": 1.7599, "step": 6325 }, { "epoch": 1.9201699802701473, "grad_norm": 0.37600433826446533, "learning_rate": 6.802672876379467e-05, "loss": 1.2087, "step": 6326 }, { "epoch": 1.9204735164668387, "grad_norm": 0.6091787815093994, "learning_rate": 6.802166649792447e-05, "loss": 1.469, "step": 6327 }, { "epoch": 1.9207770526635302, "grad_norm": 0.5562729835510254, "learning_rate": 6.801660423205428e-05, "loss": 1.6253, "step": 6328 }, { "epoch": 1.9210805888602216, "grad_norm": 0.6257612705230713, "learning_rate": 6.801154196618407e-05, "loss": 1.3446, "step": 6329 }, { "epoch": 1.921384125056913, "grad_norm": 0.6591886878013611, "learning_rate": 6.800647970031387e-05, "loss": 1.8737, "step": 6330 }, { "epoch": 1.9216876612536045, "grad_norm": 0.6528851985931396, "learning_rate": 6.800141743444366e-05, "loss": 1.6804, "step": 6331 }, { "epoch": 1.921991197450296, "grad_norm": 0.6890894770622253, "learning_rate": 6.799635516857346e-05, "loss": 1.4509, "step": 6332 }, { "epoch": 1.9222947336469876, "grad_norm": 0.8640413880348206, "learning_rate": 6.799129290270325e-05, "loss": 1.5481, "step": 6333 }, { "epoch": 1.9225982698436788, "grad_norm": 0.747075617313385, "learning_rate": 6.798623063683305e-05, "loss": 2.102, "step": 6334 }, { "epoch": 1.9229018060403704, "grad_norm": 0.5577446222305298, "learning_rate": 6.798116837096284e-05, "loss": 1.9817, "step": 6335 }, { "epoch": 1.9232053422370616, "grad_norm": 0.4409145712852478, "learning_rate": 6.797610610509264e-05, "loss": 1.2139, "step": 6336 }, { "epoch": 1.9235088784337533, "grad_norm": 0.5447250604629517, "learning_rate": 6.797104383922243e-05, "loss": 1.8243, "step": 6337 }, { "epoch": 1.9238124146304447, "grad_norm": 0.5465307831764221, "learning_rate": 6.796598157335224e-05, "loss": 1.8265, "step": 6338 }, { "epoch": 1.9241159508271362, "grad_norm": 0.5586651563644409, "learning_rate": 6.796091930748203e-05, "loss": 1.4372, "step": 6339 }, { "epoch": 1.9244194870238276, "grad_norm": 0.44557085633277893, "learning_rate": 6.795585704161183e-05, "loss": 1.5344, "step": 6340 }, { "epoch": 1.924723023220519, "grad_norm": 0.6220162510871887, "learning_rate": 6.795079477574162e-05, "loss": 1.6125, "step": 6341 }, { "epoch": 1.9250265594172105, "grad_norm": 0.5200288891792297, "learning_rate": 6.794573250987142e-05, "loss": 1.4323, "step": 6342 }, { "epoch": 1.925330095613902, "grad_norm": 0.6363093256950378, "learning_rate": 6.794067024400121e-05, "loss": 1.2421, "step": 6343 }, { "epoch": 1.9256336318105935, "grad_norm": 0.6280449628829956, "learning_rate": 6.793560797813101e-05, "loss": 1.4025, "step": 6344 }, { "epoch": 1.9259371680072848, "grad_norm": 0.5198882222175598, "learning_rate": 6.79305457122608e-05, "loss": 1.9888, "step": 6345 }, { "epoch": 1.9262407042039764, "grad_norm": 0.5742619037628174, "learning_rate": 6.79254834463906e-05, "loss": 1.719, "step": 6346 }, { "epoch": 1.9265442404006676, "grad_norm": 0.6260584592819214, "learning_rate": 6.79204211805204e-05, "loss": 1.6638, "step": 6347 }, { "epoch": 1.9268477765973593, "grad_norm": 0.506345808506012, "learning_rate": 6.79153589146502e-05, "loss": 1.5784, "step": 6348 }, { "epoch": 1.9271513127940507, "grad_norm": 0.6416305303573608, "learning_rate": 6.791029664878e-05, "loss": 1.7515, "step": 6349 }, { "epoch": 1.9274548489907422, "grad_norm": 0.5155960917472839, "learning_rate": 6.79052343829098e-05, "loss": 1.9706, "step": 6350 }, { "epoch": 1.9277583851874336, "grad_norm": 0.6320833563804626, "learning_rate": 6.79001721170396e-05, "loss": 1.687, "step": 6351 }, { "epoch": 1.928061921384125, "grad_norm": 0.5683110356330872, "learning_rate": 6.789510985116939e-05, "loss": 1.7174, "step": 6352 }, { "epoch": 1.9283654575808167, "grad_norm": 0.5914602875709534, "learning_rate": 6.789004758529919e-05, "loss": 1.3214, "step": 6353 }, { "epoch": 1.928668993777508, "grad_norm": 0.7478408217430115, "learning_rate": 6.788498531942898e-05, "loss": 1.6696, "step": 6354 }, { "epoch": 1.9289725299741995, "grad_norm": 0.5382063984870911, "learning_rate": 6.787992305355878e-05, "loss": 1.8426, "step": 6355 }, { "epoch": 1.9292760661708908, "grad_norm": 0.49428850412368774, "learning_rate": 6.787486078768857e-05, "loss": 1.6634, "step": 6356 }, { "epoch": 1.9295796023675824, "grad_norm": 1.0222886800765991, "learning_rate": 6.786979852181837e-05, "loss": 1.7633, "step": 6357 }, { "epoch": 1.9298831385642736, "grad_norm": 0.5247024297714233, "learning_rate": 6.786473625594818e-05, "loss": 1.2287, "step": 6358 }, { "epoch": 1.9301866747609653, "grad_norm": 0.6638110876083374, "learning_rate": 6.785967399007797e-05, "loss": 1.6572, "step": 6359 }, { "epoch": 1.9304902109576567, "grad_norm": 0.7600938677787781, "learning_rate": 6.785461172420777e-05, "loss": 1.7869, "step": 6360 }, { "epoch": 1.9307937471543482, "grad_norm": 0.4435073435306549, "learning_rate": 6.784954945833756e-05, "loss": 1.3247, "step": 6361 }, { "epoch": 1.9310972833510396, "grad_norm": 0.5299533605575562, "learning_rate": 6.784448719246735e-05, "loss": 2.0236, "step": 6362 }, { "epoch": 1.931400819547731, "grad_norm": 0.6100825071334839, "learning_rate": 6.783942492659715e-05, "loss": 1.3755, "step": 6363 }, { "epoch": 1.9317043557444227, "grad_norm": 0.5790150761604309, "learning_rate": 6.783436266072694e-05, "loss": 1.3591, "step": 6364 }, { "epoch": 1.9320078919411139, "grad_norm": 0.5754750967025757, "learning_rate": 6.782930039485674e-05, "loss": 2.0173, "step": 6365 }, { "epoch": 1.9323114281378055, "grad_norm": 0.6932041049003601, "learning_rate": 6.782423812898653e-05, "loss": 1.8836, "step": 6366 }, { "epoch": 1.9326149643344968, "grad_norm": 0.5518671274185181, "learning_rate": 6.781917586311634e-05, "loss": 1.9421, "step": 6367 }, { "epoch": 1.9329185005311884, "grad_norm": 0.5681519508361816, "learning_rate": 6.781411359724614e-05, "loss": 1.5273, "step": 6368 }, { "epoch": 1.9332220367278798, "grad_norm": 0.47271469235420227, "learning_rate": 6.780905133137593e-05, "loss": 1.1639, "step": 6369 }, { "epoch": 1.9335255729245713, "grad_norm": 0.5806103348731995, "learning_rate": 6.780398906550573e-05, "loss": 1.4392, "step": 6370 }, { "epoch": 1.9338291091212627, "grad_norm": 0.5765122771263123, "learning_rate": 6.779892679963552e-05, "loss": 1.2515, "step": 6371 }, { "epoch": 1.9341326453179541, "grad_norm": 0.42490893602371216, "learning_rate": 6.779386453376532e-05, "loss": 1.6461, "step": 6372 }, { "epoch": 1.9344361815146456, "grad_norm": 0.5468345284461975, "learning_rate": 6.778880226789511e-05, "loss": 1.4913, "step": 6373 }, { "epoch": 1.934739717711337, "grad_norm": 0.5540151596069336, "learning_rate": 6.77837400020249e-05, "loss": 1.6999, "step": 6374 }, { "epoch": 1.9350432539080287, "grad_norm": 0.5978628396987915, "learning_rate": 6.77786777361547e-05, "loss": 1.5276, "step": 6375 }, { "epoch": 1.9353467901047199, "grad_norm": 0.6584348678588867, "learning_rate": 6.77736154702845e-05, "loss": 1.2151, "step": 6376 }, { "epoch": 1.9356503263014115, "grad_norm": 0.5471230149269104, "learning_rate": 6.77685532044143e-05, "loss": 1.4346, "step": 6377 }, { "epoch": 1.9359538624981028, "grad_norm": 0.5572274327278137, "learning_rate": 6.77634909385441e-05, "loss": 1.3116, "step": 6378 }, { "epoch": 1.9362573986947944, "grad_norm": 0.5694922804832458, "learning_rate": 6.77584286726739e-05, "loss": 2.0379, "step": 6379 }, { "epoch": 1.9365609348914858, "grad_norm": 0.4971182346343994, "learning_rate": 6.775336640680369e-05, "loss": 1.7887, "step": 6380 }, { "epoch": 1.9368644710881773, "grad_norm": 0.4632358253002167, "learning_rate": 6.774830414093348e-05, "loss": 2.0526, "step": 6381 }, { "epoch": 1.9371680072848687, "grad_norm": 0.6226524114608765, "learning_rate": 6.774324187506328e-05, "loss": 1.2819, "step": 6382 }, { "epoch": 1.9374715434815601, "grad_norm": 0.46665677428245544, "learning_rate": 6.773817960919307e-05, "loss": 1.7702, "step": 6383 }, { "epoch": 1.9377750796782516, "grad_norm": 0.5216962099075317, "learning_rate": 6.773311734332287e-05, "loss": 1.635, "step": 6384 }, { "epoch": 1.938078615874943, "grad_norm": 0.5371339321136475, "learning_rate": 6.772805507745266e-05, "loss": 1.804, "step": 6385 }, { "epoch": 1.9383821520716347, "grad_norm": 0.5909997820854187, "learning_rate": 6.772299281158247e-05, "loss": 1.5609, "step": 6386 }, { "epoch": 1.9386856882683259, "grad_norm": 0.5082078576087952, "learning_rate": 6.771793054571227e-05, "loss": 1.9097, "step": 6387 }, { "epoch": 1.9389892244650175, "grad_norm": 0.5267134308815002, "learning_rate": 6.771286827984206e-05, "loss": 1.7824, "step": 6388 }, { "epoch": 1.9392927606617087, "grad_norm": 0.5663163065910339, "learning_rate": 6.770780601397185e-05, "loss": 1.5645, "step": 6389 }, { "epoch": 1.9395962968584004, "grad_norm": 0.5970969796180725, "learning_rate": 6.770274374810165e-05, "loss": 1.8656, "step": 6390 }, { "epoch": 1.9398998330550918, "grad_norm": 0.4860338270664215, "learning_rate": 6.769768148223144e-05, "loss": 1.4045, "step": 6391 }, { "epoch": 1.9402033692517833, "grad_norm": 0.5907909274101257, "learning_rate": 6.769261921636124e-05, "loss": 1.6731, "step": 6392 }, { "epoch": 1.9405069054484747, "grad_norm": 1.025832176208496, "learning_rate": 6.768755695049103e-05, "loss": 0.6762, "step": 6393 }, { "epoch": 1.9408104416451661, "grad_norm": 0.5831971168518066, "learning_rate": 6.768249468462083e-05, "loss": 1.8978, "step": 6394 }, { "epoch": 1.9411139778418578, "grad_norm": 0.6331748366355896, "learning_rate": 6.767743241875064e-05, "loss": 1.8449, "step": 6395 }, { "epoch": 1.941417514038549, "grad_norm": 0.5442969799041748, "learning_rate": 6.767237015288043e-05, "loss": 1.5979, "step": 6396 }, { "epoch": 1.9417210502352407, "grad_norm": 0.5305371880531311, "learning_rate": 6.766730788701024e-05, "loss": 1.314, "step": 6397 }, { "epoch": 1.9420245864319319, "grad_norm": 0.5143059492111206, "learning_rate": 6.766224562114004e-05, "loss": 1.8672, "step": 6398 }, { "epoch": 1.9423281226286235, "grad_norm": 0.538637638092041, "learning_rate": 6.765718335526983e-05, "loss": 1.6072, "step": 6399 }, { "epoch": 1.942631658825315, "grad_norm": 0.5820212364196777, "learning_rate": 6.765212108939962e-05, "loss": 1.7255, "step": 6400 }, { "epoch": 1.9429351950220064, "grad_norm": 0.5563800930976868, "learning_rate": 6.764705882352942e-05, "loss": 1.8238, "step": 6401 }, { "epoch": 1.9432387312186978, "grad_norm": 0.5390816330909729, "learning_rate": 6.764199655765921e-05, "loss": 1.6314, "step": 6402 }, { "epoch": 1.9435422674153893, "grad_norm": 0.6193544268608093, "learning_rate": 6.763693429178901e-05, "loss": 1.4942, "step": 6403 }, { "epoch": 1.9438458036120807, "grad_norm": 0.6250959634780884, "learning_rate": 6.76318720259188e-05, "loss": 1.6116, "step": 6404 }, { "epoch": 1.9441493398087721, "grad_norm": 0.5631844997406006, "learning_rate": 6.76268097600486e-05, "loss": 1.6372, "step": 6405 }, { "epoch": 1.9444528760054638, "grad_norm": 0.5537664890289307, "learning_rate": 6.762174749417841e-05, "loss": 1.7545, "step": 6406 }, { "epoch": 1.944756412202155, "grad_norm": 0.537027895450592, "learning_rate": 6.76166852283082e-05, "loss": 1.3335, "step": 6407 }, { "epoch": 1.9450599483988467, "grad_norm": 0.5764704346656799, "learning_rate": 6.7611622962438e-05, "loss": 1.6726, "step": 6408 }, { "epoch": 1.9453634845955379, "grad_norm": 0.6139593124389648, "learning_rate": 6.760656069656779e-05, "loss": 1.6113, "step": 6409 }, { "epoch": 1.9456670207922295, "grad_norm": 0.708470344543457, "learning_rate": 6.760149843069759e-05, "loss": 1.4301, "step": 6410 }, { "epoch": 1.945970556988921, "grad_norm": 0.6561748385429382, "learning_rate": 6.759643616482738e-05, "loss": 1.8532, "step": 6411 }, { "epoch": 1.9462740931856124, "grad_norm": 0.5118820667266846, "learning_rate": 6.759137389895718e-05, "loss": 1.7623, "step": 6412 }, { "epoch": 1.9465776293823038, "grad_norm": 0.5191006064414978, "learning_rate": 6.758631163308697e-05, "loss": 1.3966, "step": 6413 }, { "epoch": 1.9468811655789953, "grad_norm": 0.49863511323928833, "learning_rate": 6.758124936721677e-05, "loss": 1.7264, "step": 6414 }, { "epoch": 1.9471847017756867, "grad_norm": 0.4555209279060364, "learning_rate": 6.757618710134656e-05, "loss": 1.7601, "step": 6415 }, { "epoch": 1.9474882379723781, "grad_norm": 0.5262234210968018, "learning_rate": 6.757112483547637e-05, "loss": 1.7674, "step": 6416 }, { "epoch": 1.9477917741690698, "grad_norm": 0.6079599857330322, "learning_rate": 6.756606256960616e-05, "loss": 1.4726, "step": 6417 }, { "epoch": 1.948095310365761, "grad_norm": 0.6055933237075806, "learning_rate": 6.756100030373596e-05, "loss": 1.6495, "step": 6418 }, { "epoch": 1.9483988465624527, "grad_norm": 0.5409154295921326, "learning_rate": 6.755593803786575e-05, "loss": 1.8415, "step": 6419 }, { "epoch": 1.9487023827591439, "grad_norm": 0.5464493036270142, "learning_rate": 6.755087577199555e-05, "loss": 1.0245, "step": 6420 }, { "epoch": 1.9490059189558355, "grad_norm": 0.8876932859420776, "learning_rate": 6.754581350612534e-05, "loss": 1.4808, "step": 6421 }, { "epoch": 1.949309455152527, "grad_norm": 0.5546230673789978, "learning_rate": 6.754075124025514e-05, "loss": 1.6274, "step": 6422 }, { "epoch": 1.9496129913492184, "grad_norm": 0.5752897262573242, "learning_rate": 6.753568897438493e-05, "loss": 1.7384, "step": 6423 }, { "epoch": 1.9499165275459098, "grad_norm": 0.6077266931533813, "learning_rate": 6.753062670851473e-05, "loss": 1.5749, "step": 6424 }, { "epoch": 1.9502200637426013, "grad_norm": 0.5324370265007019, "learning_rate": 6.752556444264454e-05, "loss": 1.6556, "step": 6425 }, { "epoch": 1.950523599939293, "grad_norm": 0.5463990569114685, "learning_rate": 6.752050217677433e-05, "loss": 1.6159, "step": 6426 }, { "epoch": 1.9508271361359841, "grad_norm": 0.5445590615272522, "learning_rate": 6.751543991090412e-05, "loss": 1.4407, "step": 6427 }, { "epoch": 1.9511306723326758, "grad_norm": 0.5993614792823792, "learning_rate": 6.751037764503392e-05, "loss": 1.667, "step": 6428 }, { "epoch": 1.951434208529367, "grad_norm": 0.5832435488700867, "learning_rate": 6.750531537916371e-05, "loss": 1.6842, "step": 6429 }, { "epoch": 1.9517377447260587, "grad_norm": 0.6399338245391846, "learning_rate": 6.750025311329351e-05, "loss": 1.9275, "step": 6430 }, { "epoch": 1.95204128092275, "grad_norm": 0.46948257088661194, "learning_rate": 6.74951908474233e-05, "loss": 1.4798, "step": 6431 }, { "epoch": 1.9523448171194415, "grad_norm": 0.5000807642936707, "learning_rate": 6.74901285815531e-05, "loss": 0.8763, "step": 6432 }, { "epoch": 1.952648353316133, "grad_norm": 0.7182363867759705, "learning_rate": 6.74850663156829e-05, "loss": 1.4422, "step": 6433 }, { "epoch": 1.9529518895128244, "grad_norm": 0.7131654620170593, "learning_rate": 6.74800040498127e-05, "loss": 1.2321, "step": 6434 }, { "epoch": 1.9532554257095158, "grad_norm": 0.7544083595275879, "learning_rate": 6.74749417839425e-05, "loss": 1.5929, "step": 6435 }, { "epoch": 1.9535589619062073, "grad_norm": 0.5653949975967407, "learning_rate": 6.746987951807229e-05, "loss": 1.9116, "step": 6436 }, { "epoch": 1.953862498102899, "grad_norm": 0.49180904030799866, "learning_rate": 6.746481725220209e-05, "loss": 2.1056, "step": 6437 }, { "epoch": 1.9541660342995901, "grad_norm": 0.5516203045845032, "learning_rate": 6.745975498633188e-05, "loss": 1.6536, "step": 6438 }, { "epoch": 1.9544695704962818, "grad_norm": 0.535835325717926, "learning_rate": 6.745469272046169e-05, "loss": 1.7121, "step": 6439 }, { "epoch": 1.954773106692973, "grad_norm": 0.5963249802589417, "learning_rate": 6.744963045459148e-05, "loss": 1.4976, "step": 6440 }, { "epoch": 1.9550766428896647, "grad_norm": 0.648929238319397, "learning_rate": 6.744456818872128e-05, "loss": 1.5205, "step": 6441 }, { "epoch": 1.955380179086356, "grad_norm": 0.5600722432136536, "learning_rate": 6.743950592285107e-05, "loss": 1.6621, "step": 6442 }, { "epoch": 1.9556837152830475, "grad_norm": 0.48909708857536316, "learning_rate": 6.743444365698087e-05, "loss": 1.7789, "step": 6443 }, { "epoch": 1.955987251479739, "grad_norm": 0.5888108015060425, "learning_rate": 6.742938139111066e-05, "loss": 1.9655, "step": 6444 }, { "epoch": 1.9562907876764304, "grad_norm": 0.5093645453453064, "learning_rate": 6.742431912524047e-05, "loss": 1.5964, "step": 6445 }, { "epoch": 1.9565943238731218, "grad_norm": 0.4677153527736664, "learning_rate": 6.741925685937027e-05, "loss": 2.0216, "step": 6446 }, { "epoch": 1.9568978600698133, "grad_norm": 0.6702057719230652, "learning_rate": 6.741419459350006e-05, "loss": 1.4916, "step": 6447 }, { "epoch": 1.957201396266505, "grad_norm": 0.5236727595329285, "learning_rate": 6.740913232762986e-05, "loss": 1.6388, "step": 6448 }, { "epoch": 1.9575049324631961, "grad_norm": 0.43825381994247437, "learning_rate": 6.740407006175965e-05, "loss": 1.6884, "step": 6449 }, { "epoch": 1.9578084686598878, "grad_norm": 0.4706141948699951, "learning_rate": 6.739900779588945e-05, "loss": 1.2306, "step": 6450 }, { "epoch": 1.958112004856579, "grad_norm": 0.5973210334777832, "learning_rate": 6.739394553001924e-05, "loss": 1.6573, "step": 6451 }, { "epoch": 1.9584155410532706, "grad_norm": 0.6160237193107605, "learning_rate": 6.738888326414904e-05, "loss": 1.6356, "step": 6452 }, { "epoch": 1.958719077249962, "grad_norm": 0.6227190494537354, "learning_rate": 6.738382099827883e-05, "loss": 1.4816, "step": 6453 }, { "epoch": 1.9590226134466535, "grad_norm": 0.5531430840492249, "learning_rate": 6.737875873240862e-05, "loss": 1.7044, "step": 6454 }, { "epoch": 1.959326149643345, "grad_norm": 0.4098502993583679, "learning_rate": 6.737369646653843e-05, "loss": 1.8336, "step": 6455 }, { "epoch": 1.9596296858400364, "grad_norm": 0.6158850193023682, "learning_rate": 6.736863420066823e-05, "loss": 1.8795, "step": 6456 }, { "epoch": 1.959933222036728, "grad_norm": 0.5197069644927979, "learning_rate": 6.736357193479802e-05, "loss": 1.515, "step": 6457 }, { "epoch": 1.9602367582334193, "grad_norm": 0.7297940254211426, "learning_rate": 6.735850966892782e-05, "loss": 1.1881, "step": 6458 }, { "epoch": 1.960540294430111, "grad_norm": 0.5615236163139343, "learning_rate": 6.735344740305761e-05, "loss": 1.4264, "step": 6459 }, { "epoch": 1.9608438306268021, "grad_norm": 0.5689505934715271, "learning_rate": 6.734838513718741e-05, "loss": 1.8072, "step": 6460 }, { "epoch": 1.9611473668234938, "grad_norm": 0.5976860523223877, "learning_rate": 6.73433228713172e-05, "loss": 1.6385, "step": 6461 }, { "epoch": 1.9614509030201852, "grad_norm": 0.887874186038971, "learning_rate": 6.7338260605447e-05, "loss": 1.5963, "step": 6462 }, { "epoch": 1.9617544392168766, "grad_norm": 0.5702905058860779, "learning_rate": 6.733319833957679e-05, "loss": 1.7066, "step": 6463 }, { "epoch": 1.962057975413568, "grad_norm": 0.5714231729507446, "learning_rate": 6.73281360737066e-05, "loss": 1.6111, "step": 6464 }, { "epoch": 1.9623615116102595, "grad_norm": 0.6091324687004089, "learning_rate": 6.73230738078364e-05, "loss": 1.6581, "step": 6465 }, { "epoch": 1.962665047806951, "grad_norm": 0.5078444480895996, "learning_rate": 6.731801154196619e-05, "loss": 1.5205, "step": 6466 }, { "epoch": 1.9629685840036424, "grad_norm": 0.5809574723243713, "learning_rate": 6.731294927609598e-05, "loss": 1.1441, "step": 6467 }, { "epoch": 1.963272120200334, "grad_norm": 0.5350870490074158, "learning_rate": 6.730788701022578e-05, "loss": 1.5295, "step": 6468 }, { "epoch": 1.9635756563970252, "grad_norm": 0.5390435457229614, "learning_rate": 6.730282474435557e-05, "loss": 1.764, "step": 6469 }, { "epoch": 1.963879192593717, "grad_norm": 0.5699825882911682, "learning_rate": 6.729776247848537e-05, "loss": 1.7769, "step": 6470 }, { "epoch": 1.9641827287904081, "grad_norm": 0.5871105194091797, "learning_rate": 6.729270021261516e-05, "loss": 1.7677, "step": 6471 }, { "epoch": 1.9644862649870998, "grad_norm": 0.404573917388916, "learning_rate": 6.728763794674496e-05, "loss": 1.4699, "step": 6472 }, { "epoch": 1.9647898011837912, "grad_norm": 0.6037812232971191, "learning_rate": 6.728257568087477e-05, "loss": 1.8687, "step": 6473 }, { "epoch": 1.9650933373804826, "grad_norm": 0.5443028211593628, "learning_rate": 6.727751341500456e-05, "loss": 1.6255, "step": 6474 }, { "epoch": 1.965396873577174, "grad_norm": 0.6080754399299622, "learning_rate": 6.727245114913436e-05, "loss": 1.898, "step": 6475 }, { "epoch": 1.9657004097738655, "grad_norm": 0.597493588924408, "learning_rate": 6.726738888326415e-05, "loss": 1.8199, "step": 6476 }, { "epoch": 1.966003945970557, "grad_norm": 1.2117207050323486, "learning_rate": 6.726232661739395e-05, "loss": 1.4137, "step": 6477 }, { "epoch": 1.9663074821672484, "grad_norm": 0.5255807042121887, "learning_rate": 6.725726435152374e-05, "loss": 1.5809, "step": 6478 }, { "epoch": 1.96661101836394, "grad_norm": 0.5825793147087097, "learning_rate": 6.725220208565354e-05, "loss": 1.7478, "step": 6479 }, { "epoch": 1.9669145545606312, "grad_norm": 0.5194748044013977, "learning_rate": 6.724713981978333e-05, "loss": 1.7185, "step": 6480 }, { "epoch": 1.967218090757323, "grad_norm": 0.6001968383789062, "learning_rate": 6.724207755391312e-05, "loss": 1.8425, "step": 6481 }, { "epoch": 1.9675216269540141, "grad_norm": 0.4742978513240814, "learning_rate": 6.723701528804292e-05, "loss": 1.3781, "step": 6482 }, { "epoch": 1.9678251631507058, "grad_norm": 0.5860036611557007, "learning_rate": 6.723195302217273e-05, "loss": 1.6749, "step": 6483 }, { "epoch": 1.9681286993473972, "grad_norm": 0.5993077754974365, "learning_rate": 6.722689075630254e-05, "loss": 1.4344, "step": 6484 }, { "epoch": 1.9684322355440886, "grad_norm": 0.6019352674484253, "learning_rate": 6.722182849043233e-05, "loss": 1.7294, "step": 6485 }, { "epoch": 1.96873577174078, "grad_norm": 0.707388162612915, "learning_rate": 6.721676622456213e-05, "loss": 1.8304, "step": 6486 }, { "epoch": 1.9690393079374715, "grad_norm": 0.5254133343696594, "learning_rate": 6.721170395869192e-05, "loss": 1.919, "step": 6487 }, { "epoch": 1.9693428441341632, "grad_norm": 0.558387815952301, "learning_rate": 6.720664169282172e-05, "loss": 1.5288, "step": 6488 }, { "epoch": 1.9696463803308544, "grad_norm": 0.5417835712432861, "learning_rate": 6.720157942695151e-05, "loss": 1.6305, "step": 6489 }, { "epoch": 1.969949916527546, "grad_norm": 0.49199047684669495, "learning_rate": 6.71965171610813e-05, "loss": 1.0437, "step": 6490 }, { "epoch": 1.9702534527242372, "grad_norm": 0.5585491061210632, "learning_rate": 6.71914548952111e-05, "loss": 1.6227, "step": 6491 }, { "epoch": 1.970556988920929, "grad_norm": 0.5286018252372742, "learning_rate": 6.71863926293409e-05, "loss": 1.765, "step": 6492 }, { "epoch": 1.97086052511762, "grad_norm": 0.499729722738266, "learning_rate": 6.718133036347069e-05, "loss": 1.9407, "step": 6493 }, { "epoch": 1.9711640613143118, "grad_norm": 0.6999737620353699, "learning_rate": 6.71762680976005e-05, "loss": 1.7973, "step": 6494 }, { "epoch": 1.9714675975110032, "grad_norm": 0.5820497274398804, "learning_rate": 6.717120583173029e-05, "loss": 1.7509, "step": 6495 }, { "epoch": 1.9717711337076946, "grad_norm": 0.6684516072273254, "learning_rate": 6.716614356586009e-05, "loss": 1.093, "step": 6496 }, { "epoch": 1.972074669904386, "grad_norm": 0.5111326575279236, "learning_rate": 6.716108129998988e-05, "loss": 1.7832, "step": 6497 }, { "epoch": 1.9723782061010775, "grad_norm": 0.5432667136192322, "learning_rate": 6.715601903411968e-05, "loss": 1.1834, "step": 6498 }, { "epoch": 1.9726817422977692, "grad_norm": 0.4429357945919037, "learning_rate": 6.715095676824947e-05, "loss": 0.8361, "step": 6499 }, { "epoch": 1.9729852784944604, "grad_norm": 0.6037774085998535, "learning_rate": 6.714589450237927e-05, "loss": 1.8509, "step": 6500 }, { "epoch": 1.973288814691152, "grad_norm": 0.5203253030776978, "learning_rate": 6.714083223650906e-05, "loss": 1.4746, "step": 6501 }, { "epoch": 1.9735923508878432, "grad_norm": 0.5598331093788147, "learning_rate": 6.713576997063886e-05, "loss": 1.904, "step": 6502 }, { "epoch": 1.973895887084535, "grad_norm": 0.5714544653892517, "learning_rate": 6.713070770476866e-05, "loss": 1.3138, "step": 6503 }, { "epoch": 1.9741994232812263, "grad_norm": 0.5742242932319641, "learning_rate": 6.712564543889846e-05, "loss": 1.5685, "step": 6504 }, { "epoch": 1.9745029594779178, "grad_norm": 0.6703736782073975, "learning_rate": 6.712058317302825e-05, "loss": 1.4434, "step": 6505 }, { "epoch": 1.9748064956746092, "grad_norm": 0.6143460869789124, "learning_rate": 6.711552090715805e-05, "loss": 1.3583, "step": 6506 }, { "epoch": 1.9751100318713006, "grad_norm": 0.5895220637321472, "learning_rate": 6.711045864128784e-05, "loss": 1.8181, "step": 6507 }, { "epoch": 1.975413568067992, "grad_norm": 0.5533620119094849, "learning_rate": 6.710539637541764e-05, "loss": 1.4437, "step": 6508 }, { "epoch": 1.9757171042646835, "grad_norm": 0.45495548844337463, "learning_rate": 6.710033410954743e-05, "loss": 1.7565, "step": 6509 }, { "epoch": 1.9760206404613752, "grad_norm": 0.5480602383613586, "learning_rate": 6.709527184367723e-05, "loss": 1.8036, "step": 6510 }, { "epoch": 1.9763241766580664, "grad_norm": 0.6139658689498901, "learning_rate": 6.709020957780702e-05, "loss": 1.7937, "step": 6511 }, { "epoch": 1.976627712854758, "grad_norm": 0.4904802739620209, "learning_rate": 6.708514731193683e-05, "loss": 1.9573, "step": 6512 }, { "epoch": 1.9769312490514492, "grad_norm": 0.4706535339355469, "learning_rate": 6.708008504606663e-05, "loss": 1.6996, "step": 6513 }, { "epoch": 1.977234785248141, "grad_norm": 0.5434771180152893, "learning_rate": 6.707502278019642e-05, "loss": 1.4273, "step": 6514 }, { "epoch": 1.9775383214448323, "grad_norm": 0.5178016424179077, "learning_rate": 6.706996051432622e-05, "loss": 1.277, "step": 6515 }, { "epoch": 1.9778418576415238, "grad_norm": 0.47218969464302063, "learning_rate": 6.706489824845601e-05, "loss": 1.122, "step": 6516 }, { "epoch": 1.9781453938382152, "grad_norm": 0.5253738760948181, "learning_rate": 6.70598359825858e-05, "loss": 1.9643, "step": 6517 }, { "epoch": 1.9784489300349066, "grad_norm": 0.5150578618049622, "learning_rate": 6.70547737167156e-05, "loss": 1.5839, "step": 6518 }, { "epoch": 1.9787524662315983, "grad_norm": 0.5388327240943909, "learning_rate": 6.70497114508454e-05, "loss": 1.8581, "step": 6519 }, { "epoch": 1.9790560024282895, "grad_norm": 0.518647313117981, "learning_rate": 6.704464918497519e-05, "loss": 1.9299, "step": 6520 }, { "epoch": 1.9793595386249812, "grad_norm": 0.6723092198371887, "learning_rate": 6.703958691910498e-05, "loss": 1.8578, "step": 6521 }, { "epoch": 1.9796630748216724, "grad_norm": 0.5758662819862366, "learning_rate": 6.703452465323479e-05, "loss": 1.6435, "step": 6522 }, { "epoch": 1.979966611018364, "grad_norm": 0.6570977568626404, "learning_rate": 6.702946238736459e-05, "loss": 1.9321, "step": 6523 }, { "epoch": 1.9802701472150552, "grad_norm": 0.5400984883308411, "learning_rate": 6.702440012149438e-05, "loss": 1.4454, "step": 6524 }, { "epoch": 1.9805736834117469, "grad_norm": 0.48057571053504944, "learning_rate": 6.701933785562418e-05, "loss": 1.6747, "step": 6525 }, { "epoch": 1.9808772196084383, "grad_norm": 0.6174799799919128, "learning_rate": 6.701427558975397e-05, "loss": 1.4885, "step": 6526 }, { "epoch": 1.9811807558051298, "grad_norm": 0.6381711363792419, "learning_rate": 6.700921332388377e-05, "loss": 1.5634, "step": 6527 }, { "epoch": 1.9814842920018212, "grad_norm": 0.5980601906776428, "learning_rate": 6.700415105801358e-05, "loss": 1.779, "step": 6528 }, { "epoch": 1.9817878281985126, "grad_norm": 0.6087924242019653, "learning_rate": 6.699908879214337e-05, "loss": 1.3662, "step": 6529 }, { "epoch": 1.9820913643952043, "grad_norm": 0.7204270958900452, "learning_rate": 6.699402652627316e-05, "loss": 1.7392, "step": 6530 }, { "epoch": 1.9823949005918955, "grad_norm": 0.6271967887878418, "learning_rate": 6.698896426040296e-05, "loss": 1.9596, "step": 6531 }, { "epoch": 1.9826984367885871, "grad_norm": 0.5030335783958435, "learning_rate": 6.698390199453275e-05, "loss": 1.6411, "step": 6532 }, { "epoch": 1.9830019729852784, "grad_norm": 0.5594578385353088, "learning_rate": 6.697883972866256e-05, "loss": 1.2329, "step": 6533 }, { "epoch": 1.98330550918197, "grad_norm": 0.6117578744888306, "learning_rate": 6.697377746279236e-05, "loss": 1.7061, "step": 6534 }, { "epoch": 1.9836090453786615, "grad_norm": 0.44561445713043213, "learning_rate": 6.696871519692215e-05, "loss": 1.8048, "step": 6535 }, { "epoch": 1.9839125815753529, "grad_norm": 0.49325069785118103, "learning_rate": 6.696365293105195e-05, "loss": 1.8344, "step": 6536 }, { "epoch": 1.9842161177720443, "grad_norm": 0.42023026943206787, "learning_rate": 6.695859066518174e-05, "loss": 1.7417, "step": 6537 }, { "epoch": 1.9845196539687358, "grad_norm": 0.5083653926849365, "learning_rate": 6.695352839931154e-05, "loss": 1.8918, "step": 6538 }, { "epoch": 1.9848231901654272, "grad_norm": 0.5898207426071167, "learning_rate": 6.694846613344133e-05, "loss": 1.5339, "step": 6539 }, { "epoch": 1.9851267263621186, "grad_norm": 0.622306764125824, "learning_rate": 6.694340386757113e-05, "loss": 1.5009, "step": 6540 }, { "epoch": 1.9854302625588103, "grad_norm": 0.5618451237678528, "learning_rate": 6.693834160170092e-05, "loss": 1.235, "step": 6541 }, { "epoch": 1.9857337987555015, "grad_norm": 0.5828057527542114, "learning_rate": 6.693327933583073e-05, "loss": 1.7947, "step": 6542 }, { "epoch": 1.9860373349521931, "grad_norm": 0.4943867027759552, "learning_rate": 6.692821706996052e-05, "loss": 1.2251, "step": 6543 }, { "epoch": 1.9863408711488844, "grad_norm": 0.5512354969978333, "learning_rate": 6.692315480409032e-05, "loss": 1.6636, "step": 6544 }, { "epoch": 1.986644407345576, "grad_norm": 0.579082190990448, "learning_rate": 6.691809253822011e-05, "loss": 1.8635, "step": 6545 }, { "epoch": 1.9869479435422674, "grad_norm": 0.4855844974517822, "learning_rate": 6.691303027234991e-05, "loss": 1.0762, "step": 6546 }, { "epoch": 1.9872514797389589, "grad_norm": 0.4621904194355011, "learning_rate": 6.69079680064797e-05, "loss": 1.5484, "step": 6547 }, { "epoch": 1.9875550159356503, "grad_norm": 0.5971441268920898, "learning_rate": 6.69029057406095e-05, "loss": 1.1564, "step": 6548 }, { "epoch": 1.9878585521323417, "grad_norm": 0.47844675183296204, "learning_rate": 6.689784347473929e-05, "loss": 1.773, "step": 6549 }, { "epoch": 1.9881620883290332, "grad_norm": 0.47111570835113525, "learning_rate": 6.689278120886909e-05, "loss": 1.2417, "step": 6550 }, { "epoch": 1.9884656245257246, "grad_norm": 0.6051010489463806, "learning_rate": 6.68877189429989e-05, "loss": 1.8236, "step": 6551 }, { "epoch": 1.9887691607224163, "grad_norm": 0.7357580065727234, "learning_rate": 6.688265667712869e-05, "loss": 1.6381, "step": 6552 }, { "epoch": 1.9890726969191075, "grad_norm": 0.866306483745575, "learning_rate": 6.687759441125849e-05, "loss": 1.2849, "step": 6553 }, { "epoch": 1.9893762331157991, "grad_norm": 0.5960594415664673, "learning_rate": 6.687253214538828e-05, "loss": 1.3701, "step": 6554 }, { "epoch": 1.9896797693124904, "grad_norm": 0.5979890823364258, "learning_rate": 6.686746987951808e-05, "loss": 2.0261, "step": 6555 }, { "epoch": 1.989983305509182, "grad_norm": 0.47698384523391724, "learning_rate": 6.686240761364787e-05, "loss": 1.24, "step": 6556 }, { "epoch": 1.9902868417058734, "grad_norm": 0.6178485751152039, "learning_rate": 6.685734534777766e-05, "loss": 1.8583, "step": 6557 }, { "epoch": 1.9905903779025649, "grad_norm": 0.5533079504966736, "learning_rate": 6.685228308190746e-05, "loss": 0.9868, "step": 6558 }, { "epoch": 1.9908939140992563, "grad_norm": 0.5186291337013245, "learning_rate": 6.684722081603725e-05, "loss": 1.8188, "step": 6559 }, { "epoch": 1.9911974502959477, "grad_norm": 0.5277303457260132, "learning_rate": 6.684215855016705e-05, "loss": 1.4531, "step": 6560 }, { "epoch": 1.9915009864926394, "grad_norm": 1.0453486442565918, "learning_rate": 6.683709628429686e-05, "loss": 1.4636, "step": 6561 }, { "epoch": 1.9918045226893306, "grad_norm": 0.5569626688957214, "learning_rate": 6.683203401842665e-05, "loss": 1.9508, "step": 6562 }, { "epoch": 1.9921080588860223, "grad_norm": 0.4960695207118988, "learning_rate": 6.682697175255645e-05, "loss": 1.5171, "step": 6563 }, { "epoch": 1.9924115950827135, "grad_norm": 0.6133225560188293, "learning_rate": 6.682190948668624e-05, "loss": 1.5388, "step": 6564 }, { "epoch": 1.9927151312794051, "grad_norm": 0.5678309202194214, "learning_rate": 6.681684722081604e-05, "loss": 1.7054, "step": 6565 }, { "epoch": 1.9930186674760966, "grad_norm": 0.5518128275871277, "learning_rate": 6.681178495494583e-05, "loss": 1.6772, "step": 6566 }, { "epoch": 1.993322203672788, "grad_norm": 0.5464813709259033, "learning_rate": 6.680672268907563e-05, "loss": 1.9509, "step": 6567 }, { "epoch": 1.9936257398694794, "grad_norm": 0.4115614891052246, "learning_rate": 6.680166042320542e-05, "loss": 1.3763, "step": 6568 }, { "epoch": 1.9939292760661709, "grad_norm": 0.5534042716026306, "learning_rate": 6.679659815733522e-05, "loss": 1.788, "step": 6569 }, { "epoch": 1.9942328122628623, "grad_norm": 0.5049768686294556, "learning_rate": 6.679153589146502e-05, "loss": 1.435, "step": 6570 }, { "epoch": 1.9945363484595537, "grad_norm": 0.5459719896316528, "learning_rate": 6.678647362559482e-05, "loss": 1.3551, "step": 6571 }, { "epoch": 1.9948398846562454, "grad_norm": 0.9183335900306702, "learning_rate": 6.678141135972461e-05, "loss": 1.2186, "step": 6572 }, { "epoch": 1.9951434208529366, "grad_norm": 0.5222302079200745, "learning_rate": 6.677634909385442e-05, "loss": 1.8529, "step": 6573 }, { "epoch": 1.9954469570496283, "grad_norm": 0.5741526484489441, "learning_rate": 6.677128682798422e-05, "loss": 1.5536, "step": 6574 }, { "epoch": 1.9957504932463195, "grad_norm": 0.5656235814094543, "learning_rate": 6.676622456211401e-05, "loss": 1.5544, "step": 6575 }, { "epoch": 1.9960540294430111, "grad_norm": 0.4552244246006012, "learning_rate": 6.67611622962438e-05, "loss": 2.337, "step": 6576 }, { "epoch": 1.9963575656397026, "grad_norm": 0.5700350999832153, "learning_rate": 6.67561000303736e-05, "loss": 1.525, "step": 6577 }, { "epoch": 1.996661101836394, "grad_norm": 0.5289913415908813, "learning_rate": 6.67510377645034e-05, "loss": 1.3759, "step": 6578 }, { "epoch": 1.9969646380330854, "grad_norm": 0.5340834259986877, "learning_rate": 6.674597549863319e-05, "loss": 1.2181, "step": 6579 }, { "epoch": 1.9972681742297769, "grad_norm": 0.5501202940940857, "learning_rate": 6.674091323276299e-05, "loss": 1.5773, "step": 6580 }, { "epoch": 1.9975717104264683, "grad_norm": 0.36932939291000366, "learning_rate": 6.67358509668928e-05, "loss": 1.3267, "step": 6581 }, { "epoch": 1.9978752466231597, "grad_norm": 0.484678715467453, "learning_rate": 6.673078870102259e-05, "loss": 1.8134, "step": 6582 }, { "epoch": 1.9981787828198514, "grad_norm": 0.5534485578536987, "learning_rate": 6.672572643515238e-05, "loss": 2.0146, "step": 6583 }, { "epoch": 1.9984823190165426, "grad_norm": 0.602260410785675, "learning_rate": 6.672066416928218e-05, "loss": 1.8876, "step": 6584 }, { "epoch": 1.9987858552132343, "grad_norm": 0.568967342376709, "learning_rate": 6.671560190341197e-05, "loss": 1.8811, "step": 6585 }, { "epoch": 1.9990893914099255, "grad_norm": 0.500963032245636, "learning_rate": 6.671053963754177e-05, "loss": 1.2231, "step": 6586 }, { "epoch": 1.9993929276066171, "grad_norm": 0.596053957939148, "learning_rate": 6.670547737167156e-05, "loss": 1.4158, "step": 6587 }, { "epoch": 1.9996964638033086, "grad_norm": 0.6084225177764893, "learning_rate": 6.670041510580136e-05, "loss": 1.8643, "step": 6588 }, { "epoch": 2.0, "grad_norm": 0.4858667254447937, "learning_rate": 6.669535283993115e-05, "loss": 1.4181, "step": 6589 }, { "epoch": 2.0003035361966917, "grad_norm": 0.48385685682296753, "learning_rate": 6.669029057406095e-05, "loss": 1.5573, "step": 6590 }, { "epoch": 2.000607072393383, "grad_norm": 0.5700175166130066, "learning_rate": 6.668522830819076e-05, "loss": 1.4403, "step": 6591 }, { "epoch": 2.0009106085900745, "grad_norm": 0.6228484511375427, "learning_rate": 6.668016604232055e-05, "loss": 1.4034, "step": 6592 }, { "epoch": 2.0012141447867657, "grad_norm": 0.5445995330810547, "learning_rate": 6.667510377645035e-05, "loss": 1.3197, "step": 6593 }, { "epoch": 2.0015176809834574, "grad_norm": 0.6247600317001343, "learning_rate": 6.667004151058014e-05, "loss": 1.1154, "step": 6594 }, { "epoch": 2.0018212171801486, "grad_norm": 0.6725988388061523, "learning_rate": 6.666497924470993e-05, "loss": 1.4879, "step": 6595 }, { "epoch": 2.0021247533768403, "grad_norm": 0.7663455605506897, "learning_rate": 6.665991697883973e-05, "loss": 1.1032, "step": 6596 }, { "epoch": 2.0024282895735315, "grad_norm": 1.1510711908340454, "learning_rate": 6.665485471296952e-05, "loss": 1.4117, "step": 6597 }, { "epoch": 2.002731825770223, "grad_norm": 0.6524301767349243, "learning_rate": 6.664979244709932e-05, "loss": 1.0263, "step": 6598 }, { "epoch": 2.0030353619669143, "grad_norm": 0.7597069144248962, "learning_rate": 6.664473018122911e-05, "loss": 1.3326, "step": 6599 }, { "epoch": 2.003338898163606, "grad_norm": 0.5784065127372742, "learning_rate": 6.663966791535892e-05, "loss": 1.2875, "step": 6600 }, { "epoch": 2.0036424343602977, "grad_norm": 0.6139408946037292, "learning_rate": 6.663460564948872e-05, "loss": 0.7758, "step": 6601 }, { "epoch": 2.003945970556989, "grad_norm": 0.6967280507087708, "learning_rate": 6.662954338361851e-05, "loss": 1.2892, "step": 6602 }, { "epoch": 2.0042495067536805, "grad_norm": 0.6574037671089172, "learning_rate": 6.66244811177483e-05, "loss": 1.3883, "step": 6603 }, { "epoch": 2.0045530429503717, "grad_norm": 0.6693050265312195, "learning_rate": 6.66194188518781e-05, "loss": 1.4671, "step": 6604 }, { "epoch": 2.0048565791470634, "grad_norm": 0.9042626619338989, "learning_rate": 6.66143565860079e-05, "loss": 1.0162, "step": 6605 }, { "epoch": 2.0051601153437546, "grad_norm": 0.7697750329971313, "learning_rate": 6.660929432013769e-05, "loss": 1.4319, "step": 6606 }, { "epoch": 2.0054636515404463, "grad_norm": 0.7360553741455078, "learning_rate": 6.660423205426749e-05, "loss": 0.8861, "step": 6607 }, { "epoch": 2.0057671877371375, "grad_norm": 0.7231805324554443, "learning_rate": 6.659916978839728e-05, "loss": 1.1145, "step": 6608 }, { "epoch": 2.006070723933829, "grad_norm": 0.8397212624549866, "learning_rate": 6.659410752252709e-05, "loss": 1.2708, "step": 6609 }, { "epoch": 2.006374260130521, "grad_norm": 0.5130758881568909, "learning_rate": 6.658904525665688e-05, "loss": 0.8484, "step": 6610 }, { "epoch": 2.006677796327212, "grad_norm": 0.7962560653686523, "learning_rate": 6.658398299078668e-05, "loss": 1.3299, "step": 6611 }, { "epoch": 2.0069813325239036, "grad_norm": 0.772659420967102, "learning_rate": 6.657892072491647e-05, "loss": 1.6294, "step": 6612 }, { "epoch": 2.007284868720595, "grad_norm": 0.7391024827957153, "learning_rate": 6.657385845904627e-05, "loss": 1.0269, "step": 6613 }, { "epoch": 2.0075884049172865, "grad_norm": 0.7813867330551147, "learning_rate": 6.656879619317606e-05, "loss": 1.1973, "step": 6614 }, { "epoch": 2.0078919411139777, "grad_norm": 0.6954705715179443, "learning_rate": 6.656373392730586e-05, "loss": 1.5012, "step": 6615 }, { "epoch": 2.0081954773106694, "grad_norm": 0.6593831777572632, "learning_rate": 6.655867166143565e-05, "loss": 0.9528, "step": 6616 }, { "epoch": 2.0084990135073606, "grad_norm": 0.8076401948928833, "learning_rate": 6.655360939556546e-05, "loss": 1.0781, "step": 6617 }, { "epoch": 2.0088025497040523, "grad_norm": 0.7507150769233704, "learning_rate": 6.654854712969526e-05, "loss": 1.4245, "step": 6618 }, { "epoch": 2.0091060859007435, "grad_norm": 0.6943393349647522, "learning_rate": 6.654348486382505e-05, "loss": 1.224, "step": 6619 }, { "epoch": 2.009409622097435, "grad_norm": 0.658205509185791, "learning_rate": 6.653842259795486e-05, "loss": 1.0013, "step": 6620 }, { "epoch": 2.009713158294127, "grad_norm": 0.54278963804245, "learning_rate": 6.653336033208465e-05, "loss": 0.9834, "step": 6621 }, { "epoch": 2.010016694490818, "grad_norm": 0.9639625549316406, "learning_rate": 6.652829806621445e-05, "loss": 1.5964, "step": 6622 }, { "epoch": 2.0103202306875096, "grad_norm": 0.7107603549957275, "learning_rate": 6.652323580034424e-05, "loss": 1.1444, "step": 6623 }, { "epoch": 2.010623766884201, "grad_norm": 0.7288581728935242, "learning_rate": 6.651817353447404e-05, "loss": 1.1682, "step": 6624 }, { "epoch": 2.0109273030808925, "grad_norm": 0.7180545926094055, "learning_rate": 6.651311126860383e-05, "loss": 1.2499, "step": 6625 }, { "epoch": 2.0112308392775837, "grad_norm": 0.514674961566925, "learning_rate": 6.650804900273363e-05, "loss": 1.2415, "step": 6626 }, { "epoch": 2.0115343754742754, "grad_norm": 0.6022255420684814, "learning_rate": 6.650298673686342e-05, "loss": 1.4536, "step": 6627 }, { "epoch": 2.0118379116709666, "grad_norm": 0.6236252784729004, "learning_rate": 6.649792447099322e-05, "loss": 0.9079, "step": 6628 }, { "epoch": 2.0121414478676583, "grad_norm": 0.7158982157707214, "learning_rate": 6.649286220512301e-05, "loss": 1.4665, "step": 6629 }, { "epoch": 2.0124449840643495, "grad_norm": 0.7587727308273315, "learning_rate": 6.648779993925282e-05, "loss": 1.2285, "step": 6630 }, { "epoch": 2.012748520261041, "grad_norm": 0.7265990376472473, "learning_rate": 6.648273767338262e-05, "loss": 1.5214, "step": 6631 }, { "epoch": 2.0130520564577328, "grad_norm": 0.862713634967804, "learning_rate": 6.647767540751241e-05, "loss": 1.0318, "step": 6632 }, { "epoch": 2.013355592654424, "grad_norm": 1.1299731731414795, "learning_rate": 6.64726131416422e-05, "loss": 1.0554, "step": 6633 }, { "epoch": 2.0136591288511156, "grad_norm": 0.7257562875747681, "learning_rate": 6.6467550875772e-05, "loss": 1.2273, "step": 6634 }, { "epoch": 2.013962665047807, "grad_norm": 0.8512704372406006, "learning_rate": 6.64624886099018e-05, "loss": 1.5209, "step": 6635 }, { "epoch": 2.0142662012444985, "grad_norm": 0.7467771768569946, "learning_rate": 6.645742634403159e-05, "loss": 1.5147, "step": 6636 }, { "epoch": 2.0145697374411897, "grad_norm": 0.6188955903053284, "learning_rate": 6.645236407816138e-05, "loss": 1.3291, "step": 6637 }, { "epoch": 2.0148732736378814, "grad_norm": 0.7535701394081116, "learning_rate": 6.644730181229118e-05, "loss": 1.4063, "step": 6638 }, { "epoch": 2.0151768098345726, "grad_norm": 0.7806875705718994, "learning_rate": 6.644223954642099e-05, "loss": 1.2262, "step": 6639 }, { "epoch": 2.0154803460312642, "grad_norm": 0.5792213082313538, "learning_rate": 6.643717728055078e-05, "loss": 0.9279, "step": 6640 }, { "epoch": 2.015783882227956, "grad_norm": 0.8171066045761108, "learning_rate": 6.643211501468058e-05, "loss": 1.4051, "step": 6641 }, { "epoch": 2.016087418424647, "grad_norm": 0.8089918494224548, "learning_rate": 6.642705274881037e-05, "loss": 1.146, "step": 6642 }, { "epoch": 2.0163909546213388, "grad_norm": 0.6619982123374939, "learning_rate": 6.642199048294017e-05, "loss": 1.2122, "step": 6643 }, { "epoch": 2.01669449081803, "grad_norm": 0.7962835431098938, "learning_rate": 6.641692821706996e-05, "loss": 1.2783, "step": 6644 }, { "epoch": 2.0169980270147216, "grad_norm": 0.8901903033256531, "learning_rate": 6.641186595119976e-05, "loss": 1.2049, "step": 6645 }, { "epoch": 2.017301563211413, "grad_norm": 0.7541537284851074, "learning_rate": 6.640680368532955e-05, "loss": 1.5408, "step": 6646 }, { "epoch": 2.0176050994081045, "grad_norm": 0.7315836548805237, "learning_rate": 6.640174141945935e-05, "loss": 1.3899, "step": 6647 }, { "epoch": 2.0179086356047957, "grad_norm": 0.7619051337242126, "learning_rate": 6.639667915358915e-05, "loss": 0.6086, "step": 6648 }, { "epoch": 2.0182121718014874, "grad_norm": 0.6528330445289612, "learning_rate": 6.639161688771895e-05, "loss": 1.7007, "step": 6649 }, { "epoch": 2.0185157079981786, "grad_norm": 0.8032832145690918, "learning_rate": 6.638655462184874e-05, "loss": 1.2065, "step": 6650 }, { "epoch": 2.0188192441948702, "grad_norm": 0.8372535705566406, "learning_rate": 6.638149235597854e-05, "loss": 1.2597, "step": 6651 }, { "epoch": 2.019122780391562, "grad_norm": 0.7499954700469971, "learning_rate": 6.637643009010833e-05, "loss": 0.9583, "step": 6652 }, { "epoch": 2.019426316588253, "grad_norm": 0.6940346956253052, "learning_rate": 6.637136782423813e-05, "loss": 1.1369, "step": 6653 }, { "epoch": 2.0197298527849448, "grad_norm": 0.6135651469230652, "learning_rate": 6.636630555836792e-05, "loss": 0.6737, "step": 6654 }, { "epoch": 2.020033388981636, "grad_norm": 0.6377900838851929, "learning_rate": 6.636124329249772e-05, "loss": 0.9863, "step": 6655 }, { "epoch": 2.0203369251783276, "grad_norm": 0.6649699211120605, "learning_rate": 6.635618102662751e-05, "loss": 1.2677, "step": 6656 }, { "epoch": 2.020640461375019, "grad_norm": 0.6146379709243774, "learning_rate": 6.635111876075732e-05, "loss": 1.3855, "step": 6657 }, { "epoch": 2.0209439975717105, "grad_norm": 0.6619699001312256, "learning_rate": 6.634605649488712e-05, "loss": 1.0707, "step": 6658 }, { "epoch": 2.0212475337684017, "grad_norm": 0.5690382719039917, "learning_rate": 6.634099422901691e-05, "loss": 1.5683, "step": 6659 }, { "epoch": 2.0215510699650934, "grad_norm": 0.8512270450592041, "learning_rate": 6.63359319631467e-05, "loss": 0.9643, "step": 6660 }, { "epoch": 2.0218546061617846, "grad_norm": 0.7763013243675232, "learning_rate": 6.63308696972765e-05, "loss": 0.7536, "step": 6661 }, { "epoch": 2.0221581423584762, "grad_norm": 0.8327085971832275, "learning_rate": 6.632580743140631e-05, "loss": 1.3364, "step": 6662 }, { "epoch": 2.022461678555168, "grad_norm": 0.4407320022583008, "learning_rate": 6.63207451655361e-05, "loss": 1.3085, "step": 6663 }, { "epoch": 2.022765214751859, "grad_norm": 0.7932631373405457, "learning_rate": 6.63156828996659e-05, "loss": 1.3209, "step": 6664 }, { "epoch": 2.0230687509485508, "grad_norm": 0.7854815125465393, "learning_rate": 6.631062063379569e-05, "loss": 1.191, "step": 6665 }, { "epoch": 2.023372287145242, "grad_norm": 0.5749601721763611, "learning_rate": 6.630555836792549e-05, "loss": 1.7217, "step": 6666 }, { "epoch": 2.0236758233419336, "grad_norm": 0.6307018995285034, "learning_rate": 6.630049610205528e-05, "loss": 1.3251, "step": 6667 }, { "epoch": 2.023979359538625, "grad_norm": 0.7390800714492798, "learning_rate": 6.629543383618508e-05, "loss": 1.3801, "step": 6668 }, { "epoch": 2.0242828957353165, "grad_norm": 1.2337613105773926, "learning_rate": 6.629037157031489e-05, "loss": 1.3149, "step": 6669 }, { "epoch": 2.0245864319320077, "grad_norm": 0.7825588583946228, "learning_rate": 6.628530930444468e-05, "loss": 0.9752, "step": 6670 }, { "epoch": 2.0248899681286994, "grad_norm": 0.860419511795044, "learning_rate": 6.628024703857447e-05, "loss": 1.4013, "step": 6671 }, { "epoch": 2.025193504325391, "grad_norm": 0.5887535810470581, "learning_rate": 6.627518477270427e-05, "loss": 1.1861, "step": 6672 }, { "epoch": 2.0254970405220822, "grad_norm": 0.6891583204269409, "learning_rate": 6.627012250683406e-05, "loss": 0.7213, "step": 6673 }, { "epoch": 2.025800576718774, "grad_norm": 0.7431558966636658, "learning_rate": 6.626506024096386e-05, "loss": 1.2445, "step": 6674 }, { "epoch": 2.026104112915465, "grad_norm": 0.9929723739624023, "learning_rate": 6.625999797509365e-05, "loss": 0.622, "step": 6675 }, { "epoch": 2.0264076491121568, "grad_norm": 0.7005148530006409, "learning_rate": 6.625493570922345e-05, "loss": 1.6876, "step": 6676 }, { "epoch": 2.026711185308848, "grad_norm": 0.6688176989555359, "learning_rate": 6.624987344335324e-05, "loss": 1.4479, "step": 6677 }, { "epoch": 2.0270147215055396, "grad_norm": 0.7675127983093262, "learning_rate": 6.624481117748305e-05, "loss": 0.81, "step": 6678 }, { "epoch": 2.027318257702231, "grad_norm": 0.6693247556686401, "learning_rate": 6.623974891161285e-05, "loss": 1.5767, "step": 6679 }, { "epoch": 2.0276217938989225, "grad_norm": 0.741452693939209, "learning_rate": 6.623468664574264e-05, "loss": 1.46, "step": 6680 }, { "epoch": 2.0279253300956137, "grad_norm": 0.6443573236465454, "learning_rate": 6.622962437987244e-05, "loss": 1.4561, "step": 6681 }, { "epoch": 2.0282288662923054, "grad_norm": 0.6327553391456604, "learning_rate": 6.622456211400223e-05, "loss": 1.0057, "step": 6682 }, { "epoch": 2.028532402488997, "grad_norm": 0.6773364543914795, "learning_rate": 6.621949984813203e-05, "loss": 1.6921, "step": 6683 }, { "epoch": 2.0288359386856882, "grad_norm": 0.7478709816932678, "learning_rate": 6.621443758226182e-05, "loss": 1.5898, "step": 6684 }, { "epoch": 2.02913947488238, "grad_norm": 0.6670467257499695, "learning_rate": 6.620937531639162e-05, "loss": 1.361, "step": 6685 }, { "epoch": 2.029443011079071, "grad_norm": 0.660031795501709, "learning_rate": 6.620431305052141e-05, "loss": 1.3718, "step": 6686 }, { "epoch": 2.0297465472757628, "grad_norm": 0.8065965175628662, "learning_rate": 6.619925078465122e-05, "loss": 1.4678, "step": 6687 }, { "epoch": 2.030050083472454, "grad_norm": 0.756514847278595, "learning_rate": 6.619418851878101e-05, "loss": 1.2949, "step": 6688 }, { "epoch": 2.0303536196691456, "grad_norm": 0.673550009727478, "learning_rate": 6.618912625291081e-05, "loss": 0.8403, "step": 6689 }, { "epoch": 2.030657155865837, "grad_norm": 0.7104129195213318, "learning_rate": 6.61840639870406e-05, "loss": 0.9143, "step": 6690 }, { "epoch": 2.0309606920625285, "grad_norm": 0.5645703673362732, "learning_rate": 6.61790017211704e-05, "loss": 0.7835, "step": 6691 }, { "epoch": 2.0312642282592197, "grad_norm": 0.8361724615097046, "learning_rate": 6.617393945530019e-05, "loss": 1.345, "step": 6692 }, { "epoch": 2.0315677644559114, "grad_norm": 0.7160463929176331, "learning_rate": 6.616887718942999e-05, "loss": 0.9569, "step": 6693 }, { "epoch": 2.031871300652603, "grad_norm": 0.662090003490448, "learning_rate": 6.616381492355978e-05, "loss": 1.3514, "step": 6694 }, { "epoch": 2.0321748368492942, "grad_norm": 0.6347789764404297, "learning_rate": 6.615875265768958e-05, "loss": 1.5523, "step": 6695 }, { "epoch": 2.032478373045986, "grad_norm": 0.7517459392547607, "learning_rate": 6.615369039181937e-05, "loss": 0.4903, "step": 6696 }, { "epoch": 2.032781909242677, "grad_norm": 0.7042916417121887, "learning_rate": 6.614862812594918e-05, "loss": 1.3716, "step": 6697 }, { "epoch": 2.0330854454393688, "grad_norm": 0.6985395550727844, "learning_rate": 6.614356586007897e-05, "loss": 1.139, "step": 6698 }, { "epoch": 2.03338898163606, "grad_norm": 0.7075894474983215, "learning_rate": 6.613850359420877e-05, "loss": 1.3291, "step": 6699 }, { "epoch": 2.0336925178327516, "grad_norm": 0.9580946564674377, "learning_rate": 6.613344132833856e-05, "loss": 0.7737, "step": 6700 }, { "epoch": 2.033996054029443, "grad_norm": 0.7944507598876953, "learning_rate": 6.612837906246836e-05, "loss": 1.3566, "step": 6701 }, { "epoch": 2.0342995902261345, "grad_norm": 0.8060106039047241, "learning_rate": 6.612331679659815e-05, "loss": 1.3165, "step": 6702 }, { "epoch": 2.0346031264228257, "grad_norm": 0.9689031839370728, "learning_rate": 6.611825453072795e-05, "loss": 1.2286, "step": 6703 }, { "epoch": 2.0349066626195174, "grad_norm": 0.7015454173088074, "learning_rate": 6.611319226485774e-05, "loss": 1.2569, "step": 6704 }, { "epoch": 2.035210198816209, "grad_norm": 0.769710898399353, "learning_rate": 6.610812999898754e-05, "loss": 0.9488, "step": 6705 }, { "epoch": 2.0355137350129002, "grad_norm": 0.5991962552070618, "learning_rate": 6.610306773311735e-05, "loss": 1.6102, "step": 6706 }, { "epoch": 2.035817271209592, "grad_norm": 0.6914335489273071, "learning_rate": 6.609800546724714e-05, "loss": 1.6039, "step": 6707 }, { "epoch": 2.036120807406283, "grad_norm": 0.6637049913406372, "learning_rate": 6.609294320137695e-05, "loss": 1.2567, "step": 6708 }, { "epoch": 2.0364243436029748, "grad_norm": 0.7493016719818115, "learning_rate": 6.608788093550674e-05, "loss": 1.0676, "step": 6709 }, { "epoch": 2.036727879799666, "grad_norm": 0.7196962237358093, "learning_rate": 6.608281866963654e-05, "loss": 1.1692, "step": 6710 }, { "epoch": 2.0370314159963576, "grad_norm": 0.6665316820144653, "learning_rate": 6.607775640376633e-05, "loss": 1.4351, "step": 6711 }, { "epoch": 2.037334952193049, "grad_norm": 0.7386513948440552, "learning_rate": 6.607269413789613e-05, "loss": 1.1418, "step": 6712 }, { "epoch": 2.0376384883897405, "grad_norm": 0.6668844223022461, "learning_rate": 6.606763187202592e-05, "loss": 0.772, "step": 6713 }, { "epoch": 2.037942024586432, "grad_norm": 0.9029124975204468, "learning_rate": 6.606256960615572e-05, "loss": 1.5783, "step": 6714 }, { "epoch": 2.0382455607831234, "grad_norm": 0.6456940770149231, "learning_rate": 6.605750734028551e-05, "loss": 1.0193, "step": 6715 }, { "epoch": 2.038549096979815, "grad_norm": 0.8892134428024292, "learning_rate": 6.605244507441531e-05, "loss": 0.8457, "step": 6716 }, { "epoch": 2.0388526331765062, "grad_norm": 0.7285386323928833, "learning_rate": 6.604738280854512e-05, "loss": 1.2503, "step": 6717 }, { "epoch": 2.039156169373198, "grad_norm": 0.7347849011421204, "learning_rate": 6.604232054267491e-05, "loss": 1.146, "step": 6718 }, { "epoch": 2.039459705569889, "grad_norm": 0.727271556854248, "learning_rate": 6.60372582768047e-05, "loss": 1.4351, "step": 6719 }, { "epoch": 2.0397632417665807, "grad_norm": 1.4093507528305054, "learning_rate": 6.60321960109345e-05, "loss": 0.5674, "step": 6720 }, { "epoch": 2.040066777963272, "grad_norm": 0.4269008934497833, "learning_rate": 6.60271337450643e-05, "loss": 0.8792, "step": 6721 }, { "epoch": 2.0403703141599636, "grad_norm": 0.6046574115753174, "learning_rate": 6.602207147919409e-05, "loss": 0.9758, "step": 6722 }, { "epoch": 2.040673850356655, "grad_norm": 0.7757171988487244, "learning_rate": 6.601700921332389e-05, "loss": 1.39, "step": 6723 }, { "epoch": 2.0409773865533465, "grad_norm": 0.8423404693603516, "learning_rate": 6.601194694745368e-05, "loss": 1.0898, "step": 6724 }, { "epoch": 2.041280922750038, "grad_norm": 0.7536158561706543, "learning_rate": 6.600688468158347e-05, "loss": 1.0093, "step": 6725 }, { "epoch": 2.0415844589467294, "grad_norm": 0.7584403157234192, "learning_rate": 6.600182241571328e-05, "loss": 1.4647, "step": 6726 }, { "epoch": 2.041887995143421, "grad_norm": 0.7223381996154785, "learning_rate": 6.599676014984308e-05, "loss": 1.0329, "step": 6727 }, { "epoch": 2.042191531340112, "grad_norm": 0.7976197600364685, "learning_rate": 6.599169788397287e-05, "loss": 1.3605, "step": 6728 }, { "epoch": 2.042495067536804, "grad_norm": 0.763677179813385, "learning_rate": 6.598663561810267e-05, "loss": 1.2684, "step": 6729 }, { "epoch": 2.042798603733495, "grad_norm": 0.813032329082489, "learning_rate": 6.598157335223246e-05, "loss": 1.4194, "step": 6730 }, { "epoch": 2.0431021399301867, "grad_norm": 0.6929876804351807, "learning_rate": 6.597651108636226e-05, "loss": 1.4115, "step": 6731 }, { "epoch": 2.043405676126878, "grad_norm": 0.8353222012519836, "learning_rate": 6.597144882049205e-05, "loss": 1.0849, "step": 6732 }, { "epoch": 2.0437092123235696, "grad_norm": 0.5235282182693481, "learning_rate": 6.596638655462185e-05, "loss": 1.1781, "step": 6733 }, { "epoch": 2.044012748520261, "grad_norm": 0.7527014017105103, "learning_rate": 6.596132428875164e-05, "loss": 1.3699, "step": 6734 }, { "epoch": 2.0443162847169525, "grad_norm": 0.5982792377471924, "learning_rate": 6.595626202288144e-05, "loss": 1.5603, "step": 6735 }, { "epoch": 2.044619820913644, "grad_norm": 1.0470722913742065, "learning_rate": 6.595119975701124e-05, "loss": 1.2232, "step": 6736 }, { "epoch": 2.0449233571103353, "grad_norm": 0.7752648591995239, "learning_rate": 6.594613749114104e-05, "loss": 1.4848, "step": 6737 }, { "epoch": 2.045226893307027, "grad_norm": 0.6654653549194336, "learning_rate": 6.594107522527083e-05, "loss": 1.6215, "step": 6738 }, { "epoch": 2.045530429503718, "grad_norm": 0.8119179010391235, "learning_rate": 6.593601295940063e-05, "loss": 1.1391, "step": 6739 }, { "epoch": 2.04583396570041, "grad_norm": 0.7314397096633911, "learning_rate": 6.593095069353042e-05, "loss": 1.5554, "step": 6740 }, { "epoch": 2.046137501897101, "grad_norm": 1.1362113952636719, "learning_rate": 6.592588842766022e-05, "loss": 0.536, "step": 6741 }, { "epoch": 2.0464410380937927, "grad_norm": 1.011203408241272, "learning_rate": 6.592082616179001e-05, "loss": 0.7209, "step": 6742 }, { "epoch": 2.046744574290484, "grad_norm": 0.7223535776138306, "learning_rate": 6.591576389591981e-05, "loss": 1.3491, "step": 6743 }, { "epoch": 2.0470481104871756, "grad_norm": 0.6654912233352661, "learning_rate": 6.59107016300496e-05, "loss": 1.3324, "step": 6744 }, { "epoch": 2.0473516466838673, "grad_norm": 0.5610687136650085, "learning_rate": 6.590563936417941e-05, "loss": 1.0092, "step": 6745 }, { "epoch": 2.0476551828805585, "grad_norm": 1.212891697883606, "learning_rate": 6.59005770983092e-05, "loss": 1.586, "step": 6746 }, { "epoch": 2.04795871907725, "grad_norm": 0.7124261856079102, "learning_rate": 6.5895514832439e-05, "loss": 1.2772, "step": 6747 }, { "epoch": 2.0482622552739413, "grad_norm": 0.8526630997657776, "learning_rate": 6.58904525665688e-05, "loss": 1.0371, "step": 6748 }, { "epoch": 2.048565791470633, "grad_norm": 0.697640597820282, "learning_rate": 6.588539030069859e-05, "loss": 1.1681, "step": 6749 }, { "epoch": 2.048869327667324, "grad_norm": 0.627429187297821, "learning_rate": 6.588032803482839e-05, "loss": 1.5366, "step": 6750 }, { "epoch": 2.049172863864016, "grad_norm": 0.650415301322937, "learning_rate": 6.58752657689582e-05, "loss": 0.9809, "step": 6751 }, { "epoch": 2.049476400060707, "grad_norm": 0.560741126537323, "learning_rate": 6.587020350308799e-05, "loss": 1.8711, "step": 6752 }, { "epoch": 2.0497799362573987, "grad_norm": 0.6494907736778259, "learning_rate": 6.586514123721778e-05, "loss": 1.5271, "step": 6753 }, { "epoch": 2.05008347245409, "grad_norm": 0.7896031141281128, "learning_rate": 6.586007897134758e-05, "loss": 1.2083, "step": 6754 }, { "epoch": 2.0503870086507816, "grad_norm": 1.015256404876709, "learning_rate": 6.585501670547737e-05, "loss": 1.3374, "step": 6755 }, { "epoch": 2.0506905448474733, "grad_norm": 0.7154099345207214, "learning_rate": 6.584995443960718e-05, "loss": 1.039, "step": 6756 }, { "epoch": 2.0509940810441645, "grad_norm": 0.6052323579788208, "learning_rate": 6.584489217373698e-05, "loss": 1.5066, "step": 6757 }, { "epoch": 2.051297617240856, "grad_norm": 0.6472240090370178, "learning_rate": 6.583982990786677e-05, "loss": 1.468, "step": 6758 }, { "epoch": 2.0516011534375473, "grad_norm": 0.6906863451004028, "learning_rate": 6.583476764199657e-05, "loss": 1.5298, "step": 6759 }, { "epoch": 2.051904689634239, "grad_norm": 0.8233827948570251, "learning_rate": 6.582970537612636e-05, "loss": 0.9356, "step": 6760 }, { "epoch": 2.05220822583093, "grad_norm": 0.6991246938705444, "learning_rate": 6.582464311025616e-05, "loss": 1.5262, "step": 6761 }, { "epoch": 2.052511762027622, "grad_norm": 0.8245313763618469, "learning_rate": 6.581958084438595e-05, "loss": 1.1714, "step": 6762 }, { "epoch": 2.052815298224313, "grad_norm": 0.7749815583229065, "learning_rate": 6.581451857851574e-05, "loss": 0.8446, "step": 6763 }, { "epoch": 2.0531188344210047, "grad_norm": 1.0583815574645996, "learning_rate": 6.580945631264554e-05, "loss": 1.2243, "step": 6764 }, { "epoch": 2.053422370617696, "grad_norm": 0.5424079298973083, "learning_rate": 6.580439404677535e-05, "loss": 1.34, "step": 6765 }, { "epoch": 2.0537259068143876, "grad_norm": 0.7240017652511597, "learning_rate": 6.579933178090514e-05, "loss": 0.9282, "step": 6766 }, { "epoch": 2.0540294430110793, "grad_norm": 0.692767322063446, "learning_rate": 6.579426951503494e-05, "loss": 1.4957, "step": 6767 }, { "epoch": 2.0543329792077705, "grad_norm": 0.7238978147506714, "learning_rate": 6.578920724916473e-05, "loss": 1.2649, "step": 6768 }, { "epoch": 2.054636515404462, "grad_norm": 0.5727119445800781, "learning_rate": 6.578414498329453e-05, "loss": 1.1351, "step": 6769 }, { "epoch": 2.0549400516011533, "grad_norm": 0.8237214088439941, "learning_rate": 6.577908271742432e-05, "loss": 1.352, "step": 6770 }, { "epoch": 2.055243587797845, "grad_norm": 0.71387779712677, "learning_rate": 6.577402045155412e-05, "loss": 1.2913, "step": 6771 }, { "epoch": 2.055547123994536, "grad_norm": 0.6894492506980896, "learning_rate": 6.576895818568391e-05, "loss": 1.3507, "step": 6772 }, { "epoch": 2.055850660191228, "grad_norm": 0.7831125855445862, "learning_rate": 6.57638959198137e-05, "loss": 1.2404, "step": 6773 }, { "epoch": 2.056154196387919, "grad_norm": 0.6227384805679321, "learning_rate": 6.57588336539435e-05, "loss": 1.1396, "step": 6774 }, { "epoch": 2.0564577325846107, "grad_norm": 0.7114644646644592, "learning_rate": 6.575377138807331e-05, "loss": 1.1895, "step": 6775 }, { "epoch": 2.0567612687813024, "grad_norm": 0.7928556799888611, "learning_rate": 6.57487091222031e-05, "loss": 1.1681, "step": 6776 }, { "epoch": 2.0570648049779936, "grad_norm": 0.8106712102890015, "learning_rate": 6.57436468563329e-05, "loss": 0.9562, "step": 6777 }, { "epoch": 2.0573683411746853, "grad_norm": 0.792510986328125, "learning_rate": 6.57385845904627e-05, "loss": 1.2987, "step": 6778 }, { "epoch": 2.0576718773713765, "grad_norm": 0.8379880785942078, "learning_rate": 6.573352232459249e-05, "loss": 1.5292, "step": 6779 }, { "epoch": 2.057975413568068, "grad_norm": 0.7367234826087952, "learning_rate": 6.572846005872228e-05, "loss": 1.478, "step": 6780 }, { "epoch": 2.0582789497647593, "grad_norm": 0.9199308753013611, "learning_rate": 6.572339779285208e-05, "loss": 1.0923, "step": 6781 }, { "epoch": 2.058582485961451, "grad_norm": 0.6934099197387695, "learning_rate": 6.571833552698187e-05, "loss": 0.7662, "step": 6782 }, { "epoch": 2.058886022158142, "grad_norm": 0.7979916334152222, "learning_rate": 6.571327326111167e-05, "loss": 1.4459, "step": 6783 }, { "epoch": 2.059189558354834, "grad_norm": 0.6911612749099731, "learning_rate": 6.570821099524148e-05, "loss": 1.357, "step": 6784 }, { "epoch": 2.059493094551525, "grad_norm": 0.7487713098526001, "learning_rate": 6.570314872937127e-05, "loss": 1.3231, "step": 6785 }, { "epoch": 2.0597966307482167, "grad_norm": 0.7138471603393555, "learning_rate": 6.569808646350107e-05, "loss": 1.6311, "step": 6786 }, { "epoch": 2.0601001669449084, "grad_norm": 0.911524772644043, "learning_rate": 6.569302419763086e-05, "loss": 1.2923, "step": 6787 }, { "epoch": 2.0604037031415996, "grad_norm": 0.779221773147583, "learning_rate": 6.568796193176066e-05, "loss": 1.518, "step": 6788 }, { "epoch": 2.0607072393382913, "grad_norm": 0.7897789478302002, "learning_rate": 6.568289966589045e-05, "loss": 1.2195, "step": 6789 }, { "epoch": 2.0610107755349825, "grad_norm": 0.5560900568962097, "learning_rate": 6.567783740002024e-05, "loss": 1.3346, "step": 6790 }, { "epoch": 2.061314311731674, "grad_norm": 0.7780158519744873, "learning_rate": 6.567277513415004e-05, "loss": 1.283, "step": 6791 }, { "epoch": 2.0616178479283653, "grad_norm": 0.8399558067321777, "learning_rate": 6.566771286827983e-05, "loss": 1.5827, "step": 6792 }, { "epoch": 2.061921384125057, "grad_norm": 0.6238407492637634, "learning_rate": 6.566265060240964e-05, "loss": 1.0665, "step": 6793 }, { "epoch": 2.062224920321748, "grad_norm": 0.8223831057548523, "learning_rate": 6.565758833653944e-05, "loss": 1.3561, "step": 6794 }, { "epoch": 2.06252845651844, "grad_norm": 0.5908865332603455, "learning_rate": 6.565252607066925e-05, "loss": 1.5198, "step": 6795 }, { "epoch": 2.062831992715131, "grad_norm": 0.7312744855880737, "learning_rate": 6.564746380479904e-05, "loss": 1.3568, "step": 6796 }, { "epoch": 2.0631355289118227, "grad_norm": 0.794439435005188, "learning_rate": 6.564240153892884e-05, "loss": 1.1679, "step": 6797 }, { "epoch": 2.0634390651085144, "grad_norm": 0.58745276927948, "learning_rate": 6.563733927305863e-05, "loss": 1.4664, "step": 6798 }, { "epoch": 2.0637426013052056, "grad_norm": 0.9755748510360718, "learning_rate": 6.563227700718843e-05, "loss": 1.0525, "step": 6799 }, { "epoch": 2.0640461375018972, "grad_norm": 0.7289798855781555, "learning_rate": 6.562721474131822e-05, "loss": 1.392, "step": 6800 }, { "epoch": 2.0643496736985885, "grad_norm": 0.7182273864746094, "learning_rate": 6.562215247544801e-05, "loss": 1.3755, "step": 6801 }, { "epoch": 2.06465320989528, "grad_norm": 0.6822550892829895, "learning_rate": 6.561709020957781e-05, "loss": 1.6559, "step": 6802 }, { "epoch": 2.0649567460919713, "grad_norm": 0.838714599609375, "learning_rate": 6.56120279437076e-05, "loss": 1.291, "step": 6803 }, { "epoch": 2.065260282288663, "grad_norm": 1.2011395692825317, "learning_rate": 6.560696567783741e-05, "loss": 1.1399, "step": 6804 }, { "epoch": 2.065563818485354, "grad_norm": 0.8120669722557068, "learning_rate": 6.560190341196721e-05, "loss": 1.1055, "step": 6805 }, { "epoch": 2.065867354682046, "grad_norm": 0.7051123976707458, "learning_rate": 6.5596841146097e-05, "loss": 1.3991, "step": 6806 }, { "epoch": 2.0661708908787375, "grad_norm": 0.8569958806037903, "learning_rate": 6.55917788802268e-05, "loss": 1.2338, "step": 6807 }, { "epoch": 2.0664744270754287, "grad_norm": 0.5877040028572083, "learning_rate": 6.558671661435659e-05, "loss": 1.2854, "step": 6808 }, { "epoch": 2.0667779632721204, "grad_norm": 0.6768965125083923, "learning_rate": 6.558165434848639e-05, "loss": 1.4867, "step": 6809 }, { "epoch": 2.0670814994688116, "grad_norm": 0.6104425191879272, "learning_rate": 6.557659208261618e-05, "loss": 1.2566, "step": 6810 }, { "epoch": 2.0673850356655032, "grad_norm": 0.8459896445274353, "learning_rate": 6.557152981674598e-05, "loss": 0.9956, "step": 6811 }, { "epoch": 2.0676885718621945, "grad_norm": 0.4863753914833069, "learning_rate": 6.556646755087577e-05, "loss": 1.5894, "step": 6812 }, { "epoch": 2.067992108058886, "grad_norm": 0.4200766086578369, "learning_rate": 6.556140528500557e-05, "loss": 1.3425, "step": 6813 }, { "epoch": 2.0682956442555773, "grad_norm": 0.6907030940055847, "learning_rate": 6.555634301913537e-05, "loss": 1.4898, "step": 6814 }, { "epoch": 2.068599180452269, "grad_norm": 0.5948358774185181, "learning_rate": 6.555128075326517e-05, "loss": 1.2708, "step": 6815 }, { "epoch": 2.06890271664896, "grad_norm": 0.8029438257217407, "learning_rate": 6.554621848739496e-05, "loss": 1.4541, "step": 6816 }, { "epoch": 2.069206252845652, "grad_norm": 0.7332115173339844, "learning_rate": 6.554115622152476e-05, "loss": 1.4676, "step": 6817 }, { "epoch": 2.0695097890423435, "grad_norm": 1.0768547058105469, "learning_rate": 6.553609395565455e-05, "loss": 1.1385, "step": 6818 }, { "epoch": 2.0698133252390347, "grad_norm": 0.7721269726753235, "learning_rate": 6.553103168978435e-05, "loss": 1.4892, "step": 6819 }, { "epoch": 2.0701168614357264, "grad_norm": 0.7033138275146484, "learning_rate": 6.552596942391414e-05, "loss": 1.3887, "step": 6820 }, { "epoch": 2.0704203976324176, "grad_norm": 0.9070338010787964, "learning_rate": 6.552090715804394e-05, "loss": 1.3903, "step": 6821 }, { "epoch": 2.0707239338291092, "grad_norm": 0.7374278903007507, "learning_rate": 6.551584489217373e-05, "loss": 1.3918, "step": 6822 }, { "epoch": 2.0710274700258005, "grad_norm": 0.734194815158844, "learning_rate": 6.551078262630354e-05, "loss": 1.3215, "step": 6823 }, { "epoch": 2.071331006222492, "grad_norm": 0.5715214610099792, "learning_rate": 6.550572036043334e-05, "loss": 1.5389, "step": 6824 }, { "epoch": 2.0716345424191833, "grad_norm": 0.7062519192695618, "learning_rate": 6.550065809456313e-05, "loss": 1.2238, "step": 6825 }, { "epoch": 2.071938078615875, "grad_norm": 0.6726373434066772, "learning_rate": 6.549559582869293e-05, "loss": 1.3836, "step": 6826 }, { "epoch": 2.072241614812566, "grad_norm": 0.7280587553977966, "learning_rate": 6.549053356282272e-05, "loss": 1.7095, "step": 6827 }, { "epoch": 2.072545151009258, "grad_norm": 0.6290583610534668, "learning_rate": 6.548547129695251e-05, "loss": 1.177, "step": 6828 }, { "epoch": 2.0728486872059495, "grad_norm": 0.8294208645820618, "learning_rate": 6.548040903108231e-05, "loss": 1.0588, "step": 6829 }, { "epoch": 2.0731522234026407, "grad_norm": 0.6298262476921082, "learning_rate": 6.54753467652121e-05, "loss": 0.8205, "step": 6830 }, { "epoch": 2.0734557595993324, "grad_norm": 0.7950041890144348, "learning_rate": 6.54702844993419e-05, "loss": 1.1332, "step": 6831 }, { "epoch": 2.0737592957960236, "grad_norm": 0.8786191940307617, "learning_rate": 6.546522223347171e-05, "loss": 1.1891, "step": 6832 }, { "epoch": 2.0740628319927152, "grad_norm": 0.7599530816078186, "learning_rate": 6.54601599676015e-05, "loss": 1.4681, "step": 6833 }, { "epoch": 2.0743663681894065, "grad_norm": 0.7391897439956665, "learning_rate": 6.54550977017313e-05, "loss": 1.3549, "step": 6834 }, { "epoch": 2.074669904386098, "grad_norm": 1.192667007446289, "learning_rate": 6.545003543586109e-05, "loss": 1.3083, "step": 6835 }, { "epoch": 2.0749734405827893, "grad_norm": 0.5997875332832336, "learning_rate": 6.544497316999089e-05, "loss": 1.1885, "step": 6836 }, { "epoch": 2.075276976779481, "grad_norm": 0.5613851547241211, "learning_rate": 6.543991090412068e-05, "loss": 1.4794, "step": 6837 }, { "epoch": 2.075580512976172, "grad_norm": 1.016721487045288, "learning_rate": 6.543484863825048e-05, "loss": 1.3954, "step": 6838 }, { "epoch": 2.075884049172864, "grad_norm": 0.7319069504737854, "learning_rate": 6.542978637238027e-05, "loss": 1.1868, "step": 6839 }, { "epoch": 2.0761875853695555, "grad_norm": 0.8343705534934998, "learning_rate": 6.542472410651008e-05, "loss": 1.025, "step": 6840 }, { "epoch": 2.0764911215662467, "grad_norm": 0.6604619026184082, "learning_rate": 6.541966184063987e-05, "loss": 0.9608, "step": 6841 }, { "epoch": 2.0767946577629384, "grad_norm": 0.7044042944908142, "learning_rate": 6.541459957476967e-05, "loss": 1.2809, "step": 6842 }, { "epoch": 2.0770981939596296, "grad_norm": 0.7844612002372742, "learning_rate": 6.540953730889948e-05, "loss": 1.1977, "step": 6843 }, { "epoch": 2.0774017301563212, "grad_norm": 0.6686902642250061, "learning_rate": 6.540447504302927e-05, "loss": 1.6281, "step": 6844 }, { "epoch": 2.0777052663530124, "grad_norm": 0.8168292045593262, "learning_rate": 6.539941277715907e-05, "loss": 1.5787, "step": 6845 }, { "epoch": 2.078008802549704, "grad_norm": 0.7469481229782104, "learning_rate": 6.539435051128886e-05, "loss": 0.922, "step": 6846 }, { "epoch": 2.0783123387463953, "grad_norm": 0.797624945640564, "learning_rate": 6.538928824541866e-05, "loss": 1.7119, "step": 6847 }, { "epoch": 2.078615874943087, "grad_norm": 0.6301280856132507, "learning_rate": 6.538422597954845e-05, "loss": 1.2768, "step": 6848 }, { "epoch": 2.0789194111397786, "grad_norm": 0.834862232208252, "learning_rate": 6.537916371367825e-05, "loss": 1.4923, "step": 6849 }, { "epoch": 2.07922294733647, "grad_norm": 0.8240933418273926, "learning_rate": 6.537410144780804e-05, "loss": 1.1109, "step": 6850 }, { "epoch": 2.0795264835331615, "grad_norm": 0.6094114780426025, "learning_rate": 6.536903918193784e-05, "loss": 1.2795, "step": 6851 }, { "epoch": 2.0798300197298527, "grad_norm": 0.7783163189888, "learning_rate": 6.536397691606763e-05, "loss": 1.2485, "step": 6852 }, { "epoch": 2.0801335559265444, "grad_norm": 0.6736633777618408, "learning_rate": 6.535891465019744e-05, "loss": 1.4631, "step": 6853 }, { "epoch": 2.0804370921232356, "grad_norm": 0.7661424279212952, "learning_rate": 6.535385238432723e-05, "loss": 1.1812, "step": 6854 }, { "epoch": 2.0807406283199272, "grad_norm": 0.4843573272228241, "learning_rate": 6.534879011845703e-05, "loss": 1.1932, "step": 6855 }, { "epoch": 2.0810441645166184, "grad_norm": 0.6728962063789368, "learning_rate": 6.534372785258682e-05, "loss": 1.4108, "step": 6856 }, { "epoch": 2.08134770071331, "grad_norm": 0.7444415092468262, "learning_rate": 6.533866558671662e-05, "loss": 1.647, "step": 6857 }, { "epoch": 2.0816512369100013, "grad_norm": 0.8934520483016968, "learning_rate": 6.533360332084641e-05, "loss": 1.444, "step": 6858 }, { "epoch": 2.081954773106693, "grad_norm": 0.7796207070350647, "learning_rate": 6.532854105497621e-05, "loss": 1.3639, "step": 6859 }, { "epoch": 2.0822583093033846, "grad_norm": 0.7691407799720764, "learning_rate": 6.5323478789106e-05, "loss": 1.4077, "step": 6860 }, { "epoch": 2.082561845500076, "grad_norm": 0.7715175747871399, "learning_rate": 6.53184165232358e-05, "loss": 1.0336, "step": 6861 }, { "epoch": 2.0828653816967675, "grad_norm": 0.8336794376373291, "learning_rate": 6.53133542573656e-05, "loss": 1.2306, "step": 6862 }, { "epoch": 2.0831689178934587, "grad_norm": 0.7209054827690125, "learning_rate": 6.53082919914954e-05, "loss": 0.8951, "step": 6863 }, { "epoch": 2.0834724540901504, "grad_norm": 0.7160457372665405, "learning_rate": 6.53032297256252e-05, "loss": 1.2746, "step": 6864 }, { "epoch": 2.0837759902868416, "grad_norm": 0.8172768354415894, "learning_rate": 6.529816745975499e-05, "loss": 0.9319, "step": 6865 }, { "epoch": 2.0840795264835332, "grad_norm": 0.626451849937439, "learning_rate": 6.529310519388478e-05, "loss": 1.4666, "step": 6866 }, { "epoch": 2.0843830626802244, "grad_norm": 0.6044541597366333, "learning_rate": 6.528804292801458e-05, "loss": 0.9215, "step": 6867 }, { "epoch": 2.084686598876916, "grad_norm": 0.6968386769294739, "learning_rate": 6.528298066214437e-05, "loss": 1.3271, "step": 6868 }, { "epoch": 2.0849901350736078, "grad_norm": 0.8236109614372253, "learning_rate": 6.527791839627417e-05, "loss": 1.354, "step": 6869 }, { "epoch": 2.085293671270299, "grad_norm": 0.6705644130706787, "learning_rate": 6.527285613040396e-05, "loss": 1.2462, "step": 6870 }, { "epoch": 2.0855972074669906, "grad_norm": 0.8627272844314575, "learning_rate": 6.526779386453377e-05, "loss": 1.2606, "step": 6871 }, { "epoch": 2.085900743663682, "grad_norm": 0.7665202617645264, "learning_rate": 6.526273159866357e-05, "loss": 1.073, "step": 6872 }, { "epoch": 2.0862042798603735, "grad_norm": 0.7215783596038818, "learning_rate": 6.525766933279336e-05, "loss": 1.2895, "step": 6873 }, { "epoch": 2.0865078160570647, "grad_norm": 0.8607796430587769, "learning_rate": 6.525260706692316e-05, "loss": 0.9202, "step": 6874 }, { "epoch": 2.0868113522537564, "grad_norm": 0.8341158032417297, "learning_rate": 6.524754480105295e-05, "loss": 1.056, "step": 6875 }, { "epoch": 2.0871148884504476, "grad_norm": 0.8036590218544006, "learning_rate": 6.524248253518275e-05, "loss": 1.3272, "step": 6876 }, { "epoch": 2.0874184246471392, "grad_norm": 1.0152392387390137, "learning_rate": 6.523742026931254e-05, "loss": 1.3854, "step": 6877 }, { "epoch": 2.0877219608438304, "grad_norm": 0.6920920610427856, "learning_rate": 6.523235800344234e-05, "loss": 1.6818, "step": 6878 }, { "epoch": 2.088025497040522, "grad_norm": 0.8714667558670044, "learning_rate": 6.522729573757213e-05, "loss": 1.3507, "step": 6879 }, { "epoch": 2.0883290332372137, "grad_norm": 0.7897412776947021, "learning_rate": 6.522223347170193e-05, "loss": 1.472, "step": 6880 }, { "epoch": 2.088632569433905, "grad_norm": 0.739872932434082, "learning_rate": 6.521717120583173e-05, "loss": 1.7092, "step": 6881 }, { "epoch": 2.0889361056305966, "grad_norm": 0.6356867551803589, "learning_rate": 6.521210893996153e-05, "loss": 1.3256, "step": 6882 }, { "epoch": 2.089239641827288, "grad_norm": 0.6607017517089844, "learning_rate": 6.520704667409132e-05, "loss": 1.0546, "step": 6883 }, { "epoch": 2.0895431780239795, "grad_norm": 0.6129403114318848, "learning_rate": 6.520198440822113e-05, "loss": 1.3164, "step": 6884 }, { "epoch": 2.0898467142206707, "grad_norm": 0.6967512369155884, "learning_rate": 6.519692214235093e-05, "loss": 1.3868, "step": 6885 }, { "epoch": 2.0901502504173624, "grad_norm": 0.6828495860099792, "learning_rate": 6.519185987648072e-05, "loss": 1.5314, "step": 6886 }, { "epoch": 2.0904537866140536, "grad_norm": 0.6235039234161377, "learning_rate": 6.518679761061052e-05, "loss": 1.0667, "step": 6887 }, { "epoch": 2.0907573228107452, "grad_norm": 0.8770887851715088, "learning_rate": 6.518173534474031e-05, "loss": 1.2815, "step": 6888 }, { "epoch": 2.0910608590074364, "grad_norm": 0.6768497824668884, "learning_rate": 6.51766730788701e-05, "loss": 1.2396, "step": 6889 }, { "epoch": 2.091364395204128, "grad_norm": 0.7875215411186218, "learning_rate": 6.51716108129999e-05, "loss": 1.329, "step": 6890 }, { "epoch": 2.0916679314008197, "grad_norm": 0.6576293706893921, "learning_rate": 6.51665485471297e-05, "loss": 1.5352, "step": 6891 }, { "epoch": 2.091971467597511, "grad_norm": 1.1005089282989502, "learning_rate": 6.51614862812595e-05, "loss": 1.2099, "step": 6892 }, { "epoch": 2.0922750037942026, "grad_norm": 0.6148272752761841, "learning_rate": 6.51564240153893e-05, "loss": 0.9366, "step": 6893 }, { "epoch": 2.092578539990894, "grad_norm": 0.5925896167755127, "learning_rate": 6.51513617495191e-05, "loss": 1.4104, "step": 6894 }, { "epoch": 2.0928820761875855, "grad_norm": 0.7109888792037964, "learning_rate": 6.514629948364889e-05, "loss": 1.3707, "step": 6895 }, { "epoch": 2.0931856123842767, "grad_norm": 0.6179462671279907, "learning_rate": 6.514123721777868e-05, "loss": 1.339, "step": 6896 }, { "epoch": 2.0934891485809684, "grad_norm": 1.1650300025939941, "learning_rate": 6.513617495190848e-05, "loss": 1.1503, "step": 6897 }, { "epoch": 2.0937926847776596, "grad_norm": 0.507260799407959, "learning_rate": 6.513111268603827e-05, "loss": 1.6823, "step": 6898 }, { "epoch": 2.094096220974351, "grad_norm": 0.718826174736023, "learning_rate": 6.512605042016807e-05, "loss": 1.4065, "step": 6899 }, { "epoch": 2.0943997571710424, "grad_norm": 0.7679693698883057, "learning_rate": 6.512098815429786e-05, "loss": 1.0725, "step": 6900 }, { "epoch": 2.094703293367734, "grad_norm": 0.8759974241256714, "learning_rate": 6.511592588842767e-05, "loss": 1.576, "step": 6901 }, { "epoch": 2.0950068295644257, "grad_norm": 0.7068794369697571, "learning_rate": 6.511086362255747e-05, "loss": 1.4895, "step": 6902 }, { "epoch": 2.095310365761117, "grad_norm": 0.8207367062568665, "learning_rate": 6.510580135668726e-05, "loss": 1.2166, "step": 6903 }, { "epoch": 2.0956139019578086, "grad_norm": 0.6428210735321045, "learning_rate": 6.510073909081705e-05, "loss": 0.7282, "step": 6904 }, { "epoch": 2.0959174381545, "grad_norm": 0.7879844307899475, "learning_rate": 6.509567682494685e-05, "loss": 1.4551, "step": 6905 }, { "epoch": 2.0962209743511915, "grad_norm": 0.5059447884559631, "learning_rate": 6.509061455907664e-05, "loss": 0.7051, "step": 6906 }, { "epoch": 2.0965245105478827, "grad_norm": 0.9014713168144226, "learning_rate": 6.508555229320644e-05, "loss": 0.9653, "step": 6907 }, { "epoch": 2.0968280467445743, "grad_norm": 0.7185696363449097, "learning_rate": 6.508049002733623e-05, "loss": 1.2964, "step": 6908 }, { "epoch": 2.0971315829412656, "grad_norm": 0.7701075673103333, "learning_rate": 6.507542776146603e-05, "loss": 1.3248, "step": 6909 }, { "epoch": 2.097435119137957, "grad_norm": 0.8957740068435669, "learning_rate": 6.507036549559584e-05, "loss": 1.5749, "step": 6910 }, { "epoch": 2.097738655334649, "grad_norm": 0.7184285521507263, "learning_rate": 6.506530322972563e-05, "loss": 1.1747, "step": 6911 }, { "epoch": 2.09804219153134, "grad_norm": 0.7738087177276611, "learning_rate": 6.506024096385543e-05, "loss": 1.5662, "step": 6912 }, { "epoch": 2.0983457277280317, "grad_norm": 0.7775218486785889, "learning_rate": 6.505517869798522e-05, "loss": 1.1571, "step": 6913 }, { "epoch": 2.098649263924723, "grad_norm": 0.6527006030082703, "learning_rate": 6.505011643211502e-05, "loss": 1.0058, "step": 6914 }, { "epoch": 2.0989528001214146, "grad_norm": 0.5389887690544128, "learning_rate": 6.504505416624481e-05, "loss": 1.3973, "step": 6915 }, { "epoch": 2.099256336318106, "grad_norm": 0.8791207671165466, "learning_rate": 6.50399919003746e-05, "loss": 1.1055, "step": 6916 }, { "epoch": 2.0995598725147975, "grad_norm": 0.6769183278083801, "learning_rate": 6.50349296345044e-05, "loss": 1.0076, "step": 6917 }, { "epoch": 2.0998634087114887, "grad_norm": 0.7800582051277161, "learning_rate": 6.50298673686342e-05, "loss": 1.3135, "step": 6918 }, { "epoch": 2.1001669449081803, "grad_norm": 0.6300270557403564, "learning_rate": 6.502480510276399e-05, "loss": 1.354, "step": 6919 }, { "epoch": 2.1004704811048716, "grad_norm": 0.8697200417518616, "learning_rate": 6.50197428368938e-05, "loss": 1.2348, "step": 6920 }, { "epoch": 2.100774017301563, "grad_norm": 0.7431923151016235, "learning_rate": 6.50146805710236e-05, "loss": 1.2332, "step": 6921 }, { "epoch": 2.101077553498255, "grad_norm": 0.7904285192489624, "learning_rate": 6.500961830515339e-05, "loss": 1.0013, "step": 6922 }, { "epoch": 2.101381089694946, "grad_norm": 0.7916845679283142, "learning_rate": 6.500455603928318e-05, "loss": 1.2898, "step": 6923 }, { "epoch": 2.1016846258916377, "grad_norm": 0.785738468170166, "learning_rate": 6.499949377341298e-05, "loss": 1.6627, "step": 6924 }, { "epoch": 2.101988162088329, "grad_norm": 0.8456923365592957, "learning_rate": 6.499443150754277e-05, "loss": 1.3163, "step": 6925 }, { "epoch": 2.1022916982850206, "grad_norm": 0.607767641544342, "learning_rate": 6.498936924167257e-05, "loss": 1.5545, "step": 6926 }, { "epoch": 2.102595234481712, "grad_norm": 0.7314704656600952, "learning_rate": 6.498430697580236e-05, "loss": 1.4258, "step": 6927 }, { "epoch": 2.1028987706784035, "grad_norm": 0.7437986135482788, "learning_rate": 6.497924470993216e-05, "loss": 1.4507, "step": 6928 }, { "epoch": 2.1032023068750947, "grad_norm": 0.6680194139480591, "learning_rate": 6.497418244406197e-05, "loss": 1.0842, "step": 6929 }, { "epoch": 2.1035058430717863, "grad_norm": 0.8030607104301453, "learning_rate": 6.496912017819176e-05, "loss": 0.9657, "step": 6930 }, { "epoch": 2.1038093792684776, "grad_norm": 0.9499925971031189, "learning_rate": 6.496405791232157e-05, "loss": 1.3172, "step": 6931 }, { "epoch": 2.104112915465169, "grad_norm": 0.6064600348472595, "learning_rate": 6.495899564645136e-05, "loss": 1.5706, "step": 6932 }, { "epoch": 2.104416451661861, "grad_norm": 0.7133974432945251, "learning_rate": 6.495393338058116e-05, "loss": 0.6775, "step": 6933 }, { "epoch": 2.104719987858552, "grad_norm": 0.7160429954528809, "learning_rate": 6.494887111471095e-05, "loss": 1.4584, "step": 6934 }, { "epoch": 2.1050235240552437, "grad_norm": 0.6486138701438904, "learning_rate": 6.494380884884075e-05, "loss": 0.9498, "step": 6935 }, { "epoch": 2.105327060251935, "grad_norm": 0.8397301435470581, "learning_rate": 6.493874658297054e-05, "loss": 1.3461, "step": 6936 }, { "epoch": 2.1056305964486266, "grad_norm": 0.7628511786460876, "learning_rate": 6.493368431710034e-05, "loss": 1.556, "step": 6937 }, { "epoch": 2.105934132645318, "grad_norm": 0.7449336647987366, "learning_rate": 6.492862205123013e-05, "loss": 1.3249, "step": 6938 }, { "epoch": 2.1062376688420095, "grad_norm": 0.5359498262405396, "learning_rate": 6.492355978535993e-05, "loss": 0.9697, "step": 6939 }, { "epoch": 2.1065412050387007, "grad_norm": 0.6996994614601135, "learning_rate": 6.491849751948974e-05, "loss": 1.2401, "step": 6940 }, { "epoch": 2.1068447412353923, "grad_norm": 0.9314258694648743, "learning_rate": 6.491343525361953e-05, "loss": 1.42, "step": 6941 }, { "epoch": 2.107148277432084, "grad_norm": 0.8202747702598572, "learning_rate": 6.490837298774932e-05, "loss": 1.1554, "step": 6942 }, { "epoch": 2.107451813628775, "grad_norm": 0.8922327756881714, "learning_rate": 6.490331072187912e-05, "loss": 1.0766, "step": 6943 }, { "epoch": 2.107755349825467, "grad_norm": 0.6795501708984375, "learning_rate": 6.489824845600891e-05, "loss": 1.0089, "step": 6944 }, { "epoch": 2.108058886022158, "grad_norm": 0.5984511971473694, "learning_rate": 6.489318619013871e-05, "loss": 1.297, "step": 6945 }, { "epoch": 2.1083624222188497, "grad_norm": 0.6586083769798279, "learning_rate": 6.48881239242685e-05, "loss": 1.3293, "step": 6946 }, { "epoch": 2.108665958415541, "grad_norm": 0.6119650602340698, "learning_rate": 6.48830616583983e-05, "loss": 0.6706, "step": 6947 }, { "epoch": 2.1089694946122326, "grad_norm": 0.8025054335594177, "learning_rate": 6.48779993925281e-05, "loss": 1.1377, "step": 6948 }, { "epoch": 2.109273030808924, "grad_norm": 0.88395756483078, "learning_rate": 6.48729371266579e-05, "loss": 1.1058, "step": 6949 }, { "epoch": 2.1095765670056155, "grad_norm": 0.7020336389541626, "learning_rate": 6.48678748607877e-05, "loss": 1.1002, "step": 6950 }, { "epoch": 2.1098801032023067, "grad_norm": 0.6096410155296326, "learning_rate": 6.486281259491749e-05, "loss": 1.5273, "step": 6951 }, { "epoch": 2.1101836393989983, "grad_norm": 0.6165260672569275, "learning_rate": 6.485775032904729e-05, "loss": 1.0085, "step": 6952 }, { "epoch": 2.11048717559569, "grad_norm": 0.6977746486663818, "learning_rate": 6.485268806317708e-05, "loss": 1.5208, "step": 6953 }, { "epoch": 2.110790711792381, "grad_norm": 0.7142164707183838, "learning_rate": 6.484762579730688e-05, "loss": 1.4821, "step": 6954 }, { "epoch": 2.111094247989073, "grad_norm": 0.7390041351318359, "learning_rate": 6.484256353143667e-05, "loss": 1.218, "step": 6955 }, { "epoch": 2.111397784185764, "grad_norm": 0.8610092997550964, "learning_rate": 6.483750126556647e-05, "loss": 1.1259, "step": 6956 }, { "epoch": 2.1117013203824557, "grad_norm": 0.807457447052002, "learning_rate": 6.483243899969626e-05, "loss": 1.3557, "step": 6957 }, { "epoch": 2.112004856579147, "grad_norm": 0.7799239754676819, "learning_rate": 6.482737673382606e-05, "loss": 1.1888, "step": 6958 }, { "epoch": 2.1123083927758386, "grad_norm": 0.7552769780158997, "learning_rate": 6.482231446795586e-05, "loss": 1.2615, "step": 6959 }, { "epoch": 2.11261192897253, "grad_norm": 0.8106013536453247, "learning_rate": 6.481725220208566e-05, "loss": 1.3605, "step": 6960 }, { "epoch": 2.1129154651692215, "grad_norm": 0.6781620383262634, "learning_rate": 6.481218993621545e-05, "loss": 1.2664, "step": 6961 }, { "epoch": 2.1132190013659127, "grad_norm": 0.8459703326225281, "learning_rate": 6.480712767034525e-05, "loss": 1.6223, "step": 6962 }, { "epoch": 2.1135225375626043, "grad_norm": 0.6246853470802307, "learning_rate": 6.480206540447504e-05, "loss": 1.4559, "step": 6963 }, { "epoch": 2.113826073759296, "grad_norm": 0.5837286114692688, "learning_rate": 6.479700313860484e-05, "loss": 1.1062, "step": 6964 }, { "epoch": 2.114129609955987, "grad_norm": 0.5617137551307678, "learning_rate": 6.479194087273463e-05, "loss": 1.0567, "step": 6965 }, { "epoch": 2.114433146152679, "grad_norm": 0.7016701102256775, "learning_rate": 6.478687860686443e-05, "loss": 1.4226, "step": 6966 }, { "epoch": 2.11473668234937, "grad_norm": 0.7260595560073853, "learning_rate": 6.478181634099422e-05, "loss": 1.3857, "step": 6967 }, { "epoch": 2.1150402185460617, "grad_norm": 0.5604128837585449, "learning_rate": 6.477675407512403e-05, "loss": 1.6041, "step": 6968 }, { "epoch": 2.115343754742753, "grad_norm": 0.8095130324363708, "learning_rate": 6.477169180925382e-05, "loss": 1.2849, "step": 6969 }, { "epoch": 2.1156472909394446, "grad_norm": 0.7363386750221252, "learning_rate": 6.476662954338362e-05, "loss": 1.3677, "step": 6970 }, { "epoch": 2.115950827136136, "grad_norm": 0.7337886095046997, "learning_rate": 6.476156727751341e-05, "loss": 1.5134, "step": 6971 }, { "epoch": 2.1162543633328275, "grad_norm": 0.7493162155151367, "learning_rate": 6.475650501164321e-05, "loss": 0.8987, "step": 6972 }, { "epoch": 2.1165578995295187, "grad_norm": 0.7532170414924622, "learning_rate": 6.475144274577302e-05, "loss": 1.398, "step": 6973 }, { "epoch": 2.1168614357262103, "grad_norm": 0.7840539813041687, "learning_rate": 6.474638047990281e-05, "loss": 1.5745, "step": 6974 }, { "epoch": 2.117164971922902, "grad_norm": 0.7847781777381897, "learning_rate": 6.474131821403261e-05, "loss": 1.4612, "step": 6975 }, { "epoch": 2.117468508119593, "grad_norm": 0.6995275020599365, "learning_rate": 6.47362559481624e-05, "loss": 1.4264, "step": 6976 }, { "epoch": 2.117772044316285, "grad_norm": 0.9341135621070862, "learning_rate": 6.47311936822922e-05, "loss": 1.3877, "step": 6977 }, { "epoch": 2.118075580512976, "grad_norm": 0.7916807532310486, "learning_rate": 6.472613141642199e-05, "loss": 1.0889, "step": 6978 }, { "epoch": 2.1183791167096677, "grad_norm": 0.789832592010498, "learning_rate": 6.47210691505518e-05, "loss": 1.0189, "step": 6979 }, { "epoch": 2.118682652906359, "grad_norm": 1.2565356492996216, "learning_rate": 6.47160068846816e-05, "loss": 1.1957, "step": 6980 }, { "epoch": 2.1189861891030506, "grad_norm": 0.5494474768638611, "learning_rate": 6.471094461881139e-05, "loss": 0.9361, "step": 6981 }, { "epoch": 2.119289725299742, "grad_norm": 0.6757771968841553, "learning_rate": 6.470588235294118e-05, "loss": 1.3835, "step": 6982 }, { "epoch": 2.1195932614964335, "grad_norm": 0.875653088092804, "learning_rate": 6.470082008707098e-05, "loss": 1.3736, "step": 6983 }, { "epoch": 2.119896797693125, "grad_norm": 0.8260795474052429, "learning_rate": 6.469575782120077e-05, "loss": 0.9047, "step": 6984 }, { "epoch": 2.1202003338898163, "grad_norm": 0.6649807095527649, "learning_rate": 6.469069555533057e-05, "loss": 0.8367, "step": 6985 }, { "epoch": 2.120503870086508, "grad_norm": 0.7601671814918518, "learning_rate": 6.468563328946036e-05, "loss": 1.4975, "step": 6986 }, { "epoch": 2.120807406283199, "grad_norm": 0.7919801473617554, "learning_rate": 6.468057102359016e-05, "loss": 1.2241, "step": 6987 }, { "epoch": 2.121110942479891, "grad_norm": 0.8133627772331238, "learning_rate": 6.467550875771997e-05, "loss": 1.3707, "step": 6988 }, { "epoch": 2.121414478676582, "grad_norm": 0.7252787947654724, "learning_rate": 6.467044649184976e-05, "loss": 0.9922, "step": 6989 }, { "epoch": 2.1217180148732737, "grad_norm": 0.7415847182273865, "learning_rate": 6.466538422597956e-05, "loss": 0.3215, "step": 6990 }, { "epoch": 2.122021551069965, "grad_norm": 0.5860079526901245, "learning_rate": 6.466032196010935e-05, "loss": 1.6003, "step": 6991 }, { "epoch": 2.1223250872666566, "grad_norm": 0.8746591806411743, "learning_rate": 6.465525969423915e-05, "loss": 1.1195, "step": 6992 }, { "epoch": 2.122628623463348, "grad_norm": 0.7454144954681396, "learning_rate": 6.465019742836894e-05, "loss": 1.0494, "step": 6993 }, { "epoch": 2.1229321596600395, "grad_norm": 0.7970776557922363, "learning_rate": 6.464513516249874e-05, "loss": 1.5679, "step": 6994 }, { "epoch": 2.123235695856731, "grad_norm": 0.9127287864685059, "learning_rate": 6.464007289662853e-05, "loss": 1.1722, "step": 6995 }, { "epoch": 2.1235392320534223, "grad_norm": 0.5542991161346436, "learning_rate": 6.463501063075833e-05, "loss": 1.7838, "step": 6996 }, { "epoch": 2.123842768250114, "grad_norm": 0.8293723464012146, "learning_rate": 6.462994836488812e-05, "loss": 1.3326, "step": 6997 }, { "epoch": 2.124146304446805, "grad_norm": 0.8169488310813904, "learning_rate": 6.462488609901793e-05, "loss": 1.0697, "step": 6998 }, { "epoch": 2.124449840643497, "grad_norm": 0.6636808514595032, "learning_rate": 6.461982383314772e-05, "loss": 1.2945, "step": 6999 }, { "epoch": 2.124753376840188, "grad_norm": 0.982451856136322, "learning_rate": 6.461476156727752e-05, "loss": 1.0061, "step": 7000 }, { "epoch": 2.1250569130368797, "grad_norm": 0.8102851510047913, "learning_rate": 6.460969930140731e-05, "loss": 1.1035, "step": 7001 }, { "epoch": 2.125360449233571, "grad_norm": 0.915728747844696, "learning_rate": 6.460463703553711e-05, "loss": 1.1196, "step": 7002 }, { "epoch": 2.1256639854302626, "grad_norm": 0.8686944842338562, "learning_rate": 6.45995747696669e-05, "loss": 1.092, "step": 7003 }, { "epoch": 2.1259675216269542, "grad_norm": 0.7031385898590088, "learning_rate": 6.45945125037967e-05, "loss": 1.202, "step": 7004 }, { "epoch": 2.1262710578236454, "grad_norm": 0.7822988629341125, "learning_rate": 6.458945023792649e-05, "loss": 1.1374, "step": 7005 }, { "epoch": 2.126574594020337, "grad_norm": 0.6261591911315918, "learning_rate": 6.458438797205629e-05, "loss": 1.091, "step": 7006 }, { "epoch": 2.1268781302170283, "grad_norm": 0.7939580082893372, "learning_rate": 6.45793257061861e-05, "loss": 0.9854, "step": 7007 }, { "epoch": 2.12718166641372, "grad_norm": 0.6935524940490723, "learning_rate": 6.457426344031589e-05, "loss": 1.5513, "step": 7008 }, { "epoch": 2.127485202610411, "grad_norm": 0.6789997816085815, "learning_rate": 6.456920117444568e-05, "loss": 1.5058, "step": 7009 }, { "epoch": 2.127788738807103, "grad_norm": 0.60723876953125, "learning_rate": 6.456413890857548e-05, "loss": 1.2242, "step": 7010 }, { "epoch": 2.128092275003794, "grad_norm": 0.927089512348175, "learning_rate": 6.455907664270527e-05, "loss": 1.2623, "step": 7011 }, { "epoch": 2.1283958112004857, "grad_norm": 0.6675049066543579, "learning_rate": 6.455401437683507e-05, "loss": 1.6692, "step": 7012 }, { "epoch": 2.128699347397177, "grad_norm": 0.6772061586380005, "learning_rate": 6.454895211096486e-05, "loss": 1.4969, "step": 7013 }, { "epoch": 2.1290028835938686, "grad_norm": 0.820059597492218, "learning_rate": 6.454388984509466e-05, "loss": 1.3942, "step": 7014 }, { "epoch": 2.1293064197905602, "grad_norm": 0.6421627998352051, "learning_rate": 6.453882757922445e-05, "loss": 1.245, "step": 7015 }, { "epoch": 2.1296099559872514, "grad_norm": 0.813875675201416, "learning_rate": 6.453376531335426e-05, "loss": 1.1182, "step": 7016 }, { "epoch": 2.129913492183943, "grad_norm": 0.7777447700500488, "learning_rate": 6.452870304748406e-05, "loss": 1.2357, "step": 7017 }, { "epoch": 2.1302170283806343, "grad_norm": 0.7813501358032227, "learning_rate": 6.452364078161386e-05, "loss": 1.3377, "step": 7018 }, { "epoch": 2.130520564577326, "grad_norm": 0.6702557802200317, "learning_rate": 6.451857851574366e-05, "loss": 0.772, "step": 7019 }, { "epoch": 2.130824100774017, "grad_norm": 0.5846662521362305, "learning_rate": 6.451351624987345e-05, "loss": 1.1361, "step": 7020 }, { "epoch": 2.131127636970709, "grad_norm": 0.677483856678009, "learning_rate": 6.450845398400325e-05, "loss": 1.6367, "step": 7021 }, { "epoch": 2.1314311731674, "grad_norm": 0.7289956212043762, "learning_rate": 6.450339171813304e-05, "loss": 1.5031, "step": 7022 }, { "epoch": 2.1317347093640917, "grad_norm": 0.7819235324859619, "learning_rate": 6.449832945226284e-05, "loss": 1.1371, "step": 7023 }, { "epoch": 2.132038245560783, "grad_norm": 0.6361702680587769, "learning_rate": 6.449326718639263e-05, "loss": 0.8756, "step": 7024 }, { "epoch": 2.1323417817574746, "grad_norm": 0.6113557815551758, "learning_rate": 6.448820492052243e-05, "loss": 1.0094, "step": 7025 }, { "epoch": 2.1326453179541662, "grad_norm": 0.7358946800231934, "learning_rate": 6.448314265465222e-05, "loss": 1.2521, "step": 7026 }, { "epoch": 2.1329488541508574, "grad_norm": 0.8237258195877075, "learning_rate": 6.447808038878203e-05, "loss": 1.4365, "step": 7027 }, { "epoch": 2.133252390347549, "grad_norm": 0.7153835892677307, "learning_rate": 6.447301812291183e-05, "loss": 1.1588, "step": 7028 }, { "epoch": 2.1335559265442403, "grad_norm": 0.793786346912384, "learning_rate": 6.446795585704162e-05, "loss": 1.2753, "step": 7029 }, { "epoch": 2.133859462740932, "grad_norm": 0.7200156450271606, "learning_rate": 6.446289359117142e-05, "loss": 0.7547, "step": 7030 }, { "epoch": 2.134162998937623, "grad_norm": 0.6647316813468933, "learning_rate": 6.445783132530121e-05, "loss": 1.3108, "step": 7031 }, { "epoch": 2.134466535134315, "grad_norm": 0.7873644828796387, "learning_rate": 6.4452769059431e-05, "loss": 1.4656, "step": 7032 }, { "epoch": 2.134770071331006, "grad_norm": 0.6672101616859436, "learning_rate": 6.44477067935608e-05, "loss": 1.5885, "step": 7033 }, { "epoch": 2.1350736075276977, "grad_norm": 0.9114254713058472, "learning_rate": 6.44426445276906e-05, "loss": 0.8359, "step": 7034 }, { "epoch": 2.135377143724389, "grad_norm": 0.6775479316711426, "learning_rate": 6.443758226182039e-05, "loss": 1.4429, "step": 7035 }, { "epoch": 2.1356806799210806, "grad_norm": 0.7736831307411194, "learning_rate": 6.443251999595018e-05, "loss": 1.0994, "step": 7036 }, { "epoch": 2.1359842161177722, "grad_norm": 0.6583807468414307, "learning_rate": 6.442745773007999e-05, "loss": 1.0765, "step": 7037 }, { "epoch": 2.1362877523144634, "grad_norm": 0.6119073629379272, "learning_rate": 6.442239546420979e-05, "loss": 1.475, "step": 7038 }, { "epoch": 2.136591288511155, "grad_norm": 0.8550370931625366, "learning_rate": 6.441733319833958e-05, "loss": 1.1355, "step": 7039 }, { "epoch": 2.1368948247078463, "grad_norm": 0.7692386507987976, "learning_rate": 6.441227093246938e-05, "loss": 1.1812, "step": 7040 }, { "epoch": 2.137198360904538, "grad_norm": 0.7219559550285339, "learning_rate": 6.440720866659917e-05, "loss": 1.4073, "step": 7041 }, { "epoch": 2.137501897101229, "grad_norm": 0.6751419901847839, "learning_rate": 6.440214640072897e-05, "loss": 0.9089, "step": 7042 }, { "epoch": 2.137805433297921, "grad_norm": 0.7274761199951172, "learning_rate": 6.439708413485876e-05, "loss": 1.4312, "step": 7043 }, { "epoch": 2.138108969494612, "grad_norm": 0.8742554187774658, "learning_rate": 6.439202186898856e-05, "loss": 1.098, "step": 7044 }, { "epoch": 2.1384125056913037, "grad_norm": 0.6010596752166748, "learning_rate": 6.438695960311835e-05, "loss": 1.1069, "step": 7045 }, { "epoch": 2.138716041887995, "grad_norm": 0.7470264434814453, "learning_rate": 6.438189733724816e-05, "loss": 1.0936, "step": 7046 }, { "epoch": 2.1390195780846866, "grad_norm": 0.620542049407959, "learning_rate": 6.437683507137795e-05, "loss": 1.3027, "step": 7047 }, { "epoch": 2.1393231142813782, "grad_norm": 0.7897995710372925, "learning_rate": 6.437177280550775e-05, "loss": 1.3731, "step": 7048 }, { "epoch": 2.1396266504780694, "grad_norm": 0.6937667727470398, "learning_rate": 6.436671053963754e-05, "loss": 0.8121, "step": 7049 }, { "epoch": 2.139930186674761, "grad_norm": 0.7295348644256592, "learning_rate": 6.436164827376734e-05, "loss": 1.0723, "step": 7050 }, { "epoch": 2.1402337228714523, "grad_norm": 0.7741212248802185, "learning_rate": 6.435658600789713e-05, "loss": 1.233, "step": 7051 }, { "epoch": 2.140537259068144, "grad_norm": 0.7276745438575745, "learning_rate": 6.435152374202693e-05, "loss": 1.6296, "step": 7052 }, { "epoch": 2.140840795264835, "grad_norm": 0.7447855472564697, "learning_rate": 6.434646147615672e-05, "loss": 1.0548, "step": 7053 }, { "epoch": 2.141144331461527, "grad_norm": 0.6452568173408508, "learning_rate": 6.434139921028652e-05, "loss": 1.0692, "step": 7054 }, { "epoch": 2.141447867658218, "grad_norm": 0.851493775844574, "learning_rate": 6.433633694441633e-05, "loss": 1.3675, "step": 7055 }, { "epoch": 2.1417514038549097, "grad_norm": 0.6594175696372986, "learning_rate": 6.433127467854612e-05, "loss": 1.2718, "step": 7056 }, { "epoch": 2.1420549400516014, "grad_norm": 0.9136268496513367, "learning_rate": 6.432621241267592e-05, "loss": 0.8236, "step": 7057 }, { "epoch": 2.1423584762482926, "grad_norm": 0.8292484879493713, "learning_rate": 6.432115014680571e-05, "loss": 1.2484, "step": 7058 }, { "epoch": 2.142662012444984, "grad_norm": 0.7154607772827148, "learning_rate": 6.43160878809355e-05, "loss": 1.5388, "step": 7059 }, { "epoch": 2.1429655486416754, "grad_norm": 0.7923582196235657, "learning_rate": 6.43110256150653e-05, "loss": 1.1922, "step": 7060 }, { "epoch": 2.143269084838367, "grad_norm": 0.779097855091095, "learning_rate": 6.43059633491951e-05, "loss": 1.3437, "step": 7061 }, { "epoch": 2.1435726210350583, "grad_norm": 0.5886580348014832, "learning_rate": 6.43009010833249e-05, "loss": 1.1589, "step": 7062 }, { "epoch": 2.14387615723175, "grad_norm": 0.7474644184112549, "learning_rate": 6.42958388174547e-05, "loss": 1.4397, "step": 7063 }, { "epoch": 2.144179693428441, "grad_norm": 0.6950685977935791, "learning_rate": 6.429077655158449e-05, "loss": 1.4432, "step": 7064 }, { "epoch": 2.144483229625133, "grad_norm": 0.9418857097625732, "learning_rate": 6.428571428571429e-05, "loss": 1.1887, "step": 7065 }, { "epoch": 2.1447867658218245, "grad_norm": 0.9572475552558899, "learning_rate": 6.42806520198441e-05, "loss": 1.453, "step": 7066 }, { "epoch": 2.1450903020185157, "grad_norm": 0.799579918384552, "learning_rate": 6.427558975397389e-05, "loss": 1.3219, "step": 7067 }, { "epoch": 2.1453938382152073, "grad_norm": 0.7733772993087769, "learning_rate": 6.427052748810369e-05, "loss": 1.4549, "step": 7068 }, { "epoch": 2.1456973744118986, "grad_norm": 0.7334538698196411, "learning_rate": 6.426546522223348e-05, "loss": 1.4377, "step": 7069 }, { "epoch": 2.14600091060859, "grad_norm": 0.6159878373146057, "learning_rate": 6.426040295636328e-05, "loss": 1.6952, "step": 7070 }, { "epoch": 2.1463044468052814, "grad_norm": 0.8680784702301025, "learning_rate": 6.425534069049307e-05, "loss": 1.2469, "step": 7071 }, { "epoch": 2.146607983001973, "grad_norm": 1.383750081062317, "learning_rate": 6.425027842462286e-05, "loss": 0.6063, "step": 7072 }, { "epoch": 2.1469115191986643, "grad_norm": 0.6771996021270752, "learning_rate": 6.424521615875266e-05, "loss": 1.3287, "step": 7073 }, { "epoch": 2.147215055395356, "grad_norm": 0.5610653162002563, "learning_rate": 6.424015389288245e-05, "loss": 1.5291, "step": 7074 }, { "epoch": 2.147518591592047, "grad_norm": 0.7199169993400574, "learning_rate": 6.423509162701225e-05, "loss": 1.3695, "step": 7075 }, { "epoch": 2.147822127788739, "grad_norm": 0.7059941291809082, "learning_rate": 6.423002936114206e-05, "loss": 0.8093, "step": 7076 }, { "epoch": 2.1481256639854305, "grad_norm": 0.6837321519851685, "learning_rate": 6.422496709527185e-05, "loss": 1.3739, "step": 7077 }, { "epoch": 2.1484292001821217, "grad_norm": 0.5235718488693237, "learning_rate": 6.421990482940165e-05, "loss": 1.292, "step": 7078 }, { "epoch": 2.1487327363788133, "grad_norm": 0.6904390454292297, "learning_rate": 6.421484256353144e-05, "loss": 1.3419, "step": 7079 }, { "epoch": 2.1490362725755046, "grad_norm": 0.6799563765525818, "learning_rate": 6.420978029766124e-05, "loss": 1.1699, "step": 7080 }, { "epoch": 2.149339808772196, "grad_norm": 1.023763656616211, "learning_rate": 6.420471803179103e-05, "loss": 1.1691, "step": 7081 }, { "epoch": 2.1496433449688874, "grad_norm": 0.6300176978111267, "learning_rate": 6.419965576592083e-05, "loss": 1.5441, "step": 7082 }, { "epoch": 2.149946881165579, "grad_norm": 0.6105098128318787, "learning_rate": 6.419459350005062e-05, "loss": 1.0628, "step": 7083 }, { "epoch": 2.1502504173622703, "grad_norm": 0.5488935112953186, "learning_rate": 6.418953123418042e-05, "loss": 1.3458, "step": 7084 }, { "epoch": 2.150553953558962, "grad_norm": 0.6905235648155212, "learning_rate": 6.418446896831022e-05, "loss": 1.6791, "step": 7085 }, { "epoch": 2.150857489755653, "grad_norm": 0.7746590375900269, "learning_rate": 6.417940670244002e-05, "loss": 1.2937, "step": 7086 }, { "epoch": 2.151161025952345, "grad_norm": 0.6267426013946533, "learning_rate": 6.417434443656981e-05, "loss": 1.2033, "step": 7087 }, { "epoch": 2.1514645621490365, "grad_norm": 0.7372788786888123, "learning_rate": 6.416928217069961e-05, "loss": 1.2304, "step": 7088 }, { "epoch": 2.1517680983457277, "grad_norm": 0.6180323362350464, "learning_rate": 6.41642199048294e-05, "loss": 1.5392, "step": 7089 }, { "epoch": 2.1520716345424193, "grad_norm": 0.6933856010437012, "learning_rate": 6.41591576389592e-05, "loss": 1.314, "step": 7090 }, { "epoch": 2.1523751707391106, "grad_norm": 0.8059110641479492, "learning_rate": 6.415409537308899e-05, "loss": 1.4774, "step": 7091 }, { "epoch": 2.152678706935802, "grad_norm": 0.7373825907707214, "learning_rate": 6.414903310721879e-05, "loss": 1.0461, "step": 7092 }, { "epoch": 2.1529822431324934, "grad_norm": 0.8026410341262817, "learning_rate": 6.414397084134858e-05, "loss": 0.846, "step": 7093 }, { "epoch": 2.153285779329185, "grad_norm": 0.6906172037124634, "learning_rate": 6.413890857547839e-05, "loss": 1.3885, "step": 7094 }, { "epoch": 2.1535893155258763, "grad_norm": 1.1794425249099731, "learning_rate": 6.413384630960819e-05, "loss": 1.5075, "step": 7095 }, { "epoch": 2.153892851722568, "grad_norm": 0.6099388003349304, "learning_rate": 6.412878404373798e-05, "loss": 1.1185, "step": 7096 }, { "epoch": 2.154196387919259, "grad_norm": 0.7119961977005005, "learning_rate": 6.412372177786778e-05, "loss": 1.12, "step": 7097 }, { "epoch": 2.154499924115951, "grad_norm": 0.7217562198638916, "learning_rate": 6.411865951199757e-05, "loss": 1.6259, "step": 7098 }, { "epoch": 2.1548034603126425, "grad_norm": 0.7318760752677917, "learning_rate": 6.411359724612737e-05, "loss": 1.3047, "step": 7099 }, { "epoch": 2.1551069965093337, "grad_norm": 0.7855448126792908, "learning_rate": 6.410853498025716e-05, "loss": 1.0818, "step": 7100 }, { "epoch": 2.1554105327060253, "grad_norm": 0.7480208873748779, "learning_rate": 6.410347271438695e-05, "loss": 1.447, "step": 7101 }, { "epoch": 2.1557140689027166, "grad_norm": 0.5373287796974182, "learning_rate": 6.409841044851675e-05, "loss": 1.7865, "step": 7102 }, { "epoch": 2.156017605099408, "grad_norm": 0.9216307401657104, "learning_rate": 6.409334818264654e-05, "loss": 1.2057, "step": 7103 }, { "epoch": 2.1563211412960994, "grad_norm": 0.7226337194442749, "learning_rate": 6.408828591677635e-05, "loss": 1.2489, "step": 7104 }, { "epoch": 2.156624677492791, "grad_norm": 0.7996892929077148, "learning_rate": 6.408322365090615e-05, "loss": 1.4067, "step": 7105 }, { "epoch": 2.1569282136894823, "grad_norm": 0.8255277872085571, "learning_rate": 6.407816138503594e-05, "loss": 1.0818, "step": 7106 }, { "epoch": 2.157231749886174, "grad_norm": 0.7836741209030151, "learning_rate": 6.407309911916575e-05, "loss": 1.3656, "step": 7107 }, { "epoch": 2.157535286082865, "grad_norm": 0.6470088362693787, "learning_rate": 6.406803685329555e-05, "loss": 1.0784, "step": 7108 }, { "epoch": 2.157838822279557, "grad_norm": 0.6564112305641174, "learning_rate": 6.406297458742534e-05, "loss": 0.5021, "step": 7109 }, { "epoch": 2.1581423584762485, "grad_norm": 0.6398088932037354, "learning_rate": 6.405791232155513e-05, "loss": 0.8162, "step": 7110 }, { "epoch": 2.1584458946729397, "grad_norm": 0.8116580843925476, "learning_rate": 6.405285005568493e-05, "loss": 1.1574, "step": 7111 }, { "epoch": 2.1587494308696313, "grad_norm": 1.0613367557525635, "learning_rate": 6.404778778981472e-05, "loss": 0.9812, "step": 7112 }, { "epoch": 2.1590529670663225, "grad_norm": 0.7346512675285339, "learning_rate": 6.404272552394452e-05, "loss": 1.0567, "step": 7113 }, { "epoch": 2.159356503263014, "grad_norm": 0.8014817237854004, "learning_rate": 6.403766325807431e-05, "loss": 1.4299, "step": 7114 }, { "epoch": 2.1596600394597054, "grad_norm": 0.7765456438064575, "learning_rate": 6.403260099220412e-05, "loss": 1.4795, "step": 7115 }, { "epoch": 2.159963575656397, "grad_norm": 0.6675356030464172, "learning_rate": 6.402753872633392e-05, "loss": 1.5016, "step": 7116 }, { "epoch": 2.1602671118530883, "grad_norm": 0.8694089651107788, "learning_rate": 6.402247646046371e-05, "loss": 1.3718, "step": 7117 }, { "epoch": 2.16057064804978, "grad_norm": 0.708865761756897, "learning_rate": 6.401741419459351e-05, "loss": 1.4019, "step": 7118 }, { "epoch": 2.1608741842464716, "grad_norm": 0.624944806098938, "learning_rate": 6.40123519287233e-05, "loss": 1.346, "step": 7119 }, { "epoch": 2.161177720443163, "grad_norm": 0.9114837050437927, "learning_rate": 6.40072896628531e-05, "loss": 1.2589, "step": 7120 }, { "epoch": 2.1614812566398545, "grad_norm": 0.8413217067718506, "learning_rate": 6.400222739698289e-05, "loss": 1.3506, "step": 7121 }, { "epoch": 2.1617847928365457, "grad_norm": 0.8894316554069519, "learning_rate": 6.399716513111269e-05, "loss": 1.5452, "step": 7122 }, { "epoch": 2.1620883290332373, "grad_norm": 0.6457502245903015, "learning_rate": 6.399210286524248e-05, "loss": 0.5962, "step": 7123 }, { "epoch": 2.1623918652299285, "grad_norm": 0.7590650320053101, "learning_rate": 6.398704059937229e-05, "loss": 1.6314, "step": 7124 }, { "epoch": 2.16269540142662, "grad_norm": 0.6571962237358093, "learning_rate": 6.398197833350208e-05, "loss": 1.7331, "step": 7125 }, { "epoch": 2.1629989376233114, "grad_norm": 0.8318181037902832, "learning_rate": 6.397691606763188e-05, "loss": 1.3389, "step": 7126 }, { "epoch": 2.163302473820003, "grad_norm": 0.696306049823761, "learning_rate": 6.397185380176167e-05, "loss": 1.5082, "step": 7127 }, { "epoch": 2.1636060100166947, "grad_norm": 0.7968202233314514, "learning_rate": 6.396679153589147e-05, "loss": 1.1368, "step": 7128 }, { "epoch": 2.163909546213386, "grad_norm": 0.6538952589035034, "learning_rate": 6.396172927002126e-05, "loss": 1.2814, "step": 7129 }, { "epoch": 2.1642130824100776, "grad_norm": 0.6986178159713745, "learning_rate": 6.395666700415106e-05, "loss": 1.3288, "step": 7130 }, { "epoch": 2.164516618606769, "grad_norm": 0.7502809762954712, "learning_rate": 6.395160473828085e-05, "loss": 1.2749, "step": 7131 }, { "epoch": 2.1648201548034605, "grad_norm": 0.7333081960678101, "learning_rate": 6.394654247241065e-05, "loss": 1.6966, "step": 7132 }, { "epoch": 2.1651236910001517, "grad_norm": 0.7244516015052795, "learning_rate": 6.394148020654046e-05, "loss": 1.0677, "step": 7133 }, { "epoch": 2.1654272271968433, "grad_norm": 0.613068163394928, "learning_rate": 6.393641794067025e-05, "loss": 1.2863, "step": 7134 }, { "epoch": 2.1657307633935345, "grad_norm": 0.8424353003501892, "learning_rate": 6.393135567480005e-05, "loss": 1.0378, "step": 7135 }, { "epoch": 2.166034299590226, "grad_norm": 1.0322976112365723, "learning_rate": 6.392629340892984e-05, "loss": 1.4644, "step": 7136 }, { "epoch": 2.1663378357869174, "grad_norm": 0.6831735968589783, "learning_rate": 6.392123114305964e-05, "loss": 1.3927, "step": 7137 }, { "epoch": 2.166641371983609, "grad_norm": 0.7594115138053894, "learning_rate": 6.391616887718943e-05, "loss": 1.3165, "step": 7138 }, { "epoch": 2.1669449081803007, "grad_norm": 0.6064585447311401, "learning_rate": 6.391110661131922e-05, "loss": 0.8763, "step": 7139 }, { "epoch": 2.167248444376992, "grad_norm": 0.840460479259491, "learning_rate": 6.390604434544902e-05, "loss": 1.5395, "step": 7140 }, { "epoch": 2.1675519805736836, "grad_norm": 0.9302356839179993, "learning_rate": 6.390098207957881e-05, "loss": 1.2421, "step": 7141 }, { "epoch": 2.167855516770375, "grad_norm": 0.7110074162483215, "learning_rate": 6.389591981370861e-05, "loss": 0.7126, "step": 7142 }, { "epoch": 2.1681590529670665, "grad_norm": 0.9885009527206421, "learning_rate": 6.389085754783842e-05, "loss": 0.9977, "step": 7143 }, { "epoch": 2.1684625891637577, "grad_norm": 0.8145515322685242, "learning_rate": 6.388579528196821e-05, "loss": 1.1864, "step": 7144 }, { "epoch": 2.1687661253604493, "grad_norm": 0.6856915354728699, "learning_rate": 6.388073301609801e-05, "loss": 1.3053, "step": 7145 }, { "epoch": 2.1690696615571405, "grad_norm": 0.6998456120491028, "learning_rate": 6.38756707502278e-05, "loss": 1.798, "step": 7146 }, { "epoch": 2.169373197753832, "grad_norm": 0.7821376919746399, "learning_rate": 6.38706084843576e-05, "loss": 1.4081, "step": 7147 }, { "epoch": 2.1696767339505234, "grad_norm": 0.7204583883285522, "learning_rate": 6.386554621848739e-05, "loss": 1.3859, "step": 7148 }, { "epoch": 2.169980270147215, "grad_norm": 0.8906077146530151, "learning_rate": 6.386048395261719e-05, "loss": 0.8652, "step": 7149 }, { "epoch": 2.1702838063439067, "grad_norm": 0.7288920879364014, "learning_rate": 6.385542168674698e-05, "loss": 1.4968, "step": 7150 }, { "epoch": 2.170587342540598, "grad_norm": 0.6715928912162781, "learning_rate": 6.385035942087679e-05, "loss": 1.6004, "step": 7151 }, { "epoch": 2.1708908787372896, "grad_norm": 0.7054345607757568, "learning_rate": 6.384529715500658e-05, "loss": 1.3363, "step": 7152 }, { "epoch": 2.171194414933981, "grad_norm": 0.8027999401092529, "learning_rate": 6.384023488913638e-05, "loss": 1.0505, "step": 7153 }, { "epoch": 2.1714979511306725, "grad_norm": 0.649910569190979, "learning_rate": 6.383517262326619e-05, "loss": 0.8694, "step": 7154 }, { "epoch": 2.1718014873273637, "grad_norm": 0.636173665523529, "learning_rate": 6.383011035739598e-05, "loss": 1.3469, "step": 7155 }, { "epoch": 2.1721050235240553, "grad_norm": 0.8084169030189514, "learning_rate": 6.382504809152578e-05, "loss": 1.3881, "step": 7156 }, { "epoch": 2.1724085597207465, "grad_norm": 0.6515277028083801, "learning_rate": 6.381998582565557e-05, "loss": 1.602, "step": 7157 }, { "epoch": 2.172712095917438, "grad_norm": 0.7483789324760437, "learning_rate": 6.381492355978537e-05, "loss": 1.5937, "step": 7158 }, { "epoch": 2.1730156321141294, "grad_norm": 0.8204941749572754, "learning_rate": 6.380986129391516e-05, "loss": 1.3963, "step": 7159 }, { "epoch": 2.173319168310821, "grad_norm": 0.7946287393569946, "learning_rate": 6.380479902804496e-05, "loss": 0.7602, "step": 7160 }, { "epoch": 2.1736227045075127, "grad_norm": 0.6441590189933777, "learning_rate": 6.379973676217475e-05, "loss": 1.4581, "step": 7161 }, { "epoch": 2.173926240704204, "grad_norm": 0.7281964421272278, "learning_rate": 6.379467449630455e-05, "loss": 1.3487, "step": 7162 }, { "epoch": 2.1742297769008956, "grad_norm": 0.6523686051368713, "learning_rate": 6.378961223043435e-05, "loss": 1.8093, "step": 7163 }, { "epoch": 2.174533313097587, "grad_norm": 0.8183250427246094, "learning_rate": 6.378454996456415e-05, "loss": 1.395, "step": 7164 }, { "epoch": 2.1748368492942785, "grad_norm": 0.7952205538749695, "learning_rate": 6.377948769869394e-05, "loss": 1.3464, "step": 7165 }, { "epoch": 2.1751403854909697, "grad_norm": 0.6068337559700012, "learning_rate": 6.377442543282374e-05, "loss": 1.1278, "step": 7166 }, { "epoch": 2.1754439216876613, "grad_norm": 0.8475143909454346, "learning_rate": 6.376936316695353e-05, "loss": 1.3051, "step": 7167 }, { "epoch": 2.1757474578843525, "grad_norm": 0.6326785683631897, "learning_rate": 6.376430090108333e-05, "loss": 1.51, "step": 7168 }, { "epoch": 2.176050994081044, "grad_norm": 0.6794092655181885, "learning_rate": 6.375923863521312e-05, "loss": 1.4881, "step": 7169 }, { "epoch": 2.1763545302777354, "grad_norm": 0.8222758173942566, "learning_rate": 6.375417636934292e-05, "loss": 1.4618, "step": 7170 }, { "epoch": 2.176658066474427, "grad_norm": 0.7291663885116577, "learning_rate": 6.374911410347271e-05, "loss": 0.7921, "step": 7171 }, { "epoch": 2.1769616026711187, "grad_norm": 0.6901921033859253, "learning_rate": 6.374405183760252e-05, "loss": 1.141, "step": 7172 }, { "epoch": 2.17726513886781, "grad_norm": 0.6537145376205444, "learning_rate": 6.373898957173232e-05, "loss": 1.2864, "step": 7173 }, { "epoch": 2.1775686750645016, "grad_norm": 0.6633203029632568, "learning_rate": 6.373392730586211e-05, "loss": 1.5173, "step": 7174 }, { "epoch": 2.177872211261193, "grad_norm": 0.8034242987632751, "learning_rate": 6.37288650399919e-05, "loss": 0.8577, "step": 7175 }, { "epoch": 2.1781757474578844, "grad_norm": 0.6730956435203552, "learning_rate": 6.37238027741217e-05, "loss": 1.5462, "step": 7176 }, { "epoch": 2.1784792836545757, "grad_norm": 0.8179565668106079, "learning_rate": 6.37187405082515e-05, "loss": 0.7864, "step": 7177 }, { "epoch": 2.1787828198512673, "grad_norm": 0.8936238884925842, "learning_rate": 6.371367824238129e-05, "loss": 1.4166, "step": 7178 }, { "epoch": 2.1790863560479585, "grad_norm": 0.9726060032844543, "learning_rate": 6.370861597651108e-05, "loss": 0.9226, "step": 7179 }, { "epoch": 2.17938989224465, "grad_norm": 0.786594569683075, "learning_rate": 6.370355371064088e-05, "loss": 1.3891, "step": 7180 }, { "epoch": 2.1796934284413414, "grad_norm": 0.6364801526069641, "learning_rate": 6.369849144477067e-05, "loss": 0.9755, "step": 7181 }, { "epoch": 2.179996964638033, "grad_norm": 1.0209544897079468, "learning_rate": 6.369342917890048e-05, "loss": 1.2559, "step": 7182 }, { "epoch": 2.1803005008347247, "grad_norm": 0.5325672626495361, "learning_rate": 6.368836691303028e-05, "loss": 0.9392, "step": 7183 }, { "epoch": 2.180604037031416, "grad_norm": 0.750744104385376, "learning_rate": 6.368330464716007e-05, "loss": 1.1019, "step": 7184 }, { "epoch": 2.1809075732281076, "grad_norm": 0.6024854183197021, "learning_rate": 6.367824238128987e-05, "loss": 1.0678, "step": 7185 }, { "epoch": 2.181211109424799, "grad_norm": 0.6822061538696289, "learning_rate": 6.367318011541966e-05, "loss": 1.1547, "step": 7186 }, { "epoch": 2.1815146456214904, "grad_norm": 0.6472896933555603, "learning_rate": 6.366811784954946e-05, "loss": 1.356, "step": 7187 }, { "epoch": 2.1818181818181817, "grad_norm": 0.6070579290390015, "learning_rate": 6.366305558367925e-05, "loss": 1.3724, "step": 7188 }, { "epoch": 2.1821217180148733, "grad_norm": 0.7931656837463379, "learning_rate": 6.365799331780905e-05, "loss": 1.3362, "step": 7189 }, { "epoch": 2.1824252542115645, "grad_norm": 0.6661885976791382, "learning_rate": 6.365293105193884e-05, "loss": 0.8277, "step": 7190 }, { "epoch": 2.182728790408256, "grad_norm": 0.7171643376350403, "learning_rate": 6.364786878606865e-05, "loss": 1.148, "step": 7191 }, { "epoch": 2.183032326604948, "grad_norm": 0.6635745763778687, "learning_rate": 6.364280652019844e-05, "loss": 1.257, "step": 7192 }, { "epoch": 2.183335862801639, "grad_norm": 0.7740877270698547, "learning_rate": 6.363774425432824e-05, "loss": 1.229, "step": 7193 }, { "epoch": 2.1836393989983307, "grad_norm": 0.8438175916671753, "learning_rate": 6.363268198845803e-05, "loss": 1.3026, "step": 7194 }, { "epoch": 2.183942935195022, "grad_norm": 0.7249647378921509, "learning_rate": 6.362761972258783e-05, "loss": 1.4047, "step": 7195 }, { "epoch": 2.1842464713917136, "grad_norm": 0.7198625206947327, "learning_rate": 6.362255745671764e-05, "loss": 1.6817, "step": 7196 }, { "epoch": 2.184550007588405, "grad_norm": 0.7288183569908142, "learning_rate": 6.361749519084743e-05, "loss": 1.1093, "step": 7197 }, { "epoch": 2.1848535437850964, "grad_norm": 1.0255497694015503, "learning_rate": 6.361243292497723e-05, "loss": 0.8903, "step": 7198 }, { "epoch": 2.1851570799817877, "grad_norm": 0.6866830587387085, "learning_rate": 6.360737065910702e-05, "loss": 1.5924, "step": 7199 }, { "epoch": 2.1854606161784793, "grad_norm": 0.7557784914970398, "learning_rate": 6.360230839323682e-05, "loss": 1.1523, "step": 7200 }, { "epoch": 2.185764152375171, "grad_norm": 0.6118229627609253, "learning_rate": 6.359724612736661e-05, "loss": 1.2865, "step": 7201 }, { "epoch": 2.186067688571862, "grad_norm": 0.7725751399993896, "learning_rate": 6.359218386149642e-05, "loss": 0.4212, "step": 7202 }, { "epoch": 2.186371224768554, "grad_norm": 1.019127368927002, "learning_rate": 6.358712159562621e-05, "loss": 1.3152, "step": 7203 }, { "epoch": 2.186674760965245, "grad_norm": 0.9102368354797363, "learning_rate": 6.358205932975601e-05, "loss": 0.4845, "step": 7204 }, { "epoch": 2.1869782971619367, "grad_norm": 0.7527662515640259, "learning_rate": 6.35769970638858e-05, "loss": 1.3959, "step": 7205 }, { "epoch": 2.187281833358628, "grad_norm": 0.6649389266967773, "learning_rate": 6.35719347980156e-05, "loss": 1.712, "step": 7206 }, { "epoch": 2.1875853695553196, "grad_norm": 0.8035556077957153, "learning_rate": 6.356687253214539e-05, "loss": 1.4315, "step": 7207 }, { "epoch": 2.187888905752011, "grad_norm": 0.7087796926498413, "learning_rate": 6.356181026627519e-05, "loss": 1.526, "step": 7208 }, { "epoch": 2.1881924419487024, "grad_norm": 0.7753124833106995, "learning_rate": 6.355674800040498e-05, "loss": 1.2894, "step": 7209 }, { "epoch": 2.1884959781453937, "grad_norm": 0.7068638205528259, "learning_rate": 6.355168573453478e-05, "loss": 1.2161, "step": 7210 }, { "epoch": 2.1887995143420853, "grad_norm": 0.7776404619216919, "learning_rate": 6.354662346866459e-05, "loss": 1.6053, "step": 7211 }, { "epoch": 2.189103050538777, "grad_norm": 0.8074328899383545, "learning_rate": 6.354156120279438e-05, "loss": 1.3399, "step": 7212 }, { "epoch": 2.189406586735468, "grad_norm": 0.7460588216781616, "learning_rate": 6.353649893692417e-05, "loss": 1.4607, "step": 7213 }, { "epoch": 2.18971012293216, "grad_norm": 0.6471102833747864, "learning_rate": 6.353143667105397e-05, "loss": 1.4977, "step": 7214 }, { "epoch": 2.190013659128851, "grad_norm": 0.5782828330993652, "learning_rate": 6.352637440518376e-05, "loss": 1.319, "step": 7215 }, { "epoch": 2.1903171953255427, "grad_norm": 0.701380729675293, "learning_rate": 6.352131213931356e-05, "loss": 1.4726, "step": 7216 }, { "epoch": 2.190620731522234, "grad_norm": 0.7160446047782898, "learning_rate": 6.351624987344335e-05, "loss": 1.4939, "step": 7217 }, { "epoch": 2.1909242677189256, "grad_norm": 0.6674351692199707, "learning_rate": 6.351118760757315e-05, "loss": 1.6086, "step": 7218 }, { "epoch": 2.191227803915617, "grad_norm": 0.8916682600975037, "learning_rate": 6.350612534170294e-05, "loss": 1.362, "step": 7219 }, { "epoch": 2.1915313401123084, "grad_norm": 0.683310329914093, "learning_rate": 6.350106307583274e-05, "loss": 1.4333, "step": 7220 }, { "epoch": 2.1918348763089996, "grad_norm": 0.9091866612434387, "learning_rate": 6.349600080996255e-05, "loss": 1.4197, "step": 7221 }, { "epoch": 2.1921384125056913, "grad_norm": 0.892817497253418, "learning_rate": 6.349093854409234e-05, "loss": 1.1823, "step": 7222 }, { "epoch": 2.192441948702383, "grad_norm": 0.7482536435127258, "learning_rate": 6.348587627822214e-05, "loss": 1.1527, "step": 7223 }, { "epoch": 2.192745484899074, "grad_norm": 0.6525892615318298, "learning_rate": 6.348081401235193e-05, "loss": 1.4811, "step": 7224 }, { "epoch": 2.193049021095766, "grad_norm": 0.6662126183509827, "learning_rate": 6.347575174648173e-05, "loss": 1.5839, "step": 7225 }, { "epoch": 2.193352557292457, "grad_norm": 0.7835372090339661, "learning_rate": 6.347068948061152e-05, "loss": 0.8512, "step": 7226 }, { "epoch": 2.1936560934891487, "grad_norm": 0.6281005144119263, "learning_rate": 6.346562721474132e-05, "loss": 1.5767, "step": 7227 }, { "epoch": 2.19395962968584, "grad_norm": 0.8295223116874695, "learning_rate": 6.346056494887111e-05, "loss": 1.3427, "step": 7228 }, { "epoch": 2.1942631658825316, "grad_norm": 1.0514887571334839, "learning_rate": 6.34555026830009e-05, "loss": 1.2373, "step": 7229 }, { "epoch": 2.1945667020792228, "grad_norm": 0.7697010040283203, "learning_rate": 6.345044041713071e-05, "loss": 1.1417, "step": 7230 }, { "epoch": 2.1948702382759144, "grad_norm": 0.6578774452209473, "learning_rate": 6.344537815126051e-05, "loss": 0.9744, "step": 7231 }, { "epoch": 2.1951737744726056, "grad_norm": 0.904004693031311, "learning_rate": 6.34403158853903e-05, "loss": 0.9456, "step": 7232 }, { "epoch": 2.1954773106692973, "grad_norm": 0.7968248724937439, "learning_rate": 6.34352536195201e-05, "loss": 1.3675, "step": 7233 }, { "epoch": 2.195780846865989, "grad_norm": 0.8919456005096436, "learning_rate": 6.343019135364989e-05, "loss": 1.4872, "step": 7234 }, { "epoch": 2.19608438306268, "grad_norm": 0.7745645046234131, "learning_rate": 6.342512908777969e-05, "loss": 1.2568, "step": 7235 }, { "epoch": 2.196387919259372, "grad_norm": 0.7691221237182617, "learning_rate": 6.342006682190948e-05, "loss": 1.3545, "step": 7236 }, { "epoch": 2.196691455456063, "grad_norm": 0.8532097935676575, "learning_rate": 6.341500455603928e-05, "loss": 1.1938, "step": 7237 }, { "epoch": 2.1969949916527547, "grad_norm": 0.7357925176620483, "learning_rate": 6.340994229016907e-05, "loss": 1.7771, "step": 7238 }, { "epoch": 2.197298527849446, "grad_norm": 0.718250036239624, "learning_rate": 6.340488002429888e-05, "loss": 0.7017, "step": 7239 }, { "epoch": 2.1976020640461376, "grad_norm": 0.752316415309906, "learning_rate": 6.339981775842868e-05, "loss": 0.9411, "step": 7240 }, { "epoch": 2.1979056002428288, "grad_norm": 0.710756242275238, "learning_rate": 6.339475549255848e-05, "loss": 1.1259, "step": 7241 }, { "epoch": 2.1982091364395204, "grad_norm": 0.728549063205719, "learning_rate": 6.338969322668828e-05, "loss": 0.9745, "step": 7242 }, { "epoch": 2.1985126726362116, "grad_norm": 0.8064560294151306, "learning_rate": 6.338463096081807e-05, "loss": 1.3108, "step": 7243 }, { "epoch": 2.1988162088329033, "grad_norm": 0.7704907655715942, "learning_rate": 6.337956869494787e-05, "loss": 1.483, "step": 7244 }, { "epoch": 2.199119745029595, "grad_norm": 0.8112806677818298, "learning_rate": 6.337450642907766e-05, "loss": 1.2775, "step": 7245 }, { "epoch": 2.199423281226286, "grad_norm": 0.687940239906311, "learning_rate": 6.336944416320746e-05, "loss": 1.5365, "step": 7246 }, { "epoch": 2.199726817422978, "grad_norm": 0.8638631701469421, "learning_rate": 6.336438189733725e-05, "loss": 0.903, "step": 7247 }, { "epoch": 2.200030353619669, "grad_norm": 0.8518549203872681, "learning_rate": 6.335931963146705e-05, "loss": 1.2908, "step": 7248 }, { "epoch": 2.2003338898163607, "grad_norm": 0.5747170448303223, "learning_rate": 6.335425736559684e-05, "loss": 1.6174, "step": 7249 }, { "epoch": 2.200637426013052, "grad_norm": 0.7496510148048401, "learning_rate": 6.334919509972665e-05, "loss": 1.5804, "step": 7250 }, { "epoch": 2.2009409622097436, "grad_norm": 0.7555733919143677, "learning_rate": 6.334413283385644e-05, "loss": 1.261, "step": 7251 }, { "epoch": 2.2012444984064348, "grad_norm": 0.7829087972640991, "learning_rate": 6.333907056798624e-05, "loss": 0.8846, "step": 7252 }, { "epoch": 2.2015480346031264, "grad_norm": 0.7930013537406921, "learning_rate": 6.333400830211603e-05, "loss": 1.4279, "step": 7253 }, { "epoch": 2.201851570799818, "grad_norm": 0.8092796802520752, "learning_rate": 6.332894603624583e-05, "loss": 0.9596, "step": 7254 }, { "epoch": 2.2021551069965093, "grad_norm": 0.7831472158432007, "learning_rate": 6.332388377037562e-05, "loss": 1.5345, "step": 7255 }, { "epoch": 2.202458643193201, "grad_norm": 0.6508380174636841, "learning_rate": 6.331882150450542e-05, "loss": 1.7227, "step": 7256 }, { "epoch": 2.202762179389892, "grad_norm": 0.9539698362350464, "learning_rate": 6.331375923863521e-05, "loss": 1.133, "step": 7257 }, { "epoch": 2.203065715586584, "grad_norm": 0.8370058536529541, "learning_rate": 6.330869697276501e-05, "loss": 1.3995, "step": 7258 }, { "epoch": 2.203369251783275, "grad_norm": 0.7775351405143738, "learning_rate": 6.33036347068948e-05, "loss": 1.4441, "step": 7259 }, { "epoch": 2.2036727879799667, "grad_norm": 0.5709270238876343, "learning_rate": 6.329857244102461e-05, "loss": 1.2477, "step": 7260 }, { "epoch": 2.203976324176658, "grad_norm": 0.8071433901786804, "learning_rate": 6.32935101751544e-05, "loss": 0.9049, "step": 7261 }, { "epoch": 2.2042798603733496, "grad_norm": 0.831382155418396, "learning_rate": 6.32884479092842e-05, "loss": 1.0145, "step": 7262 }, { "epoch": 2.204583396570041, "grad_norm": 2.287936210632324, "learning_rate": 6.3283385643414e-05, "loss": 1.6595, "step": 7263 }, { "epoch": 2.2048869327667324, "grad_norm": 0.7208482623100281, "learning_rate": 6.327832337754379e-05, "loss": 0.7489, "step": 7264 }, { "epoch": 2.205190468963424, "grad_norm": 1.0533088445663452, "learning_rate": 6.327326111167359e-05, "loss": 1.4194, "step": 7265 }, { "epoch": 2.2054940051601153, "grad_norm": 0.5085039734840393, "learning_rate": 6.326819884580338e-05, "loss": 1.5545, "step": 7266 }, { "epoch": 2.205797541356807, "grad_norm": 0.792374849319458, "learning_rate": 6.326313657993318e-05, "loss": 1.2997, "step": 7267 }, { "epoch": 2.206101077553498, "grad_norm": 0.6318290829658508, "learning_rate": 6.325807431406297e-05, "loss": 1.0966, "step": 7268 }, { "epoch": 2.20640461375019, "grad_norm": 0.7878068089485168, "learning_rate": 6.325301204819278e-05, "loss": 0.932, "step": 7269 }, { "epoch": 2.206708149946881, "grad_norm": 0.6690864562988281, "learning_rate": 6.324794978232257e-05, "loss": 1.2388, "step": 7270 }, { "epoch": 2.2070116861435727, "grad_norm": 0.6518906354904175, "learning_rate": 6.324288751645237e-05, "loss": 1.3831, "step": 7271 }, { "epoch": 2.207315222340264, "grad_norm": 0.7100988030433655, "learning_rate": 6.323782525058216e-05, "loss": 1.2587, "step": 7272 }, { "epoch": 2.2076187585369555, "grad_norm": 1.214737892150879, "learning_rate": 6.323276298471196e-05, "loss": 0.9016, "step": 7273 }, { "epoch": 2.207922294733647, "grad_norm": 0.8014380931854248, "learning_rate": 6.322770071884175e-05, "loss": 1.2304, "step": 7274 }, { "epoch": 2.2082258309303384, "grad_norm": 0.7462916374206543, "learning_rate": 6.322263845297155e-05, "loss": 0.8393, "step": 7275 }, { "epoch": 2.20852936712703, "grad_norm": 0.8555428385734558, "learning_rate": 6.321757618710134e-05, "loss": 1.3557, "step": 7276 }, { "epoch": 2.2088329033237213, "grad_norm": 0.6086410284042358, "learning_rate": 6.321251392123114e-05, "loss": 1.2881, "step": 7277 }, { "epoch": 2.209136439520413, "grad_norm": 0.6963145732879639, "learning_rate": 6.320745165536094e-05, "loss": 1.1412, "step": 7278 }, { "epoch": 2.209439975717104, "grad_norm": 0.7891445159912109, "learning_rate": 6.320238938949074e-05, "loss": 1.2458, "step": 7279 }, { "epoch": 2.209743511913796, "grad_norm": 0.7201794385910034, "learning_rate": 6.319732712362053e-05, "loss": 1.4347, "step": 7280 }, { "epoch": 2.210047048110487, "grad_norm": 0.7422575950622559, "learning_rate": 6.319226485775033e-05, "loss": 1.6253, "step": 7281 }, { "epoch": 2.2103505843071787, "grad_norm": 0.6944773197174072, "learning_rate": 6.318720259188012e-05, "loss": 1.2173, "step": 7282 }, { "epoch": 2.21065412050387, "grad_norm": 0.7755913734436035, "learning_rate": 6.318214032600992e-05, "loss": 1.2927, "step": 7283 }, { "epoch": 2.2109576567005615, "grad_norm": 0.7552181482315063, "learning_rate": 6.317707806013971e-05, "loss": 1.1499, "step": 7284 }, { "epoch": 2.211261192897253, "grad_norm": 1.007400631904602, "learning_rate": 6.317201579426952e-05, "loss": 1.6549, "step": 7285 }, { "epoch": 2.2115647290939444, "grad_norm": 0.8183742165565491, "learning_rate": 6.316695352839932e-05, "loss": 1.3761, "step": 7286 }, { "epoch": 2.211868265290636, "grad_norm": 0.7423942685127258, "learning_rate": 6.316189126252911e-05, "loss": 1.062, "step": 7287 }, { "epoch": 2.2121718014873273, "grad_norm": 0.8269255757331848, "learning_rate": 6.31568289966589e-05, "loss": 1.1571, "step": 7288 }, { "epoch": 2.212475337684019, "grad_norm": 0.8049871921539307, "learning_rate": 6.315176673078871e-05, "loss": 1.1371, "step": 7289 }, { "epoch": 2.21277887388071, "grad_norm": 0.6453428268432617, "learning_rate": 6.314670446491851e-05, "loss": 1.4766, "step": 7290 }, { "epoch": 2.213082410077402, "grad_norm": 0.8065813183784485, "learning_rate": 6.31416421990483e-05, "loss": 1.5128, "step": 7291 }, { "epoch": 2.213385946274093, "grad_norm": 0.749000608921051, "learning_rate": 6.31365799331781e-05, "loss": 1.2478, "step": 7292 }, { "epoch": 2.2136894824707847, "grad_norm": 0.7806560397148132, "learning_rate": 6.31315176673079e-05, "loss": 1.6537, "step": 7293 }, { "epoch": 2.213993018667476, "grad_norm": 0.8572752475738525, "learning_rate": 6.312645540143769e-05, "loss": 1.2017, "step": 7294 }, { "epoch": 2.2142965548641675, "grad_norm": 0.652068018913269, "learning_rate": 6.312139313556748e-05, "loss": 1.6904, "step": 7295 }, { "epoch": 2.214600091060859, "grad_norm": 0.6921754479408264, "learning_rate": 6.311633086969728e-05, "loss": 1.1166, "step": 7296 }, { "epoch": 2.2149036272575504, "grad_norm": 0.7487016320228577, "learning_rate": 6.311126860382707e-05, "loss": 1.5376, "step": 7297 }, { "epoch": 2.215207163454242, "grad_norm": 0.8527117967605591, "learning_rate": 6.310620633795687e-05, "loss": 1.2639, "step": 7298 }, { "epoch": 2.2155106996509333, "grad_norm": 0.8065406084060669, "learning_rate": 6.310114407208668e-05, "loss": 1.2687, "step": 7299 }, { "epoch": 2.215814235847625, "grad_norm": 0.8539122343063354, "learning_rate": 6.309608180621647e-05, "loss": 1.4641, "step": 7300 }, { "epoch": 2.216117772044316, "grad_norm": 0.8697571158409119, "learning_rate": 6.309101954034627e-05, "loss": 0.8682, "step": 7301 }, { "epoch": 2.216421308241008, "grad_norm": 0.6010316610336304, "learning_rate": 6.308595727447606e-05, "loss": 1.2321, "step": 7302 }, { "epoch": 2.216724844437699, "grad_norm": 0.5963598489761353, "learning_rate": 6.308089500860586e-05, "loss": 1.2184, "step": 7303 }, { "epoch": 2.2170283806343907, "grad_norm": 0.6307588815689087, "learning_rate": 6.307583274273565e-05, "loss": 1.2, "step": 7304 }, { "epoch": 2.217331916831082, "grad_norm": 0.7107384204864502, "learning_rate": 6.307077047686545e-05, "loss": 1.3101, "step": 7305 }, { "epoch": 2.2176354530277735, "grad_norm": 0.5359188318252563, "learning_rate": 6.306570821099524e-05, "loss": 1.652, "step": 7306 }, { "epoch": 2.217938989224465, "grad_norm": 0.617853045463562, "learning_rate": 6.306064594512503e-05, "loss": 1.5773, "step": 7307 }, { "epoch": 2.2182425254211564, "grad_norm": 0.7007902264595032, "learning_rate": 6.305558367925484e-05, "loss": 1.3057, "step": 7308 }, { "epoch": 2.218546061617848, "grad_norm": 0.7740719318389893, "learning_rate": 6.305052141338464e-05, "loss": 1.4049, "step": 7309 }, { "epoch": 2.2188495978145393, "grad_norm": 1.0561450719833374, "learning_rate": 6.304545914751443e-05, "loss": 1.0139, "step": 7310 }, { "epoch": 2.219153134011231, "grad_norm": 0.5591197609901428, "learning_rate": 6.304039688164423e-05, "loss": 1.0521, "step": 7311 }, { "epoch": 2.219456670207922, "grad_norm": 0.7620384097099304, "learning_rate": 6.303533461577402e-05, "loss": 1.5936, "step": 7312 }, { "epoch": 2.219760206404614, "grad_norm": 0.4855552911758423, "learning_rate": 6.303027234990382e-05, "loss": 1.0651, "step": 7313 }, { "epoch": 2.220063742601305, "grad_norm": 0.7248707413673401, "learning_rate": 6.302521008403361e-05, "loss": 1.1932, "step": 7314 }, { "epoch": 2.2203672787979967, "grad_norm": 0.9377697110176086, "learning_rate": 6.30201478181634e-05, "loss": 1.0397, "step": 7315 }, { "epoch": 2.2206708149946883, "grad_norm": 0.6386867761611938, "learning_rate": 6.30150855522932e-05, "loss": 1.5274, "step": 7316 }, { "epoch": 2.2209743511913795, "grad_norm": 0.7806806564331055, "learning_rate": 6.301002328642301e-05, "loss": 1.4235, "step": 7317 }, { "epoch": 2.221277887388071, "grad_norm": 0.7374126315116882, "learning_rate": 6.30049610205528e-05, "loss": 1.7768, "step": 7318 }, { "epoch": 2.2215814235847624, "grad_norm": 0.7384335994720459, "learning_rate": 6.29998987546826e-05, "loss": 1.617, "step": 7319 }, { "epoch": 2.221884959781454, "grad_norm": 0.6326069831848145, "learning_rate": 6.29948364888124e-05, "loss": 1.4716, "step": 7320 }, { "epoch": 2.2221884959781453, "grad_norm": 0.8205044269561768, "learning_rate": 6.298977422294219e-05, "loss": 1.1083, "step": 7321 }, { "epoch": 2.222492032174837, "grad_norm": 0.6621566414833069, "learning_rate": 6.298471195707198e-05, "loss": 1.1608, "step": 7322 }, { "epoch": 2.222795568371528, "grad_norm": 0.6662036776542664, "learning_rate": 6.297964969120178e-05, "loss": 1.6083, "step": 7323 }, { "epoch": 2.22309910456822, "grad_norm": 0.9175953269004822, "learning_rate": 6.297458742533157e-05, "loss": 1.2642, "step": 7324 }, { "epoch": 2.223402640764911, "grad_norm": 0.6709551811218262, "learning_rate": 6.296952515946137e-05, "loss": 0.9856, "step": 7325 }, { "epoch": 2.2237061769616027, "grad_norm": 0.7839862704277039, "learning_rate": 6.296446289359116e-05, "loss": 1.5064, "step": 7326 }, { "epoch": 2.2240097131582943, "grad_norm": 0.7650319933891296, "learning_rate": 6.295940062772097e-05, "loss": 1.2924, "step": 7327 }, { "epoch": 2.2243132493549855, "grad_norm": 0.5967111587524414, "learning_rate": 6.295433836185077e-05, "loss": 1.3916, "step": 7328 }, { "epoch": 2.224616785551677, "grad_norm": 0.9305431842803955, "learning_rate": 6.294927609598057e-05, "loss": 1.4844, "step": 7329 }, { "epoch": 2.2249203217483684, "grad_norm": 0.8193114399909973, "learning_rate": 6.294421383011037e-05, "loss": 1.3932, "step": 7330 }, { "epoch": 2.22522385794506, "grad_norm": 0.6530084013938904, "learning_rate": 6.293915156424016e-05, "loss": 1.4033, "step": 7331 }, { "epoch": 2.2255273941417513, "grad_norm": 0.7277489304542542, "learning_rate": 6.293408929836996e-05, "loss": 0.9039, "step": 7332 }, { "epoch": 2.225830930338443, "grad_norm": 0.7713912129402161, "learning_rate": 6.292902703249975e-05, "loss": 1.4569, "step": 7333 }, { "epoch": 2.226134466535134, "grad_norm": 0.5298659205436707, "learning_rate": 6.292396476662955e-05, "loss": 1.2005, "step": 7334 }, { "epoch": 2.226438002731826, "grad_norm": 0.6997772455215454, "learning_rate": 6.291890250075934e-05, "loss": 1.4526, "step": 7335 }, { "epoch": 2.2267415389285174, "grad_norm": 0.724656879901886, "learning_rate": 6.291384023488914e-05, "loss": 1.4197, "step": 7336 }, { "epoch": 2.2270450751252087, "grad_norm": 0.6488526463508606, "learning_rate": 6.290877796901893e-05, "loss": 1.8035, "step": 7337 }, { "epoch": 2.2273486113219003, "grad_norm": 0.6727263331413269, "learning_rate": 6.290371570314874e-05, "loss": 1.0752, "step": 7338 }, { "epoch": 2.2276521475185915, "grad_norm": 0.8429280519485474, "learning_rate": 6.289865343727854e-05, "loss": 1.4221, "step": 7339 }, { "epoch": 2.227955683715283, "grad_norm": 0.8624864816665649, "learning_rate": 6.289359117140833e-05, "loss": 1.325, "step": 7340 }, { "epoch": 2.2282592199119744, "grad_norm": 0.7922910451889038, "learning_rate": 6.288852890553813e-05, "loss": 1.347, "step": 7341 }, { "epoch": 2.228562756108666, "grad_norm": 0.7603941559791565, "learning_rate": 6.288346663966792e-05, "loss": 1.3607, "step": 7342 }, { "epoch": 2.2288662923053573, "grad_norm": 0.869234025478363, "learning_rate": 6.287840437379772e-05, "loss": 1.2962, "step": 7343 }, { "epoch": 2.229169828502049, "grad_norm": 0.7643560767173767, "learning_rate": 6.287334210792751e-05, "loss": 1.3782, "step": 7344 }, { "epoch": 2.22947336469874, "grad_norm": 0.6984087824821472, "learning_rate": 6.28682798420573e-05, "loss": 1.1741, "step": 7345 }, { "epoch": 2.229776900895432, "grad_norm": 0.8322027921676636, "learning_rate": 6.28632175761871e-05, "loss": 1.5343, "step": 7346 }, { "epoch": 2.2300804370921234, "grad_norm": 0.5010148882865906, "learning_rate": 6.285815531031691e-05, "loss": 1.5332, "step": 7347 }, { "epoch": 2.2303839732888147, "grad_norm": 0.8717657923698425, "learning_rate": 6.28530930444467e-05, "loss": 1.3991, "step": 7348 }, { "epoch": 2.2306875094855063, "grad_norm": 0.6880953311920166, "learning_rate": 6.28480307785765e-05, "loss": 1.2642, "step": 7349 }, { "epoch": 2.2309910456821975, "grad_norm": 0.6598102450370789, "learning_rate": 6.284296851270629e-05, "loss": 1.7148, "step": 7350 }, { "epoch": 2.231294581878889, "grad_norm": 0.6542856693267822, "learning_rate": 6.283790624683609e-05, "loss": 1.4979, "step": 7351 }, { "epoch": 2.2315981180755804, "grad_norm": 1.0812065601348877, "learning_rate": 6.283284398096588e-05, "loss": 1.6166, "step": 7352 }, { "epoch": 2.231901654272272, "grad_norm": 0.9855359792709351, "learning_rate": 6.282778171509568e-05, "loss": 0.8146, "step": 7353 }, { "epoch": 2.2322051904689633, "grad_norm": 0.818660318851471, "learning_rate": 6.282271944922547e-05, "loss": 1.3215, "step": 7354 }, { "epoch": 2.232508726665655, "grad_norm": 0.5664464235305786, "learning_rate": 6.281765718335527e-05, "loss": 1.448, "step": 7355 }, { "epoch": 2.232812262862346, "grad_norm": 0.786517858505249, "learning_rate": 6.281259491748507e-05, "loss": 1.4184, "step": 7356 }, { "epoch": 2.233115799059038, "grad_norm": 0.7664825320243835, "learning_rate": 6.280753265161487e-05, "loss": 1.5741, "step": 7357 }, { "epoch": 2.2334193352557294, "grad_norm": 0.7034837603569031, "learning_rate": 6.280247038574466e-05, "loss": 1.357, "step": 7358 }, { "epoch": 2.2337228714524207, "grad_norm": 0.6958170533180237, "learning_rate": 6.279740811987446e-05, "loss": 1.4679, "step": 7359 }, { "epoch": 2.2340264076491123, "grad_norm": 0.5791650414466858, "learning_rate": 6.279234585400425e-05, "loss": 1.6852, "step": 7360 }, { "epoch": 2.2343299438458035, "grad_norm": 0.6118115782737732, "learning_rate": 6.278728358813405e-05, "loss": 1.2855, "step": 7361 }, { "epoch": 2.234633480042495, "grad_norm": 0.6189674735069275, "learning_rate": 6.278222132226384e-05, "loss": 1.1345, "step": 7362 }, { "epoch": 2.2349370162391864, "grad_norm": 0.803203821182251, "learning_rate": 6.277715905639364e-05, "loss": 1.1459, "step": 7363 }, { "epoch": 2.235240552435878, "grad_norm": 0.7166264057159424, "learning_rate": 6.277209679052343e-05, "loss": 1.7317, "step": 7364 }, { "epoch": 2.2355440886325693, "grad_norm": 0.7164594531059265, "learning_rate": 6.276703452465323e-05, "loss": 0.9119, "step": 7365 }, { "epoch": 2.235847624829261, "grad_norm": 0.7110953330993652, "learning_rate": 6.276197225878304e-05, "loss": 1.5785, "step": 7366 }, { "epoch": 2.236151161025952, "grad_norm": 0.8318694233894348, "learning_rate": 6.275690999291283e-05, "loss": 1.2255, "step": 7367 }, { "epoch": 2.236454697222644, "grad_norm": 0.6412427425384521, "learning_rate": 6.275184772704263e-05, "loss": 1.0238, "step": 7368 }, { "epoch": 2.2367582334193354, "grad_norm": 0.8117619752883911, "learning_rate": 6.274678546117242e-05, "loss": 1.3353, "step": 7369 }, { "epoch": 2.2370617696160267, "grad_norm": 0.5850114822387695, "learning_rate": 6.274172319530222e-05, "loss": 1.6376, "step": 7370 }, { "epoch": 2.2373653058127183, "grad_norm": 0.7341693639755249, "learning_rate": 6.273666092943201e-05, "loss": 1.2459, "step": 7371 }, { "epoch": 2.2376688420094095, "grad_norm": 0.8353118896484375, "learning_rate": 6.27315986635618e-05, "loss": 1.1781, "step": 7372 }, { "epoch": 2.237972378206101, "grad_norm": 0.7113072872161865, "learning_rate": 6.27265363976916e-05, "loss": 1.3433, "step": 7373 }, { "epoch": 2.2382759144027924, "grad_norm": 0.681052565574646, "learning_rate": 6.272147413182141e-05, "loss": 1.4596, "step": 7374 }, { "epoch": 2.238579450599484, "grad_norm": 0.8312979340553284, "learning_rate": 6.27164118659512e-05, "loss": 1.3036, "step": 7375 }, { "epoch": 2.2388829867961753, "grad_norm": 1.068250298500061, "learning_rate": 6.2711349600081e-05, "loss": 1.1011, "step": 7376 }, { "epoch": 2.239186522992867, "grad_norm": 0.831622838973999, "learning_rate": 6.27062873342108e-05, "loss": 1.1204, "step": 7377 }, { "epoch": 2.239490059189558, "grad_norm": 0.6426302790641785, "learning_rate": 6.27012250683406e-05, "loss": 1.7318, "step": 7378 }, { "epoch": 2.23979359538625, "grad_norm": 0.8913399577140808, "learning_rate": 6.26961628024704e-05, "loss": 1.414, "step": 7379 }, { "epoch": 2.2400971315829414, "grad_norm": 0.827407717704773, "learning_rate": 6.269110053660019e-05, "loss": 1.3879, "step": 7380 }, { "epoch": 2.2404006677796326, "grad_norm": 0.6446571350097656, "learning_rate": 6.268603827072999e-05, "loss": 1.6254, "step": 7381 }, { "epoch": 2.2407042039763243, "grad_norm": 0.820245623588562, "learning_rate": 6.268097600485978e-05, "loss": 1.2645, "step": 7382 }, { "epoch": 2.2410077401730155, "grad_norm": 0.9686972498893738, "learning_rate": 6.267591373898957e-05, "loss": 1.0926, "step": 7383 }, { "epoch": 2.241311276369707, "grad_norm": 0.6719053387641907, "learning_rate": 6.267085147311937e-05, "loss": 1.2106, "step": 7384 }, { "epoch": 2.2416148125663984, "grad_norm": 0.7990976572036743, "learning_rate": 6.266578920724916e-05, "loss": 1.3629, "step": 7385 }, { "epoch": 2.24191834876309, "grad_norm": 0.8770137429237366, "learning_rate": 6.266072694137897e-05, "loss": 1.1797, "step": 7386 }, { "epoch": 2.2422218849597813, "grad_norm": 0.8285596370697021, "learning_rate": 6.265566467550877e-05, "loss": 0.7679, "step": 7387 }, { "epoch": 2.242525421156473, "grad_norm": 0.8647116422653198, "learning_rate": 6.265060240963856e-05, "loss": 0.9769, "step": 7388 }, { "epoch": 2.2428289573531646, "grad_norm": 0.9097809791564941, "learning_rate": 6.264554014376836e-05, "loss": 0.9809, "step": 7389 }, { "epoch": 2.2431324935498558, "grad_norm": 0.7164098620414734, "learning_rate": 6.264047787789815e-05, "loss": 1.7244, "step": 7390 }, { "epoch": 2.2434360297465474, "grad_norm": 0.7220231890678406, "learning_rate": 6.263541561202795e-05, "loss": 1.3302, "step": 7391 }, { "epoch": 2.2437395659432386, "grad_norm": 0.7721933722496033, "learning_rate": 6.263035334615774e-05, "loss": 0.8151, "step": 7392 }, { "epoch": 2.2440431021399303, "grad_norm": 0.6171678304672241, "learning_rate": 6.262529108028754e-05, "loss": 1.5075, "step": 7393 }, { "epoch": 2.2443466383366215, "grad_norm": 1.0465861558914185, "learning_rate": 6.262022881441733e-05, "loss": 1.2764, "step": 7394 }, { "epoch": 2.244650174533313, "grad_norm": 0.7800273299217224, "learning_rate": 6.261516654854714e-05, "loss": 0.9609, "step": 7395 }, { "epoch": 2.2449537107300044, "grad_norm": 0.7144536972045898, "learning_rate": 6.261010428267693e-05, "loss": 1.1818, "step": 7396 }, { "epoch": 2.245257246926696, "grad_norm": 0.7565951943397522, "learning_rate": 6.260504201680673e-05, "loss": 1.3815, "step": 7397 }, { "epoch": 2.2455607831233877, "grad_norm": 0.689526379108429, "learning_rate": 6.259997975093652e-05, "loss": 1.423, "step": 7398 }, { "epoch": 2.245864319320079, "grad_norm": 0.6433810591697693, "learning_rate": 6.259491748506632e-05, "loss": 1.5474, "step": 7399 }, { "epoch": 2.2461678555167706, "grad_norm": 0.5929258465766907, "learning_rate": 6.258985521919611e-05, "loss": 1.1298, "step": 7400 }, { "epoch": 2.2464713917134618, "grad_norm": 0.7315686941146851, "learning_rate": 6.258479295332591e-05, "loss": 1.5642, "step": 7401 }, { "epoch": 2.2467749279101534, "grad_norm": 0.7189039587974548, "learning_rate": 6.25797306874557e-05, "loss": 0.8376, "step": 7402 }, { "epoch": 2.2470784641068446, "grad_norm": 0.7872570157051086, "learning_rate": 6.25746684215855e-05, "loss": 1.3381, "step": 7403 }, { "epoch": 2.2473820003035363, "grad_norm": 0.5732450485229492, "learning_rate": 6.256960615571529e-05, "loss": 1.5919, "step": 7404 }, { "epoch": 2.2476855365002275, "grad_norm": 0.7429735660552979, "learning_rate": 6.25645438898451e-05, "loss": 1.221, "step": 7405 }, { "epoch": 2.247989072696919, "grad_norm": 0.8105106353759766, "learning_rate": 6.25594816239749e-05, "loss": 1.2708, "step": 7406 }, { "epoch": 2.2482926088936104, "grad_norm": 0.779171884059906, "learning_rate": 6.255441935810469e-05, "loss": 1.3488, "step": 7407 }, { "epoch": 2.248596145090302, "grad_norm": 0.5718879699707031, "learning_rate": 6.254935709223449e-05, "loss": 1.8277, "step": 7408 }, { "epoch": 2.2488996812869937, "grad_norm": 0.6967871785163879, "learning_rate": 6.254429482636428e-05, "loss": 1.4693, "step": 7409 }, { "epoch": 2.249203217483685, "grad_norm": 0.7380461692810059, "learning_rate": 6.253923256049407e-05, "loss": 1.2631, "step": 7410 }, { "epoch": 2.2495067536803766, "grad_norm": 0.7310465574264526, "learning_rate": 6.253417029462387e-05, "loss": 1.2298, "step": 7411 }, { "epoch": 2.2498102898770678, "grad_norm": 0.6302371621131897, "learning_rate": 6.252910802875366e-05, "loss": 1.616, "step": 7412 }, { "epoch": 2.2501138260737594, "grad_norm": 0.8418757915496826, "learning_rate": 6.252404576288346e-05, "loss": 1.1698, "step": 7413 }, { "epoch": 2.2504173622704506, "grad_norm": 0.823052167892456, "learning_rate": 6.251898349701327e-05, "loss": 1.3639, "step": 7414 }, { "epoch": 2.2507208984671423, "grad_norm": 0.6069715619087219, "learning_rate": 6.251392123114306e-05, "loss": 1.3813, "step": 7415 }, { "epoch": 2.2510244346638335, "grad_norm": 0.7191272377967834, "learning_rate": 6.250885896527286e-05, "loss": 1.3083, "step": 7416 }, { "epoch": 2.251327970860525, "grad_norm": 0.6589117050170898, "learning_rate": 6.250379669940265e-05, "loss": 1.5802, "step": 7417 }, { "epoch": 2.2516315070572164, "grad_norm": 0.8188351392745972, "learning_rate": 6.249873443353246e-05, "loss": 1.5078, "step": 7418 }, { "epoch": 2.251935043253908, "grad_norm": 0.8425248861312866, "learning_rate": 6.249367216766225e-05, "loss": 1.4421, "step": 7419 }, { "epoch": 2.2522385794505997, "grad_norm": 0.7744620442390442, "learning_rate": 6.248860990179205e-05, "loss": 1.3202, "step": 7420 }, { "epoch": 2.252542115647291, "grad_norm": 0.8308604955673218, "learning_rate": 6.248354763592184e-05, "loss": 1.3473, "step": 7421 }, { "epoch": 2.2528456518439826, "grad_norm": 0.9154324531555176, "learning_rate": 6.247848537005164e-05, "loss": 1.4875, "step": 7422 }, { "epoch": 2.2531491880406738, "grad_norm": 0.8225959539413452, "learning_rate": 6.247342310418143e-05, "loss": 1.5585, "step": 7423 }, { "epoch": 2.2534527242373654, "grad_norm": 0.787828803062439, "learning_rate": 6.246836083831123e-05, "loss": 0.9589, "step": 7424 }, { "epoch": 2.2537562604340566, "grad_norm": 0.7419188618659973, "learning_rate": 6.246329857244104e-05, "loss": 1.3605, "step": 7425 }, { "epoch": 2.2540597966307483, "grad_norm": 0.6389353275299072, "learning_rate": 6.245823630657083e-05, "loss": 0.6711, "step": 7426 }, { "epoch": 2.2543633328274395, "grad_norm": 0.5891460180282593, "learning_rate": 6.245317404070063e-05, "loss": 1.0786, "step": 7427 }, { "epoch": 2.254666869024131, "grad_norm": 0.7612578868865967, "learning_rate": 6.244811177483042e-05, "loss": 1.4422, "step": 7428 }, { "epoch": 2.2549704052208224, "grad_norm": 0.7443048357963562, "learning_rate": 6.244304950896022e-05, "loss": 1.3552, "step": 7429 }, { "epoch": 2.255273941417514, "grad_norm": 0.8717397451400757, "learning_rate": 6.243798724309001e-05, "loss": 1.2675, "step": 7430 }, { "epoch": 2.2555774776142057, "grad_norm": 0.6923880577087402, "learning_rate": 6.24329249772198e-05, "loss": 1.462, "step": 7431 }, { "epoch": 2.255881013810897, "grad_norm": 0.6938219666481018, "learning_rate": 6.24278627113496e-05, "loss": 1.4014, "step": 7432 }, { "epoch": 2.2561845500075886, "grad_norm": 0.700583815574646, "learning_rate": 6.24228004454794e-05, "loss": 1.3725, "step": 7433 }, { "epoch": 2.2564880862042798, "grad_norm": 0.7408208847045898, "learning_rate": 6.24177381796092e-05, "loss": 0.7655, "step": 7434 }, { "epoch": 2.2567916224009714, "grad_norm": 0.6944329142570496, "learning_rate": 6.2412675913739e-05, "loss": 1.5368, "step": 7435 }, { "epoch": 2.2570951585976626, "grad_norm": 0.6069648861885071, "learning_rate": 6.24076136478688e-05, "loss": 0.6665, "step": 7436 }, { "epoch": 2.2573986947943543, "grad_norm": 0.6946176290512085, "learning_rate": 6.240255138199859e-05, "loss": 1.5272, "step": 7437 }, { "epoch": 2.2577022309910455, "grad_norm": 1.039425015449524, "learning_rate": 6.239748911612838e-05, "loss": 1.4667, "step": 7438 }, { "epoch": 2.258005767187737, "grad_norm": 0.8364284634590149, "learning_rate": 6.239242685025818e-05, "loss": 1.2519, "step": 7439 }, { "epoch": 2.2583093033844284, "grad_norm": 0.9233106374740601, "learning_rate": 6.238736458438797e-05, "loss": 1.3155, "step": 7440 }, { "epoch": 2.25861283958112, "grad_norm": 0.5708487033843994, "learning_rate": 6.238230231851777e-05, "loss": 1.4757, "step": 7441 }, { "epoch": 2.2589163757778117, "grad_norm": 0.7805628776550293, "learning_rate": 6.237724005264756e-05, "loss": 1.2103, "step": 7442 }, { "epoch": 2.259219911974503, "grad_norm": 0.699892520904541, "learning_rate": 6.237217778677736e-05, "loss": 1.0468, "step": 7443 }, { "epoch": 2.2595234481711945, "grad_norm": 0.6304746270179749, "learning_rate": 6.236711552090717e-05, "loss": 0.9349, "step": 7444 }, { "epoch": 2.2598269843678858, "grad_norm": 0.7185422778129578, "learning_rate": 6.236205325503696e-05, "loss": 1.4341, "step": 7445 }, { "epoch": 2.2601305205645774, "grad_norm": 0.56112140417099, "learning_rate": 6.235699098916676e-05, "loss": 1.1936, "step": 7446 }, { "epoch": 2.2604340567612686, "grad_norm": 0.6657426357269287, "learning_rate": 6.235192872329655e-05, "loss": 1.1362, "step": 7447 }, { "epoch": 2.2607375929579603, "grad_norm": 0.4865644872188568, "learning_rate": 6.234686645742634e-05, "loss": 0.6632, "step": 7448 }, { "epoch": 2.261041129154652, "grad_norm": 0.8862194418907166, "learning_rate": 6.234180419155614e-05, "loss": 1.2728, "step": 7449 }, { "epoch": 2.261344665351343, "grad_norm": 0.8285862803459167, "learning_rate": 6.233674192568593e-05, "loss": 1.1827, "step": 7450 }, { "epoch": 2.2616482015480344, "grad_norm": 0.821243941783905, "learning_rate": 6.233167965981573e-05, "loss": 1.0182, "step": 7451 }, { "epoch": 2.261951737744726, "grad_norm": 0.7204463481903076, "learning_rate": 6.232661739394552e-05, "loss": 1.277, "step": 7452 }, { "epoch": 2.2622552739414177, "grad_norm": 0.880133867263794, "learning_rate": 6.232155512807533e-05, "loss": 1.6253, "step": 7453 }, { "epoch": 2.262558810138109, "grad_norm": 0.6118894815444946, "learning_rate": 6.231649286220513e-05, "loss": 1.3237, "step": 7454 }, { "epoch": 2.2628623463348005, "grad_norm": 0.863484799861908, "learning_rate": 6.231143059633492e-05, "loss": 0.9433, "step": 7455 }, { "epoch": 2.2631658825314918, "grad_norm": 0.6867091059684753, "learning_rate": 6.230636833046472e-05, "loss": 1.393, "step": 7456 }, { "epoch": 2.2634694187281834, "grad_norm": 0.7559130787849426, "learning_rate": 6.230130606459451e-05, "loss": 1.4322, "step": 7457 }, { "epoch": 2.2637729549248746, "grad_norm": 0.7188666462898254, "learning_rate": 6.22962437987243e-05, "loss": 1.1943, "step": 7458 }, { "epoch": 2.2640764911215663, "grad_norm": 0.6434985399246216, "learning_rate": 6.22911815328541e-05, "loss": 1.4742, "step": 7459 }, { "epoch": 2.264380027318258, "grad_norm": 0.8641550540924072, "learning_rate": 6.22861192669839e-05, "loss": 1.0067, "step": 7460 }, { "epoch": 2.264683563514949, "grad_norm": 0.816226065158844, "learning_rate": 6.228105700111369e-05, "loss": 1.2797, "step": 7461 }, { "epoch": 2.264987099711641, "grad_norm": 0.9310057163238525, "learning_rate": 6.22759947352435e-05, "loss": 0.9412, "step": 7462 }, { "epoch": 2.265290635908332, "grad_norm": 0.7045341730117798, "learning_rate": 6.22709324693733e-05, "loss": 1.1715, "step": 7463 }, { "epoch": 2.2655941721050237, "grad_norm": 0.6052013635635376, "learning_rate": 6.22658702035031e-05, "loss": 1.8839, "step": 7464 }, { "epoch": 2.265897708301715, "grad_norm": 0.6659092307090759, "learning_rate": 6.22608079376329e-05, "loss": 1.6587, "step": 7465 }, { "epoch": 2.2662012444984065, "grad_norm": 0.9298098087310791, "learning_rate": 6.225574567176269e-05, "loss": 0.8736, "step": 7466 }, { "epoch": 2.2665047806950978, "grad_norm": 0.5785535573959351, "learning_rate": 6.225068340589249e-05, "loss": 1.4056, "step": 7467 }, { "epoch": 2.2668083168917894, "grad_norm": 0.7402830719947815, "learning_rate": 6.224562114002228e-05, "loss": 1.4276, "step": 7468 }, { "epoch": 2.2671118530884806, "grad_norm": 0.7167106866836548, "learning_rate": 6.224055887415208e-05, "loss": 1.2924, "step": 7469 }, { "epoch": 2.2674153892851723, "grad_norm": 0.783779501914978, "learning_rate": 6.223549660828187e-05, "loss": 1.4738, "step": 7470 }, { "epoch": 2.267718925481864, "grad_norm": 0.516488790512085, "learning_rate": 6.223043434241167e-05, "loss": 1.1022, "step": 7471 }, { "epoch": 2.268022461678555, "grad_norm": 0.6242533326148987, "learning_rate": 6.222537207654146e-05, "loss": 1.0569, "step": 7472 }, { "epoch": 2.268325997875247, "grad_norm": 0.7155179977416992, "learning_rate": 6.222030981067127e-05, "loss": 1.4912, "step": 7473 }, { "epoch": 2.268629534071938, "grad_norm": 0.6795659065246582, "learning_rate": 6.221524754480106e-05, "loss": 0.9815, "step": 7474 }, { "epoch": 2.2689330702686297, "grad_norm": 0.8595495820045471, "learning_rate": 6.221018527893086e-05, "loss": 1.4531, "step": 7475 }, { "epoch": 2.269236606465321, "grad_norm": 0.7935011386871338, "learning_rate": 6.220512301306065e-05, "loss": 1.347, "step": 7476 }, { "epoch": 2.2695401426620125, "grad_norm": 0.8852940201759338, "learning_rate": 6.220006074719045e-05, "loss": 1.4368, "step": 7477 }, { "epoch": 2.2698436788587038, "grad_norm": 0.625978410243988, "learning_rate": 6.219499848132024e-05, "loss": 1.0929, "step": 7478 }, { "epoch": 2.2701472150553954, "grad_norm": 0.7276142835617065, "learning_rate": 6.218993621545004e-05, "loss": 0.6517, "step": 7479 }, { "epoch": 2.2704507512520866, "grad_norm": 0.8396570682525635, "learning_rate": 6.218487394957983e-05, "loss": 1.2783, "step": 7480 }, { "epoch": 2.2707542874487783, "grad_norm": 0.6671884655952454, "learning_rate": 6.217981168370963e-05, "loss": 1.2476, "step": 7481 }, { "epoch": 2.27105782364547, "grad_norm": 0.7303450107574463, "learning_rate": 6.217474941783942e-05, "loss": 1.0739, "step": 7482 }, { "epoch": 2.271361359842161, "grad_norm": 0.7783435583114624, "learning_rate": 6.216968715196923e-05, "loss": 1.418, "step": 7483 }, { "epoch": 2.271664896038853, "grad_norm": 0.6920360326766968, "learning_rate": 6.216462488609903e-05, "loss": 1.4488, "step": 7484 }, { "epoch": 2.271968432235544, "grad_norm": 0.5489271879196167, "learning_rate": 6.215956262022882e-05, "loss": 1.6853, "step": 7485 }, { "epoch": 2.2722719684322357, "grad_norm": 0.8495591282844543, "learning_rate": 6.215450035435861e-05, "loss": 1.6002, "step": 7486 }, { "epoch": 2.272575504628927, "grad_norm": 0.7131351232528687, "learning_rate": 6.214943808848841e-05, "loss": 1.1706, "step": 7487 }, { "epoch": 2.2728790408256185, "grad_norm": 0.9178603291511536, "learning_rate": 6.21443758226182e-05, "loss": 1.368, "step": 7488 }, { "epoch": 2.2731825770223097, "grad_norm": 0.7738288640975952, "learning_rate": 6.2139313556748e-05, "loss": 1.0531, "step": 7489 }, { "epoch": 2.2734861132190014, "grad_norm": 0.6220824122428894, "learning_rate": 6.21342512908778e-05, "loss": 0.8149, "step": 7490 }, { "epoch": 2.2737896494156926, "grad_norm": 0.5784722566604614, "learning_rate": 6.212918902500759e-05, "loss": 0.9504, "step": 7491 }, { "epoch": 2.2740931856123843, "grad_norm": 0.9277094006538391, "learning_rate": 6.21241267591374e-05, "loss": 1.1672, "step": 7492 }, { "epoch": 2.274396721809076, "grad_norm": 0.8855558037757874, "learning_rate": 6.211906449326719e-05, "loss": 1.2628, "step": 7493 }, { "epoch": 2.274700258005767, "grad_norm": 0.7635779976844788, "learning_rate": 6.211400222739699e-05, "loss": 1.3246, "step": 7494 }, { "epoch": 2.275003794202459, "grad_norm": 0.6834839582443237, "learning_rate": 6.210893996152678e-05, "loss": 1.2209, "step": 7495 }, { "epoch": 2.27530733039915, "grad_norm": 0.6764331459999084, "learning_rate": 6.210387769565658e-05, "loss": 1.6269, "step": 7496 }, { "epoch": 2.2756108665958417, "grad_norm": 0.8024922013282776, "learning_rate": 6.209881542978637e-05, "loss": 1.3489, "step": 7497 }, { "epoch": 2.275914402792533, "grad_norm": 0.7418940663337708, "learning_rate": 6.209375316391617e-05, "loss": 0.8103, "step": 7498 }, { "epoch": 2.2762179389892245, "grad_norm": 0.9875561594963074, "learning_rate": 6.208869089804596e-05, "loss": 0.8744, "step": 7499 }, { "epoch": 2.2765214751859157, "grad_norm": 0.8351328372955322, "learning_rate": 6.208362863217576e-05, "loss": 1.4052, "step": 7500 }, { "epoch": 2.2768250113826074, "grad_norm": 0.7316960692405701, "learning_rate": 6.207856636630556e-05, "loss": 1.3071, "step": 7501 }, { "epoch": 2.2771285475792986, "grad_norm": 0.7554438710212708, "learning_rate": 6.207350410043536e-05, "loss": 0.9889, "step": 7502 }, { "epoch": 2.2774320837759903, "grad_norm": 0.7924057841300964, "learning_rate": 6.206844183456515e-05, "loss": 1.225, "step": 7503 }, { "epoch": 2.277735619972682, "grad_norm": 0.619770884513855, "learning_rate": 6.206337956869495e-05, "loss": 1.5899, "step": 7504 }, { "epoch": 2.278039156169373, "grad_norm": 0.8481298685073853, "learning_rate": 6.205831730282474e-05, "loss": 1.2686, "step": 7505 }, { "epoch": 2.278342692366065, "grad_norm": 0.7642682790756226, "learning_rate": 6.205325503695454e-05, "loss": 1.0228, "step": 7506 }, { "epoch": 2.278646228562756, "grad_norm": 0.712116003036499, "learning_rate": 6.204819277108435e-05, "loss": 1.3615, "step": 7507 }, { "epoch": 2.2789497647594477, "grad_norm": 0.733302891254425, "learning_rate": 6.204313050521414e-05, "loss": 1.2985, "step": 7508 }, { "epoch": 2.279253300956139, "grad_norm": 0.8620637655258179, "learning_rate": 6.203806823934394e-05, "loss": 1.291, "step": 7509 }, { "epoch": 2.2795568371528305, "grad_norm": 0.7499003410339355, "learning_rate": 6.203300597347373e-05, "loss": 1.2764, "step": 7510 }, { "epoch": 2.2798603733495217, "grad_norm": 0.8012336492538452, "learning_rate": 6.202794370760353e-05, "loss": 0.9868, "step": 7511 }, { "epoch": 2.2801639095462134, "grad_norm": 0.9195525646209717, "learning_rate": 6.202288144173333e-05, "loss": 1.4727, "step": 7512 }, { "epoch": 2.2804674457429046, "grad_norm": 0.8243017196655273, "learning_rate": 6.201781917586313e-05, "loss": 1.1061, "step": 7513 }, { "epoch": 2.2807709819395963, "grad_norm": 0.8141716718673706, "learning_rate": 6.201275690999292e-05, "loss": 1.5759, "step": 7514 }, { "epoch": 2.281074518136288, "grad_norm": 0.9679292440414429, "learning_rate": 6.200769464412272e-05, "loss": 1.1664, "step": 7515 }, { "epoch": 2.281378054332979, "grad_norm": 0.8407115340232849, "learning_rate": 6.200263237825251e-05, "loss": 1.3996, "step": 7516 }, { "epoch": 2.281681590529671, "grad_norm": 0.8369561433792114, "learning_rate": 6.199757011238231e-05, "loss": 1.3342, "step": 7517 }, { "epoch": 2.281985126726362, "grad_norm": 0.6965354084968567, "learning_rate": 6.19925078465121e-05, "loss": 0.7063, "step": 7518 }, { "epoch": 2.2822886629230537, "grad_norm": 0.8770589232444763, "learning_rate": 6.19874455806419e-05, "loss": 1.28, "step": 7519 }, { "epoch": 2.282592199119745, "grad_norm": 0.821842610836029, "learning_rate": 6.198238331477169e-05, "loss": 1.1275, "step": 7520 }, { "epoch": 2.2828957353164365, "grad_norm": 0.828779935836792, "learning_rate": 6.197732104890149e-05, "loss": 1.3829, "step": 7521 }, { "epoch": 2.283199271513128, "grad_norm": 0.7814561128616333, "learning_rate": 6.19722587830313e-05, "loss": 1.5446, "step": 7522 }, { "epoch": 2.2835028077098194, "grad_norm": 0.7407017350196838, "learning_rate": 6.196719651716109e-05, "loss": 1.4936, "step": 7523 }, { "epoch": 2.2838063439065106, "grad_norm": 0.7612883448600769, "learning_rate": 6.196213425129088e-05, "loss": 1.4638, "step": 7524 }, { "epoch": 2.2841098801032023, "grad_norm": 0.6154638528823853, "learning_rate": 6.195707198542068e-05, "loss": 1.8089, "step": 7525 }, { "epoch": 2.284413416299894, "grad_norm": 0.9546413421630859, "learning_rate": 6.195200971955047e-05, "loss": 1.5108, "step": 7526 }, { "epoch": 2.284716952496585, "grad_norm": 0.7517131567001343, "learning_rate": 6.194694745368027e-05, "loss": 1.0738, "step": 7527 }, { "epoch": 2.285020488693277, "grad_norm": 0.7600359320640564, "learning_rate": 6.194188518781006e-05, "loss": 1.5688, "step": 7528 }, { "epoch": 2.285324024889968, "grad_norm": 0.7792675495147705, "learning_rate": 6.193682292193986e-05, "loss": 0.9124, "step": 7529 }, { "epoch": 2.2856275610866597, "grad_norm": 0.5790246725082397, "learning_rate": 6.193176065606965e-05, "loss": 1.0978, "step": 7530 }, { "epoch": 2.285931097283351, "grad_norm": 0.7425166368484497, "learning_rate": 6.192669839019946e-05, "loss": 1.1871, "step": 7531 }, { "epoch": 2.2862346334800425, "grad_norm": 0.6829063296318054, "learning_rate": 6.192163612432926e-05, "loss": 1.3564, "step": 7532 }, { "epoch": 2.286538169676734, "grad_norm": 0.8501928448677063, "learning_rate": 6.191657385845905e-05, "loss": 1.0968, "step": 7533 }, { "epoch": 2.2868417058734254, "grad_norm": 0.7157674431800842, "learning_rate": 6.191151159258885e-05, "loss": 1.4422, "step": 7534 }, { "epoch": 2.287145242070117, "grad_norm": 0.6410207152366638, "learning_rate": 6.190644932671864e-05, "loss": 1.8158, "step": 7535 }, { "epoch": 2.2874487782668083, "grad_norm": 0.7278507947921753, "learning_rate": 6.190138706084844e-05, "loss": 1.0099, "step": 7536 }, { "epoch": 2.2877523144635, "grad_norm": 0.6953244805335999, "learning_rate": 6.189632479497823e-05, "loss": 1.3812, "step": 7537 }, { "epoch": 2.288055850660191, "grad_norm": 0.7524649500846863, "learning_rate": 6.189126252910803e-05, "loss": 1.3067, "step": 7538 }, { "epoch": 2.288359386856883, "grad_norm": 0.8401016592979431, "learning_rate": 6.188620026323782e-05, "loss": 0.9991, "step": 7539 }, { "epoch": 2.288662923053574, "grad_norm": 0.8146904110908508, "learning_rate": 6.188113799736763e-05, "loss": 0.8624, "step": 7540 }, { "epoch": 2.2889664592502657, "grad_norm": 0.815436601638794, "learning_rate": 6.187607573149742e-05, "loss": 1.2165, "step": 7541 }, { "epoch": 2.289269995446957, "grad_norm": 0.817846953868866, "learning_rate": 6.187101346562722e-05, "loss": 1.546, "step": 7542 }, { "epoch": 2.2895735316436485, "grad_norm": 0.7438780665397644, "learning_rate": 6.186595119975701e-05, "loss": 1.14, "step": 7543 }, { "epoch": 2.28987706784034, "grad_norm": 0.7665032148361206, "learning_rate": 6.186088893388681e-05, "loss": 1.4702, "step": 7544 }, { "epoch": 2.2901806040370314, "grad_norm": 0.736453115940094, "learning_rate": 6.18558266680166e-05, "loss": 1.2116, "step": 7545 }, { "epoch": 2.290484140233723, "grad_norm": 0.8457592129707336, "learning_rate": 6.18507644021464e-05, "loss": 1.7704, "step": 7546 }, { "epoch": 2.2907876764304143, "grad_norm": 0.7538440823554993, "learning_rate": 6.184570213627619e-05, "loss": 1.2569, "step": 7547 }, { "epoch": 2.291091212627106, "grad_norm": 0.7179580926895142, "learning_rate": 6.184063987040599e-05, "loss": 1.3755, "step": 7548 }, { "epoch": 2.291394748823797, "grad_norm": 0.7420378923416138, "learning_rate": 6.183557760453578e-05, "loss": 1.4729, "step": 7549 }, { "epoch": 2.291698285020489, "grad_norm": 0.789522647857666, "learning_rate": 6.183051533866559e-05, "loss": 1.383, "step": 7550 }, { "epoch": 2.29200182121718, "grad_norm": 0.7265890836715698, "learning_rate": 6.182545307279538e-05, "loss": 1.5371, "step": 7551 }, { "epoch": 2.2923053574138716, "grad_norm": 0.7750800251960754, "learning_rate": 6.182039080692519e-05, "loss": 1.3417, "step": 7552 }, { "epoch": 2.292608893610563, "grad_norm": 0.8087584972381592, "learning_rate": 6.181532854105499e-05, "loss": 1.2937, "step": 7553 }, { "epoch": 2.2929124298072545, "grad_norm": 1.0158443450927734, "learning_rate": 6.181026627518478e-05, "loss": 1.2413, "step": 7554 }, { "epoch": 2.293215966003946, "grad_norm": 0.7395852208137512, "learning_rate": 6.180520400931458e-05, "loss": 1.3702, "step": 7555 }, { "epoch": 2.2935195022006374, "grad_norm": 0.8146117925643921, "learning_rate": 6.180014174344437e-05, "loss": 0.7595, "step": 7556 }, { "epoch": 2.293823038397329, "grad_norm": 0.8666897416114807, "learning_rate": 6.179507947757417e-05, "loss": 1.5115, "step": 7557 }, { "epoch": 2.2941265745940203, "grad_norm": 0.8771815299987793, "learning_rate": 6.179001721170396e-05, "loss": 1.4715, "step": 7558 }, { "epoch": 2.294430110790712, "grad_norm": 0.810228168964386, "learning_rate": 6.178495494583376e-05, "loss": 1.4295, "step": 7559 }, { "epoch": 2.294733646987403, "grad_norm": 0.6954602003097534, "learning_rate": 6.177989267996355e-05, "loss": 1.2752, "step": 7560 }, { "epoch": 2.2950371831840948, "grad_norm": 0.7767638564109802, "learning_rate": 6.177483041409336e-05, "loss": 1.2908, "step": 7561 }, { "epoch": 2.295340719380786, "grad_norm": 0.6568270921707153, "learning_rate": 6.176976814822315e-05, "loss": 0.9384, "step": 7562 }, { "epoch": 2.2956442555774776, "grad_norm": 0.7226028442382812, "learning_rate": 6.176470588235295e-05, "loss": 1.1192, "step": 7563 }, { "epoch": 2.295947791774169, "grad_norm": 0.685576319694519, "learning_rate": 6.175964361648274e-05, "loss": 1.1595, "step": 7564 }, { "epoch": 2.2962513279708605, "grad_norm": 0.4528180658817291, "learning_rate": 6.175458135061254e-05, "loss": 1.6221, "step": 7565 }, { "epoch": 2.296554864167552, "grad_norm": 0.6942249536514282, "learning_rate": 6.174951908474233e-05, "loss": 1.1044, "step": 7566 }, { "epoch": 2.2968584003642434, "grad_norm": 0.7307713031768799, "learning_rate": 6.174445681887213e-05, "loss": 1.3832, "step": 7567 }, { "epoch": 2.297161936560935, "grad_norm": 0.6575482487678528, "learning_rate": 6.173939455300192e-05, "loss": 1.4703, "step": 7568 }, { "epoch": 2.2974654727576262, "grad_norm": 0.8544819355010986, "learning_rate": 6.173433228713172e-05, "loss": 1.3402, "step": 7569 }, { "epoch": 2.297769008954318, "grad_norm": 0.7268744111061096, "learning_rate": 6.172927002126153e-05, "loss": 1.6343, "step": 7570 }, { "epoch": 2.298072545151009, "grad_norm": 0.6348064541816711, "learning_rate": 6.172420775539132e-05, "loss": 1.3316, "step": 7571 }, { "epoch": 2.2983760813477008, "grad_norm": 0.7735495567321777, "learning_rate": 6.171914548952112e-05, "loss": 1.4615, "step": 7572 }, { "epoch": 2.298679617544392, "grad_norm": 0.8103759288787842, "learning_rate": 6.171408322365091e-05, "loss": 1.5418, "step": 7573 }, { "epoch": 2.2989831537410836, "grad_norm": 0.7252805233001709, "learning_rate": 6.17090209577807e-05, "loss": 0.8511, "step": 7574 }, { "epoch": 2.299286689937775, "grad_norm": 0.8680673241615295, "learning_rate": 6.17039586919105e-05, "loss": 1.3099, "step": 7575 }, { "epoch": 2.2995902261344665, "grad_norm": 0.5763619542121887, "learning_rate": 6.16988964260403e-05, "loss": 1.7721, "step": 7576 }, { "epoch": 2.299893762331158, "grad_norm": 0.7439877986907959, "learning_rate": 6.169383416017009e-05, "loss": 1.1515, "step": 7577 }, { "epoch": 2.3001972985278494, "grad_norm": 0.7698675990104675, "learning_rate": 6.168877189429988e-05, "loss": 1.2372, "step": 7578 }, { "epoch": 2.300500834724541, "grad_norm": 0.7722331285476685, "learning_rate": 6.168370962842969e-05, "loss": 1.6233, "step": 7579 }, { "epoch": 2.3008043709212322, "grad_norm": 0.8871555924415588, "learning_rate": 6.167864736255949e-05, "loss": 1.4354, "step": 7580 }, { "epoch": 2.301107907117924, "grad_norm": 0.8712011575698853, "learning_rate": 6.167358509668928e-05, "loss": 1.1775, "step": 7581 }, { "epoch": 2.301411443314615, "grad_norm": 0.9085456728935242, "learning_rate": 6.166852283081908e-05, "loss": 1.4472, "step": 7582 }, { "epoch": 2.3017149795113068, "grad_norm": 0.8338646292686462, "learning_rate": 6.166346056494887e-05, "loss": 1.2795, "step": 7583 }, { "epoch": 2.3020185157079984, "grad_norm": 0.6676700711250305, "learning_rate": 6.165839829907867e-05, "loss": 1.4605, "step": 7584 }, { "epoch": 2.3023220519046896, "grad_norm": 0.8141628503799438, "learning_rate": 6.165333603320846e-05, "loss": 1.3307, "step": 7585 }, { "epoch": 2.302625588101381, "grad_norm": 0.6280063986778259, "learning_rate": 6.164827376733826e-05, "loss": 1.3559, "step": 7586 }, { "epoch": 2.3029291242980725, "grad_norm": 1.0572842359542847, "learning_rate": 6.164321150146805e-05, "loss": 0.9411, "step": 7587 }, { "epoch": 2.303232660494764, "grad_norm": 0.7565344572067261, "learning_rate": 6.163814923559785e-05, "loss": 1.1205, "step": 7588 }, { "epoch": 2.3035361966914554, "grad_norm": 0.9521320462226868, "learning_rate": 6.163308696972765e-05, "loss": 1.1303, "step": 7589 }, { "epoch": 2.303839732888147, "grad_norm": 0.8380699157714844, "learning_rate": 6.162802470385745e-05, "loss": 1.1181, "step": 7590 }, { "epoch": 2.3041432690848382, "grad_norm": 0.7872660756111145, "learning_rate": 6.162296243798724e-05, "loss": 1.3316, "step": 7591 }, { "epoch": 2.30444680528153, "grad_norm": 0.6858444213867188, "learning_rate": 6.161790017211704e-05, "loss": 1.2919, "step": 7592 }, { "epoch": 2.304750341478221, "grad_norm": 0.7087762951850891, "learning_rate": 6.161283790624683e-05, "loss": 1.4641, "step": 7593 }, { "epoch": 2.3050538776749128, "grad_norm": 0.7477747201919556, "learning_rate": 6.160777564037663e-05, "loss": 1.2525, "step": 7594 }, { "epoch": 2.3053574138716044, "grad_norm": 0.8030606508255005, "learning_rate": 6.160271337450642e-05, "loss": 1.4686, "step": 7595 }, { "epoch": 2.3056609500682956, "grad_norm": 0.7098719477653503, "learning_rate": 6.159765110863623e-05, "loss": 1.747, "step": 7596 }, { "epoch": 2.3059644862649873, "grad_norm": 0.7161492109298706, "learning_rate": 6.159258884276603e-05, "loss": 1.5472, "step": 7597 }, { "epoch": 2.3062680224616785, "grad_norm": 0.6479907631874084, "learning_rate": 6.158752657689582e-05, "loss": 1.3357, "step": 7598 }, { "epoch": 2.30657155865837, "grad_norm": 0.97112637758255, "learning_rate": 6.158246431102562e-05, "loss": 1.2842, "step": 7599 }, { "epoch": 2.3068750948550614, "grad_norm": 0.8729383945465088, "learning_rate": 6.157740204515542e-05, "loss": 1.2892, "step": 7600 }, { "epoch": 2.307178631051753, "grad_norm": 0.7437787652015686, "learning_rate": 6.157233977928522e-05, "loss": 1.4824, "step": 7601 }, { "epoch": 2.3074821672484442, "grad_norm": 0.7232855558395386, "learning_rate": 6.156727751341501e-05, "loss": 0.8485, "step": 7602 }, { "epoch": 2.307785703445136, "grad_norm": 0.800493061542511, "learning_rate": 6.156221524754481e-05, "loss": 1.572, "step": 7603 }, { "epoch": 2.308089239641827, "grad_norm": 0.9014605283737183, "learning_rate": 6.15571529816746e-05, "loss": 1.2663, "step": 7604 }, { "epoch": 2.3083927758385188, "grad_norm": 1.083298921585083, "learning_rate": 6.15520907158044e-05, "loss": 1.5554, "step": 7605 }, { "epoch": 2.3086963120352104, "grad_norm": 0.695650041103363, "learning_rate": 6.15470284499342e-05, "loss": 1.4479, "step": 7606 }, { "epoch": 2.3089998482319016, "grad_norm": 0.7614145874977112, "learning_rate": 6.154196618406399e-05, "loss": 1.31, "step": 7607 }, { "epoch": 2.3093033844285933, "grad_norm": 0.8237013220787048, "learning_rate": 6.153690391819378e-05, "loss": 1.0565, "step": 7608 }, { "epoch": 2.3096069206252845, "grad_norm": 0.8441504240036011, "learning_rate": 6.153184165232359e-05, "loss": 1.3506, "step": 7609 }, { "epoch": 2.309910456821976, "grad_norm": 0.8419016599655151, "learning_rate": 6.152677938645339e-05, "loss": 1.3949, "step": 7610 }, { "epoch": 2.3102139930186674, "grad_norm": 0.7505916953086853, "learning_rate": 6.152171712058318e-05, "loss": 1.07, "step": 7611 }, { "epoch": 2.310517529215359, "grad_norm": 0.7543771266937256, "learning_rate": 6.151665485471298e-05, "loss": 1.6368, "step": 7612 }, { "epoch": 2.3108210654120502, "grad_norm": 0.7415176033973694, "learning_rate": 6.151159258884277e-05, "loss": 1.3673, "step": 7613 }, { "epoch": 2.311124601608742, "grad_norm": 0.7070369720458984, "learning_rate": 6.150653032297257e-05, "loss": 1.4192, "step": 7614 }, { "epoch": 2.311428137805433, "grad_norm": 0.7339876294136047, "learning_rate": 6.150146805710236e-05, "loss": 1.4539, "step": 7615 }, { "epoch": 2.3117316740021248, "grad_norm": 0.8350193500518799, "learning_rate": 6.149640579123215e-05, "loss": 1.3213, "step": 7616 }, { "epoch": 2.3120352101988164, "grad_norm": 0.894660234451294, "learning_rate": 6.149134352536195e-05, "loss": 0.9378, "step": 7617 }, { "epoch": 2.3123387463955076, "grad_norm": 0.7360272407531738, "learning_rate": 6.148628125949176e-05, "loss": 1.4664, "step": 7618 }, { "epoch": 2.3126422825921993, "grad_norm": 0.735359787940979, "learning_rate": 6.148121899362155e-05, "loss": 1.548, "step": 7619 }, { "epoch": 2.3129458187888905, "grad_norm": 0.6220079064369202, "learning_rate": 6.147615672775135e-05, "loss": 1.5925, "step": 7620 }, { "epoch": 2.313249354985582, "grad_norm": 0.6058359146118164, "learning_rate": 6.147109446188114e-05, "loss": 1.094, "step": 7621 }, { "epoch": 2.3135528911822734, "grad_norm": 0.7334878444671631, "learning_rate": 6.146603219601094e-05, "loss": 1.3287, "step": 7622 }, { "epoch": 2.313856427378965, "grad_norm": 0.7956668138504028, "learning_rate": 6.146096993014073e-05, "loss": 1.3067, "step": 7623 }, { "epoch": 2.3141599635756562, "grad_norm": 0.8465598821640015, "learning_rate": 6.145590766427053e-05, "loss": 1.3953, "step": 7624 }, { "epoch": 2.314463499772348, "grad_norm": 0.936220645904541, "learning_rate": 6.145084539840032e-05, "loss": 1.4161, "step": 7625 }, { "epoch": 2.314767035969039, "grad_norm": 0.5641535520553589, "learning_rate": 6.144578313253012e-05, "loss": 1.1868, "step": 7626 }, { "epoch": 2.3150705721657308, "grad_norm": 0.7781814932823181, "learning_rate": 6.144072086665991e-05, "loss": 1.3219, "step": 7627 }, { "epoch": 2.3153741083624224, "grad_norm": 0.7027815580368042, "learning_rate": 6.143565860078972e-05, "loss": 1.4012, "step": 7628 }, { "epoch": 2.3156776445591136, "grad_norm": 0.8026196956634521, "learning_rate": 6.143059633491951e-05, "loss": 1.221, "step": 7629 }, { "epoch": 2.3159811807558053, "grad_norm": 0.8572709560394287, "learning_rate": 6.142553406904931e-05, "loss": 1.0766, "step": 7630 }, { "epoch": 2.3162847169524965, "grad_norm": 0.6600764393806458, "learning_rate": 6.14204718031791e-05, "loss": 1.7696, "step": 7631 }, { "epoch": 2.316588253149188, "grad_norm": 0.80988609790802, "learning_rate": 6.14154095373089e-05, "loss": 1.3428, "step": 7632 }, { "epoch": 2.3168917893458794, "grad_norm": 0.6442969441413879, "learning_rate": 6.14103472714387e-05, "loss": 1.6526, "step": 7633 }, { "epoch": 2.317195325542571, "grad_norm": 0.7561648488044739, "learning_rate": 6.140528500556849e-05, "loss": 1.661, "step": 7634 }, { "epoch": 2.3174988617392622, "grad_norm": 0.8251952528953552, "learning_rate": 6.140022273969828e-05, "loss": 1.5765, "step": 7635 }, { "epoch": 2.317802397935954, "grad_norm": 0.7034004330635071, "learning_rate": 6.139516047382808e-05, "loss": 0.896, "step": 7636 }, { "epoch": 2.318105934132645, "grad_norm": 0.713154137134552, "learning_rate": 6.139009820795789e-05, "loss": 1.534, "step": 7637 }, { "epoch": 2.3184094703293368, "grad_norm": 0.8786371350288391, "learning_rate": 6.138503594208768e-05, "loss": 1.3748, "step": 7638 }, { "epoch": 2.3187130065260284, "grad_norm": 0.6298970580101013, "learning_rate": 6.137997367621748e-05, "loss": 1.0846, "step": 7639 }, { "epoch": 2.3190165427227196, "grad_norm": 0.732848048210144, "learning_rate": 6.137491141034727e-05, "loss": 1.1163, "step": 7640 }, { "epoch": 2.3193200789194113, "grad_norm": 0.6818196177482605, "learning_rate": 6.136984914447708e-05, "loss": 1.7087, "step": 7641 }, { "epoch": 2.3196236151161025, "grad_norm": 0.628974974155426, "learning_rate": 6.136478687860687e-05, "loss": 0.9459, "step": 7642 }, { "epoch": 2.319927151312794, "grad_norm": 0.7217102646827698, "learning_rate": 6.135972461273667e-05, "loss": 1.0762, "step": 7643 }, { "epoch": 2.3202306875094854, "grad_norm": 0.7135050892829895, "learning_rate": 6.135466234686646e-05, "loss": 1.3588, "step": 7644 }, { "epoch": 2.320534223706177, "grad_norm": 0.7265660166740417, "learning_rate": 6.134960008099626e-05, "loss": 1.601, "step": 7645 }, { "epoch": 2.3208377599028682, "grad_norm": 0.7401967644691467, "learning_rate": 6.134453781512605e-05, "loss": 1.2171, "step": 7646 }, { "epoch": 2.32114129609956, "grad_norm": 0.7953644394874573, "learning_rate": 6.133947554925585e-05, "loss": 1.5427, "step": 7647 }, { "epoch": 2.321444832296251, "grad_norm": 0.86178058385849, "learning_rate": 6.133441328338566e-05, "loss": 0.9117, "step": 7648 }, { "epoch": 2.3217483684929427, "grad_norm": 0.835649847984314, "learning_rate": 6.132935101751545e-05, "loss": 1.1636, "step": 7649 }, { "epoch": 2.3220519046896344, "grad_norm": 0.866837739944458, "learning_rate": 6.132428875164525e-05, "loss": 1.4649, "step": 7650 }, { "epoch": 2.3223554408863256, "grad_norm": 0.8551737070083618, "learning_rate": 6.131922648577504e-05, "loss": 1.4207, "step": 7651 }, { "epoch": 2.3226589770830173, "grad_norm": 0.7347872257232666, "learning_rate": 6.131416421990484e-05, "loss": 1.118, "step": 7652 }, { "epoch": 2.3229625132797085, "grad_norm": 0.682930052280426, "learning_rate": 6.130910195403463e-05, "loss": 1.6991, "step": 7653 }, { "epoch": 2.3232660494764, "grad_norm": 0.8257922530174255, "learning_rate": 6.130403968816442e-05, "loss": 0.885, "step": 7654 }, { "epoch": 2.3235695856730914, "grad_norm": 0.8472979068756104, "learning_rate": 6.129897742229422e-05, "loss": 0.9182, "step": 7655 }, { "epoch": 2.323873121869783, "grad_norm": 0.8732231259346008, "learning_rate": 6.129391515642401e-05, "loss": 1.2871, "step": 7656 }, { "epoch": 2.3241766580664747, "grad_norm": 0.7592588663101196, "learning_rate": 6.128885289055382e-05, "loss": 1.3911, "step": 7657 }, { "epoch": 2.324480194263166, "grad_norm": 0.6937957406044006, "learning_rate": 6.128379062468362e-05, "loss": 0.6854, "step": 7658 }, { "epoch": 2.324783730459857, "grad_norm": 0.5173711776733398, "learning_rate": 6.127872835881341e-05, "loss": 1.2109, "step": 7659 }, { "epoch": 2.3250872666565487, "grad_norm": 0.711747407913208, "learning_rate": 6.127366609294321e-05, "loss": 1.1013, "step": 7660 }, { "epoch": 2.3253908028532404, "grad_norm": 0.7678746581077576, "learning_rate": 6.1268603827073e-05, "loss": 1.7032, "step": 7661 }, { "epoch": 2.3256943390499316, "grad_norm": 0.8963012099266052, "learning_rate": 6.12635415612028e-05, "loss": 1.585, "step": 7662 }, { "epoch": 2.3259978752466233, "grad_norm": 0.730894148349762, "learning_rate": 6.125847929533259e-05, "loss": 0.9299, "step": 7663 }, { "epoch": 2.3263014114433145, "grad_norm": 0.7582551836967468, "learning_rate": 6.125341702946239e-05, "loss": 1.0785, "step": 7664 }, { "epoch": 2.326604947640006, "grad_norm": 0.7867629528045654, "learning_rate": 6.124835476359218e-05, "loss": 1.2304, "step": 7665 }, { "epoch": 2.3269084838366974, "grad_norm": 0.9067895412445068, "learning_rate": 6.124329249772198e-05, "loss": 1.0494, "step": 7666 }, { "epoch": 2.327212020033389, "grad_norm": 0.5455618500709534, "learning_rate": 6.123823023185178e-05, "loss": 0.5172, "step": 7667 }, { "epoch": 2.3275155562300807, "grad_norm": 0.7320606112480164, "learning_rate": 6.123316796598158e-05, "loss": 1.1368, "step": 7668 }, { "epoch": 2.327819092426772, "grad_norm": 0.8261992335319519, "learning_rate": 6.122810570011137e-05, "loss": 1.4457, "step": 7669 }, { "epoch": 2.3281226286234635, "grad_norm": 0.7958891987800598, "learning_rate": 6.122304343424117e-05, "loss": 0.7067, "step": 7670 }, { "epoch": 2.3284261648201547, "grad_norm": 0.8207949995994568, "learning_rate": 6.121798116837096e-05, "loss": 1.1606, "step": 7671 }, { "epoch": 2.3287297010168464, "grad_norm": 0.8190235495567322, "learning_rate": 6.121291890250076e-05, "loss": 1.3621, "step": 7672 }, { "epoch": 2.3290332372135376, "grad_norm": 0.7936198711395264, "learning_rate": 6.120785663663055e-05, "loss": 1.4129, "step": 7673 }, { "epoch": 2.3293367734102293, "grad_norm": 0.7021920084953308, "learning_rate": 6.120279437076035e-05, "loss": 1.4672, "step": 7674 }, { "epoch": 2.3296403096069205, "grad_norm": 0.8473578691482544, "learning_rate": 6.119773210489014e-05, "loss": 1.5384, "step": 7675 }, { "epoch": 2.329943845803612, "grad_norm": 0.7380422949790955, "learning_rate": 6.119266983901995e-05, "loss": 1.4071, "step": 7676 }, { "epoch": 2.3302473820003033, "grad_norm": 0.6773237586021423, "learning_rate": 6.118760757314975e-05, "loss": 1.0423, "step": 7677 }, { "epoch": 2.330550918196995, "grad_norm": 0.8463999032974243, "learning_rate": 6.118254530727954e-05, "loss": 1.1341, "step": 7678 }, { "epoch": 2.3308544543936867, "grad_norm": 0.8593174815177917, "learning_rate": 6.117748304140934e-05, "loss": 1.7154, "step": 7679 }, { "epoch": 2.331157990590378, "grad_norm": 0.7568472623825073, "learning_rate": 6.117242077553913e-05, "loss": 0.9461, "step": 7680 }, { "epoch": 2.3314615267870695, "grad_norm": 0.7861149907112122, "learning_rate": 6.116735850966892e-05, "loss": 1.3012, "step": 7681 }, { "epoch": 2.3317650629837607, "grad_norm": 0.7344647645950317, "learning_rate": 6.116229624379872e-05, "loss": 1.5367, "step": 7682 }, { "epoch": 2.3320685991804524, "grad_norm": 0.6908876895904541, "learning_rate": 6.115723397792851e-05, "loss": 0.8515, "step": 7683 }, { "epoch": 2.3323721353771436, "grad_norm": 0.7117886543273926, "learning_rate": 6.115217171205831e-05, "loss": 0.9673, "step": 7684 }, { "epoch": 2.3326756715738353, "grad_norm": 0.5932457447052002, "learning_rate": 6.114710944618812e-05, "loss": 1.1811, "step": 7685 }, { "epoch": 2.3329792077705265, "grad_norm": 0.6621536612510681, "learning_rate": 6.114204718031791e-05, "loss": 1.4441, "step": 7686 }, { "epoch": 2.333282743967218, "grad_norm": 0.7571014761924744, "learning_rate": 6.113698491444772e-05, "loss": 1.541, "step": 7687 }, { "epoch": 2.3335862801639093, "grad_norm": 0.8175387978553772, "learning_rate": 6.113192264857752e-05, "loss": 1.1661, "step": 7688 }, { "epoch": 2.333889816360601, "grad_norm": 0.7066230773925781, "learning_rate": 6.112686038270731e-05, "loss": 1.4773, "step": 7689 }, { "epoch": 2.3341933525572927, "grad_norm": 0.9288036227226257, "learning_rate": 6.11217981168371e-05, "loss": 1.276, "step": 7690 }, { "epoch": 2.334496888753984, "grad_norm": 0.845954179763794, "learning_rate": 6.11167358509669e-05, "loss": 1.4162, "step": 7691 }, { "epoch": 2.3348004249506755, "grad_norm": 0.9958683252334595, "learning_rate": 6.11116735850967e-05, "loss": 1.3755, "step": 7692 }, { "epoch": 2.3351039611473667, "grad_norm": 0.687445878982544, "learning_rate": 6.110661131922649e-05, "loss": 1.3896, "step": 7693 }, { "epoch": 2.3354074973440584, "grad_norm": 0.651056170463562, "learning_rate": 6.110154905335628e-05, "loss": 1.793, "step": 7694 }, { "epoch": 2.3357110335407496, "grad_norm": 0.7116444110870361, "learning_rate": 6.109648678748608e-05, "loss": 1.0677, "step": 7695 }, { "epoch": 2.3360145697374413, "grad_norm": 0.7705413103103638, "learning_rate": 6.109142452161589e-05, "loss": 1.2177, "step": 7696 }, { "epoch": 2.3363181059341325, "grad_norm": 0.7890743017196655, "learning_rate": 6.108636225574568e-05, "loss": 1.2361, "step": 7697 }, { "epoch": 2.336621642130824, "grad_norm": 0.6681506633758545, "learning_rate": 6.108129998987548e-05, "loss": 1.5302, "step": 7698 }, { "epoch": 2.3369251783275153, "grad_norm": 0.9150996804237366, "learning_rate": 6.107623772400527e-05, "loss": 1.3499, "step": 7699 }, { "epoch": 2.337228714524207, "grad_norm": 0.9253705739974976, "learning_rate": 6.107117545813507e-05, "loss": 0.9038, "step": 7700 }, { "epoch": 2.3375322507208987, "grad_norm": 1.0936262607574463, "learning_rate": 6.106611319226486e-05, "loss": 0.917, "step": 7701 }, { "epoch": 2.33783578691759, "grad_norm": 0.8515232801437378, "learning_rate": 6.106105092639466e-05, "loss": 1.1466, "step": 7702 }, { "epoch": 2.3381393231142815, "grad_norm": 0.8388434052467346, "learning_rate": 6.105598866052445e-05, "loss": 1.657, "step": 7703 }, { "epoch": 2.3384428593109727, "grad_norm": 0.8349151015281677, "learning_rate": 6.105092639465425e-05, "loss": 0.7792, "step": 7704 }, { "epoch": 2.3387463955076644, "grad_norm": 0.7993485331535339, "learning_rate": 6.104586412878404e-05, "loss": 1.4357, "step": 7705 }, { "epoch": 2.3390499317043556, "grad_norm": 0.6768009662628174, "learning_rate": 6.104080186291385e-05, "loss": 1.7998, "step": 7706 }, { "epoch": 2.3393534679010473, "grad_norm": 0.9533233046531677, "learning_rate": 6.103573959704364e-05, "loss": 0.5943, "step": 7707 }, { "epoch": 2.3396570040977385, "grad_norm": 0.787339448928833, "learning_rate": 6.103067733117344e-05, "loss": 1.171, "step": 7708 }, { "epoch": 2.33996054029443, "grad_norm": 0.6218050122261047, "learning_rate": 6.102561506530323e-05, "loss": 1.3632, "step": 7709 }, { "epoch": 2.3402640764911213, "grad_norm": 0.7959072589874268, "learning_rate": 6.102055279943303e-05, "loss": 1.4095, "step": 7710 }, { "epoch": 2.340567612687813, "grad_norm": 0.6838813424110413, "learning_rate": 6.101549053356282e-05, "loss": 1.4838, "step": 7711 }, { "epoch": 2.3408711488845046, "grad_norm": 0.5955168604850769, "learning_rate": 6.101042826769262e-05, "loss": 1.0594, "step": 7712 }, { "epoch": 2.341174685081196, "grad_norm": 0.7063366174697876, "learning_rate": 6.100536600182242e-05, "loss": 1.2897, "step": 7713 }, { "epoch": 2.3414782212778875, "grad_norm": 0.707691490650177, "learning_rate": 6.1000303735952214e-05, "loss": 1.3633, "step": 7714 }, { "epoch": 2.3417817574745787, "grad_norm": 0.8361237645149231, "learning_rate": 6.099524147008201e-05, "loss": 1.0799, "step": 7715 }, { "epoch": 2.3420852936712704, "grad_norm": 0.8068976402282715, "learning_rate": 6.0990179204211804e-05, "loss": 1.0683, "step": 7716 }, { "epoch": 2.3423888298679616, "grad_norm": 0.6763870716094971, "learning_rate": 6.0985116938341605e-05, "loss": 1.7407, "step": 7717 }, { "epoch": 2.3426923660646533, "grad_norm": 0.9038770198822021, "learning_rate": 6.09800546724714e-05, "loss": 1.0923, "step": 7718 }, { "epoch": 2.342995902261345, "grad_norm": 0.7028255462646484, "learning_rate": 6.0974992406601195e-05, "loss": 1.1877, "step": 7719 }, { "epoch": 2.343299438458036, "grad_norm": 0.8663949966430664, "learning_rate": 6.096993014073099e-05, "loss": 1.6386, "step": 7720 }, { "epoch": 2.3436029746547273, "grad_norm": 1.0049313306808472, "learning_rate": 6.0964867874860784e-05, "loss": 1.1968, "step": 7721 }, { "epoch": 2.343906510851419, "grad_norm": 0.9146410226821899, "learning_rate": 6.0959805608990586e-05, "loss": 1.3352, "step": 7722 }, { "epoch": 2.3442100470481106, "grad_norm": 0.913068413734436, "learning_rate": 6.095474334312038e-05, "loss": 1.2722, "step": 7723 }, { "epoch": 2.344513583244802, "grad_norm": 0.7167338132858276, "learning_rate": 6.0949681077250176e-05, "loss": 1.0658, "step": 7724 }, { "epoch": 2.3448171194414935, "grad_norm": 0.557360827922821, "learning_rate": 6.094461881137997e-05, "loss": 1.1557, "step": 7725 }, { "epoch": 2.3451206556381847, "grad_norm": 0.6517195701599121, "learning_rate": 6.0939556545509765e-05, "loss": 1.5971, "step": 7726 }, { "epoch": 2.3454241918348764, "grad_norm": 0.8067708611488342, "learning_rate": 6.093449427963957e-05, "loss": 0.9163, "step": 7727 }, { "epoch": 2.3457277280315676, "grad_norm": 0.7419809699058533, "learning_rate": 6.092943201376936e-05, "loss": 1.6252, "step": 7728 }, { "epoch": 2.3460312642282592, "grad_norm": 0.6952763795852661, "learning_rate": 6.0924369747899156e-05, "loss": 1.4066, "step": 7729 }, { "epoch": 2.346334800424951, "grad_norm": 0.9591590166091919, "learning_rate": 6.0919307482028965e-05, "loss": 1.0623, "step": 7730 }, { "epoch": 2.346638336621642, "grad_norm": 0.563372790813446, "learning_rate": 6.091424521615876e-05, "loss": 1.1935, "step": 7731 }, { "epoch": 2.3469418728183338, "grad_norm": 0.6305344104766846, "learning_rate": 6.0909182950288554e-05, "loss": 1.5607, "step": 7732 }, { "epoch": 2.347245409015025, "grad_norm": 0.6717035174369812, "learning_rate": 6.0904120684418356e-05, "loss": 1.2191, "step": 7733 }, { "epoch": 2.3475489452117166, "grad_norm": 0.7762973308563232, "learning_rate": 6.089905841854815e-05, "loss": 1.4233, "step": 7734 }, { "epoch": 2.347852481408408, "grad_norm": 0.8079813122749329, "learning_rate": 6.0893996152677945e-05, "loss": 1.3683, "step": 7735 }, { "epoch": 2.3481560176050995, "grad_norm": 0.5496336221694946, "learning_rate": 6.088893388680774e-05, "loss": 0.9201, "step": 7736 }, { "epoch": 2.3484595538017907, "grad_norm": 0.863309919834137, "learning_rate": 6.0883871620937535e-05, "loss": 0.6286, "step": 7737 }, { "epoch": 2.3487630899984824, "grad_norm": 0.8281985521316528, "learning_rate": 6.0878809355067337e-05, "loss": 1.04, "step": 7738 }, { "epoch": 2.3490666261951736, "grad_norm": 0.7266848087310791, "learning_rate": 6.087374708919713e-05, "loss": 1.3901, "step": 7739 }, { "epoch": 2.3493701623918652, "grad_norm": 0.9094659686088562, "learning_rate": 6.0868684823326926e-05, "loss": 1.1477, "step": 7740 }, { "epoch": 2.349673698588557, "grad_norm": 0.7695726156234741, "learning_rate": 6.086362255745672e-05, "loss": 1.4168, "step": 7741 }, { "epoch": 2.349977234785248, "grad_norm": 0.8188008666038513, "learning_rate": 6.085856029158652e-05, "loss": 1.2717, "step": 7742 }, { "epoch": 2.3502807709819398, "grad_norm": 0.8061943650245667, "learning_rate": 6.085349802571632e-05, "loss": 1.5625, "step": 7743 }, { "epoch": 2.350584307178631, "grad_norm": 0.7862029671669006, "learning_rate": 6.084843575984611e-05, "loss": 1.5496, "step": 7744 }, { "epoch": 2.3508878433753226, "grad_norm": 0.6617637872695923, "learning_rate": 6.084337349397591e-05, "loss": 1.633, "step": 7745 }, { "epoch": 2.351191379572014, "grad_norm": 0.9673377871513367, "learning_rate": 6.08383112281057e-05, "loss": 0.8355, "step": 7746 }, { "epoch": 2.3514949157687055, "grad_norm": 0.7346013188362122, "learning_rate": 6.08332489622355e-05, "loss": 1.3608, "step": 7747 }, { "epoch": 2.3517984519653967, "grad_norm": 0.8306134343147278, "learning_rate": 6.08281866963653e-05, "loss": 1.3994, "step": 7748 }, { "epoch": 2.3521019881620884, "grad_norm": 0.7612175941467285, "learning_rate": 6.082312443049509e-05, "loss": 1.0089, "step": 7749 }, { "epoch": 2.3524055243587796, "grad_norm": 0.8626055121421814, "learning_rate": 6.081806216462489e-05, "loss": 1.2201, "step": 7750 }, { "epoch": 2.3527090605554712, "grad_norm": 0.613272488117218, "learning_rate": 6.081299989875468e-05, "loss": 1.0982, "step": 7751 }, { "epoch": 2.353012596752163, "grad_norm": 0.6248640418052673, "learning_rate": 6.0807937632884484e-05, "loss": 1.4992, "step": 7752 }, { "epoch": 2.353316132948854, "grad_norm": 0.7593119740486145, "learning_rate": 6.080287536701428e-05, "loss": 1.3339, "step": 7753 }, { "epoch": 2.3536196691455458, "grad_norm": 0.6562939286231995, "learning_rate": 6.0797813101144074e-05, "loss": 1.4767, "step": 7754 }, { "epoch": 2.353923205342237, "grad_norm": 0.6290830969810486, "learning_rate": 6.079275083527387e-05, "loss": 1.6008, "step": 7755 }, { "epoch": 2.3542267415389286, "grad_norm": 0.6489423513412476, "learning_rate": 6.078768856940367e-05, "loss": 1.5682, "step": 7756 }, { "epoch": 2.35453027773562, "grad_norm": 0.8090351819992065, "learning_rate": 6.0782626303533465e-05, "loss": 1.2493, "step": 7757 }, { "epoch": 2.3548338139323115, "grad_norm": 0.7439088821411133, "learning_rate": 6.077756403766326e-05, "loss": 1.0664, "step": 7758 }, { "epoch": 2.3551373501290027, "grad_norm": 0.8158544898033142, "learning_rate": 6.0772501771793054e-05, "loss": 0.9277, "step": 7759 }, { "epoch": 2.3554408863256944, "grad_norm": 0.6496466398239136, "learning_rate": 6.076743950592285e-05, "loss": 1.5302, "step": 7760 }, { "epoch": 2.3557444225223856, "grad_norm": 0.7689223885536194, "learning_rate": 6.076237724005265e-05, "loss": 1.0756, "step": 7761 }, { "epoch": 2.3560479587190772, "grad_norm": 0.8746340274810791, "learning_rate": 6.0757314974182445e-05, "loss": 1.1543, "step": 7762 }, { "epoch": 2.356351494915769, "grad_norm": 0.6918237209320068, "learning_rate": 6.075225270831224e-05, "loss": 1.3064, "step": 7763 }, { "epoch": 2.35665503111246, "grad_norm": 0.6862085461616516, "learning_rate": 6.0747190442442035e-05, "loss": 1.6043, "step": 7764 }, { "epoch": 2.3569585673091518, "grad_norm": 0.8114455342292786, "learning_rate": 6.074212817657183e-05, "loss": 1.5732, "step": 7765 }, { "epoch": 2.357262103505843, "grad_norm": 1.0163137912750244, "learning_rate": 6.073706591070163e-05, "loss": 1.098, "step": 7766 }, { "epoch": 2.3575656397025346, "grad_norm": 0.6978328824043274, "learning_rate": 6.0732003644831426e-05, "loss": 1.2409, "step": 7767 }, { "epoch": 2.357869175899226, "grad_norm": 0.7484824061393738, "learning_rate": 6.072694137896122e-05, "loss": 1.3748, "step": 7768 }, { "epoch": 2.3581727120959175, "grad_norm": 0.8369539380073547, "learning_rate": 6.0721879113091016e-05, "loss": 1.4558, "step": 7769 }, { "epoch": 2.3584762482926087, "grad_norm": 0.7077987194061279, "learning_rate": 6.071681684722082e-05, "loss": 1.0913, "step": 7770 }, { "epoch": 2.3587797844893004, "grad_norm": 0.8944743275642395, "learning_rate": 6.071175458135061e-05, "loss": 1.2509, "step": 7771 }, { "epoch": 2.3590833206859916, "grad_norm": 0.5822862982749939, "learning_rate": 6.070669231548041e-05, "loss": 0.7585, "step": 7772 }, { "epoch": 2.3593868568826832, "grad_norm": 0.7083290815353394, "learning_rate": 6.07016300496102e-05, "loss": 1.6672, "step": 7773 }, { "epoch": 2.359690393079375, "grad_norm": 0.7272571921348572, "learning_rate": 6.069656778374001e-05, "loss": 1.4634, "step": 7774 }, { "epoch": 2.359993929276066, "grad_norm": 0.7781559824943542, "learning_rate": 6.0691505517869805e-05, "loss": 0.7688, "step": 7775 }, { "epoch": 2.3602974654727578, "grad_norm": 0.7398669719696045, "learning_rate": 6.06864432519996e-05, "loss": 1.5047, "step": 7776 }, { "epoch": 2.360601001669449, "grad_norm": 0.5970739722251892, "learning_rate": 6.06813809861294e-05, "loss": 1.6261, "step": 7777 }, { "epoch": 2.3609045378661406, "grad_norm": 0.8803917169570923, "learning_rate": 6.0676318720259196e-05, "loss": 1.3531, "step": 7778 }, { "epoch": 2.361208074062832, "grad_norm": 0.6425435543060303, "learning_rate": 6.067125645438899e-05, "loss": 1.2065, "step": 7779 }, { "epoch": 2.3615116102595235, "grad_norm": 0.6474995017051697, "learning_rate": 6.0666194188518786e-05, "loss": 1.7477, "step": 7780 }, { "epoch": 2.361815146456215, "grad_norm": 0.8857691884040833, "learning_rate": 6.066113192264859e-05, "loss": 1.5678, "step": 7781 }, { "epoch": 2.3621186826529064, "grad_norm": 1.0378564596176147, "learning_rate": 6.065606965677838e-05, "loss": 1.3457, "step": 7782 }, { "epoch": 2.3624222188495976, "grad_norm": 0.6623472571372986, "learning_rate": 6.065100739090818e-05, "loss": 1.3708, "step": 7783 }, { "epoch": 2.3627257550462892, "grad_norm": 0.7117280960083008, "learning_rate": 6.064594512503797e-05, "loss": 1.5782, "step": 7784 }, { "epoch": 2.363029291242981, "grad_norm": 0.6938657164573669, "learning_rate": 6.0640882859167766e-05, "loss": 0.8125, "step": 7785 }, { "epoch": 2.363332827439672, "grad_norm": 0.659076988697052, "learning_rate": 6.063582059329757e-05, "loss": 1.483, "step": 7786 }, { "epoch": 2.3636363636363638, "grad_norm": 0.7740662693977356, "learning_rate": 6.063075832742736e-05, "loss": 1.4259, "step": 7787 }, { "epoch": 2.363939899833055, "grad_norm": 0.7431475520133972, "learning_rate": 6.062569606155716e-05, "loss": 1.6874, "step": 7788 }, { "epoch": 2.3642434360297466, "grad_norm": 0.8177775740623474, "learning_rate": 6.062063379568695e-05, "loss": 1.2511, "step": 7789 }, { "epoch": 2.364546972226438, "grad_norm": 0.7437436580657959, "learning_rate": 6.061557152981675e-05, "loss": 1.3728, "step": 7790 }, { "epoch": 2.3648505084231295, "grad_norm": 0.7326698899269104, "learning_rate": 6.061050926394655e-05, "loss": 1.3606, "step": 7791 }, { "epoch": 2.365154044619821, "grad_norm": 0.7051557302474976, "learning_rate": 6.0605446998076344e-05, "loss": 1.7839, "step": 7792 }, { "epoch": 2.3654575808165124, "grad_norm": 0.9070965051651001, "learning_rate": 6.060038473220614e-05, "loss": 1.1911, "step": 7793 }, { "epoch": 2.365761117013204, "grad_norm": 0.879751443862915, "learning_rate": 6.059532246633593e-05, "loss": 1.2265, "step": 7794 }, { "epoch": 2.3660646532098952, "grad_norm": 0.7137269973754883, "learning_rate": 6.0590260200465735e-05, "loss": 1.1799, "step": 7795 }, { "epoch": 2.366368189406587, "grad_norm": 0.9179060459136963, "learning_rate": 6.058519793459553e-05, "loss": 1.3917, "step": 7796 }, { "epoch": 2.366671725603278, "grad_norm": 0.8052315711975098, "learning_rate": 6.0580135668725324e-05, "loss": 0.8737, "step": 7797 }, { "epoch": 2.3669752617999698, "grad_norm": 0.7714252471923828, "learning_rate": 6.057507340285512e-05, "loss": 1.3054, "step": 7798 }, { "epoch": 2.367278797996661, "grad_norm": 0.8135915994644165, "learning_rate": 6.0570011136984914e-05, "loss": 1.344, "step": 7799 }, { "epoch": 2.3675823341933526, "grad_norm": 0.7176734209060669, "learning_rate": 6.0564948871114715e-05, "loss": 1.1194, "step": 7800 }, { "epoch": 2.367885870390044, "grad_norm": 0.7938108444213867, "learning_rate": 6.055988660524451e-05, "loss": 1.0659, "step": 7801 }, { "epoch": 2.3681894065867355, "grad_norm": 0.6610084176063538, "learning_rate": 6.0554824339374305e-05, "loss": 1.6436, "step": 7802 }, { "epoch": 2.368492942783427, "grad_norm": 0.818847119808197, "learning_rate": 6.05497620735041e-05, "loss": 1.1547, "step": 7803 }, { "epoch": 2.3687964789801184, "grad_norm": 1.013972282409668, "learning_rate": 6.0544699807633895e-05, "loss": 1.2886, "step": 7804 }, { "epoch": 2.36910001517681, "grad_norm": 0.7468999028205872, "learning_rate": 6.0539637541763696e-05, "loss": 1.0146, "step": 7805 }, { "epoch": 2.3694035513735012, "grad_norm": 0.692268967628479, "learning_rate": 6.053457527589349e-05, "loss": 1.4758, "step": 7806 }, { "epoch": 2.369707087570193, "grad_norm": 0.6096368432044983, "learning_rate": 6.0529513010023286e-05, "loss": 1.2638, "step": 7807 }, { "epoch": 2.370010623766884, "grad_norm": 0.7329953908920288, "learning_rate": 6.052445074415308e-05, "loss": 1.4867, "step": 7808 }, { "epoch": 2.3703141599635758, "grad_norm": 0.7470860481262207, "learning_rate": 6.051938847828288e-05, "loss": 1.3024, "step": 7809 }, { "epoch": 2.370617696160267, "grad_norm": 0.8400612473487854, "learning_rate": 6.051432621241268e-05, "loss": 1.5507, "step": 7810 }, { "epoch": 2.3709212323569586, "grad_norm": 0.7138293385505676, "learning_rate": 6.050926394654247e-05, "loss": 1.4085, "step": 7811 }, { "epoch": 2.37122476855365, "grad_norm": 1.1816941499710083, "learning_rate": 6.0504201680672267e-05, "loss": 0.9353, "step": 7812 }, { "epoch": 2.3715283047503415, "grad_norm": 0.9305357933044434, "learning_rate": 6.049913941480206e-05, "loss": 1.285, "step": 7813 }, { "epoch": 2.371831840947033, "grad_norm": 0.825706422328949, "learning_rate": 6.049407714893186e-05, "loss": 1.3601, "step": 7814 }, { "epoch": 2.3721353771437244, "grad_norm": 0.7762649059295654, "learning_rate": 6.048901488306166e-05, "loss": 1.3771, "step": 7815 }, { "epoch": 2.372438913340416, "grad_norm": 1.1662933826446533, "learning_rate": 6.048395261719145e-05, "loss": 1.0773, "step": 7816 }, { "epoch": 2.3727424495371072, "grad_norm": 0.6628431081771851, "learning_rate": 6.047889035132125e-05, "loss": 1.0688, "step": 7817 }, { "epoch": 2.373045985733799, "grad_norm": 0.8049399256706238, "learning_rate": 6.047382808545104e-05, "loss": 1.3595, "step": 7818 }, { "epoch": 2.37334952193049, "grad_norm": 0.7456562519073486, "learning_rate": 6.046876581958085e-05, "loss": 1.7107, "step": 7819 }, { "epoch": 2.3736530581271817, "grad_norm": 0.8927516937255859, "learning_rate": 6.046370355371065e-05, "loss": 1.4058, "step": 7820 }, { "epoch": 2.373956594323873, "grad_norm": 0.8116360306739807, "learning_rate": 6.045864128784045e-05, "loss": 1.7016, "step": 7821 }, { "epoch": 2.3742601305205646, "grad_norm": 0.6701048612594604, "learning_rate": 6.045357902197024e-05, "loss": 1.7531, "step": 7822 }, { "epoch": 2.374563666717256, "grad_norm": 0.7412038445472717, "learning_rate": 6.0448516756100036e-05, "loss": 1.5549, "step": 7823 }, { "epoch": 2.3748672029139475, "grad_norm": 0.7359911203384399, "learning_rate": 6.044345449022983e-05, "loss": 1.3267, "step": 7824 }, { "epoch": 2.375170739110639, "grad_norm": 0.6358724236488342, "learning_rate": 6.043839222435963e-05, "loss": 1.5852, "step": 7825 }, { "epoch": 2.3754742753073304, "grad_norm": 0.715941309928894, "learning_rate": 6.043332995848943e-05, "loss": 0.7863, "step": 7826 }, { "epoch": 2.375777811504022, "grad_norm": 0.5403160452842712, "learning_rate": 6.042826769261922e-05, "loss": 1.6579, "step": 7827 }, { "epoch": 2.376081347700713, "grad_norm": 0.5860801935195923, "learning_rate": 6.042320542674902e-05, "loss": 0.6214, "step": 7828 }, { "epoch": 2.376384883897405, "grad_norm": 0.7888510227203369, "learning_rate": 6.041814316087881e-05, "loss": 1.262, "step": 7829 }, { "epoch": 2.376688420094096, "grad_norm": 0.870266318321228, "learning_rate": 6.0413080895008613e-05, "loss": 1.3956, "step": 7830 }, { "epoch": 2.3769919562907877, "grad_norm": 0.6735515594482422, "learning_rate": 6.040801862913841e-05, "loss": 0.8394, "step": 7831 }, { "epoch": 2.377295492487479, "grad_norm": 0.7404083609580994, "learning_rate": 6.04029563632682e-05, "loss": 1.3891, "step": 7832 }, { "epoch": 2.3775990286841706, "grad_norm": 0.6634083986282349, "learning_rate": 6.0397894097398e-05, "loss": 1.4729, "step": 7833 }, { "epoch": 2.377902564880862, "grad_norm": 1.2230887413024902, "learning_rate": 6.03928318315278e-05, "loss": 1.3533, "step": 7834 }, { "epoch": 2.3782061010775535, "grad_norm": 0.723315417766571, "learning_rate": 6.0387769565657594e-05, "loss": 1.4642, "step": 7835 }, { "epoch": 2.378509637274245, "grad_norm": 0.8489314913749695, "learning_rate": 6.038270729978739e-05, "loss": 1.7669, "step": 7836 }, { "epoch": 2.3788131734709363, "grad_norm": 0.8420275449752808, "learning_rate": 6.0377645033917184e-05, "loss": 1.3251, "step": 7837 }, { "epoch": 2.379116709667628, "grad_norm": 0.663590669631958, "learning_rate": 6.037258276804698e-05, "loss": 1.2515, "step": 7838 }, { "epoch": 2.379420245864319, "grad_norm": 0.7389885187149048, "learning_rate": 6.036752050217678e-05, "loss": 1.0688, "step": 7839 }, { "epoch": 2.379723782061011, "grad_norm": 0.7165906429290771, "learning_rate": 6.0362458236306575e-05, "loss": 0.8493, "step": 7840 }, { "epoch": 2.380027318257702, "grad_norm": 0.7658936977386475, "learning_rate": 6.035739597043637e-05, "loss": 1.2728, "step": 7841 }, { "epoch": 2.3803308544543937, "grad_norm": 0.9110761880874634, "learning_rate": 6.0352333704566165e-05, "loss": 1.1135, "step": 7842 }, { "epoch": 2.380634390651085, "grad_norm": 0.7847999334335327, "learning_rate": 6.034727143869596e-05, "loss": 1.5611, "step": 7843 }, { "epoch": 2.3809379268477766, "grad_norm": 0.6586335301399231, "learning_rate": 6.034220917282576e-05, "loss": 1.5972, "step": 7844 }, { "epoch": 2.381241463044468, "grad_norm": 0.7695474624633789, "learning_rate": 6.0337146906955556e-05, "loss": 1.0394, "step": 7845 }, { "epoch": 2.3815449992411595, "grad_norm": 0.7837185859680176, "learning_rate": 6.033208464108535e-05, "loss": 1.1378, "step": 7846 }, { "epoch": 2.381848535437851, "grad_norm": 0.9874072670936584, "learning_rate": 6.0327022375215145e-05, "loss": 1.0721, "step": 7847 }, { "epoch": 2.3821520716345423, "grad_norm": 0.6492806673049927, "learning_rate": 6.032196010934495e-05, "loss": 1.3812, "step": 7848 }, { "epoch": 2.382455607831234, "grad_norm": 0.8687458634376526, "learning_rate": 6.031689784347474e-05, "loss": 1.4708, "step": 7849 }, { "epoch": 2.382759144027925, "grad_norm": 0.9347490072250366, "learning_rate": 6.0311835577604536e-05, "loss": 1.512, "step": 7850 }, { "epoch": 2.383062680224617, "grad_norm": 0.7700411081314087, "learning_rate": 6.030677331173433e-05, "loss": 1.1627, "step": 7851 }, { "epoch": 2.383366216421308, "grad_norm": 0.4575173854827881, "learning_rate": 6.0301711045864126e-05, "loss": 0.7804, "step": 7852 }, { "epoch": 2.3836697526179997, "grad_norm": 0.7945066690444946, "learning_rate": 6.029664877999393e-05, "loss": 1.2632, "step": 7853 }, { "epoch": 2.3839732888146914, "grad_norm": 0.8826307058334351, "learning_rate": 6.029158651412372e-05, "loss": 1.4142, "step": 7854 }, { "epoch": 2.3842768250113826, "grad_norm": 0.9518096446990967, "learning_rate": 6.028652424825352e-05, "loss": 1.0018, "step": 7855 }, { "epoch": 2.384580361208074, "grad_norm": 0.742034912109375, "learning_rate": 6.028146198238331e-05, "loss": 1.5479, "step": 7856 }, { "epoch": 2.3848838974047655, "grad_norm": 0.7931845784187317, "learning_rate": 6.027639971651311e-05, "loss": 1.3477, "step": 7857 }, { "epoch": 2.385187433601457, "grad_norm": 0.8038697242736816, "learning_rate": 6.027133745064291e-05, "loss": 1.0658, "step": 7858 }, { "epoch": 2.3854909697981483, "grad_norm": 0.7707939743995667, "learning_rate": 6.02662751847727e-05, "loss": 1.3347, "step": 7859 }, { "epoch": 2.38579450599484, "grad_norm": 0.6021765470504761, "learning_rate": 6.02612129189025e-05, "loss": 1.4294, "step": 7860 }, { "epoch": 2.386098042191531, "grad_norm": 0.578547477722168, "learning_rate": 6.025615065303229e-05, "loss": 1.4683, "step": 7861 }, { "epoch": 2.386401578388223, "grad_norm": 0.8599765300750732, "learning_rate": 6.0251088387162094e-05, "loss": 1.0985, "step": 7862 }, { "epoch": 2.386705114584914, "grad_norm": 0.7970162630081177, "learning_rate": 6.0246026121291896e-05, "loss": 1.3855, "step": 7863 }, { "epoch": 2.3870086507816057, "grad_norm": 1.0750303268432617, "learning_rate": 6.02409638554217e-05, "loss": 1.6049, "step": 7864 }, { "epoch": 2.3873121869782974, "grad_norm": 0.8112868070602417, "learning_rate": 6.023590158955149e-05, "loss": 1.0065, "step": 7865 }, { "epoch": 2.3876157231749886, "grad_norm": 0.7846496105194092, "learning_rate": 6.023083932368129e-05, "loss": 1.4306, "step": 7866 }, { "epoch": 2.3879192593716803, "grad_norm": 0.7304663062095642, "learning_rate": 6.022577705781108e-05, "loss": 1.6198, "step": 7867 }, { "epoch": 2.3882227955683715, "grad_norm": 0.8028169870376587, "learning_rate": 6.022071479194088e-05, "loss": 1.4573, "step": 7868 }, { "epoch": 2.388526331765063, "grad_norm": 0.8368347883224487, "learning_rate": 6.021565252607068e-05, "loss": 1.1526, "step": 7869 }, { "epoch": 2.3888298679617543, "grad_norm": 0.8738002181053162, "learning_rate": 6.021059026020047e-05, "loss": 1.2524, "step": 7870 }, { "epoch": 2.389133404158446, "grad_norm": 0.7346928715705872, "learning_rate": 6.020552799433027e-05, "loss": 1.4939, "step": 7871 }, { "epoch": 2.389436940355137, "grad_norm": 0.7887352108955383, "learning_rate": 6.020046572846006e-05, "loss": 1.3927, "step": 7872 }, { "epoch": 2.389740476551829, "grad_norm": 1.0919454097747803, "learning_rate": 6.0195403462589864e-05, "loss": 1.0354, "step": 7873 }, { "epoch": 2.39004401274852, "grad_norm": 0.819840133190155, "learning_rate": 6.019034119671966e-05, "loss": 1.7027, "step": 7874 }, { "epoch": 2.3903475489452117, "grad_norm": 0.7620285749435425, "learning_rate": 6.0185278930849454e-05, "loss": 1.0801, "step": 7875 }, { "epoch": 2.3906510851419034, "grad_norm": 0.7657316327095032, "learning_rate": 6.018021666497925e-05, "loss": 1.5568, "step": 7876 }, { "epoch": 2.3909546213385946, "grad_norm": 0.6373672485351562, "learning_rate": 6.017515439910904e-05, "loss": 1.6544, "step": 7877 }, { "epoch": 2.3912581575352863, "grad_norm": 0.7929403781890869, "learning_rate": 6.0170092133238845e-05, "loss": 1.468, "step": 7878 }, { "epoch": 2.3915616937319775, "grad_norm": 0.9188044667243958, "learning_rate": 6.016502986736864e-05, "loss": 1.3987, "step": 7879 }, { "epoch": 2.391865229928669, "grad_norm": 0.727801501750946, "learning_rate": 6.0159967601498435e-05, "loss": 1.5465, "step": 7880 }, { "epoch": 2.3921687661253603, "grad_norm": 0.7766900658607483, "learning_rate": 6.015490533562823e-05, "loss": 1.3968, "step": 7881 }, { "epoch": 2.392472302322052, "grad_norm": 0.7090250849723816, "learning_rate": 6.0149843069758024e-05, "loss": 1.4824, "step": 7882 }, { "epoch": 2.392775838518743, "grad_norm": 0.8688755035400391, "learning_rate": 6.0144780803887826e-05, "loss": 1.4126, "step": 7883 }, { "epoch": 2.393079374715435, "grad_norm": 0.6128059029579163, "learning_rate": 6.013971853801762e-05, "loss": 1.6804, "step": 7884 }, { "epoch": 2.393382910912126, "grad_norm": 0.6764035820960999, "learning_rate": 6.0134656272147415e-05, "loss": 1.1744, "step": 7885 }, { "epoch": 2.3936864471088177, "grad_norm": 0.8046677708625793, "learning_rate": 6.012959400627721e-05, "loss": 1.3921, "step": 7886 }, { "epoch": 2.3939899833055094, "grad_norm": 0.7015335559844971, "learning_rate": 6.012453174040701e-05, "loss": 1.2581, "step": 7887 }, { "epoch": 2.3942935195022006, "grad_norm": 0.7972201704978943, "learning_rate": 6.0119469474536806e-05, "loss": 0.6561, "step": 7888 }, { "epoch": 2.3945970556988923, "grad_norm": 0.6351714134216309, "learning_rate": 6.01144072086666e-05, "loss": 1.1843, "step": 7889 }, { "epoch": 2.3949005918955835, "grad_norm": 0.9243821501731873, "learning_rate": 6.0109344942796396e-05, "loss": 1.3206, "step": 7890 }, { "epoch": 2.395204128092275, "grad_norm": 0.7279089689254761, "learning_rate": 6.010428267692619e-05, "loss": 1.625, "step": 7891 }, { "epoch": 2.3955076642889663, "grad_norm": 0.7650377154350281, "learning_rate": 6.009922041105599e-05, "loss": 1.6475, "step": 7892 }, { "epoch": 2.395811200485658, "grad_norm": 0.7732207775115967, "learning_rate": 6.009415814518579e-05, "loss": 1.426, "step": 7893 }, { "epoch": 2.396114736682349, "grad_norm": 0.5595487356185913, "learning_rate": 6.008909587931558e-05, "loss": 1.6396, "step": 7894 }, { "epoch": 2.396418272879041, "grad_norm": 0.7241140604019165, "learning_rate": 6.008403361344538e-05, "loss": 1.4284, "step": 7895 }, { "epoch": 2.396721809075732, "grad_norm": 0.8329600095748901, "learning_rate": 6.007897134757517e-05, "loss": 1.6604, "step": 7896 }, { "epoch": 2.3970253452724237, "grad_norm": 0.776324450969696, "learning_rate": 6.007390908170497e-05, "loss": 1.2348, "step": 7897 }, { "epoch": 2.3973288814691154, "grad_norm": 0.6083389520645142, "learning_rate": 6.006884681583477e-05, "loss": 1.5352, "step": 7898 }, { "epoch": 2.3976324176658066, "grad_norm": 0.8004028797149658, "learning_rate": 6.006378454996456e-05, "loss": 1.0207, "step": 7899 }, { "epoch": 2.3979359538624982, "grad_norm": 0.6427205801010132, "learning_rate": 6.005872228409436e-05, "loss": 0.9727, "step": 7900 }, { "epoch": 2.3982394900591895, "grad_norm": 0.8049157857894897, "learning_rate": 6.005366001822416e-05, "loss": 1.4703, "step": 7901 }, { "epoch": 2.398543026255881, "grad_norm": 0.6981743574142456, "learning_rate": 6.0048597752353954e-05, "loss": 1.6431, "step": 7902 }, { "epoch": 2.3988465624525723, "grad_norm": 0.8600196242332458, "learning_rate": 6.004353548648375e-05, "loss": 1.4814, "step": 7903 }, { "epoch": 2.399150098649264, "grad_norm": 0.8405910730361938, "learning_rate": 6.0038473220613543e-05, "loss": 1.395, "step": 7904 }, { "epoch": 2.399453634845955, "grad_norm": 0.9069204330444336, "learning_rate": 6.003341095474334e-05, "loss": 1.2988, "step": 7905 }, { "epoch": 2.399757171042647, "grad_norm": 0.8811964392662048, "learning_rate": 6.002834868887314e-05, "loss": 1.389, "step": 7906 }, { "epoch": 2.400060707239338, "grad_norm": 0.7575058937072754, "learning_rate": 6.0023286423002935e-05, "loss": 1.3244, "step": 7907 }, { "epoch": 2.4003642434360297, "grad_norm": 0.7318387031555176, "learning_rate": 6.001822415713274e-05, "loss": 1.3653, "step": 7908 }, { "epoch": 2.4006677796327214, "grad_norm": 0.8620535731315613, "learning_rate": 6.001316189126254e-05, "loss": 1.4485, "step": 7909 }, { "epoch": 2.4009713158294126, "grad_norm": 0.6746302843093872, "learning_rate": 6.000809962539233e-05, "loss": 0.9841, "step": 7910 }, { "epoch": 2.4012748520261042, "grad_norm": 0.6638560891151428, "learning_rate": 6.000303735952213e-05, "loss": 0.8332, "step": 7911 }, { "epoch": 2.4015783882227955, "grad_norm": 0.5643718242645264, "learning_rate": 5.999797509365193e-05, "loss": 2.039, "step": 7912 }, { "epoch": 2.401881924419487, "grad_norm": 0.834593653678894, "learning_rate": 5.9992912827781724e-05, "loss": 0.9973, "step": 7913 }, { "epoch": 2.4021854606161783, "grad_norm": 0.6542152166366577, "learning_rate": 5.998785056191152e-05, "loss": 1.6459, "step": 7914 }, { "epoch": 2.40248899681287, "grad_norm": 0.8321699500083923, "learning_rate": 5.998278829604131e-05, "loss": 1.3346, "step": 7915 }, { "epoch": 2.4027925330095616, "grad_norm": 0.6096453070640564, "learning_rate": 5.997772603017111e-05, "loss": 1.0405, "step": 7916 }, { "epoch": 2.403096069206253, "grad_norm": 0.49466562271118164, "learning_rate": 5.997266376430091e-05, "loss": 1.1957, "step": 7917 }, { "epoch": 2.403399605402944, "grad_norm": 0.691472589969635, "learning_rate": 5.9967601498430704e-05, "loss": 1.6998, "step": 7918 }, { "epoch": 2.4037031415996357, "grad_norm": 0.807509183883667, "learning_rate": 5.99625392325605e-05, "loss": 1.3887, "step": 7919 }, { "epoch": 2.4040066777963274, "grad_norm": 0.7648297548294067, "learning_rate": 5.9957476966690294e-05, "loss": 1.0966, "step": 7920 }, { "epoch": 2.4043102139930186, "grad_norm": 1.026137113571167, "learning_rate": 5.995241470082009e-05, "loss": 0.6286, "step": 7921 }, { "epoch": 2.4046137501897102, "grad_norm": 0.8462149500846863, "learning_rate": 5.994735243494989e-05, "loss": 1.3598, "step": 7922 }, { "epoch": 2.4049172863864015, "grad_norm": 0.8571946620941162, "learning_rate": 5.9942290169079685e-05, "loss": 1.4567, "step": 7923 }, { "epoch": 2.405220822583093, "grad_norm": 1.0264604091644287, "learning_rate": 5.993722790320948e-05, "loss": 0.9721, "step": 7924 }, { "epoch": 2.4055243587797843, "grad_norm": 0.8708542585372925, "learning_rate": 5.9932165637339275e-05, "loss": 1.3932, "step": 7925 }, { "epoch": 2.405827894976476, "grad_norm": 0.8683403134346008, "learning_rate": 5.9927103371469076e-05, "loss": 1.5331, "step": 7926 }, { "epoch": 2.4061314311731676, "grad_norm": 0.9796139001846313, "learning_rate": 5.992204110559887e-05, "loss": 1.3447, "step": 7927 }, { "epoch": 2.406434967369859, "grad_norm": 0.6332157254219055, "learning_rate": 5.9916978839728666e-05, "loss": 1.4754, "step": 7928 }, { "epoch": 2.4067385035665505, "grad_norm": 0.9253177642822266, "learning_rate": 5.991191657385846e-05, "loss": 0.8832, "step": 7929 }, { "epoch": 2.4070420397632417, "grad_norm": 0.5818787217140198, "learning_rate": 5.9906854307988256e-05, "loss": 1.5906, "step": 7930 }, { "epoch": 2.4073455759599334, "grad_norm": 0.9348157048225403, "learning_rate": 5.990179204211806e-05, "loss": 1.1478, "step": 7931 }, { "epoch": 2.4076491121566246, "grad_norm": 0.9060249328613281, "learning_rate": 5.989672977624785e-05, "loss": 1.5399, "step": 7932 }, { "epoch": 2.4079526483533162, "grad_norm": 0.7704276442527771, "learning_rate": 5.989166751037765e-05, "loss": 1.1696, "step": 7933 }, { "epoch": 2.4082561845500075, "grad_norm": 0.7888961434364319, "learning_rate": 5.988660524450744e-05, "loss": 1.5363, "step": 7934 }, { "epoch": 2.408559720746699, "grad_norm": 0.6885265707969666, "learning_rate": 5.9881542978637236e-05, "loss": 1.3708, "step": 7935 }, { "epoch": 2.4088632569433903, "grad_norm": 0.748163104057312, "learning_rate": 5.987648071276704e-05, "loss": 0.9023, "step": 7936 }, { "epoch": 2.409166793140082, "grad_norm": 0.8010879158973694, "learning_rate": 5.987141844689683e-05, "loss": 1.2594, "step": 7937 }, { "epoch": 2.4094703293367736, "grad_norm": 0.7890639305114746, "learning_rate": 5.986635618102663e-05, "loss": 1.6747, "step": 7938 }, { "epoch": 2.409773865533465, "grad_norm": 0.896912693977356, "learning_rate": 5.986129391515642e-05, "loss": 1.2543, "step": 7939 }, { "epoch": 2.4100774017301565, "grad_norm": 0.7643724679946899, "learning_rate": 5.9856231649286224e-05, "loss": 1.1527, "step": 7940 }, { "epoch": 2.4103809379268477, "grad_norm": 0.8025280833244324, "learning_rate": 5.985116938341602e-05, "loss": 0.978, "step": 7941 }, { "epoch": 2.4106844741235394, "grad_norm": 0.7779070138931274, "learning_rate": 5.9846107117545813e-05, "loss": 1.4496, "step": 7942 }, { "epoch": 2.4109880103202306, "grad_norm": 0.9251778721809387, "learning_rate": 5.984104485167561e-05, "loss": 0.8951, "step": 7943 }, { "epoch": 2.4112915465169222, "grad_norm": 0.7787967920303345, "learning_rate": 5.98359825858054e-05, "loss": 1.4073, "step": 7944 }, { "epoch": 2.4115950827136134, "grad_norm": 0.6125165224075317, "learning_rate": 5.9830920319935205e-05, "loss": 1.8558, "step": 7945 }, { "epoch": 2.411898618910305, "grad_norm": 0.8009992837905884, "learning_rate": 5.9825858054065e-05, "loss": 1.5551, "step": 7946 }, { "epoch": 2.4122021551069963, "grad_norm": 0.6516717076301575, "learning_rate": 5.9820795788194794e-05, "loss": 0.8592, "step": 7947 }, { "epoch": 2.412505691303688, "grad_norm": 0.7446168661117554, "learning_rate": 5.981573352232459e-05, "loss": 1.4455, "step": 7948 }, { "epoch": 2.4128092275003796, "grad_norm": 0.9416190385818481, "learning_rate": 5.9810671256454384e-05, "loss": 1.3713, "step": 7949 }, { "epoch": 2.413112763697071, "grad_norm": 0.7172439694404602, "learning_rate": 5.9805608990584185e-05, "loss": 1.3651, "step": 7950 }, { "epoch": 2.4134162998937625, "grad_norm": 1.0124558210372925, "learning_rate": 5.980054672471398e-05, "loss": 1.1258, "step": 7951 }, { "epoch": 2.4137198360904537, "grad_norm": 0.6773886680603027, "learning_rate": 5.979548445884379e-05, "loss": 1.6095, "step": 7952 }, { "epoch": 2.4140233722871454, "grad_norm": 0.6818512678146362, "learning_rate": 5.979042219297358e-05, "loss": 1.2836, "step": 7953 }, { "epoch": 2.4143269084838366, "grad_norm": 0.8271624445915222, "learning_rate": 5.978535992710338e-05, "loss": 1.1351, "step": 7954 }, { "epoch": 2.4146304446805282, "grad_norm": 0.6633880138397217, "learning_rate": 5.978029766123317e-05, "loss": 1.3878, "step": 7955 }, { "epoch": 2.4149339808772194, "grad_norm": 0.7885595560073853, "learning_rate": 5.9775235395362974e-05, "loss": 1.4609, "step": 7956 }, { "epoch": 2.415237517073911, "grad_norm": 0.9626199007034302, "learning_rate": 5.977017312949277e-05, "loss": 1.0395, "step": 7957 }, { "epoch": 2.4155410532706023, "grad_norm": 0.8186993598937988, "learning_rate": 5.9765110863622564e-05, "loss": 1.5863, "step": 7958 }, { "epoch": 2.415844589467294, "grad_norm": 0.6804643273353577, "learning_rate": 5.976004859775236e-05, "loss": 1.5754, "step": 7959 }, { "epoch": 2.4161481256639856, "grad_norm": 0.7416716814041138, "learning_rate": 5.9754986331882154e-05, "loss": 1.4599, "step": 7960 }, { "epoch": 2.416451661860677, "grad_norm": 0.621061384677887, "learning_rate": 5.9749924066011955e-05, "loss": 1.2628, "step": 7961 }, { "epoch": 2.4167551980573685, "grad_norm": 0.8077750205993652, "learning_rate": 5.974486180014175e-05, "loss": 0.9207, "step": 7962 }, { "epoch": 2.4170587342540597, "grad_norm": 1.0338082313537598, "learning_rate": 5.9739799534271545e-05, "loss": 1.4273, "step": 7963 }, { "epoch": 2.4173622704507514, "grad_norm": 0.723296046257019, "learning_rate": 5.973473726840134e-05, "loss": 0.8518, "step": 7964 }, { "epoch": 2.4176658066474426, "grad_norm": 0.7224794626235962, "learning_rate": 5.972967500253114e-05, "loss": 1.4885, "step": 7965 }, { "epoch": 2.4179693428441342, "grad_norm": 1.3886922597885132, "learning_rate": 5.9724612736660936e-05, "loss": 1.3691, "step": 7966 }, { "epoch": 2.4182728790408254, "grad_norm": 0.7708404064178467, "learning_rate": 5.971955047079073e-05, "loss": 1.5227, "step": 7967 }, { "epoch": 2.418576415237517, "grad_norm": 0.7277258038520813, "learning_rate": 5.9714488204920526e-05, "loss": 1.4639, "step": 7968 }, { "epoch": 2.4188799514342083, "grad_norm": 0.7625682353973389, "learning_rate": 5.970942593905032e-05, "loss": 0.9221, "step": 7969 }, { "epoch": 2.4191834876309, "grad_norm": 0.5811772346496582, "learning_rate": 5.970436367318012e-05, "loss": 0.7974, "step": 7970 }, { "epoch": 2.4194870238275916, "grad_norm": 0.7587955594062805, "learning_rate": 5.969930140730992e-05, "loss": 1.0833, "step": 7971 }, { "epoch": 2.419790560024283, "grad_norm": 0.6895954608917236, "learning_rate": 5.969423914143971e-05, "loss": 1.5106, "step": 7972 }, { "epoch": 2.4200940962209745, "grad_norm": 1.08235764503479, "learning_rate": 5.9689176875569506e-05, "loss": 1.1488, "step": 7973 }, { "epoch": 2.4203976324176657, "grad_norm": 0.713238537311554, "learning_rate": 5.96841146096993e-05, "loss": 1.4101, "step": 7974 }, { "epoch": 2.4207011686143574, "grad_norm": 0.7801069617271423, "learning_rate": 5.96790523438291e-05, "loss": 1.2996, "step": 7975 }, { "epoch": 2.4210047048110486, "grad_norm": 0.8136372566223145, "learning_rate": 5.96739900779589e-05, "loss": 1.1243, "step": 7976 }, { "epoch": 2.4213082410077402, "grad_norm": 0.9092775583267212, "learning_rate": 5.966892781208869e-05, "loss": 1.157, "step": 7977 }, { "epoch": 2.4216117772044314, "grad_norm": 0.7236045002937317, "learning_rate": 5.966386554621849e-05, "loss": 1.2423, "step": 7978 }, { "epoch": 2.421915313401123, "grad_norm": 0.68597811460495, "learning_rate": 5.965880328034829e-05, "loss": 1.5961, "step": 7979 }, { "epoch": 2.4222188495978143, "grad_norm": 0.8136271834373474, "learning_rate": 5.965374101447808e-05, "loss": 0.96, "step": 7980 }, { "epoch": 2.422522385794506, "grad_norm": 0.6901982426643372, "learning_rate": 5.964867874860788e-05, "loss": 1.7028, "step": 7981 }, { "epoch": 2.4228259219911976, "grad_norm": 0.5978294014930725, "learning_rate": 5.964361648273767e-05, "loss": 1.3612, "step": 7982 }, { "epoch": 2.423129458187889, "grad_norm": 0.8175364136695862, "learning_rate": 5.963855421686747e-05, "loss": 1.1101, "step": 7983 }, { "epoch": 2.4234329943845805, "grad_norm": 0.6936042904853821, "learning_rate": 5.963349195099727e-05, "loss": 0.793, "step": 7984 }, { "epoch": 2.4237365305812717, "grad_norm": 0.8167652487754822, "learning_rate": 5.9628429685127064e-05, "loss": 1.1589, "step": 7985 }, { "epoch": 2.4240400667779634, "grad_norm": 0.7972689867019653, "learning_rate": 5.962336741925686e-05, "loss": 1.1974, "step": 7986 }, { "epoch": 2.4243436029746546, "grad_norm": 0.8004547357559204, "learning_rate": 5.9618305153386654e-05, "loss": 1.6435, "step": 7987 }, { "epoch": 2.424647139171346, "grad_norm": 0.8520717024803162, "learning_rate": 5.961324288751645e-05, "loss": 0.866, "step": 7988 }, { "epoch": 2.424950675368038, "grad_norm": 0.9278232455253601, "learning_rate": 5.960818062164625e-05, "loss": 1.425, "step": 7989 }, { "epoch": 2.425254211564729, "grad_norm": 0.5844679474830627, "learning_rate": 5.9603118355776045e-05, "loss": 0.9303, "step": 7990 }, { "epoch": 2.4255577477614203, "grad_norm": 0.6622089743614197, "learning_rate": 5.959805608990584e-05, "loss": 1.0846, "step": 7991 }, { "epoch": 2.425861283958112, "grad_norm": 0.7942777872085571, "learning_rate": 5.9592993824035634e-05, "loss": 1.6953, "step": 7992 }, { "epoch": 2.4261648201548036, "grad_norm": 0.9262164235115051, "learning_rate": 5.9587931558165436e-05, "loss": 1.1783, "step": 7993 }, { "epoch": 2.426468356351495, "grad_norm": 0.9600434303283691, "learning_rate": 5.958286929229523e-05, "loss": 1.4355, "step": 7994 }, { "epoch": 2.4267718925481865, "grad_norm": 0.9729022979736328, "learning_rate": 5.9577807026425026e-05, "loss": 1.4183, "step": 7995 }, { "epoch": 2.4270754287448777, "grad_norm": 0.6061367392539978, "learning_rate": 5.957274476055482e-05, "loss": 1.1109, "step": 7996 }, { "epoch": 2.4273789649415694, "grad_norm": 0.7526201009750366, "learning_rate": 5.956768249468463e-05, "loss": 1.0544, "step": 7997 }, { "epoch": 2.4276825011382606, "grad_norm": 0.7746148705482483, "learning_rate": 5.9562620228814424e-05, "loss": 1.5715, "step": 7998 }, { "epoch": 2.427986037334952, "grad_norm": 0.701327919960022, "learning_rate": 5.955755796294422e-05, "loss": 1.1051, "step": 7999 }, { "epoch": 2.428289573531644, "grad_norm": 0.6953447461128235, "learning_rate": 5.955249569707402e-05, "loss": 1.203, "step": 8000 }, { "epoch": 2.428593109728335, "grad_norm": 0.7078439593315125, "learning_rate": 5.9547433431203815e-05, "loss": 1.1811, "step": 8001 }, { "epoch": 2.4288966459250267, "grad_norm": 0.5841401815414429, "learning_rate": 5.954237116533361e-05, "loss": 1.2787, "step": 8002 }, { "epoch": 2.429200182121718, "grad_norm": 0.8184963464736938, "learning_rate": 5.9537308899463404e-05, "loss": 1.1793, "step": 8003 }, { "epoch": 2.4295037183184096, "grad_norm": 0.7969046831130981, "learning_rate": 5.9532246633593206e-05, "loss": 1.2765, "step": 8004 }, { "epoch": 2.429807254515101, "grad_norm": 0.6305599808692932, "learning_rate": 5.9527184367723e-05, "loss": 1.6182, "step": 8005 }, { "epoch": 2.4301107907117925, "grad_norm": 0.7845397591590881, "learning_rate": 5.9522122101852795e-05, "loss": 1.5286, "step": 8006 }, { "epoch": 2.4304143269084837, "grad_norm": 0.8575507998466492, "learning_rate": 5.951705983598259e-05, "loss": 1.2778, "step": 8007 }, { "epoch": 2.4307178631051753, "grad_norm": 0.6721327304840088, "learning_rate": 5.9511997570112385e-05, "loss": 1.4541, "step": 8008 }, { "epoch": 2.4310213993018666, "grad_norm": 0.7807362079620361, "learning_rate": 5.950693530424219e-05, "loss": 1.6529, "step": 8009 }, { "epoch": 2.431324935498558, "grad_norm": 0.7468159794807434, "learning_rate": 5.950187303837198e-05, "loss": 1.5048, "step": 8010 }, { "epoch": 2.43162847169525, "grad_norm": 0.9903224110603333, "learning_rate": 5.9496810772501776e-05, "loss": 1.2826, "step": 8011 }, { "epoch": 2.431932007891941, "grad_norm": 0.8126440048217773, "learning_rate": 5.949174850663157e-05, "loss": 1.0912, "step": 8012 }, { "epoch": 2.4322355440886327, "grad_norm": 0.6487678289413452, "learning_rate": 5.9486686240761366e-05, "loss": 1.0674, "step": 8013 }, { "epoch": 2.432539080285324, "grad_norm": 0.8460603952407837, "learning_rate": 5.948162397489117e-05, "loss": 1.446, "step": 8014 }, { "epoch": 2.4328426164820156, "grad_norm": 0.9071548581123352, "learning_rate": 5.947656170902096e-05, "loss": 1.3949, "step": 8015 }, { "epoch": 2.433146152678707, "grad_norm": 0.8428886532783508, "learning_rate": 5.947149944315076e-05, "loss": 1.3674, "step": 8016 }, { "epoch": 2.4334496888753985, "grad_norm": 0.78566974401474, "learning_rate": 5.946643717728055e-05, "loss": 1.6298, "step": 8017 }, { "epoch": 2.4337532250720897, "grad_norm": 0.796306312084198, "learning_rate": 5.946137491141035e-05, "loss": 1.4393, "step": 8018 }, { "epoch": 2.4340567612687813, "grad_norm": 0.8854008316993713, "learning_rate": 5.945631264554015e-05, "loss": 0.7501, "step": 8019 }, { "epoch": 2.4343602974654726, "grad_norm": 0.8512039184570312, "learning_rate": 5.945125037966994e-05, "loss": 1.4922, "step": 8020 }, { "epoch": 2.434663833662164, "grad_norm": 0.719879150390625, "learning_rate": 5.944618811379974e-05, "loss": 1.2084, "step": 8021 }, { "epoch": 2.434967369858856, "grad_norm": 0.8208937644958496, "learning_rate": 5.944112584792953e-05, "loss": 1.2646, "step": 8022 }, { "epoch": 2.435270906055547, "grad_norm": 0.8240382075309753, "learning_rate": 5.9436063582059334e-05, "loss": 1.5951, "step": 8023 }, { "epoch": 2.4355744422522387, "grad_norm": 0.7194306254386902, "learning_rate": 5.943100131618913e-05, "loss": 1.4623, "step": 8024 }, { "epoch": 2.43587797844893, "grad_norm": 0.8812718987464905, "learning_rate": 5.9425939050318924e-05, "loss": 0.5675, "step": 8025 }, { "epoch": 2.4361815146456216, "grad_norm": 0.9816334247589111, "learning_rate": 5.942087678444872e-05, "loss": 1.2629, "step": 8026 }, { "epoch": 2.436485050842313, "grad_norm": 0.851266086101532, "learning_rate": 5.941581451857851e-05, "loss": 1.3655, "step": 8027 }, { "epoch": 2.4367885870390045, "grad_norm": 0.7911189198493958, "learning_rate": 5.9410752252708315e-05, "loss": 1.3256, "step": 8028 }, { "epoch": 2.4370921232356957, "grad_norm": 0.7403606176376343, "learning_rate": 5.940568998683811e-05, "loss": 1.1923, "step": 8029 }, { "epoch": 2.4373956594323873, "grad_norm": 0.8709863424301147, "learning_rate": 5.9400627720967904e-05, "loss": 1.2818, "step": 8030 }, { "epoch": 2.4376991956290786, "grad_norm": 0.7635475397109985, "learning_rate": 5.93955654550977e-05, "loss": 1.5084, "step": 8031 }, { "epoch": 2.43800273182577, "grad_norm": 0.916801393032074, "learning_rate": 5.93905031892275e-05, "loss": 1.2785, "step": 8032 }, { "epoch": 2.438306268022462, "grad_norm": 1.1839587688446045, "learning_rate": 5.9385440923357296e-05, "loss": 1.1729, "step": 8033 }, { "epoch": 2.438609804219153, "grad_norm": 1.0023239850997925, "learning_rate": 5.938037865748709e-05, "loss": 1.023, "step": 8034 }, { "epoch": 2.4389133404158447, "grad_norm": 0.6197182536125183, "learning_rate": 5.9375316391616885e-05, "loss": 1.5478, "step": 8035 }, { "epoch": 2.439216876612536, "grad_norm": 0.9122412800788879, "learning_rate": 5.937025412574668e-05, "loss": 1.1756, "step": 8036 }, { "epoch": 2.4395204128092276, "grad_norm": 0.8996947407722473, "learning_rate": 5.936519185987648e-05, "loss": 1.6531, "step": 8037 }, { "epoch": 2.439823949005919, "grad_norm": 1.0564600229263306, "learning_rate": 5.9360129594006276e-05, "loss": 1.3561, "step": 8038 }, { "epoch": 2.4401274852026105, "grad_norm": 0.8630561828613281, "learning_rate": 5.935506732813607e-05, "loss": 0.662, "step": 8039 }, { "epoch": 2.4404310213993017, "grad_norm": 0.6715168356895447, "learning_rate": 5.9350005062265866e-05, "loss": 1.5388, "step": 8040 }, { "epoch": 2.4407345575959933, "grad_norm": 0.7765439748764038, "learning_rate": 5.9344942796395674e-05, "loss": 0.7733, "step": 8041 }, { "epoch": 2.4410380937926845, "grad_norm": 0.7207764387130737, "learning_rate": 5.933988053052547e-05, "loss": 1.1412, "step": 8042 }, { "epoch": 2.441341629989376, "grad_norm": 0.6615632772445679, "learning_rate": 5.933481826465527e-05, "loss": 0.8974, "step": 8043 }, { "epoch": 2.441645166186068, "grad_norm": 0.5565609335899353, "learning_rate": 5.9329755998785065e-05, "loss": 0.6975, "step": 8044 }, { "epoch": 2.441948702382759, "grad_norm": 0.7616466879844666, "learning_rate": 5.932469373291486e-05, "loss": 1.4777, "step": 8045 }, { "epoch": 2.4422522385794507, "grad_norm": 0.6495609283447266, "learning_rate": 5.9319631467044655e-05, "loss": 1.2428, "step": 8046 }, { "epoch": 2.442555774776142, "grad_norm": 1.0401421785354614, "learning_rate": 5.931456920117445e-05, "loss": 1.0536, "step": 8047 }, { "epoch": 2.4428593109728336, "grad_norm": 0.8170029520988464, "learning_rate": 5.930950693530425e-05, "loss": 1.3697, "step": 8048 }, { "epoch": 2.443162847169525, "grad_norm": 0.6555508971214294, "learning_rate": 5.9304444669434046e-05, "loss": 1.2629, "step": 8049 }, { "epoch": 2.4434663833662165, "grad_norm": 0.5987980961799622, "learning_rate": 5.929938240356384e-05, "loss": 1.61, "step": 8050 }, { "epoch": 2.443769919562908, "grad_norm": 0.7624805569648743, "learning_rate": 5.9294320137693636e-05, "loss": 1.327, "step": 8051 }, { "epoch": 2.4440734557595993, "grad_norm": 0.9335926175117493, "learning_rate": 5.928925787182343e-05, "loss": 0.8136, "step": 8052 }, { "epoch": 2.4443769919562905, "grad_norm": 0.6693974733352661, "learning_rate": 5.928419560595323e-05, "loss": 1.6345, "step": 8053 }, { "epoch": 2.444680528152982, "grad_norm": 0.7421702146530151, "learning_rate": 5.927913334008303e-05, "loss": 1.0317, "step": 8054 }, { "epoch": 2.444984064349674, "grad_norm": 0.794100284576416, "learning_rate": 5.927407107421282e-05, "loss": 1.4123, "step": 8055 }, { "epoch": 2.445287600546365, "grad_norm": 0.8165601491928101, "learning_rate": 5.9269008808342617e-05, "loss": 1.3916, "step": 8056 }, { "epoch": 2.4455911367430567, "grad_norm": 0.635057270526886, "learning_rate": 5.926394654247242e-05, "loss": 1.3165, "step": 8057 }, { "epoch": 2.445894672939748, "grad_norm": 0.7940477728843689, "learning_rate": 5.925888427660221e-05, "loss": 1.0743, "step": 8058 }, { "epoch": 2.4461982091364396, "grad_norm": 0.8174204230308533, "learning_rate": 5.925382201073201e-05, "loss": 1.426, "step": 8059 }, { "epoch": 2.446501745333131, "grad_norm": 0.7778024673461914, "learning_rate": 5.92487597448618e-05, "loss": 1.4384, "step": 8060 }, { "epoch": 2.4468052815298225, "grad_norm": 0.5465704798698425, "learning_rate": 5.92436974789916e-05, "loss": 1.2255, "step": 8061 }, { "epoch": 2.447108817726514, "grad_norm": 0.6376035809516907, "learning_rate": 5.92386352131214e-05, "loss": 1.2138, "step": 8062 }, { "epoch": 2.4474123539232053, "grad_norm": 0.9984663128852844, "learning_rate": 5.9233572947251194e-05, "loss": 1.454, "step": 8063 }, { "epoch": 2.447715890119897, "grad_norm": 0.7464830875396729, "learning_rate": 5.922851068138099e-05, "loss": 1.6236, "step": 8064 }, { "epoch": 2.448019426316588, "grad_norm": 0.8578869700431824, "learning_rate": 5.922344841551078e-05, "loss": 0.7635, "step": 8065 }, { "epoch": 2.44832296251328, "grad_norm": 0.8007357716560364, "learning_rate": 5.921838614964058e-05, "loss": 1.4443, "step": 8066 }, { "epoch": 2.448626498709971, "grad_norm": 0.6420935988426208, "learning_rate": 5.921332388377038e-05, "loss": 1.4141, "step": 8067 }, { "epoch": 2.4489300349066627, "grad_norm": 0.8005525469779968, "learning_rate": 5.9208261617900174e-05, "loss": 1.3446, "step": 8068 }, { "epoch": 2.449233571103354, "grad_norm": 0.7144571542739868, "learning_rate": 5.920319935202997e-05, "loss": 1.1064, "step": 8069 }, { "epoch": 2.4495371073000456, "grad_norm": 0.6875942945480347, "learning_rate": 5.9198137086159764e-05, "loss": 1.1621, "step": 8070 }, { "epoch": 2.449840643496737, "grad_norm": 0.6312223076820374, "learning_rate": 5.9193074820289566e-05, "loss": 1.1365, "step": 8071 }, { "epoch": 2.4501441796934285, "grad_norm": 0.8454585671424866, "learning_rate": 5.918801255441936e-05, "loss": 1.3123, "step": 8072 }, { "epoch": 2.45044771589012, "grad_norm": 0.626213014125824, "learning_rate": 5.9182950288549155e-05, "loss": 1.3838, "step": 8073 }, { "epoch": 2.4507512520868113, "grad_norm": 0.8456754088401794, "learning_rate": 5.917788802267895e-05, "loss": 0.8246, "step": 8074 }, { "epoch": 2.451054788283503, "grad_norm": 0.9328720569610596, "learning_rate": 5.9172825756808745e-05, "loss": 0.8562, "step": 8075 }, { "epoch": 2.451358324480194, "grad_norm": 0.8819989562034607, "learning_rate": 5.9167763490938546e-05, "loss": 1.0555, "step": 8076 }, { "epoch": 2.451661860676886, "grad_norm": 0.5868912935256958, "learning_rate": 5.916270122506834e-05, "loss": 1.691, "step": 8077 }, { "epoch": 2.451965396873577, "grad_norm": 0.6053828597068787, "learning_rate": 5.9157638959198136e-05, "loss": 0.7372, "step": 8078 }, { "epoch": 2.4522689330702687, "grad_norm": 0.7905032634735107, "learning_rate": 5.915257669332793e-05, "loss": 0.9422, "step": 8079 }, { "epoch": 2.45257246926696, "grad_norm": 0.6530877947807312, "learning_rate": 5.9147514427457725e-05, "loss": 1.5496, "step": 8080 }, { "epoch": 2.4528760054636516, "grad_norm": 0.6365821361541748, "learning_rate": 5.914245216158753e-05, "loss": 1.0235, "step": 8081 }, { "epoch": 2.453179541660343, "grad_norm": 0.8876006603240967, "learning_rate": 5.913738989571732e-05, "loss": 1.5601, "step": 8082 }, { "epoch": 2.4534830778570345, "grad_norm": 0.7808222770690918, "learning_rate": 5.913232762984712e-05, "loss": 1.8001, "step": 8083 }, { "epoch": 2.453786614053726, "grad_norm": 0.6512035131454468, "learning_rate": 5.912726536397691e-05, "loss": 1.305, "step": 8084 }, { "epoch": 2.4540901502504173, "grad_norm": 0.6710624098777771, "learning_rate": 5.912220309810671e-05, "loss": 1.236, "step": 8085 }, { "epoch": 2.454393686447109, "grad_norm": 0.771418035030365, "learning_rate": 5.9117140832236515e-05, "loss": 1.1527, "step": 8086 }, { "epoch": 2.4546972226438, "grad_norm": 0.8948315978050232, "learning_rate": 5.9112078566366316e-05, "loss": 1.3296, "step": 8087 }, { "epoch": 2.455000758840492, "grad_norm": 0.7415945529937744, "learning_rate": 5.910701630049611e-05, "loss": 0.934, "step": 8088 }, { "epoch": 2.455304295037183, "grad_norm": 0.9084923267364502, "learning_rate": 5.9101954034625906e-05, "loss": 1.3117, "step": 8089 }, { "epoch": 2.4556078312338747, "grad_norm": 0.8375921249389648, "learning_rate": 5.90968917687557e-05, "loss": 1.3953, "step": 8090 }, { "epoch": 2.455911367430566, "grad_norm": 0.6972025632858276, "learning_rate": 5.9091829502885495e-05, "loss": 1.6261, "step": 8091 }, { "epoch": 2.4562149036272576, "grad_norm": 1.3651702404022217, "learning_rate": 5.90867672370153e-05, "loss": 1.2544, "step": 8092 }, { "epoch": 2.456518439823949, "grad_norm": 0.7613970637321472, "learning_rate": 5.908170497114509e-05, "loss": 1.312, "step": 8093 }, { "epoch": 2.4568219760206405, "grad_norm": 0.9772050380706787, "learning_rate": 5.9076642705274886e-05, "loss": 1.0077, "step": 8094 }, { "epoch": 2.457125512217332, "grad_norm": 0.775710940361023, "learning_rate": 5.907158043940468e-05, "loss": 1.181, "step": 8095 }, { "epoch": 2.4574290484140233, "grad_norm": 0.7317579388618469, "learning_rate": 5.906651817353448e-05, "loss": 1.45, "step": 8096 }, { "epoch": 2.457732584610715, "grad_norm": 0.7279502749443054, "learning_rate": 5.906145590766428e-05, "loss": 1.1328, "step": 8097 }, { "epoch": 2.458036120807406, "grad_norm": 0.8418558835983276, "learning_rate": 5.905639364179407e-05, "loss": 1.1068, "step": 8098 }, { "epoch": 2.458339657004098, "grad_norm": 0.7958865165710449, "learning_rate": 5.905133137592387e-05, "loss": 1.3822, "step": 8099 }, { "epoch": 2.458643193200789, "grad_norm": 0.8171069622039795, "learning_rate": 5.904626911005366e-05, "loss": 1.4247, "step": 8100 }, { "epoch": 2.4589467293974807, "grad_norm": 0.5791099667549133, "learning_rate": 5.9041206844183464e-05, "loss": 1.3285, "step": 8101 }, { "epoch": 2.459250265594172, "grad_norm": 0.8275609612464905, "learning_rate": 5.903614457831326e-05, "loss": 1.1727, "step": 8102 }, { "epoch": 2.4595538017908636, "grad_norm": 0.8169893622398376, "learning_rate": 5.903108231244305e-05, "loss": 1.2051, "step": 8103 }, { "epoch": 2.459857337987555, "grad_norm": 0.5260883569717407, "learning_rate": 5.902602004657285e-05, "loss": 0.8089, "step": 8104 }, { "epoch": 2.4601608741842464, "grad_norm": 0.7259795069694519, "learning_rate": 5.902095778070264e-05, "loss": 0.9742, "step": 8105 }, { "epoch": 2.460464410380938, "grad_norm": 0.6402779817581177, "learning_rate": 5.9015895514832444e-05, "loss": 1.0891, "step": 8106 }, { "epoch": 2.4607679465776293, "grad_norm": 0.7106859087944031, "learning_rate": 5.901083324896224e-05, "loss": 1.6534, "step": 8107 }, { "epoch": 2.461071482774321, "grad_norm": 0.7413371205329895, "learning_rate": 5.9005770983092034e-05, "loss": 1.4216, "step": 8108 }, { "epoch": 2.461375018971012, "grad_norm": 0.817121148109436, "learning_rate": 5.900070871722183e-05, "loss": 0.8438, "step": 8109 }, { "epoch": 2.461678555167704, "grad_norm": 0.767145574092865, "learning_rate": 5.899564645135163e-05, "loss": 0.9623, "step": 8110 }, { "epoch": 2.461982091364395, "grad_norm": 0.9798495769500732, "learning_rate": 5.8990584185481425e-05, "loss": 1.021, "step": 8111 }, { "epoch": 2.4622856275610867, "grad_norm": 0.7143928408622742, "learning_rate": 5.898552191961122e-05, "loss": 1.6554, "step": 8112 }, { "epoch": 2.462589163757778, "grad_norm": 0.8441614508628845, "learning_rate": 5.8980459653741015e-05, "loss": 0.931, "step": 8113 }, { "epoch": 2.4628926999544696, "grad_norm": 0.5994144082069397, "learning_rate": 5.897539738787081e-05, "loss": 0.9296, "step": 8114 }, { "epoch": 2.463196236151161, "grad_norm": 0.7627611756324768, "learning_rate": 5.897033512200061e-05, "loss": 1.5269, "step": 8115 }, { "epoch": 2.4634997723478524, "grad_norm": 0.6816883087158203, "learning_rate": 5.8965272856130406e-05, "loss": 1.4144, "step": 8116 }, { "epoch": 2.463803308544544, "grad_norm": 0.7906539440155029, "learning_rate": 5.89602105902602e-05, "loss": 1.273, "step": 8117 }, { "epoch": 2.4641068447412353, "grad_norm": 0.5948166847229004, "learning_rate": 5.8955148324389995e-05, "loss": 1.5956, "step": 8118 }, { "epoch": 2.464410380937927, "grad_norm": 0.6496350765228271, "learning_rate": 5.895008605851979e-05, "loss": 0.897, "step": 8119 }, { "epoch": 2.464713917134618, "grad_norm": 0.7230144143104553, "learning_rate": 5.894502379264959e-05, "loss": 1.5053, "step": 8120 }, { "epoch": 2.46501745333131, "grad_norm": 0.8181447982788086, "learning_rate": 5.8939961526779387e-05, "loss": 1.1219, "step": 8121 }, { "epoch": 2.465320989528001, "grad_norm": 0.6964221596717834, "learning_rate": 5.893489926090918e-05, "loss": 1.8231, "step": 8122 }, { "epoch": 2.4656245257246927, "grad_norm": 0.6474187970161438, "learning_rate": 5.8929836995038976e-05, "loss": 1.7453, "step": 8123 }, { "epoch": 2.4659280619213844, "grad_norm": 0.8115422129631042, "learning_rate": 5.892477472916878e-05, "loss": 1.0226, "step": 8124 }, { "epoch": 2.4662315981180756, "grad_norm": 0.9050765037536621, "learning_rate": 5.891971246329857e-05, "loss": 1.3473, "step": 8125 }, { "epoch": 2.466535134314767, "grad_norm": 0.6008917093276978, "learning_rate": 5.891465019742837e-05, "loss": 1.6152, "step": 8126 }, { "epoch": 2.4668386705114584, "grad_norm": 0.7273897528648376, "learning_rate": 5.890958793155816e-05, "loss": 1.5513, "step": 8127 }, { "epoch": 2.46714220670815, "grad_norm": 0.7605626583099365, "learning_rate": 5.890452566568796e-05, "loss": 1.0746, "step": 8128 }, { "epoch": 2.4674457429048413, "grad_norm": 0.7195847034454346, "learning_rate": 5.889946339981776e-05, "loss": 1.4435, "step": 8129 }, { "epoch": 2.467749279101533, "grad_norm": 0.9151094555854797, "learning_rate": 5.889440113394756e-05, "loss": 1.4713, "step": 8130 }, { "epoch": 2.468052815298224, "grad_norm": 0.5849425196647644, "learning_rate": 5.888933886807736e-05, "loss": 1.0833, "step": 8131 }, { "epoch": 2.468356351494916, "grad_norm": 0.8793689608573914, "learning_rate": 5.8884276602207156e-05, "loss": 1.2899, "step": 8132 }, { "epoch": 2.468659887691607, "grad_norm": 0.7815329432487488, "learning_rate": 5.887921433633695e-05, "loss": 1.4963, "step": 8133 }, { "epoch": 2.4689634238882987, "grad_norm": 0.8761633038520813, "learning_rate": 5.8874152070466746e-05, "loss": 1.4107, "step": 8134 }, { "epoch": 2.4692669600849904, "grad_norm": 0.7818664312362671, "learning_rate": 5.886908980459655e-05, "loss": 1.7088, "step": 8135 }, { "epoch": 2.4695704962816816, "grad_norm": 0.8316283226013184, "learning_rate": 5.886402753872634e-05, "loss": 1.1892, "step": 8136 }, { "epoch": 2.4698740324783732, "grad_norm": 0.8171117901802063, "learning_rate": 5.885896527285614e-05, "loss": 1.231, "step": 8137 }, { "epoch": 2.4701775686750644, "grad_norm": 0.720648467540741, "learning_rate": 5.885390300698593e-05, "loss": 1.5502, "step": 8138 }, { "epoch": 2.470481104871756, "grad_norm": 0.7011475563049316, "learning_rate": 5.884884074111573e-05, "loss": 1.2047, "step": 8139 }, { "epoch": 2.4707846410684473, "grad_norm": 0.7628121376037598, "learning_rate": 5.884377847524553e-05, "loss": 1.7777, "step": 8140 }, { "epoch": 2.471088177265139, "grad_norm": 0.7602118253707886, "learning_rate": 5.883871620937532e-05, "loss": 1.0339, "step": 8141 }, { "epoch": 2.47139171346183, "grad_norm": 0.724816083908081, "learning_rate": 5.883365394350512e-05, "loss": 1.4265, "step": 8142 }, { "epoch": 2.471695249658522, "grad_norm": 0.8720203042030334, "learning_rate": 5.882859167763491e-05, "loss": 1.3592, "step": 8143 }, { "epoch": 2.471998785855213, "grad_norm": 0.8329253196716309, "learning_rate": 5.882352941176471e-05, "loss": 1.3344, "step": 8144 }, { "epoch": 2.4723023220519047, "grad_norm": 0.6739696264266968, "learning_rate": 5.881846714589451e-05, "loss": 1.3349, "step": 8145 }, { "epoch": 2.4726058582485964, "grad_norm": 0.7025054693222046, "learning_rate": 5.8813404880024304e-05, "loss": 1.3175, "step": 8146 }, { "epoch": 2.4729093944452876, "grad_norm": 0.5982341170310974, "learning_rate": 5.88083426141541e-05, "loss": 1.4763, "step": 8147 }, { "epoch": 2.4732129306419792, "grad_norm": 0.8387122750282288, "learning_rate": 5.8803280348283893e-05, "loss": 1.5114, "step": 8148 }, { "epoch": 2.4735164668386704, "grad_norm": 0.7305405139923096, "learning_rate": 5.8798218082413695e-05, "loss": 0.7805, "step": 8149 }, { "epoch": 2.473820003035362, "grad_norm": 0.7596865296363831, "learning_rate": 5.879315581654349e-05, "loss": 1.4535, "step": 8150 }, { "epoch": 2.4741235392320533, "grad_norm": 0.7964050769805908, "learning_rate": 5.8788093550673285e-05, "loss": 1.5063, "step": 8151 }, { "epoch": 2.474427075428745, "grad_norm": 0.8814842104911804, "learning_rate": 5.878303128480308e-05, "loss": 1.2121, "step": 8152 }, { "epoch": 2.474730611625436, "grad_norm": 0.5663175582885742, "learning_rate": 5.8777969018932874e-05, "loss": 1.6235, "step": 8153 }, { "epoch": 2.475034147822128, "grad_norm": 0.6952011585235596, "learning_rate": 5.8772906753062676e-05, "loss": 1.651, "step": 8154 }, { "epoch": 2.475337684018819, "grad_norm": 0.9040622711181641, "learning_rate": 5.876784448719247e-05, "loss": 1.4159, "step": 8155 }, { "epoch": 2.4756412202155107, "grad_norm": 0.656825840473175, "learning_rate": 5.8762782221322265e-05, "loss": 1.2716, "step": 8156 }, { "epoch": 2.4759447564122024, "grad_norm": 0.7671054005622864, "learning_rate": 5.875771995545206e-05, "loss": 1.3454, "step": 8157 }, { "epoch": 2.4762482926088936, "grad_norm": 0.677207887172699, "learning_rate": 5.8752657689581855e-05, "loss": 1.1305, "step": 8158 }, { "epoch": 2.476551828805585, "grad_norm": 0.6357643008232117, "learning_rate": 5.8747595423711657e-05, "loss": 0.4722, "step": 8159 }, { "epoch": 2.4768553650022764, "grad_norm": 0.9290893077850342, "learning_rate": 5.874253315784145e-05, "loss": 1.1663, "step": 8160 }, { "epoch": 2.477158901198968, "grad_norm": 0.9151197671890259, "learning_rate": 5.8737470891971246e-05, "loss": 1.818, "step": 8161 }, { "epoch": 2.4774624373956593, "grad_norm": 0.7219125628471375, "learning_rate": 5.873240862610104e-05, "loss": 0.9134, "step": 8162 }, { "epoch": 2.477765973592351, "grad_norm": 0.6985493898391724, "learning_rate": 5.872734636023084e-05, "loss": 1.5476, "step": 8163 }, { "epoch": 2.478069509789042, "grad_norm": 0.8199888467788696, "learning_rate": 5.872228409436064e-05, "loss": 1.2895, "step": 8164 }, { "epoch": 2.478373045985734, "grad_norm": 0.7273992300033569, "learning_rate": 5.871722182849043e-05, "loss": 1.4617, "step": 8165 }, { "epoch": 2.478676582182425, "grad_norm": 0.7608432173728943, "learning_rate": 5.871215956262023e-05, "loss": 0.5711, "step": 8166 }, { "epoch": 2.4789801183791167, "grad_norm": 0.6103653907775879, "learning_rate": 5.870709729675002e-05, "loss": 0.7705, "step": 8167 }, { "epoch": 2.4792836545758083, "grad_norm": 1.349910020828247, "learning_rate": 5.870203503087982e-05, "loss": 1.0938, "step": 8168 }, { "epoch": 2.4795871907724996, "grad_norm": 0.7767732739448547, "learning_rate": 5.869697276500962e-05, "loss": 1.3891, "step": 8169 }, { "epoch": 2.479890726969191, "grad_norm": 0.6857088208198547, "learning_rate": 5.869191049913941e-05, "loss": 1.3864, "step": 8170 }, { "epoch": 2.4801942631658824, "grad_norm": 0.694058358669281, "learning_rate": 5.868684823326921e-05, "loss": 1.1043, "step": 8171 }, { "epoch": 2.480497799362574, "grad_norm": 0.7979057431221008, "learning_rate": 5.8681785967399e-05, "loss": 1.3119, "step": 8172 }, { "epoch": 2.4808013355592653, "grad_norm": 0.7226840257644653, "learning_rate": 5.8676723701528804e-05, "loss": 1.3461, "step": 8173 }, { "epoch": 2.481104871755957, "grad_norm": 1.1542717218399048, "learning_rate": 5.86716614356586e-05, "loss": 0.9093, "step": 8174 }, { "epoch": 2.481408407952648, "grad_norm": 0.8051673173904419, "learning_rate": 5.866659916978841e-05, "loss": 1.141, "step": 8175 }, { "epoch": 2.48171194414934, "grad_norm": 0.7753127217292786, "learning_rate": 5.86615369039182e-05, "loss": 0.6627, "step": 8176 }, { "epoch": 2.482015480346031, "grad_norm": 0.9074029922485352, "learning_rate": 5.8656474638048e-05, "loss": 1.2709, "step": 8177 }, { "epoch": 2.4823190165427227, "grad_norm": 0.9374541640281677, "learning_rate": 5.865141237217779e-05, "loss": 1.0258, "step": 8178 }, { "epoch": 2.4826225527394143, "grad_norm": 0.7733614444732666, "learning_rate": 5.864635010630759e-05, "loss": 1.4448, "step": 8179 }, { "epoch": 2.4829260889361056, "grad_norm": 0.8372223973274231, "learning_rate": 5.864128784043739e-05, "loss": 1.4152, "step": 8180 }, { "epoch": 2.483229625132797, "grad_norm": 1.0079917907714844, "learning_rate": 5.863622557456718e-05, "loss": 1.1764, "step": 8181 }, { "epoch": 2.4835331613294884, "grad_norm": 0.8187892436981201, "learning_rate": 5.863116330869698e-05, "loss": 0.805, "step": 8182 }, { "epoch": 2.48383669752618, "grad_norm": 0.618904173374176, "learning_rate": 5.862610104282677e-05, "loss": 1.4071, "step": 8183 }, { "epoch": 2.4841402337228713, "grad_norm": 0.8110464811325073, "learning_rate": 5.8621038776956574e-05, "loss": 1.5362, "step": 8184 }, { "epoch": 2.484443769919563, "grad_norm": 0.8422300815582275, "learning_rate": 5.861597651108637e-05, "loss": 0.8139, "step": 8185 }, { "epoch": 2.4847473061162546, "grad_norm": 0.9874032139778137, "learning_rate": 5.8610914245216163e-05, "loss": 1.3182, "step": 8186 }, { "epoch": 2.485050842312946, "grad_norm": 0.8307647109031677, "learning_rate": 5.860585197934596e-05, "loss": 0.6868, "step": 8187 }, { "epoch": 2.485354378509637, "grad_norm": 0.6423392295837402, "learning_rate": 5.860078971347576e-05, "loss": 1.3965, "step": 8188 }, { "epoch": 2.4856579147063287, "grad_norm": 0.7162687182426453, "learning_rate": 5.8595727447605555e-05, "loss": 0.9992, "step": 8189 }, { "epoch": 2.4859614509030203, "grad_norm": 0.6741542816162109, "learning_rate": 5.859066518173535e-05, "loss": 0.7475, "step": 8190 }, { "epoch": 2.4862649870997116, "grad_norm": 0.779192328453064, "learning_rate": 5.8585602915865144e-05, "loss": 1.0529, "step": 8191 }, { "epoch": 2.486568523296403, "grad_norm": 0.6369726061820984, "learning_rate": 5.858054064999494e-05, "loss": 1.0056, "step": 8192 }, { "epoch": 2.4868720594930944, "grad_norm": 2.1895089149475098, "learning_rate": 5.857547838412474e-05, "loss": 1.3649, "step": 8193 }, { "epoch": 2.487175595689786, "grad_norm": 0.9455059170722961, "learning_rate": 5.8570416118254535e-05, "loss": 1.2689, "step": 8194 }, { "epoch": 2.4874791318864773, "grad_norm": 0.7931092977523804, "learning_rate": 5.856535385238433e-05, "loss": 1.2326, "step": 8195 }, { "epoch": 2.487782668083169, "grad_norm": 0.7546300888061523, "learning_rate": 5.8560291586514125e-05, "loss": 1.1597, "step": 8196 }, { "epoch": 2.4880862042798606, "grad_norm": 0.7067909836769104, "learning_rate": 5.855522932064392e-05, "loss": 1.7046, "step": 8197 }, { "epoch": 2.488389740476552, "grad_norm": 1.1697198152542114, "learning_rate": 5.855016705477372e-05, "loss": 0.7381, "step": 8198 }, { "epoch": 2.4886932766732435, "grad_norm": 0.5753573775291443, "learning_rate": 5.8545104788903516e-05, "loss": 1.6113, "step": 8199 }, { "epoch": 2.4889968128699347, "grad_norm": 0.6945953965187073, "learning_rate": 5.854004252303331e-05, "loss": 1.5555, "step": 8200 }, { "epoch": 2.4893003490666263, "grad_norm": 0.7350156903266907, "learning_rate": 5.8534980257163106e-05, "loss": 1.5168, "step": 8201 }, { "epoch": 2.4896038852633176, "grad_norm": 0.8408716917037964, "learning_rate": 5.852991799129291e-05, "loss": 0.6088, "step": 8202 }, { "epoch": 2.489907421460009, "grad_norm": 0.4795108735561371, "learning_rate": 5.85248557254227e-05, "loss": 1.3906, "step": 8203 }, { "epoch": 2.4902109576567004, "grad_norm": 0.6167114973068237, "learning_rate": 5.85197934595525e-05, "loss": 1.2835, "step": 8204 }, { "epoch": 2.490514493853392, "grad_norm": 0.8909234404563904, "learning_rate": 5.851473119368229e-05, "loss": 1.0097, "step": 8205 }, { "epoch": 2.4908180300500833, "grad_norm": 0.8499066233634949, "learning_rate": 5.8509668927812086e-05, "loss": 1.3819, "step": 8206 }, { "epoch": 2.491121566246775, "grad_norm": 0.8384965062141418, "learning_rate": 5.850460666194189e-05, "loss": 1.4173, "step": 8207 }, { "epoch": 2.4914251024434666, "grad_norm": 0.7784766554832458, "learning_rate": 5.849954439607168e-05, "loss": 1.1626, "step": 8208 }, { "epoch": 2.491728638640158, "grad_norm": 0.9458047747612, "learning_rate": 5.849448213020148e-05, "loss": 0.8844, "step": 8209 }, { "epoch": 2.4920321748368495, "grad_norm": 0.8930952548980713, "learning_rate": 5.848941986433127e-05, "loss": 1.1677, "step": 8210 }, { "epoch": 2.4923357110335407, "grad_norm": 1.381670594215393, "learning_rate": 5.848435759846107e-05, "loss": 0.5978, "step": 8211 }, { "epoch": 2.4926392472302323, "grad_norm": 0.8100841045379639, "learning_rate": 5.847929533259087e-05, "loss": 1.0791, "step": 8212 }, { "epoch": 2.4929427834269235, "grad_norm": 1.0851428508758545, "learning_rate": 5.8474233066720664e-05, "loss": 1.2838, "step": 8213 }, { "epoch": 2.493246319623615, "grad_norm": 0.6545624732971191, "learning_rate": 5.846917080085046e-05, "loss": 1.1591, "step": 8214 }, { "epoch": 2.4935498558203064, "grad_norm": 0.6443068385124207, "learning_rate": 5.846410853498025e-05, "loss": 1.056, "step": 8215 }, { "epoch": 2.493853392016998, "grad_norm": 0.8362240791320801, "learning_rate": 5.8459046269110055e-05, "loss": 1.011, "step": 8216 }, { "epoch": 2.4941569282136893, "grad_norm": 0.7914416193962097, "learning_rate": 5.845398400323985e-05, "loss": 1.3633, "step": 8217 }, { "epoch": 2.494460464410381, "grad_norm": 0.6533204317092896, "learning_rate": 5.8448921737369644e-05, "loss": 1.4868, "step": 8218 }, { "epoch": 2.4947640006070726, "grad_norm": 0.8610125184059143, "learning_rate": 5.844385947149945e-05, "loss": 1.2977, "step": 8219 }, { "epoch": 2.495067536803764, "grad_norm": 0.7882906198501587, "learning_rate": 5.843879720562925e-05, "loss": 1.2888, "step": 8220 }, { "epoch": 2.4953710730004555, "grad_norm": 0.6866266131401062, "learning_rate": 5.843373493975904e-05, "loss": 1.372, "step": 8221 }, { "epoch": 2.4956746091971467, "grad_norm": 0.7847753763198853, "learning_rate": 5.842867267388884e-05, "loss": 1.4631, "step": 8222 }, { "epoch": 2.4959781453938383, "grad_norm": 0.7031978964805603, "learning_rate": 5.842361040801864e-05, "loss": 1.3046, "step": 8223 }, { "epoch": 2.4962816815905295, "grad_norm": 0.8611195087432861, "learning_rate": 5.841854814214843e-05, "loss": 1.0336, "step": 8224 }, { "epoch": 2.496585217787221, "grad_norm": 0.7341358065605164, "learning_rate": 5.841348587627823e-05, "loss": 1.4171, "step": 8225 }, { "epoch": 2.4968887539839124, "grad_norm": 0.7016193866729736, "learning_rate": 5.840842361040802e-05, "loss": 1.0846, "step": 8226 }, { "epoch": 2.497192290180604, "grad_norm": 0.6806894540786743, "learning_rate": 5.8403361344537825e-05, "loss": 1.5947, "step": 8227 }, { "epoch": 2.4974958263772953, "grad_norm": 0.7414386868476868, "learning_rate": 5.839829907866762e-05, "loss": 1.4735, "step": 8228 }, { "epoch": 2.497799362573987, "grad_norm": 0.7096249461174011, "learning_rate": 5.8393236812797414e-05, "loss": 1.6038, "step": 8229 }, { "epoch": 2.4981028987706786, "grad_norm": 0.6129046678543091, "learning_rate": 5.838817454692721e-05, "loss": 1.366, "step": 8230 }, { "epoch": 2.49840643496737, "grad_norm": 0.6488550305366516, "learning_rate": 5.8383112281057004e-05, "loss": 1.1149, "step": 8231 }, { "epoch": 2.4987099711640615, "grad_norm": 0.7201668620109558, "learning_rate": 5.8378050015186805e-05, "loss": 1.4682, "step": 8232 }, { "epoch": 2.4990135073607527, "grad_norm": 0.6897460222244263, "learning_rate": 5.83729877493166e-05, "loss": 1.5793, "step": 8233 }, { "epoch": 2.4993170435574443, "grad_norm": 0.7777083516120911, "learning_rate": 5.8367925483446395e-05, "loss": 1.2701, "step": 8234 }, { "epoch": 2.4996205797541355, "grad_norm": 0.7976280450820923, "learning_rate": 5.836286321757619e-05, "loss": 1.3406, "step": 8235 }, { "epoch": 2.499924115950827, "grad_norm": 0.9655071496963501, "learning_rate": 5.8357800951705984e-05, "loss": 1.47, "step": 8236 }, { "epoch": 2.500227652147519, "grad_norm": 1.0588605403900146, "learning_rate": 5.8352738685835786e-05, "loss": 1.6478, "step": 8237 }, { "epoch": 2.50053118834421, "grad_norm": 0.8263594508171082, "learning_rate": 5.834767641996558e-05, "loss": 1.2701, "step": 8238 }, { "epoch": 2.5008347245409013, "grad_norm": 0.8433352112770081, "learning_rate": 5.8342614154095376e-05, "loss": 1.1952, "step": 8239 }, { "epoch": 2.501138260737593, "grad_norm": 0.8610383868217468, "learning_rate": 5.833755188822517e-05, "loss": 1.2973, "step": 8240 }, { "epoch": 2.5014417969342846, "grad_norm": 0.8831159472465515, "learning_rate": 5.833248962235497e-05, "loss": 1.2945, "step": 8241 }, { "epoch": 2.501745333130976, "grad_norm": 0.8238030672073364, "learning_rate": 5.832742735648477e-05, "loss": 1.0743, "step": 8242 }, { "epoch": 2.5020488693276675, "grad_norm": 0.694869875907898, "learning_rate": 5.832236509061456e-05, "loss": 1.284, "step": 8243 }, { "epoch": 2.5023524055243587, "grad_norm": 0.8804123997688293, "learning_rate": 5.8317302824744356e-05, "loss": 1.2777, "step": 8244 }, { "epoch": 2.5026559417210503, "grad_norm": 0.849064290523529, "learning_rate": 5.831224055887415e-05, "loss": 1.3544, "step": 8245 }, { "epoch": 2.5029594779177415, "grad_norm": 0.8048885464668274, "learning_rate": 5.830717829300395e-05, "loss": 1.4033, "step": 8246 }, { "epoch": 2.503263014114433, "grad_norm": 1.1192904710769653, "learning_rate": 5.830211602713375e-05, "loss": 1.2505, "step": 8247 }, { "epoch": 2.503566550311125, "grad_norm": 0.7913041114807129, "learning_rate": 5.829705376126354e-05, "loss": 1.8518, "step": 8248 }, { "epoch": 2.503870086507816, "grad_norm": 0.8328375220298767, "learning_rate": 5.829199149539334e-05, "loss": 1.0964, "step": 8249 }, { "epoch": 2.5041736227045073, "grad_norm": 0.7091400027275085, "learning_rate": 5.828692922952313e-05, "loss": 1.4837, "step": 8250 }, { "epoch": 2.504477158901199, "grad_norm": 0.7294608354568481, "learning_rate": 5.8281866963652933e-05, "loss": 1.229, "step": 8251 }, { "epoch": 2.5047806950978906, "grad_norm": 0.6839693784713745, "learning_rate": 5.827680469778273e-05, "loss": 0.6826, "step": 8252 }, { "epoch": 2.505084231294582, "grad_norm": 0.7262798547744751, "learning_rate": 5.827174243191252e-05, "loss": 1.535, "step": 8253 }, { "epoch": 2.5053877674912735, "grad_norm": 0.643129289150238, "learning_rate": 5.826668016604232e-05, "loss": 1.3863, "step": 8254 }, { "epoch": 2.5056913036879647, "grad_norm": 0.6908575296401978, "learning_rate": 5.826161790017212e-05, "loss": 1.3708, "step": 8255 }, { "epoch": 2.5059948398846563, "grad_norm": 0.7086175084114075, "learning_rate": 5.8256555634301914e-05, "loss": 1.5198, "step": 8256 }, { "epoch": 2.5062983760813475, "grad_norm": 0.8000795245170593, "learning_rate": 5.825149336843171e-05, "loss": 1.2737, "step": 8257 }, { "epoch": 2.506601912278039, "grad_norm": 0.7499237656593323, "learning_rate": 5.8246431102561504e-05, "loss": 1.803, "step": 8258 }, { "epoch": 2.506905448474731, "grad_norm": 0.7838631868362427, "learning_rate": 5.82413688366913e-05, "loss": 1.3923, "step": 8259 }, { "epoch": 2.507208984671422, "grad_norm": 0.8873981833457947, "learning_rate": 5.82363065708211e-05, "loss": 1.3988, "step": 8260 }, { "epoch": 2.5075125208681133, "grad_norm": 0.6524555087089539, "learning_rate": 5.8231244304950895e-05, "loss": 1.5845, "step": 8261 }, { "epoch": 2.507816057064805, "grad_norm": 0.6309491991996765, "learning_rate": 5.822618203908069e-05, "loss": 1.6408, "step": 8262 }, { "epoch": 2.5081195932614966, "grad_norm": 0.745837926864624, "learning_rate": 5.8221119773210485e-05, "loss": 1.3983, "step": 8263 }, { "epoch": 2.508423129458188, "grad_norm": 0.54892897605896, "learning_rate": 5.821605750734029e-05, "loss": 1.3872, "step": 8264 }, { "epoch": 2.5087266656548795, "grad_norm": 0.8010538816452026, "learning_rate": 5.821099524147009e-05, "loss": 0.8688, "step": 8265 }, { "epoch": 2.5090302018515707, "grad_norm": 0.7392093539237976, "learning_rate": 5.820593297559989e-05, "loss": 1.5812, "step": 8266 }, { "epoch": 2.5093337380482623, "grad_norm": 0.8679747581481934, "learning_rate": 5.8200870709729684e-05, "loss": 0.9848, "step": 8267 }, { "epoch": 2.5096372742449535, "grad_norm": 0.66532963514328, "learning_rate": 5.819580844385948e-05, "loss": 1.3337, "step": 8268 }, { "epoch": 2.509940810441645, "grad_norm": 0.7652869820594788, "learning_rate": 5.8190746177989274e-05, "loss": 1.3081, "step": 8269 }, { "epoch": 2.510244346638337, "grad_norm": 0.7931615114212036, "learning_rate": 5.818568391211907e-05, "loss": 1.3818, "step": 8270 }, { "epoch": 2.510547882835028, "grad_norm": 0.7917905449867249, "learning_rate": 5.818062164624887e-05, "loss": 1.4077, "step": 8271 }, { "epoch": 2.5108514190317193, "grad_norm": 1.0687652826309204, "learning_rate": 5.8175559380378665e-05, "loss": 1.1502, "step": 8272 }, { "epoch": 2.511154955228411, "grad_norm": 0.5490748882293701, "learning_rate": 5.817049711450846e-05, "loss": 1.8143, "step": 8273 }, { "epoch": 2.5114584914251026, "grad_norm": 0.549918532371521, "learning_rate": 5.8165434848638254e-05, "loss": 1.0906, "step": 8274 }, { "epoch": 2.511762027621794, "grad_norm": 0.7719300389289856, "learning_rate": 5.816037258276805e-05, "loss": 1.5224, "step": 8275 }, { "epoch": 2.5120655638184854, "grad_norm": 0.9084600210189819, "learning_rate": 5.815531031689785e-05, "loss": 1.5819, "step": 8276 }, { "epoch": 2.5123691000151767, "grad_norm": 0.7480749487876892, "learning_rate": 5.8150248051027646e-05, "loss": 0.5482, "step": 8277 }, { "epoch": 2.5126726362118683, "grad_norm": 0.9660145044326782, "learning_rate": 5.814518578515744e-05, "loss": 1.2091, "step": 8278 }, { "epoch": 2.5129761724085595, "grad_norm": 0.7917237877845764, "learning_rate": 5.8140123519287235e-05, "loss": 1.2719, "step": 8279 }, { "epoch": 2.513279708605251, "grad_norm": 0.6512066125869751, "learning_rate": 5.813506125341704e-05, "loss": 1.2307, "step": 8280 }, { "epoch": 2.513583244801943, "grad_norm": 0.9117663502693176, "learning_rate": 5.812999898754683e-05, "loss": 1.0733, "step": 8281 }, { "epoch": 2.513886780998634, "grad_norm": 0.7804828882217407, "learning_rate": 5.8124936721676626e-05, "loss": 1.2834, "step": 8282 }, { "epoch": 2.5141903171953257, "grad_norm": 0.6926867365837097, "learning_rate": 5.811987445580642e-05, "loss": 1.0372, "step": 8283 }, { "epoch": 2.514493853392017, "grad_norm": 0.7769339680671692, "learning_rate": 5.8114812189936216e-05, "loss": 1.3893, "step": 8284 }, { "epoch": 2.5147973895887086, "grad_norm": 0.8659480810165405, "learning_rate": 5.810974992406602e-05, "loss": 1.3224, "step": 8285 }, { "epoch": 2.5151009257854, "grad_norm": 0.8789883852005005, "learning_rate": 5.810468765819581e-05, "loss": 1.0516, "step": 8286 }, { "epoch": 2.5154044619820914, "grad_norm": 0.7343612909317017, "learning_rate": 5.809962539232561e-05, "loss": 1.4813, "step": 8287 }, { "epoch": 2.5157079981787827, "grad_norm": 0.7558401226997375, "learning_rate": 5.80945631264554e-05, "loss": 1.5583, "step": 8288 }, { "epoch": 2.5160115343754743, "grad_norm": 0.5077150464057922, "learning_rate": 5.80895008605852e-05, "loss": 1.7516, "step": 8289 }, { "epoch": 2.5163150705721655, "grad_norm": 0.8321554660797119, "learning_rate": 5.8084438594715e-05, "loss": 1.2849, "step": 8290 }, { "epoch": 2.516618606768857, "grad_norm": 0.7427679896354675, "learning_rate": 5.807937632884479e-05, "loss": 1.2341, "step": 8291 }, { "epoch": 2.516922142965549, "grad_norm": 0.7994052767753601, "learning_rate": 5.807431406297459e-05, "loss": 1.3661, "step": 8292 }, { "epoch": 2.51722567916224, "grad_norm": 0.8299456834793091, "learning_rate": 5.806925179710438e-05, "loss": 1.0751, "step": 8293 }, { "epoch": 2.5175292153589317, "grad_norm": 1.3051421642303467, "learning_rate": 5.8064189531234184e-05, "loss": 1.1627, "step": 8294 }, { "epoch": 2.517832751555623, "grad_norm": 0.8921769857406616, "learning_rate": 5.805912726536398e-05, "loss": 1.3073, "step": 8295 }, { "epoch": 2.5181362877523146, "grad_norm": 0.78340744972229, "learning_rate": 5.8054064999493774e-05, "loss": 1.7102, "step": 8296 }, { "epoch": 2.518439823949006, "grad_norm": 0.6377023458480835, "learning_rate": 5.804900273362357e-05, "loss": 1.3017, "step": 8297 }, { "epoch": 2.5187433601456974, "grad_norm": 0.8279851675033569, "learning_rate": 5.804394046775336e-05, "loss": 1.3353, "step": 8298 }, { "epoch": 2.519046896342389, "grad_norm": 0.9127827286720276, "learning_rate": 5.8038878201883165e-05, "loss": 1.5184, "step": 8299 }, { "epoch": 2.5193504325390803, "grad_norm": 0.6756750345230103, "learning_rate": 5.803381593601296e-05, "loss": 1.2604, "step": 8300 }, { "epoch": 2.5196539687357715, "grad_norm": 0.6783917546272278, "learning_rate": 5.8028753670142755e-05, "loss": 0.9262, "step": 8301 }, { "epoch": 2.519957504932463, "grad_norm": 0.7022363543510437, "learning_rate": 5.802369140427255e-05, "loss": 1.4863, "step": 8302 }, { "epoch": 2.520261041129155, "grad_norm": 0.6683651804924011, "learning_rate": 5.8018629138402344e-05, "loss": 1.3591, "step": 8303 }, { "epoch": 2.520564577325846, "grad_norm": 0.7177444100379944, "learning_rate": 5.8013566872532146e-05, "loss": 1.6204, "step": 8304 }, { "epoch": 2.5208681135225377, "grad_norm": 0.7411631941795349, "learning_rate": 5.800850460666194e-05, "loss": 1.4532, "step": 8305 }, { "epoch": 2.521171649719229, "grad_norm": 1.0475449562072754, "learning_rate": 5.8003442340791735e-05, "loss": 1.3259, "step": 8306 }, { "epoch": 2.5214751859159206, "grad_norm": 0.7804305553436279, "learning_rate": 5.799838007492153e-05, "loss": 1.4362, "step": 8307 }, { "epoch": 2.521778722112612, "grad_norm": 0.6839709877967834, "learning_rate": 5.799331780905134e-05, "loss": 1.288, "step": 8308 }, { "epoch": 2.5220822583093034, "grad_norm": 1.0387964248657227, "learning_rate": 5.798825554318113e-05, "loss": 1.1246, "step": 8309 }, { "epoch": 2.522385794505995, "grad_norm": 0.7365851998329163, "learning_rate": 5.7983193277310935e-05, "loss": 1.3858, "step": 8310 }, { "epoch": 2.5226893307026863, "grad_norm": 0.9452034831047058, "learning_rate": 5.797813101144073e-05, "loss": 1.2232, "step": 8311 }, { "epoch": 2.5229928668993775, "grad_norm": 0.8562889099121094, "learning_rate": 5.7973068745570524e-05, "loss": 1.1791, "step": 8312 }, { "epoch": 2.523296403096069, "grad_norm": 0.7401477694511414, "learning_rate": 5.796800647970032e-05, "loss": 1.4497, "step": 8313 }, { "epoch": 2.523599939292761, "grad_norm": 0.8923379778862, "learning_rate": 5.7962944213830114e-05, "loss": 1.4017, "step": 8314 }, { "epoch": 2.523903475489452, "grad_norm": 0.8456719517707825, "learning_rate": 5.7957881947959916e-05, "loss": 0.9548, "step": 8315 }, { "epoch": 2.5242070116861437, "grad_norm": 0.7915187478065491, "learning_rate": 5.795281968208971e-05, "loss": 1.3552, "step": 8316 }, { "epoch": 2.524510547882835, "grad_norm": 0.8779439926147461, "learning_rate": 5.7947757416219505e-05, "loss": 1.4899, "step": 8317 }, { "epoch": 2.5248140840795266, "grad_norm": 0.8715509176254272, "learning_rate": 5.79426951503493e-05, "loss": 1.3182, "step": 8318 }, { "epoch": 2.525117620276218, "grad_norm": 0.7651680111885071, "learning_rate": 5.79376328844791e-05, "loss": 1.6216, "step": 8319 }, { "epoch": 2.5254211564729094, "grad_norm": 0.7060753703117371, "learning_rate": 5.7932570618608896e-05, "loss": 1.4024, "step": 8320 }, { "epoch": 2.525724692669601, "grad_norm": 0.9709405303001404, "learning_rate": 5.792750835273869e-05, "loss": 1.3222, "step": 8321 }, { "epoch": 2.5260282288662923, "grad_norm": 0.6587694883346558, "learning_rate": 5.7922446086868486e-05, "loss": 1.5824, "step": 8322 }, { "epoch": 2.5263317650629835, "grad_norm": 0.6169544458389282, "learning_rate": 5.791738382099828e-05, "loss": 1.1027, "step": 8323 }, { "epoch": 2.526635301259675, "grad_norm": 0.7203264236450195, "learning_rate": 5.791232155512808e-05, "loss": 1.1206, "step": 8324 }, { "epoch": 2.526938837456367, "grad_norm": 0.7035651206970215, "learning_rate": 5.790725928925788e-05, "loss": 1.2364, "step": 8325 }, { "epoch": 2.527242373653058, "grad_norm": 0.7010215520858765, "learning_rate": 5.790219702338767e-05, "loss": 1.0687, "step": 8326 }, { "epoch": 2.5275459098497497, "grad_norm": 0.8027076125144958, "learning_rate": 5.789713475751747e-05, "loss": 1.2478, "step": 8327 }, { "epoch": 2.527849446046441, "grad_norm": 0.776627779006958, "learning_rate": 5.789207249164726e-05, "loss": 1.4251, "step": 8328 }, { "epoch": 2.5281529822431326, "grad_norm": 0.7934873104095459, "learning_rate": 5.788701022577706e-05, "loss": 1.5188, "step": 8329 }, { "epoch": 2.5284565184398238, "grad_norm": 0.6677809953689575, "learning_rate": 5.788194795990686e-05, "loss": 1.2741, "step": 8330 }, { "epoch": 2.5287600546365154, "grad_norm": 0.8031719923019409, "learning_rate": 5.787688569403665e-05, "loss": 1.3056, "step": 8331 }, { "epoch": 2.529063590833207, "grad_norm": 0.616132915019989, "learning_rate": 5.787182342816645e-05, "loss": 1.4796, "step": 8332 }, { "epoch": 2.5293671270298983, "grad_norm": 0.7809193134307861, "learning_rate": 5.786676116229625e-05, "loss": 1.2991, "step": 8333 }, { "epoch": 2.5296706632265895, "grad_norm": 0.8768143653869629, "learning_rate": 5.7861698896426044e-05, "loss": 1.3272, "step": 8334 }, { "epoch": 2.529974199423281, "grad_norm": 0.9282557368278503, "learning_rate": 5.785663663055584e-05, "loss": 1.372, "step": 8335 }, { "epoch": 2.530277735619973, "grad_norm": 0.7703008055686951, "learning_rate": 5.785157436468563e-05, "loss": 1.9085, "step": 8336 }, { "epoch": 2.530581271816664, "grad_norm": 0.8404890298843384, "learning_rate": 5.784651209881543e-05, "loss": 1.0276, "step": 8337 }, { "epoch": 2.5308848080133557, "grad_norm": 0.7915540933609009, "learning_rate": 5.784144983294523e-05, "loss": 1.5161, "step": 8338 }, { "epoch": 2.531188344210047, "grad_norm": 0.9564916491508484, "learning_rate": 5.7836387567075024e-05, "loss": 1.1961, "step": 8339 }, { "epoch": 2.5314918804067386, "grad_norm": 0.8775029182434082, "learning_rate": 5.783132530120482e-05, "loss": 1.4621, "step": 8340 }, { "epoch": 2.5317954166034298, "grad_norm": 0.8440585136413574, "learning_rate": 5.7826263035334614e-05, "loss": 0.8856, "step": 8341 }, { "epoch": 2.5320989528001214, "grad_norm": 0.8222201466560364, "learning_rate": 5.782120076946441e-05, "loss": 1.0385, "step": 8342 }, { "epoch": 2.532402488996813, "grad_norm": 0.7944109439849854, "learning_rate": 5.781613850359421e-05, "loss": 1.2259, "step": 8343 }, { "epoch": 2.5327060251935043, "grad_norm": 0.6671246886253357, "learning_rate": 5.7811076237724005e-05, "loss": 1.1106, "step": 8344 }, { "epoch": 2.5330095613901955, "grad_norm": 0.7811467051506042, "learning_rate": 5.78060139718538e-05, "loss": 1.2432, "step": 8345 }, { "epoch": 2.533313097586887, "grad_norm": 0.8934546113014221, "learning_rate": 5.7800951705983595e-05, "loss": 1.2128, "step": 8346 }, { "epoch": 2.533616633783579, "grad_norm": 0.7736660838127136, "learning_rate": 5.7795889440113396e-05, "loss": 1.5971, "step": 8347 }, { "epoch": 2.53392016998027, "grad_norm": 0.8068675398826599, "learning_rate": 5.779082717424319e-05, "loss": 0.9002, "step": 8348 }, { "epoch": 2.5342237061769617, "grad_norm": 0.7460306882858276, "learning_rate": 5.7785764908372986e-05, "loss": 1.0504, "step": 8349 }, { "epoch": 2.534527242373653, "grad_norm": 0.8296782970428467, "learning_rate": 5.778070264250278e-05, "loss": 1.4817, "step": 8350 }, { "epoch": 2.5348307785703446, "grad_norm": 1.005679965019226, "learning_rate": 5.7775640376632576e-05, "loss": 0.9825, "step": 8351 }, { "epoch": 2.5351343147670358, "grad_norm": 0.7980281710624695, "learning_rate": 5.777057811076238e-05, "loss": 1.3191, "step": 8352 }, { "epoch": 2.5354378509637274, "grad_norm": 0.7153705954551697, "learning_rate": 5.776551584489218e-05, "loss": 1.2975, "step": 8353 }, { "epoch": 2.535741387160419, "grad_norm": 0.8253535628318787, "learning_rate": 5.776045357902198e-05, "loss": 1.5653, "step": 8354 }, { "epoch": 2.5360449233571103, "grad_norm": 0.7665138244628906, "learning_rate": 5.7755391313151775e-05, "loss": 1.4909, "step": 8355 }, { "epoch": 2.536348459553802, "grad_norm": 0.9321874380111694, "learning_rate": 5.775032904728157e-05, "loss": 0.7591, "step": 8356 }, { "epoch": 2.536651995750493, "grad_norm": 0.726944625377655, "learning_rate": 5.7745266781411365e-05, "loss": 1.3542, "step": 8357 }, { "epoch": 2.536955531947185, "grad_norm": 0.8286332488059998, "learning_rate": 5.7740204515541166e-05, "loss": 1.7159, "step": 8358 }, { "epoch": 2.537259068143876, "grad_norm": 0.8257943987846375, "learning_rate": 5.773514224967096e-05, "loss": 0.7839, "step": 8359 }, { "epoch": 2.5375626043405677, "grad_norm": 0.7034426331520081, "learning_rate": 5.7730079983800756e-05, "loss": 1.2258, "step": 8360 }, { "epoch": 2.5378661405372593, "grad_norm": 0.70139080286026, "learning_rate": 5.772501771793055e-05, "loss": 1.3429, "step": 8361 }, { "epoch": 2.5381696767339506, "grad_norm": 0.8869861364364624, "learning_rate": 5.7719955452060345e-05, "loss": 1.3633, "step": 8362 }, { "epoch": 2.5384732129306418, "grad_norm": 0.7827722430229187, "learning_rate": 5.771489318619015e-05, "loss": 1.4197, "step": 8363 }, { "epoch": 2.5387767491273334, "grad_norm": 0.8089902997016907, "learning_rate": 5.770983092031994e-05, "loss": 1.4385, "step": 8364 }, { "epoch": 2.539080285324025, "grad_norm": 0.7806230187416077, "learning_rate": 5.7704768654449737e-05, "loss": 1.4791, "step": 8365 }, { "epoch": 2.5393838215207163, "grad_norm": 1.0129203796386719, "learning_rate": 5.769970638857953e-05, "loss": 1.2102, "step": 8366 }, { "epoch": 2.539687357717408, "grad_norm": 0.7767428755760193, "learning_rate": 5.7694644122709326e-05, "loss": 1.4489, "step": 8367 }, { "epoch": 2.539990893914099, "grad_norm": 0.6615005135536194, "learning_rate": 5.768958185683913e-05, "loss": 1.2321, "step": 8368 }, { "epoch": 2.540294430110791, "grad_norm": 0.9702263474464417, "learning_rate": 5.768451959096892e-05, "loss": 1.4452, "step": 8369 }, { "epoch": 2.540597966307482, "grad_norm": 1.022396206855774, "learning_rate": 5.767945732509872e-05, "loss": 0.7356, "step": 8370 }, { "epoch": 2.5409015025041737, "grad_norm": 0.7501503229141235, "learning_rate": 5.767439505922851e-05, "loss": 1.1511, "step": 8371 }, { "epoch": 2.5412050387008653, "grad_norm": 0.8175548315048218, "learning_rate": 5.7669332793358314e-05, "loss": 1.37, "step": 8372 }, { "epoch": 2.5415085748975565, "grad_norm": 0.7536338567733765, "learning_rate": 5.766427052748811e-05, "loss": 1.2864, "step": 8373 }, { "epoch": 2.5418121110942478, "grad_norm": 0.8838115334510803, "learning_rate": 5.76592082616179e-05, "loss": 1.348, "step": 8374 }, { "epoch": 2.5421156472909394, "grad_norm": 0.7550739049911499, "learning_rate": 5.76541459957477e-05, "loss": 1.4292, "step": 8375 }, { "epoch": 2.542419183487631, "grad_norm": 0.8385372757911682, "learning_rate": 5.764908372987749e-05, "loss": 1.6506, "step": 8376 }, { "epoch": 2.5427227196843223, "grad_norm": 0.6910164952278137, "learning_rate": 5.7644021464007294e-05, "loss": 0.8724, "step": 8377 }, { "epoch": 2.543026255881014, "grad_norm": 0.7734633088111877, "learning_rate": 5.763895919813709e-05, "loss": 0.2027, "step": 8378 }, { "epoch": 2.543329792077705, "grad_norm": 0.6335809826850891, "learning_rate": 5.7633896932266884e-05, "loss": 0.9734, "step": 8379 }, { "epoch": 2.543633328274397, "grad_norm": 0.7772993445396423, "learning_rate": 5.762883466639668e-05, "loss": 1.3989, "step": 8380 }, { "epoch": 2.543936864471088, "grad_norm": 0.8541886806488037, "learning_rate": 5.7623772400526474e-05, "loss": 1.4976, "step": 8381 }, { "epoch": 2.5442404006677797, "grad_norm": 0.8611499071121216, "learning_rate": 5.7618710134656275e-05, "loss": 1.2922, "step": 8382 }, { "epoch": 2.5445439368644713, "grad_norm": 0.8912010192871094, "learning_rate": 5.761364786878607e-05, "loss": 1.3701, "step": 8383 }, { "epoch": 2.5448474730611625, "grad_norm": 0.6431745886802673, "learning_rate": 5.7608585602915865e-05, "loss": 1.5163, "step": 8384 }, { "epoch": 2.5451510092578538, "grad_norm": 0.7857423424720764, "learning_rate": 5.760352333704566e-05, "loss": 1.5629, "step": 8385 }, { "epoch": 2.5454545454545454, "grad_norm": 0.8442662358283997, "learning_rate": 5.759846107117546e-05, "loss": 0.6841, "step": 8386 }, { "epoch": 2.545758081651237, "grad_norm": 0.7257554531097412, "learning_rate": 5.7593398805305256e-05, "loss": 1.1639, "step": 8387 }, { "epoch": 2.5460616178479283, "grad_norm": 0.693900465965271, "learning_rate": 5.758833653943505e-05, "loss": 0.9785, "step": 8388 }, { "epoch": 2.54636515404462, "grad_norm": 0.45039594173431396, "learning_rate": 5.7583274273564846e-05, "loss": 0.7857, "step": 8389 }, { "epoch": 2.546668690241311, "grad_norm": 0.8388344049453735, "learning_rate": 5.757821200769464e-05, "loss": 1.2768, "step": 8390 }, { "epoch": 2.546972226438003, "grad_norm": 0.7791977524757385, "learning_rate": 5.757314974182444e-05, "loss": 1.3924, "step": 8391 }, { "epoch": 2.547275762634694, "grad_norm": 0.760310173034668, "learning_rate": 5.756808747595424e-05, "loss": 1.6505, "step": 8392 }, { "epoch": 2.5475792988313857, "grad_norm": 0.7585695385932922, "learning_rate": 5.756302521008403e-05, "loss": 1.4748, "step": 8393 }, { "epoch": 2.5478828350280773, "grad_norm": 0.8367417454719543, "learning_rate": 5.7557962944213826e-05, "loss": 1.3173, "step": 8394 }, { "epoch": 2.5481863712247685, "grad_norm": 0.81987065076828, "learning_rate": 5.755290067834362e-05, "loss": 1.1409, "step": 8395 }, { "epoch": 2.5484899074214598, "grad_norm": 0.6271147131919861, "learning_rate": 5.754783841247342e-05, "loss": 1.169, "step": 8396 }, { "epoch": 2.5487934436181514, "grad_norm": 0.7545047402381897, "learning_rate": 5.754277614660323e-05, "loss": 1.3987, "step": 8397 }, { "epoch": 2.549096979814843, "grad_norm": 0.791571319103241, "learning_rate": 5.7537713880733026e-05, "loss": 1.5415, "step": 8398 }, { "epoch": 2.5494005160115343, "grad_norm": 0.6578336358070374, "learning_rate": 5.753265161486282e-05, "loss": 1.4396, "step": 8399 }, { "epoch": 2.549704052208226, "grad_norm": 0.70784592628479, "learning_rate": 5.7527589348992615e-05, "loss": 1.3632, "step": 8400 }, { "epoch": 2.550007588404917, "grad_norm": 0.8875760436058044, "learning_rate": 5.752252708312241e-05, "loss": 1.4161, "step": 8401 }, { "epoch": 2.550311124601609, "grad_norm": 0.6619694828987122, "learning_rate": 5.751746481725221e-05, "loss": 1.5841, "step": 8402 }, { "epoch": 2.5506146607983, "grad_norm": 0.6046818494796753, "learning_rate": 5.7512402551382007e-05, "loss": 0.9539, "step": 8403 }, { "epoch": 2.5509181969949917, "grad_norm": 0.7589368224143982, "learning_rate": 5.75073402855118e-05, "loss": 1.5581, "step": 8404 }, { "epoch": 2.5512217331916833, "grad_norm": 0.9756147265434265, "learning_rate": 5.7502278019641596e-05, "loss": 1.1617, "step": 8405 }, { "epoch": 2.5515252693883745, "grad_norm": 1.0096057653427124, "learning_rate": 5.749721575377139e-05, "loss": 1.345, "step": 8406 }, { "epoch": 2.5518288055850658, "grad_norm": 0.7118128538131714, "learning_rate": 5.749215348790119e-05, "loss": 1.2063, "step": 8407 }, { "epoch": 2.5521323417817574, "grad_norm": 0.668727695941925, "learning_rate": 5.748709122203099e-05, "loss": 0.9131, "step": 8408 }, { "epoch": 2.552435877978449, "grad_norm": 0.6993553042411804, "learning_rate": 5.748202895616078e-05, "loss": 1.0686, "step": 8409 }, { "epoch": 2.5527394141751403, "grad_norm": 0.6418778300285339, "learning_rate": 5.747696669029058e-05, "loss": 1.2229, "step": 8410 }, { "epoch": 2.553042950371832, "grad_norm": 0.8472557067871094, "learning_rate": 5.747190442442038e-05, "loss": 1.0367, "step": 8411 }, { "epoch": 2.553346486568523, "grad_norm": 0.8465378880500793, "learning_rate": 5.746684215855017e-05, "loss": 1.3367, "step": 8412 }, { "epoch": 2.553650022765215, "grad_norm": 0.9106330275535583, "learning_rate": 5.746177989267997e-05, "loss": 1.4787, "step": 8413 }, { "epoch": 2.553953558961906, "grad_norm": 0.7567964196205139, "learning_rate": 5.745671762680976e-05, "loss": 1.2565, "step": 8414 }, { "epoch": 2.5542570951585977, "grad_norm": 0.8421973586082458, "learning_rate": 5.745165536093956e-05, "loss": 1.6682, "step": 8415 }, { "epoch": 2.5545606313552893, "grad_norm": 0.5599844455718994, "learning_rate": 5.744659309506936e-05, "loss": 1.1791, "step": 8416 }, { "epoch": 2.5548641675519805, "grad_norm": 0.6801798343658447, "learning_rate": 5.7441530829199154e-05, "loss": 1.5051, "step": 8417 }, { "epoch": 2.555167703748672, "grad_norm": 0.7134249806404114, "learning_rate": 5.743646856332895e-05, "loss": 1.5552, "step": 8418 }, { "epoch": 2.5554712399453634, "grad_norm": 0.9280962944030762, "learning_rate": 5.7431406297458744e-05, "loss": 1.2458, "step": 8419 }, { "epoch": 2.555774776142055, "grad_norm": 0.7428077459335327, "learning_rate": 5.742634403158854e-05, "loss": 1.5828, "step": 8420 }, { "epoch": 2.5560783123387463, "grad_norm": 0.7723643183708191, "learning_rate": 5.742128176571834e-05, "loss": 1.5052, "step": 8421 }, { "epoch": 2.556381848535438, "grad_norm": 0.7478247880935669, "learning_rate": 5.7416219499848135e-05, "loss": 1.3942, "step": 8422 }, { "epoch": 2.556685384732129, "grad_norm": 0.7729806303977966, "learning_rate": 5.741115723397793e-05, "loss": 1.5192, "step": 8423 }, { "epoch": 2.556988920928821, "grad_norm": 0.7728172540664673, "learning_rate": 5.7406094968107724e-05, "loss": 1.4517, "step": 8424 }, { "epoch": 2.557292457125512, "grad_norm": 0.6714379787445068, "learning_rate": 5.7401032702237526e-05, "loss": 1.4359, "step": 8425 }, { "epoch": 2.5575959933222037, "grad_norm": 0.8857477307319641, "learning_rate": 5.739597043636732e-05, "loss": 1.0287, "step": 8426 }, { "epoch": 2.5578995295188953, "grad_norm": 0.7782611846923828, "learning_rate": 5.7390908170497115e-05, "loss": 1.263, "step": 8427 }, { "epoch": 2.5582030657155865, "grad_norm": 0.5905637741088867, "learning_rate": 5.738584590462691e-05, "loss": 0.7798, "step": 8428 }, { "epoch": 2.558506601912278, "grad_norm": 0.5645215511322021, "learning_rate": 5.7380783638756705e-05, "loss": 1.5072, "step": 8429 }, { "epoch": 2.5588101381089694, "grad_norm": 0.7701306343078613, "learning_rate": 5.737572137288651e-05, "loss": 1.5126, "step": 8430 }, { "epoch": 2.559113674305661, "grad_norm": 0.6775439977645874, "learning_rate": 5.73706591070163e-05, "loss": 1.0372, "step": 8431 }, { "epoch": 2.5594172105023523, "grad_norm": 0.5594154596328735, "learning_rate": 5.7365596841146096e-05, "loss": 1.779, "step": 8432 }, { "epoch": 2.559720746699044, "grad_norm": 0.6067804098129272, "learning_rate": 5.736053457527589e-05, "loss": 1.6787, "step": 8433 }, { "epoch": 2.5600242828957356, "grad_norm": 0.7488258481025696, "learning_rate": 5.7355472309405686e-05, "loss": 1.0234, "step": 8434 }, { "epoch": 2.560327819092427, "grad_norm": 0.8093794584274292, "learning_rate": 5.735041004353549e-05, "loss": 1.4619, "step": 8435 }, { "epoch": 2.560631355289118, "grad_norm": 0.8266646265983582, "learning_rate": 5.734534777766528e-05, "loss": 1.1253, "step": 8436 }, { "epoch": 2.5609348914858097, "grad_norm": 0.6163046360015869, "learning_rate": 5.734028551179508e-05, "loss": 1.0671, "step": 8437 }, { "epoch": 2.5612384276825013, "grad_norm": 1.0120611190795898, "learning_rate": 5.733522324592487e-05, "loss": 1.1202, "step": 8438 }, { "epoch": 2.5615419638791925, "grad_norm": 0.875623345375061, "learning_rate": 5.733016098005467e-05, "loss": 1.3208, "step": 8439 }, { "epoch": 2.561845500075884, "grad_norm": 0.673265814781189, "learning_rate": 5.732509871418447e-05, "loss": 1.5828, "step": 8440 }, { "epoch": 2.5621490362725754, "grad_norm": 0.8331916332244873, "learning_rate": 5.732003644831426e-05, "loss": 1.458, "step": 8441 }, { "epoch": 2.562452572469267, "grad_norm": 0.847938060760498, "learning_rate": 5.731497418244407e-05, "loss": 0.7955, "step": 8442 }, { "epoch": 2.5627561086659583, "grad_norm": 0.8607446551322937, "learning_rate": 5.7309911916573866e-05, "loss": 1.2457, "step": 8443 }, { "epoch": 2.56305964486265, "grad_norm": 0.8080736994743347, "learning_rate": 5.730484965070366e-05, "loss": 1.347, "step": 8444 }, { "epoch": 2.5633631810593416, "grad_norm": 1.0638771057128906, "learning_rate": 5.7299787384833456e-05, "loss": 0.7558, "step": 8445 }, { "epoch": 2.563666717256033, "grad_norm": 0.8940908908843994, "learning_rate": 5.729472511896326e-05, "loss": 1.2289, "step": 8446 }, { "epoch": 2.563970253452724, "grad_norm": 0.7230290174484253, "learning_rate": 5.728966285309305e-05, "loss": 0.827, "step": 8447 }, { "epoch": 2.5642737896494157, "grad_norm": 0.9477673768997192, "learning_rate": 5.728460058722285e-05, "loss": 1.1062, "step": 8448 }, { "epoch": 2.5645773258461073, "grad_norm": 0.858997642993927, "learning_rate": 5.727953832135264e-05, "loss": 1.2228, "step": 8449 }, { "epoch": 2.5648808620427985, "grad_norm": 0.804364025592804, "learning_rate": 5.727447605548244e-05, "loss": 1.0802, "step": 8450 }, { "epoch": 2.56518439823949, "grad_norm": 0.7806909680366516, "learning_rate": 5.726941378961224e-05, "loss": 0.9608, "step": 8451 }, { "epoch": 2.5654879344361814, "grad_norm": 0.8432273268699646, "learning_rate": 5.726435152374203e-05, "loss": 1.1938, "step": 8452 }, { "epoch": 2.565791470632873, "grad_norm": 0.7382596731185913, "learning_rate": 5.725928925787183e-05, "loss": 1.6108, "step": 8453 }, { "epoch": 2.5660950068295643, "grad_norm": 0.7803032398223877, "learning_rate": 5.725422699200162e-05, "loss": 1.0205, "step": 8454 }, { "epoch": 2.566398543026256, "grad_norm": 0.8174391984939575, "learning_rate": 5.7249164726131424e-05, "loss": 1.4251, "step": 8455 }, { "epoch": 2.5667020792229476, "grad_norm": 0.6124516725540161, "learning_rate": 5.724410246026122e-05, "loss": 0.953, "step": 8456 }, { "epoch": 2.567005615419639, "grad_norm": 0.694999098777771, "learning_rate": 5.7239040194391014e-05, "loss": 1.6602, "step": 8457 }, { "epoch": 2.56730915161633, "grad_norm": 0.8906886577606201, "learning_rate": 5.723397792852081e-05, "loss": 1.3543, "step": 8458 }, { "epoch": 2.5676126878130217, "grad_norm": 0.6976550817489624, "learning_rate": 5.72289156626506e-05, "loss": 1.7041, "step": 8459 }, { "epoch": 2.5679162240097133, "grad_norm": 0.7767781019210815, "learning_rate": 5.7223853396780405e-05, "loss": 1.1142, "step": 8460 }, { "epoch": 2.5682197602064045, "grad_norm": 0.6639768481254578, "learning_rate": 5.72187911309102e-05, "loss": 1.109, "step": 8461 }, { "epoch": 2.568523296403096, "grad_norm": 0.6556784510612488, "learning_rate": 5.7213728865039994e-05, "loss": 0.714, "step": 8462 }, { "epoch": 2.5688268325997874, "grad_norm": 0.7635631561279297, "learning_rate": 5.720866659916979e-05, "loss": 1.3227, "step": 8463 }, { "epoch": 2.569130368796479, "grad_norm": 0.7205228209495544, "learning_rate": 5.720360433329959e-05, "loss": 1.117, "step": 8464 }, { "epoch": 2.5694339049931703, "grad_norm": 0.8992099165916443, "learning_rate": 5.7198542067429385e-05, "loss": 1.2895, "step": 8465 }, { "epoch": 2.569737441189862, "grad_norm": 0.7975298166275024, "learning_rate": 5.719347980155918e-05, "loss": 1.495, "step": 8466 }, { "epoch": 2.5700409773865536, "grad_norm": 0.7453858256340027, "learning_rate": 5.7188417535688975e-05, "loss": 1.3353, "step": 8467 }, { "epoch": 2.570344513583245, "grad_norm": 0.6829327344894409, "learning_rate": 5.718335526981877e-05, "loss": 1.6156, "step": 8468 }, { "epoch": 2.570648049779936, "grad_norm": 0.7259297966957092, "learning_rate": 5.717829300394857e-05, "loss": 1.0498, "step": 8469 }, { "epoch": 2.5709515859766277, "grad_norm": 0.805950403213501, "learning_rate": 5.7173230738078366e-05, "loss": 0.884, "step": 8470 }, { "epoch": 2.5712551221733193, "grad_norm": 0.9011547565460205, "learning_rate": 5.716816847220816e-05, "loss": 1.3437, "step": 8471 }, { "epoch": 2.5715586583700105, "grad_norm": 0.9217641949653625, "learning_rate": 5.7163106206337956e-05, "loss": 1.2101, "step": 8472 }, { "epoch": 2.571862194566702, "grad_norm": 0.47957077622413635, "learning_rate": 5.715804394046775e-05, "loss": 0.6725, "step": 8473 }, { "epoch": 2.5721657307633934, "grad_norm": 0.8739991784095764, "learning_rate": 5.715298167459755e-05, "loss": 1.1752, "step": 8474 }, { "epoch": 2.572469266960085, "grad_norm": 0.7338137030601501, "learning_rate": 5.714791940872735e-05, "loss": 1.0399, "step": 8475 }, { "epoch": 2.5727728031567763, "grad_norm": 0.8827327489852905, "learning_rate": 5.714285714285714e-05, "loss": 1.3659, "step": 8476 }, { "epoch": 2.573076339353468, "grad_norm": 0.8811532258987427, "learning_rate": 5.7137794876986937e-05, "loss": 1.1658, "step": 8477 }, { "epoch": 2.5733798755501596, "grad_norm": 0.6947214007377625, "learning_rate": 5.713273261111674e-05, "loss": 1.4052, "step": 8478 }, { "epoch": 2.573683411746851, "grad_norm": 0.9203129410743713, "learning_rate": 5.712767034524653e-05, "loss": 1.3322, "step": 8479 }, { "epoch": 2.573986947943542, "grad_norm": 0.9946419596672058, "learning_rate": 5.712260807937633e-05, "loss": 1.6423, "step": 8480 }, { "epoch": 2.5742904841402336, "grad_norm": 0.8030951023101807, "learning_rate": 5.711754581350612e-05, "loss": 1.0093, "step": 8481 }, { "epoch": 2.5745940203369253, "grad_norm": 0.9551969766616821, "learning_rate": 5.711248354763592e-05, "loss": 1.2978, "step": 8482 }, { "epoch": 2.5748975565336165, "grad_norm": 1.039363980293274, "learning_rate": 5.710742128176572e-05, "loss": 1.4354, "step": 8483 }, { "epoch": 2.575201092730308, "grad_norm": 0.6756881475448608, "learning_rate": 5.7102359015895514e-05, "loss": 1.1468, "step": 8484 }, { "epoch": 2.5755046289269994, "grad_norm": 0.8306137919425964, "learning_rate": 5.709729675002531e-05, "loss": 1.5781, "step": 8485 }, { "epoch": 2.575808165123691, "grad_norm": 0.8153355121612549, "learning_rate": 5.709223448415512e-05, "loss": 1.1197, "step": 8486 }, { "epoch": 2.5761117013203823, "grad_norm": 0.8328362703323364, "learning_rate": 5.708717221828491e-05, "loss": 1.305, "step": 8487 }, { "epoch": 2.576415237517074, "grad_norm": 0.7848013043403625, "learning_rate": 5.7082109952414706e-05, "loss": 1.3205, "step": 8488 }, { "epoch": 2.5767187737137656, "grad_norm": 0.7196735739707947, "learning_rate": 5.707704768654451e-05, "loss": 1.3499, "step": 8489 }, { "epoch": 2.5770223099104568, "grad_norm": 0.8000575304031372, "learning_rate": 5.70719854206743e-05, "loss": 1.1835, "step": 8490 }, { "epoch": 2.5773258461071484, "grad_norm": 0.8718536496162415, "learning_rate": 5.70669231548041e-05, "loss": 1.2739, "step": 8491 }, { "epoch": 2.5776293823038396, "grad_norm": 0.7113871574401855, "learning_rate": 5.706186088893389e-05, "loss": 1.5288, "step": 8492 }, { "epoch": 2.5779329185005313, "grad_norm": 0.8177202343940735, "learning_rate": 5.705679862306369e-05, "loss": 1.2114, "step": 8493 }, { "epoch": 2.5782364546972225, "grad_norm": 0.685756266117096, "learning_rate": 5.705173635719349e-05, "loss": 1.6081, "step": 8494 }, { "epoch": 2.578539990893914, "grad_norm": 0.7527011036872864, "learning_rate": 5.7046674091323283e-05, "loss": 1.2229, "step": 8495 }, { "epoch": 2.578843527090606, "grad_norm": 0.8383612036705017, "learning_rate": 5.704161182545308e-05, "loss": 1.3744, "step": 8496 }, { "epoch": 2.579147063287297, "grad_norm": 0.7893360257148743, "learning_rate": 5.703654955958287e-05, "loss": 1.0301, "step": 8497 }, { "epoch": 2.5794505994839882, "grad_norm": 0.766526997089386, "learning_rate": 5.703148729371267e-05, "loss": 1.1499, "step": 8498 }, { "epoch": 2.57975413568068, "grad_norm": 1.030303955078125, "learning_rate": 5.702642502784247e-05, "loss": 1.3511, "step": 8499 }, { "epoch": 2.5800576718773716, "grad_norm": 0.8071532845497131, "learning_rate": 5.7021362761972264e-05, "loss": 1.2655, "step": 8500 }, { "epoch": 2.5803612080740628, "grad_norm": 0.8044392466545105, "learning_rate": 5.701630049610206e-05, "loss": 1.3552, "step": 8501 }, { "epoch": 2.5806647442707544, "grad_norm": 0.881322979927063, "learning_rate": 5.7011238230231854e-05, "loss": 1.4468, "step": 8502 }, { "epoch": 2.5809682804674456, "grad_norm": 0.8353243470191956, "learning_rate": 5.7006175964361655e-05, "loss": 1.2619, "step": 8503 }, { "epoch": 2.5812718166641373, "grad_norm": 0.775505542755127, "learning_rate": 5.700111369849145e-05, "loss": 1.3108, "step": 8504 }, { "epoch": 2.5815753528608285, "grad_norm": 0.7685757875442505, "learning_rate": 5.6996051432621245e-05, "loss": 1.5786, "step": 8505 }, { "epoch": 2.58187888905752, "grad_norm": 0.7894332408905029, "learning_rate": 5.699098916675104e-05, "loss": 1.4572, "step": 8506 }, { "epoch": 2.582182425254212, "grad_norm": 0.8481132984161377, "learning_rate": 5.6985926900880835e-05, "loss": 1.4722, "step": 8507 }, { "epoch": 2.582485961450903, "grad_norm": 0.7238352298736572, "learning_rate": 5.6980864635010636e-05, "loss": 1.2255, "step": 8508 }, { "epoch": 2.5827894976475942, "grad_norm": 0.82459557056427, "learning_rate": 5.697580236914043e-05, "loss": 1.0108, "step": 8509 }, { "epoch": 2.583093033844286, "grad_norm": 0.5827746987342834, "learning_rate": 5.6970740103270226e-05, "loss": 1.1717, "step": 8510 }, { "epoch": 2.5833965700409776, "grad_norm": 0.8375948071479797, "learning_rate": 5.696567783740002e-05, "loss": 1.2577, "step": 8511 }, { "epoch": 2.5837001062376688, "grad_norm": 0.7858525514602661, "learning_rate": 5.6960615571529815e-05, "loss": 1.0145, "step": 8512 }, { "epoch": 2.5840036424343604, "grad_norm": 0.8637663722038269, "learning_rate": 5.695555330565962e-05, "loss": 0.8378, "step": 8513 }, { "epoch": 2.5843071786310516, "grad_norm": 0.8522406816482544, "learning_rate": 5.695049103978941e-05, "loss": 1.1828, "step": 8514 }, { "epoch": 2.5846107148277433, "grad_norm": 0.7590748071670532, "learning_rate": 5.6945428773919206e-05, "loss": 1.5535, "step": 8515 }, { "epoch": 2.5849142510244345, "grad_norm": 0.853203296661377, "learning_rate": 5.6940366508049e-05, "loss": 1.4439, "step": 8516 }, { "epoch": 2.585217787221126, "grad_norm": 0.6160378456115723, "learning_rate": 5.69353042421788e-05, "loss": 1.6231, "step": 8517 }, { "epoch": 2.585521323417818, "grad_norm": 0.7248274087905884, "learning_rate": 5.69302419763086e-05, "loss": 0.9876, "step": 8518 }, { "epoch": 2.585824859614509, "grad_norm": 0.7577129006385803, "learning_rate": 5.692517971043839e-05, "loss": 1.166, "step": 8519 }, { "epoch": 2.5861283958112002, "grad_norm": 0.9573093056678772, "learning_rate": 5.692011744456819e-05, "loss": 1.1766, "step": 8520 }, { "epoch": 2.586431932007892, "grad_norm": 0.6318233609199524, "learning_rate": 5.691505517869798e-05, "loss": 1.7379, "step": 8521 }, { "epoch": 2.5867354682045836, "grad_norm": 1.0729773044586182, "learning_rate": 5.6909992912827784e-05, "loss": 0.9821, "step": 8522 }, { "epoch": 2.5870390044012748, "grad_norm": 1.0490633249282837, "learning_rate": 5.690493064695758e-05, "loss": 1.0339, "step": 8523 }, { "epoch": 2.5873425405979664, "grad_norm": 0.7213556170463562, "learning_rate": 5.689986838108737e-05, "loss": 1.4523, "step": 8524 }, { "epoch": 2.5876460767946576, "grad_norm": 0.7995901703834534, "learning_rate": 5.689480611521717e-05, "loss": 1.608, "step": 8525 }, { "epoch": 2.5879496129913493, "grad_norm": 0.7987606525421143, "learning_rate": 5.688974384934696e-05, "loss": 1.0311, "step": 8526 }, { "epoch": 2.5882531491880405, "grad_norm": 0.9713065028190613, "learning_rate": 5.6884681583476764e-05, "loss": 1.3344, "step": 8527 }, { "epoch": 2.588556685384732, "grad_norm": 0.7180582880973816, "learning_rate": 5.687961931760656e-05, "loss": 1.5602, "step": 8528 }, { "epoch": 2.588860221581424, "grad_norm": 0.8596640229225159, "learning_rate": 5.6874557051736354e-05, "loss": 1.5416, "step": 8529 }, { "epoch": 2.589163757778115, "grad_norm": 0.8000577092170715, "learning_rate": 5.686949478586615e-05, "loss": 1.4195, "step": 8530 }, { "epoch": 2.5894672939748062, "grad_norm": 0.5756254196166992, "learning_rate": 5.686443251999596e-05, "loss": 1.09, "step": 8531 }, { "epoch": 2.589770830171498, "grad_norm": 0.6958990693092346, "learning_rate": 5.685937025412575e-05, "loss": 1.7655, "step": 8532 }, { "epoch": 2.5900743663681896, "grad_norm": 0.6716173887252808, "learning_rate": 5.6854307988255553e-05, "loss": 1.1914, "step": 8533 }, { "epoch": 2.5903779025648808, "grad_norm": 0.6973176002502441, "learning_rate": 5.684924572238535e-05, "loss": 1.4955, "step": 8534 }, { "epoch": 2.5906814387615724, "grad_norm": 0.7571681141853333, "learning_rate": 5.684418345651514e-05, "loss": 1.4925, "step": 8535 }, { "epoch": 2.5909849749582636, "grad_norm": 0.7972629070281982, "learning_rate": 5.683912119064494e-05, "loss": 0.9581, "step": 8536 }, { "epoch": 2.5912885111549553, "grad_norm": 0.5946874022483826, "learning_rate": 5.683405892477473e-05, "loss": 1.4677, "step": 8537 }, { "epoch": 2.5915920473516465, "grad_norm": 0.721113920211792, "learning_rate": 5.6828996658904534e-05, "loss": 1.1401, "step": 8538 }, { "epoch": 2.591895583548338, "grad_norm": 0.8047667145729065, "learning_rate": 5.682393439303433e-05, "loss": 1.4372, "step": 8539 }, { "epoch": 2.59219911974503, "grad_norm": 0.7815547585487366, "learning_rate": 5.6818872127164124e-05, "loss": 1.5106, "step": 8540 }, { "epoch": 2.592502655941721, "grad_norm": 0.6391544342041016, "learning_rate": 5.681380986129392e-05, "loss": 1.6492, "step": 8541 }, { "epoch": 2.5928061921384122, "grad_norm": 0.8339974284172058, "learning_rate": 5.680874759542372e-05, "loss": 1.3008, "step": 8542 }, { "epoch": 2.593109728335104, "grad_norm": 0.8036449551582336, "learning_rate": 5.6803685329553515e-05, "loss": 1.384, "step": 8543 }, { "epoch": 2.5934132645317955, "grad_norm": 0.712510883808136, "learning_rate": 5.679862306368331e-05, "loss": 1.2614, "step": 8544 }, { "epoch": 2.5937168007284868, "grad_norm": 0.6258426904678345, "learning_rate": 5.6793560797813105e-05, "loss": 1.3732, "step": 8545 }, { "epoch": 2.5940203369251784, "grad_norm": 0.8587894439697266, "learning_rate": 5.67884985319429e-05, "loss": 0.8554, "step": 8546 }, { "epoch": 2.5943238731218696, "grad_norm": 0.9387930631637573, "learning_rate": 5.67834362660727e-05, "loss": 1.2503, "step": 8547 }, { "epoch": 2.5946274093185613, "grad_norm": 0.8170998096466064, "learning_rate": 5.6778374000202496e-05, "loss": 1.6741, "step": 8548 }, { "epoch": 2.5949309455152525, "grad_norm": 0.7400410771369934, "learning_rate": 5.677331173433229e-05, "loss": 1.3155, "step": 8549 }, { "epoch": 2.595234481711944, "grad_norm": 0.738135814666748, "learning_rate": 5.6768249468462085e-05, "loss": 1.4813, "step": 8550 }, { "epoch": 2.595538017908636, "grad_norm": 0.9240551590919495, "learning_rate": 5.676318720259188e-05, "loss": 1.6402, "step": 8551 }, { "epoch": 2.595841554105327, "grad_norm": 0.7406201958656311, "learning_rate": 5.675812493672168e-05, "loss": 1.7953, "step": 8552 }, { "epoch": 2.5961450903020187, "grad_norm": 0.7671360969543457, "learning_rate": 5.6753062670851476e-05, "loss": 1.1308, "step": 8553 }, { "epoch": 2.59644862649871, "grad_norm": 0.6881118416786194, "learning_rate": 5.674800040498127e-05, "loss": 1.79, "step": 8554 }, { "epoch": 2.5967521626954015, "grad_norm": 0.7133281826972961, "learning_rate": 5.6742938139111066e-05, "loss": 1.0629, "step": 8555 }, { "epoch": 2.5970556988920928, "grad_norm": 0.7810043096542358, "learning_rate": 5.673787587324087e-05, "loss": 1.6822, "step": 8556 }, { "epoch": 2.5973592350887844, "grad_norm": 0.8383316397666931, "learning_rate": 5.673281360737066e-05, "loss": 1.3929, "step": 8557 }, { "epoch": 2.597662771285476, "grad_norm": 0.8424170613288879, "learning_rate": 5.672775134150046e-05, "loss": 1.0364, "step": 8558 }, { "epoch": 2.5979663074821673, "grad_norm": 0.7133161425590515, "learning_rate": 5.672268907563025e-05, "loss": 1.5446, "step": 8559 }, { "epoch": 2.5982698436788585, "grad_norm": 0.5788865685462952, "learning_rate": 5.671762680976005e-05, "loss": 1.5501, "step": 8560 }, { "epoch": 2.59857337987555, "grad_norm": 0.692284882068634, "learning_rate": 5.671256454388985e-05, "loss": 1.6923, "step": 8561 }, { "epoch": 2.598876916072242, "grad_norm": 0.7362749576568604, "learning_rate": 5.670750227801964e-05, "loss": 1.6645, "step": 8562 }, { "epoch": 2.599180452268933, "grad_norm": 0.6713787317276001, "learning_rate": 5.670244001214944e-05, "loss": 1.4341, "step": 8563 }, { "epoch": 2.5994839884656247, "grad_norm": 0.8715011477470398, "learning_rate": 5.669737774627923e-05, "loss": 1.0799, "step": 8564 }, { "epoch": 2.599787524662316, "grad_norm": 0.6888600587844849, "learning_rate": 5.669231548040903e-05, "loss": 1.7989, "step": 8565 }, { "epoch": 2.6000910608590075, "grad_norm": 0.6829241514205933, "learning_rate": 5.668725321453883e-05, "loss": 1.236, "step": 8566 }, { "epoch": 2.6003945970556988, "grad_norm": 0.79676753282547, "learning_rate": 5.6682190948668624e-05, "loss": 1.4303, "step": 8567 }, { "epoch": 2.6006981332523904, "grad_norm": 0.84662926197052, "learning_rate": 5.667712868279842e-05, "loss": 1.3441, "step": 8568 }, { "epoch": 2.601001669449082, "grad_norm": 0.8637576103210449, "learning_rate": 5.6672066416928213e-05, "loss": 1.2666, "step": 8569 }, { "epoch": 2.6013052056457733, "grad_norm": 0.9721615314483643, "learning_rate": 5.6667004151058015e-05, "loss": 1.328, "step": 8570 }, { "epoch": 2.6016087418424645, "grad_norm": 0.7033756375312805, "learning_rate": 5.666194188518781e-05, "loss": 1.3619, "step": 8571 }, { "epoch": 2.601912278039156, "grad_norm": 0.8240988850593567, "learning_rate": 5.6656879619317605e-05, "loss": 1.3551, "step": 8572 }, { "epoch": 2.602215814235848, "grad_norm": 0.6749477386474609, "learning_rate": 5.66518173534474e-05, "loss": 1.5243, "step": 8573 }, { "epoch": 2.602519350432539, "grad_norm": 0.7842938303947449, "learning_rate": 5.6646755087577194e-05, "loss": 1.4415, "step": 8574 }, { "epoch": 2.6028228866292307, "grad_norm": 0.7162943482398987, "learning_rate": 5.6641692821707e-05, "loss": 1.7193, "step": 8575 }, { "epoch": 2.603126422825922, "grad_norm": 0.7127130031585693, "learning_rate": 5.66366305558368e-05, "loss": 0.9039, "step": 8576 }, { "epoch": 2.6034299590226135, "grad_norm": 0.7178913354873657, "learning_rate": 5.66315682899666e-05, "loss": 1.1871, "step": 8577 }, { "epoch": 2.6037334952193047, "grad_norm": 0.801655650138855, "learning_rate": 5.6626506024096394e-05, "loss": 1.5558, "step": 8578 }, { "epoch": 2.6040370314159964, "grad_norm": 0.8066810965538025, "learning_rate": 5.662144375822619e-05, "loss": 0.9176, "step": 8579 }, { "epoch": 2.604340567612688, "grad_norm": 0.6406304836273193, "learning_rate": 5.661638149235598e-05, "loss": 1.3271, "step": 8580 }, { "epoch": 2.6046441038093793, "grad_norm": 0.7086127996444702, "learning_rate": 5.6611319226485785e-05, "loss": 1.4672, "step": 8581 }, { "epoch": 2.6049476400060705, "grad_norm": 0.774001955986023, "learning_rate": 5.660625696061558e-05, "loss": 1.058, "step": 8582 }, { "epoch": 2.605251176202762, "grad_norm": 0.90226811170578, "learning_rate": 5.6601194694745374e-05, "loss": 0.9913, "step": 8583 }, { "epoch": 2.605554712399454, "grad_norm": 0.6938377022743225, "learning_rate": 5.659613242887517e-05, "loss": 1.1436, "step": 8584 }, { "epoch": 2.605858248596145, "grad_norm": 0.776210606098175, "learning_rate": 5.6591070163004964e-05, "loss": 1.4291, "step": 8585 }, { "epoch": 2.6061617847928367, "grad_norm": 0.7756494283676147, "learning_rate": 5.6586007897134766e-05, "loss": 1.3007, "step": 8586 }, { "epoch": 2.606465320989528, "grad_norm": 0.8304345011711121, "learning_rate": 5.658094563126456e-05, "loss": 1.1779, "step": 8587 }, { "epoch": 2.6067688571862195, "grad_norm": 0.9503403306007385, "learning_rate": 5.6575883365394355e-05, "loss": 1.2729, "step": 8588 }, { "epoch": 2.6070723933829107, "grad_norm": 0.7229785323143005, "learning_rate": 5.657082109952415e-05, "loss": 1.4943, "step": 8589 }, { "epoch": 2.6073759295796024, "grad_norm": 0.7801215052604675, "learning_rate": 5.6565758833653945e-05, "loss": 1.387, "step": 8590 }, { "epoch": 2.607679465776294, "grad_norm": 0.7345092296600342, "learning_rate": 5.6560696567783746e-05, "loss": 1.395, "step": 8591 }, { "epoch": 2.6079830019729853, "grad_norm": 0.8644165992736816, "learning_rate": 5.655563430191354e-05, "loss": 1.3065, "step": 8592 }, { "epoch": 2.6082865381696765, "grad_norm": 0.7582297921180725, "learning_rate": 5.6550572036043336e-05, "loss": 1.4935, "step": 8593 }, { "epoch": 2.608590074366368, "grad_norm": 0.9125105142593384, "learning_rate": 5.654550977017313e-05, "loss": 1.2867, "step": 8594 }, { "epoch": 2.60889361056306, "grad_norm": 0.8298311233520508, "learning_rate": 5.654044750430293e-05, "loss": 1.4948, "step": 8595 }, { "epoch": 2.609197146759751, "grad_norm": 0.9472196698188782, "learning_rate": 5.653538523843273e-05, "loss": 0.7515, "step": 8596 }, { "epoch": 2.6095006829564427, "grad_norm": 1.047179102897644, "learning_rate": 5.653032297256252e-05, "loss": 0.9547, "step": 8597 }, { "epoch": 2.609804219153134, "grad_norm": 0.7703762054443359, "learning_rate": 5.652526070669232e-05, "loss": 1.4232, "step": 8598 }, { "epoch": 2.6101077553498255, "grad_norm": 0.6805330514907837, "learning_rate": 5.652019844082211e-05, "loss": 1.2739, "step": 8599 }, { "epoch": 2.6104112915465167, "grad_norm": 0.8809008598327637, "learning_rate": 5.651513617495191e-05, "loss": 1.1999, "step": 8600 }, { "epoch": 2.6107148277432084, "grad_norm": 0.7000548839569092, "learning_rate": 5.651007390908171e-05, "loss": 1.0212, "step": 8601 }, { "epoch": 2.6110183639399, "grad_norm": 0.8035356402397156, "learning_rate": 5.65050116432115e-05, "loss": 1.5589, "step": 8602 }, { "epoch": 2.6113219001365913, "grad_norm": 0.9235441088676453, "learning_rate": 5.64999493773413e-05, "loss": 1.2626, "step": 8603 }, { "epoch": 2.6116254363332825, "grad_norm": 0.8452057242393494, "learning_rate": 5.649488711147109e-05, "loss": 1.2997, "step": 8604 }, { "epoch": 2.611928972529974, "grad_norm": 0.8349605202674866, "learning_rate": 5.6489824845600894e-05, "loss": 1.4014, "step": 8605 }, { "epoch": 2.612232508726666, "grad_norm": 0.586233913898468, "learning_rate": 5.648476257973069e-05, "loss": 0.8841, "step": 8606 }, { "epoch": 2.612536044923357, "grad_norm": 1.050804853439331, "learning_rate": 5.6479700313860483e-05, "loss": 1.2541, "step": 8607 }, { "epoch": 2.6128395811200487, "grad_norm": 0.7679193615913391, "learning_rate": 5.647463804799028e-05, "loss": 1.1245, "step": 8608 }, { "epoch": 2.61314311731674, "grad_norm": 0.9099006056785583, "learning_rate": 5.646957578212008e-05, "loss": 1.2773, "step": 8609 }, { "epoch": 2.6134466535134315, "grad_norm": 0.7303164601325989, "learning_rate": 5.6464513516249875e-05, "loss": 1.4331, "step": 8610 }, { "epoch": 2.6137501897101227, "grad_norm": 0.9463585615158081, "learning_rate": 5.645945125037967e-05, "loss": 1.6328, "step": 8611 }, { "epoch": 2.6140537259068144, "grad_norm": 0.7220118045806885, "learning_rate": 5.6454388984509464e-05, "loss": 1.1847, "step": 8612 }, { "epoch": 2.614357262103506, "grad_norm": 0.8586870431900024, "learning_rate": 5.644932671863926e-05, "loss": 1.1935, "step": 8613 }, { "epoch": 2.6146607983001973, "grad_norm": 0.7535932660102844, "learning_rate": 5.644426445276906e-05, "loss": 1.4595, "step": 8614 }, { "epoch": 2.614964334496889, "grad_norm": 0.7344147562980652, "learning_rate": 5.6439202186898855e-05, "loss": 1.5222, "step": 8615 }, { "epoch": 2.61526787069358, "grad_norm": 0.8207230567932129, "learning_rate": 5.643413992102865e-05, "loss": 1.3544, "step": 8616 }, { "epoch": 2.615571406890272, "grad_norm": 1.018458366394043, "learning_rate": 5.6429077655158445e-05, "loss": 1.5002, "step": 8617 }, { "epoch": 2.615874943086963, "grad_norm": 0.7685471177101135, "learning_rate": 5.642401538928824e-05, "loss": 1.6546, "step": 8618 }, { "epoch": 2.6161784792836547, "grad_norm": 0.8092283010482788, "learning_rate": 5.641895312341804e-05, "loss": 1.3715, "step": 8619 }, { "epoch": 2.616482015480346, "grad_norm": 0.9026480913162231, "learning_rate": 5.641389085754785e-05, "loss": 1.292, "step": 8620 }, { "epoch": 2.6167855516770375, "grad_norm": 0.8444890975952148, "learning_rate": 5.6408828591677644e-05, "loss": 1.2972, "step": 8621 }, { "epoch": 2.6170890878737287, "grad_norm": 1.0628342628479004, "learning_rate": 5.640376632580744e-05, "loss": 0.8799, "step": 8622 }, { "epoch": 2.6173926240704204, "grad_norm": 0.9359757304191589, "learning_rate": 5.6398704059937234e-05, "loss": 1.0312, "step": 8623 }, { "epoch": 2.617696160267112, "grad_norm": 0.6911135315895081, "learning_rate": 5.639364179406703e-05, "loss": 0.7175, "step": 8624 }, { "epoch": 2.6179996964638033, "grad_norm": 0.7658038139343262, "learning_rate": 5.638857952819683e-05, "loss": 1.4084, "step": 8625 }, { "epoch": 2.618303232660495, "grad_norm": 0.844971776008606, "learning_rate": 5.6383517262326625e-05, "loss": 1.1376, "step": 8626 }, { "epoch": 2.618606768857186, "grad_norm": 0.8520523309707642, "learning_rate": 5.637845499645642e-05, "loss": 1.5798, "step": 8627 }, { "epoch": 2.618910305053878, "grad_norm": 0.7942187786102295, "learning_rate": 5.6373392730586215e-05, "loss": 0.9526, "step": 8628 }, { "epoch": 2.619213841250569, "grad_norm": 0.7984597086906433, "learning_rate": 5.636833046471601e-05, "loss": 1.3979, "step": 8629 }, { "epoch": 2.6195173774472607, "grad_norm": 0.9261837005615234, "learning_rate": 5.636326819884581e-05, "loss": 1.6082, "step": 8630 }, { "epoch": 2.6198209136439523, "grad_norm": 0.77427077293396, "learning_rate": 5.6358205932975606e-05, "loss": 1.0873, "step": 8631 }, { "epoch": 2.6201244498406435, "grad_norm": 0.7438533306121826, "learning_rate": 5.63531436671054e-05, "loss": 1.2523, "step": 8632 }, { "epoch": 2.6204279860373347, "grad_norm": 0.7224971055984497, "learning_rate": 5.6348081401235196e-05, "loss": 1.3656, "step": 8633 }, { "epoch": 2.6207315222340264, "grad_norm": 0.7457458972930908, "learning_rate": 5.6343019135365e-05, "loss": 1.5195, "step": 8634 }, { "epoch": 2.621035058430718, "grad_norm": 0.6107844114303589, "learning_rate": 5.633795686949479e-05, "loss": 1.7412, "step": 8635 }, { "epoch": 2.6213385946274093, "grad_norm": 0.6721503734588623, "learning_rate": 5.633289460362459e-05, "loss": 1.691, "step": 8636 }, { "epoch": 2.621642130824101, "grad_norm": 0.9568755626678467, "learning_rate": 5.632783233775438e-05, "loss": 1.0017, "step": 8637 }, { "epoch": 2.621945667020792, "grad_norm": 0.8790843486785889, "learning_rate": 5.6322770071884176e-05, "loss": 1.4398, "step": 8638 }, { "epoch": 2.622249203217484, "grad_norm": 0.8850890398025513, "learning_rate": 5.631770780601398e-05, "loss": 1.4364, "step": 8639 }, { "epoch": 2.622552739414175, "grad_norm": 0.6632746458053589, "learning_rate": 5.631264554014377e-05, "loss": 1.1789, "step": 8640 }, { "epoch": 2.6228562756108666, "grad_norm": 0.7869992852210999, "learning_rate": 5.630758327427357e-05, "loss": 1.3563, "step": 8641 }, { "epoch": 2.6231598118075583, "grad_norm": 0.9065262079238892, "learning_rate": 5.630252100840336e-05, "loss": 1.6158, "step": 8642 }, { "epoch": 2.6234633480042495, "grad_norm": 0.7699462175369263, "learning_rate": 5.629745874253316e-05, "loss": 1.6771, "step": 8643 }, { "epoch": 2.6237668842009407, "grad_norm": 0.787337601184845, "learning_rate": 5.629239647666296e-05, "loss": 0.8428, "step": 8644 }, { "epoch": 2.6240704203976324, "grad_norm": 0.8821600675582886, "learning_rate": 5.628733421079275e-05, "loss": 1.2923, "step": 8645 }, { "epoch": 2.624373956594324, "grad_norm": 0.9925124049186707, "learning_rate": 5.628227194492255e-05, "loss": 1.528, "step": 8646 }, { "epoch": 2.6246774927910153, "grad_norm": 0.8500638604164124, "learning_rate": 5.627720967905234e-05, "loss": 1.4636, "step": 8647 }, { "epoch": 2.624981028987707, "grad_norm": 0.6999285221099854, "learning_rate": 5.6272147413182145e-05, "loss": 1.0709, "step": 8648 }, { "epoch": 2.625284565184398, "grad_norm": 0.7922846674919128, "learning_rate": 5.626708514731194e-05, "loss": 1.2424, "step": 8649 }, { "epoch": 2.62558810138109, "grad_norm": 0.7939204573631287, "learning_rate": 5.6262022881441734e-05, "loss": 1.6343, "step": 8650 }, { "epoch": 2.625891637577781, "grad_norm": 0.9492472410202026, "learning_rate": 5.625696061557153e-05, "loss": 1.1092, "step": 8651 }, { "epoch": 2.6261951737744726, "grad_norm": 0.7657208442687988, "learning_rate": 5.6251898349701324e-05, "loss": 1.2051, "step": 8652 }, { "epoch": 2.6264987099711643, "grad_norm": 0.7393199801445007, "learning_rate": 5.6246836083831125e-05, "loss": 1.4236, "step": 8653 }, { "epoch": 2.6268022461678555, "grad_norm": 0.8838475942611694, "learning_rate": 5.624177381796092e-05, "loss": 1.4203, "step": 8654 }, { "epoch": 2.6271057823645467, "grad_norm": 0.768323540687561, "learning_rate": 5.6236711552090715e-05, "loss": 1.5179, "step": 8655 }, { "epoch": 2.6274093185612384, "grad_norm": 0.8612476587295532, "learning_rate": 5.623164928622051e-05, "loss": 0.9426, "step": 8656 }, { "epoch": 2.62771285475793, "grad_norm": 0.6874903440475464, "learning_rate": 5.6226587020350304e-05, "loss": 1.5948, "step": 8657 }, { "epoch": 2.6280163909546213, "grad_norm": 1.2635430097579956, "learning_rate": 5.6221524754480106e-05, "loss": 1.0308, "step": 8658 }, { "epoch": 2.628319927151313, "grad_norm": 0.7369987368583679, "learning_rate": 5.62164624886099e-05, "loss": 1.3982, "step": 8659 }, { "epoch": 2.628623463348004, "grad_norm": 0.7699949741363525, "learning_rate": 5.6211400222739696e-05, "loss": 1.4156, "step": 8660 }, { "epoch": 2.6289269995446958, "grad_norm": 0.6728193759918213, "learning_rate": 5.620633795686949e-05, "loss": 1.473, "step": 8661 }, { "epoch": 2.629230535741387, "grad_norm": 0.669257402420044, "learning_rate": 5.620127569099929e-05, "loss": 0.4621, "step": 8662 }, { "epoch": 2.6295340719380786, "grad_norm": 0.7065978050231934, "learning_rate": 5.619621342512909e-05, "loss": 1.4676, "step": 8663 }, { "epoch": 2.6298376081347703, "grad_norm": 0.7422645688056946, "learning_rate": 5.6191151159258895e-05, "loss": 1.3818, "step": 8664 }, { "epoch": 2.6301411443314615, "grad_norm": 0.8151669502258301, "learning_rate": 5.618608889338869e-05, "loss": 1.5563, "step": 8665 }, { "epoch": 2.6304446805281527, "grad_norm": 0.6048735976219177, "learning_rate": 5.6181026627518485e-05, "loss": 1.3932, "step": 8666 }, { "epoch": 2.6307482167248444, "grad_norm": 0.8093709349632263, "learning_rate": 5.617596436164828e-05, "loss": 1.4714, "step": 8667 }, { "epoch": 2.631051752921536, "grad_norm": 0.904063880443573, "learning_rate": 5.6170902095778074e-05, "loss": 1.1276, "step": 8668 }, { "epoch": 2.6313552891182272, "grad_norm": 0.9942129850387573, "learning_rate": 5.6165839829907876e-05, "loss": 1.3145, "step": 8669 }, { "epoch": 2.631658825314919, "grad_norm": 0.9170172214508057, "learning_rate": 5.616077756403767e-05, "loss": 1.361, "step": 8670 }, { "epoch": 2.63196236151161, "grad_norm": 0.968109667301178, "learning_rate": 5.6155715298167465e-05, "loss": 0.9555, "step": 8671 }, { "epoch": 2.6322658977083018, "grad_norm": 0.9411510229110718, "learning_rate": 5.615065303229726e-05, "loss": 1.3459, "step": 8672 }, { "epoch": 2.632569433904993, "grad_norm": 0.6706832647323608, "learning_rate": 5.614559076642706e-05, "loss": 1.3142, "step": 8673 }, { "epoch": 2.6328729701016846, "grad_norm": 0.9438613057136536, "learning_rate": 5.614052850055686e-05, "loss": 1.4197, "step": 8674 }, { "epoch": 2.6331765062983763, "grad_norm": 0.8801570534706116, "learning_rate": 5.613546623468665e-05, "loss": 1.3537, "step": 8675 }, { "epoch": 2.6334800424950675, "grad_norm": 0.7939736247062683, "learning_rate": 5.6130403968816446e-05, "loss": 1.3053, "step": 8676 }, { "epoch": 2.6337835786917587, "grad_norm": 0.7711239457130432, "learning_rate": 5.612534170294624e-05, "loss": 1.0631, "step": 8677 }, { "epoch": 2.6340871148884504, "grad_norm": 0.8416082262992859, "learning_rate": 5.612027943707604e-05, "loss": 1.3773, "step": 8678 }, { "epoch": 2.634390651085142, "grad_norm": 0.7541301846504211, "learning_rate": 5.611521717120584e-05, "loss": 1.4581, "step": 8679 }, { "epoch": 2.6346941872818332, "grad_norm": 0.8372868299484253, "learning_rate": 5.611015490533563e-05, "loss": 1.2465, "step": 8680 }, { "epoch": 2.634997723478525, "grad_norm": 0.613940954208374, "learning_rate": 5.610509263946543e-05, "loss": 0.8319, "step": 8681 }, { "epoch": 2.635301259675216, "grad_norm": 0.6832600831985474, "learning_rate": 5.610003037359522e-05, "loss": 1.6382, "step": 8682 }, { "epoch": 2.6356047958719078, "grad_norm": 0.8606064915657043, "learning_rate": 5.609496810772502e-05, "loss": 1.2397, "step": 8683 }, { "epoch": 2.635908332068599, "grad_norm": 0.7566887736320496, "learning_rate": 5.608990584185482e-05, "loss": 1.3182, "step": 8684 }, { "epoch": 2.6362118682652906, "grad_norm": 0.9536645412445068, "learning_rate": 5.608484357598461e-05, "loss": 1.334, "step": 8685 }, { "epoch": 2.6365154044619823, "grad_norm": 0.7034269571304321, "learning_rate": 5.607978131011441e-05, "loss": 1.7559, "step": 8686 }, { "epoch": 2.6368189406586735, "grad_norm": 0.8286762833595276, "learning_rate": 5.607471904424421e-05, "loss": 1.2866, "step": 8687 }, { "epoch": 2.637122476855365, "grad_norm": 0.6119000315666199, "learning_rate": 5.6069656778374004e-05, "loss": 1.0934, "step": 8688 }, { "epoch": 2.6374260130520564, "grad_norm": 0.8798956274986267, "learning_rate": 5.60645945125038e-05, "loss": 1.4096, "step": 8689 }, { "epoch": 2.637729549248748, "grad_norm": 1.1200183629989624, "learning_rate": 5.6059532246633594e-05, "loss": 1.1836, "step": 8690 }, { "epoch": 2.6380330854454392, "grad_norm": 0.6121519207954407, "learning_rate": 5.605446998076339e-05, "loss": 1.5971, "step": 8691 }, { "epoch": 2.638336621642131, "grad_norm": 0.7266345620155334, "learning_rate": 5.604940771489319e-05, "loss": 1.5815, "step": 8692 }, { "epoch": 2.6386401578388226, "grad_norm": 0.8083154559135437, "learning_rate": 5.6044345449022985e-05, "loss": 1.3426, "step": 8693 }, { "epoch": 2.6389436940355138, "grad_norm": 0.8204373717308044, "learning_rate": 5.603928318315278e-05, "loss": 1.4459, "step": 8694 }, { "epoch": 2.639247230232205, "grad_norm": 0.839185357093811, "learning_rate": 5.6034220917282574e-05, "loss": 1.3984, "step": 8695 }, { "epoch": 2.6395507664288966, "grad_norm": 0.7797434329986572, "learning_rate": 5.602915865141237e-05, "loss": 1.3934, "step": 8696 }, { "epoch": 2.6398543026255883, "grad_norm": 0.7706314921379089, "learning_rate": 5.602409638554217e-05, "loss": 1.5455, "step": 8697 }, { "epoch": 2.6401578388222795, "grad_norm": 0.7319291234016418, "learning_rate": 5.6019034119671966e-05, "loss": 0.9225, "step": 8698 }, { "epoch": 2.640461375018971, "grad_norm": 0.7184723019599915, "learning_rate": 5.601397185380176e-05, "loss": 1.6774, "step": 8699 }, { "epoch": 2.6407649112156624, "grad_norm": 0.5552166700363159, "learning_rate": 5.6008909587931555e-05, "loss": 0.7107, "step": 8700 }, { "epoch": 2.641068447412354, "grad_norm": 0.88196861743927, "learning_rate": 5.600384732206136e-05, "loss": 1.4091, "step": 8701 }, { "epoch": 2.6413719836090452, "grad_norm": 0.9761573076248169, "learning_rate": 5.599878505619115e-05, "loss": 0.9132, "step": 8702 }, { "epoch": 2.641675519805737, "grad_norm": 0.7083683609962463, "learning_rate": 5.5993722790320946e-05, "loss": 1.1454, "step": 8703 }, { "epoch": 2.6419790560024285, "grad_norm": 0.7368018627166748, "learning_rate": 5.598866052445074e-05, "loss": 1.4792, "step": 8704 }, { "epoch": 2.6422825921991198, "grad_norm": 0.7450541257858276, "learning_rate": 5.5983598258580536e-05, "loss": 1.3225, "step": 8705 }, { "epoch": 2.642586128395811, "grad_norm": 0.7936186194419861, "learning_rate": 5.597853599271034e-05, "loss": 1.4453, "step": 8706 }, { "epoch": 2.6428896645925026, "grad_norm": 0.7664183378219604, "learning_rate": 5.597347372684013e-05, "loss": 1.3638, "step": 8707 }, { "epoch": 2.6431932007891943, "grad_norm": 0.8624455332756042, "learning_rate": 5.596841146096994e-05, "loss": 1.0626, "step": 8708 }, { "epoch": 2.6434967369858855, "grad_norm": 0.7933397889137268, "learning_rate": 5.5963349195099735e-05, "loss": 1.3289, "step": 8709 }, { "epoch": 2.643800273182577, "grad_norm": 0.8627804517745972, "learning_rate": 5.595828692922953e-05, "loss": 1.4403, "step": 8710 }, { "epoch": 2.6441038093792684, "grad_norm": 0.9213400483131409, "learning_rate": 5.5953224663359325e-05, "loss": 1.1033, "step": 8711 }, { "epoch": 2.64440734557596, "grad_norm": 0.8947356939315796, "learning_rate": 5.5948162397489127e-05, "loss": 1.4111, "step": 8712 }, { "epoch": 2.6447108817726512, "grad_norm": 0.8398584127426147, "learning_rate": 5.594310013161892e-05, "loss": 1.4132, "step": 8713 }, { "epoch": 2.645014417969343, "grad_norm": 0.6768031120300293, "learning_rate": 5.5938037865748716e-05, "loss": 1.4414, "step": 8714 }, { "epoch": 2.6453179541660345, "grad_norm": 0.6541370749473572, "learning_rate": 5.593297559987851e-05, "loss": 1.0148, "step": 8715 }, { "epoch": 2.6456214903627258, "grad_norm": 0.7286548614501953, "learning_rate": 5.5927913334008306e-05, "loss": 1.0437, "step": 8716 }, { "epoch": 2.645925026559417, "grad_norm": 0.8829400539398193, "learning_rate": 5.592285106813811e-05, "loss": 1.2994, "step": 8717 }, { "epoch": 2.6462285627561086, "grad_norm": 0.7351943850517273, "learning_rate": 5.59177888022679e-05, "loss": 1.3467, "step": 8718 }, { "epoch": 2.6465320989528003, "grad_norm": 0.6213396787643433, "learning_rate": 5.59127265363977e-05, "loss": 1.6852, "step": 8719 }, { "epoch": 2.6468356351494915, "grad_norm": 0.8491136431694031, "learning_rate": 5.590766427052749e-05, "loss": 1.1302, "step": 8720 }, { "epoch": 2.647139171346183, "grad_norm": 0.6281788349151611, "learning_rate": 5.5902602004657287e-05, "loss": 1.8956, "step": 8721 }, { "epoch": 2.6474427075428744, "grad_norm": 0.839736819267273, "learning_rate": 5.589753973878709e-05, "loss": 1.224, "step": 8722 }, { "epoch": 2.647746243739566, "grad_norm": 0.6985284090042114, "learning_rate": 5.589247747291688e-05, "loss": 1.585, "step": 8723 }, { "epoch": 2.6480497799362572, "grad_norm": 0.7261654138565063, "learning_rate": 5.588741520704668e-05, "loss": 1.3567, "step": 8724 }, { "epoch": 2.648353316132949, "grad_norm": 0.8689409494400024, "learning_rate": 5.588235294117647e-05, "loss": 1.4018, "step": 8725 }, { "epoch": 2.6486568523296405, "grad_norm": 0.9367418885231018, "learning_rate": 5.5877290675306274e-05, "loss": 1.362, "step": 8726 }, { "epoch": 2.6489603885263318, "grad_norm": 0.8631361126899719, "learning_rate": 5.587222840943607e-05, "loss": 1.355, "step": 8727 }, { "epoch": 2.649263924723023, "grad_norm": 0.8527751564979553, "learning_rate": 5.5867166143565864e-05, "loss": 1.1807, "step": 8728 }, { "epoch": 2.6495674609197146, "grad_norm": 0.7197252511978149, "learning_rate": 5.586210387769566e-05, "loss": 1.5399, "step": 8729 }, { "epoch": 2.6498709971164063, "grad_norm": 0.7717894315719604, "learning_rate": 5.585704161182545e-05, "loss": 0.8643, "step": 8730 }, { "epoch": 2.6501745333130975, "grad_norm": 0.5923122763633728, "learning_rate": 5.5851979345955255e-05, "loss": 0.9277, "step": 8731 }, { "epoch": 2.650478069509789, "grad_norm": 0.8007644414901733, "learning_rate": 5.584691708008505e-05, "loss": 1.2303, "step": 8732 }, { "epoch": 2.6507816057064804, "grad_norm": 0.9415502548217773, "learning_rate": 5.5841854814214844e-05, "loss": 1.2438, "step": 8733 }, { "epoch": 2.651085141903172, "grad_norm": 0.7225777506828308, "learning_rate": 5.583679254834464e-05, "loss": 1.5995, "step": 8734 }, { "epoch": 2.6513886780998632, "grad_norm": 0.8504562377929688, "learning_rate": 5.5831730282474434e-05, "loss": 1.46, "step": 8735 }, { "epoch": 2.651692214296555, "grad_norm": 0.8856134414672852, "learning_rate": 5.5826668016604236e-05, "loss": 1.2693, "step": 8736 }, { "epoch": 2.6519957504932465, "grad_norm": 0.5905324220657349, "learning_rate": 5.582160575073403e-05, "loss": 1.2373, "step": 8737 }, { "epoch": 2.6522992866899378, "grad_norm": 0.6726984977722168, "learning_rate": 5.5816543484863825e-05, "loss": 1.5899, "step": 8738 }, { "epoch": 2.652602822886629, "grad_norm": 0.7494386434555054, "learning_rate": 5.581148121899362e-05, "loss": 1.3021, "step": 8739 }, { "epoch": 2.6529063590833206, "grad_norm": 0.7558499574661255, "learning_rate": 5.580641895312342e-05, "loss": 1.1097, "step": 8740 }, { "epoch": 2.6532098952800123, "grad_norm": 0.8065043687820435, "learning_rate": 5.5801356687253216e-05, "loss": 1.385, "step": 8741 }, { "epoch": 2.6535134314767035, "grad_norm": 0.9156901836395264, "learning_rate": 5.579629442138301e-05, "loss": 1.2601, "step": 8742 }, { "epoch": 2.653816967673395, "grad_norm": 0.8504417538642883, "learning_rate": 5.5791232155512806e-05, "loss": 1.107, "step": 8743 }, { "epoch": 2.6541205038700864, "grad_norm": 0.7907352447509766, "learning_rate": 5.57861698896426e-05, "loss": 1.5306, "step": 8744 }, { "epoch": 2.654424040066778, "grad_norm": 0.7265666127204895, "learning_rate": 5.57811076237724e-05, "loss": 1.4732, "step": 8745 }, { "epoch": 2.6547275762634692, "grad_norm": 0.7959305644035339, "learning_rate": 5.57760453579022e-05, "loss": 1.1562, "step": 8746 }, { "epoch": 2.655031112460161, "grad_norm": 0.8458207249641418, "learning_rate": 5.577098309203199e-05, "loss": 0.9998, "step": 8747 }, { "epoch": 2.6553346486568525, "grad_norm": 0.8517472743988037, "learning_rate": 5.576592082616179e-05, "loss": 1.342, "step": 8748 }, { "epoch": 2.6556381848535437, "grad_norm": 0.8582330942153931, "learning_rate": 5.576085856029158e-05, "loss": 1.805, "step": 8749 }, { "epoch": 2.6559417210502354, "grad_norm": 0.8525375723838806, "learning_rate": 5.575579629442138e-05, "loss": 1.4115, "step": 8750 }, { "epoch": 2.6562452572469266, "grad_norm": 0.8539319038391113, "learning_rate": 5.575073402855118e-05, "loss": 1.4829, "step": 8751 }, { "epoch": 2.6565487934436183, "grad_norm": 0.9488011598587036, "learning_rate": 5.574567176268097e-05, "loss": 1.0497, "step": 8752 }, { "epoch": 2.6568523296403095, "grad_norm": 0.8719534873962402, "learning_rate": 5.574060949681078e-05, "loss": 1.3976, "step": 8753 }, { "epoch": 2.657155865837001, "grad_norm": 0.8704327344894409, "learning_rate": 5.5735547230940576e-05, "loss": 1.4135, "step": 8754 }, { "epoch": 2.6574594020336924, "grad_norm": 0.6540507078170776, "learning_rate": 5.573048496507037e-05, "loss": 1.4733, "step": 8755 }, { "epoch": 2.657762938230384, "grad_norm": 1.0412238836288452, "learning_rate": 5.572542269920017e-05, "loss": 1.2556, "step": 8756 }, { "epoch": 2.658066474427075, "grad_norm": 0.6366206407546997, "learning_rate": 5.572036043332997e-05, "loss": 0.992, "step": 8757 }, { "epoch": 2.658370010623767, "grad_norm": 0.7455922961235046, "learning_rate": 5.571529816745976e-05, "loss": 1.4857, "step": 8758 }, { "epoch": 2.6586735468204585, "grad_norm": 0.7818800806999207, "learning_rate": 5.5710235901589556e-05, "loss": 1.3079, "step": 8759 }, { "epoch": 2.6589770830171497, "grad_norm": 0.6763079762458801, "learning_rate": 5.570517363571935e-05, "loss": 1.4807, "step": 8760 }, { "epoch": 2.6592806192138414, "grad_norm": 0.9120274186134338, "learning_rate": 5.570011136984915e-05, "loss": 1.3967, "step": 8761 }, { "epoch": 2.6595841554105326, "grad_norm": 0.8004521727561951, "learning_rate": 5.569504910397895e-05, "loss": 1.2554, "step": 8762 }, { "epoch": 2.6598876916072243, "grad_norm": 0.7720287442207336, "learning_rate": 5.568998683810874e-05, "loss": 1.4389, "step": 8763 }, { "epoch": 2.6601912278039155, "grad_norm": 1.0542229413986206, "learning_rate": 5.568492457223854e-05, "loss": 1.1365, "step": 8764 }, { "epoch": 2.660494764000607, "grad_norm": 0.6890188455581665, "learning_rate": 5.567986230636834e-05, "loss": 1.485, "step": 8765 }, { "epoch": 2.660798300197299, "grad_norm": 0.7676848769187927, "learning_rate": 5.5674800040498134e-05, "loss": 1.3161, "step": 8766 }, { "epoch": 2.66110183639399, "grad_norm": 0.8541175127029419, "learning_rate": 5.566973777462793e-05, "loss": 1.1301, "step": 8767 }, { "epoch": 2.661405372590681, "grad_norm": 1.1530959606170654, "learning_rate": 5.566467550875772e-05, "loss": 1.4562, "step": 8768 }, { "epoch": 2.661708908787373, "grad_norm": 0.7591552734375, "learning_rate": 5.565961324288752e-05, "loss": 1.1896, "step": 8769 }, { "epoch": 2.6620124449840645, "grad_norm": 1.0255539417266846, "learning_rate": 5.565455097701732e-05, "loss": 1.2768, "step": 8770 }, { "epoch": 2.6623159811807557, "grad_norm": 0.7130913734436035, "learning_rate": 5.5649488711147114e-05, "loss": 1.2036, "step": 8771 }, { "epoch": 2.6626195173774474, "grad_norm": 0.8823549747467041, "learning_rate": 5.564442644527691e-05, "loss": 1.3412, "step": 8772 }, { "epoch": 2.6629230535741386, "grad_norm": 0.7604090571403503, "learning_rate": 5.5639364179406704e-05, "loss": 0.9941, "step": 8773 }, { "epoch": 2.6632265897708303, "grad_norm": 0.8101242184638977, "learning_rate": 5.56343019135365e-05, "loss": 1.5021, "step": 8774 }, { "epoch": 2.6635301259675215, "grad_norm": 0.7834902405738831, "learning_rate": 5.56292396476663e-05, "loss": 1.403, "step": 8775 }, { "epoch": 2.663833662164213, "grad_norm": 0.5914158225059509, "learning_rate": 5.5624177381796095e-05, "loss": 1.3118, "step": 8776 }, { "epoch": 2.664137198360905, "grad_norm": 0.7818302512168884, "learning_rate": 5.561911511592589e-05, "loss": 1.5593, "step": 8777 }, { "epoch": 2.664440734557596, "grad_norm": 0.8937379717826843, "learning_rate": 5.5614052850055685e-05, "loss": 1.1552, "step": 8778 }, { "epoch": 2.664744270754287, "grad_norm": 0.916081964969635, "learning_rate": 5.5608990584185486e-05, "loss": 1.4133, "step": 8779 }, { "epoch": 2.665047806950979, "grad_norm": 0.9175965785980225, "learning_rate": 5.560392831831528e-05, "loss": 1.5523, "step": 8780 }, { "epoch": 2.6653513431476705, "grad_norm": 0.7606849074363708, "learning_rate": 5.5598866052445076e-05, "loss": 1.5572, "step": 8781 }, { "epoch": 2.6656548793443617, "grad_norm": 0.8044320344924927, "learning_rate": 5.559380378657487e-05, "loss": 1.2658, "step": 8782 }, { "epoch": 2.6659584155410534, "grad_norm": 0.5691137909889221, "learning_rate": 5.5588741520704665e-05, "loss": 1.0176, "step": 8783 }, { "epoch": 2.6662619517377446, "grad_norm": 0.5771634578704834, "learning_rate": 5.558367925483447e-05, "loss": 1.2895, "step": 8784 }, { "epoch": 2.6665654879344363, "grad_norm": 0.7913541197776794, "learning_rate": 5.557861698896426e-05, "loss": 1.1218, "step": 8785 }, { "epoch": 2.6668690241311275, "grad_norm": 0.8380897045135498, "learning_rate": 5.5573554723094057e-05, "loss": 1.478, "step": 8786 }, { "epoch": 2.667172560327819, "grad_norm": 0.8673321008682251, "learning_rate": 5.556849245722385e-05, "loss": 1.3437, "step": 8787 }, { "epoch": 2.667476096524511, "grad_norm": 0.9138043522834778, "learning_rate": 5.5563430191353646e-05, "loss": 1.1263, "step": 8788 }, { "epoch": 2.667779632721202, "grad_norm": 0.8235269784927368, "learning_rate": 5.555836792548345e-05, "loss": 1.4769, "step": 8789 }, { "epoch": 2.668083168917893, "grad_norm": 1.1817772388458252, "learning_rate": 5.555330565961324e-05, "loss": 1.4067, "step": 8790 }, { "epoch": 2.668386705114585, "grad_norm": 0.778167188167572, "learning_rate": 5.554824339374304e-05, "loss": 1.3586, "step": 8791 }, { "epoch": 2.6686902413112765, "grad_norm": 0.8383827209472656, "learning_rate": 5.554318112787283e-05, "loss": 1.1851, "step": 8792 }, { "epoch": 2.6689937775079677, "grad_norm": 0.6682489514350891, "learning_rate": 5.5538118862002634e-05, "loss": 1.0986, "step": 8793 }, { "epoch": 2.6692973137046594, "grad_norm": 0.6931873559951782, "learning_rate": 5.553305659613243e-05, "loss": 1.3819, "step": 8794 }, { "epoch": 2.6696008499013506, "grad_norm": 0.7875906825065613, "learning_rate": 5.552799433026222e-05, "loss": 1.4054, "step": 8795 }, { "epoch": 2.6699043860980423, "grad_norm": 0.7748824954032898, "learning_rate": 5.552293206439202e-05, "loss": 1.0686, "step": 8796 }, { "epoch": 2.6702079222947335, "grad_norm": 0.7019548416137695, "learning_rate": 5.5517869798521826e-05, "loss": 1.7516, "step": 8797 }, { "epoch": 2.670511458491425, "grad_norm": 0.7278378009796143, "learning_rate": 5.551280753265162e-05, "loss": 1.3714, "step": 8798 }, { "epoch": 2.670814994688117, "grad_norm": 0.8102428913116455, "learning_rate": 5.5507745266781416e-05, "loss": 1.416, "step": 8799 }, { "epoch": 2.671118530884808, "grad_norm": 1.1308832168579102, "learning_rate": 5.550268300091122e-05, "loss": 1.0275, "step": 8800 }, { "epoch": 2.671422067081499, "grad_norm": 0.6818346977233887, "learning_rate": 5.549762073504101e-05, "loss": 0.8489, "step": 8801 }, { "epoch": 2.671725603278191, "grad_norm": 0.7548572421073914, "learning_rate": 5.549255846917081e-05, "loss": 1.2063, "step": 8802 }, { "epoch": 2.6720291394748825, "grad_norm": 0.999927818775177, "learning_rate": 5.54874962033006e-05, "loss": 1.4178, "step": 8803 }, { "epoch": 2.6723326756715737, "grad_norm": 0.7852398157119751, "learning_rate": 5.5482433937430404e-05, "loss": 0.9922, "step": 8804 }, { "epoch": 2.6726362118682654, "grad_norm": 0.7230637073516846, "learning_rate": 5.54773716715602e-05, "loss": 1.1082, "step": 8805 }, { "epoch": 2.6729397480649566, "grad_norm": 1.0759989023208618, "learning_rate": 5.547230940568999e-05, "loss": 0.7466, "step": 8806 }, { "epoch": 2.6732432842616483, "grad_norm": 0.613384485244751, "learning_rate": 5.546724713981979e-05, "loss": 1.1378, "step": 8807 }, { "epoch": 2.6735468204583395, "grad_norm": 0.8507648706436157, "learning_rate": 5.546218487394958e-05, "loss": 0.9715, "step": 8808 }, { "epoch": 2.673850356655031, "grad_norm": 0.7430301308631897, "learning_rate": 5.5457122608079384e-05, "loss": 1.4762, "step": 8809 }, { "epoch": 2.674153892851723, "grad_norm": 0.7480401992797852, "learning_rate": 5.545206034220918e-05, "loss": 1.1704, "step": 8810 }, { "epoch": 2.674457429048414, "grad_norm": 0.719728410243988, "learning_rate": 5.5446998076338974e-05, "loss": 1.191, "step": 8811 }, { "epoch": 2.674760965245105, "grad_norm": 0.8107708692550659, "learning_rate": 5.544193581046877e-05, "loss": 0.9739, "step": 8812 }, { "epoch": 2.675064501441797, "grad_norm": 0.787104070186615, "learning_rate": 5.5436873544598563e-05, "loss": 1.4328, "step": 8813 }, { "epoch": 2.6753680376384885, "grad_norm": 1.0159579515457153, "learning_rate": 5.5431811278728365e-05, "loss": 1.0301, "step": 8814 }, { "epoch": 2.6756715738351797, "grad_norm": 0.8945944905281067, "learning_rate": 5.542674901285816e-05, "loss": 1.4273, "step": 8815 }, { "epoch": 2.6759751100318714, "grad_norm": 0.6963416934013367, "learning_rate": 5.5421686746987955e-05, "loss": 1.0696, "step": 8816 }, { "epoch": 2.6762786462285626, "grad_norm": 0.9370332956314087, "learning_rate": 5.541662448111775e-05, "loss": 1.5392, "step": 8817 }, { "epoch": 2.6765821824252543, "grad_norm": 0.947747528553009, "learning_rate": 5.541156221524755e-05, "loss": 1.4117, "step": 8818 }, { "epoch": 2.6768857186219455, "grad_norm": 0.8421319127082825, "learning_rate": 5.5406499949377346e-05, "loss": 1.2237, "step": 8819 }, { "epoch": 2.677189254818637, "grad_norm": 0.790768563747406, "learning_rate": 5.540143768350714e-05, "loss": 1.5518, "step": 8820 }, { "epoch": 2.6774927910153288, "grad_norm": 0.8691953420639038, "learning_rate": 5.5396375417636935e-05, "loss": 1.6724, "step": 8821 }, { "epoch": 2.67779632721202, "grad_norm": 0.7883888483047485, "learning_rate": 5.539131315176673e-05, "loss": 1.5772, "step": 8822 }, { "epoch": 2.6780998634087116, "grad_norm": 0.7148094177246094, "learning_rate": 5.538625088589653e-05, "loss": 1.4388, "step": 8823 }, { "epoch": 2.678403399605403, "grad_norm": 0.8268208503723145, "learning_rate": 5.5381188620026327e-05, "loss": 1.1574, "step": 8824 }, { "epoch": 2.6787069358020945, "grad_norm": 0.8798223733901978, "learning_rate": 5.537612635415612e-05, "loss": 1.1085, "step": 8825 }, { "epoch": 2.6790104719987857, "grad_norm": 0.7200629711151123, "learning_rate": 5.5371064088285916e-05, "loss": 1.28, "step": 8826 }, { "epoch": 2.6793140081954774, "grad_norm": 1.019481897354126, "learning_rate": 5.536600182241571e-05, "loss": 0.87, "step": 8827 }, { "epoch": 2.679617544392169, "grad_norm": 0.751953125, "learning_rate": 5.536093955654551e-05, "loss": 1.5108, "step": 8828 }, { "epoch": 2.6799210805888602, "grad_norm": 0.7722558379173279, "learning_rate": 5.535587729067531e-05, "loss": 1.2364, "step": 8829 }, { "epoch": 2.6802246167855515, "grad_norm": 0.7905706167221069, "learning_rate": 5.53508150248051e-05, "loss": 1.4848, "step": 8830 }, { "epoch": 2.680528152982243, "grad_norm": 0.7805942296981812, "learning_rate": 5.53457527589349e-05, "loss": 1.4817, "step": 8831 }, { "epoch": 2.6808316891789348, "grad_norm": 0.6384637951850891, "learning_rate": 5.53406904930647e-05, "loss": 1.4018, "step": 8832 }, { "epoch": 2.681135225375626, "grad_norm": 0.7865663766860962, "learning_rate": 5.533562822719449e-05, "loss": 1.0755, "step": 8833 }, { "epoch": 2.6814387615723176, "grad_norm": 0.7390419244766235, "learning_rate": 5.533056596132429e-05, "loss": 1.563, "step": 8834 }, { "epoch": 2.681742297769009, "grad_norm": 0.8227308392524719, "learning_rate": 5.532550369545408e-05, "loss": 1.2486, "step": 8835 }, { "epoch": 2.6820458339657005, "grad_norm": 0.6374137997627258, "learning_rate": 5.532044142958388e-05, "loss": 0.954, "step": 8836 }, { "epoch": 2.6823493701623917, "grad_norm": 0.873408317565918, "learning_rate": 5.531537916371368e-05, "loss": 1.2549, "step": 8837 }, { "epoch": 2.6826529063590834, "grad_norm": 0.7719585299491882, "learning_rate": 5.5310316897843474e-05, "loss": 0.8281, "step": 8838 }, { "epoch": 2.682956442555775, "grad_norm": 0.823834240436554, "learning_rate": 5.530525463197327e-05, "loss": 1.3755, "step": 8839 }, { "epoch": 2.6832599787524662, "grad_norm": 0.8945797085762024, "learning_rate": 5.5300192366103064e-05, "loss": 1.4325, "step": 8840 }, { "epoch": 2.6835635149491575, "grad_norm": 0.7850164771080017, "learning_rate": 5.529513010023286e-05, "loss": 1.2876, "step": 8841 }, { "epoch": 2.683867051145849, "grad_norm": 0.6617736220359802, "learning_rate": 5.529006783436267e-05, "loss": 1.3647, "step": 8842 }, { "epoch": 2.6841705873425408, "grad_norm": 0.639985203742981, "learning_rate": 5.528500556849247e-05, "loss": 1.8664, "step": 8843 }, { "epoch": 2.684474123539232, "grad_norm": 0.8891111612319946, "learning_rate": 5.527994330262226e-05, "loss": 1.3448, "step": 8844 }, { "epoch": 2.6847776597359236, "grad_norm": 0.701209306716919, "learning_rate": 5.527488103675206e-05, "loss": 1.735, "step": 8845 }, { "epoch": 2.685081195932615, "grad_norm": 0.8127440810203552, "learning_rate": 5.526981877088185e-05, "loss": 1.4479, "step": 8846 }, { "epoch": 2.6853847321293065, "grad_norm": 0.7177948355674744, "learning_rate": 5.526475650501165e-05, "loss": 1.4693, "step": 8847 }, { "epoch": 2.6856882683259977, "grad_norm": 0.699662446975708, "learning_rate": 5.525969423914145e-05, "loss": 1.6241, "step": 8848 }, { "epoch": 2.6859918045226894, "grad_norm": 0.7476281523704529, "learning_rate": 5.5254631973271244e-05, "loss": 1.2958, "step": 8849 }, { "epoch": 2.686295340719381, "grad_norm": 0.7222526669502258, "learning_rate": 5.524956970740104e-05, "loss": 1.1927, "step": 8850 }, { "epoch": 2.6865988769160722, "grad_norm": 0.7384173274040222, "learning_rate": 5.5244507441530833e-05, "loss": 1.5723, "step": 8851 }, { "epoch": 2.6869024131127635, "grad_norm": 0.8595612645149231, "learning_rate": 5.523944517566063e-05, "loss": 1.4234, "step": 8852 }, { "epoch": 2.687205949309455, "grad_norm": 0.6644933819770813, "learning_rate": 5.523438290979043e-05, "loss": 1.2558, "step": 8853 }, { "epoch": 2.6875094855061468, "grad_norm": 0.7552403211593628, "learning_rate": 5.5229320643920225e-05, "loss": 1.5953, "step": 8854 }, { "epoch": 2.687813021702838, "grad_norm": 0.7952251434326172, "learning_rate": 5.522425837805002e-05, "loss": 1.3904, "step": 8855 }, { "epoch": 2.6881165578995296, "grad_norm": 0.821399450302124, "learning_rate": 5.5219196112179814e-05, "loss": 1.3383, "step": 8856 }, { "epoch": 2.688420094096221, "grad_norm": 0.7013937830924988, "learning_rate": 5.5214133846309616e-05, "loss": 0.915, "step": 8857 }, { "epoch": 2.6887236302929125, "grad_norm": 0.775075376033783, "learning_rate": 5.520907158043941e-05, "loss": 1.5131, "step": 8858 }, { "epoch": 2.6890271664896037, "grad_norm": 0.7775612473487854, "learning_rate": 5.5204009314569205e-05, "loss": 1.3165, "step": 8859 }, { "epoch": 2.6893307026862954, "grad_norm": 0.6457474231719971, "learning_rate": 5.5198947048699e-05, "loss": 1.7376, "step": 8860 }, { "epoch": 2.689634238882987, "grad_norm": 0.8071801662445068, "learning_rate": 5.5193884782828795e-05, "loss": 1.052, "step": 8861 }, { "epoch": 2.6899377750796782, "grad_norm": 0.7840722799301147, "learning_rate": 5.5188822516958596e-05, "loss": 1.3915, "step": 8862 }, { "epoch": 2.6902413112763695, "grad_norm": 0.7689992189407349, "learning_rate": 5.518376025108839e-05, "loss": 1.4764, "step": 8863 }, { "epoch": 2.690544847473061, "grad_norm": 0.805749237537384, "learning_rate": 5.5178697985218186e-05, "loss": 1.4676, "step": 8864 }, { "epoch": 2.6908483836697528, "grad_norm": 0.6873961687088013, "learning_rate": 5.517363571934798e-05, "loss": 1.2869, "step": 8865 }, { "epoch": 2.691151919866444, "grad_norm": 0.7631956338882446, "learning_rate": 5.5168573453477776e-05, "loss": 1.6077, "step": 8866 }, { "epoch": 2.6914554560631356, "grad_norm": 0.8834057450294495, "learning_rate": 5.516351118760758e-05, "loss": 1.1535, "step": 8867 }, { "epoch": 2.691758992259827, "grad_norm": 0.7160691618919373, "learning_rate": 5.515844892173737e-05, "loss": 1.3836, "step": 8868 }, { "epoch": 2.6920625284565185, "grad_norm": 0.6507584452629089, "learning_rate": 5.515338665586717e-05, "loss": 1.7899, "step": 8869 }, { "epoch": 2.6923660646532097, "grad_norm": 0.7139139771461487, "learning_rate": 5.514832438999696e-05, "loss": 1.4391, "step": 8870 }, { "epoch": 2.6926696008499014, "grad_norm": 0.7380549907684326, "learning_rate": 5.514326212412676e-05, "loss": 0.706, "step": 8871 }, { "epoch": 2.692973137046593, "grad_norm": 0.8420911431312561, "learning_rate": 5.513819985825656e-05, "loss": 0.8575, "step": 8872 }, { "epoch": 2.6932766732432842, "grad_norm": 0.8003743886947632, "learning_rate": 5.513313759238635e-05, "loss": 1.4146, "step": 8873 }, { "epoch": 2.6935802094399754, "grad_norm": 0.9504046440124512, "learning_rate": 5.512807532651615e-05, "loss": 1.3557, "step": 8874 }, { "epoch": 2.693883745636667, "grad_norm": 0.9947271943092346, "learning_rate": 5.512301306064594e-05, "loss": 1.1656, "step": 8875 }, { "epoch": 2.6941872818333588, "grad_norm": 0.8137509822845459, "learning_rate": 5.5117950794775744e-05, "loss": 1.359, "step": 8876 }, { "epoch": 2.69449081803005, "grad_norm": 0.703301727771759, "learning_rate": 5.511288852890554e-05, "loss": 1.513, "step": 8877 }, { "epoch": 2.6947943542267416, "grad_norm": 0.8018040060997009, "learning_rate": 5.5107826263035334e-05, "loss": 1.5769, "step": 8878 }, { "epoch": 2.695097890423433, "grad_norm": 0.8137522339820862, "learning_rate": 5.510276399716513e-05, "loss": 1.4127, "step": 8879 }, { "epoch": 2.6954014266201245, "grad_norm": 0.7223002910614014, "learning_rate": 5.509770173129492e-05, "loss": 1.0056, "step": 8880 }, { "epoch": 2.6957049628168157, "grad_norm": 0.7558143734931946, "learning_rate": 5.5092639465424725e-05, "loss": 1.2446, "step": 8881 }, { "epoch": 2.6960084990135074, "grad_norm": 0.8840802311897278, "learning_rate": 5.508757719955452e-05, "loss": 1.1856, "step": 8882 }, { "epoch": 2.696312035210199, "grad_norm": 0.8228915333747864, "learning_rate": 5.5082514933684314e-05, "loss": 1.6278, "step": 8883 }, { "epoch": 2.6966155714068902, "grad_norm": 0.984533429145813, "learning_rate": 5.507745266781411e-05, "loss": 0.7835, "step": 8884 }, { "epoch": 2.696919107603582, "grad_norm": 1.7709839344024658, "learning_rate": 5.507239040194391e-05, "loss": 1.2948, "step": 8885 }, { "epoch": 2.697222643800273, "grad_norm": 0.8224371671676636, "learning_rate": 5.506732813607371e-05, "loss": 1.0086, "step": 8886 }, { "epoch": 2.6975261799969648, "grad_norm": 0.79408860206604, "learning_rate": 5.5062265870203514e-05, "loss": 1.4202, "step": 8887 }, { "epoch": 2.697829716193656, "grad_norm": 0.6942019462585449, "learning_rate": 5.505720360433331e-05, "loss": 1.1817, "step": 8888 }, { "epoch": 2.6981332523903476, "grad_norm": 0.665850818157196, "learning_rate": 5.50521413384631e-05, "loss": 1.6194, "step": 8889 }, { "epoch": 2.6984367885870393, "grad_norm": 0.9044862389564514, "learning_rate": 5.50470790725929e-05, "loss": 1.4179, "step": 8890 }, { "epoch": 2.6987403247837305, "grad_norm": 0.8192222118377686, "learning_rate": 5.504201680672269e-05, "loss": 1.3351, "step": 8891 }, { "epoch": 2.6990438609804217, "grad_norm": 0.8500630259513855, "learning_rate": 5.5036954540852495e-05, "loss": 1.3052, "step": 8892 }, { "epoch": 2.6993473971771134, "grad_norm": 1.4197098016738892, "learning_rate": 5.503189227498229e-05, "loss": 1.0621, "step": 8893 }, { "epoch": 2.699650933373805, "grad_norm": 0.7029363512992859, "learning_rate": 5.5026830009112084e-05, "loss": 1.4191, "step": 8894 }, { "epoch": 2.6999544695704962, "grad_norm": 0.7289296984672546, "learning_rate": 5.502176774324188e-05, "loss": 1.273, "step": 8895 }, { "epoch": 2.700258005767188, "grad_norm": 0.903388261795044, "learning_rate": 5.501670547737168e-05, "loss": 1.3548, "step": 8896 }, { "epoch": 2.700561541963879, "grad_norm": 0.7993081212043762, "learning_rate": 5.5011643211501475e-05, "loss": 1.1973, "step": 8897 }, { "epoch": 2.7008650781605708, "grad_norm": 0.6894220113754272, "learning_rate": 5.500658094563127e-05, "loss": 1.2598, "step": 8898 }, { "epoch": 2.701168614357262, "grad_norm": 0.5887405872344971, "learning_rate": 5.5001518679761065e-05, "loss": 0.7872, "step": 8899 }, { "epoch": 2.7014721505539536, "grad_norm": 0.8089455366134644, "learning_rate": 5.499645641389086e-05, "loss": 1.3973, "step": 8900 }, { "epoch": 2.7017756867506453, "grad_norm": 0.7730278968811035, "learning_rate": 5.499139414802066e-05, "loss": 1.5789, "step": 8901 }, { "epoch": 2.7020792229473365, "grad_norm": 0.7910978198051453, "learning_rate": 5.4986331882150456e-05, "loss": 1.4332, "step": 8902 }, { "epoch": 2.7023827591440277, "grad_norm": 0.8123211860656738, "learning_rate": 5.498126961628025e-05, "loss": 1.2123, "step": 8903 }, { "epoch": 2.7026862953407194, "grad_norm": 0.7342512011528015, "learning_rate": 5.4976207350410046e-05, "loss": 1.5431, "step": 8904 }, { "epoch": 2.702989831537411, "grad_norm": 0.8220160007476807, "learning_rate": 5.497114508453984e-05, "loss": 1.41, "step": 8905 }, { "epoch": 2.7032933677341022, "grad_norm": 0.8257995843887329, "learning_rate": 5.496608281866964e-05, "loss": 1.323, "step": 8906 }, { "epoch": 2.703596903930794, "grad_norm": 0.8199771046638489, "learning_rate": 5.496102055279944e-05, "loss": 1.5909, "step": 8907 }, { "epoch": 2.703900440127485, "grad_norm": 0.5013839602470398, "learning_rate": 5.495595828692923e-05, "loss": 1.252, "step": 8908 }, { "epoch": 2.7042039763241768, "grad_norm": 0.6575402021408081, "learning_rate": 5.4950896021059026e-05, "loss": 1.3464, "step": 8909 }, { "epoch": 2.704507512520868, "grad_norm": 0.8246828317642212, "learning_rate": 5.494583375518883e-05, "loss": 1.2401, "step": 8910 }, { "epoch": 2.7048110487175596, "grad_norm": 0.7484584450721741, "learning_rate": 5.494077148931862e-05, "loss": 0.9211, "step": 8911 }, { "epoch": 2.7051145849142513, "grad_norm": 0.7549577355384827, "learning_rate": 5.493570922344842e-05, "loss": 1.1939, "step": 8912 }, { "epoch": 2.7054181211109425, "grad_norm": 0.7589015364646912, "learning_rate": 5.493064695757821e-05, "loss": 1.1499, "step": 8913 }, { "epoch": 2.7057216573076337, "grad_norm": 0.7982020378112793, "learning_rate": 5.492558469170801e-05, "loss": 1.3517, "step": 8914 }, { "epoch": 2.7060251935043254, "grad_norm": 0.48329707980155945, "learning_rate": 5.492052242583781e-05, "loss": 1.2968, "step": 8915 }, { "epoch": 2.706328729701017, "grad_norm": 0.9169350862503052, "learning_rate": 5.4915460159967603e-05, "loss": 1.2864, "step": 8916 }, { "epoch": 2.7066322658977082, "grad_norm": 0.7144566178321838, "learning_rate": 5.49103978940974e-05, "loss": 1.4989, "step": 8917 }, { "epoch": 2.7069358020944, "grad_norm": 0.9641619324684143, "learning_rate": 5.490533562822719e-05, "loss": 1.0338, "step": 8918 }, { "epoch": 2.707239338291091, "grad_norm": 0.8105370402336121, "learning_rate": 5.490027336235699e-05, "loss": 1.4915, "step": 8919 }, { "epoch": 2.7075428744877827, "grad_norm": 0.8228601217269897, "learning_rate": 5.489521109648679e-05, "loss": 1.4441, "step": 8920 }, { "epoch": 2.707846410684474, "grad_norm": 0.7452234625816345, "learning_rate": 5.4890148830616584e-05, "loss": 1.3552, "step": 8921 }, { "epoch": 2.7081499468811656, "grad_norm": 0.7853007912635803, "learning_rate": 5.488508656474638e-05, "loss": 1.0674, "step": 8922 }, { "epoch": 2.7084534830778573, "grad_norm": 0.6740144491195679, "learning_rate": 5.4880024298876174e-05, "loss": 1.3517, "step": 8923 }, { "epoch": 2.7087570192745485, "grad_norm": 0.739098310470581, "learning_rate": 5.4874962033005975e-05, "loss": 1.4536, "step": 8924 }, { "epoch": 2.7090605554712397, "grad_norm": 0.8560205101966858, "learning_rate": 5.486989976713577e-05, "loss": 1.1199, "step": 8925 }, { "epoch": 2.7093640916679314, "grad_norm": 0.6795910000801086, "learning_rate": 5.4864837501265565e-05, "loss": 1.4855, "step": 8926 }, { "epoch": 2.709667627864623, "grad_norm": 0.7062528133392334, "learning_rate": 5.485977523539536e-05, "loss": 1.4793, "step": 8927 }, { "epoch": 2.709971164061314, "grad_norm": 0.8141422271728516, "learning_rate": 5.4854712969525155e-05, "loss": 1.5419, "step": 8928 }, { "epoch": 2.710274700258006, "grad_norm": 0.8198245763778687, "learning_rate": 5.4849650703654956e-05, "loss": 1.3353, "step": 8929 }, { "epoch": 2.710578236454697, "grad_norm": 0.6655880808830261, "learning_rate": 5.484458843778475e-05, "loss": 1.2101, "step": 8930 }, { "epoch": 2.7108817726513887, "grad_norm": 0.8227945566177368, "learning_rate": 5.483952617191456e-05, "loss": 0.6048, "step": 8931 }, { "epoch": 2.71118530884808, "grad_norm": 0.9034178853034973, "learning_rate": 5.4834463906044354e-05, "loss": 1.4865, "step": 8932 }, { "epoch": 2.7114888450447716, "grad_norm": 0.7349004149436951, "learning_rate": 5.482940164017415e-05, "loss": 1.5953, "step": 8933 }, { "epoch": 2.7117923812414633, "grad_norm": 0.8077744245529175, "learning_rate": 5.4824339374303944e-05, "loss": 1.1701, "step": 8934 }, { "epoch": 2.7120959174381545, "grad_norm": 0.7649827599525452, "learning_rate": 5.4819277108433745e-05, "loss": 1.5415, "step": 8935 }, { "epoch": 2.7123994536348457, "grad_norm": 0.8440228700637817, "learning_rate": 5.481421484256354e-05, "loss": 1.1785, "step": 8936 }, { "epoch": 2.7127029898315373, "grad_norm": 0.8126983642578125, "learning_rate": 5.4809152576693335e-05, "loss": 1.384, "step": 8937 }, { "epoch": 2.713006526028229, "grad_norm": 0.8477095365524292, "learning_rate": 5.480409031082313e-05, "loss": 1.0354, "step": 8938 }, { "epoch": 2.71331006222492, "grad_norm": 0.8486508727073669, "learning_rate": 5.4799028044952924e-05, "loss": 0.8829, "step": 8939 }, { "epoch": 2.713613598421612, "grad_norm": 0.5038058161735535, "learning_rate": 5.4793965779082726e-05, "loss": 1.2798, "step": 8940 }, { "epoch": 2.713917134618303, "grad_norm": 0.55999755859375, "learning_rate": 5.478890351321252e-05, "loss": 1.2635, "step": 8941 }, { "epoch": 2.7142206708149947, "grad_norm": 0.635403573513031, "learning_rate": 5.4783841247342316e-05, "loss": 1.1163, "step": 8942 }, { "epoch": 2.714524207011686, "grad_norm": 0.7358167767524719, "learning_rate": 5.477877898147211e-05, "loss": 0.9021, "step": 8943 }, { "epoch": 2.7148277432083776, "grad_norm": 0.8093145489692688, "learning_rate": 5.4773716715601905e-05, "loss": 1.1346, "step": 8944 }, { "epoch": 2.7151312794050693, "grad_norm": 0.7014288902282715, "learning_rate": 5.476865444973171e-05, "loss": 1.5469, "step": 8945 }, { "epoch": 2.7154348156017605, "grad_norm": 0.5633707642555237, "learning_rate": 5.47635921838615e-05, "loss": 1.5701, "step": 8946 }, { "epoch": 2.7157383517984517, "grad_norm": 0.6636736989021301, "learning_rate": 5.4758529917991296e-05, "loss": 1.1156, "step": 8947 }, { "epoch": 2.7160418879951433, "grad_norm": 0.7108006477355957, "learning_rate": 5.475346765212109e-05, "loss": 1.0658, "step": 8948 }, { "epoch": 2.716345424191835, "grad_norm": 0.7489403486251831, "learning_rate": 5.474840538625089e-05, "loss": 1.3565, "step": 8949 }, { "epoch": 2.716648960388526, "grad_norm": 0.95987468957901, "learning_rate": 5.474334312038069e-05, "loss": 1.35, "step": 8950 }, { "epoch": 2.716952496585218, "grad_norm": 0.857208251953125, "learning_rate": 5.473828085451048e-05, "loss": 1.3263, "step": 8951 }, { "epoch": 2.717256032781909, "grad_norm": 0.6561353802680969, "learning_rate": 5.473321858864028e-05, "loss": 1.3822, "step": 8952 }, { "epoch": 2.7175595689786007, "grad_norm": 0.74046391248703, "learning_rate": 5.472815632277007e-05, "loss": 1.4686, "step": 8953 }, { "epoch": 2.717863105175292, "grad_norm": 0.7804528474807739, "learning_rate": 5.4723094056899873e-05, "loss": 1.4059, "step": 8954 }, { "epoch": 2.7181666413719836, "grad_norm": 0.6641717553138733, "learning_rate": 5.471803179102967e-05, "loss": 0.9086, "step": 8955 }, { "epoch": 2.7184701775686753, "grad_norm": 0.4145214855670929, "learning_rate": 5.471296952515946e-05, "loss": 0.744, "step": 8956 }, { "epoch": 2.7187737137653665, "grad_norm": 0.8224796056747437, "learning_rate": 5.470790725928926e-05, "loss": 1.57, "step": 8957 }, { "epoch": 2.719077249962058, "grad_norm": 0.9994120597839355, "learning_rate": 5.470284499341905e-05, "loss": 1.1499, "step": 8958 }, { "epoch": 2.7193807861587493, "grad_norm": 0.7927001118659973, "learning_rate": 5.4697782727548854e-05, "loss": 1.413, "step": 8959 }, { "epoch": 2.719684322355441, "grad_norm": 0.8578165769577026, "learning_rate": 5.469272046167865e-05, "loss": 0.8647, "step": 8960 }, { "epoch": 2.719987858552132, "grad_norm": 0.6189905405044556, "learning_rate": 5.4687658195808444e-05, "loss": 1.3841, "step": 8961 }, { "epoch": 2.720291394748824, "grad_norm": 1.0179373025894165, "learning_rate": 5.468259592993824e-05, "loss": 0.5326, "step": 8962 }, { "epoch": 2.7205949309455155, "grad_norm": 0.6325749754905701, "learning_rate": 5.467753366406804e-05, "loss": 1.7126, "step": 8963 }, { "epoch": 2.7208984671422067, "grad_norm": 0.7680848240852356, "learning_rate": 5.4672471398197835e-05, "loss": 1.1946, "step": 8964 }, { "epoch": 2.721202003338898, "grad_norm": 0.8343624472618103, "learning_rate": 5.466740913232763e-05, "loss": 1.4145, "step": 8965 }, { "epoch": 2.7215055395355896, "grad_norm": 0.8505995273590088, "learning_rate": 5.4662346866457425e-05, "loss": 0.9683, "step": 8966 }, { "epoch": 2.7218090757322813, "grad_norm": 1.0219932794570923, "learning_rate": 5.465728460058722e-05, "loss": 1.3269, "step": 8967 }, { "epoch": 2.7221126119289725, "grad_norm": 0.7481555342674255, "learning_rate": 5.465222233471702e-05, "loss": 1.4979, "step": 8968 }, { "epoch": 2.722416148125664, "grad_norm": 0.7058635354042053, "learning_rate": 5.4647160068846816e-05, "loss": 1.9171, "step": 8969 }, { "epoch": 2.7227196843223553, "grad_norm": 0.8569918274879456, "learning_rate": 5.464209780297661e-05, "loss": 1.3167, "step": 8970 }, { "epoch": 2.723023220519047, "grad_norm": 0.7231106758117676, "learning_rate": 5.4637035537106405e-05, "loss": 1.3654, "step": 8971 }, { "epoch": 2.723326756715738, "grad_norm": 0.7587422132492065, "learning_rate": 5.46319732712362e-05, "loss": 1.3848, "step": 8972 }, { "epoch": 2.72363029291243, "grad_norm": 0.7885164022445679, "learning_rate": 5.4626911005366e-05, "loss": 1.3941, "step": 8973 }, { "epoch": 2.7239338291091215, "grad_norm": 0.8368237614631653, "learning_rate": 5.4621848739495796e-05, "loss": 1.4064, "step": 8974 }, { "epoch": 2.7242373653058127, "grad_norm": 0.8279350996017456, "learning_rate": 5.4616786473625605e-05, "loss": 1.3322, "step": 8975 }, { "epoch": 2.724540901502504, "grad_norm": 0.7650869488716125, "learning_rate": 5.46117242077554e-05, "loss": 0.9318, "step": 8976 }, { "epoch": 2.7248444376991956, "grad_norm": 0.6454781293869019, "learning_rate": 5.4606661941885194e-05, "loss": 0.9596, "step": 8977 }, { "epoch": 2.7251479738958873, "grad_norm": 0.7179443836212158, "learning_rate": 5.460159967601499e-05, "loss": 1.6231, "step": 8978 }, { "epoch": 2.7254515100925785, "grad_norm": 0.9709389805793762, "learning_rate": 5.459653741014479e-05, "loss": 1.3583, "step": 8979 }, { "epoch": 2.72575504628927, "grad_norm": 0.8201824426651001, "learning_rate": 5.4591475144274586e-05, "loss": 1.2885, "step": 8980 }, { "epoch": 2.7260585824859613, "grad_norm": 0.7727458477020264, "learning_rate": 5.458641287840438e-05, "loss": 1.1555, "step": 8981 }, { "epoch": 2.726362118682653, "grad_norm": 0.8874642848968506, "learning_rate": 5.4581350612534175e-05, "loss": 1.1881, "step": 8982 }, { "epoch": 2.726665654879344, "grad_norm": 1.0157978534698486, "learning_rate": 5.457628834666397e-05, "loss": 1.2026, "step": 8983 }, { "epoch": 2.726969191076036, "grad_norm": 0.7455587983131409, "learning_rate": 5.457122608079377e-05, "loss": 1.1722, "step": 8984 }, { "epoch": 2.7272727272727275, "grad_norm": 1.0781230926513672, "learning_rate": 5.4566163814923566e-05, "loss": 1.1869, "step": 8985 }, { "epoch": 2.7275762634694187, "grad_norm": 0.8218075633049011, "learning_rate": 5.456110154905336e-05, "loss": 1.5559, "step": 8986 }, { "epoch": 2.72787979966611, "grad_norm": 0.7117857933044434, "learning_rate": 5.4556039283183156e-05, "loss": 1.1925, "step": 8987 }, { "epoch": 2.7281833358628016, "grad_norm": 0.6879022121429443, "learning_rate": 5.455097701731296e-05, "loss": 1.6949, "step": 8988 }, { "epoch": 2.7284868720594933, "grad_norm": 0.5837012529373169, "learning_rate": 5.454591475144275e-05, "loss": 1.2666, "step": 8989 }, { "epoch": 2.7287904082561845, "grad_norm": 0.7494475245475769, "learning_rate": 5.454085248557255e-05, "loss": 1.4222, "step": 8990 }, { "epoch": 2.729093944452876, "grad_norm": 0.7593268752098083, "learning_rate": 5.453579021970234e-05, "loss": 1.3327, "step": 8991 }, { "epoch": 2.7293974806495673, "grad_norm": 0.7277660965919495, "learning_rate": 5.453072795383214e-05, "loss": 1.652, "step": 8992 }, { "epoch": 2.729701016846259, "grad_norm": 0.8330652713775635, "learning_rate": 5.452566568796194e-05, "loss": 1.461, "step": 8993 }, { "epoch": 2.73000455304295, "grad_norm": 0.7826851606369019, "learning_rate": 5.452060342209173e-05, "loss": 1.526, "step": 8994 }, { "epoch": 2.730308089239642, "grad_norm": 0.9074543118476868, "learning_rate": 5.451554115622153e-05, "loss": 1.181, "step": 8995 }, { "epoch": 2.7306116254363335, "grad_norm": 0.8699624538421631, "learning_rate": 5.451047889035132e-05, "loss": 1.452, "step": 8996 }, { "epoch": 2.7309151616330247, "grad_norm": 0.8349964022636414, "learning_rate": 5.450541662448112e-05, "loss": 1.1185, "step": 8997 }, { "epoch": 2.731218697829716, "grad_norm": 0.885654628276825, "learning_rate": 5.450035435861092e-05, "loss": 0.9048, "step": 8998 }, { "epoch": 2.7315222340264076, "grad_norm": 0.906273365020752, "learning_rate": 5.4495292092740714e-05, "loss": 0.9559, "step": 8999 }, { "epoch": 2.7318257702230992, "grad_norm": 0.8029773831367493, "learning_rate": 5.449022982687051e-05, "loss": 1.2853, "step": 9000 }, { "epoch": 2.7321293064197905, "grad_norm": 0.7525988817214966, "learning_rate": 5.44851675610003e-05, "loss": 1.0268, "step": 9001 }, { "epoch": 2.732432842616482, "grad_norm": 0.655807375907898, "learning_rate": 5.4480105295130105e-05, "loss": 1.5585, "step": 9002 }, { "epoch": 2.7327363788131733, "grad_norm": 0.8269489407539368, "learning_rate": 5.44750430292599e-05, "loss": 1.6588, "step": 9003 }, { "epoch": 2.733039915009865, "grad_norm": 0.6987342238426208, "learning_rate": 5.4469980763389694e-05, "loss": 0.9659, "step": 9004 }, { "epoch": 2.733343451206556, "grad_norm": 0.6727113127708435, "learning_rate": 5.446491849751949e-05, "loss": 1.516, "step": 9005 }, { "epoch": 2.733646987403248, "grad_norm": 0.9753486514091492, "learning_rate": 5.4459856231649284e-05, "loss": 1.5715, "step": 9006 }, { "epoch": 2.7339505235999395, "grad_norm": 0.6524636745452881, "learning_rate": 5.4454793965779086e-05, "loss": 1.36, "step": 9007 }, { "epoch": 2.7342540597966307, "grad_norm": 0.8173258900642395, "learning_rate": 5.444973169990888e-05, "loss": 1.0862, "step": 9008 }, { "epoch": 2.734557595993322, "grad_norm": 0.8795118927955627, "learning_rate": 5.4444669434038675e-05, "loss": 1.448, "step": 9009 }, { "epoch": 2.7348611321900136, "grad_norm": 0.9424416422843933, "learning_rate": 5.443960716816847e-05, "loss": 1.2404, "step": 9010 }, { "epoch": 2.7351646683867052, "grad_norm": 0.8112708926200867, "learning_rate": 5.4434544902298265e-05, "loss": 1.4256, "step": 9011 }, { "epoch": 2.7354682045833965, "grad_norm": 0.7930210828781128, "learning_rate": 5.4429482636428066e-05, "loss": 1.5032, "step": 9012 }, { "epoch": 2.735771740780088, "grad_norm": 0.8505734801292419, "learning_rate": 5.442442037055786e-05, "loss": 0.813, "step": 9013 }, { "epoch": 2.7360752769767793, "grad_norm": 0.676701009273529, "learning_rate": 5.4419358104687656e-05, "loss": 1.4157, "step": 9014 }, { "epoch": 2.736378813173471, "grad_norm": 0.9086753726005554, "learning_rate": 5.441429583881745e-05, "loss": 1.3663, "step": 9015 }, { "epoch": 2.736682349370162, "grad_norm": 0.8363310098648071, "learning_rate": 5.440923357294725e-05, "loss": 1.5283, "step": 9016 }, { "epoch": 2.736985885566854, "grad_norm": 0.9008285403251648, "learning_rate": 5.440417130707705e-05, "loss": 1.2352, "step": 9017 }, { "epoch": 2.7372894217635455, "grad_norm": 0.6482596397399902, "learning_rate": 5.439910904120684e-05, "loss": 1.7285, "step": 9018 }, { "epoch": 2.7375929579602367, "grad_norm": 0.7748271226882935, "learning_rate": 5.439404677533664e-05, "loss": 1.4282, "step": 9019 }, { "epoch": 2.7378964941569284, "grad_norm": 0.7706668376922607, "learning_rate": 5.4388984509466445e-05, "loss": 1.4724, "step": 9020 }, { "epoch": 2.7382000303536196, "grad_norm": 0.8214789032936096, "learning_rate": 5.438392224359624e-05, "loss": 1.3454, "step": 9021 }, { "epoch": 2.7385035665503112, "grad_norm": 0.6393706202507019, "learning_rate": 5.4378859977726035e-05, "loss": 1.268, "step": 9022 }, { "epoch": 2.7388071027470025, "grad_norm": 0.6789675951004028, "learning_rate": 5.4373797711855836e-05, "loss": 1.4618, "step": 9023 }, { "epoch": 2.739110638943694, "grad_norm": 0.872805655002594, "learning_rate": 5.436873544598563e-05, "loss": 1.3131, "step": 9024 }, { "epoch": 2.7394141751403858, "grad_norm": 0.5699925422668457, "learning_rate": 5.4363673180115426e-05, "loss": 0.9571, "step": 9025 }, { "epoch": 2.739717711337077, "grad_norm": 0.7801041603088379, "learning_rate": 5.435861091424522e-05, "loss": 1.4479, "step": 9026 }, { "epoch": 2.740021247533768, "grad_norm": 0.6360588669776917, "learning_rate": 5.435354864837502e-05, "loss": 0.355, "step": 9027 }, { "epoch": 2.74032478373046, "grad_norm": 0.7122807502746582, "learning_rate": 5.434848638250482e-05, "loss": 1.3132, "step": 9028 }, { "epoch": 2.7406283199271515, "grad_norm": 0.7230129837989807, "learning_rate": 5.434342411663461e-05, "loss": 0.7899, "step": 9029 }, { "epoch": 2.7409318561238427, "grad_norm": 0.7373469471931458, "learning_rate": 5.4338361850764407e-05, "loss": 1.3252, "step": 9030 }, { "epoch": 2.7412353923205344, "grad_norm": 0.7642449140548706, "learning_rate": 5.43332995848942e-05, "loss": 1.4505, "step": 9031 }, { "epoch": 2.7415389285172256, "grad_norm": 0.8837107419967651, "learning_rate": 5.4328237319024e-05, "loss": 1.211, "step": 9032 }, { "epoch": 2.7418424647139172, "grad_norm": 0.6932831406593323, "learning_rate": 5.43231750531538e-05, "loss": 1.2442, "step": 9033 }, { "epoch": 2.7421460009106084, "grad_norm": 0.7030109167098999, "learning_rate": 5.431811278728359e-05, "loss": 1.6409, "step": 9034 }, { "epoch": 2.7424495371073, "grad_norm": 1.1428993940353394, "learning_rate": 5.431305052141339e-05, "loss": 1.1862, "step": 9035 }, { "epoch": 2.7427530733039918, "grad_norm": 1.2733973264694214, "learning_rate": 5.430798825554318e-05, "loss": 1.3141, "step": 9036 }, { "epoch": 2.743056609500683, "grad_norm": 0.7428387403488159, "learning_rate": 5.4302925989672984e-05, "loss": 0.8509, "step": 9037 }, { "epoch": 2.743360145697374, "grad_norm": 0.7533442974090576, "learning_rate": 5.429786372380278e-05, "loss": 1.3576, "step": 9038 }, { "epoch": 2.743663681894066, "grad_norm": 0.8623830080032349, "learning_rate": 5.429280145793257e-05, "loss": 1.1348, "step": 9039 }, { "epoch": 2.7439672180907575, "grad_norm": 0.7006097435951233, "learning_rate": 5.428773919206237e-05, "loss": 1.1323, "step": 9040 }, { "epoch": 2.7442707542874487, "grad_norm": 0.6233174800872803, "learning_rate": 5.428267692619217e-05, "loss": 1.5301, "step": 9041 }, { "epoch": 2.7445742904841404, "grad_norm": 0.8230398893356323, "learning_rate": 5.4277614660321964e-05, "loss": 1.231, "step": 9042 }, { "epoch": 2.7448778266808316, "grad_norm": 0.7485307455062866, "learning_rate": 5.427255239445176e-05, "loss": 1.0382, "step": 9043 }, { "epoch": 2.7451813628775232, "grad_norm": 1.0905840396881104, "learning_rate": 5.4267490128581554e-05, "loss": 1.2776, "step": 9044 }, { "epoch": 2.7454848990742144, "grad_norm": 0.8306492567062378, "learning_rate": 5.426242786271135e-05, "loss": 1.3409, "step": 9045 }, { "epoch": 2.745788435270906, "grad_norm": 0.7774491906166077, "learning_rate": 5.425736559684115e-05, "loss": 1.5055, "step": 9046 }, { "epoch": 2.7460919714675978, "grad_norm": 0.8531390428543091, "learning_rate": 5.4252303330970945e-05, "loss": 1.1088, "step": 9047 }, { "epoch": 2.746395507664289, "grad_norm": 0.8016642332077026, "learning_rate": 5.424724106510074e-05, "loss": 1.3492, "step": 9048 }, { "epoch": 2.74669904386098, "grad_norm": 0.5586883425712585, "learning_rate": 5.4242178799230535e-05, "loss": 1.1221, "step": 9049 }, { "epoch": 2.747002580057672, "grad_norm": 0.8126424551010132, "learning_rate": 5.423711653336033e-05, "loss": 1.298, "step": 9050 }, { "epoch": 2.7473061162543635, "grad_norm": 0.8375364542007446, "learning_rate": 5.423205426749013e-05, "loss": 1.2794, "step": 9051 }, { "epoch": 2.7476096524510547, "grad_norm": 0.8436753749847412, "learning_rate": 5.4226992001619926e-05, "loss": 1.4027, "step": 9052 }, { "epoch": 2.7479131886477464, "grad_norm": 0.681559145450592, "learning_rate": 5.422192973574972e-05, "loss": 1.491, "step": 9053 }, { "epoch": 2.7482167248444376, "grad_norm": 0.7104101777076721, "learning_rate": 5.4216867469879516e-05, "loss": 1.5193, "step": 9054 }, { "epoch": 2.7485202610411292, "grad_norm": 0.8452191948890686, "learning_rate": 5.421180520400932e-05, "loss": 1.0921, "step": 9055 }, { "epoch": 2.7488237972378204, "grad_norm": 0.7584163546562195, "learning_rate": 5.420674293813911e-05, "loss": 1.6706, "step": 9056 }, { "epoch": 2.749127333434512, "grad_norm": 0.6813714504241943, "learning_rate": 5.420168067226891e-05, "loss": 1.7236, "step": 9057 }, { "epoch": 2.7494308696312038, "grad_norm": 0.9535608887672424, "learning_rate": 5.41966184063987e-05, "loss": 1.4805, "step": 9058 }, { "epoch": 2.749734405827895, "grad_norm": 0.6872257590293884, "learning_rate": 5.4191556140528496e-05, "loss": 1.7005, "step": 9059 }, { "epoch": 2.750037942024586, "grad_norm": 0.8195981383323669, "learning_rate": 5.41864938746583e-05, "loss": 1.2913, "step": 9060 }, { "epoch": 2.750341478221278, "grad_norm": 0.8541540503501892, "learning_rate": 5.418143160878809e-05, "loss": 1.4657, "step": 9061 }, { "epoch": 2.7506450144179695, "grad_norm": 0.8466823101043701, "learning_rate": 5.417636934291789e-05, "loss": 1.0107, "step": 9062 }, { "epoch": 2.7509485506146607, "grad_norm": 0.7837762832641602, "learning_rate": 5.417130707704768e-05, "loss": 1.5619, "step": 9063 }, { "epoch": 2.7512520868113524, "grad_norm": 0.7531890869140625, "learning_rate": 5.416624481117749e-05, "loss": 1.2658, "step": 9064 }, { "epoch": 2.7515556230080436, "grad_norm": 0.6701220273971558, "learning_rate": 5.4161182545307285e-05, "loss": 1.1735, "step": 9065 }, { "epoch": 2.7518591592047352, "grad_norm": 0.8095039129257202, "learning_rate": 5.415612027943709e-05, "loss": 1.7546, "step": 9066 }, { "epoch": 2.7521626954014264, "grad_norm": 0.8936877250671387, "learning_rate": 5.415105801356688e-05, "loss": 0.9218, "step": 9067 }, { "epoch": 2.752466231598118, "grad_norm": 0.729854166507721, "learning_rate": 5.4145995747696677e-05, "loss": 1.5037, "step": 9068 }, { "epoch": 2.7527697677948098, "grad_norm": 0.6929486393928528, "learning_rate": 5.414093348182647e-05, "loss": 1.4376, "step": 9069 }, { "epoch": 2.753073303991501, "grad_norm": 0.6936506628990173, "learning_rate": 5.4135871215956266e-05, "loss": 1.5083, "step": 9070 }, { "epoch": 2.753376840188192, "grad_norm": 0.7572258114814758, "learning_rate": 5.413080895008607e-05, "loss": 1.5286, "step": 9071 }, { "epoch": 2.753680376384884, "grad_norm": 0.9175770282745361, "learning_rate": 5.412574668421586e-05, "loss": 1.1926, "step": 9072 }, { "epoch": 2.7539839125815755, "grad_norm": 0.681115984916687, "learning_rate": 5.412068441834566e-05, "loss": 1.4447, "step": 9073 }, { "epoch": 2.7542874487782667, "grad_norm": 0.8459120988845825, "learning_rate": 5.411562215247545e-05, "loss": 1.6663, "step": 9074 }, { "epoch": 2.7545909849749584, "grad_norm": 0.796818733215332, "learning_rate": 5.411055988660525e-05, "loss": 0.9759, "step": 9075 }, { "epoch": 2.7548945211716496, "grad_norm": 0.9406018853187561, "learning_rate": 5.410549762073505e-05, "loss": 1.4791, "step": 9076 }, { "epoch": 2.7551980573683412, "grad_norm": 0.7923262715339661, "learning_rate": 5.410043535486484e-05, "loss": 1.6416, "step": 9077 }, { "epoch": 2.7555015935650324, "grad_norm": 0.6868494749069214, "learning_rate": 5.409537308899464e-05, "loss": 1.0971, "step": 9078 }, { "epoch": 2.755805129761724, "grad_norm": 0.6387277245521545, "learning_rate": 5.409031082312443e-05, "loss": 1.3577, "step": 9079 }, { "epoch": 2.7561086659584157, "grad_norm": 0.7907262444496155, "learning_rate": 5.4085248557254234e-05, "loss": 1.6351, "step": 9080 }, { "epoch": 2.756412202155107, "grad_norm": 0.8075240850448608, "learning_rate": 5.408018629138403e-05, "loss": 1.2531, "step": 9081 }, { "epoch": 2.7567157383517986, "grad_norm": 0.7291090488433838, "learning_rate": 5.4075124025513824e-05, "loss": 1.5374, "step": 9082 }, { "epoch": 2.75701927454849, "grad_norm": 0.8079223036766052, "learning_rate": 5.407006175964362e-05, "loss": 1.5611, "step": 9083 }, { "epoch": 2.7573228107451815, "grad_norm": 0.6828078031539917, "learning_rate": 5.4064999493773414e-05, "loss": 1.2459, "step": 9084 }, { "epoch": 2.7576263469418727, "grad_norm": 0.8029616475105286, "learning_rate": 5.4059937227903215e-05, "loss": 0.7535, "step": 9085 }, { "epoch": 2.7579298831385644, "grad_norm": 0.7968683242797852, "learning_rate": 5.405487496203301e-05, "loss": 1.4482, "step": 9086 }, { "epoch": 2.7582334193352556, "grad_norm": 0.6600356698036194, "learning_rate": 5.4049812696162805e-05, "loss": 1.0617, "step": 9087 }, { "epoch": 2.758536955531947, "grad_norm": 0.6818645596504211, "learning_rate": 5.40447504302926e-05, "loss": 1.0022, "step": 9088 }, { "epoch": 2.7588404917286384, "grad_norm": 0.704681396484375, "learning_rate": 5.4039688164422394e-05, "loss": 1.4996, "step": 9089 }, { "epoch": 2.75914402792533, "grad_norm": 0.7003867626190186, "learning_rate": 5.4034625898552196e-05, "loss": 1.3784, "step": 9090 }, { "epoch": 2.7594475641220217, "grad_norm": 0.7893413305282593, "learning_rate": 5.402956363268199e-05, "loss": 1.4548, "step": 9091 }, { "epoch": 2.759751100318713, "grad_norm": 0.7460066676139832, "learning_rate": 5.4024501366811785e-05, "loss": 1.3411, "step": 9092 }, { "epoch": 2.7600546365154046, "grad_norm": 0.7303305268287659, "learning_rate": 5.401943910094158e-05, "loss": 1.4287, "step": 9093 }, { "epoch": 2.760358172712096, "grad_norm": 0.87126225233078, "learning_rate": 5.401437683507138e-05, "loss": 1.4208, "step": 9094 }, { "epoch": 2.7606617089087875, "grad_norm": 0.7304555177688599, "learning_rate": 5.400931456920118e-05, "loss": 1.423, "step": 9095 }, { "epoch": 2.7609652451054787, "grad_norm": 0.9491742849349976, "learning_rate": 5.400425230333097e-05, "loss": 1.3035, "step": 9096 }, { "epoch": 2.7612687813021703, "grad_norm": 0.6813849210739136, "learning_rate": 5.3999190037460766e-05, "loss": 1.3806, "step": 9097 }, { "epoch": 2.761572317498862, "grad_norm": 0.8438676595687866, "learning_rate": 5.399412777159056e-05, "loss": 1.3342, "step": 9098 }, { "epoch": 2.761875853695553, "grad_norm": 1.1256211996078491, "learning_rate": 5.398906550572036e-05, "loss": 1.2017, "step": 9099 }, { "epoch": 2.7621793898922444, "grad_norm": 0.9026702642440796, "learning_rate": 5.398400323985016e-05, "loss": 1.218, "step": 9100 }, { "epoch": 2.762482926088936, "grad_norm": 0.7552817463874817, "learning_rate": 5.397894097397995e-05, "loss": 1.3191, "step": 9101 }, { "epoch": 2.7627864622856277, "grad_norm": 0.7226763367652893, "learning_rate": 5.397387870810975e-05, "loss": 1.1564, "step": 9102 }, { "epoch": 2.763089998482319, "grad_norm": 0.8583779335021973, "learning_rate": 5.396881644223954e-05, "loss": 1.2648, "step": 9103 }, { "epoch": 2.7633935346790106, "grad_norm": 0.7059534192085266, "learning_rate": 5.396375417636934e-05, "loss": 1.2827, "step": 9104 }, { "epoch": 2.763697070875702, "grad_norm": 0.7585148811340332, "learning_rate": 5.395869191049914e-05, "loss": 1.4881, "step": 9105 }, { "epoch": 2.7640006070723935, "grad_norm": 0.7276778817176819, "learning_rate": 5.395362964462893e-05, "loss": 1.0925, "step": 9106 }, { "epoch": 2.7643041432690847, "grad_norm": 0.835493803024292, "learning_rate": 5.394856737875873e-05, "loss": 1.3873, "step": 9107 }, { "epoch": 2.7646076794657763, "grad_norm": 0.8759173154830933, "learning_rate": 5.394350511288853e-05, "loss": 1.0156, "step": 9108 }, { "epoch": 2.764911215662468, "grad_norm": 0.8837414383888245, "learning_rate": 5.393844284701833e-05, "loss": 1.512, "step": 9109 }, { "epoch": 2.765214751859159, "grad_norm": 0.6770848035812378, "learning_rate": 5.393338058114813e-05, "loss": 1.489, "step": 9110 }, { "epoch": 2.7655182880558504, "grad_norm": 0.7312859892845154, "learning_rate": 5.392831831527793e-05, "loss": 0.9014, "step": 9111 }, { "epoch": 2.765821824252542, "grad_norm": 0.6844790577888489, "learning_rate": 5.392325604940772e-05, "loss": 1.3431, "step": 9112 }, { "epoch": 2.7661253604492337, "grad_norm": 0.7217748165130615, "learning_rate": 5.391819378353752e-05, "loss": 1.3692, "step": 9113 }, { "epoch": 2.766428896645925, "grad_norm": 0.9370319247245789, "learning_rate": 5.391313151766731e-05, "loss": 0.9882, "step": 9114 }, { "epoch": 2.7667324328426166, "grad_norm": 0.9519034028053284, "learning_rate": 5.390806925179711e-05, "loss": 1.3053, "step": 9115 }, { "epoch": 2.767035969039308, "grad_norm": 0.5022948384284973, "learning_rate": 5.390300698592691e-05, "loss": 1.9471, "step": 9116 }, { "epoch": 2.7673395052359995, "grad_norm": 0.7742490768432617, "learning_rate": 5.38979447200567e-05, "loss": 0.8012, "step": 9117 }, { "epoch": 2.7676430414326907, "grad_norm": 0.7089831233024597, "learning_rate": 5.38928824541865e-05, "loss": 1.3914, "step": 9118 }, { "epoch": 2.7679465776293823, "grad_norm": 0.7711650729179382, "learning_rate": 5.38878201883163e-05, "loss": 1.1509, "step": 9119 }, { "epoch": 2.768250113826074, "grad_norm": 0.7289400696754456, "learning_rate": 5.3882757922446094e-05, "loss": 1.184, "step": 9120 }, { "epoch": 2.768553650022765, "grad_norm": 0.7543368339538574, "learning_rate": 5.387769565657589e-05, "loss": 1.215, "step": 9121 }, { "epoch": 2.7688571862194564, "grad_norm": 0.8299732804298401, "learning_rate": 5.3872633390705684e-05, "loss": 1.5178, "step": 9122 }, { "epoch": 2.769160722416148, "grad_norm": 0.9031606912612915, "learning_rate": 5.386757112483548e-05, "loss": 1.237, "step": 9123 }, { "epoch": 2.7694642586128397, "grad_norm": 0.7585662007331848, "learning_rate": 5.386250885896528e-05, "loss": 1.1908, "step": 9124 }, { "epoch": 2.769767794809531, "grad_norm": 0.8985230326652527, "learning_rate": 5.3857446593095075e-05, "loss": 1.495, "step": 9125 }, { "epoch": 2.7700713310062226, "grad_norm": 0.8069576621055603, "learning_rate": 5.385238432722487e-05, "loss": 1.5, "step": 9126 }, { "epoch": 2.770374867202914, "grad_norm": 0.9044194221496582, "learning_rate": 5.3847322061354664e-05, "loss": 1.0361, "step": 9127 }, { "epoch": 2.7706784033996055, "grad_norm": 0.749674379825592, "learning_rate": 5.384225979548446e-05, "loss": 1.4786, "step": 9128 }, { "epoch": 2.7709819395962967, "grad_norm": 1.505496621131897, "learning_rate": 5.383719752961426e-05, "loss": 0.8318, "step": 9129 }, { "epoch": 2.7712854757929883, "grad_norm": 0.8201859593391418, "learning_rate": 5.3832135263744055e-05, "loss": 1.1839, "step": 9130 }, { "epoch": 2.77158901198968, "grad_norm": 0.7365986704826355, "learning_rate": 5.382707299787385e-05, "loss": 1.4694, "step": 9131 }, { "epoch": 2.771892548186371, "grad_norm": 0.8138245344161987, "learning_rate": 5.3822010732003645e-05, "loss": 1.5147, "step": 9132 }, { "epoch": 2.7721960843830624, "grad_norm": 0.7796532511711121, "learning_rate": 5.3816948466133447e-05, "loss": 0.7226, "step": 9133 }, { "epoch": 2.772499620579754, "grad_norm": 0.988336443901062, "learning_rate": 5.381188620026324e-05, "loss": 1.3447, "step": 9134 }, { "epoch": 2.7728031567764457, "grad_norm": 0.5842739343643188, "learning_rate": 5.3806823934393036e-05, "loss": 1.0944, "step": 9135 }, { "epoch": 2.773106692973137, "grad_norm": 0.648903489112854, "learning_rate": 5.380176166852283e-05, "loss": 1.281, "step": 9136 }, { "epoch": 2.7734102291698286, "grad_norm": 0.6385083794593811, "learning_rate": 5.3796699402652626e-05, "loss": 1.7031, "step": 9137 }, { "epoch": 2.77371376536652, "grad_norm": 0.8002315759658813, "learning_rate": 5.379163713678243e-05, "loss": 1.2138, "step": 9138 }, { "epoch": 2.7740173015632115, "grad_norm": 0.8462424278259277, "learning_rate": 5.378657487091222e-05, "loss": 1.4935, "step": 9139 }, { "epoch": 2.7743208377599027, "grad_norm": 0.8711588382720947, "learning_rate": 5.378151260504202e-05, "loss": 1.3314, "step": 9140 }, { "epoch": 2.7746243739565943, "grad_norm": 1.0606752634048462, "learning_rate": 5.377645033917181e-05, "loss": 1.1583, "step": 9141 }, { "epoch": 2.774927910153286, "grad_norm": 0.8558630347251892, "learning_rate": 5.3771388073301607e-05, "loss": 1.2429, "step": 9142 }, { "epoch": 2.775231446349977, "grad_norm": 0.7171871662139893, "learning_rate": 5.376632580743141e-05, "loss": 1.595, "step": 9143 }, { "epoch": 2.7755349825466684, "grad_norm": 0.7023593187332153, "learning_rate": 5.37612635415612e-05, "loss": 1.0014, "step": 9144 }, { "epoch": 2.77583851874336, "grad_norm": 0.8242972493171692, "learning_rate": 5.3756201275691e-05, "loss": 1.496, "step": 9145 }, { "epoch": 2.7761420549400517, "grad_norm": 0.9640794396400452, "learning_rate": 5.375113900982079e-05, "loss": 1.5646, "step": 9146 }, { "epoch": 2.776445591136743, "grad_norm": 0.6254851222038269, "learning_rate": 5.3746076743950594e-05, "loss": 1.2525, "step": 9147 }, { "epoch": 2.7767491273334346, "grad_norm": 0.7365807294845581, "learning_rate": 5.374101447808039e-05, "loss": 1.3743, "step": 9148 }, { "epoch": 2.777052663530126, "grad_norm": 0.8320026397705078, "learning_rate": 5.3735952212210184e-05, "loss": 1.5029, "step": 9149 }, { "epoch": 2.7773561997268175, "grad_norm": 0.8140934109687805, "learning_rate": 5.373088994633998e-05, "loss": 0.7684, "step": 9150 }, { "epoch": 2.7776597359235087, "grad_norm": 0.6923708319664001, "learning_rate": 5.372582768046977e-05, "loss": 1.2284, "step": 9151 }, { "epoch": 2.7779632721202003, "grad_norm": 0.838914692401886, "learning_rate": 5.3720765414599575e-05, "loss": 1.2168, "step": 9152 }, { "epoch": 2.778266808316892, "grad_norm": 0.8263568878173828, "learning_rate": 5.3715703148729376e-05, "loss": 1.4743, "step": 9153 }, { "epoch": 2.778570344513583, "grad_norm": 0.7731677889823914, "learning_rate": 5.371064088285918e-05, "loss": 1.4636, "step": 9154 }, { "epoch": 2.778873880710275, "grad_norm": 0.8379889130592346, "learning_rate": 5.370557861698897e-05, "loss": 1.5557, "step": 9155 }, { "epoch": 2.779177416906966, "grad_norm": 0.8932848572731018, "learning_rate": 5.370051635111877e-05, "loss": 1.1184, "step": 9156 }, { "epoch": 2.7794809531036577, "grad_norm": 0.6494554877281189, "learning_rate": 5.369545408524856e-05, "loss": 1.4887, "step": 9157 }, { "epoch": 2.779784489300349, "grad_norm": 0.7722667455673218, "learning_rate": 5.3690391819378364e-05, "loss": 1.0282, "step": 9158 }, { "epoch": 2.7800880254970406, "grad_norm": 0.7183326482772827, "learning_rate": 5.368532955350816e-05, "loss": 1.3558, "step": 9159 }, { "epoch": 2.7803915616937322, "grad_norm": 0.7643415927886963, "learning_rate": 5.3680267287637953e-05, "loss": 0.8731, "step": 9160 }, { "epoch": 2.7806950978904235, "grad_norm": 1.0144984722137451, "learning_rate": 5.367520502176775e-05, "loss": 0.7752, "step": 9161 }, { "epoch": 2.7809986340871147, "grad_norm": 0.7115720510482788, "learning_rate": 5.367014275589754e-05, "loss": 1.4928, "step": 9162 }, { "epoch": 2.7813021702838063, "grad_norm": 0.9345402717590332, "learning_rate": 5.3665080490027345e-05, "loss": 1.2511, "step": 9163 }, { "epoch": 2.781605706480498, "grad_norm": 0.8848560452461243, "learning_rate": 5.366001822415714e-05, "loss": 1.4007, "step": 9164 }, { "epoch": 2.781909242677189, "grad_norm": 0.7608636021614075, "learning_rate": 5.3654955958286934e-05, "loss": 0.6799, "step": 9165 }, { "epoch": 2.782212778873881, "grad_norm": 0.9123556613922119, "learning_rate": 5.364989369241673e-05, "loss": 0.9096, "step": 9166 }, { "epoch": 2.782516315070572, "grad_norm": 0.9755141139030457, "learning_rate": 5.3644831426546524e-05, "loss": 1.336, "step": 9167 }, { "epoch": 2.7828198512672637, "grad_norm": 0.8301700949668884, "learning_rate": 5.3639769160676325e-05, "loss": 1.6611, "step": 9168 }, { "epoch": 2.783123387463955, "grad_norm": 0.6016411185264587, "learning_rate": 5.363470689480612e-05, "loss": 1.6561, "step": 9169 }, { "epoch": 2.7834269236606466, "grad_norm": 0.8668585419654846, "learning_rate": 5.3629644628935915e-05, "loss": 1.1069, "step": 9170 }, { "epoch": 2.7837304598573382, "grad_norm": 0.855521023273468, "learning_rate": 5.362458236306571e-05, "loss": 1.4181, "step": 9171 }, { "epoch": 2.7840339960540295, "grad_norm": 0.7183941602706909, "learning_rate": 5.361952009719551e-05, "loss": 1.4732, "step": 9172 }, { "epoch": 2.7843375322507207, "grad_norm": 0.6905990839004517, "learning_rate": 5.3614457831325306e-05, "loss": 1.6827, "step": 9173 }, { "epoch": 2.7846410684474123, "grad_norm": 0.5364888906478882, "learning_rate": 5.36093955654551e-05, "loss": 0.8329, "step": 9174 }, { "epoch": 2.784944604644104, "grad_norm": 0.7076054215431213, "learning_rate": 5.3604333299584896e-05, "loss": 0.5921, "step": 9175 }, { "epoch": 2.785248140840795, "grad_norm": 0.8610938191413879, "learning_rate": 5.359927103371469e-05, "loss": 1.206, "step": 9176 }, { "epoch": 2.785551677037487, "grad_norm": 0.8130373954772949, "learning_rate": 5.359420876784449e-05, "loss": 1.4456, "step": 9177 }, { "epoch": 2.785855213234178, "grad_norm": 0.6824911236763, "learning_rate": 5.358914650197429e-05, "loss": 1.5016, "step": 9178 }, { "epoch": 2.7861587494308697, "grad_norm": 0.9456865191459656, "learning_rate": 5.358408423610408e-05, "loss": 1.3263, "step": 9179 }, { "epoch": 2.786462285627561, "grad_norm": 0.8320668339729309, "learning_rate": 5.3579021970233876e-05, "loss": 0.8731, "step": 9180 }, { "epoch": 2.7867658218242526, "grad_norm": 1.1539381742477417, "learning_rate": 5.357395970436367e-05, "loss": 1.0368, "step": 9181 }, { "epoch": 2.7870693580209442, "grad_norm": 0.869157075881958, "learning_rate": 5.356889743849347e-05, "loss": 1.3705, "step": 9182 }, { "epoch": 2.7873728942176355, "grad_norm": 0.7219955325126648, "learning_rate": 5.356383517262327e-05, "loss": 1.7219, "step": 9183 }, { "epoch": 2.7876764304143267, "grad_norm": 1.016178011894226, "learning_rate": 5.355877290675306e-05, "loss": 1.5799, "step": 9184 }, { "epoch": 2.7879799666110183, "grad_norm": 0.8265878558158875, "learning_rate": 5.355371064088286e-05, "loss": 1.2097, "step": 9185 }, { "epoch": 2.78828350280771, "grad_norm": 0.7330758571624756, "learning_rate": 5.354864837501266e-05, "loss": 1.0995, "step": 9186 }, { "epoch": 2.788587039004401, "grad_norm": 0.733619213104248, "learning_rate": 5.3543586109142454e-05, "loss": 1.6089, "step": 9187 }, { "epoch": 2.788890575201093, "grad_norm": 0.6172709465026855, "learning_rate": 5.353852384327225e-05, "loss": 1.6118, "step": 9188 }, { "epoch": 2.789194111397784, "grad_norm": 0.7603530883789062, "learning_rate": 5.353346157740204e-05, "loss": 1.6177, "step": 9189 }, { "epoch": 2.7894976475944757, "grad_norm": 0.6322783827781677, "learning_rate": 5.352839931153184e-05, "loss": 1.4191, "step": 9190 }, { "epoch": 2.789801183791167, "grad_norm": 0.8380208015441895, "learning_rate": 5.352333704566164e-05, "loss": 0.8711, "step": 9191 }, { "epoch": 2.7901047199878586, "grad_norm": 0.866492509841919, "learning_rate": 5.3518274779791434e-05, "loss": 1.7399, "step": 9192 }, { "epoch": 2.7904082561845502, "grad_norm": 0.8822951316833496, "learning_rate": 5.351321251392123e-05, "loss": 1.0067, "step": 9193 }, { "epoch": 2.7907117923812415, "grad_norm": 0.7827437520027161, "learning_rate": 5.3508150248051024e-05, "loss": 1.5419, "step": 9194 }, { "epoch": 2.7910153285779327, "grad_norm": 0.6779170632362366, "learning_rate": 5.350308798218082e-05, "loss": 1.5052, "step": 9195 }, { "epoch": 2.7913188647746243, "grad_norm": 0.8421553373336792, "learning_rate": 5.349802571631062e-05, "loss": 1.3913, "step": 9196 }, { "epoch": 2.791622400971316, "grad_norm": 0.8894978761672974, "learning_rate": 5.3492963450440415e-05, "loss": 1.2967, "step": 9197 }, { "epoch": 2.791925937168007, "grad_norm": 0.8642515540122986, "learning_rate": 5.3487901184570223e-05, "loss": 1.5373, "step": 9198 }, { "epoch": 2.792229473364699, "grad_norm": 0.8763284683227539, "learning_rate": 5.348283891870002e-05, "loss": 1.5245, "step": 9199 }, { "epoch": 2.79253300956139, "grad_norm": 0.8263570070266724, "learning_rate": 5.347777665282981e-05, "loss": 1.5697, "step": 9200 }, { "epoch": 2.7928365457580817, "grad_norm": 0.8536450266838074, "learning_rate": 5.347271438695961e-05, "loss": 1.5989, "step": 9201 }, { "epoch": 2.793140081954773, "grad_norm": 0.8243657350540161, "learning_rate": 5.346765212108941e-05, "loss": 1.4002, "step": 9202 }, { "epoch": 2.7934436181514646, "grad_norm": 0.7956419587135315, "learning_rate": 5.3462589855219204e-05, "loss": 1.5728, "step": 9203 }, { "epoch": 2.7937471543481562, "grad_norm": 0.7345757484436035, "learning_rate": 5.3457527589349e-05, "loss": 1.6434, "step": 9204 }, { "epoch": 2.7940506905448474, "grad_norm": 0.7123274803161621, "learning_rate": 5.3452465323478794e-05, "loss": 1.248, "step": 9205 }, { "epoch": 2.7943542267415387, "grad_norm": 0.8604891300201416, "learning_rate": 5.344740305760859e-05, "loss": 1.3854, "step": 9206 }, { "epoch": 2.7946577629382303, "grad_norm": 0.7543156743049622, "learning_rate": 5.344234079173839e-05, "loss": 1.1674, "step": 9207 }, { "epoch": 2.794961299134922, "grad_norm": 0.6968750357627869, "learning_rate": 5.3437278525868185e-05, "loss": 1.6849, "step": 9208 }, { "epoch": 2.795264835331613, "grad_norm": 0.6939352750778198, "learning_rate": 5.343221625999798e-05, "loss": 1.4659, "step": 9209 }, { "epoch": 2.795568371528305, "grad_norm": 0.8209272027015686, "learning_rate": 5.3427153994127775e-05, "loss": 1.0141, "step": 9210 }, { "epoch": 2.795871907724996, "grad_norm": 0.7585069537162781, "learning_rate": 5.3422091728257576e-05, "loss": 1.4965, "step": 9211 }, { "epoch": 2.7961754439216877, "grad_norm": 0.7784894704818726, "learning_rate": 5.341702946238737e-05, "loss": 1.7735, "step": 9212 }, { "epoch": 2.796478980118379, "grad_norm": 0.6751682758331299, "learning_rate": 5.3411967196517166e-05, "loss": 1.753, "step": 9213 }, { "epoch": 2.7967825163150706, "grad_norm": 0.8344226479530334, "learning_rate": 5.340690493064696e-05, "loss": 1.3489, "step": 9214 }, { "epoch": 2.7970860525117622, "grad_norm": 0.671781063079834, "learning_rate": 5.3401842664776755e-05, "loss": 1.5133, "step": 9215 }, { "epoch": 2.7973895887084534, "grad_norm": 0.5930084586143494, "learning_rate": 5.339678039890656e-05, "loss": 0.9884, "step": 9216 }, { "epoch": 2.797693124905145, "grad_norm": 0.8687995076179504, "learning_rate": 5.339171813303635e-05, "loss": 1.4782, "step": 9217 }, { "epoch": 2.7979966611018363, "grad_norm": 0.9404574036598206, "learning_rate": 5.3386655867166146e-05, "loss": 1.373, "step": 9218 }, { "epoch": 2.798300197298528, "grad_norm": 0.4988918900489807, "learning_rate": 5.338159360129594e-05, "loss": 1.2758, "step": 9219 }, { "epoch": 2.798603733495219, "grad_norm": 0.7535762786865234, "learning_rate": 5.3376531335425736e-05, "loss": 0.9864, "step": 9220 }, { "epoch": 2.798907269691911, "grad_norm": 0.8633400797843933, "learning_rate": 5.337146906955554e-05, "loss": 1.5846, "step": 9221 }, { "epoch": 2.799210805888602, "grad_norm": 1.015762209892273, "learning_rate": 5.336640680368533e-05, "loss": 1.4976, "step": 9222 }, { "epoch": 2.7995143420852937, "grad_norm": 0.7161250114440918, "learning_rate": 5.336134453781513e-05, "loss": 1.5172, "step": 9223 }, { "epoch": 2.799817878281985, "grad_norm": 0.7022544741630554, "learning_rate": 5.335628227194492e-05, "loss": 1.1552, "step": 9224 }, { "epoch": 2.8001214144786766, "grad_norm": 0.6041155457496643, "learning_rate": 5.3351220006074724e-05, "loss": 1.0041, "step": 9225 }, { "epoch": 2.8004249506753682, "grad_norm": 0.7977640628814697, "learning_rate": 5.334615774020452e-05, "loss": 1.6374, "step": 9226 }, { "epoch": 2.8007284868720594, "grad_norm": 0.7905417680740356, "learning_rate": 5.334109547433431e-05, "loss": 1.4101, "step": 9227 }, { "epoch": 2.801032023068751, "grad_norm": 0.6630216836929321, "learning_rate": 5.333603320846411e-05, "loss": 1.0058, "step": 9228 }, { "epoch": 2.8013355592654423, "grad_norm": 0.8783179521560669, "learning_rate": 5.33309709425939e-05, "loss": 1.5429, "step": 9229 }, { "epoch": 2.801639095462134, "grad_norm": 0.9275266528129578, "learning_rate": 5.3325908676723704e-05, "loss": 1.2968, "step": 9230 }, { "epoch": 2.801942631658825, "grad_norm": 0.8065756559371948, "learning_rate": 5.33208464108535e-05, "loss": 1.1305, "step": 9231 }, { "epoch": 2.802246167855517, "grad_norm": 0.7965754270553589, "learning_rate": 5.3315784144983294e-05, "loss": 1.4949, "step": 9232 }, { "epoch": 2.8025497040522085, "grad_norm": 0.7255224585533142, "learning_rate": 5.331072187911309e-05, "loss": 1.0829, "step": 9233 }, { "epoch": 2.8028532402488997, "grad_norm": 0.855423629283905, "learning_rate": 5.3305659613242883e-05, "loss": 1.379, "step": 9234 }, { "epoch": 2.803156776445591, "grad_norm": 0.8398419618606567, "learning_rate": 5.3300597347372685e-05, "loss": 1.4714, "step": 9235 }, { "epoch": 2.8034603126422826, "grad_norm": 0.7005195021629333, "learning_rate": 5.329553508150248e-05, "loss": 0.9738, "step": 9236 }, { "epoch": 2.8037638488389742, "grad_norm": 0.8335183262825012, "learning_rate": 5.3290472815632275e-05, "loss": 0.8508, "step": 9237 }, { "epoch": 2.8040673850356654, "grad_norm": 0.7825225591659546, "learning_rate": 5.328541054976207e-05, "loss": 1.3419, "step": 9238 }, { "epoch": 2.804370921232357, "grad_norm": 0.4875824749469757, "learning_rate": 5.328034828389187e-05, "loss": 1.7436, "step": 9239 }, { "epoch": 2.8046744574290483, "grad_norm": 0.7876448631286621, "learning_rate": 5.3275286018021666e-05, "loss": 1.1507, "step": 9240 }, { "epoch": 2.80497799362574, "grad_norm": 0.7508234977722168, "learning_rate": 5.327022375215146e-05, "loss": 0.9244, "step": 9241 }, { "epoch": 2.805281529822431, "grad_norm": 0.6753659844398499, "learning_rate": 5.326516148628127e-05, "loss": 1.5898, "step": 9242 }, { "epoch": 2.805585066019123, "grad_norm": 0.5607844591140747, "learning_rate": 5.3260099220411064e-05, "loss": 1.2225, "step": 9243 }, { "epoch": 2.8058886022158145, "grad_norm": 0.6955126523971558, "learning_rate": 5.325503695454086e-05, "loss": 1.4762, "step": 9244 }, { "epoch": 2.8061921384125057, "grad_norm": 0.8268750905990601, "learning_rate": 5.324997468867065e-05, "loss": 1.2952, "step": 9245 }, { "epoch": 2.806495674609197, "grad_norm": 0.9716630578041077, "learning_rate": 5.3244912422800455e-05, "loss": 1.4365, "step": 9246 }, { "epoch": 2.8067992108058886, "grad_norm": 0.819372832775116, "learning_rate": 5.323985015693025e-05, "loss": 1.2058, "step": 9247 }, { "epoch": 2.8071027470025802, "grad_norm": 0.7395895719528198, "learning_rate": 5.3234787891060044e-05, "loss": 1.7736, "step": 9248 }, { "epoch": 2.8074062831992714, "grad_norm": 0.6357772350311279, "learning_rate": 5.322972562518984e-05, "loss": 1.381, "step": 9249 }, { "epoch": 2.807709819395963, "grad_norm": 0.7322058081626892, "learning_rate": 5.322466335931964e-05, "loss": 1.192, "step": 9250 }, { "epoch": 2.8080133555926543, "grad_norm": 0.8182588815689087, "learning_rate": 5.3219601093449436e-05, "loss": 1.0976, "step": 9251 }, { "epoch": 2.808316891789346, "grad_norm": 0.6597992181777954, "learning_rate": 5.321453882757923e-05, "loss": 0.9726, "step": 9252 }, { "epoch": 2.808620427986037, "grad_norm": 0.827843189239502, "learning_rate": 5.3209476561709025e-05, "loss": 1.2613, "step": 9253 }, { "epoch": 2.808923964182729, "grad_norm": 0.7336700558662415, "learning_rate": 5.320441429583882e-05, "loss": 1.4526, "step": 9254 }, { "epoch": 2.8092275003794205, "grad_norm": 0.9792813658714294, "learning_rate": 5.319935202996862e-05, "loss": 0.973, "step": 9255 }, { "epoch": 2.8095310365761117, "grad_norm": 0.8120543956756592, "learning_rate": 5.3194289764098416e-05, "loss": 1.2121, "step": 9256 }, { "epoch": 2.809834572772803, "grad_norm": 0.8667252063751221, "learning_rate": 5.318922749822821e-05, "loss": 0.975, "step": 9257 }, { "epoch": 2.8101381089694946, "grad_norm": 0.6807832717895508, "learning_rate": 5.3184165232358006e-05, "loss": 1.6509, "step": 9258 }, { "epoch": 2.810441645166186, "grad_norm": 0.8428423404693604, "learning_rate": 5.31791029664878e-05, "loss": 1.1053, "step": 9259 }, { "epoch": 2.8107451813628774, "grad_norm": 0.6943228840827942, "learning_rate": 5.31740407006176e-05, "loss": 0.7154, "step": 9260 }, { "epoch": 2.811048717559569, "grad_norm": 0.6292949318885803, "learning_rate": 5.31689784347474e-05, "loss": 1.6805, "step": 9261 }, { "epoch": 2.8113522537562603, "grad_norm": 0.8613457083702087, "learning_rate": 5.316391616887719e-05, "loss": 1.2253, "step": 9262 }, { "epoch": 2.811655789952952, "grad_norm": 0.8359972238540649, "learning_rate": 5.315885390300699e-05, "loss": 1.5825, "step": 9263 }, { "epoch": 2.811959326149643, "grad_norm": 0.7626785635948181, "learning_rate": 5.315379163713679e-05, "loss": 1.5913, "step": 9264 }, { "epoch": 2.812262862346335, "grad_norm": 0.8913024067878723, "learning_rate": 5.314872937126658e-05, "loss": 1.0195, "step": 9265 }, { "epoch": 2.8125663985430265, "grad_norm": 0.7775774002075195, "learning_rate": 5.314366710539638e-05, "loss": 1.4487, "step": 9266 }, { "epoch": 2.8128699347397177, "grad_norm": 0.8799665570259094, "learning_rate": 5.313860483952617e-05, "loss": 1.3918, "step": 9267 }, { "epoch": 2.813173470936409, "grad_norm": 0.7530032396316528, "learning_rate": 5.313354257365597e-05, "loss": 1.4407, "step": 9268 }, { "epoch": 2.8134770071331006, "grad_norm": 0.8552557826042175, "learning_rate": 5.312848030778577e-05, "loss": 1.1304, "step": 9269 }, { "epoch": 2.813780543329792, "grad_norm": 0.8629938364028931, "learning_rate": 5.3123418041915564e-05, "loss": 1.0011, "step": 9270 }, { "epoch": 2.8140840795264834, "grad_norm": 0.9533644914627075, "learning_rate": 5.311835577604536e-05, "loss": 1.3258, "step": 9271 }, { "epoch": 2.814387615723175, "grad_norm": 0.6983219385147095, "learning_rate": 5.3113293510175153e-05, "loss": 0.9842, "step": 9272 }, { "epoch": 2.8146911519198663, "grad_norm": 0.6916873455047607, "learning_rate": 5.310823124430495e-05, "loss": 1.4564, "step": 9273 }, { "epoch": 2.814994688116558, "grad_norm": 0.5489389896392822, "learning_rate": 5.310316897843475e-05, "loss": 0.6755, "step": 9274 }, { "epoch": 2.815298224313249, "grad_norm": 1.0498082637786865, "learning_rate": 5.3098106712564545e-05, "loss": 1.3059, "step": 9275 }, { "epoch": 2.815601760509941, "grad_norm": 1.0044301748275757, "learning_rate": 5.309304444669434e-05, "loss": 1.3754, "step": 9276 }, { "epoch": 2.8159052967066325, "grad_norm": 0.8365218639373779, "learning_rate": 5.3087982180824134e-05, "loss": 1.1857, "step": 9277 }, { "epoch": 2.8162088329033237, "grad_norm": 0.8548251390457153, "learning_rate": 5.3082919914953936e-05, "loss": 1.1869, "step": 9278 }, { "epoch": 2.816512369100015, "grad_norm": 0.9293504357337952, "learning_rate": 5.307785764908373e-05, "loss": 1.1865, "step": 9279 }, { "epoch": 2.8168159052967066, "grad_norm": 0.7599900960922241, "learning_rate": 5.3072795383213525e-05, "loss": 1.4314, "step": 9280 }, { "epoch": 2.817119441493398, "grad_norm": 0.706532895565033, "learning_rate": 5.306773311734332e-05, "loss": 1.4149, "step": 9281 }, { "epoch": 2.8174229776900894, "grad_norm": 0.8958925604820251, "learning_rate": 5.3062670851473115e-05, "loss": 1.2663, "step": 9282 }, { "epoch": 2.817726513886781, "grad_norm": 0.7330815196037292, "learning_rate": 5.3057608585602917e-05, "loss": 1.4551, "step": 9283 }, { "epoch": 2.8180300500834723, "grad_norm": 0.6814063191413879, "learning_rate": 5.305254631973271e-05, "loss": 1.4194, "step": 9284 }, { "epoch": 2.818333586280164, "grad_norm": 0.9556673765182495, "learning_rate": 5.3047484053862506e-05, "loss": 1.2468, "step": 9285 }, { "epoch": 2.818637122476855, "grad_norm": 1.0090731382369995, "learning_rate": 5.30424217879923e-05, "loss": 1.3038, "step": 9286 }, { "epoch": 2.818940658673547, "grad_norm": 0.7862967252731323, "learning_rate": 5.303735952212211e-05, "loss": 1.5091, "step": 9287 }, { "epoch": 2.8192441948702385, "grad_norm": 0.9822673201560974, "learning_rate": 5.3032297256251904e-05, "loss": 1.1643, "step": 9288 }, { "epoch": 2.8195477310669297, "grad_norm": 0.6395041942596436, "learning_rate": 5.3027234990381706e-05, "loss": 1.2098, "step": 9289 }, { "epoch": 2.8198512672636213, "grad_norm": 0.846056342124939, "learning_rate": 5.30221727245115e-05, "loss": 1.0674, "step": 9290 }, { "epoch": 2.8201548034603126, "grad_norm": 0.7783642411231995, "learning_rate": 5.3017110458641295e-05, "loss": 1.552, "step": 9291 }, { "epoch": 2.820458339657004, "grad_norm": 0.7343548536300659, "learning_rate": 5.301204819277109e-05, "loss": 1.2193, "step": 9292 }, { "epoch": 2.8207618758536954, "grad_norm": 0.8262844085693359, "learning_rate": 5.3006985926900885e-05, "loss": 1.2968, "step": 9293 }, { "epoch": 2.821065412050387, "grad_norm": 0.9042322635650635, "learning_rate": 5.3001923661030686e-05, "loss": 1.1364, "step": 9294 }, { "epoch": 2.8213689482470787, "grad_norm": 0.6058719158172607, "learning_rate": 5.299686139516048e-05, "loss": 0.4124, "step": 9295 }, { "epoch": 2.82167248444377, "grad_norm": 0.8176096677780151, "learning_rate": 5.2991799129290276e-05, "loss": 1.142, "step": 9296 }, { "epoch": 2.821976020640461, "grad_norm": 1.1081751585006714, "learning_rate": 5.298673686342007e-05, "loss": 1.0948, "step": 9297 }, { "epoch": 2.822279556837153, "grad_norm": 0.6902355551719666, "learning_rate": 5.2981674597549866e-05, "loss": 1.6632, "step": 9298 }, { "epoch": 2.8225830930338445, "grad_norm": 0.7224792242050171, "learning_rate": 5.297661233167967e-05, "loss": 1.0262, "step": 9299 }, { "epoch": 2.8228866292305357, "grad_norm": 0.6862173676490784, "learning_rate": 5.297155006580946e-05, "loss": 0.8252, "step": 9300 }, { "epoch": 2.8231901654272273, "grad_norm": 0.9318156242370605, "learning_rate": 5.296648779993926e-05, "loss": 1.1843, "step": 9301 }, { "epoch": 2.8234937016239186, "grad_norm": 0.7357593178749084, "learning_rate": 5.296142553406905e-05, "loss": 1.3198, "step": 9302 }, { "epoch": 2.82379723782061, "grad_norm": 0.9271796941757202, "learning_rate": 5.295636326819885e-05, "loss": 1.2251, "step": 9303 }, { "epoch": 2.8241007740173014, "grad_norm": 0.6771527528762817, "learning_rate": 5.295130100232865e-05, "loss": 1.0557, "step": 9304 }, { "epoch": 2.824404310213993, "grad_norm": 0.6915475726127625, "learning_rate": 5.294623873645844e-05, "loss": 1.5614, "step": 9305 }, { "epoch": 2.8247078464106847, "grad_norm": 0.8248263001441956, "learning_rate": 5.294117647058824e-05, "loss": 1.8742, "step": 9306 }, { "epoch": 2.825011382607376, "grad_norm": 0.7455428838729858, "learning_rate": 5.293611420471803e-05, "loss": 0.9254, "step": 9307 }, { "epoch": 2.825314918804067, "grad_norm": 0.783444881439209, "learning_rate": 5.2931051938847834e-05, "loss": 1.0196, "step": 9308 }, { "epoch": 2.825618455000759, "grad_norm": 0.6387648582458496, "learning_rate": 5.292598967297763e-05, "loss": 1.3538, "step": 9309 }, { "epoch": 2.8259219911974505, "grad_norm": 0.7413560152053833, "learning_rate": 5.292092740710742e-05, "loss": 1.384, "step": 9310 }, { "epoch": 2.8262255273941417, "grad_norm": 0.7992648482322693, "learning_rate": 5.291586514123722e-05, "loss": 1.6309, "step": 9311 }, { "epoch": 2.8265290635908333, "grad_norm": 0.6895065307617188, "learning_rate": 5.291080287536701e-05, "loss": 1.3435, "step": 9312 }, { "epoch": 2.8268325997875245, "grad_norm": 0.7920868396759033, "learning_rate": 5.2905740609496815e-05, "loss": 1.6338, "step": 9313 }, { "epoch": 2.827136135984216, "grad_norm": 0.6923069953918457, "learning_rate": 5.290067834362661e-05, "loss": 1.5719, "step": 9314 }, { "epoch": 2.8274396721809074, "grad_norm": 0.708824872970581, "learning_rate": 5.2895616077756404e-05, "loss": 1.4207, "step": 9315 }, { "epoch": 2.827743208377599, "grad_norm": 0.6963658928871155, "learning_rate": 5.28905538118862e-05, "loss": 1.5641, "step": 9316 }, { "epoch": 2.8280467445742907, "grad_norm": 0.7942947149276733, "learning_rate": 5.2885491546016e-05, "loss": 1.0428, "step": 9317 }, { "epoch": 2.828350280770982, "grad_norm": 0.6340756416320801, "learning_rate": 5.2880429280145795e-05, "loss": 1.6218, "step": 9318 }, { "epoch": 2.828653816967673, "grad_norm": 0.9371388554573059, "learning_rate": 5.287536701427559e-05, "loss": 1.3161, "step": 9319 }, { "epoch": 2.828957353164365, "grad_norm": 0.912032425403595, "learning_rate": 5.2870304748405385e-05, "loss": 1.6321, "step": 9320 }, { "epoch": 2.8292608893610565, "grad_norm": 0.6722336411476135, "learning_rate": 5.286524248253518e-05, "loss": 1.71, "step": 9321 }, { "epoch": 2.8295644255577477, "grad_norm": 0.7177336812019348, "learning_rate": 5.286018021666498e-05, "loss": 1.2445, "step": 9322 }, { "epoch": 2.8298679617544393, "grad_norm": 0.7678077220916748, "learning_rate": 5.2855117950794776e-05, "loss": 1.413, "step": 9323 }, { "epoch": 2.8301714979511305, "grad_norm": 0.7351728677749634, "learning_rate": 5.285005568492457e-05, "loss": 1.2427, "step": 9324 }, { "epoch": 2.830475034147822, "grad_norm": 0.7685090899467468, "learning_rate": 5.2844993419054366e-05, "loss": 1.4481, "step": 9325 }, { "epoch": 2.8307785703445134, "grad_norm": 0.6873645186424255, "learning_rate": 5.283993115318416e-05, "loss": 1.485, "step": 9326 }, { "epoch": 2.831082106541205, "grad_norm": 0.8315947651863098, "learning_rate": 5.283486888731396e-05, "loss": 1.1884, "step": 9327 }, { "epoch": 2.8313856427378967, "grad_norm": 0.7474330067634583, "learning_rate": 5.282980662144376e-05, "loss": 1.5404, "step": 9328 }, { "epoch": 2.831689178934588, "grad_norm": 0.7282052040100098, "learning_rate": 5.282474435557355e-05, "loss": 1.4322, "step": 9329 }, { "epoch": 2.831992715131279, "grad_norm": 0.6186273694038391, "learning_rate": 5.2819682089703346e-05, "loss": 1.7328, "step": 9330 }, { "epoch": 2.832296251327971, "grad_norm": 0.6695376634597778, "learning_rate": 5.2814619823833155e-05, "loss": 1.4427, "step": 9331 }, { "epoch": 2.8325997875246625, "grad_norm": 0.7590188980102539, "learning_rate": 5.280955755796295e-05, "loss": 1.2487, "step": 9332 }, { "epoch": 2.8329033237213537, "grad_norm": 0.7978704571723938, "learning_rate": 5.280449529209275e-05, "loss": 1.5149, "step": 9333 }, { "epoch": 2.8332068599180453, "grad_norm": 0.7168574333190918, "learning_rate": 5.2799433026222546e-05, "loss": 1.7076, "step": 9334 }, { "epoch": 2.8335103961147365, "grad_norm": 1.048042893409729, "learning_rate": 5.279437076035234e-05, "loss": 0.8254, "step": 9335 }, { "epoch": 2.833813932311428, "grad_norm": 0.7505400776863098, "learning_rate": 5.2789308494482135e-05, "loss": 0.9113, "step": 9336 }, { "epoch": 2.8341174685081194, "grad_norm": 0.7891983985900879, "learning_rate": 5.278424622861193e-05, "loss": 1.6025, "step": 9337 }, { "epoch": 2.834421004704811, "grad_norm": 0.8563476204872131, "learning_rate": 5.277918396274173e-05, "loss": 1.3948, "step": 9338 }, { "epoch": 2.8347245409015027, "grad_norm": 0.7944004535675049, "learning_rate": 5.277412169687153e-05, "loss": 1.6755, "step": 9339 }, { "epoch": 2.835028077098194, "grad_norm": 0.8097591996192932, "learning_rate": 5.276905943100132e-05, "loss": 1.6187, "step": 9340 }, { "epoch": 2.835331613294885, "grad_norm": 0.6555790305137634, "learning_rate": 5.2763997165131116e-05, "loss": 1.4747, "step": 9341 }, { "epoch": 2.835635149491577, "grad_norm": 0.7448464632034302, "learning_rate": 5.275893489926092e-05, "loss": 1.4133, "step": 9342 }, { "epoch": 2.8359386856882685, "grad_norm": 0.7309173941612244, "learning_rate": 5.275387263339071e-05, "loss": 1.4461, "step": 9343 }, { "epoch": 2.8362422218849597, "grad_norm": 0.7526821494102478, "learning_rate": 5.274881036752051e-05, "loss": 1.5288, "step": 9344 }, { "epoch": 2.8365457580816513, "grad_norm": 0.8191508650779724, "learning_rate": 5.27437481016503e-05, "loss": 1.4208, "step": 9345 }, { "epoch": 2.8368492942783425, "grad_norm": 0.7021205425262451, "learning_rate": 5.27386858357801e-05, "loss": 1.02, "step": 9346 }, { "epoch": 2.837152830475034, "grad_norm": 0.9767215847969055, "learning_rate": 5.27336235699099e-05, "loss": 1.3927, "step": 9347 }, { "epoch": 2.8374563666717254, "grad_norm": 0.796252965927124, "learning_rate": 5.272856130403969e-05, "loss": 1.1261, "step": 9348 }, { "epoch": 2.837759902868417, "grad_norm": 0.8417410850524902, "learning_rate": 5.272349903816949e-05, "loss": 1.3771, "step": 9349 }, { "epoch": 2.8380634390651087, "grad_norm": 0.7039379477500916, "learning_rate": 5.271843677229928e-05, "loss": 1.3596, "step": 9350 }, { "epoch": 2.8383669752618, "grad_norm": 0.9071443676948547, "learning_rate": 5.271337450642908e-05, "loss": 1.4462, "step": 9351 }, { "epoch": 2.8386705114584916, "grad_norm": 0.7268051505088806, "learning_rate": 5.270831224055888e-05, "loss": 0.9868, "step": 9352 }, { "epoch": 2.838974047655183, "grad_norm": 0.8409135341644287, "learning_rate": 5.2703249974688674e-05, "loss": 1.4759, "step": 9353 }, { "epoch": 2.8392775838518745, "grad_norm": 0.7975213527679443, "learning_rate": 5.269818770881847e-05, "loss": 1.5096, "step": 9354 }, { "epoch": 2.8395811200485657, "grad_norm": 0.9084259867668152, "learning_rate": 5.2693125442948264e-05, "loss": 1.4095, "step": 9355 }, { "epoch": 2.8398846562452573, "grad_norm": 0.8844589591026306, "learning_rate": 5.2688063177078065e-05, "loss": 0.9897, "step": 9356 }, { "epoch": 2.840188192441949, "grad_norm": 0.8133668899536133, "learning_rate": 5.268300091120786e-05, "loss": 1.4553, "step": 9357 }, { "epoch": 2.84049172863864, "grad_norm": 0.695389449596405, "learning_rate": 5.2677938645337655e-05, "loss": 0.6254, "step": 9358 }, { "epoch": 2.8407952648353314, "grad_norm": 0.6495389938354492, "learning_rate": 5.267287637946745e-05, "loss": 1.2818, "step": 9359 }, { "epoch": 2.841098801032023, "grad_norm": 0.9445971250534058, "learning_rate": 5.2667814113597244e-05, "loss": 1.328, "step": 9360 }, { "epoch": 2.8414023372287147, "grad_norm": 0.8059170246124268, "learning_rate": 5.2662751847727046e-05, "loss": 1.4942, "step": 9361 }, { "epoch": 2.841705873425406, "grad_norm": 0.9014286994934082, "learning_rate": 5.265768958185684e-05, "loss": 1.045, "step": 9362 }, { "epoch": 2.8420094096220976, "grad_norm": 0.7845970988273621, "learning_rate": 5.2652627315986636e-05, "loss": 1.3251, "step": 9363 }, { "epoch": 2.842312945818789, "grad_norm": 0.8027483820915222, "learning_rate": 5.264756505011643e-05, "loss": 1.6816, "step": 9364 }, { "epoch": 2.8426164820154805, "grad_norm": 0.7307506203651428, "learning_rate": 5.2642502784246225e-05, "loss": 1.4705, "step": 9365 }, { "epoch": 2.8429200182121717, "grad_norm": 0.6807789206504822, "learning_rate": 5.263744051837603e-05, "loss": 0.9371, "step": 9366 }, { "epoch": 2.8432235544088633, "grad_norm": 0.7624093890190125, "learning_rate": 5.263237825250582e-05, "loss": 1.4282, "step": 9367 }, { "epoch": 2.843527090605555, "grad_norm": 0.7262867093086243, "learning_rate": 5.2627315986635616e-05, "loss": 0.9036, "step": 9368 }, { "epoch": 2.843830626802246, "grad_norm": 0.8020523190498352, "learning_rate": 5.262225372076541e-05, "loss": 1.6797, "step": 9369 }, { "epoch": 2.8441341629989374, "grad_norm": 0.790020227432251, "learning_rate": 5.261719145489521e-05, "loss": 1.4689, "step": 9370 }, { "epoch": 2.844437699195629, "grad_norm": 0.5507739186286926, "learning_rate": 5.261212918902501e-05, "loss": 0.899, "step": 9371 }, { "epoch": 2.8447412353923207, "grad_norm": 0.7228598594665527, "learning_rate": 5.26070669231548e-05, "loss": 1.2649, "step": 9372 }, { "epoch": 2.845044771589012, "grad_norm": 0.7899906039237976, "learning_rate": 5.26020046572846e-05, "loss": 1.4408, "step": 9373 }, { "epoch": 2.8453483077857036, "grad_norm": 0.8347296714782715, "learning_rate": 5.259694239141439e-05, "loss": 1.1176, "step": 9374 }, { "epoch": 2.845651843982395, "grad_norm": 0.57683265209198, "learning_rate": 5.2591880125544193e-05, "loss": 1.049, "step": 9375 }, { "epoch": 2.8459553801790864, "grad_norm": 0.7391901016235352, "learning_rate": 5.2586817859673995e-05, "loss": 1.6252, "step": 9376 }, { "epoch": 2.8462589163757777, "grad_norm": 1.206459879875183, "learning_rate": 5.2581755593803797e-05, "loss": 1.1132, "step": 9377 }, { "epoch": 2.8465624525724693, "grad_norm": 0.6464114189147949, "learning_rate": 5.257669332793359e-05, "loss": 1.23, "step": 9378 }, { "epoch": 2.846865988769161, "grad_norm": 0.7888322472572327, "learning_rate": 5.2571631062063386e-05, "loss": 1.02, "step": 9379 }, { "epoch": 2.847169524965852, "grad_norm": 0.8795018792152405, "learning_rate": 5.256656879619318e-05, "loss": 1.4062, "step": 9380 }, { "epoch": 2.8474730611625434, "grad_norm": 0.7494449019432068, "learning_rate": 5.256150653032298e-05, "loss": 1.3383, "step": 9381 }, { "epoch": 2.847776597359235, "grad_norm": 0.7852158546447754, "learning_rate": 5.255644426445278e-05, "loss": 1.3933, "step": 9382 }, { "epoch": 2.8480801335559267, "grad_norm": 0.7150129079818726, "learning_rate": 5.255138199858257e-05, "loss": 1.4461, "step": 9383 }, { "epoch": 2.848383669752618, "grad_norm": 0.664424479007721, "learning_rate": 5.254631973271237e-05, "loss": 0.7387, "step": 9384 }, { "epoch": 2.8486872059493096, "grad_norm": 0.8127879500389099, "learning_rate": 5.254125746684216e-05, "loss": 0.7265, "step": 9385 }, { "epoch": 2.848990742146001, "grad_norm": 0.7500486969947815, "learning_rate": 5.253619520097196e-05, "loss": 1.5836, "step": 9386 }, { "epoch": 2.8492942783426924, "grad_norm": 0.8652605414390564, "learning_rate": 5.253113293510176e-05, "loss": 1.1918, "step": 9387 }, { "epoch": 2.8495978145393837, "grad_norm": 0.7804118394851685, "learning_rate": 5.252607066923155e-05, "loss": 1.2734, "step": 9388 }, { "epoch": 2.8499013507360753, "grad_norm": 0.8731774687767029, "learning_rate": 5.252100840336135e-05, "loss": 1.0997, "step": 9389 }, { "epoch": 2.850204886932767, "grad_norm": 0.8509650230407715, "learning_rate": 5.251594613749114e-05, "loss": 1.265, "step": 9390 }, { "epoch": 2.850508423129458, "grad_norm": 0.7483029961585999, "learning_rate": 5.2510883871620944e-05, "loss": 1.5143, "step": 9391 }, { "epoch": 2.8508119593261494, "grad_norm": 0.7632007598876953, "learning_rate": 5.250582160575074e-05, "loss": 1.4062, "step": 9392 }, { "epoch": 2.851115495522841, "grad_norm": 0.754703938961029, "learning_rate": 5.2500759339880534e-05, "loss": 1.6202, "step": 9393 }, { "epoch": 2.8514190317195327, "grad_norm": 0.8680121302604675, "learning_rate": 5.249569707401033e-05, "loss": 1.1555, "step": 9394 }, { "epoch": 2.851722567916224, "grad_norm": 0.8086943626403809, "learning_rate": 5.249063480814013e-05, "loss": 1.0458, "step": 9395 }, { "epoch": 2.8520261041129156, "grad_norm": 0.6513628363609314, "learning_rate": 5.2485572542269925e-05, "loss": 1.037, "step": 9396 }, { "epoch": 2.852329640309607, "grad_norm": 0.7701380848884583, "learning_rate": 5.248051027639972e-05, "loss": 1.1105, "step": 9397 }, { "epoch": 2.8526331765062984, "grad_norm": 0.9460353851318359, "learning_rate": 5.2475448010529514e-05, "loss": 1.3801, "step": 9398 }, { "epoch": 2.8529367127029897, "grad_norm": 0.7532127499580383, "learning_rate": 5.247038574465931e-05, "loss": 0.9574, "step": 9399 }, { "epoch": 2.8532402488996813, "grad_norm": 0.8332433700561523, "learning_rate": 5.246532347878911e-05, "loss": 1.3094, "step": 9400 }, { "epoch": 2.853543785096373, "grad_norm": 0.8725066184997559, "learning_rate": 5.2460261212918906e-05, "loss": 0.8585, "step": 9401 }, { "epoch": 2.853847321293064, "grad_norm": 0.7810158133506775, "learning_rate": 5.24551989470487e-05, "loss": 1.4824, "step": 9402 }, { "epoch": 2.8541508574897554, "grad_norm": 0.7571697235107422, "learning_rate": 5.2450136681178495e-05, "loss": 1.5745, "step": 9403 }, { "epoch": 2.854454393686447, "grad_norm": 0.7732252478599548, "learning_rate": 5.244507441530829e-05, "loss": 1.6005, "step": 9404 }, { "epoch": 2.8547579298831387, "grad_norm": 0.7137447595596313, "learning_rate": 5.244001214943809e-05, "loss": 1.4944, "step": 9405 }, { "epoch": 2.85506146607983, "grad_norm": 0.9943833947181702, "learning_rate": 5.2434949883567886e-05, "loss": 1.02, "step": 9406 }, { "epoch": 2.8553650022765216, "grad_norm": 0.5583171844482422, "learning_rate": 5.242988761769768e-05, "loss": 1.515, "step": 9407 }, { "epoch": 2.855668538473213, "grad_norm": 0.7085838317871094, "learning_rate": 5.2424825351827476e-05, "loss": 0.9852, "step": 9408 }, { "epoch": 2.8559720746699044, "grad_norm": 0.7515712976455688, "learning_rate": 5.241976308595728e-05, "loss": 1.5338, "step": 9409 }, { "epoch": 2.8562756108665956, "grad_norm": 0.7197819948196411, "learning_rate": 5.241470082008707e-05, "loss": 1.6469, "step": 9410 }, { "epoch": 2.8565791470632873, "grad_norm": 0.6142263412475586, "learning_rate": 5.240963855421687e-05, "loss": 1.9789, "step": 9411 }, { "epoch": 2.856882683259979, "grad_norm": 0.9562734961509705, "learning_rate": 5.240457628834666e-05, "loss": 1.4231, "step": 9412 }, { "epoch": 2.85718621945667, "grad_norm": 0.8005004525184631, "learning_rate": 5.239951402247646e-05, "loss": 1.4874, "step": 9413 }, { "epoch": 2.857489755653362, "grad_norm": 0.8507754802703857, "learning_rate": 5.239445175660626e-05, "loss": 1.4512, "step": 9414 }, { "epoch": 2.857793291850053, "grad_norm": 0.7643118500709534, "learning_rate": 5.238938949073605e-05, "loss": 1.1473, "step": 9415 }, { "epoch": 2.8580968280467447, "grad_norm": 0.7403945922851562, "learning_rate": 5.238432722486585e-05, "loss": 1.5298, "step": 9416 }, { "epoch": 2.858400364243436, "grad_norm": 0.8773601651191711, "learning_rate": 5.237926495899564e-05, "loss": 1.3813, "step": 9417 }, { "epoch": 2.8587039004401276, "grad_norm": 0.5954939723014832, "learning_rate": 5.237420269312544e-05, "loss": 1.565, "step": 9418 }, { "epoch": 2.8590074366368188, "grad_norm": 0.7224332690238953, "learning_rate": 5.236914042725524e-05, "loss": 0.9511, "step": 9419 }, { "epoch": 2.8593109728335104, "grad_norm": 0.7605741024017334, "learning_rate": 5.236407816138505e-05, "loss": 1.3148, "step": 9420 }, { "epoch": 2.8596145090302016, "grad_norm": 0.6351408362388611, "learning_rate": 5.235901589551484e-05, "loss": 1.1754, "step": 9421 }, { "epoch": 2.8599180452268933, "grad_norm": 0.7478115558624268, "learning_rate": 5.235395362964464e-05, "loss": 1.5339, "step": 9422 }, { "epoch": 2.860221581423585, "grad_norm": 0.726111888885498, "learning_rate": 5.234889136377443e-05, "loss": 1.124, "step": 9423 }, { "epoch": 2.860525117620276, "grad_norm": 0.9777094125747681, "learning_rate": 5.2343829097904226e-05, "loss": 1.347, "step": 9424 }, { "epoch": 2.860828653816968, "grad_norm": 0.7683424949645996, "learning_rate": 5.233876683203403e-05, "loss": 1.1178, "step": 9425 }, { "epoch": 2.861132190013659, "grad_norm": 0.7628284096717834, "learning_rate": 5.233370456616382e-05, "loss": 1.433, "step": 9426 }, { "epoch": 2.8614357262103507, "grad_norm": 0.7160748243331909, "learning_rate": 5.232864230029362e-05, "loss": 1.673, "step": 9427 }, { "epoch": 2.861739262407042, "grad_norm": 0.6520998477935791, "learning_rate": 5.232358003442341e-05, "loss": 1.3751, "step": 9428 }, { "epoch": 2.8620427986037336, "grad_norm": 0.7842011451721191, "learning_rate": 5.231851776855321e-05, "loss": 1.2587, "step": 9429 }, { "epoch": 2.862346334800425, "grad_norm": 1.2248660326004028, "learning_rate": 5.231345550268301e-05, "loss": 0.9244, "step": 9430 }, { "epoch": 2.8626498709971164, "grad_norm": 0.8425837159156799, "learning_rate": 5.2308393236812804e-05, "loss": 1.1696, "step": 9431 }, { "epoch": 2.8629534071938076, "grad_norm": 0.7808385491371155, "learning_rate": 5.23033309709426e-05, "loss": 1.3076, "step": 9432 }, { "epoch": 2.8632569433904993, "grad_norm": 0.8945707082748413, "learning_rate": 5.229826870507239e-05, "loss": 1.5213, "step": 9433 }, { "epoch": 2.863560479587191, "grad_norm": 0.6636887192726135, "learning_rate": 5.2293206439202195e-05, "loss": 1.2744, "step": 9434 }, { "epoch": 2.863864015783882, "grad_norm": 0.7467337846755981, "learning_rate": 5.228814417333199e-05, "loss": 1.3888, "step": 9435 }, { "epoch": 2.864167551980574, "grad_norm": 0.685385525226593, "learning_rate": 5.2283081907461784e-05, "loss": 1.1128, "step": 9436 }, { "epoch": 2.864471088177265, "grad_norm": 0.8079697489738464, "learning_rate": 5.227801964159158e-05, "loss": 1.4654, "step": 9437 }, { "epoch": 2.8647746243739567, "grad_norm": 0.6123029589653015, "learning_rate": 5.2272957375721374e-05, "loss": 1.2247, "step": 9438 }, { "epoch": 2.865078160570648, "grad_norm": 0.6182090640068054, "learning_rate": 5.2267895109851175e-05, "loss": 1.3447, "step": 9439 }, { "epoch": 2.8653816967673396, "grad_norm": 0.7628608345985413, "learning_rate": 5.226283284398097e-05, "loss": 1.5927, "step": 9440 }, { "epoch": 2.865685232964031, "grad_norm": 0.7146131992340088, "learning_rate": 5.2257770578110765e-05, "loss": 1.2454, "step": 9441 }, { "epoch": 2.8659887691607224, "grad_norm": 0.9080950021743774, "learning_rate": 5.225270831224056e-05, "loss": 1.4425, "step": 9442 }, { "epoch": 2.8662923053574136, "grad_norm": 0.8173847794532776, "learning_rate": 5.2247646046370355e-05, "loss": 1.0399, "step": 9443 }, { "epoch": 2.8665958415541053, "grad_norm": 0.9067602157592773, "learning_rate": 5.2242583780500156e-05, "loss": 1.4186, "step": 9444 }, { "epoch": 2.866899377750797, "grad_norm": 0.6521152257919312, "learning_rate": 5.223752151462995e-05, "loss": 1.3131, "step": 9445 }, { "epoch": 2.867202913947488, "grad_norm": 0.8556538820266724, "learning_rate": 5.2232459248759746e-05, "loss": 1.3317, "step": 9446 }, { "epoch": 2.86750645014418, "grad_norm": 0.7497937679290771, "learning_rate": 5.222739698288954e-05, "loss": 1.3419, "step": 9447 }, { "epoch": 2.867809986340871, "grad_norm": 0.6333903074264526, "learning_rate": 5.222233471701934e-05, "loss": 1.5285, "step": 9448 }, { "epoch": 2.8681135225375627, "grad_norm": 0.7968592643737793, "learning_rate": 5.221727245114914e-05, "loss": 1.4826, "step": 9449 }, { "epoch": 2.868417058734254, "grad_norm": 0.7306660413742065, "learning_rate": 5.221221018527893e-05, "loss": 1.6084, "step": 9450 }, { "epoch": 2.8687205949309456, "grad_norm": 0.7376747131347656, "learning_rate": 5.2207147919408727e-05, "loss": 1.6202, "step": 9451 }, { "epoch": 2.869024131127637, "grad_norm": 0.5895881652832031, "learning_rate": 5.220208565353852e-05, "loss": 1.4214, "step": 9452 }, { "epoch": 2.8693276673243284, "grad_norm": 0.7723854184150696, "learning_rate": 5.219702338766832e-05, "loss": 0.9924, "step": 9453 }, { "epoch": 2.8696312035210196, "grad_norm": 0.6269218325614929, "learning_rate": 5.219196112179812e-05, "loss": 1.9052, "step": 9454 }, { "epoch": 2.8699347397177113, "grad_norm": 0.7254565954208374, "learning_rate": 5.218689885592791e-05, "loss": 0.9028, "step": 9455 }, { "epoch": 2.870238275914403, "grad_norm": 0.95447838306427, "learning_rate": 5.218183659005771e-05, "loss": 1.1353, "step": 9456 }, { "epoch": 2.870541812111094, "grad_norm": 0.8039798736572266, "learning_rate": 5.21767743241875e-05, "loss": 1.1365, "step": 9457 }, { "epoch": 2.870845348307786, "grad_norm": 0.7894598841667175, "learning_rate": 5.2171712058317304e-05, "loss": 1.268, "step": 9458 }, { "epoch": 2.871148884504477, "grad_norm": 0.5049625039100647, "learning_rate": 5.21666497924471e-05, "loss": 0.98, "step": 9459 }, { "epoch": 2.8714524207011687, "grad_norm": 0.6731208562850952, "learning_rate": 5.216158752657689e-05, "loss": 1.229, "step": 9460 }, { "epoch": 2.87175595689786, "grad_norm": 0.8631210923194885, "learning_rate": 5.215652526070669e-05, "loss": 1.5062, "step": 9461 }, { "epoch": 2.8720594930945516, "grad_norm": 0.7528581023216248, "learning_rate": 5.215146299483649e-05, "loss": 0.4431, "step": 9462 }, { "epoch": 2.872363029291243, "grad_norm": 0.7947737574577332, "learning_rate": 5.2146400728966284e-05, "loss": 1.3533, "step": 9463 }, { "epoch": 2.8726665654879344, "grad_norm": 0.8122506141662598, "learning_rate": 5.214133846309608e-05, "loss": 1.0848, "step": 9464 }, { "epoch": 2.8729701016846256, "grad_norm": 0.8927140235900879, "learning_rate": 5.213627619722589e-05, "loss": 1.1561, "step": 9465 }, { "epoch": 2.8732736378813173, "grad_norm": 0.8999928832054138, "learning_rate": 5.213121393135568e-05, "loss": 1.5133, "step": 9466 }, { "epoch": 2.873577174078009, "grad_norm": 0.7747072577476501, "learning_rate": 5.212615166548548e-05, "loss": 1.4042, "step": 9467 }, { "epoch": 2.8738807102747, "grad_norm": 0.7205396890640259, "learning_rate": 5.212108939961527e-05, "loss": 1.6049, "step": 9468 }, { "epoch": 2.874184246471392, "grad_norm": 0.7850732803344727, "learning_rate": 5.2116027133745074e-05, "loss": 1.6288, "step": 9469 }, { "epoch": 2.874487782668083, "grad_norm": 0.7782788276672363, "learning_rate": 5.211096486787487e-05, "loss": 1.443, "step": 9470 }, { "epoch": 2.8747913188647747, "grad_norm": 0.8070952296257019, "learning_rate": 5.210590260200466e-05, "loss": 1.4859, "step": 9471 }, { "epoch": 2.875094855061466, "grad_norm": 0.7127992510795593, "learning_rate": 5.210084033613446e-05, "loss": 1.7129, "step": 9472 }, { "epoch": 2.8753983912581575, "grad_norm": 0.6095362305641174, "learning_rate": 5.209577807026426e-05, "loss": 1.4317, "step": 9473 }, { "epoch": 2.875701927454849, "grad_norm": 0.9193838834762573, "learning_rate": 5.2090715804394054e-05, "loss": 1.4304, "step": 9474 }, { "epoch": 2.8760054636515404, "grad_norm": 0.8403457403182983, "learning_rate": 5.208565353852385e-05, "loss": 1.372, "step": 9475 }, { "epoch": 2.8763089998482316, "grad_norm": 0.8098099827766418, "learning_rate": 5.2080591272653644e-05, "loss": 0.6, "step": 9476 }, { "epoch": 2.8766125360449233, "grad_norm": 0.9631059765815735, "learning_rate": 5.207552900678344e-05, "loss": 1.0892, "step": 9477 }, { "epoch": 2.876916072241615, "grad_norm": 0.8415666222572327, "learning_rate": 5.207046674091324e-05, "loss": 1.2686, "step": 9478 }, { "epoch": 2.877219608438306, "grad_norm": 0.6405428647994995, "learning_rate": 5.2065404475043035e-05, "loss": 1.1467, "step": 9479 }, { "epoch": 2.877523144634998, "grad_norm": 0.7595462203025818, "learning_rate": 5.206034220917283e-05, "loss": 1.4007, "step": 9480 }, { "epoch": 2.877826680831689, "grad_norm": 0.8996461033821106, "learning_rate": 5.2055279943302625e-05, "loss": 1.4708, "step": 9481 }, { "epoch": 2.8781302170283807, "grad_norm": 0.7555029392242432, "learning_rate": 5.205021767743242e-05, "loss": 1.1784, "step": 9482 }, { "epoch": 2.878433753225072, "grad_norm": 0.7749983668327332, "learning_rate": 5.204515541156222e-05, "loss": 1.1931, "step": 9483 }, { "epoch": 2.8787372894217635, "grad_norm": 0.7814266681671143, "learning_rate": 5.2040093145692016e-05, "loss": 1.2516, "step": 9484 }, { "epoch": 2.879040825618455, "grad_norm": 0.9828033447265625, "learning_rate": 5.203503087982181e-05, "loss": 1.3432, "step": 9485 }, { "epoch": 2.8793443618151464, "grad_norm": 0.7643359303474426, "learning_rate": 5.2029968613951605e-05, "loss": 1.0592, "step": 9486 }, { "epoch": 2.879647898011838, "grad_norm": 0.6870555877685547, "learning_rate": 5.202490634808141e-05, "loss": 1.6373, "step": 9487 }, { "epoch": 2.8799514342085293, "grad_norm": 0.5364072918891907, "learning_rate": 5.20198440822112e-05, "loss": 1.8056, "step": 9488 }, { "epoch": 2.880254970405221, "grad_norm": 0.6645576357841492, "learning_rate": 5.2014781816340997e-05, "loss": 0.8111, "step": 9489 }, { "epoch": 2.880558506601912, "grad_norm": 0.7039901614189148, "learning_rate": 5.200971955047079e-05, "loss": 1.2746, "step": 9490 }, { "epoch": 2.880862042798604, "grad_norm": 0.816798985004425, "learning_rate": 5.2004657284600586e-05, "loss": 1.5521, "step": 9491 }, { "epoch": 2.8811655789952955, "grad_norm": 0.8264162540435791, "learning_rate": 5.199959501873039e-05, "loss": 0.9209, "step": 9492 }, { "epoch": 2.8814691151919867, "grad_norm": 0.7069061994552612, "learning_rate": 5.199453275286018e-05, "loss": 1.0456, "step": 9493 }, { "epoch": 2.881772651388678, "grad_norm": 0.7477339506149292, "learning_rate": 5.198947048698998e-05, "loss": 1.0461, "step": 9494 }, { "epoch": 2.8820761875853695, "grad_norm": 0.783385157585144, "learning_rate": 5.198440822111977e-05, "loss": 1.3856, "step": 9495 }, { "epoch": 2.882379723782061, "grad_norm": 0.6643350720405579, "learning_rate": 5.197934595524957e-05, "loss": 1.5486, "step": 9496 }, { "epoch": 2.8826832599787524, "grad_norm": 0.6355939507484436, "learning_rate": 5.197428368937937e-05, "loss": 1.0737, "step": 9497 }, { "epoch": 2.882986796175444, "grad_norm": 0.5952500700950623, "learning_rate": 5.196922142350916e-05, "loss": 1.7085, "step": 9498 }, { "epoch": 2.8832903323721353, "grad_norm": 0.7849879860877991, "learning_rate": 5.196415915763896e-05, "loss": 1.4298, "step": 9499 }, { "epoch": 2.883593868568827, "grad_norm": 0.5713488459587097, "learning_rate": 5.195909689176875e-05, "loss": 1.3922, "step": 9500 }, { "epoch": 2.883897404765518, "grad_norm": 0.8283959627151489, "learning_rate": 5.1954034625898554e-05, "loss": 1.3905, "step": 9501 }, { "epoch": 2.88420094096221, "grad_norm": 0.8133925795555115, "learning_rate": 5.194897236002835e-05, "loss": 1.4816, "step": 9502 }, { "epoch": 2.8845044771589015, "grad_norm": 0.7605336904525757, "learning_rate": 5.1943910094158144e-05, "loss": 1.3117, "step": 9503 }, { "epoch": 2.8848080133555927, "grad_norm": 0.9271584153175354, "learning_rate": 5.193884782828794e-05, "loss": 0.9625, "step": 9504 }, { "epoch": 2.885111549552284, "grad_norm": 0.9125571250915527, "learning_rate": 5.1933785562417734e-05, "loss": 1.2425, "step": 9505 }, { "epoch": 2.8854150857489755, "grad_norm": 0.8910593390464783, "learning_rate": 5.1928723296547535e-05, "loss": 1.3785, "step": 9506 }, { "epoch": 2.885718621945667, "grad_norm": 1.0134199857711792, "learning_rate": 5.192366103067733e-05, "loss": 1.1262, "step": 9507 }, { "epoch": 2.8860221581423584, "grad_norm": 0.8326638340950012, "learning_rate": 5.1918598764807125e-05, "loss": 1.6915, "step": 9508 }, { "epoch": 2.88632569433905, "grad_norm": 0.965233564376831, "learning_rate": 5.191353649893693e-05, "loss": 1.2376, "step": 9509 }, { "epoch": 2.8866292305357413, "grad_norm": 0.7267403602600098, "learning_rate": 5.190847423306673e-05, "loss": 1.6445, "step": 9510 }, { "epoch": 2.886932766732433, "grad_norm": 0.7772924304008484, "learning_rate": 5.190341196719652e-05, "loss": 1.1801, "step": 9511 }, { "epoch": 2.887236302929124, "grad_norm": 0.8382841348648071, "learning_rate": 5.1898349701326324e-05, "loss": 1.3439, "step": 9512 }, { "epoch": 2.887539839125816, "grad_norm": 0.7924531698226929, "learning_rate": 5.189328743545612e-05, "loss": 0.9468, "step": 9513 }, { "epoch": 2.8878433753225075, "grad_norm": 0.8812182545661926, "learning_rate": 5.1888225169585914e-05, "loss": 1.2985, "step": 9514 }, { "epoch": 2.8881469115191987, "grad_norm": 0.7798461318016052, "learning_rate": 5.188316290371571e-05, "loss": 1.3977, "step": 9515 }, { "epoch": 2.88845044771589, "grad_norm": 1.0166219472885132, "learning_rate": 5.1878100637845503e-05, "loss": 1.3889, "step": 9516 }, { "epoch": 2.8887539839125815, "grad_norm": 0.8224981427192688, "learning_rate": 5.1873038371975305e-05, "loss": 1.2335, "step": 9517 }, { "epoch": 2.889057520109273, "grad_norm": 0.8364986777305603, "learning_rate": 5.18679761061051e-05, "loss": 1.3545, "step": 9518 }, { "epoch": 2.8893610563059644, "grad_norm": 0.8336874842643738, "learning_rate": 5.1862913840234895e-05, "loss": 1.4871, "step": 9519 }, { "epoch": 2.889664592502656, "grad_norm": 0.778023362159729, "learning_rate": 5.185785157436469e-05, "loss": 1.4589, "step": 9520 }, { "epoch": 2.8899681286993473, "grad_norm": 0.7665227055549622, "learning_rate": 5.1852789308494484e-05, "loss": 1.4066, "step": 9521 }, { "epoch": 2.890271664896039, "grad_norm": 0.841337263584137, "learning_rate": 5.1847727042624286e-05, "loss": 1.3517, "step": 9522 }, { "epoch": 2.89057520109273, "grad_norm": 0.8286343216896057, "learning_rate": 5.184266477675408e-05, "loss": 1.4334, "step": 9523 }, { "epoch": 2.890878737289422, "grad_norm": 0.8397142291069031, "learning_rate": 5.1837602510883875e-05, "loss": 1.4458, "step": 9524 }, { "epoch": 2.8911822734861135, "grad_norm": 1.0814837217330933, "learning_rate": 5.183254024501367e-05, "loss": 0.9643, "step": 9525 }, { "epoch": 2.8914858096828047, "grad_norm": 0.7771674990653992, "learning_rate": 5.182747797914347e-05, "loss": 0.8834, "step": 9526 }, { "epoch": 2.891789345879496, "grad_norm": 0.9093636274337769, "learning_rate": 5.1822415713273266e-05, "loss": 1.3708, "step": 9527 }, { "epoch": 2.8920928820761875, "grad_norm": 0.8670371174812317, "learning_rate": 5.181735344740306e-05, "loss": 1.5083, "step": 9528 }, { "epoch": 2.892396418272879, "grad_norm": 0.7888594269752502, "learning_rate": 5.1812291181532856e-05, "loss": 1.5658, "step": 9529 }, { "epoch": 2.8926999544695704, "grad_norm": 0.668411910533905, "learning_rate": 5.180722891566265e-05, "loss": 1.7654, "step": 9530 }, { "epoch": 2.893003490666262, "grad_norm": 0.86203932762146, "learning_rate": 5.180216664979245e-05, "loss": 1.1276, "step": 9531 }, { "epoch": 2.8933070268629533, "grad_norm": 0.7828831076622009, "learning_rate": 5.179710438392225e-05, "loss": 1.5016, "step": 9532 }, { "epoch": 2.893610563059645, "grad_norm": 0.8838964104652405, "learning_rate": 5.179204211805204e-05, "loss": 1.4392, "step": 9533 }, { "epoch": 2.893914099256336, "grad_norm": 0.8105677366256714, "learning_rate": 5.178697985218184e-05, "loss": 0.9082, "step": 9534 }, { "epoch": 2.894217635453028, "grad_norm": 0.748955488204956, "learning_rate": 5.178191758631163e-05, "loss": 1.5931, "step": 9535 }, { "epoch": 2.8945211716497194, "grad_norm": 0.7240814566612244, "learning_rate": 5.177685532044143e-05, "loss": 1.2664, "step": 9536 }, { "epoch": 2.8948247078464107, "grad_norm": 0.6561826467514038, "learning_rate": 5.177179305457123e-05, "loss": 1.114, "step": 9537 }, { "epoch": 2.895128244043102, "grad_norm": 0.7741621732711792, "learning_rate": 5.176673078870102e-05, "loss": 1.4439, "step": 9538 }, { "epoch": 2.8954317802397935, "grad_norm": 0.6334800720214844, "learning_rate": 5.176166852283082e-05, "loss": 1.8905, "step": 9539 }, { "epoch": 2.895735316436485, "grad_norm": 0.7378382086753845, "learning_rate": 5.175660625696062e-05, "loss": 1.2193, "step": 9540 }, { "epoch": 2.8960388526331764, "grad_norm": 0.7913770079612732, "learning_rate": 5.1751543991090414e-05, "loss": 0.9071, "step": 9541 }, { "epoch": 2.896342388829868, "grad_norm": 0.7039569020271301, "learning_rate": 5.174648172522021e-05, "loss": 1.1217, "step": 9542 }, { "epoch": 2.8966459250265593, "grad_norm": 0.9311305284500122, "learning_rate": 5.1741419459350004e-05, "loss": 1.5294, "step": 9543 }, { "epoch": 2.896949461223251, "grad_norm": 0.7685890197753906, "learning_rate": 5.17363571934798e-05, "loss": 1.3128, "step": 9544 }, { "epoch": 2.897252997419942, "grad_norm": 0.7525815367698669, "learning_rate": 5.17312949276096e-05, "loss": 0.8357, "step": 9545 }, { "epoch": 2.897556533616634, "grad_norm": 0.9669501781463623, "learning_rate": 5.1726232661739395e-05, "loss": 1.1537, "step": 9546 }, { "epoch": 2.8978600698133254, "grad_norm": 0.8898695111274719, "learning_rate": 5.172117039586919e-05, "loss": 1.4388, "step": 9547 }, { "epoch": 2.8981636060100167, "grad_norm": 0.8049153685569763, "learning_rate": 5.1716108129998984e-05, "loss": 1.4729, "step": 9548 }, { "epoch": 2.8984671422067083, "grad_norm": 0.8677496314048767, "learning_rate": 5.171104586412878e-05, "loss": 1.027, "step": 9549 }, { "epoch": 2.8987706784033995, "grad_norm": 0.8706070184707642, "learning_rate": 5.170598359825858e-05, "loss": 1.5992, "step": 9550 }, { "epoch": 2.899074214600091, "grad_norm": 1.0179812908172607, "learning_rate": 5.1700921332388375e-05, "loss": 1.4936, "step": 9551 }, { "epoch": 2.8993777507967824, "grad_norm": 0.735970139503479, "learning_rate": 5.169585906651817e-05, "loss": 1.7096, "step": 9552 }, { "epoch": 2.899681286993474, "grad_norm": 0.5985288023948669, "learning_rate": 5.1690796800647965e-05, "loss": 0.6456, "step": 9553 }, { "epoch": 2.8999848231901653, "grad_norm": 0.6462855935096741, "learning_rate": 5.168573453477777e-05, "loss": 1.1709, "step": 9554 }, { "epoch": 2.900288359386857, "grad_norm": 0.8171294331550598, "learning_rate": 5.168067226890757e-05, "loss": 1.4539, "step": 9555 }, { "epoch": 2.900591895583548, "grad_norm": 1.063042163848877, "learning_rate": 5.167561000303737e-05, "loss": 1.1501, "step": 9556 }, { "epoch": 2.90089543178024, "grad_norm": 0.7977851629257202, "learning_rate": 5.1670547737167165e-05, "loss": 1.3462, "step": 9557 }, { "epoch": 2.9011989679769314, "grad_norm": 0.8036537766456604, "learning_rate": 5.166548547129696e-05, "loss": 1.4145, "step": 9558 }, { "epoch": 2.9015025041736227, "grad_norm": 0.8069393038749695, "learning_rate": 5.1660423205426754e-05, "loss": 1.193, "step": 9559 }, { "epoch": 2.9018060403703143, "grad_norm": 0.6604540348052979, "learning_rate": 5.165536093955655e-05, "loss": 1.3344, "step": 9560 }, { "epoch": 2.9021095765670055, "grad_norm": 0.8737397193908691, "learning_rate": 5.165029867368635e-05, "loss": 1.1529, "step": 9561 }, { "epoch": 2.902413112763697, "grad_norm": 0.8647302389144897, "learning_rate": 5.1645236407816145e-05, "loss": 1.1524, "step": 9562 }, { "epoch": 2.9027166489603884, "grad_norm": 0.7809445261955261, "learning_rate": 5.164017414194594e-05, "loss": 1.4176, "step": 9563 }, { "epoch": 2.90302018515708, "grad_norm": 0.700145959854126, "learning_rate": 5.1635111876075735e-05, "loss": 1.2164, "step": 9564 }, { "epoch": 2.9033237213537717, "grad_norm": 0.9407699704170227, "learning_rate": 5.1630049610205536e-05, "loss": 1.4232, "step": 9565 }, { "epoch": 2.903627257550463, "grad_norm": 0.9683791399002075, "learning_rate": 5.162498734433533e-05, "loss": 1.4847, "step": 9566 }, { "epoch": 2.903930793747154, "grad_norm": 0.8153687119483948, "learning_rate": 5.1619925078465126e-05, "loss": 1.2574, "step": 9567 }, { "epoch": 2.904234329943846, "grad_norm": 0.943260133266449, "learning_rate": 5.161486281259492e-05, "loss": 1.7097, "step": 9568 }, { "epoch": 2.9045378661405374, "grad_norm": 0.84771329164505, "learning_rate": 5.1609800546724716e-05, "loss": 1.4961, "step": 9569 }, { "epoch": 2.9048414023372287, "grad_norm": 0.6338463425636292, "learning_rate": 5.160473828085452e-05, "loss": 1.3485, "step": 9570 }, { "epoch": 2.9051449385339203, "grad_norm": 0.6851577162742615, "learning_rate": 5.159967601498431e-05, "loss": 1.2563, "step": 9571 }, { "epoch": 2.9054484747306115, "grad_norm": 0.8305641412734985, "learning_rate": 5.159461374911411e-05, "loss": 1.2146, "step": 9572 }, { "epoch": 2.905752010927303, "grad_norm": 1.008200764656067, "learning_rate": 5.15895514832439e-05, "loss": 1.2212, "step": 9573 }, { "epoch": 2.9060555471239944, "grad_norm": 0.8098403811454773, "learning_rate": 5.1584489217373696e-05, "loss": 1.5169, "step": 9574 }, { "epoch": 2.906359083320686, "grad_norm": 0.7787508964538574, "learning_rate": 5.15794269515035e-05, "loss": 1.3337, "step": 9575 }, { "epoch": 2.9066626195173777, "grad_norm": 0.8555715680122375, "learning_rate": 5.157436468563329e-05, "loss": 1.1114, "step": 9576 }, { "epoch": 2.906966155714069, "grad_norm": 0.6763578653335571, "learning_rate": 5.156930241976309e-05, "loss": 1.5864, "step": 9577 }, { "epoch": 2.90726969191076, "grad_norm": 0.824381947517395, "learning_rate": 5.156424015389288e-05, "loss": 1.5366, "step": 9578 }, { "epoch": 2.907573228107452, "grad_norm": 0.8940673470497131, "learning_rate": 5.1559177888022684e-05, "loss": 1.1842, "step": 9579 }, { "epoch": 2.9078767643041434, "grad_norm": 0.7460834383964539, "learning_rate": 5.155411562215248e-05, "loss": 1.3562, "step": 9580 }, { "epoch": 2.9081803005008346, "grad_norm": 0.7965441942214966, "learning_rate": 5.1549053356282273e-05, "loss": 1.6128, "step": 9581 }, { "epoch": 2.9084838366975263, "grad_norm": 0.6389833092689514, "learning_rate": 5.154399109041207e-05, "loss": 1.0002, "step": 9582 }, { "epoch": 2.9087873728942175, "grad_norm": 0.7806882262229919, "learning_rate": 5.153892882454186e-05, "loss": 1.138, "step": 9583 }, { "epoch": 2.909090909090909, "grad_norm": 0.7312899231910706, "learning_rate": 5.1533866558671665e-05, "loss": 1.114, "step": 9584 }, { "epoch": 2.9093944452876004, "grad_norm": 0.5638426542282104, "learning_rate": 5.152880429280146e-05, "loss": 0.6022, "step": 9585 }, { "epoch": 2.909697981484292, "grad_norm": 0.8103323578834534, "learning_rate": 5.1523742026931254e-05, "loss": 1.4652, "step": 9586 }, { "epoch": 2.9100015176809837, "grad_norm": 0.7764275074005127, "learning_rate": 5.151867976106105e-05, "loss": 1.3598, "step": 9587 }, { "epoch": 2.910305053877675, "grad_norm": 0.8000783920288086, "learning_rate": 5.1513617495190844e-05, "loss": 1.4708, "step": 9588 }, { "epoch": 2.910608590074366, "grad_norm": 0.9029366970062256, "learning_rate": 5.1508555229320645e-05, "loss": 1.4317, "step": 9589 }, { "epoch": 2.9109121262710578, "grad_norm": 0.6857181191444397, "learning_rate": 5.150349296345044e-05, "loss": 1.5334, "step": 9590 }, { "epoch": 2.9112156624677494, "grad_norm": 0.7986151576042175, "learning_rate": 5.1498430697580235e-05, "loss": 1.5455, "step": 9591 }, { "epoch": 2.9115191986644406, "grad_norm": 0.5361661911010742, "learning_rate": 5.149336843171003e-05, "loss": 1.1083, "step": 9592 }, { "epoch": 2.9118227348611323, "grad_norm": 0.6486913561820984, "learning_rate": 5.148830616583983e-05, "loss": 1.1699, "step": 9593 }, { "epoch": 2.9121262710578235, "grad_norm": 0.8758334517478943, "learning_rate": 5.1483243899969626e-05, "loss": 1.1375, "step": 9594 }, { "epoch": 2.912429807254515, "grad_norm": 0.8009307980537415, "learning_rate": 5.147818163409942e-05, "loss": 1.3781, "step": 9595 }, { "epoch": 2.9127333434512064, "grad_norm": 0.8063926696777344, "learning_rate": 5.1473119368229216e-05, "loss": 1.3542, "step": 9596 }, { "epoch": 2.913036879647898, "grad_norm": 0.6963333487510681, "learning_rate": 5.146805710235901e-05, "loss": 1.5677, "step": 9597 }, { "epoch": 2.9133404158445897, "grad_norm": 0.7917062044143677, "learning_rate": 5.146299483648882e-05, "loss": 1.2204, "step": 9598 }, { "epoch": 2.913643952041281, "grad_norm": 0.7042210698127747, "learning_rate": 5.1457932570618614e-05, "loss": 0.8323, "step": 9599 }, { "epoch": 2.913947488237972, "grad_norm": 1.0165883302688599, "learning_rate": 5.1452870304748415e-05, "loss": 1.3022, "step": 9600 }, { "epoch": 2.9142510244346638, "grad_norm": 0.8328631520271301, "learning_rate": 5.144780803887821e-05, "loss": 1.1785, "step": 9601 }, { "epoch": 2.9145545606313554, "grad_norm": 0.9051679968833923, "learning_rate": 5.1442745773008005e-05, "loss": 1.2479, "step": 9602 }, { "epoch": 2.9148580968280466, "grad_norm": 0.8050028085708618, "learning_rate": 5.14376835071378e-05, "loss": 1.8515, "step": 9603 }, { "epoch": 2.9151616330247383, "grad_norm": 0.9980581402778625, "learning_rate": 5.14326212412676e-05, "loss": 0.6876, "step": 9604 }, { "epoch": 2.9154651692214295, "grad_norm": 0.7276046276092529, "learning_rate": 5.1427558975397396e-05, "loss": 0.8367, "step": 9605 }, { "epoch": 2.915768705418121, "grad_norm": 0.7519717812538147, "learning_rate": 5.142249670952719e-05, "loss": 1.4272, "step": 9606 }, { "epoch": 2.9160722416148124, "grad_norm": 0.8044680953025818, "learning_rate": 5.1417434443656986e-05, "loss": 1.4931, "step": 9607 }, { "epoch": 2.916375777811504, "grad_norm": 0.7219284176826477, "learning_rate": 5.141237217778678e-05, "loss": 1.0797, "step": 9608 }, { "epoch": 2.9166793140081957, "grad_norm": 0.8786895871162415, "learning_rate": 5.140730991191658e-05, "loss": 1.2961, "step": 9609 }, { "epoch": 2.916982850204887, "grad_norm": 0.7362821698188782, "learning_rate": 5.140224764604638e-05, "loss": 1.4744, "step": 9610 }, { "epoch": 2.917286386401578, "grad_norm": 0.8390907049179077, "learning_rate": 5.139718538017617e-05, "loss": 1.4064, "step": 9611 }, { "epoch": 2.9175899225982698, "grad_norm": 0.9276410341262817, "learning_rate": 5.1392123114305966e-05, "loss": 1.0092, "step": 9612 }, { "epoch": 2.9178934587949614, "grad_norm": 0.8145900964736938, "learning_rate": 5.138706084843576e-05, "loss": 1.3435, "step": 9613 }, { "epoch": 2.9181969949916526, "grad_norm": 0.7713693380355835, "learning_rate": 5.138199858256556e-05, "loss": 1.1803, "step": 9614 }, { "epoch": 2.9185005311883443, "grad_norm": 0.8595828413963318, "learning_rate": 5.137693631669536e-05, "loss": 1.1999, "step": 9615 }, { "epoch": 2.9188040673850355, "grad_norm": 0.7955202460289001, "learning_rate": 5.137187405082515e-05, "loss": 1.5164, "step": 9616 }, { "epoch": 2.919107603581727, "grad_norm": 0.8825899362564087, "learning_rate": 5.136681178495495e-05, "loss": 1.4261, "step": 9617 }, { "epoch": 2.9194111397784184, "grad_norm": 0.8596706986427307, "learning_rate": 5.136174951908475e-05, "loss": 0.5435, "step": 9618 }, { "epoch": 2.91971467597511, "grad_norm": 0.6747449636459351, "learning_rate": 5.1356687253214543e-05, "loss": 1.5789, "step": 9619 }, { "epoch": 2.9200182121718017, "grad_norm": 0.8745201826095581, "learning_rate": 5.135162498734434e-05, "loss": 1.148, "step": 9620 }, { "epoch": 2.920321748368493, "grad_norm": 0.8869763612747192, "learning_rate": 5.134656272147413e-05, "loss": 1.367, "step": 9621 }, { "epoch": 2.9206252845651846, "grad_norm": 0.7572410106658936, "learning_rate": 5.134150045560393e-05, "loss": 1.1945, "step": 9622 }, { "epoch": 2.9209288207618758, "grad_norm": 0.8140220046043396, "learning_rate": 5.133643818973373e-05, "loss": 1.6378, "step": 9623 }, { "epoch": 2.9212323569585674, "grad_norm": 0.8246815204620361, "learning_rate": 5.1331375923863524e-05, "loss": 1.3473, "step": 9624 }, { "epoch": 2.9215358931552586, "grad_norm": 0.8982497453689575, "learning_rate": 5.132631365799332e-05, "loss": 1.2034, "step": 9625 }, { "epoch": 2.9218394293519503, "grad_norm": 0.7786431312561035, "learning_rate": 5.1321251392123114e-05, "loss": 1.2714, "step": 9626 }, { "epoch": 2.922142965548642, "grad_norm": 0.7802687883377075, "learning_rate": 5.131618912625291e-05, "loss": 1.2668, "step": 9627 }, { "epoch": 2.922446501745333, "grad_norm": 0.632917046546936, "learning_rate": 5.131112686038271e-05, "loss": 1.2752, "step": 9628 }, { "epoch": 2.9227500379420244, "grad_norm": 0.8978488445281982, "learning_rate": 5.1306064594512505e-05, "loss": 1.367, "step": 9629 }, { "epoch": 2.923053574138716, "grad_norm": 0.8339922428131104, "learning_rate": 5.13010023286423e-05, "loss": 1.3381, "step": 9630 }, { "epoch": 2.9233571103354077, "grad_norm": 0.8951248526573181, "learning_rate": 5.1295940062772095e-05, "loss": 1.242, "step": 9631 }, { "epoch": 2.923660646532099, "grad_norm": 0.8673502802848816, "learning_rate": 5.1290877796901896e-05, "loss": 1.0581, "step": 9632 }, { "epoch": 2.9239641827287906, "grad_norm": 0.7304636240005493, "learning_rate": 5.128581553103169e-05, "loss": 1.5523, "step": 9633 }, { "epoch": 2.9242677189254818, "grad_norm": 0.8997844457626343, "learning_rate": 5.1280753265161486e-05, "loss": 1.4083, "step": 9634 }, { "epoch": 2.9245712551221734, "grad_norm": 0.5966005921363831, "learning_rate": 5.127569099929128e-05, "loss": 1.6588, "step": 9635 }, { "epoch": 2.9248747913188646, "grad_norm": 0.6863671541213989, "learning_rate": 5.1270628733421075e-05, "loss": 1.0795, "step": 9636 }, { "epoch": 2.9251783275155563, "grad_norm": 0.947727382183075, "learning_rate": 5.126556646755088e-05, "loss": 0.9771, "step": 9637 }, { "epoch": 2.925481863712248, "grad_norm": 0.5955187678337097, "learning_rate": 5.126050420168067e-05, "loss": 0.8562, "step": 9638 }, { "epoch": 2.925785399908939, "grad_norm": 0.5938629508018494, "learning_rate": 5.1255441935810466e-05, "loss": 1.4456, "step": 9639 }, { "epoch": 2.9260889361056304, "grad_norm": 0.9371960759162903, "learning_rate": 5.125037966994026e-05, "loss": 0.9832, "step": 9640 }, { "epoch": 2.926392472302322, "grad_norm": 0.7487984299659729, "learning_rate": 5.1245317404070056e-05, "loss": 1.4711, "step": 9641 }, { "epoch": 2.9266960084990137, "grad_norm": 0.6870548725128174, "learning_rate": 5.124025513819986e-05, "loss": 1.4693, "step": 9642 }, { "epoch": 2.926999544695705, "grad_norm": 0.7630167007446289, "learning_rate": 5.1235192872329666e-05, "loss": 1.3906, "step": 9643 }, { "epoch": 2.9273030808923965, "grad_norm": 0.8437421321868896, "learning_rate": 5.123013060645946e-05, "loss": 1.7142, "step": 9644 }, { "epoch": 2.9276066170890878, "grad_norm": 0.8551376461982727, "learning_rate": 5.1225068340589256e-05, "loss": 1.4621, "step": 9645 }, { "epoch": 2.9279101532857794, "grad_norm": 1.0669502019882202, "learning_rate": 5.122000607471905e-05, "loss": 0.9127, "step": 9646 }, { "epoch": 2.9282136894824706, "grad_norm": 0.6872658133506775, "learning_rate": 5.1214943808848845e-05, "loss": 1.4721, "step": 9647 }, { "epoch": 2.9285172256791623, "grad_norm": 0.6480333209037781, "learning_rate": 5.120988154297865e-05, "loss": 1.2266, "step": 9648 }, { "epoch": 2.928820761875854, "grad_norm": 0.7079026103019714, "learning_rate": 5.120481927710844e-05, "loss": 1.6364, "step": 9649 }, { "epoch": 2.929124298072545, "grad_norm": 0.7798513174057007, "learning_rate": 5.1199757011238236e-05, "loss": 0.5933, "step": 9650 }, { "epoch": 2.9294278342692364, "grad_norm": 0.7039276957511902, "learning_rate": 5.119469474536803e-05, "loss": 1.2404, "step": 9651 }, { "epoch": 2.929731370465928, "grad_norm": 0.8349506855010986, "learning_rate": 5.1189632479497826e-05, "loss": 1.2046, "step": 9652 }, { "epoch": 2.9300349066626197, "grad_norm": 0.7625461220741272, "learning_rate": 5.118457021362763e-05, "loss": 1.2918, "step": 9653 }, { "epoch": 2.930338442859311, "grad_norm": 0.8089103698730469, "learning_rate": 5.117950794775742e-05, "loss": 1.4058, "step": 9654 }, { "epoch": 2.9306419790560025, "grad_norm": 0.601629376411438, "learning_rate": 5.117444568188722e-05, "loss": 1.4394, "step": 9655 }, { "epoch": 2.9309455152526938, "grad_norm": 0.7368852496147156, "learning_rate": 5.116938341601701e-05, "loss": 1.4202, "step": 9656 }, { "epoch": 2.9312490514493854, "grad_norm": 0.5684367418289185, "learning_rate": 5.1164321150146813e-05, "loss": 1.8344, "step": 9657 }, { "epoch": 2.9315525876460766, "grad_norm": 0.8074974417686462, "learning_rate": 5.115925888427661e-05, "loss": 1.5921, "step": 9658 }, { "epoch": 2.9318561238427683, "grad_norm": 0.8737492561340332, "learning_rate": 5.11541966184064e-05, "loss": 1.026, "step": 9659 }, { "epoch": 2.93215966003946, "grad_norm": 0.7643964290618896, "learning_rate": 5.11491343525362e-05, "loss": 1.4965, "step": 9660 }, { "epoch": 2.932463196236151, "grad_norm": 0.7347926497459412, "learning_rate": 5.114407208666599e-05, "loss": 1.5798, "step": 9661 }, { "epoch": 2.9327667324328424, "grad_norm": 0.7421404123306274, "learning_rate": 5.1139009820795794e-05, "loss": 1.815, "step": 9662 }, { "epoch": 2.933070268629534, "grad_norm": 0.7707992196083069, "learning_rate": 5.113394755492559e-05, "loss": 1.2817, "step": 9663 }, { "epoch": 2.9333738048262257, "grad_norm": 0.6986004710197449, "learning_rate": 5.1128885289055384e-05, "loss": 1.4769, "step": 9664 }, { "epoch": 2.933677341022917, "grad_norm": 0.8465421199798584, "learning_rate": 5.112382302318518e-05, "loss": 1.5801, "step": 9665 }, { "epoch": 2.9339808772196085, "grad_norm": 0.7518223524093628, "learning_rate": 5.111876075731497e-05, "loss": 1.5368, "step": 9666 }, { "epoch": 2.9342844134162998, "grad_norm": 0.7431496381759644, "learning_rate": 5.1113698491444775e-05, "loss": 1.7001, "step": 9667 }, { "epoch": 2.9345879496129914, "grad_norm": 0.8584191203117371, "learning_rate": 5.110863622557457e-05, "loss": 0.7157, "step": 9668 }, { "epoch": 2.9348914858096826, "grad_norm": 0.7151068449020386, "learning_rate": 5.1103573959704364e-05, "loss": 1.6295, "step": 9669 }, { "epoch": 2.9351950220063743, "grad_norm": 0.8421808481216431, "learning_rate": 5.109851169383416e-05, "loss": 1.4686, "step": 9670 }, { "epoch": 2.935498558203066, "grad_norm": 0.6444612741470337, "learning_rate": 5.109344942796396e-05, "loss": 0.8179, "step": 9671 }, { "epoch": 2.935802094399757, "grad_norm": 0.6792292594909668, "learning_rate": 5.1088387162093756e-05, "loss": 1.4475, "step": 9672 }, { "epoch": 2.9361056305964484, "grad_norm": 1.006290316581726, "learning_rate": 5.108332489622355e-05, "loss": 1.0705, "step": 9673 }, { "epoch": 2.93640916679314, "grad_norm": 0.8089848160743713, "learning_rate": 5.1078262630353345e-05, "loss": 1.1588, "step": 9674 }, { "epoch": 2.9367127029898317, "grad_norm": 0.7995975017547607, "learning_rate": 5.107320036448314e-05, "loss": 1.4301, "step": 9675 }, { "epoch": 2.937016239186523, "grad_norm": 0.6885451674461365, "learning_rate": 5.106813809861294e-05, "loss": 1.4127, "step": 9676 }, { "epoch": 2.9373197753832145, "grad_norm": 0.754417896270752, "learning_rate": 5.1063075832742736e-05, "loss": 0.9511, "step": 9677 }, { "epoch": 2.9376233115799057, "grad_norm": 0.6752740144729614, "learning_rate": 5.105801356687253e-05, "loss": 1.0616, "step": 9678 }, { "epoch": 2.9379268477765974, "grad_norm": 0.7772539854049683, "learning_rate": 5.1052951301002326e-05, "loss": 1.4327, "step": 9679 }, { "epoch": 2.9382303839732886, "grad_norm": 0.8099876046180725, "learning_rate": 5.104788903513212e-05, "loss": 1.4187, "step": 9680 }, { "epoch": 2.9385339201699803, "grad_norm": 0.9773611426353455, "learning_rate": 5.104282676926192e-05, "loss": 0.9895, "step": 9681 }, { "epoch": 2.938837456366672, "grad_norm": 0.9407110810279846, "learning_rate": 5.103776450339172e-05, "loss": 1.0509, "step": 9682 }, { "epoch": 2.939140992563363, "grad_norm": 0.7487874031066895, "learning_rate": 5.103270223752151e-05, "loss": 1.1588, "step": 9683 }, { "epoch": 2.939444528760055, "grad_norm": 0.8135343790054321, "learning_rate": 5.102763997165131e-05, "loss": 1.4319, "step": 9684 }, { "epoch": 2.939748064956746, "grad_norm": 0.7436039447784424, "learning_rate": 5.102257770578111e-05, "loss": 1.279, "step": 9685 }, { "epoch": 2.9400516011534377, "grad_norm": 0.6971882581710815, "learning_rate": 5.10175154399109e-05, "loss": 1.3123, "step": 9686 }, { "epoch": 2.940355137350129, "grad_norm": 0.7899006009101868, "learning_rate": 5.101245317404071e-05, "loss": 1.2072, "step": 9687 }, { "epoch": 2.9406586735468205, "grad_norm": 0.685184121131897, "learning_rate": 5.1007390908170506e-05, "loss": 1.2253, "step": 9688 }, { "epoch": 2.940962209743512, "grad_norm": 1.092652678489685, "learning_rate": 5.10023286423003e-05, "loss": 1.2048, "step": 9689 }, { "epoch": 2.9412657459402034, "grad_norm": 0.8485744595527649, "learning_rate": 5.0997266376430096e-05, "loss": 1.0684, "step": 9690 }, { "epoch": 2.9415692821368946, "grad_norm": 0.724848747253418, "learning_rate": 5.099220411055989e-05, "loss": 0.9208, "step": 9691 }, { "epoch": 2.9418728183335863, "grad_norm": 0.9698922634124756, "learning_rate": 5.098714184468969e-05, "loss": 1.0005, "step": 9692 }, { "epoch": 2.942176354530278, "grad_norm": 0.8260499835014343, "learning_rate": 5.098207957881949e-05, "loss": 1.6745, "step": 9693 }, { "epoch": 2.942479890726969, "grad_norm": 0.775804340839386, "learning_rate": 5.097701731294928e-05, "loss": 1.5064, "step": 9694 }, { "epoch": 2.942783426923661, "grad_norm": 0.8762521743774414, "learning_rate": 5.0971955047079077e-05, "loss": 1.4192, "step": 9695 }, { "epoch": 2.943086963120352, "grad_norm": 0.9298811554908752, "learning_rate": 5.096689278120888e-05, "loss": 1.1981, "step": 9696 }, { "epoch": 2.9433904993170437, "grad_norm": 0.8426903486251831, "learning_rate": 5.096183051533867e-05, "loss": 1.4391, "step": 9697 }, { "epoch": 2.943694035513735, "grad_norm": 0.8833054900169373, "learning_rate": 5.095676824946847e-05, "loss": 0.7843, "step": 9698 }, { "epoch": 2.9439975717104265, "grad_norm": 0.774456799030304, "learning_rate": 5.095170598359826e-05, "loss": 1.5328, "step": 9699 }, { "epoch": 2.944301107907118, "grad_norm": 0.8480775356292725, "learning_rate": 5.094664371772806e-05, "loss": 1.3889, "step": 9700 }, { "epoch": 2.9446046441038094, "grad_norm": 0.8695796132087708, "learning_rate": 5.094158145185786e-05, "loss": 1.5308, "step": 9701 }, { "epoch": 2.9449081803005006, "grad_norm": 0.8271726369857788, "learning_rate": 5.0936519185987654e-05, "loss": 1.5792, "step": 9702 }, { "epoch": 2.9452117164971923, "grad_norm": 0.4747374653816223, "learning_rate": 5.093145692011745e-05, "loss": 1.243, "step": 9703 }, { "epoch": 2.945515252693884, "grad_norm": 1.3622267246246338, "learning_rate": 5.092639465424724e-05, "loss": 1.1071, "step": 9704 }, { "epoch": 2.945818788890575, "grad_norm": 0.7314203977584839, "learning_rate": 5.092133238837704e-05, "loss": 1.1546, "step": 9705 }, { "epoch": 2.946122325087267, "grad_norm": 0.8458970785140991, "learning_rate": 5.091627012250684e-05, "loss": 1.1194, "step": 9706 }, { "epoch": 2.946425861283958, "grad_norm": 0.7926846742630005, "learning_rate": 5.0911207856636634e-05, "loss": 1.4845, "step": 9707 }, { "epoch": 2.9467293974806497, "grad_norm": 0.8043208122253418, "learning_rate": 5.090614559076643e-05, "loss": 1.3745, "step": 9708 }, { "epoch": 2.947032933677341, "grad_norm": 0.7073126435279846, "learning_rate": 5.0901083324896224e-05, "loss": 1.6979, "step": 9709 }, { "epoch": 2.9473364698740325, "grad_norm": 0.9214633703231812, "learning_rate": 5.0896021059026026e-05, "loss": 1.2375, "step": 9710 }, { "epoch": 2.947640006070724, "grad_norm": 0.8063821196556091, "learning_rate": 5.089095879315582e-05, "loss": 1.6225, "step": 9711 }, { "epoch": 2.9479435422674154, "grad_norm": 0.9422792792320251, "learning_rate": 5.0885896527285615e-05, "loss": 1.3707, "step": 9712 }, { "epoch": 2.9482470784641066, "grad_norm": 0.7002964019775391, "learning_rate": 5.088083426141541e-05, "loss": 1.3171, "step": 9713 }, { "epoch": 2.9485506146607983, "grad_norm": 0.715103268623352, "learning_rate": 5.0875771995545205e-05, "loss": 1.3321, "step": 9714 }, { "epoch": 2.94885415085749, "grad_norm": 0.7000694274902344, "learning_rate": 5.0870709729675006e-05, "loss": 1.7362, "step": 9715 }, { "epoch": 2.949157687054181, "grad_norm": 0.6491566896438599, "learning_rate": 5.08656474638048e-05, "loss": 1.4304, "step": 9716 }, { "epoch": 2.949461223250873, "grad_norm": 0.8144864439964294, "learning_rate": 5.0860585197934596e-05, "loss": 1.3623, "step": 9717 }, { "epoch": 2.949764759447564, "grad_norm": 0.9529521465301514, "learning_rate": 5.085552293206439e-05, "loss": 1.3465, "step": 9718 }, { "epoch": 2.9500682956442557, "grad_norm": 0.6080120205879211, "learning_rate": 5.0850460666194186e-05, "loss": 1.6269, "step": 9719 }, { "epoch": 2.950371831840947, "grad_norm": 0.8070117235183716, "learning_rate": 5.084539840032399e-05, "loss": 1.1286, "step": 9720 }, { "epoch": 2.9506753680376385, "grad_norm": 0.809084415435791, "learning_rate": 5.084033613445378e-05, "loss": 1.3545, "step": 9721 }, { "epoch": 2.95097890423433, "grad_norm": 0.8189207315444946, "learning_rate": 5.083527386858358e-05, "loss": 1.1994, "step": 9722 }, { "epoch": 2.9512824404310214, "grad_norm": 0.8860880732536316, "learning_rate": 5.083021160271337e-05, "loss": 1.3388, "step": 9723 }, { "epoch": 2.9515859766277126, "grad_norm": 0.7872827053070068, "learning_rate": 5.082514933684317e-05, "loss": 1.1414, "step": 9724 }, { "epoch": 2.9518895128244043, "grad_norm": 0.6410046815872192, "learning_rate": 5.082008707097297e-05, "loss": 0.9255, "step": 9725 }, { "epoch": 2.952193049021096, "grad_norm": 0.7178161144256592, "learning_rate": 5.081502480510276e-05, "loss": 1.3847, "step": 9726 }, { "epoch": 2.952496585217787, "grad_norm": 0.7075746059417725, "learning_rate": 5.080996253923256e-05, "loss": 1.5949, "step": 9727 }, { "epoch": 2.952800121414479, "grad_norm": 0.8325478434562683, "learning_rate": 5.080490027336235e-05, "loss": 1.283, "step": 9728 }, { "epoch": 2.95310365761117, "grad_norm": 0.7608327865600586, "learning_rate": 5.0799838007492154e-05, "loss": 1.3592, "step": 9729 }, { "epoch": 2.9534071938078617, "grad_norm": 0.7346392273902893, "learning_rate": 5.079477574162195e-05, "loss": 1.5977, "step": 9730 }, { "epoch": 2.953710730004553, "grad_norm": 0.8925238251686096, "learning_rate": 5.0789713475751743e-05, "loss": 1.3908, "step": 9731 }, { "epoch": 2.9540142662012445, "grad_norm": 0.727159321308136, "learning_rate": 5.078465120988155e-05, "loss": 0.8633, "step": 9732 }, { "epoch": 2.954317802397936, "grad_norm": 0.7768084406852722, "learning_rate": 5.0779588944011347e-05, "loss": 0.7428, "step": 9733 }, { "epoch": 2.9546213385946274, "grad_norm": 0.7659957408905029, "learning_rate": 5.077452667814114e-05, "loss": 1.1148, "step": 9734 }, { "epoch": 2.9549248747913186, "grad_norm": 0.8714812397956848, "learning_rate": 5.076946441227094e-05, "loss": 1.4103, "step": 9735 }, { "epoch": 2.9552284109880103, "grad_norm": 0.7689177393913269, "learning_rate": 5.076440214640074e-05, "loss": 1.0334, "step": 9736 }, { "epoch": 2.955531947184702, "grad_norm": 0.9407566785812378, "learning_rate": 5.075933988053053e-05, "loss": 1.3897, "step": 9737 }, { "epoch": 2.955835483381393, "grad_norm": 0.7912222743034363, "learning_rate": 5.075427761466033e-05, "loss": 1.2953, "step": 9738 }, { "epoch": 2.956139019578085, "grad_norm": 0.8030030727386475, "learning_rate": 5.074921534879012e-05, "loss": 1.4195, "step": 9739 }, { "epoch": 2.956442555774776, "grad_norm": 0.8280852437019348, "learning_rate": 5.0744153082919924e-05, "loss": 0.9818, "step": 9740 }, { "epoch": 2.9567460919714676, "grad_norm": 0.7365915775299072, "learning_rate": 5.073909081704972e-05, "loss": 1.7192, "step": 9741 }, { "epoch": 2.957049628168159, "grad_norm": 0.6640022397041321, "learning_rate": 5.073402855117951e-05, "loss": 0.7376, "step": 9742 }, { "epoch": 2.9573531643648505, "grad_norm": 0.9272693991661072, "learning_rate": 5.072896628530931e-05, "loss": 1.4146, "step": 9743 }, { "epoch": 2.957656700561542, "grad_norm": 0.7189601063728333, "learning_rate": 5.07239040194391e-05, "loss": 1.3483, "step": 9744 }, { "epoch": 2.9579602367582334, "grad_norm": 0.8663241267204285, "learning_rate": 5.0718841753568904e-05, "loss": 1.1175, "step": 9745 }, { "epoch": 2.9582637729549246, "grad_norm": 0.7305809855461121, "learning_rate": 5.07137794876987e-05, "loss": 1.5778, "step": 9746 }, { "epoch": 2.9585673091516163, "grad_norm": 0.7029976844787598, "learning_rate": 5.0708717221828494e-05, "loss": 1.2949, "step": 9747 }, { "epoch": 2.958870845348308, "grad_norm": 0.5989913940429688, "learning_rate": 5.070365495595829e-05, "loss": 1.3814, "step": 9748 }, { "epoch": 2.959174381544999, "grad_norm": 0.7960646152496338, "learning_rate": 5.069859269008809e-05, "loss": 1.6714, "step": 9749 }, { "epoch": 2.9594779177416908, "grad_norm": 0.7953382730484009, "learning_rate": 5.0693530424217885e-05, "loss": 1.0393, "step": 9750 }, { "epoch": 2.959781453938382, "grad_norm": 0.5406391620635986, "learning_rate": 5.068846815834768e-05, "loss": 1.0639, "step": 9751 }, { "epoch": 2.9600849901350736, "grad_norm": 0.7291198372840881, "learning_rate": 5.0683405892477475e-05, "loss": 0.8658, "step": 9752 }, { "epoch": 2.960388526331765, "grad_norm": 0.870521605014801, "learning_rate": 5.067834362660727e-05, "loss": 1.5244, "step": 9753 }, { "epoch": 2.9606920625284565, "grad_norm": 0.7896977066993713, "learning_rate": 5.067328136073707e-05, "loss": 1.8605, "step": 9754 }, { "epoch": 2.960995598725148, "grad_norm": 0.8984799385070801, "learning_rate": 5.0668219094866866e-05, "loss": 0.8763, "step": 9755 }, { "epoch": 2.9612991349218394, "grad_norm": 0.8268417119979858, "learning_rate": 5.066315682899666e-05, "loss": 1.4706, "step": 9756 }, { "epoch": 2.961602671118531, "grad_norm": 0.8731755018234253, "learning_rate": 5.0658094563126455e-05, "loss": 1.3214, "step": 9757 }, { "epoch": 2.9619062073152223, "grad_norm": 0.8329451084136963, "learning_rate": 5.065303229725625e-05, "loss": 1.4599, "step": 9758 }, { "epoch": 2.962209743511914, "grad_norm": 0.9944485425949097, "learning_rate": 5.064797003138605e-05, "loss": 1.2268, "step": 9759 }, { "epoch": 2.962513279708605, "grad_norm": 0.8748295307159424, "learning_rate": 5.064290776551585e-05, "loss": 1.5343, "step": 9760 }, { "epoch": 2.9628168159052968, "grad_norm": 0.734396755695343, "learning_rate": 5.063784549964564e-05, "loss": 0.916, "step": 9761 }, { "epoch": 2.9631203521019884, "grad_norm": 1.1190379858016968, "learning_rate": 5.0632783233775436e-05, "loss": 1.0479, "step": 9762 }, { "epoch": 2.9634238882986796, "grad_norm": 0.6809061169624329, "learning_rate": 5.062772096790524e-05, "loss": 1.6105, "step": 9763 }, { "epoch": 2.963727424495371, "grad_norm": 0.895285964012146, "learning_rate": 5.062265870203503e-05, "loss": 1.1615, "step": 9764 }, { "epoch": 2.9640309606920625, "grad_norm": 0.8676573634147644, "learning_rate": 5.061759643616483e-05, "loss": 1.2873, "step": 9765 }, { "epoch": 2.964334496888754, "grad_norm": 0.9381932616233826, "learning_rate": 5.061253417029462e-05, "loss": 1.4655, "step": 9766 }, { "epoch": 2.9646380330854454, "grad_norm": 0.7882171869277954, "learning_rate": 5.060747190442442e-05, "loss": 1.1012, "step": 9767 }, { "epoch": 2.964941569282137, "grad_norm": 0.7521642446517944, "learning_rate": 5.060240963855422e-05, "loss": 0.8543, "step": 9768 }, { "epoch": 2.9652451054788282, "grad_norm": 0.6908265352249146, "learning_rate": 5.059734737268401e-05, "loss": 1.6366, "step": 9769 }, { "epoch": 2.96554864167552, "grad_norm": 0.8038427829742432, "learning_rate": 5.059228510681381e-05, "loss": 1.4779, "step": 9770 }, { "epoch": 2.965852177872211, "grad_norm": 0.7121046781539917, "learning_rate": 5.05872228409436e-05, "loss": 1.0363, "step": 9771 }, { "epoch": 2.9661557140689028, "grad_norm": 0.8224244117736816, "learning_rate": 5.05821605750734e-05, "loss": 1.2618, "step": 9772 }, { "epoch": 2.9664592502655944, "grad_norm": 0.9318447113037109, "learning_rate": 5.05770983092032e-05, "loss": 1.4278, "step": 9773 }, { "epoch": 2.9667627864622856, "grad_norm": 0.8800509572029114, "learning_rate": 5.0572036043332994e-05, "loss": 1.1878, "step": 9774 }, { "epoch": 2.967066322658977, "grad_norm": 0.6945512294769287, "learning_rate": 5.056697377746279e-05, "loss": 1.4063, "step": 9775 }, { "epoch": 2.9673698588556685, "grad_norm": 0.850615918636322, "learning_rate": 5.05619115115926e-05, "loss": 1.6237, "step": 9776 }, { "epoch": 2.96767339505236, "grad_norm": 0.7711875438690186, "learning_rate": 5.055684924572239e-05, "loss": 1.3161, "step": 9777 }, { "epoch": 2.9679769312490514, "grad_norm": 0.9047555923461914, "learning_rate": 5.055178697985219e-05, "loss": 1.4571, "step": 9778 }, { "epoch": 2.968280467445743, "grad_norm": 0.8342523574829102, "learning_rate": 5.054672471398199e-05, "loss": 1.4259, "step": 9779 }, { "epoch": 2.9685840036424342, "grad_norm": 0.7356086373329163, "learning_rate": 5.054166244811178e-05, "loss": 1.7979, "step": 9780 }, { "epoch": 2.968887539839126, "grad_norm": 0.8583924770355225, "learning_rate": 5.053660018224158e-05, "loss": 1.6187, "step": 9781 }, { "epoch": 2.969191076035817, "grad_norm": 0.6908389925956726, "learning_rate": 5.053153791637137e-05, "loss": 1.7816, "step": 9782 }, { "epoch": 2.9694946122325088, "grad_norm": 0.7292888760566711, "learning_rate": 5.052647565050117e-05, "loss": 1.6162, "step": 9783 }, { "epoch": 2.9697981484292004, "grad_norm": 0.7569924592971802, "learning_rate": 5.052141338463097e-05, "loss": 1.4642, "step": 9784 }, { "epoch": 2.9701016846258916, "grad_norm": 0.9216084480285645, "learning_rate": 5.0516351118760764e-05, "loss": 1.4773, "step": 9785 }, { "epoch": 2.970405220822583, "grad_norm": 1.0300320386886597, "learning_rate": 5.051128885289056e-05, "loss": 0.8327, "step": 9786 }, { "epoch": 2.9707087570192745, "grad_norm": 0.6576038002967834, "learning_rate": 5.0506226587020354e-05, "loss": 1.2105, "step": 9787 }, { "epoch": 2.971012293215966, "grad_norm": 0.812941312789917, "learning_rate": 5.0501164321150155e-05, "loss": 1.156, "step": 9788 }, { "epoch": 2.9713158294126574, "grad_norm": 0.71174156665802, "learning_rate": 5.049610205527995e-05, "loss": 1.2858, "step": 9789 }, { "epoch": 2.971619365609349, "grad_norm": 0.7057084441184998, "learning_rate": 5.0491039789409745e-05, "loss": 0.8433, "step": 9790 }, { "epoch": 2.9719229018060402, "grad_norm": 0.9108096957206726, "learning_rate": 5.048597752353954e-05, "loss": 1.0238, "step": 9791 }, { "epoch": 2.972226438002732, "grad_norm": 0.8809316158294678, "learning_rate": 5.0480915257669334e-05, "loss": 1.2879, "step": 9792 }, { "epoch": 2.972529974199423, "grad_norm": 0.7910594940185547, "learning_rate": 5.0475852991799136e-05, "loss": 1.4888, "step": 9793 }, { "epoch": 2.9728335103961148, "grad_norm": 0.713735044002533, "learning_rate": 5.047079072592893e-05, "loss": 1.5353, "step": 9794 }, { "epoch": 2.9731370465928064, "grad_norm": 0.829972505569458, "learning_rate": 5.0465728460058725e-05, "loss": 1.075, "step": 9795 }, { "epoch": 2.9734405827894976, "grad_norm": 0.8665021657943726, "learning_rate": 5.046066619418852e-05, "loss": 1.4595, "step": 9796 }, { "epoch": 2.973744118986189, "grad_norm": 0.7877386808395386, "learning_rate": 5.0455603928318315e-05, "loss": 1.5096, "step": 9797 }, { "epoch": 2.9740476551828805, "grad_norm": 0.7499648928642273, "learning_rate": 5.0450541662448117e-05, "loss": 1.3817, "step": 9798 }, { "epoch": 2.974351191379572, "grad_norm": 0.6918804049491882, "learning_rate": 5.044547939657791e-05, "loss": 1.2928, "step": 9799 }, { "epoch": 2.9746547275762634, "grad_norm": 0.647549033164978, "learning_rate": 5.0440417130707706e-05, "loss": 1.339, "step": 9800 }, { "epoch": 2.974958263772955, "grad_norm": 0.8811267614364624, "learning_rate": 5.04353548648375e-05, "loss": 1.1343, "step": 9801 }, { "epoch": 2.9752617999696462, "grad_norm": 0.8627746105194092, "learning_rate": 5.04302925989673e-05, "loss": 1.1734, "step": 9802 }, { "epoch": 2.975565336166338, "grad_norm": 0.5508565306663513, "learning_rate": 5.04252303330971e-05, "loss": 0.8058, "step": 9803 }, { "epoch": 2.975868872363029, "grad_norm": 0.832922637462616, "learning_rate": 5.042016806722689e-05, "loss": 1.4417, "step": 9804 }, { "epoch": 2.9761724085597208, "grad_norm": 0.6043367981910706, "learning_rate": 5.041510580135669e-05, "loss": 1.534, "step": 9805 }, { "epoch": 2.9764759447564124, "grad_norm": 0.7686224579811096, "learning_rate": 5.041004353548648e-05, "loss": 1.2629, "step": 9806 }, { "epoch": 2.9767794809531036, "grad_norm": 0.697522759437561, "learning_rate": 5.040498126961628e-05, "loss": 1.4228, "step": 9807 }, { "epoch": 2.977083017149795, "grad_norm": 0.6715698838233948, "learning_rate": 5.039991900374608e-05, "loss": 1.5535, "step": 9808 }, { "epoch": 2.9773865533464865, "grad_norm": 0.8603852391242981, "learning_rate": 5.039485673787587e-05, "loss": 1.0935, "step": 9809 }, { "epoch": 2.977690089543178, "grad_norm": 0.7181248664855957, "learning_rate": 5.038979447200567e-05, "loss": 1.4258, "step": 9810 }, { "epoch": 2.9779936257398694, "grad_norm": 0.9570068120956421, "learning_rate": 5.038473220613546e-05, "loss": 1.2175, "step": 9811 }, { "epoch": 2.978297161936561, "grad_norm": 0.8859447240829468, "learning_rate": 5.0379669940265264e-05, "loss": 1.2012, "step": 9812 }, { "epoch": 2.9786006981332522, "grad_norm": 0.7729933857917786, "learning_rate": 5.037460767439506e-05, "loss": 1.5303, "step": 9813 }, { "epoch": 2.978904234329944, "grad_norm": 0.8890818357467651, "learning_rate": 5.0369545408524854e-05, "loss": 1.3881, "step": 9814 }, { "epoch": 2.979207770526635, "grad_norm": 1.0364371538162231, "learning_rate": 5.036448314265465e-05, "loss": 1.4075, "step": 9815 }, { "epoch": 2.9795113067233268, "grad_norm": 0.6747880578041077, "learning_rate": 5.035942087678445e-05, "loss": 0.9969, "step": 9816 }, { "epoch": 2.9798148429200184, "grad_norm": 0.8688072562217712, "learning_rate": 5.0354358610914245e-05, "loss": 1.5621, "step": 9817 }, { "epoch": 2.9801183791167096, "grad_norm": 0.8358421921730042, "learning_rate": 5.034929634504404e-05, "loss": 1.4759, "step": 9818 }, { "epoch": 2.9804219153134013, "grad_norm": 0.7541760206222534, "learning_rate": 5.0344234079173834e-05, "loss": 0.7761, "step": 9819 }, { "epoch": 2.9807254515100925, "grad_norm": 0.6867856979370117, "learning_rate": 5.033917181330363e-05, "loss": 1.726, "step": 9820 }, { "epoch": 2.981028987706784, "grad_norm": 0.6554029583930969, "learning_rate": 5.033410954743344e-05, "loss": 1.0531, "step": 9821 }, { "epoch": 2.9813325239034754, "grad_norm": 0.8928065896034241, "learning_rate": 5.032904728156323e-05, "loss": 0.8594, "step": 9822 }, { "epoch": 2.981636060100167, "grad_norm": 0.8254424929618835, "learning_rate": 5.0323985015693034e-05, "loss": 1.4491, "step": 9823 }, { "epoch": 2.9819395962968587, "grad_norm": 0.8873692750930786, "learning_rate": 5.031892274982283e-05, "loss": 1.3563, "step": 9824 }, { "epoch": 2.98224313249355, "grad_norm": 0.8543142676353455, "learning_rate": 5.0313860483952623e-05, "loss": 1.2437, "step": 9825 }, { "epoch": 2.982546668690241, "grad_norm": 0.8088618516921997, "learning_rate": 5.030879821808242e-05, "loss": 1.5201, "step": 9826 }, { "epoch": 2.9828502048869328, "grad_norm": 0.7048816680908203, "learning_rate": 5.030373595221221e-05, "loss": 1.72, "step": 9827 }, { "epoch": 2.9831537410836244, "grad_norm": 0.9001889228820801, "learning_rate": 5.0298673686342015e-05, "loss": 1.1851, "step": 9828 }, { "epoch": 2.9834572772803156, "grad_norm": 0.7622965574264526, "learning_rate": 5.029361142047181e-05, "loss": 1.3831, "step": 9829 }, { "epoch": 2.9837608134770073, "grad_norm": 0.8391376733779907, "learning_rate": 5.0288549154601604e-05, "loss": 1.4729, "step": 9830 }, { "epoch": 2.9840643496736985, "grad_norm": 0.9648115038871765, "learning_rate": 5.02834868887314e-05, "loss": 1.0646, "step": 9831 }, { "epoch": 2.98436788587039, "grad_norm": 0.6593850255012512, "learning_rate": 5.02784246228612e-05, "loss": 1.5846, "step": 9832 }, { "epoch": 2.9846714220670814, "grad_norm": 0.8097171187400818, "learning_rate": 5.0273362356990995e-05, "loss": 1.0068, "step": 9833 }, { "epoch": 2.984974958263773, "grad_norm": 0.5215747356414795, "learning_rate": 5.026830009112079e-05, "loss": 1.2893, "step": 9834 }, { "epoch": 2.9852784944604647, "grad_norm": 0.824794352054596, "learning_rate": 5.0263237825250585e-05, "loss": 1.5381, "step": 9835 }, { "epoch": 2.985582030657156, "grad_norm": 0.6967061161994934, "learning_rate": 5.025817555938038e-05, "loss": 1.3467, "step": 9836 }, { "epoch": 2.985885566853847, "grad_norm": 0.7044780254364014, "learning_rate": 5.025311329351018e-05, "loss": 1.3884, "step": 9837 }, { "epoch": 2.9861891030505388, "grad_norm": 0.7709345817565918, "learning_rate": 5.0248051027639976e-05, "loss": 1.0223, "step": 9838 }, { "epoch": 2.9864926392472304, "grad_norm": 0.7379102110862732, "learning_rate": 5.024298876176977e-05, "loss": 0.8531, "step": 9839 }, { "epoch": 2.9867961754439216, "grad_norm": 0.8930834531784058, "learning_rate": 5.0237926495899566e-05, "loss": 1.3216, "step": 9840 }, { "epoch": 2.9870997116406133, "grad_norm": 0.9065871834754944, "learning_rate": 5.023286423002937e-05, "loss": 1.1774, "step": 9841 }, { "epoch": 2.9874032478373045, "grad_norm": 0.8405609726905823, "learning_rate": 5.022780196415916e-05, "loss": 0.9348, "step": 9842 }, { "epoch": 2.987706784033996, "grad_norm": 0.7697071433067322, "learning_rate": 5.022273969828896e-05, "loss": 1.4181, "step": 9843 }, { "epoch": 2.9880103202306874, "grad_norm": 0.7778381705284119, "learning_rate": 5.021767743241875e-05, "loss": 1.706, "step": 9844 }, { "epoch": 2.988313856427379, "grad_norm": 0.7781450152397156, "learning_rate": 5.0212615166548546e-05, "loss": 1.2356, "step": 9845 }, { "epoch": 2.9886173926240707, "grad_norm": 0.8738018870353699, "learning_rate": 5.020755290067835e-05, "loss": 1.4512, "step": 9846 }, { "epoch": 2.988920928820762, "grad_norm": 0.9768234491348267, "learning_rate": 5.020249063480814e-05, "loss": 1.471, "step": 9847 }, { "epoch": 2.989224465017453, "grad_norm": 0.8589128255844116, "learning_rate": 5.019742836893794e-05, "loss": 1.0007, "step": 9848 }, { "epoch": 2.9895280012141447, "grad_norm": 0.8861180543899536, "learning_rate": 5.019236610306773e-05, "loss": 1.2349, "step": 9849 }, { "epoch": 2.9898315374108364, "grad_norm": 0.8022412657737732, "learning_rate": 5.018730383719753e-05, "loss": 1.1733, "step": 9850 }, { "epoch": 2.9901350736075276, "grad_norm": 0.875621497631073, "learning_rate": 5.018224157132733e-05, "loss": 1.6376, "step": 9851 }, { "epoch": 2.9904386098042193, "grad_norm": 0.7509286403656006, "learning_rate": 5.0177179305457124e-05, "loss": 1.2079, "step": 9852 }, { "epoch": 2.9907421460009105, "grad_norm": 0.8753302097320557, "learning_rate": 5.017211703958692e-05, "loss": 1.34, "step": 9853 }, { "epoch": 2.991045682197602, "grad_norm": 0.7396566271781921, "learning_rate": 5.016705477371671e-05, "loss": 1.4037, "step": 9854 }, { "epoch": 2.9913492183942934, "grad_norm": 0.6811649203300476, "learning_rate": 5.0161992507846515e-05, "loss": 1.7496, "step": 9855 }, { "epoch": 2.991652754590985, "grad_norm": 0.6608646512031555, "learning_rate": 5.015693024197631e-05, "loss": 1.5362, "step": 9856 }, { "epoch": 2.9919562907876767, "grad_norm": 0.7470353841781616, "learning_rate": 5.0151867976106104e-05, "loss": 1.1998, "step": 9857 }, { "epoch": 2.992259826984368, "grad_norm": 0.7401373982429504, "learning_rate": 5.01468057102359e-05, "loss": 1.4646, "step": 9858 }, { "epoch": 2.992563363181059, "grad_norm": 0.7070319652557373, "learning_rate": 5.0141743444365694e-05, "loss": 1.0402, "step": 9859 }, { "epoch": 2.9928668993777507, "grad_norm": 0.7485304474830627, "learning_rate": 5.0136681178495496e-05, "loss": 1.1112, "step": 9860 }, { "epoch": 2.9931704355744424, "grad_norm": 0.7815187573432922, "learning_rate": 5.013161891262529e-05, "loss": 1.2715, "step": 9861 }, { "epoch": 2.9934739717711336, "grad_norm": 0.6393333077430725, "learning_rate": 5.0126556646755085e-05, "loss": 1.2626, "step": 9862 }, { "epoch": 2.9937775079678253, "grad_norm": 0.7579580545425415, "learning_rate": 5.012149438088488e-05, "loss": 1.3921, "step": 9863 }, { "epoch": 2.9940810441645165, "grad_norm": 0.8907715678215027, "learning_rate": 5.0116432115014675e-05, "loss": 1.1476, "step": 9864 }, { "epoch": 2.994384580361208, "grad_norm": 0.7173703908920288, "learning_rate": 5.011136984914448e-05, "loss": 1.5022, "step": 9865 }, { "epoch": 2.9946881165578993, "grad_norm": 0.8615228533744812, "learning_rate": 5.010630758327428e-05, "loss": 1.3103, "step": 9866 }, { "epoch": 2.994991652754591, "grad_norm": 0.9548215866088867, "learning_rate": 5.010124531740408e-05, "loss": 1.3148, "step": 9867 }, { "epoch": 2.9952951889512827, "grad_norm": 0.7956007122993469, "learning_rate": 5.0096183051533874e-05, "loss": 1.1208, "step": 9868 }, { "epoch": 2.995598725147974, "grad_norm": 1.16354501247406, "learning_rate": 5.009112078566367e-05, "loss": 0.8042, "step": 9869 }, { "epoch": 2.995902261344665, "grad_norm": 1.0169782638549805, "learning_rate": 5.0086058519793464e-05, "loss": 1.308, "step": 9870 }, { "epoch": 2.9962057975413567, "grad_norm": 0.8002044558525085, "learning_rate": 5.0080996253923265e-05, "loss": 1.6956, "step": 9871 }, { "epoch": 2.9965093337380484, "grad_norm": 0.7645939588546753, "learning_rate": 5.007593398805306e-05, "loss": 0.7603, "step": 9872 }, { "epoch": 2.9968128699347396, "grad_norm": 0.7191788554191589, "learning_rate": 5.0070871722182855e-05, "loss": 1.4339, "step": 9873 }, { "epoch": 2.9971164061314313, "grad_norm": 0.7696192264556885, "learning_rate": 5.006580945631265e-05, "loss": 1.593, "step": 9874 }, { "epoch": 2.9974199423281225, "grad_norm": 0.9107735753059387, "learning_rate": 5.0060747190442445e-05, "loss": 1.2468, "step": 9875 }, { "epoch": 2.997723478524814, "grad_norm": 0.7734604477882385, "learning_rate": 5.0055684924572246e-05, "loss": 0.857, "step": 9876 }, { "epoch": 2.9980270147215053, "grad_norm": 0.6583893895149231, "learning_rate": 5.005062265870204e-05, "loss": 1.7402, "step": 9877 }, { "epoch": 2.998330550918197, "grad_norm": 0.800261914730072, "learning_rate": 5.0045560392831836e-05, "loss": 1.6387, "step": 9878 }, { "epoch": 2.9986340871148887, "grad_norm": 0.8133311867713928, "learning_rate": 5.004049812696163e-05, "loss": 1.3963, "step": 9879 }, { "epoch": 2.99893762331158, "grad_norm": 0.5534855723381042, "learning_rate": 5.0035435861091425e-05, "loss": 1.6639, "step": 9880 }, { "epoch": 2.9992411595082715, "grad_norm": 0.7287002801895142, "learning_rate": 5.003037359522123e-05, "loss": 1.4862, "step": 9881 }, { "epoch": 2.9995446957049627, "grad_norm": 0.6345083117485046, "learning_rate": 5.002531132935102e-05, "loss": 1.1753, "step": 9882 }, { "epoch": 2.9998482319016544, "grad_norm": 0.850407600402832, "learning_rate": 5.0020249063480816e-05, "loss": 1.2063, "step": 9883 }, { "epoch": 3.0001517680983456, "grad_norm": 0.6992197632789612, "learning_rate": 5.001518679761061e-05, "loss": 0.9919, "step": 9884 }, { "epoch": 3.0004553042950373, "grad_norm": 0.6799697279930115, "learning_rate": 5.001012453174041e-05, "loss": 0.6358, "step": 9885 }, { "epoch": 3.0007588404917285, "grad_norm": 0.6441646814346313, "learning_rate": 5.000506226587021e-05, "loss": 1.6449, "step": 9886 }, { "epoch": 3.00106237668842, "grad_norm": 0.8453091979026794, "learning_rate": 5e-05, "loss": 1.3043, "step": 9887 }, { "epoch": 3.0013659128851113, "grad_norm": 0.6195229291915894, "learning_rate": 4.99949377341298e-05, "loss": 0.4865, "step": 9888 }, { "epoch": 3.001669449081803, "grad_norm": 0.8482832312583923, "learning_rate": 4.998987546825959e-05, "loss": 0.9909, "step": 9889 }, { "epoch": 3.0019729852784947, "grad_norm": 1.1187256574630737, "learning_rate": 4.9984813202389394e-05, "loss": 0.86, "step": 9890 }, { "epoch": 3.002276521475186, "grad_norm": 0.922222375869751, "learning_rate": 4.997975093651919e-05, "loss": 1.1761, "step": 9891 }, { "epoch": 3.0025800576718775, "grad_norm": 1.2568992376327515, "learning_rate": 4.997468867064898e-05, "loss": 0.9785, "step": 9892 }, { "epoch": 3.0028835938685687, "grad_norm": 1.0342175960540771, "learning_rate": 4.996962640477878e-05, "loss": 0.6732, "step": 9893 }, { "epoch": 3.0031871300652604, "grad_norm": 1.0421677827835083, "learning_rate": 4.996456413890858e-05, "loss": 1.3249, "step": 9894 }, { "epoch": 3.0034906662619516, "grad_norm": 0.9691305160522461, "learning_rate": 4.9959501873038374e-05, "loss": 0.6156, "step": 9895 }, { "epoch": 3.0037942024586433, "grad_norm": 0.9591747522354126, "learning_rate": 4.995443960716817e-05, "loss": 0.4681, "step": 9896 }, { "epoch": 3.0040977386553345, "grad_norm": 0.973501443862915, "learning_rate": 4.9949377341297964e-05, "loss": 1.3864, "step": 9897 }, { "epoch": 3.004401274852026, "grad_norm": 0.5497210621833801, "learning_rate": 4.994431507542776e-05, "loss": 0.7692, "step": 9898 }, { "epoch": 3.0047048110487173, "grad_norm": 0.9023900032043457, "learning_rate": 4.993925280955756e-05, "loss": 0.5685, "step": 9899 }, { "epoch": 3.005008347245409, "grad_norm": 0.945500373840332, "learning_rate": 4.993419054368736e-05, "loss": 0.9917, "step": 9900 }, { "epoch": 3.0053118834421007, "grad_norm": 0.7900440692901611, "learning_rate": 4.992912827781716e-05, "loss": 0.7036, "step": 9901 }, { "epoch": 3.005615419638792, "grad_norm": 0.9429001808166504, "learning_rate": 4.992406601194695e-05, "loss": 1.2002, "step": 9902 }, { "epoch": 3.0059189558354835, "grad_norm": 1.0113555192947388, "learning_rate": 4.9919003746076746e-05, "loss": 0.7924, "step": 9903 }, { "epoch": 3.0062224920321747, "grad_norm": 0.727717936038971, "learning_rate": 4.991394148020654e-05, "loss": 1.268, "step": 9904 }, { "epoch": 3.0065260282288664, "grad_norm": 0.9647108912467957, "learning_rate": 4.990887921433634e-05, "loss": 1.102, "step": 9905 }, { "epoch": 3.0068295644255576, "grad_norm": 0.8775750398635864, "learning_rate": 4.990381694846614e-05, "loss": 0.738, "step": 9906 }, { "epoch": 3.0071331006222493, "grad_norm": 0.8506520390510559, "learning_rate": 4.989875468259593e-05, "loss": 0.8632, "step": 9907 }, { "epoch": 3.0074366368189405, "grad_norm": 1.0059034824371338, "learning_rate": 4.989369241672573e-05, "loss": 1.0357, "step": 9908 }, { "epoch": 3.007740173015632, "grad_norm": 1.262754201889038, "learning_rate": 4.988863015085553e-05, "loss": 0.6071, "step": 9909 }, { "epoch": 3.008043709212324, "grad_norm": 1.321282148361206, "learning_rate": 4.988356788498532e-05, "loss": 0.7738, "step": 9910 }, { "epoch": 3.008347245409015, "grad_norm": 0.8013333082199097, "learning_rate": 4.987850561911512e-05, "loss": 1.4922, "step": 9911 }, { "epoch": 3.0086507816057066, "grad_norm": 0.7580252289772034, "learning_rate": 4.987344335324491e-05, "loss": 1.2822, "step": 9912 }, { "epoch": 3.008954317802398, "grad_norm": 1.1161868572235107, "learning_rate": 4.986838108737471e-05, "loss": 0.7667, "step": 9913 }, { "epoch": 3.0092578539990895, "grad_norm": 1.0713251829147339, "learning_rate": 4.986331882150451e-05, "loss": 0.7488, "step": 9914 }, { "epoch": 3.0095613901957807, "grad_norm": 0.5945373177528381, "learning_rate": 4.9858256555634304e-05, "loss": 0.6705, "step": 9915 }, { "epoch": 3.0098649263924724, "grad_norm": 0.9917808175086975, "learning_rate": 4.98531942897641e-05, "loss": 0.9693, "step": 9916 }, { "epoch": 3.0101684625891636, "grad_norm": 1.123931646347046, "learning_rate": 4.9848132023893894e-05, "loss": 0.9279, "step": 9917 }, { "epoch": 3.0104719987858553, "grad_norm": 1.0795220136642456, "learning_rate": 4.984306975802369e-05, "loss": 0.6274, "step": 9918 }, { "epoch": 3.0107755349825465, "grad_norm": 0.6648991703987122, "learning_rate": 4.983800749215349e-05, "loss": 0.7407, "step": 9919 }, { "epoch": 3.011079071179238, "grad_norm": 0.894781768321991, "learning_rate": 4.9832945226283285e-05, "loss": 0.9515, "step": 9920 }, { "epoch": 3.0113826073759298, "grad_norm": 0.6843689680099487, "learning_rate": 4.9827882960413086e-05, "loss": 1.2523, "step": 9921 }, { "epoch": 3.011686143572621, "grad_norm": 0.7869067192077637, "learning_rate": 4.982282069454288e-05, "loss": 1.1186, "step": 9922 }, { "epoch": 3.0119896797693126, "grad_norm": 1.1819573640823364, "learning_rate": 4.9817758428672676e-05, "loss": 0.4672, "step": 9923 }, { "epoch": 3.012293215966004, "grad_norm": 1.117724895477295, "learning_rate": 4.981269616280248e-05, "loss": 0.4872, "step": 9924 }, { "epoch": 3.0125967521626955, "grad_norm": 0.5742544531822205, "learning_rate": 4.980763389693227e-05, "loss": 0.6115, "step": 9925 }, { "epoch": 3.0129002883593867, "grad_norm": 0.9205898642539978, "learning_rate": 4.980257163106207e-05, "loss": 0.6983, "step": 9926 }, { "epoch": 3.0132038245560784, "grad_norm": 0.8232927322387695, "learning_rate": 4.979750936519186e-05, "loss": 0.4218, "step": 9927 }, { "epoch": 3.0135073607527696, "grad_norm": 1.0297305583953857, "learning_rate": 4.979244709932166e-05, "loss": 0.7704, "step": 9928 }, { "epoch": 3.0138108969494612, "grad_norm": 1.0752744674682617, "learning_rate": 4.978738483345146e-05, "loss": 0.6037, "step": 9929 }, { "epoch": 3.0141144331461525, "grad_norm": 0.9535788297653198, "learning_rate": 4.978232256758125e-05, "loss": 0.8848, "step": 9930 }, { "epoch": 3.014417969342844, "grad_norm": 0.820176899433136, "learning_rate": 4.977726030171105e-05, "loss": 1.0916, "step": 9931 }, { "epoch": 3.0147215055395358, "grad_norm": 0.9483010172843933, "learning_rate": 4.977219803584084e-05, "loss": 0.751, "step": 9932 }, { "epoch": 3.015025041736227, "grad_norm": 1.2106448411941528, "learning_rate": 4.976713576997064e-05, "loss": 0.5232, "step": 9933 }, { "epoch": 3.0153285779329186, "grad_norm": 0.8939266204833984, "learning_rate": 4.976207350410044e-05, "loss": 0.9032, "step": 9934 }, { "epoch": 3.01563211412961, "grad_norm": 1.0762056112289429, "learning_rate": 4.9757011238230234e-05, "loss": 0.6202, "step": 9935 }, { "epoch": 3.0159356503263015, "grad_norm": 0.9702959656715393, "learning_rate": 4.975194897236003e-05, "loss": 0.6372, "step": 9936 }, { "epoch": 3.0162391865229927, "grad_norm": 1.0524059534072876, "learning_rate": 4.9746886706489823e-05, "loss": 0.4538, "step": 9937 }, { "epoch": 3.0165427227196844, "grad_norm": 1.0001167058944702, "learning_rate": 4.9741824440619625e-05, "loss": 0.9368, "step": 9938 }, { "epoch": 3.0168462589163756, "grad_norm": 1.1349345445632935, "learning_rate": 4.973676217474942e-05, "loss": 0.8937, "step": 9939 }, { "epoch": 3.0171497951130672, "grad_norm": 0.8107739090919495, "learning_rate": 4.9731699908879215e-05, "loss": 0.8347, "step": 9940 }, { "epoch": 3.017453331309759, "grad_norm": 0.9209519624710083, "learning_rate": 4.972663764300901e-05, "loss": 0.8891, "step": 9941 }, { "epoch": 3.01775686750645, "grad_norm": 0.9517861604690552, "learning_rate": 4.9721575377138804e-05, "loss": 0.8127, "step": 9942 }, { "epoch": 3.0180604037031418, "grad_norm": 0.9834015369415283, "learning_rate": 4.9716513111268606e-05, "loss": 0.6899, "step": 9943 }, { "epoch": 3.018363939899833, "grad_norm": 0.9329797625541687, "learning_rate": 4.971145084539841e-05, "loss": 0.964, "step": 9944 }, { "epoch": 3.0186674760965246, "grad_norm": 0.721088171005249, "learning_rate": 4.97063885795282e-05, "loss": 0.8785, "step": 9945 }, { "epoch": 3.018971012293216, "grad_norm": 0.9499136209487915, "learning_rate": 4.9701326313658e-05, "loss": 0.9782, "step": 9946 }, { "epoch": 3.0192745484899075, "grad_norm": 0.9532542824745178, "learning_rate": 4.969626404778779e-05, "loss": 0.8628, "step": 9947 }, { "epoch": 3.0195780846865987, "grad_norm": 1.153826355934143, "learning_rate": 4.969120178191759e-05, "loss": 0.394, "step": 9948 }, { "epoch": 3.0198816208832904, "grad_norm": 0.9489085674285889, "learning_rate": 4.968613951604739e-05, "loss": 1.294, "step": 9949 }, { "epoch": 3.0201851570799816, "grad_norm": 0.9038990139961243, "learning_rate": 4.968107725017718e-05, "loss": 0.6538, "step": 9950 }, { "epoch": 3.0204886932766732, "grad_norm": 1.0667967796325684, "learning_rate": 4.967601498430698e-05, "loss": 0.8695, "step": 9951 }, { "epoch": 3.020792229473365, "grad_norm": 1.0511293411254883, "learning_rate": 4.967095271843677e-05, "loss": 0.6799, "step": 9952 }, { "epoch": 3.021095765670056, "grad_norm": 0.9058912396430969, "learning_rate": 4.9665890452566574e-05, "loss": 1.3471, "step": 9953 }, { "epoch": 3.0213993018667478, "grad_norm": 1.0901941061019897, "learning_rate": 4.966082818669637e-05, "loss": 0.8364, "step": 9954 }, { "epoch": 3.021702838063439, "grad_norm": 1.0618418455123901, "learning_rate": 4.9655765920826164e-05, "loss": 0.8804, "step": 9955 }, { "epoch": 3.0220063742601306, "grad_norm": 1.1724745035171509, "learning_rate": 4.965070365495596e-05, "loss": 0.8896, "step": 9956 }, { "epoch": 3.022309910456822, "grad_norm": 0.906879186630249, "learning_rate": 4.964564138908575e-05, "loss": 0.5583, "step": 9957 }, { "epoch": 3.0226134466535135, "grad_norm": 0.879097580909729, "learning_rate": 4.9640579123215555e-05, "loss": 1.0431, "step": 9958 }, { "epoch": 3.0229169828502047, "grad_norm": 0.9819177389144897, "learning_rate": 4.963551685734535e-05, "loss": 1.1626, "step": 9959 }, { "epoch": 3.0232205190468964, "grad_norm": 1.0365228652954102, "learning_rate": 4.9630454591475144e-05, "loss": 0.8894, "step": 9960 }, { "epoch": 3.0235240552435876, "grad_norm": 0.7370893955230713, "learning_rate": 4.962539232560494e-05, "loss": 0.7873, "step": 9961 }, { "epoch": 3.0238275914402792, "grad_norm": 0.9230274558067322, "learning_rate": 4.962033005973474e-05, "loss": 0.3545, "step": 9962 }, { "epoch": 3.024131127636971, "grad_norm": 1.0262789726257324, "learning_rate": 4.9615267793864536e-05, "loss": 0.7352, "step": 9963 }, { "epoch": 3.024434663833662, "grad_norm": 1.1875581741333008, "learning_rate": 4.961020552799433e-05, "loss": 0.2989, "step": 9964 }, { "epoch": 3.0247382000303538, "grad_norm": 0.7860487103462219, "learning_rate": 4.960514326212413e-05, "loss": 0.7674, "step": 9965 }, { "epoch": 3.025041736227045, "grad_norm": 1.1991053819656372, "learning_rate": 4.960008099625393e-05, "loss": 1.179, "step": 9966 }, { "epoch": 3.0253452724237366, "grad_norm": 1.3125637769699097, "learning_rate": 4.959501873038372e-05, "loss": 0.5942, "step": 9967 }, { "epoch": 3.025648808620428, "grad_norm": 0.9002748727798462, "learning_rate": 4.958995646451352e-05, "loss": 0.8309, "step": 9968 }, { "epoch": 3.0259523448171195, "grad_norm": 0.7663509249687195, "learning_rate": 4.958489419864332e-05, "loss": 1.3243, "step": 9969 }, { "epoch": 3.0262558810138107, "grad_norm": 0.9954874515533447, "learning_rate": 4.957983193277311e-05, "loss": 0.4026, "step": 9970 }, { "epoch": 3.0265594172105024, "grad_norm": 1.1502779722213745, "learning_rate": 4.957476966690291e-05, "loss": 0.729, "step": 9971 }, { "epoch": 3.026862953407194, "grad_norm": 0.9466817378997803, "learning_rate": 4.95697074010327e-05, "loss": 0.7324, "step": 9972 }, { "epoch": 3.0271664896038852, "grad_norm": 0.8472762703895569, "learning_rate": 4.9564645135162504e-05, "loss": 1.0931, "step": 9973 }, { "epoch": 3.027470025800577, "grad_norm": 0.7894196510314941, "learning_rate": 4.95595828692923e-05, "loss": 1.1567, "step": 9974 }, { "epoch": 3.027773561997268, "grad_norm": 0.891975462436676, "learning_rate": 4.9554520603422093e-05, "loss": 0.7622, "step": 9975 }, { "epoch": 3.0280770981939598, "grad_norm": 0.9115039706230164, "learning_rate": 4.954945833755189e-05, "loss": 0.7939, "step": 9976 }, { "epoch": 3.028380634390651, "grad_norm": 1.35649836063385, "learning_rate": 4.954439607168169e-05, "loss": 0.7654, "step": 9977 }, { "epoch": 3.0286841705873426, "grad_norm": 1.0273194313049316, "learning_rate": 4.9539333805811485e-05, "loss": 0.7395, "step": 9978 }, { "epoch": 3.028987706784034, "grad_norm": 0.8362645506858826, "learning_rate": 4.953427153994128e-05, "loss": 0.9669, "step": 9979 }, { "epoch": 3.0292912429807255, "grad_norm": 1.4432626962661743, "learning_rate": 4.9529209274071074e-05, "loss": 0.8575, "step": 9980 }, { "epoch": 3.0295947791774167, "grad_norm": 0.9342387914657593, "learning_rate": 4.952414700820087e-05, "loss": 0.9885, "step": 9981 }, { "epoch": 3.0298983153741084, "grad_norm": 1.1419669389724731, "learning_rate": 4.951908474233067e-05, "loss": 1.0959, "step": 9982 }, { "epoch": 3.0302018515708, "grad_norm": 0.9934976100921631, "learning_rate": 4.9514022476460465e-05, "loss": 0.5081, "step": 9983 }, { "epoch": 3.0305053877674912, "grad_norm": 1.2122128009796143, "learning_rate": 4.950896021059026e-05, "loss": 0.9086, "step": 9984 }, { "epoch": 3.030808923964183, "grad_norm": 0.9121325612068176, "learning_rate": 4.9503897944720055e-05, "loss": 1.1324, "step": 9985 }, { "epoch": 3.031112460160874, "grad_norm": 0.9659928679466248, "learning_rate": 4.949883567884985e-05, "loss": 1.1597, "step": 9986 }, { "epoch": 3.0314159963575658, "grad_norm": 1.1454473733901978, "learning_rate": 4.949377341297965e-05, "loss": 0.8592, "step": 9987 }, { "epoch": 3.031719532554257, "grad_norm": 1.1099263429641724, "learning_rate": 4.948871114710945e-05, "loss": 0.4484, "step": 9988 }, { "epoch": 3.0320230687509486, "grad_norm": 0.9389984607696533, "learning_rate": 4.948364888123925e-05, "loss": 0.7857, "step": 9989 }, { "epoch": 3.03232660494764, "grad_norm": 1.0837533473968506, "learning_rate": 4.947858661536904e-05, "loss": 0.5011, "step": 9990 }, { "epoch": 3.0326301411443315, "grad_norm": 0.8904072642326355, "learning_rate": 4.947352434949884e-05, "loss": 1.2047, "step": 9991 }, { "epoch": 3.0329336773410227, "grad_norm": 1.046789288520813, "learning_rate": 4.946846208362864e-05, "loss": 0.9434, "step": 9992 }, { "epoch": 3.0332372135377144, "grad_norm": 0.8978736996650696, "learning_rate": 4.9463399817758434e-05, "loss": 0.3767, "step": 9993 }, { "epoch": 3.033540749734406, "grad_norm": 1.0865288972854614, "learning_rate": 4.945833755188823e-05, "loss": 0.7399, "step": 9994 }, { "epoch": 3.0338442859310972, "grad_norm": 1.0696719884872437, "learning_rate": 4.945327528601802e-05, "loss": 0.9226, "step": 9995 }, { "epoch": 3.034147822127789, "grad_norm": 0.9205285310745239, "learning_rate": 4.944821302014782e-05, "loss": 0.7815, "step": 9996 }, { "epoch": 3.03445135832448, "grad_norm": 1.0227653980255127, "learning_rate": 4.944315075427762e-05, "loss": 0.8217, "step": 9997 }, { "epoch": 3.0347548945211718, "grad_norm": 0.8310995101928711, "learning_rate": 4.9438088488407414e-05, "loss": 0.6755, "step": 9998 }, { "epoch": 3.035058430717863, "grad_norm": 1.1098823547363281, "learning_rate": 4.943302622253721e-05, "loss": 0.7325, "step": 9999 }, { "epoch": 3.0353619669145546, "grad_norm": 1.0347144603729248, "learning_rate": 4.9427963956667004e-05, "loss": 1.0423, "step": 10000 } ], "logging_steps": 1, "max_steps": 19764, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.47576644110449e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }