{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1425, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007017543859649122, "grad_norm": 1.3867231607437134, "learning_rate": 7.5e-07, "loss": 11.1688, "step": 1 }, { "epoch": 0.0014035087719298245, "grad_norm": 1.4179346561431885, "learning_rate": 1.5e-06, "loss": 11.1732, "step": 2 }, { "epoch": 0.002105263157894737, "grad_norm": 1.3672428131103516, "learning_rate": 2.25e-06, "loss": 11.1839, "step": 3 }, { "epoch": 0.002807017543859649, "grad_norm": 1.3756910562515259, "learning_rate": 3e-06, "loss": 11.1788, "step": 4 }, { "epoch": 0.0035087719298245615, "grad_norm": 1.4389201402664185, "learning_rate": 3.75e-06, "loss": 11.1613, "step": 5 }, { "epoch": 0.004210526315789474, "grad_norm": 1.4423915147781372, "learning_rate": 4.5e-06, "loss": 11.1663, "step": 6 }, { "epoch": 0.004912280701754386, "grad_norm": 1.4194914102554321, "learning_rate": 5.25e-06, "loss": 11.1607, "step": 7 }, { "epoch": 0.005614035087719298, "grad_norm": 1.4031059741973877, "learning_rate": 6e-06, "loss": 11.1727, "step": 8 }, { "epoch": 0.00631578947368421, "grad_norm": 1.3822177648544312, "learning_rate": 6.750000000000001e-06, "loss": 11.1836, "step": 9 }, { "epoch": 0.007017543859649123, "grad_norm": 1.4721224308013916, "learning_rate": 7.5e-06, "loss": 11.1549, "step": 10 }, { "epoch": 0.0077192982456140355, "grad_norm": 1.5075721740722656, "learning_rate": 8.25e-06, "loss": 11.1556, "step": 11 }, { "epoch": 0.008421052631578947, "grad_norm": 1.4126543998718262, "learning_rate": 9e-06, "loss": 11.176, "step": 12 }, { "epoch": 0.009122807017543859, "grad_norm": 1.4442970752716064, "learning_rate": 9.75e-06, "loss": 11.1474, "step": 13 }, { "epoch": 0.009824561403508772, "grad_norm": 1.3820611238479614, "learning_rate": 1.05e-05, "loss": 11.168, "step": 14 }, { "epoch": 0.010526315789473684, "grad_norm": 1.409675121307373, "learning_rate": 1.125e-05, "loss": 11.1661, "step": 15 }, { "epoch": 0.011228070175438596, "grad_norm": 1.3865597248077393, "learning_rate": 1.2e-05, "loss": 11.169, "step": 16 }, { "epoch": 0.011929824561403509, "grad_norm": 1.4232406616210938, "learning_rate": 1.275e-05, "loss": 11.1554, "step": 17 }, { "epoch": 0.01263157894736842, "grad_norm": 1.401200771331787, "learning_rate": 1.3500000000000001e-05, "loss": 11.1677, "step": 18 }, { "epoch": 0.013333333333333334, "grad_norm": 1.4684267044067383, "learning_rate": 1.4249999999999999e-05, "loss": 11.1544, "step": 19 }, { "epoch": 0.014035087719298246, "grad_norm": 1.4503322839736938, "learning_rate": 1.5e-05, "loss": 11.1557, "step": 20 }, { "epoch": 0.014736842105263158, "grad_norm": 1.447258710861206, "learning_rate": 1.575e-05, "loss": 11.1566, "step": 21 }, { "epoch": 0.015438596491228071, "grad_norm": 1.4040602445602417, "learning_rate": 1.65e-05, "loss": 11.1257, "step": 22 }, { "epoch": 0.016140350877192983, "grad_norm": 1.433607578277588, "learning_rate": 1.725e-05, "loss": 11.1414, "step": 23 }, { "epoch": 0.016842105263157894, "grad_norm": 1.4451018571853638, "learning_rate": 1.8e-05, "loss": 11.138, "step": 24 }, { "epoch": 0.017543859649122806, "grad_norm": 1.4448715448379517, "learning_rate": 1.8750000000000002e-05, "loss": 11.1398, "step": 25 }, { "epoch": 0.018245614035087718, "grad_norm": 1.4522358179092407, "learning_rate": 1.95e-05, "loss": 11.1676, "step": 26 }, { "epoch": 0.018947368421052633, "grad_norm": 1.4874789714813232, "learning_rate": 2.025e-05, "loss": 11.1177, "step": 27 }, { "epoch": 0.019649122807017545, "grad_norm": 1.4768542051315308, "learning_rate": 2.1e-05, "loss": 11.1605, "step": 28 }, { "epoch": 0.020350877192982456, "grad_norm": 1.4562187194824219, "learning_rate": 2.175e-05, "loss": 11.1196, "step": 29 }, { "epoch": 0.021052631578947368, "grad_norm": 1.4939391613006592, "learning_rate": 2.25e-05, "loss": 11.1291, "step": 30 }, { "epoch": 0.02175438596491228, "grad_norm": 1.50139582157135, "learning_rate": 2.3250000000000003e-05, "loss": 11.1141, "step": 31 }, { "epoch": 0.02245614035087719, "grad_norm": 1.5616580247879028, "learning_rate": 2.4e-05, "loss": 11.1062, "step": 32 }, { "epoch": 0.023157894736842106, "grad_norm": 1.4880198240280151, "learning_rate": 2.475e-05, "loss": 11.1216, "step": 33 }, { "epoch": 0.023859649122807018, "grad_norm": 1.5089681148529053, "learning_rate": 2.55e-05, "loss": 11.1281, "step": 34 }, { "epoch": 0.02456140350877193, "grad_norm": 1.5106528997421265, "learning_rate": 2.625e-05, "loss": 11.1093, "step": 35 }, { "epoch": 0.02526315789473684, "grad_norm": 1.5258722305297852, "learning_rate": 2.7000000000000002e-05, "loss": 11.0987, "step": 36 }, { "epoch": 0.025964912280701753, "grad_norm": 1.5526736974716187, "learning_rate": 2.7750000000000004e-05, "loss": 11.1125, "step": 37 }, { "epoch": 0.02666666666666667, "grad_norm": 1.5637879371643066, "learning_rate": 2.8499999999999998e-05, "loss": 11.0904, "step": 38 }, { "epoch": 0.02736842105263158, "grad_norm": 1.5213061571121216, "learning_rate": 2.925e-05, "loss": 11.1062, "step": 39 }, { "epoch": 0.028070175438596492, "grad_norm": 1.6364699602127075, "learning_rate": 3e-05, "loss": 11.1215, "step": 40 }, { "epoch": 0.028771929824561403, "grad_norm": 1.5831315517425537, "learning_rate": 2.99999614111998e-05, "loss": 11.0747, "step": 41 }, { "epoch": 0.029473684210526315, "grad_norm": 1.5541292428970337, "learning_rate": 2.9999845644997734e-05, "loss": 11.0796, "step": 42 }, { "epoch": 0.030175438596491227, "grad_norm": 1.5953551530838013, "learning_rate": 2.9999652701989443e-05, "loss": 11.0782, "step": 43 }, { "epoch": 0.030877192982456142, "grad_norm": 1.6088347434997559, "learning_rate": 2.999938258316766e-05, "loss": 11.0455, "step": 44 }, { "epoch": 0.031578947368421054, "grad_norm": 1.566125750541687, "learning_rate": 2.9999035289922184e-05, "loss": 11.0544, "step": 45 }, { "epoch": 0.032280701754385965, "grad_norm": 1.6120789051055908, "learning_rate": 2.9998610824039904e-05, "loss": 11.0632, "step": 46 }, { "epoch": 0.03298245614035088, "grad_norm": 1.5812908411026, "learning_rate": 2.9998109187704766e-05, "loss": 11.0636, "step": 47 }, { "epoch": 0.03368421052631579, "grad_norm": 1.623307704925537, "learning_rate": 2.9997530383497776e-05, "loss": 11.0438, "step": 48 }, { "epoch": 0.0343859649122807, "grad_norm": 1.594836711883545, "learning_rate": 2.9996874414396984e-05, "loss": 11.017, "step": 49 }, { "epoch": 0.03508771929824561, "grad_norm": 1.6419360637664795, "learning_rate": 2.9996141283777472e-05, "loss": 11.0318, "step": 50 }, { "epoch": 0.035789473684210524, "grad_norm": 1.7121171951293945, "learning_rate": 2.999533099541131e-05, "loss": 11.0421, "step": 51 }, { "epoch": 0.036491228070175435, "grad_norm": 1.6263779401779175, "learning_rate": 2.9994443553467584e-05, "loss": 11.0159, "step": 52 }, { "epoch": 0.037192982456140354, "grad_norm": 1.6610161066055298, "learning_rate": 2.999347896251233e-05, "loss": 11.0121, "step": 53 }, { "epoch": 0.037894736842105266, "grad_norm": 1.5839670896530151, "learning_rate": 2.9992437227508536e-05, "loss": 11.0327, "step": 54 }, { "epoch": 0.03859649122807018, "grad_norm": 1.670560359954834, "learning_rate": 2.9991318353816112e-05, "loss": 11.0069, "step": 55 }, { "epoch": 0.03929824561403509, "grad_norm": 1.692322015762329, "learning_rate": 2.9990122347191856e-05, "loss": 10.9926, "step": 56 }, { "epoch": 0.04, "grad_norm": 1.6989035606384277, "learning_rate": 2.9988849213789433e-05, "loss": 11.006, "step": 57 }, { "epoch": 0.04070175438596491, "grad_norm": 1.6681936979293823, "learning_rate": 2.9987498960159325e-05, "loss": 10.9767, "step": 58 }, { "epoch": 0.041403508771929824, "grad_norm": 1.7258883714675903, "learning_rate": 2.998607159324883e-05, "loss": 10.9613, "step": 59 }, { "epoch": 0.042105263157894736, "grad_norm": 1.7651703357696533, "learning_rate": 2.9984567120401997e-05, "loss": 10.9497, "step": 60 }, { "epoch": 0.04280701754385965, "grad_norm": 1.655737280845642, "learning_rate": 2.99829855493596e-05, "loss": 10.9766, "step": 61 }, { "epoch": 0.04350877192982456, "grad_norm": 1.7662869691848755, "learning_rate": 2.9981326888259086e-05, "loss": 10.9648, "step": 62 }, { "epoch": 0.04421052631578947, "grad_norm": 1.799979567527771, "learning_rate": 2.9979591145634568e-05, "loss": 10.9355, "step": 63 }, { "epoch": 0.04491228070175438, "grad_norm": 1.8898379802703857, "learning_rate": 2.997777833041674e-05, "loss": 10.901, "step": 64 }, { "epoch": 0.0456140350877193, "grad_norm": 1.8089183568954468, "learning_rate": 2.997588845193284e-05, "loss": 10.9272, "step": 65 }, { "epoch": 0.04631578947368421, "grad_norm": 1.8683502674102783, "learning_rate": 2.9973921519906633e-05, "loss": 10.9187, "step": 66 }, { "epoch": 0.047017543859649125, "grad_norm": 1.752748966217041, "learning_rate": 2.9971877544458325e-05, "loss": 10.9185, "step": 67 }, { "epoch": 0.047719298245614036, "grad_norm": 1.800107717514038, "learning_rate": 2.996975653610451e-05, "loss": 10.917, "step": 68 }, { "epoch": 0.04842105263157895, "grad_norm": 1.877453327178955, "learning_rate": 2.9967558505758158e-05, "loss": 10.8865, "step": 69 }, { "epoch": 0.04912280701754386, "grad_norm": 1.8599498271942139, "learning_rate": 2.996528346472851e-05, "loss": 10.8675, "step": 70 }, { "epoch": 0.04982456140350877, "grad_norm": 1.868647575378418, "learning_rate": 2.9962931424721048e-05, "loss": 10.8726, "step": 71 }, { "epoch": 0.05052631578947368, "grad_norm": 1.8415179252624512, "learning_rate": 2.9960502397837424e-05, "loss": 10.8776, "step": 72 }, { "epoch": 0.051228070175438595, "grad_norm": 1.8907957077026367, "learning_rate": 2.9957996396575407e-05, "loss": 10.8997, "step": 73 }, { "epoch": 0.051929824561403506, "grad_norm": 1.8925520181655884, "learning_rate": 2.9955413433828802e-05, "loss": 10.8844, "step": 74 }, { "epoch": 0.05263157894736842, "grad_norm": 1.9581102132797241, "learning_rate": 2.9952753522887398e-05, "loss": 10.8175, "step": 75 }, { "epoch": 0.05333333333333334, "grad_norm": 1.9320536851882935, "learning_rate": 2.995001667743691e-05, "loss": 10.8713, "step": 76 }, { "epoch": 0.05403508771929825, "grad_norm": 2.0639774799346924, "learning_rate": 2.9947202911558876e-05, "loss": 10.8656, "step": 77 }, { "epoch": 0.05473684210526316, "grad_norm": 1.9346308708190918, "learning_rate": 2.9944312239730606e-05, "loss": 10.8486, "step": 78 }, { "epoch": 0.05543859649122807, "grad_norm": 1.8385746479034424, "learning_rate": 2.9941344676825106e-05, "loss": 10.858, "step": 79 }, { "epoch": 0.056140350877192984, "grad_norm": 1.9186604022979736, "learning_rate": 2.9938300238111008e-05, "loss": 10.8165, "step": 80 }, { "epoch": 0.056842105263157895, "grad_norm": 1.9630423784255981, "learning_rate": 2.9935178939252478e-05, "loss": 10.8415, "step": 81 }, { "epoch": 0.05754385964912281, "grad_norm": 1.9443511962890625, "learning_rate": 2.993198079630913e-05, "loss": 10.7957, "step": 82 }, { "epoch": 0.05824561403508772, "grad_norm": 1.9047595262527466, "learning_rate": 2.9928705825735975e-05, "loss": 10.7957, "step": 83 }, { "epoch": 0.05894736842105263, "grad_norm": 1.913922905921936, "learning_rate": 2.9925354044383294e-05, "loss": 10.787, "step": 84 }, { "epoch": 0.05964912280701754, "grad_norm": 1.8981060981750488, "learning_rate": 2.9921925469496594e-05, "loss": 10.7769, "step": 85 }, { "epoch": 0.060350877192982454, "grad_norm": 1.9278432130813599, "learning_rate": 2.9918420118716478e-05, "loss": 10.7753, "step": 86 }, { "epoch": 0.061052631578947365, "grad_norm": 1.8950514793395996, "learning_rate": 2.991483801007859e-05, "loss": 10.7949, "step": 87 }, { "epoch": 0.061754385964912284, "grad_norm": 1.9613559246063232, "learning_rate": 2.9911179162013495e-05, "loss": 10.7433, "step": 88 }, { "epoch": 0.062456140350877196, "grad_norm": 1.962399959564209, "learning_rate": 2.99074435933466e-05, "loss": 10.7591, "step": 89 }, { "epoch": 0.06315789473684211, "grad_norm": 1.988180160522461, "learning_rate": 2.990363132329806e-05, "loss": 10.743, "step": 90 }, { "epoch": 0.06385964912280702, "grad_norm": 1.896331548690796, "learning_rate": 2.9899742371482663e-05, "loss": 10.7179, "step": 91 }, { "epoch": 0.06456140350877193, "grad_norm": 1.9092869758605957, "learning_rate": 2.9895776757909737e-05, "loss": 10.7285, "step": 92 }, { "epoch": 0.06526315789473684, "grad_norm": 1.8389664888381958, "learning_rate": 2.989173450298305e-05, "loss": 10.762, "step": 93 }, { "epoch": 0.06596491228070175, "grad_norm": 1.9763520956039429, "learning_rate": 2.988761562750071e-05, "loss": 10.668, "step": 94 }, { "epoch": 0.06666666666666667, "grad_norm": 1.9501323699951172, "learning_rate": 2.988342015265504e-05, "loss": 10.6923, "step": 95 }, { "epoch": 0.06736842105263158, "grad_norm": 1.8519963026046753, "learning_rate": 2.987914810003249e-05, "loss": 10.6968, "step": 96 }, { "epoch": 0.06807017543859649, "grad_norm": 1.8976486921310425, "learning_rate": 2.9874799491613513e-05, "loss": 10.7002, "step": 97 }, { "epoch": 0.0687719298245614, "grad_norm": 2.0467875003814697, "learning_rate": 2.987037434977245e-05, "loss": 10.6338, "step": 98 }, { "epoch": 0.06947368421052631, "grad_norm": 1.8773910999298096, "learning_rate": 2.986587269727742e-05, "loss": 10.6628, "step": 99 }, { "epoch": 0.07017543859649122, "grad_norm": 1.9416966438293457, "learning_rate": 2.9861294557290205e-05, "loss": 10.6697, "step": 100 }, { "epoch": 0.07087719298245614, "grad_norm": 1.9673689603805542, "learning_rate": 2.9856639953366138e-05, "loss": 10.6233, "step": 101 }, { "epoch": 0.07157894736842105, "grad_norm": 1.8717288970947266, "learning_rate": 2.9851908909453953e-05, "loss": 10.6289, "step": 102 }, { "epoch": 0.07228070175438596, "grad_norm": 1.8955069780349731, "learning_rate": 2.9847101449895692e-05, "loss": 10.6463, "step": 103 }, { "epoch": 0.07298245614035087, "grad_norm": 1.9545173645019531, "learning_rate": 2.9842217599426575e-05, "loss": 10.6274, "step": 104 }, { "epoch": 0.07368421052631578, "grad_norm": 1.938403606414795, "learning_rate": 2.9837257383174855e-05, "loss": 10.5829, "step": 105 }, { "epoch": 0.07438596491228071, "grad_norm": 1.8846169710159302, "learning_rate": 2.9832220826661707e-05, "loss": 10.6122, "step": 106 }, { "epoch": 0.07508771929824562, "grad_norm": 1.9020344018936157, "learning_rate": 2.9827107955801082e-05, "loss": 10.5713, "step": 107 }, { "epoch": 0.07578947368421053, "grad_norm": 1.9107307195663452, "learning_rate": 2.982191879689959e-05, "loss": 10.568, "step": 108 }, { "epoch": 0.07649122807017544, "grad_norm": 1.8870030641555786, "learning_rate": 2.981665337665636e-05, "loss": 10.5898, "step": 109 }, { "epoch": 0.07719298245614035, "grad_norm": 1.9279310703277588, "learning_rate": 2.9811311722162876e-05, "loss": 10.5557, "step": 110 }, { "epoch": 0.07789473684210527, "grad_norm": 1.8705062866210938, "learning_rate": 2.980589386090289e-05, "loss": 10.5439, "step": 111 }, { "epoch": 0.07859649122807018, "grad_norm": 1.8404078483581543, "learning_rate": 2.9800399820752236e-05, "loss": 10.5618, "step": 112 }, { "epoch": 0.07929824561403509, "grad_norm": 1.904703974723816, "learning_rate": 2.97948296299787e-05, "loss": 10.5375, "step": 113 }, { "epoch": 0.08, "grad_norm": 1.8829587697982788, "learning_rate": 2.978918331724188e-05, "loss": 10.5377, "step": 114 }, { "epoch": 0.08070175438596491, "grad_norm": 1.7945451736450195, "learning_rate": 2.9783460911593024e-05, "loss": 10.5273, "step": 115 }, { "epoch": 0.08140350877192983, "grad_norm": 1.7841529846191406, "learning_rate": 2.977766244247492e-05, "loss": 10.5478, "step": 116 }, { "epoch": 0.08210526315789474, "grad_norm": 1.7507596015930176, "learning_rate": 2.9771787939721682e-05, "loss": 10.4944, "step": 117 }, { "epoch": 0.08280701754385965, "grad_norm": 1.899463415145874, "learning_rate": 2.9765837433558652e-05, "loss": 10.4842, "step": 118 }, { "epoch": 0.08350877192982456, "grad_norm": 1.7657932043075562, "learning_rate": 2.9759810954602218e-05, "loss": 10.4667, "step": 119 }, { "epoch": 0.08421052631578947, "grad_norm": 1.8209819793701172, "learning_rate": 2.975370853385965e-05, "loss": 10.4652, "step": 120 }, { "epoch": 0.08491228070175438, "grad_norm": 1.7647292613983154, "learning_rate": 2.9747530202728965e-05, "loss": 10.4656, "step": 121 }, { "epoch": 0.0856140350877193, "grad_norm": 1.7112172842025757, "learning_rate": 2.974127599299875e-05, "loss": 10.4628, "step": 122 }, { "epoch": 0.0863157894736842, "grad_norm": 1.786436676979065, "learning_rate": 2.9734945936848003e-05, "loss": 10.4501, "step": 123 }, { "epoch": 0.08701754385964912, "grad_norm": 1.8223979473114014, "learning_rate": 2.9728540066845944e-05, "loss": 10.4234, "step": 124 }, { "epoch": 0.08771929824561403, "grad_norm": 1.7756335735321045, "learning_rate": 2.97220584159519e-05, "loss": 10.424, "step": 125 }, { "epoch": 0.08842105263157894, "grad_norm": 1.6690008640289307, "learning_rate": 2.9715501017515083e-05, "loss": 10.4311, "step": 126 }, { "epoch": 0.08912280701754385, "grad_norm": 1.7714391946792603, "learning_rate": 2.9708867905274444e-05, "loss": 10.4051, "step": 127 }, { "epoch": 0.08982456140350877, "grad_norm": 1.7406460046768188, "learning_rate": 2.9702159113358497e-05, "loss": 10.3982, "step": 128 }, { "epoch": 0.09052631578947369, "grad_norm": 1.7567445039749146, "learning_rate": 2.9695374676285138e-05, "loss": 10.4101, "step": 129 }, { "epoch": 0.0912280701754386, "grad_norm": 1.7351380586624146, "learning_rate": 2.9688514628961473e-05, "loss": 10.3827, "step": 130 }, { "epoch": 0.09192982456140351, "grad_norm": 1.7856699228286743, "learning_rate": 2.9681579006683635e-05, "loss": 10.3601, "step": 131 }, { "epoch": 0.09263157894736843, "grad_norm": 1.63843834400177, "learning_rate": 2.96745678451366e-05, "loss": 10.4179, "step": 132 }, { "epoch": 0.09333333333333334, "grad_norm": 1.68683660030365, "learning_rate": 2.966748118039402e-05, "loss": 10.3771, "step": 133 }, { "epoch": 0.09403508771929825, "grad_norm": 1.705415964126587, "learning_rate": 2.9660319048917996e-05, "loss": 10.3386, "step": 134 }, { "epoch": 0.09473684210526316, "grad_norm": 1.773043155670166, "learning_rate": 2.965308148755895e-05, "loss": 10.3385, "step": 135 }, { "epoch": 0.09543859649122807, "grad_norm": 1.7122703790664673, "learning_rate": 2.9645768533555387e-05, "loss": 10.2955, "step": 136 }, { "epoch": 0.09614035087719298, "grad_norm": 1.6865215301513672, "learning_rate": 2.963838022453372e-05, "loss": 10.3211, "step": 137 }, { "epoch": 0.0968421052631579, "grad_norm": 1.7478833198547363, "learning_rate": 2.963091659850808e-05, "loss": 10.3185, "step": 138 }, { "epoch": 0.09754385964912281, "grad_norm": 1.7050344944000244, "learning_rate": 2.9623377693880123e-05, "loss": 10.2809, "step": 139 }, { "epoch": 0.09824561403508772, "grad_norm": 1.7529945373535156, "learning_rate": 2.961576354943881e-05, "loss": 10.3103, "step": 140 }, { "epoch": 0.09894736842105263, "grad_norm": 1.663455605506897, "learning_rate": 2.960807420436025e-05, "loss": 10.287, "step": 141 }, { "epoch": 0.09964912280701754, "grad_norm": 1.6467978954315186, "learning_rate": 2.9600309698207435e-05, "loss": 10.2984, "step": 142 }, { "epoch": 0.10035087719298245, "grad_norm": 1.6226227283477783, "learning_rate": 2.959247007093011e-05, "loss": 10.3138, "step": 143 }, { "epoch": 0.10105263157894737, "grad_norm": 1.70894193649292, "learning_rate": 2.958455536286451e-05, "loss": 10.2453, "step": 144 }, { "epoch": 0.10175438596491228, "grad_norm": 1.6307814121246338, "learning_rate": 2.957656561473319e-05, "loss": 10.2423, "step": 145 }, { "epoch": 0.10245614035087719, "grad_norm": 1.6406090259552002, "learning_rate": 2.956850086764478e-05, "loss": 10.2522, "step": 146 }, { "epoch": 0.1031578947368421, "grad_norm": 1.6392349004745483, "learning_rate": 2.95603611630938e-05, "loss": 10.2109, "step": 147 }, { "epoch": 0.10385964912280701, "grad_norm": 1.6366912126541138, "learning_rate": 2.955214654296045e-05, "loss": 10.2371, "step": 148 }, { "epoch": 0.10456140350877192, "grad_norm": 1.575484275817871, "learning_rate": 2.9543857049510366e-05, "loss": 10.2641, "step": 149 }, { "epoch": 0.10526315789473684, "grad_norm": 1.5821439027786255, "learning_rate": 2.9535492725394432e-05, "loss": 10.2234, "step": 150 }, { "epoch": 0.10596491228070175, "grad_norm": 1.583343505859375, "learning_rate": 2.952705361364855e-05, "loss": 10.2122, "step": 151 }, { "epoch": 0.10666666666666667, "grad_norm": 1.58578360080719, "learning_rate": 2.951853975769341e-05, "loss": 10.262, "step": 152 }, { "epoch": 0.10736842105263159, "grad_norm": 1.5741060972213745, "learning_rate": 2.950995120133427e-05, "loss": 10.1969, "step": 153 }, { "epoch": 0.1080701754385965, "grad_norm": 1.610175371170044, "learning_rate": 2.950128798876075e-05, "loss": 10.1842, "step": 154 }, { "epoch": 0.10877192982456141, "grad_norm": 1.5961599349975586, "learning_rate": 2.9492550164546578e-05, "loss": 10.1928, "step": 155 }, { "epoch": 0.10947368421052632, "grad_norm": 1.5912821292877197, "learning_rate": 2.948373777364938e-05, "loss": 10.1641, "step": 156 }, { "epoch": 0.11017543859649123, "grad_norm": 1.640492558479309, "learning_rate": 2.947485086141042e-05, "loss": 10.137, "step": 157 }, { "epoch": 0.11087719298245614, "grad_norm": 1.6052672863006592, "learning_rate": 2.9465889473554418e-05, "loss": 10.1798, "step": 158 }, { "epoch": 0.11157894736842106, "grad_norm": 1.5436865091323853, "learning_rate": 2.945685365618926e-05, "loss": 10.1445, "step": 159 }, { "epoch": 0.11228070175438597, "grad_norm": 1.5965417623519897, "learning_rate": 2.9447743455805793e-05, "loss": 10.1971, "step": 160 }, { "epoch": 0.11298245614035088, "grad_norm": 1.5718989372253418, "learning_rate": 2.9438558919277575e-05, "loss": 10.1142, "step": 161 }, { "epoch": 0.11368421052631579, "grad_norm": 1.536515712738037, "learning_rate": 2.942930009386065e-05, "loss": 10.1411, "step": 162 }, { "epoch": 0.1143859649122807, "grad_norm": 1.5010099411010742, "learning_rate": 2.9419967027193267e-05, "loss": 10.1218, "step": 163 }, { "epoch": 0.11508771929824561, "grad_norm": 1.5188424587249756, "learning_rate": 2.941055976729568e-05, "loss": 10.0801, "step": 164 }, { "epoch": 0.11578947368421053, "grad_norm": 1.5570286512374878, "learning_rate": 2.9401078362569866e-05, "loss": 10.0702, "step": 165 }, { "epoch": 0.11649122807017544, "grad_norm": 1.5366394519805908, "learning_rate": 2.9391522861799298e-05, "loss": 10.0623, "step": 166 }, { "epoch": 0.11719298245614035, "grad_norm": 1.4708266258239746, "learning_rate": 2.938189331414869e-05, "loss": 10.137, "step": 167 }, { "epoch": 0.11789473684210526, "grad_norm": 1.4985536336898804, "learning_rate": 2.9372189769163727e-05, "loss": 10.0461, "step": 168 }, { "epoch": 0.11859649122807017, "grad_norm": 1.5369043350219727, "learning_rate": 2.9362412276770833e-05, "loss": 10.0386, "step": 169 }, { "epoch": 0.11929824561403508, "grad_norm": 1.4283313751220703, "learning_rate": 2.93525608872769e-05, "loss": 10.0973, "step": 170 }, { "epoch": 0.12, "grad_norm": 1.4392104148864746, "learning_rate": 2.9342635651369033e-05, "loss": 10.0364, "step": 171 }, { "epoch": 0.12070175438596491, "grad_norm": 1.478575587272644, "learning_rate": 2.93326366201143e-05, "loss": 10.0378, "step": 172 }, { "epoch": 0.12140350877192982, "grad_norm": 1.402729868888855, "learning_rate": 2.932256384495944e-05, "loss": 10.0637, "step": 173 }, { "epoch": 0.12210526315789473, "grad_norm": 1.4036293029785156, "learning_rate": 2.9312417377730633e-05, "loss": 10.0657, "step": 174 }, { "epoch": 0.12280701754385964, "grad_norm": 1.406686782836914, "learning_rate": 2.9302197270633207e-05, "loss": 9.9968, "step": 175 }, { "epoch": 0.12350877192982457, "grad_norm": 1.4134589433670044, "learning_rate": 2.9291903576251393e-05, "loss": 10.0029, "step": 176 }, { "epoch": 0.12421052631578948, "grad_norm": 1.5148519277572632, "learning_rate": 2.9281536347548026e-05, "loss": 9.9688, "step": 177 }, { "epoch": 0.12491228070175439, "grad_norm": 1.3708959817886353, "learning_rate": 2.9271095637864295e-05, "loss": 10.014, "step": 178 }, { "epoch": 0.1256140350877193, "grad_norm": 1.3891221284866333, "learning_rate": 2.9260581500919466e-05, "loss": 9.9602, "step": 179 }, { "epoch": 0.12631578947368421, "grad_norm": 1.368768572807312, "learning_rate": 2.9249993990810597e-05, "loss": 10.0427, "step": 180 }, { "epoch": 0.1270175438596491, "grad_norm": 1.3397711515426636, "learning_rate": 2.9239333162012256e-05, "loss": 10.0364, "step": 181 }, { "epoch": 0.12771929824561404, "grad_norm": 1.3730342388153076, "learning_rate": 2.9228599069376258e-05, "loss": 9.9707, "step": 182 }, { "epoch": 0.12842105263157894, "grad_norm": 1.3708913326263428, "learning_rate": 2.9217791768131373e-05, "loss": 10.0046, "step": 183 }, { "epoch": 0.12912280701754386, "grad_norm": 1.4549150466918945, "learning_rate": 2.9206911313883037e-05, "loss": 9.9862, "step": 184 }, { "epoch": 0.12982456140350876, "grad_norm": 1.2995898723602295, "learning_rate": 2.9195957762613082e-05, "loss": 9.9804, "step": 185 }, { "epoch": 0.13052631578947368, "grad_norm": 1.4566320180892944, "learning_rate": 2.9184931170679413e-05, "loss": 9.9385, "step": 186 }, { "epoch": 0.1312280701754386, "grad_norm": 1.2870055437088013, "learning_rate": 2.9173831594815768e-05, "loss": 10.0298, "step": 187 }, { "epoch": 0.1319298245614035, "grad_norm": 1.2971042394638062, "learning_rate": 2.9162659092131386e-05, "loss": 9.9502, "step": 188 }, { "epoch": 0.13263157894736843, "grad_norm": 1.245622158050537, "learning_rate": 2.9151413720110724e-05, "loss": 9.964, "step": 189 }, { "epoch": 0.13333333333333333, "grad_norm": 1.2551263570785522, "learning_rate": 2.9140095536613182e-05, "loss": 9.9208, "step": 190 }, { "epoch": 0.13403508771929826, "grad_norm": 1.2725650072097778, "learning_rate": 2.912870459987277e-05, "loss": 9.9569, "step": 191 }, { "epoch": 0.13473684210526315, "grad_norm": 1.283666968345642, "learning_rate": 2.9117240968497833e-05, "loss": 9.8825, "step": 192 }, { "epoch": 0.13543859649122808, "grad_norm": 1.3661227226257324, "learning_rate": 2.9105704701470744e-05, "loss": 9.9531, "step": 193 }, { "epoch": 0.13614035087719298, "grad_norm": 1.3420194387435913, "learning_rate": 2.9094095858147594e-05, "loss": 9.8991, "step": 194 }, { "epoch": 0.1368421052631579, "grad_norm": 1.2260032892227173, "learning_rate": 2.908241449825789e-05, "loss": 9.8864, "step": 195 }, { "epoch": 0.1375438596491228, "grad_norm": 1.2040811777114868, "learning_rate": 2.907066068190426e-05, "loss": 9.8777, "step": 196 }, { "epoch": 0.13824561403508773, "grad_norm": 1.4770939350128174, "learning_rate": 2.905883446956213e-05, "loss": 9.8942, "step": 197 }, { "epoch": 0.13894736842105262, "grad_norm": 1.2435526847839355, "learning_rate": 2.9046935922079406e-05, "loss": 9.9399, "step": 198 }, { "epoch": 0.13964912280701755, "grad_norm": 1.1617164611816406, "learning_rate": 2.903496510067618e-05, "loss": 9.857, "step": 199 }, { "epoch": 0.14035087719298245, "grad_norm": 1.1620742082595825, "learning_rate": 2.9022922066944402e-05, "loss": 9.8806, "step": 200 }, { "epoch": 0.14105263157894737, "grad_norm": 1.1660631895065308, "learning_rate": 2.901080688284757e-05, "loss": 9.901, "step": 201 }, { "epoch": 0.14175438596491227, "grad_norm": 1.2313072681427002, "learning_rate": 2.899861961072041e-05, "loss": 9.8692, "step": 202 }, { "epoch": 0.1424561403508772, "grad_norm": 1.1685208082199097, "learning_rate": 2.898636031326854e-05, "loss": 9.8536, "step": 203 }, { "epoch": 0.1431578947368421, "grad_norm": 1.116986632347107, "learning_rate": 2.897402905356818e-05, "loss": 9.8343, "step": 204 }, { "epoch": 0.14385964912280702, "grad_norm": 1.1231684684753418, "learning_rate": 2.896162589506579e-05, "loss": 9.8873, "step": 205 }, { "epoch": 0.14456140350877192, "grad_norm": 1.120834231376648, "learning_rate": 2.8949150901577783e-05, "loss": 9.8171, "step": 206 }, { "epoch": 0.14526315789473684, "grad_norm": 1.1721832752227783, "learning_rate": 2.8936604137290154e-05, "loss": 9.8259, "step": 207 }, { "epoch": 0.14596491228070174, "grad_norm": 1.0714056491851807, "learning_rate": 2.8923985666758178e-05, "loss": 9.8468, "step": 208 }, { "epoch": 0.14666666666666667, "grad_norm": 1.06449556350708, "learning_rate": 2.891129555490608e-05, "loss": 9.8659, "step": 209 }, { "epoch": 0.14736842105263157, "grad_norm": 1.0737208127975464, "learning_rate": 2.8898533867026687e-05, "loss": 9.8162, "step": 210 }, { "epoch": 0.1480701754385965, "grad_norm": 1.1104488372802734, "learning_rate": 2.888570066878109e-05, "loss": 9.8201, "step": 211 }, { "epoch": 0.14877192982456142, "grad_norm": 1.0698769092559814, "learning_rate": 2.8872796026198324e-05, "loss": 9.8251, "step": 212 }, { "epoch": 0.14947368421052631, "grad_norm": 1.0295964479446411, "learning_rate": 2.8859820005675008e-05, "loss": 9.8043, "step": 213 }, { "epoch": 0.15017543859649124, "grad_norm": 1.0120643377304077, "learning_rate": 2.884677267397502e-05, "loss": 9.7917, "step": 214 }, { "epoch": 0.15087719298245614, "grad_norm": 1.0840659141540527, "learning_rate": 2.8833654098229132e-05, "loss": 9.8487, "step": 215 }, { "epoch": 0.15157894736842106, "grad_norm": 1.0195850133895874, "learning_rate": 2.8820464345934708e-05, "loss": 9.8651, "step": 216 }, { "epoch": 0.15228070175438596, "grad_norm": 0.9843241572380066, "learning_rate": 2.88072034849553e-05, "loss": 9.781, "step": 217 }, { "epoch": 0.1529824561403509, "grad_norm": 0.9831960201263428, "learning_rate": 2.8793871583520336e-05, "loss": 9.7811, "step": 218 }, { "epoch": 0.15368421052631578, "grad_norm": 1.0188785791397095, "learning_rate": 2.878046871022477e-05, "loss": 9.7738, "step": 219 }, { "epoch": 0.1543859649122807, "grad_norm": 1.0158071517944336, "learning_rate": 2.8766994934028697e-05, "loss": 9.7769, "step": 220 }, { "epoch": 0.1550877192982456, "grad_norm": 1.0099796056747437, "learning_rate": 2.8753450324257035e-05, "loss": 9.7429, "step": 221 }, { "epoch": 0.15578947368421053, "grad_norm": 0.9364565014839172, "learning_rate": 2.8739834950599153e-05, "loss": 9.76, "step": 222 }, { "epoch": 0.15649122807017543, "grad_norm": 1.019567847251892, "learning_rate": 2.8726148883108505e-05, "loss": 9.7222, "step": 223 }, { "epoch": 0.15719298245614036, "grad_norm": 1.152674913406372, "learning_rate": 2.8712392192202284e-05, "loss": 9.7059, "step": 224 }, { "epoch": 0.15789473684210525, "grad_norm": 0.9184134602546692, "learning_rate": 2.8698564948661043e-05, "loss": 9.7187, "step": 225 }, { "epoch": 0.15859649122807018, "grad_norm": 0.9040848612785339, "learning_rate": 2.868466722362836e-05, "loss": 9.7349, "step": 226 }, { "epoch": 0.15929824561403508, "grad_norm": 0.9168772101402283, "learning_rate": 2.867069908861042e-05, "loss": 9.7572, "step": 227 }, { "epoch": 0.16, "grad_norm": 0.9067280292510986, "learning_rate": 2.865666061547572e-05, "loss": 9.7204, "step": 228 }, { "epoch": 0.1607017543859649, "grad_norm": 0.9156401753425598, "learning_rate": 2.8642551876454625e-05, "loss": 9.7334, "step": 229 }, { "epoch": 0.16140350877192983, "grad_norm": 1.0196059942245483, "learning_rate": 2.862837294413905e-05, "loss": 9.7744, "step": 230 }, { "epoch": 0.16210526315789472, "grad_norm": 0.8938412070274353, "learning_rate": 2.861412389148205e-05, "loss": 9.6932, "step": 231 }, { "epoch": 0.16280701754385965, "grad_norm": 0.8793119192123413, "learning_rate": 2.8599804791797483e-05, "loss": 9.6983, "step": 232 }, { "epoch": 0.16350877192982455, "grad_norm": 0.8816063404083252, "learning_rate": 2.858541571875959e-05, "loss": 9.7431, "step": 233 }, { "epoch": 0.16421052631578947, "grad_norm": 0.89359450340271, "learning_rate": 2.8570956746402656e-05, "loss": 9.6962, "step": 234 }, { "epoch": 0.1649122807017544, "grad_norm": 0.9404999613761902, "learning_rate": 2.8556427949120587e-05, "loss": 9.7081, "step": 235 }, { "epoch": 0.1656140350877193, "grad_norm": 0.9086849093437195, "learning_rate": 2.8541829401666575e-05, "loss": 9.6966, "step": 236 }, { "epoch": 0.16631578947368422, "grad_norm": 0.9688056707382202, "learning_rate": 2.8527161179152673e-05, "loss": 9.6979, "step": 237 }, { "epoch": 0.16701754385964912, "grad_norm": 0.827894389629364, "learning_rate": 2.851242335704943e-05, "loss": 9.7308, "step": 238 }, { "epoch": 0.16771929824561405, "grad_norm": 0.8219782114028931, "learning_rate": 2.8497616011185494e-05, "loss": 9.6864, "step": 239 }, { "epoch": 0.16842105263157894, "grad_norm": 1.100195288658142, "learning_rate": 2.8482739217747228e-05, "loss": 9.7365, "step": 240 }, { "epoch": 0.16912280701754387, "grad_norm": 0.8838520050048828, "learning_rate": 2.8467793053278318e-05, "loss": 9.6698, "step": 241 }, { "epoch": 0.16982456140350877, "grad_norm": 0.7975402474403381, "learning_rate": 2.8452777594679362e-05, "loss": 9.703, "step": 242 }, { "epoch": 0.1705263157894737, "grad_norm": 0.8081221580505371, "learning_rate": 2.84376929192075e-05, "loss": 9.6911, "step": 243 }, { "epoch": 0.1712280701754386, "grad_norm": 0.8595532774925232, "learning_rate": 2.842253910447601e-05, "loss": 9.6615, "step": 244 }, { "epoch": 0.17192982456140352, "grad_norm": 0.8344963788986206, "learning_rate": 2.840731622845388e-05, "loss": 9.7056, "step": 245 }, { "epoch": 0.1726315789473684, "grad_norm": 0.7812458872795105, "learning_rate": 2.8392024369465462e-05, "loss": 9.6369, "step": 246 }, { "epoch": 0.17333333333333334, "grad_norm": 0.9315841794013977, "learning_rate": 2.837666360619002e-05, "loss": 9.6282, "step": 247 }, { "epoch": 0.17403508771929824, "grad_norm": 0.7948213815689087, "learning_rate": 2.8361234017661337e-05, "loss": 9.6721, "step": 248 }, { "epoch": 0.17473684210526316, "grad_norm": 0.807967483997345, "learning_rate": 2.834573568326732e-05, "loss": 9.6553, "step": 249 }, { "epoch": 0.17543859649122806, "grad_norm": 0.8275118470191956, "learning_rate": 2.8330168682749594e-05, "loss": 9.6919, "step": 250 }, { "epoch": 0.17614035087719299, "grad_norm": 0.815281093120575, "learning_rate": 2.8314533096203066e-05, "loss": 9.664, "step": 251 }, { "epoch": 0.17684210526315788, "grad_norm": 0.8715078234672546, "learning_rate": 2.8298829004075547e-05, "loss": 9.6979, "step": 252 }, { "epoch": 0.1775438596491228, "grad_norm": 1.0056536197662354, "learning_rate": 2.8283056487167313e-05, "loss": 9.6579, "step": 253 }, { "epoch": 0.1782456140350877, "grad_norm": 0.8242077827453613, "learning_rate": 2.8267215626630696e-05, "loss": 9.6729, "step": 254 }, { "epoch": 0.17894736842105263, "grad_norm": 1.049669861793518, "learning_rate": 2.8251306503969663e-05, "loss": 9.5708, "step": 255 }, { "epoch": 0.17964912280701753, "grad_norm": 0.7758817672729492, "learning_rate": 2.8235329201039424e-05, "loss": 9.6674, "step": 256 }, { "epoch": 0.18035087719298246, "grad_norm": 0.7442680597305298, "learning_rate": 2.8219283800045962e-05, "loss": 9.6225, "step": 257 }, { "epoch": 0.18105263157894738, "grad_norm": 0.7245508432388306, "learning_rate": 2.8203170383545645e-05, "loss": 9.6447, "step": 258 }, { "epoch": 0.18175438596491228, "grad_norm": 0.7845194339752197, "learning_rate": 2.8186989034444794e-05, "loss": 9.6389, "step": 259 }, { "epoch": 0.1824561403508772, "grad_norm": 0.7765506505966187, "learning_rate": 2.817073983599926e-05, "loss": 9.6104, "step": 260 }, { "epoch": 0.1831578947368421, "grad_norm": 0.9928977489471436, "learning_rate": 2.8154422871813987e-05, "loss": 9.7477, "step": 261 }, { "epoch": 0.18385964912280703, "grad_norm": 0.7536237239837646, "learning_rate": 2.8138038225842577e-05, "loss": 9.6281, "step": 262 }, { "epoch": 0.18456140350877193, "grad_norm": 0.7590246200561523, "learning_rate": 2.8121585982386882e-05, "loss": 9.635, "step": 263 }, { "epoch": 0.18526315789473685, "grad_norm": 0.6984339952468872, "learning_rate": 2.8105066226096543e-05, "loss": 9.6272, "step": 264 }, { "epoch": 0.18596491228070175, "grad_norm": 0.9350822567939758, "learning_rate": 2.808847904196857e-05, "loss": 9.6105, "step": 265 }, { "epoch": 0.18666666666666668, "grad_norm": 0.9632896184921265, "learning_rate": 2.8071824515346904e-05, "loss": 9.5846, "step": 266 }, { "epoch": 0.18736842105263157, "grad_norm": 0.7904691100120544, "learning_rate": 2.805510273192197e-05, "loss": 9.5989, "step": 267 }, { "epoch": 0.1880701754385965, "grad_norm": 0.7060310244560242, "learning_rate": 2.8038313777730237e-05, "loss": 9.5957, "step": 268 }, { "epoch": 0.1887719298245614, "grad_norm": 0.7599156498908997, "learning_rate": 2.8021457739153793e-05, "loss": 9.6891, "step": 269 }, { "epoch": 0.18947368421052632, "grad_norm": 0.8937268257141113, "learning_rate": 2.8004534702919875e-05, "loss": 9.595, "step": 270 }, { "epoch": 0.19017543859649122, "grad_norm": 0.8793702125549316, "learning_rate": 2.798754475610044e-05, "loss": 9.6746, "step": 271 }, { "epoch": 0.19087719298245615, "grad_norm": 0.8771767616271973, "learning_rate": 2.7970487986111702e-05, "loss": 9.6031, "step": 272 }, { "epoch": 0.19157894736842104, "grad_norm": 0.7265843152999878, "learning_rate": 2.795336448071371e-05, "loss": 9.6543, "step": 273 }, { "epoch": 0.19228070175438597, "grad_norm": 0.6834774017333984, "learning_rate": 2.7936174328009864e-05, "loss": 9.6309, "step": 274 }, { "epoch": 0.19298245614035087, "grad_norm": 0.7561845779418945, "learning_rate": 2.7918917616446477e-05, "loss": 9.6467, "step": 275 }, { "epoch": 0.1936842105263158, "grad_norm": 0.7390491366386414, "learning_rate": 2.7901594434812326e-05, "loss": 9.646, "step": 276 }, { "epoch": 0.1943859649122807, "grad_norm": 0.6788997054100037, "learning_rate": 2.7884204872238182e-05, "loss": 9.6459, "step": 277 }, { "epoch": 0.19508771929824562, "grad_norm": 0.7975860834121704, "learning_rate": 2.7866749018196358e-05, "loss": 9.5894, "step": 278 }, { "epoch": 0.1957894736842105, "grad_norm": 0.6646599769592285, "learning_rate": 2.784922696250025e-05, "loss": 9.556, "step": 279 }, { "epoch": 0.19649122807017544, "grad_norm": 0.9430391192436218, "learning_rate": 2.7831638795303873e-05, "loss": 9.588, "step": 280 }, { "epoch": 0.19719298245614036, "grad_norm": 0.6823006868362427, "learning_rate": 2.7813984607101396e-05, "loss": 9.6212, "step": 281 }, { "epoch": 0.19789473684210526, "grad_norm": 0.7671125531196594, "learning_rate": 2.7796264488726672e-05, "loss": 9.6142, "step": 282 }, { "epoch": 0.1985964912280702, "grad_norm": 1.2302342653274536, "learning_rate": 2.7778478531352795e-05, "loss": 9.5895, "step": 283 }, { "epoch": 0.19929824561403509, "grad_norm": 0.6313884258270264, "learning_rate": 2.7760626826491586e-05, "loss": 9.6256, "step": 284 }, { "epoch": 0.2, "grad_norm": 0.7824494242668152, "learning_rate": 2.774270946599317e-05, "loss": 9.6247, "step": 285 }, { "epoch": 0.2007017543859649, "grad_norm": 0.8170529007911682, "learning_rate": 2.7724726542045463e-05, "loss": 9.5782, "step": 286 }, { "epoch": 0.20140350877192983, "grad_norm": 0.6644037365913391, "learning_rate": 2.7706678147173743e-05, "loss": 9.5751, "step": 287 }, { "epoch": 0.20210526315789473, "grad_norm": 0.7059473395347595, "learning_rate": 2.7688564374240114e-05, "loss": 9.5917, "step": 288 }, { "epoch": 0.20280701754385966, "grad_norm": 0.800885796546936, "learning_rate": 2.7670385316443084e-05, "loss": 9.6322, "step": 289 }, { "epoch": 0.20350877192982456, "grad_norm": 0.6367154121398926, "learning_rate": 2.765214106731706e-05, "loss": 9.5718, "step": 290 }, { "epoch": 0.20421052631578948, "grad_norm": 0.8863682746887207, "learning_rate": 2.7633831720731862e-05, "loss": 9.6068, "step": 291 }, { "epoch": 0.20491228070175438, "grad_norm": 0.697329044342041, "learning_rate": 2.7615457370892257e-05, "loss": 9.6286, "step": 292 }, { "epoch": 0.2056140350877193, "grad_norm": 0.6846487522125244, "learning_rate": 2.7597018112337453e-05, "loss": 9.5044, "step": 293 }, { "epoch": 0.2063157894736842, "grad_norm": 0.6939252018928528, "learning_rate": 2.7578514039940634e-05, "loss": 9.5569, "step": 294 }, { "epoch": 0.20701754385964913, "grad_norm": 0.7451760768890381, "learning_rate": 2.7559945248908468e-05, "loss": 9.6597, "step": 295 }, { "epoch": 0.20771929824561403, "grad_norm": 0.651624858379364, "learning_rate": 2.75413118347806e-05, "loss": 9.5894, "step": 296 }, { "epoch": 0.20842105263157895, "grad_norm": 0.7818907499313354, "learning_rate": 2.7522613893429172e-05, "loss": 9.6173, "step": 297 }, { "epoch": 0.20912280701754385, "grad_norm": 0.6688642501831055, "learning_rate": 2.7503851521058333e-05, "loss": 9.5582, "step": 298 }, { "epoch": 0.20982456140350877, "grad_norm": 0.6006238460540771, "learning_rate": 2.748502481420375e-05, "loss": 9.5854, "step": 299 }, { "epoch": 0.21052631578947367, "grad_norm": 1.1419391632080078, "learning_rate": 2.7466133869732087e-05, "loss": 9.583, "step": 300 }, { "epoch": 0.2112280701754386, "grad_norm": 0.6472179889678955, "learning_rate": 2.744717878484053e-05, "loss": 9.5674, "step": 301 }, { "epoch": 0.2119298245614035, "grad_norm": 0.6243668794631958, "learning_rate": 2.742815965705627e-05, "loss": 9.5332, "step": 302 }, { "epoch": 0.21263157894736842, "grad_norm": 0.8664143681526184, "learning_rate": 2.740907658423603e-05, "loss": 9.559, "step": 303 }, { "epoch": 0.21333333333333335, "grad_norm": 0.6396141648292542, "learning_rate": 2.7389929664565523e-05, "loss": 9.5116, "step": 304 }, { "epoch": 0.21403508771929824, "grad_norm": 0.7159715294837952, "learning_rate": 2.737071899655896e-05, "loss": 9.5318, "step": 305 }, { "epoch": 0.21473684210526317, "grad_norm": 0.915367603302002, "learning_rate": 2.7351444679058573e-05, "loss": 9.5601, "step": 306 }, { "epoch": 0.21543859649122807, "grad_norm": 0.6356433033943176, "learning_rate": 2.733210681123406e-05, "loss": 9.5567, "step": 307 }, { "epoch": 0.216140350877193, "grad_norm": 0.6281448602676392, "learning_rate": 2.7312705492582097e-05, "loss": 9.5374, "step": 308 }, { "epoch": 0.2168421052631579, "grad_norm": 0.630029022693634, "learning_rate": 2.729324082292585e-05, "loss": 9.515, "step": 309 }, { "epoch": 0.21754385964912282, "grad_norm": 0.766267716884613, "learning_rate": 2.7273712902414396e-05, "loss": 9.5238, "step": 310 }, { "epoch": 0.21824561403508771, "grad_norm": 1.1891478300094604, "learning_rate": 2.7254121831522288e-05, "loss": 9.5015, "step": 311 }, { "epoch": 0.21894736842105264, "grad_norm": 0.7909481525421143, "learning_rate": 2.7234467711048975e-05, "loss": 9.6396, "step": 312 }, { "epoch": 0.21964912280701754, "grad_norm": 1.0526117086410522, "learning_rate": 2.7214750642118315e-05, "loss": 9.6003, "step": 313 }, { "epoch": 0.22035087719298246, "grad_norm": 0.7523999214172363, "learning_rate": 2.7194970726178047e-05, "loss": 9.5435, "step": 314 }, { "epoch": 0.22105263157894736, "grad_norm": 0.7196345925331116, "learning_rate": 2.7175128064999272e-05, "loss": 9.5309, "step": 315 }, { "epoch": 0.2217543859649123, "grad_norm": 0.5695843696594238, "learning_rate": 2.715522276067591e-05, "loss": 9.516, "step": 316 }, { "epoch": 0.22245614035087719, "grad_norm": 0.6646392941474915, "learning_rate": 2.7135254915624213e-05, "loss": 9.5214, "step": 317 }, { "epoch": 0.2231578947368421, "grad_norm": 0.6180312037467957, "learning_rate": 2.71152246325822e-05, "loss": 9.5258, "step": 318 }, { "epoch": 0.223859649122807, "grad_norm": 0.6667888760566711, "learning_rate": 2.709513201460915e-05, "loss": 9.5135, "step": 319 }, { "epoch": 0.22456140350877193, "grad_norm": 0.6211369633674622, "learning_rate": 2.7074977165085073e-05, "loss": 9.5404, "step": 320 }, { "epoch": 0.22526315789473683, "grad_norm": 0.9565862417221069, "learning_rate": 2.7054760187710156e-05, "loss": 9.5695, "step": 321 }, { "epoch": 0.22596491228070176, "grad_norm": 0.7225350737571716, "learning_rate": 2.7034481186504253e-05, "loss": 9.5384, "step": 322 }, { "epoch": 0.22666666666666666, "grad_norm": 0.7632676362991333, "learning_rate": 2.7014140265806348e-05, "loss": 9.5856, "step": 323 }, { "epoch": 0.22736842105263158, "grad_norm": 0.610244631767273, "learning_rate": 2.6993737530273992e-05, "loss": 9.5351, "step": 324 }, { "epoch": 0.22807017543859648, "grad_norm": 0.5675045251846313, "learning_rate": 2.6973273084882802e-05, "loss": 9.5306, "step": 325 }, { "epoch": 0.2287719298245614, "grad_norm": 0.7981072664260864, "learning_rate": 2.69527470349259e-05, "loss": 9.5229, "step": 326 }, { "epoch": 0.2294736842105263, "grad_norm": 0.6032446622848511, "learning_rate": 2.693215948601337e-05, "loss": 9.5022, "step": 327 }, { "epoch": 0.23017543859649123, "grad_norm": 0.8769320249557495, "learning_rate": 2.691151054407172e-05, "loss": 9.5102, "step": 328 }, { "epoch": 0.23087719298245615, "grad_norm": 0.6082209348678589, "learning_rate": 2.689080031534334e-05, "loss": 9.4934, "step": 329 }, { "epoch": 0.23157894736842105, "grad_norm": 0.6247729063034058, "learning_rate": 2.6870028906385937e-05, "loss": 9.5457, "step": 330 }, { "epoch": 0.23228070175438598, "grad_norm": 0.69285649061203, "learning_rate": 2.684919642407202e-05, "loss": 9.5315, "step": 331 }, { "epoch": 0.23298245614035087, "grad_norm": 0.6785145401954651, "learning_rate": 2.682830297558832e-05, "loss": 9.5147, "step": 332 }, { "epoch": 0.2336842105263158, "grad_norm": 0.6100547909736633, "learning_rate": 2.680734866843525e-05, "loss": 9.5635, "step": 333 }, { "epoch": 0.2343859649122807, "grad_norm": 0.6071866154670715, "learning_rate": 2.6786333610426353e-05, "loss": 9.5082, "step": 334 }, { "epoch": 0.23508771929824562, "grad_norm": 0.5855414271354675, "learning_rate": 2.6765257909687742e-05, "loss": 9.539, "step": 335 }, { "epoch": 0.23578947368421052, "grad_norm": 0.5471117496490479, "learning_rate": 2.6744121674657563e-05, "loss": 9.4794, "step": 336 }, { "epoch": 0.23649122807017545, "grad_norm": 0.5914462208747864, "learning_rate": 2.67229250140854e-05, "loss": 9.514, "step": 337 }, { "epoch": 0.23719298245614034, "grad_norm": 0.5818675756454468, "learning_rate": 2.6701668037031743e-05, "loss": 9.4559, "step": 338 }, { "epoch": 0.23789473684210527, "grad_norm": 0.617993175983429, "learning_rate": 2.668035085286743e-05, "loss": 9.4879, "step": 339 }, { "epoch": 0.23859649122807017, "grad_norm": 0.9568886756896973, "learning_rate": 2.6658973571273077e-05, "loss": 9.474, "step": 340 }, { "epoch": 0.2392982456140351, "grad_norm": 0.6559261679649353, "learning_rate": 2.6637536302238485e-05, "loss": 9.5158, "step": 341 }, { "epoch": 0.24, "grad_norm": 0.756060779094696, "learning_rate": 2.661603915606213e-05, "loss": 9.4825, "step": 342 }, { "epoch": 0.24070175438596492, "grad_norm": 0.6117722988128662, "learning_rate": 2.6594482243350558e-05, "loss": 9.5369, "step": 343 }, { "epoch": 0.24140350877192981, "grad_norm": 0.5974878072738647, "learning_rate": 2.657286567501782e-05, "loss": 9.5227, "step": 344 }, { "epoch": 0.24210526315789474, "grad_norm": 0.5855231881141663, "learning_rate": 2.6551189562284906e-05, "loss": 9.5598, "step": 345 }, { "epoch": 0.24280701754385964, "grad_norm": 0.7797619700431824, "learning_rate": 2.6529454016679175e-05, "loss": 9.6089, "step": 346 }, { "epoch": 0.24350877192982456, "grad_norm": 0.826145350933075, "learning_rate": 2.6507659150033775e-05, "loss": 9.5098, "step": 347 }, { "epoch": 0.24421052631578946, "grad_norm": 0.5895681381225586, "learning_rate": 2.6485805074487078e-05, "loss": 9.521, "step": 348 }, { "epoch": 0.2449122807017544, "grad_norm": 1.1835860013961792, "learning_rate": 2.6463891902482087e-05, "loss": 9.473, "step": 349 }, { "epoch": 0.24561403508771928, "grad_norm": 0.5211122035980225, "learning_rate": 2.644191974676587e-05, "loss": 9.5006, "step": 350 }, { "epoch": 0.2463157894736842, "grad_norm": 0.5129368305206299, "learning_rate": 2.6419888720388984e-05, "loss": 9.5204, "step": 351 }, { "epoch": 0.24701754385964914, "grad_norm": 0.5393275022506714, "learning_rate": 2.639779893670487e-05, "loss": 9.488, "step": 352 }, { "epoch": 0.24771929824561403, "grad_norm": 0.5921952724456787, "learning_rate": 2.6375650509369306e-05, "loss": 9.4986, "step": 353 }, { "epoch": 0.24842105263157896, "grad_norm": 1.2798126935958862, "learning_rate": 2.635344355233977e-05, "loss": 9.5017, "step": 354 }, { "epoch": 0.24912280701754386, "grad_norm": 0.6196109652519226, "learning_rate": 2.6331178179874934e-05, "loss": 9.5487, "step": 355 }, { "epoch": 0.24982456140350878, "grad_norm": 0.7108418345451355, "learning_rate": 2.6308854506533975e-05, "loss": 9.4818, "step": 356 }, { "epoch": 0.2505263157894737, "grad_norm": 0.915558397769928, "learning_rate": 2.6286472647176075e-05, "loss": 9.4672, "step": 357 }, { "epoch": 0.2512280701754386, "grad_norm": 0.5785879492759705, "learning_rate": 2.6264032716959778e-05, "loss": 9.4592, "step": 358 }, { "epoch": 0.2519298245614035, "grad_norm": 0.5904528498649597, "learning_rate": 2.6241534831342413e-05, "loss": 9.5122, "step": 359 }, { "epoch": 0.25263157894736843, "grad_norm": 0.755155622959137, "learning_rate": 2.6218979106079503e-05, "loss": 9.4385, "step": 360 }, { "epoch": 0.25333333333333335, "grad_norm": 0.8922657370567322, "learning_rate": 2.6196365657224166e-05, "loss": 9.5701, "step": 361 }, { "epoch": 0.2540350877192982, "grad_norm": 0.6248093247413635, "learning_rate": 2.6173694601126513e-05, "loss": 9.4623, "step": 362 }, { "epoch": 0.25473684210526315, "grad_norm": 0.6643165349960327, "learning_rate": 2.6150966054433066e-05, "loss": 9.493, "step": 363 }, { "epoch": 0.2554385964912281, "grad_norm": 0.7007080316543579, "learning_rate": 2.612818013408613e-05, "loss": 9.4638, "step": 364 }, { "epoch": 0.256140350877193, "grad_norm": 0.6519184112548828, "learning_rate": 2.6105336957323214e-05, "loss": 9.4458, "step": 365 }, { "epoch": 0.25684210526315787, "grad_norm": 0.5476860404014587, "learning_rate": 2.6082436641676428e-05, "loss": 9.4392, "step": 366 }, { "epoch": 0.2575438596491228, "grad_norm": 1.0984894037246704, "learning_rate": 2.6059479304971867e-05, "loss": 9.4823, "step": 367 }, { "epoch": 0.2582456140350877, "grad_norm": 0.6184016466140747, "learning_rate": 2.6036465065329003e-05, "loss": 9.48, "step": 368 }, { "epoch": 0.25894736842105265, "grad_norm": 0.6977149844169617, "learning_rate": 2.6013394041160093e-05, "loss": 9.5109, "step": 369 }, { "epoch": 0.2596491228070175, "grad_norm": 0.7783281207084656, "learning_rate": 2.5990266351169554e-05, "loss": 9.5433, "step": 370 }, { "epoch": 0.26035087719298244, "grad_norm": 0.6441644430160522, "learning_rate": 2.5967082114353363e-05, "loss": 9.544, "step": 371 }, { "epoch": 0.26105263157894737, "grad_norm": 0.5681689977645874, "learning_rate": 2.594384144999843e-05, "loss": 9.4775, "step": 372 }, { "epoch": 0.2617543859649123, "grad_norm": 0.7010797262191772, "learning_rate": 2.5920544477682012e-05, "loss": 9.4933, "step": 373 }, { "epoch": 0.2624561403508772, "grad_norm": 0.559393048286438, "learning_rate": 2.5897191317271063e-05, "loss": 9.4818, "step": 374 }, { "epoch": 0.2631578947368421, "grad_norm": 0.7907567024230957, "learning_rate": 2.5873782088921648e-05, "loss": 9.4552, "step": 375 }, { "epoch": 0.263859649122807, "grad_norm": 0.6064289212226868, "learning_rate": 2.5850316913078298e-05, "loss": 9.4723, "step": 376 }, { "epoch": 0.26456140350877194, "grad_norm": 0.5833171010017395, "learning_rate": 2.5826795910473416e-05, "loss": 9.5045, "step": 377 }, { "epoch": 0.26526315789473687, "grad_norm": 0.6627591252326965, "learning_rate": 2.5803219202126635e-05, "loss": 9.4552, "step": 378 }, { "epoch": 0.26596491228070174, "grad_norm": 0.5093407034873962, "learning_rate": 2.5779586909344206e-05, "loss": 9.4724, "step": 379 }, { "epoch": 0.26666666666666666, "grad_norm": 0.5556803941726685, "learning_rate": 2.575589915371838e-05, "loss": 9.4319, "step": 380 }, { "epoch": 0.2673684210526316, "grad_norm": 0.6446377635002136, "learning_rate": 2.573215605712676e-05, "loss": 9.4874, "step": 381 }, { "epoch": 0.2680701754385965, "grad_norm": 0.7578274607658386, "learning_rate": 2.570835774173169e-05, "loss": 9.5246, "step": 382 }, { "epoch": 0.2687719298245614, "grad_norm": 0.6447035670280457, "learning_rate": 2.5684504329979634e-05, "loss": 9.4767, "step": 383 }, { "epoch": 0.2694736842105263, "grad_norm": 0.64982670545578, "learning_rate": 2.5660595944600534e-05, "loss": 9.488, "step": 384 }, { "epoch": 0.27017543859649124, "grad_norm": 0.6191815137863159, "learning_rate": 2.563663270860717e-05, "loss": 9.4848, "step": 385 }, { "epoch": 0.27087719298245616, "grad_norm": 0.6082594990730286, "learning_rate": 2.561261474529455e-05, "loss": 9.43, "step": 386 }, { "epoch": 0.27157894736842103, "grad_norm": 0.6578230261802673, "learning_rate": 2.5588542178239258e-05, "loss": 9.4661, "step": 387 }, { "epoch": 0.27228070175438596, "grad_norm": 0.5869002342224121, "learning_rate": 2.5564415131298824e-05, "loss": 9.4587, "step": 388 }, { "epoch": 0.2729824561403509, "grad_norm": 0.677105724811554, "learning_rate": 2.5540233728611087e-05, "loss": 9.4574, "step": 389 }, { "epoch": 0.2736842105263158, "grad_norm": 0.6095978617668152, "learning_rate": 2.5515998094593555e-05, "loss": 9.445, "step": 390 }, { "epoch": 0.2743859649122807, "grad_norm": 0.8217200636863708, "learning_rate": 2.5491708353942773e-05, "loss": 9.4928, "step": 391 }, { "epoch": 0.2750877192982456, "grad_norm": 0.6396626234054565, "learning_rate": 2.546736463163366e-05, "loss": 9.438, "step": 392 }, { "epoch": 0.27578947368421053, "grad_norm": 0.5075334906578064, "learning_rate": 2.5442967052918888e-05, "loss": 9.4102, "step": 393 }, { "epoch": 0.27649122807017545, "grad_norm": 0.69489985704422, "learning_rate": 2.5418515743328232e-05, "loss": 9.4225, "step": 394 }, { "epoch": 0.2771929824561403, "grad_norm": 0.8488376140594482, "learning_rate": 2.5394010828667923e-05, "loss": 9.5315, "step": 395 }, { "epoch": 0.27789473684210525, "grad_norm": 0.5730602145195007, "learning_rate": 2.5369452435019988e-05, "loss": 9.4571, "step": 396 }, { "epoch": 0.2785964912280702, "grad_norm": 0.8277639150619507, "learning_rate": 2.534484068874162e-05, "loss": 9.4516, "step": 397 }, { "epoch": 0.2792982456140351, "grad_norm": 0.6143015027046204, "learning_rate": 2.5320175716464523e-05, "loss": 9.4049, "step": 398 }, { "epoch": 0.28, "grad_norm": 0.813590407371521, "learning_rate": 2.529545764509425e-05, "loss": 9.4495, "step": 399 }, { "epoch": 0.2807017543859649, "grad_norm": 0.6429283618927002, "learning_rate": 2.5270686601809577e-05, "loss": 9.4724, "step": 400 }, { "epoch": 0.2814035087719298, "grad_norm": 0.6878147721290588, "learning_rate": 2.52458627140618e-05, "loss": 9.4827, "step": 401 }, { "epoch": 0.28210526315789475, "grad_norm": 0.9028959274291992, "learning_rate": 2.522098610957413e-05, "loss": 9.4751, "step": 402 }, { "epoch": 0.2828070175438597, "grad_norm": 0.6330590844154358, "learning_rate": 2.5196056916341016e-05, "loss": 9.436, "step": 403 }, { "epoch": 0.28350877192982454, "grad_norm": 0.8461023569107056, "learning_rate": 2.5171075262627473e-05, "loss": 9.4149, "step": 404 }, { "epoch": 0.28421052631578947, "grad_norm": 0.5460909605026245, "learning_rate": 2.5146041276968442e-05, "loss": 9.4486, "step": 405 }, { "epoch": 0.2849122807017544, "grad_norm": 0.4944623112678528, "learning_rate": 2.512095508816812e-05, "loss": 9.3871, "step": 406 }, { "epoch": 0.2856140350877193, "grad_norm": 0.5307866334915161, "learning_rate": 2.5095816825299293e-05, "loss": 9.4709, "step": 407 }, { "epoch": 0.2863157894736842, "grad_norm": 0.5677727460861206, "learning_rate": 2.5070626617702688e-05, "loss": 9.3755, "step": 408 }, { "epoch": 0.2870175438596491, "grad_norm": 0.5880956649780273, "learning_rate": 2.5045384594986285e-05, "loss": 9.4142, "step": 409 }, { "epoch": 0.28771929824561404, "grad_norm": 0.5180703401565552, "learning_rate": 2.5020090887024663e-05, "loss": 9.4071, "step": 410 }, { "epoch": 0.28842105263157897, "grad_norm": 0.6639626622200012, "learning_rate": 2.499474562395835e-05, "loss": 9.4608, "step": 411 }, { "epoch": 0.28912280701754384, "grad_norm": 0.5631815791130066, "learning_rate": 2.4969348936193102e-05, "loss": 9.4094, "step": 412 }, { "epoch": 0.28982456140350876, "grad_norm": 0.5485440492630005, "learning_rate": 2.4943900954399286e-05, "loss": 9.4077, "step": 413 }, { "epoch": 0.2905263157894737, "grad_norm": 0.9110934734344482, "learning_rate": 2.491840180951118e-05, "loss": 9.4172, "step": 414 }, { "epoch": 0.2912280701754386, "grad_norm": 0.5757965445518494, "learning_rate": 2.4892851632726306e-05, "loss": 9.436, "step": 415 }, { "epoch": 0.2919298245614035, "grad_norm": 0.7124931812286377, "learning_rate": 2.4867250555504757e-05, "loss": 9.5233, "step": 416 }, { "epoch": 0.2926315789473684, "grad_norm": 0.6362365484237671, "learning_rate": 2.4841598709568506e-05, "loss": 9.442, "step": 417 }, { "epoch": 0.29333333333333333, "grad_norm": 0.9518119096755981, "learning_rate": 2.481589622690075e-05, "loss": 9.3797, "step": 418 }, { "epoch": 0.29403508771929826, "grad_norm": 0.6270287036895752, "learning_rate": 2.4790143239745218e-05, "loss": 9.4372, "step": 419 }, { "epoch": 0.29473684210526313, "grad_norm": 1.2878141403198242, "learning_rate": 2.476433988060549e-05, "loss": 9.4552, "step": 420 }, { "epoch": 0.29543859649122806, "grad_norm": 0.497255802154541, "learning_rate": 2.4738486282244333e-05, "loss": 9.378, "step": 421 }, { "epoch": 0.296140350877193, "grad_norm": 0.5221925377845764, "learning_rate": 2.471258257768298e-05, "loss": 9.4698, "step": 422 }, { "epoch": 0.2968421052631579, "grad_norm": 0.5424032807350159, "learning_rate": 2.468662890020049e-05, "loss": 9.4284, "step": 423 }, { "epoch": 0.29754385964912283, "grad_norm": 0.5412673354148865, "learning_rate": 2.4660625383333028e-05, "loss": 9.4369, "step": 424 }, { "epoch": 0.2982456140350877, "grad_norm": 0.6830459237098694, "learning_rate": 2.4634572160873203e-05, "loss": 9.4547, "step": 425 }, { "epoch": 0.29894736842105263, "grad_norm": 0.5509556531906128, "learning_rate": 2.460846936686935e-05, "loss": 9.3922, "step": 426 }, { "epoch": 0.29964912280701755, "grad_norm": 0.597737193107605, "learning_rate": 2.4582317135624886e-05, "loss": 9.4321, "step": 427 }, { "epoch": 0.3003508771929825, "grad_norm": 0.48739030957221985, "learning_rate": 2.4556115601697557e-05, "loss": 9.3992, "step": 428 }, { "epoch": 0.30105263157894735, "grad_norm": 0.5768301486968994, "learning_rate": 2.4529864899898803e-05, "loss": 9.3724, "step": 429 }, { "epoch": 0.3017543859649123, "grad_norm": 0.567587673664093, "learning_rate": 2.450356516529304e-05, "loss": 9.4249, "step": 430 }, { "epoch": 0.3024561403508772, "grad_norm": 0.6435167193412781, "learning_rate": 2.4477216533196954e-05, "loss": 9.4559, "step": 431 }, { "epoch": 0.3031578947368421, "grad_norm": 0.5334060788154602, "learning_rate": 2.4450819139178838e-05, "loss": 9.4102, "step": 432 }, { "epoch": 0.303859649122807, "grad_norm": 0.6522231698036194, "learning_rate": 2.4424373119057852e-05, "loss": 9.4111, "step": 433 }, { "epoch": 0.3045614035087719, "grad_norm": 0.5632619857788086, "learning_rate": 2.439787860890335e-05, "loss": 9.3963, "step": 434 }, { "epoch": 0.30526315789473685, "grad_norm": 0.707191526889801, "learning_rate": 2.437133574503419e-05, "loss": 9.3793, "step": 435 }, { "epoch": 0.3059649122807018, "grad_norm": 0.7391917109489441, "learning_rate": 2.4344744664018e-05, "loss": 9.4674, "step": 436 }, { "epoch": 0.30666666666666664, "grad_norm": 0.8534300327301025, "learning_rate": 2.4318105502670504e-05, "loss": 9.4601, "step": 437 }, { "epoch": 0.30736842105263157, "grad_norm": 0.6527109742164612, "learning_rate": 2.4291418398054804e-05, "loss": 9.3996, "step": 438 }, { "epoch": 0.3080701754385965, "grad_norm": 0.8487635254859924, "learning_rate": 2.4264683487480687e-05, "loss": 9.4799, "step": 439 }, { "epoch": 0.3087719298245614, "grad_norm": 0.5704479813575745, "learning_rate": 2.4237900908503894e-05, "loss": 9.3873, "step": 440 }, { "epoch": 0.3094736842105263, "grad_norm": 0.6507864594459534, "learning_rate": 2.421107079892544e-05, "loss": 9.4321, "step": 441 }, { "epoch": 0.3101754385964912, "grad_norm": 0.5173345804214478, "learning_rate": 2.4184193296790887e-05, "loss": 9.3922, "step": 442 }, { "epoch": 0.31087719298245614, "grad_norm": 0.4844501316547394, "learning_rate": 2.415726854038965e-05, "loss": 9.4332, "step": 443 }, { "epoch": 0.31157894736842107, "grad_norm": 0.6691529154777527, "learning_rate": 2.413029666825427e-05, "loss": 9.4148, "step": 444 }, { "epoch": 0.312280701754386, "grad_norm": 0.7373707294464111, "learning_rate": 2.410327781915969e-05, "loss": 9.3842, "step": 445 }, { "epoch": 0.31298245614035086, "grad_norm": 0.6849657297134399, "learning_rate": 2.4076212132122586e-05, "loss": 9.4118, "step": 446 }, { "epoch": 0.3136842105263158, "grad_norm": 1.116995930671692, "learning_rate": 2.4049099746400602e-05, "loss": 9.372, "step": 447 }, { "epoch": 0.3143859649122807, "grad_norm": 0.7253084778785706, "learning_rate": 2.402194080149167e-05, "loss": 9.423, "step": 448 }, { "epoch": 0.31508771929824564, "grad_norm": 0.5054647326469421, "learning_rate": 2.3994735437133272e-05, "loss": 9.3822, "step": 449 }, { "epoch": 0.3157894736842105, "grad_norm": 0.576518714427948, "learning_rate": 2.396748379330171e-05, "loss": 9.4282, "step": 450 }, { "epoch": 0.31649122807017543, "grad_norm": 0.7184551954269409, "learning_rate": 2.394018601021143e-05, "loss": 9.4131, "step": 451 }, { "epoch": 0.31719298245614036, "grad_norm": 0.6027166843414307, "learning_rate": 2.391284222831425e-05, "loss": 9.4422, "step": 452 }, { "epoch": 0.3178947368421053, "grad_norm": 0.5981807112693787, "learning_rate": 2.3885452588298672e-05, "loss": 9.3667, "step": 453 }, { "epoch": 0.31859649122807016, "grad_norm": 0.4335266649723053, "learning_rate": 2.385801723108914e-05, "loss": 9.3618, "step": 454 }, { "epoch": 0.3192982456140351, "grad_norm": 0.5490899682044983, "learning_rate": 2.3830536297845326e-05, "loss": 9.4228, "step": 455 }, { "epoch": 0.32, "grad_norm": 0.9739122986793518, "learning_rate": 2.3803009929961393e-05, "loss": 9.3897, "step": 456 }, { "epoch": 0.32070175438596493, "grad_norm": 0.8922121524810791, "learning_rate": 2.3775438269065277e-05, "loss": 9.4734, "step": 457 }, { "epoch": 0.3214035087719298, "grad_norm": 0.5607495903968811, "learning_rate": 2.3747821457017952e-05, "loss": 9.3684, "step": 458 }, { "epoch": 0.32210526315789473, "grad_norm": 0.5324245691299438, "learning_rate": 2.37201596359127e-05, "loss": 9.3394, "step": 459 }, { "epoch": 0.32280701754385965, "grad_norm": 0.6799810528755188, "learning_rate": 2.3692452948074395e-05, "loss": 9.3817, "step": 460 }, { "epoch": 0.3235087719298246, "grad_norm": 0.7299060821533203, "learning_rate": 2.3664701536058746e-05, "loss": 9.3943, "step": 461 }, { "epoch": 0.32421052631578945, "grad_norm": 0.5554009675979614, "learning_rate": 2.3636905542651577e-05, "loss": 9.3828, "step": 462 }, { "epoch": 0.3249122807017544, "grad_norm": 0.7626522779464722, "learning_rate": 2.360906511086809e-05, "loss": 9.4417, "step": 463 }, { "epoch": 0.3256140350877193, "grad_norm": 0.5534941554069519, "learning_rate": 2.3581180383952136e-05, "loss": 9.3725, "step": 464 }, { "epoch": 0.3263157894736842, "grad_norm": 0.6367209553718567, "learning_rate": 2.355325150537548e-05, "loss": 9.4074, "step": 465 }, { "epoch": 0.3270175438596491, "grad_norm": 0.8958393335342407, "learning_rate": 2.352527861883702e-05, "loss": 9.4438, "step": 466 }, { "epoch": 0.327719298245614, "grad_norm": 0.7699940800666809, "learning_rate": 2.3497261868262127e-05, "loss": 9.4303, "step": 467 }, { "epoch": 0.32842105263157895, "grad_norm": 1.0890138149261475, "learning_rate": 2.346920139780183e-05, "loss": 9.3701, "step": 468 }, { "epoch": 0.3291228070175439, "grad_norm": 0.6338522434234619, "learning_rate": 2.3441097351832113e-05, "loss": 9.3817, "step": 469 }, { "epoch": 0.3298245614035088, "grad_norm": 0.5676193237304688, "learning_rate": 2.3412949874953174e-05, "loss": 9.3586, "step": 470 }, { "epoch": 0.33052631578947367, "grad_norm": 0.6439054012298584, "learning_rate": 2.3384759111988657e-05, "loss": 9.396, "step": 471 }, { "epoch": 0.3312280701754386, "grad_norm": 0.6478230357170105, "learning_rate": 2.3356525207984916e-05, "loss": 9.4257, "step": 472 }, { "epoch": 0.3319298245614035, "grad_norm": 1.1141831874847412, "learning_rate": 2.3328248308210295e-05, "loss": 9.4242, "step": 473 }, { "epoch": 0.33263157894736844, "grad_norm": 0.5897688865661621, "learning_rate": 2.3299928558154333e-05, "loss": 9.3279, "step": 474 }, { "epoch": 0.3333333333333333, "grad_norm": 0.6220348477363586, "learning_rate": 2.3271566103527063e-05, "loss": 9.3795, "step": 475 }, { "epoch": 0.33403508771929824, "grad_norm": 0.4404957592487335, "learning_rate": 2.3243161090258236e-05, "loss": 9.3667, "step": 476 }, { "epoch": 0.33473684210526317, "grad_norm": 0.6436986923217773, "learning_rate": 2.3214713664496553e-05, "loss": 9.3799, "step": 477 }, { "epoch": 0.3354385964912281, "grad_norm": 0.85413658618927, "learning_rate": 2.318622397260896e-05, "loss": 9.3876, "step": 478 }, { "epoch": 0.33614035087719296, "grad_norm": 0.7895776629447937, "learning_rate": 2.3157692161179858e-05, "loss": 9.43, "step": 479 }, { "epoch": 0.3368421052631579, "grad_norm": 0.4970530867576599, "learning_rate": 2.312911837701037e-05, "loss": 9.3635, "step": 480 }, { "epoch": 0.3375438596491228, "grad_norm": 0.6780027747154236, "learning_rate": 2.3100502767117566e-05, "loss": 9.4183, "step": 481 }, { "epoch": 0.33824561403508774, "grad_norm": 0.8290634155273438, "learning_rate": 2.3071845478733723e-05, "loss": 9.4125, "step": 482 }, { "epoch": 0.3389473684210526, "grad_norm": 0.6027493476867676, "learning_rate": 2.3043146659305542e-05, "loss": 9.4517, "step": 483 }, { "epoch": 0.33964912280701753, "grad_norm": 0.6301515698432922, "learning_rate": 2.301440645649344e-05, "loss": 9.398, "step": 484 }, { "epoch": 0.34035087719298246, "grad_norm": 0.980278730392456, "learning_rate": 2.2985625018170738e-05, "loss": 9.3514, "step": 485 }, { "epoch": 0.3410526315789474, "grad_norm": 0.6945514678955078, "learning_rate": 2.2956802492422925e-05, "loss": 9.3473, "step": 486 }, { "epoch": 0.34175438596491226, "grad_norm": 0.45966213941574097, "learning_rate": 2.2927939027546895e-05, "loss": 9.4187, "step": 487 }, { "epoch": 0.3424561403508772, "grad_norm": 0.4568663537502289, "learning_rate": 2.2899034772050177e-05, "loss": 9.3852, "step": 488 }, { "epoch": 0.3431578947368421, "grad_norm": 0.8702273368835449, "learning_rate": 2.2870089874650175e-05, "loss": 9.3889, "step": 489 }, { "epoch": 0.34385964912280703, "grad_norm": 0.484712690114975, "learning_rate": 2.284110448427341e-05, "loss": 9.3514, "step": 490 }, { "epoch": 0.34456140350877196, "grad_norm": 0.7928332686424255, "learning_rate": 2.281207875005473e-05, "loss": 9.4387, "step": 491 }, { "epoch": 0.3452631578947368, "grad_norm": 0.5693723559379578, "learning_rate": 2.278301282133658e-05, "loss": 9.4463, "step": 492 }, { "epoch": 0.34596491228070175, "grad_norm": 0.929457426071167, "learning_rate": 2.2753906847668197e-05, "loss": 9.3967, "step": 493 }, { "epoch": 0.3466666666666667, "grad_norm": 1.2614575624465942, "learning_rate": 2.272476097880486e-05, "loss": 9.4889, "step": 494 }, { "epoch": 0.3473684210526316, "grad_norm": 0.8899470567703247, "learning_rate": 2.269557536470712e-05, "loss": 9.4531, "step": 495 }, { "epoch": 0.3480701754385965, "grad_norm": 1.0561460256576538, "learning_rate": 2.266635015554002e-05, "loss": 9.3315, "step": 496 }, { "epoch": 0.3487719298245614, "grad_norm": 0.705575168132782, "learning_rate": 2.263708550167233e-05, "loss": 9.4088, "step": 497 }, { "epoch": 0.3494736842105263, "grad_norm": 0.5960484147071838, "learning_rate": 2.2607781553675766e-05, "loss": 9.3929, "step": 498 }, { "epoch": 0.35017543859649125, "grad_norm": 0.6113365888595581, "learning_rate": 2.2578438462324214e-05, "loss": 9.3373, "step": 499 }, { "epoch": 0.3508771929824561, "grad_norm": 0.8479763865470886, "learning_rate": 2.2549056378592974e-05, "loss": 9.3261, "step": 500 }, { "epoch": 0.35157894736842105, "grad_norm": 0.8934248089790344, "learning_rate": 2.2519635453657958e-05, "loss": 9.3966, "step": 501 }, { "epoch": 0.35228070175438597, "grad_norm": 0.661344587802887, "learning_rate": 2.2490175838894928e-05, "loss": 9.3599, "step": 502 }, { "epoch": 0.3529824561403509, "grad_norm": 0.9816127419471741, "learning_rate": 2.2460677685878705e-05, "loss": 9.3814, "step": 503 }, { "epoch": 0.35368421052631577, "grad_norm": 0.6244979500770569, "learning_rate": 2.2431141146382403e-05, "loss": 9.3971, "step": 504 }, { "epoch": 0.3543859649122807, "grad_norm": 0.6204315423965454, "learning_rate": 2.2401566372376635e-05, "loss": 9.3733, "step": 505 }, { "epoch": 0.3550877192982456, "grad_norm": 0.4501981735229492, "learning_rate": 2.237195351602874e-05, "loss": 9.3801, "step": 506 }, { "epoch": 0.35578947368421054, "grad_norm": 0.916680097579956, "learning_rate": 2.2342302729702004e-05, "loss": 9.3959, "step": 507 }, { "epoch": 0.3564912280701754, "grad_norm": 1.336126446723938, "learning_rate": 2.231261416595486e-05, "loss": 9.3723, "step": 508 }, { "epoch": 0.35719298245614034, "grad_norm": 0.6955752968788147, "learning_rate": 2.2282887977540112e-05, "loss": 9.3836, "step": 509 }, { "epoch": 0.35789473684210527, "grad_norm": 0.8503170013427734, "learning_rate": 2.2253124317404155e-05, "loss": 9.4106, "step": 510 }, { "epoch": 0.3585964912280702, "grad_norm": 0.7726431488990784, "learning_rate": 2.222332333868618e-05, "loss": 9.3868, "step": 511 }, { "epoch": 0.35929824561403506, "grad_norm": 0.6277540326118469, "learning_rate": 2.2193485194717387e-05, "loss": 9.3886, "step": 512 }, { "epoch": 0.36, "grad_norm": 0.9688343405723572, "learning_rate": 2.2163610039020202e-05, "loss": 9.3484, "step": 513 }, { "epoch": 0.3607017543859649, "grad_norm": 0.5263510942459106, "learning_rate": 2.2133698025307487e-05, "loss": 9.3708, "step": 514 }, { "epoch": 0.36140350877192984, "grad_norm": 0.5886432528495789, "learning_rate": 2.210374930748172e-05, "loss": 9.3866, "step": 515 }, { "epoch": 0.36210526315789476, "grad_norm": 0.9317433834075928, "learning_rate": 2.207376403963426e-05, "loss": 9.3555, "step": 516 }, { "epoch": 0.36280701754385963, "grad_norm": 0.694774329662323, "learning_rate": 2.2043742376044507e-05, "loss": 9.3707, "step": 517 }, { "epoch": 0.36350877192982456, "grad_norm": 0.5978325605392456, "learning_rate": 2.2013684471179125e-05, "loss": 9.3791, "step": 518 }, { "epoch": 0.3642105263157895, "grad_norm": 0.7452189922332764, "learning_rate": 2.1983590479691238e-05, "loss": 9.3488, "step": 519 }, { "epoch": 0.3649122807017544, "grad_norm": 0.6394854187965393, "learning_rate": 2.195346055641966e-05, "loss": 9.4439, "step": 520 }, { "epoch": 0.3656140350877193, "grad_norm": 0.5270504951477051, "learning_rate": 2.1923294856388058e-05, "loss": 9.3968, "step": 521 }, { "epoch": 0.3663157894736842, "grad_norm": 0.5692384839057922, "learning_rate": 2.1893093534804195e-05, "loss": 9.3923, "step": 522 }, { "epoch": 0.36701754385964913, "grad_norm": 0.5684599876403809, "learning_rate": 2.186285674705911e-05, "loss": 9.3973, "step": 523 }, { "epoch": 0.36771929824561406, "grad_norm": 1.1409095525741577, "learning_rate": 2.183258464872631e-05, "loss": 9.3984, "step": 524 }, { "epoch": 0.3684210526315789, "grad_norm": 0.8447516560554504, "learning_rate": 2.1802277395560998e-05, "loss": 9.3216, "step": 525 }, { "epoch": 0.36912280701754385, "grad_norm": 1.0733681917190552, "learning_rate": 2.1771935143499233e-05, "loss": 9.3478, "step": 526 }, { "epoch": 0.3698245614035088, "grad_norm": 0.6710102558135986, "learning_rate": 2.174155804865717e-05, "loss": 9.3634, "step": 527 }, { "epoch": 0.3705263157894737, "grad_norm": 0.8906673789024353, "learning_rate": 2.171114626733023e-05, "loss": 9.3921, "step": 528 }, { "epoch": 0.3712280701754386, "grad_norm": 0.8108707070350647, "learning_rate": 2.1680699955992295e-05, "loss": 9.3917, "step": 529 }, { "epoch": 0.3719298245614035, "grad_norm": 0.5511205792427063, "learning_rate": 2.1650219271294922e-05, "loss": 9.3313, "step": 530 }, { "epoch": 0.3726315789473684, "grad_norm": 0.736314058303833, "learning_rate": 2.161970437006651e-05, "loss": 9.3553, "step": 531 }, { "epoch": 0.37333333333333335, "grad_norm": 0.6239644885063171, "learning_rate": 2.1589155409311514e-05, "loss": 9.3714, "step": 532 }, { "epoch": 0.3740350877192982, "grad_norm": 0.6047874093055725, "learning_rate": 2.1558572546209647e-05, "loss": 9.3787, "step": 533 }, { "epoch": 0.37473684210526315, "grad_norm": 0.4837816655635834, "learning_rate": 2.152795593811503e-05, "loss": 9.3395, "step": 534 }, { "epoch": 0.37543859649122807, "grad_norm": 0.7115335464477539, "learning_rate": 2.1497305742555416e-05, "loss": 9.3703, "step": 535 }, { "epoch": 0.376140350877193, "grad_norm": 0.6146277785301208, "learning_rate": 2.146662211723139e-05, "loss": 9.3131, "step": 536 }, { "epoch": 0.37684210526315787, "grad_norm": 0.8209133148193359, "learning_rate": 2.1435905220015503e-05, "loss": 9.4122, "step": 537 }, { "epoch": 0.3775438596491228, "grad_norm": 0.5381373167037964, "learning_rate": 2.140515520895154e-05, "loss": 9.3533, "step": 538 }, { "epoch": 0.3782456140350877, "grad_norm": 0.9511374831199646, "learning_rate": 2.1374372242253615e-05, "loss": 9.3101, "step": 539 }, { "epoch": 0.37894736842105264, "grad_norm": 0.9581679105758667, "learning_rate": 2.1343556478305444e-05, "loss": 9.3492, "step": 540 }, { "epoch": 0.37964912280701757, "grad_norm": 0.6624841094017029, "learning_rate": 2.131270807565948e-05, "loss": 9.36, "step": 541 }, { "epoch": 0.38035087719298244, "grad_norm": 0.5565523505210876, "learning_rate": 2.1281827193036075e-05, "loss": 9.3974, "step": 542 }, { "epoch": 0.38105263157894737, "grad_norm": 0.5369763970375061, "learning_rate": 2.125091398932275e-05, "loss": 9.3394, "step": 543 }, { "epoch": 0.3817543859649123, "grad_norm": 0.7159846425056458, "learning_rate": 2.1219968623573292e-05, "loss": 9.3892, "step": 544 }, { "epoch": 0.3824561403508772, "grad_norm": 1.2518764734268188, "learning_rate": 2.1188991255006965e-05, "loss": 9.5087, "step": 545 }, { "epoch": 0.3831578947368421, "grad_norm": 0.5132001042366028, "learning_rate": 2.115798204300771e-05, "loss": 9.3856, "step": 546 }, { "epoch": 0.383859649122807, "grad_norm": 0.8802165389060974, "learning_rate": 2.1126941147123285e-05, "loss": 9.3083, "step": 547 }, { "epoch": 0.38456140350877194, "grad_norm": 0.5875750780105591, "learning_rate": 2.1095868727064494e-05, "loss": 9.3783, "step": 548 }, { "epoch": 0.38526315789473686, "grad_norm": 0.9157979488372803, "learning_rate": 2.106476494270432e-05, "loss": 9.3979, "step": 549 }, { "epoch": 0.38596491228070173, "grad_norm": 0.6329531669616699, "learning_rate": 2.1033629954077123e-05, "loss": 9.3546, "step": 550 }, { "epoch": 0.38666666666666666, "grad_norm": 0.9486364126205444, "learning_rate": 2.1002463921377818e-05, "loss": 9.3152, "step": 551 }, { "epoch": 0.3873684210526316, "grad_norm": 1.1078598499298096, "learning_rate": 2.097126700496105e-05, "loss": 9.2959, "step": 552 }, { "epoch": 0.3880701754385965, "grad_norm": 0.7814258933067322, "learning_rate": 2.0940039365340363e-05, "loss": 9.394, "step": 553 }, { "epoch": 0.3887719298245614, "grad_norm": 0.5575852990150452, "learning_rate": 2.0908781163187372e-05, "loss": 9.3791, "step": 554 }, { "epoch": 0.3894736842105263, "grad_norm": 0.5801357626914978, "learning_rate": 2.0877492559330942e-05, "loss": 9.3566, "step": 555 }, { "epoch": 0.39017543859649123, "grad_norm": 0.9365381598472595, "learning_rate": 2.0846173714756372e-05, "loss": 9.4376, "step": 556 }, { "epoch": 0.39087719298245616, "grad_norm": 0.5062263607978821, "learning_rate": 2.081482479060455e-05, "loss": 9.3814, "step": 557 }, { "epoch": 0.391578947368421, "grad_norm": 0.5326376557350159, "learning_rate": 2.0783445948171115e-05, "loss": 9.332, "step": 558 }, { "epoch": 0.39228070175438595, "grad_norm": 0.6381511092185974, "learning_rate": 2.0752037348905656e-05, "loss": 9.3692, "step": 559 }, { "epoch": 0.3929824561403509, "grad_norm": 0.5668514370918274, "learning_rate": 2.0720599154410857e-05, "loss": 9.3462, "step": 560 }, { "epoch": 0.3936842105263158, "grad_norm": 0.8611593246459961, "learning_rate": 2.068913152644169e-05, "loss": 9.3498, "step": 561 }, { "epoch": 0.39438596491228073, "grad_norm": 1.5203781127929688, "learning_rate": 2.0657634626904544e-05, "loss": 9.3698, "step": 562 }, { "epoch": 0.3950877192982456, "grad_norm": 0.5834146738052368, "learning_rate": 2.0626108617856438e-05, "loss": 9.3257, "step": 563 }, { "epoch": 0.3957894736842105, "grad_norm": 0.7357433438301086, "learning_rate": 2.0594553661504145e-05, "loss": 9.347, "step": 564 }, { "epoch": 0.39649122807017545, "grad_norm": 0.5671879649162292, "learning_rate": 2.056296992020339e-05, "loss": 9.3427, "step": 565 }, { "epoch": 0.3971929824561404, "grad_norm": 0.6014061570167542, "learning_rate": 2.0531357556457994e-05, "loss": 9.3676, "step": 566 }, { "epoch": 0.39789473684210525, "grad_norm": 0.5597371459007263, "learning_rate": 2.0499716732919052e-05, "loss": 9.3953, "step": 567 }, { "epoch": 0.39859649122807017, "grad_norm": 0.6753242611885071, "learning_rate": 2.046804761238409e-05, "loss": 9.2945, "step": 568 }, { "epoch": 0.3992982456140351, "grad_norm": 0.6650702953338623, "learning_rate": 2.0436350357796213e-05, "loss": 9.3504, "step": 569 }, { "epoch": 0.4, "grad_norm": 1.2930177450180054, "learning_rate": 2.0404625132243303e-05, "loss": 9.3549, "step": 570 }, { "epoch": 0.4007017543859649, "grad_norm": 0.627933144569397, "learning_rate": 2.037287209895713e-05, "loss": 9.3699, "step": 571 }, { "epoch": 0.4014035087719298, "grad_norm": 0.4878309965133667, "learning_rate": 2.034109142131257e-05, "loss": 9.3261, "step": 572 }, { "epoch": 0.40210526315789474, "grad_norm": 0.5281075239181519, "learning_rate": 2.030928326282672e-05, "loss": 9.3327, "step": 573 }, { "epoch": 0.40280701754385967, "grad_norm": 1.0048577785491943, "learning_rate": 2.0277447787158057e-05, "loss": 9.3561, "step": 574 }, { "epoch": 0.40350877192982454, "grad_norm": 0.5323553681373596, "learning_rate": 2.0245585158105625e-05, "loss": 9.3698, "step": 575 }, { "epoch": 0.40421052631578946, "grad_norm": 0.778361439704895, "learning_rate": 2.0213695539608182e-05, "loss": 9.3128, "step": 576 }, { "epoch": 0.4049122807017544, "grad_norm": 0.7070466876029968, "learning_rate": 2.0181779095743335e-05, "loss": 9.3432, "step": 577 }, { "epoch": 0.4056140350877193, "grad_norm": 0.7705173492431641, "learning_rate": 2.014983599072673e-05, "loss": 9.3338, "step": 578 }, { "epoch": 0.4063157894736842, "grad_norm": 0.5112923979759216, "learning_rate": 2.0117866388911176e-05, "loss": 9.304, "step": 579 }, { "epoch": 0.4070175438596491, "grad_norm": 1.3668526411056519, "learning_rate": 2.008587045478581e-05, "loss": 9.3853, "step": 580 }, { "epoch": 0.40771929824561404, "grad_norm": 0.7532626986503601, "learning_rate": 2.005384835297527e-05, "loss": 9.3523, "step": 581 }, { "epoch": 0.40842105263157896, "grad_norm": 0.6275856494903564, "learning_rate": 2.002180024823881e-05, "loss": 9.3702, "step": 582 }, { "epoch": 0.40912280701754383, "grad_norm": 0.6096500158309937, "learning_rate": 1.9989726305469497e-05, "loss": 9.397, "step": 583 }, { "epoch": 0.40982456140350876, "grad_norm": 0.6830854415893555, "learning_rate": 1.995762668969332e-05, "loss": 9.3711, "step": 584 }, { "epoch": 0.4105263157894737, "grad_norm": 0.5516018271446228, "learning_rate": 1.9925501566068365e-05, "loss": 9.341, "step": 585 }, { "epoch": 0.4112280701754386, "grad_norm": 2.5106265544891357, "learning_rate": 1.989335109988397e-05, "loss": 9.2873, "step": 586 }, { "epoch": 0.41192982456140353, "grad_norm": 1.230708360671997, "learning_rate": 1.986117545655985e-05, "loss": 9.3354, "step": 587 }, { "epoch": 0.4126315789473684, "grad_norm": 0.7380421757698059, "learning_rate": 1.9828974801645268e-05, "loss": 9.3133, "step": 588 }, { "epoch": 0.41333333333333333, "grad_norm": 0.6636810302734375, "learning_rate": 1.9796749300818185e-05, "loss": 9.3532, "step": 589 }, { "epoch": 0.41403508771929826, "grad_norm": 0.9535109996795654, "learning_rate": 1.976449911988438e-05, "loss": 9.3441, "step": 590 }, { "epoch": 0.4147368421052632, "grad_norm": 0.47542721033096313, "learning_rate": 1.973222442477662e-05, "loss": 9.3146, "step": 591 }, { "epoch": 0.41543859649122805, "grad_norm": 1.1076780557632446, "learning_rate": 1.9699925381553824e-05, "loss": 9.3706, "step": 592 }, { "epoch": 0.416140350877193, "grad_norm": 0.5825707316398621, "learning_rate": 1.966760215640015e-05, "loss": 9.3382, "step": 593 }, { "epoch": 0.4168421052631579, "grad_norm": 1.035048246383667, "learning_rate": 1.963525491562421e-05, "loss": 9.4034, "step": 594 }, { "epoch": 0.41754385964912283, "grad_norm": 0.8183524012565613, "learning_rate": 1.960288382565816e-05, "loss": 9.3427, "step": 595 }, { "epoch": 0.4182456140350877, "grad_norm": 0.930558443069458, "learning_rate": 1.9570489053056868e-05, "loss": 9.3588, "step": 596 }, { "epoch": 0.4189473684210526, "grad_norm": 0.5095419883728027, "learning_rate": 1.9538070764497057e-05, "loss": 9.3362, "step": 597 }, { "epoch": 0.41964912280701755, "grad_norm": 0.44769126176834106, "learning_rate": 1.9505629126776435e-05, "loss": 9.319, "step": 598 }, { "epoch": 0.4203508771929825, "grad_norm": 0.5536123514175415, "learning_rate": 1.9473164306812864e-05, "loss": 9.3154, "step": 599 }, { "epoch": 0.42105263157894735, "grad_norm": 0.6997956037521362, "learning_rate": 1.9440676471643462e-05, "loss": 9.4261, "step": 600 }, { "epoch": 0.42175438596491227, "grad_norm": 0.821489691734314, "learning_rate": 1.9408165788423776e-05, "loss": 9.371, "step": 601 }, { "epoch": 0.4224561403508772, "grad_norm": 0.5411480069160461, "learning_rate": 1.9375632424426904e-05, "loss": 9.3166, "step": 602 }, { "epoch": 0.4231578947368421, "grad_norm": 1.2181315422058105, "learning_rate": 1.9343076547042648e-05, "loss": 9.3834, "step": 603 }, { "epoch": 0.423859649122807, "grad_norm": 1.062719702720642, "learning_rate": 1.9310498323776642e-05, "loss": 9.3032, "step": 604 }, { "epoch": 0.4245614035087719, "grad_norm": 1.2092965841293335, "learning_rate": 1.927789792224949e-05, "loss": 9.34, "step": 605 }, { "epoch": 0.42526315789473684, "grad_norm": 0.6343398690223694, "learning_rate": 1.9245275510195908e-05, "loss": 9.3454, "step": 606 }, { "epoch": 0.42596491228070177, "grad_norm": 1.0002450942993164, "learning_rate": 1.9212631255463864e-05, "loss": 9.333, "step": 607 }, { "epoch": 0.4266666666666667, "grad_norm": 0.7467650175094604, "learning_rate": 1.917996532601371e-05, "loss": 9.3557, "step": 608 }, { "epoch": 0.42736842105263156, "grad_norm": 0.4680902361869812, "learning_rate": 1.9147277889917306e-05, "loss": 9.3167, "step": 609 }, { "epoch": 0.4280701754385965, "grad_norm": 0.645768404006958, "learning_rate": 1.911456911535719e-05, "loss": 9.3329, "step": 610 }, { "epoch": 0.4287719298245614, "grad_norm": 0.5706242322921753, "learning_rate": 1.908183917062567e-05, "loss": 9.3354, "step": 611 }, { "epoch": 0.42947368421052634, "grad_norm": 0.6437212824821472, "learning_rate": 1.9049088224123987e-05, "loss": 9.3378, "step": 612 }, { "epoch": 0.4301754385964912, "grad_norm": 0.6950781941413879, "learning_rate": 1.9016316444361443e-05, "loss": 9.3261, "step": 613 }, { "epoch": 0.43087719298245614, "grad_norm": 0.9104078412055969, "learning_rate": 1.898352399995452e-05, "loss": 9.3524, "step": 614 }, { "epoch": 0.43157894736842106, "grad_norm": 1.1967777013778687, "learning_rate": 1.8950711059626032e-05, "loss": 9.3284, "step": 615 }, { "epoch": 0.432280701754386, "grad_norm": 1.3676831722259521, "learning_rate": 1.8917877792204238e-05, "loss": 9.3026, "step": 616 }, { "epoch": 0.43298245614035086, "grad_norm": 0.5812057256698608, "learning_rate": 1.8885024366622008e-05, "loss": 9.3416, "step": 617 }, { "epoch": 0.4336842105263158, "grad_norm": 0.9047731757164001, "learning_rate": 1.88521509519159e-05, "loss": 9.3015, "step": 618 }, { "epoch": 0.4343859649122807, "grad_norm": 0.7831618189811707, "learning_rate": 1.881925771722533e-05, "loss": 9.3047, "step": 619 }, { "epoch": 0.43508771929824563, "grad_norm": 0.5643927454948425, "learning_rate": 1.8786344831791698e-05, "loss": 9.3487, "step": 620 }, { "epoch": 0.4357894736842105, "grad_norm": 0.7316763401031494, "learning_rate": 1.8753412464957505e-05, "loss": 9.3101, "step": 621 }, { "epoch": 0.43649122807017543, "grad_norm": 0.9444541335105896, "learning_rate": 1.872046078616549e-05, "loss": 9.3307, "step": 622 }, { "epoch": 0.43719298245614036, "grad_norm": 0.8318471312522888, "learning_rate": 1.8687489964957745e-05, "loss": 9.2856, "step": 623 }, { "epoch": 0.4378947368421053, "grad_norm": 0.7974424958229065, "learning_rate": 1.8654500170974867e-05, "loss": 9.366, "step": 624 }, { "epoch": 0.43859649122807015, "grad_norm": 1.0147417783737183, "learning_rate": 1.862149157395506e-05, "loss": 9.3573, "step": 625 }, { "epoch": 0.4392982456140351, "grad_norm": 0.5167046189308167, "learning_rate": 1.8588464343733287e-05, "loss": 9.2883, "step": 626 }, { "epoch": 0.44, "grad_norm": 0.4956953227519989, "learning_rate": 1.8555418650240364e-05, "loss": 9.2878, "step": 627 }, { "epoch": 0.44070175438596493, "grad_norm": 0.5901433229446411, "learning_rate": 1.852235466350212e-05, "loss": 9.3019, "step": 628 }, { "epoch": 0.4414035087719298, "grad_norm": 0.6591805815696716, "learning_rate": 1.848927255363849e-05, "loss": 9.2785, "step": 629 }, { "epoch": 0.4421052631578947, "grad_norm": 0.8507448434829712, "learning_rate": 1.8456172490862672e-05, "loss": 9.3223, "step": 630 }, { "epoch": 0.44280701754385965, "grad_norm": 1.1357241868972778, "learning_rate": 1.8423054645480228e-05, "loss": 9.3449, "step": 631 }, { "epoch": 0.4435087719298246, "grad_norm": 0.8166693449020386, "learning_rate": 1.8389919187888205e-05, "loss": 9.32, "step": 632 }, { "epoch": 0.4442105263157895, "grad_norm": 0.7860683798789978, "learning_rate": 1.835676628857429e-05, "loss": 9.3458, "step": 633 }, { "epoch": 0.44491228070175437, "grad_norm": 0.5432248115539551, "learning_rate": 1.8323596118115882e-05, "loss": 9.3507, "step": 634 }, { "epoch": 0.4456140350877193, "grad_norm": 0.9869771003723145, "learning_rate": 1.829040884717927e-05, "loss": 9.3276, "step": 635 }, { "epoch": 0.4463157894736842, "grad_norm": 1.2698737382888794, "learning_rate": 1.825720464651871e-05, "loss": 9.308, "step": 636 }, { "epoch": 0.44701754385964915, "grad_norm": 0.7149251103401184, "learning_rate": 1.8223983686975576e-05, "loss": 9.2853, "step": 637 }, { "epoch": 0.447719298245614, "grad_norm": 0.8515065312385559, "learning_rate": 1.819074613947746e-05, "loss": 9.3205, "step": 638 }, { "epoch": 0.44842105263157894, "grad_norm": 0.7909665107727051, "learning_rate": 1.8157492175037307e-05, "loss": 9.29, "step": 639 }, { "epoch": 0.44912280701754387, "grad_norm": 1.1174325942993164, "learning_rate": 1.8124221964752535e-05, "loss": 9.2891, "step": 640 }, { "epoch": 0.4498245614035088, "grad_norm": 0.6905573606491089, "learning_rate": 1.8090935679804126e-05, "loss": 9.3454, "step": 641 }, { "epoch": 0.45052631578947366, "grad_norm": 1.1715327501296997, "learning_rate": 1.805763349145581e-05, "loss": 9.3478, "step": 642 }, { "epoch": 0.4512280701754386, "grad_norm": 1.2037655115127563, "learning_rate": 1.80243155710531e-05, "loss": 9.3857, "step": 643 }, { "epoch": 0.4519298245614035, "grad_norm": 1.6232340335845947, "learning_rate": 1.7990982090022484e-05, "loss": 9.3873, "step": 644 }, { "epoch": 0.45263157894736844, "grad_norm": 0.49331867694854736, "learning_rate": 1.7957633219870498e-05, "loss": 9.2978, "step": 645 }, { "epoch": 0.4533333333333333, "grad_norm": 0.8527567982673645, "learning_rate": 1.7924269132182855e-05, "loss": 9.3801, "step": 646 }, { "epoch": 0.45403508771929824, "grad_norm": 0.9194442629814148, "learning_rate": 1.7890889998623576e-05, "loss": 9.2953, "step": 647 }, { "epoch": 0.45473684210526316, "grad_norm": 1.1837915182113647, "learning_rate": 1.7857495990934087e-05, "loss": 9.3038, "step": 648 }, { "epoch": 0.4554385964912281, "grad_norm": 1.4543852806091309, "learning_rate": 1.782408728093235e-05, "loss": 9.3248, "step": 649 }, { "epoch": 0.45614035087719296, "grad_norm": 1.0392124652862549, "learning_rate": 1.779066404051197e-05, "loss": 9.3365, "step": 650 }, { "epoch": 0.4568421052631579, "grad_norm": 0.7464094758033752, "learning_rate": 1.7757226441641303e-05, "loss": 9.3915, "step": 651 }, { "epoch": 0.4575438596491228, "grad_norm": 0.8890534043312073, "learning_rate": 1.7723774656362602e-05, "loss": 9.3398, "step": 652 }, { "epoch": 0.45824561403508773, "grad_norm": 0.832886278629303, "learning_rate": 1.7690308856791107e-05, "loss": 9.3262, "step": 653 }, { "epoch": 0.4589473684210526, "grad_norm": 0.6660196781158447, "learning_rate": 1.765682921511414e-05, "loss": 9.292, "step": 654 }, { "epoch": 0.45964912280701753, "grad_norm": 1.0167776346206665, "learning_rate": 1.762333590359028e-05, "loss": 9.4063, "step": 655 }, { "epoch": 0.46035087719298246, "grad_norm": 0.6457255482673645, "learning_rate": 1.758982909454841e-05, "loss": 9.3235, "step": 656 }, { "epoch": 0.4610526315789474, "grad_norm": 0.8206133246421814, "learning_rate": 1.7556308960386872e-05, "loss": 9.3253, "step": 657 }, { "epoch": 0.4617543859649123, "grad_norm": 0.8468889594078064, "learning_rate": 1.752277567357258e-05, "loss": 9.2994, "step": 658 }, { "epoch": 0.4624561403508772, "grad_norm": 1.4576514959335327, "learning_rate": 1.748922940664009e-05, "loss": 9.32, "step": 659 }, { "epoch": 0.4631578947368421, "grad_norm": 1.2665934562683105, "learning_rate": 1.7455670332190768e-05, "loss": 9.299, "step": 660 }, { "epoch": 0.463859649122807, "grad_norm": 1.0102559328079224, "learning_rate": 1.7422098622891873e-05, "loss": 9.3306, "step": 661 }, { "epoch": 0.46456140350877195, "grad_norm": 0.7320733666419983, "learning_rate": 1.7388514451475665e-05, "loss": 9.3275, "step": 662 }, { "epoch": 0.4652631578947368, "grad_norm": 1.2331327199935913, "learning_rate": 1.7354917990738533e-05, "loss": 9.4095, "step": 663 }, { "epoch": 0.46596491228070175, "grad_norm": 1.582092523574829, "learning_rate": 1.7321309413540087e-05, "loss": 9.2856, "step": 664 }, { "epoch": 0.4666666666666667, "grad_norm": 0.633793294429779, "learning_rate": 1.728768889280229e-05, "loss": 9.3405, "step": 665 }, { "epoch": 0.4673684210526316, "grad_norm": 0.9795190691947937, "learning_rate": 1.725405660150854e-05, "loss": 9.327, "step": 666 }, { "epoch": 0.46807017543859647, "grad_norm": 0.7948571443557739, "learning_rate": 1.722041271270281e-05, "loss": 9.3768, "step": 667 }, { "epoch": 0.4687719298245614, "grad_norm": 0.6379496455192566, "learning_rate": 1.7186757399488743e-05, "loss": 9.3237, "step": 668 }, { "epoch": 0.4694736842105263, "grad_norm": 0.7063419222831726, "learning_rate": 1.715309083502876e-05, "loss": 9.3565, "step": 669 }, { "epoch": 0.47017543859649125, "grad_norm": 0.5814898014068604, "learning_rate": 1.7119413192543165e-05, "loss": 9.3148, "step": 670 }, { "epoch": 0.4708771929824561, "grad_norm": 0.9254431128501892, "learning_rate": 1.7085724645309276e-05, "loss": 9.4227, "step": 671 }, { "epoch": 0.47157894736842104, "grad_norm": 0.9308079481124878, "learning_rate": 1.7052025366660506e-05, "loss": 9.3394, "step": 672 }, { "epoch": 0.47228070175438597, "grad_norm": 0.6587446331977844, "learning_rate": 1.701831552998548e-05, "loss": 9.285, "step": 673 }, { "epoch": 0.4729824561403509, "grad_norm": 1.1979551315307617, "learning_rate": 1.698459530872716e-05, "loss": 9.3131, "step": 674 }, { "epoch": 0.47368421052631576, "grad_norm": 0.8007051944732666, "learning_rate": 1.6950864876381923e-05, "loss": 9.2855, "step": 675 }, { "epoch": 0.4743859649122807, "grad_norm": 0.585222065448761, "learning_rate": 1.6917124406498697e-05, "loss": 9.2902, "step": 676 }, { "epoch": 0.4750877192982456, "grad_norm": 1.1804202795028687, "learning_rate": 1.688337407267805e-05, "loss": 9.3241, "step": 677 }, { "epoch": 0.47578947368421054, "grad_norm": 0.9890698194503784, "learning_rate": 1.684961404857129e-05, "loss": 9.3181, "step": 678 }, { "epoch": 0.47649122807017547, "grad_norm": 0.7500703930854797, "learning_rate": 1.68158445078796e-05, "loss": 9.2962, "step": 679 }, { "epoch": 0.47719298245614034, "grad_norm": 0.5208040475845337, "learning_rate": 1.6782065624353126e-05, "loss": 9.3042, "step": 680 }, { "epoch": 0.47789473684210526, "grad_norm": 0.636538565158844, "learning_rate": 1.6748277571790066e-05, "loss": 9.318, "step": 681 }, { "epoch": 0.4785964912280702, "grad_norm": 1.41548752784729, "learning_rate": 1.671448052403583e-05, "loss": 9.3343, "step": 682 }, { "epoch": 0.4792982456140351, "grad_norm": 0.49545785784721375, "learning_rate": 1.6680674654982067e-05, "loss": 9.3422, "step": 683 }, { "epoch": 0.48, "grad_norm": 0.7995181083679199, "learning_rate": 1.664686013856584e-05, "loss": 9.3679, "step": 684 }, { "epoch": 0.4807017543859649, "grad_norm": 1.3023996353149414, "learning_rate": 1.6613037148768702e-05, "loss": 9.2941, "step": 685 }, { "epoch": 0.48140350877192983, "grad_norm": 0.9700807332992554, "learning_rate": 1.6579205859615797e-05, "loss": 9.3877, "step": 686 }, { "epoch": 0.48210526315789476, "grad_norm": 1.0027410984039307, "learning_rate": 1.6545366445174973e-05, "loss": 9.372, "step": 687 }, { "epoch": 0.48280701754385963, "grad_norm": 0.8791016340255737, "learning_rate": 1.6511519079555887e-05, "loss": 9.3167, "step": 688 }, { "epoch": 0.48350877192982455, "grad_norm": 1.2121691703796387, "learning_rate": 1.647766393690909e-05, "loss": 9.2905, "step": 689 }, { "epoch": 0.4842105263157895, "grad_norm": 0.5890994668006897, "learning_rate": 1.6443801191425176e-05, "loss": 9.284, "step": 690 }, { "epoch": 0.4849122807017544, "grad_norm": 0.600554883480072, "learning_rate": 1.640993101733383e-05, "loss": 9.3021, "step": 691 }, { "epoch": 0.4856140350877193, "grad_norm": 1.3549524545669556, "learning_rate": 1.6376053588902976e-05, "loss": 9.3774, "step": 692 }, { "epoch": 0.4863157894736842, "grad_norm": 1.4825581312179565, "learning_rate": 1.6342169080437856e-05, "loss": 9.3041, "step": 693 }, { "epoch": 0.4870175438596491, "grad_norm": 0.44250765442848206, "learning_rate": 1.6308277666280133e-05, "loss": 9.3018, "step": 694 }, { "epoch": 0.48771929824561405, "grad_norm": 1.0980244874954224, "learning_rate": 1.6274379520807014e-05, "loss": 9.2935, "step": 695 }, { "epoch": 0.4884210526315789, "grad_norm": 1.3106095790863037, "learning_rate": 1.624047481843034e-05, "loss": 9.2956, "step": 696 }, { "epoch": 0.48912280701754385, "grad_norm": 0.8076321482658386, "learning_rate": 1.6206563733595666e-05, "loss": 9.2905, "step": 697 }, { "epoch": 0.4898245614035088, "grad_norm": 0.7621620893478394, "learning_rate": 1.6172646440781418e-05, "loss": 9.3316, "step": 698 }, { "epoch": 0.4905263157894737, "grad_norm": 0.876710832118988, "learning_rate": 1.613872311449794e-05, "loss": 9.3272, "step": 699 }, { "epoch": 0.49122807017543857, "grad_norm": 1.95462965965271, "learning_rate": 1.610479392928663e-05, "loss": 9.2931, "step": 700 }, { "epoch": 0.4919298245614035, "grad_norm": 1.6090487241744995, "learning_rate": 1.6070859059719028e-05, "loss": 9.3557, "step": 701 }, { "epoch": 0.4926315789473684, "grad_norm": 0.6057020425796509, "learning_rate": 1.6036918680395913e-05, "loss": 9.2556, "step": 702 }, { "epoch": 0.49333333333333335, "grad_norm": 1.6567527055740356, "learning_rate": 1.600297296594643e-05, "loss": 9.2838, "step": 703 }, { "epoch": 0.49403508771929827, "grad_norm": 1.4622975587844849, "learning_rate": 1.5969022091027164e-05, "loss": 9.2893, "step": 704 }, { "epoch": 0.49473684210526314, "grad_norm": 0.8690983653068542, "learning_rate": 1.5935066230321245e-05, "loss": 9.3233, "step": 705 }, { "epoch": 0.49543859649122807, "grad_norm": 1.0478991270065308, "learning_rate": 1.5901105558537472e-05, "loss": 9.298, "step": 706 }, { "epoch": 0.496140350877193, "grad_norm": 1.0122642517089844, "learning_rate": 1.5867140250409382e-05, "loss": 9.3282, "step": 707 }, { "epoch": 0.4968421052631579, "grad_norm": 0.6547334790229797, "learning_rate": 1.5833170480694373e-05, "loss": 9.2987, "step": 708 }, { "epoch": 0.4975438596491228, "grad_norm": 0.9973829984664917, "learning_rate": 1.579919642417281e-05, "loss": 9.3659, "step": 709 }, { "epoch": 0.4982456140350877, "grad_norm": 0.5597363114356995, "learning_rate": 1.576521825564709e-05, "loss": 9.326, "step": 710 }, { "epoch": 0.49894736842105264, "grad_norm": 0.8925488591194153, "learning_rate": 1.573123614994078e-05, "loss": 9.3447, "step": 711 }, { "epoch": 0.49964912280701756, "grad_norm": 0.7454961538314819, "learning_rate": 1.569725028189772e-05, "loss": 9.3004, "step": 712 }, { "epoch": 0.5003508771929824, "grad_norm": 0.785531222820282, "learning_rate": 1.566326082638108e-05, "loss": 9.3317, "step": 713 }, { "epoch": 0.5010526315789474, "grad_norm": 0.6406259536743164, "learning_rate": 1.562926795827251e-05, "loss": 9.3414, "step": 714 }, { "epoch": 0.5017543859649123, "grad_norm": 1.146087884902954, "learning_rate": 1.5595271852471204e-05, "loss": 9.3492, "step": 715 }, { "epoch": 0.5024561403508772, "grad_norm": 1.232762098312378, "learning_rate": 1.5561272683893016e-05, "loss": 9.3, "step": 716 }, { "epoch": 0.5031578947368421, "grad_norm": 0.7417135834693909, "learning_rate": 1.5527270627469567e-05, "loss": 9.292, "step": 717 }, { "epoch": 0.503859649122807, "grad_norm": 1.2774425745010376, "learning_rate": 1.5493265858147335e-05, "loss": 9.3159, "step": 718 }, { "epoch": 0.5045614035087719, "grad_norm": 0.736516535282135, "learning_rate": 1.5459258550886747e-05, "loss": 9.3055, "step": 719 }, { "epoch": 0.5052631578947369, "grad_norm": 0.6284863352775574, "learning_rate": 1.54252488806613e-05, "loss": 9.3095, "step": 720 }, { "epoch": 0.5059649122807017, "grad_norm": 1.1561609506607056, "learning_rate": 1.5391237022456636e-05, "loss": 9.286, "step": 721 }, { "epoch": 0.5066666666666667, "grad_norm": 0.5803952813148499, "learning_rate": 1.5357223151269675e-05, "loss": 9.3224, "step": 722 }, { "epoch": 0.5073684210526316, "grad_norm": 1.3210039138793945, "learning_rate": 1.5323207442107663e-05, "loss": 9.3076, "step": 723 }, { "epoch": 0.5080701754385964, "grad_norm": 0.6317670941352844, "learning_rate": 1.5289190069987332e-05, "loss": 9.2858, "step": 724 }, { "epoch": 0.5087719298245614, "grad_norm": 0.5730085968971252, "learning_rate": 1.5255171209933963e-05, "loss": 9.3406, "step": 725 }, { "epoch": 0.5094736842105263, "grad_norm": 1.1968744993209839, "learning_rate": 1.5221151036980478e-05, "loss": 9.3429, "step": 726 }, { "epoch": 0.5101754385964913, "grad_norm": 0.5100934505462646, "learning_rate": 1.5187129726166565e-05, "loss": 9.3384, "step": 727 }, { "epoch": 0.5108771929824562, "grad_norm": 0.5553667545318604, "learning_rate": 1.5153107452537777e-05, "loss": 9.3238, "step": 728 }, { "epoch": 0.511578947368421, "grad_norm": 1.0136075019836426, "learning_rate": 1.5119084391144599e-05, "loss": 9.3517, "step": 729 }, { "epoch": 0.512280701754386, "grad_norm": 0.7436505556106567, "learning_rate": 1.5085060717041585e-05, "loss": 9.3559, "step": 730 }, { "epoch": 0.5129824561403509, "grad_norm": 0.4986538589000702, "learning_rate": 1.5051036605286436e-05, "loss": 9.3052, "step": 731 }, { "epoch": 0.5136842105263157, "grad_norm": 0.9957839250564575, "learning_rate": 1.5017012230939103e-05, "loss": 9.3514, "step": 732 }, { "epoch": 0.5143859649122807, "grad_norm": 0.7949235439300537, "learning_rate": 1.4982987769060898e-05, "loss": 9.3124, "step": 733 }, { "epoch": 0.5150877192982456, "grad_norm": 0.60443514585495, "learning_rate": 1.4948963394713565e-05, "loss": 9.2955, "step": 734 }, { "epoch": 0.5157894736842106, "grad_norm": 1.6926627159118652, "learning_rate": 1.4914939282958417e-05, "loss": 9.3159, "step": 735 }, { "epoch": 0.5164912280701754, "grad_norm": 0.5189555287361145, "learning_rate": 1.4880915608855402e-05, "loss": 9.3203, "step": 736 }, { "epoch": 0.5171929824561403, "grad_norm": 1.4417989253997803, "learning_rate": 1.4846892547462224e-05, "loss": 9.2596, "step": 737 }, { "epoch": 0.5178947368421053, "grad_norm": 0.8695167899131775, "learning_rate": 1.4812870273833436e-05, "loss": 9.3432, "step": 738 }, { "epoch": 0.5185964912280702, "grad_norm": 1.010081171989441, "learning_rate": 1.477884896301953e-05, "loss": 9.3156, "step": 739 }, { "epoch": 0.519298245614035, "grad_norm": 0.49659860134124756, "learning_rate": 1.4744828790066041e-05, "loss": 9.2559, "step": 740 }, { "epoch": 0.52, "grad_norm": 0.9026995897293091, "learning_rate": 1.4710809930012672e-05, "loss": 9.3106, "step": 741 }, { "epoch": 0.5207017543859649, "grad_norm": 1.4078854322433472, "learning_rate": 1.467679255789234e-05, "loss": 9.336, "step": 742 }, { "epoch": 0.5214035087719299, "grad_norm": 0.4471663534641266, "learning_rate": 1.4642776848730331e-05, "loss": 9.2944, "step": 743 }, { "epoch": 0.5221052631578947, "grad_norm": 0.6061862111091614, "learning_rate": 1.4608762977543364e-05, "loss": 9.2816, "step": 744 }, { "epoch": 0.5228070175438596, "grad_norm": 0.890877366065979, "learning_rate": 1.4574751119338703e-05, "loss": 9.2954, "step": 745 }, { "epoch": 0.5235087719298246, "grad_norm": 0.6359207630157471, "learning_rate": 1.4540741449113255e-05, "loss": 9.2923, "step": 746 }, { "epoch": 0.5242105263157895, "grad_norm": 1.7474675178527832, "learning_rate": 1.4506734141852668e-05, "loss": 9.3505, "step": 747 }, { "epoch": 0.5249122807017544, "grad_norm": 0.6463692784309387, "learning_rate": 1.4472729372530432e-05, "loss": 9.3214, "step": 748 }, { "epoch": 0.5256140350877193, "grad_norm": 0.7175637483596802, "learning_rate": 1.443872731610699e-05, "loss": 9.3107, "step": 749 }, { "epoch": 0.5263157894736842, "grad_norm": 0.545291543006897, "learning_rate": 1.4404728147528805e-05, "loss": 9.2706, "step": 750 }, { "epoch": 0.5270175438596492, "grad_norm": 1.3478803634643555, "learning_rate": 1.4370732041727495e-05, "loss": 9.3444, "step": 751 }, { "epoch": 0.527719298245614, "grad_norm": 0.6227977871894836, "learning_rate": 1.4336739173618921e-05, "loss": 9.2546, "step": 752 }, { "epoch": 0.5284210526315789, "grad_norm": 0.5293748378753662, "learning_rate": 1.4302749718102281e-05, "loss": 9.3285, "step": 753 }, { "epoch": 0.5291228070175439, "grad_norm": 0.6042440533638, "learning_rate": 1.426876385005922e-05, "loss": 9.2565, "step": 754 }, { "epoch": 0.5298245614035088, "grad_norm": 0.580711841583252, "learning_rate": 1.4234781744352916e-05, "loss": 9.3101, "step": 755 }, { "epoch": 0.5305263157894737, "grad_norm": 0.9715297818183899, "learning_rate": 1.4200803575827193e-05, "loss": 9.3017, "step": 756 }, { "epoch": 0.5312280701754386, "grad_norm": 0.7889966368675232, "learning_rate": 1.4166829519305628e-05, "loss": 9.3102, "step": 757 }, { "epoch": 0.5319298245614035, "grad_norm": 1.6602354049682617, "learning_rate": 1.413285974959062e-05, "loss": 9.2848, "step": 758 }, { "epoch": 0.5326315789473685, "grad_norm": 0.703248918056488, "learning_rate": 1.409889444146253e-05, "loss": 9.2973, "step": 759 }, { "epoch": 0.5333333333333333, "grad_norm": 0.8119316101074219, "learning_rate": 1.406493376967876e-05, "loss": 9.3201, "step": 760 }, { "epoch": 0.5340350877192982, "grad_norm": 1.5847864151000977, "learning_rate": 1.4030977908972842e-05, "loss": 9.3358, "step": 761 }, { "epoch": 0.5347368421052632, "grad_norm": 0.5527006983757019, "learning_rate": 1.3997027034053571e-05, "loss": 9.2849, "step": 762 }, { "epoch": 0.535438596491228, "grad_norm": 0.48646238446235657, "learning_rate": 1.396308131960409e-05, "loss": 9.3303, "step": 763 }, { "epoch": 0.536140350877193, "grad_norm": 0.7707009315490723, "learning_rate": 1.3929140940280976e-05, "loss": 9.308, "step": 764 }, { "epoch": 0.5368421052631579, "grad_norm": 0.7292770743370056, "learning_rate": 1.3895206070713373e-05, "loss": 9.2922, "step": 765 }, { "epoch": 0.5375438596491228, "grad_norm": 1.1960588693618774, "learning_rate": 1.386127688550206e-05, "loss": 9.3116, "step": 766 }, { "epoch": 0.5382456140350877, "grad_norm": 0.6635521054267883, "learning_rate": 1.3827353559218586e-05, "loss": 9.3102, "step": 767 }, { "epoch": 0.5389473684210526, "grad_norm": 0.5064526200294495, "learning_rate": 1.3793436266404335e-05, "loss": 9.2978, "step": 768 }, { "epoch": 0.5396491228070175, "grad_norm": 1.2415308952331543, "learning_rate": 1.3759525181569663e-05, "loss": 9.3895, "step": 769 }, { "epoch": 0.5403508771929825, "grad_norm": 1.1658684015274048, "learning_rate": 1.3725620479192987e-05, "loss": 9.3155, "step": 770 }, { "epoch": 0.5410526315789473, "grad_norm": 0.6074137687683105, "learning_rate": 1.3691722333719873e-05, "loss": 9.2638, "step": 771 }, { "epoch": 0.5417543859649123, "grad_norm": 0.8610377907752991, "learning_rate": 1.3657830919562151e-05, "loss": 9.3106, "step": 772 }, { "epoch": 0.5424561403508772, "grad_norm": 0.5075859427452087, "learning_rate": 1.362394641109703e-05, "loss": 9.3309, "step": 773 }, { "epoch": 0.5431578947368421, "grad_norm": 1.8316500186920166, "learning_rate": 1.3590068982666173e-05, "loss": 9.2735, "step": 774 }, { "epoch": 0.543859649122807, "grad_norm": 0.8374232053756714, "learning_rate": 1.3556198808574828e-05, "loss": 9.3568, "step": 775 }, { "epoch": 0.5445614035087719, "grad_norm": 0.9609363675117493, "learning_rate": 1.3522336063090911e-05, "loss": 9.3036, "step": 776 }, { "epoch": 0.5452631578947369, "grad_norm": 1.1353538036346436, "learning_rate": 1.3488480920444119e-05, "loss": 9.3174, "step": 777 }, { "epoch": 0.5459649122807018, "grad_norm": 0.8766273856163025, "learning_rate": 1.3454633554825029e-05, "loss": 9.2518, "step": 778 }, { "epoch": 0.5466666666666666, "grad_norm": 0.8094379305839539, "learning_rate": 1.3420794140384202e-05, "loss": 9.3177, "step": 779 }, { "epoch": 0.5473684210526316, "grad_norm": 0.540779173374176, "learning_rate": 1.3386962851231295e-05, "loss": 9.2741, "step": 780 }, { "epoch": 0.5480701754385965, "grad_norm": 0.6712718605995178, "learning_rate": 1.335313986143416e-05, "loss": 9.2985, "step": 781 }, { "epoch": 0.5487719298245614, "grad_norm": 0.5024932026863098, "learning_rate": 1.3319325345017939e-05, "loss": 9.3671, "step": 782 }, { "epoch": 0.5494736842105263, "grad_norm": 1.3831504583358765, "learning_rate": 1.3285519475964176e-05, "loss": 9.2599, "step": 783 }, { "epoch": 0.5501754385964912, "grad_norm": 1.6680259704589844, "learning_rate": 1.3251722428209933e-05, "loss": 9.3142, "step": 784 }, { "epoch": 0.5508771929824562, "grad_norm": 0.8743581175804138, "learning_rate": 1.3217934375646878e-05, "loss": 9.2663, "step": 785 }, { "epoch": 0.5515789473684211, "grad_norm": 0.6572486758232117, "learning_rate": 1.3184155492120403e-05, "loss": 9.3613, "step": 786 }, { "epoch": 0.5522807017543859, "grad_norm": 0.64426189661026, "learning_rate": 1.3150385951428714e-05, "loss": 9.3263, "step": 787 }, { "epoch": 0.5529824561403509, "grad_norm": 0.947136402130127, "learning_rate": 1.3116625927321953e-05, "loss": 9.3377, "step": 788 }, { "epoch": 0.5536842105263158, "grad_norm": 1.1504813432693481, "learning_rate": 1.3082875593501302e-05, "loss": 9.3349, "step": 789 }, { "epoch": 0.5543859649122806, "grad_norm": 0.6778584122657776, "learning_rate": 1.3049135123618073e-05, "loss": 9.3326, "step": 790 }, { "epoch": 0.5550877192982456, "grad_norm": 0.7606936097145081, "learning_rate": 1.301540469127284e-05, "loss": 9.327, "step": 791 }, { "epoch": 0.5557894736842105, "grad_norm": 0.9650237560272217, "learning_rate": 1.2981684470014518e-05, "loss": 9.3314, "step": 792 }, { "epoch": 0.5564912280701755, "grad_norm": 0.7328600287437439, "learning_rate": 1.2947974633339499e-05, "loss": 9.3348, "step": 793 }, { "epoch": 0.5571929824561404, "grad_norm": 0.7462524771690369, "learning_rate": 1.2914275354690726e-05, "loss": 9.2969, "step": 794 }, { "epoch": 0.5578947368421052, "grad_norm": 0.4581567347049713, "learning_rate": 1.2880586807456837e-05, "loss": 9.2929, "step": 795 }, { "epoch": 0.5585964912280702, "grad_norm": 0.8792312741279602, "learning_rate": 1.2846909164971244e-05, "loss": 9.2415, "step": 796 }, { "epoch": 0.5592982456140351, "grad_norm": 1.1106051206588745, "learning_rate": 1.2813242600511261e-05, "loss": 9.3112, "step": 797 }, { "epoch": 0.56, "grad_norm": 1.4255934953689575, "learning_rate": 1.2779587287297192e-05, "loss": 9.2927, "step": 798 }, { "epoch": 0.5607017543859649, "grad_norm": 0.7689492702484131, "learning_rate": 1.2745943398491462e-05, "loss": 9.2612, "step": 799 }, { "epoch": 0.5614035087719298, "grad_norm": 1.1252931356430054, "learning_rate": 1.2712311107197714e-05, "loss": 9.3317, "step": 800 }, { "epoch": 0.5621052631578948, "grad_norm": 0.5502820014953613, "learning_rate": 1.2678690586459912e-05, "loss": 9.2948, "step": 801 }, { "epoch": 0.5628070175438596, "grad_norm": 0.6685172915458679, "learning_rate": 1.2645082009261468e-05, "loss": 9.2924, "step": 802 }, { "epoch": 0.5635087719298245, "grad_norm": 0.5343637466430664, "learning_rate": 1.261148554852434e-05, "loss": 9.3299, "step": 803 }, { "epoch": 0.5642105263157895, "grad_norm": 0.701750636100769, "learning_rate": 1.2577901377108133e-05, "loss": 9.2939, "step": 804 }, { "epoch": 0.5649122807017544, "grad_norm": 0.9540389180183411, "learning_rate": 1.254432966780924e-05, "loss": 9.374, "step": 805 }, { "epoch": 0.5656140350877193, "grad_norm": 0.997115969657898, "learning_rate": 1.2510770593359917e-05, "loss": 9.3346, "step": 806 }, { "epoch": 0.5663157894736842, "grad_norm": 0.6549131870269775, "learning_rate": 1.2477224326427425e-05, "loss": 9.3177, "step": 807 }, { "epoch": 0.5670175438596491, "grad_norm": 0.8155275583267212, "learning_rate": 1.2443691039613128e-05, "loss": 9.318, "step": 808 }, { "epoch": 0.5677192982456141, "grad_norm": 0.7997680902481079, "learning_rate": 1.2410170905451591e-05, "loss": 9.288, "step": 809 }, { "epoch": 0.5684210526315789, "grad_norm": 1.056986927986145, "learning_rate": 1.2376664096409723e-05, "loss": 9.3064, "step": 810 }, { "epoch": 0.5691228070175438, "grad_norm": 1.453170657157898, "learning_rate": 1.2343170784885859e-05, "loss": 9.3377, "step": 811 }, { "epoch": 0.5698245614035088, "grad_norm": 1.0954458713531494, "learning_rate": 1.2309691143208894e-05, "loss": 9.2727, "step": 812 }, { "epoch": 0.5705263157894737, "grad_norm": 0.5512946248054504, "learning_rate": 1.2276225343637395e-05, "loss": 9.2992, "step": 813 }, { "epoch": 0.5712280701754386, "grad_norm": 1.2845404148101807, "learning_rate": 1.2242773558358701e-05, "loss": 9.3352, "step": 814 }, { "epoch": 0.5719298245614035, "grad_norm": 0.5165030360221863, "learning_rate": 1.2209335959488037e-05, "loss": 9.3027, "step": 815 }, { "epoch": 0.5726315789473684, "grad_norm": 1.202122449874878, "learning_rate": 1.2175912719067656e-05, "loss": 9.3543, "step": 816 }, { "epoch": 0.5733333333333334, "grad_norm": 0.9821206331253052, "learning_rate": 1.2142504009065914e-05, "loss": 9.297, "step": 817 }, { "epoch": 0.5740350877192982, "grad_norm": 1.1315044164657593, "learning_rate": 1.2109110001376427e-05, "loss": 9.3188, "step": 818 }, { "epoch": 0.5747368421052632, "grad_norm": 0.8323570489883423, "learning_rate": 1.2075730867817148e-05, "loss": 9.2903, "step": 819 }, { "epoch": 0.5754385964912281, "grad_norm": 0.7463530898094177, "learning_rate": 1.2042366780129507e-05, "loss": 9.3547, "step": 820 }, { "epoch": 0.576140350877193, "grad_norm": 0.5758820176124573, "learning_rate": 1.2009017909977519e-05, "loss": 9.3654, "step": 821 }, { "epoch": 0.5768421052631579, "grad_norm": 0.6333702206611633, "learning_rate": 1.19756844289469e-05, "loss": 9.3278, "step": 822 }, { "epoch": 0.5775438596491228, "grad_norm": 0.6230956315994263, "learning_rate": 1.1942366508544195e-05, "loss": 9.3025, "step": 823 }, { "epoch": 0.5782456140350877, "grad_norm": 0.6451123356819153, "learning_rate": 1.190906432019587e-05, "loss": 9.26, "step": 824 }, { "epoch": 0.5789473684210527, "grad_norm": 0.4391762912273407, "learning_rate": 1.1875778035247472e-05, "loss": 9.3145, "step": 825 }, { "epoch": 0.5796491228070175, "grad_norm": 0.9010617136955261, "learning_rate": 1.1842507824962694e-05, "loss": 9.3012, "step": 826 }, { "epoch": 0.5803508771929825, "grad_norm": 0.9751334190368652, "learning_rate": 1.1809253860522544e-05, "loss": 9.3005, "step": 827 }, { "epoch": 0.5810526315789474, "grad_norm": 0.569268524646759, "learning_rate": 1.1776016313024427e-05, "loss": 9.3302, "step": 828 }, { "epoch": 0.5817543859649122, "grad_norm": 0.783898115158081, "learning_rate": 1.1742795353481291e-05, "loss": 9.2599, "step": 829 }, { "epoch": 0.5824561403508772, "grad_norm": 1.2214409112930298, "learning_rate": 1.1709591152820733e-05, "loss": 9.3285, "step": 830 }, { "epoch": 0.5831578947368421, "grad_norm": 0.5088267922401428, "learning_rate": 1.1676403881884118e-05, "loss": 9.372, "step": 831 }, { "epoch": 0.583859649122807, "grad_norm": 1.2519855499267578, "learning_rate": 1.1643233711425716e-05, "loss": 9.3389, "step": 832 }, { "epoch": 0.584561403508772, "grad_norm": 0.9305177330970764, "learning_rate": 1.1610080812111793e-05, "loss": 9.3762, "step": 833 }, { "epoch": 0.5852631578947368, "grad_norm": 0.85813307762146, "learning_rate": 1.1576945354519776e-05, "loss": 9.2725, "step": 834 }, { "epoch": 0.5859649122807018, "grad_norm": 1.1835262775421143, "learning_rate": 1.1543827509137329e-05, "loss": 9.2909, "step": 835 }, { "epoch": 0.5866666666666667, "grad_norm": 0.7388038039207458, "learning_rate": 1.1510727446361515e-05, "loss": 9.3297, "step": 836 }, { "epoch": 0.5873684210526315, "grad_norm": 1.0729209184646606, "learning_rate": 1.1477645336497889e-05, "loss": 9.3683, "step": 837 }, { "epoch": 0.5880701754385965, "grad_norm": 0.6541008353233337, "learning_rate": 1.144458134975964e-05, "loss": 9.3081, "step": 838 }, { "epoch": 0.5887719298245614, "grad_norm": 0.8862060308456421, "learning_rate": 1.1411535656266716e-05, "loss": 9.3176, "step": 839 }, { "epoch": 0.5894736842105263, "grad_norm": 0.43611598014831543, "learning_rate": 1.1378508426044943e-05, "loss": 9.306, "step": 840 }, { "epoch": 0.5901754385964912, "grad_norm": 0.9635779857635498, "learning_rate": 1.1345499829025136e-05, "loss": 9.3731, "step": 841 }, { "epoch": 0.5908771929824561, "grad_norm": 0.7422078847885132, "learning_rate": 1.1312510035042259e-05, "loss": 9.3067, "step": 842 }, { "epoch": 0.5915789473684211, "grad_norm": 0.9891616702079773, "learning_rate": 1.1279539213834514e-05, "loss": 9.3158, "step": 843 }, { "epoch": 0.592280701754386, "grad_norm": 0.4546435475349426, "learning_rate": 1.1246587535042492e-05, "loss": 9.3467, "step": 844 }, { "epoch": 0.5929824561403508, "grad_norm": 0.6581796407699585, "learning_rate": 1.12136551682083e-05, "loss": 9.3525, "step": 845 }, { "epoch": 0.5936842105263158, "grad_norm": 0.6090487241744995, "learning_rate": 1.1180742282774668e-05, "loss": 9.3165, "step": 846 }, { "epoch": 0.5943859649122807, "grad_norm": 0.5459069013595581, "learning_rate": 1.1147849048084105e-05, "loss": 9.3786, "step": 847 }, { "epoch": 0.5950877192982457, "grad_norm": 0.7633641958236694, "learning_rate": 1.1114975633377998e-05, "loss": 9.2597, "step": 848 }, { "epoch": 0.5957894736842105, "grad_norm": 0.7722066640853882, "learning_rate": 1.1082122207795761e-05, "loss": 9.3273, "step": 849 }, { "epoch": 0.5964912280701754, "grad_norm": 0.5862945318222046, "learning_rate": 1.1049288940373972e-05, "loss": 9.3333, "step": 850 }, { "epoch": 0.5971929824561404, "grad_norm": 0.9265831708908081, "learning_rate": 1.1016476000045485e-05, "loss": 9.3257, "step": 851 }, { "epoch": 0.5978947368421053, "grad_norm": 0.9433779716491699, "learning_rate": 1.098368355563856e-05, "loss": 9.2907, "step": 852 }, { "epoch": 0.5985964912280701, "grad_norm": 2.620093822479248, "learning_rate": 1.0950911775876014e-05, "loss": 9.3009, "step": 853 }, { "epoch": 0.5992982456140351, "grad_norm": 0.8025141954421997, "learning_rate": 1.0918160829374332e-05, "loss": 9.335, "step": 854 }, { "epoch": 0.6, "grad_norm": 0.6699589490890503, "learning_rate": 1.0885430884642812e-05, "loss": 9.2804, "step": 855 }, { "epoch": 0.600701754385965, "grad_norm": 0.6794986724853516, "learning_rate": 1.0852722110082693e-05, "loss": 9.311, "step": 856 }, { "epoch": 0.6014035087719298, "grad_norm": 0.7879689335823059, "learning_rate": 1.0820034673986297e-05, "loss": 9.3191, "step": 857 }, { "epoch": 0.6021052631578947, "grad_norm": 0.9212762713432312, "learning_rate": 1.078736874453614e-05, "loss": 9.3289, "step": 858 }, { "epoch": 0.6028070175438597, "grad_norm": 0.7143518328666687, "learning_rate": 1.0754724489804098e-05, "loss": 9.3044, "step": 859 }, { "epoch": 0.6035087719298246, "grad_norm": 0.5375038981437683, "learning_rate": 1.0722102077750514e-05, "loss": 9.3015, "step": 860 }, { "epoch": 0.6042105263157894, "grad_norm": 0.5628230571746826, "learning_rate": 1.0689501676223362e-05, "loss": 9.2737, "step": 861 }, { "epoch": 0.6049122807017544, "grad_norm": 0.5543110370635986, "learning_rate": 1.0656923452957354e-05, "loss": 9.3076, "step": 862 }, { "epoch": 0.6056140350877193, "grad_norm": 1.0377006530761719, "learning_rate": 1.0624367575573098e-05, "loss": 9.31, "step": 863 }, { "epoch": 0.6063157894736843, "grad_norm": 0.9119113683700562, "learning_rate": 1.059183421157623e-05, "loss": 9.2715, "step": 864 }, { "epoch": 0.6070175438596491, "grad_norm": 0.884434700012207, "learning_rate": 1.0559323528356542e-05, "loss": 9.2695, "step": 865 }, { "epoch": 0.607719298245614, "grad_norm": 0.8140876889228821, "learning_rate": 1.052683569318714e-05, "loss": 9.2806, "step": 866 }, { "epoch": 0.608421052631579, "grad_norm": 1.0974501371383667, "learning_rate": 1.0494370873223565e-05, "loss": 9.3275, "step": 867 }, { "epoch": 0.6091228070175438, "grad_norm": 0.7144575119018555, "learning_rate": 1.0461929235502952e-05, "loss": 9.3495, "step": 868 }, { "epoch": 0.6098245614035088, "grad_norm": 1.6952970027923584, "learning_rate": 1.0429510946943136e-05, "loss": 9.298, "step": 869 }, { "epoch": 0.6105263157894737, "grad_norm": 0.813697874546051, "learning_rate": 1.0397116174341843e-05, "loss": 9.2532, "step": 870 }, { "epoch": 0.6112280701754386, "grad_norm": 0.5520657896995544, "learning_rate": 1.036474508437579e-05, "loss": 9.3242, "step": 871 }, { "epoch": 0.6119298245614035, "grad_norm": 1.316697359085083, "learning_rate": 1.033239784359985e-05, "loss": 9.3072, "step": 872 }, { "epoch": 0.6126315789473684, "grad_norm": 0.7168900966644287, "learning_rate": 1.0300074618446179e-05, "loss": 9.319, "step": 873 }, { "epoch": 0.6133333333333333, "grad_norm": 0.5286966562271118, "learning_rate": 1.0267775575223381e-05, "loss": 9.3375, "step": 874 }, { "epoch": 0.6140350877192983, "grad_norm": 1.1097559928894043, "learning_rate": 1.0235500880115625e-05, "loss": 9.2761, "step": 875 }, { "epoch": 0.6147368421052631, "grad_norm": 0.6802719831466675, "learning_rate": 1.0203250699181816e-05, "loss": 9.27, "step": 876 }, { "epoch": 0.6154385964912281, "grad_norm": 0.8762888312339783, "learning_rate": 1.0171025198354731e-05, "loss": 9.2926, "step": 877 }, { "epoch": 0.616140350877193, "grad_norm": 0.5747367739677429, "learning_rate": 1.013882454344015e-05, "loss": 9.2754, "step": 878 }, { "epoch": 0.6168421052631579, "grad_norm": 0.550148606300354, "learning_rate": 1.0106648900116035e-05, "loss": 9.3088, "step": 879 }, { "epoch": 0.6175438596491228, "grad_norm": 0.7556295394897461, "learning_rate": 1.0074498433931638e-05, "loss": 9.3233, "step": 880 }, { "epoch": 0.6182456140350877, "grad_norm": 0.5367810726165771, "learning_rate": 1.0042373310306683e-05, "loss": 9.2881, "step": 881 }, { "epoch": 0.6189473684210526, "grad_norm": 1.3291575908660889, "learning_rate": 1.0010273694530505e-05, "loss": 9.2619, "step": 882 }, { "epoch": 0.6196491228070176, "grad_norm": 0.8526515960693359, "learning_rate": 9.978199751761193e-06, "loss": 9.3167, "step": 883 }, { "epoch": 0.6203508771929824, "grad_norm": 0.6056516766548157, "learning_rate": 9.946151647024735e-06, "loss": 9.3368, "step": 884 }, { "epoch": 0.6210526315789474, "grad_norm": 1.5451360940933228, "learning_rate": 9.914129545214193e-06, "loss": 9.2795, "step": 885 }, { "epoch": 0.6217543859649123, "grad_norm": 0.46355774998664856, "learning_rate": 9.882133611088827e-06, "loss": 9.2579, "step": 886 }, { "epoch": 0.6224561403508772, "grad_norm": 0.5397534966468811, "learning_rate": 9.85016400927327e-06, "loss": 9.255, "step": 887 }, { "epoch": 0.6231578947368421, "grad_norm": 0.8000882863998413, "learning_rate": 9.818220904256662e-06, "loss": 9.2944, "step": 888 }, { "epoch": 0.623859649122807, "grad_norm": 0.4497506022453308, "learning_rate": 9.786304460391817e-06, "loss": 9.2754, "step": 889 }, { "epoch": 0.624561403508772, "grad_norm": 0.6529581546783447, "learning_rate": 9.754414841894377e-06, "loss": 9.3258, "step": 890 }, { "epoch": 0.6252631578947369, "grad_norm": 0.7255744934082031, "learning_rate": 9.722552212841949e-06, "loss": 9.2829, "step": 891 }, { "epoch": 0.6259649122807017, "grad_norm": 0.48771393299102783, "learning_rate": 9.690716737173285e-06, "loss": 9.2631, "step": 892 }, { "epoch": 0.6266666666666667, "grad_norm": 1.1750224828720093, "learning_rate": 9.658908578687431e-06, "loss": 9.3244, "step": 893 }, { "epoch": 0.6273684210526316, "grad_norm": 0.8188272714614868, "learning_rate": 9.62712790104287e-06, "loss": 9.3357, "step": 894 }, { "epoch": 0.6280701754385964, "grad_norm": 0.4886317551136017, "learning_rate": 9.595374867756701e-06, "loss": 9.265, "step": 895 }, { "epoch": 0.6287719298245614, "grad_norm": 0.6386067271232605, "learning_rate": 9.563649642203787e-06, "loss": 9.2847, "step": 896 }, { "epoch": 0.6294736842105263, "grad_norm": 1.157050609588623, "learning_rate": 9.531952387615914e-06, "loss": 9.2771, "step": 897 }, { "epoch": 0.6301754385964913, "grad_norm": 0.8360874652862549, "learning_rate": 9.50028326708095e-06, "loss": 9.3248, "step": 898 }, { "epoch": 0.6308771929824561, "grad_norm": 1.2960262298583984, "learning_rate": 9.468642443542007e-06, "loss": 9.3192, "step": 899 }, { "epoch": 0.631578947368421, "grad_norm": 0.822274923324585, "learning_rate": 9.43703007979661e-06, "loss": 9.3341, "step": 900 }, { "epoch": 0.632280701754386, "grad_norm": 0.9322350025177002, "learning_rate": 9.40544633849586e-06, "loss": 9.3148, "step": 901 }, { "epoch": 0.6329824561403509, "grad_norm": 0.6057102680206299, "learning_rate": 9.373891382143568e-06, "loss": 9.3214, "step": 902 }, { "epoch": 0.6336842105263157, "grad_norm": 0.6619886159896851, "learning_rate": 9.342365373095457e-06, "loss": 9.2785, "step": 903 }, { "epoch": 0.6343859649122807, "grad_norm": 0.7684844732284546, "learning_rate": 9.310868473558315e-06, "loss": 9.2667, "step": 904 }, { "epoch": 0.6350877192982456, "grad_norm": 0.6437612771987915, "learning_rate": 9.279400845589142e-06, "loss": 9.2713, "step": 905 }, { "epoch": 0.6357894736842106, "grad_norm": 1.3911014795303345, "learning_rate": 9.24796265109435e-06, "loss": 9.2747, "step": 906 }, { "epoch": 0.6364912280701754, "grad_norm": 0.5524715781211853, "learning_rate": 9.216554051828889e-06, "loss": 9.3828, "step": 907 }, { "epoch": 0.6371929824561403, "grad_norm": 0.8014535307884216, "learning_rate": 9.185175209395452e-06, "loss": 9.2779, "step": 908 }, { "epoch": 0.6378947368421053, "grad_norm": 1.166094183921814, "learning_rate": 9.153826285243627e-06, "loss": 9.3117, "step": 909 }, { "epoch": 0.6385964912280702, "grad_norm": 0.7641354203224182, "learning_rate": 9.122507440669055e-06, "loss": 9.3076, "step": 910 }, { "epoch": 0.639298245614035, "grad_norm": 0.7516749501228333, "learning_rate": 9.091218836812632e-06, "loss": 9.2875, "step": 911 }, { "epoch": 0.64, "grad_norm": 0.6370930671691895, "learning_rate": 9.059960634659644e-06, "loss": 9.2785, "step": 912 }, { "epoch": 0.6407017543859649, "grad_norm": 0.5724075436592102, "learning_rate": 9.02873299503895e-06, "loss": 9.3041, "step": 913 }, { "epoch": 0.6414035087719299, "grad_norm": 1.3337185382843018, "learning_rate": 8.997536078622181e-06, "loss": 9.2783, "step": 914 }, { "epoch": 0.6421052631578947, "grad_norm": 1.1222269535064697, "learning_rate": 8.966370045922881e-06, "loss": 9.2818, "step": 915 }, { "epoch": 0.6428070175438596, "grad_norm": 0.952942967414856, "learning_rate": 8.935235057295683e-06, "loss": 9.3502, "step": 916 }, { "epoch": 0.6435087719298246, "grad_norm": 0.5726950168609619, "learning_rate": 8.90413127293551e-06, "loss": 9.3043, "step": 917 }, { "epoch": 0.6442105263157895, "grad_norm": 1.0555751323699951, "learning_rate": 8.873058852876714e-06, "loss": 9.2938, "step": 918 }, { "epoch": 0.6449122807017544, "grad_norm": 0.9121336936950684, "learning_rate": 8.842017956992292e-06, "loss": 9.3261, "step": 919 }, { "epoch": 0.6456140350877193, "grad_norm": 0.7638741731643677, "learning_rate": 8.811008744993036e-06, "loss": 9.371, "step": 920 }, { "epoch": 0.6463157894736842, "grad_norm": 0.7838529348373413, "learning_rate": 8.780031376426706e-06, "loss": 9.2896, "step": 921 }, { "epoch": 0.6470175438596492, "grad_norm": 0.7512050867080688, "learning_rate": 8.749086010677249e-06, "loss": 9.3359, "step": 922 }, { "epoch": 0.647719298245614, "grad_norm": 0.5533982515335083, "learning_rate": 8.718172806963927e-06, "loss": 9.3434, "step": 923 }, { "epoch": 0.6484210526315789, "grad_norm": 1.2319127321243286, "learning_rate": 8.687291924340534e-06, "loss": 9.3508, "step": 924 }, { "epoch": 0.6491228070175439, "grad_norm": 0.48271164298057556, "learning_rate": 8.656443521694553e-06, "loss": 9.3539, "step": 925 }, { "epoch": 0.6498245614035087, "grad_norm": 1.449876308441162, "learning_rate": 8.625627757746384e-06, "loss": 9.3034, "step": 926 }, { "epoch": 0.6505263157894737, "grad_norm": 0.6347336173057556, "learning_rate": 8.594844791048469e-06, "loss": 9.2935, "step": 927 }, { "epoch": 0.6512280701754386, "grad_norm": 0.8704899549484253, "learning_rate": 8.564094779984494e-06, "loss": 9.2903, "step": 928 }, { "epoch": 0.6519298245614035, "grad_norm": 0.5149816870689392, "learning_rate": 8.533377882768614e-06, "loss": 9.316, "step": 929 }, { "epoch": 0.6526315789473685, "grad_norm": 0.9356176853179932, "learning_rate": 8.502694257444584e-06, "loss": 9.2988, "step": 930 }, { "epoch": 0.6533333333333333, "grad_norm": 0.6552717685699463, "learning_rate": 8.472044061884977e-06, "loss": 9.3023, "step": 931 }, { "epoch": 0.6540350877192982, "grad_norm": 0.9464498162269592, "learning_rate": 8.441427453790353e-06, "loss": 9.3132, "step": 932 }, { "epoch": 0.6547368421052632, "grad_norm": 1.2422219514846802, "learning_rate": 8.41084459068849e-06, "loss": 9.3754, "step": 933 }, { "epoch": 0.655438596491228, "grad_norm": 0.804063618183136, "learning_rate": 8.380295629933493e-06, "loss": 9.3237, "step": 934 }, { "epoch": 0.656140350877193, "grad_norm": 0.5768065452575684, "learning_rate": 8.349780728705082e-06, "loss": 9.2795, "step": 935 }, { "epoch": 0.6568421052631579, "grad_norm": 0.774304211139679, "learning_rate": 8.319300044007705e-06, "loss": 9.3332, "step": 936 }, { "epoch": 0.6575438596491228, "grad_norm": 1.0075092315673828, "learning_rate": 8.288853732669775e-06, "loss": 9.3195, "step": 937 }, { "epoch": 0.6582456140350877, "grad_norm": 0.6057764291763306, "learning_rate": 8.25844195134283e-06, "loss": 9.2862, "step": 938 }, { "epoch": 0.6589473684210526, "grad_norm": 0.8154014945030212, "learning_rate": 8.22806485650077e-06, "loss": 9.3432, "step": 939 }, { "epoch": 0.6596491228070176, "grad_norm": 1.0258395671844482, "learning_rate": 8.19772260443901e-06, "loss": 9.2743, "step": 940 }, { "epoch": 0.6603508771929825, "grad_norm": 0.9688462018966675, "learning_rate": 8.167415351273688e-06, "loss": 9.3306, "step": 941 }, { "epoch": 0.6610526315789473, "grad_norm": 2.1236069202423096, "learning_rate": 8.13714325294089e-06, "loss": 9.2624, "step": 942 }, { "epoch": 0.6617543859649123, "grad_norm": 0.5072740912437439, "learning_rate": 8.106906465195806e-06, "loss": 9.3152, "step": 943 }, { "epoch": 0.6624561403508772, "grad_norm": 0.9152504801750183, "learning_rate": 8.076705143611945e-06, "loss": 9.2913, "step": 944 }, { "epoch": 0.6631578947368421, "grad_norm": 1.237741231918335, "learning_rate": 8.046539443580348e-06, "loss": 9.2838, "step": 945 }, { "epoch": 0.663859649122807, "grad_norm": 0.8929471373558044, "learning_rate": 8.016409520308768e-06, "loss": 9.2636, "step": 946 }, { "epoch": 0.6645614035087719, "grad_norm": 0.6384201049804688, "learning_rate": 7.986315528820878e-06, "loss": 9.2838, "step": 947 }, { "epoch": 0.6652631578947369, "grad_norm": 1.0059359073638916, "learning_rate": 7.956257623955495e-06, "loss": 9.2862, "step": 948 }, { "epoch": 0.6659649122807018, "grad_norm": 1.114145040512085, "learning_rate": 7.926235960365743e-06, "loss": 9.2846, "step": 949 }, { "epoch": 0.6666666666666666, "grad_norm": 0.9211885333061218, "learning_rate": 7.896250692518284e-06, "loss": 9.2821, "step": 950 }, { "epoch": 0.6673684210526316, "grad_norm": 0.7064094543457031, "learning_rate": 7.866301974692517e-06, "loss": 9.2618, "step": 951 }, { "epoch": 0.6680701754385965, "grad_norm": 0.5298050045967102, "learning_rate": 7.836389960979797e-06, "loss": 9.2792, "step": 952 }, { "epoch": 0.6687719298245614, "grad_norm": 0.8741891980171204, "learning_rate": 7.806514805282614e-06, "loss": 9.2889, "step": 953 }, { "epoch": 0.6694736842105263, "grad_norm": 1.1420625448226929, "learning_rate": 7.776676661313817e-06, "loss": 9.2631, "step": 954 }, { "epoch": 0.6701754385964912, "grad_norm": 1.0255893468856812, "learning_rate": 7.74687568259585e-06, "loss": 9.3097, "step": 955 }, { "epoch": 0.6708771929824562, "grad_norm": 1.0151139497756958, "learning_rate": 7.717112022459894e-06, "loss": 9.2732, "step": 956 }, { "epoch": 0.671578947368421, "grad_norm": 0.5862574577331543, "learning_rate": 7.687385834045141e-06, "loss": 9.2871, "step": 957 }, { "epoch": 0.6722807017543859, "grad_norm": 0.6479166746139526, "learning_rate": 7.657697270297996e-06, "loss": 9.2594, "step": 958 }, { "epoch": 0.6729824561403509, "grad_norm": 1.4959032535552979, "learning_rate": 7.628046483971262e-06, "loss": 9.2755, "step": 959 }, { "epoch": 0.6736842105263158, "grad_norm": 1.0569089651107788, "learning_rate": 7.598433627623365e-06, "loss": 9.2436, "step": 960 }, { "epoch": 0.6743859649122808, "grad_norm": 0.7349998354911804, "learning_rate": 7.568858853617599e-06, "loss": 9.331, "step": 961 }, { "epoch": 0.6750877192982456, "grad_norm": 0.9403771162033081, "learning_rate": 7.539322314121299e-06, "loss": 9.3022, "step": 962 }, { "epoch": 0.6757894736842105, "grad_norm": 0.7449793219566345, "learning_rate": 7.5098241611050765e-06, "loss": 9.3455, "step": 963 }, { "epoch": 0.6764912280701755, "grad_norm": 0.6045031547546387, "learning_rate": 7.480364546342041e-06, "loss": 9.3007, "step": 964 }, { "epoch": 0.6771929824561403, "grad_norm": 0.6439392566680908, "learning_rate": 7.450943621407026e-06, "loss": 9.2786, "step": 965 }, { "epoch": 0.6778947368421052, "grad_norm": 0.7756824493408203, "learning_rate": 7.421561537675789e-06, "loss": 9.3117, "step": 966 }, { "epoch": 0.6785964912280702, "grad_norm": 0.9938981533050537, "learning_rate": 7.392218446324241e-06, "loss": 9.2384, "step": 967 }, { "epoch": 0.6792982456140351, "grad_norm": 0.5165743231773376, "learning_rate": 7.3629144983276765e-06, "loss": 9.2604, "step": 968 }, { "epoch": 0.68, "grad_norm": 1.1167893409729004, "learning_rate": 7.333649844459985e-06, "loss": 9.263, "step": 969 }, { "epoch": 0.6807017543859649, "grad_norm": 0.7239232063293457, "learning_rate": 7.3044246352928815e-06, "loss": 9.2583, "step": 970 }, { "epoch": 0.6814035087719298, "grad_norm": 1.0350443124771118, "learning_rate": 7.275239021195143e-06, "loss": 9.3569, "step": 971 }, { "epoch": 0.6821052631578948, "grad_norm": 0.949785053730011, "learning_rate": 7.246093152331808e-06, "loss": 9.3676, "step": 972 }, { "epoch": 0.6828070175438596, "grad_norm": 1.818223476409912, "learning_rate": 7.216987178663419e-06, "loss": 9.2803, "step": 973 }, { "epoch": 0.6835087719298245, "grad_norm": 0.48713356256484985, "learning_rate": 7.187921249945269e-06, "loss": 9.3106, "step": 974 }, { "epoch": 0.6842105263157895, "grad_norm": 0.6918230056762695, "learning_rate": 7.158895515726593e-06, "loss": 9.337, "step": 975 }, { "epoch": 0.6849122807017544, "grad_norm": 0.7828366756439209, "learning_rate": 7.129910125349826e-06, "loss": 9.2856, "step": 976 }, { "epoch": 0.6856140350877193, "grad_norm": 0.41380369663238525, "learning_rate": 7.100965227949827e-06, "loss": 9.2661, "step": 977 }, { "epoch": 0.6863157894736842, "grad_norm": 1.0016270875930786, "learning_rate": 7.072060972453111e-06, "loss": 9.3283, "step": 978 }, { "epoch": 0.6870175438596491, "grad_norm": 1.0117706060409546, "learning_rate": 7.043197507577075e-06, "loss": 9.308, "step": 979 }, { "epoch": 0.6877192982456141, "grad_norm": 0.8672578930854797, "learning_rate": 7.014374981829265e-06, "loss": 9.2728, "step": 980 }, { "epoch": 0.6884210526315789, "grad_norm": 0.5940542221069336, "learning_rate": 6.985593543506564e-06, "loss": 9.2861, "step": 981 }, { "epoch": 0.6891228070175439, "grad_norm": 0.5575833320617676, "learning_rate": 6.956853340694464e-06, "loss": 9.2862, "step": 982 }, { "epoch": 0.6898245614035088, "grad_norm": 1.073896884918213, "learning_rate": 6.928154521266282e-06, "loss": 9.3115, "step": 983 }, { "epoch": 0.6905263157894737, "grad_norm": 0.5103911757469177, "learning_rate": 6.899497232882433e-06, "loss": 9.2947, "step": 984 }, { "epoch": 0.6912280701754386, "grad_norm": 0.8987613320350647, "learning_rate": 6.870881622989629e-06, "loss": 9.2515, "step": 985 }, { "epoch": 0.6919298245614035, "grad_norm": 0.9974430799484253, "learning_rate": 6.842307838820136e-06, "loss": 9.2994, "step": 986 }, { "epoch": 0.6926315789473684, "grad_norm": 1.0959807634353638, "learning_rate": 6.813776027391045e-06, "loss": 9.3335, "step": 987 }, { "epoch": 0.6933333333333334, "grad_norm": 0.8279174566268921, "learning_rate": 6.785286335503455e-06, "loss": 9.2764, "step": 988 }, { "epoch": 0.6940350877192982, "grad_norm": 0.7064462900161743, "learning_rate": 6.7568389097417695e-06, "loss": 9.2794, "step": 989 }, { "epoch": 0.6947368421052632, "grad_norm": 0.8074058890342712, "learning_rate": 6.7284338964729366e-06, "loss": 9.2431, "step": 990 }, { "epoch": 0.6954385964912281, "grad_norm": 0.7183271646499634, "learning_rate": 6.70007144184567e-06, "loss": 9.3029, "step": 991 }, { "epoch": 0.696140350877193, "grad_norm": 0.7223518490791321, "learning_rate": 6.671751691789706e-06, "loss": 9.254, "step": 992 }, { "epoch": 0.6968421052631579, "grad_norm": 1.1047372817993164, "learning_rate": 6.643474792015085e-06, "loss": 9.2713, "step": 993 }, { "epoch": 0.6975438596491228, "grad_norm": 1.2691161632537842, "learning_rate": 6.615240888011349e-06, "loss": 9.3552, "step": 994 }, { "epoch": 0.6982456140350877, "grad_norm": 0.736964762210846, "learning_rate": 6.587050125046826e-06, "loss": 9.2723, "step": 995 }, { "epoch": 0.6989473684210527, "grad_norm": 1.1129282712936401, "learning_rate": 6.558902648167885e-06, "loss": 9.303, "step": 996 }, { "epoch": 0.6996491228070175, "grad_norm": 0.562981903553009, "learning_rate": 6.530798602198173e-06, "loss": 9.2959, "step": 997 }, { "epoch": 0.7003508771929825, "grad_norm": 1.3118854761123657, "learning_rate": 6.502738131737878e-06, "loss": 9.2276, "step": 998 }, { "epoch": 0.7010526315789474, "grad_norm": 0.787833034992218, "learning_rate": 6.474721381162985e-06, "loss": 9.3306, "step": 999 }, { "epoch": 0.7017543859649122, "grad_norm": 0.7272397875785828, "learning_rate": 6.4467484946245305e-06, "loss": 9.2482, "step": 1000 }, { "epoch": 0.7024561403508772, "grad_norm": 0.7682285904884338, "learning_rate": 6.418819616047866e-06, "loss": 9.278, "step": 1001 }, { "epoch": 0.7031578947368421, "grad_norm": 0.8741797208786011, "learning_rate": 6.39093488913191e-06, "loss": 9.3384, "step": 1002 }, { "epoch": 0.703859649122807, "grad_norm": 1.090254783630371, "learning_rate": 6.363094457348427e-06, "loss": 9.2481, "step": 1003 }, { "epoch": 0.7045614035087719, "grad_norm": 1.0101443529129028, "learning_rate": 6.335298463941257e-06, "loss": 9.326, "step": 1004 }, { "epoch": 0.7052631578947368, "grad_norm": 0.7837245464324951, "learning_rate": 6.307547051925603e-06, "loss": 9.2705, "step": 1005 }, { "epoch": 0.7059649122807018, "grad_norm": 0.6059621572494507, "learning_rate": 6.279840364087298e-06, "loss": 9.3223, "step": 1006 }, { "epoch": 0.7066666666666667, "grad_norm": 0.6841496229171753, "learning_rate": 6.252178542982051e-06, "loss": 9.3064, "step": 1007 }, { "epoch": 0.7073684210526315, "grad_norm": 0.8395918607711792, "learning_rate": 6.224561730934723e-06, "loss": 9.28, "step": 1008 }, { "epoch": 0.7080701754385965, "grad_norm": 0.8790102601051331, "learning_rate": 6.196990070038612e-06, "loss": 9.323, "step": 1009 }, { "epoch": 0.7087719298245614, "grad_norm": 0.6594134569168091, "learning_rate": 6.169463702154681e-06, "loss": 9.2442, "step": 1010 }, { "epoch": 0.7094736842105264, "grad_norm": 1.1326459646224976, "learning_rate": 6.141982768910861e-06, "loss": 9.2979, "step": 1011 }, { "epoch": 0.7101754385964912, "grad_norm": 0.7787624001502991, "learning_rate": 6.114547411701331e-06, "loss": 9.3278, "step": 1012 }, { "epoch": 0.7108771929824561, "grad_norm": 0.9354482889175415, "learning_rate": 6.087157771685754e-06, "loss": 9.3188, "step": 1013 }, { "epoch": 0.7115789473684211, "grad_norm": 1.0905933380126953, "learning_rate": 6.0598139897885705e-06, "loss": 9.2677, "step": 1014 }, { "epoch": 0.712280701754386, "grad_norm": 0.4490397572517395, "learning_rate": 6.032516206698289e-06, "loss": 9.2931, "step": 1015 }, { "epoch": 0.7129824561403508, "grad_norm": 1.3389390707015991, "learning_rate": 6.005264562866731e-06, "loss": 9.2739, "step": 1016 }, { "epoch": 0.7136842105263158, "grad_norm": 0.9343718886375427, "learning_rate": 5.97805919850833e-06, "loss": 9.2891, "step": 1017 }, { "epoch": 0.7143859649122807, "grad_norm": 0.658909261226654, "learning_rate": 5.9509002535993936e-06, "loss": 9.2983, "step": 1018 }, { "epoch": 0.7150877192982457, "grad_norm": 0.7858977913856506, "learning_rate": 5.923787867877414e-06, "loss": 9.289, "step": 1019 }, { "epoch": 0.7157894736842105, "grad_norm": 0.5259139537811279, "learning_rate": 5.896722180840316e-06, "loss": 9.321, "step": 1020 }, { "epoch": 0.7164912280701754, "grad_norm": 0.8901458978652954, "learning_rate": 5.869703331745736e-06, "loss": 9.301, "step": 1021 }, { "epoch": 0.7171929824561404, "grad_norm": 0.9024726748466492, "learning_rate": 5.842731459610351e-06, "loss": 9.2881, "step": 1022 }, { "epoch": 0.7178947368421053, "grad_norm": 0.7907342314720154, "learning_rate": 5.8158067032091135e-06, "loss": 9.3077, "step": 1023 }, { "epoch": 0.7185964912280701, "grad_norm": 0.5021731853485107, "learning_rate": 5.78892920107456e-06, "loss": 9.2823, "step": 1024 }, { "epoch": 0.7192982456140351, "grad_norm": 0.9311113953590393, "learning_rate": 5.7620990914961085e-06, "loss": 9.3011, "step": 1025 }, { "epoch": 0.72, "grad_norm": 0.7938218712806702, "learning_rate": 5.7353165125193165e-06, "loss": 9.3549, "step": 1026 }, { "epoch": 0.720701754385965, "grad_norm": 1.199906826019287, "learning_rate": 5.708581601945192e-06, "loss": 9.2764, "step": 1027 }, { "epoch": 0.7214035087719298, "grad_norm": 0.8205959796905518, "learning_rate": 5.681894497329495e-06, "loss": 9.2462, "step": 1028 }, { "epoch": 0.7221052631578947, "grad_norm": 0.5370367765426636, "learning_rate": 5.655255335982001e-06, "loss": 9.3022, "step": 1029 }, { "epoch": 0.7228070175438597, "grad_norm": 1.2716137170791626, "learning_rate": 5.628664254965813e-06, "loss": 9.3521, "step": 1030 }, { "epoch": 0.7235087719298245, "grad_norm": 0.5016523599624634, "learning_rate": 5.602121391096651e-06, "loss": 9.3168, "step": 1031 }, { "epoch": 0.7242105263157895, "grad_norm": 1.015415906906128, "learning_rate": 5.575626880942154e-06, "loss": 9.3163, "step": 1032 }, { "epoch": 0.7249122807017544, "grad_norm": 0.7265341877937317, "learning_rate": 5.549180860821166e-06, "loss": 9.3616, "step": 1033 }, { "epoch": 0.7256140350877193, "grad_norm": 1.133549451828003, "learning_rate": 5.522783466803044e-06, "loss": 9.3146, "step": 1034 }, { "epoch": 0.7263157894736842, "grad_norm": 1.0484346151351929, "learning_rate": 5.4964348347069646e-06, "loss": 9.3019, "step": 1035 }, { "epoch": 0.7270175438596491, "grad_norm": 0.7211462259292603, "learning_rate": 5.470135100101202e-06, "loss": 9.3364, "step": 1036 }, { "epoch": 0.727719298245614, "grad_norm": 0.5058810114860535, "learning_rate": 5.443884398302446e-06, "loss": 9.2659, "step": 1037 }, { "epoch": 0.728421052631579, "grad_norm": 0.8395113348960876, "learning_rate": 5.417682864375118e-06, "loss": 9.2726, "step": 1038 }, { "epoch": 0.7291228070175438, "grad_norm": 0.568125307559967, "learning_rate": 5.391530633130649e-06, "loss": 9.3185, "step": 1039 }, { "epoch": 0.7298245614035088, "grad_norm": 0.8902009725570679, "learning_rate": 5.365427839126795e-06, "loss": 9.3997, "step": 1040 }, { "epoch": 0.7305263157894737, "grad_norm": 0.8129900693893433, "learning_rate": 5.3393746166669735e-06, "loss": 9.3605, "step": 1041 }, { "epoch": 0.7312280701754386, "grad_norm": 0.7704030871391296, "learning_rate": 5.3133710997995145e-06, "loss": 9.359, "step": 1042 }, { "epoch": 0.7319298245614035, "grad_norm": 0.7053369879722595, "learning_rate": 5.287417422317021e-06, "loss": 9.2759, "step": 1043 }, { "epoch": 0.7326315789473684, "grad_norm": 0.8183049559593201, "learning_rate": 5.26151371775567e-06, "loss": 9.2292, "step": 1044 }, { "epoch": 0.7333333333333333, "grad_norm": 0.8834903240203857, "learning_rate": 5.235660119394511e-06, "loss": 9.2497, "step": 1045 }, { "epoch": 0.7340350877192983, "grad_norm": 1.644731879234314, "learning_rate": 5.209856760254784e-06, "loss": 9.2527, "step": 1046 }, { "epoch": 0.7347368421052631, "grad_norm": 1.6478431224822998, "learning_rate": 5.184103773099252e-06, "loss": 9.3596, "step": 1047 }, { "epoch": 0.7354385964912281, "grad_norm": 0.578697144985199, "learning_rate": 5.158401290431498e-06, "loss": 9.3359, "step": 1048 }, { "epoch": 0.736140350877193, "grad_norm": 0.5149468183517456, "learning_rate": 5.132749444495247e-06, "loss": 9.2893, "step": 1049 }, { "epoch": 0.7368421052631579, "grad_norm": 0.5500578284263611, "learning_rate": 5.10714836727369e-06, "loss": 9.3106, "step": 1050 }, { "epoch": 0.7375438596491228, "grad_norm": 0.6376084089279175, "learning_rate": 5.0815981904888195e-06, "loss": 9.3009, "step": 1051 }, { "epoch": 0.7382456140350877, "grad_norm": 0.6819573640823364, "learning_rate": 5.05609904560072e-06, "loss": 9.2715, "step": 1052 }, { "epoch": 0.7389473684210527, "grad_norm": 0.9744590520858765, "learning_rate": 5.030651063806902e-06, "loss": 9.2846, "step": 1053 }, { "epoch": 0.7396491228070176, "grad_norm": 1.1138968467712402, "learning_rate": 5.005254376041656e-06, "loss": 9.2737, "step": 1054 }, { "epoch": 0.7403508771929824, "grad_norm": 0.7020346522331238, "learning_rate": 4.9799091129753385e-06, "loss": 9.3256, "step": 1055 }, { "epoch": 0.7410526315789474, "grad_norm": 0.8349035382270813, "learning_rate": 4.9546154050137175e-06, "loss": 9.2725, "step": 1056 }, { "epoch": 0.7417543859649123, "grad_norm": 0.9700902700424194, "learning_rate": 4.929373382297316e-06, "loss": 9.2929, "step": 1057 }, { "epoch": 0.7424561403508771, "grad_norm": 0.9827123284339905, "learning_rate": 4.90418317470071e-06, "loss": 9.2869, "step": 1058 }, { "epoch": 0.7431578947368421, "grad_norm": 1.2211583852767944, "learning_rate": 4.8790449118318806e-06, "loss": 9.3054, "step": 1059 }, { "epoch": 0.743859649122807, "grad_norm": 0.9888529777526855, "learning_rate": 4.853958723031559e-06, "loss": 9.2865, "step": 1060 }, { "epoch": 0.744561403508772, "grad_norm": 1.3761615753173828, "learning_rate": 4.8289247373725275e-06, "loss": 9.3266, "step": 1061 }, { "epoch": 0.7452631578947368, "grad_norm": 0.6878432035446167, "learning_rate": 4.803943083658987e-06, "loss": 9.2623, "step": 1062 }, { "epoch": 0.7459649122807017, "grad_norm": 0.6103950142860413, "learning_rate": 4.779013890425873e-06, "loss": 9.325, "step": 1063 }, { "epoch": 0.7466666666666667, "grad_norm": 0.8532941937446594, "learning_rate": 4.754137285938207e-06, "loss": 9.3168, "step": 1064 }, { "epoch": 0.7473684210526316, "grad_norm": 1.0897939205169678, "learning_rate": 4.729313398190428e-06, "loss": 9.3495, "step": 1065 }, { "epoch": 0.7480701754385964, "grad_norm": 0.8022515773773193, "learning_rate": 4.704542354905751e-06, "loss": 9.2684, "step": 1066 }, { "epoch": 0.7487719298245614, "grad_norm": 0.5575293898582458, "learning_rate": 4.679824283535482e-06, "loss": 9.2534, "step": 1067 }, { "epoch": 0.7494736842105263, "grad_norm": 0.6564065217971802, "learning_rate": 4.655159311258386e-06, "loss": 9.2605, "step": 1068 }, { "epoch": 0.7501754385964913, "grad_norm": 0.8246186375617981, "learning_rate": 4.630547564980015e-06, "loss": 9.3068, "step": 1069 }, { "epoch": 0.7508771929824561, "grad_norm": 0.511626124382019, "learning_rate": 4.605989171332079e-06, "loss": 9.3179, "step": 1070 }, { "epoch": 0.751578947368421, "grad_norm": 0.7936382293701172, "learning_rate": 4.581484256671767e-06, "loss": 9.2843, "step": 1071 }, { "epoch": 0.752280701754386, "grad_norm": 0.8532900214195251, "learning_rate": 4.557032947081109e-06, "loss": 9.3091, "step": 1072 }, { "epoch": 0.7529824561403509, "grad_norm": 0.6929610371589661, "learning_rate": 4.53263536836634e-06, "loss": 9.2334, "step": 1073 }, { "epoch": 0.7536842105263157, "grad_norm": 1.0202608108520508, "learning_rate": 4.508291646057232e-06, "loss": 9.3244, "step": 1074 }, { "epoch": 0.7543859649122807, "grad_norm": 0.7788147926330566, "learning_rate": 4.484001905406446e-06, "loss": 9.3126, "step": 1075 }, { "epoch": 0.7550877192982456, "grad_norm": 0.45244890451431274, "learning_rate": 4.459766271388916e-06, "loss": 9.2636, "step": 1076 }, { "epoch": 0.7557894736842106, "grad_norm": 0.6202058792114258, "learning_rate": 4.435584868701182e-06, "loss": 9.2855, "step": 1077 }, { "epoch": 0.7564912280701754, "grad_norm": 0.9420509934425354, "learning_rate": 4.4114578217607425e-06, "loss": 9.3409, "step": 1078 }, { "epoch": 0.7571929824561403, "grad_norm": 0.7663633227348328, "learning_rate": 4.38738525470545e-06, "loss": 9.3303, "step": 1079 }, { "epoch": 0.7578947368421053, "grad_norm": 0.7837168574333191, "learning_rate": 4.363367291392832e-06, "loss": 9.2724, "step": 1080 }, { "epoch": 0.7585964912280702, "grad_norm": 0.8414586186408997, "learning_rate": 4.339404055399469e-06, "loss": 9.2983, "step": 1081 }, { "epoch": 0.7592982456140351, "grad_norm": 0.8319339156150818, "learning_rate": 4.315495670020363e-06, "loss": 9.2766, "step": 1082 }, { "epoch": 0.76, "grad_norm": 0.979455292224884, "learning_rate": 4.291642258268311e-06, "loss": 9.3242, "step": 1083 }, { "epoch": 0.7607017543859649, "grad_norm": 0.8553726077079773, "learning_rate": 4.267843942873244e-06, "loss": 9.2979, "step": 1084 }, { "epoch": 0.7614035087719299, "grad_norm": 0.6291764974594116, "learning_rate": 4.244100846281623e-06, "loss": 9.2761, "step": 1085 }, { "epoch": 0.7621052631578947, "grad_norm": 0.5826297998428345, "learning_rate": 4.220413090655795e-06, "loss": 9.2673, "step": 1086 }, { "epoch": 0.7628070175438596, "grad_norm": 0.8195089101791382, "learning_rate": 4.1967807978733705e-06, "loss": 9.241, "step": 1087 }, { "epoch": 0.7635087719298246, "grad_norm": 0.6155669093132019, "learning_rate": 4.173204089526587e-06, "loss": 9.3459, "step": 1088 }, { "epoch": 0.7642105263157895, "grad_norm": 1.2808862924575806, "learning_rate": 4.149683086921706e-06, "loss": 9.283, "step": 1089 }, { "epoch": 0.7649122807017544, "grad_norm": 1.202487587928772, "learning_rate": 4.126217911078359e-06, "loss": 9.3072, "step": 1090 }, { "epoch": 0.7656140350877193, "grad_norm": 0.5958114266395569, "learning_rate": 4.102808682728938e-06, "loss": 9.2866, "step": 1091 }, { "epoch": 0.7663157894736842, "grad_norm": 0.7221415042877197, "learning_rate": 4.07945552231799e-06, "loss": 9.3232, "step": 1092 }, { "epoch": 0.7670175438596492, "grad_norm": 0.760407567024231, "learning_rate": 4.056158550001572e-06, "loss": 9.3141, "step": 1093 }, { "epoch": 0.767719298245614, "grad_norm": 0.5214568972587585, "learning_rate": 4.032917885646642e-06, "loss": 9.3102, "step": 1094 }, { "epoch": 0.7684210526315789, "grad_norm": 0.8034120798110962, "learning_rate": 4.00973364883045e-06, "loss": 9.2998, "step": 1095 }, { "epoch": 0.7691228070175439, "grad_norm": 0.6652786731719971, "learning_rate": 3.986605958839911e-06, "loss": 9.2906, "step": 1096 }, { "epoch": 0.7698245614035087, "grad_norm": 0.5497679710388184, "learning_rate": 3.963534934670998e-06, "loss": 9.2863, "step": 1097 }, { "epoch": 0.7705263157894737, "grad_norm": 1.0648082494735718, "learning_rate": 3.940520695028135e-06, "loss": 9.335, "step": 1098 }, { "epoch": 0.7712280701754386, "grad_norm": 0.6193509697914124, "learning_rate": 3.917563358323574e-06, "loss": 9.2732, "step": 1099 }, { "epoch": 0.7719298245614035, "grad_norm": 0.6349062323570251, "learning_rate": 3.89466304267679e-06, "loss": 9.2684, "step": 1100 }, { "epoch": 0.7726315789473684, "grad_norm": 0.7755619883537292, "learning_rate": 3.871819865913873e-06, "loss": 9.2983, "step": 1101 }, { "epoch": 0.7733333333333333, "grad_norm": 0.4944404661655426, "learning_rate": 3.849033945566937e-06, "loss": 9.2809, "step": 1102 }, { "epoch": 0.7740350877192983, "grad_norm": 0.6161851286888123, "learning_rate": 3.826305398873487e-06, "loss": 9.3552, "step": 1103 }, { "epoch": 0.7747368421052632, "grad_norm": 1.1854887008666992, "learning_rate": 3.803634342775833e-06, "loss": 9.2863, "step": 1104 }, { "epoch": 0.775438596491228, "grad_norm": 0.8104893565177917, "learning_rate": 3.781020893920497e-06, "loss": 9.2878, "step": 1105 }, { "epoch": 0.776140350877193, "grad_norm": 0.8367242217063904, "learning_rate": 3.7584651686575923e-06, "loss": 9.3108, "step": 1106 }, { "epoch": 0.7768421052631579, "grad_norm": 0.5291663408279419, "learning_rate": 3.735967283040225e-06, "loss": 9.3106, "step": 1107 }, { "epoch": 0.7775438596491228, "grad_norm": 0.7018653750419617, "learning_rate": 3.7135273528239282e-06, "loss": 9.2964, "step": 1108 }, { "epoch": 0.7782456140350877, "grad_norm": 1.3926546573638916, "learning_rate": 3.691145493466028e-06, "loss": 9.288, "step": 1109 }, { "epoch": 0.7789473684210526, "grad_norm": 0.8992234468460083, "learning_rate": 3.6688218201250694e-06, "loss": 9.2617, "step": 1110 }, { "epoch": 0.7796491228070176, "grad_norm": 0.7157596349716187, "learning_rate": 3.6465564476602275e-06, "loss": 9.2858, "step": 1111 }, { "epoch": 0.7803508771929825, "grad_norm": 1.022951364517212, "learning_rate": 3.624349490630701e-06, "loss": 9.2671, "step": 1112 }, { "epoch": 0.7810526315789473, "grad_norm": 0.6771089434623718, "learning_rate": 3.602201063295133e-06, "loss": 9.2436, "step": 1113 }, { "epoch": 0.7817543859649123, "grad_norm": 1.0014992952346802, "learning_rate": 3.580111279611018e-06, "loss": 9.2909, "step": 1114 }, { "epoch": 0.7824561403508772, "grad_norm": 1.2470521926879883, "learning_rate": 3.5580802532341293e-06, "loss": 9.2547, "step": 1115 }, { "epoch": 0.783157894736842, "grad_norm": 1.0431408882141113, "learning_rate": 3.5361080975179157e-06, "loss": 9.259, "step": 1116 }, { "epoch": 0.783859649122807, "grad_norm": 0.727617084980011, "learning_rate": 3.5141949255129253e-06, "loss": 9.2904, "step": 1117 }, { "epoch": 0.7845614035087719, "grad_norm": 0.6610866189002991, "learning_rate": 3.4923408499662285e-06, "loss": 9.2608, "step": 1118 }, { "epoch": 0.7852631578947369, "grad_norm": 0.7260395288467407, "learning_rate": 3.4705459833208303e-06, "loss": 9.2871, "step": 1119 }, { "epoch": 0.7859649122807018, "grad_norm": 0.5813958048820496, "learning_rate": 3.4488104377150957e-06, "loss": 9.3133, "step": 1120 }, { "epoch": 0.7866666666666666, "grad_norm": 0.6028375625610352, "learning_rate": 3.4271343249821815e-06, "loss": 9.3566, "step": 1121 }, { "epoch": 0.7873684210526316, "grad_norm": 1.8688769340515137, "learning_rate": 3.4055177566494437e-06, "loss": 9.3043, "step": 1122 }, { "epoch": 0.7880701754385965, "grad_norm": 0.7255422472953796, "learning_rate": 3.3839608439378676e-06, "loss": 9.3021, "step": 1123 }, { "epoch": 0.7887719298245615, "grad_norm": 0.978252649307251, "learning_rate": 3.362463697761516e-06, "loss": 9.3204, "step": 1124 }, { "epoch": 0.7894736842105263, "grad_norm": 0.8243383765220642, "learning_rate": 3.3410264287269288e-06, "loss": 9.3483, "step": 1125 }, { "epoch": 0.7901754385964912, "grad_norm": 0.503088653087616, "learning_rate": 3.31964914713257e-06, "loss": 9.2955, "step": 1126 }, { "epoch": 0.7908771929824562, "grad_norm": 0.7895169854164124, "learning_rate": 3.2983319629682556e-06, "loss": 9.2971, "step": 1127 }, { "epoch": 0.791578947368421, "grad_norm": 0.6123483180999756, "learning_rate": 3.2770749859146064e-06, "loss": 9.3023, "step": 1128 }, { "epoch": 0.7922807017543859, "grad_norm": 1.051959753036499, "learning_rate": 3.2558783253424386e-06, "loss": 9.3251, "step": 1129 }, { "epoch": 0.7929824561403509, "grad_norm": 0.6633607745170593, "learning_rate": 3.234742090312257e-06, "loss": 9.2972, "step": 1130 }, { "epoch": 0.7936842105263158, "grad_norm": 0.4603084325790405, "learning_rate": 3.2136663895736503e-06, "loss": 9.2564, "step": 1131 }, { "epoch": 0.7943859649122808, "grad_norm": 0.8207873702049255, "learning_rate": 3.1926513315647542e-06, "loss": 9.3093, "step": 1132 }, { "epoch": 0.7950877192982456, "grad_norm": 0.915905773639679, "learning_rate": 3.171697024411681e-06, "loss": 9.333, "step": 1133 }, { "epoch": 0.7957894736842105, "grad_norm": 1.4539611339569092, "learning_rate": 3.15080357592798e-06, "loss": 9.3508, "step": 1134 }, { "epoch": 0.7964912280701755, "grad_norm": 0.6634907722473145, "learning_rate": 3.1299710936140635e-06, "loss": 9.2732, "step": 1135 }, { "epoch": 0.7971929824561403, "grad_norm": 0.563965380191803, "learning_rate": 3.1091996846566605e-06, "loss": 9.3306, "step": 1136 }, { "epoch": 0.7978947368421052, "grad_norm": 1.0351186990737915, "learning_rate": 3.088489455928277e-06, "loss": 9.2799, "step": 1137 }, { "epoch": 0.7985964912280702, "grad_norm": 0.9570818543434143, "learning_rate": 3.0678405139866295e-06, "loss": 9.3715, "step": 1138 }, { "epoch": 0.7992982456140351, "grad_norm": 1.1853101253509521, "learning_rate": 3.0472529650741005e-06, "loss": 9.3418, "step": 1139 }, { "epoch": 0.8, "grad_norm": 0.7552867531776428, "learning_rate": 3.026726915117201e-06, "loss": 9.3218, "step": 1140 }, { "epoch": 0.8007017543859649, "grad_norm": 0.519202709197998, "learning_rate": 3.006262469726014e-06, "loss": 9.3014, "step": 1141 }, { "epoch": 0.8014035087719298, "grad_norm": 1.0711617469787598, "learning_rate": 2.985859734193656e-06, "loss": 9.2612, "step": 1142 }, { "epoch": 0.8021052631578948, "grad_norm": 0.5590919852256775, "learning_rate": 2.965518813495748e-06, "loss": 9.3102, "step": 1143 }, { "epoch": 0.8028070175438596, "grad_norm": 0.7224082946777344, "learning_rate": 2.945239812289847e-06, "loss": 9.2729, "step": 1144 }, { "epoch": 0.8035087719298246, "grad_norm": 0.9288300275802612, "learning_rate": 2.92502283491493e-06, "loss": 9.2813, "step": 1145 }, { "epoch": 0.8042105263157895, "grad_norm": 0.6594282984733582, "learning_rate": 2.904867985390847e-06, "loss": 9.3121, "step": 1146 }, { "epoch": 0.8049122807017544, "grad_norm": 0.4883630871772766, "learning_rate": 2.8847753674178003e-06, "loss": 9.2758, "step": 1147 }, { "epoch": 0.8056140350877193, "grad_norm": 0.6817193031311035, "learning_rate": 2.86474508437579e-06, "loss": 9.3224, "step": 1148 }, { "epoch": 0.8063157894736842, "grad_norm": 0.8009378910064697, "learning_rate": 2.8447772393240896e-06, "loss": 9.3105, "step": 1149 }, { "epoch": 0.8070175438596491, "grad_norm": 1.6113158464431763, "learning_rate": 2.824871935000735e-06, "loss": 9.2383, "step": 1150 }, { "epoch": 0.8077192982456141, "grad_norm": 0.5577069520950317, "learning_rate": 2.8050292738219574e-06, "loss": 9.2953, "step": 1151 }, { "epoch": 0.8084210526315789, "grad_norm": 0.4423178434371948, "learning_rate": 2.785249357881686e-06, "loss": 9.3083, "step": 1152 }, { "epoch": 0.8091228070175439, "grad_norm": 0.6197486519813538, "learning_rate": 2.765532288951028e-06, "loss": 9.2792, "step": 1153 }, { "epoch": 0.8098245614035088, "grad_norm": 0.8409068584442139, "learning_rate": 2.7458781684777167e-06, "loss": 9.3494, "step": 1154 }, { "epoch": 0.8105263157894737, "grad_norm": 0.8505509495735168, "learning_rate": 2.726287097585605e-06, "loss": 9.3093, "step": 1155 }, { "epoch": 0.8112280701754386, "grad_norm": 0.6245561838150024, "learning_rate": 2.7067591770741557e-06, "loss": 9.2844, "step": 1156 }, { "epoch": 0.8119298245614035, "grad_norm": 0.8173698782920837, "learning_rate": 2.6872945074179034e-06, "loss": 9.3135, "step": 1157 }, { "epoch": 0.8126315789473684, "grad_norm": 0.6995279788970947, "learning_rate": 2.6678931887659454e-06, "loss": 9.243, "step": 1158 }, { "epoch": 0.8133333333333334, "grad_norm": 0.6876332759857178, "learning_rate": 2.6485553209414276e-06, "loss": 9.304, "step": 1159 }, { "epoch": 0.8140350877192982, "grad_norm": 0.5566669702529907, "learning_rate": 2.629281003441043e-06, "loss": 9.2927, "step": 1160 }, { "epoch": 0.8147368421052632, "grad_norm": 0.6525275111198425, "learning_rate": 2.6100703354344808e-06, "loss": 9.2597, "step": 1161 }, { "epoch": 0.8154385964912281, "grad_norm": 1.0428147315979004, "learning_rate": 2.5909234157639716e-06, "loss": 9.3039, "step": 1162 }, { "epoch": 0.8161403508771929, "grad_norm": 1.4567722082138062, "learning_rate": 2.5718403429437308e-06, "loss": 9.2795, "step": 1163 }, { "epoch": 0.8168421052631579, "grad_norm": 0.7866864204406738, "learning_rate": 2.5528212151594763e-06, "loss": 9.2921, "step": 1164 }, { "epoch": 0.8175438596491228, "grad_norm": 0.6844280362129211, "learning_rate": 2.5338661302679146e-06, "loss": 9.2752, "step": 1165 }, { "epoch": 0.8182456140350877, "grad_norm": 0.6063775420188904, "learning_rate": 2.5149751857962526e-06, "loss": 9.302, "step": 1166 }, { "epoch": 0.8189473684210526, "grad_norm": 1.7934107780456543, "learning_rate": 2.496148478941668e-06, "loss": 9.2941, "step": 1167 }, { "epoch": 0.8196491228070175, "grad_norm": 0.5443713068962097, "learning_rate": 2.477386106570829e-06, "loss": 9.2938, "step": 1168 }, { "epoch": 0.8203508771929825, "grad_norm": 1.164982795715332, "learning_rate": 2.4586881652194015e-06, "loss": 9.2967, "step": 1169 }, { "epoch": 0.8210526315789474, "grad_norm": 0.5410906076431274, "learning_rate": 2.4400547510915317e-06, "loss": 9.2654, "step": 1170 }, { "epoch": 0.8217543859649122, "grad_norm": 0.7197050452232361, "learning_rate": 2.4214859600593646e-06, "loss": 9.2938, "step": 1171 }, { "epoch": 0.8224561403508772, "grad_norm": 0.7010926604270935, "learning_rate": 2.40298188766255e-06, "loss": 9.3289, "step": 1172 }, { "epoch": 0.8231578947368421, "grad_norm": 0.7137438654899597, "learning_rate": 2.3845426291077495e-06, "loss": 9.3388, "step": 1173 }, { "epoch": 0.8238596491228071, "grad_norm": 0.5365422368049622, "learning_rate": 2.3661682792681394e-06, "loss": 9.2691, "step": 1174 }, { "epoch": 0.8245614035087719, "grad_norm": 1.4429165124893188, "learning_rate": 2.347858932682942e-06, "loss": 9.3056, "step": 1175 }, { "epoch": 0.8252631578947368, "grad_norm": 0.8075752854347229, "learning_rate": 2.3296146835569175e-06, "loss": 9.275, "step": 1176 }, { "epoch": 0.8259649122807018, "grad_norm": 1.7580209970474243, "learning_rate": 2.31143562575989e-06, "loss": 9.3859, "step": 1177 }, { "epoch": 0.8266666666666667, "grad_norm": 0.8380115628242493, "learning_rate": 2.293321852826258e-06, "loss": 9.2521, "step": 1178 }, { "epoch": 0.8273684210526315, "grad_norm": 0.8098567128181458, "learning_rate": 2.2752734579545348e-06, "loss": 9.2857, "step": 1179 }, { "epoch": 0.8280701754385965, "grad_norm": 0.7539602518081665, "learning_rate": 2.2572905340068323e-06, "loss": 9.2673, "step": 1180 }, { "epoch": 0.8287719298245614, "grad_norm": 0.781512439250946, "learning_rate": 2.2393731735084126e-06, "loss": 9.3085, "step": 1181 }, { "epoch": 0.8294736842105264, "grad_norm": 1.2935806512832642, "learning_rate": 2.2215214686472083e-06, "loss": 9.316, "step": 1182 }, { "epoch": 0.8301754385964912, "grad_norm": 0.5836821794509888, "learning_rate": 2.2037355112733286e-06, "loss": 9.2988, "step": 1183 }, { "epoch": 0.8308771929824561, "grad_norm": 0.6358296871185303, "learning_rate": 2.1860153928986065e-06, "loss": 9.2732, "step": 1184 }, { "epoch": 0.8315789473684211, "grad_norm": 0.4631942808628082, "learning_rate": 2.168361204696129e-06, "loss": 9.2958, "step": 1185 }, { "epoch": 0.832280701754386, "grad_norm": 0.9146401286125183, "learning_rate": 2.1507730374997525e-06, "loss": 9.2496, "step": 1186 }, { "epoch": 0.8329824561403508, "grad_norm": 0.763505220413208, "learning_rate": 2.1332509818036415e-06, "loss": 9.2626, "step": 1187 }, { "epoch": 0.8336842105263158, "grad_norm": 0.7837817072868347, "learning_rate": 2.1157951277618176e-06, "loss": 9.2974, "step": 1188 }, { "epoch": 0.8343859649122807, "grad_norm": 0.7540220022201538, "learning_rate": 2.098405565187674e-06, "loss": 9.2464, "step": 1189 }, { "epoch": 0.8350877192982457, "grad_norm": 0.8587038516998291, "learning_rate": 2.081082383553523e-06, "loss": 9.2811, "step": 1190 }, { "epoch": 0.8357894736842105, "grad_norm": 0.86656653881073, "learning_rate": 2.063825671990135e-06, "loss": 9.3044, "step": 1191 }, { "epoch": 0.8364912280701754, "grad_norm": 0.8285003304481506, "learning_rate": 2.046635519286289e-06, "loss": 9.2717, "step": 1192 }, { "epoch": 0.8371929824561404, "grad_norm": 0.6592754125595093, "learning_rate": 2.029512013888297e-06, "loss": 9.2899, "step": 1193 }, { "epoch": 0.8378947368421052, "grad_norm": 0.9481545090675354, "learning_rate": 2.012455243899564e-06, "loss": 9.2425, "step": 1194 }, { "epoch": 0.8385964912280702, "grad_norm": 0.7943276762962341, "learning_rate": 1.995465297080127e-06, "loss": 9.2853, "step": 1195 }, { "epoch": 0.8392982456140351, "grad_norm": 1.0370522737503052, "learning_rate": 1.9785422608462094e-06, "loss": 9.2781, "step": 1196 }, { "epoch": 0.84, "grad_norm": 1.104557991027832, "learning_rate": 1.9616862222697618e-06, "loss": 9.2907, "step": 1197 }, { "epoch": 0.840701754385965, "grad_norm": 0.9841060638427734, "learning_rate": 1.944897268078033e-06, "loss": 9.2539, "step": 1198 }, { "epoch": 0.8414035087719298, "grad_norm": 1.0993660688400269, "learning_rate": 1.9281754846530963e-06, "loss": 9.2551, "step": 1199 }, { "epoch": 0.8421052631578947, "grad_norm": 0.5022768974304199, "learning_rate": 1.9115209580314264e-06, "loss": 9.2872, "step": 1200 }, { "epoch": 0.8428070175438597, "grad_norm": 0.6553236842155457, "learning_rate": 1.8949337739034562e-06, "loss": 9.2714, "step": 1201 }, { "epoch": 0.8435087719298245, "grad_norm": 0.8013730645179749, "learning_rate": 1.8784140176131176e-06, "loss": 9.3094, "step": 1202 }, { "epoch": 0.8442105263157895, "grad_norm": 1.0627552270889282, "learning_rate": 1.8619617741574236e-06, "loss": 9.2927, "step": 1203 }, { "epoch": 0.8449122807017544, "grad_norm": 1.2952901124954224, "learning_rate": 1.845577128186018e-06, "loss": 9.3098, "step": 1204 }, { "epoch": 0.8456140350877193, "grad_norm": 0.49073752760887146, "learning_rate": 1.8292601640007428e-06, "loss": 9.247, "step": 1205 }, { "epoch": 0.8463157894736842, "grad_norm": 0.8297500014305115, "learning_rate": 1.8130109655552073e-06, "loss": 9.2773, "step": 1206 }, { "epoch": 0.8470175438596491, "grad_norm": 0.6059957146644592, "learning_rate": 1.7968296164543597e-06, "loss": 9.2977, "step": 1207 }, { "epoch": 0.847719298245614, "grad_norm": 0.4892279803752899, "learning_rate": 1.7807161999540432e-06, "loss": 9.3117, "step": 1208 }, { "epoch": 0.848421052631579, "grad_norm": 0.7806141972541809, "learning_rate": 1.7646707989605787e-06, "loss": 9.283, "step": 1209 }, { "epoch": 0.8491228070175438, "grad_norm": 0.6195714473724365, "learning_rate": 1.7486934960303325e-06, "loss": 9.3553, "step": 1210 }, { "epoch": 0.8498245614035088, "grad_norm": 0.6760907769203186, "learning_rate": 1.732784373369305e-06, "loss": 9.2734, "step": 1211 }, { "epoch": 0.8505263157894737, "grad_norm": 0.6313305497169495, "learning_rate": 1.7169435128326888e-06, "loss": 9.2552, "step": 1212 }, { "epoch": 0.8512280701754386, "grad_norm": 0.4833400845527649, "learning_rate": 1.701170995924451e-06, "loss": 9.2681, "step": 1213 }, { "epoch": 0.8519298245614035, "grad_norm": 0.6414999961853027, "learning_rate": 1.6854669037969328e-06, "loss": 9.3096, "step": 1214 }, { "epoch": 0.8526315789473684, "grad_norm": 0.7595299482345581, "learning_rate": 1.6698313172504127e-06, "loss": 9.3001, "step": 1215 }, { "epoch": 0.8533333333333334, "grad_norm": 0.8302679657936096, "learning_rate": 1.6542643167326822e-06, "loss": 9.3234, "step": 1216 }, { "epoch": 0.8540350877192983, "grad_norm": 0.7526594400405884, "learning_rate": 1.638765982338667e-06, "loss": 9.2967, "step": 1217 }, { "epoch": 0.8547368421052631, "grad_norm": 1.1550618410110474, "learning_rate": 1.6233363938099837e-06, "loss": 9.3262, "step": 1218 }, { "epoch": 0.8554385964912281, "grad_norm": 0.7901216149330139, "learning_rate": 1.6079756305345356e-06, "loss": 9.3424, "step": 1219 }, { "epoch": 0.856140350877193, "grad_norm": 0.6668928861618042, "learning_rate": 1.5926837715461185e-06, "loss": 9.2764, "step": 1220 }, { "epoch": 0.8568421052631578, "grad_norm": 0.5101781487464905, "learning_rate": 1.5774608955239956e-06, "loss": 9.3055, "step": 1221 }, { "epoch": 0.8575438596491228, "grad_norm": 0.8939265608787537, "learning_rate": 1.5623070807925021e-06, "loss": 9.2459, "step": 1222 }, { "epoch": 0.8582456140350877, "grad_norm": 0.9653924107551575, "learning_rate": 1.5472224053206397e-06, "loss": 9.2718, "step": 1223 }, { "epoch": 0.8589473684210527, "grad_norm": 0.5022056698799133, "learning_rate": 1.5322069467216836e-06, "loss": 9.2667, "step": 1224 }, { "epoch": 0.8596491228070176, "grad_norm": 0.9668464660644531, "learning_rate": 1.5172607822527702e-06, "loss": 9.2548, "step": 1225 }, { "epoch": 0.8603508771929824, "grad_norm": 1.0223249197006226, "learning_rate": 1.5023839888145064e-06, "loss": 9.2806, "step": 1226 }, { "epoch": 0.8610526315789474, "grad_norm": 0.7840118408203125, "learning_rate": 1.487576642950572e-06, "loss": 9.2655, "step": 1227 }, { "epoch": 0.8617543859649123, "grad_norm": 0.8285832405090332, "learning_rate": 1.4728388208473297e-06, "loss": 9.2643, "step": 1228 }, { "epoch": 0.8624561403508771, "grad_norm": 0.7683665156364441, "learning_rate": 1.458170598333426e-06, "loss": 9.2702, "step": 1229 }, { "epoch": 0.8631578947368421, "grad_norm": 0.4975138306617737, "learning_rate": 1.4435720508794153e-06, "loss": 9.286, "step": 1230 }, { "epoch": 0.863859649122807, "grad_norm": 1.2896753549575806, "learning_rate": 1.4290432535973497e-06, "loss": 9.2248, "step": 1231 }, { "epoch": 0.864561403508772, "grad_norm": 0.655225932598114, "learning_rate": 1.4145842812404091e-06, "loss": 9.278, "step": 1232 }, { "epoch": 0.8652631578947368, "grad_norm": 0.5011946558952332, "learning_rate": 1.4001952082025198e-06, "loss": 9.2789, "step": 1233 }, { "epoch": 0.8659649122807017, "grad_norm": 0.576413094997406, "learning_rate": 1.3858761085179506e-06, "loss": 9.2846, "step": 1234 }, { "epoch": 0.8666666666666667, "grad_norm": 0.8720753192901611, "learning_rate": 1.371627055860951e-06, "loss": 9.2881, "step": 1235 }, { "epoch": 0.8673684210526316, "grad_norm": 0.5794653296470642, "learning_rate": 1.3574481235453767e-06, "loss": 9.2999, "step": 1236 }, { "epoch": 0.8680701754385964, "grad_norm": 0.8971115946769714, "learning_rate": 1.3433393845242825e-06, "loss": 9.2919, "step": 1237 }, { "epoch": 0.8687719298245614, "grad_norm": 1.187172770500183, "learning_rate": 1.3293009113895777e-06, "loss": 9.2673, "step": 1238 }, { "epoch": 0.8694736842105263, "grad_norm": 1.0613634586334229, "learning_rate": 1.3153327763716438e-06, "loss": 9.3375, "step": 1239 }, { "epoch": 0.8701754385964913, "grad_norm": 0.7414213418960571, "learning_rate": 1.3014350513389556e-06, "loss": 9.2703, "step": 1240 }, { "epoch": 0.8708771929824561, "grad_norm": 0.6549016833305359, "learning_rate": 1.2876078077977187e-06, "loss": 9.2732, "step": 1241 }, { "epoch": 0.871578947368421, "grad_norm": 1.628530502319336, "learning_rate": 1.2738511168914947e-06, "loss": 9.2586, "step": 1242 }, { "epoch": 0.872280701754386, "grad_norm": 0.876582145690918, "learning_rate": 1.2601650494008477e-06, "loss": 9.2809, "step": 1243 }, { "epoch": 0.8729824561403509, "grad_norm": 0.760142982006073, "learning_rate": 1.2465496757429651e-06, "loss": 9.3132, "step": 1244 }, { "epoch": 0.8736842105263158, "grad_norm": 1.1385596990585327, "learning_rate": 1.2330050659713032e-06, "loss": 9.3632, "step": 1245 }, { "epoch": 0.8743859649122807, "grad_norm": 0.5907528400421143, "learning_rate": 1.2195312897752315e-06, "loss": 9.2705, "step": 1246 }, { "epoch": 0.8750877192982456, "grad_norm": 0.7396604418754578, "learning_rate": 1.2061284164796637e-06, "loss": 9.298, "step": 1247 }, { "epoch": 0.8757894736842106, "grad_norm": 0.7377723455429077, "learning_rate": 1.1927965150447013e-06, "loss": 9.3318, "step": 1248 }, { "epoch": 0.8764912280701754, "grad_norm": 0.5102686882019043, "learning_rate": 1.1795356540652953e-06, "loss": 9.2695, "step": 1249 }, { "epoch": 0.8771929824561403, "grad_norm": 1.072851300239563, "learning_rate": 1.1663459017708694e-06, "loss": 9.3043, "step": 1250 }, { "epoch": 0.8778947368421053, "grad_norm": 0.4720437228679657, "learning_rate": 1.1532273260249854e-06, "loss": 9.339, "step": 1251 }, { "epoch": 0.8785964912280702, "grad_norm": 0.723218560218811, "learning_rate": 1.140179994324994e-06, "loss": 9.297, "step": 1252 }, { "epoch": 0.8792982456140351, "grad_norm": 0.6513776183128357, "learning_rate": 1.1272039738016782e-06, "loss": 9.3046, "step": 1253 }, { "epoch": 0.88, "grad_norm": 0.5583761930465698, "learning_rate": 1.1142993312189088e-06, "loss": 9.2743, "step": 1254 }, { "epoch": 0.8807017543859649, "grad_norm": 0.683207631111145, "learning_rate": 1.1014661329733107e-06, "loss": 9.2953, "step": 1255 }, { "epoch": 0.8814035087719299, "grad_norm": 0.6176303029060364, "learning_rate": 1.0887044450939177e-06, "loss": 9.3462, "step": 1256 }, { "epoch": 0.8821052631578947, "grad_norm": 0.6794260144233704, "learning_rate": 1.0760143332418215e-06, "loss": 9.2748, "step": 1257 }, { "epoch": 0.8828070175438596, "grad_norm": 0.8710452318191528, "learning_rate": 1.06339586270985e-06, "loss": 9.2793, "step": 1258 }, { "epoch": 0.8835087719298246, "grad_norm": 0.6857009530067444, "learning_rate": 1.0508490984222207e-06, "loss": 9.3173, "step": 1259 }, { "epoch": 0.8842105263157894, "grad_norm": 1.096170425415039, "learning_rate": 1.0383741049342117e-06, "loss": 9.352, "step": 1260 }, { "epoch": 0.8849122807017544, "grad_norm": 0.6774353981018066, "learning_rate": 1.025970946431824e-06, "loss": 9.3382, "step": 1261 }, { "epoch": 0.8856140350877193, "grad_norm": 0.8981767296791077, "learning_rate": 1.0136396867314617e-06, "loss": 9.2491, "step": 1262 }, { "epoch": 0.8863157894736842, "grad_norm": 0.602988600730896, "learning_rate": 1.0013803892795947e-06, "loss": 9.3063, "step": 1263 }, { "epoch": 0.8870175438596491, "grad_norm": 0.5751398205757141, "learning_rate": 9.891931171524288e-07, "loss": 9.3053, "step": 1264 }, { "epoch": 0.887719298245614, "grad_norm": 0.7949073314666748, "learning_rate": 9.770779330555978e-07, "loss": 9.2539, "step": 1265 }, { "epoch": 0.888421052631579, "grad_norm": 0.6963176131248474, "learning_rate": 9.650348993238213e-07, "loss": 9.3281, "step": 1266 }, { "epoch": 0.8891228070175439, "grad_norm": 0.45954564213752747, "learning_rate": 9.530640779205929e-07, "loss": 9.2874, "step": 1267 }, { "epoch": 0.8898245614035087, "grad_norm": 0.47060078382492065, "learning_rate": 9.411655304378696e-07, "loss": 9.2712, "step": 1268 }, { "epoch": 0.8905263157894737, "grad_norm": 0.4962110221385956, "learning_rate": 9.293393180957405e-07, "loss": 9.2668, "step": 1269 }, { "epoch": 0.8912280701754386, "grad_norm": 0.9582124948501587, "learning_rate": 9.175855017421103e-07, "loss": 9.3727, "step": 1270 }, { "epoch": 0.8919298245614035, "grad_norm": 0.9241513609886169, "learning_rate": 9.059041418524105e-07, "loss": 9.3322, "step": 1271 }, { "epoch": 0.8926315789473684, "grad_norm": 0.7752434015274048, "learning_rate": 8.942952985292591e-07, "loss": 9.2882, "step": 1272 }, { "epoch": 0.8933333333333333, "grad_norm": 0.5331870317459106, "learning_rate": 8.827590315021689e-07, "loss": 9.3126, "step": 1273 }, { "epoch": 0.8940350877192983, "grad_norm": 0.7641336917877197, "learning_rate": 8.712954001272299e-07, "loss": 9.2858, "step": 1274 }, { "epoch": 0.8947368421052632, "grad_norm": 0.6162216663360596, "learning_rate": 8.59904463386818e-07, "loss": 9.3246, "step": 1275 }, { "epoch": 0.895438596491228, "grad_norm": 0.6761687994003296, "learning_rate": 8.485862798892763e-07, "loss": 9.244, "step": 1276 }, { "epoch": 0.896140350877193, "grad_norm": 0.7844477891921997, "learning_rate": 8.373409078686173e-07, "loss": 9.3022, "step": 1277 }, { "epoch": 0.8968421052631579, "grad_norm": 0.6557435989379883, "learning_rate": 8.261684051842333e-07, "loss": 9.3098, "step": 1278 }, { "epoch": 0.8975438596491228, "grad_norm": 1.1489938497543335, "learning_rate": 8.150688293205899e-07, "loss": 9.2533, "step": 1279 }, { "epoch": 0.8982456140350877, "grad_norm": 0.846565306186676, "learning_rate": 8.040422373869211e-07, "loss": 9.2824, "step": 1280 }, { "epoch": 0.8989473684210526, "grad_norm": 0.6262645721435547, "learning_rate": 7.930886861169612e-07, "loss": 9.2921, "step": 1281 }, { "epoch": 0.8996491228070176, "grad_norm": 0.6341776847839355, "learning_rate": 7.822082318686286e-07, "loss": 9.2289, "step": 1282 }, { "epoch": 0.9003508771929825, "grad_norm": 0.6175907850265503, "learning_rate": 7.714009306237424e-07, "loss": 9.287, "step": 1283 }, { "epoch": 0.9010526315789473, "grad_norm": 0.6147516965866089, "learning_rate": 7.606668379877463e-07, "loss": 9.2424, "step": 1284 }, { "epoch": 0.9017543859649123, "grad_norm": 0.5519872307777405, "learning_rate": 7.500060091894067e-07, "loss": 9.2944, "step": 1285 }, { "epoch": 0.9024561403508772, "grad_norm": 0.8573387861251831, "learning_rate": 7.394184990805336e-07, "loss": 9.2936, "step": 1286 }, { "epoch": 0.9031578947368422, "grad_norm": 0.574292004108429, "learning_rate": 7.28904362135705e-07, "loss": 9.3047, "step": 1287 }, { "epoch": 0.903859649122807, "grad_norm": 0.7398000955581665, "learning_rate": 7.184636524519761e-07, "loss": 9.3, "step": 1288 }, { "epoch": 0.9045614035087719, "grad_norm": 1.512105107307434, "learning_rate": 7.080964237486093e-07, "loss": 9.2979, "step": 1289 }, { "epoch": 0.9052631578947369, "grad_norm": 0.7230533361434937, "learning_rate": 6.978027293667943e-07, "loss": 9.2881, "step": 1290 }, { "epoch": 0.9059649122807018, "grad_norm": 0.5828121304512024, "learning_rate": 6.875826222693704e-07, "loss": 9.2954, "step": 1291 }, { "epoch": 0.9066666666666666, "grad_norm": 1.2390177249908447, "learning_rate": 6.774361550405644e-07, "loss": 9.3323, "step": 1292 }, { "epoch": 0.9073684210526316, "grad_norm": 0.7031421065330505, "learning_rate": 6.673633798857032e-07, "loss": 9.3264, "step": 1293 }, { "epoch": 0.9080701754385965, "grad_norm": 0.43997836112976074, "learning_rate": 6.573643486309666e-07, "loss": 9.3055, "step": 1294 }, { "epoch": 0.9087719298245615, "grad_norm": 1.001800537109375, "learning_rate": 6.474391127231033e-07, "loss": 9.3236, "step": 1295 }, { "epoch": 0.9094736842105263, "grad_norm": 0.944067120552063, "learning_rate": 6.375877232291688e-07, "loss": 9.2694, "step": 1296 }, { "epoch": 0.9101754385964912, "grad_norm": 1.0673757791519165, "learning_rate": 6.278102308362744e-07, "loss": 9.2675, "step": 1297 }, { "epoch": 0.9108771929824562, "grad_norm": 0.6338788866996765, "learning_rate": 6.181066858513118e-07, "loss": 9.2977, "step": 1298 }, { "epoch": 0.911578947368421, "grad_norm": 0.5889933109283447, "learning_rate": 6.084771382006993e-07, "loss": 9.3, "step": 1299 }, { "epoch": 0.9122807017543859, "grad_norm": 0.6636276841163635, "learning_rate": 5.989216374301343e-07, "loss": 9.2984, "step": 1300 }, { "epoch": 0.9129824561403509, "grad_norm": 0.6937736868858337, "learning_rate": 5.894402327043242e-07, "loss": 9.291, "step": 1301 }, { "epoch": 0.9136842105263158, "grad_norm": 0.6600638031959534, "learning_rate": 5.800329728067344e-07, "loss": 9.3044, "step": 1302 }, { "epoch": 0.9143859649122807, "grad_norm": 0.9972425699234009, "learning_rate": 5.706999061393526e-07, "loss": 9.3346, "step": 1303 }, { "epoch": 0.9150877192982456, "grad_norm": 0.6118648648262024, "learning_rate": 5.614410807224246e-07, "loss": 9.2778, "step": 1304 }, { "epoch": 0.9157894736842105, "grad_norm": 0.9737737774848938, "learning_rate": 5.52256544194209e-07, "loss": 9.2468, "step": 1305 }, { "epoch": 0.9164912280701755, "grad_norm": 0.44614607095718384, "learning_rate": 5.431463438107431e-07, "loss": 9.2826, "step": 1306 }, { "epoch": 0.9171929824561403, "grad_norm": 1.1522427797317505, "learning_rate": 5.341105264455853e-07, "loss": 9.3327, "step": 1307 }, { "epoch": 0.9178947368421052, "grad_norm": 1.1675831079483032, "learning_rate": 5.251491385895802e-07, "loss": 9.3124, "step": 1308 }, { "epoch": 0.9185964912280702, "grad_norm": 0.9733400344848633, "learning_rate": 5.162622263506234e-07, "loss": 9.2989, "step": 1309 }, { "epoch": 0.9192982456140351, "grad_norm": 0.7710822224617004, "learning_rate": 5.074498354534224e-07, "loss": 9.2455, "step": 1310 }, { "epoch": 0.92, "grad_norm": 0.8716288208961487, "learning_rate": 4.987120112392513e-07, "loss": 9.2585, "step": 1311 }, { "epoch": 0.9207017543859649, "grad_norm": 1.1828954219818115, "learning_rate": 4.900487986657326e-07, "loss": 9.3672, "step": 1312 }, { "epoch": 0.9214035087719298, "grad_norm": 0.9362418055534363, "learning_rate": 4.814602423065961e-07, "loss": 9.2581, "step": 1313 }, { "epoch": 0.9221052631578948, "grad_norm": 0.844870924949646, "learning_rate": 4.729463863514538e-07, "loss": 9.2455, "step": 1314 }, { "epoch": 0.9228070175438596, "grad_norm": 0.7219216227531433, "learning_rate": 4.645072746055684e-07, "loss": 9.3264, "step": 1315 }, { "epoch": 0.9235087719298246, "grad_norm": 0.5116351246833801, "learning_rate": 4.5614295048963694e-07, "loss": 9.2874, "step": 1316 }, { "epoch": 0.9242105263157895, "grad_norm": 1.2209937572479248, "learning_rate": 4.478534570395543e-07, "loss": 9.3008, "step": 1317 }, { "epoch": 0.9249122807017544, "grad_norm": 0.6466459035873413, "learning_rate": 4.396388369061999e-07, "loss": 9.2648, "step": 1318 }, { "epoch": 0.9256140350877193, "grad_norm": 0.4938792884349823, "learning_rate": 4.314991323552231e-07, "loss": 9.2939, "step": 1319 }, { "epoch": 0.9263157894736842, "grad_norm": 1.0042719841003418, "learning_rate": 4.2343438526681157e-07, "loss": 9.3175, "step": 1320 }, { "epoch": 0.9270175438596491, "grad_norm": 1.3483535051345825, "learning_rate": 4.1544463713548806e-07, "loss": 9.3137, "step": 1321 }, { "epoch": 0.927719298245614, "grad_norm": 0.7683460116386414, "learning_rate": 4.0752992906989086e-07, "loss": 9.292, "step": 1322 }, { "epoch": 0.9284210526315789, "grad_norm": 0.7723397612571716, "learning_rate": 3.996903017925668e-07, "loss": 9.2843, "step": 1323 }, { "epoch": 0.9291228070175439, "grad_norm": 0.4748900830745697, "learning_rate": 3.9192579563975703e-07, "loss": 9.2907, "step": 1324 }, { "epoch": 0.9298245614035088, "grad_norm": 0.6964700818061829, "learning_rate": 3.842364505611884e-07, "loss": 9.3495, "step": 1325 }, { "epoch": 0.9305263157894736, "grad_norm": 0.4808695614337921, "learning_rate": 3.766223061198787e-07, "loss": 9.3034, "step": 1326 }, { "epoch": 0.9312280701754386, "grad_norm": 0.5959118604660034, "learning_rate": 3.6908340149191864e-07, "loss": 9.2836, "step": 1327 }, { "epoch": 0.9319298245614035, "grad_norm": 0.5712388753890991, "learning_rate": 3.616197754662803e-07, "loss": 9.2823, "step": 1328 }, { "epoch": 0.9326315789473684, "grad_norm": 1.1176140308380127, "learning_rate": 3.542314664446139e-07, "loss": 9.2873, "step": 1329 }, { "epoch": 0.9333333333333333, "grad_norm": 0.49250122904777527, "learning_rate": 3.4691851244104957e-07, "loss": 9.3547, "step": 1330 }, { "epoch": 0.9340350877192982, "grad_norm": 1.2859071493148804, "learning_rate": 3.396809510820026e-07, "loss": 9.3097, "step": 1331 }, { "epoch": 0.9347368421052632, "grad_norm": 0.6417174339294434, "learning_rate": 3.3251881960598195e-07, "loss": 9.3308, "step": 1332 }, { "epoch": 0.9354385964912281, "grad_norm": 1.1507272720336914, "learning_rate": 3.2543215486339704e-07, "loss": 9.3152, "step": 1333 }, { "epoch": 0.9361403508771929, "grad_norm": 0.5857771039009094, "learning_rate": 3.1842099331636455e-07, "loss": 9.2919, "step": 1334 }, { "epoch": 0.9368421052631579, "grad_norm": 0.6887170672416687, "learning_rate": 3.114853710385268e-07, "loss": 9.2905, "step": 1335 }, { "epoch": 0.9375438596491228, "grad_norm": 0.6743891835212708, "learning_rate": 3.0462532371486216e-07, "loss": 9.3757, "step": 1336 }, { "epoch": 0.9382456140350878, "grad_norm": 1.0597690343856812, "learning_rate": 2.978408866415033e-07, "loss": 9.2918, "step": 1337 }, { "epoch": 0.9389473684210526, "grad_norm": 0.9499934315681458, "learning_rate": 2.911320947255558e-07, "loss": 9.3108, "step": 1338 }, { "epoch": 0.9396491228070175, "grad_norm": 0.5149950385093689, "learning_rate": 2.844989824849181e-07, "loss": 9.2784, "step": 1339 }, { "epoch": 0.9403508771929825, "grad_norm": 0.6834176778793335, "learning_rate": 2.7794158404810034e-07, "loss": 9.2973, "step": 1340 }, { "epoch": 0.9410526315789474, "grad_norm": 0.84538334608078, "learning_rate": 2.714599331540557e-07, "loss": 9.3374, "step": 1341 }, { "epoch": 0.9417543859649122, "grad_norm": 1.3987867832183838, "learning_rate": 2.65054063152001e-07, "loss": 9.2666, "step": 1342 }, { "epoch": 0.9424561403508772, "grad_norm": 1.2480546236038208, "learning_rate": 2.587240070012481e-07, "loss": 9.3063, "step": 1343 }, { "epoch": 0.9431578947368421, "grad_norm": 0.707427442073822, "learning_rate": 2.524697972710344e-07, "loss": 9.2775, "step": 1344 }, { "epoch": 0.9438596491228071, "grad_norm": 1.1705459356307983, "learning_rate": 2.462914661403526e-07, "loss": 9.2958, "step": 1345 }, { "epoch": 0.9445614035087719, "grad_norm": 0.8673039078712463, "learning_rate": 2.4018904539778617e-07, "loss": 9.2906, "step": 1346 }, { "epoch": 0.9452631578947368, "grad_norm": 0.6705150008201599, "learning_rate": 2.341625664413477e-07, "loss": 9.3026, "step": 1347 }, { "epoch": 0.9459649122807018, "grad_norm": 1.0817888975143433, "learning_rate": 2.2821206027831886e-07, "loss": 9.3419, "step": 1348 }, { "epoch": 0.9466666666666667, "grad_norm": 0.6626378893852234, "learning_rate": 2.2233755752508244e-07, "loss": 9.3535, "step": 1349 }, { "epoch": 0.9473684210526315, "grad_norm": 0.5721578001976013, "learning_rate": 2.1653908840697567e-07, "loss": 9.3142, "step": 1350 }, { "epoch": 0.9480701754385965, "grad_norm": 1.1412978172302246, "learning_rate": 2.108166827581254e-07, "loss": 9.2928, "step": 1351 }, { "epoch": 0.9487719298245614, "grad_norm": 0.8710858225822449, "learning_rate": 2.051703700213031e-07, "loss": 9.2524, "step": 1352 }, { "epoch": 0.9494736842105264, "grad_norm": 1.0548758506774902, "learning_rate": 1.996001792477653e-07, "loss": 9.2943, "step": 1353 }, { "epoch": 0.9501754385964912, "grad_norm": 0.6089422106742859, "learning_rate": 1.9410613909710828e-07, "loss": 9.288, "step": 1354 }, { "epoch": 0.9508771929824561, "grad_norm": 1.0615090131759644, "learning_rate": 1.886882778371235e-07, "loss": 9.2592, "step": 1355 }, { "epoch": 0.9515789473684211, "grad_norm": 0.7715620398521423, "learning_rate": 1.8334662334364427e-07, "loss": 9.2617, "step": 1356 }, { "epoch": 0.952280701754386, "grad_norm": 1.0291380882263184, "learning_rate": 1.780812031004092e-07, "loss": 9.3417, "step": 1357 }, { "epoch": 0.9529824561403509, "grad_norm": 1.8367843627929688, "learning_rate": 1.7289204419891903e-07, "loss": 9.2923, "step": 1358 }, { "epoch": 0.9536842105263158, "grad_norm": 1.046378493309021, "learning_rate": 1.6777917333829496e-07, "loss": 9.2613, "step": 1359 }, { "epoch": 0.9543859649122807, "grad_norm": 0.665980339050293, "learning_rate": 1.6274261682514556e-07, "loss": 9.2597, "step": 1360 }, { "epoch": 0.9550877192982457, "grad_norm": 0.5601902008056641, "learning_rate": 1.5778240057342518e-07, "loss": 9.2952, "step": 1361 }, { "epoch": 0.9557894736842105, "grad_norm": 0.7963943481445312, "learning_rate": 1.5289855010430896e-07, "loss": 9.2884, "step": 1362 }, { "epoch": 0.9564912280701754, "grad_norm": 0.9996064901351929, "learning_rate": 1.4809109054605142e-07, "loss": 9.3273, "step": 1363 }, { "epoch": 0.9571929824561404, "grad_norm": 1.2028676271438599, "learning_rate": 1.4336004663386638e-07, "loss": 9.296, "step": 1364 }, { "epoch": 0.9578947368421052, "grad_norm": 0.6990739107131958, "learning_rate": 1.3870544270979724e-07, "loss": 9.2871, "step": 1365 }, { "epoch": 0.9585964912280702, "grad_norm": 1.1471024751663208, "learning_rate": 1.3412730272258367e-07, "loss": 9.3569, "step": 1366 }, { "epoch": 0.9592982456140351, "grad_norm": 0.8664763569831848, "learning_rate": 1.2962565022755334e-07, "loss": 9.3002, "step": 1367 }, { "epoch": 0.96, "grad_norm": 1.080022931098938, "learning_rate": 1.252005083864871e-07, "loss": 9.2743, "step": 1368 }, { "epoch": 0.960701754385965, "grad_norm": 0.7955760955810547, "learning_rate": 1.2085189996750735e-07, "loss": 9.3849, "step": 1369 }, { "epoch": 0.9614035087719298, "grad_norm": 0.7857974171638489, "learning_rate": 1.1657984734495808e-07, "loss": 9.2604, "step": 1370 }, { "epoch": 0.9621052631578947, "grad_norm": 1.2337592840194702, "learning_rate": 1.1238437249929012e-07, "loss": 9.2728, "step": 1371 }, { "epoch": 0.9628070175438597, "grad_norm": 0.9785636067390442, "learning_rate": 1.082654970169511e-07, "loss": 9.307, "step": 1372 }, { "epoch": 0.9635087719298245, "grad_norm": 0.7601907253265381, "learning_rate": 1.0422324209026557e-07, "loss": 9.2808, "step": 1373 }, { "epoch": 0.9642105263157895, "grad_norm": 1.1130883693695068, "learning_rate": 1.0025762851733845e-07, "loss": 9.307, "step": 1374 }, { "epoch": 0.9649122807017544, "grad_norm": 0.6984025239944458, "learning_rate": 9.636867670194005e-08, "loss": 9.3053, "step": 1375 }, { "epoch": 0.9656140350877193, "grad_norm": 0.864722728729248, "learning_rate": 9.255640665339794e-08, "loss": 9.2729, "step": 1376 }, { "epoch": 0.9663157894736842, "grad_norm": 0.5794128775596619, "learning_rate": 8.882083798650853e-08, "loss": 9.2483, "step": 1377 }, { "epoch": 0.9670175438596491, "grad_norm": 0.7050272822380066, "learning_rate": 8.5161989921414e-08, "loss": 9.3044, "step": 1378 }, { "epoch": 0.9677192982456141, "grad_norm": 0.6276372671127319, "learning_rate": 8.15798812835239e-08, "loss": 9.3517, "step": 1379 }, { "epoch": 0.968421052631579, "grad_norm": 0.7937048077583313, "learning_rate": 7.807453050340863e-08, "loss": 9.2707, "step": 1380 }, { "epoch": 0.9691228070175438, "grad_norm": 0.8302100300788879, "learning_rate": 7.464595561670784e-08, "loss": 9.2914, "step": 1381 }, { "epoch": 0.9698245614035088, "grad_norm": 0.9598246216773987, "learning_rate": 7.129417426402885e-08, "loss": 9.2833, "step": 1382 }, { "epoch": 0.9705263157894737, "grad_norm": 0.6261811256408691, "learning_rate": 6.801920369087167e-08, "loss": 9.2839, "step": 1383 }, { "epoch": 0.9712280701754386, "grad_norm": 0.8968747854232788, "learning_rate": 6.482106074752747e-08, "loss": 9.3001, "step": 1384 }, { "epoch": 0.9719298245614035, "grad_norm": 0.7871757745742798, "learning_rate": 6.169976188899362e-08, "loss": 9.305, "step": 1385 }, { "epoch": 0.9726315789473684, "grad_norm": 0.7515054941177368, "learning_rate": 5.8655323174895414e-08, "loss": 9.2735, "step": 1386 }, { "epoch": 0.9733333333333334, "grad_norm": 0.5006849765777588, "learning_rate": 5.5687760269397815e-08, "loss": 9.3104, "step": 1387 }, { "epoch": 0.9740350877192983, "grad_norm": 0.7825890183448792, "learning_rate": 5.279708844112718e-08, "loss": 9.3239, "step": 1388 }, { "epoch": 0.9747368421052631, "grad_norm": 0.6718355417251587, "learning_rate": 4.9983322563089684e-08, "loss": 9.3174, "step": 1389 }, { "epoch": 0.9754385964912281, "grad_norm": 1.1208128929138184, "learning_rate": 4.7246477112599665e-08, "loss": 9.2854, "step": 1390 }, { "epoch": 0.976140350877193, "grad_norm": 0.9404573440551758, "learning_rate": 4.4586566171199713e-08, "loss": 9.283, "step": 1391 }, { "epoch": 0.9768421052631578, "grad_norm": 1.3447734117507935, "learning_rate": 4.200360342459575e-08, "loss": 9.2643, "step": 1392 }, { "epoch": 0.9775438596491228, "grad_norm": 0.48879286646842957, "learning_rate": 3.949760216257703e-08, "loss": 9.2633, "step": 1393 }, { "epoch": 0.9782456140350877, "grad_norm": 0.9586650729179382, "learning_rate": 3.706857527895291e-08, "loss": 9.2875, "step": 1394 }, { "epoch": 0.9789473684210527, "grad_norm": 2.06345796585083, "learning_rate": 3.471653527149121e-08, "loss": 9.341, "step": 1395 }, { "epoch": 0.9796491228070175, "grad_norm": 0.7295388579368591, "learning_rate": 3.244149424184328e-08, "loss": 9.3013, "step": 1396 }, { "epoch": 0.9803508771929824, "grad_norm": 1.3420525789260864, "learning_rate": 3.024346389549071e-08, "loss": 9.3551, "step": 1397 }, { "epoch": 0.9810526315789474, "grad_norm": 0.7130494117736816, "learning_rate": 2.8122455541678693e-08, "loss": 9.2939, "step": 1398 }, { "epoch": 0.9817543859649123, "grad_norm": 1.442528247833252, "learning_rate": 2.6078480093366107e-08, "loss": 9.3577, "step": 1399 }, { "epoch": 0.9824561403508771, "grad_norm": 0.6827643513679504, "learning_rate": 2.4111548067158873e-08, "loss": 9.2915, "step": 1400 }, { "epoch": 0.9831578947368421, "grad_norm": 0.5569621920585632, "learning_rate": 2.2221669583261662e-08, "loss": 9.2689, "step": 1401 }, { "epoch": 0.983859649122807, "grad_norm": 0.7184551358222961, "learning_rate": 2.0408854365431273e-08, "loss": 9.2873, "step": 1402 }, { "epoch": 0.984561403508772, "grad_norm": 0.8739628195762634, "learning_rate": 1.867311174091335e-08, "loss": 9.2934, "step": 1403 }, { "epoch": 0.9852631578947368, "grad_norm": 0.8907716274261475, "learning_rate": 1.7014450640404078e-08, "loss": 9.279, "step": 1404 }, { "epoch": 0.9859649122807017, "grad_norm": 0.9024187326431274, "learning_rate": 1.5432879598005212e-08, "loss": 9.3162, "step": 1405 }, { "epoch": 0.9866666666666667, "grad_norm": 0.6535196900367737, "learning_rate": 1.3928406751170797e-08, "loss": 9.2939, "step": 1406 }, { "epoch": 0.9873684210526316, "grad_norm": 0.5049386024475098, "learning_rate": 1.2501039840675521e-08, "loss": 9.3211, "step": 1407 }, { "epoch": 0.9880701754385965, "grad_norm": 1.2307685613632202, "learning_rate": 1.1150786210569752e-08, "loss": 9.2701, "step": 1408 }, { "epoch": 0.9887719298245614, "grad_norm": 0.6681743264198303, "learning_rate": 9.877652808144565e-09, "loss": 9.2755, "step": 1409 }, { "epoch": 0.9894736842105263, "grad_norm": 0.7622236609458923, "learning_rate": 8.681646183888448e-09, "loss": 9.314, "step": 1410 }, { "epoch": 0.9901754385964913, "grad_norm": 0.535606861114502, "learning_rate": 7.562772491463976e-09, "loss": 9.2928, "step": 1411 }, { "epoch": 0.9908771929824561, "grad_norm": 0.6235644817352295, "learning_rate": 6.521037487672854e-09, "loss": 9.2868, "step": 1412 }, { "epoch": 0.991578947368421, "grad_norm": 0.9170091152191162, "learning_rate": 5.556446532417603e-09, "loss": 9.3073, "step": 1413 }, { "epoch": 0.992280701754386, "grad_norm": 1.0240930318832397, "learning_rate": 4.669004588689907e-09, "loss": 9.2814, "step": 1414 }, { "epoch": 0.9929824561403509, "grad_norm": 0.6214323043823242, "learning_rate": 3.858716222530645e-09, "loss": 9.3608, "step": 1415 }, { "epoch": 0.9936842105263158, "grad_norm": 1.0377109050750732, "learning_rate": 3.125585603014902e-09, "loss": 9.2817, "step": 1416 }, { "epoch": 0.9943859649122807, "grad_norm": 0.5383632183074951, "learning_rate": 2.469616502225325e-09, "loss": 9.3259, "step": 1417 }, { "epoch": 0.9950877192982456, "grad_norm": 0.536026656627655, "learning_rate": 1.890812295237132e-09, "loss": 9.3033, "step": 1418 }, { "epoch": 0.9957894736842106, "grad_norm": 0.6129940152168274, "learning_rate": 1.3891759600997978e-09, "loss": 9.3006, "step": 1419 }, { "epoch": 0.9964912280701754, "grad_norm": 0.8955062627792358, "learning_rate": 9.647100778170658e-10, "loss": 9.2842, "step": 1420 }, { "epoch": 0.9971929824561403, "grad_norm": 0.6839841604232788, "learning_rate": 6.174168323402895e-10, "loss": 9.2558, "step": 1421 }, { "epoch": 0.9978947368421053, "grad_norm": 0.8883495330810547, "learning_rate": 3.4729801055510825e-10, "loss": 9.2309, "step": 1422 }, { "epoch": 0.9985964912280701, "grad_norm": 0.7661331295967102, "learning_rate": 1.5435500226812503e-10, "loss": 9.2945, "step": 1423 }, { "epoch": 0.9992982456140351, "grad_norm": 0.7996804714202881, "learning_rate": 3.858880020357613e-11, "loss": 9.3179, "step": 1424 }, { "epoch": 1.0, "grad_norm": 0.4968433678150177, "learning_rate": 0.0, "loss": 9.2744, "step": 1425 }, { "epoch": 1.0, "eval_loss": 9.295516014099121, "eval_runtime": 1.201, "eval_samples_per_second": 249.784, "eval_steps_per_second": 62.446, "step": 1425 } ], "logging_steps": 1, "max_steps": 1425, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4320619757568.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }